diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..df8a8ec7dfcea3bb0563e56eae267c1cfb138b64 Binary files /dev/null and b/.DS_Store differ diff --git a/.eslintignore b/.eslintignore new file mode 100644 index 0000000000000000000000000000000000000000..ce24915a6037a46f90fad5a54c1d30ae0f225a85 --- /dev/null +++ b/.eslintignore @@ -0,0 +1,5 @@ +extensions +extensions-disabled +extensions-builtin/sd_forge_controlnet +repositories +venv \ No newline at end of file diff --git a/.eslintrc.js b/.eslintrc.js new file mode 100644 index 0000000000000000000000000000000000000000..9c70eff85fb383f06ba8aa4acb8114feab7ab385 --- /dev/null +++ b/.eslintrc.js @@ -0,0 +1,96 @@ +/* global module */ +module.exports = { + env: { + browser: true, + es2021: true, + }, + extends: "eslint:recommended", + parserOptions: { + ecmaVersion: "latest", + }, + rules: { + "arrow-spacing": "error", + "block-spacing": "error", + "brace-style": "error", + "comma-dangle": ["error", "only-multiline"], + "comma-spacing": "error", + "comma-style": ["error", "last"], + "curly": ["error", "multi-line", "consistent"], + "eol-last": "error", + "func-call-spacing": "error", + "function-call-argument-newline": ["error", "consistent"], + "function-paren-newline": ["error", "consistent"], + "indent": ["error", 4], + "key-spacing": "error", + "keyword-spacing": "error", + "linebreak-style": ["error", "unix"], + "no-extra-semi": "error", + "no-mixed-spaces-and-tabs": "error", + "no-multi-spaces": "error", + "no-redeclare": ["error", {builtinGlobals: false}], + "no-trailing-spaces": "error", + "no-unused-vars": "off", + "no-whitespace-before-property": "error", + "object-curly-newline": ["error", {consistent: true, multiline: true}], + "object-curly-spacing": ["error", "never"], + "operator-linebreak": ["error", "after"], + "quote-props": ["error", "consistent-as-needed"], + "semi": ["error", "always"], + "semi-spacing": "error", + "semi-style": ["error", "last"], + "space-before-blocks": "error", + "space-before-function-paren": ["error", "never"], + "space-in-parens": ["error", "never"], + "space-infix-ops": "error", + "space-unary-ops": "error", + "switch-colon-spacing": "error", + "template-curly-spacing": ["error", "never"], + "unicode-bom": "error", + }, + globals: { + //script.js + gradioApp: "readonly", + executeCallbacks: "readonly", + onAfterUiUpdate: "readonly", + onOptionsChanged: "readonly", + onUiLoaded: "readonly", + onUiUpdate: "readonly", + uiCurrentTab: "writable", + uiElementInSight: "readonly", + uiElementIsVisible: "readonly", + //ui.js + opts: "writable", + all_gallery_buttons: "readonly", + selected_gallery_button: "readonly", + selected_gallery_index: "readonly", + switch_to_txt2img: "readonly", + switch_to_img2img_tab: "readonly", + switch_to_img2img: "readonly", + switch_to_sketch: "readonly", + switch_to_inpaint: "readonly", + switch_to_inpaint_sketch: "readonly", + switch_to_extras: "readonly", + get_tab_index: "readonly", + create_submit_args: "readonly", + restart_reload: "readonly", + updateInput: "readonly", + onEdit: "readonly", + //extraNetworks.js + requestGet: "readonly", + popup: "readonly", + // from python + localization: "readonly", + // progrssbar.js + randomId: "readonly", + requestProgress: "readonly", + // imageviewer.js + modalPrevImage: "readonly", + modalNextImage: "readonly", + // localStorage.js + localSet: "readonly", + localGet: "readonly", + localRemove: "readonly", + // resizeHandle.js + setupResizeHandle: "writable" + } +}; diff --git 
a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 0000000000000000000000000000000000000000..4104da632b8fcacf3a6f52eba093e63059749725 --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,2 @@ +# Apply ESlint +9c54b78d9dde5601e916f308d9a9d6953ec39430 \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 0000000000000000000000000000000000000000..5876e941085d256cc6a3f4d9ec560d19e782e16e --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,105 @@ +name: Bug Report +description: You think something is broken in the UI +title: "[Bug]: " +labels: ["bug-report"] + +body: + - type: markdown + attributes: + value: | + > The title of the bug report should be short and descriptive. + > Use relevant keywords for searchability. + > Do not leave it blank, but also do not put an entire error log in it. + - type: checkboxes + attributes: + label: Checklist + description: | + Please perform basic debugging to see if extensions or configuration is the cause of the issue. + Basic debug procedure + 1. Disable all third-party extensions - check if an extension is the cause + 2. Update extensions and webui - sometimes things just need to be updated + 3. Backup and remove your config.json and ui-config.json - check if the issue is caused by bad configuration + 4. Delete venv with third-party extensions disabled - sometimes extensions might cause wrong libraries to be installed + 5. Try a fresh installation of webui in a different directory - see if a clean installation solves the issue + Before making an issue report, please check that the issue hasn't been reported recently. + options: + - label: The issue exists after disabling all extensions + - label: The issue exists on a clean installation of webui + - label: The issue is caused by an extension, but I believe it is caused by a bug in the webui + - label: The issue exists in the current version of the webui + - label: The issue has not been reported recently + - label: The issue has been reported before but has not been fixed yet + - type: markdown + attributes: + value: | + > Please fill this form with as much information as possible. Don't forget to fill in "Upload Sysinfo" and "What browsers", and provide screenshots if possible + - type: textarea + id: what-did + attributes: + label: What happened? + description: Tell us what happened in a very clear and simple way + placeholder: | + txt2img is not working as intended. + validations: + required: true + - type: textarea + id: steps + attributes: + label: Steps to reproduce the problem + description: Please provide us with precise step-by-step instructions on how to reproduce the bug + placeholder: | + 1. Go to ... + 2. Press ... + 3. ... + validations: + required: true + - type: textarea + id: what-should + attributes: + label: What should have happened? + description: Tell us what you think the normal behavior should be + placeholder: | + WebUI should ... + validations: + required: true + - type: dropdown + id: browsers + attributes: + label: What browsers do you use to access the UI? + multiple: true + options: + - Mozilla Firefox + - Google Chrome + - Brave + - Apple Safari + - Microsoft Edge + - Android + - iOS + - Other + - type: textarea + id: sysinfo + attributes: + label: Sysinfo + description: System info file, generated by WebUI. You can generate it in settings, on the Sysinfo page. Drag the file into the field to upload it.
If you submit your report without including the sysinfo file, the report will be closed. If needed, review the report to make sure it includes no personal information you don't want to share. If you can't start WebUI, you can use the --dump-sysinfo commandline argument to generate the file. + placeholder: | + 1. Go to WebUI Settings -> Sysinfo -> Download system info. + If WebUI fails to launch, use the --dump-sysinfo commandline argument to generate the file + 2. Upload the Sysinfo as an attached file; do NOT paste it in as plain text. + validations: + required: true + - type: textarea + id: logs + attributes: + label: Console logs + description: Please provide **full** cmd/terminal logs from the moment you started UI to the end of it, after the bug occurred. If it's very long, provide a link to pastebin or similar service. + render: Shell + validations: + required: true + - type: textarea + id: misc + attributes: + label: Additional information + description: | + Please provide us with any relevant additional info or context. + Examples: + I have updated my GPU driver recently. diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000000000000000000000000000000000000..f58c94a9be6847193a971ac67aa83e9a6d75c0ae --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,5 @@ +blank_issues_enabled: false +contact_links: + - name: WebUI Community Support + url: https://github.com/AUTOMATIC1111/stable-diffusion-webui/discussions + about: Please ask and answer questions here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml new file mode 100644 index 0000000000000000000000000000000000000000..35a887408c1a0cb7d5bbf0a8444d0903a708be75 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -0,0 +1,40 @@ +name: Feature request +description: Suggest an idea for this project +title: "[Feature Request]: " +labels: ["enhancement"] + +body: + - type: checkboxes + attributes: + label: Is there an existing issue for this? + description: Please search to see if an issue already exists for the feature you want, and that it's not implemented in a recent build/commit. + options: + - label: I have searched the existing issues and checked the recent builds/commits + required: true + - type: markdown + attributes: + value: | + *Please fill this form with as much information as possible, provide screenshots and/or illustrations of the feature if possible* + - type: textarea + id: feature + attributes: + label: What would your feature do? + description: Tell us about your feature in a very clear and simple way, and what problem it would solve + validations: + required: true + - type: textarea + id: workflow + attributes: + label: Proposed workflow + description: Please provide us with step-by-step information on how you'd like the feature to be accessed and used + value: | + 1. Go to .... + 2. Press .... + 3. ... + validations: + required: true + - type: textarea + id: misc + attributes: + label: Additional information + description: Add any other context or screenshots about the feature request here.
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000000000000000000000000000000000000..c9fcda2e2790861c7bf4aa4cb37e01545c48fb95 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,15 @@ +## Description + +* a simple description of what you're trying to accomplish +* a summary of changes in code +* which issues it fixes, if any + +## Screenshots/videos: + + +## Checklist: + +- [ ] I have read [contributing wiki page](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Contributing) +- [ ] I have performed a self-review of my own code +- [ ] My code follows the [style guidelines](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Contributing#code-style) +- [ ] My code passes [tests](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Tests) diff --git a/.github/workflows/on_pull_request.yaml b/.github/workflows/on_pull_request.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9e44c806ab353cf3166b6ddbc54df63a16995ff5 --- /dev/null +++ b/.github/workflows/on_pull_request.yaml @@ -0,0 +1,38 @@ +name: Linter + +on: + - push + - pull_request + +jobs: + lint-python: + name: ruff + runs-on: ubuntu-latest + if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name != github.event.pull_request.base.repo.full_name + steps: + - name: Checkout Code + uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: 3.11 + # NB: there's no cache: pip here since we're not installing anything + # from the requirements.txt file(s) in the repository; it's faster + # not to have GHA download an (at the time of writing) 4 GB cache + # of PyTorch and other dependencies. + - name: Install Ruff + run: pip install ruff==0.1.6 + - name: Run Ruff + run: ruff . 
+ lint-js: + name: eslint + runs-on: ubuntu-latest + if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name != github.event.pull_request.base.repo.full_name + steps: + - name: Checkout Code + uses: actions/checkout@v3 + - name: Install Node.js + uses: actions/setup-node@v3 + with: + node-version: 18 + - run: npm i --ci + - run: npm run lint diff --git a/.github/workflows/run_tests.yaml b/.github/workflows/run_tests.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e075ba60db8e6742ef91fe9532189da47c192c7e --- /dev/null +++ b/.github/workflows/run_tests.yaml @@ -0,0 +1,107 @@ +name: Tests + +on: + - push + - pull_request + +env: + FORGE_CQ_TEST: "True" + +jobs: + test: + name: tests on CPU + runs-on: ubuntu-latest + if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name != github.event.pull_request.base.repo.full_name + steps: + - name: Checkout Code + uses: actions/checkout@v3 + - name: Set up Python 3.10 + uses: actions/setup-python@v4 + with: + python-version: 3.10.6 + cache: pip + cache-dependency-path: | + **/requirements*txt + launch.py + - name: Cache models + id: cache-models + uses: actions/cache@v3 + with: + path: models + key: "2023-12-30" + - name: Install test dependencies + run: pip install wait-for-it -r requirements-test.txt + env: + PIP_DISABLE_PIP_VERSION_CHECK: "1" + PIP_PROGRESS_BAR: "off" + - name: Setup environment + run: python launch.py --skip-torch-cuda-test --exit + env: + PIP_DISABLE_PIP_VERSION_CHECK: "1" + PIP_PROGRESS_BAR: "off" + TORCH_INDEX_URL: https://download.pytorch.org/whl/cpu + WEBUI_LAUNCH_LIVE_OUTPUT: "1" + PYTHONUNBUFFERED: "1" + - name: Print installed packages + run: pip freeze + - name: Download models + run: | + declare -a urls=( + "https://huggingface.co/lllyasviel/fav_models/resolve/main/fav/realisticVisionV51_v51VAE.safetensors" + ) + for url in "${urls[@]}"; do + filename="models/Stable-diffusion/${url##*/}" # Extracts the last part of the URL + if [ ! -f "$filename" ]; then + curl -Lo "$filename" "$url" + fi + done + # - name: Download ControlNet models + # run: | + # declare -a urls=( + # "https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11p_sd15_canny.pth" + # ) + + # for url in "${urls[@]}"; do + # filename="models/ControlNet/${url##*/}" # Extracts the last part of the URL + # if [ ! -f "$filename" ]; then + # curl -Lo "$filename" "$url" + # fi + # done + - name: Start test server + run: > + python -m coverage run + --data-file=.coverage.server + launch.py + --skip-prepare-environment + --skip-torch-cuda-test + --test-server + --do-not-download-clip + --no-half + --disable-opt-split-attention + --always-cpu + --api-server-stop + --ckpt models/Stable-diffusion/realisticVisionV51_v51VAE.safetensors + 2>&1 | tee output.txt & + - name: Run tests + run: | + wait-for-it --service 127.0.0.1:7860 -t 20 + python -m pytest -vv --junitxml=test/results.xml --cov . --cov-report=xml --verify-base-url test + # TODO(huchenlei): Enable ControlNet tests. Currently it is too slow to run these tests on CPU with + # real SD model. We need to find a way to load empty SD model. 
+ # - name: Run ControlNet tests + # run: > + # python -m pytest + # --junitxml=test/results.xml + # --cov ./extensions-builtin/sd_forge_controlnet + # --cov-report=xml + # --verify-base-url + # ./extensions-builtin/sd_forge_controlnet/tests + - name: Kill test server + if: always() + run: curl -vv -XPOST http://127.0.0.1:7860/sdapi/v1/server-stop && sleep 10 + - name: Upload main app output + uses: actions/upload-artifact@v3 + if: always() + with: + name: output + path: output.txt diff --git a/.github/workflows/warns_merge_master.yml b/.github/workflows/warns_merge_master.yml new file mode 100644 index 0000000000000000000000000000000000000000..ae2aab6ba8ce5684755b5fb4083267111bcd23cd --- /dev/null +++ b/.github/workflows/warns_merge_master.yml @@ -0,0 +1,19 @@ +name: Pull requests can't target master branch + +"on": + pull_request: + types: + - opened + - synchronize + - reopened + branches: + - master + +jobs: + check: + runs-on: ubuntu-latest + steps: + - name: Warn about merge into master + run: | + echo -e "::warning::This pull request merges directly into the \"master\" branch; normally, development happens on the \"dev\" branch." + exit 1 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..ca7c47ee1acf2f5e7de217233bfce04186a20163 --- /dev/null +++ b/.gitignore @@ -0,0 +1,44 @@ +__pycache__ +*.ckpt +*.safetensors +*.pth +/ESRGAN/* +/SwinIR/* +/repositories +/venv +/tmp +/output +/model.ckpt +/models/**/* +/GFPGANv1.3.pth +/gfpgan/weights/*.pth +/ui-config.json +/outputs +/config.json +/log +/webui.settings.bat +/embeddings +/styles.csv +/params.txt +/styles.csv.bak +/webui-user.bat +/webui-user.sh +/interrogate +/user.css +/.idea +notification.mp3 +/SwinIR +/textual_inversion +.vscode +/extensions +/test/stdout.txt +/test/stderr.txt +/cache.json* +/config_states/ +/node_modules +/package-lock.json +/.coverage* +/test/test_outputs +/test/results.xml +coverage.xml +**/tests/**/expectations \ No newline at end of file diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000000000000000000000000000000000000..53254e5dcfd871c8c0f0f4dec9dceeb1ba967eda --- /dev/null +++ b/.pylintrc @@ -0,0 +1,3 @@ +# See https://pylint.pycqa.org/en/latest/user_guide/messages/message_control.html +[MESSAGES CONTROL] +disable=C,R,W,E,I diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000000000000000000000000000000000000..b274aa428097b8f43f91a71aa9e887062ff71faf --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,674 @@ +## 1.7.0 + +### Features: +* settings tab rework: add search field, add categories, split UI settings page into many +* add altdiffusion-m18 support ([#13364](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13364)) +* support inference with LyCORIS GLora networks ([#13610](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13610)) +* add lora-embedding bundle system ([#13568](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13568)) +* option to move prompt from top row into generation parameters +* add support for SSD-1B ([#13865](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13865)) +* support inference with OFT networks ([#13692](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13692)) +* script metadata and DAG sorting mechanism ([#13944](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13944)) +* support HyperTile optimization ([#13948](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13948)) +* add support for SD 2.1 Turbo
([#14170](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14170)) +* remove Train->Preprocessing tab and put all its functionality into Extras tab +* initial IPEX support for Intel Arc GPU ([#14171](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14171)) + +### Minor: +* allow reading model hash from images in img2img batch mode ([#12767](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12767)) +* add option to align with sgm repo's sampling implementation ([#12818](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12818)) +* extra field for lora metadata viewer: `ss_output_name` ([#12838](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12838)) +* add action in settings page to calculate all SD checkpoint hashes ([#12909](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12909)) +* add button to copy prompt to style editor ([#12975](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12975)) +* add --skip-load-model-at-start option ([#13253](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13253)) +* write infotext to gif images +* read infotext from gif images ([#13068](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13068)) +* allow configuring the initial state of InputAccordion in ui-config.json ([#13189](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13189)) +* allow editing whitespace delimiters for ctrl+up/ctrl+down prompt editing ([#13444](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13444)) +* prevent accidentally closing popup dialogs ([#13480](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13480)) +* added option to play notification sound or not ([#13631](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13631)) +* show the preview image in the full screen image viewer if available ([#13459](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13459)) +* support for webui.settings.bat ([#13638](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13638)) +* add an option to not print stack traces on ctrl+c +* start/restart generation by Ctrl (Alt) + Enter ([#13644](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13644)) +* update prompts_from_file script to allow concatenating entries with the general prompt ([#13733](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13733)) +* added a visible checkbox to input accordion +* added an option to hide all txt2img/img2img parameters in an accordion ([#13826](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13826)) +* added 'Path' sorting option for Extra network cards ([#13968](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13968)) +* enable prompt hotkeys in style editor ([#13931](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13931)) +* option to show batch img2img results in UI ([#14009](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14009)) +* infotext updates: add option to disregard certain infotext fields, add option to not include VAE in infotext, add explanation to infotext settings page, move some options to infotext settings page +* add FP32 fallback support on sd_vae_approx ([#14046](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14046)) +* support XYZ scripts / split hires path from unet ([#14126](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14126)) +* allow use of mutiple styles csv files 
([#14125](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14125)) + +### Extensions and API: +* update gradio to 3.41.2 +* support installed extensions list api ([#12774](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12774)) +* update pnginfo API to return dict with parsed values +* add noisy latent to `ExtraNoiseParams` for callback ([#12856](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12856)) +* show extension datetime in UTC ([#12864](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12864), [#12865](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12865), [#13281](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13281)) +* add an option to choose how to combine hires fix and refiner +* include program version in info response. ([#13135](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13135)) +* sd_unet support for SDXL +* patch DDPM.register_betas so that users can put given_betas in model yaml ([#13276](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13276)) +* xyz_grid: add prepare ([#13266](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13266)) +* allow multiple localization files with same language in extensions ([#13077](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13077)) +* add onEdit function for js and rework token-counter.js to use it +* fix the key error exception when processing override_settings keys ([#13567](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13567)) +* ability for extensions to return custom data via api in response.images ([#13463](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13463)) +* call state.jobnext() before postproces*() ([#13762](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13762)) +* add option to set notification sound volume ([#13884](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13884)) +* update Ruff to 0.1.6 ([#14059](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14059)) +* add Block component creation callback ([#14119](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14119)) +* catch uncaught exception with ui creation scripts ([#14120](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14120)) +* use extension name for determining an extension is installed in the index ([#14063](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14063)) +* update is_installed() from launch_utils.py to fix reinstalling already installed packages ([#14192](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14192)) + +### Bug Fixes: +* fix pix2pix producing bad results +* fix defaults settings page breaking when any of main UI tabs are hidden +* fix error that causes some extra networks to be disabled if both and are present in the prompt +* fix for Reload UI function: if you reload UI on one tab, other opened tabs will no longer stop working +* prevent duplicate resize handler ([#12795](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12795)) +* small typo: vae resolve bug ([#12797](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12797)) +* hide broken image crop tool ([#12792](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12792)) +* don't show hidden samplers in dropdown for XYZ script ([#12780](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12780)) +* fix style editing dialog breaking if it's opened in both img2img and txt2img tabs +* hide --gradio-auth 
and --api-auth values from /internal/sysinfo report +* add missing infotext for RNG in options ([#12819](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12819)) +* fix notification not playing when built-in webui tab is inactive ([#12834](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12834)) +* honor `--skip-install` for extension installers ([#12832](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12832)) +* don't print blank stdout in extension installers ([#12833](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12833), [#12855](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12855)) +* get progressbar to display correctly in extensions tab +* keep order in list of checkpoints when loading model that doesn't have a checksum +* fix inpainting models in txt2img creating black pictures +* fix generation params regex ([#12876](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12876)) +* fix batch img2img output dir with script ([#12926](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12926)) +* fix #13080 - Hypernetwork/TI preview generation ([#13084](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13084)) +* fix bug with sigma min/max overrides. ([#12995](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12995)) +* more accurate check for enabling cuDNN benchmark on 16XX cards ([#12924](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12924)) +* don't use multicond parser for negative prompt counter ([#13118](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13118)) +* fix data-sort-name containing spaces ([#13412](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13412)) +* update card on correct tab when editing metadata ([#13411](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13411)) +* fix viewing/editing metadata when filename contains an apostrophe ([#13395](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13395)) +* fix: --sd_model in "Prompts from file or textbox" script is not working ([#13302](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13302)) +* better Support for Portable Git ([#13231](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13231)) +* fix issues when webui_dir is not work_dir ([#13210](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13210)) +* fix: lora-bias-backup don't reset cache ([#13178](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13178)) +* account for customizable extra network separators when removing extra network text from the prompt ([#12877](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12877)) +* re fix batch img2img output dir with script ([#13170](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13170)) +* fix `--ckpt-dir` path separator and option use `short name` for checkpoint dropdown ([#13139](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13139)) +* consolidated allowed preview formats, Fix extra network `.gif` not working as preview ([#13121](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13121)) +* fix venv_dir=- environment variable not working as expected on linux ([#13469](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13469)) +* repair unload sd checkpoint button +* edit-attention fixes ([#13533](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13533)) +* fix bug when using --gfpgan-models-path
([#13718](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13718)) +* properly apply sort order for extra network cards when selected from dropdown +* fixes generation restart not working for some users when 'Ctrl+Enter' is pressed ([#13962](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13962)) +* thread safe extra network list_items ([#13014](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13014)) +* fix not able to exit metadata popup when pop up is too big ([#14156](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14156)) +* fix auto focal point crop for opencv >= 4.8 ([#14121](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14121)) +* make 'use-cpu all' actually apply to 'all' ([#14131](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14131)) +* extras tab batch: actually use original filename +* make webui not crash when running with --disable-all-extensions option + +### Other: +* non-local condition ([#12814](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12814)) +* fix minor typos ([#12827](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12827)) +* remove xformers Python version check ([#12842](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12842)) +* style: file-metadata word-break ([#12837](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12837)) +* revert SGM noise multiplier change for img2img because it breaks hires fix +* do not change quicksettings dropdown option when value returned is `None` ([#12854](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12854)) +* [RC 1.6.0 - zoom is partly hidden] Update style.css ([#12839](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12839)) +* chore: change extension time format ([#12851](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12851)) +* WEBUI.SH - Use torch 2.1.0 release candidate for Navi 3 ([#12929](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12929)) +* add Fallback at images.read_info_from_image if exif data was invalid ([#13028](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13028)) +* update cmd arg description ([#12986](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12986)) +* fix: update shared.opts.data when add_option ([#12957](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12957), [#13213](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13213)) +* restore missing tooltips ([#12976](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12976)) +* use default dropdown padding on mobile ([#12880](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12880)) +* put enable console prompts option into settings from commandline args ([#13119](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13119)) +* fix some deprecated types ([#12846](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12846)) +* bump to torchsde==0.2.6 ([#13418](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13418)) +* update dragdrop.js ([#13372](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13372)) +* use orderdict as lru cache:opt/bug ([#13313](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13313)) +* XYZ if not include sub grids do not save sub grid ([#13282](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13282)) +* initialize state.time_start befroe state.job_count 
([#13229](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13229)) +* fix fieldname regex ([#13458](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13458)) +* change denoising_strength default to None. ([#13466](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13466)) +* fix regression ([#13475](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13475)) +* fix IndexError ([#13630](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13630)) +* fix: checkpoints_loaded:{checkpoint:state_dict}, model.load_state_dict issue in dict value empty ([#13535](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13535)) +* update bug_report.yml ([#12991](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12991)) +* requirements_versions httpx==0.24.1 ([#13839](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13839)) +* fix parenthesis auto selection ([#13829](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13829)) +* fix #13796 ([#13797](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13797)) +* corrected a typo in `modules/cmd_args.py` ([#13855](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13855)) +* feat: fix randn found element of type float at pos 2 ([#14004](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14004)) +* adds tqdm handler to logging_config.py for progress bar integration ([#13996](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13996)) +* hotfix: call shared.state.end() after postprocessing done ([#13977](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13977)) +* fix dependency address patch 1 ([#13929](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13929)) +* save sysinfo as .json ([#14035](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14035)) +* move exception_records related methods to errors.py ([#14084](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14084)) +* compatibility ([#13936](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13936)) +* json.dump(ensure_ascii=False) ([#14108](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14108)) +* dir buttons start with / so only the correct dir will be shown and no… ([#13957](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13957)) +* alternate implementation for unet forward replacement that does not depend on hijack being applied +* re-add `keyedit_delimiters_whitespace` setting lost as part of commit e294e46 ([#14178](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14178)) +* fix `save_samples` being checked early when saving masked composite ([#14177](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14177)) +* slight optimization for mask and mask_composite ([#14181](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14181)) +* add import_hook hack to work around basicsr/torchvision incompatibility ([#14186](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14186)) + +## 1.6.1 + +### Bug Fixes: + * fix an error causing the webui to fail to start ([#13839](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13839)) + +## 1.6.0 + +### Features: + * refiner support [#12371](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12371) + * add NV option for Random number generator source setting, which allows to generate same pictures on CPU/AMD/Mac as on NVidia videocards + * add style editor dialog + * hires fix: add an option to use 
a different checkpoint for second pass ([#12181](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12181)) + * option to keep multiple loaded models in memory ([#12227](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12227)) + * new samplers: Restart, DPM++ 2M SDE Exponential, DPM++ 2M SDE Heun, DPM++ 2M SDE Heun Karras, DPM++ 2M SDE Heun Exponential, DPM++ 3M SDE, DPM++ 3M SDE Karras, DPM++ 3M SDE Exponential ([#12300](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12300), [#12519](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12519), [#12542](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12542)) + * rework DDIM, PLMS, UniPC to use CFG denoiser same as in k-diffusion samplers: + * makes all of them work with img2img + * makes prompt composition possible (AND) + * makes them available for SDXL + * always show extra networks tabs in the UI ([#11808](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/11808)) + * use less RAM when creating models ([#11958](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/11958), [#12599](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12599)) + * textual inversion inference support for SDXL + * extra networks UI: show metadata for SD checkpoints + * checkpoint merger: add metadata support + * prompt editing and attention: add support for whitespace after the number ([ red : green : 0.5 ]) (seed breaking change) ([#12177](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12177)) + * VAE: allow selecting own VAE for each checkpoint (in user metadata editor) + * VAE: add selected VAE to infotext + * options in main UI: add own separate setting for txt2img and img2img, correctly read values from pasted infotext, add setting for column count ([#12551](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12551)) + * add resize handle to txt2img and img2img tabs, allowing you to change the amount of horizontal space given to generation parameters and resulting image gallery ([#12687](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12687), [#12723](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12723)) + * change default behavior for batching cond/uncond -- now it's on by default, and is disabled by a UI setting (Optimizations -> Batch cond/uncond) - if you are on lowvram/medvram and are getting OOM exceptions, you will need to enable it + * show current position in queue and make it so that requests are processed in the order of arrival ([#12707](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12707)) + * add `--medvram-sdxl` flag that only enables `--medvram` for SDXL models + * prompt editing timeline has separate range for first pass and hires-fix pass (seed breaking change) ([#12457](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12457)) + +### Minor: + * img2img batch: RAM savings, VRAM savings, .tif, .tiff in img2img batch ([#12120](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12120), [#12514](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12514), [#12515](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12515)) + * postprocessing/extras: RAM savings ([#12479](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12479)) + * XYZ: in the axis labels, remove pathnames from model filenames + * XYZ: support hires sampler ([#12298](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12298)) + * XYZ: new option: use text inputs instead
of dropdowns ([#12491](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12491)) + * add gradio version warning + * sort list of VAE checkpoints ([#12297](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12297)) + * use transparent white for mask in inpainting, along with an option to select the color ([#12326](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12326)) + * move some settings to their own section: img2img, VAE + * add checkbox to show/hide dirs for extra networks + * Add TAESD(or more) options for all the VAE encode/decode operation ([#12311](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12311)) + * gradio theme cache, new gradio themes, along with explanation that the user can input his own values ([#12346](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12346), [#12355](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12355)) + * sampler fixes/tweaks: s_tmax, s_churn, s_noise, s_tmax ([#12354](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12354), [#12356](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12356), [#12357](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12357), [#12358](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12358), [#12375](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12375), [#12521](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12521)) + * update README.md with correct instructions for Linux installation ([#12352](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12352)) + * option to not save incomplete images, on by default ([#12338](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12338)) + * enable cond cache by default + * git autofix for repos that are corrupted ([#12230](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12230)) + * allow to open images in new browser tab by middle mouse button ([#12379](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12379)) + * automatically open webui in browser when running "locally" ([#12254](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12254)) + * put commonly used samplers on top, make DPM++ 2M Karras the default choice + * zoom and pan: option to auto-expand a wide image, improved integration ([#12413](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12413), [#12727](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12727)) + * option to cache Lora networks in memory + * rework hires fix UI to use accordion + * face restoration and tiling moved to settings - use "Options in main UI" setting if you want them back + * change quicksettings items to have variable width + * Lora: add Norm module, add support for bias ([#12503](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12503)) + * Lora: output warnings in UI rather than fail for unfitting loras; switch to logging for error output in console + * support search and display of hashes for all extra network items ([#12510](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12510)) + * add extra noise param for img2img operations ([#12564](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12564)) + * support for Lora with bias ([#12584](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12584)) + * make interrupt quicker ([#12634](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12634)) + * configurable gallery height 
([#12648](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12648)) + * make results column sticky ([#12645](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12645)) + * more hash filename patterns ([#12639](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12639)) + * make image viewer actually fit the whole page ([#12635](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12635)) + * make progress bar work independently from live preview display which results in it being updated a lot more often + * forbid Full live preview method for medvram and add a setting to undo the forbidding + * make it possible to localize tooltips and placeholders + * add option to align with sgm repo's sampling implementation ([#12818](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12818)) + * Restore faces and Tiling generation parameters have been moved to settings out of main UI + * if you want to put them back into main UI, use `Options in main UI` setting on the UI page. + +### Extensions and API: + * gradio 3.41.2 + * also bump versions for packages: transformers, GitPython, accelerate, scikit-image, timm, tomesd + * support tooltip kwarg for gradio elements: gr.Textbox(label='hello', tooltip='world') + * properly clear the total console progressbar when using txt2img and img2img from API + * add cmd_arg --disable-extra-extensions and --disable-all-extensions ([#12294](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12294)) + * shared.py and webui.py split into many files + * add --loglevel commandline argument for logging + * add a custom UI element that combines accordion and checkbox + * avoid importing gradio in tests because it spams warnings + * put infotext label for setting into OptionInfo definition rather than in a separate list + * make `StableDiffusionProcessingImg2Img.mask_blur` a property, make more inline with PIL `GaussianBlur` ([#12470](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12470)) + * option to make scripts UI without gr.Group + * add a way for scripts to register a callback for before/after just a single component's creation + * use dataclass for StableDiffusionProcessing + * store patches for Lora in a specialized module instead of inside torch + * support http/https URLs in API ([#12663](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12663), [#12698](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12698)) + * add extra noise callback ([#12616](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12616)) + * dump current stack traces when exiting with SIGINT + * add type annotations for extra fields of shared.sd_model + +### Bug Fixes: + * Don't crash if out of local storage quota for JavaScript localStorage + * XYZ plot: do not fail if an exception occurs + * fix missing TI hash in infotext if generation uses both negative and positive TI ([#12269](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12269)) + * localization fixes ([#12307](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12307)) + * fix sdxl model invalid configuration after the hijack + * correctly toggle extras checkbox for infotext paste ([#12304](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12304)) + * open raw sysinfo link in new page ([#12318](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12318)) + * prompt parser: Account for empty field in alternating words syntax
([#12319](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12319)) + * add tab and carriage return to invalid filename chars ([#12327](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12327)) + * fix api only Lora not working ([#12387](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12387)) + * fix options in main UI misbehaving when there's just one element + * make it possible to use a sampler from infotext even if it's hidden in the dropdown + * fix styles missing from the prompt in infotext when making a grid of a batch of multiple images + * prevent bogus progress output in console when calculating hires fix dimensions + * fix --use-textbox-seed + * fix broken `Lora/Networks: use old method` option ([#12466](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12466)) + * properly return `None` for VAE hash when using `--no-hashing` ([#12463](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12463)) + * MPS/macOS fixes and optimizations ([#12526](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12526)) + * add second_order to samplers that mistakenly didn't have it + * when refreshing cards in extra networks UI, do not discard user's custom resolution + * fix processing error that happens if batch_size is not a multiple of how many prompts/negative prompts there are ([#12509](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12509)) + * fix inpaint upload for alpha masks ([#12588](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12588)) + * fix exception when image sizes are not integers ([#12586](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12586)) + * fix incorrect TAESD Latent scale ([#12596](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12596)) + * auto add data-dir to gradio-allowed-path ([#12603](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12603)) + * fix exception if extensions dir is missing ([#12607](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12607)) + * fix issues with api model-refresh and vae-refresh ([#12638](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12638)) + * fix img2img background color for transparent images option not being used ([#12633](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12633)) + * attempt to resolve NaN issue with unstable VAEs in fp32 mk2 ([#12630](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12630)) + * implement missing undo hijack for SDXL + * fix xyz swap axes ([#12684](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12684)) + * fix errors in backup/restore tab if any of config files are broken ([#12689](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12689)) + * fix SD VAE switch error after model reuse ([#12685](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12685)) + * fix trying to create images too large for the chosen format ([#12667](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12667)) + * create Gradio temp directory if necessary ([#12717](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12717)) + * prevent possible cache loss if exiting as it's being written by using an atomic operation to replace the cache with the new version + * set devices.dtype_unet correctly + * run RealESRGAN on GPU for non-CUDA devices ([#12737](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12737)) + * prevent extra network buttons being obscured by description for very small
card sizes ([#12745](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12745)) + * fix error that causes some extra networks to be disabled if both and are present in the prompt + * fix defaults settings page breaking when any of main UI tabs are hidden + * fix incorrect save/display of new values in Defaults page in settings + * fix for Reload UI function: if you reload UI on one tab, other opened tabs will no longer stop working + * fix an error that prevents VAE being reloaded after an option change if a VAE near the checkpoint exists ([#12797](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12797)) + * hide broken image crop tool ([#12792](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12792)) + * don't show hidden samplers in dropdown for XYZ script ([#12780](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12780)) + * fix style editing dialog breaking if it's opened in both img2img and txt2img tabs + * fix a bug allowing users to bypass gradio and API authentication (reported by vysecurity) + * fix notification not playing when built-in webui tab is inactive ([#12834](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12834)) + * honor `--skip-install` for extension installers ([#12832](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12832)) + * don't print blank stdout in extension installers ([#12833](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12833), [#12855](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12855)) + * do not change quicksettings dropdown option when value returned is `None` ([#12854](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12854)) + * get progressbar to display correctly in extensions tab + + +## 1.5.2 + +### Bug Fixes: + * fix memory leak when generation fails + * update doggettx cross attention optimization to not use an unreasonable amount of memory in some edge cases -- suggestion by MorkTheOrk + + +## 1.5.1 + +### Minor: + * support parsing text encoder blocks in some new LoRAs + * delete scale checker script due to user demand + +### Extensions and API: + * add postprocess_batch_list script callback + +### Bug Fixes: + * fix TI training for SD1 + * fix reload altclip model error + * prepend the pythonpath instead of overriding it + * fix typo in SD_WEBUI_RESTARTING + * if txt2img/img2img raises an exception, finally call state.end() + * fix composable diffusion weight parsing + * restyle Startup profile for black users + * fix webui not launching with --nowebui + * catch exception for non git extensions + * fix some options missing from /sdapi/v1/options + * fix for extension update status always saying "unknown" + * fix display of extra network cards that have `<>` in the name + * update lora extension to work with python 3.8 + + +## 1.5.0 + +### Features: + * SD XL support + * user metadata system for custom networks + * extended Lora metadata editor: set activation text, default weight, view tags, training info + * Lora extension rework to include other types of networks (all that were previously handled by LyCORIS extension) + * show github stars for extensions + * img2img batch mode can read extra stuff from png info + * img2img batch works with subdirectories + * hotkeys to move prompt elements: alt+left/right + * restyle time taken/VRAM display + * add textual inversion hashes to infotext + * optimization: cache git extension repo information + * move generate button next to the generated picture for mobile clients + * hide
cards for networks of incompatible Stable Diffusion version in Lora extra networks interface + * skip installing packages with pip if they all are already installed - startup speedup of about 2 seconds + +### Minor: + * checkbox to check/uncheck all extensions in the Installed tab + * add gradio user to infotext and to filename patterns + * allow gif for extra network previews + * add options to change colors in grid + * use natural sort for items in extra networks + * Mac: use empty_cache() from torch 2 to clear VRAM + * added automatic support for installing the right libraries for Navi3 (AMD) + * add option SWIN_torch_compile to accelerate SwinIR upscale + * suppress printing TI embedding info at start to console by default + * speedup extra networks listing + * added `[none]` filename token. + * removed thumbs extra networks view mode (use settings tab to change width/height/scale to get thumbs) + * add always_discard_next_to_last_sigma option to XYZ plot + * automatically switch to 32-bit float VAE if the generated picture has NaNs without the need for `--no-half-vae` commandline flag. + +### Extensions and API: + * api endpoints: /sdapi/v1/server-kill, /sdapi/v1/server-restart, /sdapi/v1/server-stop + * allow Script to have custom metaclass + * add model exists status check /sdapi/v1/options + * rename --add-stop-route to --api-server-stop + * add `before_hr` script callback + * add callback `after_extra_networks_activate` + * disable rich exception output in console for API by default, use WEBUI_RICH_EXCEPTIONS env var to enable + * return http 404 when thumb file not found + * allow replacing extensions index with environment variable + +### Bug Fixes: + * fix for catch errors when retrieving extension index #11290 + * fix very slow loading speed of .safetensors files when reading from network drives + * API cache cleanup + * fix UnicodeEncodeError when writing to file CLIP Interrogator batch mode + * fix warning of 'has_mps' deprecated from PyTorch + * fix problem with extra network saving images as previews losing generation info + * fix throwing exception when trying to resize image with I;16 mode + * fix for #11534: canvas zoom and pan extension hijacking shortcut keys + * fixed launch script to be runnable from any directory + * don't add "Seed Resize: -1x-1" to API image metadata + * correctly remove end parenthesis with ctrl+up/down + * fixing --subpath on newer gradio version + * fix: check fill size none zero when resize (fixes #11425) + * use submit and blur for quick settings textbox + * save img2img batch with images.save_image() + * prevent running preload.py for disabled extensions + * fix: previously, model name was added together with directory name to infotext and to [model_name] filename pattern; directory name is now not included + + +## 1.4.1 + +### Bug Fixes: + * add queue lock for refresh-checkpoints + +## 1.4.0 + +### Features: + * zoom controls for inpainting + * run basic torch calculation at startup in parallel to reduce the performance impact of first generation + * option to pad prompt/neg prompt to be same length + * remove taming_transformers dependency + * custom k-diffusion scheduler settings + * add an option to show selected settings in main txt2img/img2img UI + * sysinfo tab in settings + * infer styles from prompts when pasting params into the UI + * an option to control the behavior of the above + +### Minor: + * bump Gradio to 3.32.0 + * bump xformers to 0.0.20 + * Add option to disable token counters + * tooltip fixes & optimizations + * make 
it possible to configure filename for the zip download + * `[vae_filename]` pattern for filenames + * Revert discarding penultimate sigma for DPM-Solver++(2M) SDE + * change UI reorder setting to multiselect + * read version info form CHANGELOG.md if git version info is not available + * link footer API to Wiki when API is not active + * persistent conds cache (opt-in optimization) + +### Extensions: + * After installing extensions, webui properly restarts the process rather than reloads the UI + * Added VAE listing to web API. Via: /sdapi/v1/sd-vae + * custom unet support + * Add onAfterUiUpdate callback + * refactor EmbeddingDatabase.register_embedding() to allow unregistering + * add before_process callback for scripts + * add ability for alwayson scripts to specify section and let user reorder those sections + +### Bug Fixes: + * Fix dragging text to prompt + * fix incorrect quoting for infotext values with colon in them + * fix "hires. fix" prompt sharing same labels with txt2img_prompt + * Fix s_min_uncond default type int + * Fix for #10643 (Inpainting mask sometimes not working) + * fix bad styling for thumbs view in extra networks #10639 + * fix for empty list of optimizations #10605 + * small fixes to prepare_tcmalloc for Debian/Ubuntu compatibility + * fix --ui-debug-mode exit + * patch GitPython to not use leaky persistent processes + * fix duplicate Cross attention optimization after UI reload + * torch.cuda.is_available() check for SdOptimizationXformers + * fix hires fix using wrong conds in second pass if using Loras. + * handle exception when parsing generation parameters from png info + * fix upcast attention dtype error + * forcing Torch Version to 1.13.1 for RX 5000 series GPUs + * split mask blur into X and Y components, patch Outpainting MK2 accordingly + * don't die when a LoRA is a broken symlink + * allow activation of Generate Forever during generation + + +## 1.3.2 + +### Bug Fixes: + * fix files served out of tmp directory even if they are saved to disk + * fix postprocessing overwriting parameters + +## 1.3.1 + +### Features: + * revert default cross attention optimization to Doggettx + +### Bug Fixes: + * fix bug: LoRA don't apply on dropdown list sd_lora + * fix png info always added even if setting is not enabled + * fix some fields not applying in xyz plot + * fix "hires. 
fix" prompt sharing same labels with txt2img_prompt + * fix lora hashes not being added properly to infotex if there is only one lora + * fix --use-cpu failing to work properly at startup + * make --disable-opt-split-attention command line option work again + +## 1.3.0 + +### Features: + * add UI to edit defaults + * token merging (via dbolya/tomesd) + * settings tab rework: add a lot of additional explanations and links + * load extensions' Git metadata in parallel to loading the main program to save a ton of time during startup + * update extensions table: show branch, show date in separate column, and show version from tags if available + * TAESD - another option for cheap live previews + * allow choosing sampler and prompts for second pass of hires fix - hidden by default, enabled in settings + * calculate hashes for Lora + * add lora hashes to infotext + * when pasting infotext, use infotext's lora hashes to find local loras for `` entries whose hashes match loras the user has + * select cross attention optimization from UI + +### Minor: + * bump Gradio to 3.31.0 + * bump PyTorch to 2.0.1 for macOS and Linux AMD + * allow setting defaults for elements in extensions' tabs + * allow selecting file type for live previews + * show "Loading..." for extra networks when displaying for the first time + * suppress ENSD infotext for samplers that don't use it + * clientside optimizations + * add options to show/hide hidden files and dirs in extra networks, and to not list models/files in hidden directories + * allow whitespace in styles.csv + * add option to reorder tabs + * move some functionality (swap resolution and set seed to -1) to client + * option to specify editor height for img2img + * button to copy image resolution into img2img width/height sliders + * switch from pyngrok to ngrok-py + * lazy-load images in extra networks UI + * set "Navigate image viewer with gamepad" option to false by default, by request + * change upscalers to download models into user-specified directory (from commandline args) rather than the default models/<...> + * allow hiding buttons in ui-config.json + +### Extensions: + * add /sdapi/v1/script-info api + * use Ruff to lint Python code + * use ESlint to lint Javascript code + * add/modify CFG callbacks for Self-Attention Guidance extension + * add command and endpoint for graceful server stopping + * add some locals (prompts/seeds/etc) from processing function into the Processing class as fields + * rework quoting for infotext items that have commas in them to use JSON (should be backwards compatible except for cases where it didn't work previously) + * add /sdapi/v1/refresh-loras api checkpoint post request + * tests overhaul + +### Bug Fixes: + * fix an issue preventing the program from starting if the user specifies a bad Gradio theme + * fix broken prompts from file script + * fix symlink scanning for extra networks + * fix --data-dir ignored when launching via webui-user.bat COMMANDLINE_ARGS + * allow web UI to be ran fully offline + * fix inability to run with --freeze-settings + * fix inability to merge checkpoint without adding metadata + * fix extra networks' save preview image not adding infotext for jpeg/webm + * remove blinking effect from text in hires fix and scale resolution preview + * make links to `http://<...>.git` extensions work in the extension tab + * fix bug with webui hanging at startup due to hanging git process + + +## 1.2.1 + +### Features: + * add an option to always refer to LoRA by filenames + +### Bug Fixes: + * never refer 
to LoRA by an alias if multiple LoRAs have same alias or the alias is called none + * fix upscalers disappearing after the user reloads UI + * allow bf16 in safe unpickler (resolves problems with loading some LoRAs) + * allow web UI to be ran fully offline + * fix localizations not working + * fix error for LoRAs: `'LatentDiffusion' object has no attribute 'lora_layer_mapping'` + +## 1.2.0 + +### Features: + * do not wait for Stable Diffusion model to load at startup + * add filename patterns: `[denoising]` + * directory hiding for extra networks: dirs starting with `.` will hide their cards on extra network tabs unless specifically searched for + * LoRA: for the `<...>` text in prompt, use name of LoRA that is in the metdata of the file, if present, instead of filename (both can be used to activate LoRA) + * LoRA: read infotext params from kohya-ss's extension parameters if they are present and if his extension is not active + * LoRA: fix some LoRAs not working (ones that have 3x3 convolution layer) + * LoRA: add an option to use old method of applying LoRAs (producing same results as with kohya-ss) + * add version to infotext, footer and console output when starting + * add links to wiki for filename pattern settings + * add extended info for quicksettings setting and use multiselect input instead of a text field + +### Minor: + * bump Gradio to 3.29.0 + * bump PyTorch to 2.0.1 + * `--subpath` option for gradio for use with reverse proxy + * Linux/macOS: use existing virtualenv if already active (the VIRTUAL_ENV environment variable) + * do not apply localizations if there are none (possible frontend optimization) + * add extra `None` option for VAE in XYZ plot + * print error to console when batch processing in img2img fails + * create HTML for extra network pages only on demand + * allow directories starting with `.` to still list their models for LoRA, checkpoints, etc + * put infotext options into their own category in settings tab + * do not show licenses page when user selects Show all pages in settings + +### Extensions: + * tooltip localization support + * add API method to get LoRA models with prompt + +### Bug Fixes: + * re-add `/docs` endpoint + * fix gamepad navigation + * make the lightbox fullscreen image function properly + * fix squished thumbnails in extras tab + * keep "search" filter for extra networks when user refreshes the tab (previously it showed everthing after you refreshed) + * fix webui showing the same image if you configure the generation to always save results into same file + * fix bug with upscalers not working properly + * fix MPS on PyTorch 2.0.1, Intel Macs + * make it so that custom context menu from contextMenu.js only disappears after user's click, ignoring non-user click events + * prevent Reload UI button/link from reloading the page when it's not yet ready + * fix prompts from file script failing to read contents from a drag/drop file + + +## 1.1.1 +### Bug Fixes: + * fix an error that prevents running webui on PyTorch<2.0 without --disable-safe-unpickle + +## 1.1.0 +### Features: + * switch to PyTorch 2.0.0 (except for AMD GPUs) + * visual improvements to custom code scripts + * add filename patterns: `[clip_skip]`, `[hasprompt<>]`, `[batch_number]`, `[generation_number]` + * add support for saving init images in img2img, and record their hashes in infotext for reproducability + * automatically select current word when adjusting weight with ctrl+up/down + * add dropdowns for X/Y/Z plot + * add setting: Stable Diffusion/Random number generator 
source: makes it possible to make images generated from a given manual seed consistent across different GPUs + * support Gradio's theme API + * use TCMalloc on Linux by default; possible fix for memory leaks + * add optimization option to remove negative conditioning at low sigma values #9177 + * embed model merge metadata in .safetensors file + * extension settings backup/restore feature #9169 + * add "resize by" and "resize to" tabs to img2img + * add option "keep original size" to textual inversion images preprocess + * image viewer scrolling via analog stick + * button to restore the progress from session lost / tab reload + +### Minor: + * bump Gradio to 3.28.1 + * change "scale to" to sliders in Extras tab + * add labels to tool buttons to make it possible to hide them + * add tiled inference support for ScuNET + * add branch support for extension installation + * change Linux installation script to install into current directory rather than `/home/username` + * sort textual inversion embeddings by name (case-insensitive) + * allow styles.csv to be symlinked or mounted in docker + * remove the "do not add watermark to images" option + * make selected tab configurable with UI config + * make the extra networks UI fixed height and scrollable + * add `disable_tls_verify` arg for use with self-signed certs + +### Extensions: + * add reload callback + * add `is_hr_pass` field for processing + +### Bug Fixes: + * fix broken batch image processing on 'Extras/Batch Process' tab + * add "None" option to extra networks dropdowns + * fix FileExistsError for CLIP Interrogator + * fix /sdapi/v1/txt2img endpoint not working on Linux #9319 + * fix disappearing live previews and progressbar during slow tasks + * fix fullscreen image view not working properly in some cases + * prevent alwayson_scripts args param resizing script_arg list when they are inserted in it + * fix prompt schedule for second order samplers + * fix image mask/composite for weird resolutions #9628 + * use correct images for previews when using AND (see #9491) + * one broken image in img2img batch won't stop all processing + * fix image orientation bug in train/preprocess + * fix Ngrok recreating tunnels every reload + * fix `--realesrgan-models-path` and `--ldsr-models-path` not working + * fix `--skip-install` not working + * use SAMPLE file format in Outpainting Mk2 & Poorman + * do not fail all LoRAs if some have failed to load when making a picture + +## 1.0.0 + * everything diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 0000000000000000000000000000000000000000..2c781aff450c8604eb3cf876d2c3585a96a5a590 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,7 @@ +cff-version: 1.2.0 +message: "If you use this software, please cite it as below." +authors: + - given-names: AUTOMATIC1111 +title: "Stable Diffusion Web UI" +date-released: 2022-08-22 +url: "https://github.com/AUTOMATIC1111/stable-diffusion-webui" diff --git a/CODEOWNERS b/CODEOWNERS new file mode 100644 index 0000000000000000000000000000000000000000..585eb87aa06cbf03b2fe940f25ad5b94853ec39d --- /dev/null +++ b/CODEOWNERS @@ -0,0 +1 @@ +* @lllyasviel diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000000000000000000000000000000000000..211d32e752cb61bd056436e8f7a806f12a626bb7 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,663 @@ + GNU AFFERO GENERAL PUBLIC LICENSE + Version 3, 19 November 2007 + + Copyright (c) 2023 AUTOMATIC1111 + + Copyright (C) 2007 Free Software Foundation, Inc. 
+ Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU Affero General Public License is a free, copyleft license for +software and other kinds of works, specifically designed to ensure +cooperation with the community in the case of network server software. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +our General Public Licenses are intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + Developers that use our General Public Licenses protect your rights +with two steps: (1) assert copyright on the software, and (2) offer +you this License which gives you legal permission to copy, distribute +and/or modify the software. + + A secondary benefit of defending all users' freedom is that +improvements made in alternate versions of the program, if they +receive widespread use, become available for other developers to +incorporate. Many developers of free software are heartened and +encouraged by the resulting cooperation. However, in the case of +software used on network servers, this result may fail to come about. +The GNU General Public License permits making a modified version and +letting the public access it on a server without ever releasing its +source code to the public. + + The GNU Affero General Public License is designed specifically to +ensure that, in such cases, the modified source code becomes available +to the community. It requires the operator of a network server to +provide the source code of the modified version running there to the +users of that server. Therefore, public use of a modified version, on +a publicly accessible server, gives the public access to the source +code of the modified version. + + An older license, called the Affero General Public License and +published by Affero, was designed to accomplish similar goals. This is +a different license, not a version of the Affero GPL, but Affero has +released a new version of the Affero GPL which permits relicensing under +this license. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU Affero General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. 
+ + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. 
This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. 
This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. 
+ + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. 
+ + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. 
+ + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. 
+ + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. 
If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Remote Network Interaction; Use with the GNU General Public License. + + Notwithstanding any other provision of this License, if you modify the +Program, your modified version must prominently offer all users +interacting with it remotely through a computer network (if your version +supports such interaction) an opportunity to receive the Corresponding +Source of your version by providing access to the Corresponding Source +from a network server at no charge, through some standard or customary +means of facilitating copying of software. This Corresponding Source +shall include the Corresponding Source for any work covered by version 3 +of the GNU General Public License that is incorporated pursuant to the +following paragraph. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the work with which it is combined will remain governed by version +3 of the GNU General Public License. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU Affero General Public License from time to time. Such new versions +will be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU Affero General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU Affero General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU Affero General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 
+ + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If your software can interact with users remotely through a computer +network, you should also make sure that it provides a way for users to +get its source. For example, if your program is a web application, its +interface could display a "Source" link that leads users to an archive +of the code. There are many ways you could offer source, and different +solutions will be better for different programs; see section 13 for the +specific requirements. + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU AGPL, see +. 
diff --git a/README.md index fad68f6a83a342393f279de0c849e8c8676338cd..38841305fcc72b0cc204846e602cd8ecf12e4e05 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,710 @@ --- -title: Stable Diffusion Webui Forge -emoji: 🌍 -colorFrom: gray -colorTo: red +title: stable-diffusion-webui-forge +app_file: webui.py sdk: gradio -sdk_version: 4.24.0 -app_file: app.py -pinned: false +sdk_version: 3.41.2 --- +# Stable Diffusion WebUI Forge -Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference +Stable Diffusion WebUI Forge is a platform on top of [Stable Diffusion WebUI](https://github.com/AUTOMATIC1111/stable-diffusion-webui) (based on [Gradio](https://www.gradio.app/)) to make development easier, optimize resource management, and speed up inference. + +The name "Forge" is inspired by "Minecraft Forge". This project aims to become SD WebUI's Forge. + +Compared to the original WebUI (for SDXL inference at 1024px), you can expect the speed-ups below: + +1. If you use a common GPU with 8GB VRAM, you can expect to get about **30~45% speed-up** in inference speed (it/s), the GPU memory peak (in task manager) will drop by about 700MB to 1.3GB, the maximum diffusion resolution (that will not OOM) will increase about 2x to 3x, and the maximum diffusion batch size (that will not OOM) will increase about 4x to 6x. + +2. If you use a less powerful GPU with 6GB VRAM, you can expect to get about **60~75% speed-up** in inference speed (it/s), the GPU memory peak (in task manager) will drop by about 800MB to 1.5GB, the maximum diffusion resolution (that will not OOM) will increase about 3x, and the maximum diffusion batch size (that will not OOM) will increase about 4x. + +3. If you use a powerful GPU like a 4090 with 24GB VRAM, you can expect to get about **3~6% speed-up** in inference speed (it/s), the GPU memory peak (in task manager) will drop by about 1GB to 1.4GB, the maximum diffusion resolution (that will not OOM) will increase about 1.6x, and the maximum diffusion batch size (that will not OOM) will increase about 2x. + +4. If you use ControlNet for SDXL, the maximum ControlNet count (that will not OOM) will increase about 2x, and SDXL+ControlNet inference will **speed up by about 30~45%**. + +Another very important change that Forge brings is the **UNet Patcher**. Using the UNet Patcher, methods like Self-Attention Guidance, Kohya High Res Fix, FreeU, StyleAlign, and Hypertile can all be implemented in about 100 lines of code. + +Thanks to the UNet Patcher, many new things are possible now and supported in Forge, including SVD, Z123, masked IP-Adapter, masked ControlNet, PhotoMaker, etc. + +**No need to monkeypatch the UNet and conflict with other extensions anymore!** + +Forge also adds a few samplers, including but not limited to DDPM, DDPM Karras, DPM++ 2M Turbo, DPM++ 2M SDE Turbo, LCM Karras, Euler A Turbo, etc. (LCM has been in the original webui since 1.7.0). + +Finally, Forge promises to only do its job. Forge will never add unnecessary opinionated changes to the user interface. You are still using 100% Automatic1111 WebUI. + +# Installing Forge + +If you are proficient in Git and you want to install Forge as another branch of SD-WebUI, please see [here](https://github.com/continue-revolution/sd-webui-animatediff/blob/forge/master/docs/how-to-use.md#you-have-a1111-and-you-know-git). In this way, you can reuse all SD checkpoints and all extensions you installed previously in your OG SD-WebUI, but you should know what you are doing.
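+As a rough sketch of that Git-based approach (the remote name `forge` and the local branch name `forge-main` below are arbitrary placeholders, and the Forge repo's default branch is assumed to be `main`; follow the linked guide for the exact recommended steps):
+
+    cd stable-diffusion-webui
+    git remote add forge https://github.com/lllyasviel/stable-diffusion-webui-forge.git
+    git fetch forge
+    git checkout -b forge-main forge/main
+
+Switching back to the original WebUI is then just a `git checkout` of your previous branch; your models and extensions are untracked, so they stay in place either way.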
+ +If you know what you are doing, you can install Forge using the same method as SD-WebUI. (Install Git and Python, git clone the Forge repo `https://github.com/lllyasviel/stable-diffusion-webui-forge.git`, and then run webui-user.bat). + +**Or you can just use this one-click installation package (with git and python included).** + +[>>> Click Here to Download One-Click Package<<<](https://github.com/lllyasviel/stable-diffusion-webui-forge/releases/download/latest/webui_forge_cu121_torch21.7z) + +After you download it, uncompress it, use `update.bat` to update, and use `run.bat` to run. + +Note that running `update.bat` is important; otherwise you may be using an older version with unfixed bugs. + +![image](https://github.com/lllyasviel/stable-diffusion-webui-forge/assets/19834515/c49bd60d-82bd-4086-9859-88d472582b94) + +# Screenshots of Comparison + +I tested with several devices, and this is a typical result from 8GB VRAM (3070 Ti laptop) with SDXL. + +**This is the original WebUI:** + +![image](https://github.com/lllyasviel/stable-diffusion-webui-forge/assets/19834515/16893937-9ed9-4f8e-b960-70cd5d1e288f) + +![image](https://github.com/lllyasviel/stable-diffusion-webui-forge/assets/19834515/7bbc16fe-64ef-49e2-a595-d91bb658bd94) + +![image](https://github.com/lllyasviel/stable-diffusion-webui-forge/assets/19834515/de1747fd-47bc-482d-a5c6-0728dd475943) + +![image](https://github.com/lllyasviel/stable-diffusion-webui-forge/assets/19834515/96e5e171-2d74-41ba-9dcc-11bf68be7e16) + +(average about 7.4GB/8GB, peak at about 7.9GB/8GB) + +**This is WebUI Forge:** + +![image](https://github.com/lllyasviel/stable-diffusion-webui-forge/assets/19834515/ca5e05ed-bd86-4ced-8662-f41034648e8c) + +![image](https://github.com/lllyasviel/stable-diffusion-webui-forge/assets/19834515/3629ee36-4a99-4d9b-b371-12efb260a283) + +![image](https://github.com/lllyasviel/stable-diffusion-webui-forge/assets/19834515/6d13ebb7-c30d-4aa8-9242-c0b5a1af8c95) + +![image](https://github.com/lllyasviel/stable-diffusion-webui-forge/assets/19834515/c4f723c3-6ea7-4539-980b-0708ed2a69aa) + +(average and peak are both 6.3GB/8GB) + +You can see that Forge does not change WebUI results. Installing Forge is not a seed-breaking change. + +Forge can perfectly keep WebUI unchanged even for the most complicated prompts, like `fantasy landscape with a [mountain:lake:0.25] and [an oak:a christmas tree:0.75][ in foreground::0.6][ in background:0.25] [shoddy:masterful:0.5]`. + +All your previous works still work in Forge! + +# Forge Backend + +The Forge backend removes all of WebUI's code related to resource management and reworks everything. All previous CMD flags like `medvram, lowvram, medvram-sdxl, precision full, no half, no half vae, attention_xxx, upcast unet`, ... are all **REMOVED**. Adding these flags will not cause errors, but they no longer do anything. **We highly encourage Forge users to remove all cmd flags and let Forge decide how to load models.** + +Without any cmd flags, Forge can run SDXL with 4GB VRAM and SD1.5 with 2GB VRAM. + +**Some flags that you may still want to pay attention to:** + +1. `--always-offload-from-vram` (This flag will make things **slower** but less risky). This option will make Forge always unload models from VRAM. This can be useful if you use multiple programs together and want Forge to use less VRAM and give some VRAM to other software, or when you are using some old extensions that will compete for VRAM with Forge, or (very rarely) when you get OOM. + +2. 
`--cuda-malloc` (This flag will make things **faster** but more risky). This will ask PyTorch to use *cudaMallocAsync* for tensor malloc. On some profilers I can observe performance gains at the millisecond level, but the real speed-up on most of my devices is often unnoticeable (about or less than 0.1 second per image). This cannot be set as the default because many users have reported that the async malloc crashes the program. Users need to enable this cmd flag at their own risk. + +3. `--cuda-stream` (This flag will make things **faster** but more risky). This will use PyTorch CUDA streams (a special type of thread on the GPU) to move models and compute tensors simultaneously. This can almost eliminate all model-moving time and speed up SDXL on 30XX/40XX devices with small VRAM (e.g., RTX 4050 6GB, RTX 3060 Laptop 6GB, etc) by about 15% to 25%. However, this unfortunately cannot be set as the default because I observe a higher possibility of pure black images (NaN outputs) on the 2060, and a higher chance of OOM on the 1080 and 2060. When the resolution is large, there is a chance that the computation time of one single attention layer is longer than the time for moving the entire model to the GPU. When that happens, the next attention layer will OOM since the GPU is filled with the entire model, and no remaining space is available for computing another attention layer. Most overhead-detection methods are not robust enough to be reliable on old devices (in my tests). Users need to enable this cmd flag at their own risk. + +4. `--pin-shared-memory` (This flag will make things **faster** but more risky). Effective only when used together with `--cuda-stream`. This will offload modules to Shared GPU Memory instead of system RAM when offloading models. On some 30XX/40XX devices with small VRAM (e.g., RTX 4050 6GB, RTX 3060 Laptop 6GB, etc), I can observe a significant (at least 20%) speed-up for SDXL. However, this unfortunately cannot be set as the default because OOM of Shared GPU Memory is a much more severe problem than common GPU memory OOM. PyTorch does not provide any robust method to unload or detect Shared GPU Memory. Once Shared GPU Memory OOMs, the entire program will crash (observed with SDXL on GTX 1060/1050/1066), and there is no dynamic method to prevent or recover from the crash. Users need to enable this cmd flag at their own risk. + +If you really want to play with cmd flags, you can additionally control the GPU with: + +(extreme VRAM cases) + + --always-gpu + --always-cpu + +(rare attention cases) + + --attention-split + --attention-quad + --attention-pytorch + --disable-xformers + --disable-attention-upcast + +(floating point type) + + --all-in-fp32 + --all-in-fp16 + --unet-in-bf16 + --unet-in-fp16 + --unet-in-fp8-e4m3fn + --unet-in-fp8-e5m2 + --vae-in-fp16 + --vae-in-fp32 + --vae-in-bf16 + --clip-in-fp8-e4m3fn + --clip-in-fp8-e5m2 + --clip-in-fp16 + --clip-in-fp32 + +(rare platforms) + + --directml + --disable-ipex-hijack + --pytorch-deterministic + +Again, Forge does not recommend using any cmd flags unless you are very sure that you really need them. + +# UNet Patcher + +Note that [Forge does not use any other software as a backend](https://github.com/lllyasviel/stable-diffusion-webui-forge/discussions/169). The full name of the backend is `Stable Diffusion WebUI with Forge backend`, or for simplicity, the `Forge backend`. The API and Python symbols are made similar to previous software only to reduce the learning cost for developers. + +Now developing an extension is super simple. 
We finally have a patchable UNet. + +Below is using one single file with 80 lines of codes to support FreeU: + +`extensions-builtin/sd_forge_freeu/scripts/forge_freeu.py` + +```python +import torch +import gradio as gr +from modules import scripts + + +def Fourier_filter(x, threshold, scale): + x_freq = torch.fft.fftn(x.float(), dim=(-2, -1)) + x_freq = torch.fft.fftshift(x_freq, dim=(-2, -1)) + B, C, H, W = x_freq.shape + mask = torch.ones((B, C, H, W), device=x.device) + crow, ccol = H // 2, W //2 + mask[..., crow - threshold:crow + threshold, ccol - threshold:ccol + threshold] = scale + x_freq = x_freq * mask + x_freq = torch.fft.ifftshift(x_freq, dim=(-2, -1)) + x_filtered = torch.fft.ifftn(x_freq, dim=(-2, -1)).real + return x_filtered.to(x.dtype) + + +def set_freeu_v2_patch(model, b1, b2, s1, s2): + model_channels = model.model.model_config.unet_config["model_channels"] + scale_dict = {model_channels * 4: (b1, s1), model_channels * 2: (b2, s2)} + + def output_block_patch(h, hsp, *args, **kwargs): + scale = scale_dict.get(h.shape[1], None) + if scale is not None: + hidden_mean = h.mean(1).unsqueeze(1) + B = hidden_mean.shape[0] + hidden_max, _ = torch.max(hidden_mean.view(B, -1), dim=-1, keepdim=True) + hidden_min, _ = torch.min(hidden_mean.view(B, -1), dim=-1, keepdim=True) + hidden_mean = (hidden_mean - hidden_min.unsqueeze(2).unsqueeze(3)) / \ + (hidden_max - hidden_min).unsqueeze(2).unsqueeze(3) + h[:, :h.shape[1] // 2] = h[:, :h.shape[1] // 2] * ((scale[0] - 1) * hidden_mean + 1) + hsp = Fourier_filter(hsp, threshold=1, scale=scale[1]) + return h, hsp + + m = model.clone() + m.set_model_output_block_patch(output_block_patch) + return m + + +class FreeUForForge(scripts.Script): + def title(self): + return "FreeU Integrated" + + def show(self, is_img2img): + # make this extension visible in both txt2img and img2img tab. + return scripts.AlwaysVisible + + def ui(self, *args, **kwargs): + with gr.Accordion(open=False, label=self.title()): + freeu_enabled = gr.Checkbox(label='Enabled', value=False) + freeu_b1 = gr.Slider(label='B1', minimum=0, maximum=2, step=0.01, value=1.01) + freeu_b2 = gr.Slider(label='B2', minimum=0, maximum=2, step=0.01, value=1.02) + freeu_s1 = gr.Slider(label='S1', minimum=0, maximum=4, step=0.01, value=0.99) + freeu_s2 = gr.Slider(label='S2', minimum=0, maximum=4, step=0.01, value=0.95) + + return freeu_enabled, freeu_b1, freeu_b2, freeu_s1, freeu_s2 + + def process_before_every_sampling(self, p, *script_args, **kwargs): + # This will be called before every sampling. + # If you use highres fix, this will be called twice. + + freeu_enabled, freeu_b1, freeu_b2, freeu_s1, freeu_s2 = script_args + + if not freeu_enabled: + return + + unet = p.sd_model.forge_objects.unet + + unet = set_freeu_v2_patch(unet, freeu_b1, freeu_b2, freeu_s1, freeu_s2) + + p.sd_model.forge_objects.unet = unet + + # Below codes will add some logs to the texts below the image outputs on UI. + # The extra_generation_params does not influence results. + p.extra_generation_params.update(dict( + freeu_enabled=freeu_enabled, + freeu_b1=freeu_b1, + freeu_b2=freeu_b2, + freeu_s1=freeu_s1, + freeu_s2=freeu_s2, + )) + + return +``` + +It looks like this: + +![image](https://github.com/lllyasviel/stable-diffusion-webui-forge/assets/19834515/277bac6e-5ea7-4bff-b71a-e55a60cfc03c) + +Similar components like HyperTile, KohyaHighResFix, SAG, can all be implemented within 100 lines of codes (see also the codes). 
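+
+For orientation, the essential pattern behind the example above fits in a few lines. The sketch below is illustrative only (the class name and the checkbox are made up for this README); it simply clones the UNet, registers an output-block patch, and hands the patched clone back to the processing object, exactly like FreeU does:
+
+```python
+import gradio as gr
+from modules import scripts
+
+
+class MinimalUnetPatchForForge(scripts.Script):
+    # A do-nothing skeleton: clone the UNet, register a patch, hand it back.
+
+    def title(self):
+        return "Minimal UNet Patch (skeleton)"
+
+    def show(self, is_img2img):
+        return scripts.AlwaysVisible
+
+    def ui(self, *args, **kwargs):
+        with gr.Accordion(open=False, label=self.title()):
+            enabled = gr.Checkbox(label='Enabled', value=False)
+        return (enabled,)
+
+    def process_before_every_sampling(self, p, *script_args, **kwargs):
+        enabled = script_args[0]
+        if not enabled:
+            return
+
+        def output_block_patch(h, hsp, *args, **kwargs):
+            # Inspect or modify the decoder hidden states here.
+            # Returning them unchanged makes this patch a no-op.
+            return h, hsp
+
+        unet = p.sd_model.forge_objects.unet.clone()  # never patch the shared model in place
+        unet.set_model_output_block_patch(output_block_patch)
+        p.sd_model.forge_objects.unet = unet
+```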
+ +![image](https://github.com/lllyasviel/stable-diffusion-webui-forge/assets/19834515/06472b03-b833-4816-ab47-70712ac024d3) + +ControlNets can finally be called by different extensions. + +Implementing Stable Video Diffusion and Zero123 are also super simple now (see also the codes). + +*Stable Video Diffusion:* + +`extensions-builtin/sd_forge_svd/scripts/forge_svd.py` + +```python +import torch +import gradio as gr +import os +import pathlib + +from modules import script_callbacks +from modules.paths import models_path +from modules.ui_common import ToolButton, refresh_symbol +from modules import shared + +from modules_forge.forge_util import numpy_to_pytorch, pytorch_to_numpy +from ldm_patched.modules.sd import load_checkpoint_guess_config +from ldm_patched.contrib.external_video_model import VideoLinearCFGGuidance, SVD_img2vid_Conditioning +from ldm_patched.contrib.external import KSampler, VAEDecode + + +opVideoLinearCFGGuidance = VideoLinearCFGGuidance() +opSVD_img2vid_Conditioning = SVD_img2vid_Conditioning() +opKSampler = KSampler() +opVAEDecode = VAEDecode() + +svd_root = os.path.join(models_path, 'svd') +os.makedirs(svd_root, exist_ok=True) +svd_filenames = [] + + +def update_svd_filenames(): + global svd_filenames + svd_filenames = [ + pathlib.Path(x).name for x in + shared.walk_files(svd_root, allowed_extensions=[".pt", ".ckpt", ".safetensors"]) + ] + return svd_filenames + + +@torch.inference_mode() +@torch.no_grad() +def predict(filename, width, height, video_frames, motion_bucket_id, fps, augmentation_level, + sampling_seed, sampling_steps, sampling_cfg, sampling_sampler_name, sampling_scheduler, + sampling_denoise, guidance_min_cfg, input_image): + filename = os.path.join(svd_root, filename) + model_raw, _, vae, clip_vision = \ + load_checkpoint_guess_config(filename, output_vae=True, output_clip=False, output_clipvision=True) + model = opVideoLinearCFGGuidance.patch(model_raw, guidance_min_cfg)[0] + init_image = numpy_to_pytorch(input_image) + positive, negative, latent_image = opSVD_img2vid_Conditioning.encode( + clip_vision, init_image, vae, width, height, video_frames, motion_bucket_id, fps, augmentation_level) + output_latent = opKSampler.sample(model, sampling_seed, sampling_steps, sampling_cfg, + sampling_sampler_name, sampling_scheduler, positive, + negative, latent_image, sampling_denoise)[0] + output_pixels = opVAEDecode.decode(vae, output_latent)[0] + outputs = pytorch_to_numpy(output_pixels) + return outputs + + +def on_ui_tabs(): + with gr.Blocks() as svd_block: + with gr.Row(): + with gr.Column(): + input_image = gr.Image(label='Input Image', source='upload', type='numpy', height=400) + + with gr.Row(): + filename = gr.Dropdown(label="SVD Checkpoint Filename", + choices=svd_filenames, + value=svd_filenames[0] if len(svd_filenames) > 0 else None) + refresh_button = ToolButton(value=refresh_symbol, tooltip="Refresh") + refresh_button.click( + fn=lambda: gr.update(choices=update_svd_filenames), + inputs=[], outputs=filename) + + width = gr.Slider(label='Width', minimum=16, maximum=8192, step=8, value=1024) + height = gr.Slider(label='Height', minimum=16, maximum=8192, step=8, value=576) + video_frames = gr.Slider(label='Video Frames', minimum=1, maximum=4096, step=1, value=14) + motion_bucket_id = gr.Slider(label='Motion Bucket Id', minimum=1, maximum=1023, step=1, value=127) + fps = gr.Slider(label='Fps', minimum=1, maximum=1024, step=1, value=6) + augmentation_level = gr.Slider(label='Augmentation Level', minimum=0.0, maximum=10.0, step=0.01, + value=0.0) + 
sampling_steps = gr.Slider(label='Sampling Steps', minimum=1, maximum=200, step=1, value=20) + sampling_cfg = gr.Slider(label='CFG Scale', minimum=0.0, maximum=50.0, step=0.1, value=2.5) + sampling_denoise = gr.Slider(label='Sampling Denoise', minimum=0.0, maximum=1.0, step=0.01, value=1.0) + guidance_min_cfg = gr.Slider(label='Guidance Min Cfg', minimum=0.0, maximum=100.0, step=0.5, value=1.0) + sampling_sampler_name = gr.Radio(label='Sampler Name', + choices=['euler', 'euler_ancestral', 'heun', 'heunpp2', 'dpm_2', + 'dpm_2_ancestral', 'lms', 'dpm_fast', 'dpm_adaptive', + 'dpmpp_2s_ancestral', 'dpmpp_sde', 'dpmpp_sde_gpu', + 'dpmpp_2m', 'dpmpp_2m_sde', 'dpmpp_2m_sde_gpu', + 'dpmpp_3m_sde', 'dpmpp_3m_sde_gpu', 'ddpm', 'lcm', 'ddim', + 'uni_pc', 'uni_pc_bh2'], value='euler') + sampling_scheduler = gr.Radio(label='Scheduler', + choices=['normal', 'karras', 'exponential', 'sgm_uniform', 'simple', + 'ddim_uniform'], value='karras') + sampling_seed = gr.Number(label='Seed', value=12345, precision=0) + + generate_button = gr.Button(value="Generate") + + ctrls = [filename, width, height, video_frames, motion_bucket_id, fps, augmentation_level, + sampling_seed, sampling_steps, sampling_cfg, sampling_sampler_name, sampling_scheduler, + sampling_denoise, guidance_min_cfg, input_image] + + with gr.Column(): + output_gallery = gr.Gallery(label='Gallery', show_label=False, object_fit='contain', + visible=True, height=1024, columns=4) + + generate_button.click(predict, inputs=ctrls, outputs=[output_gallery]) + return [(svd_block, "SVD", "svd")] + + +update_svd_filenames() +script_callbacks.on_ui_tabs(on_ui_tabs) +``` + +Note that although the above codes look like independent codes, they actually will automatically offload/unload any other models. For example, below is me opening webui, load SDXL, generated an image, then go to SVD, then generated image frames. You can see that the GPU memory is perfectly managed and the SDXL is moved to RAM then SVD is moved to GPU. + +Note that this management is fully automatic. This makes writing extensions super simple. + +![image](https://github.com/lllyasviel/stable-diffusion-webui-forge/assets/19834515/de1a2d05-344a-44d7-bab8-9ecc0a58a8d3) + +![image](https://github.com/lllyasviel/stable-diffusion-webui-forge/assets/19834515/14bcefcf-599f-42c3-bce9-3fd5e428dd91) + +Similarly, Zero123: + +![image](https://github.com/lllyasviel/stable-diffusion-webui-forge/assets/19834515/7685019c-7239-47fb-9cb5-2b7b33943285) + +### Write a simple ControlNet: + +Below is a simple extension to have a completely independent pass of ControlNet that never conflicts any other extensions: + +`extensions-builtin/sd_forge_controlnet_example/scripts/sd_forge_controlnet_example.py` + +Note that this extension is hidden because it is only for developers. To see it in UI, use `--show-controlnet-example`. + +The memory optimization in this example is fully automatic. You do not need to care about memory and inference speed, but you may want to cache objects if you wish. + +```python +# Use --show-controlnet-example to see this extension. 
+ +import cv2 +import gradio as gr +import torch + +from modules import scripts +from modules.shared_cmd_options import cmd_opts +from modules_forge.shared import supported_preprocessors +from modules.modelloader import load_file_from_url +from ldm_patched.modules.controlnet import load_controlnet +from modules_forge.controlnet import apply_controlnet_advanced +from modules_forge.forge_util import numpy_to_pytorch +from modules_forge.shared import controlnet_dir + + +class ControlNetExampleForge(scripts.Script): + model = None + + def title(self): + return "ControlNet Example for Developers" + + def show(self, is_img2img): + # make this extension visible in both txt2img and img2img tab. + return scripts.AlwaysVisible + + def ui(self, *args, **kwargs): + with gr.Accordion(open=False, label=self.title()): + gr.HTML('This is an example controlnet extension for developers.') + gr.HTML('You see this extension because you used --show-controlnet-example') + input_image = gr.Image(source='upload', type='numpy') + funny_slider = gr.Slider(label='This slider does nothing. It just shows you how to transfer parameters.', + minimum=0.0, maximum=1.0, value=0.5) + + return input_image, funny_slider + + def process(self, p, *script_args, **kwargs): + input_image, funny_slider = script_args + + # This slider does nothing. It just shows you how to transfer parameters. + del funny_slider + + if input_image is None: + return + + # controlnet_canny_path = load_file_from_url( + # url='https://huggingface.co/lllyasviel/sd_control_collection/resolve/main/sai_xl_canny_256lora.safetensors', + # model_dir=model_dir, + # file_name='sai_xl_canny_256lora.safetensors' + # ) + controlnet_canny_path = load_file_from_url( + url='https://huggingface.co/lllyasviel/fav_models/resolve/main/fav/control_v11p_sd15_canny_fp16.safetensors', + model_dir=controlnet_dir, + file_name='control_v11p_sd15_canny_fp16.safetensors' + ) + print('The model [control_v11p_sd15_canny_fp16.safetensors] download finished.') + + self.model = load_controlnet(controlnet_canny_path) + print('Controlnet loaded.') + + return + + def process_before_every_sampling(self, p, *script_args, **kwargs): + # This will be called before every sampling. + # If you use highres fix, this will be called twice. + + input_image, funny_slider = script_args + + if input_image is None or self.model is None: + return + + B, C, H, W = kwargs['noise'].shape # latent_shape + height = H * 8 + width = W * 8 + batch_size = p.batch_size + + preprocessor = supported_preprocessors['canny'] + + # detect control at certain resolution + control_image = preprocessor( + input_image, resolution=512, slider_1=100, slider_2=200, slider_3=None) + + # here we just use nearest neighbour to align input shape. + # You may want crop and resize, or crop and fill, or others. + control_image = cv2.resize( + control_image, (width, height), interpolation=cv2.INTER_NEAREST) + + # Output preprocessor result. Now called every sampling. Cache in your own way. + p.extra_result_images.append(control_image) + + print('Preprocessor Canny finished.') + + control_image_bchw = numpy_to_pytorch(control_image).movedim(-1, 1) + + unet = p.sd_model.forge_objects.unet + + # Unet has input, middle, output blocks, and we can give different weights + # to each layers in all blocks. + # Below is an example for stronger control in middle block. + # This is helpful for some high-res fix passes. 
(p.is_hr_pass) + positive_advanced_weighting = { + 'input': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2], + 'middle': [1.0], + 'output': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2] + } + negative_advanced_weighting = { + 'input': [0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 1.05, 1.15, 1.25], + 'middle': [1.05], + 'output': [0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 1.05, 1.15, 1.25] + } + + # The advanced_frame_weighting is a weight applied to each image in a batch. + # The length of this list must be same with batch size + # For example, if batch size is 5, the below list is [0.2, 0.4, 0.6, 0.8, 1.0] + # If you view the 5 images as 5 frames in a video, this will lead to + # progressively stronger control over time. + advanced_frame_weighting = [float(i + 1) / float(batch_size) for i in range(batch_size)] + + # The advanced_sigma_weighting allows you to dynamically compute control + # weights given diffusion timestep (sigma). + # For example below code can softly make beginning steps stronger than ending steps. + sigma_max = unet.model.model_sampling.sigma_max + sigma_min = unet.model.model_sampling.sigma_min + advanced_sigma_weighting = lambda s: (s - sigma_min) / (sigma_max - sigma_min) + + # You can even input a tensor to mask all control injections + # The mask will be automatically resized during inference in UNet. + # The size should be B 1 H W and the H and W are not important + # because they will be resized automatically + advanced_mask_weighting = torch.ones(size=(1, 1, 512, 512)) + + # But in this simple example we do not use them + positive_advanced_weighting = None + negative_advanced_weighting = None + advanced_frame_weighting = None + advanced_sigma_weighting = None + advanced_mask_weighting = None + + unet = apply_controlnet_advanced(unet=unet, controlnet=self.model, image_bchw=control_image_bchw, + strength=0.6, start_percent=0.0, end_percent=0.8, + positive_advanced_weighting=positive_advanced_weighting, + negative_advanced_weighting=negative_advanced_weighting, + advanced_frame_weighting=advanced_frame_weighting, + advanced_sigma_weighting=advanced_sigma_weighting, + advanced_mask_weighting=advanced_mask_weighting) + + p.sd_model.forge_objects.unet = unet + + # Below codes will add some logs to the texts below the image outputs on UI. + # The extra_generation_params does not influence results. + p.extra_generation_params.update(dict( + controlnet_info='You should see these texts below output images!', + )) + + return + + +# Use --show-controlnet-example to see this extension. +if not cmd_opts.show_controlnet_example: + del ControlNetExampleForge + +``` + +![image](https://github.com/lllyasviel/stable-diffusion-webui-forge/assets/19834515/822fa2fc-c9f4-4f58-8669-4b6680b91063) + + +### Add a preprocessor + +Below is the full codes to add a normalbae preprocessor with perfect memory managements. + +You can use arbitrary independent extensions to add a preprocessor. 
+ +Your preprocessor will be read by all other extensions using `modules_forge.shared.preprocessors` + +Below codes are in `extensions-builtin\forge_preprocessor_normalbae\scripts\preprocessor_normalbae.py` + +```python +from modules_forge.supported_preprocessor import Preprocessor, PreprocessorParameter +from modules_forge.shared import preprocessor_dir, add_supported_preprocessor +from modules_forge.forge_util import resize_image_with_pad +from modules.modelloader import load_file_from_url + +import types +import torch +import numpy as np + +from einops import rearrange +from annotator.normalbae.models.NNET import NNET +from annotator.normalbae import load_checkpoint +from torchvision import transforms + + +class PreprocessorNormalBae(Preprocessor): + def __init__(self): + super().__init__() + self.name = 'normalbae' + self.tags = ['NormalMap'] + self.model_filename_filters = ['normal'] + self.slider_resolution = PreprocessorParameter( + label='Resolution', minimum=128, maximum=2048, value=512, step=8, visible=True) + self.slider_1 = PreprocessorParameter(visible=False) + self.slider_2 = PreprocessorParameter(visible=False) + self.slider_3 = PreprocessorParameter(visible=False) + self.show_control_mode = True + self.do_not_need_model = False + self.sorting_priority = 100 # higher goes to top in the list + + def load_model(self): + if self.model_patcher is not None: + return + + model_path = load_file_from_url( + "https://huggingface.co/lllyasviel/Annotators/resolve/main/scannet.pt", + model_dir=preprocessor_dir) + + args = types.SimpleNamespace() + args.mode = 'client' + args.architecture = 'BN' + args.pretrained = 'scannet' + args.sampling_ratio = 0.4 + args.importance_ratio = 0.7 + model = NNET(args) + model = load_checkpoint(model_path, model) + self.norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + + self.model_patcher = self.setup_model_patcher(model) + + def __call__(self, input_image, resolution, slider_1=None, slider_2=None, slider_3=None, **kwargs): + input_image, remove_pad = resize_image_with_pad(input_image, resolution) + + self.load_model() + + self.move_all_model_patchers_to_gpu() + + assert input_image.ndim == 3 + image_normal = input_image + + with torch.no_grad(): + image_normal = self.send_tensor_to_model_device(torch.from_numpy(image_normal)) + image_normal = image_normal / 255.0 + image_normal = rearrange(image_normal, 'h w c -> 1 c h w') + image_normal = self.norm(image_normal) + + normal = self.model_patcher.model(image_normal) + normal = normal[0][-1][:, :3] + normal = ((normal + 1) * 0.5).clip(0, 1) + + normal = rearrange(normal[0], 'c h w -> h w c').cpu().numpy() + normal_image = (normal * 255.0).clip(0, 255).astype(np.uint8) + + return remove_pad(normal_image) + + +add_supported_preprocessor(PreprocessorNormalBae()) + +``` + +# New features (that are not available in original WebUI) + +Thanks to Unet Patcher, many new things are possible now and supported in Forge, including SVD, Z123, masked Ip-adapter, masked controlnet, photomaker, etc. 
+
+Masked IP-Adapter
+
+![image](https://github.com/lllyasviel/stable-diffusion-webui-forge/assets/19834515/d26630f9-922d-4483-8bf9-f364dca5fd50)
+
+![image](https://github.com/lllyasviel/stable-diffusion-webui-forge/assets/19834515/03580ef7-235c-4b03-9ca6-a27677a5a175)
+
+![image](https://github.com/lllyasviel/stable-diffusion-webui-forge/assets/19834515/d9ed4a01-70d4-45b4-a6a7-2f765f158fae)
+
+Masked ControlNet
+
+![image](https://github.com/lllyasviel/stable-diffusion-webui-forge/assets/19834515/872d4785-60e4-4431-85c7-665c781dddaa)
+
+![image](https://github.com/lllyasviel/stable-diffusion-webui-forge/assets/19834515/335a3b33-1ef8-46ff-a462-9f1b4f2c49fc)
+
+![image](https://github.com/lllyasviel/stable-diffusion-webui-forge/assets/19834515/b3684a15-8895-414e-8188-487269dfcada)
+
+PhotoMaker
+
+(Note that PhotoMaker is a special control that requires you to add the trigger word "photomaker" to your prompt, e.g. "a photo of photomaker".)
+
+![image](https://github.com/lllyasviel/stable-diffusion-webui-forge/assets/19834515/07b0b626-05b5-473b-9d69-3657624d59be)
+
+Marigold Depth
+
+![image](https://github.com/lllyasviel/stable-diffusion-webui-forge/assets/19834515/bdf54148-892d-410d-8ed9-70b4b121b6e7)
+
+# New Samplers (that are not in the original WebUI)
+
+    DDPM
+    DDPM Karras
+    DPM++ 2M Turbo
+    DPM++ 2M SDE Turbo
+    LCM Karras
+    Euler A Turbo
+
+# About Extensions
+
+ControlNet and TiledVAE are integrated, and you should uninstall these two extensions:
+
+    sd-webui-controlnet
+    multidiffusion-upscaler-for-automatic1111
+
+Note that **AnimateDiff** is under construction by [continue-revolution](https://github.com/continue-revolution) at the [sd-webui-animatediff forge/master branch](https://github.com/continue-revolution/sd-webui-animatediff/tree/forge/master) and [sd-forge-animatediff](https://github.com/continue-revolution/sd-forge-animatediff) (they are in sync). (continue-revolution's original words: "prompt travel, inf t2v, controlnet v2v have been proven to work well; motion lora, i2i batch still under construction and may be finished in a week")
+
+Other extensions should work without problems, such as:
+
+    canvas-zoom
+    translations/localizations
+    Dynamic Prompts
+    Adetailer
+    Ultimate SD Upscale
+    Reactor
+
+However, if newer extensions are written for Forge, their code can be much shorter.
+
+Usually, when an old extension is reworked to use Forge's UNet patcher, about 80% of its code can be removed, especially when it needs to call ControlNet.
+
+# Contribution
+
+Forge uses a bot to pull commits and code from https://github.com/AUTOMATIC1111/stable-diffusion-webui/tree/dev every afternoon (if the merge succeeds automatically via a git bot, my compiler, or my ChatGPT bot) or at midnight (if both my compiler and my ChatGPT bot fail to merge and I review it manually).
+
+All PRs that can be implemented in https://github.com/AUTOMATIC1111/stable-diffusion-webui/tree/dev should be submitted there.
+
+Feel free to submit PRs related to the functionality of Forge here.
diff --git a/configs/alt-diffusion-inference.yaml b/configs/alt-diffusion-inference.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cfbee72d71bfd7deed2075e423ca51bd1da0521c --- /dev/null +++ b/configs/alt-diffusion-inference.yaml @@ -0,0 +1,72 @@ +model: + base_learning_rate: 1.0e-04 + target: ldm.models.diffusion.ddpm.LatentDiffusion + params: + linear_start: 0.00085 + linear_end: 0.0120 + num_timesteps_cond: 1 + log_every_t: 200 + timesteps: 1000 + first_stage_key: "jpg" + cond_stage_key: "txt" + image_size: 64 + channels: 4 + cond_stage_trainable: false # Note: different from the one we trained before + conditioning_key: crossattn + monitor: val/loss_simple_ema + scale_factor: 0.18215 + use_ema: False + + scheduler_config: # 10000 warmup steps + target: ldm.lr_scheduler.LambdaLinearScheduler + params: + warm_up_steps: [ 10000 ] + cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases + f_start: [ 1.e-6 ] + f_max: [ 1. ] + f_min: [ 1. ] + + unet_config: + target: ldm.modules.diffusionmodules.openaimodel.UNetModel + params: + image_size: 32 # unused + in_channels: 4 + out_channels: 4 + model_channels: 320 + attention_resolutions: [ 4, 2, 1 ] + num_res_blocks: 2 + channel_mult: [ 1, 2, 4, 4 ] + num_heads: 8 + use_spatial_transformer: True + transformer_depth: 1 + context_dim: 768 + use_checkpoint: True + legacy: False + + first_stage_config: + target: ldm.models.autoencoder.AutoencoderKL + params: + embed_dim: 4 + monitor: val/rec_loss + ddconfig: + double_z: true + z_channels: 4 + resolution: 256 + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: + - 1 + - 2 + - 4 + - 4 + num_res_blocks: 2 + attn_resolutions: [] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity + + cond_stage_config: + target: modules.xlmr.BertSeriesModelWithTransformation + params: + name: "XLMR-Large" \ No newline at end of file diff --git a/configs/alt-diffusion-m18-inference.yaml b/configs/alt-diffusion-m18-inference.yaml new file mode 100644 index 0000000000000000000000000000000000000000..41a031d55f03b9946b543e930b881017c7e1cca6 --- /dev/null +++ b/configs/alt-diffusion-m18-inference.yaml @@ -0,0 +1,73 @@ +model: + base_learning_rate: 1.0e-04 + target: ldm.models.diffusion.ddpm.LatentDiffusion + params: + linear_start: 0.00085 + linear_end: 0.0120 + num_timesteps_cond: 1 + log_every_t: 200 + timesteps: 1000 + first_stage_key: "jpg" + cond_stage_key: "txt" + image_size: 64 + channels: 4 + cond_stage_trainable: false # Note: different from the one we trained before + conditioning_key: crossattn + monitor: val/loss_simple_ema + scale_factor: 0.18215 + use_ema: False + + scheduler_config: # 10000 warmup steps + target: ldm.lr_scheduler.LambdaLinearScheduler + params: + warm_up_steps: [ 10000 ] + cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases + f_start: [ 1.e-6 ] + f_max: [ 1. ] + f_min: [ 1. 
] + + unet_config: + target: ldm.modules.diffusionmodules.openaimodel.UNetModel + params: + image_size: 32 # unused + in_channels: 4 + out_channels: 4 + model_channels: 320 + attention_resolutions: [ 4, 2, 1 ] + num_res_blocks: 2 + channel_mult: [ 1, 2, 4, 4 ] + num_head_channels: 64 + use_spatial_transformer: True + use_linear_in_transformer: True + transformer_depth: 1 + context_dim: 1024 + use_checkpoint: True + legacy: False + + first_stage_config: + target: ldm.models.autoencoder.AutoencoderKL + params: + embed_dim: 4 + monitor: val/rec_loss + ddconfig: + double_z: true + z_channels: 4 + resolution: 256 + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: + - 1 + - 2 + - 4 + - 4 + num_res_blocks: 2 + attn_resolutions: [] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity + + cond_stage_config: + target: modules.xlmr_m18.BertSeriesModelWithTransformation + params: + name: "XLMR-Large" diff --git a/configs/instruct-pix2pix.yaml b/configs/instruct-pix2pix.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4e896879dd7ac5697b89cb323ec43eb41c03596c --- /dev/null +++ b/configs/instruct-pix2pix.yaml @@ -0,0 +1,98 @@ +# File modified by authors of InstructPix2Pix from original (https://github.com/CompVis/stable-diffusion). +# See more details in LICENSE. + +model: + base_learning_rate: 1.0e-04 + target: modules.models.diffusion.ddpm_edit.LatentDiffusion + params: + linear_start: 0.00085 + linear_end: 0.0120 + num_timesteps_cond: 1 + log_every_t: 200 + timesteps: 1000 + first_stage_key: edited + cond_stage_key: edit + # image_size: 64 + # image_size: 32 + image_size: 16 + channels: 4 + cond_stage_trainable: false # Note: different from the one we trained before + conditioning_key: hybrid + monitor: val/loss_simple_ema + scale_factor: 0.18215 + use_ema: false + + scheduler_config: # 10000 warmup steps + target: ldm.lr_scheduler.LambdaLinearScheduler + params: + warm_up_steps: [ 0 ] + cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases + f_start: [ 1.e-6 ] + f_max: [ 1. ] + f_min: [ 1. 
] + + unet_config: + target: ldm.modules.diffusionmodules.openaimodel.UNetModel + params: + image_size: 32 # unused + in_channels: 8 + out_channels: 4 + model_channels: 320 + attention_resolutions: [ 4, 2, 1 ] + num_res_blocks: 2 + channel_mult: [ 1, 2, 4, 4 ] + num_heads: 8 + use_spatial_transformer: True + transformer_depth: 1 + context_dim: 768 + use_checkpoint: True + legacy: False + + first_stage_config: + target: ldm.models.autoencoder.AutoencoderKL + params: + embed_dim: 4 + monitor: val/rec_loss + ddconfig: + double_z: true + z_channels: 4 + resolution: 256 + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: + - 1 + - 2 + - 4 + - 4 + num_res_blocks: 2 + attn_resolutions: [] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity + + cond_stage_config: + target: ldm.modules.encoders.modules.FrozenCLIPEmbedder + +data: + target: main.DataModuleFromConfig + params: + batch_size: 128 + num_workers: 1 + wrap: false + validation: + target: edit_dataset.EditDataset + params: + path: data/clip-filtered-dataset + cache_dir: data/ + cache_name: data_10k + split: val + min_text_sim: 0.2 + min_image_sim: 0.75 + min_direction_sim: 0.2 + max_samples_per_prompt: 1 + min_resize_res: 512 + max_resize_res: 512 + crop_res: 512 + output_as_edit: False + real_input: True diff --git a/configs/sd_xl_inpaint.yaml b/configs/sd_xl_inpaint.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3bad372186f1142cba61fe78cf2e572df0b1d50b --- /dev/null +++ b/configs/sd_xl_inpaint.yaml @@ -0,0 +1,98 @@ +model: + target: sgm.models.diffusion.DiffusionEngine + params: + scale_factor: 0.13025 + disable_first_stage_autocast: True + + denoiser_config: + target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiser + params: + num_idx: 1000 + + weighting_config: + target: sgm.modules.diffusionmodules.denoiser_weighting.EpsWeighting + scaling_config: + target: sgm.modules.diffusionmodules.denoiser_scaling.EpsScaling + discretization_config: + target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization + + network_config: + target: sgm.modules.diffusionmodules.openaimodel.UNetModel + params: + adm_in_channels: 2816 + num_classes: sequential + use_checkpoint: True + in_channels: 9 + out_channels: 4 + model_channels: 320 + attention_resolutions: [4, 2] + num_res_blocks: 2 + channel_mult: [1, 2, 4] + num_head_channels: 64 + use_spatial_transformer: True + use_linear_in_transformer: True + transformer_depth: [1, 2, 10] # note: the first is unused (due to attn_res starting at 2) 32, 16, 8 --> 64, 32, 16 + context_dim: 2048 + spatial_transformer_attn_type: softmax-xformers + legacy: False + + conditioner_config: + target: sgm.modules.GeneralConditioner + params: + emb_models: + # crossattn cond + - is_trainable: False + input_key: txt + target: sgm.modules.encoders.modules.FrozenCLIPEmbedder + params: + layer: hidden + layer_idx: 11 + # crossattn and vector cond + - is_trainable: False + input_key: txt + target: sgm.modules.encoders.modules.FrozenOpenCLIPEmbedder2 + params: + arch: ViT-bigG-14 + version: laion2b_s39b_b160k + freeze: True + layer: penultimate + always_return_pooled: True + legacy: False + # vector cond + - is_trainable: False + input_key: original_size_as_tuple + target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND + params: + outdim: 256 # multiplied by two + # vector cond + - is_trainable: False + input_key: crop_coords_top_left + target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND + params: + outdim: 256 # multiplied by two + # vector cond + - is_trainable: 
False + input_key: target_size_as_tuple + target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND + params: + outdim: 256 # multiplied by two + + first_stage_config: + target: sgm.models.autoencoder.AutoencoderKLInferenceWrapper + params: + embed_dim: 4 + monitor: val/rec_loss + ddconfig: + attn_type: vanilla-xformers + double_z: true + z_channels: 4 + resolution: 256 + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: [1, 2, 4, 4] + num_res_blocks: 2 + attn_resolutions: [] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity diff --git a/configs/v1-inference.yaml b/configs/v1-inference.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d4effe569e897369918625f9d8be5603a0e6a0d6 --- /dev/null +++ b/configs/v1-inference.yaml @@ -0,0 +1,70 @@ +model: + base_learning_rate: 1.0e-04 + target: ldm.models.diffusion.ddpm.LatentDiffusion + params: + linear_start: 0.00085 + linear_end: 0.0120 + num_timesteps_cond: 1 + log_every_t: 200 + timesteps: 1000 + first_stage_key: "jpg" + cond_stage_key: "txt" + image_size: 64 + channels: 4 + cond_stage_trainable: false # Note: different from the one we trained before + conditioning_key: crossattn + monitor: val/loss_simple_ema + scale_factor: 0.18215 + use_ema: False + + scheduler_config: # 10000 warmup steps + target: ldm.lr_scheduler.LambdaLinearScheduler + params: + warm_up_steps: [ 10000 ] + cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases + f_start: [ 1.e-6 ] + f_max: [ 1. ] + f_min: [ 1. ] + + unet_config: + target: ldm.modules.diffusionmodules.openaimodel.UNetModel + params: + image_size: 32 # unused + in_channels: 4 + out_channels: 4 + model_channels: 320 + attention_resolutions: [ 4, 2, 1 ] + num_res_blocks: 2 + channel_mult: [ 1, 2, 4, 4 ] + num_heads: 8 + use_spatial_transformer: True + transformer_depth: 1 + context_dim: 768 + use_checkpoint: True + legacy: False + + first_stage_config: + target: ldm.models.autoencoder.AutoencoderKL + params: + embed_dim: 4 + monitor: val/rec_loss + ddconfig: + double_z: true + z_channels: 4 + resolution: 256 + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: + - 1 + - 2 + - 4 + - 4 + num_res_blocks: 2 + attn_resolutions: [] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity + + cond_stage_config: + target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/configs/v1-inpainting-inference.yaml b/configs/v1-inpainting-inference.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f9eec37d24bce33ce92320a782d16ae72308190a --- /dev/null +++ b/configs/v1-inpainting-inference.yaml @@ -0,0 +1,70 @@ +model: + base_learning_rate: 7.5e-05 + target: ldm.models.diffusion.ddpm.LatentInpaintDiffusion + params: + linear_start: 0.00085 + linear_end: 0.0120 + num_timesteps_cond: 1 + log_every_t: 200 + timesteps: 1000 + first_stage_key: "jpg" + cond_stage_key: "txt" + image_size: 64 + channels: 4 + cond_stage_trainable: false # Note: different from the one we trained before + conditioning_key: hybrid # important + monitor: val/loss_simple_ema + scale_factor: 0.18215 + finetune_keys: null + + scheduler_config: # 10000 warmup steps + target: ldm.lr_scheduler.LambdaLinearScheduler + params: + warm_up_steps: [ 2500 ] # NOTE for resuming. use 10000 if starting from scratch + cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases + f_start: [ 1.e-6 ] + f_max: [ 1. ] + f_min: [ 1. 
] + + unet_config: + target: ldm.modules.diffusionmodules.openaimodel.UNetModel + params: + image_size: 32 # unused + in_channels: 9 # 4 data + 4 downscaled image + 1 mask + out_channels: 4 + model_channels: 320 + attention_resolutions: [ 4, 2, 1 ] + num_res_blocks: 2 + channel_mult: [ 1, 2, 4, 4 ] + num_heads: 8 + use_spatial_transformer: True + transformer_depth: 1 + context_dim: 768 + use_checkpoint: True + legacy: False + + first_stage_config: + target: ldm.models.autoencoder.AutoencoderKL + params: + embed_dim: 4 + monitor: val/rec_loss + ddconfig: + double_z: true + z_channels: 4 + resolution: 256 + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: + - 1 + - 2 + - 4 + - 4 + num_res_blocks: 2 + attn_resolutions: [] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity + + cond_stage_config: + target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/environment-wsl2.yaml b/environment-wsl2.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0c4ae6809997ec38e7cf62cf0f71360b8cb61a7e --- /dev/null +++ b/environment-wsl2.yaml @@ -0,0 +1,11 @@ +name: automatic +channels: + - pytorch + - defaults +dependencies: + - python=3.10 + - pip=23.0 + - cudatoolkit=11.8 + - pytorch=2.0 + - torchvision=0.15 + - numpy=1.23 diff --git a/extensions-builtin/LDSR/ldsr_model_arch.py b/extensions-builtin/LDSR/ldsr_model_arch.py new file mode 100644 index 0000000000000000000000000000000000000000..7cac36ce55ae295c6d0e444a93ea12bf8cfe893c --- /dev/null +++ b/extensions-builtin/LDSR/ldsr_model_arch.py @@ -0,0 +1,250 @@ +import os +import gc +import time + +import numpy as np +import torch +import torchvision +from PIL import Image +from einops import rearrange, repeat +from omegaconf import OmegaConf +import safetensors.torch + +from ldm.models.diffusion.ddim import DDIMSampler +from ldm.util import instantiate_from_config, ismap +from modules import shared, sd_hijack, devices + +cached_ldsr_model: torch.nn.Module = None + + +# Create LDSR Class +class LDSR: + def load_model_from_config(self, half_attention): + global cached_ldsr_model + + if shared.opts.ldsr_cached and cached_ldsr_model is not None: + print("Loading model from cache") + model: torch.nn.Module = cached_ldsr_model + else: + print(f"Loading model from {self.modelPath}") + _, extension = os.path.splitext(self.modelPath) + if extension.lower() == ".safetensors": + pl_sd = safetensors.torch.load_file(self.modelPath, device="cpu") + else: + pl_sd = torch.load(self.modelPath, map_location="cpu") + sd = pl_sd["state_dict"] if "state_dict" in pl_sd else pl_sd + config = OmegaConf.load(self.yamlPath) + config.model.target = "ldm.models.diffusion.ddpm.LatentDiffusionV1" + model: torch.nn.Module = instantiate_from_config(config.model) + model.load_state_dict(sd, strict=False) + model = model.to(shared.device) + if half_attention: + model = model.half() + if shared.cmd_opts.opt_channelslast: + model = model.to(memory_format=torch.channels_last) + + sd_hijack.model_hijack.hijack(model) # apply optimization + model.eval() + + if shared.opts.ldsr_cached: + cached_ldsr_model = model + + return {"model": model} + + def __init__(self, model_path, yaml_path): + self.modelPath = model_path + self.yamlPath = yaml_path + + @staticmethod + def run(model, selected_path, custom_steps, eta): + example = get_cond(selected_path) + + n_runs = 1 + guider = None + ckwargs = None + ddim_use_x0_pred = False + temperature = 1. 
+ eta = eta + custom_shape = None + + height, width = example["image"].shape[1:3] + split_input = height >= 128 and width >= 128 + + if split_input: + ks = 128 + stride = 64 + vqf = 4 # + model.split_input_params = {"ks": (ks, ks), "stride": (stride, stride), + "vqf": vqf, + "patch_distributed_vq": True, + "tie_braker": False, + "clip_max_weight": 0.5, + "clip_min_weight": 0.01, + "clip_max_tie_weight": 0.5, + "clip_min_tie_weight": 0.01} + else: + if hasattr(model, "split_input_params"): + delattr(model, "split_input_params") + + x_t = None + logs = None + for _ in range(n_runs): + if custom_shape is not None: + x_t = torch.randn(1, custom_shape[1], custom_shape[2], custom_shape[3]).to(model.device) + x_t = repeat(x_t, '1 c h w -> b c h w', b=custom_shape[0]) + + logs = make_convolutional_sample(example, model, + custom_steps=custom_steps, + eta=eta, quantize_x0=False, + custom_shape=custom_shape, + temperature=temperature, noise_dropout=0., + corrector=guider, corrector_kwargs=ckwargs, x_T=x_t, + ddim_use_x0_pred=ddim_use_x0_pred + ) + return logs + + def super_resolution(self, image, steps=100, target_scale=2, half_attention=False): + model = self.load_model_from_config(half_attention) + + # Run settings + diffusion_steps = int(steps) + eta = 1.0 + + + gc.collect() + devices.torch_gc() + + im_og = image + width_og, height_og = im_og.size + # If we can adjust the max upscale size, then the 4 below should be our variable + down_sample_rate = target_scale / 4 + wd = width_og * down_sample_rate + hd = height_og * down_sample_rate + width_downsampled_pre = int(np.ceil(wd)) + height_downsampled_pre = int(np.ceil(hd)) + + if down_sample_rate != 1: + print( + f'Downsampling from [{width_og}, {height_og}] to [{width_downsampled_pre}, {height_downsampled_pre}]') + im_og = im_og.resize((width_downsampled_pre, height_downsampled_pre), Image.LANCZOS) + else: + print(f"Down sample rate is 1 from {target_scale} / 4 (Not downsampling)") + + # pad width and height to multiples of 64, pads with the edge values of image to avoid artifacts + pad_w, pad_h = np.max(((2, 2), np.ceil(np.array(im_og.size) / 64).astype(int)), axis=0) * 64 - im_og.size + im_padded = Image.fromarray(np.pad(np.array(im_og), ((0, pad_h), (0, pad_w), (0, 0)), mode='edge')) + + logs = self.run(model["model"], im_padded, diffusion_steps, eta) + + sample = logs["sample"] + sample = sample.detach().cpu() + sample = torch.clamp(sample, -1., 1.) + sample = (sample + 1.) / 2. * 255 + sample = sample.numpy().astype(np.uint8) + sample = np.transpose(sample, (0, 2, 3, 1)) + a = Image.fromarray(sample[0]) + + # remove padding + a = a.crop((0, 0) + tuple(np.array(im_og.size) * 4)) + + del model + gc.collect() + devices.torch_gc() + + return a + + +def get_cond(selected_path): + example = {} + up_f = 4 + c = selected_path.convert('RGB') + c = torch.unsqueeze(torchvision.transforms.ToTensor()(c), 0) + c_up = torchvision.transforms.functional.resize(c, size=[up_f * c.shape[2], up_f * c.shape[3]], + antialias=True) + c_up = rearrange(c_up, '1 c h w -> 1 h w c') + c = rearrange(c, '1 c h w -> 1 h w c') + c = 2. * c - 1. 
+ + c = c.to(shared.device) + example["LR_image"] = c + example["image"] = c_up + + return example + + +@torch.no_grad() +def convsample_ddim(model, cond, steps, shape, eta=1.0, callback=None, normals_sequence=None, + mask=None, x0=None, quantize_x0=False, temperature=1., score_corrector=None, + corrector_kwargs=None, x_t=None + ): + ddim = DDIMSampler(model) + bs = shape[0] + shape = shape[1:] + print(f"Sampling with eta = {eta}; steps: {steps}") + samples, intermediates = ddim.sample(steps, batch_size=bs, shape=shape, conditioning=cond, callback=callback, + normals_sequence=normals_sequence, quantize_x0=quantize_x0, eta=eta, + mask=mask, x0=x0, temperature=temperature, verbose=False, + score_corrector=score_corrector, + corrector_kwargs=corrector_kwargs, x_t=x_t) + + return samples, intermediates + + +@torch.no_grad() +def make_convolutional_sample(batch, model, custom_steps=None, eta=1.0, quantize_x0=False, custom_shape=None, temperature=1., noise_dropout=0., corrector=None, + corrector_kwargs=None, x_T=None, ddim_use_x0_pred=False): + log = {} + + z, c, x, xrec, xc = model.get_input(batch, model.first_stage_key, + return_first_stage_outputs=True, + force_c_encode=not (hasattr(model, 'split_input_params') + and model.cond_stage_key == 'coordinates_bbox'), + return_original_cond=True) + + if custom_shape is not None: + z = torch.randn(custom_shape) + print(f"Generating {custom_shape[0]} samples of shape {custom_shape[1:]}") + + z0 = None + + log["input"] = x + log["reconstruction"] = xrec + + if ismap(xc): + log["original_conditioning"] = model.to_rgb(xc) + if hasattr(model, 'cond_stage_key'): + log[model.cond_stage_key] = model.to_rgb(xc) + + else: + log["original_conditioning"] = xc if xc is not None else torch.zeros_like(x) + if model.cond_stage_model: + log[model.cond_stage_key] = xc if xc is not None else torch.zeros_like(x) + if model.cond_stage_key == 'class_label': + log[model.cond_stage_key] = xc[model.cond_stage_key] + + with model.ema_scope("Plotting"): + t0 = time.time() + + sample, intermediates = convsample_ddim(model, c, steps=custom_steps, shape=z.shape, + eta=eta, + quantize_x0=quantize_x0, mask=None, x0=z0, + temperature=temperature, score_corrector=corrector, corrector_kwargs=corrector_kwargs, + x_t=x_T) + t1 = time.time() + + if ddim_use_x0_pred: + sample = intermediates['pred_x0'][-1] + + x_sample = model.decode_first_stage(sample) + + try: + x_sample_noquant = model.decode_first_stage(sample, force_not_quantize=True) + log["sample_noquant"] = x_sample_noquant + log["sample_diff"] = torch.abs(x_sample_noquant - x_sample) + except Exception: + pass + + log["sample"] = x_sample + log["time"] = t1 - t0 + + return log diff --git a/extensions-builtin/LDSR/preload.py b/extensions-builtin/LDSR/preload.py new file mode 100644 index 0000000000000000000000000000000000000000..cfd478d545ed12ef74e73fa40b6defe0156859da --- /dev/null +++ b/extensions-builtin/LDSR/preload.py @@ -0,0 +1,6 @@ +import os +from modules import paths + + +def preload(parser): + parser.add_argument("--ldsr-models-path", type=str, help="Path to directory with LDSR model file(s).", default=os.path.join(paths.models_path, 'LDSR')) diff --git a/extensions-builtin/LDSR/scripts/ldsr_model.py b/extensions-builtin/LDSR/scripts/ldsr_model.py new file mode 100644 index 0000000000000000000000000000000000000000..c7ae4d4ba0dcb5ed638473e3f95d1af5731f9631 --- /dev/null +++ b/extensions-builtin/LDSR/scripts/ldsr_model.py @@ -0,0 +1,70 @@ +import os + +from modules.modelloader import load_file_from_url +from 
modules.upscaler import Upscaler, UpscalerData +from modules_forge.forge_util import prepare_free_memory +from ldsr_model_arch import LDSR +from modules import shared, script_callbacks, errors +import sd_hijack_autoencoder # noqa: F401 +import sd_hijack_ddpm_v1 # noqa: F401 + + +class UpscalerLDSR(Upscaler): + def __init__(self, user_path): + self.name = "LDSR" + self.user_path = user_path + self.model_url = "https://heibox.uni-heidelberg.de/f/578df07c8fc04ffbadf3/?dl=1" + self.yaml_url = "https://heibox.uni-heidelberg.de/f/31a76b13ea27482981b4/?dl=1" + super().__init__() + scaler_data = UpscalerData("LDSR", None, self) + self.scalers = [scaler_data] + + def load_model(self, path: str): + # Remove incorrect project.yaml file if too big + yaml_path = os.path.join(self.model_path, "project.yaml") + old_model_path = os.path.join(self.model_path, "model.pth") + new_model_path = os.path.join(self.model_path, "model.ckpt") + + local_model_paths = self.find_models(ext_filter=[".ckpt", ".safetensors"]) + local_ckpt_path = next(iter([local_model for local_model in local_model_paths if local_model.endswith("model.ckpt")]), None) + local_safetensors_path = next(iter([local_model for local_model in local_model_paths if local_model.endswith("model.safetensors")]), None) + local_yaml_path = next(iter([local_model for local_model in local_model_paths if local_model.endswith("project.yaml")]), None) + + if os.path.exists(yaml_path): + statinfo = os.stat(yaml_path) + if statinfo.st_size >= 10485760: + print("Removing invalid LDSR YAML file.") + os.remove(yaml_path) + + if os.path.exists(old_model_path): + print("Renaming model from model.pth to model.ckpt") + os.rename(old_model_path, new_model_path) + + if local_safetensors_path is not None and os.path.exists(local_safetensors_path): + model = local_safetensors_path + else: + model = local_ckpt_path or load_file_from_url(self.model_url, model_dir=self.model_download_path, file_name="model.ckpt") + + yaml = local_yaml_path or load_file_from_url(self.yaml_url, model_dir=self.model_download_path, file_name="project.yaml") + + return LDSR(model, yaml) + + def do_upscale(self, img, path): + prepare_free_memory(aggressive=True) + try: + ldsr = self.load_model(path) + except Exception: + errors.report(f"Failed loading LDSR model {path}", exc_info=True) + return img + ddim_steps = shared.opts.ldsr_steps + return ldsr.super_resolution(img, ddim_steps, self.scale) + + +def on_ui_settings(): + import gradio as gr + + shared.opts.add_option("ldsr_steps", shared.OptionInfo(100, "LDSR processing steps. 
Lower = faster", gr.Slider, {"minimum": 1, "maximum": 200, "step": 1}, section=('upscaling', "Upscaling"))) + shared.opts.add_option("ldsr_cached", shared.OptionInfo(False, "Cache LDSR model in memory", gr.Checkbox, {"interactive": True}, section=('upscaling', "Upscaling"))) + + +script_callbacks.on_ui_settings(on_ui_settings) diff --git a/extensions-builtin/LDSR/sd_hijack_autoencoder.py b/extensions-builtin/LDSR/sd_hijack_autoencoder.py new file mode 100644 index 0000000000000000000000000000000000000000..c29d274da825d2500b77a2022db3421b40b18886 --- /dev/null +++ b/extensions-builtin/LDSR/sd_hijack_autoencoder.py @@ -0,0 +1,293 @@ +# The content of this file comes from the ldm/models/autoencoder.py file of the compvis/stable-diffusion repo +# The VQModel & VQModelInterface were subsequently removed from ldm/models/autoencoder.py when we moved to the stability-ai/stablediffusion repo +# As the LDSR upscaler relies on VQModel & VQModelInterface, the hijack aims to put them back into the ldm.models.autoencoder +import numpy as np +import torch +import pytorch_lightning as pl +import torch.nn.functional as F +from contextlib import contextmanager + +from torch.optim.lr_scheduler import LambdaLR + +from ldm.modules.ema import LitEma +from vqvae_quantize import VectorQuantizer2 as VectorQuantizer +from ldm.modules.diffusionmodules.model import Encoder, Decoder +from ldm.util import instantiate_from_config + +import ldm.models.autoencoder +from packaging import version + +class VQModel(pl.LightningModule): + def __init__(self, + ddconfig, + lossconfig, + n_embed, + embed_dim, + ckpt_path=None, + ignore_keys=None, + image_key="image", + colorize_nlabels=None, + monitor=None, + batch_resize_range=None, + scheduler_config=None, + lr_g_factor=1.0, + remap=None, + sane_index_shape=False, # tell vector quantizer to return indices as bhw + use_ema=False + ): + super().__init__() + self.embed_dim = embed_dim + self.n_embed = n_embed + self.image_key = image_key + self.encoder = Encoder(**ddconfig) + self.decoder = Decoder(**ddconfig) + self.loss = instantiate_from_config(lossconfig) + self.quantize = VectorQuantizer(n_embed, embed_dim, beta=0.25, + remap=remap, + sane_index_shape=sane_index_shape) + self.quant_conv = torch.nn.Conv2d(ddconfig["z_channels"], embed_dim, 1) + self.post_quant_conv = torch.nn.Conv2d(embed_dim, ddconfig["z_channels"], 1) + if colorize_nlabels is not None: + assert type(colorize_nlabels)==int + self.register_buffer("colorize", torch.randn(3, colorize_nlabels, 1, 1)) + if monitor is not None: + self.monitor = monitor + self.batch_resize_range = batch_resize_range + if self.batch_resize_range is not None: + print(f"{self.__class__.__name__}: Using per-batch resizing in range {batch_resize_range}.") + + self.use_ema = use_ema + if self.use_ema: + self.model_ema = LitEma(self) + print(f"Keeping EMAs of {len(list(self.model_ema.buffers()))}.") + + if ckpt_path is not None: + self.init_from_ckpt(ckpt_path, ignore_keys=ignore_keys or []) + self.scheduler_config = scheduler_config + self.lr_g_factor = lr_g_factor + + @contextmanager + def ema_scope(self, context=None): + if self.use_ema: + self.model_ema.store(self.parameters()) + self.model_ema.copy_to(self) + if context is not None: + print(f"{context}: Switched to EMA weights") + try: + yield None + finally: + if self.use_ema: + self.model_ema.restore(self.parameters()) + if context is not None: + print(f"{context}: Restored training weights") + + def init_from_ckpt(self, path, ignore_keys=None): + sd = torch.load(path, 
map_location="cpu")["state_dict"] + keys = list(sd.keys()) + for k in keys: + for ik in ignore_keys or []: + if k.startswith(ik): + print("Deleting key {} from state_dict.".format(k)) + del sd[k] + missing, unexpected = self.load_state_dict(sd, strict=False) + print(f"Restored from {path} with {len(missing)} missing and {len(unexpected)} unexpected keys") + if missing: + print(f"Missing Keys: {missing}") + if unexpected: + print(f"Unexpected Keys: {unexpected}") + + def on_train_batch_end(self, *args, **kwargs): + if self.use_ema: + self.model_ema(self) + + def encode(self, x): + h = self.encoder(x) + h = self.quant_conv(h) + quant, emb_loss, info = self.quantize(h) + return quant, emb_loss, info + + def encode_to_prequant(self, x): + h = self.encoder(x) + h = self.quant_conv(h) + return h + + def decode(self, quant): + quant = self.post_quant_conv(quant) + dec = self.decoder(quant) + return dec + + def decode_code(self, code_b): + quant_b = self.quantize.embed_code(code_b) + dec = self.decode(quant_b) + return dec + + def forward(self, input, return_pred_indices=False): + quant, diff, (_,_,ind) = self.encode(input) + dec = self.decode(quant) + if return_pred_indices: + return dec, diff, ind + return dec, diff + + def get_input(self, batch, k): + x = batch[k] + if len(x.shape) == 3: + x = x[..., None] + x = x.permute(0, 3, 1, 2).to(memory_format=torch.contiguous_format).float() + if self.batch_resize_range is not None: + lower_size = self.batch_resize_range[0] + upper_size = self.batch_resize_range[1] + if self.global_step <= 4: + # do the first few batches with max size to avoid later oom + new_resize = upper_size + else: + new_resize = np.random.choice(np.arange(lower_size, upper_size+16, 16)) + if new_resize != x.shape[2]: + x = F.interpolate(x, size=new_resize, mode="bicubic") + x = x.detach() + return x + + def training_step(self, batch, batch_idx, optimizer_idx): + # https://github.com/pytorch/pytorch/issues/37142 + # try not to fool the heuristics + x = self.get_input(batch, self.image_key) + xrec, qloss, ind = self(x, return_pred_indices=True) + + if optimizer_idx == 0: + # autoencode + aeloss, log_dict_ae = self.loss(qloss, x, xrec, optimizer_idx, self.global_step, + last_layer=self.get_last_layer(), split="train", + predicted_indices=ind) + + self.log_dict(log_dict_ae, prog_bar=False, logger=True, on_step=True, on_epoch=True) + return aeloss + + if optimizer_idx == 1: + # discriminator + discloss, log_dict_disc = self.loss(qloss, x, xrec, optimizer_idx, self.global_step, + last_layer=self.get_last_layer(), split="train") + self.log_dict(log_dict_disc, prog_bar=False, logger=True, on_step=True, on_epoch=True) + return discloss + + def validation_step(self, batch, batch_idx): + log_dict = self._validation_step(batch, batch_idx) + with self.ema_scope(): + self._validation_step(batch, batch_idx, suffix="_ema") + return log_dict + + def _validation_step(self, batch, batch_idx, suffix=""): + x = self.get_input(batch, self.image_key) + xrec, qloss, ind = self(x, return_pred_indices=True) + aeloss, log_dict_ae = self.loss(qloss, x, xrec, 0, + self.global_step, + last_layer=self.get_last_layer(), + split="val"+suffix, + predicted_indices=ind + ) + + discloss, log_dict_disc = self.loss(qloss, x, xrec, 1, + self.global_step, + last_layer=self.get_last_layer(), + split="val"+suffix, + predicted_indices=ind + ) + rec_loss = log_dict_ae[f"val{suffix}/rec_loss"] + self.log(f"val{suffix}/rec_loss", rec_loss, + prog_bar=True, logger=True, on_step=False, on_epoch=True, sync_dist=True) + 
self.log(f"val{suffix}/aeloss", aeloss, + prog_bar=True, logger=True, on_step=False, on_epoch=True, sync_dist=True) + if version.parse(pl.__version__) >= version.parse('1.4.0'): + del log_dict_ae[f"val{suffix}/rec_loss"] + self.log_dict(log_dict_ae) + self.log_dict(log_dict_disc) + return self.log_dict + + def configure_optimizers(self): + lr_d = self.learning_rate + lr_g = self.lr_g_factor*self.learning_rate + print("lr_d", lr_d) + print("lr_g", lr_g) + opt_ae = torch.optim.Adam(list(self.encoder.parameters())+ + list(self.decoder.parameters())+ + list(self.quantize.parameters())+ + list(self.quant_conv.parameters())+ + list(self.post_quant_conv.parameters()), + lr=lr_g, betas=(0.5, 0.9)) + opt_disc = torch.optim.Adam(self.loss.discriminator.parameters(), + lr=lr_d, betas=(0.5, 0.9)) + + if self.scheduler_config is not None: + scheduler = instantiate_from_config(self.scheduler_config) + + print("Setting up LambdaLR scheduler...") + scheduler = [ + { + 'scheduler': LambdaLR(opt_ae, lr_lambda=scheduler.schedule), + 'interval': 'step', + 'frequency': 1 + }, + { + 'scheduler': LambdaLR(opt_disc, lr_lambda=scheduler.schedule), + 'interval': 'step', + 'frequency': 1 + }, + ] + return [opt_ae, opt_disc], scheduler + return [opt_ae, opt_disc], [] + + def get_last_layer(self): + return self.decoder.conv_out.weight + + def log_images(self, batch, only_inputs=False, plot_ema=False, **kwargs): + log = {} + x = self.get_input(batch, self.image_key) + x = x.to(self.device) + if only_inputs: + log["inputs"] = x + return log + xrec, _ = self(x) + if x.shape[1] > 3: + # colorize with random projection + assert xrec.shape[1] > 3 + x = self.to_rgb(x) + xrec = self.to_rgb(xrec) + log["inputs"] = x + log["reconstructions"] = xrec + if plot_ema: + with self.ema_scope(): + xrec_ema, _ = self(x) + if x.shape[1] > 3: + xrec_ema = self.to_rgb(xrec_ema) + log["reconstructions_ema"] = xrec_ema + return log + + def to_rgb(self, x): + assert self.image_key == "segmentation" + if not hasattr(self, "colorize"): + self.register_buffer("colorize", torch.randn(3, x.shape[1], 1, 1).to(x)) + x = F.conv2d(x, weight=self.colorize) + x = 2.*(x-x.min())/(x.max()-x.min()) - 1. 
+ return x + + +class VQModelInterface(VQModel): + def __init__(self, embed_dim, *args, **kwargs): + super().__init__(*args, embed_dim=embed_dim, **kwargs) + self.embed_dim = embed_dim + + def encode(self, x): + h = self.encoder(x) + h = self.quant_conv(h) + return h + + def decode(self, h, force_not_quantize=False): + # also go through quantization layer + if not force_not_quantize: + quant, emb_loss, info = self.quantize(h) + else: + quant = h + quant = self.post_quant_conv(quant) + dec = self.decoder(quant) + return dec + +ldm.models.autoencoder.VQModel = VQModel +ldm.models.autoencoder.VQModelInterface = VQModelInterface diff --git a/extensions-builtin/LDSR/sd_hijack_ddpm_v1.py b/extensions-builtin/LDSR/sd_hijack_ddpm_v1.py new file mode 100644 index 0000000000000000000000000000000000000000..04adc5eb2cfe9aa1d5f75e5653624456c5e37a47 --- /dev/null +++ b/extensions-builtin/LDSR/sd_hijack_ddpm_v1.py @@ -0,0 +1,1443 @@ +# This script is copied from the compvis/stable-diffusion repo (aka the SD V1 repo) +# Original filename: ldm/models/diffusion/ddpm.py +# The purpose to reinstate the old DDPM logic which works with VQ, whereas the V2 one doesn't +# Some models such as LDSR require VQ to work correctly +# The classes are suffixed with "V1" and added back to the "ldm.models.diffusion.ddpm" module + +import torch +import torch.nn as nn +import numpy as np +import pytorch_lightning as pl +from torch.optim.lr_scheduler import LambdaLR +from einops import rearrange, repeat +from contextlib import contextmanager +from functools import partial +from tqdm import tqdm +from torchvision.utils import make_grid +from pytorch_lightning.utilities.distributed import rank_zero_only + +from ldm.util import log_txt_as_img, exists, default, ismap, isimage, mean_flat, count_params, instantiate_from_config +from ldm.modules.ema import LitEma +from ldm.modules.distributions.distributions import normal_kl, DiagonalGaussianDistribution +from ldm.models.autoencoder import VQModelInterface, IdentityFirstStage, AutoencoderKL +from ldm.modules.diffusionmodules.util import make_beta_schedule, extract_into_tensor, noise_like +from ldm.models.diffusion.ddim import DDIMSampler + +import ldm.models.diffusion.ddpm + +__conditioning_keys__ = {'concat': 'c_concat', + 'crossattn': 'c_crossattn', + 'adm': 'y'} + + +def disabled_train(self, mode=True): + """Overwrite model.train with this function to make sure train/eval mode + does not change anymore.""" + return self + + +def uniform_on_device(r1, r2, shape, device): + return (r1 - r2) * torch.rand(*shape, device=device) + r2 + + +class DDPMV1(pl.LightningModule): + # classic DDPM with Gaussian diffusion, in image space + def __init__(self, + unet_config, + timesteps=1000, + beta_schedule="linear", + loss_type="l2", + ckpt_path=None, + ignore_keys=None, + load_only_unet=False, + monitor="val/loss", + use_ema=True, + first_stage_key="image", + image_size=256, + channels=3, + log_every_t=100, + clip_denoised=True, + linear_start=1e-4, + linear_end=2e-2, + cosine_s=8e-3, + given_betas=None, + original_elbo_weight=0., + v_posterior=0., # weight for choosing posterior variance as sigma = (1-v) * beta_tilde + v * beta + l_simple_weight=1., + conditioning_key=None, + parameterization="eps", # all assuming fixed variance schedules + scheduler_config=None, + use_positional_encodings=False, + learn_logvar=False, + logvar_init=0., + ): + super().__init__() + assert parameterization in ["eps", "x0"], 'currently only supporting "eps" and "x0"' + self.parameterization = parameterization + 
print(f"{self.__class__.__name__}: Running in {self.parameterization}-prediction mode") + self.cond_stage_model = None + self.clip_denoised = clip_denoised + self.log_every_t = log_every_t + self.first_stage_key = first_stage_key + self.image_size = image_size # try conv? + self.channels = channels + self.use_positional_encodings = use_positional_encodings + self.model = DiffusionWrapperV1(unet_config, conditioning_key) + count_params(self.model, verbose=True) + self.use_ema = use_ema + if self.use_ema: + self.model_ema = LitEma(self.model) + print(f"Keeping EMAs of {len(list(self.model_ema.buffers()))}.") + + self.use_scheduler = scheduler_config is not None + if self.use_scheduler: + self.scheduler_config = scheduler_config + + self.v_posterior = v_posterior + self.original_elbo_weight = original_elbo_weight + self.l_simple_weight = l_simple_weight + + if monitor is not None: + self.monitor = monitor + if ckpt_path is not None: + self.init_from_ckpt(ckpt_path, ignore_keys=ignore_keys or [], only_model=load_only_unet) + + self.register_schedule(given_betas=given_betas, beta_schedule=beta_schedule, timesteps=timesteps, + linear_start=linear_start, linear_end=linear_end, cosine_s=cosine_s) + + self.loss_type = loss_type + + self.learn_logvar = learn_logvar + self.logvar = torch.full(fill_value=logvar_init, size=(self.num_timesteps,)) + if self.learn_logvar: + self.logvar = nn.Parameter(self.logvar, requires_grad=True) + + + def register_schedule(self, given_betas=None, beta_schedule="linear", timesteps=1000, + linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): + if exists(given_betas): + betas = given_betas + else: + betas = make_beta_schedule(beta_schedule, timesteps, linear_start=linear_start, linear_end=linear_end, + cosine_s=cosine_s) + alphas = 1. - betas + alphas_cumprod = np.cumprod(alphas, axis=0) + alphas_cumprod_prev = np.append(1., alphas_cumprod[:-1]) + + timesteps, = betas.shape + self.num_timesteps = int(timesteps) + self.linear_start = linear_start + self.linear_end = linear_end + assert alphas_cumprod.shape[0] == self.num_timesteps, 'alphas have to be defined for each timestep' + + to_torch = partial(torch.tensor, dtype=torch.float32) + + self.register_buffer('betas', to_torch(betas)) + self.register_buffer('alphas_cumprod', to_torch(alphas_cumprod)) + self.register_buffer('alphas_cumprod_prev', to_torch(alphas_cumprod_prev)) + + # calculations for diffusion q(x_t | x_{t-1}) and others + self.register_buffer('sqrt_alphas_cumprod', to_torch(np.sqrt(alphas_cumprod))) + self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1. - alphas_cumprod))) + self.register_buffer('log_one_minus_alphas_cumprod', to_torch(np.log(1. - alphas_cumprod))) + self.register_buffer('sqrt_recip_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod))) + self.register_buffer('sqrt_recipm1_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod - 1))) + + # calculations for posterior q(x_{t-1} | x_t, x_0) + posterior_variance = (1 - self.v_posterior) * betas * (1. - alphas_cumprod_prev) / ( + 1. - alphas_cumprod) + self.v_posterior * betas + # above: equal to 1. / (1. / (1. 
- alpha_cumprod_tm1) + alpha_t / beta_t) + self.register_buffer('posterior_variance', to_torch(posterior_variance)) + # below: log calculation clipped because the posterior variance is 0 at the beginning of the diffusion chain + self.register_buffer('posterior_log_variance_clipped', to_torch(np.log(np.maximum(posterior_variance, 1e-20)))) + self.register_buffer('posterior_mean_coef1', to_torch( + betas * np.sqrt(alphas_cumprod_prev) / (1. - alphas_cumprod))) + self.register_buffer('posterior_mean_coef2', to_torch( + (1. - alphas_cumprod_prev) * np.sqrt(alphas) / (1. - alphas_cumprod))) + + if self.parameterization == "eps": + lvlb_weights = self.betas ** 2 / ( + 2 * self.posterior_variance * to_torch(alphas) * (1 - self.alphas_cumprod)) + elif self.parameterization == "x0": + lvlb_weights = 0.5 * np.sqrt(torch.Tensor(alphas_cumprod)) / (2. * 1 - torch.Tensor(alphas_cumprod)) + else: + raise NotImplementedError("mu not supported") + # TODO how to choose this term + lvlb_weights[0] = lvlb_weights[1] + self.register_buffer('lvlb_weights', lvlb_weights, persistent=False) + assert not torch.isnan(self.lvlb_weights).all() + + @contextmanager + def ema_scope(self, context=None): + if self.use_ema: + self.model_ema.store(self.model.parameters()) + self.model_ema.copy_to(self.model) + if context is not None: + print(f"{context}: Switched to EMA weights") + try: + yield None + finally: + if self.use_ema: + self.model_ema.restore(self.model.parameters()) + if context is not None: + print(f"{context}: Restored training weights") + + def init_from_ckpt(self, path, ignore_keys=None, only_model=False): + sd = torch.load(path, map_location="cpu") + if "state_dict" in list(sd.keys()): + sd = sd["state_dict"] + keys = list(sd.keys()) + for k in keys: + for ik in ignore_keys or []: + if k.startswith(ik): + print("Deleting key {} from state_dict.".format(k)) + del sd[k] + missing, unexpected = self.load_state_dict(sd, strict=False) if not only_model else self.model.load_state_dict( + sd, strict=False) + print(f"Restored from {path} with {len(missing)} missing and {len(unexpected)} unexpected keys") + if missing: + print(f"Missing Keys: {missing}") + if unexpected: + print(f"Unexpected Keys: {unexpected}") + + def q_mean_variance(self, x_start, t): + """ + Get the distribution q(x_t | x_0). + :param x_start: the [N x C x ...] tensor of noiseless inputs. + :param t: the number of diffusion steps (minus 1). Here, 0 means one step. + :return: A tuple (mean, variance, log_variance), all of x_start's shape. 
+ """ + mean = (extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start) + variance = extract_into_tensor(1.0 - self.alphas_cumprod, t, x_start.shape) + log_variance = extract_into_tensor(self.log_one_minus_alphas_cumprod, t, x_start.shape) + return mean, variance, log_variance + + def predict_start_from_noise(self, x_t, t, noise): + return ( + extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t - + extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) * noise + ) + + def q_posterior(self, x_start, x_t, t): + posterior_mean = ( + extract_into_tensor(self.posterior_mean_coef1, t, x_t.shape) * x_start + + extract_into_tensor(self.posterior_mean_coef2, t, x_t.shape) * x_t + ) + posterior_variance = extract_into_tensor(self.posterior_variance, t, x_t.shape) + posterior_log_variance_clipped = extract_into_tensor(self.posterior_log_variance_clipped, t, x_t.shape) + return posterior_mean, posterior_variance, posterior_log_variance_clipped + + def p_mean_variance(self, x, t, clip_denoised: bool): + model_out = self.model(x, t) + if self.parameterization == "eps": + x_recon = self.predict_start_from_noise(x, t=t, noise=model_out) + elif self.parameterization == "x0": + x_recon = model_out + if clip_denoised: + x_recon.clamp_(-1., 1.) + + model_mean, posterior_variance, posterior_log_variance = self.q_posterior(x_start=x_recon, x_t=x, t=t) + return model_mean, posterior_variance, posterior_log_variance + + @torch.no_grad() + def p_sample(self, x, t, clip_denoised=True, repeat_noise=False): + b, *_, device = *x.shape, x.device + model_mean, _, model_log_variance = self.p_mean_variance(x=x, t=t, clip_denoised=clip_denoised) + noise = noise_like(x.shape, device, repeat_noise) + # no noise when t == 0 + nonzero_mask = (1 - (t == 0).float()).reshape(b, *((1,) * (len(x.shape) - 1))) + return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise + + @torch.no_grad() + def p_sample_loop(self, shape, return_intermediates=False): + device = self.betas.device + b = shape[0] + img = torch.randn(shape, device=device) + intermediates = [img] + for i in tqdm(reversed(range(0, self.num_timesteps)), desc='Sampling t', total=self.num_timesteps): + img = self.p_sample(img, torch.full((b,), i, device=device, dtype=torch.long), + clip_denoised=self.clip_denoised) + if i % self.log_every_t == 0 or i == self.num_timesteps - 1: + intermediates.append(img) + if return_intermediates: + return img, intermediates + return img + + @torch.no_grad() + def sample(self, batch_size=16, return_intermediates=False): + image_size = self.image_size + channels = self.channels + return self.p_sample_loop((batch_size, channels, image_size, image_size), + return_intermediates=return_intermediates) + + def q_sample(self, x_start, t, noise=None): + noise = default(noise, lambda: torch.randn_like(x_start)) + return (extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start + + extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise) + + def get_loss(self, pred, target, mean=True): + if self.loss_type == 'l1': + loss = (target - pred).abs() + if mean: + loss = loss.mean() + elif self.loss_type == 'l2': + if mean: + loss = torch.nn.functional.mse_loss(target, pred) + else: + loss = torch.nn.functional.mse_loss(target, pred, reduction='none') + else: + raise NotImplementedError("unknown loss type '{loss_type}'") + + return loss + + def p_losses(self, x_start, t, noise=None): + noise = default(noise, lambda: torch.randn_like(x_start)) + 
x_noisy = self.q_sample(x_start=x_start, t=t, noise=noise) + model_out = self.model(x_noisy, t) + + loss_dict = {} + if self.parameterization == "eps": + target = noise + elif self.parameterization == "x0": + target = x_start + else: + raise NotImplementedError(f"Paramterization {self.parameterization} not yet supported") + + loss = self.get_loss(model_out, target, mean=False).mean(dim=[1, 2, 3]) + + log_prefix = 'train' if self.training else 'val' + + loss_dict.update({f'{log_prefix}/loss_simple': loss.mean()}) + loss_simple = loss.mean() * self.l_simple_weight + + loss_vlb = (self.lvlb_weights[t] * loss).mean() + loss_dict.update({f'{log_prefix}/loss_vlb': loss_vlb}) + + loss = loss_simple + self.original_elbo_weight * loss_vlb + + loss_dict.update({f'{log_prefix}/loss': loss}) + + return loss, loss_dict + + def forward(self, x, *args, **kwargs): + # b, c, h, w, device, img_size, = *x.shape, x.device, self.image_size + # assert h == img_size and w == img_size, f'height and width of image must be {img_size}' + t = torch.randint(0, self.num_timesteps, (x.shape[0],), device=self.device).long() + return self.p_losses(x, t, *args, **kwargs) + + def get_input(self, batch, k): + x = batch[k] + if len(x.shape) == 3: + x = x[..., None] + x = rearrange(x, 'b h w c -> b c h w') + x = x.to(memory_format=torch.contiguous_format).float() + return x + + def shared_step(self, batch): + x = self.get_input(batch, self.first_stage_key) + loss, loss_dict = self(x) + return loss, loss_dict + + def training_step(self, batch, batch_idx): + loss, loss_dict = self.shared_step(batch) + + self.log_dict(loss_dict, prog_bar=True, + logger=True, on_step=True, on_epoch=True) + + self.log("global_step", self.global_step, + prog_bar=True, logger=True, on_step=True, on_epoch=False) + + if self.use_scheduler: + lr = self.optimizers().param_groups[0]['lr'] + self.log('lr_abs', lr, prog_bar=True, logger=True, on_step=True, on_epoch=False) + + return loss + + @torch.no_grad() + def validation_step(self, batch, batch_idx): + _, loss_dict_no_ema = self.shared_step(batch) + with self.ema_scope(): + _, loss_dict_ema = self.shared_step(batch) + loss_dict_ema = {key + '_ema': loss_dict_ema[key] for key in loss_dict_ema} + self.log_dict(loss_dict_no_ema, prog_bar=False, logger=True, on_step=False, on_epoch=True) + self.log_dict(loss_dict_ema, prog_bar=False, logger=True, on_step=False, on_epoch=True) + + def on_train_batch_end(self, *args, **kwargs): + if self.use_ema: + self.model_ema(self.model) + + def _get_rows_from_list(self, samples): + n_imgs_per_row = len(samples) + denoise_grid = rearrange(samples, 'n b c h w -> b n c h w') + denoise_grid = rearrange(denoise_grid, 'b n c h w -> (b n) c h w') + denoise_grid = make_grid(denoise_grid, nrow=n_imgs_per_row) + return denoise_grid + + @torch.no_grad() + def log_images(self, batch, N=8, n_row=2, sample=True, return_keys=None, **kwargs): + log = {} + x = self.get_input(batch, self.first_stage_key) + N = min(x.shape[0], N) + n_row = min(x.shape[0], n_row) + x = x.to(self.device)[:N] + log["inputs"] = x + + # get diffusion row + diffusion_row = [] + x_start = x[:n_row] + + for t in range(self.num_timesteps): + if t % self.log_every_t == 0 or t == self.num_timesteps - 1: + t = repeat(torch.tensor([t]), '1 -> b', b=n_row) + t = t.to(self.device).long() + noise = torch.randn_like(x_start) + x_noisy = self.q_sample(x_start=x_start, t=t, noise=noise) + diffusion_row.append(x_noisy) + + log["diffusion_row"] = self._get_rows_from_list(diffusion_row) + + if sample: + # get denoise row + with 
self.ema_scope("Plotting"): + samples, denoise_row = self.sample(batch_size=N, return_intermediates=True) + + log["samples"] = samples + log["denoise_row"] = self._get_rows_from_list(denoise_row) + + if return_keys: + if np.intersect1d(list(log.keys()), return_keys).shape[0] == 0: + return log + else: + return {key: log[key] for key in return_keys} + return log + + def configure_optimizers(self): + lr = self.learning_rate + params = list(self.model.parameters()) + if self.learn_logvar: + params = params + [self.logvar] + opt = torch.optim.AdamW(params, lr=lr) + return opt + + +class LatentDiffusionV1(DDPMV1): + """main class""" + def __init__(self, + first_stage_config, + cond_stage_config, + num_timesteps_cond=None, + cond_stage_key="image", + cond_stage_trainable=False, + concat_mode=True, + cond_stage_forward=None, + conditioning_key=None, + scale_factor=1.0, + scale_by_std=False, + *args, **kwargs): + self.num_timesteps_cond = default(num_timesteps_cond, 1) + self.scale_by_std = scale_by_std + assert self.num_timesteps_cond <= kwargs['timesteps'] + # for backwards compatibility after implementation of DiffusionWrapper + if conditioning_key is None: + conditioning_key = 'concat' if concat_mode else 'crossattn' + if cond_stage_config == '__is_unconditional__': + conditioning_key = None + ckpt_path = kwargs.pop("ckpt_path", None) + ignore_keys = kwargs.pop("ignore_keys", []) + super().__init__(*args, conditioning_key=conditioning_key, **kwargs) + self.concat_mode = concat_mode + self.cond_stage_trainable = cond_stage_trainable + self.cond_stage_key = cond_stage_key + try: + self.num_downs = len(first_stage_config.params.ddconfig.ch_mult) - 1 + except Exception: + self.num_downs = 0 + if not scale_by_std: + self.scale_factor = scale_factor + else: + self.register_buffer('scale_factor', torch.tensor(scale_factor)) + self.instantiate_first_stage(first_stage_config) + self.instantiate_cond_stage(cond_stage_config) + self.cond_stage_forward = cond_stage_forward + self.clip_denoised = False + self.bbox_tokenizer = None + + self.restarted_from_ckpt = False + if ckpt_path is not None: + self.init_from_ckpt(ckpt_path, ignore_keys) + self.restarted_from_ckpt = True + + def make_cond_schedule(self, ): + self.cond_ids = torch.full(size=(self.num_timesteps,), fill_value=self.num_timesteps - 1, dtype=torch.long) + ids = torch.round(torch.linspace(0, self.num_timesteps - 1, self.num_timesteps_cond)).long() + self.cond_ids[:self.num_timesteps_cond] = ids + + @rank_zero_only + @torch.no_grad() + def on_train_batch_start(self, batch, batch_idx, dataloader_idx): + # only for very first batch + if self.scale_by_std and self.current_epoch == 0 and self.global_step == 0 and batch_idx == 0 and not self.restarted_from_ckpt: + assert self.scale_factor == 1., 'rather not use custom rescaling and std-rescaling simultaneously' + # set rescale weight to 1./std of encodings + print("### USING STD-RESCALING ###") + x = super().get_input(batch, self.first_stage_key) + x = x.to(self.device) + encoder_posterior = self.encode_first_stage(x) + z = self.get_first_stage_encoding(encoder_posterior).detach() + del self.scale_factor + self.register_buffer('scale_factor', 1. 
/ z.flatten().std()) + print(f"setting self.scale_factor to {self.scale_factor}") + print("### USING STD-RESCALING ###") + + def register_schedule(self, + given_betas=None, beta_schedule="linear", timesteps=1000, + linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): + super().register_schedule(given_betas, beta_schedule, timesteps, linear_start, linear_end, cosine_s) + + self.shorten_cond_schedule = self.num_timesteps_cond > 1 + if self.shorten_cond_schedule: + self.make_cond_schedule() + + def instantiate_first_stage(self, config): + model = instantiate_from_config(config) + self.first_stage_model = model.eval() + self.first_stage_model.train = disabled_train + for param in self.first_stage_model.parameters(): + param.requires_grad = False + + def instantiate_cond_stage(self, config): + if not self.cond_stage_trainable: + if config == "__is_first_stage__": + print("Using first stage also as cond stage.") + self.cond_stage_model = self.first_stage_model + elif config == "__is_unconditional__": + print(f"Training {self.__class__.__name__} as an unconditional model.") + self.cond_stage_model = None + # self.be_unconditional = True + else: + model = instantiate_from_config(config) + self.cond_stage_model = model.eval() + self.cond_stage_model.train = disabled_train + for param in self.cond_stage_model.parameters(): + param.requires_grad = False + else: + assert config != '__is_first_stage__' + assert config != '__is_unconditional__' + model = instantiate_from_config(config) + self.cond_stage_model = model + + def _get_denoise_row_from_list(self, samples, desc='', force_no_decoder_quantization=False): + denoise_row = [] + for zd in tqdm(samples, desc=desc): + denoise_row.append(self.decode_first_stage(zd.to(self.device), + force_not_quantize=force_no_decoder_quantization)) + n_imgs_per_row = len(denoise_row) + denoise_row = torch.stack(denoise_row) # n_log_step, n_row, C, H, W + denoise_grid = rearrange(denoise_row, 'n b c h w -> b n c h w') + denoise_grid = rearrange(denoise_grid, 'b n c h w -> (b n) c h w') + denoise_grid = make_grid(denoise_grid, nrow=n_imgs_per_row) + return denoise_grid + + def get_first_stage_encoding(self, encoder_posterior): + if isinstance(encoder_posterior, DiagonalGaussianDistribution): + z = encoder_posterior.sample() + elif isinstance(encoder_posterior, torch.Tensor): + z = encoder_posterior + else: + raise NotImplementedError(f"encoder_posterior of type '{type(encoder_posterior)}' not yet implemented") + return self.scale_factor * z + + def get_learned_conditioning(self, c): + if self.cond_stage_forward is None: + if hasattr(self.cond_stage_model, 'encode') and callable(self.cond_stage_model.encode): + c = self.cond_stage_model.encode(c) + if isinstance(c, DiagonalGaussianDistribution): + c = c.mode() + else: + c = self.cond_stage_model(c) + else: + assert hasattr(self.cond_stage_model, self.cond_stage_forward) + c = getattr(self.cond_stage_model, self.cond_stage_forward)(c) + return c + + def meshgrid(self, h, w): + y = torch.arange(0, h).view(h, 1, 1).repeat(1, w, 1) + x = torch.arange(0, w).view(1, w, 1).repeat(h, 1, 1) + + arr = torch.cat([y, x], dim=-1) + return arr + + def delta_border(self, h, w): + """ + :param h: height + :param w: width + :return: normalized distance to image border, + wtith min distance = 0 at border and max dist = 0.5 at image center + """ + lower_right_corner = torch.tensor([h - 1, w - 1]).view(1, 1, 2) + arr = self.meshgrid(h, w) / lower_right_corner + dist_left_up = torch.min(arr, dim=-1, keepdims=True)[0] + dist_right_down = 
torch.min(1 - arr, dim=-1, keepdims=True)[0] + edge_dist = torch.min(torch.cat([dist_left_up, dist_right_down], dim=-1), dim=-1)[0] + return edge_dist + + def get_weighting(self, h, w, Ly, Lx, device): + weighting = self.delta_border(h, w) + weighting = torch.clip(weighting, self.split_input_params["clip_min_weight"], + self.split_input_params["clip_max_weight"], ) + weighting = weighting.view(1, h * w, 1).repeat(1, 1, Ly * Lx).to(device) + + if self.split_input_params["tie_braker"]: + L_weighting = self.delta_border(Ly, Lx) + L_weighting = torch.clip(L_weighting, + self.split_input_params["clip_min_tie_weight"], + self.split_input_params["clip_max_tie_weight"]) + + L_weighting = L_weighting.view(1, 1, Ly * Lx).to(device) + weighting = weighting * L_weighting + return weighting + + def get_fold_unfold(self, x, kernel_size, stride, uf=1, df=1): # todo load once not every time, shorten code + """ + :param x: img of size (bs, c, h, w) + :return: n img crops of size (n, bs, c, kernel_size[0], kernel_size[1]) + """ + bs, nc, h, w = x.shape + + # number of crops in image + Ly = (h - kernel_size[0]) // stride[0] + 1 + Lx = (w - kernel_size[1]) // stride[1] + 1 + + if uf == 1 and df == 1: + fold_params = dict(kernel_size=kernel_size, dilation=1, padding=0, stride=stride) + unfold = torch.nn.Unfold(**fold_params) + + fold = torch.nn.Fold(output_size=x.shape[2:], **fold_params) + + weighting = self.get_weighting(kernel_size[0], kernel_size[1], Ly, Lx, x.device).to(x.dtype) + normalization = fold(weighting).view(1, 1, h, w) # normalizes the overlap + weighting = weighting.view((1, 1, kernel_size[0], kernel_size[1], Ly * Lx)) + + elif uf > 1 and df == 1: + fold_params = dict(kernel_size=kernel_size, dilation=1, padding=0, stride=stride) + unfold = torch.nn.Unfold(**fold_params) + + fold_params2 = dict(kernel_size=(kernel_size[0] * uf, kernel_size[0] * uf), + dilation=1, padding=0, + stride=(stride[0] * uf, stride[1] * uf)) + fold = torch.nn.Fold(output_size=(x.shape[2] * uf, x.shape[3] * uf), **fold_params2) + + weighting = self.get_weighting(kernel_size[0] * uf, kernel_size[1] * uf, Ly, Lx, x.device).to(x.dtype) + normalization = fold(weighting).view(1, 1, h * uf, w * uf) # normalizes the overlap + weighting = weighting.view((1, 1, kernel_size[0] * uf, kernel_size[1] * uf, Ly * Lx)) + + elif df > 1 and uf == 1: + fold_params = dict(kernel_size=kernel_size, dilation=1, padding=0, stride=stride) + unfold = torch.nn.Unfold(**fold_params) + + fold_params2 = dict(kernel_size=(kernel_size[0] // df, kernel_size[0] // df), + dilation=1, padding=0, + stride=(stride[0] // df, stride[1] // df)) + fold = torch.nn.Fold(output_size=(x.shape[2] // df, x.shape[3] // df), **fold_params2) + + weighting = self.get_weighting(kernel_size[0] // df, kernel_size[1] // df, Ly, Lx, x.device).to(x.dtype) + normalization = fold(weighting).view(1, 1, h // df, w // df) # normalizes the overlap + weighting = weighting.view((1, 1, kernel_size[0] // df, kernel_size[1] // df, Ly * Lx)) + + else: + raise NotImplementedError + + return fold, unfold, normalization, weighting + + @torch.no_grad() + def get_input(self, batch, k, return_first_stage_outputs=False, force_c_encode=False, + cond_key=None, return_original_cond=False, bs=None): + x = super().get_input(batch, k) + if bs is not None: + x = x[:bs] + x = x.to(self.device) + encoder_posterior = self.encode_first_stage(x) + z = self.get_first_stage_encoding(encoder_posterior).detach() + + if self.model.conditioning_key is not None: + if cond_key is None: + cond_key = 
self.cond_stage_key + if cond_key != self.first_stage_key: + if cond_key in ['caption', 'coordinates_bbox']: + xc = batch[cond_key] + elif cond_key == 'class_label': + xc = batch + else: + xc = super().get_input(batch, cond_key).to(self.device) + else: + xc = x + if not self.cond_stage_trainable or force_c_encode: + if isinstance(xc, dict) or isinstance(xc, list): + # import pudb; pudb.set_trace() + c = self.get_learned_conditioning(xc) + else: + c = self.get_learned_conditioning(xc.to(self.device)) + else: + c = xc + if bs is not None: + c = c[:bs] + + if self.use_positional_encodings: + pos_x, pos_y = self.compute_latent_shifts(batch) + ckey = __conditioning_keys__[self.model.conditioning_key] + c = {ckey: c, 'pos_x': pos_x, 'pos_y': pos_y} + + else: + c = None + xc = None + if self.use_positional_encodings: + pos_x, pos_y = self.compute_latent_shifts(batch) + c = {'pos_x': pos_x, 'pos_y': pos_y} + out = [z, c] + if return_first_stage_outputs: + xrec = self.decode_first_stage(z) + out.extend([x, xrec]) + if return_original_cond: + out.append(xc) + return out + + @torch.no_grad() + def decode_first_stage(self, z, predict_cids=False, force_not_quantize=False): + if predict_cids: + if z.dim() == 4: + z = torch.argmax(z.exp(), dim=1).long() + z = self.first_stage_model.quantize.get_codebook_entry(z, shape=None) + z = rearrange(z, 'b h w c -> b c h w').contiguous() + + z = 1. / self.scale_factor * z + + if hasattr(self, "split_input_params"): + if self.split_input_params["patch_distributed_vq"]: + ks = self.split_input_params["ks"] # eg. (128, 128) + stride = self.split_input_params["stride"] # eg. (64, 64) + uf = self.split_input_params["vqf"] + bs, nc, h, w = z.shape + if ks[0] > h or ks[1] > w: + ks = (min(ks[0], h), min(ks[1], w)) + print("reducing Kernel") + + if stride[0] > h or stride[1] > w: + stride = (min(stride[0], h), min(stride[1], w)) + print("reducing stride") + + fold, unfold, normalization, weighting = self.get_fold_unfold(z, ks, stride, uf=uf) + + z = unfold(z) # (bn, nc * prod(**ks), L) + # 1. Reshape to img shape + z = z.view((z.shape[0], -1, ks[0], ks[1], z.shape[-1])) # (bn, nc, ks[0], ks[1], L ) + + # 2. apply model loop over last dim + if isinstance(self.first_stage_model, VQModelInterface): + output_list = [self.first_stage_model.decode(z[:, :, :, :, i], + force_not_quantize=predict_cids or force_not_quantize) + for i in range(z.shape[-1])] + else: + + output_list = [self.first_stage_model.decode(z[:, :, :, :, i]) + for i in range(z.shape[-1])] + + o = torch.stack(output_list, axis=-1) # # (bn, nc, ks[0], ks[1], L) + o = o * weighting + # Reverse 1. 
reshape to img shape + o = o.view((o.shape[0], -1, o.shape[-1])) # (bn, nc * ks[0] * ks[1], L) + # stitch crops together + decoded = fold(o) + decoded = decoded / normalization # norm is shape (1, 1, h, w) + return decoded + else: + if isinstance(self.first_stage_model, VQModelInterface): + return self.first_stage_model.decode(z, force_not_quantize=predict_cids or force_not_quantize) + else: + return self.first_stage_model.decode(z) + + else: + if isinstance(self.first_stage_model, VQModelInterface): + return self.first_stage_model.decode(z, force_not_quantize=predict_cids or force_not_quantize) + else: + return self.first_stage_model.decode(z) + + # same as above but without decorator + def differentiable_decode_first_stage(self, z, predict_cids=False, force_not_quantize=False): + if predict_cids: + if z.dim() == 4: + z = torch.argmax(z.exp(), dim=1).long() + z = self.first_stage_model.quantize.get_codebook_entry(z, shape=None) + z = rearrange(z, 'b h w c -> b c h w').contiguous() + + z = 1. / self.scale_factor * z + + if hasattr(self, "split_input_params"): + if self.split_input_params["patch_distributed_vq"]: + ks = self.split_input_params["ks"] # eg. (128, 128) + stride = self.split_input_params["stride"] # eg. (64, 64) + uf = self.split_input_params["vqf"] + bs, nc, h, w = z.shape + if ks[0] > h or ks[1] > w: + ks = (min(ks[0], h), min(ks[1], w)) + print("reducing Kernel") + + if stride[0] > h or stride[1] > w: + stride = (min(stride[0], h), min(stride[1], w)) + print("reducing stride") + + fold, unfold, normalization, weighting = self.get_fold_unfold(z, ks, stride, uf=uf) + + z = unfold(z) # (bn, nc * prod(**ks), L) + # 1. Reshape to img shape + z = z.view((z.shape[0], -1, ks[0], ks[1], z.shape[-1])) # (bn, nc, ks[0], ks[1], L ) + + # 2. apply model loop over last dim + if isinstance(self.first_stage_model, VQModelInterface): + output_list = [self.first_stage_model.decode(z[:, :, :, :, i], + force_not_quantize=predict_cids or force_not_quantize) + for i in range(z.shape[-1])] + else: + + output_list = [self.first_stage_model.decode(z[:, :, :, :, i]) + for i in range(z.shape[-1])] + + o = torch.stack(output_list, axis=-1) # # (bn, nc, ks[0], ks[1], L) + o = o * weighting + # Reverse 1. reshape to img shape + o = o.view((o.shape[0], -1, o.shape[-1])) # (bn, nc * ks[0] * ks[1], L) + # stitch crops together + decoded = fold(o) + decoded = decoded / normalization # norm is shape (1, 1, h, w) + return decoded + else: + if isinstance(self.first_stage_model, VQModelInterface): + return self.first_stage_model.decode(z, force_not_quantize=predict_cids or force_not_quantize) + else: + return self.first_stage_model.decode(z) + + else: + if isinstance(self.first_stage_model, VQModelInterface): + return self.first_stage_model.decode(z, force_not_quantize=predict_cids or force_not_quantize) + else: + return self.first_stage_model.decode(z) + + @torch.no_grad() + def encode_first_stage(self, x): + if hasattr(self, "split_input_params"): + if self.split_input_params["patch_distributed_vq"]: + ks = self.split_input_params["ks"] # eg. (128, 128) + stride = self.split_input_params["stride"] # eg. 
(64, 64) + df = self.split_input_params["vqf"] + self.split_input_params['original_image_size'] = x.shape[-2:] + bs, nc, h, w = x.shape + if ks[0] > h or ks[1] > w: + ks = (min(ks[0], h), min(ks[1], w)) + print("reducing Kernel") + + if stride[0] > h or stride[1] > w: + stride = (min(stride[0], h), min(stride[1], w)) + print("reducing stride") + + fold, unfold, normalization, weighting = self.get_fold_unfold(x, ks, stride, df=df) + z = unfold(x) # (bn, nc * prod(**ks), L) + # Reshape to img shape + z = z.view((z.shape[0], -1, ks[0], ks[1], z.shape[-1])) # (bn, nc, ks[0], ks[1], L ) + + output_list = [self.first_stage_model.encode(z[:, :, :, :, i]) + for i in range(z.shape[-1])] + + o = torch.stack(output_list, axis=-1) + o = o * weighting + + # Reverse reshape to img shape + o = o.view((o.shape[0], -1, o.shape[-1])) # (bn, nc * ks[0] * ks[1], L) + # stitch crops together + decoded = fold(o) + decoded = decoded / normalization + return decoded + + else: + return self.first_stage_model.encode(x) + else: + return self.first_stage_model.encode(x) + + def shared_step(self, batch, **kwargs): + x, c = self.get_input(batch, self.first_stage_key) + loss = self(x, c) + return loss + + def forward(self, x, c, *args, **kwargs): + t = torch.randint(0, self.num_timesteps, (x.shape[0],), device=self.device).long() + if self.model.conditioning_key is not None: + assert c is not None + if self.cond_stage_trainable: + c = self.get_learned_conditioning(c) + if self.shorten_cond_schedule: # TODO: drop this option + tc = self.cond_ids[t].to(self.device) + c = self.q_sample(x_start=c, t=tc, noise=torch.randn_like(c.float())) + return self.p_losses(x, c, t, *args, **kwargs) + + def apply_model(self, x_noisy, t, cond, return_ids=False): + + if isinstance(cond, dict): + # hybrid case, cond is exptected to be a dict + pass + else: + if not isinstance(cond, list): + cond = [cond] + key = 'c_concat' if self.model.conditioning_key == 'concat' else 'c_crossattn' + cond = {key: cond} + + if hasattr(self, "split_input_params"): + assert len(cond) == 1 # todo can only deal with one conditioning atm + assert not return_ids + ks = self.split_input_params["ks"] # eg. (128, 128) + stride = self.split_input_params["stride"] # eg. 
(64, 64) + + h, w = x_noisy.shape[-2:] + + fold, unfold, normalization, weighting = self.get_fold_unfold(x_noisy, ks, stride) + + z = unfold(x_noisy) # (bn, nc * prod(**ks), L) + # Reshape to img shape + z = z.view((z.shape[0], -1, ks[0], ks[1], z.shape[-1])) # (bn, nc, ks[0], ks[1], L ) + z_list = [z[:, :, :, :, i] for i in range(z.shape[-1])] + + if self.cond_stage_key in ["image", "LR_image", "segmentation", + 'bbox_img'] and self.model.conditioning_key: # todo check for completeness + c_key = next(iter(cond.keys())) # get key + c = next(iter(cond.values())) # get value + assert (len(c) == 1) # todo extend to list with more than one elem + c = c[0] # get element + + c = unfold(c) + c = c.view((c.shape[0], -1, ks[0], ks[1], c.shape[-1])) # (bn, nc, ks[0], ks[1], L ) + + cond_list = [{c_key: [c[:, :, :, :, i]]} for i in range(c.shape[-1])] + + elif self.cond_stage_key == 'coordinates_bbox': + assert 'original_image_size' in self.split_input_params, 'BoundingBoxRescaling is missing original_image_size' + + # assuming padding of unfold is always 0 and its dilation is always 1 + n_patches_per_row = int((w - ks[0]) / stride[0] + 1) + full_img_h, full_img_w = self.split_input_params['original_image_size'] + # as we are operating on latents, we need the factor from the original image size to the + # spatial latent size to properly rescale the crops for regenerating the bbox annotations + num_downs = self.first_stage_model.encoder.num_resolutions - 1 + rescale_latent = 2 ** (num_downs) + + # get top left positions of patches as conforming to the bbox tokenizer, therefore we + # need to rescale the tl patch coordinates to be in between (0,1) + tl_patch_coordinates = [(rescale_latent * stride[0] * (patch_nr % n_patches_per_row) / full_img_w, + rescale_latent * stride[1] * (patch_nr // n_patches_per_row) / full_img_h) + for patch_nr in range(z.shape[-1])] + + # patch_limits are tl_coord, width and height coordinates as (x_tl, y_tl, h, w) + patch_limits = [(x_tl, y_tl, + rescale_latent * ks[0] / full_img_w, + rescale_latent * ks[1] / full_img_h) for x_tl, y_tl in tl_patch_coordinates] + # patch_values = [(np.arange(x_tl,min(x_tl+ks, 1.)),np.arange(y_tl,min(y_tl+ks, 1.))) for x_tl, y_tl in tl_patch_coordinates] + + # tokenize crop coordinates for the bounding boxes of the respective patches + patch_limits_tknzd = [torch.LongTensor(self.bbox_tokenizer._crop_encoder(bbox))[None].to(self.device) + for bbox in patch_limits] # list of length l with tensors of shape (1, 2) + print(patch_limits_tknzd[0].shape) + # cut tknzd crop position from conditioning + assert isinstance(cond, dict), 'cond must be dict to be fed into model' + cut_cond = cond['c_crossattn'][0][..., :-2].to(self.device) + print(cut_cond.shape) + + adapted_cond = torch.stack([torch.cat([cut_cond, p], dim=1) for p in patch_limits_tknzd]) + adapted_cond = rearrange(adapted_cond, 'l b n -> (l b) n') + print(adapted_cond.shape) + adapted_cond = self.get_learned_conditioning(adapted_cond) + print(adapted_cond.shape) + adapted_cond = rearrange(adapted_cond, '(l b) n d -> l b n d', l=z.shape[-1]) + print(adapted_cond.shape) + + cond_list = [{'c_crossattn': [e]} for e in adapted_cond] + + else: + cond_list = [cond for i in range(z.shape[-1])] # Todo make this more efficient + + # apply model by loop over crops + output_list = [self.model(z_list[i], t, **cond_list[i]) for i in range(z.shape[-1])] + assert not isinstance(output_list[0], + tuple) # todo can't deal with multiple model outputs, check this never happens + + o = torch.stack(output_list, 
axis=-1) + o = o * weighting + # Reverse reshape to img shape + o = o.view((o.shape[0], -1, o.shape[-1])) # (bn, nc * ks[0] * ks[1], L) + # stitch crops together + x_recon = fold(o) / normalization + + else: + x_recon = self.model(x_noisy, t, **cond) + + if isinstance(x_recon, tuple) and not return_ids: + return x_recon[0] + else: + return x_recon + + def _predict_eps_from_xstart(self, x_t, t, pred_xstart): + return (extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t - pred_xstart) / \ + extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) + + def _prior_bpd(self, x_start): + """ + Get the prior KL term for the variational lower-bound, measured in + bits-per-dim. + This term can't be optimized, as it only depends on the encoder. + :param x_start: the [N x C x ...] tensor of inputs. + :return: a batch of [N] KL values (in bits), one per batch element. + """ + batch_size = x_start.shape[0] + t = torch.tensor([self.num_timesteps - 1] * batch_size, device=x_start.device) + qt_mean, _, qt_log_variance = self.q_mean_variance(x_start, t) + kl_prior = normal_kl(mean1=qt_mean, logvar1=qt_log_variance, mean2=0.0, logvar2=0.0) + return mean_flat(kl_prior) / np.log(2.0) + + def p_losses(self, x_start, cond, t, noise=None): + noise = default(noise, lambda: torch.randn_like(x_start)) + x_noisy = self.q_sample(x_start=x_start, t=t, noise=noise) + model_output = self.apply_model(x_noisy, t, cond) + + loss_dict = {} + prefix = 'train' if self.training else 'val' + + if self.parameterization == "x0": + target = x_start + elif self.parameterization == "eps": + target = noise + else: + raise NotImplementedError() + + loss_simple = self.get_loss(model_output, target, mean=False).mean([1, 2, 3]) + loss_dict.update({f'{prefix}/loss_simple': loss_simple.mean()}) + + logvar_t = self.logvar[t].to(self.device) + loss = loss_simple / torch.exp(logvar_t) + logvar_t + # loss = loss_simple / torch.exp(self.logvar) + self.logvar + if self.learn_logvar: + loss_dict.update({f'{prefix}/loss_gamma': loss.mean()}) + loss_dict.update({'logvar': self.logvar.data.mean()}) + + loss = self.l_simple_weight * loss.mean() + + loss_vlb = self.get_loss(model_output, target, mean=False).mean(dim=(1, 2, 3)) + loss_vlb = (self.lvlb_weights[t] * loss_vlb).mean() + loss_dict.update({f'{prefix}/loss_vlb': loss_vlb}) + loss += (self.original_elbo_weight * loss_vlb) + loss_dict.update({f'{prefix}/loss': loss}) + + return loss, loss_dict + + def p_mean_variance(self, x, c, t, clip_denoised: bool, return_codebook_ids=False, quantize_denoised=False, + return_x0=False, score_corrector=None, corrector_kwargs=None): + t_in = t + model_out = self.apply_model(x, t_in, c, return_ids=return_codebook_ids) + + if score_corrector is not None: + assert self.parameterization == "eps" + model_out = score_corrector.modify_score(self, model_out, x, t, c, **corrector_kwargs) + + if return_codebook_ids: + model_out, logits = model_out + + if self.parameterization == "eps": + x_recon = self.predict_start_from_noise(x, t=t, noise=model_out) + elif self.parameterization == "x0": + x_recon = model_out + else: + raise NotImplementedError() + + if clip_denoised: + x_recon.clamp_(-1., 1.) 
+ if quantize_denoised: + x_recon, _, [_, _, indices] = self.first_stage_model.quantize(x_recon) + model_mean, posterior_variance, posterior_log_variance = self.q_posterior(x_start=x_recon, x_t=x, t=t) + if return_codebook_ids: + return model_mean, posterior_variance, posterior_log_variance, logits + elif return_x0: + return model_mean, posterior_variance, posterior_log_variance, x_recon + else: + return model_mean, posterior_variance, posterior_log_variance + + @torch.no_grad() + def p_sample(self, x, c, t, clip_denoised=False, repeat_noise=False, + return_codebook_ids=False, quantize_denoised=False, return_x0=False, + temperature=1., noise_dropout=0., score_corrector=None, corrector_kwargs=None): + b, *_, device = *x.shape, x.device + outputs = self.p_mean_variance(x=x, c=c, t=t, clip_denoised=clip_denoised, + return_codebook_ids=return_codebook_ids, + quantize_denoised=quantize_denoised, + return_x0=return_x0, + score_corrector=score_corrector, corrector_kwargs=corrector_kwargs) + if return_codebook_ids: + raise DeprecationWarning("Support dropped.") + model_mean, _, model_log_variance, logits = outputs + elif return_x0: + model_mean, _, model_log_variance, x0 = outputs + else: + model_mean, _, model_log_variance = outputs + + noise = noise_like(x.shape, device, repeat_noise) * temperature + if noise_dropout > 0.: + noise = torch.nn.functional.dropout(noise, p=noise_dropout) + # no noise when t == 0 + nonzero_mask = (1 - (t == 0).float()).reshape(b, *((1,) * (len(x.shape) - 1))) + + if return_codebook_ids: + return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise, logits.argmax(dim=1) + if return_x0: + return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise, x0 + else: + return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise + + @torch.no_grad() + def progressive_denoising(self, cond, shape, verbose=True, callback=None, quantize_denoised=False, + img_callback=None, mask=None, x0=None, temperature=1., noise_dropout=0., + score_corrector=None, corrector_kwargs=None, batch_size=None, x_T=None, start_T=None, + log_every_t=None): + if not log_every_t: + log_every_t = self.log_every_t + timesteps = self.num_timesteps + if batch_size is not None: + b = batch_size if batch_size is not None else shape[0] + shape = [batch_size] + list(shape) + else: + b = batch_size = shape[0] + if x_T is None: + img = torch.randn(shape, device=self.device) + else: + img = x_T + intermediates = [] + if cond is not None: + if isinstance(cond, dict): + cond = {key: cond[key][:batch_size] if not isinstance(cond[key], list) else + [x[:batch_size] for x in cond[key]] for key in cond} + else: + cond = [c[:batch_size] for c in cond] if isinstance(cond, list) else cond[:batch_size] + + if start_T is not None: + timesteps = min(timesteps, start_T) + iterator = tqdm(reversed(range(0, timesteps)), desc='Progressive Generation', + total=timesteps) if verbose else reversed( + range(0, timesteps)) + if type(temperature) == float: + temperature = [temperature] * timesteps + + for i in iterator: + ts = torch.full((b,), i, device=self.device, dtype=torch.long) + if self.shorten_cond_schedule: + assert self.model.conditioning_key != 'hybrid' + tc = self.cond_ids[ts].to(cond.device) + cond = self.q_sample(x_start=cond, t=tc, noise=torch.randn_like(cond)) + + img, x0_partial = self.p_sample(img, cond, ts, + clip_denoised=self.clip_denoised, + quantize_denoised=quantize_denoised, return_x0=True, + temperature=temperature[i], noise_dropout=noise_dropout, + 
score_corrector=score_corrector, corrector_kwargs=corrector_kwargs) + if mask is not None: + assert x0 is not None + img_orig = self.q_sample(x0, ts) + img = img_orig * mask + (1. - mask) * img + + if i % log_every_t == 0 or i == timesteps - 1: + intermediates.append(x0_partial) + if callback: + callback(i) + if img_callback: + img_callback(img, i) + return img, intermediates + + @torch.no_grad() + def p_sample_loop(self, cond, shape, return_intermediates=False, + x_T=None, verbose=True, callback=None, timesteps=None, quantize_denoised=False, + mask=None, x0=None, img_callback=None, start_T=None, + log_every_t=None): + + if not log_every_t: + log_every_t = self.log_every_t + device = self.betas.device + b = shape[0] + if x_T is None: + img = torch.randn(shape, device=device) + else: + img = x_T + + intermediates = [img] + if timesteps is None: + timesteps = self.num_timesteps + + if start_T is not None: + timesteps = min(timesteps, start_T) + iterator = tqdm(reversed(range(0, timesteps)), desc='Sampling t', total=timesteps) if verbose else reversed( + range(0, timesteps)) + + if mask is not None: + assert x0 is not None + assert x0.shape[2:3] == mask.shape[2:3] # spatial size has to match + + for i in iterator: + ts = torch.full((b,), i, device=device, dtype=torch.long) + if self.shorten_cond_schedule: + assert self.model.conditioning_key != 'hybrid' + tc = self.cond_ids[ts].to(cond.device) + cond = self.q_sample(x_start=cond, t=tc, noise=torch.randn_like(cond)) + + img = self.p_sample(img, cond, ts, + clip_denoised=self.clip_denoised, + quantize_denoised=quantize_denoised) + if mask is not None: + img_orig = self.q_sample(x0, ts) + img = img_orig * mask + (1. - mask) * img + + if i % log_every_t == 0 or i == timesteps - 1: + intermediates.append(img) + if callback: + callback(i) + if img_callback: + img_callback(img, i) + + if return_intermediates: + return img, intermediates + return img + + @torch.no_grad() + def sample(self, cond, batch_size=16, return_intermediates=False, x_T=None, + verbose=True, timesteps=None, quantize_denoised=False, + mask=None, x0=None, shape=None,**kwargs): + if shape is None: + shape = (batch_size, self.channels, self.image_size, self.image_size) + if cond is not None: + if isinstance(cond, dict): + cond = {key: cond[key][:batch_size] if not isinstance(cond[key], list) else + [x[:batch_size] for x in cond[key]] for key in cond} + else: + cond = [c[:batch_size] for c in cond] if isinstance(cond, list) else cond[:batch_size] + return self.p_sample_loop(cond, + shape, + return_intermediates=return_intermediates, x_T=x_T, + verbose=verbose, timesteps=timesteps, quantize_denoised=quantize_denoised, + mask=mask, x0=x0) + + @torch.no_grad() + def sample_log(self,cond,batch_size,ddim, ddim_steps,**kwargs): + + if ddim: + ddim_sampler = DDIMSampler(self) + shape = (self.channels, self.image_size, self.image_size) + samples, intermediates =ddim_sampler.sample(ddim_steps,batch_size, + shape,cond,verbose=False,**kwargs) + + else: + samples, intermediates = self.sample(cond=cond, batch_size=batch_size, + return_intermediates=True,**kwargs) + + return samples, intermediates + + + @torch.no_grad() + def log_images(self, batch, N=8, n_row=4, sample=True, ddim_steps=200, ddim_eta=1., return_keys=None, + quantize_denoised=True, inpaint=True, plot_denoise_rows=False, plot_progressive_rows=True, + plot_diffusion_rows=True, **kwargs): + + use_ddim = ddim_steps is not None + + log = {} + z, c, x, xrec, xc = self.get_input(batch, self.first_stage_key, + 
return_first_stage_outputs=True, + force_c_encode=True, + return_original_cond=True, + bs=N) + N = min(x.shape[0], N) + n_row = min(x.shape[0], n_row) + log["inputs"] = x + log["reconstruction"] = xrec + if self.model.conditioning_key is not None: + if hasattr(self.cond_stage_model, "decode"): + xc = self.cond_stage_model.decode(c) + log["conditioning"] = xc + elif self.cond_stage_key in ["caption"]: + xc = log_txt_as_img((x.shape[2], x.shape[3]), batch["caption"]) + log["conditioning"] = xc + elif self.cond_stage_key == 'class_label': + xc = log_txt_as_img((x.shape[2], x.shape[3]), batch["human_label"]) + log['conditioning'] = xc + elif isimage(xc): + log["conditioning"] = xc + if ismap(xc): + log["original_conditioning"] = self.to_rgb(xc) + + if plot_diffusion_rows: + # get diffusion row + diffusion_row = [] + z_start = z[:n_row] + for t in range(self.num_timesteps): + if t % self.log_every_t == 0 or t == self.num_timesteps - 1: + t = repeat(torch.tensor([t]), '1 -> b', b=n_row) + t = t.to(self.device).long() + noise = torch.randn_like(z_start) + z_noisy = self.q_sample(x_start=z_start, t=t, noise=noise) + diffusion_row.append(self.decode_first_stage(z_noisy)) + + diffusion_row = torch.stack(diffusion_row) # n_log_step, n_row, C, H, W + diffusion_grid = rearrange(diffusion_row, 'n b c h w -> b n c h w') + diffusion_grid = rearrange(diffusion_grid, 'b n c h w -> (b n) c h w') + diffusion_grid = make_grid(diffusion_grid, nrow=diffusion_row.shape[0]) + log["diffusion_row"] = diffusion_grid + + if sample: + # get denoise row + with self.ema_scope("Plotting"): + samples, z_denoise_row = self.sample_log(cond=c,batch_size=N,ddim=use_ddim, + ddim_steps=ddim_steps,eta=ddim_eta) + # samples, z_denoise_row = self.sample(cond=c, batch_size=N, return_intermediates=True) + x_samples = self.decode_first_stage(samples) + log["samples"] = x_samples + if plot_denoise_rows: + denoise_grid = self._get_denoise_row_from_list(z_denoise_row) + log["denoise_row"] = denoise_grid + + if quantize_denoised and not isinstance(self.first_stage_model, AutoencoderKL) and not isinstance( + self.first_stage_model, IdentityFirstStage): + # also display when quantizing x0 while sampling + with self.ema_scope("Plotting Quantized Denoised"): + samples, z_denoise_row = self.sample_log(cond=c,batch_size=N,ddim=use_ddim, + ddim_steps=ddim_steps,eta=ddim_eta, + quantize_denoised=True) + # samples, z_denoise_row = self.sample(cond=c, batch_size=N, return_intermediates=True, + # quantize_denoised=True) + x_samples = self.decode_first_stage(samples.to(self.device)) + log["samples_x0_quantized"] = x_samples + + if inpaint: + # make a simple center square + h, w = z.shape[2], z.shape[3] + mask = torch.ones(N, h, w).to(self.device) + # zeros will be filled in + mask[:, h // 4:3 * h // 4, w // 4:3 * w // 4] = 0. + mask = mask[:, None, ...] 
+ with self.ema_scope("Plotting Inpaint"): + + samples, _ = self.sample_log(cond=c,batch_size=N,ddim=use_ddim, eta=ddim_eta, + ddim_steps=ddim_steps, x0=z[:N], mask=mask) + x_samples = self.decode_first_stage(samples.to(self.device)) + log["samples_inpainting"] = x_samples + log["mask"] = mask + + # outpaint + with self.ema_scope("Plotting Outpaint"): + samples, _ = self.sample_log(cond=c, batch_size=N, ddim=use_ddim,eta=ddim_eta, + ddim_steps=ddim_steps, x0=z[:N], mask=mask) + x_samples = self.decode_first_stage(samples.to(self.device)) + log["samples_outpainting"] = x_samples + + if plot_progressive_rows: + with self.ema_scope("Plotting Progressives"): + img, progressives = self.progressive_denoising(c, + shape=(self.channels, self.image_size, self.image_size), + batch_size=N) + prog_row = self._get_denoise_row_from_list(progressives, desc="Progressive Generation") + log["progressive_row"] = prog_row + + if return_keys: + if np.intersect1d(list(log.keys()), return_keys).shape[0] == 0: + return log + else: + return {key: log[key] for key in return_keys} + return log + + def configure_optimizers(self): + lr = self.learning_rate + params = list(self.model.parameters()) + if self.cond_stage_trainable: + print(f"{self.__class__.__name__}: Also optimizing conditioner params!") + params = params + list(self.cond_stage_model.parameters()) + if self.learn_logvar: + print('Diffusion model optimizing logvar') + params.append(self.logvar) + opt = torch.optim.AdamW(params, lr=lr) + if self.use_scheduler: + assert 'target' in self.scheduler_config + scheduler = instantiate_from_config(self.scheduler_config) + + print("Setting up LambdaLR scheduler...") + scheduler = [ + { + 'scheduler': LambdaLR(opt, lr_lambda=scheduler.schedule), + 'interval': 'step', + 'frequency': 1 + }] + return [opt], scheduler + return opt + + @torch.no_grad() + def to_rgb(self, x): + x = x.float() + if not hasattr(self, "colorize"): + self.colorize = torch.randn(3, x.shape[1], 1, 1).to(x) + x = nn.functional.conv2d(x, weight=self.colorize) + x = 2. * (x - x.min()) / (x.max() - x.min()) - 1. 
+ return x + + +class DiffusionWrapperV1(pl.LightningModule): + def __init__(self, diff_model_config, conditioning_key): + super().__init__() + self.diffusion_model = instantiate_from_config(diff_model_config) + self.conditioning_key = conditioning_key + assert self.conditioning_key in [None, 'concat', 'crossattn', 'hybrid', 'adm'] + + def forward(self, x, t, c_concat: list = None, c_crossattn: list = None): + if self.conditioning_key is None: + out = self.diffusion_model(x, t) + elif self.conditioning_key == 'concat': + xc = torch.cat([x] + c_concat, dim=1) + out = self.diffusion_model(xc, t) + elif self.conditioning_key == 'crossattn': + cc = torch.cat(c_crossattn, 1) + out = self.diffusion_model(x, t, context=cc) + elif self.conditioning_key == 'hybrid': + xc = torch.cat([x] + c_concat, dim=1) + cc = torch.cat(c_crossattn, 1) + out = self.diffusion_model(xc, t, context=cc) + elif self.conditioning_key == 'adm': + cc = c_crossattn[0] + out = self.diffusion_model(x, t, y=cc) + else: + raise NotImplementedError() + + return out + + +class Layout2ImgDiffusionV1(LatentDiffusionV1): + # TODO: move all layout-specific hacks to this class + def __init__(self, cond_stage_key, *args, **kwargs): + assert cond_stage_key == 'coordinates_bbox', 'Layout2ImgDiffusion only for cond_stage_key="coordinates_bbox"' + super().__init__(*args, cond_stage_key=cond_stage_key, **kwargs) + + def log_images(self, batch, N=8, *args, **kwargs): + logs = super().log_images(*args, batch=batch, N=N, **kwargs) + + key = 'train' if self.training else 'validation' + dset = self.trainer.datamodule.datasets[key] + mapper = dset.conditional_builders[self.cond_stage_key] + + bbox_imgs = [] + map_fn = lambda catno: dset.get_textual_label(dset.get_category_id(catno)) + for tknzd_bbox in batch[self.cond_stage_key][:N]: + bboximg = mapper.plot(tknzd_bbox.detach().cpu(), map_fn, (256, 256)) + bbox_imgs.append(bboximg) + + cond_img = torch.stack(bbox_imgs, dim=0) + logs['bbox_image'] = cond_img + return logs + +ldm.models.diffusion.ddpm.DDPMV1 = DDPMV1 +ldm.models.diffusion.ddpm.LatentDiffusionV1 = LatentDiffusionV1 +ldm.models.diffusion.ddpm.DiffusionWrapperV1 = DiffusionWrapperV1 +ldm.models.diffusion.ddpm.Layout2ImgDiffusionV1 = Layout2ImgDiffusionV1 diff --git a/extensions-builtin/LDSR/vqvae_quantize.py b/extensions-builtin/LDSR/vqvae_quantize.py new file mode 100644 index 0000000000000000000000000000000000000000..dd14b8fda5ce25a8cea8b70eb1d387b9c46c80d8 --- /dev/null +++ b/extensions-builtin/LDSR/vqvae_quantize.py @@ -0,0 +1,147 @@ +# Vendored from https://raw.githubusercontent.com/CompVis/taming-transformers/24268930bf1dce879235a7fddd0b2355b84d7ea6/taming/modules/vqvae/quantize.py, +# where the license is as follows: +# +# Copyright (c) 2020 Patrick Esser and Robin Rombach and Björn Ommer +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +# OR OTHER DEALINGS IN THE SOFTWARE./ + +import torch +import torch.nn as nn +import numpy as np +from einops import rearrange + + +class VectorQuantizer2(nn.Module): + """ + Improved version over VectorQuantizer, can be used as a drop-in replacement. Mostly + avoids costly matrix multiplications and allows for post-hoc remapping of indices. + """ + + # NOTE: due to a bug the beta term was applied to the wrong term. for + # backwards compatibility we use the buggy version by default, but you can + # specify legacy=False to fix it. + def __init__(self, n_e, e_dim, beta, remap=None, unknown_index="random", + sane_index_shape=False, legacy=True): + super().__init__() + self.n_e = n_e + self.e_dim = e_dim + self.beta = beta + self.legacy = legacy + + self.embedding = nn.Embedding(self.n_e, self.e_dim) + self.embedding.weight.data.uniform_(-1.0 / self.n_e, 1.0 / self.n_e) + + self.remap = remap + if self.remap is not None: + self.register_buffer("used", torch.tensor(np.load(self.remap))) + self.re_embed = self.used.shape[0] + self.unknown_index = unknown_index # "random" or "extra" or integer + if self.unknown_index == "extra": + self.unknown_index = self.re_embed + self.re_embed = self.re_embed + 1 + print(f"Remapping {self.n_e} indices to {self.re_embed} indices. " + f"Using {self.unknown_index} for unknown indices.") + else: + self.re_embed = n_e + + self.sane_index_shape = sane_index_shape + + def remap_to_used(self, inds): + ishape = inds.shape + assert len(ishape) > 1 + inds = inds.reshape(ishape[0], -1) + used = self.used.to(inds) + match = (inds[:, :, None] == used[None, None, ...]).long() + new = match.argmax(-1) + unknown = match.sum(2) < 1 + if self.unknown_index == "random": + new[unknown] = torch.randint(0, self.re_embed, size=new[unknown].shape).to(device=new.device) + else: + new[unknown] = self.unknown_index + return new.reshape(ishape) + + def unmap_to_all(self, inds): + ishape = inds.shape + assert len(ishape) > 1 + inds = inds.reshape(ishape[0], -1) + used = self.used.to(inds) + if self.re_embed > self.used.shape[0]: # extra token + inds[inds >= self.used.shape[0]] = 0 # simply set to zero + back = torch.gather(used[None, :][inds.shape[0] * [0], :], 1, inds) + return back.reshape(ishape) + + def forward(self, z, temp=None, rescale_logits=False, return_logits=False): + assert temp is None or temp == 1.0, "Only for interface compatible with Gumbel" + assert rescale_logits is False, "Only for interface compatible with Gumbel" + assert return_logits is False, "Only for interface compatible with Gumbel" + # reshape z -> (batch, height, width, channel) and flatten + z = rearrange(z, 'b c h w -> b h w c').contiguous() + z_flattened = z.view(-1, self.e_dim) + # distances from z to embeddings e_j (z - e)^2 = z^2 + e^2 - 2 e * z + + d = torch.sum(z_flattened ** 2, dim=1, keepdim=True) + \ + torch.sum(self.embedding.weight ** 2, dim=1) - 2 * \ + torch.einsum('bd,dn->bn', z_flattened, rearrange(self.embedding.weight, 'n d -> d n')) + + min_encoding_indices = torch.argmin(d, dim=1) + z_q = self.embedding(min_encoding_indices).view(z.shape) + 
perplexity = None + min_encodings = None + + # compute loss for embedding + if not self.legacy: + loss = self.beta * torch.mean((z_q.detach() - z) ** 2) + \ + torch.mean((z_q - z.detach()) ** 2) + else: + loss = torch.mean((z_q.detach() - z) ** 2) + self.beta * \ + torch.mean((z_q - z.detach()) ** 2) + + # preserve gradients + z_q = z + (z_q - z).detach() + + # reshape back to match original input shape + z_q = rearrange(z_q, 'b h w c -> b c h w').contiguous() + + if self.remap is not None: + min_encoding_indices = min_encoding_indices.reshape(z.shape[0], -1) # add batch axis + min_encoding_indices = self.remap_to_used(min_encoding_indices) + min_encoding_indices = min_encoding_indices.reshape(-1, 1) # flatten + + if self.sane_index_shape: + min_encoding_indices = min_encoding_indices.reshape( + z_q.shape[0], z_q.shape[2], z_q.shape[3]) + + return z_q, loss, (perplexity, min_encodings, min_encoding_indices) + + def get_codebook_entry(self, indices, shape): + # shape specifying (batch, height, width, channel) + if self.remap is not None: + indices = indices.reshape(shape[0], -1) # add batch axis + indices = self.unmap_to_all(indices) + indices = indices.reshape(-1) # flatten again + + # get quantized latent vectors + z_q = self.embedding(indices) + + if shape is not None: + z_q = z_q.view(shape) + # reshape back to match original input shape + z_q = z_q.permute(0, 3, 1, 2).contiguous() + + return z_q diff --git a/extensions-builtin/Lora/extra_networks_lora.py b/extensions-builtin/Lora/extra_networks_lora.py new file mode 100644 index 0000000000000000000000000000000000000000..88425009c7150f303b10bec8a42a3aa7a8c4ff93 --- /dev/null +++ b/extensions-builtin/Lora/extra_networks_lora.py @@ -0,0 +1,67 @@ +from modules import extra_networks, shared +import networks + + +class ExtraNetworkLora(extra_networks.ExtraNetwork): + def __init__(self): + super().__init__('lora') + + self.errors = {} + """mapping of network names to the number of errors the network had during operation""" + + def activate(self, p, params_list): + additional = shared.opts.sd_lora + + self.errors.clear() + + if additional != "None" and additional in networks.available_networks and not any(x for x in params_list if x.items[0] == additional): + p.all_prompts = [x + f"" for x in p.all_prompts] + params_list.append(extra_networks.ExtraNetworkParams(items=[additional, shared.opts.extra_networks_default_multiplier])) + + names = [] + te_multipliers = [] + unet_multipliers = [] + dyn_dims = [] + for params in params_list: + assert params.items + + names.append(params.positional[0]) + + te_multiplier = float(params.positional[1]) if len(params.positional) > 1 else 1.0 + te_multiplier = float(params.named.get("te", te_multiplier)) + + unet_multiplier = float(params.positional[2]) if len(params.positional) > 2 else te_multiplier + unet_multiplier = float(params.named.get("unet", unet_multiplier)) + + dyn_dim = int(params.positional[3]) if len(params.positional) > 3 else None + dyn_dim = int(params.named["dyn"]) if "dyn" in params.named else dyn_dim + + te_multipliers.append(te_multiplier) + unet_multipliers.append(unet_multiplier) + dyn_dims.append(dyn_dim) + + networks.load_networks(names, te_multipliers, unet_multipliers, dyn_dims) + + if shared.opts.lora_add_hashes_to_infotext: + network_hashes = [] + for item in networks.loaded_networks: + shorthash = item.network_on_disk.shorthash + if not shorthash: + continue + + alias = item.mentioned_name + if not alias: + continue + + alias = alias.replace(":", "").replace(",", "") + + 
network_hashes.append(f"{alias}: {shorthash}") + + if network_hashes: + p.extra_generation_params["Lora hashes"] = ", ".join(network_hashes) + + def deactivate(self, p): + if self.errors: + p.comment("Networks with errors: " + ", ".join(f"{k} ({v})" for k, v in self.errors.items())) + + self.errors.clear() diff --git a/extensions-builtin/Lora/lora.py b/extensions-builtin/Lora/lora.py new file mode 100644 index 0000000000000000000000000000000000000000..6186538e956e39c843a2a22a77c5ab53fdfec3c7 --- /dev/null +++ b/extensions-builtin/Lora/lora.py @@ -0,0 +1,9 @@ +import networks + +list_available_loras = networks.list_available_networks + +available_loras = networks.available_networks +available_lora_aliases = networks.available_network_aliases +available_lora_hash_lookup = networks.available_network_hash_lookup +forbidden_lora_aliases = networks.forbidden_network_aliases +loaded_loras = networks.loaded_networks diff --git a/extensions-builtin/Lora/lora_logger.py b/extensions-builtin/Lora/lora_logger.py new file mode 100644 index 0000000000000000000000000000000000000000..d51de29704f72b80958dbabda021c6648aef8177 --- /dev/null +++ b/extensions-builtin/Lora/lora_logger.py @@ -0,0 +1,33 @@ +import sys +import copy +import logging + + +class ColoredFormatter(logging.Formatter): + COLORS = { + "DEBUG": "\033[0;36m", # CYAN + "INFO": "\033[0;32m", # GREEN + "WARNING": "\033[0;33m", # YELLOW + "ERROR": "\033[0;31m", # RED + "CRITICAL": "\033[0;37;41m", # WHITE ON RED + "RESET": "\033[0m", # RESET COLOR + } + + def format(self, record): + colored_record = copy.copy(record) + levelname = colored_record.levelname + seq = self.COLORS.get(levelname, self.COLORS["RESET"]) + colored_record.levelname = f"{seq}{levelname}{self.COLORS['RESET']}" + return super().format(colored_record) + + +logger = logging.getLogger("lora") +logger.propagate = False + + +if not logger.handlers: + handler = logging.StreamHandler(sys.stdout) + handler.setFormatter( + ColoredFormatter("[%(name)s]-%(levelname)s: %(message)s") + ) + logger.addHandler(handler) diff --git a/extensions-builtin/Lora/lora_patches.py b/extensions-builtin/Lora/lora_patches.py new file mode 100644 index 0000000000000000000000000000000000000000..544cf4516b272a07f9b8fb85231eb2215ff41a2a --- /dev/null +++ b/extensions-builtin/Lora/lora_patches.py @@ -0,0 +1,6 @@ +class LoraPatches: + def __init__(self): + pass + + def undo(self): + pass diff --git a/extensions-builtin/Lora/network.py b/extensions-builtin/Lora/network.py new file mode 100644 index 0000000000000000000000000000000000000000..5eb7de96bc86b854140d8e41e98a9571770cb1d2 --- /dev/null +++ b/extensions-builtin/Lora/network.py @@ -0,0 +1,190 @@ +from __future__ import annotations +import os +from collections import namedtuple +import enum + +import torch.nn as nn +import torch.nn.functional as F + +from modules import sd_models, cache, errors, hashes, shared + +NetworkWeights = namedtuple('NetworkWeights', ['network_key', 'sd_key', 'w', 'sd_module']) + +metadata_tags_order = {"ss_sd_model_name": 1, "ss_resolution": 2, "ss_clip_skip": 3, "ss_num_train_images": 10, "ss_tag_frequency": 20} + + +class SdVersion(enum.Enum): + Unknown = 1 + SD1 = 2 + SD2 = 3 + SDXL = 4 + + +class NetworkOnDisk: + def __init__(self, name, filename): + self.name = name + self.filename = filename + self.metadata = {} + self.is_safetensors = os.path.splitext(filename)[1].lower() == ".safetensors" + + def read_metadata(): + metadata = sd_models.read_metadata_from_safetensors(filename) + metadata.pop('ssmd_cover_images', None) # 
those are cover images, and they are too big to display in UI as text + + return metadata + + if self.is_safetensors: + try: + self.metadata = cache.cached_data_for_file('safetensors-metadata', "lora/" + self.name, filename, read_metadata) + except Exception as e: + errors.display(e, f"reading lora {filename}") + + if self.metadata: + m = {} + for k, v in sorted(self.metadata.items(), key=lambda x: metadata_tags_order.get(x[0], 999)): + m[k] = v + + self.metadata = m + + self.alias = self.metadata.get('ss_output_name', self.name) + + self.hash = None + self.shorthash = None + self.set_hash( + self.metadata.get('sshs_model_hash') or + hashes.sha256_from_cache(self.filename, "lora/" + self.name, use_addnet_hash=self.is_safetensors) or + '' + ) + + self.sd_version = self.detect_version() + + def detect_version(self): + if str(self.metadata.get('ss_base_model_version', "")).startswith("sdxl_"): + return SdVersion.SDXL + elif str(self.metadata.get('ss_v2', "")) == "True": + return SdVersion.SD2 + elif len(self.metadata): + return SdVersion.SD1 + + return SdVersion.Unknown + + def set_hash(self, v): + self.hash = v + self.shorthash = self.hash[0:12] + + if self.shorthash: + import networks + networks.available_network_hash_lookup[self.shorthash] = self + + def read_hash(self): + if not self.hash: + self.set_hash(hashes.sha256(self.filename, "lora/" + self.name, use_addnet_hash=self.is_safetensors) or '') + + def get_alias(self): + import networks + if shared.opts.lora_preferred_name == "Filename" or self.alias.lower() in networks.forbidden_network_aliases: + return self.name + else: + return self.alias + + +class Network: # LoraModule + def __init__(self, name, network_on_disk: NetworkOnDisk): + self.name = name + self.network_on_disk = network_on_disk + self.te_multiplier = 1.0 + self.unet_multiplier = 1.0 + self.dyn_dim = None + self.modules = {} + self.bundle_embeddings = {} + self.mtime = None + + self.mentioned_name = None + """the text that was used to add the network to prompt - can be either name or an alias""" + + +class ModuleType: + def create_module(self, net: Network, weights: NetworkWeights) -> Network | None: + return None + + +class NetworkModule: + def __init__(self, net: Network, weights: NetworkWeights): + self.network = net + self.network_key = weights.network_key + self.sd_key = weights.sd_key + self.sd_module = weights.sd_module + + if hasattr(self.sd_module, 'weight'): + self.shape = self.sd_module.weight.shape + + self.ops = None + self.extra_kwargs = {} + if isinstance(self.sd_module, nn.Conv2d): + self.ops = F.conv2d + self.extra_kwargs = { + 'stride': self.sd_module.stride, + 'padding': self.sd_module.padding + } + elif isinstance(self.sd_module, nn.Linear): + self.ops = F.linear + elif isinstance(self.sd_module, nn.LayerNorm): + self.ops = F.layer_norm + self.extra_kwargs = { + 'normalized_shape': self.sd_module.normalized_shape, + 'eps': self.sd_module.eps + } + elif isinstance(self.sd_module, nn.GroupNorm): + self.ops = F.group_norm + self.extra_kwargs = { + 'num_groups': self.sd_module.num_groups, + 'eps': self.sd_module.eps + } + + self.dim = None + self.bias = weights.w.get("bias") + self.alpha = weights.w["alpha"].item() if "alpha" in weights.w else None + self.scale = weights.w["scale"].item() if "scale" in weights.w else None + + def multiplier(self): + if 'transformer' in self.sd_key[:20]: + return self.network.te_multiplier + else: + return self.network.unet_multiplier + + def calc_scale(self): + if self.scale is not None: + return self.scale + if self.dim 
is not None and self.alpha is not None: + return self.alpha / self.dim + + return 1.0 + + def finalize_updown(self, updown, orig_weight, output_shape, ex_bias=None): + if self.bias is not None: + updown = updown.reshape(self.bias.shape) + updown += self.bias.to(orig_weight.device, dtype=updown.dtype) + updown = updown.reshape(output_shape) + + if len(output_shape) == 4: + updown = updown.reshape(output_shape) + + if orig_weight.size().numel() == updown.size().numel(): + updown = updown.reshape(orig_weight.shape) + + if ex_bias is not None: + ex_bias = ex_bias * self.multiplier() + + return updown * self.calc_scale() * self.multiplier(), ex_bias + + def calc_updown(self, target): + raise NotImplementedError() + + def forward(self, x, y): + """A general forward implementation for all modules""" + if self.ops is None: + raise NotImplementedError() + else: + updown, ex_bias = self.calc_updown(self.sd_module.weight) + return y + self.ops(x, weight=updown, bias=ex_bias, **self.extra_kwargs) + diff --git a/extensions-builtin/Lora/networks.py b/extensions-builtin/Lora/networks.py new file mode 100644 index 0000000000000000000000000000000000000000..0ad0b0194bea9dc54d00009bebc1e31becd530da --- /dev/null +++ b/extensions-builtin/Lora/networks.py @@ -0,0 +1,219 @@ +import os +import re + +import lora_patches +import functools +import network + +import torch +from typing import Union + +from modules import shared, sd_models, errors, scripts +from ldm_patched.modules.utils import load_torch_file +from ldm_patched.modules.sd import load_lora_for_models + + +@functools.lru_cache(maxsize=5) +def load_lora_state_dict(filename): + return load_torch_file(filename, safe_load=True) + + +def convert_diffusers_name_to_compvis(key, is_sd2): + pass + + +def assign_network_names_to_compvis_modules(sd_model): + pass + + +def load_network(name, network_on_disk): + net = network.Network(name, network_on_disk) + net.mtime = os.path.getmtime(network_on_disk.filename) + + return net + + +def purge_networks_from_memory(): + pass + + +def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=None): + global lora_state_dict_cache + + current_sd = sd_models.model_data.get_sd_model() + if current_sd is None: + return + + loaded_networks.clear() + + networks_on_disk = [available_networks.get(name, None) if name.lower() in forbidden_network_aliases else available_network_aliases.get(name, None) for name in names] + if any(x is None for x in networks_on_disk): + list_available_networks() + networks_on_disk = [available_networks.get(name, None) if name.lower() in forbidden_network_aliases else available_network_aliases.get(name, None) for name in names] + + for i, (network_on_disk, name) in enumerate(zip(networks_on_disk, names)): + try: + net = load_network(name, network_on_disk) + except Exception as e: + errors.display(e, f"loading network {network_on_disk.filename}") + continue + net.mentioned_name = name + network_on_disk.read_hash() + loaded_networks.append(net) + + compiled_lora_targets = [] + for a, b, c in zip(networks_on_disk, unet_multipliers, te_multipliers): + compiled_lora_targets.append([a.filename, b, c]) + + compiled_lora_targets_hash = str(compiled_lora_targets) + + if current_sd.current_lora_hash == compiled_lora_targets_hash: + return + + current_sd.current_lora_hash = compiled_lora_targets_hash + current_sd.forge_objects.unet = current_sd.forge_objects_original.unet + current_sd.forge_objects.clip = current_sd.forge_objects_original.clip + + for filename, strength_model, strength_clip in 
compiled_lora_targets: + lora_sd = load_lora_state_dict(filename) + current_sd.forge_objects.unet, current_sd.forge_objects.clip = load_lora_for_models( + current_sd.forge_objects.unet, current_sd.forge_objects.clip, lora_sd, strength_model, strength_clip, + filename=filename) + + current_sd.forge_objects_after_applying_lora = current_sd.forge_objects.shallow_copy() + return + + +def network_restore_weights_from_backup(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, torch.nn.MultiheadAttention]): + pass + + +def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, torch.nn.MultiheadAttention]): + pass + + +def network_forward(org_module, input, original_forward): + pass + + +def network_reset_cached_weight(self: Union[torch.nn.Conv2d, torch.nn.Linear]): + pass + + +def network_Linear_forward(self, input): + pass + + +def network_Linear_load_state_dict(self, *args, **kwargs): + pass + + +def network_Conv2d_forward(self, input): + pass + + +def network_Conv2d_load_state_dict(self, *args, **kwargs): + pass + + +def network_GroupNorm_forward(self, input): + pass + + +def network_GroupNorm_load_state_dict(self, *args, **kwargs): + pass + + +def network_LayerNorm_forward(self, input): + pass + + +def network_LayerNorm_load_state_dict(self, *args, **kwargs): + pass + + +def network_MultiheadAttention_forward(self, *args, **kwargs): + pass + + +def network_MultiheadAttention_load_state_dict(self, *args, **kwargs): + pass + + +def list_available_networks(): + available_networks.clear() + available_network_aliases.clear() + forbidden_network_aliases.clear() + available_network_hash_lookup.clear() + forbidden_network_aliases.update({"none": 1, "Addams": 1}) + + os.makedirs(shared.cmd_opts.lora_dir, exist_ok=True) + + candidates = list(shared.walk_files(shared.cmd_opts.lora_dir, allowed_extensions=[".pt", ".ckpt", ".safetensors"])) + for filename in candidates: + if os.path.isdir(filename): + continue + + name = os.path.splitext(os.path.basename(filename))[0] + try: + entry = network.NetworkOnDisk(name, filename) + except OSError: # should catch FileNotFoundError and PermissionError etc. 
+ errors.report(f"Failed to load network {name} from {filename}", exc_info=True) + continue + + available_networks[name] = entry + + if entry.alias in available_network_aliases: + forbidden_network_aliases[entry.alias.lower()] = 1 + + available_network_aliases[name] = entry + available_network_aliases[entry.alias] = entry + + +re_network_name = re.compile(r"(.*)\s*\([0-9a-fA-F]+\)") + + +def infotext_pasted(infotext, params): + if "AddNet Module 1" in [x[1] for x in scripts.scripts_txt2img.infotext_fields]: + return # if the other extension is active, it will handle those fields, no need to do anything + + added = [] + + for k in params: + if not k.startswith("AddNet Model "): + continue + + num = k[13:] + + if params.get("AddNet Module " + num) != "LoRA": + continue + + name = params.get("AddNet Model " + num) + if name is None: + continue + + m = re_network_name.match(name) + if m: + name = m.group(1) + + multiplier = params.get("AddNet Weight A " + num, "1.0") + + added.append(f"") + + if added: + params["Prompt"] += "\n" + "".join(added) + + +originals: lora_patches.LoraPatches = None + +extra_network_lora = None + +available_networks = {} +available_network_aliases = {} +loaded_networks = [] +loaded_bundle_embeddings = {} +networks_in_memory = {} +available_network_hash_lookup = {} +forbidden_network_aliases = {} + +list_available_networks() diff --git a/extensions-builtin/Lora/preload.py b/extensions-builtin/Lora/preload.py new file mode 100644 index 0000000000000000000000000000000000000000..763f9421c28905d7763370e8075fd3b3250f3524 --- /dev/null +++ b/extensions-builtin/Lora/preload.py @@ -0,0 +1,8 @@ +import os +from modules import paths +from modules.paths_internal import normalized_filepath + + +def preload(parser): + parser.add_argument("--lora-dir", type=normalized_filepath, help="Path to directory with Lora networks.", default=os.path.join(paths.models_path, 'Lora')) + parser.add_argument("--lyco-dir-backcompat", type=normalized_filepath, help="Path to directory with LyCORIS networks (for backawards compatibility; can also use --lyco-dir).", default=os.path.join(paths.models_path, 'LyCORIS')) diff --git a/extensions-builtin/Lora/scripts/lora_script.py b/extensions-builtin/Lora/scripts/lora_script.py new file mode 100644 index 0000000000000000000000000000000000000000..fa9f9f57b261a492db676b09926ef05306911974 --- /dev/null +++ b/extensions-builtin/Lora/scripts/lora_script.py @@ -0,0 +1,100 @@ +import re + +import gradio as gr +from fastapi import FastAPI + +import network +import networks +import lora # noqa:F401 +import lora_patches +import extra_networks_lora +import ui_extra_networks_lora +from modules import script_callbacks, ui_extra_networks, extra_networks, shared + + +def unload(): + networks.originals.undo() + + +def before_ui(): + ui_extra_networks.register_page(ui_extra_networks_lora.ExtraNetworksPageLora()) + + networks.extra_network_lora = extra_networks_lora.ExtraNetworkLora() + extra_networks.register_extra_network(networks.extra_network_lora) + + +networks.originals = lora_patches.LoraPatches() + +script_callbacks.on_model_loaded(networks.assign_network_names_to_compvis_modules) +script_callbacks.on_script_unloaded(unload) +script_callbacks.on_before_ui(before_ui) +script_callbacks.on_infotext_pasted(networks.infotext_pasted) + + +shared.options_templates.update(shared.options_section(('extra_networks', "Extra Networks"), { + "sd_lora": shared.OptionInfo("None", "Add network to prompt", gr.Dropdown, lambda: {"choices": ["None", *networks.available_networks]}, 
refresh=networks.list_available_networks), + "lora_preferred_name": shared.OptionInfo("Alias from file", "When adding to prompt, refer to Lora by", gr.Radio, {"choices": ["Alias from file", "Filename"]}), + "lora_add_hashes_to_infotext": shared.OptionInfo(True, "Add Lora hashes to infotext"), + "lora_show_all": shared.OptionInfo(False, "Always show all networks on the Lora page").info("otherwise, those detected as for incompatible version of Stable Diffusion will be hidden"), + "lora_hide_unknown_for_versions": shared.OptionInfo([], "Hide networks of unknown versions for model versions", gr.CheckboxGroup, {"choices": ["SD1", "SD2", "SDXL"]}), + "lora_in_memory_limit": shared.OptionInfo(0, "Number of Lora networks to keep cached in memory", gr.Number, {"precision": 0}), + "lora_not_found_warning_console": shared.OptionInfo(False, "Lora not found warning in console"), + "lora_not_found_gradio_warning": shared.OptionInfo(False, "Lora not found warning popup in webui"), +})) + + +shared.options_templates.update(shared.options_section(('compatibility', "Compatibility"), { + "lora_functional": shared.OptionInfo(False, "Lora/Networks: use old method that takes longer when you have multiple Loras active and produces same results as kohya-ss/sd-webui-additional-networks extension"), +})) + + +def create_lora_json(obj: network.NetworkOnDisk): + return { + "name": obj.name, + "alias": obj.alias, + "path": obj.filename, + "metadata": obj.metadata, + } + + +def api_networks(_: gr.Blocks, app: FastAPI): + @app.get("/sdapi/v1/loras") + async def get_loras(): + return [create_lora_json(obj) for obj in networks.available_networks.values()] + + @app.post("/sdapi/v1/refresh-loras") + async def refresh_loras(): + return networks.list_available_networks() + + +script_callbacks.on_app_started(api_networks) + +re_lora = re.compile("= 16 + + +re_word = re.compile(r"[-_\w']+") +re_comma = re.compile(r" *, *") + + +def build_tags(metadata): + tags = {} + + for _, tags_dict in metadata.get("ss_tag_frequency", {}).items(): + for tag, tag_count in tags_dict.items(): + tag = tag.strip() + tags[tag] = tags.get(tag, 0) + int(tag_count) + + if tags and is_non_comma_tagset(tags): + new_tags = {} + + for text, text_count in tags.items(): + for word in re.findall(re_word, text): + if len(word) < 3: + continue + + new_tags[word] = new_tags.get(word, 0) + text_count + + tags = new_tags + + ordered_tags = sorted(tags.keys(), key=tags.get, reverse=True) + + return [(tag, tags[tag]) for tag in ordered_tags] + + +class LoraUserMetadataEditor(ui_extra_networks_user_metadata.UserMetadataEditor): + def __init__(self, ui, tabname, page): + super().__init__(ui, tabname, page) + + self.select_sd_version = None + + self.taginfo = None + self.edit_activation_text = None + self.slider_preferred_weight = None + self.edit_notes = None + + def save_lora_user_metadata(self, name, desc, sd_version, activation_text, preferred_weight, negative_text, notes): + user_metadata = self.get_user_metadata(name) + user_metadata["description"] = desc + user_metadata["sd version"] = sd_version + user_metadata["activation text"] = activation_text + user_metadata["preferred weight"] = preferred_weight + user_metadata["negative text"] = negative_text + user_metadata["notes"] = notes + + self.write_user_metadata(name, user_metadata) + + def get_metadata_table(self, name): + table = super().get_metadata_table(name) + item = self.page.items.get(name, {}) + metadata = item.get("metadata") or {} + + keys = { + 'ss_output_name': "Output name:", + 'ss_sd_model_name': 
"Model:", + 'ss_clip_skip': "Clip skip:", + 'ss_network_module': "Kohya module:", + } + + for key, label in keys.items(): + value = metadata.get(key, None) + if value is not None and str(value) != "None": + table.append((label, html.escape(value))) + + ss_training_started_at = metadata.get('ss_training_started_at') + if ss_training_started_at: + table.append(("Date trained:", datetime.datetime.utcfromtimestamp(float(ss_training_started_at)).strftime('%Y-%m-%d %H:%M'))) + + ss_bucket_info = metadata.get("ss_bucket_info") + if ss_bucket_info and "buckets" in ss_bucket_info: + resolutions = {} + for _, bucket in ss_bucket_info["buckets"].items(): + resolution = bucket["resolution"] + resolution = f'{resolution[1]}x{resolution[0]}' + + resolutions[resolution] = resolutions.get(resolution, 0) + int(bucket["count"]) + + resolutions_list = sorted(resolutions.keys(), key=resolutions.get, reverse=True) + resolutions_text = html.escape(", ".join(resolutions_list[0:4])) + if len(resolutions) > 4: + resolutions_text += ", ..." + resolutions_text = f"{resolutions_text}" + + table.append(('Resolutions:' if len(resolutions_list) > 1 else 'Resolution:', resolutions_text)) + + image_count = 0 + for _, params in metadata.get("ss_dataset_dirs", {}).items(): + image_count += int(params.get("img_count", 0)) + + if image_count: + table.append(("Dataset size:", image_count)) + + return table + + def put_values_into_components(self, name): + user_metadata = self.get_user_metadata(name) + values = super().put_values_into_components(name) + + item = self.page.items.get(name, {}) + metadata = item.get("metadata") or {} + + tags = build_tags(metadata) + gradio_tags = [(tag, str(count)) for tag, count in tags[0:24]] + + return [ + *values[0:5], + item.get("sd_version", "Unknown"), + gr.HighlightedText.update(value=gradio_tags, visible=True if tags else False), + user_metadata.get('activation text', ''), + float(user_metadata.get('preferred weight', 0.0)), + user_metadata.get('negative text', ''), + gr.update(visible=True if tags else False), + gr.update(value=self.generate_random_prompt_from_tags(tags), visible=True if tags else False), + ] + + def generate_random_prompt(self, name): + item = self.page.items.get(name, {}) + metadata = item.get("metadata") or {} + tags = build_tags(metadata) + + return self.generate_random_prompt_from_tags(tags) + + def generate_random_prompt_from_tags(self, tags): + max_count = None + res = [] + for tag, count in tags: + if not max_count: + max_count = count + + v = random.random() * max_count + if count > v: + res.append(tag) + + return ", ".join(sorted(res)) + + def create_extra_default_items_in_left_column(self): + + # this would be a lot better as gr.Radio but I can't make it work + self.select_sd_version = gr.Dropdown(['SD1', 'SD2', 'SDXL', 'Unknown'], value='Unknown', label='Stable Diffusion version', interactive=True) + + def create_editor(self): + self.create_default_editor_elems() + + self.taginfo = gr.HighlightedText(label="Training dataset tags") + self.edit_activation_text = gr.Text(label='Activation text', info="Will be added to prompt along with Lora") + self.slider_preferred_weight = gr.Slider(label='Preferred weight', info="Set to 0 to disable", minimum=0.0, maximum=2.0, step=0.01) + self.edit_negative_text = gr.Text(label='Negative prompt', info="Will be added to negative prompts") + with gr.Row() as row_random_prompt: + with gr.Column(scale=8): + random_prompt = gr.Textbox(label='Random prompt', lines=4, max_lines=4, interactive=False) + + with gr.Column(scale=1, 
min_width=120): + generate_random_prompt = gr.Button('Generate', size="lg", scale=1) + + self.edit_notes = gr.TextArea(label='Notes', lines=4) + + generate_random_prompt.click(fn=self.generate_random_prompt, inputs=[self.edit_name_input], outputs=[random_prompt], show_progress=False) + + def select_tag(activation_text, evt: gr.SelectData): + tag = evt.value[0] + + words = re.split(re_comma, activation_text) + if tag in words: + words = [x for x in words if x != tag and x.strip()] + return ", ".join(words) + + return activation_text + ", " + tag if activation_text else tag + + self.taginfo.select(fn=select_tag, inputs=[self.edit_activation_text], outputs=[self.edit_activation_text], show_progress=False) + + self.create_default_buttons() + + viewed_components = [ + self.edit_name, + self.edit_description, + self.html_filedata, + self.html_preview, + self.edit_notes, + self.select_sd_version, + self.taginfo, + self.edit_activation_text, + self.slider_preferred_weight, + self.edit_negative_text, + row_random_prompt, + random_prompt, + ] + + self.button_edit\ + .click(fn=self.put_values_into_components, inputs=[self.edit_name_input], outputs=viewed_components)\ + .then(fn=lambda: gr.update(visible=True), inputs=[], outputs=[self.box]) + + edited_components = [ + self.edit_description, + self.select_sd_version, + self.edit_activation_text, + self.slider_preferred_weight, + self.edit_negative_text, + self.edit_notes, + ] + + + self.setup_save_handler(self.button_save, self.save_lora_user_metadata, edited_components) diff --git a/extensions-builtin/Lora/ui_extra_networks_lora.py b/extensions-builtin/Lora/ui_extra_networks_lora.py new file mode 100644 index 0000000000000000000000000000000000000000..3425c4e816c03623aefe673b3c1a73d2774f81e0 --- /dev/null +++ b/extensions-builtin/Lora/ui_extra_networks_lora.py @@ -0,0 +1,90 @@ +import os + +import network +import networks + +from modules import shared, ui_extra_networks +from modules.ui_extra_networks import quote_js +from ui_edit_user_metadata import LoraUserMetadataEditor + + +class ExtraNetworksPageLora(ui_extra_networks.ExtraNetworksPage): + def __init__(self): + super().__init__('Lora') + + def refresh(self): + networks.list_available_networks() + + def create_item(self, name, index=None, enable_filter=True): + lora_on_disk = networks.available_networks.get(name) + if lora_on_disk is None: + return + + path, ext = os.path.splitext(lora_on_disk.filename) + + alias = lora_on_disk.get_alias() + + search_terms = [self.search_terms_from_path(lora_on_disk.filename)] + if lora_on_disk.hash: + search_terms.append(lora_on_disk.hash) + item = { + "name": name, + "filename": lora_on_disk.filename, + "shorthash": lora_on_disk.shorthash, + "preview": self.find_preview(path), + "description": self.find_description(path), + "search_terms": search_terms, + "local_preview": f"{path}.{shared.opts.samples_format}", + "metadata": lora_on_disk.metadata, + "sort_keys": {'default': index, **self.get_sort_keys(lora_on_disk.filename)}, + "sd_version": lora_on_disk.sd_version.name, + } + + self.read_user_metadata(item) + activation_text = item["user_metadata"].get("activation text") + preferred_weight = item["user_metadata"].get("preferred weight", 0.0) + item["prompt"] = quote_js(f"") + + if activation_text: + item["prompt"] += " + " + quote_js(" " + activation_text) + + negative_prompt = item["user_metadata"].get("negative text") + item["negative_prompt"] = quote_js("") + if negative_prompt: + item["negative_prompt"] = quote_js('(' + negative_prompt + ':1)') + + 
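+        # Prefer the SD version the user assigned in the card's metadata editor; otherwise fall
+        # back to the version detected from the file's training metadata, then use it below to
+        # hide cards that do not match the loaded checkpoint (unless lora_show_all is enabled).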
sd_version = item["user_metadata"].get("sd version") + if sd_version in network.SdVersion.__members__: + item["sd_version"] = sd_version + sd_version = network.SdVersion[sd_version] + else: + sd_version = lora_on_disk.sd_version + + if shared.opts.lora_show_all or not enable_filter: + pass + elif sd_version == network.SdVersion.Unknown: + model_version = network.SdVersion.SDXL if shared.sd_model.is_sdxl else network.SdVersion.SD2 if shared.sd_model.is_sd2 else network.SdVersion.SD1 + if model_version.name in shared.opts.lora_hide_unknown_for_versions: + return None + elif shared.sd_model.is_sdxl and sd_version != network.SdVersion.SDXL: + return None + elif shared.sd_model.is_sd2 and sd_version != network.SdVersion.SD2: + return None + elif shared.sd_model.is_sd1 and sd_version != network.SdVersion.SD1: + return None + + return item + + def list_items(self): + # instantiate a list to protect against concurrent modification + names = list(networks.available_networks) + for index, name in enumerate(names): + item = self.create_item(name, index) + if item is not None: + yield item + + def allowed_directories_for_previews(self): + return [shared.cmd_opts.lora_dir] + + def create_user_metadata_editor(self, ui, tabname): + return LoraUserMetadataEditor(ui, tabname, self) diff --git a/extensions-builtin/ScuNET/preload.py b/extensions-builtin/ScuNET/preload.py new file mode 100644 index 0000000000000000000000000000000000000000..4ce82b1d4349b24192b1915d022ed4fda9f31e5c --- /dev/null +++ b/extensions-builtin/ScuNET/preload.py @@ -0,0 +1,6 @@ +import os +from modules import paths + + +def preload(parser): + parser.add_argument("--scunet-models-path", type=str, help="Path to directory with ScuNET model file(s).", default=os.path.join(paths.models_path, 'ScuNET')) diff --git a/extensions-builtin/ScuNET/scripts/scunet_model.py b/extensions-builtin/ScuNET/scripts/scunet_model.py new file mode 100644 index 0000000000000000000000000000000000000000..fe5e5a19265cfc3eb2527abbff04dc79a74feb7e --- /dev/null +++ b/extensions-builtin/ScuNET/scripts/scunet_model.py @@ -0,0 +1,74 @@ +import sys + +import PIL.Image + +import modules.upscaler +from modules import devices, errors, modelloader, script_callbacks, shared, upscaler_utils + + +class UpscalerScuNET(modules.upscaler.Upscaler): + def __init__(self, dirname): + self.name = "ScuNET" + self.model_name = "ScuNET GAN" + self.model_name2 = "ScuNET PSNR" + self.model_url = "https://github.com/cszn/KAIR/releases/download/v1.0/scunet_color_real_gan.pth" + self.model_url2 = "https://github.com/cszn/KAIR/releases/download/v1.0/scunet_color_real_psnr.pth" + self.user_path = dirname + super().__init__() + model_paths = self.find_models(ext_filter=[".pth"]) + scalers = [] + add_model2 = True + for file in model_paths: + if file.startswith("http"): + name = self.model_name + else: + name = modelloader.friendly_name(file) + if name == self.model_name2 or file == self.model_url2: + add_model2 = False + try: + scaler_data = modules.upscaler.UpscalerData(name, file, self, 4) + scalers.append(scaler_data) + except Exception: + errors.report(f"Error loading ScuNET model: {file}", exc_info=True) + if add_model2: + scaler_data2 = modules.upscaler.UpscalerData(self.model_name2, self.model_url2, self) + scalers.append(scaler_data2) + self.scalers = scalers + + def do_upscale(self, img: PIL.Image.Image, selected_file): + devices.torch_gc() + try: + model = self.load_model(selected_file) + except Exception as e: + print(f"ScuNET: Unable to load model from {selected_file}: {e}", 
file=sys.stderr) + return img + + img = upscaler_utils.upscale_2( + img, + model, + tile_size=shared.opts.SCUNET_tile, + tile_overlap=shared.opts.SCUNET_tile_overlap, + scale=1, # ScuNET is a denoising model, not an upscaler + desc='ScuNET', + ) + devices.torch_gc() + return img + + def load_model(self, path: str): + device = devices.get_device_for('scunet') + if path.startswith("http"): + # TODO: this doesn't use `path` at all? + filename = modelloader.load_file_from_url(self.model_url, model_dir=self.model_download_path, file_name=f"{self.name}.pth") + else: + filename = path + return modelloader.load_spandrel_model(filename, device=device, expected_architecture='SCUNet') + + +def on_ui_settings(): + import gradio as gr + + shared.opts.add_option("SCUNET_tile", shared.OptionInfo(256, "Tile size for SCUNET upscalers.", gr.Slider, {"minimum": 0, "maximum": 512, "step": 16}, section=('upscaling', "Upscaling")).info("0 = no tiling")) + shared.opts.add_option("SCUNET_tile_overlap", shared.OptionInfo(8, "Tile overlap for SCUNET upscalers.", gr.Slider, {"minimum": 0, "maximum": 64, "step": 1}, section=('upscaling', "Upscaling")).info("Low values = visible seam")) + + +script_callbacks.on_ui_settings(on_ui_settings) diff --git a/extensions-builtin/SwinIR/preload.py b/extensions-builtin/SwinIR/preload.py new file mode 100644 index 0000000000000000000000000000000000000000..e912c6402bc80faa797cf2e95183101fb9a10286 --- /dev/null +++ b/extensions-builtin/SwinIR/preload.py @@ -0,0 +1,6 @@ +import os +from modules import paths + + +def preload(parser): + parser.add_argument("--swinir-models-path", type=str, help="Path to directory with SwinIR model file(s).", default=os.path.join(paths.models_path, 'SwinIR')) diff --git a/extensions-builtin/SwinIR/scripts/swinir_model.py b/extensions-builtin/SwinIR/scripts/swinir_model.py new file mode 100644 index 0000000000000000000000000000000000000000..3b51ee8603cde269a4da75ece61b50260d61e0bd --- /dev/null +++ b/extensions-builtin/SwinIR/scripts/swinir_model.py @@ -0,0 +1,98 @@ +import logging +import sys + +import torch +from PIL import Image + +from modules import devices, modelloader, script_callbacks, shared, upscaler_utils +from modules.upscaler import Upscaler, UpscalerData +from modules_forge.forge_util import prepare_free_memory + +SWINIR_MODEL_URL = "https://github.com/JingyunLiang/SwinIR/releases/download/v0.0/003_realSR_BSRGAN_DFOWMFC_s64w8_SwinIR-L_x4_GAN.pth" + +logger = logging.getLogger(__name__) + + +class UpscalerSwinIR(Upscaler): + def __init__(self, dirname): + self._cached_model = None # keep the model when SWIN_torch_compile is on to prevent re-compile every runs + self._cached_model_config = None # to clear '_cached_model' when changing model (v1/v2) or settings + self.name = "SwinIR" + self.model_url = SWINIR_MODEL_URL + self.model_name = "SwinIR 4x" + self.user_path = dirname + super().__init__() + scalers = [] + model_files = self.find_models(ext_filter=[".pt", ".pth"]) + for model in model_files: + if model.startswith("http"): + name = self.model_name + else: + name = modelloader.friendly_name(model) + model_data = UpscalerData(name, model, self) + scalers.append(model_data) + self.scalers = scalers + + def do_upscale(self, img: Image.Image, model_file: str) -> Image.Image: + prepare_free_memory() + + current_config = (model_file, shared.opts.SWIN_tile) + + if self._cached_model_config == current_config: + model = self._cached_model + else: + try: + model = self.load_model(model_file) + except Exception as e: + print(f"Failed loading 
SwinIR model {model_file}: {e}", file=sys.stderr) + return img + self._cached_model = model + self._cached_model_config = current_config + + img = upscaler_utils.upscale_2( + img, + model, + tile_size=shared.opts.SWIN_tile, + tile_overlap=shared.opts.SWIN_tile_overlap, + scale=model.scale, + desc="SwinIR", + ) + devices.torch_gc() + return img + + def load_model(self, path, scale=4): + if path.startswith("http"): + filename = modelloader.load_file_from_url( + url=path, + model_dir=self.model_download_path, + file_name=f"{self.model_name.replace(' ', '_')}.pth", + ) + else: + filename = path + + model_descriptor = modelloader.load_spandrel_model( + filename, + device=self._get_device(), + prefer_half=(devices.dtype == torch.float16), + expected_architecture="SwinIR", + ) + if getattr(shared.opts, 'SWIN_torch_compile', False): + try: + model_descriptor.model.compile() + except Exception: + logger.warning("Failed to compile SwinIR model, fallback to JIT", exc_info=True) + return model_descriptor + + def _get_device(self): + return devices.get_device_for('swinir') + + +def on_ui_settings(): + import gradio as gr + + shared.opts.add_option("SWIN_tile", shared.OptionInfo(192, "Tile size for all SwinIR.", gr.Slider, {"minimum": 16, "maximum": 512, "step": 16}, section=('upscaling', "Upscaling"))) + shared.opts.add_option("SWIN_tile_overlap", shared.OptionInfo(8, "Tile overlap, in pixels for SwinIR. Low values = visible seam.", gr.Slider, {"minimum": 0, "maximum": 48, "step": 1}, section=('upscaling', "Upscaling"))) + shared.opts.add_option("SWIN_torch_compile", shared.OptionInfo(False, "Use torch.compile to accelerate SwinIR.", gr.Checkbox, {"interactive": True}, section=('upscaling', "Upscaling")).info("Takes longer on first run")) + + +script_callbacks.on_ui_settings(on_ui_settings) diff --git a/extensions-builtin/canvas-zoom-and-pan/javascript/zoom.js b/extensions-builtin/canvas-zoom-and-pan/javascript/zoom.js new file mode 100644 index 0000000000000000000000000000000000000000..df60c1a177557fe4f5e5896f5f1e566778b63e3c --- /dev/null +++ b/extensions-builtin/canvas-zoom-and-pan/javascript/zoom.js @@ -0,0 +1,968 @@ +onUiLoaded(async() => { + const elementIDs = { + img2imgTabs: "#mode_img2img .tab-nav", + inpaint: "#img2maskimg", + inpaintSketch: "#inpaint_sketch", + rangeGroup: "#img2img_column_size", + sketch: "#img2img_sketch" + }; + const tabNameToElementId = { + "Inpaint sketch": elementIDs.inpaintSketch, + "Inpaint": elementIDs.inpaint, + "Sketch": elementIDs.sketch + }; + + + // Helper functions + // Get active tab + + /** + * Waits for an element to be present in the DOM. 
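+     * Polls document.querySelector with the given selector every 100 ms and resolves with the element once it appears.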
+ */ + const waitForElement = (id) => new Promise(resolve => { + const checkForElement = () => { + const element = document.querySelector(id); + if (element) return resolve(element); + setTimeout(checkForElement, 100); + }; + checkForElement(); + }); + + function getActiveTab(elements, all = false) { + const tabs = elements.img2imgTabs.querySelectorAll("button"); + + if (all) return tabs; + + for (let tab of tabs) { + if (tab.classList.contains("selected")) { + return tab; + } + } + } + + // Get tab ID + function getTabId(elements) { + const activeTab = getActiveTab(elements); + return tabNameToElementId[activeTab.innerText]; + } + + // Wait until opts loaded + async function waitForOpts() { + for (; ;) { + if (window.opts && Object.keys(window.opts).length) { + return window.opts; + } + await new Promise(resolve => setTimeout(resolve, 100)); + } + } + + // Detect whether the element has a horizontal scroll bar + function hasHorizontalScrollbar(element) { + return element.scrollWidth > element.clientWidth; + } + + // Function for defining the "Ctrl", "Shift" and "Alt" keys + function isModifierKey(event, key) { + switch (key) { + case "Ctrl": + return event.ctrlKey; + case "Shift": + return event.shiftKey; + case "Alt": + return event.altKey; + default: + return false; + } + } + + // Check if hotkey is valid + function isValidHotkey(value) { + const specialKeys = ["Ctrl", "Alt", "Shift", "Disable"]; + return ( + (typeof value === "string" && + value.length === 1 && + /[a-z]/i.test(value)) || + specialKeys.includes(value) + ); + } + + // Normalize hotkey + function normalizeHotkey(hotkey) { + return hotkey.length === 1 ? "Key" + hotkey.toUpperCase() : hotkey; + } + + // Format hotkey for display + function formatHotkeyForDisplay(hotkey) { + return hotkey.startsWith("Key") ? hotkey.slice(3) : hotkey; + } + + // Create hotkey configuration with the provided options + function createHotkeyConfig(defaultHotkeysConfig, hotkeysConfigOpts) { + const result = {}; // Resulting hotkey configuration + const usedKeys = new Set(); // Set of used hotkeys + + // Iterate through defaultHotkeysConfig keys + for (const key in defaultHotkeysConfig) { + const userValue = hotkeysConfigOpts[key]; // User-provided hotkey value + const defaultValue = defaultHotkeysConfig[key]; // Default hotkey value + + // Apply appropriate value for undefined, boolean, or object userValue + if ( + userValue === undefined || + typeof userValue === "boolean" || + typeof userValue === "object" || + userValue === "disable" + ) { + result[key] = + userValue === undefined ? defaultValue : userValue; + } else if (isValidHotkey(userValue)) { + const normalizedUserValue = normalizeHotkey(userValue); + + // Check for conflicting hotkeys + if (!usedKeys.has(normalizedUserValue)) { + usedKeys.add(normalizedUserValue); + result[key] = normalizedUserValue; + } else { + console.error( + `Hotkey: ${formatHotkeyForDisplay( + userValue + )} for ${key} is repeated and conflicts with another hotkey. The default hotkey is used: ${formatHotkeyForDisplay( + defaultValue + )}` + ); + result[key] = defaultValue; + } + } else { + console.error( + `Hotkey: ${formatHotkeyForDisplay( + userValue + )} for ${key} is not valid. 
The default hotkey is used: ${formatHotkeyForDisplay( + defaultValue + )}` + ); + result[key] = defaultValue; + } + } + + return result; + } + + // Disables functions in the config object based on the provided list of function names + function disableFunctions(config, disabledFunctions) { + // Bind the hasOwnProperty method to the functionMap object to avoid errors + const hasOwnProperty = + Object.prototype.hasOwnProperty.bind(functionMap); + + // Loop through the disabledFunctions array and disable the corresponding functions in the config object + disabledFunctions.forEach(funcName => { + if (hasOwnProperty(funcName)) { + const key = functionMap[funcName]; + config[key] = "disable"; + } + }); + + // Return the updated config object + return config; + } + + /** + * The restoreImgRedMask function displays a red mask around an image to indicate the aspect ratio. + * If the image display property is set to 'none', the mask breaks. To fix this, the function + * temporarily sets the display property to 'block' and then hides the mask again after 300 milliseconds + * to avoid breaking the canvas. Additionally, the function adjusts the mask to work correctly on + * very long images. + */ + function restoreImgRedMask(elements) { + const mainTabId = getTabId(elements); + + if (!mainTabId) return; + + const mainTab = gradioApp().querySelector(mainTabId); + const img = mainTab.querySelector("img"); + const imageARPreview = gradioApp().querySelector("#imageARPreview"); + + if (!img || !imageARPreview) return; + + imageARPreview.style.transform = ""; + if (parseFloat(mainTab.style.width) > 865) { + const transformString = mainTab.style.transform; + const scaleMatch = transformString.match( + /scale\(([-+]?[0-9]*\.?[0-9]+)\)/ + ); + let zoom = 1; // default zoom + + if (scaleMatch && scaleMatch[1]) { + zoom = Number(scaleMatch[1]); + } + + imageARPreview.style.transformOrigin = "0 0"; + imageARPreview.style.transform = `scale(${zoom})`; + } + + if (img.style.display !== "none") return; + + img.style.display = "block"; + + setTimeout(() => { + img.style.display = "none"; + }, 400); + } + + const hotkeysConfigOpts = await waitForOpts(); + + // Default config + const defaultHotkeysConfig = { + canvas_hotkey_zoom: "Alt", + canvas_hotkey_adjust: "Ctrl", + canvas_hotkey_reset: "KeyR", + canvas_hotkey_fullscreen: "KeyS", + canvas_hotkey_move: "KeyF", + canvas_hotkey_overlap: "KeyO", + canvas_hotkey_shrink_brush: "KeyQ", + canvas_hotkey_grow_brush: "KeyW", + canvas_disabled_functions: [], + canvas_show_tooltip: true, + canvas_auto_expand: true, + canvas_blur_prompt: false, + }; + + const functionMap = { + "Zoom": "canvas_hotkey_zoom", + "Adjust brush size": "canvas_hotkey_adjust", + "Hotkey shrink brush": "canvas_hotkey_shrink_brush", + "Hotkey enlarge brush": "canvas_hotkey_grow_brush", + "Moving canvas": "canvas_hotkey_move", + "Fullscreen": "canvas_hotkey_fullscreen", + "Reset Zoom": "canvas_hotkey_reset", + "Overlap": "canvas_hotkey_overlap" + }; + + // Loading the configuration from opts + const preHotkeysConfig = createHotkeyConfig( + defaultHotkeysConfig, + hotkeysConfigOpts + ); + + // Disable functions that are not needed by the user + const hotkeysConfig = disableFunctions( + preHotkeysConfig, + preHotkeysConfig.canvas_disabled_functions + ); + + let isMoving = false; + let mouseX, mouseY; + let activeElement; + + const elements = Object.fromEntries( + Object.keys(elementIDs).map(id => [ + id, + gradioApp().querySelector(elementIDs[id]) + ]) + ); + const elemData = {}; + + // Apply functionality to 
the range inputs. Restore redmask and correct for long images. + const rangeInputs = elements.rangeGroup ? + Array.from(elements.rangeGroup.querySelectorAll("input")) : + [ + gradioApp().querySelector("#img2img_width input[type='range']"), + gradioApp().querySelector("#img2img_height input[type='range']") + ]; + + for (const input of rangeInputs) { + input?.addEventListener("input", () => restoreImgRedMask(elements)); + } + + function applyZoomAndPan(elemId, isExtension = true) { + const targetElement = gradioApp().querySelector(elemId); + + if (!targetElement) { + console.log("Element not found"); + return; + } + + targetElement.style.transformOrigin = "0 0"; + + elemData[elemId] = { + zoom: 1, + panX: 0, + panY: 0 + }; + let fullScreenMode = false; + + // Create tooltip + function createTooltip() { + const toolTipElemnt = + targetElement.querySelector(".image-container"); + const tooltip = document.createElement("div"); + tooltip.className = "canvas-tooltip"; + + // Creating an item of information + const info = document.createElement("i"); + info.className = "canvas-tooltip-info"; + info.textContent = ""; + + // Create a container for the contents of the tooltip + const tooltipContent = document.createElement("div"); + tooltipContent.className = "canvas-tooltip-content"; + + // Define an array with hotkey information and their actions + const hotkeysInfo = [ + { + configKey: "canvas_hotkey_zoom", + action: "Zoom canvas", + keySuffix: " + wheel" + }, + { + configKey: "canvas_hotkey_adjust", + action: "Adjust brush size", + keySuffix: " + wheel" + }, + {configKey: "canvas_hotkey_reset", action: "Reset zoom"}, + { + configKey: "canvas_hotkey_fullscreen", + action: "Fullscreen mode" + }, + {configKey: "canvas_hotkey_move", action: "Move canvas"}, + {configKey: "canvas_hotkey_overlap", action: "Overlap"} + ]; + + // Create hotkeys array with disabled property based on the config values + const hotkeys = hotkeysInfo.map(info => { + const configValue = hotkeysConfig[info.configKey]; + const key = info.keySuffix ? + `${configValue}${info.keySuffix}` : + configValue.charAt(configValue.length - 1); + return { + key, + action: info.action, + disabled: configValue === "disable" + }; + }); + + for (const hotkey of hotkeys) { + if (hotkey.disabled) { + continue; + } + + const p = document.createElement("p"); + p.innerHTML = `${hotkey.key} - ${hotkey.action}`; + tooltipContent.appendChild(p); + } + + // Add information and content elements to the tooltip element + tooltip.appendChild(info); + tooltip.appendChild(tooltipContent); + + // Add a hint element to the target element + toolTipElemnt.appendChild(tooltip); + } + + //Show tool tip if setting enable + if (hotkeysConfig.canvas_show_tooltip) { + createTooltip(); + } + + // In the course of research, it was found that the tag img is very harmful when zooming and creates white canvases. This hack allows you to almost never think about this problem, it has no effect on webui. 
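+        // fixCanvas() below hides that img element whenever a tab other than img2img is active.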
+ function fixCanvas() { + const activeTab = getActiveTab(elements).textContent.trim(); + + if (activeTab !== "img2img") { + const img = targetElement.querySelector(`${elemId} img`); + + if (img && img.style.display !== "none") { + img.style.display = "none"; + img.style.visibility = "hidden"; + } + } + } + + // Reset the zoom level and pan position of the target element to their initial values + function resetZoom() { + elemData[elemId] = { + zoomLevel: 1, + panX: 0, + panY: 0 + }; + + if (isExtension) { + targetElement.style.overflow = "hidden"; + } + + targetElement.isZoomed = false; + + fixCanvas(); + targetElement.style.transform = `scale(${elemData[elemId].zoomLevel}) translate(${elemData[elemId].panX}px, ${elemData[elemId].panY}px)`; + + const canvas = gradioApp().querySelector( + `${elemId} canvas[key="interface"]` + ); + + toggleOverlap("off"); + fullScreenMode = false; + + const closeBtn = targetElement.querySelector("button[aria-label='Remove Image']"); + if (closeBtn) { + closeBtn.addEventListener("click", resetZoom); + } + + if (canvas && isExtension) { + const parentElement = targetElement.closest('[id^="component-"]'); + if ( + canvas && + parseFloat(canvas.style.width) > parentElement.offsetWidth && + parseFloat(targetElement.style.width) > parentElement.offsetWidth + ) { + fitToElement(); + return; + } + + } + + if ( + canvas && + !isExtension && + parseFloat(canvas.style.width) > 865 && + parseFloat(targetElement.style.width) > 865 + ) { + fitToElement(); + return; + } + + targetElement.style.width = ""; + } + + // Toggle the zIndex of the target element between two values, allowing it to overlap or be overlapped by other elements + function toggleOverlap(forced = "") { + const zIndex1 = "0"; + const zIndex2 = "998"; + + targetElement.style.zIndex = + targetElement.style.zIndex !== zIndex2 ? zIndex2 : zIndex1; + + if (forced === "off") { + targetElement.style.zIndex = zIndex1; + } else if (forced === "on") { + targetElement.style.zIndex = zIndex2; + } + } + + // Adjust the brush size based on the deltaY value from a mouse wheel event + function adjustBrushSize( + elemId, + deltaY, + withoutValue = false, + percentage = 5 + ) { + const input = + gradioApp().querySelector( + `${elemId} input[aria-label='Brush radius']` + ) || + gradioApp().querySelector( + `${elemId} button[aria-label="Use brush"]` + ); + + if (input) { + input.click(); + if (!withoutValue) { + const maxValue = + parseFloat(input.getAttribute("max")) || 100; + const changeAmount = maxValue * (percentage / 100); + const newValue = + parseFloat(input.value) + + (deltaY > 0 ? 
-changeAmount : changeAmount); + input.value = Math.min(Math.max(newValue, 0), maxValue); + input.dispatchEvent(new Event("change")); + } + } + } + + // Reset zoom when uploading a new image + const fileInput = gradioApp().querySelector( + `${elemId} input[type="file"][accept="image/*"].svelte-116rqfv` + ); + fileInput.addEventListener("click", resetZoom); + + // Update the zoom level and pan position of the target element based on the values of the zoomLevel, panX and panY variables + function updateZoom(newZoomLevel, mouseX, mouseY) { + newZoomLevel = Math.max(0.1, Math.min(newZoomLevel, 15)); + + elemData[elemId].panX += + mouseX - (mouseX * newZoomLevel) / elemData[elemId].zoomLevel; + elemData[elemId].panY += + mouseY - (mouseY * newZoomLevel) / elemData[elemId].zoomLevel; + + targetElement.style.transformOrigin = "0 0"; + targetElement.style.transform = `translate(${elemData[elemId].panX}px, ${elemData[elemId].panY}px) scale(${newZoomLevel})`; + + toggleOverlap("on"); + if (isExtension) { + targetElement.style.overflow = "visible"; + } + + return newZoomLevel; + } + + // Change the zoom level based on user interaction + function changeZoomLevel(operation, e) { + if (isModifierKey(e, hotkeysConfig.canvas_hotkey_zoom)) { + e.preventDefault(); + + let zoomPosX, zoomPosY; + let delta = 0.2; + if (elemData[elemId].zoomLevel > 7) { + delta = 0.9; + } else if (elemData[elemId].zoomLevel > 2) { + delta = 0.6; + } + + zoomPosX = e.clientX; + zoomPosY = e.clientY; + + fullScreenMode = false; + elemData[elemId].zoomLevel = updateZoom( + elemData[elemId].zoomLevel + + (operation === "+" ? delta : -delta), + zoomPosX - targetElement.getBoundingClientRect().left, + zoomPosY - targetElement.getBoundingClientRect().top + ); + + targetElement.isZoomed = true; + } + } + + /** + * This function fits the target element to the screen by calculating + * the required scale and offsets. It also updates the global variables + * zoomLevel, panX, and panY to reflect the new state. 
+ */ + + function fitToElement() { + //Reset Zoom + targetElement.style.transform = `translate(${0}px, ${0}px) scale(${1})`; + + let parentElement; + + if (isExtension) { + parentElement = targetElement.closest('[id^="component-"]'); + } else { + parentElement = targetElement.parentElement; + } + + + // Get element and screen dimensions + const elementWidth = targetElement.offsetWidth; + const elementHeight = targetElement.offsetHeight; + + const screenWidth = parentElement.clientWidth; + const screenHeight = parentElement.clientHeight; + + // Get element's coordinates relative to the parent element + const elementRect = targetElement.getBoundingClientRect(); + const parentRect = parentElement.getBoundingClientRect(); + const elementX = elementRect.x - parentRect.x; + + // Calculate scale and offsets + const scaleX = screenWidth / elementWidth; + const scaleY = screenHeight / elementHeight; + const scale = Math.min(scaleX, scaleY); + + const transformOrigin = + window.getComputedStyle(targetElement).transformOrigin; + const [originX, originY] = transformOrigin.split(" "); + const originXValue = parseFloat(originX); + const originYValue = parseFloat(originY); + + const offsetX = + (screenWidth - elementWidth * scale) / 2 - + originXValue * (1 - scale); + const offsetY = + (screenHeight - elementHeight * scale) / 2.5 - + originYValue * (1 - scale); + + // Apply scale and offsets to the element + targetElement.style.transform = `translate(${offsetX}px, ${offsetY}px) scale(${scale})`; + + // Update global variables + elemData[elemId].zoomLevel = scale; + elemData[elemId].panX = offsetX; + elemData[elemId].panY = offsetY; + + fullScreenMode = false; + toggleOverlap("off"); + } + + /** + * This function fits the target element to the screen by calculating + * the required scale and offsets. It also updates the global variables + * zoomLevel, panX, and panY to reflect the new state. 
+ */ + + // Fullscreen mode + function fitToScreen() { + const canvas = gradioApp().querySelector( + `${elemId} canvas[key="interface"]` + ); + + if (!canvas) return; + + if (canvas.offsetWidth > 862 || isExtension) { + targetElement.style.width = (canvas.offsetWidth + 2) + "px"; + } + + if (isExtension) { + targetElement.style.overflow = "visible"; + } + + if (fullScreenMode) { + resetZoom(); + fullScreenMode = false; + return; + } + + //Reset Zoom + targetElement.style.transform = `translate(${0}px, ${0}px) scale(${1})`; + + // Get scrollbar width to right-align the image + const scrollbarWidth = + window.innerWidth - document.documentElement.clientWidth; + + // Get element and screen dimensions + const elementWidth = targetElement.offsetWidth; + const elementHeight = targetElement.offsetHeight; + const screenWidth = window.innerWidth - scrollbarWidth; + const screenHeight = window.innerHeight; + + // Get element's coordinates relative to the page + const elementRect = targetElement.getBoundingClientRect(); + const elementY = elementRect.y; + const elementX = elementRect.x; + + // Calculate scale and offsets + const scaleX = screenWidth / elementWidth; + const scaleY = screenHeight / elementHeight; + const scale = Math.min(scaleX, scaleY); + + // Get the current transformOrigin + const computedStyle = window.getComputedStyle(targetElement); + const transformOrigin = computedStyle.transformOrigin; + const [originX, originY] = transformOrigin.split(" "); + const originXValue = parseFloat(originX); + const originYValue = parseFloat(originY); + + // Calculate offsets with respect to the transformOrigin + const offsetX = + (screenWidth - elementWidth * scale) / 2 - + elementX - + originXValue * (1 - scale); + const offsetY = + (screenHeight - elementHeight * scale) / 2 - + elementY - + originYValue * (1 - scale); + + // Apply scale and offsets to the element + targetElement.style.transform = `translate(${offsetX}px, ${offsetY}px) scale(${scale})`; + + // Update global variables + elemData[elemId].zoomLevel = scale; + elemData[elemId].panX = offsetX; + elemData[elemId].panY = offsetY; + + fullScreenMode = true; + toggleOverlap("on"); + } + + // Handle keydown events + function handleKeyDown(event) { + // Disable key locks to make pasting from the buffer work correctly + if ((event.ctrlKey && event.code === 'KeyV') || (event.ctrlKey && event.code === 'KeyC') || event.code === "F5") { + return; + } + + // before activating shortcut, ensure user is not actively typing in an input field + if (!hotkeysConfig.canvas_blur_prompt) { + if (event.target.nodeName === 'TEXTAREA' || event.target.nodeName === 'INPUT') { + return; + } + } + + + const hotkeyActions = { + [hotkeysConfig.canvas_hotkey_reset]: resetZoom, + [hotkeysConfig.canvas_hotkey_overlap]: toggleOverlap, + [hotkeysConfig.canvas_hotkey_fullscreen]: fitToScreen, + [hotkeysConfig.canvas_hotkey_shrink_brush]: () => adjustBrushSize(elemId, 10), + [hotkeysConfig.canvas_hotkey_grow_brush]: () => adjustBrushSize(elemId, -10) + }; + + const action = hotkeyActions[event.code]; + if (action) { + event.preventDefault(); + action(event); + } + + if ( + isModifierKey(event, hotkeysConfig.canvas_hotkey_zoom) || + isModifierKey(event, hotkeysConfig.canvas_hotkey_adjust) + ) { + event.preventDefault(); + } + } + + // Get Mouse position + function getMousePosition(e) { + mouseX = e.offsetX; + mouseY = e.offsetY; + } + + // Simulation of the function to put a long image into the screen. 
+ // We detect if an image has a scroll bar or not, make a fullscreen to reveal the image, then reduce it to fit into the element. + // We hide the image and show it to the user when it is ready. + + targetElement.isExpanded = false; + function autoExpand() { + const canvas = document.querySelector(`${elemId} canvas[key="interface"]`); + if (canvas) { + if (hasHorizontalScrollbar(targetElement) && targetElement.isExpanded === false) { + targetElement.style.visibility = "hidden"; + setTimeout(() => { + fitToScreen(); + resetZoom(); + targetElement.style.visibility = "visible"; + targetElement.isExpanded = true; + }, 10); + } + } + } + + targetElement.addEventListener("mousemove", getMousePosition); + + //observers + // Creating an observer with a callback function to handle DOM changes + const observer = new MutationObserver((mutationsList, observer) => { + for (let mutation of mutationsList) { + // If the style attribute of the canvas has changed, by observation it happens only when the picture changes + if (mutation.type === 'attributes' && mutation.attributeName === 'style' && + mutation.target.tagName.toLowerCase() === 'canvas') { + targetElement.isExpanded = false; + setTimeout(resetZoom, 10); + } + } + }); + + // Apply auto expand if enabled + if (hotkeysConfig.canvas_auto_expand) { + targetElement.addEventListener("mousemove", autoExpand); + // Set up an observer to track attribute changes + observer.observe(targetElement, {attributes: true, childList: true, subtree: true}); + } + + // Handle events only inside the targetElement + let isKeyDownHandlerAttached = false; + + function handleMouseMove() { + if (!isKeyDownHandlerAttached) { + document.addEventListener("keydown", handleKeyDown); + isKeyDownHandlerAttached = true; + + activeElement = elemId; + } + } + + function handleMouseLeave() { + if (isKeyDownHandlerAttached) { + document.removeEventListener("keydown", handleKeyDown); + isKeyDownHandlerAttached = false; + + activeElement = null; + } + } + + // Add mouse event handlers + targetElement.addEventListener("mousemove", handleMouseMove); + targetElement.addEventListener("mouseleave", handleMouseLeave); + + // Reset zoom when click on another tab + elements.img2imgTabs.addEventListener("click", resetZoom); + elements.img2imgTabs.addEventListener("click", () => { + // targetElement.style.width = ""; + if (parseInt(targetElement.style.width) > 865) { + setTimeout(fitToElement, 0); + } + }); + + targetElement.addEventListener("wheel", e => { + // change zoom level + const operation = e.deltaY > 0 ? "-" : "+"; + changeZoomLevel(operation, e); + + // Handle brush size adjustment with ctrl key pressed + if (isModifierKey(e, hotkeysConfig.canvas_hotkey_adjust)) { + e.preventDefault(); + + // Increase or decrease brush size based on scroll direction + adjustBrushSize(elemId, e.deltaY); + } + }); + + // Handle the move event for pan functionality. Updates the panX and panY variables and applies the new transform to the target element. 
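+        // Keyboard panning: holding the configured move hotkey sets isMoving, and handleMoveByKey
+        // (attached to mousemove below) then shifts the canvas by the mouse's movementX/movementY.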
+ function handleMoveKeyDown(e) {
+
+ // Disable key locks to make pasting from the buffer work correctly
+ if ((e.ctrlKey && e.code === 'KeyV') || (e.ctrlKey && e.code === 'KeyC') || e.code === "F5") {
+ return;
+ }
+
+ // before activating shortcut, ensure user is not actively typing in an input field
+ if (!hotkeysConfig.canvas_blur_prompt) {
+ if (e.target.nodeName === 'TEXTAREA' || e.target.nodeName === 'INPUT') {
+ return;
+ }
+ }
+
+
+ if (e.code === hotkeysConfig.canvas_hotkey_move) {
+ if (!e.ctrlKey && !e.metaKey && isKeyDownHandlerAttached) {
+ e.preventDefault();
+ document.activeElement.blur();
+ isMoving = true;
+ }
+ }
+ }
+
+ function handleMoveKeyUp(e) {
+ if (e.code === hotkeysConfig.canvas_hotkey_move) {
+ isMoving = false;
+ }
+ }
+
+ document.addEventListener("keydown", handleMoveKeyDown);
+ document.addEventListener("keyup", handleMoveKeyUp);
+
+ // Detect zoom level and update the pan speed.
+ function updatePanPosition(movementX, movementY) {
+ let panSpeed = 2;
+
+ if (elemData[elemId].zoomLevel > 8) {
+ panSpeed = 3.5;
+ }
+
+ elemData[elemId].panX += movementX * panSpeed;
+ elemData[elemId].panY += movementY * panSpeed;
+
+ // Delayed redraw of an element
+ requestAnimationFrame(() => {
+ targetElement.style.transform = `translate(${elemData[elemId].panX}px, ${elemData[elemId].panY}px) scale(${elemData[elemId].zoomLevel})`;
+ toggleOverlap("on");
+ });
+ }
+
+ function handleMoveByKey(e) {
+ if (isMoving && elemId === activeElement) {
+ updatePanPosition(e.movementX, e.movementY);
+ targetElement.style.pointerEvents = "none";
+
+ if (isExtension) {
+ targetElement.style.overflow = "visible";
+ }
+
+ } else {
+ targetElement.style.pointerEvents = "auto";
+ }
+ }
+
+ // Prevent the canvas from sticking to the cursor when the window loses focus
+ window.onblur = function() {
+ isMoving = false;
+ };
+
+ // Checks whether the target element has outgrown its parent container (extension case)
+ function checkForOutBox() {
+ const parentElement = targetElement.closest('[id^="component-"]');
+ if (parentElement.offsetWidth < targetElement.offsetWidth && !targetElement.isExpanded) {
+ resetZoom();
+ targetElement.isExpanded = true;
+ }
+
+ if (parentElement.offsetWidth < targetElement.offsetWidth && elemData[elemId].zoomLevel == 1) {
+ resetZoom();
+ }
+
+ if (parentElement.offsetWidth < targetElement.offsetWidth && targetElement.offsetWidth * elemData[elemId].zoomLevel > parentElement.offsetWidth && elemData[elemId].zoomLevel < 1 && !targetElement.isZoomed) {
+ resetZoom();
+ }
+ }
+
+ if (isExtension) {
+ targetElement.addEventListener("mousemove", checkForOutBox);
+ }
+
+
+ window.addEventListener('resize', (e) => {
+ resetZoom();
+
+ if (isExtension) {
+ targetElement.isExpanded = false;
+ targetElement.isZoomed = false;
+ }
+ });
+
+ gradioApp().addEventListener("mousemove", handleMoveByKey);
+
+
+ }
+
+ applyZoomAndPan(elementIDs.sketch, false);
+ applyZoomAndPan(elementIDs.inpaint, false);
+ applyZoomAndPan(elementIDs.inpaintSketch, false);
+
+ // Make the function global so that other extensions can take advantage of this solution
+ const applyZoomAndPanIntegration = async(id, elementIDs) => {
+ const mainEl = document.querySelector(id);
+ if (id.toLocaleLowerCase() === "none") {
+ for (const elementID of elementIDs) {
+ const el = await waitForElement(elementID);
+ if (!el) break;
+ applyZoomAndPan(elementID);
+ }
+ return;
+ }
+
+ if (!mainEl) return;
+ mainEl.addEventListener("click", async() => {
+ for (const elementID of elementIDs) {
+ const el = await waitForElement(elementID);
+ if (!el) break;
+ applyZoomAndPan(elementID);
+ }
+ }, {once: true});
+ };
+
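+ /*
+ Quick usage sketch for the two globals exported just below. The element ids here
+ ("#my_extension_tab", "#my_extension_input_image") are hypothetical placeholders,
+ not real components. A consuming extension would typically call these from its own
+ onUiLoaded callback (or later), since the globals are only assigned once this
+ script's UI-loaded handler has run:
+
+ onUiLoaded(() => {
+ // zoom/pan for a single canvas container
+ window.applyZoomAndPan("#my_extension_input_image");
+ // or applied lazily, on the first click of a parent component
+ window.applyZoomAndPanIntegration("#my_extension_tab", ["#my_extension_input_image"]);
+ });
+ */
+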
+ window.applyZoomAndPan = applyZoomAndPan; // For a single element; takes the element id, for example applyZoomAndPan("#txt2img_controlnet_ControlNet_input_image")
+
+ window.applyZoomAndPanIntegration = applyZoomAndPanIntegration; // for any extension
+
+ /*
+ The function `applyZoomAndPanIntegration` takes two arguments:
+
+ 1. `id`: A string identifier for the element to which zoom and pan functionality will be applied on click.
+ If the `id` value is "none", the functionality will be applied to all elements specified in the second argument without a click event.
+
+ 2. `elementIDs`: An array of string identifiers for elements. Zoom and pan functionality will be applied to each of these elements on click of the element specified by the first argument.
+ If "none" is specified in the first argument, the functionality will be applied to each of these elements without a click event.
+
+ Example usage:
+ applyZoomAndPanIntegration("#txt2img_controlnet", ["#txt2img_controlnet_ControlNet_input_image"]);
+ In this example, zoom and pan functionality will be applied to the element with the identifier "txt2img_controlnet_ControlNet_input_image" upon clicking the element with the identifier "txt2img_controlnet".
+ */
+
+ // More examples
+ // Add integration with ControlNet txt2img One TAB
+ // applyZoomAndPanIntegration("#txt2img_controlnet", ["#txt2img_controlnet_ControlNet_input_image"]);
+
+ // Add integration with ControlNet txt2img Tabs
+ // applyZoomAndPanIntegration("#txt2img_controlnet",Array.from({ length: 10 }, (_, i) => `#txt2img_controlnet_ControlNet-${i}_input_image`));
+
+ // Add integration with Inpaint Anything
+ // applyZoomAndPanIntegration("None", ["#ia_sam_image", "#ia_sel_mask"]);
+});
diff --git a/extensions-builtin/canvas-zoom-and-pan/scripts/hotkey_config.py b/extensions-builtin/canvas-zoom-and-pan/scripts/hotkey_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..89b7c31f22d410c5af106b0eaf30d41304518a26
--- /dev/null
+++ b/extensions-builtin/canvas-zoom-and-pan/scripts/hotkey_config.py
@@ -0,0 +1,17 @@
+import gradio as gr
+from modules import shared
+
+shared.options_templates.update(shared.options_section(('canvas_hotkey', "Canvas Hotkeys"), {
+ "canvas_hotkey_zoom": shared.OptionInfo("Alt", "Zoom canvas", gr.Radio, {"choices": ["Shift","Ctrl", "Alt"]}).info("If you choose 'Shift' you cannot scroll horizontally; 'Alt' can cause a little trouble in Firefox"),
+ "canvas_hotkey_adjust": shared.OptionInfo("Ctrl", "Adjust brush size", gr.Radio, {"choices": ["Shift","Ctrl", "Alt"]}).info("If you choose 'Shift' you cannot scroll horizontally; 'Alt' can cause a little trouble in Firefox"),
+ "canvas_hotkey_shrink_brush": shared.OptionInfo("Q", "Shrink the brush size"),
+ "canvas_hotkey_grow_brush": shared.OptionInfo("W", "Enlarge the brush size"),
+ "canvas_hotkey_move": shared.OptionInfo("F", "Moving the canvas").info("To work correctly in Firefox, turn off 'Automatically search the page text when typing' in the browser settings"),
+ "canvas_hotkey_fullscreen": shared.OptionInfo("S", "Fullscreen Mode, maximizes the picture so that it fits into the screen and stretches it to its full width"),
+ "canvas_hotkey_reset": shared.OptionInfo("R", "Reset zoom and canvas position"),
+ "canvas_hotkey_overlap": shared.OptionInfo("O", "Toggle overlap").info("Technical button, needed for testing"),
+ "canvas_show_tooltip": shared.OptionInfo(True, "Enable tooltip on the canvas"),
+ "canvas_auto_expand": shared.OptionInfo(True, "Automatically expands an image that does
not fit completely in the canvas area, similar to manually pressing the S and R buttons"), + "canvas_blur_prompt": shared.OptionInfo(False, "Take the focus off the prompt when working with a canvas"), + "canvas_disabled_functions": shared.OptionInfo(["Overlap"], "Disable function that you don't use", gr.CheckboxGroup, {"choices": ["Zoom","Adjust brush size","Hotkey enlarge brush","Hotkey shrink brush","Moving canvas","Fullscreen","Reset Zoom","Overlap"]}), +})) diff --git a/extensions-builtin/canvas-zoom-and-pan/style.css b/extensions-builtin/canvas-zoom-and-pan/style.css new file mode 100644 index 0000000000000000000000000000000000000000..5d8054e65196408c97791727088088650f102b21 --- /dev/null +++ b/extensions-builtin/canvas-zoom-and-pan/style.css @@ -0,0 +1,66 @@ +.canvas-tooltip-info { + position: absolute; + top: 10px; + left: 10px; + cursor: help; + background-color: rgba(0, 0, 0, 0.3); + width: 20px; + height: 20px; + border-radius: 50%; + display: flex; + align-items: center; + justify-content: center; + flex-direction: column; + + z-index: 100; +} + +.canvas-tooltip-info::after { + content: ''; + display: block; + width: 2px; + height: 7px; + background-color: white; + margin-top: 2px; +} + +.canvas-tooltip-info::before { + content: ''; + display: block; + width: 2px; + height: 2px; + background-color: white; +} + +.canvas-tooltip-content { + display: none; + background-color: #f9f9f9; + color: #333; + border: 1px solid #ddd; + padding: 15px; + position: absolute; + top: 40px; + left: 10px; + width: 250px; + font-size: 16px; + opacity: 0; + border-radius: 8px; + box-shadow: 0px 8px 16px 0px rgba(0,0,0,0.2); + + z-index: 100; +} + +.canvas-tooltip:hover .canvas-tooltip-content { + display: block; + animation: fadeIn 0.5s; + opacity: 1; +} + +@keyframes fadeIn { + from {opacity: 0;} + to {opacity: 1;} +} + +.styler { + overflow:inherit !important; +} \ No newline at end of file diff --git a/extensions-builtin/extra-options-section/scripts/extra_options_section.py b/extensions-builtin/extra-options-section/scripts/extra_options_section.py new file mode 100644 index 0000000000000000000000000000000000000000..1430d8512b15f061356dc1f5c69411145be75845 --- /dev/null +++ b/extensions-builtin/extra-options-section/scripts/extra_options_section.py @@ -0,0 +1,78 @@ +import math + +import gradio as gr +from modules import scripts, shared, ui_components, ui_settings, infotext_utils +from modules.ui_components import FormColumn + + +class ExtraOptionsSection(scripts.Script): + section = "extra_options" + + def __init__(self): + self.comps = None + self.setting_names = None + + def title(self): + return "Extra options" + + def show(self, is_img2img): + return scripts.AlwaysVisible + + def ui(self, is_img2img): + self.comps = [] + self.setting_names = [] + self.infotext_fields = [] + extra_options = shared.opts.extra_options_img2img if is_img2img else shared.opts.extra_options_txt2img + elem_id_tabname = "extra_options_" + ("img2img" if is_img2img else "txt2img") + + mapping = {k: v for v, k in infotext_utils.infotext_to_setting_name_mapping} + + with gr.Blocks() as interface: + with gr.Accordion("Options", open=False, elem_id=elem_id_tabname) if shared.opts.extra_options_accordion and extra_options else gr.Group(elem_id=elem_id_tabname): + + row_count = math.ceil(len(extra_options) / shared.opts.extra_options_cols) + + for row in range(row_count): + with gr.Row(): + for col in range(shared.opts.extra_options_cols): + index = row * shared.opts.extra_options_cols + col + if index >= 
len(extra_options): + break + + setting_name = extra_options[index] + + with FormColumn(): + comp = ui_settings.create_setting_component(setting_name) + + self.comps.append(comp) + self.setting_names.append(setting_name) + + setting_infotext_name = mapping.get(setting_name) + if setting_infotext_name is not None: + self.infotext_fields.append((comp, setting_infotext_name)) + + def get_settings_values(): + res = [ui_settings.get_value_for_setting(key) for key in self.setting_names] + return res[0] if len(res) == 1 else res + + interface.load(fn=get_settings_values, inputs=[], outputs=self.comps, queue=False, show_progress=False) + + return self.comps + + def before_process(self, p, *args): + for name, value in zip(self.setting_names, args): + if name not in p.override_settings: + p.override_settings[name] = value + + +shared.options_templates.update(shared.options_section(('settings_in_ui', "Settings in UI", "ui"), { + "settings_in_ui": shared.OptionHTML(""" +This page allows you to add some settings to the main interface of txt2img and img2img tabs. +"""), + "extra_options_txt2img": shared.OptionInfo([], "Settings for txt2img", ui_components.DropdownMulti, lambda: {"choices": list(shared.opts.data_labels.keys())}).js("info", "settingsHintsShowQuicksettings").info("setting entries that also appear in txt2img interfaces").needs_reload_ui(), + "extra_options_img2img": shared.OptionInfo([], "Settings for img2img", ui_components.DropdownMulti, lambda: {"choices": list(shared.opts.data_labels.keys())}).js("info", "settingsHintsShowQuicksettings").info("setting entries that also appear in img2img interfaces").needs_reload_ui(), + "extra_options_cols": shared.OptionInfo(1, "Number of columns for added settings", gr.Slider, {"step": 1, "minimum": 1, "maximum": 20}).info("displayed amount will depend on the actual browser window width").needs_reload_ui(), + "extra_options_accordion": shared.OptionInfo(False, "Place added settings into an accordion").needs_reload_ui() +})) + + diff --git a/extensions-builtin/forge_legacy_preprocessors/.gitignore b/extensions-builtin/forge_legacy_preprocessors/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..60d06e51ec71848d6700eac9c6f3db544ef3c1a0 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/.gitignore @@ -0,0 +1,185 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea +*.pt +*.pth +*.ckpt +*.bin +*.safetensors + +# Editor setting metadata +.idea/ +.vscode/ +detected_maps/ +annotator/downloads/ + +# test results and expectations +web_tests/results/ +web_tests/expectations/ +tests/web_api/full_coverage/results/ +tests/web_api/full_coverage/expectations/ + +*_diff.png + +# Presets +presets/ + +# Ignore existing dir of hand refiner if exists. 
+annotator/hand_refiner_portable \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/LICENSE b/extensions-builtin/forge_legacy_preprocessors/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..f288702d2fa16d3cdf0035b15a9fcbc552cd88e7 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/LICENSE @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. 
+States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. 
+ + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. 
+ + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. 
+ + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. 
+ + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. 
If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). 
To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. 
+ + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. 
+ + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + Copyright (C) + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +. diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/anime_face_segment/LICENSE b/extensions-builtin/forge_legacy_preprocessors/annotator/anime_face_segment/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..9bad05450ca061904f97acebe04ff7183cfbdc1a --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/anime_face_segment/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2021 Miaomiao Li + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/anime_face_segment/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/anime_face_segment/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..214f3f11f7566b5fcdd6d7304b791fda2eb384e1 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/anime_face_segment/__init__.py @@ -0,0 +1,172 @@ +import os +import torch +import torch.nn as nn +import torch.nn.functional as F +from PIL import Image +import fnmatch +import cv2 + +import sys + +import numpy as np +from modules import devices +from einops import rearrange +from annotator.annotator_path import models_path + +import torchvision +from torchvision.models import MobileNet_V2_Weights +from torchvision import transforms + +COLOR_BACKGROUND = (255,255,0) +COLOR_HAIR = (0,0,255) +COLOR_EYE = (255,0,0) +COLOR_MOUTH = (255,255,255) +COLOR_FACE = (0,255,0) +COLOR_SKIN = (0,255,255) +COLOR_CLOTHES = (255,0,255) +PALETTE = [COLOR_BACKGROUND,COLOR_HAIR,COLOR_EYE,COLOR_MOUTH,COLOR_FACE,COLOR_SKIN,COLOR_CLOTHES] + +class UNet(nn.Module): + def __init__(self): + super(UNet, self).__init__() + self.NUM_SEG_CLASSES = 7 # Background, hair, face, eye, mouth, skin, clothes + + mobilenet_v2 = torchvision.models.mobilenet_v2(weights=MobileNet_V2_Weights.IMAGENET1K_V1) + mob_blocks = mobilenet_v2.features + + # Encoder + self.en_block0 = nn.Sequential( # in_ch=3 out_ch=16 + mob_blocks[0], + mob_blocks[1] + ) + self.en_block1 = nn.Sequential( # in_ch=16 out_ch=24 + mob_blocks[2], + mob_blocks[3], + ) + self.en_block2 = nn.Sequential( # in_ch=24 out_ch=32 + mob_blocks[4], + mob_blocks[5], + mob_blocks[6], + ) + self.en_block3 = nn.Sequential( # in_ch=32 out_ch=96 + mob_blocks[7], + mob_blocks[8], + mob_blocks[9], + mob_blocks[10], + mob_blocks[11], + mob_blocks[12], + mob_blocks[13], + ) + self.en_block4 = nn.Sequential( # in_ch=96 out_ch=160 + mob_blocks[14], + mob_blocks[15], + mob_blocks[16], + ) + + # Decoder + self.de_block4 = nn.Sequential( # in_ch=160 out_ch=96 + nn.UpsamplingNearest2d(scale_factor=2), + nn.Conv2d(160, 96, kernel_size=3, padding=1), + nn.InstanceNorm2d(96), + nn.LeakyReLU(0.1), + nn.Dropout(p=0.2) + ) + self.de_block3 = nn.Sequential( # in_ch=96x2 out_ch=32 + nn.UpsamplingNearest2d(scale_factor=2), + nn.Conv2d(96*2, 32, kernel_size=3, padding=1), + nn.InstanceNorm2d(32), + nn.LeakyReLU(0.1), + nn.Dropout(p=0.2) + ) + self.de_block2 = nn.Sequential( # in_ch=32x2 out_ch=24 + nn.UpsamplingNearest2d(scale_factor=2), + nn.Conv2d(32*2, 24, kernel_size=3, padding=1), + nn.InstanceNorm2d(24), + nn.LeakyReLU(0.1), + nn.Dropout(p=0.2) + ) + self.de_block1 = nn.Sequential( # in_ch=24x2 out_ch=16 + nn.UpsamplingNearest2d(scale_factor=2), + nn.Conv2d(24*2, 16, kernel_size=3, padding=1), + nn.InstanceNorm2d(16), + nn.LeakyReLU(0.1), + nn.Dropout(p=0.2) + ) + + self.de_block0 = nn.Sequential( # in_ch=16x2 out_ch=7 + nn.UpsamplingNearest2d(scale_factor=2), + nn.Conv2d(16*2, self.NUM_SEG_CLASSES, kernel_size=3, padding=1), + nn.Softmax2d() + ) + + def 
forward(self, x): + e0 = self.en_block0(x) + e1 = self.en_block1(e0) + e2 = self.en_block2(e1) + e3 = self.en_block3(e2) + e4 = self.en_block4(e3) + + d4 = self.de_block4(e4) + d4 = F.interpolate(d4, size=e3.size()[2:], mode='bilinear', align_corners=True) + c4 = torch.cat((d4,e3),1) + + d3 = self.de_block3(c4) + d3 = F.interpolate(d3, size=e2.size()[2:], mode='bilinear', align_corners=True) + c3 = torch.cat((d3,e2),1) + + d2 = self.de_block2(c3) + d2 = F.interpolate(d2, size=e1.size()[2:], mode='bilinear', align_corners=True) + c2 =torch.cat((d2,e1),1) + + d1 = self.de_block1(c2) + d1 = F.interpolate(d1, size=e0.size()[2:], mode='bilinear', align_corners=True) + c1 = torch.cat((d1,e0),1) + y = self.de_block0(c1) + + return y + + +class AnimeFaceSegment: + + model_dir = os.path.join(models_path, "anime_face_segment") + + def __init__(self): + self.model = None + self.device = devices.get_device_for("controlnet") + + def load_model(self): + remote_model_path = "https://huggingface.co/bdsqlsz/qinglong_controlnet-lllite/resolve/main/Annotators/UNet.pth" + modelpath = os.path.join(self.model_dir, "UNet.pth") + if not os.path.exists(modelpath): + from modules.modelloader import load_file_from_url + load_file_from_url(remote_model_path, model_dir=self.model_dir) + net = UNet() + ckpt = torch.load(modelpath, map_location=self.device) + for key in list(ckpt.keys()): + if 'module.' in key: + ckpt[key.replace('module.', '')] = ckpt[key] + del ckpt[key] + net.load_state_dict(ckpt) + net.eval() + self.model = net.to(self.device) + + def unload_model(self): + if self.model is not None: + self.model.cpu() + + def __call__(self, input_image): + + if self.model is None: + self.load_model() + self.model.to(self.device) + transform = transforms.Compose([ + transforms.Resize(512,interpolation=transforms.InterpolationMode.BICUBIC), + transforms.ToTensor(),]) + img = Image.fromarray(input_image) + with torch.no_grad(): + img = transform(img).unsqueeze(dim=0).to(self.device) + seg = self.model(img).squeeze(dim=0) + seg = seg.cpu().detach().numpy() + img = rearrange(seg,'h w c -> w c h') + img = [[PALETTE[np.argmax(val)] for val in buf]for buf in img] + return np.array(img).astype(np.uint8) \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/annotator_path.py b/extensions-builtin/forge_legacy_preprocessors/annotator/annotator_path.py new file mode 100644 index 0000000000000000000000000000000000000000..a6a191bb6fc0cbc670b9cf63fedf6342f9524139 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/annotator_path.py @@ -0,0 +1,11 @@ +import os +from modules_forge.shared import preprocessor_dir + + +models_path = preprocessor_dir +clip_vision_path = os.path.join(preprocessor_dir, 'clip_vision') + +os.makedirs(models_path, exist_ok=True) +os.makedirs(clip_vision_path, exist_ok=True) + +print(f'ControlNet preprocessor location: {models_path}') diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/binary/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/binary/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2d13ad692ffc109ad95789334bb5524d52794acc --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/binary/__init__.py @@ -0,0 +1,14 @@ +import cv2 + + +def apply_binary(img, bin_threshold): + img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) + + if bin_threshold == 0 or bin_threshold == 255: + # Otsu's threshold + otsu_threshold, img_bin = cv2.threshold(img_gray, 0, 
255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU) + print("Otsu threshold:", otsu_threshold) + else: + _, img_bin = cv2.threshold(img_gray, bin_threshold, 255, cv2.THRESH_BINARY_INV) + + return cv2.cvtColor(img_bin, cv2.COLOR_GRAY2RGB) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/canny/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/canny/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ace985839d3fc18dd4947f6c38e9f5d5a2625aca --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/canny/__init__.py @@ -0,0 +1,5 @@ +import cv2 + + +def apply_canny(img, low_threshold, high_threshold): + return cv2.Canny(img, low_threshold, high_threshold) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/color/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/color/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..65799a2a83efd18dc556600c99d43292845aa6f2 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/color/__init__.py @@ -0,0 +1,20 @@ +import cv2 + +def cv2_resize_shortest_edge(image, size): + h, w = image.shape[:2] + if h < w: + new_h = size + new_w = int(round(w / h * size)) + else: + new_w = size + new_h = int(round(h / w * size)) + resized_image = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_AREA) + return resized_image + +def apply_color(img, res=512): + img = cv2_resize_shortest_edge(img, res) + h, w = img.shape[:2] + + input_img_color = cv2.resize(img, (w//64, h//64), interpolation=cv2.INTER_CUBIC) + input_img_color = cv2.resize(input_img_color, (w, h), interpolation=cv2.INTER_NEAREST) + return input_img_color \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/densepose/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/densepose/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e43dcfee958672398c7693b23618656a0a3f0850 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/densepose/__init__.py @@ -0,0 +1,57 @@ +import torchvision # Fix issue Unknown builtin op: torchvision::nms +import cv2 +import numpy as np +import torch +from einops import rearrange +from .densepose import DensePoseMaskedColormapResultsVisualizer, _extract_i_from_iuvarr, densepose_chart_predictor_output_to_result_with_confidences +from modules import devices +from annotator.annotator_path import models_path +import os + +N_PART_LABELS = 24 +result_visualizer = DensePoseMaskedColormapResultsVisualizer( + alpha=1, + data_extractor=_extract_i_from_iuvarr, + segm_extractor=_extract_i_from_iuvarr, + val_scale = 255.0 / N_PART_LABELS +) +remote_torchscript_path = "https://huggingface.co/LayerNorm/DensePose-TorchScript-with-hint-image/resolve/main/densepose_r50_fpn_dl.torchscript" +torchscript_model = None +model_dir = os.path.join(models_path, "densepose") + +def apply_densepose(input_image, cmap="viridis"): + global torchscript_model + if torchscript_model is None: + model_path = os.path.join(model_dir, "densepose_r50_fpn_dl.torchscript") + if not os.path.exists(model_path): + from modules.modelloader import load_file_from_url + load_file_from_url(remote_torchscript_path, model_dir=model_dir) + torchscript_model = torch.jit.load(model_path, map_location="cpu").to(devices.get_device_for("controlnet")).eval() + H, W = input_image.shape[:2] + + hint_image_canvas = np.zeros([H, W], dtype=np.uint8) + hint_image_canvas = 
np.tile(hint_image_canvas[:, :, np.newaxis], [1, 1, 3]) + input_image = rearrange(torch.from_numpy(input_image).to(devices.get_device_for("controlnet")), 'h w c -> c h w') + pred_boxes, corase_segm, fine_segm, u, v = torchscript_model(input_image) + + extractor = densepose_chart_predictor_output_to_result_with_confidences + densepose_results = [extractor(pred_boxes[i:i+1], corase_segm[i:i+1], fine_segm[i:i+1], u[i:i+1], v[i:i+1]) for i in range(len(pred_boxes))] + + if cmap=="viridis": + result_visualizer.mask_visualizer.cmap = cv2.COLORMAP_VIRIDIS + hint_image = result_visualizer.visualize(hint_image_canvas, densepose_results) + hint_image = cv2.cvtColor(hint_image, cv2.COLOR_BGR2RGB) + hint_image[:, :, 0][hint_image[:, :, 0] == 0] = 68 + hint_image[:, :, 1][hint_image[:, :, 1] == 0] = 1 + hint_image[:, :, 2][hint_image[:, :, 2] == 0] = 84 + else: + result_visualizer.mask_visualizer.cmap = cv2.COLORMAP_PARULA + hint_image = result_visualizer.visualize(hint_image_canvas, densepose_results) + hint_image = cv2.cvtColor(hint_image, cv2.COLOR_BGR2RGB) + + return hint_image + +def unload_model(): + global torchscript_model + if torchscript_model is not None: + torchscript_model.cpu() diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/densepose/densepose.py b/extensions-builtin/forge_legacy_preprocessors/annotator/densepose/densepose.py new file mode 100644 index 0000000000000000000000000000000000000000..5e43b05fcd76efd5774485ca35e715e64acefdbe --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/densepose/densepose.py @@ -0,0 +1,347 @@ +from typing import Tuple +import math +import numpy as np +from enum import IntEnum +from typing import List, Tuple, Union +import torch +from torch.nn import functional as F +import logging +import cv2 + +Image = np.ndarray +Boxes = torch.Tensor +ImageSizeType = Tuple[int, int] +_RawBoxType = Union[List[float], Tuple[float, ...], torch.Tensor, np.ndarray] +IntTupleBox = Tuple[int, int, int, int] + +class BoxMode(IntEnum): + """ + Enum of different ways to represent a box. + """ + + XYXY_ABS = 0 + """ + (x0, y0, x1, y1) in absolute floating points coordinates. + The coordinates in range [0, width or height]. + """ + XYWH_ABS = 1 + """ + (x0, y0, w, h) in absolute floating points coordinates. + """ + XYXY_REL = 2 + """ + Not yet supported! + (x0, y0, x1, y1) in range [0, 1]. They are relative to the size of the image. + """ + XYWH_REL = 3 + """ + Not yet supported! + (x0, y0, w, h) in range [0, 1]. They are relative to the size of the image. + """ + XYWHA_ABS = 4 + """ + (xc, yc, w, h, a) in absolute floating points coordinates. + (xc, yc) is the center of the rotated box, and the angle a is in degrees ccw. + """ + + @staticmethod + def convert(box: _RawBoxType, from_mode: "BoxMode", to_mode: "BoxMode") -> _RawBoxType: + """ + Args: + box: can be a k-tuple, k-list or an Nxk array/tensor, where k = 4 or 5 + from_mode, to_mode (BoxMode) + + Returns: + The converted box of the same type. 
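+
+        Example (illustrative, derived from the conversion code below): converting one box from XYWH_ABS to XYXY_ABS:
+            >>> BoxMode.convert([10, 10, 20, 30], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
+            [10, 10, 30, 40]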
+ """ + if from_mode == to_mode: + return box + + original_type = type(box) + is_numpy = isinstance(box, np.ndarray) + single_box = isinstance(box, (list, tuple)) + if single_box: + assert len(box) == 4 or len(box) == 5, ( + "BoxMode.convert takes either a k-tuple/list or an Nxk array/tensor," + " where k == 4 or 5" + ) + arr = torch.tensor(box)[None, :] + else: + # avoid modifying the input box + if is_numpy: + arr = torch.from_numpy(np.asarray(box)).clone() + else: + arr = box.clone() + + assert to_mode not in [BoxMode.XYXY_REL, BoxMode.XYWH_REL] and from_mode not in [ + BoxMode.XYXY_REL, + BoxMode.XYWH_REL, + ], "Relative mode not yet supported!" + + if from_mode == BoxMode.XYWHA_ABS and to_mode == BoxMode.XYXY_ABS: + assert ( + arr.shape[-1] == 5 + ), "The last dimension of input shape must be 5 for XYWHA format" + original_dtype = arr.dtype + arr = arr.double() + + w = arr[:, 2] + h = arr[:, 3] + a = arr[:, 4] + c = torch.abs(torch.cos(a * math.pi / 180.0)) + s = torch.abs(torch.sin(a * math.pi / 180.0)) + # This basically computes the horizontal bounding rectangle of the rotated box + new_w = c * w + s * h + new_h = c * h + s * w + + # convert center to top-left corner + arr[:, 0] -= new_w / 2.0 + arr[:, 1] -= new_h / 2.0 + # bottom-right corner + arr[:, 2] = arr[:, 0] + new_w + arr[:, 3] = arr[:, 1] + new_h + + arr = arr[:, :4].to(dtype=original_dtype) + elif from_mode == BoxMode.XYWH_ABS and to_mode == BoxMode.XYWHA_ABS: + original_dtype = arr.dtype + arr = arr.double() + arr[:, 0] += arr[:, 2] / 2.0 + arr[:, 1] += arr[:, 3] / 2.0 + angles = torch.zeros((arr.shape[0], 1), dtype=arr.dtype) + arr = torch.cat((arr, angles), axis=1).to(dtype=original_dtype) + else: + if to_mode == BoxMode.XYXY_ABS and from_mode == BoxMode.XYWH_ABS: + arr[:, 2] += arr[:, 0] + arr[:, 3] += arr[:, 1] + elif from_mode == BoxMode.XYXY_ABS and to_mode == BoxMode.XYWH_ABS: + arr[:, 2] -= arr[:, 0] + arr[:, 3] -= arr[:, 1] + else: + raise NotImplementedError( + "Conversion from BoxMode {} to {} is not supported yet".format( + from_mode, to_mode + ) + ) + + if single_box: + return original_type(arr.flatten().tolist()) + if is_numpy: + return arr.numpy() + else: + return arr + +class MatrixVisualizer: + """ + Base visualizer for matrix data + """ + + def __init__( + self, + inplace=True, + cmap=cv2.COLORMAP_PARULA, + val_scale=1.0, + alpha=0.7, + interp_method_matrix=cv2.INTER_LINEAR, + interp_method_mask=cv2.INTER_NEAREST, + ): + self.inplace = inplace + self.cmap = cmap + self.val_scale = val_scale + self.alpha = alpha + self.interp_method_matrix = interp_method_matrix + self.interp_method_mask = interp_method_mask + + def visualize(self, image_bgr, mask, matrix, bbox_xywh): + self._check_image(image_bgr) + self._check_mask_matrix(mask, matrix) + if self.inplace: + image_target_bgr = image_bgr + else: + image_target_bgr = image_bgr * 0 + x, y, w, h = [int(v) for v in bbox_xywh] + if w <= 0 or h <= 0: + return image_bgr + mask, matrix = self._resize(mask, matrix, w, h) + mask_bg = np.tile((mask == 0)[:, :, np.newaxis], [1, 1, 3]) + matrix_scaled = matrix.astype(np.float32) * self.val_scale + _EPSILON = 1e-6 + if np.any(matrix_scaled > 255 + _EPSILON): + logger = logging.getLogger(__name__) + logger.warning( + f"Matrix has values > {255 + _EPSILON} after " f"scaling, clipping to [0..255]" + ) + matrix_scaled_8u = matrix_scaled.clip(0, 255).astype(np.uint8) + matrix_vis = cv2.applyColorMap(matrix_scaled_8u, self.cmap) + matrix_vis[mask_bg] = image_target_bgr[y : y + h, x : x + w, :][mask_bg] + image_target_bgr[y 
: y + h, x : x + w, :] = ( + image_target_bgr[y : y + h, x : x + w, :] * (1.0 - self.alpha) + matrix_vis * self.alpha + ) + return image_target_bgr.astype(np.uint8) + + def _resize(self, mask, matrix, w, h): + if (w != mask.shape[1]) or (h != mask.shape[0]): + mask = cv2.resize(mask, (w, h), self.interp_method_mask) + if (w != matrix.shape[1]) or (h != matrix.shape[0]): + matrix = cv2.resize(matrix, (w, h), self.interp_method_matrix) + return mask, matrix + + def _check_image(self, image_rgb): + assert len(image_rgb.shape) == 3 + assert image_rgb.shape[2] == 3 + assert image_rgb.dtype == np.uint8 + + def _check_mask_matrix(self, mask, matrix): + assert len(matrix.shape) == 2 + assert len(mask.shape) == 2 + assert mask.dtype == np.uint8 + +class DensePoseResultsVisualizer: + def visualize( + self, + image_bgr: Image, + results, + ) -> Image: + context = self.create_visualization_context(image_bgr) + for i, result in enumerate(results): + boxes_xywh, labels, uv = result + iuv_array = torch.cat( + (labels[None].type(torch.float32), uv * 255.0) + ).type(torch.uint8) + self.visualize_iuv_arr(context, iuv_array.cpu().numpy(), boxes_xywh) + image_bgr = self.context_to_image_bgr(context) + return image_bgr + + def create_visualization_context(self, image_bgr: Image): + return image_bgr + + def visualize_iuv_arr(self, context, iuv_arr: np.ndarray, bbox_xywh) -> None: + pass + + def context_to_image_bgr(self, context): + return context + + def get_image_bgr_from_context(self, context): + return context + +class DensePoseMaskedColormapResultsVisualizer(DensePoseResultsVisualizer): + def __init__( + self, + data_extractor, + segm_extractor, + inplace=True, + cmap=cv2.COLORMAP_PARULA, + alpha=0.7, + val_scale=1.0, + **kwargs, + ): + self.mask_visualizer = MatrixVisualizer( + inplace=inplace, cmap=cmap, val_scale=val_scale, alpha=alpha + ) + self.data_extractor = data_extractor + self.segm_extractor = segm_extractor + + def context_to_image_bgr(self, context): + return context + + def visualize_iuv_arr(self, context, iuv_arr: np.ndarray, bbox_xywh) -> None: + image_bgr = self.get_image_bgr_from_context(context) + matrix = self.data_extractor(iuv_arr) + segm = self.segm_extractor(iuv_arr) + mask = np.zeros(matrix.shape, dtype=np.uint8) + mask[segm > 0] = 1 + image_bgr = self.mask_visualizer.visualize(image_bgr, mask, matrix, bbox_xywh) + + +def _extract_i_from_iuvarr(iuv_arr): + return iuv_arr[0, :, :] + + +def _extract_u_from_iuvarr(iuv_arr): + return iuv_arr[1, :, :] + + +def _extract_v_from_iuvarr(iuv_arr): + return iuv_arr[2, :, :] + +def make_int_box(box: torch.Tensor) -> IntTupleBox: + int_box = [0, 0, 0, 0] + int_box[0], int_box[1], int_box[2], int_box[3] = tuple(box.long().tolist()) + return int_box[0], int_box[1], int_box[2], int_box[3] + +def densepose_chart_predictor_output_to_result_with_confidences( + boxes: Boxes, + coarse_segm, + fine_segm, + u, v + +): + boxes_xyxy_abs = boxes.clone() + boxes_xywh_abs = BoxMode.convert(boxes_xyxy_abs, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS) + box_xywh = make_int_box(boxes_xywh_abs[0]) + + labels = resample_fine_and_coarse_segm_tensors_to_bbox(fine_segm, coarse_segm, box_xywh).squeeze(0) + uv = resample_uv_tensors_to_bbox(u, v, labels, box_xywh) + confidences = [] + return box_xywh, labels, uv + +def resample_fine_and_coarse_segm_tensors_to_bbox( + fine_segm: torch.Tensor, coarse_segm: torch.Tensor, box_xywh_abs: IntTupleBox +): + """ + Resample fine and coarse segmentation tensors to the given + bounding box and derive labels for each pixel of the bounding 
box + + Args: + fine_segm: float tensor of shape [1, C, Hout, Wout] + coarse_segm: float tensor of shape [1, K, Hout, Wout] + box_xywh_abs (tuple of 4 int): bounding box given by its upper-left + corner coordinates, width (W) and height (H) + Return: + Labels for each pixel of the bounding box, a long tensor of size [1, H, W] + """ + x, y, w, h = box_xywh_abs + w = max(int(w), 1) + h = max(int(h), 1) + # coarse segmentation + coarse_segm_bbox = F.interpolate( + coarse_segm, + (h, w), + mode="bilinear", + align_corners=False, + ).argmax(dim=1) + # combined coarse and fine segmentation + labels = ( + F.interpolate(fine_segm, (h, w), mode="bilinear", align_corners=False).argmax(dim=1) + * (coarse_segm_bbox > 0).long() + ) + return labels + +def resample_uv_tensors_to_bbox( + u: torch.Tensor, + v: torch.Tensor, + labels: torch.Tensor, + box_xywh_abs: IntTupleBox, +) -> torch.Tensor: + """ + Resamples U and V coordinate estimates for the given bounding box + + Args: + u (tensor [1, C, H, W] of float): U coordinates + v (tensor [1, C, H, W] of float): V coordinates + labels (tensor [H, W] of long): labels obtained by resampling segmentation + outputs for the given bounding box + box_xywh_abs (tuple of 4 int): bounding box that corresponds to predictor outputs + Return: + Resampled U and V coordinates - a tensor [2, H, W] of float + """ + x, y, w, h = box_xywh_abs + w = max(int(w), 1) + h = max(int(h), 1) + u_bbox = F.interpolate(u, (h, w), mode="bilinear", align_corners=False) + v_bbox = F.interpolate(v, (h, w), mode="bilinear", align_corners=False) + uv = torch.zeros([2, h, w], dtype=torch.float32, device=u.device) + for part_id in range(1, u_bbox.size(1)): + uv[0][labels == part_id] = u_bbox[0, part_id][labels == part_id] + uv[1][labels == part_id] = v_bbox[0, part_id][labels == part_id] + return uv + diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/depth_anything.py b/extensions-builtin/forge_legacy_preprocessors/annotator/depth_anything.py new file mode 100644 index 0000000000000000000000000000000000000000..bbe480c5e0947a3b4b7bbd2b8fce45a8c783e936 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/depth_anything.py @@ -0,0 +1,79 @@ +import os +import torch +import cv2 +import numpy as np +import torch.nn.functional as F +from torchvision.transforms import Compose + +from depth_anything.dpt import DPT_DINOv2 +from depth_anything.util.transform import Resize, NormalizeImage, PrepareForNet +from .util import load_model +from .annotator_path import models_path + + +transform = Compose( + [ + Resize( + width=518, + height=518, + resize_target=False, + keep_aspect_ratio=True, + ensure_multiple_of=14, + resize_method="lower_bound", + image_interpolation_method=cv2.INTER_CUBIC, + ), + NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + PrepareForNet(), + ] +) + + +class DepthAnythingDetector: + """https://github.com/LiheYoung/Depth-Anything""" + + model_dir = os.path.join(models_path, "depth_anything") + + def __init__(self, device: torch.device): + self.device = device + self.model = ( + DPT_DINOv2( + encoder="vitl", + features=256, + out_channels=[256, 512, 1024, 1024], + localhub=False, + ) + .to(device) + .eval() + ) + remote_url = os.environ.get( + "CONTROLNET_DEPTH_ANYTHING_MODEL_URL", + "https://huggingface.co/spaces/LiheYoung/Depth-Anything/resolve/main/checkpoints/depth_anything_vitl14.pth", + ) + model_path = load_model( + "depth_anything_vitl14.pth", remote_url=remote_url, model_dir=self.model_dir + ) + 
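# load_model() above is expected to resolve the local checkpoint path, downloading it from remote_url into model_dir when it is missing (assumption based on its arguments); the state dict is then applied below.
+        # Note: torch.load below is called without map_location, so tensors load onto the device recorded in the checkpoint; CPU-only hosts may need map_location='cpu'. +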
self.model.load_state_dict(torch.load(model_path)) + + def __call__(self, image: np.ndarray, colored: bool = True) -> np.ndarray: + self.model.to(self.device) + h, w = image.shape[:2] + + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) / 255.0 + image = transform({"image": image})["image"] + image = torch.from_numpy(image).unsqueeze(0).to(self.device) + @torch.no_grad() + def predict_depth(model, image): + return model(image) + depth = predict_depth(self.model, image) + depth = F.interpolate( + depth[None], (h, w), mode="bilinear", align_corners=False + )[0, 0] + depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0 + depth = depth.cpu().numpy().astype(np.uint8) + if colored: + return cv2.applyColorMap(depth, cv2.COLORMAP_INFERNO)[:, :, ::-1] + else: + return depth + + def unload_model(self): + self.model.to("cpu") diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/hed/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/hed/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0d495cddd0bfc0e3f632034def616d280204f64b --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/hed/__init__.py @@ -0,0 +1,98 @@ +# This is an improved version and model of HED edge detection with Apache License, Version 2.0. +# Please use this implementation in your products +# This implementation may produce slightly different results from Saining Xie's official implementations, +# but it generates smoother edges and is more suitable for ControlNet as well as other image-to-image translations. +# Different from official models and other implementations, this is an RGB-input model (rather than BGR) +# and in this way it works better for gradio's RGB protocol + +import os +import cv2 +import torch +import numpy as np + +from einops import rearrange +import os +from modules import devices +from annotator.annotator_path import models_path +from annotator.util import safe_step, nms + + +class DoubleConvBlock(torch.nn.Module): + def __init__(self, input_channel, output_channel, layer_number): + super().__init__() + self.convs = torch.nn.Sequential() + self.convs.append(torch.nn.Conv2d(in_channels=input_channel, out_channels=output_channel, kernel_size=(3, 3), stride=(1, 1), padding=1)) + for i in range(1, layer_number): + self.convs.append(torch.nn.Conv2d(in_channels=output_channel, out_channels=output_channel, kernel_size=(3, 3), stride=(1, 1), padding=1)) + self.projection = torch.nn.Conv2d(in_channels=output_channel, out_channels=1, kernel_size=(1, 1), stride=(1, 1), padding=0) + + def __call__(self, x, down_sampling=False): + h = x + if down_sampling: + h = torch.nn.functional.max_pool2d(h, kernel_size=(2, 2), stride=(2, 2)) + for conv in self.convs: + h = conv(h) + h = torch.nn.functional.relu(h) + return h, self.projection(h) + + +class ControlNetHED_Apache2(torch.nn.Module): + def __init__(self): + super().__init__() + self.norm = torch.nn.Parameter(torch.zeros(size=(1, 3, 1, 1))) + self.block1 = DoubleConvBlock(input_channel=3, output_channel=64, layer_number=2) + self.block2 = DoubleConvBlock(input_channel=64, output_channel=128, layer_number=2) + self.block3 = DoubleConvBlock(input_channel=128, output_channel=256, layer_number=3) + self.block4 = DoubleConvBlock(input_channel=256, output_channel=512, layer_number=3) + self.block5 = DoubleConvBlock(input_channel=512, output_channel=512, layer_number=3) + + def __call__(self, x): + h = x - self.norm + h, projection1 = self.block1(h) + h, projection2 = self.block2(h, 
down_sampling=True) + h, projection3 = self.block3(h, down_sampling=True) + h, projection4 = self.block4(h, down_sampling=True) + h, projection5 = self.block5(h, down_sampling=True) + return projection1, projection2, projection3, projection4, projection5 + + +netNetwork = None +remote_model_path = "https://huggingface.co/lllyasviel/Annotators/resolve/main/ControlNetHED.pth" +modeldir = os.path.join(models_path, "hed") +old_modeldir = os.path.dirname(os.path.realpath(__file__)) + + +def apply_hed(input_image, is_safe=False): + global netNetwork + if netNetwork is None: + modelpath = os.path.join(modeldir, "ControlNetHED.pth") + old_modelpath = os.path.join(old_modeldir, "ControlNetHED.pth") + if os.path.exists(old_modelpath): + modelpath = old_modelpath + elif not os.path.exists(modelpath): + from modules.modelloader import load_file_from_url + load_file_from_url(remote_model_path, model_dir=modeldir) + netNetwork = ControlNetHED_Apache2().to(devices.get_device_for("controlnet")) + netNetwork.load_state_dict(torch.load(modelpath, map_location='cpu')) + netNetwork.to(devices.get_device_for("controlnet")).float().eval() + + assert input_image.ndim == 3 + H, W, C = input_image.shape + with torch.no_grad(): + image_hed = torch.from_numpy(input_image.copy()).float().to(devices.get_device_for("controlnet")) + image_hed = rearrange(image_hed, 'h w c -> 1 c h w') + edges = netNetwork(image_hed) + edges = [e.detach().cpu().numpy().astype(np.float32)[0, 0] for e in edges] + edges = [cv2.resize(e, (W, H), interpolation=cv2.INTER_LINEAR) for e in edges] + edges = np.stack(edges, axis=2) + edge = 1 / (1 + np.exp(-np.mean(edges, axis=2).astype(np.float64))) + if is_safe: + edge = safe_step(edge) + edge = (edge * 255.0).clip(0, 255).astype(np.uint8) + return edge + + +def unload_hed_model(): + global netNetwork + if netNetwork is not None: + netNetwork.cpu() diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/keypose/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/keypose/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0eda4cff54f02e1d724657d57ae2789c9123c6d9 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/keypose/__init__.py @@ -0,0 +1,212 @@ +import numpy as np +import cv2 +import torch + +import os +from modules import devices +from annotator.annotator_path import models_path + +import mmcv +from mmdet.apis import inference_detector, init_detector +from mmpose.apis import inference_top_down_pose_model +from mmpose.apis import init_pose_model, process_mmdet_results, vis_pose_result + + +def preprocessing(image, device): + # Resize + scale = 640 / max(image.shape[:2]) + image = cv2.resize(image, dsize=None, fx=scale, fy=scale) + raw_image = image.astype(np.uint8) + + # Subtract mean values + image = image.astype(np.float32) + image -= np.array( + [ + float(104.008), + float(116.669), + float(122.675), + ] + ) + + # Convert to torch.Tensor and add "batch" axis + image = torch.from_numpy(image.transpose(2, 0, 1)).float().unsqueeze(0) + image = image.to(device) + + return image, raw_image + + +def imshow_keypoints(img, + pose_result, + skeleton=None, + kpt_score_thr=0.1, + pose_kpt_color=None, + pose_link_color=None, + radius=4, + thickness=1): + """Draw keypoints and links on an image. + Args: + img (ndarry): The image to draw poses on. + pose_result (list[kpts]): The poses to draw. Each element kpts is + a set of K keypoints as an Kx3 numpy.ndarray, where each + keypoint is represented as x, y, score. 
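+        skeleton (list[list[int]], optional): Pairs of keypoint indices drawn as links; if None, no links are drawn. Default: None.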
+ kpt_score_thr (float, optional): Minimum score of keypoints + to be shown. Default: 0.3. + pose_kpt_color (np.array[Nx3]`): Color of N keypoints. If None, + the keypoint will not be drawn. + pose_link_color (np.array[Mx3]): Color of M links. If None, the + links will not be drawn. + thickness (int): Thickness of lines. + """ + + img_h, img_w, _ = img.shape + img = np.zeros(img.shape) + + for idx, kpts in enumerate(pose_result): + if idx > 1: + continue + kpts = kpts['keypoints'] + # print(kpts) + kpts = np.array(kpts, copy=False) + + # draw each point on image + if pose_kpt_color is not None: + assert len(pose_kpt_color) == len(kpts) + + for kid, kpt in enumerate(kpts): + x_coord, y_coord, kpt_score = int(kpt[0]), int(kpt[1]), kpt[2] + + if kpt_score < kpt_score_thr or pose_kpt_color[kid] is None: + # skip the point that should not be drawn + continue + + color = tuple(int(c) for c in pose_kpt_color[kid]) + cv2.circle(img, (int(x_coord), int(y_coord)), + radius, color, -1) + + # draw links + if skeleton is not None and pose_link_color is not None: + assert len(pose_link_color) == len(skeleton) + + for sk_id, sk in enumerate(skeleton): + pos1 = (int(kpts[sk[0], 0]), int(kpts[sk[0], 1])) + pos2 = (int(kpts[sk[1], 0]), int(kpts[sk[1], 1])) + + if (pos1[0] <= 0 or pos1[0] >= img_w or pos1[1] <= 0 or pos1[1] >= img_h or pos2[0] <= 0 + or pos2[0] >= img_w or pos2[1] <= 0 or pos2[1] >= img_h or kpts[sk[0], 2] < kpt_score_thr + or kpts[sk[1], 2] < kpt_score_thr or pose_link_color[sk_id] is None): + # skip the link that should not be drawn + continue + color = tuple(int(c) for c in pose_link_color[sk_id]) + cv2.line(img, pos1, pos2, color, thickness=thickness) + + return img + + +human_det, pose_model = None, None +det_model_path = "https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth" +pose_model_path = "https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_256x192-b9e0b3ab_20200708.pth" + +modeldir = os.path.join(models_path, "keypose") +old_modeldir = os.path.dirname(os.path.realpath(__file__)) + +det_config = 'faster_rcnn_r50_fpn_coco.py' +pose_config = 'hrnet_w48_coco_256x192.py' + +det_checkpoint = 'faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth' +pose_checkpoint = 'hrnet_w48_coco_256x192-b9e0b3ab_20200708.pth' +det_cat_id = 1 +bbox_thr = 0.2 + +skeleton = [ + [15, 13], [13, 11], [16, 14], [14, 12], [11, 12], [5, 11], [6, 12], [5, 6], [5, 7], [6, 8], + [7, 9], [8, 10], + [1, 2], [0, 1], [0, 2], [1, 3], [2, 4], [3, 5], [4, 6] +] + +pose_kpt_color = [ + [51, 153, 255], [51, 153, 255], [51, 153, 255], [51, 153, 255], [51, 153, 255], + [0, 255, 0], + [255, 128, 0], [0, 255, 0], [255, 128, 0], [0, 255, 0], [255, 128, 0], [0, 255, 0], + [255, 128, 0], + [0, 255, 0], [255, 128, 0], [0, 255, 0], [255, 128, 0] +] + +pose_link_color = [ + [0, 255, 0], [0, 255, 0], [255, 128, 0], [255, 128, 0], + [51, 153, 255], [51, 153, 255], [51, 153, 255], [51, 153, 255], [0, 255, 0], + [255, 128, 0], + [0, 255, 0], [255, 128, 0], [51, 153, 255], [51, 153, 255], [51, 153, 255], + [51, 153, 255], + [51, 153, 255], [51, 153, 255], [51, 153, 255] +] + +def find_download_model(checkpoint, remote_path): + modelpath = os.path.join(modeldir, checkpoint) + old_modelpath = os.path.join(old_modeldir, checkpoint) + + if os.path.exists(old_modelpath): + modelpath = old_modelpath + elif not os.path.exists(modelpath): + from modules.modelloader import load_file_from_url + load_file_from_url(remote_path, model_dir=modeldir) + + 
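# At this point the checkpoint either already existed (in the old or the new location) or has just been downloaded into modeldir; hand back the resolved local path. +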
return modelpath + +def apply_keypose(input_image): + global human_det, pose_model + if netNetwork is None: + det_model_local = find_download_model(det_checkpoint, det_model_path) + hrnet_model_local = find_download_model(pose_checkpoint, pose_model_path) + det_config_mmcv = mmcv.Config.fromfile(det_config) + pose_config_mmcv = mmcv.Config.fromfile(pose_config) + human_det = init_detector(det_config_mmcv, det_model_local, device=devices.get_device_for("controlnet")) + pose_model = init_pose_model(pose_config_mmcv, hrnet_model_local, device=devices.get_device_for("controlnet")) + + assert input_image.ndim == 3 + input_image = input_image.copy() + with torch.no_grad(): + image = torch.from_numpy(input_image).float().to(devices.get_device_for("controlnet")) + image = image / 255.0 + mmdet_results = inference_detector(human_det, image) + + # keep the person class bounding boxes. + person_results = process_mmdet_results(mmdet_results, det_cat_id) + + return_heatmap = False + dataset = pose_model.cfg.data['test']['type'] + + # e.g. use ('backbone', ) to return backbone feature + output_layer_names = None + pose_results, _ = inference_top_down_pose_model( + pose_model, + image, + person_results, + bbox_thr=bbox_thr, + format='xyxy', + dataset=dataset, + dataset_info=None, + return_heatmap=return_heatmap, + outputs=output_layer_names + ) + + im_keypose_out = imshow_keypoints( + image, + pose_results, + skeleton=skeleton, + pose_kpt_color=pose_kpt_color, + pose_link_color=pose_link_color, + radius=2, + thickness=2 + ) + im_keypose_out = im_keypose_out.astype(np.uint8) + + # image_hed = rearrange(image_hed, 'h w c -> 1 c h w') + # edge = netNetwork(image_hed)[0] + # edge = (edge.cpu().numpy() * 255.0).clip(0, 255).astype(np.uint8) + return im_keypose_out + + +def unload_hed_model(): + global netNetwork + if netNetwork is not None: + netNetwork.cpu() diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/keypose/faster_rcnn_r50_fpn_coco.py b/extensions-builtin/forge_legacy_preprocessors/annotator/keypose/faster_rcnn_r50_fpn_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..a9ad9528b22163ae7ce1390375b69227fd6eafd9 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/keypose/faster_rcnn_r50_fpn_coco.py @@ -0,0 +1,182 @@ +checkpoint_config = dict(interval=1) +# yapf:disable +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) +# yapf:enable +dist_params = dict(backend='nccl') +log_level = 'INFO' +load_from = None +resume_from = None +workflow = [('train', 1)] +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[8, 11]) +total_epochs = 12 + +model = dict( + type='FasterRCNN', + pretrained='torchvision://resnet50', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + rpn_head=dict( + type='RPNHead', + in_channels=256, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + scales=[8], + ratios=[0.5, 1.0, 2.0], + strides=[4, 8, 16, 32, 64]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + 
target_stds=[1.0, 1.0, 1.0, 1.0]), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=1.0)), + roi_head=dict( + type='StandardRoIHead', + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=dict( + type='Shared2FCBBoxHead', + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2]), + reg_class_agnostic=False, + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=1.0))), + # model training and testing settings + train_cfg=dict( + rpn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + match_low_quality=True, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=-1, + pos_weight=-1, + debug=False), + rpn_proposal=dict( + nms_pre=2000, + max_per_img=1000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + match_low_quality=False, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + pos_weight=-1, + debug=False)), + test_cfg=dict( + rpn=dict( + nms_pre=1000, + max_per_img=1000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.5), + max_per_img=100) + # soft-nms is also supported for rcnn testing + # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05) + )) + +dataset_type = 'CocoDataset' +data_root = 'data/coco' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=f'{data_root}/annotations/instances_train2017.json', + img_prefix=f'{data_root}/train2017/', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + ann_file=f'{data_root}/annotations/instances_val2017.json', + img_prefix=f'{data_root}/val2017/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=f'{data_root}/annotations/instances_val2017.json', + img_prefix=f'{data_root}/val2017/', + pipeline=test_pipeline)) +evaluation = dict(interval=1, metric='bbox') diff --git 
a/extensions-builtin/forge_legacy_preprocessors/annotator/keypose/hrnet_w48_coco_256x192.py b/extensions-builtin/forge_legacy_preprocessors/annotator/keypose/hrnet_w48_coco_256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..9755e6773cd3a8c0d2ac684c612d716cfd44b0ca --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/keypose/hrnet_w48_coco_256x192.py @@ -0,0 +1,169 @@ +# _base_ = [ +# '../../../../_base_/default_runtime.py', +# '../../../../_base_/datasets/coco.py' +# ] +evaluation = dict(interval=10, metric='mAP', save_best='AP') + +optimizer = dict( + type='Adam', + lr=5e-4, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[170, 200]) +total_epochs = 210 +channel_cfg = dict( + num_output_channels=17, + dataset_joints=17, + dataset_channel=[ + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + ], + inference_channel=[ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + ]) + +# model settings +model = dict( + type='TopDown', + pretrained='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrnet_w48-8ef0771d.pth', + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(48, 96)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(48, 96, 192)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(48, 96, 192, 384))), + ), + keypoint_head=dict( + type='TopdownHeatmapSimpleHead', + in_channels=48, + out_channels=channel_cfg['num_output_channels'], + num_deconv_layers=0, + extra=dict(final_conv_kernel=1, ), + loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)), + train_cfg=dict(), + test_cfg=dict( + flip_test=True, + post_process='default', + shift_heatmap=True, + modulate_kernel=11)) + +data_cfg = dict( + image_size=[192, 256], + heatmap_size=[48, 64], + num_output_channels=channel_cfg['num_output_channels'], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + soft_nms=False, + nms_thr=1.0, + oks_thr=0.9, + vis_thr=0.2, + use_gt_bbox=False, + det_bbox_thr=0.0, + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', +) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='TopDownGetBboxCenterScale', padding=1.25), + dict(type='TopDownRandomShiftBboxCenter', shift_factor=0.16, prob=0.3), + dict(type='TopDownRandomFlip', flip_prob=0.5), + dict( + type='TopDownHalfBodyTransform', + num_joints_half_body=8, + prob_half_body=0.3), + dict( + type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5), + dict(type='TopDownAffine'), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict(type='TopDownGenerateTarget', sigma=2), + dict( + type='Collect', + keys=['img', 'target', 'target_weight'], + meta_keys=[ + 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale', + 'rotation', 'bbox_score', 'flip_pairs' + ]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='TopDownGetBboxCenterScale', 
padding=1.25), + dict(type='TopDownAffine'), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'image_file', 'center', 'scale', 'rotation', 'bbox_score', + 'flip_pairs' + ]), +] + +test_pipeline = val_pipeline + +data_root = 'data/coco' +data = dict( + samples_per_gpu=32, + workers_per_gpu=2, + val_dataloader=dict(samples_per_gpu=32), + test_dataloader=dict(samples_per_gpu=32), + train=dict( + type='TopDownCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline, + dataset_info={{_base_.dataset_info}}), + val=dict( + type='TopDownCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline, + dataset_info={{_base_.dataset_info}}), + test=dict( + type='TopDownCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=test_pipeline, + dataset_info={{_base_.dataset_info}}), +) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/leres/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7c177170ee04834f6b46f7abc2282467805a5b63 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/__init__.py @@ -0,0 +1,114 @@ +import cv2 +import numpy as np +import torch +import os +from modules import devices, shared +from annotator.annotator_path import models_path +from torchvision.transforms import transforms + +# AdelaiDepth/LeReS imports +from .leres.depthmap import estimateleres, estimateboost +from .leres.multi_depth_model_woauxi import RelDepthModel +from .leres.net_tools import strip_prefix_if_present + +# pix2pix/merge net imports +from .pix2pix.options.test_options import TestOptions +from .pix2pix.models.pix2pix4depth_model import Pix2Pix4DepthModel + +base_model_path = os.path.join(models_path, "leres") +old_modeldir = os.path.dirname(os.path.realpath(__file__)) + +remote_model_path_leres = "https://huggingface.co/lllyasviel/Annotators/resolve/main/res101.pth" +remote_model_path_pix2pix = "https://huggingface.co/lllyasviel/Annotators/resolve/main/latest_net_G.pth" + +model = None +pix2pixmodel = None + +def unload_leres_model(): + global model, pix2pixmodel + if model is not None: + model = model.cpu() + if pix2pixmodel is not None: + pix2pixmodel = pix2pixmodel.unload_network('G') + + +def apply_leres(input_image, thr_a, thr_b, boost=False): + global model, pix2pixmodel + if model is None: + model_path = os.path.join(base_model_path, "res101.pth") + old_model_path = os.path.join(old_modeldir, "res101.pth") + + if os.path.exists(old_model_path): + model_path = old_model_path + elif not os.path.exists(model_path): + from modules.modelloader import load_file_from_url + load_file_from_url(remote_model_path_leres, model_dir=base_model_path) + + if torch.cuda.is_available(): + checkpoint = torch.load(model_path) + else: + checkpoint = torch.load(model_path, map_location=torch.device('cpu')) + + model = RelDepthModel(backbone='resnext101') + model.load_state_dict(strip_prefix_if_present(checkpoint['depth_model'], "module."), strict=True) + del checkpoint + + if boost and pix2pixmodel is None: + pix2pixmodel_path = 
os.path.join(base_model_path, "latest_net_G.pth") + if not os.path.exists(pix2pixmodel_path): + from modules.modelloader import load_file_from_url + load_file_from_url(remote_model_path_pix2pix, model_dir=base_model_path) + + opt = TestOptions().parse() + if not torch.cuda.is_available(): + opt.gpu_ids = [] # cpu mode + pix2pixmodel = Pix2Pix4DepthModel(opt) + pix2pixmodel.save_dir = base_model_path + pix2pixmodel.load_networks('latest') + pix2pixmodel.eval() + + if devices.get_device_for("controlnet").type != 'mps': + model = model.to(devices.get_device_for("controlnet")) + + assert input_image.ndim == 3 + height, width, dim = input_image.shape + + with torch.no_grad(): + + if boost: + pix2pixmodel.netG.to(devices.get_device_for("controlnet")) + depth = estimateboost(input_image, model, 0, pix2pixmodel, max(width, height)) + else: + depth = estimateleres(input_image, model, width, height) + + numbytes=2 + depth_min = depth.min() + depth_max = depth.max() + max_val = (2**(8*numbytes))-1 + + # check output before normalizing and mapping to 16 bit + if depth_max - depth_min > np.finfo("float").eps: + out = max_val * (depth - depth_min) / (depth_max - depth_min) + else: + out = np.zeros(depth.shape) + + # single channel, 16 bit image + depth_image = out.astype("uint16") + + # convert to uint8 + depth_image = cv2.convertScaleAbs(depth_image, alpha=(255.0/65535.0)) + + # remove near + if thr_a != 0: + thr_a = ((thr_a/100)*255) + depth_image = cv2.threshold(depth_image, thr_a, 255, cv2.THRESH_TOZERO)[1] + + # invert image + depth_image = cv2.bitwise_not(depth_image) + + # remove bg + if thr_b != 0: + thr_b = ((thr_b/100)*255) + depth_image = cv2.threshold(depth_image, thr_b, 255, cv2.THRESH_TOZERO)[1] + + return depth_image diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/leres/leres/LICENSE b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/leres/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..e0f1d07d98d4e85e684734d058dfe2515d215405 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/leres/LICENSE @@ -0,0 +1,23 @@ +https://github.com/thygate/stable-diffusion-webui-depthmap-script + +MIT License + +Copyright (c) 2023 Bob Thiry + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
\ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/leres/leres/Resnet.py b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/leres/Resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..f12c9975c1aa05401269be3ca3dbaa56bde55581 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/leres/Resnet.py @@ -0,0 +1,199 @@ +import torch.nn as nn +import torch.nn as NN + +__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', + 'resnet152'] + + +model_urls = { + 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', + 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', + 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', + 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', + 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', +} + + +def conv3x3(in_planes, out_planes, stride=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=1, bias=False) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = NN.BatchNorm2d(planes) #NN.BatchNorm2d + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = NN.BatchNorm2d(planes) #NN.BatchNorm2d + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(Bottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) + self.bn1 = NN.BatchNorm2d(planes) #NN.BatchNorm2d + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, + padding=1, bias=False) + self.bn2 = NN.BatchNorm2d(planes) #NN.BatchNorm2d + self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False) + self.bn3 = NN.BatchNorm2d(planes * self.expansion) #NN.BatchNorm2d + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class ResNet(nn.Module): + + def __init__(self, block, layers, num_classes=1000): + self.inplanes = 64 + super(ResNet, self).__init__() + self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = NN.BatchNorm2d(64) #NN.BatchNorm2d + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2) + #self.avgpool = nn.AvgPool2d(7, 
stride=1) + #self.fc = nn.Linear(512 * block.expansion, num_classes) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + def _make_layer(self, block, planes, blocks, stride=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(self.inplanes, planes * block.expansion, + kernel_size=1, stride=stride, bias=False), + NN.BatchNorm2d(planes * block.expansion), #NN.BatchNorm2d + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample)) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(self.inplanes, planes)) + + return nn.Sequential(*layers) + + def forward(self, x): + features = [] + + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x = self.layer1(x) + features.append(x) + x = self.layer2(x) + features.append(x) + x = self.layer3(x) + features.append(x) + x = self.layer4(x) + features.append(x) + + return features + + +def resnet18(pretrained=True, **kwargs): + """Constructs a ResNet-18 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) + return model + + +def resnet34(pretrained=True, **kwargs): + """Constructs a ResNet-34 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) + return model + + +def resnet50(pretrained=True, **kwargs): + """Constructs a ResNet-50 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) + + return model + + +def resnet101(pretrained=True, **kwargs): + """Constructs a ResNet-101 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) + + return model + + +def resnet152(pretrained=True, **kwargs): + """Constructs a ResNet-152 model. 
+ Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) + return model diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/leres/leres/Resnext_torch.py b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/leres/Resnext_torch.py new file mode 100644 index 0000000000000000000000000000000000000000..9af54fcc3e5b363935ef60c8aaf269110c0d6611 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/leres/Resnext_torch.py @@ -0,0 +1,237 @@ +#!/usr/bin/env python +# coding: utf-8 +import torch.nn as nn + +try: + from urllib import urlretrieve +except ImportError: + from urllib.request import urlretrieve + +__all__ = ['resnext101_32x8d'] + + +model_urls = { + 'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth', + 'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth', +} + + +def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=dilation, groups=groups, bias=False, dilation=dilation) + + +def conv1x1(in_planes, out_planes, stride=1): + """1x1 convolution""" + return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, + base_width=64, dilation=1, norm_layer=None): + super(BasicBlock, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + if groups != 1 or base_width != 64: + raise ValueError('BasicBlock only supports groups=1 and base_width=64') + if dilation > 1: + raise NotImplementedError("Dilation > 1 not supported in BasicBlock") + # Both self.conv1 and self.downsample layers downsample the input when stride != 1 + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = norm_layer(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = norm_layer(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + # Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2) + # while original implementation places the stride at the first 1x1 convolution(self.conv1) + # according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385. + # This variant is also known as ResNet V1.5 and improves accuracy according to + # https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch. 
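+    # Concretely, in this variant conv2 (the 3x3 convolution defined below) carries the stride, while conv1 keeps stride 1.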
+ + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, + base_width=64, dilation=1, norm_layer=None): + super(Bottleneck, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + width = int(planes * (base_width / 64.)) * groups + # Both self.conv2 and self.downsample layers downsample the input when stride != 1 + self.conv1 = conv1x1(inplanes, width) + self.bn1 = norm_layer(width) + self.conv2 = conv3x3(width, width, stride, groups, dilation) + self.bn2 = norm_layer(width) + self.conv3 = conv1x1(width, planes * self.expansion) + self.bn3 = norm_layer(planes * self.expansion) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + + return out + + +class ResNet(nn.Module): + + def __init__(self, block, layers, num_classes=1000, zero_init_residual=False, + groups=1, width_per_group=64, replace_stride_with_dilation=None, + norm_layer=None): + super(ResNet, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + self._norm_layer = norm_layer + + self.inplanes = 64 + self.dilation = 1 + if replace_stride_with_dilation is None: + # each element in the tuple indicates if we should replace + # the 2x2 stride with a dilated convolution instead + replace_stride_with_dilation = [False, False, False] + if len(replace_stride_with_dilation) != 3: + raise ValueError("replace_stride_with_dilation should be None " + "or a 3-element tuple, got {}".format(replace_stride_with_dilation)) + self.groups = groups + self.base_width = width_per_group + self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = norm_layer(self.inplanes) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2, + dilate=replace_stride_with_dilation[0]) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2, + dilate=replace_stride_with_dilation[1]) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2, + dilate=replace_stride_with_dilation[2]) + #self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + #self.fc = nn.Linear(512 * block.expansion, num_classes) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + # Zero-initialize the last BN in each residual branch, + # so that the residual branch starts with zeros, and each residual block behaves like an identity. 
+ # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 + if zero_init_residual: + for m in self.modules(): + if isinstance(m, Bottleneck): + nn.init.constant_(m.bn3.weight, 0) + elif isinstance(m, BasicBlock): + nn.init.constant_(m.bn2.weight, 0) + + def _make_layer(self, block, planes, blocks, stride=1, dilate=False): + norm_layer = self._norm_layer + downsample = None + previous_dilation = self.dilation + if dilate: + self.dilation *= stride + stride = 1 + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + conv1x1(self.inplanes, planes * block.expansion, stride), + norm_layer(planes * block.expansion), + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample, self.groups, + self.base_width, previous_dilation, norm_layer)) + self.inplanes = planes * block.expansion + for _ in range(1, blocks): + layers.append(block(self.inplanes, planes, groups=self.groups, + base_width=self.base_width, dilation=self.dilation, + norm_layer=norm_layer)) + + return nn.Sequential(*layers) + + def _forward_impl(self, x): + # See note [TorchScript super()] + features = [] + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x = self.layer1(x) + features.append(x) + + x = self.layer2(x) + features.append(x) + + x = self.layer3(x) + features.append(x) + + x = self.layer4(x) + features.append(x) + + #x = self.avgpool(x) + #x = torch.flatten(x, 1) + #x = self.fc(x) + + return features + + def forward(self, x): + return self._forward_impl(x) + + + +def resnext101_32x8d(pretrained=True, **kwargs): + """Constructs a ResNet-152 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + kwargs['groups'] = 32 + kwargs['width_per_group'] = 8 + + model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) + return model + diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/leres/leres/depthmap.py b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/leres/depthmap.py new file mode 100644 index 0000000000000000000000000000000000000000..ebceecbe28ec248f6f96bb65b1c53bdbaf393ecc --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/leres/depthmap.py @@ -0,0 +1,546 @@ +# Author: thygate +# https://github.com/thygate/stable-diffusion-webui-depthmap-script + +from modules import devices +from modules.shared import opts +from torchvision.transforms import transforms +from operator import getitem + +import torch, gc +import cv2 +import numpy as np +import skimage.measure + +whole_size_threshold = 1600 # R_max from the paper +pix2pixsize = 1024 + +def scale_torch(img): + """ + Scale the image and output it in torch.tensor. + :param img: input rgb is in shape [H, W, C], input depth/disp is in shape [H, W] + :param scale: the scale factor. float + :return: img. 
[C, H, W] + """ + if len(img.shape) == 2: + img = img[np.newaxis, :, :] + if img.shape[2] == 3: + transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.485, 0.456, 0.406) , (0.229, 0.224, 0.225) )]) + img = transform(img.astype(np.float32)) + else: + img = img.astype(np.float32) + img = torch.from_numpy(img) + return img + +def estimateleres(img, model, w, h): + # leres transform input + rgb_c = img[:, :, ::-1].copy() + A_resize = cv2.resize(rgb_c, (w, h)) + img_torch = scale_torch(A_resize)[None, :, :, :] + + # compute + with torch.no_grad(): + img_torch = img_torch.to(devices.get_device_for("controlnet")) + prediction = model.depth_model(img_torch) + + prediction = prediction.squeeze().cpu().numpy() + prediction = cv2.resize(prediction, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_CUBIC) + + return prediction + +def generatemask(size): + # Generates a Guassian mask + mask = np.zeros(size, dtype=np.float32) + sigma = int(size[0]/16) + k_size = int(2 * np.ceil(2 * int(size[0]/16)) + 1) + mask[int(0.15*size[0]):size[0] - int(0.15*size[0]), int(0.15*size[1]): size[1] - int(0.15*size[1])] = 1 + mask = cv2.GaussianBlur(mask, (int(k_size), int(k_size)), sigma) + mask = (mask - mask.min()) / (mask.max() - mask.min()) + mask = mask.astype(np.float32) + return mask + +def resizewithpool(img, size): + i_size = img.shape[0] + n = int(np.floor(i_size/size)) + + out = skimage.measure.block_reduce(img, (n, n), np.max) + return out + +def rgb2gray(rgb): + # Converts rgb to gray + return np.dot(rgb[..., :3], [0.2989, 0.5870, 0.1140]) + +def calculateprocessingres(img, basesize, confidence=0.1, scale_threshold=3, whole_size_threshold=3000): + # Returns the R_x resolution described in section 5 of the main paper. + + # Parameters: + # img :input rgb image + # basesize : size the dilation kernel which is equal to receptive field of the network. + # confidence: value of x in R_x; allowed percentage of pixels that are not getting any contextual cue. + # scale_threshold: maximum allowed upscaling on the input image ; it has been set to 3. + # whole_size_threshold: maximum allowed resolution. (R_max from section 6 of the main paper) + + # Returns: + # outputsize_scale*speed_scale :The computed R_x resolution + # patch_scale: K parameter from section 6 of the paper + + # speed scale parameter is to process every image in a smaller size to accelerate the R_x resolution search + speed_scale = 32 + image_dim = int(min(img.shape[0:2])) + + gray = rgb2gray(img) + grad = np.abs(cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)) + np.abs(cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)) + grad = cv2.resize(grad, (image_dim, image_dim), cv2.INTER_AREA) + + # thresholding the gradient map to generate the edge-map as a proxy of the contextual cues + m = grad.min() + M = grad.max() + middle = m + (0.4 * (M - m)) + grad[grad < middle] = 0 + grad[grad >= middle] = 1 + + # dilation kernel with size of the receptive field + kernel = np.ones((int(basesize/speed_scale), int(basesize/speed_scale)), float) + # dilation kernel with size of the a quarter of receptive field used to compute k + # as described in section 6 of main paper + kernel2 = np.ones((int(basesize / (4*speed_scale)), int(basesize / (4*speed_scale))), float) + + # Output resolution limit set by the whole_size_threshold and scale_threshold. 
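# To make the search below concrete (illustrative numbers only, assuming the LeReS
# receptive-field size of 448 that estimateboost() passes in as basesize): with speed_scale = 32
# the candidate sizes are 14, 21, 28, ... (step = basesize / (2 * speed_scale) = 7), each probed
# by dilating the downsampled edge map with a 14x14 kernel; the loop keeps the last candidate at
# which at most a `confidence` fraction of pixels is left without any contextual cue.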
+ threshold = min(whole_size_threshold, scale_threshold * max(img.shape[:2])) + + outputsize_scale = basesize / speed_scale + for p_size in range(int(basesize/speed_scale), int(threshold/speed_scale), int(basesize / (2*speed_scale))): + grad_resized = resizewithpool(grad, p_size) + grad_resized = cv2.resize(grad_resized, (p_size, p_size), cv2.INTER_NEAREST) + grad_resized[grad_resized >= 0.5] = 1 + grad_resized[grad_resized < 0.5] = 0 + + dilated = cv2.dilate(grad_resized, kernel, iterations=1) + meanvalue = (1-dilated).mean() + if meanvalue > confidence: + break + else: + outputsize_scale = p_size + + grad_region = cv2.dilate(grad_resized, kernel2, iterations=1) + patch_scale = grad_region.mean() + + return int(outputsize_scale*speed_scale), patch_scale + +# Generate a double-input depth estimation +def doubleestimate(img, size1, size2, pix2pixsize, model, net_type, pix2pixmodel): + # Generate the low resolution estimation + estimate1 = singleestimate(img, size1, model, net_type) + # Resize to the inference size of merge network. + estimate1 = cv2.resize(estimate1, (pix2pixsize, pix2pixsize), interpolation=cv2.INTER_CUBIC) + + # Generate the high resolution estimation + estimate2 = singleestimate(img, size2, model, net_type) + # Resize to the inference size of merge network. + estimate2 = cv2.resize(estimate2, (pix2pixsize, pix2pixsize), interpolation=cv2.INTER_CUBIC) + + # Inference on the merge model + pix2pixmodel.set_input(estimate1, estimate2) + pix2pixmodel.test() + visuals = pix2pixmodel.get_current_visuals() + prediction_mapped = visuals['fake_B'] + prediction_mapped = (prediction_mapped+1)/2 + prediction_mapped = (prediction_mapped - torch.min(prediction_mapped)) / ( + torch.max(prediction_mapped) - torch.min(prediction_mapped)) + prediction_mapped = prediction_mapped.squeeze().cpu().numpy() + + return prediction_mapped + +# Generate a single-input depth estimation +def singleestimate(img, msize, model, net_type): + # if net_type == 0: + return estimateleres(img, model, msize, msize) + # else: + # return estimatemidasBoost(img, model, msize, msize) + +def applyGridpatch(blsize, stride, img, box): + # Extract a simple grid patch. + counter1 = 0 + patch_bound_list = {} + for k in range(blsize, img.shape[1] - blsize, stride): + for j in range(blsize, img.shape[0] - blsize, stride): + patch_bound_list[str(counter1)] = {} + patchbounds = [j - blsize, k - blsize, j - blsize + 2 * blsize, k - blsize + 2 * blsize] + patch_bound = [box[0] + patchbounds[1], box[1] + patchbounds[0], patchbounds[3] - patchbounds[1], + patchbounds[2] - patchbounds[0]] + patch_bound_list[str(counter1)]['rect'] = patch_bound + patch_bound_list[str(counter1)]['size'] = patch_bound[2] + counter1 = counter1 + 1 + return patch_bound_list + +# Generating local patches to perform the local refinement described in section 6 of the main paper. +def generatepatchs(img, base_size): + + # Compute the gradients as a proxy of the contextual cues. + img_gray = rgb2gray(img) + whole_grad = np.abs(cv2.Sobel(img_gray, cv2.CV_64F, 0, 1, ksize=3)) +\ + np.abs(cv2.Sobel(img_gray, cv2.CV_64F, 1, 0, ksize=3)) + + threshold = whole_grad[whole_grad > 0].mean() + whole_grad[whole_grad < threshold] = 0 + + # We use the integral image to speed-up the evaluation of the amount of gradients for each patch. 
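# (For clarity: with an integral image I, the gradient mass inside any rectangle is read in O(1)
# via the four-corner identity I[x2, y2] - I[x1, y2] - I[x2, y1] + I[x1, y1];
# this is exactly what getGF_fromintegral() below computes for each candidate patch.)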
+ gf = whole_grad.sum()/len(whole_grad.reshape(-1)) + grad_integral_image = cv2.integral(whole_grad) + + # Variables are selected such that the initial patch size would be the receptive field size + # and the stride is set to 1/3 of the receptive field size. + blsize = int(round(base_size/2)) + stride = int(round(blsize*0.75)) + + # Get initial Grid + patch_bound_list = applyGridpatch(blsize, stride, img, [0, 0, 0, 0]) + + # Refine initial Grid of patches by discarding the flat (in terms of gradients of the rgb image) ones. Refine + # each patch size to ensure that there will be enough depth cues for the network to generate a consistent depth map. + print("Selecting patches ...") + patch_bound_list = adaptiveselection(grad_integral_image, patch_bound_list, gf) + + # Sort the patch list to make sure the merging operation will be done with the correct order: starting from biggest + # patch + patchset = sorted(patch_bound_list.items(), key=lambda x: getitem(x[1], 'size'), reverse=True) + return patchset + +def getGF_fromintegral(integralimage, rect): + # Computes the gradient density of a given patch from the gradient integral image. + x1 = rect[1] + x2 = rect[1]+rect[3] + y1 = rect[0] + y2 = rect[0]+rect[2] + value = integralimage[x2, y2]-integralimage[x1, y2]-integralimage[x2, y1]+integralimage[x1, y1] + return value + +# Adaptively select patches +def adaptiveselection(integral_grad, patch_bound_list, gf): + patchlist = {} + count = 0 + height, width = integral_grad.shape + + search_step = int(32/factor) + + # Go through all patches + for c in range(len(patch_bound_list)): + # Get patch + bbox = patch_bound_list[str(c)]['rect'] + + # Compute the amount of gradients present in the patch from the integral image. + cgf = getGF_fromintegral(integral_grad, bbox)/(bbox[2]*bbox[3]) + + # Check if patching is beneficial by comparing the gradient density of the patch to + # the gradient density of the whole image + if cgf >= gf: + bbox_test = bbox.copy() + patchlist[str(count)] = {} + + # Enlarge each patch until the gradient density of the patch is equal + # to the whole image gradient density + while True: + + bbox_test[0] = bbox_test[0] - int(search_step/2) + bbox_test[1] = bbox_test[1] - int(search_step/2) + + bbox_test[2] = bbox_test[2] + search_step + bbox_test[3] = bbox_test[3] + search_step + + # Check if we are still within the image + if bbox_test[0] < 0 or bbox_test[1] < 0 or bbox_test[1] + bbox_test[3] >= height \ + or bbox_test[0] + bbox_test[2] >= width: + break + + # Compare gradient density + cgf = getGF_fromintegral(integral_grad, bbox_test)/(bbox_test[2]*bbox_test[3]) + if cgf < gf: + break + bbox = bbox_test.copy() + + # Add patch to selected patches + patchlist[str(count)]['rect'] = bbox + patchlist[str(count)]['size'] = bbox[2] + count = count + 1 + + # Return selected patches + return patchlist + +def impatch(image, rect): + # Extract the given patch pixels from a given image. 
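# rect is [x, y, width, height]: columns come from rect[0]/rect[2] and rows from rect[1]/rect[3],
# so the slice below is image[y : y + height, x : x + width].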
+ w1 = rect[0] + h1 = rect[1] + w2 = w1 + rect[2] + h2 = h1 + rect[3] + image_patch = image[h1:h2, w1:w2] + return image_patch + +class ImageandPatchs: + def __init__(self, root_dir, name, patchsinfo, rgb_image, scale=1): + self.root_dir = root_dir + self.patchsinfo = patchsinfo + self.name = name + self.patchs = patchsinfo + self.scale = scale + + self.rgb_image = cv2.resize(rgb_image, (round(rgb_image.shape[1]*scale), round(rgb_image.shape[0]*scale)), + interpolation=cv2.INTER_CUBIC) + + self.do_have_estimate = False + self.estimation_updated_image = None + self.estimation_base_image = None + + def __len__(self): + return len(self.patchs) + + def set_base_estimate(self, est): + self.estimation_base_image = est + if self.estimation_updated_image is not None: + self.do_have_estimate = True + + def set_updated_estimate(self, est): + self.estimation_updated_image = est + if self.estimation_base_image is not None: + self.do_have_estimate = True + + def __getitem__(self, index): + patch_id = int(self.patchs[index][0]) + rect = np.array(self.patchs[index][1]['rect']) + msize = self.patchs[index][1]['size'] + + ## applying scale to rect: + rect = np.round(rect * self.scale) + rect = rect.astype('int') + msize = round(msize * self.scale) + + patch_rgb = impatch(self.rgb_image, rect) + if self.do_have_estimate: + patch_whole_estimate_base = impatch(self.estimation_base_image, rect) + patch_whole_estimate_updated = impatch(self.estimation_updated_image, rect) + return {'patch_rgb': patch_rgb, 'patch_whole_estimate_base': patch_whole_estimate_base, + 'patch_whole_estimate_updated': patch_whole_estimate_updated, 'rect': rect, + 'size': msize, 'id': patch_id} + else: + return {'patch_rgb': patch_rgb, 'rect': rect, 'size': msize, 'id': patch_id} + + def print_options(self, opt): + """Print and save options + + It will print both current options and default values(if different). 
+ It will save options into a text file / [checkpoints_dir] / opt.txt + """ + message = '' + message += '----------------- Options ---------------\n' + for k, v in sorted(vars(opt).items()): + comment = '' + default = self.parser.get_default(k) + if v != default: + comment = '\t[default: %s]' % str(default) + message += '{:>25}: {:<30}{}\n'.format(str(k), str(v), comment) + message += '----------------- End -------------------' + print(message) + + # save to the disk + """ + expr_dir = os.path.join(opt.checkpoints_dir, opt.name) + util.mkdirs(expr_dir) + file_name = os.path.join(expr_dir, '{}_opt.txt'.format(opt.phase)) + with open(file_name, 'wt') as opt_file: + opt_file.write(message) + opt_file.write('\n') + """ + + def parse(self): + """Parse our options, create checkpoints directory suffix, and set up gpu device.""" + opt = self.gather_options() + opt.isTrain = self.isTrain # train or test + + # process opt.suffix + if opt.suffix: + suffix = ('_' + opt.suffix.format(**vars(opt))) if opt.suffix != '' else '' + opt.name = opt.name + suffix + + #self.print_options(opt) + + # set gpu ids + str_ids = opt.gpu_ids.split(',') + opt.gpu_ids = [] + for str_id in str_ids: + id = int(str_id) + if id >= 0: + opt.gpu_ids.append(id) + #if len(opt.gpu_ids) > 0: + # torch.cuda.set_device(opt.gpu_ids[0]) + + self.opt = opt + return self.opt + + +def estimateboost(img, model, model_type, pix2pixmodel, max_res=512): + global whole_size_threshold + + # get settings + if hasattr(opts, 'depthmap_script_boost_rmax'): + whole_size_threshold = opts.depthmap_script_boost_rmax + + if model_type == 0: #leres + net_receptive_field_size = 448 + patch_netsize = 2 * net_receptive_field_size + elif model_type == 1: #dpt_beit_large_512 + net_receptive_field_size = 512 + patch_netsize = 2 * net_receptive_field_size + else: #other midas + net_receptive_field_size = 384 + patch_netsize = 2 * net_receptive_field_size + + gc.collect() + devices.torch_gc() + + # Generate mask used to smoothly blend the local pathc estimations to the base estimate. + # It is arbitrarily large to avoid artifacts during rescaling for each crop. + mask_org = generatemask((3000, 3000)) + mask = mask_org.copy() + + # Value x of R_x defined in the section 5 of the main paper. + r_threshold_value = 0.2 + #if R0: + # r_threshold_value = 0 + + input_resolution = img.shape + scale_threshold = 3 # Allows up-scaling with a scale up to 3 + + # Find the best input resolution R-x. The resolution search described in section 5-double estimation of the main paper and section B of the + # supplementary material. + whole_image_optimal_size, patch_scale = calculateprocessingres(img, net_receptive_field_size, r_threshold_value, scale_threshold, whole_size_threshold) + + # print('wholeImage being processed in :', whole_image_optimal_size) + + # Generate the base estimate using the double estimation. + whole_estimate = doubleestimate(img, net_receptive_field_size, whole_image_optimal_size, pix2pixsize, model, model_type, pix2pixmodel) + + # Compute the multiplier described in section 6 of the main paper to make sure our initial patch can select + # small high-density regions of the image. + global factor + factor = max(min(1, 4 * patch_scale * whole_image_optimal_size / whole_size_threshold), 0.2) + # print('Adjust factor is:', 1/factor) + + # Check if Local boosting is beneficial. + if max_res < whole_image_optimal_size: + # print("No Local boosting. 
Specified Max Res is smaller than R20, Returning doubleestimate result") + return cv2.resize(whole_estimate, (input_resolution[1], input_resolution[0]), interpolation=cv2.INTER_CUBIC) + + # Compute the default target resolution. + if img.shape[0] > img.shape[1]: + a = 2 * whole_image_optimal_size + b = round(2 * whole_image_optimal_size * img.shape[1] / img.shape[0]) + else: + a = round(2 * whole_image_optimal_size * img.shape[0] / img.shape[1]) + b = 2 * whole_image_optimal_size + b = int(round(b / factor)) + a = int(round(a / factor)) + + """ + # recompute a, b and saturate to max res. + if max(a,b) > max_res: + print('Default Res is higher than max-res: Reducing final resolution') + if img.shape[0] > img.shape[1]: + a = max_res + b = round(max_res * img.shape[1] / img.shape[0]) + else: + a = round(max_res * img.shape[0] / img.shape[1]) + b = max_res + b = int(b) + a = int(a) + """ + + img = cv2.resize(img, (b, a), interpolation=cv2.INTER_CUBIC) + + # Extract selected patches for local refinement + base_size = net_receptive_field_size * 2 + patchset = generatepatchs(img, base_size) + + # print('Target resolution: ', img.shape) + + # Computing a scale in case user prompted to generate the results as the same resolution of the input. + # Notice that our method output resolution is independent of the input resolution and this parameter will only + # enable a scaling operation during the local patch merge implementation to generate results with the same resolution + # as the input. + """ + if output_resolution == 1: + mergein_scale = input_resolution[0] / img.shape[0] + print('Dynamicly change merged-in resolution; scale:', mergein_scale) + else: + mergein_scale = 1 + """ + # always rescale to input res for now + mergein_scale = input_resolution[0] / img.shape[0] + + imageandpatchs = ImageandPatchs('', '', patchset, img, mergein_scale) + whole_estimate_resized = cv2.resize(whole_estimate, (round(img.shape[1]*mergein_scale), + round(img.shape[0]*mergein_scale)), interpolation=cv2.INTER_CUBIC) + imageandpatchs.set_base_estimate(whole_estimate_resized.copy()) + imageandpatchs.set_updated_estimate(whole_estimate_resized.copy()) + + print('Resulting depthmap resolution will be :', whole_estimate_resized.shape[:2]) + print('Patches to process: '+str(len(imageandpatchs))) + + # Enumerate through all patches, generate their estimations and refining the base estimate. + for patch_ind in range(len(imageandpatchs)): + + # Get patch information + patch = imageandpatchs[patch_ind] # patch object + patch_rgb = patch['patch_rgb'] # rgb patch + patch_whole_estimate_base = patch['patch_whole_estimate_base'] # corresponding patch from base + rect = patch['rect'] # patch size and location + patch_id = patch['id'] # patch ID + org_size = patch_whole_estimate_base.shape # the original size from the unscaled input + print('\t Processing patch', patch_ind, '/', len(imageandpatchs)-1, '|', rect) + + # We apply double estimation for patches. The high resolution value is fixed to twice the receptive + # field size of the network for patches to accelerate the process. 
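# In summary, the loop body below: double-estimates the patch, resizes the patch estimate and the
# matching crop of the base estimate to pix2pixsize, runs the pix2pix merge network, fits a
# degree-1 polynomial so the merged values match the base estimate's scale, resizes the result
# back to the patch's original size, and blends it into the running estimate with the Gaussian mask.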
+ patch_estimation = doubleestimate(patch_rgb, net_receptive_field_size, patch_netsize, pix2pixsize, model, model_type, pix2pixmodel) + patch_estimation = cv2.resize(patch_estimation, (pix2pixsize, pix2pixsize), interpolation=cv2.INTER_CUBIC) + patch_whole_estimate_base = cv2.resize(patch_whole_estimate_base, (pix2pixsize, pix2pixsize), interpolation=cv2.INTER_CUBIC) + + # Merging the patch estimation into the base estimate using our merge network: + # We feed the patch estimation and the same region from the updated base estimate to the merge network + # to generate the target estimate for the corresponding region. + pix2pixmodel.set_input(patch_whole_estimate_base, patch_estimation) + + # Run merging network + pix2pixmodel.test() + visuals = pix2pixmodel.get_current_visuals() + + prediction_mapped = visuals['fake_B'] + prediction_mapped = (prediction_mapped+1)/2 + prediction_mapped = prediction_mapped.squeeze().cpu().numpy() + + mapped = prediction_mapped + + # We use a simple linear polynomial to make sure the result of the merge network would match the values of + # base estimate + p_coef = np.polyfit(mapped.reshape(-1), patch_whole_estimate_base.reshape(-1), deg=1) + merged = np.polyval(p_coef, mapped.reshape(-1)).reshape(mapped.shape) + + merged = cv2.resize(merged, (org_size[1],org_size[0]), interpolation=cv2.INTER_CUBIC) + + # Get patch size and location + w1 = rect[0] + h1 = rect[1] + w2 = w1 + rect[2] + h2 = h1 + rect[3] + + # To speed up the implementation, we only generate the Gaussian mask once with a sufficiently large size + # and resize it to our needed size while merging the patches. + if mask.shape != org_size: + mask = cv2.resize(mask_org, (org_size[1],org_size[0]), interpolation=cv2.INTER_LINEAR) + + tobemergedto = imageandpatchs.estimation_updated_image + + # Update the whole estimation: + # We use a simple Gaussian mask to blend the merged patch region with the base estimate to ensure seamless + # blending at the boundaries of the patch region. + tobemergedto[h1:h2, w1:w2] = np.multiply(tobemergedto[h1:h2, w1:w2], 1 - mask) + np.multiply(merged, mask) + imageandpatchs.set_updated_estimate(tobemergedto) + + # output + return cv2.resize(imageandpatchs.estimation_updated_image, (input_resolution[1], input_resolution[0]), interpolation=cv2.INTER_CUBIC) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/leres/leres/multi_depth_model_woauxi.py b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/leres/multi_depth_model_woauxi.py new file mode 100644 index 0000000000000000000000000000000000000000..822ab0893267042446c2a24ed35b4ea053c9914a --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/leres/multi_depth_model_woauxi.py @@ -0,0 +1,34 @@ +from . import network_auxi as network +from .net_tools import get_func +import torch +import torch.nn as nn +from modules import devices + +class RelDepthModel(nn.Module): + def __init__(self, backbone='resnet50'): + super(RelDepthModel, self).__init__() + if backbone == 'resnet50': + encoder = 'resnet50_stride32' + elif backbone == 'resnext101': + encoder = 'resnext101_stride32x8d' + self.depth_model = DepthModel(encoder) + + def inference(self, rgb): + with torch.no_grad(): + input = rgb.to(self.depth_model.device) + depth = self.depth_model(input) + #pred_depth_out = depth - depth.min() + 0.01 + return depth #pred_depth_out + + +class DepthModel(nn.Module): + def __init__(self, encoder): + super(DepthModel, self).__init__() + backbone = network.__name__.split('.')[-1] + '.' 
+ encoder + self.encoder_modules = get_func(backbone)() + self.decoder_modules = network.Decoder() + + def forward(self, x): + lateral_out = self.encoder_modules(x) + out_logit = self.decoder_modules(lateral_out) + return out_logit \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/leres/leres/net_tools.py b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/leres/net_tools.py new file mode 100644 index 0000000000000000000000000000000000000000..745ba5a0ef19adb869525e6b252db86780b8126e --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/leres/net_tools.py @@ -0,0 +1,54 @@ +import importlib +import torch +import os +from collections import OrderedDict + + +def get_func(func_name): + """Helper to return a function object by name. func_name must identify a + function in this module or the path to a function relative to the base + 'modeling' module. + """ + if func_name == '': + return None + try: + parts = func_name.split('.') + # Refers to a function in this module + if len(parts) == 1: + return globals()[parts[0]] + # Otherwise, assume we're referencing a module under modeling + module_name = 'annotator.leres.leres.' + '.'.join(parts[:-1]) + module = importlib.import_module(module_name) + return getattr(module, parts[-1]) + except Exception: + print('Failed to f1ind function: %s', func_name) + raise + +def load_ckpt(args, depth_model, shift_model, focal_model): + """ + Load checkpoint. + """ + if os.path.isfile(args.load_ckpt): + print("loading checkpoint %s" % args.load_ckpt) + checkpoint = torch.load(args.load_ckpt) + if shift_model is not None: + shift_model.load_state_dict(strip_prefix_if_present(checkpoint['shift_model'], 'module.'), + strict=True) + if focal_model is not None: + focal_model.load_state_dict(strip_prefix_if_present(checkpoint['focal_model'], 'module.'), + strict=True) + depth_model.load_state_dict(strip_prefix_if_present(checkpoint['depth_model'], "module."), + strict=True) + del checkpoint + if torch.cuda.is_available(): + torch.cuda.empty_cache() + + +def strip_prefix_if_present(state_dict, prefix): + keys = sorted(state_dict.keys()) + if not all(key.startswith(prefix) for key in keys): + return state_dict + stripped_state_dict = OrderedDict() + for key, value in state_dict.items(): + stripped_state_dict[key.replace(prefix, "")] = value + return stripped_state_dict \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/leres/leres/network_auxi.py b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/leres/network_auxi.py new file mode 100644 index 0000000000000000000000000000000000000000..1bd87011a5339aca632d1a10b217c8737bdc794f --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/leres/network_auxi.py @@ -0,0 +1,417 @@ +import torch +import torch.nn as nn +import torch.nn.init as init + +from . 
import Resnet, Resnext_torch + + +def resnet50_stride32(): + return DepthNet(backbone='resnet', depth=50, upfactors=[2, 2, 2, 2]) + +def resnext101_stride32x8d(): + return DepthNet(backbone='resnext101_32x8d', depth=101, upfactors=[2, 2, 2, 2]) + + +class Decoder(nn.Module): + def __init__(self): + super(Decoder, self).__init__() + self.inchannels = [256, 512, 1024, 2048] + self.midchannels = [256, 256, 256, 512] + self.upfactors = [2,2,2,2] + self.outchannels = 1 + + self.conv = FTB(inchannels=self.inchannels[3], midchannels=self.midchannels[3]) + self.conv1 = nn.Conv2d(in_channels=self.midchannels[3], out_channels=self.midchannels[2], kernel_size=3, padding=1, stride=1, bias=True) + self.upsample = nn.Upsample(scale_factor=self.upfactors[3], mode='bilinear', align_corners=True) + + self.ffm2 = FFM(inchannels=self.inchannels[2], midchannels=self.midchannels[2], outchannels = self.midchannels[2], upfactor=self.upfactors[2]) + self.ffm1 = FFM(inchannels=self.inchannels[1], midchannels=self.midchannels[1], outchannels = self.midchannels[1], upfactor=self.upfactors[1]) + self.ffm0 = FFM(inchannels=self.inchannels[0], midchannels=self.midchannels[0], outchannels = self.midchannels[0], upfactor=self.upfactors[0]) + + self.outconv = AO(inchannels=self.midchannels[0], outchannels=self.outchannels, upfactor=2) + self._init_params() + + def _init_params(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + init.normal_(m.weight, std=0.01) + if m.bias is not None: + init.constant_(m.bias, 0) + elif isinstance(m, nn.ConvTranspose2d): + init.normal_(m.weight, std=0.01) + if m.bias is not None: + init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm2d): #NN.BatchNorm2d + init.constant_(m.weight, 1) + init.constant_(m.bias, 0) + elif isinstance(m, nn.Linear): + init.normal_(m.weight, std=0.01) + if m.bias is not None: + init.constant_(m.bias, 0) + + def forward(self, features): + x_32x = self.conv(features[3]) # 1/32 + x_32 = self.conv1(x_32x) + x_16 = self.upsample(x_32) # 1/16 + + x_8 = self.ffm2(features[2], x_16) # 1/8 + x_4 = self.ffm1(features[1], x_8) # 1/4 + x_2 = self.ffm0(features[0], x_4) # 1/2 + #----------------------------------------- + x = self.outconv(x_2) # original size + return x + +class DepthNet(nn.Module): + __factory = { + 18: Resnet.resnet18, + 34: Resnet.resnet34, + 50: Resnet.resnet50, + 101: Resnet.resnet101, + 152: Resnet.resnet152 + } + def __init__(self, + backbone='resnet', + depth=50, + upfactors=[2, 2, 2, 2]): + super(DepthNet, self).__init__() + self.backbone = backbone + self.depth = depth + self.pretrained = False + self.inchannels = [256, 512, 1024, 2048] + self.midchannels = [256, 256, 256, 512] + self.upfactors = upfactors + self.outchannels = 1 + + # Build model + if self.backbone == 'resnet': + if self.depth not in DepthNet.__factory: + raise KeyError("Unsupported depth:", self.depth) + self.encoder = DepthNet.__factory[depth](pretrained=self.pretrained) + elif self.backbone == 'resnext101_32x8d': + self.encoder = Resnext_torch.resnext101_32x8d(pretrained=self.pretrained) + else: + self.encoder = Resnext_torch.resnext101(pretrained=self.pretrained) + + def forward(self, x): + x = self.encoder(x) # 1/32, 1/16, 1/8, 1/4 + return x + + +class FTB(nn.Module): + def __init__(self, inchannels, midchannels=512): + super(FTB, self).__init__() + self.in1 = inchannels + self.mid = midchannels + self.conv1 = nn.Conv2d(in_channels=self.in1, out_channels=self.mid, kernel_size=3, padding=1, stride=1, + bias=True) + # NN.BatchNorm2d + self.conv_branch = 
nn.Sequential(nn.ReLU(inplace=True), \ + nn.Conv2d(in_channels=self.mid, out_channels=self.mid, kernel_size=3, + padding=1, stride=1, bias=True), \ + nn.BatchNorm2d(num_features=self.mid), \ + nn.ReLU(inplace=True), \ + nn.Conv2d(in_channels=self.mid, out_channels=self.mid, kernel_size=3, + padding=1, stride=1, bias=True)) + self.relu = nn.ReLU(inplace=True) + + self.init_params() + + def forward(self, x): + x = self.conv1(x) + x = x + self.conv_branch(x) + x = self.relu(x) + + return x + + def init_params(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + init.normal_(m.weight, std=0.01) + if m.bias is not None: + init.constant_(m.bias, 0) + elif isinstance(m, nn.ConvTranspose2d): + # init.kaiming_normal_(m.weight, mode='fan_out') + init.normal_(m.weight, std=0.01) + # init.xavier_normal_(m.weight) + if m.bias is not None: + init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm2d): # NN.BatchNorm2d + init.constant_(m.weight, 1) + init.constant_(m.bias, 0) + elif isinstance(m, nn.Linear): + init.normal_(m.weight, std=0.01) + if m.bias is not None: + init.constant_(m.bias, 0) + + +class ATA(nn.Module): + def __init__(self, inchannels, reduction=8): + super(ATA, self).__init__() + self.inchannels = inchannels + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.fc = nn.Sequential(nn.Linear(self.inchannels * 2, self.inchannels // reduction), + nn.ReLU(inplace=True), + nn.Linear(self.inchannels // reduction, self.inchannels), + nn.Sigmoid()) + self.init_params() + + def forward(self, low_x, high_x): + n, c, _, _ = low_x.size() + x = torch.cat([low_x, high_x], 1) + x = self.avg_pool(x) + x = x.view(n, -1) + x = self.fc(x).view(n, c, 1, 1) + x = low_x * x + high_x + + return x + + def init_params(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + # init.kaiming_normal_(m.weight, mode='fan_out') + # init.normal(m.weight, std=0.01) + init.xavier_normal_(m.weight) + if m.bias is not None: + init.constant_(m.bias, 0) + elif isinstance(m, nn.ConvTranspose2d): + # init.kaiming_normal_(m.weight, mode='fan_out') + # init.normal_(m.weight, std=0.01) + init.xavier_normal_(m.weight) + if m.bias is not None: + init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm2d): # NN.BatchNorm2d + init.constant_(m.weight, 1) + init.constant_(m.bias, 0) + elif isinstance(m, nn.Linear): + init.normal_(m.weight, std=0.01) + if m.bias is not None: + init.constant_(m.bias, 0) + + +class FFM(nn.Module): + def __init__(self, inchannels, midchannels, outchannels, upfactor=2): + super(FFM, self).__init__() + self.inchannels = inchannels + self.midchannels = midchannels + self.outchannels = outchannels + self.upfactor = upfactor + + self.ftb1 = FTB(inchannels=self.inchannels, midchannels=self.midchannels) + # self.ata = ATA(inchannels = self.midchannels) + self.ftb2 = FTB(inchannels=self.midchannels, midchannels=self.outchannels) + + self.upsample = nn.Upsample(scale_factor=self.upfactor, mode='bilinear', align_corners=True) + + self.init_params() + + def forward(self, low_x, high_x): + x = self.ftb1(low_x) + x = x + high_x + x = self.ftb2(x) + x = self.upsample(x) + + return x + + def init_params(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + # init.kaiming_normal_(m.weight, mode='fan_out') + init.normal_(m.weight, std=0.01) + # init.xavier_normal_(m.weight) + if m.bias is not None: + init.constant_(m.bias, 0) + elif isinstance(m, nn.ConvTranspose2d): + # init.kaiming_normal_(m.weight, mode='fan_out') + init.normal_(m.weight, std=0.01) + # init.xavier_normal_(m.weight) 
+ if m.bias is not None: + init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm2d): # NN.Batchnorm2d + init.constant_(m.weight, 1) + init.constant_(m.bias, 0) + elif isinstance(m, nn.Linear): + init.normal_(m.weight, std=0.01) + if m.bias is not None: + init.constant_(m.bias, 0) + + +class AO(nn.Module): + # Adaptive output module + def __init__(self, inchannels, outchannels, upfactor=2): + super(AO, self).__init__() + self.inchannels = inchannels + self.outchannels = outchannels + self.upfactor = upfactor + + self.adapt_conv = nn.Sequential( + nn.Conv2d(in_channels=self.inchannels, out_channels=self.inchannels // 2, kernel_size=3, padding=1, + stride=1, bias=True), \ + nn.BatchNorm2d(num_features=self.inchannels // 2), \ + nn.ReLU(inplace=True), \ + nn.Conv2d(in_channels=self.inchannels // 2, out_channels=self.outchannels, kernel_size=3, padding=1, + stride=1, bias=True), \ + nn.Upsample(scale_factor=self.upfactor, mode='bilinear', align_corners=True)) + + self.init_params() + + def forward(self, x): + x = self.adapt_conv(x) + return x + + def init_params(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + # init.kaiming_normal_(m.weight, mode='fan_out') + init.normal_(m.weight, std=0.01) + # init.xavier_normal_(m.weight) + if m.bias is not None: + init.constant_(m.bias, 0) + elif isinstance(m, nn.ConvTranspose2d): + # init.kaiming_normal_(m.weight, mode='fan_out') + init.normal_(m.weight, std=0.01) + # init.xavier_normal_(m.weight) + if m.bias is not None: + init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm2d): # NN.Batchnorm2d + init.constant_(m.weight, 1) + init.constant_(m.bias, 0) + elif isinstance(m, nn.Linear): + init.normal_(m.weight, std=0.01) + if m.bias is not None: + init.constant_(m.bias, 0) + + + +# ============================================================================================================== + + +class ResidualConv(nn.Module): + def __init__(self, inchannels): + super(ResidualConv, self).__init__() + # NN.BatchNorm2d + self.conv = nn.Sequential( + # nn.BatchNorm2d(num_features=inchannels), + nn.ReLU(inplace=False), + # nn.Conv2d(in_channels=inchannels, out_channels=inchannels, kernel_size=3, padding=1, stride=1, groups=inchannels,bias=True), + # nn.Conv2d(in_channels=inchannels, out_channels=inchannels, kernel_size=1, padding=0, stride=1, groups=1,bias=True) + nn.Conv2d(in_channels=inchannels, out_channels=inchannels / 2, kernel_size=3, padding=1, stride=1, + bias=False), + nn.BatchNorm2d(num_features=inchannels / 2), + nn.ReLU(inplace=False), + nn.Conv2d(in_channels=inchannels / 2, out_channels=inchannels, kernel_size=3, padding=1, stride=1, + bias=False) + ) + self.init_params() + + def forward(self, x): + x = self.conv(x) + x + return x + + def init_params(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + # init.kaiming_normal_(m.weight, mode='fan_out') + init.normal_(m.weight, std=0.01) + # init.xavier_normal_(m.weight) + if m.bias is not None: + init.constant_(m.bias, 0) + elif isinstance(m, nn.ConvTranspose2d): + # init.kaiming_normal_(m.weight, mode='fan_out') + init.normal_(m.weight, std=0.01) + # init.xavier_normal_(m.weight) + if m.bias is not None: + init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm2d): # NN.BatchNorm2d + init.constant_(m.weight, 1) + init.constant_(m.bias, 0) + elif isinstance(m, nn.Linear): + init.normal_(m.weight, std=0.01) + if m.bias is not None: + init.constant_(m.bias, 0) + + +class FeatureFusion(nn.Module): + def __init__(self, inchannels, outchannels): + 
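# FeatureFusion refines the low-level features with a ResidualConv, adds them to the high-level
# features, then refines again and upsamples 2x with a stride-2 ConvTranspose2d down to
# `outchannels` (see forward() below).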
super(FeatureFusion, self).__init__() + self.conv = ResidualConv(inchannels=inchannels) + # NN.BatchNorm2d + self.up = nn.Sequential(ResidualConv(inchannels=inchannels), + nn.ConvTranspose2d(in_channels=inchannels, out_channels=outchannels, kernel_size=3, + stride=2, padding=1, output_padding=1), + nn.BatchNorm2d(num_features=outchannels), + nn.ReLU(inplace=True)) + + def forward(self, lowfeat, highfeat): + return self.up(highfeat + self.conv(lowfeat)) + + def init_params(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + # init.kaiming_normal_(m.weight, mode='fan_out') + init.normal_(m.weight, std=0.01) + # init.xavier_normal_(m.weight) + if m.bias is not None: + init.constant_(m.bias, 0) + elif isinstance(m, nn.ConvTranspose2d): + # init.kaiming_normal_(m.weight, mode='fan_out') + init.normal_(m.weight, std=0.01) + # init.xavier_normal_(m.weight) + if m.bias is not None: + init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm2d): # NN.BatchNorm2d + init.constant_(m.weight, 1) + init.constant_(m.bias, 0) + elif isinstance(m, nn.Linear): + init.normal_(m.weight, std=0.01) + if m.bias is not None: + init.constant_(m.bias, 0) + + +class SenceUnderstand(nn.Module): + def __init__(self, channels): + super(SenceUnderstand, self).__init__() + self.channels = channels + self.conv1 = nn.Sequential(nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1), + nn.ReLU(inplace=True)) + self.pool = nn.AdaptiveAvgPool2d(8) + self.fc = nn.Sequential(nn.Linear(512 * 8 * 8, self.channels), + nn.ReLU(inplace=True)) + self.conv2 = nn.Sequential( + nn.Conv2d(in_channels=self.channels, out_channels=self.channels, kernel_size=1, padding=0), + nn.ReLU(inplace=True)) + self.initial_params() + + def forward(self, x): + n, c, h, w = x.size() + x = self.conv1(x) + x = self.pool(x) + x = x.view(n, -1) + x = self.fc(x) + x = x.view(n, self.channels, 1, 1) + x = self.conv2(x) + x = x.repeat(1, 1, h, w) + return x + + def initial_params(self, dev=0.01): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + # print torch.sum(m.weight) + m.weight.data.normal_(0, dev) + if m.bias is not None: + m.bias.data.fill_(0) + elif isinstance(m, nn.ConvTranspose2d): + # print torch.sum(m.weight) + m.weight.data.normal_(0, dev) + if m.bias is not None: + m.bias.data.fill_(0) + elif isinstance(m, nn.Linear): + m.weight.data.normal_(0, dev) + + +if __name__ == '__main__': + net = DepthNet(depth=50, pretrained=True) + print(net) + inputs = torch.ones(4,3,128,128) + out = net(inputs) + print(out.size()) + diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/LICENSE b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..38b1a24fd389a138b930dcf1ee606ef97a0186c8 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/LICENSE @@ -0,0 +1,19 @@ +https://github.com/compphoto/BoostingMonocularDepth + +Copyright 2021, Seyed Mahdi Hosseini Miangoleh, Sebastian Dille, Computational Photography Laboratory. All rights reserved. + +This software is for academic use only. A redistribution of this +software, with or without modifications, has to be for academic +use only, while giving the appropriate credit to the original +authors of the software. The methods implemented as a part of +this software may be covered under patents or patent applications. 
+ +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ''AS IS'' AND ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/models/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f96e5c7f032f2154c6bb433b68fc968d0a19b5a8 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/models/__init__.py @@ -0,0 +1,67 @@ +"""This package contains modules related to objective functions, optimizations, and network architectures. + +To add a custom model class called 'dummy', you need to add a file called 'dummy_model.py' and define a subclass DummyModel inherited from BaseModel. +You need to implement the following five functions: + -- <__init__>: initialize the class; first call BaseModel.__init__(self, opt). + -- : unpack data from dataset and apply preprocessing. + -- : produce intermediate results. + -- : calculate loss, gradients, and update network weights. + -- : (optionally) add model-specific options and set default options. + +In the function <__init__>, you need to define four lists: + -- self.loss_names (str list): specify the training losses that you want to plot and save. + -- self.model_names (str list): define networks used in our training. + -- self.visual_names (str list): specify the images that you want to display and save. + -- self.optimizers (optimizer list): define and initialize optimizers. You can define one optimizer for each network. If two networks are updated at the same time, you can use itertools.chain to group them. See cycle_gan_model.py for an usage. + +Now you can use the model class by specifying flag '--model dummy'. +See our template model class 'template_model.py' for more details. +""" + +import importlib +from .base_model import BaseModel + + +def find_model_using_name(model_name): + """Import the module "models/[model_name]_model.py". + + In the file, the class called DatasetNameModel() will + be instantiated. It has to be a subclass of BaseModel, + and it is case-insensitive. + """ + model_filename = "annotator.leres.pix2pix.models." + model_name + "_model" + modellib = importlib.import_module(model_filename) + model = None + target_model_name = model_name.replace('_', '') + 'model' + for name, cls in modellib.__dict__.items(): + if name.lower() == target_model_name.lower() \ + and issubclass(cls, BaseModel): + model = cls + + if model is None: + print("In %s.py, there should be a subclass of BaseModel with class name that matches %s in lowercase." 
% (model_filename, target_model_name)) + exit(0) + + return model + + +def get_option_setter(model_name): + """Return the static method of the model class.""" + model_class = find_model_using_name(model_name) + return model_class.modify_commandline_options + + +def create_model(opt): + """Create a model given the option. + + This function warps the class CustomDatasetDataLoader. + This is the main interface between this package and 'train.py'/'test.py' + + Example: + >>> from models import create_model + >>> model = create_model(opt) + """ + model = find_model_using_name(opt.model) + instance = model(opt) + print("model [%s] was created" % type(instance).__name__) + return instance diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/models/base_model.py b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/models/base_model.py new file mode 100644 index 0000000000000000000000000000000000000000..a90c5f832404bc44ef247b42a72988a37fc834cb --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/models/base_model.py @@ -0,0 +1,241 @@ +import os +import torch, gc +from modules import devices +from collections import OrderedDict +from abc import ABC, abstractmethod +from . import networks + + +class BaseModel(ABC): + """This class is an abstract base class (ABC) for models. + To create a subclass, you need to implement the following five functions: + -- <__init__>: initialize the class; first call BaseModel.__init__(self, opt). + -- : unpack data from dataset and apply preprocessing. + -- : produce intermediate results. + -- : calculate losses, gradients, and update network weights. + -- : (optionally) add model-specific options and set default options. + """ + + def __init__(self, opt): + """Initialize the BaseModel class. + + Parameters: + opt (Option class)-- stores all the experiment flags; needs to be a subclass of BaseOptions + + When creating your custom class, you need to implement your own initialization. + In this function, you should first call + Then, you need to define four lists: + -- self.loss_names (str list): specify the training losses that you want to plot and save. + -- self.model_names (str list): define networks used in our training. + -- self.visual_names (str list): specify the images that you want to display and save. + -- self.optimizers (optimizer list): define and initialize optimizers. You can define one optimizer for each network. If two networks are updated at the same time, you can use itertools.chain to group them. See cycle_gan_model.py for an example. + """ + self.opt = opt + self.gpu_ids = opt.gpu_ids + self.isTrain = opt.isTrain + self.device = torch.device('cuda:{}'.format(self.gpu_ids[0])) if self.gpu_ids else torch.device('cpu') # get device name: CPU or GPU + self.save_dir = os.path.join(opt.checkpoints_dir, opt.name) # save all the checkpoints to save_dir + if opt.preprocess != 'scale_width': # with [scale_width], input images might have different sizes, which hurts the performance of cudnn.benchmark. + torch.backends.cudnn.benchmark = True + self.loss_names = [] + self.model_names = [] + self.visual_names = [] + self.optimizers = [] + self.image_paths = [] + self.metric = 0 # used for learning rate policy 'plateau' + + @staticmethod + def modify_commandline_options(parser, is_train): + """Add new model-specific options, and rewrite default values for existing options. 
+ + Parameters: + parser -- original option parser + is_train (bool) -- whether training phase or test phase. You can use this flag to add training-specific or test-specific options. + + Returns: + the modified parser. + """ + return parser + + @abstractmethod + def set_input(self, input): + """Unpack input data from the dataloader and perform necessary pre-processing steps. + + Parameters: + input (dict): includes the data itself and its metadata information. + """ + pass + + @abstractmethod + def forward(self): + """Run forward pass; called by both functions and .""" + pass + + @abstractmethod + def optimize_parameters(self): + """Calculate losses, gradients, and update network weights; called in every training iteration""" + pass + + def setup(self, opt): + """Load and print networks; create schedulers + + Parameters: + opt (Option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions + """ + if self.isTrain: + self.schedulers = [networks.get_scheduler(optimizer, opt) for optimizer in self.optimizers] + if not self.isTrain or opt.continue_train: + load_suffix = 'iter_%d' % opt.load_iter if opt.load_iter > 0 else opt.epoch + self.load_networks(load_suffix) + self.print_networks(opt.verbose) + + def eval(self): + """Make models eval mode during test time""" + for name in self.model_names: + if isinstance(name, str): + net = getattr(self, 'net' + name) + net.eval() + + def test(self): + """Forward function used in test time. + + This function wraps function in no_grad() so we don't save intermediate steps for backprop + It also calls to produce additional visualization results + """ + with torch.no_grad(): + self.forward() + self.compute_visuals() + + def compute_visuals(self): + """Calculate additional output images for visdom and HTML visualization""" + pass + + def get_image_paths(self): + """ Return image paths that are used to load current data""" + return self.image_paths + + def update_learning_rate(self): + """Update learning rates for all the networks; called at the end of every epoch""" + old_lr = self.optimizers[0].param_groups[0]['lr'] + for scheduler in self.schedulers: + if self.opt.lr_policy == 'plateau': + scheduler.step(self.metric) + else: + scheduler.step() + + lr = self.optimizers[0].param_groups[0]['lr'] + print('learning rate %.7f -> %.7f' % (old_lr, lr)) + + def get_current_visuals(self): + """Return visualization images. train.py will display these images with visdom, and save the images to a HTML""" + visual_ret = OrderedDict() + for name in self.visual_names: + if isinstance(name, str): + visual_ret[name] = getattr(self, name) + return visual_ret + + def get_current_losses(self): + """Return traning losses / errors. train.py will print out these errors on console, and save them to a file""" + errors_ret = OrderedDict() + for name in self.loss_names: + if isinstance(name, str): + errors_ret[name] = float(getattr(self, 'loss_' + name)) # float(...) works for both scalar tensor and float number + return errors_ret + + def save_networks(self, epoch): + """Save all the networks to the disk. 
+ + Parameters: + epoch (int) -- current epoch; used in the file name '%s_net_%s.pth' % (epoch, name) + """ + for name in self.model_names: + if isinstance(name, str): + save_filename = '%s_net_%s.pth' % (epoch, name) + save_path = os.path.join(self.save_dir, save_filename) + net = getattr(self, 'net' + name) + + if len(self.gpu_ids) > 0 and torch.cuda.is_available(): + torch.save(net.module.cpu().state_dict(), save_path) + net.cuda(self.gpu_ids[0]) + else: + torch.save(net.cpu().state_dict(), save_path) + + def unload_network(self, name): + """Unload network and gc. + """ + if isinstance(name, str): + net = getattr(self, 'net' + name) + del net + gc.collect() + devices.torch_gc() + return None + + def __patch_instance_norm_state_dict(self, state_dict, module, keys, i=0): + """Fix InstanceNorm checkpoints incompatibility (prior to 0.4)""" + key = keys[i] + if i + 1 == len(keys): # at the end, pointing to a parameter/buffer + if module.__class__.__name__.startswith('InstanceNorm') and \ + (key == 'running_mean' or key == 'running_var'): + if getattr(module, key) is None: + state_dict.pop('.'.join(keys)) + if module.__class__.__name__.startswith('InstanceNorm') and \ + (key == 'num_batches_tracked'): + state_dict.pop('.'.join(keys)) + else: + self.__patch_instance_norm_state_dict(state_dict, getattr(module, key), keys, i + 1) + + def load_networks(self, epoch): + """Load all the networks from the disk. + + Parameters: + epoch (int) -- current epoch; used in the file name '%s_net_%s.pth' % (epoch, name) + """ + for name in self.model_names: + if isinstance(name, str): + load_filename = '%s_net_%s.pth' % (epoch, name) + load_path = os.path.join(self.save_dir, load_filename) + net = getattr(self, 'net' + name) + if isinstance(net, torch.nn.DataParallel): + net = net.module + # print('Loading depth boost model from %s' % load_path) + # if you are using PyTorch newer than 0.4 (e.g., built from + # GitHub source), you can remove str() on self.device + state_dict = torch.load(load_path, map_location=str(self.device)) + if hasattr(state_dict, '_metadata'): + del state_dict._metadata + + # patch InstanceNorm checkpoints prior to 0.4 + for key in list(state_dict.keys()): # need to copy keys here because we mutate in loop + self.__patch_instance_norm_state_dict(state_dict, net, key.split('.')) + net.load_state_dict(state_dict) + + def print_networks(self, verbose): + """Print the total number of parameters in the network and (if verbose) network architecture + + Parameters: + verbose (bool) -- if verbose: print the network architecture + """ + print('---------- Networks initialized -------------') + for name in self.model_names: + if isinstance(name, str): + net = getattr(self, 'net' + name) + num_params = 0 + for param in net.parameters(): + num_params += param.numel() + if verbose: + print(net) + print('[Network %s] Total number of parameters : %.3f M' % (name, num_params / 1e6)) + print('-----------------------------------------------') + + def set_requires_grad(self, nets, requires_grad=False): + """Set requies_grad=Fasle for all the networks to avoid unnecessary computations + Parameters: + nets (network list) -- a list of networks + requires_grad (bool) -- whether the networks require gradients or not + """ + if not isinstance(nets, list): + nets = [nets] + for net in nets: + if net is not None: + for param in net.parameters(): + param.requires_grad = requires_grad diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/models/base_model_hg.py 
b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/models/base_model_hg.py new file mode 100644 index 0000000000000000000000000000000000000000..1709accdf0b048b3793dfd1f58d1b06c35f7b907 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/models/base_model_hg.py @@ -0,0 +1,58 @@ +import os +import torch + +class BaseModelHG(): + def name(self): + return 'BaseModel' + + def initialize(self, opt): + self.opt = opt + self.gpu_ids = opt.gpu_ids + self.isTrain = opt.isTrain + self.Tensor = torch.cuda.FloatTensor if self.gpu_ids else torch.Tensor + self.save_dir = os.path.join(opt.checkpoints_dir, opt.name) + + def set_input(self, input): + self.input = input + + def forward(self): + pass + + # used in test time, no backprop + def test(self): + pass + + def get_image_paths(self): + pass + + def optimize_parameters(self): + pass + + def get_current_visuals(self): + return self.input + + def get_current_errors(self): + return {} + + def save(self, label): + pass + + # helper saving function that can be used by subclasses + def save_network(self, network, network_label, epoch_label, gpu_ids): + save_filename = '_%s_net_%s.pth' % (epoch_label, network_label) + save_path = os.path.join(self.save_dir, save_filename) + torch.save(network.cpu().state_dict(), save_path) + if len(gpu_ids) and torch.cuda.is_available(): + network.cuda(device_id=gpu_ids[0]) + + # helper loading function that can be used by subclasses + def load_network(self, network, network_label, epoch_label): + save_filename = '%s_net_%s.pth' % (epoch_label, network_label) + save_path = os.path.join(self.save_dir, save_filename) + print(save_path) + model = torch.load(save_path) + return model + # network.load_state_dict(torch.load(save_path)) + + def update_learning_rate(): + pass diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/models/networks.py b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/models/networks.py new file mode 100644 index 0000000000000000000000000000000000000000..0cf912b2973721a02deefd042af621e732bad59f --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/models/networks.py @@ -0,0 +1,623 @@ +import torch +import torch.nn as nn +from torch.nn import init +import functools +from torch.optim import lr_scheduler + + +############################################################################### +# Helper Functions +############################################################################### + + +class Identity(nn.Module): + def forward(self, x): + return x + + +def get_norm_layer(norm_type='instance'): + """Return a normalization layer + + Parameters: + norm_type (str) -- the name of the normalization layer: batch | instance | none + + For BatchNorm, we use learnable affine parameters and track running statistics (mean/stddev). + For InstanceNorm, we do not use learnable affine parameters. We do not track running statistics. 
+ """ + if norm_type == 'batch': + norm_layer = functools.partial(nn.BatchNorm2d, affine=True, track_running_stats=True) + elif norm_type == 'instance': + norm_layer = functools.partial(nn.InstanceNorm2d, affine=False, track_running_stats=False) + elif norm_type == 'none': + def norm_layer(x): return Identity() + else: + raise NotImplementedError('normalization layer [%s] is not found' % norm_type) + return norm_layer + + +def get_scheduler(optimizer, opt): + """Return a learning rate scheduler + + Parameters: + optimizer -- the optimizer of the network + opt (option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions.  + opt.lr_policy is the name of learning rate policy: linear | step | plateau | cosine + + For 'linear', we keep the same learning rate for the first epochs + and linearly decay the rate to zero over the next epochs. + For other schedulers (step, plateau, and cosine), we use the default PyTorch schedulers. + See https://pytorch.org/docs/stable/optim.html for more details. + """ + if opt.lr_policy == 'linear': + def lambda_rule(epoch): + lr_l = 1.0 - max(0, epoch + opt.epoch_count - opt.n_epochs) / float(opt.n_epochs_decay + 1) + return lr_l + scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_rule) + elif opt.lr_policy == 'step': + scheduler = lr_scheduler.StepLR(optimizer, step_size=opt.lr_decay_iters, gamma=0.1) + elif opt.lr_policy == 'plateau': + scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.2, threshold=0.01, patience=5) + elif opt.lr_policy == 'cosine': + scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=opt.n_epochs, eta_min=0) + else: + return NotImplementedError('learning rate policy [%s] is not implemented', opt.lr_policy) + return scheduler + + +def init_weights(net, init_type='normal', init_gain=0.02): + """Initialize network weights. + + Parameters: + net (network) -- network to be initialized + init_type (str) -- the name of an initialization method: normal | xavier | kaiming | orthogonal + init_gain (float) -- scaling factor for normal, xavier and orthogonal. + + We use 'normal' in the original pix2pix and CycleGAN paper. But xavier and kaiming might + work better for some applications. Feel free to try yourself. + """ + def init_func(m): # define the initialization function + classname = m.__class__.__name__ + if hasattr(m, 'weight') and (classname.find('Conv') != -1 or classname.find('Linear') != -1): + if init_type == 'normal': + init.normal_(m.weight.data, 0.0, init_gain) + elif init_type == 'xavier': + init.xavier_normal_(m.weight.data, gain=init_gain) + elif init_type == 'kaiming': + init.kaiming_normal_(m.weight.data, a=0, mode='fan_in') + elif init_type == 'orthogonal': + init.orthogonal_(m.weight.data, gain=init_gain) + else: + raise NotImplementedError('initialization method [%s] is not implemented' % init_type) + if hasattr(m, 'bias') and m.bias is not None: + init.constant_(m.bias.data, 0.0) + elif classname.find('BatchNorm2d') != -1: # BatchNorm Layer's weight is not a matrix; only normal distribution applies. + init.normal_(m.weight.data, 1.0, init_gain) + init.constant_(m.bias.data, 0.0) + + # print('initialize network with %s' % init_type) + net.apply(init_func) # apply the initialization function + + +def init_net(net, init_type='normal', init_gain=0.02, gpu_ids=[]): + """Initialize a network: 1. register CPU/GPU device (with multi-GPU support); 2. 
initialize the network weights + Parameters: + net (network) -- the network to be initialized + init_type (str) -- the name of an initialization method: normal | xavier | kaiming | orthogonal + gain (float) -- scaling factor for normal, xavier and orthogonal. + gpu_ids (int list) -- which GPUs the network runs on: e.g., 0,1,2 + + Return an initialized network. + """ + if len(gpu_ids) > 0: + assert(torch.cuda.is_available()) + net.to(gpu_ids[0]) + net = torch.nn.DataParallel(net, gpu_ids) # multi-GPUs + init_weights(net, init_type, init_gain=init_gain) + return net + + +def define_G(input_nc, output_nc, ngf, netG, norm='batch', use_dropout=False, init_type='normal', init_gain=0.02, gpu_ids=[]): + """Create a generator + + Parameters: + input_nc (int) -- the number of channels in input images + output_nc (int) -- the number of channels in output images + ngf (int) -- the number of filters in the last conv layer + netG (str) -- the architecture's name: resnet_9blocks | resnet_6blocks | unet_256 | unet_128 + norm (str) -- the name of normalization layers used in the network: batch | instance | none + use_dropout (bool) -- if use dropout layers. + init_type (str) -- the name of our initialization method. + init_gain (float) -- scaling factor for normal, xavier and orthogonal. + gpu_ids (int list) -- which GPUs the network runs on: e.g., 0,1,2 + + Returns a generator + + Our current implementation provides two types of generators: + U-Net: [unet_128] (for 128x128 input images) and [unet_256] (for 256x256 input images) + The original U-Net paper: https://arxiv.org/abs/1505.04597 + + Resnet-based generator: [resnet_6blocks] (with 6 Resnet blocks) and [resnet_9blocks] (with 9 Resnet blocks) + Resnet-based generator consists of several Resnet blocks between a few downsampling/upsampling operations. + We adapt Torch code from Justin Johnson's neural style transfer project (https://github.com/jcjohnson/fast-neural-style). + + + The generator has been initialized by . It uses RELU for non-linearity. 
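As a hedged usage sketch (assuming the module is importable as `networks`; channel counts and sizes are illustrative), a 256x256 U-Net generator could be instantiated and run on CPU like this:

import torch
import networks  # assumed import name for this module

netG = networks.define_G(input_nc=3, output_nc=3, ngf=64, netG='unet_256',
                         norm='batch', use_dropout=True, gpu_ids=[])  # empty gpu_ids -> CPU
fake = netG(torch.randn(1, 3, 256, 256))  # -> (1, 3, 256, 256), squashed to [-1, 1] by the final Tanh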
+ """ + net = None + norm_layer = get_norm_layer(norm_type=norm) + + if netG == 'resnet_9blocks': + net = ResnetGenerator(input_nc, output_nc, ngf, norm_layer=norm_layer, use_dropout=use_dropout, n_blocks=9) + elif netG == 'resnet_6blocks': + net = ResnetGenerator(input_nc, output_nc, ngf, norm_layer=norm_layer, use_dropout=use_dropout, n_blocks=6) + elif netG == 'resnet_12blocks': + net = ResnetGenerator(input_nc, output_nc, ngf, norm_layer=norm_layer, use_dropout=use_dropout, n_blocks=12) + elif netG == 'unet_128': + net = UnetGenerator(input_nc, output_nc, 7, ngf, norm_layer=norm_layer, use_dropout=use_dropout) + elif netG == 'unet_256': + net = UnetGenerator(input_nc, output_nc, 8, ngf, norm_layer=norm_layer, use_dropout=use_dropout) + elif netG == 'unet_672': + net = UnetGenerator(input_nc, output_nc, 5, ngf, norm_layer=norm_layer, use_dropout=use_dropout) + elif netG == 'unet_960': + net = UnetGenerator(input_nc, output_nc, 6, ngf, norm_layer=norm_layer, use_dropout=use_dropout) + elif netG == 'unet_1024': + net = UnetGenerator(input_nc, output_nc, 10, ngf, norm_layer=norm_layer, use_dropout=use_dropout) + else: + raise NotImplementedError('Generator model name [%s] is not recognized' % netG) + return init_net(net, init_type, init_gain, gpu_ids) + + +def define_D(input_nc, ndf, netD, n_layers_D=3, norm='batch', init_type='normal', init_gain=0.02, gpu_ids=[]): + """Create a discriminator + + Parameters: + input_nc (int) -- the number of channels in input images + ndf (int) -- the number of filters in the first conv layer + netD (str) -- the architecture's name: basic | n_layers | pixel + n_layers_D (int) -- the number of conv layers in the discriminator; effective when netD=='n_layers' + norm (str) -- the type of normalization layers used in the network. + init_type (str) -- the name of the initialization method. + init_gain (float) -- scaling factor for normal, xavier and orthogonal. + gpu_ids (int list) -- which GPUs the network runs on: e.g., 0,1,2 + + Returns a discriminator + + Our current implementation provides three types of discriminators: + [basic]: 'PatchGAN' classifier described in the original pix2pix paper. + It can classify whether 70×70 overlapping patches are real or fake. + Such a patch-level discriminator architecture has fewer parameters + than a full-image discriminator and can work on arbitrarily-sized images + in a fully convolutional fashion. + + [n_layers]: With this mode, you can specify the number of conv layers in the discriminator + with the parameter (default=3 as used in [basic] (PatchGAN).) + + [pixel]: 1x1 PixelGAN discriminator can classify whether a pixel is real or not. + It encourages greater color diversity but has no effect on spatial statistics. + + The discriminator has been initialized by . It uses Leakly RELU for non-linearity. 
+ """ + net = None + norm_layer = get_norm_layer(norm_type=norm) + + if netD == 'basic': # default PatchGAN classifier + net = NLayerDiscriminator(input_nc, ndf, n_layers=3, norm_layer=norm_layer) + elif netD == 'n_layers': # more options + net = NLayerDiscriminator(input_nc, ndf, n_layers_D, norm_layer=norm_layer) + elif netD == 'pixel': # classify if each pixel is real or fake + net = PixelDiscriminator(input_nc, ndf, norm_layer=norm_layer) + else: + raise NotImplementedError('Discriminator model name [%s] is not recognized' % netD) + return init_net(net, init_type, init_gain, gpu_ids) + + +############################################################################## +# Classes +############################################################################## +class GANLoss(nn.Module): + """Define different GAN objectives. + + The GANLoss class abstracts away the need to create the target label tensor + that has the same size as the input. + """ + + def __init__(self, gan_mode, target_real_label=1.0, target_fake_label=0.0): + """ Initialize the GANLoss class. + + Parameters: + gan_mode (str) - - the type of GAN objective. It currently supports vanilla, lsgan, and wgangp. + target_real_label (bool) - - label for a real image + target_fake_label (bool) - - label of a fake image + + Note: Do not use sigmoid as the last layer of Discriminator. + LSGAN needs no sigmoid. vanilla GANs will handle it with BCEWithLogitsLoss. + """ + super(GANLoss, self).__init__() + self.register_buffer('real_label', torch.tensor(target_real_label)) + self.register_buffer('fake_label', torch.tensor(target_fake_label)) + self.gan_mode = gan_mode + if gan_mode == 'lsgan': + self.loss = nn.MSELoss() + elif gan_mode == 'vanilla': + self.loss = nn.BCEWithLogitsLoss() + elif gan_mode in ['wgangp']: + self.loss = None + else: + raise NotImplementedError('gan mode %s not implemented' % gan_mode) + + def get_target_tensor(self, prediction, target_is_real): + """Create label tensors with the same size as the input. + + Parameters: + prediction (tensor) - - tpyically the prediction from a discriminator + target_is_real (bool) - - if the ground truth label is for real images or fake images + + Returns: + A label tensor filled with ground truth label, and with the size of the input + """ + + if target_is_real: + target_tensor = self.real_label + else: + target_tensor = self.fake_label + return target_tensor.expand_as(prediction) + + def __call__(self, prediction, target_is_real): + """Calculate loss given Discriminator's output and grount truth labels. + + Parameters: + prediction (tensor) - - tpyically the prediction output from a discriminator + target_is_real (bool) - - if the ground truth label is for real images or fake images + + Returns: + the calculated loss. 
+ """ + if self.gan_mode in ['lsgan', 'vanilla']: + target_tensor = self.get_target_tensor(prediction, target_is_real) + loss = self.loss(prediction, target_tensor) + elif self.gan_mode == 'wgangp': + if target_is_real: + loss = -prediction.mean() + else: + loss = prediction.mean() + return loss + + +def cal_gradient_penalty(netD, real_data, fake_data, device, type='mixed', constant=1.0, lambda_gp=10.0): + """Calculate the gradient penalty loss, used in WGAN-GP paper https://arxiv.org/abs/1704.00028 + + Arguments: + netD (network) -- discriminator network + real_data (tensor array) -- real images + fake_data (tensor array) -- generated images from the generator + device (str) -- GPU / CPU: from torch.device('cuda:{}'.format(self.gpu_ids[0])) if self.gpu_ids else torch.device('cpu') + type (str) -- if we mix real and fake data or not [real | fake | mixed]. + constant (float) -- the constant used in formula ( ||gradient||_2 - constant)^2 + lambda_gp (float) -- weight for this loss + + Returns the gradient penalty loss + """ + if lambda_gp > 0.0: + if type == 'real': # either use real images, fake images, or a linear interpolation of two. + interpolatesv = real_data + elif type == 'fake': + interpolatesv = fake_data + elif type == 'mixed': + alpha = torch.rand(real_data.shape[0], 1, device=device) + alpha = alpha.expand(real_data.shape[0], real_data.nelement() // real_data.shape[0]).contiguous().view(*real_data.shape) + interpolatesv = alpha * real_data + ((1 - alpha) * fake_data) + else: + raise NotImplementedError('{} not implemented'.format(type)) + interpolatesv.requires_grad_(True) + disc_interpolates = netD(interpolatesv) + gradients = torch.autograd.grad(outputs=disc_interpolates, inputs=interpolatesv, + grad_outputs=torch.ones(disc_interpolates.size()).to(device), + create_graph=True, retain_graph=True, only_inputs=True) + gradients = gradients[0].view(real_data.size(0), -1) # flat the data + gradient_penalty = (((gradients + 1e-16).norm(2, dim=1) - constant) ** 2).mean() * lambda_gp # added eps + return gradient_penalty, gradients + else: + return 0.0, None + + +class ResnetGenerator(nn.Module): + """Resnet-based generator that consists of Resnet blocks between a few downsampling/upsampling operations. 
+ + We adapt Torch code and idea from Justin Johnson's neural style transfer project(https://github.com/jcjohnson/fast-neural-style) + """ + + def __init__(self, input_nc, output_nc, ngf=64, norm_layer=nn.BatchNorm2d, use_dropout=False, n_blocks=6, padding_type='reflect'): + """Construct a Resnet-based generator + + Parameters: + input_nc (int) -- the number of channels in input images + output_nc (int) -- the number of channels in output images + ngf (int) -- the number of filters in the last conv layer + norm_layer -- normalization layer + use_dropout (bool) -- if use dropout layers + n_blocks (int) -- the number of ResNet blocks + padding_type (str) -- the name of padding layer in conv layers: reflect | replicate | zero + """ + assert(n_blocks >= 0) + super(ResnetGenerator, self).__init__() + if type(norm_layer) == functools.partial: + use_bias = norm_layer.func == nn.InstanceNorm2d + else: + use_bias = norm_layer == nn.InstanceNorm2d + + model = [nn.ReflectionPad2d(3), + nn.Conv2d(input_nc, ngf, kernel_size=7, padding=0, bias=use_bias), + norm_layer(ngf), + nn.ReLU(True)] + + n_downsampling = 2 + for i in range(n_downsampling): # add downsampling layers + mult = 2 ** i + model += [nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3, stride=2, padding=1, bias=use_bias), + norm_layer(ngf * mult * 2), + nn.ReLU(True)] + + mult = 2 ** n_downsampling + for i in range(n_blocks): # add ResNet blocks + + model += [ResnetBlock(ngf * mult, padding_type=padding_type, norm_layer=norm_layer, use_dropout=use_dropout, use_bias=use_bias)] + + for i in range(n_downsampling): # add upsampling layers + mult = 2 ** (n_downsampling - i) + model += [nn.ConvTranspose2d(ngf * mult, int(ngf * mult / 2), + kernel_size=3, stride=2, + padding=1, output_padding=1, + bias=use_bias), + norm_layer(int(ngf * mult / 2)), + nn.ReLU(True)] + model += [nn.ReflectionPad2d(3)] + model += [nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0)] + model += [nn.Tanh()] + + self.model = nn.Sequential(*model) + + def forward(self, input): + """Standard forward""" + return self.model(input) + + +class ResnetBlock(nn.Module): + """Define a Resnet block""" + + def __init__(self, dim, padding_type, norm_layer, use_dropout, use_bias): + """Initialize the Resnet block + + A resnet block is a conv block with skip connections + We construct a conv block with build_conv_block function, + and implement skip connections in function. + Original Resnet paper: https://arxiv.org/pdf/1512.03385.pdf + """ + super(ResnetBlock, self).__init__() + self.conv_block = self.build_conv_block(dim, padding_type, norm_layer, use_dropout, use_bias) + + def build_conv_block(self, dim, padding_type, norm_layer, use_dropout, use_bias): + """Construct a convolutional block. + + Parameters: + dim (int) -- the number of channels in the conv layer. + padding_type (str) -- the name of padding layer: reflect | replicate | zero + norm_layer -- normalization layer + use_dropout (bool) -- if use dropout layers. 
+ use_bias (bool) -- if the conv layer uses bias or not + + Returns a conv block (with a conv layer, a normalization layer, and a non-linearity layer (ReLU)) + """ + conv_block = [] + p = 0 + if padding_type == 'reflect': + conv_block += [nn.ReflectionPad2d(1)] + elif padding_type == 'replicate': + conv_block += [nn.ReplicationPad2d(1)] + elif padding_type == 'zero': + p = 1 + else: + raise NotImplementedError('padding [%s] is not implemented' % padding_type) + + conv_block += [nn.Conv2d(dim, dim, kernel_size=3, padding=p, bias=use_bias), norm_layer(dim), nn.ReLU(True)] + if use_dropout: + conv_block += [nn.Dropout(0.5)] + + p = 0 + if padding_type == 'reflect': + conv_block += [nn.ReflectionPad2d(1)] + elif padding_type == 'replicate': + conv_block += [nn.ReplicationPad2d(1)] + elif padding_type == 'zero': + p = 1 + else: + raise NotImplementedError('padding [%s] is not implemented' % padding_type) + conv_block += [nn.Conv2d(dim, dim, kernel_size=3, padding=p, bias=use_bias), norm_layer(dim)] + + return nn.Sequential(*conv_block) + + def forward(self, x): + """Forward function (with skip connections)""" + out = x + self.conv_block(x) # add skip connections + return out + + +class UnetGenerator(nn.Module): + """Create a Unet-based generator""" + + def __init__(self, input_nc, output_nc, num_downs, ngf=64, norm_layer=nn.BatchNorm2d, use_dropout=False): + """Construct a Unet generator + Parameters: + input_nc (int) -- the number of channels in input images + output_nc (int) -- the number of channels in output images + num_downs (int) -- the number of downsamplings in UNet. For example, # if |num_downs| == 7, + image of size 128x128 will become of size 1x1 # at the bottleneck + ngf (int) -- the number of filters in the last conv layer + norm_layer -- normalization layer + + We construct the U-Net from the innermost layer to the outermost layer. + It is a recursive process. + """ + super(UnetGenerator, self).__init__() + # construct unet structure + unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=None, norm_layer=norm_layer, innermost=True) # add the innermost layer + for i in range(num_downs - 5): # add intermediate layers with ngf * 8 filters + unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer, use_dropout=use_dropout) + # gradually reduce the number of filters from ngf * 8 to ngf + unet_block = UnetSkipConnectionBlock(ngf * 4, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer) + unet_block = UnetSkipConnectionBlock(ngf * 2, ngf * 4, input_nc=None, submodule=unet_block, norm_layer=norm_layer) + unet_block = UnetSkipConnectionBlock(ngf, ngf * 2, input_nc=None, submodule=unet_block, norm_layer=norm_layer) + self.model = UnetSkipConnectionBlock(output_nc, ngf, input_nc=input_nc, submodule=unet_block, outermost=True, norm_layer=norm_layer) # add the outermost layer + + def forward(self, input): + """Standard forward""" + return self.model(input) + + +class UnetSkipConnectionBlock(nn.Module): + """Defines the Unet submodule with skip connection. + X -------------------identity---------------------- + |-- downsampling -- |submodule| -- upsampling --| + """ + + def __init__(self, outer_nc, inner_nc, input_nc=None, + submodule=None, outermost=False, innermost=False, norm_layer=nn.BatchNorm2d, use_dropout=False): + """Construct a Unet submodule with skip connections. 
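A hedged sketch of the num_downs relationship described above (the values mirror the 'unet_1024' branch of define_G and the 2-channel depth-merge input, but are illustrative):

import torch
import networks  # assumed import name for this module

# ten downsamplings halve 1024 ten times, so the bottleneck is 1x1
net = networks.UnetGenerator(input_nc=2, output_nc=1, num_downs=10, ngf=64)
out = net(torch.randn(1, 2, 1024, 1024))  # -> (1, 1, 1024, 1024)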
+ + Parameters: + outer_nc (int) -- the number of filters in the outer conv layer + inner_nc (int) -- the number of filters in the inner conv layer + input_nc (int) -- the number of channels in input images/features + submodule (UnetSkipConnectionBlock) -- previously defined submodules + outermost (bool) -- if this module is the outermost module + innermost (bool) -- if this module is the innermost module + norm_layer -- normalization layer + use_dropout (bool) -- if use dropout layers. + """ + super(UnetSkipConnectionBlock, self).__init__() + self.outermost = outermost + if type(norm_layer) == functools.partial: + use_bias = norm_layer.func == nn.InstanceNorm2d + else: + use_bias = norm_layer == nn.InstanceNorm2d + if input_nc is None: + input_nc = outer_nc + downconv = nn.Conv2d(input_nc, inner_nc, kernel_size=4, + stride=2, padding=1, bias=use_bias) + downrelu = nn.LeakyReLU(0.2, True) + downnorm = norm_layer(inner_nc) + uprelu = nn.ReLU(True) + upnorm = norm_layer(outer_nc) + + if outermost: + upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc, + kernel_size=4, stride=2, + padding=1) + down = [downconv] + up = [uprelu, upconv, nn.Tanh()] + model = down + [submodule] + up + elif innermost: + upconv = nn.ConvTranspose2d(inner_nc, outer_nc, + kernel_size=4, stride=2, + padding=1, bias=use_bias) + down = [downrelu, downconv] + up = [uprelu, upconv, upnorm] + model = down + up + else: + upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc, + kernel_size=4, stride=2, + padding=1, bias=use_bias) + down = [downrelu, downconv, downnorm] + up = [uprelu, upconv, upnorm] + + if use_dropout: + model = down + [submodule] + up + [nn.Dropout(0.5)] + else: + model = down + [submodule] + up + + self.model = nn.Sequential(*model) + + def forward(self, x): + if self.outermost: + return self.model(x) + else: # add skip connections + return torch.cat([x, self.model(x)], 1) + + +class NLayerDiscriminator(nn.Module): + """Defines a PatchGAN discriminator""" + + def __init__(self, input_nc, ndf=64, n_layers=3, norm_layer=nn.BatchNorm2d): + """Construct a PatchGAN discriminator + + Parameters: + input_nc (int) -- the number of channels in input images + ndf (int) -- the number of filters in the last conv layer + n_layers (int) -- the number of conv layers in the discriminator + norm_layer -- normalization layer + """ + super(NLayerDiscriminator, self).__init__() + if type(norm_layer) == functools.partial: # no need to use bias as BatchNorm2d has affine parameters + use_bias = norm_layer.func == nn.InstanceNorm2d + else: + use_bias = norm_layer == nn.InstanceNorm2d + + kw = 4 + padw = 1 + sequence = [nn.Conv2d(input_nc, ndf, kernel_size=kw, stride=2, padding=padw), nn.LeakyReLU(0.2, True)] + nf_mult = 1 + nf_mult_prev = 1 + for n in range(1, n_layers): # gradually increase the number of filters + nf_mult_prev = nf_mult + nf_mult = min(2 ** n, 8) + sequence += [ + nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult, kernel_size=kw, stride=2, padding=padw, bias=use_bias), + norm_layer(ndf * nf_mult), + nn.LeakyReLU(0.2, True) + ] + + nf_mult_prev = nf_mult + nf_mult = min(2 ** n_layers, 8) + sequence += [ + nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult, kernel_size=kw, stride=1, padding=padw, bias=use_bias), + norm_layer(ndf * nf_mult), + nn.LeakyReLU(0.2, True) + ] + + sequence += [nn.Conv2d(ndf * nf_mult, 1, kernel_size=kw, stride=1, padding=padw)] # output 1 channel prediction map + self.model = nn.Sequential(*sequence) + + def forward(self, input): + """Standard forward.""" + return self.model(input) + + +class 
PixelDiscriminator(nn.Module): + """Defines a 1x1 PatchGAN discriminator (pixelGAN)""" + + def __init__(self, input_nc, ndf=64, norm_layer=nn.BatchNorm2d): + """Construct a 1x1 PatchGAN discriminator + + Parameters: + input_nc (int) -- the number of channels in input images + ndf (int) -- the number of filters in the last conv layer + norm_layer -- normalization layer + """ + super(PixelDiscriminator, self).__init__() + if type(norm_layer) == functools.partial: # no need to use bias as BatchNorm2d has affine parameters + use_bias = norm_layer.func == nn.InstanceNorm2d + else: + use_bias = norm_layer == nn.InstanceNorm2d + + self.net = [ + nn.Conv2d(input_nc, ndf, kernel_size=1, stride=1, padding=0), + nn.LeakyReLU(0.2, True), + nn.Conv2d(ndf, ndf * 2, kernel_size=1, stride=1, padding=0, bias=use_bias), + norm_layer(ndf * 2), + nn.LeakyReLU(0.2, True), + nn.Conv2d(ndf * 2, 1, kernel_size=1, stride=1, padding=0, bias=use_bias)] + + self.net = nn.Sequential(*self.net) + + def forward(self, input): + """Standard forward.""" + return self.net(input) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/models/pix2pix4depth_model.py b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/models/pix2pix4depth_model.py new file mode 100644 index 0000000000000000000000000000000000000000..89e89652feb96314973a050c5a2477b474630abb --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/models/pix2pix4depth_model.py @@ -0,0 +1,155 @@ +import torch +from .base_model import BaseModel +from . import networks + + +class Pix2Pix4DepthModel(BaseModel): + """ This class implements the pix2pix model, for learning a mapping from input images to output images given paired data. + + The model training requires '--dataset_mode aligned' dataset. + By default, it uses a '--netG unet256' U-Net generator, + a '--netD basic' discriminator (PatchGAN), + and a '--gan_mode' vanilla GAN loss (the cross-entropy objective used in the orignal GAN paper). + + pix2pix paper: https://arxiv.org/pdf/1611.07004.pdf + """ + @staticmethod + def modify_commandline_options(parser, is_train=True): + """Add new dataset-specific options, and rewrite default values for existing options. + + Parameters: + parser -- original option parser + is_train (bool) -- whether training phase or test phase. You can use this flag to add training-specific or test-specific options. + + Returns: + the modified parser. + + For pix2pix, we do not use image buffer + The training objective is: GAN Loss + lambda_L1 * ||G(A)-B||_1 + By default, we use vanilla GAN loss, UNet with batchnorm, and aligned datasets. + """ + # changing the default values to match the pix2pix paper (https://phillipi.github.io/pix2pix/) + parser.set_defaults(input_nc=2,output_nc=1,norm='none', netG='unet_1024', dataset_mode='depthmerge') + if is_train: + parser.set_defaults(pool_size=0, gan_mode='vanilla',) + parser.add_argument('--lambda_L1', type=float, default=1000, help='weight for L1 loss') + return parser + + def __init__(self, opt): + """Initialize the pix2pix class. + + Parameters: + opt (Option class)-- stores all the experiment flags; needs to be a subclass of BaseOptions + """ + BaseModel.__init__(self, opt) + # specify the training losses you want to print out. The training/test scripts will call + + self.loss_names = ['G_GAN', 'G_L1', 'D_real', 'D_fake'] + # self.loss_names = ['G_L1'] + + # specify the images you want to save/display. 
The training/test scripts will call + if self.isTrain: + self.visual_names = ['outer','inner', 'fake_B', 'real_B'] + else: + self.visual_names = ['fake_B'] + + # specify the models you want to save to the disk. The training/test scripts will call and + if self.isTrain: + self.model_names = ['G','D'] + else: # during test time, only load G + self.model_names = ['G'] + + # define networks (both generator and discriminator) + self.netG = networks.define_G(opt.input_nc, opt.output_nc, 64, 'unet_1024', 'none', + False, 'normal', 0.02, self.gpu_ids) + + if self.isTrain: # define a discriminator; conditional GANs need to take both input and output images; Therefore, #channels for D is input_nc + output_nc + self.netD = networks.define_D(opt.input_nc + opt.output_nc, opt.ndf, opt.netD, + opt.n_layers_D, opt.norm, opt.init_type, opt.init_gain, self.gpu_ids) + + if self.isTrain: + # define loss functions + self.criterionGAN = networks.GANLoss(opt.gan_mode).to(self.device) + self.criterionL1 = torch.nn.L1Loss() + # initialize optimizers; schedulers will be automatically created by function . + self.optimizer_G = torch.optim.Adam(self.netG.parameters(), lr=1e-4, betas=(opt.beta1, 0.999)) + self.optimizer_D = torch.optim.Adam(self.netD.parameters(), lr=2e-06, betas=(opt.beta1, 0.999)) + self.optimizers.append(self.optimizer_G) + self.optimizers.append(self.optimizer_D) + + def set_input_train(self, input): + self.outer = input['data_outer'].to(self.device) + self.outer = torch.nn.functional.interpolate(self.outer,(1024,1024),mode='bilinear',align_corners=False) + + self.inner = input['data_inner'].to(self.device) + self.inner = torch.nn.functional.interpolate(self.inner,(1024,1024),mode='bilinear',align_corners=False) + + self.image_paths = input['image_path'] + + if self.isTrain: + self.gtfake = input['data_gtfake'].to(self.device) + self.gtfake = torch.nn.functional.interpolate(self.gtfake, (1024, 1024), mode='bilinear', align_corners=False) + self.real_B = self.gtfake + + self.real_A = torch.cat((self.outer, self.inner), 1) + + def set_input(self, outer, inner): + inner = torch.from_numpy(inner).unsqueeze(0).unsqueeze(0) + outer = torch.from_numpy(outer).unsqueeze(0).unsqueeze(0) + + inner = (inner - torch.min(inner))/(torch.max(inner)-torch.min(inner)) + outer = (outer - torch.min(outer))/(torch.max(outer)-torch.min(outer)) + + inner = self.normalize(inner) + outer = self.normalize(outer) + + self.real_A = torch.cat((outer, inner), 1).to(self.device) + + + def normalize(self, input): + input = input * 2 + input = input - 1 + return input + + def forward(self): + """Run forward pass; called by both functions and .""" + self.fake_B = self.netG(self.real_A) # G(A) + + def backward_D(self): + """Calculate GAN loss for the discriminator""" + # Fake; stop backprop to the generator by detaching fake_B + fake_AB = torch.cat((self.real_A, self.fake_B), 1) # we use conditional GANs; we need to feed both input and output to the discriminator + pred_fake = self.netD(fake_AB.detach()) + self.loss_D_fake = self.criterionGAN(pred_fake, False) + # Real + real_AB = torch.cat((self.real_A, self.real_B), 1) + pred_real = self.netD(real_AB) + self.loss_D_real = self.criterionGAN(pred_real, True) + # combine loss and calculate gradients + self.loss_D = (self.loss_D_fake + self.loss_D_real) * 0.5 + self.loss_D.backward() + + def backward_G(self): + """Calculate GAN and L1 loss for the generator""" + # First, G(A) should fake the discriminator + fake_AB = torch.cat((self.real_A, self.fake_B), 1) + pred_fake = 
self.netD(fake_AB) + self.loss_G_GAN = self.criterionGAN(pred_fake, True) + # Second, G(A) = B + self.loss_G_L1 = self.criterionL1(self.fake_B, self.real_B) * self.opt.lambda_L1 + # combine loss and calculate gradients + self.loss_G = self.loss_G_L1 + self.loss_G_GAN + self.loss_G.backward() + + def optimize_parameters(self): + self.forward() # compute fake images: G(A) + # update D + self.set_requires_grad(self.netD, True) # enable backprop for D + self.optimizer_D.zero_grad() # set D's gradients to zero + self.backward_D() # calculate gradients for D + self.optimizer_D.step() # update D's weights + # update G + self.set_requires_grad(self.netD, False) # D requires no gradients when optimizing G + self.optimizer_G.zero_grad() # set G's gradients to zero + self.backward_G() # calculate graidents for G + self.optimizer_G.step() # udpate G's weights \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/options/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/options/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e7eedebe54aa70169fd25951b3034d819e396c90 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/options/__init__.py @@ -0,0 +1 @@ +"""This package options includes option modules: training options, test options, and basic options (used in both training and test).""" diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/options/base_options.py b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/options/base_options.py new file mode 100644 index 0000000000000000000000000000000000000000..533a1e88a7e8494223f6994e6861c93667754f83 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/options/base_options.py @@ -0,0 +1,156 @@ +import argparse +import os +from ...pix2pix.util import util +# import torch +from ...pix2pix import models +# import pix2pix.data +import numpy as np + +class BaseOptions(): + """This class defines options used during both training and test time. + + It also implements several helper functions such as parsing, printing, and saving the options. + It also gathers additional options defined in functions in both dataset class and model class. + """ + + def __init__(self): + """Reset the class; indicates the class hasn't been initailized""" + self.initialized = False + + def initialize(self, parser): + """Define the common options that are used in both training and test.""" + # basic parameters + parser.add_argument('--dataroot', help='path to images (should have subfolders trainA, trainB, valA, valB, etc)') + parser.add_argument('--name', type=str, default='void', help='mahdi_unet_new, scaled_unet') + parser.add_argument('--gpu_ids', type=str, default='0', help='gpu ids: e.g. 0 0,1,2, 0,2. use -1 for CPU') + parser.add_argument('--checkpoints_dir', type=str, default='./pix2pix/checkpoints', help='models are saved here') + # model parameters + parser.add_argument('--model', type=str, default='cycle_gan', help='chooses which model to use. 
[cycle_gan | pix2pix | test | colorization]') + parser.add_argument('--input_nc', type=int, default=2, help='# of input image channels: 3 for RGB and 1 for grayscale') + parser.add_argument('--output_nc', type=int, default=1, help='# of output image channels: 3 for RGB and 1 for grayscale') + parser.add_argument('--ngf', type=int, default=64, help='# of gen filters in the last conv layer') + parser.add_argument('--ndf', type=int, default=64, help='# of discrim filters in the first conv layer') + parser.add_argument('--netD', type=str, default='basic', help='specify discriminator architecture [basic | n_layers | pixel]. The basic model is a 70x70 PatchGAN. n_layers allows you to specify the layers in the discriminator') + parser.add_argument('--netG', type=str, default='resnet_9blocks', help='specify generator architecture [resnet_9blocks | resnet_6blocks | unet_256 | unet_128]') + parser.add_argument('--n_layers_D', type=int, default=3, help='only used if netD==n_layers') + parser.add_argument('--norm', type=str, default='instance', help='instance normalization or batch normalization [instance | batch | none]') + parser.add_argument('--init_type', type=str, default='normal', help='network initialization [normal | xavier | kaiming | orthogonal]') + parser.add_argument('--init_gain', type=float, default=0.02, help='scaling factor for normal, xavier and orthogonal.') + parser.add_argument('--no_dropout', action='store_true', help='no dropout for the generator') + # dataset parameters + parser.add_argument('--dataset_mode', type=str, default='unaligned', help='chooses how datasets are loaded. [unaligned | aligned | single | colorization]') + parser.add_argument('--direction', type=str, default='AtoB', help='AtoB or BtoA') + parser.add_argument('--serial_batches', action='store_true', help='if true, takes images in order to make batches, otherwise takes them randomly') + parser.add_argument('--num_threads', default=4, type=int, help='# threads for loading data') + parser.add_argument('--batch_size', type=int, default=1, help='input batch size') + parser.add_argument('--load_size', type=int, default=672, help='scale images to this size') + parser.add_argument('--crop_size', type=int, default=672, help='then crop to this size') + parser.add_argument('--max_dataset_size', type=int, default=10000, help='Maximum number of samples allowed per dataset. If the dataset directory contains more than max_dataset_size, only a subset is loaded.') + parser.add_argument('--preprocess', type=str, default='resize_and_crop', help='scaling and cropping of images at load time [resize_and_crop | crop | scale_width | scale_width_and_crop | none]') + parser.add_argument('--no_flip', action='store_true', help='if specified, do not flip the images for data augmentation') + parser.add_argument('--display_winsize', type=int, default=256, help='display window size for both visdom and HTML') + # additional parameters + parser.add_argument('--epoch', type=str, default='latest', help='which epoch to load? set to latest to use latest cached model') + parser.add_argument('--load_iter', type=int, default='0', help='which iteration to load? 
if load_iter > 0, the code will load models by iter_[load_iter]; otherwise, the code will load models by [epoch]') + parser.add_argument('--verbose', action='store_true', help='if specified, print more debugging information') + parser.add_argument('--suffix', default='', type=str, help='customized suffix: opt.name = opt.name + suffix: e.g., {model}_{netG}_size{load_size}') + + parser.add_argument('--data_dir', type=str, required=False, + help='input files directory images can be .png .jpg .tiff') + parser.add_argument('--output_dir', type=str, required=False, + help='result dir. result depth will be png. vides are JMPG as avi') + parser.add_argument('--savecrops', type=int, required=False) + parser.add_argument('--savewholeest', type=int, required=False) + parser.add_argument('--output_resolution', type=int, required=False, + help='0 for no restriction 1 for resize to input size') + parser.add_argument('--net_receptive_field_size', type=int, required=False) + parser.add_argument('--pix2pixsize', type=int, required=False) + parser.add_argument('--generatevideo', type=int, required=False) + parser.add_argument('--depthNet', type=int, required=False, help='0: midas 1:strurturedRL') + parser.add_argument('--R0', action='store_true') + parser.add_argument('--R20', action='store_true') + parser.add_argument('--Final', action='store_true') + parser.add_argument('--colorize_results', action='store_true') + parser.add_argument('--max_res', type=float, default=np.inf) + + self.initialized = True + return parser + + def gather_options(self): + """Initialize our parser with basic options(only once). + Add additional model-specific and dataset-specific options. + These options are defined in the function + in model and dataset classes. + """ + if not self.initialized: # check if it has been initialized + parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser = self.initialize(parser) + + # get the basic options + opt, _ = parser.parse_known_args() + + # modify model-related parser options + model_name = opt.model + model_option_setter = models.get_option_setter(model_name) + parser = model_option_setter(parser, self.isTrain) + opt, _ = parser.parse_known_args() # parse again with new defaults + + # modify dataset-related parser options + # dataset_name = opt.dataset_mode + # dataset_option_setter = pix2pix.data.get_option_setter(dataset_name) + # parser = dataset_option_setter(parser, self.isTrain) + + # save and return the parser + self.parser = parser + #return parser.parse_args() #EVIL + return opt + + def print_options(self, opt): + """Print and save options + + It will print both current options and default values(if different). 
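A hedged sketch of how this option machinery is typically driven (the import path is an assumption; TestOptions, defined further below, plugs into the same gather_options/parse flow):

from options.test_options import TestOptions  # assumed import path

opt = TestOptions().parse()   # merges base, model-specific and CLI flags; sets opt.isTrain = False
print(opt.netG, opt.norm, opt.gpu_ids)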
+ It will save options into a text file / [checkpoints_dir] / opt.txt + """ + message = '' + message += '----------------- Options ---------------\n' + for k, v in sorted(vars(opt).items()): + comment = '' + default = self.parser.get_default(k) + if v != default: + comment = '\t[default: %s]' % str(default) + message += '{:>25}: {:<30}{}\n'.format(str(k), str(v), comment) + message += '----------------- End -------------------' + print(message) + + # save to the disk + expr_dir = os.path.join(opt.checkpoints_dir, opt.name) + util.mkdirs(expr_dir) + file_name = os.path.join(expr_dir, '{}_opt.txt'.format(opt.phase)) + with open(file_name, 'wt') as opt_file: + opt_file.write(message) + opt_file.write('\n') + + def parse(self): + """Parse our options, create checkpoints directory suffix, and set up gpu device.""" + opt = self.gather_options() + opt.isTrain = self.isTrain # train or test + + # process opt.suffix + if opt.suffix: + suffix = ('_' + opt.suffix.format(**vars(opt))) if opt.suffix != '' else '' + opt.name = opt.name + suffix + + #self.print_options(opt) + + # set gpu ids + str_ids = opt.gpu_ids.split(',') + opt.gpu_ids = [] + for str_id in str_ids: + id = int(str_id) + if id >= 0: + opt.gpu_ids.append(id) + #if len(opt.gpu_ids) > 0: + # torch.cuda.set_device(opt.gpu_ids[0]) + + self.opt = opt + return self.opt diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/options/test_options.py b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/options/test_options.py new file mode 100644 index 0000000000000000000000000000000000000000..a3424b5e3b66d6813f74c8cecad691d7488d121c --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/options/test_options.py @@ -0,0 +1,22 @@ +from .base_options import BaseOptions + + +class TestOptions(BaseOptions): + """This class includes test options. + + It also includes shared options defined in BaseOptions. + """ + + def initialize(self, parser): + parser = BaseOptions.initialize(self, parser) # define shared options + parser.add_argument('--aspect_ratio', type=float, default=1.0, help='aspect ratio of result images') + parser.add_argument('--phase', type=str, default='test', help='train, val, test, etc') + # Dropout and Batchnorm has different behavioir during training and test. 
+ parser.add_argument('--eval', action='store_true', help='use eval mode during test time.') + parser.add_argument('--num_test', type=int, default=50, help='how many test images to run') + # rewrite devalue values + parser.set_defaults(model='pix2pix4depth') + # To avoid cropping, the load_size should be the same as crop_size + parser.set_defaults(load_size=parser.get_default('crop_size')) + self.isTrain = False + return parser diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/util/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/util/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ae36f63d8859ec0c60dcbfe67c4ac324e751ddf7 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/util/__init__.py @@ -0,0 +1 @@ +"""This package includes a miscellaneous collection of useful helper functions.""" diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/util/get_data.py b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/util/get_data.py new file mode 100644 index 0000000000000000000000000000000000000000..97edc3ce3c3ab6d6080dca34e73a5fb77bb715fb --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/util/get_data.py @@ -0,0 +1,110 @@ +from __future__ import print_function +import os +import tarfile +import requests +from warnings import warn +from zipfile import ZipFile +from bs4 import BeautifulSoup +from os.path import abspath, isdir, join, basename + + +class GetData(object): + """A Python script for downloading CycleGAN or pix2pix datasets. + + Parameters: + technique (str) -- One of: 'cyclegan' or 'pix2pix'. + verbose (bool) -- If True, print additional information. + + Examples: + >>> from util.get_data import GetData + >>> gd = GetData(technique='cyclegan') + >>> new_data_path = gd.get(save_path='./datasets') # options will be displayed. + + Alternatively, You can use bash scripts: 'scripts/download_pix2pix_model.sh' + and 'scripts/download_cyclegan_model.sh'. 
+ """ + + def __init__(self, technique='cyclegan', verbose=True): + url_dict = { + 'pix2pix': 'http://efrosgans.eecs.berkeley.edu/pix2pix/datasets/', + 'cyclegan': 'https://people.eecs.berkeley.edu/~taesung_park/CycleGAN/datasets' + } + self.url = url_dict.get(technique.lower()) + self._verbose = verbose + + def _print(self, text): + if self._verbose: + print(text) + + @staticmethod + def _get_options(r): + soup = BeautifulSoup(r.text, 'lxml') + options = [h.text for h in soup.find_all('a', href=True) + if h.text.endswith(('.zip', 'tar.gz'))] + return options + + def _present_options(self): + r = requests.get(self.url) + options = self._get_options(r) + print('Options:\n') + for i, o in enumerate(options): + print("{0}: {1}".format(i, o)) + choice = input("\nPlease enter the number of the " + "dataset above you wish to download:") + return options[int(choice)] + + def _download_data(self, dataset_url, save_path): + if not isdir(save_path): + os.makedirs(save_path) + + base = basename(dataset_url) + temp_save_path = join(save_path, base) + + with open(temp_save_path, "wb") as f: + r = requests.get(dataset_url) + f.write(r.content) + + if base.endswith('.tar.gz'): + obj = tarfile.open(temp_save_path) + elif base.endswith('.zip'): + obj = ZipFile(temp_save_path, 'r') + else: + raise ValueError("Unknown File Type: {0}.".format(base)) + + self._print("Unpacking Data...") + obj.extractall(save_path) + obj.close() + os.remove(temp_save_path) + + def get(self, save_path, dataset=None): + """ + + Download a dataset. + + Parameters: + save_path (str) -- A directory to save the data to. + dataset (str) -- (optional). A specific dataset to download. + Note: this must include the file extension. + If None, options will be presented for you + to choose from. + + Returns: + save_path_full (str) -- the absolute path to the downloaded data. + + """ + if dataset is None: + selected_dataset = self._present_options() + else: + selected_dataset = dataset + + save_path_full = join(save_path, selected_dataset.split('.')[0]) + + if isdir(save_path_full): + warn("\n'{0}' already exists. 
Voiding Download.".format( + save_path_full)) + else: + self._print('Downloading Data...') + url = "{0}/{1}".format(self.url, selected_dataset) + self._download_data(url, save_path=save_path) + + return abspath(save_path_full) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/util/guidedfilter.py b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/util/guidedfilter.py new file mode 100644 index 0000000000000000000000000000000000000000..d377ff12e078a5f156e9246b63573dae71825fad --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/util/guidedfilter.py @@ -0,0 +1,47 @@ +import numpy as np + +class GuidedFilter(): + def __init__(self, source, reference, r=64, eps= 0.05**2): + self.source = source; + self.reference = reference; + self.r = r + self.eps = eps + + self.smooth = self.guidedfilter(self.source,self.reference,self.r,self.eps) + + def boxfilter(self,img, r): + (rows, cols) = img.shape + imDst = np.zeros_like(img) + + imCum = np.cumsum(img, 0) + imDst[0 : r+1, :] = imCum[r : 2*r+1, :] + imDst[r+1 : rows-r, :] = imCum[2*r+1 : rows, :] - imCum[0 : rows-2*r-1, :] + imDst[rows-r: rows, :] = np.tile(imCum[rows-1, :], [r, 1]) - imCum[rows-2*r-1 : rows-r-1, :] + + imCum = np.cumsum(imDst, 1) + imDst[:, 0 : r+1] = imCum[:, r : 2*r+1] + imDst[:, r+1 : cols-r] = imCum[:, 2*r+1 : cols] - imCum[:, 0 : cols-2*r-1] + imDst[:, cols-r: cols] = np.tile(imCum[:, cols-1], [r, 1]).T - imCum[:, cols-2*r-1 : cols-r-1] + + return imDst + + def guidedfilter(self,I, p, r, eps): + (rows, cols) = I.shape + N = self.boxfilter(np.ones([rows, cols]), r) + + meanI = self.boxfilter(I, r) / N + meanP = self.boxfilter(p, r) / N + meanIp = self.boxfilter(I * p, r) / N + covIp = meanIp - meanI * meanP + + meanII = self.boxfilter(I * I, r) / N + varI = meanII - meanI * meanI + + a = covIp / (varI + eps) + b = meanP - a * meanI + + meanA = self.boxfilter(a, r) / N + meanB = self.boxfilter(b, r) / N + + q = meanA * I + meanB + return q \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/util/html.py b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/util/html.py new file mode 100644 index 0000000000000000000000000000000000000000..cc3262a1eafda34842e4dbad47bb6ba72f0c5a68 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/util/html.py @@ -0,0 +1,86 @@ +import dominate +from dominate.tags import meta, h3, table, tr, td, p, a, img, br +import os + + +class HTML: + """This HTML class allows us to save images and write texts into a single HTML file. + + It consists of functions such as (add a text header to the HTML file), + (add a row of images to the HTML file), and (save the HTML to the disk). + It is based on Python library 'dominate', a Python library for creating and manipulating HTML documents using a DOM API. + """ + + def __init__(self, web_dir, title, refresh=0): + """Initialize the HTML classes + + Parameters: + web_dir (str) -- a directory that stores the webpage. 
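A hedged usage sketch of the GuidedFilter class above (synthetic 512x512 arrays; the module is assumed importable as `guidedfilter`):

import numpy as np
from guidedfilter import GuidedFilter  # assumed import name for this module

source = np.random.rand(512, 512)       # e.g. a depth estimate
reference = np.random.rand(512, 512)    # e.g. a guidance map of the same size
gf = GuidedFilter(source, reference, r=64, eps=0.05 ** 2)
result = gf.smooth                      # filtered output, same shape as the inputs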
HTML file will be created at /index.html; images will be saved at 0: + with self.doc.head: + meta(http_equiv="refresh", content=str(refresh)) + + def get_image_dir(self): + """Return the directory that stores images""" + return self.img_dir + + def add_header(self, text): + """Insert a header to the HTML file + + Parameters: + text (str) -- the header text + """ + with self.doc: + h3(text) + + def add_images(self, ims, txts, links, width=400): + """add images to the HTML file + + Parameters: + ims (str list) -- a list of image paths + txts (str list) -- a list of image names shown on the website + links (str list) -- a list of hyperref links; when you click an image, it will redirect you to a new page + """ + self.t = table(border=1, style="table-layout: fixed;") # Insert a table + self.doc.add(self.t) + with self.t: + with tr(): + for im, txt, link in zip(ims, txts, links): + with td(style="word-wrap: break-word;", halign="center", valign="top"): + with p(): + with a(href=os.path.join('images', link)): + img(style="width:%dpx" % width, src=os.path.join('images', im)) + br() + p(txt) + + def save(self): + """save the current content to the HMTL file""" + html_file = '%s/index.html' % self.web_dir + f = open(html_file, 'wt') + f.write(self.doc.render()) + f.close() + + +if __name__ == '__main__': # we show an example usage here. + html = HTML('web/', 'test_html') + html.add_header('hello world') + + ims, txts, links = [], [], [] + for n in range(4): + ims.append('image_%d.png' % n) + txts.append('text_%d' % n) + links.append('image_%d.png' % n) + html.add_images(ims, txts, links) + html.save() diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/util/image_pool.py b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/util/image_pool.py new file mode 100644 index 0000000000000000000000000000000000000000..6d086f882bc3d1b90c529fce6cddaaa75f2005d7 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/util/image_pool.py @@ -0,0 +1,54 @@ +import random +import torch + + +class ImagePool(): + """This class implements an image buffer that stores previously generated images. + + This buffer enables us to update discriminators using a history of generated images + rather than the ones produced by the latest generators. + """ + + def __init__(self, pool_size): + """Initialize the ImagePool class + + Parameters: + pool_size (int) -- the size of image buffer, if pool_size=0, no buffer will be created + """ + self.pool_size = pool_size + if self.pool_size > 0: # create an empty pool + self.num_imgs = 0 + self.images = [] + + def query(self, images): + """Return an image from the pool. + + Parameters: + images: the latest generated images from the generator + + Returns images from the buffer. + + By 50/100, the buffer will return input images. + By 50/100, the buffer will return images previously stored in the buffer, + and insert the current images to the buffer. 
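A hedged usage sketch (batch shape is illustrative): the pool is queried with the newest fakes and may hand back older ones for the discriminator update.

import torch
from image_pool import ImagePool  # assumed import name for this module

pool = ImagePool(pool_size=50)
fake_B = torch.randn(4, 1, 256, 256)   # latest generator outputs
history = pool.query(fake_B)           # mix of current and previously stored fakes, same shape
d_input = history.detach()             # typically fed to the discriminator without generator gradients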
+ """ + if self.pool_size == 0: # if the buffer size is 0, do nothing + return images + return_images = [] + for image in images: + image = torch.unsqueeze(image.data, 0) + if self.num_imgs < self.pool_size: # if the buffer is not full; keep inserting current images to the buffer + self.num_imgs = self.num_imgs + 1 + self.images.append(image) + return_images.append(image) + else: + p = random.uniform(0, 1) + if p > 0.5: # by 50% chance, the buffer will return a previously stored image, and insert the current image into the buffer + random_id = random.randint(0, self.pool_size - 1) # randint is inclusive + tmp = self.images[random_id].clone() + self.images[random_id] = image + return_images.append(tmp) + else: # by another 50% chance, the buffer will return the current image + return_images.append(image) + return_images = torch.cat(return_images, 0) # collect all the images and return + return return_images diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/util/util.py b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/util/util.py new file mode 100644 index 0000000000000000000000000000000000000000..8a7aceaa00681cb76675df7866bf8db58c8d2caf --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/util/util.py @@ -0,0 +1,105 @@ +"""This module contains simple helper functions """ +from __future__ import print_function +import torch +import numpy as np +from PIL import Image +import os + + +def tensor2im(input_image, imtype=np.uint16): + """"Converts a Tensor array into a numpy image array. + + Parameters: + input_image (tensor) -- the input image tensor array + imtype (type) -- the desired type of the converted numpy array + """ + if not isinstance(input_image, np.ndarray): + if isinstance(input_image, torch.Tensor): # get the data from a variable + image_tensor = input_image.data + else: + return input_image + image_numpy = torch.squeeze(image_tensor).cpu().numpy() # convert it into a numpy array + image_numpy = (image_numpy + 1) / 2.0 * (2**16-1) # + else: # if it is a numpy array, do nothing + image_numpy = input_image + return image_numpy.astype(imtype) + + +def diagnose_network(net, name='network'): + """Calculate and print the mean of average absolute(gradients) + + Parameters: + net (torch network) -- Torch network + name (str) -- the name of the network + """ + mean = 0.0 + count = 0 + for param in net.parameters(): + if param.grad is not None: + mean += torch.mean(torch.abs(param.grad.data)) + count += 1 + if count > 0: + mean = mean / count + print(name) + print(mean) + + +def save_image(image_numpy, image_path, aspect_ratio=1.0): + """Save a numpy image to the disk + + Parameters: + image_numpy (numpy array) -- input numpy array + image_path (str) -- the path of the image + """ + image_pil = Image.fromarray(image_numpy) + + image_pil = image_pil.convert('I;16') + + # image_pil = Image.fromarray(image_numpy) + # h, w, _ = image_numpy.shape + # + # if aspect_ratio > 1.0: + # image_pil = image_pil.resize((h, int(w * aspect_ratio)), Image.BICUBIC) + # if aspect_ratio < 1.0: + # image_pil = image_pil.resize((int(h / aspect_ratio), w), Image.BICUBIC) + + image_pil.save(image_path) + + +def print_numpy(x, val=True, shp=False): + """Print the mean, min, max, median, std, and size of a numpy array + + Parameters: + val (bool) -- if print the values of the numpy array + shp (bool) -- if print the shape of the numpy array + """ + x = x.astype(np.float64) + if shp: + print('shape,', x.shape) + if val: + x = 
x.flatten() + print('mean = %3.3f, min = %3.3f, max = %3.3f, median = %3.3f, std=%3.3f' % ( + np.mean(x), np.min(x), np.max(x), np.median(x), np.std(x))) + + +def mkdirs(paths): + """create empty directories if they don't exist + + Parameters: + paths (str list) -- a list of directory paths + """ + if isinstance(paths, list) and not isinstance(paths, str): + for path in paths: + mkdir(path) + else: + mkdir(paths) + + +def mkdir(path): + """create a single empty directory if it didn't exist + + Parameters: + path (str) -- a single directory path + """ + if not os.path.exists(path): + os.makedirs(path) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/util/visualizer.py b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/util/visualizer.py new file mode 100644 index 0000000000000000000000000000000000000000..810a0513ab997103ace77b665c9a17f223b173c9 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/leres/pix2pix/util/visualizer.py @@ -0,0 +1,166 @@ +import numpy as np +import os +import sys +import ntpath +import time +from . import util, html +from subprocess import Popen, PIPE +import torch + + +if sys.version_info[0] == 2: + VisdomExceptionBase = Exception +else: + VisdomExceptionBase = ConnectionError + + +def save_images(webpage, visuals, image_path, aspect_ratio=1.0, width=256): + """Save images to the disk. + + Parameters: + webpage (the HTML class) -- the HTML webpage class that stores these imaegs (see html.py for more details) + visuals (OrderedDict) -- an ordered dictionary that stores (name, images (either tensor or numpy) ) pairs + image_path (str) -- the string is used to create image paths + aspect_ratio (float) -- the aspect ratio of saved images + width (int) -- the images will be resized to width x width + + This function will save images stored in 'visuals' to the HTML file specified by 'webpage'. + """ + image_dir = webpage.get_image_dir() + short_path = ntpath.basename(image_path[0]) + name = os.path.splitext(short_path)[0] + + webpage.add_header(name) + ims, txts, links = [], [], [] + + for label, im_data in visuals.items(): + im = util.tensor2im(im_data) + image_name = '%s_%s.png' % (name, label) + save_path = os.path.join(image_dir, image_name) + util.save_image(im, save_path, aspect_ratio=aspect_ratio) + ims.append(image_name) + txts.append(label) + links.append(image_name) + webpage.add_images(ims, txts, links, width=width) + + +class Visualizer(): + """This class includes several functions that can display/save images and print/save logging information. + + It uses a Python library 'visdom' for display, and a Python library 'dominate' (wrapped in 'HTML') for creating HTML files with images. 
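A hedged illustration of the 16-bit convention implemented by tensor2im and save_image in util.py above (file name and tensor are placeholders):

import torch
import util  # assumed import name for pix2pix/util/util.py

depth = torch.rand(1, 1, 64, 64) * 2 - 1     # network output in [-1, 1]
img16 = util.tensor2im(depth)                # uint16 array rescaled to [0, 65535]
util.save_image(img16, 'depth_preview.png')  # written as a 16-bit ('I;16') grayscale PNG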
+ """ + + def __init__(self, opt): + """Initialize the Visualizer class + + Parameters: + opt -- stores all the experiment flags; needs to be a subclass of BaseOptions + Step 1: Cache the training/test options + Step 2: connect to a visdom server + Step 3: create an HTML object for saveing HTML filters + Step 4: create a logging file to store training losses + """ + self.opt = opt # cache the option + self.display_id = opt.display_id + self.use_html = opt.isTrain and not opt.no_html + self.win_size = opt.display_winsize + self.name = opt.name + self.port = opt.display_port + self.saved = False + + if self.use_html: # create an HTML object at /web/; images will be saved under /web/images/ + self.web_dir = os.path.join(opt.checkpoints_dir, opt.name, 'web') + self.img_dir = os.path.join(self.web_dir, 'images') + print('create web directory %s...' % self.web_dir) + util.mkdirs([self.web_dir, self.img_dir]) + # create a logging file to store training losses + self.log_name = os.path.join(opt.checkpoints_dir, opt.name, 'loss_log.txt') + with open(self.log_name, "a") as log_file: + now = time.strftime("%c") + log_file.write('================ Training Loss (%s) ================\n' % now) + + def reset(self): + """Reset the self.saved status""" + self.saved = False + + def create_visdom_connections(self): + """If the program could not connect to Visdom server, this function will start a new server at port < self.port > """ + cmd = sys.executable + ' -m visdom.server -p %d &>/dev/null &' % self.port + print('\n\nCould not connect to Visdom server. \n Trying to start a server....') + print('Command: %s' % cmd) + Popen(cmd, shell=True, stdout=PIPE, stderr=PIPE) + + def display_current_results(self, visuals, epoch, save_result): + """Display current results on visdom; save current results to an HTML file. + + Parameters: + visuals (OrderedDict) - - dictionary of images to display or save + epoch (int) - - the current epoch + save_result (bool) - - if save the current results to an HTML file + """ + if self.use_html and (save_result or not self.saved): # save images to an HTML file if they haven't been saved. 
+ self.saved = True + # save images to the disk + for label, image in visuals.items(): + image_numpy = util.tensor2im(image) + img_path = os.path.join(self.img_dir, 'epoch%.3d_%s.png' % (epoch, label)) + util.save_image(image_numpy, img_path) + + # update website + webpage = html.HTML(self.web_dir, 'Experiment name = %s' % self.name, refresh=1) + for n in range(epoch, 0, -1): + webpage.add_header('epoch [%d]' % n) + ims, txts, links = [], [], [] + + for label, image_numpy in visuals.items(): + # image_numpy = util.tensor2im(image) + img_path = 'epoch%.3d_%s.png' % (n, label) + ims.append(img_path) + txts.append(label) + links.append(img_path) + webpage.add_images(ims, txts, links, width=self.win_size) + webpage.save() + + # def plot_current_losses(self, epoch, counter_ratio, losses): + # """display the current losses on visdom display: dictionary of error labels and values + # + # Parameters: + # epoch (int) -- current epoch + # counter_ratio (float) -- progress (percentage) in the current epoch, between 0 to 1 + # losses (OrderedDict) -- training losses stored in the format of (name, float) pairs + # """ + # if not hasattr(self, 'plot_data'): + # self.plot_data = {'X': [], 'Y': [], 'legend': list(losses.keys())} + # self.plot_data['X'].append(epoch + counter_ratio) + # self.plot_data['Y'].append([losses[k] for k in self.plot_data['legend']]) + # try: + # self.vis.line( + # X=np.stack([np.array(self.plot_data['X'])] * len(self.plot_data['legend']), 1), + # Y=np.array(self.plot_data['Y']), + # opts={ + # 'title': self.name + ' loss over time', + # 'legend': self.plot_data['legend'], + # 'xlabel': 'epoch', + # 'ylabel': 'loss'}, + # win=self.display_id) + # except VisdomExceptionBase: + # self.create_visdom_connections() + + # losses: same format as |losses| of plot_current_losses + def print_current_losses(self, epoch, iters, losses, t_comp, t_data): + """print current losses on console; also save the losses to the disk + + Parameters: + epoch (int) -- current epoch + iters (int) -- current training iteration during this epoch (reset to 0 at the end of every epoch) + losses (OrderedDict) -- training losses stored in the format of (name, float) pairs + t_comp (float) -- computational time per data point (normalized by batch_size) + t_data (float) -- data loading time per data point (normalized by batch_size) + """ + message = '(epoch: %d, iters: %d, time: %.3f, data: %.3f) ' % (epoch, iters, t_comp, t_data) + for k, v in losses.items(): + message += '%s: %.3f ' % (k, v) + + print(message) # print the message + with open(self.log_name, "a") as log_file: + log_file.write('%s\n' % message) # save the message diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/lineart/LICENSE b/extensions-builtin/forge_legacy_preprocessors/annotator/lineart/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..16a9d56a3d4c15e4f34ac5426459c58487b01520 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/lineart/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022 Caroline Chan + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and 
this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/lineart/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/lineart/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..44acc630a6ccec97fd689b6d3f28bbd8ce7f6f51 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/lineart/__init__.py @@ -0,0 +1,133 @@ +import os +import cv2 +import torch +import numpy as np + +import torch.nn as nn +from einops import rearrange +from modules import devices +from annotator.annotator_path import models_path + + +norm_layer = nn.InstanceNorm2d + + +class ResidualBlock(nn.Module): + def __init__(self, in_features): + super(ResidualBlock, self).__init__() + + conv_block = [ nn.ReflectionPad2d(1), + nn.Conv2d(in_features, in_features, 3), + norm_layer(in_features), + nn.ReLU(inplace=True), + nn.ReflectionPad2d(1), + nn.Conv2d(in_features, in_features, 3), + norm_layer(in_features) + ] + + self.conv_block = nn.Sequential(*conv_block) + + def forward(self, x): + return x + self.conv_block(x) + + +class Generator(nn.Module): + def __init__(self, input_nc, output_nc, n_residual_blocks=9, sigmoid=True): + super(Generator, self).__init__() + + # Initial convolution block + model0 = [ nn.ReflectionPad2d(3), + nn.Conv2d(input_nc, 64, 7), + norm_layer(64), + nn.ReLU(inplace=True) ] + self.model0 = nn.Sequential(*model0) + + # Downsampling + model1 = [] + in_features = 64 + out_features = in_features*2 + for _ in range(2): + model1 += [ nn.Conv2d(in_features, out_features, 3, stride=2, padding=1), + norm_layer(out_features), + nn.ReLU(inplace=True) ] + in_features = out_features + out_features = in_features*2 + self.model1 = nn.Sequential(*model1) + + model2 = [] + # Residual blocks + for _ in range(n_residual_blocks): + model2 += [ResidualBlock(in_features)] + self.model2 = nn.Sequential(*model2) + + # Upsampling + model3 = [] + out_features = in_features//2 + for _ in range(2): + model3 += [ nn.ConvTranspose2d(in_features, out_features, 3, stride=2, padding=1, output_padding=1), + norm_layer(out_features), + nn.ReLU(inplace=True) ] + in_features = out_features + out_features = in_features//2 + self.model3 = nn.Sequential(*model3) + + # Output layer + model4 = [ nn.ReflectionPad2d(3), + nn.Conv2d(64, output_nc, 7)] + if sigmoid: + model4 += [nn.Sigmoid()] + + self.model4 = nn.Sequential(*model4) + + def forward(self, x, cond=None): + out = self.model0(x) + out = self.model1(out) + out = self.model2(out) + out = self.model3(out) + out = self.model4(out) + + return out + + +class LineartDetector: + model_dir = os.path.join(models_path, "lineart") + model_default = 'sk_model.pth' + model_coarse = 'sk_model2.pth' + + def __init__(self, model_name): + self.model = None + self.model_name = model_name + self.device = devices.get_device_for("controlnet") + + def load_model(self, name): + remote_model_path = 
"https://huggingface.co/lllyasviel/Annotators/resolve/main/" + name + model_path = os.path.join(self.model_dir, name) + if not os.path.exists(model_path): + from modules.modelloader import load_file_from_url + load_file_from_url(remote_model_path, model_dir=self.model_dir) + model = Generator(3, 1, 3) + model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu'))) + model.eval() + self.model = model.to(self.device) + + def unload_model(self): + if self.model is not None: + self.model.cpu() + + def __call__(self, input_image): + if self.model is None: + self.load_model(self.model_name) + self.model.to(self.device) + + assert input_image.ndim == 3 + image = input_image + with torch.no_grad(): + image = torch.from_numpy(image).float().to(self.device) + image = image / 255.0 + image = rearrange(image, 'h w c -> 1 c h w') + line = self.model(image)[0][0] + + line = line.cpu().numpy() + line = (line * 255.0).clip(0, 255).astype(np.uint8) + + return line \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/lineart_anime/LICENSE b/extensions-builtin/forge_legacy_preprocessors/annotator/lineart_anime/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..16a9d56a3d4c15e4f34ac5426459c58487b01520 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/lineart_anime/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022 Caroline Chan + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/lineart_anime/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/lineart_anime/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3db2a26dbb56a34e89d3bb06d14f1cc638521169 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/lineart_anime/__init__.py @@ -0,0 +1,161 @@ +import numpy as np +import torch +import torch.nn as nn +import functools + +import os +import cv2 +from einops import rearrange +from modules import devices +from annotator.annotator_path import models_path + + +class UnetGenerator(nn.Module): + """Create a Unet-based generator""" + + def __init__(self, input_nc, output_nc, num_downs, ngf=64, norm_layer=nn.BatchNorm2d, use_dropout=False): + """Construct a Unet generator + Parameters: + input_nc (int) -- the number of channels in input images + output_nc (int) -- the number of channels in output images + num_downs (int) -- the number of downsamplings in UNet. 
For example, # if |num_downs| == 7, + image of size 128x128 will become of size 1x1 # at the bottleneck + ngf (int) -- the number of filters in the last conv layer + norm_layer -- normalization layer + We construct the U-Net from the innermost layer to the outermost layer. + It is a recursive process. + """ + super(UnetGenerator, self).__init__() + # construct unet structure + unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=None, norm_layer=norm_layer, innermost=True) # add the innermost layer + for _ in range(num_downs - 5): # add intermediate layers with ngf * 8 filters + unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer, use_dropout=use_dropout) + # gradually reduce the number of filters from ngf * 8 to ngf + unet_block = UnetSkipConnectionBlock(ngf * 4, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer) + unet_block = UnetSkipConnectionBlock(ngf * 2, ngf * 4, input_nc=None, submodule=unet_block, norm_layer=norm_layer) + unet_block = UnetSkipConnectionBlock(ngf, ngf * 2, input_nc=None, submodule=unet_block, norm_layer=norm_layer) + self.model = UnetSkipConnectionBlock(output_nc, ngf, input_nc=input_nc, submodule=unet_block, outermost=True, norm_layer=norm_layer) # add the outermost layer + + def forward(self, input): + """Standard forward""" + return self.model(input) + + +class UnetSkipConnectionBlock(nn.Module): + """Defines the Unet submodule with skip connection. + X -------------------identity---------------------- + |-- downsampling -- |submodule| -- upsampling --| + """ + + def __init__(self, outer_nc, inner_nc, input_nc=None, + submodule=None, outermost=False, innermost=False, norm_layer=nn.BatchNorm2d, use_dropout=False): + """Construct a Unet submodule with skip connections. + Parameters: + outer_nc (int) -- the number of filters in the outer conv layer + inner_nc (int) -- the number of filters in the inner conv layer + input_nc (int) -- the number of channels in input images/features + submodule (UnetSkipConnectionBlock) -- previously defined submodules + outermost (bool) -- if this module is the outermost module + innermost (bool) -- if this module is the innermost module + norm_layer -- normalization layer + use_dropout (bool) -- if use dropout layers. 
+ """ + super(UnetSkipConnectionBlock, self).__init__() + self.outermost = outermost + if type(norm_layer) == functools.partial: + use_bias = norm_layer.func == nn.InstanceNorm2d + else: + use_bias = norm_layer == nn.InstanceNorm2d + if input_nc is None: + input_nc = outer_nc + downconv = nn.Conv2d(input_nc, inner_nc, kernel_size=4, + stride=2, padding=1, bias=use_bias) + downrelu = nn.LeakyReLU(0.2, True) + downnorm = norm_layer(inner_nc) + uprelu = nn.ReLU(True) + upnorm = norm_layer(outer_nc) + + if outermost: + upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc, + kernel_size=4, stride=2, + padding=1) + down = [downconv] + up = [uprelu, upconv, nn.Tanh()] + model = down + [submodule] + up + elif innermost: + upconv = nn.ConvTranspose2d(inner_nc, outer_nc, + kernel_size=4, stride=2, + padding=1, bias=use_bias) + down = [downrelu, downconv] + up = [uprelu, upconv, upnorm] + model = down + up + else: + upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc, + kernel_size=4, stride=2, + padding=1, bias=use_bias) + down = [downrelu, downconv, downnorm] + up = [uprelu, upconv, upnorm] + + if use_dropout: + model = down + [submodule] + up + [nn.Dropout(0.5)] + else: + model = down + [submodule] + up + + self.model = nn.Sequential(*model) + + def forward(self, x): + if self.outermost: + return self.model(x) + else: # add skip connections + return torch.cat([x, self.model(x)], 1) + + +class LineartAnimeDetector: + model_dir = os.path.join(models_path, "lineart_anime") + + def __init__(self): + self.model = None + self.device = devices.get_device_for("controlnet") + + def load_model(self): + remote_model_path = "https://huggingface.co/lllyasviel/Annotators/resolve/main/netG.pth" + modelpath = os.path.join(self.model_dir, "netG.pth") + if not os.path.exists(modelpath): + from modules.modelloader import load_file_from_url + load_file_from_url(remote_model_path, model_dir=self.model_dir) + norm_layer = functools.partial(nn.InstanceNorm2d, affine=False, track_running_stats=False) + net = UnetGenerator(3, 1, 8, 64, norm_layer=norm_layer, use_dropout=False) + ckpt = torch.load(modelpath) + for key in list(ckpt.keys()): + if 'module.' 
in key: + ckpt[key.replace('module.', '')] = ckpt[key] + del ckpt[key] + net.load_state_dict(ckpt) + net.eval() + self.model = net.to(self.device) + + def unload_model(self): + if self.model is not None: + self.model.cpu() + + def __call__(self, input_image): + if self.model is None: + self.load_model() + self.model.to(self.device) + + H, W, C = input_image.shape + Hn = 256 * int(np.ceil(float(H) / 256.0)) + Wn = 256 * int(np.ceil(float(W) / 256.0)) + img = cv2.resize(input_image, (Wn, Hn), interpolation=cv2.INTER_CUBIC) + with torch.no_grad(): + image_feed = torch.from_numpy(img).float().to(self.device) + image_feed = image_feed / 127.5 - 1.0 + image_feed = rearrange(image_feed, 'h w c -> 1 c h w') + + line = self.model(image_feed)[0, 0] * 127.5 + 127.5 + line = line.cpu().numpy() + + line = cv2.resize(line, (W, H), interpolation=cv2.INTER_CUBIC) + line = line.clip(0, 255).astype(np.uint8) + return line + diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/manga_line/LICENSE b/extensions-builtin/forge_legacy_preprocessors/annotator/manga_line/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..9bad05450ca061904f97acebe04ff7183cfbdc1a --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/manga_line/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2021 Miaomiao Li + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/manga_line/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/manga_line/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4832443c32eac1e79525356daa16a88ef8447593 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/manga_line/__init__.py @@ -0,0 +1,248 @@ +import os +import torch +import torch.nn as nn +from PIL import Image +import fnmatch +import cv2 + +import sys + +import numpy as np +from einops import rearrange +from modules import devices +from annotator.annotator_path import models_path + + +class _bn_relu_conv(nn.Module): + def __init__(self, in_filters, nb_filters, fw, fh, subsample=1): + super(_bn_relu_conv, self).__init__() + self.model = nn.Sequential( + nn.BatchNorm2d(in_filters, eps=1e-3), + nn.LeakyReLU(0.2), + nn.Conv2d(in_filters, nb_filters, (fw, fh), stride=subsample, padding=(fw//2, fh//2), padding_mode='zeros') + ) + + def forward(self, x): + return self.model(x) + + # the following are for debugs + print("****", np.max(x.cpu().numpy()), np.min(x.cpu().numpy()), np.mean(x.cpu().numpy()), np.std(x.cpu().numpy()), x.shape) + for i,layer in enumerate(self.model): + if i != 2: + x = layer(x) + else: + x = layer(x) + #x = nn.functional.pad(x, (1, 1, 1, 1), mode='constant', value=0) + print("____", np.max(x.cpu().numpy()), np.min(x.cpu().numpy()), np.mean(x.cpu().numpy()), np.std(x.cpu().numpy()), x.shape) + print(x[0]) + return x + +class _u_bn_relu_conv(nn.Module): + def __init__(self, in_filters, nb_filters, fw, fh, subsample=1): + super(_u_bn_relu_conv, self).__init__() + self.model = nn.Sequential( + nn.BatchNorm2d(in_filters, eps=1e-3), + nn.LeakyReLU(0.2), + nn.Conv2d(in_filters, nb_filters, (fw, fh), stride=subsample, padding=(fw//2, fh//2)), + nn.Upsample(scale_factor=2, mode='nearest') + ) + + def forward(self, x): + return self.model(x) + + + +class _shortcut(nn.Module): + def __init__(self, in_filters, nb_filters, subsample=1): + super(_shortcut, self).__init__() + self.process = False + self.model = None + if in_filters != nb_filters or subsample != 1: + self.process = True + self.model = nn.Sequential( + nn.Conv2d(in_filters, nb_filters, (1, 1), stride=subsample) + ) + + def forward(self, x, y): + #print(x.size(), y.size(), self.process) + if self.process: + y0 = self.model(x) + #print("merge+", torch.max(y0+y), torch.min(y0+y),torch.mean(y0+y), torch.std(y0+y), y0.shape) + return y0 + y + else: + #print("merge", torch.max(x+y), torch.min(x+y),torch.mean(x+y), torch.std(x+y), y.shape) + return x + y + +class _u_shortcut(nn.Module): + def __init__(self, in_filters, nb_filters, subsample): + super(_u_shortcut, self).__init__() + self.process = False + self.model = None + if in_filters != nb_filters: + self.process = True + self.model = nn.Sequential( + nn.Conv2d(in_filters, nb_filters, (1, 1), stride=subsample, padding_mode='zeros'), + nn.Upsample(scale_factor=2, mode='nearest') + ) + + def forward(self, x, y): + if self.process: + return self.model(x) + y + else: + return x + y + + +class basic_block(nn.Module): + def __init__(self, in_filters, nb_filters, init_subsample=1): + super(basic_block, self).__init__() + self.conv1 = _bn_relu_conv(in_filters, nb_filters, 3, 3, subsample=init_subsample) + self.residual = _bn_relu_conv(nb_filters, nb_filters, 3, 3) + self.shortcut = _shortcut(in_filters, nb_filters, subsample=init_subsample) + + def forward(self, x): + x1 = self.conv1(x) + x2 = self.residual(x1) + return 
self.shortcut(x, x2) + +class _u_basic_block(nn.Module): + def __init__(self, in_filters, nb_filters, init_subsample=1): + super(_u_basic_block, self).__init__() + self.conv1 = _u_bn_relu_conv(in_filters, nb_filters, 3, 3, subsample=init_subsample) + self.residual = _bn_relu_conv(nb_filters, nb_filters, 3, 3) + self.shortcut = _u_shortcut(in_filters, nb_filters, subsample=init_subsample) + + def forward(self, x): + y = self.residual(self.conv1(x)) + return self.shortcut(x, y) + + +class _residual_block(nn.Module): + def __init__(self, in_filters, nb_filters, repetitions, is_first_layer=False): + super(_residual_block, self).__init__() + layers = [] + for i in range(repetitions): + init_subsample = 1 + if i == repetitions - 1 and not is_first_layer: + init_subsample = 2 + if i == 0: + l = basic_block(in_filters=in_filters, nb_filters=nb_filters, init_subsample=init_subsample) + else: + l = basic_block(in_filters=nb_filters, nb_filters=nb_filters, init_subsample=init_subsample) + layers.append(l) + + self.model = nn.Sequential(*layers) + + def forward(self, x): + return self.model(x) + + +class _upsampling_residual_block(nn.Module): + def __init__(self, in_filters, nb_filters, repetitions): + super(_upsampling_residual_block, self).__init__() + layers = [] + for i in range(repetitions): + l = None + if i == 0: + l = _u_basic_block(in_filters=in_filters, nb_filters=nb_filters)#(input) + else: + l = basic_block(in_filters=nb_filters, nb_filters=nb_filters)#(input) + layers.append(l) + + self.model = nn.Sequential(*layers) + + def forward(self, x): + return self.model(x) + + +class res_skip(nn.Module): + + def __init__(self): + super(res_skip, self).__init__() + self.block0 = _residual_block(in_filters=1, nb_filters=24, repetitions=2, is_first_layer=True)#(input) + self.block1 = _residual_block(in_filters=24, nb_filters=48, repetitions=3)#(block0) + self.block2 = _residual_block(in_filters=48, nb_filters=96, repetitions=5)#(block1) + self.block3 = _residual_block(in_filters=96, nb_filters=192, repetitions=7)#(block2) + self.block4 = _residual_block(in_filters=192, nb_filters=384, repetitions=12)#(block3) + + self.block5 = _upsampling_residual_block(in_filters=384, nb_filters=192, repetitions=7)#(block4) + self.res1 = _shortcut(in_filters=192, nb_filters=192)#(block3, block5, subsample=(1,1)) + + self.block6 = _upsampling_residual_block(in_filters=192, nb_filters=96, repetitions=5)#(res1) + self.res2 = _shortcut(in_filters=96, nb_filters=96)#(block2, block6, subsample=(1,1)) + + self.block7 = _upsampling_residual_block(in_filters=96, nb_filters=48, repetitions=3)#(res2) + self.res3 = _shortcut(in_filters=48, nb_filters=48)#(block1, block7, subsample=(1,1)) + + self.block8 = _upsampling_residual_block(in_filters=48, nb_filters=24, repetitions=2)#(res3) + self.res4 = _shortcut(in_filters=24, nb_filters=24)#(block0,block8, subsample=(1,1)) + + self.block9 = _residual_block(in_filters=24, nb_filters=16, repetitions=2, is_first_layer=True)#(res4) + self.conv15 = _bn_relu_conv(in_filters=16, nb_filters=1, fh=1, fw=1, subsample=1)#(block7) + + def forward(self, x): + x0 = self.block0(x) + x1 = self.block1(x0) + x2 = self.block2(x1) + x3 = self.block3(x2) + x4 = self.block4(x3) + + x5 = self.block5(x4) + res1 = self.res1(x3, x5) + + x6 = self.block6(res1) + res2 = self.res2(x2, x6) + + x7 = self.block7(res2) + res3 = self.res3(x1, x7) + + x8 = self.block8(res3) + res4 = self.res4(x0, x8) + + x9 = self.block9(res4) + y = self.conv15(x9) + + return y + + +class MangaLineExtration: + model_dir = 
os.path.join(models_path, "manga_line") + + def __init__(self): + self.model = None + self.device = devices.get_device_for("controlnet") + + def load_model(self): + remote_model_path = "https://huggingface.co/lllyasviel/Annotators/resolve/main/erika.pth" + modelpath = os.path.join(self.model_dir, "erika.pth") + if not os.path.exists(modelpath): + from modules.modelloader import load_file_from_url + load_file_from_url(remote_model_path, model_dir=self.model_dir) + #norm_layer = functools.partial(nn.InstanceNorm2d, affine=False, track_running_stats=False) + net = res_skip() + ckpt = torch.load(modelpath) + for key in list(ckpt.keys()): + if 'module.' in key: + ckpt[key.replace('module.', '')] = ckpt[key] + del ckpt[key] + net.load_state_dict(ckpt) + net.eval() + self.model = net.to(self.device) + + def unload_model(self): + if self.model is not None: + self.model.cpu() + + def __call__(self, input_image): + if self.model is None: + self.load_model() + self.model.to(self.device) + img = cv2.cvtColor(input_image, cv2.COLOR_RGB2GRAY) + img = np.ascontiguousarray(img.copy()).copy() + with torch.no_grad(): + image_feed = torch.from_numpy(img).float().to(self.device) + image_feed = rearrange(image_feed, 'h w -> 1 1 h w') + line = self.model(image_feed) + line = 255 - line.cpu().numpy()[0, 0] + return line.clip(0, 255).astype(np.uint8) + + diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mediapipe_face/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mediapipe_face/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f74edfb187e4e39583ed92bfe69ea29c42a34ddc --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mediapipe_face/__init__.py @@ -0,0 +1,5 @@ +from .mediapipe_face_common import generate_annotation + + +def apply_mediapipe_face(image, max_faces: int = 1, min_confidence: float = 0.5): + return generate_annotation(image, max_faces, min_confidence) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mediapipe_face/mediapipe_face_common.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mediapipe_face/mediapipe_face_common.py new file mode 100644 index 0000000000000000000000000000000000000000..0f7d3701dc40eee88977f17a877fa800d0ae328d --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mediapipe_face/mediapipe_face_common.py @@ -0,0 +1,155 @@ +from typing import Mapping + +import mediapipe as mp +import numpy + + +mp_drawing = mp.solutions.drawing_utils +mp_drawing_styles = mp.solutions.drawing_styles +mp_face_detection = mp.solutions.face_detection # Only for counting faces. 
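# A minimal usage sketch of the apply_mediapipe_face() wrapper defined in
# annotator/mediapipe_face/__init__.py above (assuming img_rgb is an HxWx3 uint8 RGB array):
#
#     from annotator.mediapipe_face import apply_mediapipe_face
#     annotation = apply_mediapipe_face(img_rgb, max_faces=1, min_confidence=0.5)
#
# generate_annotation() below does the actual work: it runs MediaPipe FaceMesh, drops faces
# smaller than min_face_size_pixels, and returns an image of the same shape with the face
# mesh drawn on a black background.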
+mp_face_mesh = mp.solutions.face_mesh +mp_face_connections = mp.solutions.face_mesh_connections.FACEMESH_TESSELATION +mp_hand_connections = mp.solutions.hands_connections.HAND_CONNECTIONS +mp_body_connections = mp.solutions.pose_connections.POSE_CONNECTIONS + +DrawingSpec = mp.solutions.drawing_styles.DrawingSpec +PoseLandmark = mp.solutions.drawing_styles.PoseLandmark + +min_face_size_pixels: int = 64 +f_thick = 2 +f_rad = 1 +right_iris_draw = DrawingSpec(color=(10, 200, 250), thickness=f_thick, circle_radius=f_rad) +right_eye_draw = DrawingSpec(color=(10, 200, 180), thickness=f_thick, circle_radius=f_rad) +right_eyebrow_draw = DrawingSpec(color=(10, 220, 180), thickness=f_thick, circle_radius=f_rad) +left_iris_draw = DrawingSpec(color=(250, 200, 10), thickness=f_thick, circle_radius=f_rad) +left_eye_draw = DrawingSpec(color=(180, 200, 10), thickness=f_thick, circle_radius=f_rad) +left_eyebrow_draw = DrawingSpec(color=(180, 220, 10), thickness=f_thick, circle_radius=f_rad) +mouth_draw = DrawingSpec(color=(10, 180, 10), thickness=f_thick, circle_radius=f_rad) +head_draw = DrawingSpec(color=(10, 200, 10), thickness=f_thick, circle_radius=f_rad) + +# mp_face_mesh.FACEMESH_CONTOURS has all the items we care about. +face_connection_spec = {} +for edge in mp_face_mesh.FACEMESH_FACE_OVAL: + face_connection_spec[edge] = head_draw +for edge in mp_face_mesh.FACEMESH_LEFT_EYE: + face_connection_spec[edge] = left_eye_draw +for edge in mp_face_mesh.FACEMESH_LEFT_EYEBROW: + face_connection_spec[edge] = left_eyebrow_draw +# for edge in mp_face_mesh.FACEMESH_LEFT_IRIS: +# face_connection_spec[edge] = left_iris_draw +for edge in mp_face_mesh.FACEMESH_RIGHT_EYE: + face_connection_spec[edge] = right_eye_draw +for edge in mp_face_mesh.FACEMESH_RIGHT_EYEBROW: + face_connection_spec[edge] = right_eyebrow_draw +# for edge in mp_face_mesh.FACEMESH_RIGHT_IRIS: +# face_connection_spec[edge] = right_iris_draw +for edge in mp_face_mesh.FACEMESH_LIPS: + face_connection_spec[edge] = mouth_draw +iris_landmark_spec = {468: right_iris_draw, 473: left_iris_draw} + + +def draw_pupils(image, landmark_list, drawing_spec, halfwidth: int = 2): + """We have a custom function to draw the pupils because the mp.draw_landmarks method requires a parameter for all + landmarks. Until our PR is merged into mediapipe, we need this separate method.""" + if len(image.shape) != 3: + raise ValueError("Input image must be H,W,C.") + image_rows, image_cols, image_channels = image.shape + if image_channels != 3: # BGR channels + raise ValueError('Input image must contain three channel bgr data.') + for idx, landmark in enumerate(landmark_list.landmark): + if ( + (landmark.HasField('visibility') and landmark.visibility < 0.9) or + (landmark.HasField('presence') and landmark.presence < 0.5) + ): + continue + if landmark.x >= 1.0 or landmark.x < 0 or landmark.y >= 1.0 or landmark.y < 0: + continue + image_x = int(image_cols*landmark.x) + image_y = int(image_rows*landmark.y) + draw_color = None + if isinstance(drawing_spec, Mapping): + if drawing_spec.get(idx) is None: + continue + else: + draw_color = drawing_spec[idx].color + elif isinstance(drawing_spec, DrawingSpec): + draw_color = drawing_spec.color + image[image_y-halfwidth:image_y+halfwidth, image_x-halfwidth:image_x+halfwidth, :] = draw_color + + +def reverse_channels(image): + """Given a numpy array in RGB form, convert to BGR. Will also convert from BGR to RGB.""" + # im[:,:,::-1] is a neat hack to convert BGR to RGB by reversing the indexing order. 
+ # im[:,:,::[2,1,0]] would also work but makes a copy of the data. + return image[:, :, ::-1] + + +def generate_annotation( + img_rgb, + max_faces: int, + min_confidence: float +): + """ + Find up to 'max_faces' inside the provided input image. + If min_face_size_pixels is provided and nonzero it will be used to filter faces that occupy less than this many + pixels in the image. + """ + with mp_face_mesh.FaceMesh( + static_image_mode=True, + max_num_faces=max_faces, + refine_landmarks=True, + min_detection_confidence=min_confidence, + ) as facemesh: + img_height, img_width, img_channels = img_rgb.shape + assert(img_channels == 3) + + results = facemesh.process(img_rgb).multi_face_landmarks + + if results is None: + print("No faces detected in controlnet image for Mediapipe face annotator.") + return numpy.zeros_like(img_rgb) + + # Filter faces that are too small + filtered_landmarks = [] + for lm in results: + landmarks = lm.landmark + face_rect = [ + landmarks[0].x, + landmarks[0].y, + landmarks[0].x, + landmarks[0].y, + ] # Left, up, right, down. + for i in range(len(landmarks)): + face_rect[0] = min(face_rect[0], landmarks[i].x) + face_rect[1] = min(face_rect[1], landmarks[i].y) + face_rect[2] = max(face_rect[2], landmarks[i].x) + face_rect[3] = max(face_rect[3], landmarks[i].y) + if min_face_size_pixels > 0: + face_width = abs(face_rect[2] - face_rect[0]) + face_height = abs(face_rect[3] - face_rect[1]) + face_width_pixels = face_width * img_width + face_height_pixels = face_height * img_height + face_size = min(face_width_pixels, face_height_pixels) + if face_size >= min_face_size_pixels: + filtered_landmarks.append(lm) + else: + filtered_landmarks.append(lm) + + # Annotations are drawn in BGR for some reason, but we don't need to flip a zero-filled image at the start. + empty = numpy.zeros_like(img_rgb) + + # Draw detected faces: + for face_landmarks in filtered_landmarks: + mp_drawing.draw_landmarks( + empty, + face_landmarks, + connections=face_connection_spec.keys(), + landmark_drawing_spec=None, + connection_drawing_spec=face_connection_spec + ) + draw_pupils(empty, face_landmarks, iris_landmark_spec, 2) + + # Flip BGR back to RGB. + empty = reverse_channels(empty).copy() + + return empty diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/midas/LICENSE b/extensions-builtin/forge_legacy_preprocessors/annotator/midas/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..277b5c11be103f028a8d10985139f1da10c2f08e --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/midas/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2019 Intel ISL (Intel Intelligent Systems Lab) + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/midas/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/midas/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..dc247615fbdaeba9105512184ce39a5baab57b2b --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/midas/__init__.py @@ -0,0 +1,49 @@ +import cv2 +import numpy as np +import torch + +from einops import rearrange +from .api import MiDaSInference +from modules import devices + +model = None + +def unload_midas_model(): + global model + if model is not None: + model = model.cpu() + +def apply_midas(input_image, a=np.pi * 2.0, bg_th=0.1): + global model + if model is None: + model = MiDaSInference(model_type="dpt_hybrid") + if devices.get_device_for("controlnet").type != 'mps': + model = model.to(devices.get_device_for("controlnet")) + + assert input_image.ndim == 3 + image_depth = input_image + with torch.no_grad(): + image_depth = torch.from_numpy(image_depth).float() + if devices.get_device_for("controlnet").type != 'mps': + image_depth = image_depth.to(devices.get_device_for("controlnet")) + image_depth = image_depth / 127.5 - 1.0 + image_depth = rearrange(image_depth, 'h w c -> 1 c h w') + depth = model(image_depth)[0] + + depth_pt = depth.clone() + depth_pt -= torch.min(depth_pt) + depth_pt /= torch.max(depth_pt) + depth_pt = depth_pt.cpu().numpy() + depth_image = (depth_pt * 255.0).clip(0, 255).astype(np.uint8) + + depth_np = depth.cpu().numpy() + x = cv2.Sobel(depth_np, cv2.CV_32F, 1, 0, ksize=3) + y = cv2.Sobel(depth_np, cv2.CV_32F, 0, 1, ksize=3) + z = np.ones_like(x) * a + x[depth_pt < bg_th] = 0 + y[depth_pt < bg_th] = 0 + normal = np.stack([x, y, z], axis=2) + normal /= np.sum(normal ** 2.0, axis=2, keepdims=True) ** 0.5 + normal_image = (normal * 127.5 + 127.5).clip(0, 255).astype(np.uint8)[:, :, ::-1] + + return depth_image, normal_image diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/midas/api.py b/extensions-builtin/forge_legacy_preprocessors/annotator/midas/api.py new file mode 100644 index 0000000000000000000000000000000000000000..ba8ae4fd732ec045d2d3c3e3f02f2bb736cb174a --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/midas/api.py @@ -0,0 +1,181 @@ +# based on https://github.com/isl-org/MiDaS + +import cv2 +import torch +import torch.nn as nn +import os +from annotator.annotator_path import models_path + +from torchvision.transforms import Compose + +from .midas.dpt_depth import DPTDepthModel +from .midas.midas_net import MidasNet +from .midas.midas_net_custom import MidasNet_small +from .midas.transforms import Resize, NormalizeImage, PrepareForNet + +base_model_path = os.path.join(models_path, "midas") +old_modeldir = os.path.dirname(os.path.realpath(__file__)) +remote_model_path = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/dpt_hybrid-midas-501f0c75.pt" + +ISL_PATHS = { + "dpt_large": os.path.join(base_model_path, "dpt_large-midas-2f21e586.pt"), + "dpt_hybrid": os.path.join(base_model_path, "dpt_hybrid-midas-501f0c75.pt"), + "midas_v21": "", + "midas_v21_small": "", +} + +OLD_ISL_PATHS = { + "dpt_large": os.path.join(old_modeldir, "dpt_large-midas-2f21e586.pt"), + "dpt_hybrid": 
os.path.join(old_modeldir, "dpt_hybrid-midas-501f0c75.pt"), + "midas_v21": "", + "midas_v21_small": "", +} + + +def disabled_train(self, mode=True): + """Overwrite model.train with this function to make sure train/eval mode + does not change anymore.""" + return self + + +def load_midas_transform(model_type): + # https://github.com/isl-org/MiDaS/blob/master/run.py + # load transform only + if model_type == "dpt_large": # DPT-Large + net_w, net_h = 384, 384 + resize_mode = "minimal" + normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) + + elif model_type == "dpt_hybrid": # DPT-Hybrid + net_w, net_h = 384, 384 + resize_mode = "minimal" + normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) + + elif model_type == "midas_v21": + net_w, net_h = 384, 384 + resize_mode = "upper_bound" + normalization = NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + + elif model_type == "midas_v21_small": + net_w, net_h = 256, 256 + resize_mode = "upper_bound" + normalization = NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + + else: + assert False, f"model_type '{model_type}' not implemented, use: --model_type large" + + transform = Compose( + [ + Resize( + net_w, + net_h, + resize_target=None, + keep_aspect_ratio=True, + ensure_multiple_of=32, + resize_method=resize_mode, + image_interpolation_method=cv2.INTER_CUBIC, + ), + normalization, + PrepareForNet(), + ] + ) + + return transform + + +def load_model(model_type): + # https://github.com/isl-org/MiDaS/blob/master/run.py + # load network + model_path = ISL_PATHS[model_type] + old_model_path = OLD_ISL_PATHS[model_type] + if model_type == "dpt_large": # DPT-Large + model = DPTDepthModel( + path=model_path, + backbone="vitl16_384", + non_negative=True, + ) + net_w, net_h = 384, 384 + resize_mode = "minimal" + normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) + + elif model_type == "dpt_hybrid": # DPT-Hybrid + if os.path.exists(old_model_path): + model_path = old_model_path + elif not os.path.exists(model_path): + from modules.modelloader import load_file_from_url + load_file_from_url(remote_model_path, model_dir=base_model_path) + + model = DPTDepthModel( + path=model_path, + backbone="vitb_rn50_384", + non_negative=True, + ) + net_w, net_h = 384, 384 + resize_mode = "minimal" + normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) + + elif model_type == "midas_v21": + model = MidasNet(model_path, non_negative=True) + net_w, net_h = 384, 384 + resize_mode = "upper_bound" + normalization = NormalizeImage( + mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] + ) + + elif model_type == "midas_v21_small": + model = MidasNet_small(model_path, features=64, backbone="efficientnet_lite3", exportable=True, + non_negative=True, blocks={'expand': True}) + net_w, net_h = 256, 256 + resize_mode = "upper_bound" + normalization = NormalizeImage( + mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] + ) + + else: + print(f"model_type '{model_type}' not implemented, use: --model_type large") + assert False + + transform = Compose( + [ + Resize( + net_w, + net_h, + resize_target=None, + keep_aspect_ratio=True, + ensure_multiple_of=32, + resize_method=resize_mode, + image_interpolation_method=cv2.INTER_CUBIC, + ), + normalization, + PrepareForNet(), + ] + ) + + return model.eval(), transform + + +class MiDaSInference(nn.Module): + MODEL_TYPES_TORCH_HUB = [ + "DPT_Large", + "DPT_Hybrid", + "MiDaS_small" + ] + MODEL_TYPES_ISL = [ + "dpt_large", 
+ "dpt_hybrid", + "midas_v21", + "midas_v21_small", + ] + + def __init__(self, model_type): + super().__init__() + assert (model_type in self.MODEL_TYPES_ISL) + model, _ = load_model(model_type) + self.model = model + self.model.train = disabled_train + + def forward(self, x): + with torch.no_grad(): + prediction = self.model(x) + return prediction + diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/midas/midas/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/midas/midas/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/midas/midas/base_model.py b/extensions-builtin/forge_legacy_preprocessors/annotator/midas/midas/base_model.py new file mode 100644 index 0000000000000000000000000000000000000000..5cf430239b47ec5ec07531263f26f5c24a2311cd --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/midas/midas/base_model.py @@ -0,0 +1,16 @@ +import torch + + +class BaseModel(torch.nn.Module): + def load(self, path): + """Load model from file. + + Args: + path (str): file path + """ + parameters = torch.load(path, map_location=torch.device('cpu')) + + if "optimizer" in parameters: + parameters = parameters["model"] + + self.load_state_dict(parameters) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/midas/midas/blocks.py b/extensions-builtin/forge_legacy_preprocessors/annotator/midas/midas/blocks.py new file mode 100644 index 0000000000000000000000000000000000000000..2145d18fa98060a618536d9a64fe6589e9be4f78 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/midas/midas/blocks.py @@ -0,0 +1,342 @@ +import torch +import torch.nn as nn + +from .vit import ( + _make_pretrained_vitb_rn50_384, + _make_pretrained_vitl16_384, + _make_pretrained_vitb16_384, + forward_vit, +) + +def _make_encoder(backbone, features, use_pretrained, groups=1, expand=False, exportable=True, hooks=None, use_vit_only=False, use_readout="ignore",): + if backbone == "vitl16_384": + pretrained = _make_pretrained_vitl16_384( + use_pretrained, hooks=hooks, use_readout=use_readout + ) + scratch = _make_scratch( + [256, 512, 1024, 1024], features, groups=groups, expand=expand + ) # ViT-L/16 - 85.0% Top1 (backbone) + elif backbone == "vitb_rn50_384": + pretrained = _make_pretrained_vitb_rn50_384( + use_pretrained, + hooks=hooks, + use_vit_only=use_vit_only, + use_readout=use_readout, + ) + scratch = _make_scratch( + [256, 512, 768, 768], features, groups=groups, expand=expand + ) # ViT-H/16 - 85.0% Top1 (backbone) + elif backbone == "vitb16_384": + pretrained = _make_pretrained_vitb16_384( + use_pretrained, hooks=hooks, use_readout=use_readout + ) + scratch = _make_scratch( + [96, 192, 384, 768], features, groups=groups, expand=expand + ) # ViT-B/16 - 84.6% Top1 (backbone) + elif backbone == "resnext101_wsl": + pretrained = _make_pretrained_resnext101_wsl(use_pretrained) + scratch = _make_scratch([256, 512, 1024, 2048], features, groups=groups, expand=expand) # efficientnet_lite3 + elif backbone == "efficientnet_lite3": + pretrained = _make_pretrained_efficientnet_lite3(use_pretrained, exportable=exportable) + scratch = _make_scratch([32, 48, 136, 384], features, groups=groups, expand=expand) # efficientnet_lite3 + else: + print(f"Backbone '{backbone}' not implemented") + assert False + + return pretrained, scratch + + +def _make_scratch(in_shape, out_shape, groups=1, expand=False): + scratch = 
nn.Module() + + out_shape1 = out_shape + out_shape2 = out_shape + out_shape3 = out_shape + out_shape4 = out_shape + if expand==True: + out_shape1 = out_shape + out_shape2 = out_shape*2 + out_shape3 = out_shape*4 + out_shape4 = out_shape*8 + + scratch.layer1_rn = nn.Conv2d( + in_shape[0], out_shape1, kernel_size=3, stride=1, padding=1, bias=False, groups=groups + ) + scratch.layer2_rn = nn.Conv2d( + in_shape[1], out_shape2, kernel_size=3, stride=1, padding=1, bias=False, groups=groups + ) + scratch.layer3_rn = nn.Conv2d( + in_shape[2], out_shape3, kernel_size=3, stride=1, padding=1, bias=False, groups=groups + ) + scratch.layer4_rn = nn.Conv2d( + in_shape[3], out_shape4, kernel_size=3, stride=1, padding=1, bias=False, groups=groups + ) + + return scratch + + +def _make_pretrained_efficientnet_lite3(use_pretrained, exportable=False): + efficientnet = torch.hub.load( + "rwightman/gen-efficientnet-pytorch", + "tf_efficientnet_lite3", + pretrained=use_pretrained, + exportable=exportable + ) + return _make_efficientnet_backbone(efficientnet) + + +def _make_efficientnet_backbone(effnet): + pretrained = nn.Module() + + pretrained.layer1 = nn.Sequential( + effnet.conv_stem, effnet.bn1, effnet.act1, *effnet.blocks[0:2] + ) + pretrained.layer2 = nn.Sequential(*effnet.blocks[2:3]) + pretrained.layer3 = nn.Sequential(*effnet.blocks[3:5]) + pretrained.layer4 = nn.Sequential(*effnet.blocks[5:9]) + + return pretrained + + +def _make_resnet_backbone(resnet): + pretrained = nn.Module() + pretrained.layer1 = nn.Sequential( + resnet.conv1, resnet.bn1, resnet.relu, resnet.maxpool, resnet.layer1 + ) + + pretrained.layer2 = resnet.layer2 + pretrained.layer3 = resnet.layer3 + pretrained.layer4 = resnet.layer4 + + return pretrained + + +def _make_pretrained_resnext101_wsl(use_pretrained): + resnet = torch.hub.load("facebookresearch/WSL-Images", "resnext101_32x8d_wsl") + return _make_resnet_backbone(resnet) + + + +class Interpolate(nn.Module): + """Interpolation module. + """ + + def __init__(self, scale_factor, mode, align_corners=False): + """Init. + + Args: + scale_factor (float): scaling + mode (str): interpolation mode + """ + super(Interpolate, self).__init__() + + self.interp = nn.functional.interpolate + self.scale_factor = scale_factor + self.mode = mode + self.align_corners = align_corners + + def forward(self, x): + """Forward pass. + + Args: + x (tensor): input + + Returns: + tensor: interpolated data + """ + + x = self.interp( + x, scale_factor=self.scale_factor, mode=self.mode, align_corners=self.align_corners + ) + + return x + + +class ResidualConvUnit(nn.Module): + """Residual convolution module. + """ + + def __init__(self, features): + """Init. + + Args: + features (int): number of features + """ + super().__init__() + + self.conv1 = nn.Conv2d( + features, features, kernel_size=3, stride=1, padding=1, bias=True + ) + + self.conv2 = nn.Conv2d( + features, features, kernel_size=3, stride=1, padding=1, bias=True + ) + + self.relu = nn.ReLU(inplace=True) + + def forward(self, x): + """Forward pass. + + Args: + x (tensor): input + + Returns: + tensor: output + """ + out = self.relu(x) + out = self.conv1(out) + out = self.relu(out) + out = self.conv2(out) + + return out + x + + +class FeatureFusionBlock(nn.Module): + """Feature fusion block. + """ + + def __init__(self, features): + """Init. 
+ + Args: + features (int): number of features + """ + super(FeatureFusionBlock, self).__init__() + + self.resConfUnit1 = ResidualConvUnit(features) + self.resConfUnit2 = ResidualConvUnit(features) + + def forward(self, *xs): + """Forward pass. + + Returns: + tensor: output + """ + output = xs[0] + + if len(xs) == 2: + output += self.resConfUnit1(xs[1]) + + output = self.resConfUnit2(output) + + output = nn.functional.interpolate( + output, scale_factor=2, mode="bilinear", align_corners=True + ) + + return output + + + + +class ResidualConvUnit_custom(nn.Module): + """Residual convolution module. + """ + + def __init__(self, features, activation, bn): + """Init. + + Args: + features (int): number of features + """ + super().__init__() + + self.bn = bn + + self.groups=1 + + self.conv1 = nn.Conv2d( + features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups + ) + + self.conv2 = nn.Conv2d( + features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups + ) + + if self.bn==True: + self.bn1 = nn.BatchNorm2d(features) + self.bn2 = nn.BatchNorm2d(features) + + self.activation = activation + + self.skip_add = nn.quantized.FloatFunctional() + + def forward(self, x): + """Forward pass. + + Args: + x (tensor): input + + Returns: + tensor: output + """ + + out = self.activation(x) + out = self.conv1(out) + if self.bn==True: + out = self.bn1(out) + + out = self.activation(out) + out = self.conv2(out) + if self.bn==True: + out = self.bn2(out) + + if self.groups > 1: + out = self.conv_merge(out) + + return self.skip_add.add(out, x) + + # return out + x + + +class FeatureFusionBlock_custom(nn.Module): + """Feature fusion block. + """ + + def __init__(self, features, activation, deconv=False, bn=False, expand=False, align_corners=True): + """Init. + + Args: + features (int): number of features + """ + super(FeatureFusionBlock_custom, self).__init__() + + self.deconv = deconv + self.align_corners = align_corners + + self.groups=1 + + self.expand = expand + out_features = features + if self.expand==True: + out_features = features//2 + + self.out_conv = nn.Conv2d(features, out_features, kernel_size=1, stride=1, padding=0, bias=True, groups=1) + + self.resConfUnit1 = ResidualConvUnit_custom(features, activation, bn) + self.resConfUnit2 = ResidualConvUnit_custom(features, activation, bn) + + self.skip_add = nn.quantized.FloatFunctional() + + def forward(self, *xs): + """Forward pass. 
+ + Returns: + tensor: output + """ + output = xs[0] + + if len(xs) == 2: + res = self.resConfUnit1(xs[1]) + output = self.skip_add.add(output, res) + # output += res + + output = self.resConfUnit2(output) + + output = nn.functional.interpolate( + output, scale_factor=2, mode="bilinear", align_corners=self.align_corners + ) + + output = self.out_conv(output) + + return output + diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/midas/midas/dpt_depth.py b/extensions-builtin/forge_legacy_preprocessors/annotator/midas/midas/dpt_depth.py new file mode 100644 index 0000000000000000000000000000000000000000..4e9aab5d2767dffea39da5b3f30e2798688216f1 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/midas/midas/dpt_depth.py @@ -0,0 +1,109 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from .base_model import BaseModel +from .blocks import ( + FeatureFusionBlock, + FeatureFusionBlock_custom, + Interpolate, + _make_encoder, + forward_vit, +) + + +def _make_fusion_block(features, use_bn): + return FeatureFusionBlock_custom( + features, + nn.ReLU(False), + deconv=False, + bn=use_bn, + expand=False, + align_corners=True, + ) + + +class DPT(BaseModel): + def __init__( + self, + head, + features=256, + backbone="vitb_rn50_384", + readout="project", + channels_last=False, + use_bn=False, + ): + + super(DPT, self).__init__() + + self.channels_last = channels_last + + hooks = { + "vitb_rn50_384": [0, 1, 8, 11], + "vitb16_384": [2, 5, 8, 11], + "vitl16_384": [5, 11, 17, 23], + } + + # Instantiate backbone and reassemble blocks + self.pretrained, self.scratch = _make_encoder( + backbone, + features, + False, # Set to true of you want to train from scratch, uses ImageNet weights + groups=1, + expand=False, + exportable=False, + hooks=hooks[backbone], + use_readout=readout, + ) + + self.scratch.refinenet1 = _make_fusion_block(features, use_bn) + self.scratch.refinenet2 = _make_fusion_block(features, use_bn) + self.scratch.refinenet3 = _make_fusion_block(features, use_bn) + self.scratch.refinenet4 = _make_fusion_block(features, use_bn) + + self.scratch.output_conv = head + + + def forward(self, x): + if self.channels_last == True: + x.contiguous(memory_format=torch.channels_last) + + layer_1, layer_2, layer_3, layer_4 = forward_vit(self.pretrained, x) + + layer_1_rn = self.scratch.layer1_rn(layer_1) + layer_2_rn = self.scratch.layer2_rn(layer_2) + layer_3_rn = self.scratch.layer3_rn(layer_3) + layer_4_rn = self.scratch.layer4_rn(layer_4) + + path_4 = self.scratch.refinenet4(layer_4_rn) + path_3 = self.scratch.refinenet3(path_4, layer_3_rn) + path_2 = self.scratch.refinenet2(path_3, layer_2_rn) + path_1 = self.scratch.refinenet1(path_2, layer_1_rn) + + out = self.scratch.output_conv(path_1) + + return out + + +class DPTDepthModel(DPT): + def __init__(self, path=None, non_negative=True, **kwargs): + features = kwargs["features"] if "features" in kwargs else 256 + + head = nn.Sequential( + nn.Conv2d(features, features // 2, kernel_size=3, stride=1, padding=1), + Interpolate(scale_factor=2, mode="bilinear", align_corners=True), + nn.Conv2d(features // 2, 32, kernel_size=3, stride=1, padding=1), + nn.ReLU(True), + nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0), + nn.ReLU(True) if non_negative else nn.Identity(), + nn.Identity(), + ) + + super().__init__(head, **kwargs) + + if path is not None: + self.load(path) + + def forward(self, x): + return super().forward(x).squeeze(dim=1) + diff --git 
a/extensions-builtin/forge_legacy_preprocessors/annotator/midas/midas/midas_net.py b/extensions-builtin/forge_legacy_preprocessors/annotator/midas/midas/midas_net.py new file mode 100644 index 0000000000000000000000000000000000000000..8a954977800b0a0f48807e80fa63041910e33c1f --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/midas/midas/midas_net.py @@ -0,0 +1,76 @@ +"""MidashNet: Network for monocular depth estimation trained by mixing several datasets. +This file contains code that is adapted from +https://github.com/thomasjpfan/pytorch_refinenet/blob/master/pytorch_refinenet/refinenet/refinenet_4cascade.py +""" +import torch +import torch.nn as nn + +from .base_model import BaseModel +from .blocks import FeatureFusionBlock, Interpolate, _make_encoder + + +class MidasNet(BaseModel): + """Network for monocular depth estimation. + """ + + def __init__(self, path=None, features=256, non_negative=True): + """Init. + + Args: + path (str, optional): Path to saved model. Defaults to None. + features (int, optional): Number of features. Defaults to 256. + backbone (str, optional): Backbone network for encoder. Defaults to resnet50 + """ + print("Loading weights: ", path) + + super(MidasNet, self).__init__() + + use_pretrained = False if path is None else True + + self.pretrained, self.scratch = _make_encoder(backbone="resnext101_wsl", features=features, use_pretrained=use_pretrained) + + self.scratch.refinenet4 = FeatureFusionBlock(features) + self.scratch.refinenet3 = FeatureFusionBlock(features) + self.scratch.refinenet2 = FeatureFusionBlock(features) + self.scratch.refinenet1 = FeatureFusionBlock(features) + + self.scratch.output_conv = nn.Sequential( + nn.Conv2d(features, 128, kernel_size=3, stride=1, padding=1), + Interpolate(scale_factor=2, mode="bilinear"), + nn.Conv2d(128, 32, kernel_size=3, stride=1, padding=1), + nn.ReLU(True), + nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0), + nn.ReLU(True) if non_negative else nn.Identity(), + ) + + if path: + self.load(path) + + def forward(self, x): + """Forward pass. + + Args: + x (tensor): input data (image) + + Returns: + tensor: depth + """ + + layer_1 = self.pretrained.layer1(x) + layer_2 = self.pretrained.layer2(layer_1) + layer_3 = self.pretrained.layer3(layer_2) + layer_4 = self.pretrained.layer4(layer_3) + + layer_1_rn = self.scratch.layer1_rn(layer_1) + layer_2_rn = self.scratch.layer2_rn(layer_2) + layer_3_rn = self.scratch.layer3_rn(layer_3) + layer_4_rn = self.scratch.layer4_rn(layer_4) + + path_4 = self.scratch.refinenet4(layer_4_rn) + path_3 = self.scratch.refinenet3(path_4, layer_3_rn) + path_2 = self.scratch.refinenet2(path_3, layer_2_rn) + path_1 = self.scratch.refinenet1(path_2, layer_1_rn) + + out = self.scratch.output_conv(path_1) + + return torch.squeeze(out, dim=1) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/midas/midas/midas_net_custom.py b/extensions-builtin/forge_legacy_preprocessors/annotator/midas/midas/midas_net_custom.py new file mode 100644 index 0000000000000000000000000000000000000000..50e4acb5e53d5fabefe3dde16ab49c33c2b7797c --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/midas/midas/midas_net_custom.py @@ -0,0 +1,128 @@ +"""MidashNet: Network for monocular depth estimation trained by mixing several datasets. 
+This file contains code that is adapted from +https://github.com/thomasjpfan/pytorch_refinenet/blob/master/pytorch_refinenet/refinenet/refinenet_4cascade.py +""" +import torch +import torch.nn as nn + +from .base_model import BaseModel +from .blocks import FeatureFusionBlock, FeatureFusionBlock_custom, Interpolate, _make_encoder + + +class MidasNet_small(BaseModel): + """Network for monocular depth estimation. + """ + + def __init__(self, path=None, features=64, backbone="efficientnet_lite3", non_negative=True, exportable=True, channels_last=False, align_corners=True, + blocks={'expand': True}): + """Init. + + Args: + path (str, optional): Path to saved model. Defaults to None. + features (int, optional): Number of features. Defaults to 256. + backbone (str, optional): Backbone network for encoder. Defaults to resnet50 + """ + print("Loading weights: ", path) + + super(MidasNet_small, self).__init__() + + use_pretrained = False if path else True + + self.channels_last = channels_last + self.blocks = blocks + self.backbone = backbone + + self.groups = 1 + + features1=features + features2=features + features3=features + features4=features + self.expand = False + if "expand" in self.blocks and self.blocks['expand'] == True: + self.expand = True + features1=features + features2=features*2 + features3=features*4 + features4=features*8 + + self.pretrained, self.scratch = _make_encoder(self.backbone, features, use_pretrained, groups=self.groups, expand=self.expand, exportable=exportable) + + self.scratch.activation = nn.ReLU(False) + + self.scratch.refinenet4 = FeatureFusionBlock_custom(features4, self.scratch.activation, deconv=False, bn=False, expand=self.expand, align_corners=align_corners) + self.scratch.refinenet3 = FeatureFusionBlock_custom(features3, self.scratch.activation, deconv=False, bn=False, expand=self.expand, align_corners=align_corners) + self.scratch.refinenet2 = FeatureFusionBlock_custom(features2, self.scratch.activation, deconv=False, bn=False, expand=self.expand, align_corners=align_corners) + self.scratch.refinenet1 = FeatureFusionBlock_custom(features1, self.scratch.activation, deconv=False, bn=False, align_corners=align_corners) + + + self.scratch.output_conv = nn.Sequential( + nn.Conv2d(features, features//2, kernel_size=3, stride=1, padding=1, groups=self.groups), + Interpolate(scale_factor=2, mode="bilinear"), + nn.Conv2d(features//2, 32, kernel_size=3, stride=1, padding=1), + self.scratch.activation, + nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0), + nn.ReLU(True) if non_negative else nn.Identity(), + nn.Identity(), + ) + + if path: + self.load(path) + + + def forward(self, x): + """Forward pass. 
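For reference, the `expand` branch in MidasNet_small above simply widens the decoder towards the coarse end; with the default `features=64` and `blocks={'expand': True}` the four fusion blocks receive these channel counts:

features, expand = 64, True
widths = [features * m for m in ((1, 2, 4, 8) if expand else (1, 1, 1, 1))]
print(widths)   # [64, 128, 256, 512] -> features1..features4; all 64 when expand is off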
+ + Args: + x (tensor): input data (image) + + Returns: + tensor: depth + """ + if self.channels_last==True: + print("self.channels_last = ", self.channels_last) + x.contiguous(memory_format=torch.channels_last) + + + layer_1 = self.pretrained.layer1(x) + layer_2 = self.pretrained.layer2(layer_1) + layer_3 = self.pretrained.layer3(layer_2) + layer_4 = self.pretrained.layer4(layer_3) + + layer_1_rn = self.scratch.layer1_rn(layer_1) + layer_2_rn = self.scratch.layer2_rn(layer_2) + layer_3_rn = self.scratch.layer3_rn(layer_3) + layer_4_rn = self.scratch.layer4_rn(layer_4) + + + path_4 = self.scratch.refinenet4(layer_4_rn) + path_3 = self.scratch.refinenet3(path_4, layer_3_rn) + path_2 = self.scratch.refinenet2(path_3, layer_2_rn) + path_1 = self.scratch.refinenet1(path_2, layer_1_rn) + + out = self.scratch.output_conv(path_1) + + return torch.squeeze(out, dim=1) + + + +def fuse_model(m): + prev_previous_type = nn.Identity() + prev_previous_name = '' + previous_type = nn.Identity() + previous_name = '' + for name, module in m.named_modules(): + if prev_previous_type == nn.Conv2d and previous_type == nn.BatchNorm2d and type(module) == nn.ReLU: + # print("FUSED ", prev_previous_name, previous_name, name) + torch.quantization.fuse_modules(m, [prev_previous_name, previous_name, name], inplace=True) + elif prev_previous_type == nn.Conv2d and previous_type == nn.BatchNorm2d: + # print("FUSED ", prev_previous_name, previous_name) + torch.quantization.fuse_modules(m, [prev_previous_name, previous_name], inplace=True) + # elif previous_type == nn.Conv2d and type(module) == nn.ReLU: + # print("FUSED ", previous_name, name) + # torch.quantization.fuse_modules(m, [previous_name, name], inplace=True) + + prev_previous_type = previous_type + prev_previous_name = previous_name + previous_type = type(module) + previous_name = name \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/midas/midas/transforms.py b/extensions-builtin/forge_legacy_preprocessors/annotator/midas/midas/transforms.py new file mode 100644 index 0000000000000000000000000000000000000000..350cbc11662633ad7f8968eb10be2e7de6e384e9 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/midas/midas/transforms.py @@ -0,0 +1,234 @@ +import numpy as np +import cv2 +import math + + +def apply_min_size(sample, size, image_interpolation_method=cv2.INTER_AREA): + """Rezise the sample to ensure the given size. Keeps aspect ratio. + + Args: + sample (dict): sample + size (tuple): image size + + Returns: + tuple: new size + """ + shape = list(sample["disparity"].shape) + + if shape[0] >= size[0] and shape[1] >= size[1]: + return sample + + scale = [0, 0] + scale[0] = size[0] / shape[0] + scale[1] = size[1] / shape[1] + + scale = max(scale) + + shape[0] = math.ceil(scale * shape[0]) + shape[1] = math.ceil(scale * shape[1]) + + # resize + sample["image"] = cv2.resize( + sample["image"], tuple(shape[::-1]), interpolation=image_interpolation_method + ) + + sample["disparity"] = cv2.resize( + sample["disparity"], tuple(shape[::-1]), interpolation=cv2.INTER_NEAREST + ) + sample["mask"] = cv2.resize( + sample["mask"].astype(np.float32), + tuple(shape[::-1]), + interpolation=cv2.INTER_NEAREST, + ) + sample["mask"] = sample["mask"].astype(bool) + + return tuple(shape) + + +class Resize(object): + """Resize sample to given size (width, height). 
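fuse_model() above walks the module list and calls torch.quantization.fuse_modules on every Conv2d -> BatchNorm2d (-> ReLU) run it finds. A minimal standalone sketch of that call; the module names "0", "1", "2" come from using an nn.Sequential here, and Conv+BN fusion requires eval mode:

import torch
import torch.nn as nn

m = nn.Sequential(
    nn.Conv2d(3, 8, kernel_size=3, padding=1),
    nn.BatchNorm2d(8),
    nn.ReLU(),
)
m.eval()  # fusion with BatchNorm only works in eval mode
torch.quantization.fuse_modules(m, [["0", "1", "2"]], inplace=True)
print(m)  # BN and ReLU become Identity; their parameters are folded into the conv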
+ """ + + def __init__( + self, + width, + height, + resize_target=True, + keep_aspect_ratio=False, + ensure_multiple_of=1, + resize_method="lower_bound", + image_interpolation_method=cv2.INTER_AREA, + ): + """Init. + + Args: + width (int): desired output width + height (int): desired output height + resize_target (bool, optional): + True: Resize the full sample (image, mask, target). + False: Resize image only. + Defaults to True. + keep_aspect_ratio (bool, optional): + True: Keep the aspect ratio of the input sample. + Output sample might not have the given width and height, and + resize behaviour depends on the parameter 'resize_method'. + Defaults to False. + ensure_multiple_of (int, optional): + Output width and height is constrained to be multiple of this parameter. + Defaults to 1. + resize_method (str, optional): + "lower_bound": Output will be at least as large as the given size. + "upper_bound": Output will be at max as large as the given size. (Output size might be smaller than given size.) + "minimal": Scale as least as possible. (Output size might be smaller than given size.) + Defaults to "lower_bound". + """ + self.__width = width + self.__height = height + + self.__resize_target = resize_target + self.__keep_aspect_ratio = keep_aspect_ratio + self.__multiple_of = ensure_multiple_of + self.__resize_method = resize_method + self.__image_interpolation_method = image_interpolation_method + + def constrain_to_multiple_of(self, x, min_val=0, max_val=None): + y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int) + + if max_val is not None and y > max_val: + y = (np.floor(x / self.__multiple_of) * self.__multiple_of).astype(int) + + if y < min_val: + y = (np.ceil(x / self.__multiple_of) * self.__multiple_of).astype(int) + + return y + + def get_size(self, width, height): + # determine new height and width + scale_height = self.__height / height + scale_width = self.__width / width + + if self.__keep_aspect_ratio: + if self.__resize_method == "lower_bound": + # scale such that output size is lower bound + if scale_width > scale_height: + # fit width + scale_height = scale_width + else: + # fit height + scale_width = scale_height + elif self.__resize_method == "upper_bound": + # scale such that output size is upper bound + if scale_width < scale_height: + # fit width + scale_height = scale_width + else: + # fit height + scale_width = scale_height + elif self.__resize_method == "minimal": + # scale as least as possbile + if abs(1 - scale_width) < abs(1 - scale_height): + # fit width + scale_height = scale_width + else: + # fit height + scale_width = scale_height + else: + raise ValueError( + f"resize_method {self.__resize_method} not implemented" + ) + + if self.__resize_method == "lower_bound": + new_height = self.constrain_to_multiple_of( + scale_height * height, min_val=self.__height + ) + new_width = self.constrain_to_multiple_of( + scale_width * width, min_val=self.__width + ) + elif self.__resize_method == "upper_bound": + new_height = self.constrain_to_multiple_of( + scale_height * height, max_val=self.__height + ) + new_width = self.constrain_to_multiple_of( + scale_width * width, max_val=self.__width + ) + elif self.__resize_method == "minimal": + new_height = self.constrain_to_multiple_of(scale_height * height) + new_width = self.constrain_to_multiple_of(scale_width * width) + else: + raise ValueError(f"resize_method {self.__resize_method} not implemented") + + return (new_width, new_height) + + def __call__(self, sample): + width, height = self.get_size( + 
sample["image"].shape[1], sample["image"].shape[0] + ) + + # resize sample + sample["image"] = cv2.resize( + sample["image"], + (width, height), + interpolation=self.__image_interpolation_method, + ) + + if self.__resize_target: + if "disparity" in sample: + sample["disparity"] = cv2.resize( + sample["disparity"], + (width, height), + interpolation=cv2.INTER_NEAREST, + ) + + if "depth" in sample: + sample["depth"] = cv2.resize( + sample["depth"], (width, height), interpolation=cv2.INTER_NEAREST + ) + + sample["mask"] = cv2.resize( + sample["mask"].astype(np.float32), + (width, height), + interpolation=cv2.INTER_NEAREST, + ) + sample["mask"] = sample["mask"].astype(bool) + + return sample + + +class NormalizeImage(object): + """Normlize image by given mean and std. + """ + + def __init__(self, mean, std): + self.__mean = mean + self.__std = std + + def __call__(self, sample): + sample["image"] = (sample["image"] - self.__mean) / self.__std + + return sample + + +class PrepareForNet(object): + """Prepare sample for usage as network input. + """ + + def __init__(self): + pass + + def __call__(self, sample): + image = np.transpose(sample["image"], (2, 0, 1)) + sample["image"] = np.ascontiguousarray(image).astype(np.float32) + + if "mask" in sample: + sample["mask"] = sample["mask"].astype(np.float32) + sample["mask"] = np.ascontiguousarray(sample["mask"]) + + if "disparity" in sample: + disparity = sample["disparity"].astype(np.float32) + sample["disparity"] = np.ascontiguousarray(disparity) + + if "depth" in sample: + depth = sample["depth"].astype(np.float32) + sample["depth"] = np.ascontiguousarray(depth) + + return sample diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/midas/midas/vit.py b/extensions-builtin/forge_legacy_preprocessors/annotator/midas/midas/vit.py new file mode 100644 index 0000000000000000000000000000000000000000..ea46b1be88b261b0dec04f3da0256f5f66f88a74 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/midas/midas/vit.py @@ -0,0 +1,491 @@ +import torch +import torch.nn as nn +import timm +import types +import math +import torch.nn.functional as F + + +class Slice(nn.Module): + def __init__(self, start_index=1): + super(Slice, self).__init__() + self.start_index = start_index + + def forward(self, x): + return x[:, self.start_index :] + + +class AddReadout(nn.Module): + def __init__(self, start_index=1): + super(AddReadout, self).__init__() + self.start_index = start_index + + def forward(self, x): + if self.start_index == 2: + readout = (x[:, 0] + x[:, 1]) / 2 + else: + readout = x[:, 0] + return x[:, self.start_index :] + readout.unsqueeze(1) + + +class ProjectReadout(nn.Module): + def __init__(self, in_features, start_index=1): + super(ProjectReadout, self).__init__() + self.start_index = start_index + + self.project = nn.Sequential(nn.Linear(2 * in_features, in_features), nn.GELU()) + + def forward(self, x): + readout = x[:, 0].unsqueeze(1).expand_as(x[:, self.start_index :]) + features = torch.cat((x[:, self.start_index :], readout), -1) + + return self.project(features) + + +class Transpose(nn.Module): + def __init__(self, dim0, dim1): + super(Transpose, self).__init__() + self.dim0 = dim0 + self.dim1 = dim1 + + def forward(self, x): + x = x.transpose(self.dim0, self.dim1) + return x + + +def forward_vit(pretrained, x): + b, c, h, w = x.shape + + glob = pretrained.model.forward_flex(x) + + layer_1 = pretrained.activations["1"] + layer_2 = pretrained.activations["2"] + layer_3 = pretrained.activations["3"] + layer_4 = 
pretrained.activations["4"] + + layer_1 = pretrained.act_postprocess1[0:2](layer_1) + layer_2 = pretrained.act_postprocess2[0:2](layer_2) + layer_3 = pretrained.act_postprocess3[0:2](layer_3) + layer_4 = pretrained.act_postprocess4[0:2](layer_4) + + unflatten = nn.Sequential( + nn.Unflatten( + 2, + torch.Size( + [ + h // pretrained.model.patch_size[1], + w // pretrained.model.patch_size[0], + ] + ), + ) + ) + + if layer_1.ndim == 3: + layer_1 = unflatten(layer_1) + if layer_2.ndim == 3: + layer_2 = unflatten(layer_2) + if layer_3.ndim == 3: + layer_3 = unflatten(layer_3) + if layer_4.ndim == 3: + layer_4 = unflatten(layer_4) + + layer_1 = pretrained.act_postprocess1[3 : len(pretrained.act_postprocess1)](layer_1) + layer_2 = pretrained.act_postprocess2[3 : len(pretrained.act_postprocess2)](layer_2) + layer_3 = pretrained.act_postprocess3[3 : len(pretrained.act_postprocess3)](layer_3) + layer_4 = pretrained.act_postprocess4[3 : len(pretrained.act_postprocess4)](layer_4) + + return layer_1, layer_2, layer_3, layer_4 + + +def _resize_pos_embed(self, posemb, gs_h, gs_w): + posemb_tok, posemb_grid = ( + posemb[:, : self.start_index], + posemb[0, self.start_index :], + ) + + gs_old = int(math.sqrt(len(posemb_grid))) + + posemb_grid = posemb_grid.reshape(1, gs_old, gs_old, -1).permute(0, 3, 1, 2) + posemb_grid = F.interpolate(posemb_grid, size=(gs_h, gs_w), mode="bilinear") + posemb_grid = posemb_grid.permute(0, 2, 3, 1).reshape(1, gs_h * gs_w, -1) + + posemb = torch.cat([posemb_tok, posemb_grid], dim=1) + + return posemb + + +def forward_flex(self, x): + b, c, h, w = x.shape + + pos_embed = self._resize_pos_embed( + self.pos_embed, h // self.patch_size[1], w // self.patch_size[0] + ) + + B = x.shape[0] + + if hasattr(self.patch_embed, "backbone"): + x = self.patch_embed.backbone(x) + if isinstance(x, (list, tuple)): + x = x[-1] # last feature if backbone outputs list/tuple of features + + x = self.patch_embed.proj(x).flatten(2).transpose(1, 2) + + if getattr(self, "dist_token", None) is not None: + cls_tokens = self.cls_token.expand( + B, -1, -1 + ) # stole cls_tokens impl from Phil Wang, thanks + dist_token = self.dist_token.expand(B, -1, -1) + x = torch.cat((cls_tokens, dist_token, x), dim=1) + else: + cls_tokens = self.cls_token.expand( + B, -1, -1 + ) # stole cls_tokens impl from Phil Wang, thanks + x = torch.cat((cls_tokens, x), dim=1) + + x = x + pos_embed + x = self.pos_drop(x) + + for blk in self.blocks: + x = blk(x) + + x = self.norm(x) + + return x + + +activations = {} + + +def get_activation(name): + def hook(model, input, output): + activations[name] = output + + return hook + + +def get_readout_oper(vit_features, features, use_readout, start_index=1): + if use_readout == "ignore": + readout_oper = [Slice(start_index)] * len(features) + elif use_readout == "add": + readout_oper = [AddReadout(start_index)] * len(features) + elif use_readout == "project": + readout_oper = [ + ProjectReadout(vit_features, start_index) for out_feat in features + ] + else: + assert ( + False + ), "wrong operation for readout token, use_readout can be 'ignore', 'add', or 'project'" + + return readout_oper + + +def _make_vit_b16_backbone( + model, + features=[96, 192, 384, 768], + size=[384, 384], + hooks=[2, 5, 8, 11], + vit_features=768, + use_readout="ignore", + start_index=1, +): + pretrained = nn.Module() + + pretrained.model = model + pretrained.model.blocks[hooks[0]].register_forward_hook(get_activation("1")) + pretrained.model.blocks[hooks[1]].register_forward_hook(get_activation("2")) + 
pretrained.model.blocks[hooks[2]].register_forward_hook(get_activation("3")) + pretrained.model.blocks[hooks[3]].register_forward_hook(get_activation("4")) + + pretrained.activations = activations + + readout_oper = get_readout_oper(vit_features, features, use_readout, start_index) + + # 32, 48, 136, 384 + pretrained.act_postprocess1 = nn.Sequential( + readout_oper[0], + Transpose(1, 2), + nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])), + nn.Conv2d( + in_channels=vit_features, + out_channels=features[0], + kernel_size=1, + stride=1, + padding=0, + ), + nn.ConvTranspose2d( + in_channels=features[0], + out_channels=features[0], + kernel_size=4, + stride=4, + padding=0, + bias=True, + dilation=1, + groups=1, + ), + ) + + pretrained.act_postprocess2 = nn.Sequential( + readout_oper[1], + Transpose(1, 2), + nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])), + nn.Conv2d( + in_channels=vit_features, + out_channels=features[1], + kernel_size=1, + stride=1, + padding=0, + ), + nn.ConvTranspose2d( + in_channels=features[1], + out_channels=features[1], + kernel_size=2, + stride=2, + padding=0, + bias=True, + dilation=1, + groups=1, + ), + ) + + pretrained.act_postprocess3 = nn.Sequential( + readout_oper[2], + Transpose(1, 2), + nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])), + nn.Conv2d( + in_channels=vit_features, + out_channels=features[2], + kernel_size=1, + stride=1, + padding=0, + ), + ) + + pretrained.act_postprocess4 = nn.Sequential( + readout_oper[3], + Transpose(1, 2), + nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])), + nn.Conv2d( + in_channels=vit_features, + out_channels=features[3], + kernel_size=1, + stride=1, + padding=0, + ), + nn.Conv2d( + in_channels=features[3], + out_channels=features[3], + kernel_size=3, + stride=2, + padding=1, + ), + ) + + pretrained.model.start_index = start_index + pretrained.model.patch_size = [16, 16] + + # We inject this function into the VisionTransformer instances so that + # we can use it with interpolated position embeddings without modifying the library source. 
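Each act_postprocess head above starts with a readout op, a Transpose(1, 2) and an nn.Unflatten: together they turn the ViT token sequence [B, N, C] back into a spatial map [B, C, H/16, W/16] before the 1x1 and transposed convolutions rescale it. In isolation:

import torch

B, C, H, W = 1, 768, 384, 384
tokens = torch.randn(B, (H // 16) * (W // 16), C)       # 24 * 24 = 576 patch tokens

x = tokens.transpose(1, 2)                              # [B, C, N]
x = torch.nn.Unflatten(2, torch.Size([H // 16, W // 16]))(x)
print(x.shape)                                          # torch.Size([1, 768, 24, 24])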
+ pretrained.model.forward_flex = types.MethodType(forward_flex, pretrained.model) + pretrained.model._resize_pos_embed = types.MethodType( + _resize_pos_embed, pretrained.model + ) + + return pretrained + + +def _make_pretrained_vitl16_384(pretrained, use_readout="ignore", hooks=None): + model = timm.create_model("vit_large_patch16_384", pretrained=pretrained) + + hooks = [5, 11, 17, 23] if hooks == None else hooks + return _make_vit_b16_backbone( + model, + features=[256, 512, 1024, 1024], + hooks=hooks, + vit_features=1024, + use_readout=use_readout, + ) + + +def _make_pretrained_vitb16_384(pretrained, use_readout="ignore", hooks=None): + model = timm.create_model("vit_base_patch16_384", pretrained=pretrained) + + hooks = [2, 5, 8, 11] if hooks == None else hooks + return _make_vit_b16_backbone( + model, features=[96, 192, 384, 768], hooks=hooks, use_readout=use_readout + ) + + +def _make_pretrained_deitb16_384(pretrained, use_readout="ignore", hooks=None): + model = timm.create_model("vit_deit_base_patch16_384", pretrained=pretrained) + + hooks = [2, 5, 8, 11] if hooks == None else hooks + return _make_vit_b16_backbone( + model, features=[96, 192, 384, 768], hooks=hooks, use_readout=use_readout + ) + + +def _make_pretrained_deitb16_distil_384(pretrained, use_readout="ignore", hooks=None): + model = timm.create_model( + "vit_deit_base_distilled_patch16_384", pretrained=pretrained + ) + + hooks = [2, 5, 8, 11] if hooks == None else hooks + return _make_vit_b16_backbone( + model, + features=[96, 192, 384, 768], + hooks=hooks, + use_readout=use_readout, + start_index=2, + ) + + +def _make_vit_b_rn50_backbone( + model, + features=[256, 512, 768, 768], + size=[384, 384], + hooks=[0, 1, 8, 11], + vit_features=768, + use_vit_only=False, + use_readout="ignore", + start_index=1, +): + pretrained = nn.Module() + + pretrained.model = model + + if use_vit_only == True: + pretrained.model.blocks[hooks[0]].register_forward_hook(get_activation("1")) + pretrained.model.blocks[hooks[1]].register_forward_hook(get_activation("2")) + else: + pretrained.model.patch_embed.backbone.stages[0].register_forward_hook( + get_activation("1") + ) + pretrained.model.patch_embed.backbone.stages[1].register_forward_hook( + get_activation("2") + ) + + pretrained.model.blocks[hooks[2]].register_forward_hook(get_activation("3")) + pretrained.model.blocks[hooks[3]].register_forward_hook(get_activation("4")) + + pretrained.activations = activations + + readout_oper = get_readout_oper(vit_features, features, use_readout, start_index) + + if use_vit_only == True: + pretrained.act_postprocess1 = nn.Sequential( + readout_oper[0], + Transpose(1, 2), + nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])), + nn.Conv2d( + in_channels=vit_features, + out_channels=features[0], + kernel_size=1, + stride=1, + padding=0, + ), + nn.ConvTranspose2d( + in_channels=features[0], + out_channels=features[0], + kernel_size=4, + stride=4, + padding=0, + bias=True, + dilation=1, + groups=1, + ), + ) + + pretrained.act_postprocess2 = nn.Sequential( + readout_oper[1], + Transpose(1, 2), + nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])), + nn.Conv2d( + in_channels=vit_features, + out_channels=features[1], + kernel_size=1, + stride=1, + padding=0, + ), + nn.ConvTranspose2d( + in_channels=features[1], + out_channels=features[1], + kernel_size=2, + stride=2, + padding=0, + bias=True, + dilation=1, + groups=1, + ), + ) + else: + pretrained.act_postprocess1 = nn.Sequential( + nn.Identity(), nn.Identity(), nn.Identity() + ) + 
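The _make_pretrained_* helpers above are thin wrappers around timm.create_model; the hook indices simply pick transformer blocks out of that backbone. A quick, hedged look at what those indices refer to (requires timm, attribute names match current timm ViT implementations, and pretrained=False avoids any download):

import timm

model = timm.create_model("vit_base_patch16_384", pretrained=False)
print(len(model.blocks))       # 12 transformer blocks; hooks = [2, 5, 8, 11] tap four of them
print(model.patch_embed.proj)  # Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))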
pretrained.act_postprocess2 = nn.Sequential( + nn.Identity(), nn.Identity(), nn.Identity() + ) + + pretrained.act_postprocess3 = nn.Sequential( + readout_oper[2], + Transpose(1, 2), + nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])), + nn.Conv2d( + in_channels=vit_features, + out_channels=features[2], + kernel_size=1, + stride=1, + padding=0, + ), + ) + + pretrained.act_postprocess4 = nn.Sequential( + readout_oper[3], + Transpose(1, 2), + nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])), + nn.Conv2d( + in_channels=vit_features, + out_channels=features[3], + kernel_size=1, + stride=1, + padding=0, + ), + nn.Conv2d( + in_channels=features[3], + out_channels=features[3], + kernel_size=3, + stride=2, + padding=1, + ), + ) + + pretrained.model.start_index = start_index + pretrained.model.patch_size = [16, 16] + + # We inject this function into the VisionTransformer instances so that + # we can use it with interpolated position embeddings without modifying the library source. + pretrained.model.forward_flex = types.MethodType(forward_flex, pretrained.model) + + # We inject this function into the VisionTransformer instances so that + # we can use it with interpolated position embeddings without modifying the library source. + pretrained.model._resize_pos_embed = types.MethodType( + _resize_pos_embed, pretrained.model + ) + + return pretrained + + +def _make_pretrained_vitb_rn50_384( + pretrained, use_readout="ignore", hooks=None, use_vit_only=False +): + model = timm.create_model("vit_base_resnet50_384", pretrained=pretrained) + + hooks = [0, 1, 8, 11] if hooks == None else hooks + return _make_vit_b_rn50_backbone( + model, + features=[256, 512, 768, 768], + size=[384, 384], + hooks=hooks, + use_vit_only=use_vit_only, + use_readout=use_readout, + ) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/midas/utils.py b/extensions-builtin/forge_legacy_preprocessors/annotator/midas/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..9a9d3b5b66370fa98da9e067ba53ead848ea9a59 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/midas/utils.py @@ -0,0 +1,189 @@ +"""Utils for monoDepth.""" +import sys +import re +import numpy as np +import cv2 +import torch + + +def read_pfm(path): + """Read pfm file. + + Args: + path (str): path to file + + Returns: + tuple: (data, scale) + """ + with open(path, "rb") as file: + + color = None + width = None + height = None + scale = None + endian = None + + header = file.readline().rstrip() + if header.decode("ascii") == "PF": + color = True + elif header.decode("ascii") == "Pf": + color = False + else: + raise Exception("Not a PFM file: " + path) + + dim_match = re.match(r"^(\d+)\s(\d+)\s$", file.readline().decode("ascii")) + if dim_match: + width, height = list(map(int, dim_match.groups())) + else: + raise Exception("Malformed PFM header.") + + scale = float(file.readline().decode("ascii").rstrip()) + if scale < 0: + # little-endian + endian = "<" + scale = -scale + else: + # big-endian + endian = ">" + + data = np.fromfile(file, endian + "f") + shape = (height, width, 3) if color else (height, width) + + data = np.reshape(data, shape) + data = np.flipud(data) + + return data, scale + + +def write_pfm(path, image, scale=1): + """Write pfm file. + + Args: + path (str): pathto file + image (array): data + scale (int, optional): Scale. Defaults to 1. 
+ """ + + with open(path, "wb") as file: + color = None + + if image.dtype.name != "float32": + raise Exception("Image dtype must be float32.") + + image = np.flipud(image) + + if len(image.shape) == 3 and image.shape[2] == 3: # color image + color = True + elif ( + len(image.shape) == 2 or len(image.shape) == 3 and image.shape[2] == 1 + ): # greyscale + color = False + else: + raise Exception("Image must have H x W x 3, H x W x 1 or H x W dimensions.") + + file.write("PF\n" if color else "Pf\n".encode()) + file.write("%d %d\n".encode() % (image.shape[1], image.shape[0])) + + endian = image.dtype.byteorder + + if endian == "<" or endian == "=" and sys.byteorder == "little": + scale = -scale + + file.write("%f\n".encode() % scale) + + image.tofile(file) + + +def read_image(path): + """Read image and output RGB image (0-1). + + Args: + path (str): path to file + + Returns: + array: RGB image (0-1) + """ + img = cv2.imread(path) + + if img.ndim == 2: + img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) + + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0 + + return img + + +def resize_image(img): + """Resize image and make it fit for network. + + Args: + img (array): image + + Returns: + tensor: data ready for network + """ + height_orig = img.shape[0] + width_orig = img.shape[1] + + if width_orig > height_orig: + scale = width_orig / 384 + else: + scale = height_orig / 384 + + height = (np.ceil(height_orig / scale / 32) * 32).astype(int) + width = (np.ceil(width_orig / scale / 32) * 32).astype(int) + + img_resized = cv2.resize(img, (width, height), interpolation=cv2.INTER_AREA) + + img_resized = ( + torch.from_numpy(np.transpose(img_resized, (2, 0, 1))).contiguous().float() + ) + img_resized = img_resized.unsqueeze(0) + + return img_resized + + +def resize_depth(depth, width, height): + """Resize depth map and bring to CPU (numpy). + + Args: + depth (tensor): depth + width (int): image width + height (int): image height + + Returns: + array: processed depth + """ + depth = torch.squeeze(depth[0, :, :, :]).to("cpu") + + depth_resized = cv2.resize( + depth.numpy(), (width, height), interpolation=cv2.INTER_CUBIC + ) + + return depth_resized + +def write_depth(path, depth, bits=1): + """Write depth map to pfm and png file. + + Args: + path (str): filepath without extension + depth (array): depth + """ + write_pfm(path + ".pfm", depth.astype(np.float32)) + + depth_min = depth.min() + depth_max = depth.max() + + max_val = (2**(8*bits))-1 + + if depth_max - depth_min > np.finfo("float").eps: + out = max_val * (depth - depth_min) / (depth_max - depth_min) + else: + out = np.zeros(depth.shape, dtype=depth.type) + + if bits == 1: + cv2.imwrite(path + ".png", out.astype("uint8")) + elif bits == 2: + cv2.imwrite(path + ".png", out.astype("uint16")) + + return diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mlsd/LICENSE b/extensions-builtin/forge_legacy_preprocessors/annotator/mlsd/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..d855c6db44b4e873eedd750d34fa2eaf22e22363 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mlsd/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2021-present NAVER Corp. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
\ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mlsd/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mlsd/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6f8c8c9bbf155693531ec8178695cfe7fbde6423 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mlsd/__init__.py @@ -0,0 +1,49 @@ +import cv2 +import numpy as np +import torch +import os + +from einops import rearrange +from .models.mbv2_mlsd_tiny import MobileV2_MLSD_Tiny +from .models.mbv2_mlsd_large import MobileV2_MLSD_Large +from .utils import pred_lines +from modules import devices +from annotator.annotator_path import models_path + +mlsdmodel = None +remote_model_path = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/mlsd_large_512_fp32.pth" +old_modeldir = os.path.dirname(os.path.realpath(__file__)) +modeldir = os.path.join(models_path, "mlsd") + +def unload_mlsd_model(): + global mlsdmodel + if mlsdmodel is not None: + mlsdmodel = mlsdmodel.cpu() + +def apply_mlsd(input_image, thr_v, thr_d): + global modelpath, mlsdmodel + if mlsdmodel is None: + modelpath = os.path.join(modeldir, "mlsd_large_512_fp32.pth") + old_modelpath = os.path.join(old_modeldir, "mlsd_large_512_fp32.pth") + if os.path.exists(old_modelpath): + modelpath = old_modelpath + elif not os.path.exists(modelpath): + from modules.modelloader import load_file_from_url + load_file_from_url(remote_model_path, model_dir=modeldir) + mlsdmodel = MobileV2_MLSD_Large() + mlsdmodel.load_state_dict(torch.load(modelpath), strict=True) + mlsdmodel = mlsdmodel.to(devices.get_device_for("controlnet")).eval() + + model = mlsdmodel + assert input_image.ndim == 3 + img = input_image + img_output = np.zeros_like(img) + try: + with torch.no_grad(): + lines = pred_lines(img, model, [img.shape[0], img.shape[1]], thr_v, thr_d) + for line in lines: + x_start, y_start, x_end, y_end = [int(val) for val in line] + cv2.line(img_output, (x_start, y_start), (x_end, y_end), [255, 255, 255], 1) + except Exception as e: + pass + return img_output[:, :, 0] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mlsd/models/mbv2_mlsd_large.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mlsd/models/mbv2_mlsd_large.py new file mode 100644 index 0000000000000000000000000000000000000000..5b9799e7573ca41549b3c3b13ac47b906b369603 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mlsd/models/mbv2_mlsd_large.py @@ -0,0 +1,292 @@ +import os +import sys +import torch +import torch.nn as nn +import torch.utils.model_zoo as model_zoo +from torch.nn import functional as F + + +class BlockTypeA(nn.Module): + def __init__(self, in_c1, in_c2, out_c1, out_c2, upscale = True): + super(BlockTypeA, self).__init__() + self.conv1 = nn.Sequential( + nn.Conv2d(in_c2, out_c2, kernel_size=1), + nn.BatchNorm2d(out_c2), + nn.ReLU(inplace=True) + ) + self.conv2 = nn.Sequential( + nn.Conv2d(in_c1, out_c1, kernel_size=1), + nn.BatchNorm2d(out_c1), + nn.ReLU(inplace=True) + ) + self.upscale = upscale + + def forward(self, a, b): + b = self.conv1(b) + a = self.conv2(a) + if self.upscale: + b = F.interpolate(b, scale_factor=2.0, mode='bilinear', align_corners=True) + return torch.cat((a, b), dim=1) + + +class BlockTypeB(nn.Module): + def __init__(self, in_c, out_c): + super(BlockTypeB, self).__init__() + self.conv1 = nn.Sequential( + nn.Conv2d(in_c, in_c, kernel_size=3, padding=1), + nn.BatchNorm2d(in_c), + nn.ReLU() + ) + 
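A hedged usage sketch for apply_mlsd() defined above. It only runs inside the webui process (the module imports modules.devices and downloads the MLSD checkpoint on first use); thr_v and thr_d are forwarded to pred_lines() as score_thr and dist_thr, and the return value is a single-channel map with the detected segments drawn in white:

import numpy as np
from annotator.mlsd import apply_mlsd   # assumes the extension's annotator package is on sys.path

img = np.zeros((512, 512, 3), dtype=np.uint8)        # HxWx3 input image
line_map = apply_mlsd(img, 0.1, 20.0)                # thr_v=0.1, thr_d=20.0
print(line_map.shape)                                # (512, 512)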
self.conv2 = nn.Sequential( + nn.Conv2d(in_c, out_c, kernel_size=3, padding=1), + nn.BatchNorm2d(out_c), + nn.ReLU() + ) + + def forward(self, x): + x = self.conv1(x) + x + x = self.conv2(x) + return x + +class BlockTypeC(nn.Module): + def __init__(self, in_c, out_c): + super(BlockTypeC, self).__init__() + self.conv1 = nn.Sequential( + nn.Conv2d(in_c, in_c, kernel_size=3, padding=5, dilation=5), + nn.BatchNorm2d(in_c), + nn.ReLU() + ) + self.conv2 = nn.Sequential( + nn.Conv2d(in_c, in_c, kernel_size=3, padding=1), + nn.BatchNorm2d(in_c), + nn.ReLU() + ) + self.conv3 = nn.Conv2d(in_c, out_c, kernel_size=1) + + def forward(self, x): + x = self.conv1(x) + x = self.conv2(x) + x = self.conv3(x) + return x + +def _make_divisible(v, divisor, min_value=None): + """ + This function is taken from the original tf repo. + It ensures that all layers have a channel number that is divisible by 8 + It can be seen here: + https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py + :param v: + :param divisor: + :param min_value: + :return: + """ + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. + if new_v < 0.9 * v: + new_v += divisor + return new_v + + +class ConvBNReLU(nn.Sequential): + def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1): + self.channel_pad = out_planes - in_planes + self.stride = stride + #padding = (kernel_size - 1) // 2 + + # TFLite uses slightly different padding than PyTorch + if stride == 2: + padding = 0 + else: + padding = (kernel_size - 1) // 2 + + super(ConvBNReLU, self).__init__( + nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False), + nn.BatchNorm2d(out_planes), + nn.ReLU6(inplace=True) + ) + self.max_pool = nn.MaxPool2d(kernel_size=stride, stride=stride) + + + def forward(self, x): + # TFLite uses different padding + if self.stride == 2: + x = F.pad(x, (0, 1, 0, 1), "constant", 0) + #print(x.shape) + + for module in self: + if not isinstance(module, nn.MaxPool2d): + x = module(x) + return x + + +class InvertedResidual(nn.Module): + def __init__(self, inp, oup, stride, expand_ratio): + super(InvertedResidual, self).__init__() + self.stride = stride + assert stride in [1, 2] + + hidden_dim = int(round(inp * expand_ratio)) + self.use_res_connect = self.stride == 1 and inp == oup + + layers = [] + if expand_ratio != 1: + # pw + layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1)) + layers.extend([ + # dw + ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim), + # pw-linear + nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + ]) + self.conv = nn.Sequential(*layers) + + def forward(self, x): + if self.use_res_connect: + return x + self.conv(x) + else: + return self.conv(x) + + +class MobileNetV2(nn.Module): + def __init__(self, pretrained=True): + """ + MobileNet V2 main class + Args: + num_classes (int): Number of classes + width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount + inverted_residual_setting: Network structure + round_nearest (int): Round the number of channels in each layer to be a multiple of this number + Set to 1 to turn off rounding + block: Module specifying inverted residual building block for mobilenet + """ + super(MobileNetV2, self).__init__() + + block = InvertedResidual + input_channel = 32 + last_channel = 1280 + width_mult = 1.0 + round_nearest = 8 + 
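_make_divisible() above snaps channel counts to a multiple of `divisor` while refusing to fall more than 10% below the requested width. Copied inline for a standalone check:

def make_divisible(v, divisor, min_value=None):
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:       # never drop more than 10% below the requested width
        new_v += divisor
    return new_v

print(make_divisible(32, 8))    # 32: already a multiple
print(make_divisible(18, 8))    # 24: 16 would be more than 10% below 18, so it is bumped up
print(make_divisible(17, 8))    # 16: within 10% of 17, so rounding down is fine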
+ inverted_residual_setting = [ + # t, c, n, s + [1, 16, 1, 1], + [6, 24, 2, 2], + [6, 32, 3, 2], + [6, 64, 4, 2], + [6, 96, 3, 1], + #[6, 160, 3, 2], + #[6, 320, 1, 1], + ] + + # only check the first element, assuming user knows t,c,n,s are required + if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4: + raise ValueError("inverted_residual_setting should be non-empty " + "or a 4-element list, got {}".format(inverted_residual_setting)) + + # building first layer + input_channel = _make_divisible(input_channel * width_mult, round_nearest) + self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest) + features = [ConvBNReLU(4, input_channel, stride=2)] + # building inverted residual blocks + for t, c, n, s in inverted_residual_setting: + output_channel = _make_divisible(c * width_mult, round_nearest) + for i in range(n): + stride = s if i == 0 else 1 + features.append(block(input_channel, output_channel, stride, expand_ratio=t)) + input_channel = output_channel + + self.features = nn.Sequential(*features) + self.fpn_selected = [1, 3, 6, 10, 13] + # weight initialization + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out') + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, nn.BatchNorm2d): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + nn.init.zeros_(m.bias) + if pretrained: + self._load_pretrained_model() + + def _forward_impl(self, x): + # This exists since TorchScript doesn't support inheritance, so the superclass method + # (this one) needs to have a name other than `forward` that can be accessed in a subclass + fpn_features = [] + for i, f in enumerate(self.features): + if i > self.fpn_selected[-1]: + break + x = f(x) + if i in self.fpn_selected: + fpn_features.append(x) + + c1, c2, c3, c4, c5 = fpn_features + return c1, c2, c3, c4, c5 + + + def forward(self, x): + return self._forward_impl(x) + + def _load_pretrained_model(self): + pretrain_dict = model_zoo.load_url('https://download.pytorch.org/models/mobilenet_v2-b0353104.pth') + model_dict = {} + state_dict = self.state_dict() + for k, v in pretrain_dict.items(): + if k in state_dict: + model_dict[k] = v + state_dict.update(model_dict) + self.load_state_dict(state_dict) + + +class MobileV2_MLSD_Large(nn.Module): + def __init__(self): + super(MobileV2_MLSD_Large, self).__init__() + + self.backbone = MobileNetV2(pretrained=False) + ## A, B + self.block15 = BlockTypeA(in_c1= 64, in_c2= 96, + out_c1= 64, out_c2=64, + upscale=False) + self.block16 = BlockTypeB(128, 64) + + ## A, B + self.block17 = BlockTypeA(in_c1 = 32, in_c2 = 64, + out_c1= 64, out_c2= 64) + self.block18 = BlockTypeB(128, 64) + + ## A, B + self.block19 = BlockTypeA(in_c1=24, in_c2=64, + out_c1=64, out_c2=64) + self.block20 = BlockTypeB(128, 64) + + ## A, B, C + self.block21 = BlockTypeA(in_c1=16, in_c2=64, + out_c1=64, out_c2=64) + self.block22 = BlockTypeB(128, 64) + + self.block23 = BlockTypeC(64, 16) + + def forward(self, x): + c1, c2, c3, c4, c5 = self.backbone(x) + + x = self.block15(c4, c5) + x = self.block16(x) + + x = self.block17(c3, x) + x = self.block18(x) + + x = self.block19(c2, x) + x = self.block20(x) + + x = self.block21(c1, x) + x = self.block22(x) + x = self.block23(x) + x = x[:, 7:, :, :] + + return x \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mlsd/models/mbv2_mlsd_tiny.py 
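_load_pretrained_model() above uses a filter-then-update pattern: only checkpoint entries whose keys exist in the current model are copied, and everything else keeps its fresh initialization (shape compatibility of the surviving keys is still up to the caller). The same pattern on a toy model:

import torch.nn as nn

target = nn.Sequential(nn.Linear(4, 4), nn.Linear(4, 2))   # model being initialized
source = nn.Sequential(nn.Linear(4, 4))                    # stand-in for a pretrained checkpoint

state = target.state_dict()
state.update({k: v for k, v in source.state_dict().items() if k in state})
target.load_state_dict(state)                              # layer "1" keeps its random init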
b/extensions-builtin/forge_legacy_preprocessors/annotator/mlsd/models/mbv2_mlsd_tiny.py new file mode 100644 index 0000000000000000000000000000000000000000..e3ed633f2cc23ea1829a627fdb879ab39f641f83 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mlsd/models/mbv2_mlsd_tiny.py @@ -0,0 +1,275 @@ +import os +import sys +import torch +import torch.nn as nn +import torch.utils.model_zoo as model_zoo +from torch.nn import functional as F + + +class BlockTypeA(nn.Module): + def __init__(self, in_c1, in_c2, out_c1, out_c2, upscale = True): + super(BlockTypeA, self).__init__() + self.conv1 = nn.Sequential( + nn.Conv2d(in_c2, out_c2, kernel_size=1), + nn.BatchNorm2d(out_c2), + nn.ReLU(inplace=True) + ) + self.conv2 = nn.Sequential( + nn.Conv2d(in_c1, out_c1, kernel_size=1), + nn.BatchNorm2d(out_c1), + nn.ReLU(inplace=True) + ) + self.upscale = upscale + + def forward(self, a, b): + b = self.conv1(b) + a = self.conv2(a) + b = F.interpolate(b, scale_factor=2.0, mode='bilinear', align_corners=True) + return torch.cat((a, b), dim=1) + + +class BlockTypeB(nn.Module): + def __init__(self, in_c, out_c): + super(BlockTypeB, self).__init__() + self.conv1 = nn.Sequential( + nn.Conv2d(in_c, in_c, kernel_size=3, padding=1), + nn.BatchNorm2d(in_c), + nn.ReLU() + ) + self.conv2 = nn.Sequential( + nn.Conv2d(in_c, out_c, kernel_size=3, padding=1), + nn.BatchNorm2d(out_c), + nn.ReLU() + ) + + def forward(self, x): + x = self.conv1(x) + x + x = self.conv2(x) + return x + +class BlockTypeC(nn.Module): + def __init__(self, in_c, out_c): + super(BlockTypeC, self).__init__() + self.conv1 = nn.Sequential( + nn.Conv2d(in_c, in_c, kernel_size=3, padding=5, dilation=5), + nn.BatchNorm2d(in_c), + nn.ReLU() + ) + self.conv2 = nn.Sequential( + nn.Conv2d(in_c, in_c, kernel_size=3, padding=1), + nn.BatchNorm2d(in_c), + nn.ReLU() + ) + self.conv3 = nn.Conv2d(in_c, out_c, kernel_size=1) + + def forward(self, x): + x = self.conv1(x) + x = self.conv2(x) + x = self.conv3(x) + return x + +def _make_divisible(v, divisor, min_value=None): + """ + This function is taken from the original tf repo. + It ensures that all layers have a channel number that is divisible by 8 + It can be seen here: + https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py + :param v: + :param divisor: + :param min_value: + :return: + """ + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. 
+ if new_v < 0.9 * v: + new_v += divisor + return new_v + + +class ConvBNReLU(nn.Sequential): + def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1): + self.channel_pad = out_planes - in_planes + self.stride = stride + #padding = (kernel_size - 1) // 2 + + # TFLite uses slightly different padding than PyTorch + if stride == 2: + padding = 0 + else: + padding = (kernel_size - 1) // 2 + + super(ConvBNReLU, self).__init__( + nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False), + nn.BatchNorm2d(out_planes), + nn.ReLU6(inplace=True) + ) + self.max_pool = nn.MaxPool2d(kernel_size=stride, stride=stride) + + + def forward(self, x): + # TFLite uses different padding + if self.stride == 2: + x = F.pad(x, (0, 1, 0, 1), "constant", 0) + #print(x.shape) + + for module in self: + if not isinstance(module, nn.MaxPool2d): + x = module(x) + return x + + +class InvertedResidual(nn.Module): + def __init__(self, inp, oup, stride, expand_ratio): + super(InvertedResidual, self).__init__() + self.stride = stride + assert stride in [1, 2] + + hidden_dim = int(round(inp * expand_ratio)) + self.use_res_connect = self.stride == 1 and inp == oup + + layers = [] + if expand_ratio != 1: + # pw + layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1)) + layers.extend([ + # dw + ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim), + # pw-linear + nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + ]) + self.conv = nn.Sequential(*layers) + + def forward(self, x): + if self.use_res_connect: + return x + self.conv(x) + else: + return self.conv(x) + + +class MobileNetV2(nn.Module): + def __init__(self, pretrained=True): + """ + MobileNet V2 main class + Args: + num_classes (int): Number of classes + width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount + inverted_residual_setting: Network structure + round_nearest (int): Round the number of channels in each layer to be a multiple of this number + Set to 1 to turn off rounding + block: Module specifying inverted residual building block for mobilenet + """ + super(MobileNetV2, self).__init__() + + block = InvertedResidual + input_channel = 32 + last_channel = 1280 + width_mult = 1.0 + round_nearest = 8 + + inverted_residual_setting = [ + # t, c, n, s + [1, 16, 1, 1], + [6, 24, 2, 2], + [6, 32, 3, 2], + [6, 64, 4, 2], + #[6, 96, 3, 1], + #[6, 160, 3, 2], + #[6, 320, 1, 1], + ] + + # only check the first element, assuming user knows t,c,n,s are required + if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4: + raise ValueError("inverted_residual_setting should be non-empty " + "or a 4-element list, got {}".format(inverted_residual_setting)) + + # building first layer + input_channel = _make_divisible(input_channel * width_mult, round_nearest) + self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest) + features = [ConvBNReLU(4, input_channel, stride=2)] + # building inverted residual blocks + for t, c, n, s in inverted_residual_setting: + output_channel = _make_divisible(c * width_mult, round_nearest) + for i in range(n): + stride = s if i == 0 else 1 + features.append(block(input_channel, output_channel, stride, expand_ratio=t)) + input_channel = output_channel + self.features = nn.Sequential(*features) + + self.fpn_selected = [3, 6, 10] + # weight initialization + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out') + if 
m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, nn.BatchNorm2d): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + nn.init.zeros_(m.bias) + + #if pretrained: + # self._load_pretrained_model() + + def _forward_impl(self, x): + # This exists since TorchScript doesn't support inheritance, so the superclass method + # (this one) needs to have a name other than `forward` that can be accessed in a subclass + fpn_features = [] + for i, f in enumerate(self.features): + if i > self.fpn_selected[-1]: + break + x = f(x) + if i in self.fpn_selected: + fpn_features.append(x) + + c2, c3, c4 = fpn_features + return c2, c3, c4 + + + def forward(self, x): + return self._forward_impl(x) + + def _load_pretrained_model(self): + pretrain_dict = model_zoo.load_url('https://download.pytorch.org/models/mobilenet_v2-b0353104.pth') + model_dict = {} + state_dict = self.state_dict() + for k, v in pretrain_dict.items(): + if k in state_dict: + model_dict[k] = v + state_dict.update(model_dict) + self.load_state_dict(state_dict) + + +class MobileV2_MLSD_Tiny(nn.Module): + def __init__(self): + super(MobileV2_MLSD_Tiny, self).__init__() + + self.backbone = MobileNetV2(pretrained=True) + + self.block12 = BlockTypeA(in_c1= 32, in_c2= 64, + out_c1= 64, out_c2=64) + self.block13 = BlockTypeB(128, 64) + + self.block14 = BlockTypeA(in_c1 = 24, in_c2 = 64, + out_c1= 32, out_c2= 32) + self.block15 = BlockTypeB(64, 64) + + self.block16 = BlockTypeC(64, 16) + + def forward(self, x): + c2, c3, c4 = self.backbone(x) + + x = self.block12(c3, c4) + x = self.block13(x) + x = self.block14(c2, x) + x = self.block15(x) + x = self.block16(x) + x = x[:, 7:, :, :] + #print(x.shape) + x = F.interpolate(x, scale_factor=2.0, mode='bilinear', align_corners=True) + + return x \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mlsd/utils.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mlsd/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..a9cc5d904d9dd34d2ba4c902f3993f7abbb7ac5e --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mlsd/utils.py @@ -0,0 +1,581 @@ +''' +modified by lihaoweicv +pytorch version +''' + +''' +M-LSD +Copyright 2021-present NAVER Corp. 
+Apache License v2.0 +''' + +import os +import numpy as np +import cv2 +import torch +from torch.nn import functional as F +from modules import devices + + +def deccode_output_score_and_ptss(tpMap, topk_n = 200, ksize = 5): + ''' + tpMap: + center: tpMap[1, 0, :, :] + displacement: tpMap[1, 1:5, :, :] + ''' + b, c, h, w = tpMap.shape + assert b==1, 'only support bsize==1' + displacement = tpMap[:, 1:5, :, :][0] + center = tpMap[:, 0, :, :] + heat = torch.sigmoid(center) + hmax = F.max_pool2d( heat, (ksize, ksize), stride=1, padding=(ksize-1)//2) + keep = (hmax == heat).float() + heat = heat * keep + heat = heat.reshape(-1, ) + + scores, indices = torch.topk(heat, topk_n, dim=-1, largest=True) + yy = torch.floor_divide(indices, w).unsqueeze(-1) + xx = torch.fmod(indices, w).unsqueeze(-1) + ptss = torch.cat((yy, xx),dim=-1) + + ptss = ptss.detach().cpu().numpy() + scores = scores.detach().cpu().numpy() + displacement = displacement.detach().cpu().numpy() + displacement = displacement.transpose((1,2,0)) + return ptss, scores, displacement + + +def pred_lines(image, model, + input_shape=[512, 512], + score_thr=0.10, + dist_thr=20.0): + h, w, _ = image.shape + h_ratio, w_ratio = [h / input_shape[0], w / input_shape[1]] + + resized_image = np.concatenate([cv2.resize(image, (input_shape[1], input_shape[0]), interpolation=cv2.INTER_AREA), + np.ones([input_shape[0], input_shape[1], 1])], axis=-1) + + resized_image = resized_image.transpose((2,0,1)) + batch_image = np.expand_dims(resized_image, axis=0).astype('float32') + batch_image = (batch_image / 127.5) - 1.0 + + batch_image = torch.from_numpy(batch_image).float().to(devices.get_device_for("controlnet")) + outputs = model(batch_image) + pts, pts_score, vmap = deccode_output_score_and_ptss(outputs, 200, 3) + start = vmap[:, :, :2] + end = vmap[:, :, 2:] + dist_map = np.sqrt(np.sum((start - end) ** 2, axis=-1)) + + segments_list = [] + for center, score in zip(pts, pts_score): + y, x = center + distance = dist_map[y, x] + if score > score_thr and distance > dist_thr: + disp_x_start, disp_y_start, disp_x_end, disp_y_end = vmap[y, x, :] + x_start = x + disp_x_start + y_start = y + disp_y_start + x_end = x + disp_x_end + y_end = y + disp_y_end + segments_list.append([x_start, y_start, x_end, y_end]) + + lines = 2 * np.array(segments_list) # 256 > 512 + lines[:, 0] = lines[:, 0] * w_ratio + lines[:, 1] = lines[:, 1] * h_ratio + lines[:, 2] = lines[:, 2] * w_ratio + lines[:, 3] = lines[:, 3] * h_ratio + + return lines + + +def pred_squares(image, + model, + input_shape=[512, 512], + params={'score': 0.06, + 'outside_ratio': 0.28, + 'inside_ratio': 0.45, + 'w_overlap': 0.0, + 'w_degree': 1.95, + 'w_length': 0.0, + 'w_area': 1.86, + 'w_center': 0.14}): + ''' + shape = [height, width] + ''' + h, w, _ = image.shape + original_shape = [h, w] + + resized_image = np.concatenate([cv2.resize(image, (input_shape[0], input_shape[1]), interpolation=cv2.INTER_AREA), + np.ones([input_shape[0], input_shape[1], 1])], axis=-1) + resized_image = resized_image.transpose((2, 0, 1)) + batch_image = np.expand_dims(resized_image, axis=0).astype('float32') + batch_image = (batch_image / 127.5) - 1.0 + + batch_image = torch.from_numpy(batch_image).float().to(devices.get_device_for("controlnet")) + outputs = model(batch_image) + + pts, pts_score, vmap = deccode_output_score_and_ptss(outputs, 200, 3) + start = vmap[:, :, :2] # (x, y) + end = vmap[:, :, 2:] # (x, y) + dist_map = np.sqrt(np.sum((start - end) ** 2, axis=-1)) + + junc_list = [] + segments_list = [] + for junc, score 
in zip(pts, pts_score): + y, x = junc + distance = dist_map[y, x] + if score > params['score'] and distance > 20.0: + junc_list.append([x, y]) + disp_x_start, disp_y_start, disp_x_end, disp_y_end = vmap[y, x, :] + d_arrow = 1.0 + x_start = x + d_arrow * disp_x_start + y_start = y + d_arrow * disp_y_start + x_end = x + d_arrow * disp_x_end + y_end = y + d_arrow * disp_y_end + segments_list.append([x_start, y_start, x_end, y_end]) + + segments = np.array(segments_list) + + ####### post processing for squares + # 1. get unique lines + point = np.array([[0, 0]]) + point = point[0] + start = segments[:, :2] + end = segments[:, 2:] + diff = start - end + a = diff[:, 1] + b = -diff[:, 0] + c = a * start[:, 0] + b * start[:, 1] + + d = np.abs(a * point[0] + b * point[1] - c) / np.sqrt(a ** 2 + b ** 2 + 1e-10) + theta = np.arctan2(diff[:, 0], diff[:, 1]) * 180 / np.pi + theta[theta < 0.0] += 180 + hough = np.concatenate([d[:, None], theta[:, None]], axis=-1) + + d_quant = 1 + theta_quant = 2 + hough[:, 0] //= d_quant + hough[:, 1] //= theta_quant + _, indices, counts = np.unique(hough, axis=0, return_index=True, return_counts=True) + + acc_map = np.zeros([512 // d_quant + 1, 360 // theta_quant + 1], dtype='float32') + idx_map = np.zeros([512 // d_quant + 1, 360 // theta_quant + 1], dtype='int32') - 1 + yx_indices = hough[indices, :].astype('int32') + acc_map[yx_indices[:, 0], yx_indices[:, 1]] = counts + idx_map[yx_indices[:, 0], yx_indices[:, 1]] = indices + + acc_map_np = acc_map + # acc_map = acc_map[None, :, :, None] + # + # ### fast suppression using tensorflow op + # acc_map = tf.constant(acc_map, dtype=tf.float32) + # max_acc_map = tf.keras.layers.MaxPool2D(pool_size=(5, 5), strides=1, padding='same')(acc_map) + # acc_map = acc_map * tf.cast(tf.math.equal(acc_map, max_acc_map), tf.float32) + # flatten_acc_map = tf.reshape(acc_map, [1, -1]) + # topk_values, topk_indices = tf.math.top_k(flatten_acc_map, k=len(pts)) + # _, h, w, _ = acc_map.shape + # y = tf.expand_dims(topk_indices // w, axis=-1) + # x = tf.expand_dims(topk_indices % w, axis=-1) + # yx = tf.concat([y, x], axis=-1) + + ### fast suppression using pytorch op + acc_map = torch.from_numpy(acc_map_np).unsqueeze(0).unsqueeze(0) + _,_, h, w = acc_map.shape + max_acc_map = F.max_pool2d(acc_map,kernel_size=5, stride=1, padding=2) + acc_map = acc_map * ( (acc_map == max_acc_map).float() ) + flatten_acc_map = acc_map.reshape([-1, ]) + + scores, indices = torch.topk(flatten_acc_map, len(pts), dim=-1, largest=True) + yy = torch.div(indices, w, rounding_mode='floor').unsqueeze(-1) + xx = torch.fmod(indices, w).unsqueeze(-1) + yx = torch.cat((yy, xx), dim=-1) + + yx = yx.detach().cpu().numpy() + + topk_values = scores.detach().cpu().numpy() + indices = idx_map[yx[:, 0], yx[:, 1]] + basis = 5 // 2 + + merged_segments = [] + for yx_pt, max_indice, value in zip(yx, indices, topk_values): + y, x = yx_pt + if max_indice == -1 or value == 0: + continue + segment_list = [] + for y_offset in range(-basis, basis + 1): + for x_offset in range(-basis, basis + 1): + indice = idx_map[y + y_offset, x + x_offset] + cnt = int(acc_map_np[y + y_offset, x + x_offset]) + if indice != -1: + segment_list.append(segments[indice]) + if cnt > 1: + check_cnt = 1 + current_hough = hough[indice] + for new_indice, new_hough in enumerate(hough): + if (current_hough == new_hough).all() and indice != new_indice: + segment_list.append(segments[new_indice]) + check_cnt += 1 + if check_cnt == cnt: + break + group_segments = np.array(segment_list).reshape([-1, 2]) + 
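        # The segments gathered above come from Hough cells (quantized distance-to-origin d,
        # angle theta) inside a 5x5 window around an accumulator peak, so they lie on roughly
        # the same infinite line.  The lines below collapse the group into one merged segment
        # by sorting the flattened (x, y) endpoints and keeping the two extreme corners;
        # theta >= 90 means the line slopes the other way, so the y coordinates of the merged
        # endpoints are swapped.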
sorted_group_segments = np.sort(group_segments, axis=0) + x_min, y_min = sorted_group_segments[0, :] + x_max, y_max = sorted_group_segments[-1, :] + + deg = theta[max_indice] + if deg >= 90: + merged_segments.append([x_min, y_max, x_max, y_min]) + else: + merged_segments.append([x_min, y_min, x_max, y_max]) + + # 2. get intersections + new_segments = np.array(merged_segments) # (x1, y1, x2, y2) + start = new_segments[:, :2] # (x1, y1) + end = new_segments[:, 2:] # (x2, y2) + new_centers = (start + end) / 2.0 + diff = start - end + dist_segments = np.sqrt(np.sum(diff ** 2, axis=-1)) + + # ax + by = c + a = diff[:, 1] + b = -diff[:, 0] + c = a * start[:, 0] + b * start[:, 1] + pre_det = a[:, None] * b[None, :] + det = pre_det - np.transpose(pre_det) + + pre_inter_y = a[:, None] * c[None, :] + inter_y = (pre_inter_y - np.transpose(pre_inter_y)) / (det + 1e-10) + pre_inter_x = c[:, None] * b[None, :] + inter_x = (pre_inter_x - np.transpose(pre_inter_x)) / (det + 1e-10) + inter_pts = np.concatenate([inter_x[:, :, None], inter_y[:, :, None]], axis=-1).astype('int32') + + # 3. get corner information + # 3.1 get distance + ''' + dist_segments: + | dist(0), dist(1), dist(2), ...| + dist_inter_to_segment1: + | dist(inter,0), dist(inter,0), dist(inter,0), ... | + | dist(inter,1), dist(inter,1), dist(inter,1), ... | + ... + dist_inter_to_semgnet2: + | dist(inter,0), dist(inter,1), dist(inter,2), ... | + | dist(inter,0), dist(inter,1), dist(inter,2), ... | + ... + ''' + + dist_inter_to_segment1_start = np.sqrt( + np.sum(((inter_pts - start[:, None, :]) ** 2), axis=-1, keepdims=True)) # [n_batch, n_batch, 1] + dist_inter_to_segment1_end = np.sqrt( + np.sum(((inter_pts - end[:, None, :]) ** 2), axis=-1, keepdims=True)) # [n_batch, n_batch, 1] + dist_inter_to_segment2_start = np.sqrt( + np.sum(((inter_pts - start[None, :, :]) ** 2), axis=-1, keepdims=True)) # [n_batch, n_batch, 1] + dist_inter_to_segment2_end = np.sqrt( + np.sum(((inter_pts - end[None, :, :]) ** 2), axis=-1, keepdims=True)) # [n_batch, n_batch, 1] + + # sort ascending + dist_inter_to_segment1 = np.sort( + np.concatenate([dist_inter_to_segment1_start, dist_inter_to_segment1_end], axis=-1), + axis=-1) # [n_batch, n_batch, 2] + dist_inter_to_segment2 = np.sort( + np.concatenate([dist_inter_to_segment2_start, dist_inter_to_segment2_end], axis=-1), + axis=-1) # [n_batch, n_batch, 2] + + # 3.2 get degree + inter_to_start = new_centers[:, None, :] - inter_pts + deg_inter_to_start = np.arctan2(inter_to_start[:, :, 1], inter_to_start[:, :, 0]) * 180 / np.pi + deg_inter_to_start[deg_inter_to_start < 0.0] += 360 + inter_to_end = new_centers[None, :, :] - inter_pts + deg_inter_to_end = np.arctan2(inter_to_end[:, :, 1], inter_to_end[:, :, 0]) * 180 / np.pi + deg_inter_to_end[deg_inter_to_end < 0.0] += 360 + + ''' + B -- G + | | + C -- R + B : blue / G: green / C: cyan / R: red + + 0 -- 1 + | | + 3 -- 2 + ''' + # rename variables + deg1_map, deg2_map = deg_inter_to_start, deg_inter_to_end + # sort deg ascending + deg_sort = np.sort(np.concatenate([deg1_map[:, :, None], deg2_map[:, :, None]], axis=-1), axis=-1) + + deg_diff_map = np.abs(deg1_map - deg2_map) + # we only consider the smallest degree of intersect + deg_diff_map[deg_diff_map > 180] = 360 - deg_diff_map[deg_diff_map > 180] + + # define available degree range + deg_range = [60, 120] + + corner_dict = {corner_info: [] for corner_info in range(4)} + inter_points = [] + for i in range(inter_pts.shape[0]): + for j in range(i + 1, inter_pts.shape[1]): + # i, j > line index, always i < j + x, y = 
inter_pts[i, j, :] + deg1, deg2 = deg_sort[i, j, :] + deg_diff = deg_diff_map[i, j] + + check_degree = deg_diff > deg_range[0] and deg_diff < deg_range[1] + + outside_ratio = params['outside_ratio'] # over ratio >>> drop it! + inside_ratio = params['inside_ratio'] # over ratio >>> drop it! + check_distance = ((dist_inter_to_segment1[i, j, 1] >= dist_segments[i] and \ + dist_inter_to_segment1[i, j, 0] <= dist_segments[i] * outside_ratio) or \ + (dist_inter_to_segment1[i, j, 1] <= dist_segments[i] and \ + dist_inter_to_segment1[i, j, 0] <= dist_segments[i] * inside_ratio)) and \ + ((dist_inter_to_segment2[i, j, 1] >= dist_segments[j] and \ + dist_inter_to_segment2[i, j, 0] <= dist_segments[j] * outside_ratio) or \ + (dist_inter_to_segment2[i, j, 1] <= dist_segments[j] and \ + dist_inter_to_segment2[i, j, 0] <= dist_segments[j] * inside_ratio)) + + if check_degree and check_distance: + corner_info = None + + if (deg1 >= 0 and deg1 <= 45 and deg2 >= 45 and deg2 <= 120) or \ + (deg2 >= 315 and deg1 >= 45 and deg1 <= 120): + corner_info, color_info = 0, 'blue' + elif (deg1 >= 45 and deg1 <= 125 and deg2 >= 125 and deg2 <= 225): + corner_info, color_info = 1, 'green' + elif (deg1 >= 125 and deg1 <= 225 and deg2 >= 225 and deg2 <= 315): + corner_info, color_info = 2, 'black' + elif (deg1 >= 0 and deg1 <= 45 and deg2 >= 225 and deg2 <= 315) or \ + (deg2 >= 315 and deg1 >= 225 and deg1 <= 315): + corner_info, color_info = 3, 'cyan' + else: + corner_info, color_info = 4, 'red' # we don't use it + continue + + corner_dict[corner_info].append([x, y, i, j]) + inter_points.append([x, y]) + + square_list = [] + connect_list = [] + segments_list = [] + for corner0 in corner_dict[0]: + for corner1 in corner_dict[1]: + connect01 = False + for corner0_line in corner0[2:]: + if corner0_line in corner1[2:]: + connect01 = True + break + if connect01: + for corner2 in corner_dict[2]: + connect12 = False + for corner1_line in corner1[2:]: + if corner1_line in corner2[2:]: + connect12 = True + break + if connect12: + for corner3 in corner_dict[3]: + connect23 = False + for corner2_line in corner2[2:]: + if corner2_line in corner3[2:]: + connect23 = True + break + if connect23: + for corner3_line in corner3[2:]: + if corner3_line in corner0[2:]: + # SQUARE!!! + ''' + 0 -- 1 + | | + 3 -- 2 + square_list: + order: 0 > 1 > 2 > 3 + | x0, y0, x1, y1, x2, y2, x3, y3 | + | x0, y0, x1, y1, x2, y2, x3, y3 | + ... + connect_list: + order: 01 > 12 > 23 > 30 + | line_idx01, line_idx12, line_idx23, line_idx30 | + | line_idx01, line_idx12, line_idx23, line_idx30 | + ... + segments_list: + order: 0 > 1 > 2 > 3 + | line_idx0_i, line_idx0_j, line_idx1_i, line_idx1_j, line_idx2_i, line_idx2_j, line_idx3_i, line_idx3_j | + | line_idx0_i, line_idx0_j, line_idx1_i, line_idx1_j, line_idx2_i, line_idx2_j, line_idx3_i, line_idx3_j | + ... 
+ ''' + square_list.append(corner0[:2] + corner1[:2] + corner2[:2] + corner3[:2]) + connect_list.append([corner0_line, corner1_line, corner2_line, corner3_line]) + segments_list.append(corner0[2:] + corner1[2:] + corner2[2:] + corner3[2:]) + + def check_outside_inside(segments_info, connect_idx): + # return 'outside or inside', min distance, cover_param, peri_param + if connect_idx == segments_info[0]: + check_dist_mat = dist_inter_to_segment1 + else: + check_dist_mat = dist_inter_to_segment2 + + i, j = segments_info + min_dist, max_dist = check_dist_mat[i, j, :] + connect_dist = dist_segments[connect_idx] + if max_dist > connect_dist: + return 'outside', min_dist, 0, 1 + else: + return 'inside', min_dist, -1, -1 + + top_square = None + + try: + map_size = input_shape[0] / 2 + squares = np.array(square_list).reshape([-1, 4, 2]) + score_array = [] + connect_array = np.array(connect_list) + segments_array = np.array(segments_list).reshape([-1, 4, 2]) + + # get degree of corners: + squares_rollup = np.roll(squares, 1, axis=1) + squares_rolldown = np.roll(squares, -1, axis=1) + vec1 = squares_rollup - squares + normalized_vec1 = vec1 / (np.linalg.norm(vec1, axis=-1, keepdims=True) + 1e-10) + vec2 = squares_rolldown - squares + normalized_vec2 = vec2 / (np.linalg.norm(vec2, axis=-1, keepdims=True) + 1e-10) + inner_products = np.sum(normalized_vec1 * normalized_vec2, axis=-1) # [n_squares, 4] + squares_degree = np.arccos(inner_products) * 180 / np.pi # [n_squares, 4] + + # get square score + overlap_scores = [] + degree_scores = [] + length_scores = [] + + for connects, segments, square, degree in zip(connect_array, segments_array, squares, squares_degree): + ''' + 0 -- 1 + | | + 3 -- 2 + + # segments: [4, 2] + # connects: [4] + ''' + + ###################################### OVERLAP SCORES + cover = 0 + perimeter = 0 + # check 0 > 1 > 2 > 3 + square_length = [] + + for start_idx in range(4): + end_idx = (start_idx + 1) % 4 + + connect_idx = connects[start_idx] # segment idx of segment01 + start_segments = segments[start_idx] + end_segments = segments[end_idx] + + start_point = square[start_idx] + end_point = square[end_idx] + + # check whether outside or inside + start_position, start_min, start_cover_param, start_peri_param = check_outside_inside(start_segments, + connect_idx) + end_position, end_min, end_cover_param, end_peri_param = check_outside_inside(end_segments, connect_idx) + + cover += dist_segments[connect_idx] + start_cover_param * start_min + end_cover_param * end_min + perimeter += dist_segments[connect_idx] + start_peri_param * start_min + end_peri_param * end_min + + square_length.append( + dist_segments[connect_idx] + start_peri_param * start_min + end_peri_param * end_min) + + overlap_scores.append(cover / perimeter) + ###################################### + ###################################### DEGREE SCORES + ''' + deg0 vs deg2 + deg1 vs deg3 + ''' + deg0, deg1, deg2, deg3 = degree + deg_ratio1 = deg0 / deg2 + if deg_ratio1 > 1.0: + deg_ratio1 = 1 / deg_ratio1 + deg_ratio2 = deg1 / deg3 + if deg_ratio2 > 1.0: + deg_ratio2 = 1 / deg_ratio2 + degree_scores.append((deg_ratio1 + deg_ratio2) / 2) + ###################################### + ###################################### LENGTH SCORES + ''' + len0 vs len2 + len1 vs len3 + ''' + len0, len1, len2, len3 = square_length + len_ratio1 = len0 / len2 if len2 > len0 else len2 / len0 + len_ratio2 = len1 / len3 if len3 > len1 else len3 / len1 + length_scores.append((len_ratio1 + len_ratio2) / 2) + + 
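        # Each pass of the loop above appends one overlap, degree and length score for a
        # candidate square; once all candidates are scored, the block below adds area and
        # center terms and ranks the squares by
        #     score = w_overlap*overlap + w_degree*degree + w_area*area
        #             - w_center*center + w_length*length
        # With the default params these weights are 0.0, 1.95, 1.86, 0.14 and 0.0, so the
        # ranking is dominated by corner rectangularity (degree) and relative size (area).
        # Note that the hard-coded score_w list further down is unused; the weights always
        # come from params.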
###################################### + + overlap_scores = np.array(overlap_scores) + overlap_scores /= np.max(overlap_scores) + + degree_scores = np.array(degree_scores) + # degree_scores /= np.max(degree_scores) + + length_scores = np.array(length_scores) + + ###################################### AREA SCORES + area_scores = np.reshape(squares, [-1, 4, 2]) + area_x = area_scores[:, :, 0] + area_y = area_scores[:, :, 1] + correction = area_x[:, -1] * area_y[:, 0] - area_y[:, -1] * area_x[:, 0] + area_scores = np.sum(area_x[:, :-1] * area_y[:, 1:], axis=-1) - np.sum(area_y[:, :-1] * area_x[:, 1:], axis=-1) + area_scores = 0.5 * np.abs(area_scores + correction) + area_scores /= (map_size * map_size) # np.max(area_scores) + ###################################### + + ###################################### CENTER SCORES + centers = np.array([[256 // 2, 256 // 2]], dtype='float32') # [1, 2] + # squares: [n, 4, 2] + square_centers = np.mean(squares, axis=1) # [n, 2] + center2center = np.sqrt(np.sum((centers - square_centers) ** 2)) + center_scores = center2center / (map_size / np.sqrt(2.0)) + + ''' + score_w = [overlap, degree, area, center, length] + ''' + score_w = [0.0, 1.0, 10.0, 0.5, 1.0] + score_array = params['w_overlap'] * overlap_scores \ + + params['w_degree'] * degree_scores \ + + params['w_area'] * area_scores \ + - params['w_center'] * center_scores \ + + params['w_length'] * length_scores + + best_square = [] + + sorted_idx = np.argsort(score_array)[::-1] + score_array = score_array[sorted_idx] + squares = squares[sorted_idx] + + except Exception as e: + pass + + '''return list + merged_lines, squares, scores + ''' + + try: + new_segments[:, 0] = new_segments[:, 0] * 2 / input_shape[1] * original_shape[1] + new_segments[:, 1] = new_segments[:, 1] * 2 / input_shape[0] * original_shape[0] + new_segments[:, 2] = new_segments[:, 2] * 2 / input_shape[1] * original_shape[1] + new_segments[:, 3] = new_segments[:, 3] * 2 / input_shape[0] * original_shape[0] + except: + new_segments = [] + + try: + squares[:, :, 0] = squares[:, :, 0] * 2 / input_shape[1] * original_shape[1] + squares[:, :, 1] = squares[:, :, 1] * 2 / input_shape[0] * original_shape[0] + except: + squares = [] + score_array = [] + + try: + inter_points = np.array(inter_points) + inter_points[:, 0] = inter_points[:, 0] * 2 / input_shape[1] * original_shape[1] + inter_points[:, 1] = inter_points[:, 1] * 2 / input_shape[0] * original_shape[0] + except: + inter_points = [] + + return new_segments, squares, score_array, inter_points diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..210a2989138380559f23045b568d0fbbeb918c03 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) OpenMMLab. All rights reserved. +# flake8: noqa +from .arraymisc import * +from .fileio import * +from .image import * +from .utils import * +from .version import * +from .video import * +from .visualization import * + +# The following modules are not imported to this level, so mmcv may be used +# without PyTorch. 
+# - runner +# - parallel +# - op diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/arraymisc/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/arraymisc/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4b4700d6139ae3d604ff6e542468cce4200c020c --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/arraymisc/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .quantization import dequantize, quantize + +__all__ = ['quantize', 'dequantize'] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/arraymisc/quantization.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/arraymisc/quantization.py new file mode 100644 index 0000000000000000000000000000000000000000..8e47a3545780cf071a1ef8195efb0b7b662c8186 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/arraymisc/quantization.py @@ -0,0 +1,55 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import numpy as np + + +def quantize(arr, min_val, max_val, levels, dtype=np.int64): + """Quantize an array of (-inf, inf) to [0, levels-1]. + + Args: + arr (ndarray): Input array. + min_val (scalar): Minimum value to be clipped. + max_val (scalar): Maximum value to be clipped. + levels (int): Quantization levels. + dtype (np.type): The type of the quantized array. + + Returns: + tuple: Quantized array. + """ + if not (isinstance(levels, int) and levels > 1): + raise ValueError( + f'levels must be a positive integer, but got {levels}') + if min_val >= max_val: + raise ValueError( + f'min_val ({min_val}) must be smaller than max_val ({max_val})') + + arr = np.clip(arr, min_val, max_val) - min_val + quantized_arr = np.minimum( + np.floor(levels * arr / (max_val - min_val)).astype(dtype), levels - 1) + + return quantized_arr + + +def dequantize(arr, min_val, max_val, levels, dtype=np.float64): + """Dequantize an array. + + Args: + arr (ndarray): Input array. + min_val (scalar): Minimum value to be clipped. + max_val (scalar): Maximum value to be clipped. + levels (int): Quantization levels. + dtype (np.type): The type of the dequantized array. + + Returns: + tuple: Dequantized array. + """ + if not (isinstance(levels, int) and levels > 1): + raise ValueError( + f'levels must be a positive integer, but got {levels}') + if min_val >= max_val: + raise ValueError( + f'min_val ({min_val}) must be smaller than max_val ({max_val})') + + dequantized_arr = (arr + 0.5).astype(dtype) * (max_val - + min_val) / levels + min_val + + return dequantized_arr diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7246c897430f0cc7ce12719ad8608824fc734446 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/__init__.py @@ -0,0 +1,41 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
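# Illustrative round trip through the quantize/dequantize helpers defined above
# (a sketch for clarity, not part of the original patch; it assumes the vendored
# package is importable as annotator.mmpkg.mmcv.arraymisc):
import numpy as np
from annotator.mmpkg.mmcv.arraymisc import quantize, dequantize

arr = np.array([-1.0, -0.3, 0.0, 0.42, 1.0])
q = quantize(arr, min_val=-1.0, max_val=1.0, levels=16)          # ints in [0, 15]
restored = dequantize(q, min_val=-1.0, max_val=1.0, levels=16)   # bin centers
# values are recovered to within half a bin width, i.e. (2.0 / 16) / 2
assert np.all(np.abs(restored - arr) <= (2.0 / 16) / 2 + 1e-9)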
+from .alexnet import AlexNet +# yapf: disable +from .bricks import (ACTIVATION_LAYERS, CONV_LAYERS, NORM_LAYERS, + PADDING_LAYERS, PLUGIN_LAYERS, UPSAMPLE_LAYERS, + ContextBlock, Conv2d, Conv3d, ConvAWS2d, ConvModule, + ConvTranspose2d, ConvTranspose3d, ConvWS2d, + DepthwiseSeparableConvModule, GeneralizedAttention, + HSigmoid, HSwish, Linear, MaxPool2d, MaxPool3d, + NonLocal1d, NonLocal2d, NonLocal3d, Scale, Swish, + build_activation_layer, build_conv_layer, + build_norm_layer, build_padding_layer, build_plugin_layer, + build_upsample_layer, conv_ws_2d, is_norm) +from .builder import MODELS, build_model_from_cfg +# yapf: enable +from .resnet import ResNet, make_res_layer +from .utils import (INITIALIZERS, Caffe2XavierInit, ConstantInit, KaimingInit, + NormalInit, PretrainedInit, TruncNormalInit, UniformInit, + XavierInit, bias_init_with_prob, caffe2_xavier_init, + constant_init, fuse_conv_bn, get_model_complexity_info, + initialize, kaiming_init, normal_init, trunc_normal_init, + uniform_init, xavier_init) +from .vgg import VGG, make_vgg_layer + +__all__ = [ + 'AlexNet', 'VGG', 'make_vgg_layer', 'ResNet', 'make_res_layer', + 'constant_init', 'xavier_init', 'normal_init', 'trunc_normal_init', + 'uniform_init', 'kaiming_init', 'caffe2_xavier_init', + 'bias_init_with_prob', 'ConvModule', 'build_activation_layer', + 'build_conv_layer', 'build_norm_layer', 'build_padding_layer', + 'build_upsample_layer', 'build_plugin_layer', 'is_norm', 'NonLocal1d', + 'NonLocal2d', 'NonLocal3d', 'ContextBlock', 'HSigmoid', 'Swish', 'HSwish', + 'GeneralizedAttention', 'ACTIVATION_LAYERS', 'CONV_LAYERS', 'NORM_LAYERS', + 'PADDING_LAYERS', 'UPSAMPLE_LAYERS', 'PLUGIN_LAYERS', 'Scale', + 'get_model_complexity_info', 'conv_ws_2d', 'ConvAWS2d', 'ConvWS2d', + 'fuse_conv_bn', 'DepthwiseSeparableConvModule', 'Linear', 'Conv2d', + 'ConvTranspose2d', 'MaxPool2d', 'ConvTranspose3d', 'MaxPool3d', 'Conv3d', + 'initialize', 'INITIALIZERS', 'ConstantInit', 'XavierInit', 'NormalInit', + 'TruncNormalInit', 'UniformInit', 'KaimingInit', 'PretrainedInit', + 'Caffe2XavierInit', 'MODELS', 'build_model_from_cfg' +] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/alexnet.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/alexnet.py new file mode 100644 index 0000000000000000000000000000000000000000..89e36b8c7851f895d9ae7f07149f0e707456aab0 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/alexnet.py @@ -0,0 +1,61 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import logging + +import torch.nn as nn + + +class AlexNet(nn.Module): + """AlexNet backbone. + + Args: + num_classes (int): number of classes for classification. 
+ """ + + def __init__(self, num_classes=-1): + super(AlexNet, self).__init__() + self.num_classes = num_classes + self.features = nn.Sequential( + nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2), + nn.Conv2d(64, 192, kernel_size=5, padding=2), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2), + nn.Conv2d(192, 384, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(384, 256, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(256, 256, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2), + ) + if self.num_classes > 0: + self.classifier = nn.Sequential( + nn.Dropout(), + nn.Linear(256 * 6 * 6, 4096), + nn.ReLU(inplace=True), + nn.Dropout(), + nn.Linear(4096, 4096), + nn.ReLU(inplace=True), + nn.Linear(4096, num_classes), + ) + + def init_weights(self, pretrained=None): + if isinstance(pretrained, str): + logger = logging.getLogger() + from ..runner import load_checkpoint + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + # use default initializer + pass + else: + raise TypeError('pretrained must be a str or None') + + def forward(self, x): + + x = self.features(x) + if self.num_classes > 0: + x = x.view(x.size(0), 256 * 6 * 6) + x = self.classifier(x) + + return x diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0f33124ed23fc6f27119a37bcb5ab004d3572be0 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/__init__.py @@ -0,0 +1,35 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
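# Illustrative usage of the AlexNet backbone defined above (a sketch, not part of
# the original patch; it assumes the vendored package is importable as
# annotator.mmpkg.mmcv.cnn).  With num_classes > 0 the classifier head is built,
# and 224x224 inputs flatten to the expected 256 * 6 * 6 features.
import torch
from annotator.mmpkg.mmcv.cnn import AlexNet

model = AlexNet(num_classes=10)
model.init_weights()                          # no checkpoint given -> default init
logits = model(torch.randn(1, 3, 224, 224))   # -> torch.Size([1, 10])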
+from .activation import build_activation_layer +from .context_block import ContextBlock +from .conv import build_conv_layer +from .conv2d_adaptive_padding import Conv2dAdaptivePadding +from .conv_module import ConvModule +from .conv_ws import ConvAWS2d, ConvWS2d, conv_ws_2d +from .depthwise_separable_conv_module import DepthwiseSeparableConvModule +from .drop import Dropout, DropPath +from .generalized_attention import GeneralizedAttention +from .hsigmoid import HSigmoid +from .hswish import HSwish +from .non_local import NonLocal1d, NonLocal2d, NonLocal3d +from .norm import build_norm_layer, is_norm +from .padding import build_padding_layer +from .plugin import build_plugin_layer +from .registry import (ACTIVATION_LAYERS, CONV_LAYERS, NORM_LAYERS, + PADDING_LAYERS, PLUGIN_LAYERS, UPSAMPLE_LAYERS) +from .scale import Scale +from .swish import Swish +from .upsample import build_upsample_layer +from .wrappers import (Conv2d, Conv3d, ConvTranspose2d, ConvTranspose3d, + Linear, MaxPool2d, MaxPool3d) + +__all__ = [ + 'ConvModule', 'build_activation_layer', 'build_conv_layer', + 'build_norm_layer', 'build_padding_layer', 'build_upsample_layer', + 'build_plugin_layer', 'is_norm', 'HSigmoid', 'HSwish', 'NonLocal1d', + 'NonLocal2d', 'NonLocal3d', 'ContextBlock', 'GeneralizedAttention', + 'ACTIVATION_LAYERS', 'CONV_LAYERS', 'NORM_LAYERS', 'PADDING_LAYERS', + 'UPSAMPLE_LAYERS', 'PLUGIN_LAYERS', 'Scale', 'ConvAWS2d', 'ConvWS2d', + 'conv_ws_2d', 'DepthwiseSeparableConvModule', 'Swish', 'Linear', + 'Conv2dAdaptivePadding', 'Conv2d', 'ConvTranspose2d', 'MaxPool2d', + 'ConvTranspose3d', 'MaxPool3d', 'Conv3d', 'Dropout', 'DropPath' +] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/activation.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/activation.py new file mode 100644 index 0000000000000000000000000000000000000000..a8951058c8e77eda02c130f3401c9680702e231c --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/activation.py @@ -0,0 +1,92 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +import torch.nn.functional as F + +from annotator.mmpkg.mmcv.utils import TORCH_VERSION, build_from_cfg, digit_version +from .registry import ACTIVATION_LAYERS + +for module in [ + nn.ReLU, nn.LeakyReLU, nn.PReLU, nn.RReLU, nn.ReLU6, nn.ELU, + nn.Sigmoid, nn.Tanh +]: + ACTIVATION_LAYERS.register_module(module=module) + + +@ACTIVATION_LAYERS.register_module(name='Clip') +@ACTIVATION_LAYERS.register_module() +class Clamp(nn.Module): + """Clamp activation layer. + + This activation function is to clamp the feature map value within + :math:`[min, max]`. More details can be found in ``torch.clamp()``. + + Args: + min (Number | optional): Lower-bound of the range to be clamped to. + Default to -1. + max (Number | optional): Upper-bound of the range to be clamped to. + Default to 1. + """ + + def __init__(self, min=-1., max=1.): + super(Clamp, self).__init__() + self.min = min + self.max = max + + def forward(self, x): + """Forward function. + + Args: + x (torch.Tensor): The input tensor. + + Returns: + torch.Tensor: Clamped tensor. + """ + return torch.clamp(x, min=self.min, max=self.max) + + +class GELU(nn.Module): + r"""Applies the Gaussian Error Linear Units function: + + .. math:: + \text{GELU}(x) = x * \Phi(x) + where :math:`\Phi(x)` is the Cumulative Distribution Function for + Gaussian Distribution. 
+ + Shape: + - Input: :math:`(N, *)` where `*` means, any number of additional + dimensions + - Output: :math:`(N, *)`, same shape as the input + + .. image:: scripts/activation_images/GELU.png + + Examples:: + + >>> m = nn.GELU() + >>> input = torch.randn(2) + >>> output = m(input) + """ + + def forward(self, input): + return F.gelu(input) + + +if (TORCH_VERSION == 'parrots' + or digit_version(TORCH_VERSION) < digit_version('1.4')): + ACTIVATION_LAYERS.register_module(module=GELU) +else: + ACTIVATION_LAYERS.register_module(module=nn.GELU) + + +def build_activation_layer(cfg): + """Build activation layer. + + Args: + cfg (dict): The activation layer config, which should contain: + - type (str): Layer type. + - layer args: Args needed to instantiate an activation layer. + + Returns: + nn.Module: Created activation layer. + """ + return build_from_cfg(cfg, ACTIVATION_LAYERS) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/context_block.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/context_block.py new file mode 100644 index 0000000000000000000000000000000000000000..d60fdb904c749ce3b251510dff3cc63cea70d42e --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/context_block.py @@ -0,0 +1,125 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from torch import nn + +from ..utils import constant_init, kaiming_init +from .registry import PLUGIN_LAYERS + + +def last_zero_init(m): + if isinstance(m, nn.Sequential): + constant_init(m[-1], val=0) + else: + constant_init(m, val=0) + + +@PLUGIN_LAYERS.register_module() +class ContextBlock(nn.Module): + """ContextBlock module in GCNet. + + See 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' + (https://arxiv.org/abs/1904.11492) for details. + + Args: + in_channels (int): Channels of the input feature map. + ratio (float): Ratio of channels of transform bottleneck + pooling_type (str): Pooling method for context modeling. + Options are 'att' and 'avg', stand for attention pooling and + average pooling respectively. Default: 'att'. + fusion_types (Sequence[str]): Fusion method for feature fusion, + Options are 'channels_add', 'channel_mul', stand for channelwise + addition and multiplication respectively. 
Default: ('channel_add',) + """ + + _abbr_ = 'context_block' + + def __init__(self, + in_channels, + ratio, + pooling_type='att', + fusion_types=('channel_add', )): + super(ContextBlock, self).__init__() + assert pooling_type in ['avg', 'att'] + assert isinstance(fusion_types, (list, tuple)) + valid_fusion_types = ['channel_add', 'channel_mul'] + assert all([f in valid_fusion_types for f in fusion_types]) + assert len(fusion_types) > 0, 'at least one fusion should be used' + self.in_channels = in_channels + self.ratio = ratio + self.planes = int(in_channels * ratio) + self.pooling_type = pooling_type + self.fusion_types = fusion_types + if pooling_type == 'att': + self.conv_mask = nn.Conv2d(in_channels, 1, kernel_size=1) + self.softmax = nn.Softmax(dim=2) + else: + self.avg_pool = nn.AdaptiveAvgPool2d(1) + if 'channel_add' in fusion_types: + self.channel_add_conv = nn.Sequential( + nn.Conv2d(self.in_channels, self.planes, kernel_size=1), + nn.LayerNorm([self.planes, 1, 1]), + nn.ReLU(inplace=True), # yapf: disable + nn.Conv2d(self.planes, self.in_channels, kernel_size=1)) + else: + self.channel_add_conv = None + if 'channel_mul' in fusion_types: + self.channel_mul_conv = nn.Sequential( + nn.Conv2d(self.in_channels, self.planes, kernel_size=1), + nn.LayerNorm([self.planes, 1, 1]), + nn.ReLU(inplace=True), # yapf: disable + nn.Conv2d(self.planes, self.in_channels, kernel_size=1)) + else: + self.channel_mul_conv = None + self.reset_parameters() + + def reset_parameters(self): + if self.pooling_type == 'att': + kaiming_init(self.conv_mask, mode='fan_in') + self.conv_mask.inited = True + + if self.channel_add_conv is not None: + last_zero_init(self.channel_add_conv) + if self.channel_mul_conv is not None: + last_zero_init(self.channel_mul_conv) + + def spatial_pool(self, x): + batch, channel, height, width = x.size() + if self.pooling_type == 'att': + input_x = x + # [N, C, H * W] + input_x = input_x.view(batch, channel, height * width) + # [N, 1, C, H * W] + input_x = input_x.unsqueeze(1) + # [N, 1, H, W] + context_mask = self.conv_mask(x) + # [N, 1, H * W] + context_mask = context_mask.view(batch, 1, height * width) + # [N, 1, H * W] + context_mask = self.softmax(context_mask) + # [N, 1, H * W, 1] + context_mask = context_mask.unsqueeze(-1) + # [N, 1, C, 1] + context = torch.matmul(input_x, context_mask) + # [N, C, 1, 1] + context = context.view(batch, channel, 1, 1) + else: + # [N, C, 1, 1] + context = self.avg_pool(x) + + return context + + def forward(self, x): + # [N, C, 1, 1] + context = self.spatial_pool(x) + + out = x + if self.channel_mul_conv is not None: + # [N, C, 1, 1] + channel_mul_term = torch.sigmoid(self.channel_mul_conv(context)) + out = out * channel_mul_term + if self.channel_add_conv is not None: + # [N, C, 1, 1] + channel_add_term = self.channel_add_conv(context) + out = out + channel_add_term + + return out diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/conv.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/conv.py new file mode 100644 index 0000000000000000000000000000000000000000..cf54491997a48ac3e7fadc4183ab7bf3e831024c --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/conv.py @@ -0,0 +1,44 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
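# Illustrative usage of the ContextBlock defined above (a sketch, not part of the
# original patch; it assumes the vendored package is importable as
# annotator.mmpkg.mmcv.cnn).  The block is shape preserving, so it can be inserted
# after any conv stage; ratio=1/16 gives the usual GCNet bottleneck width.
import torch
from annotator.mmpkg.mmcv.cnn import ContextBlock

gc = ContextBlock(in_channels=64, ratio=1. / 16)
feat = torch.randn(2, 64, 32, 32)
out = gc(feat)        # global context pooled, transformed, then added back
print(out.shape)      # torch.Size([2, 64, 32, 32])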
+from torch import nn + +from .registry import CONV_LAYERS + +CONV_LAYERS.register_module('Conv1d', module=nn.Conv1d) +CONV_LAYERS.register_module('Conv2d', module=nn.Conv2d) +CONV_LAYERS.register_module('Conv3d', module=nn.Conv3d) +CONV_LAYERS.register_module('Conv', module=nn.Conv2d) + + +def build_conv_layer(cfg, *args, **kwargs): + """Build convolution layer. + + Args: + cfg (None or dict): The conv layer config, which should contain: + - type (str): Layer type. + - layer args: Args needed to instantiate an conv layer. + args (argument list): Arguments passed to the `__init__` + method of the corresponding conv layer. + kwargs (keyword arguments): Keyword arguments passed to the `__init__` + method of the corresponding conv layer. + + Returns: + nn.Module: Created conv layer. + """ + if cfg is None: + cfg_ = dict(type='Conv2d') + else: + if not isinstance(cfg, dict): + raise TypeError('cfg must be a dict') + if 'type' not in cfg: + raise KeyError('the cfg dict must contain the key "type"') + cfg_ = cfg.copy() + + layer_type = cfg_.pop('type') + if layer_type not in CONV_LAYERS: + raise KeyError(f'Unrecognized norm type {layer_type}') + else: + conv_layer = CONV_LAYERS.get(layer_type) + + layer = conv_layer(*args, **kwargs, **cfg_) + + return layer diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/conv2d_adaptive_padding.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/conv2d_adaptive_padding.py new file mode 100644 index 0000000000000000000000000000000000000000..b45e758ac6cf8dfb0382d072fe09125bc7e9b888 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/conv2d_adaptive_padding.py @@ -0,0 +1,62 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import math + +from torch import nn +from torch.nn import functional as F + +from .registry import CONV_LAYERS + + +@CONV_LAYERS.register_module() +class Conv2dAdaptivePadding(nn.Conv2d): + """Implementation of 2D convolution in tensorflow with `padding` as "same", + which applies padding to input (if needed) so that input image gets fully + covered by filter and stride you specified. For stride 1, this will ensure + that output image size is same as input. For stride of 2, output dimensions + will be half, for example. + + Args: + in_channels (int): Number of channels in the input image + out_channels (int): Number of channels produced by the convolution + kernel_size (int or tuple): Size of the convolving kernel + stride (int or tuple, optional): Stride of the convolution. Default: 1 + padding (int or tuple, optional): Zero-padding added to both sides of + the input. Default: 0 + dilation (int or tuple, optional): Spacing between kernel elements. + Default: 1 + groups (int, optional): Number of blocked connections from input + channels to output channels. Default: 1 + bias (bool, optional): If ``True``, adds a learnable bias to the + output. 
Default: ``True`` + """ + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + bias=True): + super().__init__(in_channels, out_channels, kernel_size, stride, 0, + dilation, groups, bias) + + def forward(self, x): + img_h, img_w = x.size()[-2:] + kernel_h, kernel_w = self.weight.size()[-2:] + stride_h, stride_w = self.stride + output_h = math.ceil(img_h / stride_h) + output_w = math.ceil(img_w / stride_w) + pad_h = ( + max((output_h - 1) * self.stride[0] + + (kernel_h - 1) * self.dilation[0] + 1 - img_h, 0)) + pad_w = ( + max((output_w - 1) * self.stride[1] + + (kernel_w - 1) * self.dilation[1] + 1 - img_w, 0)) + if pad_h > 0 or pad_w > 0: + x = F.pad(x, [ + pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2 + ]) + return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, + self.dilation, self.groups) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/conv_module.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/conv_module.py new file mode 100644 index 0000000000000000000000000000000000000000..43cab72624ccc04b2f7877383588a4bbacf9117a --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/conv_module.py @@ -0,0 +1,206 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import warnings + +import torch.nn as nn + +from annotator.mmpkg.mmcv.utils import _BatchNorm, _InstanceNorm +from ..utils import constant_init, kaiming_init +from .activation import build_activation_layer +from .conv import build_conv_layer +from .norm import build_norm_layer +from .padding import build_padding_layer +from .registry import PLUGIN_LAYERS + + +@PLUGIN_LAYERS.register_module() +class ConvModule(nn.Module): + """A conv block that bundles conv/norm/activation layers. + + This block simplifies the usage of convolution layers, which are commonly + used with a norm layer (e.g., BatchNorm) and activation layer (e.g., ReLU). + It is based upon three build methods: `build_conv_layer()`, + `build_norm_layer()` and `build_activation_layer()`. + + Besides, we add some additional features in this module. + 1. Automatically set `bias` of the conv layer. + 2. Spectral norm is supported. + 3. More padding modes are supported. Before PyTorch 1.5, nn.Conv2d only + supports zero and circular padding, and we add "reflect" padding mode. + + Args: + in_channels (int): Number of channels in the input feature map. + Same as that in ``nn._ConvNd``. + out_channels (int): Number of channels produced by the convolution. + Same as that in ``nn._ConvNd``. + kernel_size (int | tuple[int]): Size of the convolving kernel. + Same as that in ``nn._ConvNd``. + stride (int | tuple[int]): Stride of the convolution. + Same as that in ``nn._ConvNd``. + padding (int | tuple[int]): Zero-padding added to both sides of + the input. Same as that in ``nn._ConvNd``. + dilation (int | tuple[int]): Spacing between kernel elements. + Same as that in ``nn._ConvNd``. + groups (int): Number of blocked connections from input channels to + output channels. Same as that in ``nn._ConvNd``. + bias (bool | str): If specified as `auto`, it will be decided by the + norm_cfg. Bias will be set as True if `norm_cfg` is None, otherwise + False. Default: "auto". + conv_cfg (dict): Config dict for convolution layer. Default: None, + which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. Default: None. 
+ act_cfg (dict): Config dict for activation layer. + Default: dict(type='ReLU'). + inplace (bool): Whether to use inplace mode for activation. + Default: True. + with_spectral_norm (bool): Whether use spectral norm in conv module. + Default: False. + padding_mode (str): If the `padding_mode` has not been supported by + current `Conv2d` in PyTorch, we will use our own padding layer + instead. Currently, we support ['zeros', 'circular'] with official + implementation and ['reflect'] with our own implementation. + Default: 'zeros'. + order (tuple[str]): The order of conv/norm/activation layers. It is a + sequence of "conv", "norm" and "act". Common examples are + ("conv", "norm", "act") and ("act", "conv", "norm"). + Default: ('conv', 'norm', 'act'). + """ + + _abbr_ = 'conv_block' + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + bias='auto', + conv_cfg=None, + norm_cfg=None, + act_cfg=dict(type='ReLU'), + inplace=True, + with_spectral_norm=False, + padding_mode='zeros', + order=('conv', 'norm', 'act')): + super(ConvModule, self).__init__() + assert conv_cfg is None or isinstance(conv_cfg, dict) + assert norm_cfg is None or isinstance(norm_cfg, dict) + assert act_cfg is None or isinstance(act_cfg, dict) + official_padding_mode = ['zeros', 'circular'] + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.inplace = inplace + self.with_spectral_norm = with_spectral_norm + self.with_explicit_padding = padding_mode not in official_padding_mode + self.order = order + assert isinstance(self.order, tuple) and len(self.order) == 3 + assert set(order) == set(['conv', 'norm', 'act']) + + self.with_norm = norm_cfg is not None + self.with_activation = act_cfg is not None + # if the conv layer is before a norm layer, bias is unnecessary. 
+ if bias == 'auto': + bias = not self.with_norm + self.with_bias = bias + + if self.with_explicit_padding: + pad_cfg = dict(type=padding_mode) + self.padding_layer = build_padding_layer(pad_cfg, padding) + + # reset padding to 0 for conv module + conv_padding = 0 if self.with_explicit_padding else padding + # build convolution layer + self.conv = build_conv_layer( + conv_cfg, + in_channels, + out_channels, + kernel_size, + stride=stride, + padding=conv_padding, + dilation=dilation, + groups=groups, + bias=bias) + # export the attributes of self.conv to a higher level for convenience + self.in_channels = self.conv.in_channels + self.out_channels = self.conv.out_channels + self.kernel_size = self.conv.kernel_size + self.stride = self.conv.stride + self.padding = padding + self.dilation = self.conv.dilation + self.transposed = self.conv.transposed + self.output_padding = self.conv.output_padding + self.groups = self.conv.groups + + if self.with_spectral_norm: + self.conv = nn.utils.spectral_norm(self.conv) + + # build normalization layers + if self.with_norm: + # norm layer is after conv layer + if order.index('norm') > order.index('conv'): + norm_channels = out_channels + else: + norm_channels = in_channels + self.norm_name, norm = build_norm_layer(norm_cfg, norm_channels) + self.add_module(self.norm_name, norm) + if self.with_bias: + if isinstance(norm, (_BatchNorm, _InstanceNorm)): + warnings.warn( + 'Unnecessary conv bias before batch/instance norm') + else: + self.norm_name = None + + # build activation layer + if self.with_activation: + act_cfg_ = act_cfg.copy() + # nn.Tanh has no 'inplace' argument + if act_cfg_['type'] not in [ + 'Tanh', 'PReLU', 'Sigmoid', 'HSigmoid', 'Swish' + ]: + act_cfg_.setdefault('inplace', inplace) + self.activate = build_activation_layer(act_cfg_) + + # Use msra init by default + self.init_weights() + + @property + def norm(self): + if self.norm_name: + return getattr(self, self.norm_name) + else: + return None + + def init_weights(self): + # 1. It is mainly for customized conv layers with their own + # initialization manners by calling their own ``init_weights()``, + # and we do not want ConvModule to override the initialization. + # 2. For customized conv layers without their own initialization + # manners (that is, they don't have their own ``init_weights()``) + # and PyTorch's conv layers, they will be initialized by + # this method with default ``kaiming_init``. + # Note: For PyTorch's conv layers, they will be overwritten by our + # initialization implementation using default ``kaiming_init``. 
+ if not hasattr(self.conv, 'init_weights'): + if self.with_activation and self.act_cfg['type'] == 'LeakyReLU': + nonlinearity = 'leaky_relu' + a = self.act_cfg.get('negative_slope', 0.01) + else: + nonlinearity = 'relu' + a = 0 + kaiming_init(self.conv, a=a, nonlinearity=nonlinearity) + if self.with_norm: + constant_init(self.norm, 1, bias=0) + + def forward(self, x, activate=True, norm=True): + for layer in self.order: + if layer == 'conv': + if self.with_explicit_padding: + x = self.padding_layer(x) + x = self.conv(x) + elif layer == 'norm' and norm and self.with_norm: + x = self.norm(x) + elif layer == 'act' and activate and self.with_activation: + x = self.activate(x) + return x diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/conv_ws.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/conv_ws.py new file mode 100644 index 0000000000000000000000000000000000000000..a3941e27874993418b3b5708d5a7485f175ff9c8 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/conv_ws.py @@ -0,0 +1,148 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +import torch.nn.functional as F + +from .registry import CONV_LAYERS + + +def conv_ws_2d(input, + weight, + bias=None, + stride=1, + padding=0, + dilation=1, + groups=1, + eps=1e-5): + c_in = weight.size(0) + weight_flat = weight.view(c_in, -1) + mean = weight_flat.mean(dim=1, keepdim=True).view(c_in, 1, 1, 1) + std = weight_flat.std(dim=1, keepdim=True).view(c_in, 1, 1, 1) + weight = (weight - mean) / (std + eps) + return F.conv2d(input, weight, bias, stride, padding, dilation, groups) + + +@CONV_LAYERS.register_module('ConvWS') +class ConvWS2d(nn.Conv2d): + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + bias=True, + eps=1e-5): + super(ConvWS2d, self).__init__( + in_channels, + out_channels, + kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + bias=bias) + self.eps = eps + + def forward(self, x): + return conv_ws_2d(x, self.weight, self.bias, self.stride, self.padding, + self.dilation, self.groups, self.eps) + + +@CONV_LAYERS.register_module(name='ConvAWS') +class ConvAWS2d(nn.Conv2d): + """AWS (Adaptive Weight Standardization) + + This is a variant of Weight Standardization + (https://arxiv.org/pdf/1903.10520.pdf) + It is used in DetectoRS to avoid NaN + (https://arxiv.org/pdf/2006.02334.pdf) + + Args: + in_channels (int): Number of channels in the input image + out_channels (int): Number of channels produced by the convolution + kernel_size (int or tuple): Size of the conv kernel + stride (int or tuple, optional): Stride of the convolution. Default: 1 + padding (int or tuple, optional): Zero-padding added to both sides of + the input. Default: 0 + dilation (int or tuple, optional): Spacing between kernel elements. + Default: 1 + groups (int, optional): Number of blocked connections from input + channels to output channels. Default: 1 + bias (bool, optional): If set True, adds a learnable bias to the + output. 
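# Illustrative usage of the ConvModule defined above (a sketch, not part of the
# original patch; it assumes the vendored package is importable as
# annotator.mmpkg.mmcv.cnn and that the usual 'BN' norm layer is registered).
# bias='auto' resolves to False here because a norm layer is configured, and the
# layers run in the default ('conv', 'norm', 'act') order.
import torch
from annotator.mmpkg.mmcv.cnn import ConvModule

block = ConvModule(
    in_channels=3,
    out_channels=16,
    kernel_size=3,
    padding=1,
    norm_cfg=dict(type='BN'),        # conv -> BatchNorm2d -> ReLU
    act_cfg=dict(type='ReLU'))
out = block(torch.randn(1, 3, 64, 64))   # -> torch.Size([1, 16, 64, 64])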
Default: True + """ + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + bias=True): + super().__init__( + in_channels, + out_channels, + kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + bias=bias) + self.register_buffer('weight_gamma', + torch.ones(self.out_channels, 1, 1, 1)) + self.register_buffer('weight_beta', + torch.zeros(self.out_channels, 1, 1, 1)) + + def _get_weight(self, weight): + weight_flat = weight.view(weight.size(0), -1) + mean = weight_flat.mean(dim=1).view(-1, 1, 1, 1) + std = torch.sqrt(weight_flat.var(dim=1) + 1e-5).view(-1, 1, 1, 1) + weight = (weight - mean) / std + weight = self.weight_gamma * weight + self.weight_beta + return weight + + def forward(self, x): + weight = self._get_weight(self.weight) + return F.conv2d(x, weight, self.bias, self.stride, self.padding, + self.dilation, self.groups) + + def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, + missing_keys, unexpected_keys, error_msgs): + """Override default load function. + + AWS overrides the function _load_from_state_dict to recover + weight_gamma and weight_beta if they are missing. If weight_gamma and + weight_beta are found in the checkpoint, this function will return + after super()._load_from_state_dict. Otherwise, it will compute the + mean and std of the pretrained weights and store them in weight_beta + and weight_gamma. + """ + + self.weight_gamma.data.fill_(-1) + local_missing_keys = [] + super()._load_from_state_dict(state_dict, prefix, local_metadata, + strict, local_missing_keys, + unexpected_keys, error_msgs) + if self.weight_gamma.data.mean() > 0: + for k in local_missing_keys: + missing_keys.append(k) + return + weight = self.weight.data + weight_flat = weight.view(weight.size(0), -1) + mean = weight_flat.mean(dim=1).view(-1, 1, 1, 1) + std = torch.sqrt(weight_flat.var(dim=1) + 1e-5).view(-1, 1, 1, 1) + self.weight_beta.data.copy_(mean) + self.weight_gamma.data.copy_(std) + missing_gamma_beta = [ + k for k in local_missing_keys + if k.endswith('weight_gamma') or k.endswith('weight_beta') + ] + for k in missing_gamma_beta: + local_missing_keys.remove(k) + for k in local_missing_keys: + missing_keys.append(k) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/depthwise_separable_conv_module.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/depthwise_separable_conv_module.py new file mode 100644 index 0000000000000000000000000000000000000000..722d5d8d71f75486e2db3008907c4eadfca41d63 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/depthwise_separable_conv_module.py @@ -0,0 +1,96 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch.nn as nn + +from .conv_module import ConvModule + + +class DepthwiseSeparableConvModule(nn.Module): + """Depthwise separable convolution module. + + See https://arxiv.org/pdf/1704.04861.pdf for details. + + This module can replace a ConvModule with the conv block replaced by two + conv block: depthwise conv block and pointwise conv block. The depthwise + conv block contains depthwise-conv/norm/activation layers. The pointwise + conv block contains pointwise-conv/norm/activation layers. It should be + noted that there will be norm/activation layer in the depthwise conv block + if `norm_cfg` and `act_cfg` are specified. + + Args: + in_channels (int): Number of channels in the input feature map. 
+ Same as that in ``nn._ConvNd``. + out_channels (int): Number of channels produced by the convolution. + Same as that in ``nn._ConvNd``. + kernel_size (int | tuple[int]): Size of the convolving kernel. + Same as that in ``nn._ConvNd``. + stride (int | tuple[int]): Stride of the convolution. + Same as that in ``nn._ConvNd``. Default: 1. + padding (int | tuple[int]): Zero-padding added to both sides of + the input. Same as that in ``nn._ConvNd``. Default: 0. + dilation (int | tuple[int]): Spacing between kernel elements. + Same as that in ``nn._ConvNd``. Default: 1. + norm_cfg (dict): Default norm config for both depthwise ConvModule and + pointwise ConvModule. Default: None. + act_cfg (dict): Default activation config for both depthwise ConvModule + and pointwise ConvModule. Default: dict(type='ReLU'). + dw_norm_cfg (dict): Norm config of depthwise ConvModule. If it is + 'default', it will be the same as `norm_cfg`. Default: 'default'. + dw_act_cfg (dict): Activation config of depthwise ConvModule. If it is + 'default', it will be the same as `act_cfg`. Default: 'default'. + pw_norm_cfg (dict): Norm config of pointwise ConvModule. If it is + 'default', it will be the same as `norm_cfg`. Default: 'default'. + pw_act_cfg (dict): Activation config of pointwise ConvModule. If it is + 'default', it will be the same as `act_cfg`. Default: 'default'. + kwargs (optional): Other shared arguments for depthwise and pointwise + ConvModule. See ConvModule for ref. + """ + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + norm_cfg=None, + act_cfg=dict(type='ReLU'), + dw_norm_cfg='default', + dw_act_cfg='default', + pw_norm_cfg='default', + pw_act_cfg='default', + **kwargs): + super(DepthwiseSeparableConvModule, self).__init__() + assert 'groups' not in kwargs, 'groups should not be specified' + + # if norm/activation config of depthwise/pointwise ConvModule is not + # specified, use default config. + dw_norm_cfg = dw_norm_cfg if dw_norm_cfg != 'default' else norm_cfg + dw_act_cfg = dw_act_cfg if dw_act_cfg != 'default' else act_cfg + pw_norm_cfg = pw_norm_cfg if pw_norm_cfg != 'default' else norm_cfg + pw_act_cfg = pw_act_cfg if pw_act_cfg != 'default' else act_cfg + + # depthwise convolution + self.depthwise_conv = ConvModule( + in_channels, + in_channels, + kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=in_channels, + norm_cfg=dw_norm_cfg, + act_cfg=dw_act_cfg, + **kwargs) + + self.pointwise_conv = ConvModule( + in_channels, + out_channels, + 1, + norm_cfg=pw_norm_cfg, + act_cfg=pw_act_cfg, + **kwargs) + + def forward(self, x): + x = self.depthwise_conv(x) + x = self.pointwise_conv(x) + return x diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/drop.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/drop.py new file mode 100644 index 0000000000000000000000000000000000000000..465ed38339fe64dde8cdc959451b1236a3a55b95 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/drop.py @@ -0,0 +1,65 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn + +from annotator.mmpkg.mmcv import build_from_cfg +from .registry import DROPOUT_LAYERS + + +def drop_path(x, drop_prob=0., training=False): + """Drop paths (Stochastic Depth) per sample (when applied in main path of + residual blocks). 
+ + We follow the implementation + https://github.com/rwightman/pytorch-image-models/blob/a2727c1bf78ba0d7b5727f5f95e37fb7f8866b1f/timm/models/layers/drop.py # noqa: E501 + """ + if drop_prob == 0. or not training: + return x + keep_prob = 1 - drop_prob + # handle tensors with different dimensions, not just 4D tensors. + shape = (x.shape[0], ) + (1, ) * (x.ndim - 1) + random_tensor = keep_prob + torch.rand( + shape, dtype=x.dtype, device=x.device) + output = x.div(keep_prob) * random_tensor.floor() + return output + + +@DROPOUT_LAYERS.register_module() +class DropPath(nn.Module): + """Drop paths (Stochastic Depth) per sample (when applied in main path of + residual blocks). + + We follow the implementation + https://github.com/rwightman/pytorch-image-models/blob/a2727c1bf78ba0d7b5727f5f95e37fb7f8866b1f/timm/models/layers/drop.py # noqa: E501 + + Args: + drop_prob (float): Probability of the path to be zeroed. Default: 0.1 + """ + + def __init__(self, drop_prob=0.1): + super(DropPath, self).__init__() + self.drop_prob = drop_prob + + def forward(self, x): + return drop_path(x, self.drop_prob, self.training) + + +@DROPOUT_LAYERS.register_module() +class Dropout(nn.Dropout): + """A wrapper for ``torch.nn.Dropout``, We rename the ``p`` of + ``torch.nn.Dropout`` to ``drop_prob`` so as to be consistent with + ``DropPath`` + + Args: + drop_prob (float): Probability of the elements to be + zeroed. Default: 0.5. + inplace (bool): Do the operation inplace or not. Default: False. + """ + + def __init__(self, drop_prob=0.5, inplace=False): + super().__init__(p=drop_prob, inplace=inplace) + + +def build_dropout(cfg, default_args=None): + """Builder for drop out layers.""" + return build_from_cfg(cfg, DROPOUT_LAYERS, default_args) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/generalized_attention.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/generalized_attention.py new file mode 100644 index 0000000000000000000000000000000000000000..988d9adf2f289ef223bd1c680a5ae1d3387f0269 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/generalized_attention.py @@ -0,0 +1,412 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import math + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ..utils import kaiming_init +from .registry import PLUGIN_LAYERS + + +@PLUGIN_LAYERS.register_module() +class GeneralizedAttention(nn.Module): + """GeneralizedAttention module. + + See 'An Empirical Study of Spatial Attention Mechanisms in Deep Networks' + (https://arxiv.org/abs/1711.07971) for details. + + Args: + in_channels (int): Channels of the input feature map. + spatial_range (int): The spatial range. -1 indicates no spatial range + constraint. Default: -1. + num_heads (int): The head number of empirical_attention module. + Default: 9. + position_embedding_dim (int): The position embedding dimension. + Default: -1. + position_magnitude (int): A multiplier acting on coord difference. + Default: 1. + kv_stride (int): The feature stride acting on key/value feature map. + Default: 2. + q_stride (int): The feature stride acting on query feature map. + Default: 1. + attention_type (str): A binary indicator string for indicating which + items in generalized empirical_attention module are used. + Default: '1111'. 
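A small sketch of the stochastic-depth behaviour implemented by `drop_path`/`DropPath` above: during training whole samples are zeroed with probability `drop_prob` and the survivors are rescaled by `1/keep_prob`, so the expectation is unchanged; at inference the module is the identity. Import path as added in this diff; the seed and shapes are arbitrary.

import torch
from annotator.mmpkg.mmcv.cnn.bricks.drop import DropPath

torch.manual_seed(0)
dp = DropPath(drop_prob=0.3)

x = torch.ones(8, 4)           # any (N, ...) tensor works, not only 4D feature maps
dp.train()
y = dp(x)
print(y[:, 0])                 # each sample is either all 0 (dropped) or all 1/0.7 (kept, rescaled)

dp.eval()
print(torch.equal(dp(x), x))   # True: identity in eval mode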
+ + - '1000' indicates 'query and key content' (appr - appr) item, + - '0100' indicates 'query content and relative position' + (appr - position) item, + - '0010' indicates 'key content only' (bias - appr) item, + - '0001' indicates 'relative position only' (bias - position) item. + """ + + _abbr_ = 'gen_attention_block' + + def __init__(self, + in_channels, + spatial_range=-1, + num_heads=9, + position_embedding_dim=-1, + position_magnitude=1, + kv_stride=2, + q_stride=1, + attention_type='1111'): + + super(GeneralizedAttention, self).__init__() + + # hard range means local range for non-local operation + self.position_embedding_dim = ( + position_embedding_dim + if position_embedding_dim > 0 else in_channels) + + self.position_magnitude = position_magnitude + self.num_heads = num_heads + self.in_channels = in_channels + self.spatial_range = spatial_range + self.kv_stride = kv_stride + self.q_stride = q_stride + self.attention_type = [bool(int(_)) for _ in attention_type] + self.qk_embed_dim = in_channels // num_heads + out_c = self.qk_embed_dim * num_heads + + if self.attention_type[0] or self.attention_type[1]: + self.query_conv = nn.Conv2d( + in_channels=in_channels, + out_channels=out_c, + kernel_size=1, + bias=False) + self.query_conv.kaiming_init = True + + if self.attention_type[0] or self.attention_type[2]: + self.key_conv = nn.Conv2d( + in_channels=in_channels, + out_channels=out_c, + kernel_size=1, + bias=False) + self.key_conv.kaiming_init = True + + self.v_dim = in_channels // num_heads + self.value_conv = nn.Conv2d( + in_channels=in_channels, + out_channels=self.v_dim * num_heads, + kernel_size=1, + bias=False) + self.value_conv.kaiming_init = True + + if self.attention_type[1] or self.attention_type[3]: + self.appr_geom_fc_x = nn.Linear( + self.position_embedding_dim // 2, out_c, bias=False) + self.appr_geom_fc_x.kaiming_init = True + + self.appr_geom_fc_y = nn.Linear( + self.position_embedding_dim // 2, out_c, bias=False) + self.appr_geom_fc_y.kaiming_init = True + + if self.attention_type[2]: + stdv = 1.0 / math.sqrt(self.qk_embed_dim * 2) + appr_bias_value = -2 * stdv * torch.rand(out_c) + stdv + self.appr_bias = nn.Parameter(appr_bias_value) + + if self.attention_type[3]: + stdv = 1.0 / math.sqrt(self.qk_embed_dim * 2) + geom_bias_value = -2 * stdv * torch.rand(out_c) + stdv + self.geom_bias = nn.Parameter(geom_bias_value) + + self.proj_conv = nn.Conv2d( + in_channels=self.v_dim * num_heads, + out_channels=in_channels, + kernel_size=1, + bias=True) + self.proj_conv.kaiming_init = True + self.gamma = nn.Parameter(torch.zeros(1)) + + if self.spatial_range >= 0: + # only works when non local is after 3*3 conv + if in_channels == 256: + max_len = 84 + elif in_channels == 512: + max_len = 42 + + max_len_kv = int((max_len - 1.0) / self.kv_stride + 1) + local_constraint_map = np.ones( + (max_len, max_len, max_len_kv, max_len_kv), dtype=np.int) + for iy in range(max_len): + for ix in range(max_len): + local_constraint_map[ + iy, ix, + max((iy - self.spatial_range) // + self.kv_stride, 0):min((iy + self.spatial_range + + 1) // self.kv_stride + + 1, max_len), + max((ix - self.spatial_range) // + self.kv_stride, 0):min((ix + self.spatial_range + + 1) // self.kv_stride + + 1, max_len)] = 0 + + self.local_constraint_map = nn.Parameter( + torch.from_numpy(local_constraint_map).byte(), + requires_grad=False) + + if self.q_stride > 1: + self.q_downsample = nn.AvgPool2d( + kernel_size=1, stride=self.q_stride) + else: + self.q_downsample = None + + if self.kv_stride > 1: + 
self.kv_downsample = nn.AvgPool2d( + kernel_size=1, stride=self.kv_stride) + else: + self.kv_downsample = None + + self.init_weights() + + def get_position_embedding(self, + h, + w, + h_kv, + w_kv, + q_stride, + kv_stride, + device, + dtype, + feat_dim, + wave_length=1000): + # the default type of Tensor is float32, leading to type mismatch + # in fp16 mode. Cast it to support fp16 mode. + h_idxs = torch.linspace(0, h - 1, h).to(device=device, dtype=dtype) + h_idxs = h_idxs.view((h, 1)) * q_stride + + w_idxs = torch.linspace(0, w - 1, w).to(device=device, dtype=dtype) + w_idxs = w_idxs.view((w, 1)) * q_stride + + h_kv_idxs = torch.linspace(0, h_kv - 1, h_kv).to( + device=device, dtype=dtype) + h_kv_idxs = h_kv_idxs.view((h_kv, 1)) * kv_stride + + w_kv_idxs = torch.linspace(0, w_kv - 1, w_kv).to( + device=device, dtype=dtype) + w_kv_idxs = w_kv_idxs.view((w_kv, 1)) * kv_stride + + # (h, h_kv, 1) + h_diff = h_idxs.unsqueeze(1) - h_kv_idxs.unsqueeze(0) + h_diff *= self.position_magnitude + + # (w, w_kv, 1) + w_diff = w_idxs.unsqueeze(1) - w_kv_idxs.unsqueeze(0) + w_diff *= self.position_magnitude + + feat_range = torch.arange(0, feat_dim / 4).to( + device=device, dtype=dtype) + + dim_mat = torch.Tensor([wave_length]).to(device=device, dtype=dtype) + dim_mat = dim_mat**((4. / feat_dim) * feat_range) + dim_mat = dim_mat.view((1, 1, -1)) + + embedding_x = torch.cat( + ((w_diff / dim_mat).sin(), (w_diff / dim_mat).cos()), dim=2) + + embedding_y = torch.cat( + ((h_diff / dim_mat).sin(), (h_diff / dim_mat).cos()), dim=2) + + return embedding_x, embedding_y + + def forward(self, x_input): + num_heads = self.num_heads + + # use empirical_attention + if self.q_downsample is not None: + x_q = self.q_downsample(x_input) + else: + x_q = x_input + n, _, h, w = x_q.shape + + if self.kv_downsample is not None: + x_kv = self.kv_downsample(x_input) + else: + x_kv = x_input + _, _, h_kv, w_kv = x_kv.shape + + if self.attention_type[0] or self.attention_type[1]: + proj_query = self.query_conv(x_q).view( + (n, num_heads, self.qk_embed_dim, h * w)) + proj_query = proj_query.permute(0, 1, 3, 2) + + if self.attention_type[0] or self.attention_type[2]: + proj_key = self.key_conv(x_kv).view( + (n, num_heads, self.qk_embed_dim, h_kv * w_kv)) + + if self.attention_type[1] or self.attention_type[3]: + position_embed_x, position_embed_y = self.get_position_embedding( + h, w, h_kv, w_kv, self.q_stride, self.kv_stride, + x_input.device, x_input.dtype, self.position_embedding_dim) + # (n, num_heads, w, w_kv, dim) + position_feat_x = self.appr_geom_fc_x(position_embed_x).\ + view(1, w, w_kv, num_heads, self.qk_embed_dim).\ + permute(0, 3, 1, 2, 4).\ + repeat(n, 1, 1, 1, 1) + + # (n, num_heads, h, h_kv, dim) + position_feat_y = self.appr_geom_fc_y(position_embed_y).\ + view(1, h, h_kv, num_heads, self.qk_embed_dim).\ + permute(0, 3, 1, 2, 4).\ + repeat(n, 1, 1, 1, 1) + + position_feat_x /= math.sqrt(2) + position_feat_y /= math.sqrt(2) + + # accelerate for saliency only + if (np.sum(self.attention_type) == 1) and self.attention_type[2]: + appr_bias = self.appr_bias.\ + view(1, num_heads, 1, self.qk_embed_dim).\ + repeat(n, 1, 1, 1) + + energy = torch.matmul(appr_bias, proj_key).\ + view(n, num_heads, 1, h_kv * w_kv) + + h = 1 + w = 1 + else: + # (n, num_heads, h*w, h_kv*w_kv), query before key, 540mb for + if not self.attention_type[0]: + energy = torch.zeros( + n, + num_heads, + h, + w, + h_kv, + w_kv, + dtype=x_input.dtype, + device=x_input.device) + + # attention_type[0]: appr - appr + # attention_type[1]: appr - position 
+ # attention_type[2]: bias - appr + # attention_type[3]: bias - position + if self.attention_type[0] or self.attention_type[2]: + if self.attention_type[0] and self.attention_type[2]: + appr_bias = self.appr_bias.\ + view(1, num_heads, 1, self.qk_embed_dim) + energy = torch.matmul(proj_query + appr_bias, proj_key).\ + view(n, num_heads, h, w, h_kv, w_kv) + + elif self.attention_type[0]: + energy = torch.matmul(proj_query, proj_key).\ + view(n, num_heads, h, w, h_kv, w_kv) + + elif self.attention_type[2]: + appr_bias = self.appr_bias.\ + view(1, num_heads, 1, self.qk_embed_dim).\ + repeat(n, 1, 1, 1) + + energy += torch.matmul(appr_bias, proj_key).\ + view(n, num_heads, 1, 1, h_kv, w_kv) + + if self.attention_type[1] or self.attention_type[3]: + if self.attention_type[1] and self.attention_type[3]: + geom_bias = self.geom_bias.\ + view(1, num_heads, 1, self.qk_embed_dim) + + proj_query_reshape = (proj_query + geom_bias).\ + view(n, num_heads, h, w, self.qk_embed_dim) + + energy_x = torch.matmul( + proj_query_reshape.permute(0, 1, 3, 2, 4), + position_feat_x.permute(0, 1, 2, 4, 3)) + energy_x = energy_x.\ + permute(0, 1, 3, 2, 4).unsqueeze(4) + + energy_y = torch.matmul( + proj_query_reshape, + position_feat_y.permute(0, 1, 2, 4, 3)) + energy_y = energy_y.unsqueeze(5) + + energy += energy_x + energy_y + + elif self.attention_type[1]: + proj_query_reshape = proj_query.\ + view(n, num_heads, h, w, self.qk_embed_dim) + proj_query_reshape = proj_query_reshape.\ + permute(0, 1, 3, 2, 4) + position_feat_x_reshape = position_feat_x.\ + permute(0, 1, 2, 4, 3) + position_feat_y_reshape = position_feat_y.\ + permute(0, 1, 2, 4, 3) + + energy_x = torch.matmul(proj_query_reshape, + position_feat_x_reshape) + energy_x = energy_x.permute(0, 1, 3, 2, 4).unsqueeze(4) + + energy_y = torch.matmul(proj_query_reshape, + position_feat_y_reshape) + energy_y = energy_y.unsqueeze(5) + + energy += energy_x + energy_y + + elif self.attention_type[3]: + geom_bias = self.geom_bias.\ + view(1, num_heads, self.qk_embed_dim, 1).\ + repeat(n, 1, 1, 1) + + position_feat_x_reshape = position_feat_x.\ + view(n, num_heads, w*w_kv, self.qk_embed_dim) + + position_feat_y_reshape = position_feat_y.\ + view(n, num_heads, h * h_kv, self.qk_embed_dim) + + energy_x = torch.matmul(position_feat_x_reshape, geom_bias) + energy_x = energy_x.view(n, num_heads, 1, w, 1, w_kv) + + energy_y = torch.matmul(position_feat_y_reshape, geom_bias) + energy_y = energy_y.view(n, num_heads, h, 1, h_kv, 1) + + energy += energy_x + energy_y + + energy = energy.view(n, num_heads, h * w, h_kv * w_kv) + + if self.spatial_range >= 0: + cur_local_constraint_map = \ + self.local_constraint_map[:h, :w, :h_kv, :w_kv].\ + contiguous().\ + view(1, 1, h*w, h_kv*w_kv) + + energy = energy.masked_fill_(cur_local_constraint_map, + float('-inf')) + + attention = F.softmax(energy, 3) + + proj_value = self.value_conv(x_kv) + proj_value_reshape = proj_value.\ + view((n, num_heads, self.v_dim, h_kv * w_kv)).\ + permute(0, 1, 3, 2) + + out = torch.matmul(attention, proj_value_reshape).\ + permute(0, 1, 3, 2).\ + contiguous().\ + view(n, self.v_dim * self.num_heads, h, w) + + out = self.proj_conv(out) + + # output is downsampled, upsample back to input size + if self.q_downsample is not None: + out = F.interpolate( + out, + size=x_input.shape[2:], + mode='bilinear', + align_corners=False) + + out = self.gamma * out + x_input + return out + + def init_weights(self): + for m in self.modules(): + if hasattr(m, 'kaiming_init') and m.kaiming_init: + kaiming_init( + m, + 
mode='fan_in', + nonlinearity='leaky_relu', + bias=0, + distribution='uniform', + a=1) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/hsigmoid.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/hsigmoid.py new file mode 100644 index 0000000000000000000000000000000000000000..30b1a3d6580cf0360710426fbea1f05acdf07b4b --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/hsigmoid.py @@ -0,0 +1,34 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch.nn as nn + +from .registry import ACTIVATION_LAYERS + + +@ACTIVATION_LAYERS.register_module() +class HSigmoid(nn.Module): + """Hard Sigmoid Module. Apply the hard sigmoid function: + Hsigmoid(x) = min(max((x + bias) / divisor, min_value), max_value) + Default: Hsigmoid(x) = min(max((x + 1) / 2, 0), 1) + + Args: + bias (float): Bias of the input feature map. Default: 1.0. + divisor (float): Divisor of the input feature map. Default: 2.0. + min_value (float): Lower bound value. Default: 0.0. + max_value (float): Upper bound value. Default: 1.0. + + Returns: + Tensor: The output tensor. + """ + + def __init__(self, bias=1.0, divisor=2.0, min_value=0.0, max_value=1.0): + super(HSigmoid, self).__init__() + self.bias = bias + self.divisor = divisor + assert self.divisor != 0 + self.min_value = min_value + self.max_value = max_value + + def forward(self, x): + x = (x + self.bias) / self.divisor + + return x.clamp_(self.min_value, self.max_value) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/hswish.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/hswish.py new file mode 100644 index 0000000000000000000000000000000000000000..7e0c090ff037c99ee6c5c84c4592e87beae02208 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/hswish.py @@ -0,0 +1,29 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch.nn as nn + +from .registry import ACTIVATION_LAYERS + + +@ACTIVATION_LAYERS.register_module() +class HSwish(nn.Module): + """Hard Swish Module. + + This module applies the hard swish function: + + .. math:: + Hswish(x) = x * ReLU6(x + 3) / 6 + + Args: + inplace (bool): can optionally do the operation in-place. + Default: False. + + Returns: + Tensor: The output tensor. + """ + + def __init__(self, inplace=False): + super(HSwish, self).__init__() + self.act = nn.ReLU6(inplace) + + def forward(self, x): + return x * self.act(x + 3) / 6 diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/non_local.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/non_local.py new file mode 100644 index 0000000000000000000000000000000000000000..92d00155ef275c1201ea66bba30470a1785cc5d7 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/non_local.py @@ -0,0 +1,306 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from abc import ABCMeta + +import torch +import torch.nn as nn + +from ..utils import constant_init, normal_init +from .conv_module import ConvModule +from .registry import PLUGIN_LAYERS + + +class _NonLocalNd(nn.Module, metaclass=ABCMeta): + """Basic Non-local module. 
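A quick numeric check of the two piecewise activations registered above (hard sigmoid and hard swish). Import paths mirror the files added in this diff; the input values are arbitrary.

import torch
from annotator.mmpkg.mmcv.cnn.bricks.hsigmoid import HSigmoid
from annotator.mmpkg.mmcv.cnn.bricks.hswish import HSwish

x = torch.tensor([-4.0, -1.0, 0.0, 1.0, 4.0])

print(HSigmoid()(x))   # min(max((x + 1) / 2, 0), 1) -> [0.0, 0.0, 0.5, 1.0, 1.0]
print(HSwish()(x))     # x * relu6(x + 3) / 6        -> approx [0.0, -0.3333, 0.0, 0.6667, 4.0]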
+ + This module is proposed in + "Non-local Neural Networks" + Paper reference: https://arxiv.org/abs/1711.07971 + Code reference: https://github.com/AlexHex7/Non-local_pytorch + + Args: + in_channels (int): Channels of the input feature map. + reduction (int): Channel reduction ratio. Default: 2. + use_scale (bool): Whether to scale pairwise_weight by + `1/sqrt(inter_channels)` when the mode is `embedded_gaussian`. + Default: True. + conv_cfg (None | dict): The config dict for convolution layers. + If not specified, it will use `nn.Conv2d` for convolution layers. + Default: None. + norm_cfg (None | dict): The config dict for normalization layers. + Default: None. (This parameter is only applicable to conv_out.) + mode (str): Options are `gaussian`, `concatenation`, + `embedded_gaussian` and `dot_product`. Default: embedded_gaussian. + """ + + def __init__(self, + in_channels, + reduction=2, + use_scale=True, + conv_cfg=None, + norm_cfg=None, + mode='embedded_gaussian', + **kwargs): + super(_NonLocalNd, self).__init__() + self.in_channels = in_channels + self.reduction = reduction + self.use_scale = use_scale + self.inter_channels = max(in_channels // reduction, 1) + self.mode = mode + + if mode not in [ + 'gaussian', 'embedded_gaussian', 'dot_product', 'concatenation' + ]: + raise ValueError("Mode should be in 'gaussian', 'concatenation', " + f"'embedded_gaussian' or 'dot_product', but got " + f'{mode} instead.') + + # g, theta, phi are defaulted as `nn.ConvNd`. + # Here we use ConvModule for potential usage. + self.g = ConvModule( + self.in_channels, + self.inter_channels, + kernel_size=1, + conv_cfg=conv_cfg, + act_cfg=None) + self.conv_out = ConvModule( + self.inter_channels, + self.in_channels, + kernel_size=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=None) + + if self.mode != 'gaussian': + self.theta = ConvModule( + self.in_channels, + self.inter_channels, + kernel_size=1, + conv_cfg=conv_cfg, + act_cfg=None) + self.phi = ConvModule( + self.in_channels, + self.inter_channels, + kernel_size=1, + conv_cfg=conv_cfg, + act_cfg=None) + + if self.mode == 'concatenation': + self.concat_project = ConvModule( + self.inter_channels * 2, + 1, + kernel_size=1, + stride=1, + padding=0, + bias=False, + act_cfg=dict(type='ReLU')) + + self.init_weights(**kwargs) + + def init_weights(self, std=0.01, zeros_init=True): + if self.mode != 'gaussian': + for m in [self.g, self.theta, self.phi]: + normal_init(m.conv, std=std) + else: + normal_init(self.g.conv, std=std) + if zeros_init: + if self.conv_out.norm_cfg is None: + constant_init(self.conv_out.conv, 0) + else: + constant_init(self.conv_out.norm, 0) + else: + if self.conv_out.norm_cfg is None: + normal_init(self.conv_out.conv, std=std) + else: + normal_init(self.conv_out.norm, std=std) + + def gaussian(self, theta_x, phi_x): + # NonLocal1d pairwise_weight: [N, H, H] + # NonLocal2d pairwise_weight: [N, HxW, HxW] + # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW] + pairwise_weight = torch.matmul(theta_x, phi_x) + pairwise_weight = pairwise_weight.softmax(dim=-1) + return pairwise_weight + + def embedded_gaussian(self, theta_x, phi_x): + # NonLocal1d pairwise_weight: [N, H, H] + # NonLocal2d pairwise_weight: [N, HxW, HxW] + # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW] + pairwise_weight = torch.matmul(theta_x, phi_x) + if self.use_scale: + # theta_x.shape[-1] is `self.inter_channels` + pairwise_weight /= theta_x.shape[-1]**0.5 + pairwise_weight = pairwise_weight.softmax(dim=-1) + return pairwise_weight + + def dot_product(self, theta_x, 
phi_x): + # NonLocal1d pairwise_weight: [N, H, H] + # NonLocal2d pairwise_weight: [N, HxW, HxW] + # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW] + pairwise_weight = torch.matmul(theta_x, phi_x) + pairwise_weight /= pairwise_weight.shape[-1] + return pairwise_weight + + def concatenation(self, theta_x, phi_x): + # NonLocal1d pairwise_weight: [N, H, H] + # NonLocal2d pairwise_weight: [N, HxW, HxW] + # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW] + h = theta_x.size(2) + w = phi_x.size(3) + theta_x = theta_x.repeat(1, 1, 1, w) + phi_x = phi_x.repeat(1, 1, h, 1) + + concat_feature = torch.cat([theta_x, phi_x], dim=1) + pairwise_weight = self.concat_project(concat_feature) + n, _, h, w = pairwise_weight.size() + pairwise_weight = pairwise_weight.view(n, h, w) + pairwise_weight /= pairwise_weight.shape[-1] + + return pairwise_weight + + def forward(self, x): + # Assume `reduction = 1`, then `inter_channels = C` + # or `inter_channels = C` when `mode="gaussian"` + + # NonLocal1d x: [N, C, H] + # NonLocal2d x: [N, C, H, W] + # NonLocal3d x: [N, C, T, H, W] + n = x.size(0) + + # NonLocal1d g_x: [N, H, C] + # NonLocal2d g_x: [N, HxW, C] + # NonLocal3d g_x: [N, TxHxW, C] + g_x = self.g(x).view(n, self.inter_channels, -1) + g_x = g_x.permute(0, 2, 1) + + # NonLocal1d theta_x: [N, H, C], phi_x: [N, C, H] + # NonLocal2d theta_x: [N, HxW, C], phi_x: [N, C, HxW] + # NonLocal3d theta_x: [N, TxHxW, C], phi_x: [N, C, TxHxW] + if self.mode == 'gaussian': + theta_x = x.view(n, self.in_channels, -1) + theta_x = theta_x.permute(0, 2, 1) + if self.sub_sample: + phi_x = self.phi(x).view(n, self.in_channels, -1) + else: + phi_x = x.view(n, self.in_channels, -1) + elif self.mode == 'concatenation': + theta_x = self.theta(x).view(n, self.inter_channels, -1, 1) + phi_x = self.phi(x).view(n, self.inter_channels, 1, -1) + else: + theta_x = self.theta(x).view(n, self.inter_channels, -1) + theta_x = theta_x.permute(0, 2, 1) + phi_x = self.phi(x).view(n, self.inter_channels, -1) + + pairwise_func = getattr(self, self.mode) + # NonLocal1d pairwise_weight: [N, H, H] + # NonLocal2d pairwise_weight: [N, HxW, HxW] + # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW] + pairwise_weight = pairwise_func(theta_x, phi_x) + + # NonLocal1d y: [N, H, C] + # NonLocal2d y: [N, HxW, C] + # NonLocal3d y: [N, TxHxW, C] + y = torch.matmul(pairwise_weight, g_x) + # NonLocal1d y: [N, C, H] + # NonLocal2d y: [N, C, H, W] + # NonLocal3d y: [N, C, T, H, W] + y = y.permute(0, 2, 1).contiguous().reshape(n, self.inter_channels, + *x.size()[2:]) + + output = x + self.conv_out(y) + + return output + + +class NonLocal1d(_NonLocalNd): + """1D Non-local module. + + Args: + in_channels (int): Same as `NonLocalND`. + sub_sample (bool): Whether to apply max pooling after pairwise + function (Note that the `sub_sample` is applied on spatial only). + Default: False. + conv_cfg (None | dict): Same as `NonLocalND`. + Default: dict(type='Conv1d'). + """ + + def __init__(self, + in_channels, + sub_sample=False, + conv_cfg=dict(type='Conv1d'), + **kwargs): + super(NonLocal1d, self).__init__( + in_channels, conv_cfg=conv_cfg, **kwargs) + + self.sub_sample = sub_sample + + if sub_sample: + max_pool_layer = nn.MaxPool1d(kernel_size=2) + self.g = nn.Sequential(self.g, max_pool_layer) + if self.mode != 'gaussian': + self.phi = nn.Sequential(self.phi, max_pool_layer) + else: + self.phi = max_pool_layer + + +@PLUGIN_LAYERS.register_module() +class NonLocal2d(_NonLocalNd): + """2D Non-local module. + + Args: + in_channels (int): Same as `NonLocalND`. 
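A usage sketch for the 2D non-local block declared here (the heavy lifting happens in `_NonLocalNd.forward` above): pairwise weights over all spatial positions, a residual connection back onto the input, and optional key/value sub-sampling. The import path and the sizes are assumptions.

import torch
from annotator.mmpkg.mmcv.cnn.bricks.non_local import NonLocal2d

block = NonLocal2d(
    in_channels=64,
    reduction=2,                 # inter_channels = 32
    mode='embedded_gaussian',
    norm_cfg=dict(type='BN'),
    sub_sample=True)             # max-pool key/value to shrink the (HW x HW) attention map

x = torch.randn(2, 64, 20, 20)
print(block(x).shape)            # torch.Size([2, 64, 20, 20]) -- the residual keeps the input shape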
+ sub_sample (bool): Whether to apply max pooling after pairwise + function (Note that the `sub_sample` is applied on spatial only). + Default: False. + conv_cfg (None | dict): Same as `NonLocalND`. + Default: dict(type='Conv2d'). + """ + + _abbr_ = 'nonlocal_block' + + def __init__(self, + in_channels, + sub_sample=False, + conv_cfg=dict(type='Conv2d'), + **kwargs): + super(NonLocal2d, self).__init__( + in_channels, conv_cfg=conv_cfg, **kwargs) + + self.sub_sample = sub_sample + + if sub_sample: + max_pool_layer = nn.MaxPool2d(kernel_size=(2, 2)) + self.g = nn.Sequential(self.g, max_pool_layer) + if self.mode != 'gaussian': + self.phi = nn.Sequential(self.phi, max_pool_layer) + else: + self.phi = max_pool_layer + + +class NonLocal3d(_NonLocalNd): + """3D Non-local module. + + Args: + in_channels (int): Same as `NonLocalND`. + sub_sample (bool): Whether to apply max pooling after pairwise + function (Note that the `sub_sample` is applied on spatial only). + Default: False. + conv_cfg (None | dict): Same as `NonLocalND`. + Default: dict(type='Conv3d'). + """ + + def __init__(self, + in_channels, + sub_sample=False, + conv_cfg=dict(type='Conv3d'), + **kwargs): + super(NonLocal3d, self).__init__( + in_channels, conv_cfg=conv_cfg, **kwargs) + self.sub_sample = sub_sample + + if sub_sample: + max_pool_layer = nn.MaxPool3d(kernel_size=(1, 2, 2)) + self.g = nn.Sequential(self.g, max_pool_layer) + if self.mode != 'gaussian': + self.phi = nn.Sequential(self.phi, max_pool_layer) + else: + self.phi = max_pool_layer diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/norm.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/norm.py new file mode 100644 index 0000000000000000000000000000000000000000..31f4e49b24080485fc1d85b3e8ff810dc1383c95 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/norm.py @@ -0,0 +1,144 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import inspect + +import torch.nn as nn + +from annotator.mmpkg.mmcv.utils import is_tuple_of +from annotator.mmpkg.mmcv.utils.parrots_wrapper import SyncBatchNorm, _BatchNorm, _InstanceNorm +from .registry import NORM_LAYERS + +NORM_LAYERS.register_module('BN', module=nn.BatchNorm2d) +NORM_LAYERS.register_module('BN1d', module=nn.BatchNorm1d) +NORM_LAYERS.register_module('BN2d', module=nn.BatchNorm2d) +NORM_LAYERS.register_module('BN3d', module=nn.BatchNorm3d) +NORM_LAYERS.register_module('SyncBN', module=SyncBatchNorm) +NORM_LAYERS.register_module('GN', module=nn.GroupNorm) +NORM_LAYERS.register_module('LN', module=nn.LayerNorm) +NORM_LAYERS.register_module('IN', module=nn.InstanceNorm2d) +NORM_LAYERS.register_module('IN1d', module=nn.InstanceNorm1d) +NORM_LAYERS.register_module('IN2d', module=nn.InstanceNorm2d) +NORM_LAYERS.register_module('IN3d', module=nn.InstanceNorm3d) + + +def infer_abbr(class_type): + """Infer abbreviation from the class name. + + When we build a norm layer with `build_norm_layer()`, we want to preserve + the norm type in variable names, e.g, self.bn1, self.gn. This method will + infer the abbreviation to map class types to abbreviations. + + Rule 1: If the class has the property "_abbr_", return the property. + Rule 2: If the parent class is _BatchNorm, GroupNorm, LayerNorm or + InstanceNorm, the abbreviation of this layer will be "bn", "gn", "ln" and + "in" respectively. 
+ Rule 3: If the class name contains "batch", "group", "layer" or "instance", + the abbreviation of this layer will be "bn", "gn", "ln" and "in" + respectively. + Rule 4: Otherwise, the abbreviation falls back to "norm". + + Args: + class_type (type): The norm layer type. + + Returns: + str: The inferred abbreviation. + """ + if not inspect.isclass(class_type): + raise TypeError( + f'class_type must be a type, but got {type(class_type)}') + if hasattr(class_type, '_abbr_'): + return class_type._abbr_ + if issubclass(class_type, _InstanceNorm): # IN is a subclass of BN + return 'in' + elif issubclass(class_type, _BatchNorm): + return 'bn' + elif issubclass(class_type, nn.GroupNorm): + return 'gn' + elif issubclass(class_type, nn.LayerNorm): + return 'ln' + else: + class_name = class_type.__name__.lower() + if 'batch' in class_name: + return 'bn' + elif 'group' in class_name: + return 'gn' + elif 'layer' in class_name: + return 'ln' + elif 'instance' in class_name: + return 'in' + else: + return 'norm_layer' + + +def build_norm_layer(cfg, num_features, postfix=''): + """Build normalization layer. + + Args: + cfg (dict): The norm layer config, which should contain: + + - type (str): Layer type. + - layer args: Args needed to instantiate a norm layer. + - requires_grad (bool, optional): Whether stop gradient updates. + num_features (int): Number of input channels. + postfix (int | str): The postfix to be appended into norm abbreviation + to create named layer. + + Returns: + (str, nn.Module): The first element is the layer name consisting of + abbreviation and postfix, e.g., bn1, gn. The second element is the + created norm layer. + """ + if not isinstance(cfg, dict): + raise TypeError('cfg must be a dict') + if 'type' not in cfg: + raise KeyError('the cfg dict must contain the key "type"') + cfg_ = cfg.copy() + + layer_type = cfg_.pop('type') + if layer_type not in NORM_LAYERS: + raise KeyError(f'Unrecognized norm type {layer_type}') + + norm_layer = NORM_LAYERS.get(layer_type) + abbr = infer_abbr(norm_layer) + + assert isinstance(postfix, (int, str)) + name = abbr + str(postfix) + + requires_grad = cfg_.pop('requires_grad', True) + cfg_.setdefault('eps', 1e-5) + if layer_type != 'GN': + layer = norm_layer(num_features, **cfg_) + if layer_type == 'SyncBN' and hasattr(layer, '_specify_ddp_gpu_num'): + layer._specify_ddp_gpu_num(1) + else: + assert 'num_groups' in cfg_ + layer = norm_layer(num_channels=num_features, **cfg_) + + for param in layer.parameters(): + param.requires_grad = requires_grad + + return name, layer + + +def is_norm(layer, exclude=None): + """Check if a layer is a normalization layer. + + Args: + layer (nn.Module): The layer to be checked. + exclude (type | tuple[type]): Types to be excluded. + + Returns: + bool: Whether the layer is a norm layer. 
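A sketch of how `build_norm_layer` is typically called: it returns a `(name, layer)` pair, where the name combines the inferred abbreviation with the postfix, and `requires_grad` in the cfg controls whether the affine parameters stay trainable. Import path as added in this diff; the feature sizes are illustrative.

import torch.nn as nn
from annotator.mmpkg.mmcv.cnn.bricks.norm import build_norm_layer, is_norm

name, bn = build_norm_layer(dict(type='BN'), num_features=64, postfix=1)
print(name, isinstance(bn, nn.BatchNorm2d), is_norm(bn))   # bn1 True True

# GN is the one type built with num_channels; num_groups must be in the cfg.
name, gn = build_norm_layer(dict(type='GN', num_groups=8), num_features=64)
print(name, gn)                                            # gn GroupNorm(8, 64, eps=1e-05, affine=True)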
+ """ + if exclude is not None: + if not isinstance(exclude, tuple): + exclude = (exclude, ) + if not is_tuple_of(exclude, type): + raise TypeError( + f'"exclude" must be either None or type or a tuple of types, ' + f'but got {type(exclude)}: {exclude}') + + if exclude and isinstance(layer, exclude): + return False + + all_norm_bases = (_BatchNorm, _InstanceNorm, nn.GroupNorm, nn.LayerNorm) + return isinstance(layer, all_norm_bases) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/padding.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/padding.py new file mode 100644 index 0000000000000000000000000000000000000000..e4ac6b28a1789bd551c613a7d3e7b622433ac7ec --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/padding.py @@ -0,0 +1,36 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch.nn as nn + +from .registry import PADDING_LAYERS + +PADDING_LAYERS.register_module('zero', module=nn.ZeroPad2d) +PADDING_LAYERS.register_module('reflect', module=nn.ReflectionPad2d) +PADDING_LAYERS.register_module('replicate', module=nn.ReplicationPad2d) + + +def build_padding_layer(cfg, *args, **kwargs): + """Build padding layer. + + Args: + cfg (None or dict): The padding layer config, which should contain: + - type (str): Layer type. + - layer args: Args needed to instantiate a padding layer. + + Returns: + nn.Module: Created padding layer. + """ + if not isinstance(cfg, dict): + raise TypeError('cfg must be a dict') + if 'type' not in cfg: + raise KeyError('the cfg dict must contain the key "type"') + + cfg_ = cfg.copy() + padding_type = cfg_.pop('type') + if padding_type not in PADDING_LAYERS: + raise KeyError(f'Unrecognized padding type {padding_type}.') + else: + padding_layer = PADDING_LAYERS.get(padding_type) + + layer = padding_layer(*args, **kwargs, **cfg_) + + return layer diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/plugin.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/plugin.py new file mode 100644 index 0000000000000000000000000000000000000000..07c010d4053174dd41107aa654ea67e82b46a25c --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/plugin.py @@ -0,0 +1,88 @@ +import inspect +import platform + +from .registry import PLUGIN_LAYERS + +if platform.system() == 'Windows': + import regex as re +else: + import re + + +def infer_abbr(class_type): + """Infer abbreviation from the class name. + + This method will infer the abbreviation to map class types to + abbreviations. + + Rule 1: If the class has the property "abbr", return the property. + Rule 2: Otherwise, the abbreviation falls back to snake case of class + name, e.g. the abbreviation of ``FancyBlock`` will be ``fancy_block``. + + Args: + class_type (type): The norm layer type. + + Returns: + str: The inferred abbreviation. + """ + + def camel2snack(word): + """Convert camel case word into snack case. + + Modified from `inflection lib + `_. 
+ + Example:: + + >>> camel2snack("FancyBlock") + 'fancy_block' + """ + + word = re.sub(r'([A-Z]+)([A-Z][a-z])', r'\1_\2', word) + word = re.sub(r'([a-z\d])([A-Z])', r'\1_\2', word) + word = word.replace('-', '_') + return word.lower() + + if not inspect.isclass(class_type): + raise TypeError( + f'class_type must be a type, but got {type(class_type)}') + if hasattr(class_type, '_abbr_'): + return class_type._abbr_ + else: + return camel2snack(class_type.__name__) + + +def build_plugin_layer(cfg, postfix='', **kwargs): + """Build plugin layer. + + Args: + cfg (None or dict): cfg should contain: + type (str): identify plugin layer type. + layer args: args needed to instantiate a plugin layer. + postfix (int, str): appended into norm abbreviation to + create named layer. Default: ''. + + Returns: + tuple[str, nn.Module]: + name (str): abbreviation + postfix + layer (nn.Module): created plugin layer + """ + if not isinstance(cfg, dict): + raise TypeError('cfg must be a dict') + if 'type' not in cfg: + raise KeyError('the cfg dict must contain the key "type"') + cfg_ = cfg.copy() + + layer_type = cfg_.pop('type') + if layer_type not in PLUGIN_LAYERS: + raise KeyError(f'Unrecognized plugin type {layer_type}') + + plugin_layer = PLUGIN_LAYERS.get(layer_type) + abbr = infer_abbr(plugin_layer) + + assert isinstance(postfix, (int, str)) + name = abbr + str(postfix) + + layer = plugin_layer(**kwargs, **cfg_) + + return name, layer diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/registry.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/registry.py new file mode 100644 index 0000000000000000000000000000000000000000..4f374cca4961c06babf328bb7407723a14026c47 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/registry.py @@ -0,0 +1,16 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from annotator.mmpkg.mmcv.utils import Registry + +CONV_LAYERS = Registry('conv layer') +NORM_LAYERS = Registry('norm layer') +ACTIVATION_LAYERS = Registry('activation layer') +PADDING_LAYERS = Registry('padding layer') +UPSAMPLE_LAYERS = Registry('upsample layer') +PLUGIN_LAYERS = Registry('plugin layer') + +DROPOUT_LAYERS = Registry('drop out layers') +POSITIONAL_ENCODING = Registry('position encoding') +ATTENTION = Registry('attention') +FEEDFORWARD_NETWORK = Registry('feed-forward Network') +TRANSFORMER_LAYER = Registry('transformerLayer') +TRANSFORMER_LAYER_SEQUENCE = Registry('transformer-layers sequence') diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/scale.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/scale.py new file mode 100644 index 0000000000000000000000000000000000000000..c905fffcc8bf998d18d94f927591963c428025e2 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/scale.py @@ -0,0 +1,21 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn + + +class Scale(nn.Module): + """A learnable scale parameter. + + This layer scales the input by a learnable factor. It multiplies a + learnable scale parameter of shape (1,) with input of any shape. + + Args: + scale (float): Initial value of scale factor. 
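A sketch of the plugin builder: the returned name comes from `infer_abbr` (the `_abbr_` attribute when present, otherwise the snake-case class name) plus the postfix. `NonLocal2d` from the non-local file above is registered as a plugin and carries `_abbr_ = 'nonlocal_block'`. Import paths are assumptions; the second import only forces the registration side effect.

from annotator.mmpkg.mmcv.cnn.bricks.plugin import build_plugin_layer
from annotator.mmpkg.mmcv.cnn.bricks import non_local  # noqa: F401  (registers NonLocal2d)

name, layer = build_plugin_layer(
    dict(type='NonLocal2d', in_channels=64, reduction=2), postfix='_1')
print(name)                     # nonlocal_block_1
print(type(layer).__name__)     # NonLocal2d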
Default: 1.0 + """ + + def __init__(self, scale=1.0): + super(Scale, self).__init__() + self.scale = nn.Parameter(torch.tensor(scale, dtype=torch.float)) + + def forward(self, x): + return x * self.scale diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/swish.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/swish.py new file mode 100644 index 0000000000000000000000000000000000000000..e2ca8ed7b749413f011ae54aac0cab27e6f0b51f --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/swish.py @@ -0,0 +1,25 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn + +from .registry import ACTIVATION_LAYERS + + +@ACTIVATION_LAYERS.register_module() +class Swish(nn.Module): + """Swish Module. + + This module applies the swish function: + + .. math:: + Swish(x) = x * Sigmoid(x) + + Returns: + Tensor: The output tensor. + """ + + def __init__(self): + super(Swish, self).__init__() + + def forward(self, x): + return x * torch.sigmoid(x) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/transformer.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..e16707142b645144b676059ffa992fc4306ef778 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/transformer.py @@ -0,0 +1,595 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import copy +import warnings + +import torch +import torch.nn as nn + +from annotator.mmpkg.mmcv import ConfigDict, deprecated_api_warning +from annotator.mmpkg.mmcv.cnn import Linear, build_activation_layer, build_norm_layer +from annotator.mmpkg.mmcv.runner.base_module import BaseModule, ModuleList, Sequential +from annotator.mmpkg.mmcv.utils import build_from_cfg +from .drop import build_dropout +from .registry import (ATTENTION, FEEDFORWARD_NETWORK, POSITIONAL_ENCODING, + TRANSFORMER_LAYER, TRANSFORMER_LAYER_SEQUENCE) + +# Avoid BC-breaking of importing MultiScaleDeformableAttention from this file +try: + from annotator.mmpkg.mmcv.ops.multi_scale_deform_attn import MultiScaleDeformableAttention # noqa F401 + warnings.warn( + ImportWarning( + '``MultiScaleDeformableAttention`` has been moved to ' + '``mmcv.ops.multi_scale_deform_attn``, please change original path ' # noqa E501 + '``from annotator.mmpkg.mmcv.cnn.bricks.transformer import MultiScaleDeformableAttention`` ' # noqa E501 + 'to ``from annotator.mmpkg.mmcv.ops.multi_scale_deform_attn import MultiScaleDeformableAttention`` ' # noqa E501 + )) + +except ImportError: + warnings.warn('Fail to import ``MultiScaleDeformableAttention`` from ' + '``mmcv.ops.multi_scale_deform_attn``, ' + 'You should install ``mmcv-full`` if you need this module. 
') + + +def build_positional_encoding(cfg, default_args=None): + """Builder for Position Encoding.""" + return build_from_cfg(cfg, POSITIONAL_ENCODING, default_args) + + +def build_attention(cfg, default_args=None): + """Builder for attention.""" + return build_from_cfg(cfg, ATTENTION, default_args) + + +def build_feedforward_network(cfg, default_args=None): + """Builder for feed-forward network (FFN).""" + return build_from_cfg(cfg, FEEDFORWARD_NETWORK, default_args) + + +def build_transformer_layer(cfg, default_args=None): + """Builder for transformer layer.""" + return build_from_cfg(cfg, TRANSFORMER_LAYER, default_args) + + +def build_transformer_layer_sequence(cfg, default_args=None): + """Builder for transformer encoder and transformer decoder.""" + return build_from_cfg(cfg, TRANSFORMER_LAYER_SEQUENCE, default_args) + + +@ATTENTION.register_module() +class MultiheadAttention(BaseModule): + """A wrapper for ``torch.nn.MultiheadAttention``. + + This module implements MultiheadAttention with identity connection, + and positional encoding is also passed as input. + + Args: + embed_dims (int): The embedding dimension. + num_heads (int): Parallel attention heads. + attn_drop (float): A Dropout layer on attn_output_weights. + Default: 0.0. + proj_drop (float): A Dropout layer after `nn.MultiheadAttention`. + Default: 0.0. + dropout_layer (obj:`ConfigDict`): The dropout_layer used + when adding the shortcut. + init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization. + Default: None. + batch_first (bool): When it is True, Key, Query and Value are shape of + (batch, n, embed_dim), otherwise (n, batch, embed_dim). + Default to False. + """ + + def __init__(self, + embed_dims, + num_heads, + attn_drop=0., + proj_drop=0., + dropout_layer=dict(type='Dropout', drop_prob=0.), + init_cfg=None, + batch_first=False, + **kwargs): + super(MultiheadAttention, self).__init__(init_cfg) + if 'dropout' in kwargs: + warnings.warn('The arguments `dropout` in MultiheadAttention ' + 'has been deprecated, now you can separately ' + 'set `attn_drop`(float), proj_drop(float), ' + 'and `dropout_layer`(dict) ') + attn_drop = kwargs['dropout'] + dropout_layer['drop_prob'] = kwargs.pop('dropout') + + self.embed_dims = embed_dims + self.num_heads = num_heads + self.batch_first = batch_first + + self.attn = nn.MultiheadAttention(embed_dims, num_heads, attn_drop, + **kwargs) + + self.proj_drop = nn.Dropout(proj_drop) + self.dropout_layer = build_dropout( + dropout_layer) if dropout_layer else nn.Identity() + + @deprecated_api_warning({'residual': 'identity'}, + cls_name='MultiheadAttention') + def forward(self, + query, + key=None, + value=None, + identity=None, + query_pos=None, + key_pos=None, + attn_mask=None, + key_padding_mask=None, + **kwargs): + """Forward function for `MultiheadAttention`. + + **kwargs allow passing a more general data flow when combining + with other operations in `transformerlayer`. + + Args: + query (Tensor): The input query with shape [num_queries, bs, + embed_dims] if self.batch_first is False, else + [bs, num_queries embed_dims]. + key (Tensor): The key tensor with shape [num_keys, bs, + embed_dims] if self.batch_first is False, else + [bs, num_keys, embed_dims] . + If None, the ``query`` will be used. Defaults to None. + value (Tensor): The value tensor with same shape as `key`. + Same in `nn.MultiheadAttention.forward`. Defaults to None. + If None, the `key` will be used. + identity (Tensor): This tensor, with the same shape as x, + will be used for the identity link. 
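These builders are thin wrappers around `build_from_cfg` against the per-role registries. As a sketch (assumed import path, arbitrary sizes), the `MultiheadAttention` wrapper defined in this same file can be built from a cfg dict; it already handles the identity shortcut and the positional encodings internally.

import torch
from annotator.mmpkg.mmcv.cnn.bricks.transformer import build_attention

attn = build_attention(
    dict(type='MultiheadAttention', embed_dims=64, num_heads=4, batch_first=True))

x = torch.randn(2, 100, 64)           # (batch, num_queries, embed_dims) since batch_first=True
pos = torch.randn(2, 100, 64)         # positional encoding, added to query/key inside forward

out = attn(query=x, query_pos=pos)    # key/value default to the query; identity defaults to the query
print(out.shape)                      # torch.Size([2, 100, 64])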
+ If None, `x` will be used. Defaults to None. + query_pos (Tensor): The positional encoding for query, with + the same shape as `x`. If not None, it will + be added to `x` before forward function. Defaults to None. + key_pos (Tensor): The positional encoding for `key`, with the + same shape as `key`. Defaults to None. If not None, it will + be added to `key` before forward function. If None, and + `query_pos` has the same shape as `key`, then `query_pos` + will be used for `key_pos`. Defaults to None. + attn_mask (Tensor): ByteTensor mask with shape [num_queries, + num_keys]. Same in `nn.MultiheadAttention.forward`. + Defaults to None. + key_padding_mask (Tensor): ByteTensor with shape [bs, num_keys]. + Defaults to None. + + Returns: + Tensor: forwarded results with shape + [num_queries, bs, embed_dims] + if self.batch_first is False, else + [bs, num_queries embed_dims]. + """ + + if key is None: + key = query + if value is None: + value = key + if identity is None: + identity = query + if key_pos is None: + if query_pos is not None: + # use query_pos if key_pos is not available + if query_pos.shape == key.shape: + key_pos = query_pos + else: + warnings.warn(f'position encoding of key is' + f'missing in {self.__class__.__name__}.') + if query_pos is not None: + query = query + query_pos + if key_pos is not None: + key = key + key_pos + + # Because the dataflow('key', 'query', 'value') of + # ``torch.nn.MultiheadAttention`` is (num_query, batch, + # embed_dims), We should adjust the shape of dataflow from + # batch_first (batch, num_query, embed_dims) to num_query_first + # (num_query ,batch, embed_dims), and recover ``attn_output`` + # from num_query_first to batch_first. + if self.batch_first: + query = query.transpose(0, 1) + key = key.transpose(0, 1) + value = value.transpose(0, 1) + + out = self.attn( + query=query, + key=key, + value=value, + attn_mask=attn_mask, + key_padding_mask=key_padding_mask)[0] + + if self.batch_first: + out = out.transpose(0, 1) + + return identity + self.dropout_layer(self.proj_drop(out)) + + +@FEEDFORWARD_NETWORK.register_module() +class FFN(BaseModule): + """Implements feed-forward networks (FFNs) with identity connection. + + Args: + embed_dims (int): The feature dimension. Same as + `MultiheadAttention`. Defaults: 256. + feedforward_channels (int): The hidden dimension of FFNs. + Defaults: 1024. + num_fcs (int, optional): The number of fully-connected layers in + FFNs. Default: 2. + act_cfg (dict, optional): The activation config for FFNs. + Default: dict(type='ReLU') + ffn_drop (float, optional): Probability of an element to be + zeroed in FFN. Default 0.0. + add_identity (bool, optional): Whether to add the + identity connection. Default: `True`. + dropout_layer (obj:`ConfigDict`): The dropout_layer used + when adding the shortcut. + init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization. + Default: None. + """ + + @deprecated_api_warning( + { + 'dropout': 'ffn_drop', + 'add_residual': 'add_identity' + }, + cls_name='FFN') + def __init__(self, + embed_dims=256, + feedforward_channels=1024, + num_fcs=2, + act_cfg=dict(type='ReLU', inplace=True), + ffn_drop=0., + dropout_layer=None, + add_identity=True, + init_cfg=None, + **kwargs): + super(FFN, self).__init__(init_cfg) + assert num_fcs >= 2, 'num_fcs should be no less ' \ + f'than 2. got {num_fcs}.' 
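The FFN declared here is the usual two-layer MLP with dropout and an identity shortcut. A usage sketch under the same import-path assumption:

import torch
from annotator.mmpkg.mmcv.cnn.bricks.transformer import FFN

ffn = FFN(embed_dims=64, feedforward_channels=256, num_fcs=2, ffn_drop=0.1)

x = torch.randn(2, 100, 64)
print(ffn(x).shape)            # torch.Size([2, 100, 64]); output is x + MLP(x) since add_identity=True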
+ self.embed_dims = embed_dims + self.feedforward_channels = feedforward_channels + self.num_fcs = num_fcs + self.act_cfg = act_cfg + self.activate = build_activation_layer(act_cfg) + + layers = [] + in_channels = embed_dims + for _ in range(num_fcs - 1): + layers.append( + Sequential( + Linear(in_channels, feedforward_channels), self.activate, + nn.Dropout(ffn_drop))) + in_channels = feedforward_channels + layers.append(Linear(feedforward_channels, embed_dims)) + layers.append(nn.Dropout(ffn_drop)) + self.layers = Sequential(*layers) + self.dropout_layer = build_dropout( + dropout_layer) if dropout_layer else torch.nn.Identity() + self.add_identity = add_identity + + @deprecated_api_warning({'residual': 'identity'}, cls_name='FFN') + def forward(self, x, identity=None): + """Forward function for `FFN`. + + The function would add x to the output tensor if residue is None. + """ + out = self.layers(x) + if not self.add_identity: + return self.dropout_layer(out) + if identity is None: + identity = x + return identity + self.dropout_layer(out) + + +@TRANSFORMER_LAYER.register_module() +class BaseTransformerLayer(BaseModule): + """Base `TransformerLayer` for vision transformer. + + It can be built from `mmcv.ConfigDict` and support more flexible + customization, for example, using any number of `FFN or LN ` and + use different kinds of `attention` by specifying a list of `ConfigDict` + named `attn_cfgs`. It is worth mentioning that it supports `prenorm` + when you specifying `norm` as the first element of `operation_order`. + More details about the `prenorm`: `On Layer Normalization in the + Transformer Architecture `_ . + + Args: + attn_cfgs (list[`mmcv.ConfigDict`] | obj:`mmcv.ConfigDict` | None )): + Configs for `self_attention` or `cross_attention` modules, + The order of the configs in the list should be consistent with + corresponding attentions in operation_order. + If it is a dict, all of the attention modules in operation_order + will be built with this config. Default: None. + ffn_cfgs (list[`mmcv.ConfigDict`] | obj:`mmcv.ConfigDict` | None )): + Configs for FFN, The order of the configs in the list should be + consistent with corresponding ffn in operation_order. + If it is a dict, all of the attention modules in operation_order + will be built with this config. + operation_order (tuple[str]): The execution order of operation + in transformer. Such as ('self_attn', 'norm', 'ffn', 'norm'). + Support `prenorm` when you specifying first element as `norm`. + Default:None. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='LN'). + init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization. + Default: None. + batch_first (bool): Key, Query and Value are shape + of (batch, n, embed_dim) + or (n, batch, embed_dim). Default to False. + """ + + def __init__(self, + attn_cfgs=None, + ffn_cfgs=dict( + type='FFN', + embed_dims=256, + feedforward_channels=1024, + num_fcs=2, + ffn_drop=0., + act_cfg=dict(type='ReLU', inplace=True), + ), + operation_order=None, + norm_cfg=dict(type='LN'), + init_cfg=None, + batch_first=False, + **kwargs): + + deprecated_args = dict( + feedforward_channels='feedforward_channels', + ffn_dropout='ffn_drop', + ffn_num_fcs='num_fcs') + for ori_name, new_name in deprecated_args.items(): + if ori_name in kwargs: + warnings.warn( + f'The arguments `{ori_name}` in BaseTransformerLayer ' + f'has been deprecated, now you should set `{new_name}` ' + f'and other FFN related arguments ' + f'to a dict named `ffn_cfgs`. 
') + ffn_cfgs[new_name] = kwargs[ori_name] + + super(BaseTransformerLayer, self).__init__(init_cfg) + + self.batch_first = batch_first + + assert set(operation_order) & set( + ['self_attn', 'norm', 'ffn', 'cross_attn']) == \ + set(operation_order), f'The operation_order of' \ + f' {self.__class__.__name__} should ' \ + f'contains all four operation type ' \ + f"{['self_attn', 'norm', 'ffn', 'cross_attn']}" + + num_attn = operation_order.count('self_attn') + operation_order.count( + 'cross_attn') + if isinstance(attn_cfgs, dict): + attn_cfgs = [copy.deepcopy(attn_cfgs) for _ in range(num_attn)] + else: + assert num_attn == len(attn_cfgs), f'The length ' \ + f'of attn_cfg {num_attn} is ' \ + f'not consistent with the number of attention' \ + f'in operation_order {operation_order}.' + + self.num_attn = num_attn + self.operation_order = operation_order + self.norm_cfg = norm_cfg + self.pre_norm = operation_order[0] == 'norm' + self.attentions = ModuleList() + + index = 0 + for operation_name in operation_order: + if operation_name in ['self_attn', 'cross_attn']: + if 'batch_first' in attn_cfgs[index]: + assert self.batch_first == attn_cfgs[index]['batch_first'] + else: + attn_cfgs[index]['batch_first'] = self.batch_first + attention = build_attention(attn_cfgs[index]) + # Some custom attentions used as `self_attn` + # or `cross_attn` can have different behavior. + attention.operation_name = operation_name + self.attentions.append(attention) + index += 1 + + self.embed_dims = self.attentions[0].embed_dims + + self.ffns = ModuleList() + num_ffns = operation_order.count('ffn') + if isinstance(ffn_cfgs, dict): + ffn_cfgs = ConfigDict(ffn_cfgs) + if isinstance(ffn_cfgs, dict): + ffn_cfgs = [copy.deepcopy(ffn_cfgs) for _ in range(num_ffns)] + assert len(ffn_cfgs) == num_ffns + for ffn_index in range(num_ffns): + if 'embed_dims' not in ffn_cfgs[ffn_index]: + ffn_cfgs['embed_dims'] = self.embed_dims + else: + assert ffn_cfgs[ffn_index]['embed_dims'] == self.embed_dims + self.ffns.append( + build_feedforward_network(ffn_cfgs[ffn_index], + dict(type='FFN'))) + + self.norms = ModuleList() + num_norms = operation_order.count('norm') + for _ in range(num_norms): + self.norms.append(build_norm_layer(norm_cfg, self.embed_dims)[1]) + + def forward(self, + query, + key=None, + value=None, + query_pos=None, + key_pos=None, + attn_masks=None, + query_key_padding_mask=None, + key_padding_mask=None, + **kwargs): + """Forward function for `TransformerDecoderLayer`. + + **kwargs contains some specific arguments of attentions. + + Args: + query (Tensor): The input query with shape + [num_queries, bs, embed_dims] if + self.batch_first is False, else + [bs, num_queries embed_dims]. + key (Tensor): The key tensor with shape [num_keys, bs, + embed_dims] if self.batch_first is False, else + [bs, num_keys, embed_dims] . + value (Tensor): The value tensor with same shape as `key`. + query_pos (Tensor): The positional encoding for `query`. + Default: None. + key_pos (Tensor): The positional encoding for `key`. + Default: None. + attn_masks (List[Tensor] | None): 2D Tensor used in + calculation of corresponding attention. The length of + it should equal to the number of `attention` in + `operation_order`. Default: None. + query_key_padding_mask (Tensor): ByteTensor for `query`, with + shape [bs, num_queries]. Only used in `self_attn` layer. + Defaults to None. + key_padding_mask (Tensor): ByteTensor for `query`, with + shape [bs, num_keys]. Default: None. 
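A sketch of assembling the layer from a cfg: one self-attention plus an FFN with post-norm, i.e. the classic encoder layer (putting 'norm' first in `operation_order` would switch it to pre-norm). Import path and sizes are assumptions.

import torch
from annotator.mmpkg.mmcv.cnn.bricks.transformer import build_transformer_layer

layer = build_transformer_layer(dict(
    type='BaseTransformerLayer',
    attn_cfgs=dict(type='MultiheadAttention', embed_dims=64, num_heads=4),
    ffn_cfgs=dict(type='FFN', embed_dims=64, feedforward_channels=256),
    operation_order=('self_attn', 'norm', 'ffn', 'norm')))

q = torch.randn(100, 2, 64)     # (num_queries, batch, embed_dims) with the default batch_first=False
print(layer(q).shape)           # torch.Size([100, 2, 64])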
+ + Returns: + Tensor: forwarded results with shape [num_queries, bs, embed_dims]. + """ + + norm_index = 0 + attn_index = 0 + ffn_index = 0 + identity = query + if attn_masks is None: + attn_masks = [None for _ in range(self.num_attn)] + elif isinstance(attn_masks, torch.Tensor): + attn_masks = [ + copy.deepcopy(attn_masks) for _ in range(self.num_attn) + ] + warnings.warn(f'Use same attn_mask in all attentions in ' + f'{self.__class__.__name__} ') + else: + assert len(attn_masks) == self.num_attn, f'The length of ' \ + f'attn_masks {len(attn_masks)} must be equal ' \ + f'to the number of attention in ' \ + f'operation_order {self.num_attn}' + + for layer in self.operation_order: + if layer == 'self_attn': + temp_key = temp_value = query + query = self.attentions[attn_index]( + query, + temp_key, + temp_value, + identity if self.pre_norm else None, + query_pos=query_pos, + key_pos=query_pos, + attn_mask=attn_masks[attn_index], + key_padding_mask=query_key_padding_mask, + **kwargs) + attn_index += 1 + identity = query + + elif layer == 'norm': + query = self.norms[norm_index](query) + norm_index += 1 + + elif layer == 'cross_attn': + query = self.attentions[attn_index]( + query, + key, + value, + identity if self.pre_norm else None, + query_pos=query_pos, + key_pos=key_pos, + attn_mask=attn_masks[attn_index], + key_padding_mask=key_padding_mask, + **kwargs) + attn_index += 1 + identity = query + + elif layer == 'ffn': + query = self.ffns[ffn_index]( + query, identity if self.pre_norm else None) + ffn_index += 1 + + return query + + +@TRANSFORMER_LAYER_SEQUENCE.register_module() +class TransformerLayerSequence(BaseModule): + """Base class for TransformerEncoder and TransformerDecoder in vision + transformer. + + As base-class of Encoder and Decoder in vision transformer. + Support customization such as specifying different kind + of `transformer_layer` in `transformer_coder`. + + Args: + transformerlayer (list[obj:`mmcv.ConfigDict`] | + obj:`mmcv.ConfigDict`): Config of transformerlayer + in TransformerCoder. If it is obj:`mmcv.ConfigDict`, + it would be repeated `num_layer` times to a + list[`mmcv.ConfigDict`]. Default: None. + num_layers (int): The number of `TransformerLayer`. Default: None. + init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization. + Default: None. + """ + + def __init__(self, transformerlayers=None, num_layers=None, init_cfg=None): + super(TransformerLayerSequence, self).__init__(init_cfg) + if isinstance(transformerlayers, dict): + transformerlayers = [ + copy.deepcopy(transformerlayers) for _ in range(num_layers) + ] + else: + assert isinstance(transformerlayers, list) and \ + len(transformerlayers) == num_layers + self.num_layers = num_layers + self.layers = ModuleList() + for i in range(num_layers): + self.layers.append(build_transformer_layer(transformerlayers[i])) + self.embed_dims = self.layers[0].embed_dims + self.pre_norm = self.layers[0].pre_norm + + def forward(self, + query, + key, + value, + query_pos=None, + key_pos=None, + attn_masks=None, + query_key_padding_mask=None, + key_padding_mask=None, + **kwargs): + """Forward function for `TransformerCoder`. + + Args: + query (Tensor): Input query with shape + `(num_queries, bs, embed_dims)`. + key (Tensor): The key tensor with shape + `(num_keys, bs, embed_dims)`. + value (Tensor): The value tensor with shape + `(num_keys, bs, embed_dims)`. + query_pos (Tensor): The positional encoding for `query`. + Default: None. + key_pos (Tensor): The positional encoding for `key`. + Default: None. 
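`TransformerLayerSequence` simply stacks `num_layers` copies of a layer cfg (or a list of per-layer cfgs) and threads the query through them. A self-attention-only encoder sketch, with the same import-path and size assumptions as above:

import torch
from annotator.mmpkg.mmcv.cnn.bricks.transformer import build_transformer_layer_sequence

encoder = build_transformer_layer_sequence(dict(
    type='TransformerLayerSequence',
    num_layers=2,                            # the single cfg below is deep-copied twice
    transformerlayers=dict(
        type='BaseTransformerLayer',
        attn_cfgs=dict(type='MultiheadAttention', embed_dims=64, num_heads=4),
        ffn_cfgs=dict(type='FFN', embed_dims=64, feedforward_channels=256),
        operation_order=('self_attn', 'norm', 'ffn', 'norm'))))

q = torch.randn(100, 2, 64)
print(encoder(q, key=None, value=None).shape)   # torch.Size([100, 2, 64]); key/value are unused without cross_attn
print(encoder.num_layers, encoder.embed_dims)   # 2 64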
+ attn_masks (List[Tensor], optional): Each element is 2D Tensor + which is used in calculation of corresponding attention in + operation_order. Default: None. + query_key_padding_mask (Tensor): ByteTensor for `query`, with + shape [bs, num_queries]. Only used in self-attention + Default: None. + key_padding_mask (Tensor): ByteTensor for `query`, with + shape [bs, num_keys]. Default: None. + + Returns: + Tensor: results with shape [num_queries, bs, embed_dims]. + """ + for layer in self.layers: + query = layer( + query, + key, + value, + query_pos=query_pos, + key_pos=key_pos, + attn_masks=attn_masks, + query_key_padding_mask=query_key_padding_mask, + key_padding_mask=key_padding_mask, + **kwargs) + return query diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/upsample.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/upsample.py new file mode 100644 index 0000000000000000000000000000000000000000..a1a353767d0ce8518f0d7289bed10dba0178ed12 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/upsample.py @@ -0,0 +1,84 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch.nn as nn +import torch.nn.functional as F + +from ..utils import xavier_init +from .registry import UPSAMPLE_LAYERS + +UPSAMPLE_LAYERS.register_module('nearest', module=nn.Upsample) +UPSAMPLE_LAYERS.register_module('bilinear', module=nn.Upsample) + + +@UPSAMPLE_LAYERS.register_module(name='pixel_shuffle') +class PixelShufflePack(nn.Module): + """Pixel Shuffle upsample layer. + + This module packs `F.pixel_shuffle()` and a nn.Conv2d module together to + achieve a simple upsampling with pixel shuffle. + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + scale_factor (int): Upsample ratio. + upsample_kernel (int): Kernel size of the conv layer to expand the + channels. + """ + + def __init__(self, in_channels, out_channels, scale_factor, + upsample_kernel): + super(PixelShufflePack, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.scale_factor = scale_factor + self.upsample_kernel = upsample_kernel + self.upsample_conv = nn.Conv2d( + self.in_channels, + self.out_channels * scale_factor * scale_factor, + self.upsample_kernel, + padding=(self.upsample_kernel - 1) // 2) + self.init_weights() + + def init_weights(self): + xavier_init(self.upsample_conv, distribution='uniform') + + def forward(self, x): + x = self.upsample_conv(x) + x = F.pixel_shuffle(x, self.scale_factor) + return x + + +def build_upsample_layer(cfg, *args, **kwargs): + """Build upsample layer. + + Args: + cfg (dict): The upsample layer config, which should contain: + + - type (str): Layer type. + - scale_factor (int): Upsample ratio, which is not applicable to + deconv. + - layer args: Args needed to instantiate a upsample layer. + args (argument list): Arguments passed to the ``__init__`` + method of the corresponding conv layer. + kwargs (keyword arguments): Keyword arguments passed to the + ``__init__`` method of the corresponding conv layer. + + Returns: + nn.Module: Created upsample layer. 
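    A usage sketch (illustrative channel counts; ``pixel_shuffle``, ``nearest`` and
    ``bilinear`` are the entries registered at the top of this file):

    >>> up = build_upsample_layer(
    ...     dict(type='pixel_shuffle', in_channels=64, out_channels=64,
    ...          scale_factor=2, upsample_kernel=3))
    >>> interp = build_upsample_layer(
    ...     dict(type='bilinear', scale_factor=2, align_corners=False))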
+ """ + if not isinstance(cfg, dict): + raise TypeError(f'cfg must be a dict, but got {type(cfg)}') + if 'type' not in cfg: + raise KeyError( + f'the cfg dict must contain the key "type", but got {cfg}') + cfg_ = cfg.copy() + + layer_type = cfg_.pop('type') + if layer_type not in UPSAMPLE_LAYERS: + raise KeyError(f'Unrecognized upsample type {layer_type}') + else: + upsample = UPSAMPLE_LAYERS.get(layer_type) + + if upsample is nn.Upsample: + cfg_['mode'] = layer_type + layer = upsample(*args, **kwargs, **cfg_) + return layer diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/wrappers.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/wrappers.py new file mode 100644 index 0000000000000000000000000000000000000000..8aebf67bf52355a513f21756ee74fe510902d075 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/bricks/wrappers.py @@ -0,0 +1,180 @@ +# Copyright (c) OpenMMLab. All rights reserved. +r"""Modified from https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/wrappers.py # noqa: E501 + +Wrap some nn modules to support empty tensor input. Currently, these wrappers +are mainly used in mask heads like fcn_mask_head and maskiou_heads since mask +heads are trained on only positive RoIs. +""" +import math + +import torch +import torch.nn as nn +from torch.nn.modules.utils import _pair, _triple + +from .registry import CONV_LAYERS, UPSAMPLE_LAYERS + +if torch.__version__ == 'parrots': + TORCH_VERSION = torch.__version__ +else: + # torch.__version__ could be 1.3.1+cu92, we only need the first two + # for comparison + TORCH_VERSION = tuple(int(x) for x in torch.__version__.split('.')[:2]) + + +def obsolete_torch_version(torch_version, version_threshold): + return torch_version == 'parrots' or torch_version <= version_threshold + + +class NewEmptyTensorOp(torch.autograd.Function): + + @staticmethod + def forward(ctx, x, new_shape): + ctx.shape = x.shape + return x.new_empty(new_shape) + + @staticmethod + def backward(ctx, grad): + shape = ctx.shape + return NewEmptyTensorOp.apply(grad, shape), None + + +@CONV_LAYERS.register_module('Conv', force=True) +class Conv2d(nn.Conv2d): + + def forward(self, x): + if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)): + out_shape = [x.shape[0], self.out_channels] + for i, k, p, s, d in zip(x.shape[-2:], self.kernel_size, + self.padding, self.stride, self.dilation): + o = (i + 2 * p - (d * (k - 1) + 1)) // s + 1 + out_shape.append(o) + empty = NewEmptyTensorOp.apply(x, out_shape) + if self.training: + # produce dummy gradient to avoid DDP warning. + dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 + return empty + dummy + else: + return empty + + return super().forward(x) + + +@CONV_LAYERS.register_module('Conv3d', force=True) +class Conv3d(nn.Conv3d): + + def forward(self, x): + if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)): + out_shape = [x.shape[0], self.out_channels] + for i, k, p, s, d in zip(x.shape[-3:], self.kernel_size, + self.padding, self.stride, self.dilation): + o = (i + 2 * p - (d * (k - 1) + 1)) // s + 1 + out_shape.append(o) + empty = NewEmptyTensorOp.apply(x, out_shape) + if self.training: + # produce dummy gradient to avoid DDP warning. 
+ dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 + return empty + dummy + else: + return empty + + return super().forward(x) + + +@CONV_LAYERS.register_module() +@CONV_LAYERS.register_module('deconv') +@UPSAMPLE_LAYERS.register_module('deconv', force=True) +class ConvTranspose2d(nn.ConvTranspose2d): + + def forward(self, x): + if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)): + out_shape = [x.shape[0], self.out_channels] + for i, k, p, s, d, op in zip(x.shape[-2:], self.kernel_size, + self.padding, self.stride, + self.dilation, self.output_padding): + out_shape.append((i - 1) * s - 2 * p + (d * (k - 1) + 1) + op) + empty = NewEmptyTensorOp.apply(x, out_shape) + if self.training: + # produce dummy gradient to avoid DDP warning. + dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 + return empty + dummy + else: + return empty + + return super().forward(x) + + +@CONV_LAYERS.register_module() +@CONV_LAYERS.register_module('deconv3d') +@UPSAMPLE_LAYERS.register_module('deconv3d', force=True) +class ConvTranspose3d(nn.ConvTranspose3d): + + def forward(self, x): + if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)): + out_shape = [x.shape[0], self.out_channels] + for i, k, p, s, d, op in zip(x.shape[-3:], self.kernel_size, + self.padding, self.stride, + self.dilation, self.output_padding): + out_shape.append((i - 1) * s - 2 * p + (d * (k - 1) + 1) + op) + empty = NewEmptyTensorOp.apply(x, out_shape) + if self.training: + # produce dummy gradient to avoid DDP warning. + dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 + return empty + dummy + else: + return empty + + return super().forward(x) + + +class MaxPool2d(nn.MaxPool2d): + + def forward(self, x): + # PyTorch 1.9 does not support empty tensor inference yet + if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 9)): + out_shape = list(x.shape[:2]) + for i, k, p, s, d in zip(x.shape[-2:], _pair(self.kernel_size), + _pair(self.padding), _pair(self.stride), + _pair(self.dilation)): + o = (i + 2 * p - (d * (k - 1) + 1)) / s + 1 + o = math.ceil(o) if self.ceil_mode else math.floor(o) + out_shape.append(o) + empty = NewEmptyTensorOp.apply(x, out_shape) + return empty + + return super().forward(x) + + +class MaxPool3d(nn.MaxPool3d): + + def forward(self, x): + # PyTorch 1.9 does not support empty tensor inference yet + if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 9)): + out_shape = list(x.shape[:2]) + for i, k, p, s, d in zip(x.shape[-3:], _triple(self.kernel_size), + _triple(self.padding), + _triple(self.stride), + _triple(self.dilation)): + o = (i + 2 * p - (d * (k - 1) + 1)) / s + 1 + o = math.ceil(o) if self.ceil_mode else math.floor(o) + out_shape.append(o) + empty = NewEmptyTensorOp.apply(x, out_shape) + return empty + + return super().forward(x) + + +class Linear(torch.nn.Linear): + + def forward(self, x): + # empty tensor forward of Linear layer is supported in Pytorch 1.6 + if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 5)): + out_shape = [x.shape[0], self.out_features] + empty = NewEmptyTensorOp.apply(x, out_shape) + if self.training: + # produce dummy gradient to avoid DDP warning. 
+ dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 + return empty + dummy + else: + return empty + + return super().forward(x) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/builder.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/builder.py new file mode 100644 index 0000000000000000000000000000000000000000..7567316c566bd3aca6d8f65a84b00e9e890948a7 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/builder.py @@ -0,0 +1,30 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from ..runner import Sequential +from ..utils import Registry, build_from_cfg + + +def build_model_from_cfg(cfg, registry, default_args=None): + """Build a PyTorch model from config dict(s). Different from + ``build_from_cfg``, if cfg is a list, a ``nn.Sequential`` will be built. + + Args: + cfg (dict, list[dict]): The config of modules, is is either a config + dict or a list of config dicts. If cfg is a list, a + the built modules will be wrapped with ``nn.Sequential``. + registry (:obj:`Registry`): A registry the module belongs to. + default_args (dict, optional): Default arguments to build the module. + Defaults to None. + + Returns: + nn.Module: A built nn module. + """ + if isinstance(cfg, list): + modules = [ + build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg + ] + return Sequential(*modules) + else: + return build_from_cfg(cfg, registry, default_args) + + +MODELS = Registry('model', build_func=build_model_from_cfg) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/resnet.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..1cb3ac057ee2d52c46fc94685b5d4e698aad8d5f --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/resnet.py @@ -0,0 +1,316 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import logging + +import torch.nn as nn +import torch.utils.checkpoint as cp + +from .utils import constant_init, kaiming_init + + +def conv3x3(in_planes, out_planes, stride=1, dilation=1): + """3x3 convolution with padding.""" + return nn.Conv2d( + in_planes, + out_planes, + kernel_size=3, + stride=stride, + padding=dilation, + dilation=dilation, + bias=False) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, + inplanes, + planes, + stride=1, + dilation=1, + downsample=None, + style='pytorch', + with_cp=False): + super(BasicBlock, self).__init__() + assert style in ['pytorch', 'caffe'] + self.conv1 = conv3x3(inplanes, planes, stride, dilation) + self.bn1 = nn.BatchNorm2d(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = nn.BatchNorm2d(planes) + self.downsample = downsample + self.stride = stride + self.dilation = dilation + assert not with_cp + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, + inplanes, + planes, + stride=1, + dilation=1, + downsample=None, + style='pytorch', + with_cp=False): + """Bottleneck block. + + If style is "pytorch", the stride-two layer is the 3x3 conv layer, if + it is "caffe", the stride-two layer is the first 1x1 conv layer. 
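        A hedged sketch of the 'pytorch' style (illustrative shapes; the ``downsample``
        branch is built by hand here, which is what ``make_res_layer`` below normally does):

        >>> import torch
        >>> import torch.nn as nn
        >>> downsample = nn.Sequential(
        ...     nn.Conv2d(64, 256, kernel_size=1, stride=2, bias=False),
        ...     nn.BatchNorm2d(256))
        >>> block = Bottleneck(64, 64, stride=2, style='pytorch', downsample=downsample)
        >>> block(torch.rand(1, 64, 56, 56)).shape    # stride applied by the 3x3 conv
        torch.Size([1, 256, 28, 28])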
+ """ + super(Bottleneck, self).__init__() + assert style in ['pytorch', 'caffe'] + if style == 'pytorch': + conv1_stride = 1 + conv2_stride = stride + else: + conv1_stride = stride + conv2_stride = 1 + self.conv1 = nn.Conv2d( + inplanes, planes, kernel_size=1, stride=conv1_stride, bias=False) + self.conv2 = nn.Conv2d( + planes, + planes, + kernel_size=3, + stride=conv2_stride, + padding=dilation, + dilation=dilation, + bias=False) + + self.bn1 = nn.BatchNorm2d(planes) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d( + planes, planes * self.expansion, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(planes * self.expansion) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + self.dilation = dilation + self.with_cp = with_cp + + def forward(self, x): + + def _inner_forward(x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + out = self.relu(out) + + return out + + +def make_res_layer(block, + inplanes, + planes, + blocks, + stride=1, + dilation=1, + style='pytorch', + with_cp=False): + downsample = None + if stride != 1 or inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d( + inplanes, + planes * block.expansion, + kernel_size=1, + stride=stride, + bias=False), + nn.BatchNorm2d(planes * block.expansion), + ) + + layers = [] + layers.append( + block( + inplanes, + planes, + stride, + dilation, + downsample, + style=style, + with_cp=with_cp)) + inplanes = planes * block.expansion + for _ in range(1, blocks): + layers.append( + block(inplanes, planes, 1, dilation, style=style, with_cp=with_cp)) + + return nn.Sequential(*layers) + + +class ResNet(nn.Module): + """ResNet backbone. + + Args: + depth (int): Depth of resnet, from {18, 34, 50, 101, 152}. + num_stages (int): Resnet stages, normally 4. + strides (Sequence[int]): Strides of the first block of each stage. + dilations (Sequence[int]): Dilation of each stage. + out_indices (Sequence[int]): Output from which stages. + style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two + layer is the 3x3 conv layer, otherwise the stride-two layer is + the first 1x1 conv layer. + frozen_stages (int): Stages to be frozen (all param fixed). -1 means + not freezing any parameters. + bn_eval (bool): Whether to set BN layers as eval mode, namely, freeze + running stats (mean and var). + bn_frozen (bool): Whether to freeze weight and bias of BN layers. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. 
+ """ + + arch_settings = { + 18: (BasicBlock, (2, 2, 2, 2)), + 34: (BasicBlock, (3, 4, 6, 3)), + 50: (Bottleneck, (3, 4, 6, 3)), + 101: (Bottleneck, (3, 4, 23, 3)), + 152: (Bottleneck, (3, 8, 36, 3)) + } + + def __init__(self, + depth, + num_stages=4, + strides=(1, 2, 2, 2), + dilations=(1, 1, 1, 1), + out_indices=(0, 1, 2, 3), + style='pytorch', + frozen_stages=-1, + bn_eval=True, + bn_frozen=False, + with_cp=False): + super(ResNet, self).__init__() + if depth not in self.arch_settings: + raise KeyError(f'invalid depth {depth} for resnet') + assert num_stages >= 1 and num_stages <= 4 + block, stage_blocks = self.arch_settings[depth] + stage_blocks = stage_blocks[:num_stages] + assert len(strides) == len(dilations) == num_stages + assert max(out_indices) < num_stages + + self.out_indices = out_indices + self.style = style + self.frozen_stages = frozen_stages + self.bn_eval = bn_eval + self.bn_frozen = bn_frozen + self.with_cp = with_cp + + self.inplanes = 64 + self.conv1 = nn.Conv2d( + 3, 64, kernel_size=7, stride=2, padding=3, bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + + self.res_layers = [] + for i, num_blocks in enumerate(stage_blocks): + stride = strides[i] + dilation = dilations[i] + planes = 64 * 2**i + res_layer = make_res_layer( + block, + self.inplanes, + planes, + num_blocks, + stride=stride, + dilation=dilation, + style=self.style, + with_cp=with_cp) + self.inplanes = planes * block.expansion + layer_name = f'layer{i + 1}' + self.add_module(layer_name, res_layer) + self.res_layers.append(layer_name) + + self.feat_dim = block.expansion * 64 * 2**(len(stage_blocks) - 1) + + def init_weights(self, pretrained=None): + if isinstance(pretrained, str): + logger = logging.getLogger() + from ..runner import load_checkpoint + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + for m in self.modules(): + if isinstance(m, nn.Conv2d): + kaiming_init(m) + elif isinstance(m, nn.BatchNorm2d): + constant_init(m, 1) + else: + raise TypeError('pretrained must be a str or None') + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + outs = [] + for i, layer_name in enumerate(self.res_layers): + res_layer = getattr(self, layer_name) + x = res_layer(x) + if i in self.out_indices: + outs.append(x) + if len(outs) == 1: + return outs[0] + else: + return tuple(outs) + + def train(self, mode=True): + super(ResNet, self).train(mode) + if self.bn_eval: + for m in self.modules(): + if isinstance(m, nn.BatchNorm2d): + m.eval() + if self.bn_frozen: + for params in m.parameters(): + params.requires_grad = False + if mode and self.frozen_stages >= 0: + for param in self.conv1.parameters(): + param.requires_grad = False + for param in self.bn1.parameters(): + param.requires_grad = False + self.bn1.eval() + self.bn1.weight.requires_grad = False + self.bn1.bias.requires_grad = False + for i in range(1, self.frozen_stages + 1): + mod = getattr(self, f'layer{i}') + mod.eval() + for param in mod.parameters(): + param.requires_grad = False diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/utils/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a263e31c1e3977712827ca229bbc04910b4e928e --- /dev/null +++ 
b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/utils/__init__.py @@ -0,0 +1,19 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .flops_counter import get_model_complexity_info +from .fuse_conv_bn import fuse_conv_bn +from .sync_bn import revert_sync_batchnorm +from .weight_init import (INITIALIZERS, Caffe2XavierInit, ConstantInit, + KaimingInit, NormalInit, PretrainedInit, + TruncNormalInit, UniformInit, XavierInit, + bias_init_with_prob, caffe2_xavier_init, + constant_init, initialize, kaiming_init, normal_init, + trunc_normal_init, uniform_init, xavier_init) + +__all__ = [ + 'get_model_complexity_info', 'bias_init_with_prob', 'caffe2_xavier_init', + 'constant_init', 'kaiming_init', 'normal_init', 'trunc_normal_init', + 'uniform_init', 'xavier_init', 'fuse_conv_bn', 'initialize', + 'INITIALIZERS', 'ConstantInit', 'XavierInit', 'NormalInit', + 'TruncNormalInit', 'UniformInit', 'KaimingInit', 'PretrainedInit', + 'Caffe2XavierInit', 'revert_sync_batchnorm' +] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/utils/flops_counter.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/utils/flops_counter.py new file mode 100644 index 0000000000000000000000000000000000000000..104240bfa524af727782ceb781147c5815529ee6 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/utils/flops_counter.py @@ -0,0 +1,599 @@ +# Modified from flops-counter.pytorch by Vladislav Sovrasov +# original repo: https://github.com/sovrasov/flops-counter.pytorch + +# MIT License + +# Copyright (c) 2018 Vladislav Sovrasov + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import sys +from functools import partial + +import numpy as np +import torch +import torch.nn as nn + +import annotator.mmpkg.mmcv as mmcv + + +def get_model_complexity_info(model, + input_shape, + print_per_layer_stat=True, + as_strings=True, + input_constructor=None, + flush=False, + ost=sys.stdout): + """Get complexity information of a model. + + This method can calculate FLOPs and parameter counts of a model with + corresponding input shape. It can also print complexity information for + each layer in a model. + + Supported layers are listed as below: + - Convolutions: ``nn.Conv1d``, ``nn.Conv2d``, ``nn.Conv3d``. + - Activations: ``nn.ReLU``, ``nn.PReLU``, ``nn.ELU``, ``nn.LeakyReLU``, + ``nn.ReLU6``. 
+ - Poolings: ``nn.MaxPool1d``, ``nn.MaxPool2d``, ``nn.MaxPool3d``, + ``nn.AvgPool1d``, ``nn.AvgPool2d``, ``nn.AvgPool3d``, + ``nn.AdaptiveMaxPool1d``, ``nn.AdaptiveMaxPool2d``, + ``nn.AdaptiveMaxPool3d``, ``nn.AdaptiveAvgPool1d``, + ``nn.AdaptiveAvgPool2d``, ``nn.AdaptiveAvgPool3d``. + - BatchNorms: ``nn.BatchNorm1d``, ``nn.BatchNorm2d``, + ``nn.BatchNorm3d``, ``nn.GroupNorm``, ``nn.InstanceNorm1d``, + ``InstanceNorm2d``, ``InstanceNorm3d``, ``nn.LayerNorm``. + - Linear: ``nn.Linear``. + - Deconvolution: ``nn.ConvTranspose2d``. + - Upsample: ``nn.Upsample``. + + Args: + model (nn.Module): The model for complexity calculation. + input_shape (tuple): Input shape used for calculation. + print_per_layer_stat (bool): Whether to print complexity information + for each layer in a model. Default: True. + as_strings (bool): Output FLOPs and params counts in a string form. + Default: True. + input_constructor (None | callable): If specified, it takes a callable + method that generates input. otherwise, it will generate a random + tensor with input shape to calculate FLOPs. Default: None. + flush (bool): same as that in :func:`print`. Default: False. + ost (stream): same as ``file`` param in :func:`print`. + Default: sys.stdout. + + Returns: + tuple[float | str]: If ``as_strings`` is set to True, it will return + FLOPs and parameter counts in a string format. otherwise, it will + return those in a float number format. + """ + assert type(input_shape) is tuple + assert len(input_shape) >= 1 + assert isinstance(model, nn.Module) + flops_model = add_flops_counting_methods(model) + flops_model.eval() + flops_model.start_flops_count() + if input_constructor: + input = input_constructor(input_shape) + _ = flops_model(**input) + else: + try: + batch = torch.ones(()).new_empty( + (1, *input_shape), + dtype=next(flops_model.parameters()).dtype, + device=next(flops_model.parameters()).device) + except StopIteration: + # Avoid StopIteration for models which have no parameters, + # like `nn.Relu()`, `nn.AvgPool2d`, etc. + batch = torch.ones(()).new_empty((1, *input_shape)) + + _ = flops_model(batch) + + flops_count, params_count = flops_model.compute_average_flops_cost() + if print_per_layer_stat: + print_model_with_flops( + flops_model, flops_count, params_count, ost=ost, flush=flush) + flops_model.stop_flops_count() + + if as_strings: + return flops_to_string(flops_count), params_to_string(params_count) + + return flops_count, params_count + + +def flops_to_string(flops, units='GFLOPs', precision=2): + """Convert FLOPs number into a string. + + Note that Here we take a multiply-add counts as one FLOP. + + Args: + flops (float): FLOPs number to be converted. + units (str | None): Converted FLOPs units. Options are None, 'GFLOPs', + 'MFLOPs', 'KFLOPs', 'FLOPs'. If set to None, it will automatically + choose the most suitable unit for FLOPs. Default: 'GFLOPs'. + precision (int): Digit number after the decimal point. Default: 2. + + Returns: + str: The converted FLOPs number with units. 
+ + Examples: + >>> flops_to_string(1e9) + '1.0 GFLOPs' + >>> flops_to_string(2e5, 'MFLOPs') + '0.2 MFLOPs' + >>> flops_to_string(3e-9, None) + '3e-09 FLOPs' + """ + if units is None: + if flops // 10**9 > 0: + return str(round(flops / 10.**9, precision)) + ' GFLOPs' + elif flops // 10**6 > 0: + return str(round(flops / 10.**6, precision)) + ' MFLOPs' + elif flops // 10**3 > 0: + return str(round(flops / 10.**3, precision)) + ' KFLOPs' + else: + return str(flops) + ' FLOPs' + else: + if units == 'GFLOPs': + return str(round(flops / 10.**9, precision)) + ' ' + units + elif units == 'MFLOPs': + return str(round(flops / 10.**6, precision)) + ' ' + units + elif units == 'KFLOPs': + return str(round(flops / 10.**3, precision)) + ' ' + units + else: + return str(flops) + ' FLOPs' + + +def params_to_string(num_params, units=None, precision=2): + """Convert parameter number into a string. + + Args: + num_params (float): Parameter number to be converted. + units (str | None): Converted FLOPs units. Options are None, 'M', + 'K' and ''. If set to None, it will automatically choose the most + suitable unit for Parameter number. Default: None. + precision (int): Digit number after the decimal point. Default: 2. + + Returns: + str: The converted parameter number with units. + + Examples: + >>> params_to_string(1e9) + '1000.0 M' + >>> params_to_string(2e5) + '200.0 k' + >>> params_to_string(3e-9) + '3e-09' + """ + if units is None: + if num_params // 10**6 > 0: + return str(round(num_params / 10**6, precision)) + ' M' + elif num_params // 10**3: + return str(round(num_params / 10**3, precision)) + ' k' + else: + return str(num_params) + else: + if units == 'M': + return str(round(num_params / 10.**6, precision)) + ' ' + units + elif units == 'K': + return str(round(num_params / 10.**3, precision)) + ' ' + units + else: + return str(num_params) + + +def print_model_with_flops(model, + total_flops, + total_params, + units='GFLOPs', + precision=3, + ost=sys.stdout, + flush=False): + """Print a model with FLOPs for each layer. + + Args: + model (nn.Module): The model to be printed. + total_flops (float): Total FLOPs of the model. + total_params (float): Total parameter counts of the model. + units (str | None): Converted FLOPs units. Default: 'GFLOPs'. + precision (int): Digit number after the decimal point. Default: 3. + ost (stream): same as `file` param in :func:`print`. + Default: sys.stdout. + flush (bool): same as that in :func:`print`. Default: False. 
+ + Example: + >>> class ExampleModel(nn.Module): + + >>> def __init__(self): + >>> super().__init__() + >>> self.conv1 = nn.Conv2d(3, 8, 3) + >>> self.conv2 = nn.Conv2d(8, 256, 3) + >>> self.conv3 = nn.Conv2d(256, 8, 3) + >>> self.avg_pool = nn.AdaptiveAvgPool2d((1, 1)) + >>> self.flatten = nn.Flatten() + >>> self.fc = nn.Linear(8, 1) + + >>> def forward(self, x): + >>> x = self.conv1(x) + >>> x = self.conv2(x) + >>> x = self.conv3(x) + >>> x = self.avg_pool(x) + >>> x = self.flatten(x) + >>> x = self.fc(x) + >>> return x + + >>> model = ExampleModel() + >>> x = (3, 16, 16) + to print the complexity information state for each layer, you can use + >>> get_model_complexity_info(model, x) + or directly use + >>> print_model_with_flops(model, 4579784.0, 37361) + ExampleModel( + 0.037 M, 100.000% Params, 0.005 GFLOPs, 100.000% FLOPs, + (conv1): Conv2d(0.0 M, 0.600% Params, 0.0 GFLOPs, 0.959% FLOPs, 3, 8, kernel_size=(3, 3), stride=(1, 1)) # noqa: E501 + (conv2): Conv2d(0.019 M, 50.020% Params, 0.003 GFLOPs, 58.760% FLOPs, 8, 256, kernel_size=(3, 3), stride=(1, 1)) + (conv3): Conv2d(0.018 M, 49.356% Params, 0.002 GFLOPs, 40.264% FLOPs, 256, 8, kernel_size=(3, 3), stride=(1, 1)) + (avg_pool): AdaptiveAvgPool2d(0.0 M, 0.000% Params, 0.0 GFLOPs, 0.017% FLOPs, output_size=(1, 1)) + (flatten): Flatten(0.0 M, 0.000% Params, 0.0 GFLOPs, 0.000% FLOPs, ) + (fc): Linear(0.0 M, 0.024% Params, 0.0 GFLOPs, 0.000% FLOPs, in_features=8, out_features=1, bias=True) + ) + """ + + def accumulate_params(self): + if is_supported_instance(self): + return self.__params__ + else: + sum = 0 + for m in self.children(): + sum += m.accumulate_params() + return sum + + def accumulate_flops(self): + if is_supported_instance(self): + return self.__flops__ / model.__batch_counter__ + else: + sum = 0 + for m in self.children(): + sum += m.accumulate_flops() + return sum + + def flops_repr(self): + accumulated_num_params = self.accumulate_params() + accumulated_flops_cost = self.accumulate_flops() + return ', '.join([ + params_to_string( + accumulated_num_params, units='M', precision=precision), + '{:.3%} Params'.format(accumulated_num_params / total_params), + flops_to_string( + accumulated_flops_cost, units=units, precision=precision), + '{:.3%} FLOPs'.format(accumulated_flops_cost / total_flops), + self.original_extra_repr() + ]) + + def add_extra_repr(m): + m.accumulate_flops = accumulate_flops.__get__(m) + m.accumulate_params = accumulate_params.__get__(m) + flops_extra_repr = flops_repr.__get__(m) + if m.extra_repr != flops_extra_repr: + m.original_extra_repr = m.extra_repr + m.extra_repr = flops_extra_repr + assert m.extra_repr != m.original_extra_repr + + def del_extra_repr(m): + if hasattr(m, 'original_extra_repr'): + m.extra_repr = m.original_extra_repr + del m.original_extra_repr + if hasattr(m, 'accumulate_flops'): + del m.accumulate_flops + + model.apply(add_extra_repr) + print(model, file=ost, flush=flush) + model.apply(del_extra_repr) + + +def get_model_parameters_number(model): + """Calculate parameter number of a model. + + Args: + model (nn.module): The model for parameter number calculation. + + Returns: + float: Parameter number of the model. 
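    A quick check (illustrative module; only parameters with ``requires_grad=True``
    are counted):

    >>> import torch.nn as nn
    >>> get_model_parameters_number(nn.Conv2d(3, 8, 3))  # 8*3*3*3 weights + 8 biases
    224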
+ """ + num_params = sum(p.numel() for p in model.parameters() if p.requires_grad) + return num_params + + +def add_flops_counting_methods(net_main_module): + # adding additional methods to the existing module object, + # this is done this way so that each function has access to self object + net_main_module.start_flops_count = start_flops_count.__get__( + net_main_module) + net_main_module.stop_flops_count = stop_flops_count.__get__( + net_main_module) + net_main_module.reset_flops_count = reset_flops_count.__get__( + net_main_module) + net_main_module.compute_average_flops_cost = compute_average_flops_cost.__get__( # noqa: E501 + net_main_module) + + net_main_module.reset_flops_count() + + return net_main_module + + +def compute_average_flops_cost(self): + """Compute average FLOPs cost. + + A method to compute average FLOPs cost, which will be available after + `add_flops_counting_methods()` is called on a desired net object. + + Returns: + float: Current mean flops consumption per image. + """ + batches_count = self.__batch_counter__ + flops_sum = 0 + for module in self.modules(): + if is_supported_instance(module): + flops_sum += module.__flops__ + params_sum = get_model_parameters_number(self) + return flops_sum / batches_count, params_sum + + +def start_flops_count(self): + """Activate the computation of mean flops consumption per image. + + A method to activate the computation of mean flops consumption per image. + which will be available after ``add_flops_counting_methods()`` is called on + a desired net object. It should be called before running the network. + """ + add_batch_counter_hook_function(self) + + def add_flops_counter_hook_function(module): + if is_supported_instance(module): + if hasattr(module, '__flops_handle__'): + return + + else: + handle = module.register_forward_hook( + get_modules_mapping()[type(module)]) + + module.__flops_handle__ = handle + + self.apply(partial(add_flops_counter_hook_function)) + + +def stop_flops_count(self): + """Stop computing the mean flops consumption per image. + + A method to stop computing the mean flops consumption per image, which will + be available after ``add_flops_counting_methods()`` is called on a desired + net object. It can be called to pause the computation whenever. + """ + remove_batch_counter_hook_function(self) + self.apply(remove_flops_counter_hook_function) + + +def reset_flops_count(self): + """Reset statistics computed so far. + + A method to Reset computed statistics, which will be available after + `add_flops_counting_methods()` is called on a desired net object. 
+ """ + add_batch_counter_variables_or_reset(self) + self.apply(add_flops_counter_variable_or_reset) + + +# ---- Internal functions +def empty_flops_counter_hook(module, input, output): + module.__flops__ += 0 + + +def upsample_flops_counter_hook(module, input, output): + output_size = output[0] + batch_size = output_size.shape[0] + output_elements_count = batch_size + for val in output_size.shape[1:]: + output_elements_count *= val + module.__flops__ += int(output_elements_count) + + +def relu_flops_counter_hook(module, input, output): + active_elements_count = output.numel() + module.__flops__ += int(active_elements_count) + + +def linear_flops_counter_hook(module, input, output): + input = input[0] + output_last_dim = output.shape[ + -1] # pytorch checks dimensions, so here we don't care much + module.__flops__ += int(np.prod(input.shape) * output_last_dim) + + +def pool_flops_counter_hook(module, input, output): + input = input[0] + module.__flops__ += int(np.prod(input.shape)) + + +def norm_flops_counter_hook(module, input, output): + input = input[0] + + batch_flops = np.prod(input.shape) + if (getattr(module, 'affine', False) + or getattr(module, 'elementwise_affine', False)): + batch_flops *= 2 + module.__flops__ += int(batch_flops) + + +def deconv_flops_counter_hook(conv_module, input, output): + # Can have multiple inputs, getting the first one + input = input[0] + + batch_size = input.shape[0] + input_height, input_width = input.shape[2:] + + kernel_height, kernel_width = conv_module.kernel_size + in_channels = conv_module.in_channels + out_channels = conv_module.out_channels + groups = conv_module.groups + + filters_per_channel = out_channels // groups + conv_per_position_flops = ( + kernel_height * kernel_width * in_channels * filters_per_channel) + + active_elements_count = batch_size * input_height * input_width + overall_conv_flops = conv_per_position_flops * active_elements_count + bias_flops = 0 + if conv_module.bias is not None: + output_height, output_width = output.shape[2:] + bias_flops = out_channels * batch_size * output_height * output_height + overall_flops = overall_conv_flops + bias_flops + + conv_module.__flops__ += int(overall_flops) + + +def conv_flops_counter_hook(conv_module, input, output): + # Can have multiple inputs, getting the first one + input = input[0] + + batch_size = input.shape[0] + output_dims = list(output.shape[2:]) + + kernel_dims = list(conv_module.kernel_size) + in_channels = conv_module.in_channels + out_channels = conv_module.out_channels + groups = conv_module.groups + + filters_per_channel = out_channels // groups + conv_per_position_flops = int( + np.prod(kernel_dims)) * in_channels * filters_per_channel + + active_elements_count = batch_size * int(np.prod(output_dims)) + + overall_conv_flops = conv_per_position_flops * active_elements_count + + bias_flops = 0 + + if conv_module.bias is not None: + + bias_flops = out_channels * active_elements_count + + overall_flops = overall_conv_flops + bias_flops + + conv_module.__flops__ += int(overall_flops) + + +def batch_counter_hook(module, input, output): + batch_size = 1 + if len(input) > 0: + # Can have multiple inputs, getting the first one + input = input[0] + batch_size = len(input) + else: + pass + print('Warning! 
No positional inputs found for a module, ' + 'assuming batch size is 1.') + module.__batch_counter__ += batch_size + + +def add_batch_counter_variables_or_reset(module): + + module.__batch_counter__ = 0 + + +def add_batch_counter_hook_function(module): + if hasattr(module, '__batch_counter_handle__'): + return + + handle = module.register_forward_hook(batch_counter_hook) + module.__batch_counter_handle__ = handle + + +def remove_batch_counter_hook_function(module): + if hasattr(module, '__batch_counter_handle__'): + module.__batch_counter_handle__.remove() + del module.__batch_counter_handle__ + + +def add_flops_counter_variable_or_reset(module): + if is_supported_instance(module): + if hasattr(module, '__flops__') or hasattr(module, '__params__'): + print('Warning: variables __flops__ or __params__ are already ' + 'defined for the module' + type(module).__name__ + + ' ptflops can affect your code!') + module.__flops__ = 0 + module.__params__ = get_model_parameters_number(module) + + +def is_supported_instance(module): + if type(module) in get_modules_mapping(): + return True + return False + + +def remove_flops_counter_hook_function(module): + if is_supported_instance(module): + if hasattr(module, '__flops_handle__'): + module.__flops_handle__.remove() + del module.__flops_handle__ + + +def get_modules_mapping(): + return { + # convolutions + nn.Conv1d: conv_flops_counter_hook, + nn.Conv2d: conv_flops_counter_hook, + mmcv.cnn.bricks.Conv2d: conv_flops_counter_hook, + nn.Conv3d: conv_flops_counter_hook, + mmcv.cnn.bricks.Conv3d: conv_flops_counter_hook, + # activations + nn.ReLU: relu_flops_counter_hook, + nn.PReLU: relu_flops_counter_hook, + nn.ELU: relu_flops_counter_hook, + nn.LeakyReLU: relu_flops_counter_hook, + nn.ReLU6: relu_flops_counter_hook, + # poolings + nn.MaxPool1d: pool_flops_counter_hook, + nn.AvgPool1d: pool_flops_counter_hook, + nn.AvgPool2d: pool_flops_counter_hook, + nn.MaxPool2d: pool_flops_counter_hook, + mmcv.cnn.bricks.MaxPool2d: pool_flops_counter_hook, + nn.MaxPool3d: pool_flops_counter_hook, + mmcv.cnn.bricks.MaxPool3d: pool_flops_counter_hook, + nn.AvgPool3d: pool_flops_counter_hook, + nn.AdaptiveMaxPool1d: pool_flops_counter_hook, + nn.AdaptiveAvgPool1d: pool_flops_counter_hook, + nn.AdaptiveMaxPool2d: pool_flops_counter_hook, + nn.AdaptiveAvgPool2d: pool_flops_counter_hook, + nn.AdaptiveMaxPool3d: pool_flops_counter_hook, + nn.AdaptiveAvgPool3d: pool_flops_counter_hook, + # normalizations + nn.BatchNorm1d: norm_flops_counter_hook, + nn.BatchNorm2d: norm_flops_counter_hook, + nn.BatchNorm3d: norm_flops_counter_hook, + nn.GroupNorm: norm_flops_counter_hook, + nn.InstanceNorm1d: norm_flops_counter_hook, + nn.InstanceNorm2d: norm_flops_counter_hook, + nn.InstanceNorm3d: norm_flops_counter_hook, + nn.LayerNorm: norm_flops_counter_hook, + # FC + nn.Linear: linear_flops_counter_hook, + mmcv.cnn.bricks.Linear: linear_flops_counter_hook, + # Upscale + nn.Upsample: upsample_flops_counter_hook, + # Deconvolution + nn.ConvTranspose2d: deconv_flops_counter_hook, + mmcv.cnn.bricks.ConvTranspose2d: deconv_flops_counter_hook, + } diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/utils/fuse_conv_bn.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/utils/fuse_conv_bn.py new file mode 100644 index 0000000000000000000000000000000000000000..cb7076f80bf37f7931185bf0293ffcc1ce19c8ef --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/utils/fuse_conv_bn.py @@ -0,0 +1,59 @@ +# Copyright 
(c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn + + +def _fuse_conv_bn(conv, bn): + """Fuse conv and bn into one module. + + Args: + conv (nn.Module): Conv to be fused. + bn (nn.Module): BN to be fused. + + Returns: + nn.Module: Fused module. + """ + conv_w = conv.weight + conv_b = conv.bias if conv.bias is not None else torch.zeros_like( + bn.running_mean) + + factor = bn.weight / torch.sqrt(bn.running_var + bn.eps) + conv.weight = nn.Parameter(conv_w * + factor.reshape([conv.out_channels, 1, 1, 1])) + conv.bias = nn.Parameter((conv_b - bn.running_mean) * factor + bn.bias) + return conv + + +def fuse_conv_bn(module): + """Recursively fuse conv and bn in a module. + + During inference, the functionary of batch norm layers is turned off + but only the mean and var alone channels are used, which exposes the + chance to fuse it with the preceding conv layers to save computations and + simplify network structures. + + Args: + module (nn.Module): Module to be fused. + + Returns: + nn.Module: Fused module. + """ + last_conv = None + last_conv_name = None + + for name, child in module.named_children(): + if isinstance(child, + (nn.modules.batchnorm._BatchNorm, nn.SyncBatchNorm)): + if last_conv is None: # only fuse BN that is after Conv + continue + fused_conv = _fuse_conv_bn(last_conv, child) + module._modules[last_conv_name] = fused_conv + # To reduce changes, set BN as Identity instead of deleting it. + module._modules[name] = nn.Identity() + last_conv = None + elif isinstance(child, nn.Conv2d): + last_conv = child + last_conv_name = name + else: + fuse_conv_bn(child) + return module diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/utils/sync_bn.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/utils/sync_bn.py new file mode 100644 index 0000000000000000000000000000000000000000..c0dbcb1b167ea0df690c0f47fe0217a3454b5d59 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/utils/sync_bn.py @@ -0,0 +1,59 @@ +import torch + +import annotator.mmpkg.mmcv as mmcv + + +class _BatchNormXd(torch.nn.modules.batchnorm._BatchNorm): + """A general BatchNorm layer without input dimension check. + + Reproduced from @kapily's work: + (https://github.com/pytorch/pytorch/issues/41081#issuecomment-783961547) + The only difference between BatchNorm1d, BatchNorm2d, BatchNorm3d, etc + is `_check_input_dim` that is designed for tensor sanity checks. + The check has been bypassed in this class for the convenience of converting + SyncBatchNorm. + """ + + def _check_input_dim(self, input): + return + + +def revert_sync_batchnorm(module): + """Helper function to convert all `SyncBatchNorm` (SyncBN) and + `mmcv.ops.sync_bn.SyncBatchNorm`(MMSyncBN) layers in the model to + `BatchNormXd` layers. + + Adapted from @kapily's work: + (https://github.com/pytorch/pytorch/issues/41081#issuecomment-783961547) + + Args: + module (nn.Module): The module containing `SyncBatchNorm` layers. + + Returns: + module_output: The converted module with `BatchNormXd` layers. 
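    A small sketch (illustrative; ``SyncBatchNorm`` can be constructed on CPU, only
    its forward pass needs a distributed setup):

    >>> import torch.nn as nn
    >>> model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.SyncBatchNorm(8))
    >>> model = revert_sync_batchnorm(model)
    >>> type(model[1]).__name__
    '_BatchNormXd'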
+ """ + module_output = module + module_checklist = [torch.nn.modules.batchnorm.SyncBatchNorm] + if hasattr(mmcv, 'ops'): + module_checklist.append(mmcv.ops.SyncBatchNorm) + if isinstance(module, tuple(module_checklist)): + module_output = _BatchNormXd(module.num_features, module.eps, + module.momentum, module.affine, + module.track_running_stats) + if module.affine: + # no_grad() may not be needed here but + # just to be consistent with `convert_sync_batchnorm()` + with torch.no_grad(): + module_output.weight = module.weight + module_output.bias = module.bias + module_output.running_mean = module.running_mean + module_output.running_var = module.running_var + module_output.num_batches_tracked = module.num_batches_tracked + module_output.training = module.training + # qconfig exists in quantized models + if hasattr(module, 'qconfig'): + module_output.qconfig = module.qconfig + for name, child in module.named_children(): + module_output.add_module(name, revert_sync_batchnorm(child)) + del module + return module_output diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/utils/weight_init.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/utils/weight_init.py new file mode 100644 index 0000000000000000000000000000000000000000..096d0ddcccbec84675f0771cb546d0fa003417e7 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/utils/weight_init.py @@ -0,0 +1,684 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import copy +import math +import warnings + +import numpy as np +import torch +import torch.nn as nn +from torch import Tensor + +from annotator.mmpkg.mmcv.utils import Registry, build_from_cfg, get_logger, print_log + +INITIALIZERS = Registry('initializer') + + +def update_init_info(module, init_info): + """Update the `_params_init_info` in the module if the value of parameters + are changed. + + Args: + module (obj:`nn.Module`): The module of PyTorch with a user-defined + attribute `_params_init_info` which records the initialization + information. + init_info (str): The string that describes the initialization. + """ + assert hasattr( + module, + '_params_init_info'), f'Can not find `_params_init_info` in {module}' + for name, param in module.named_parameters(): + + assert param in module._params_init_info, ( + f'Find a new :obj:`Parameter` ' + f'named `{name}` during executing the ' + f'`init_weights` of ' + f'`{module.__class__.__name__}`. ' + f'Please do not add or ' + f'replace parameters during executing ' + f'the `init_weights`. 
') + + # The parameter has been changed during executing the + # `init_weights` of module + mean_value = param.data.mean() + if module._params_init_info[param]['tmp_mean_value'] != mean_value: + module._params_init_info[param]['init_info'] = init_info + module._params_init_info[param]['tmp_mean_value'] = mean_value + + +def constant_init(module, val, bias=0): + if hasattr(module, 'weight') and module.weight is not None: + nn.init.constant_(module.weight, val) + if hasattr(module, 'bias') and module.bias is not None: + nn.init.constant_(module.bias, bias) + + +def xavier_init(module, gain=1, bias=0, distribution='normal'): + assert distribution in ['uniform', 'normal'] + if hasattr(module, 'weight') and module.weight is not None: + if distribution == 'uniform': + nn.init.xavier_uniform_(module.weight, gain=gain) + else: + nn.init.xavier_normal_(module.weight, gain=gain) + if hasattr(module, 'bias') and module.bias is not None: + nn.init.constant_(module.bias, bias) + + +def normal_init(module, mean=0, std=1, bias=0): + if hasattr(module, 'weight') and module.weight is not None: + nn.init.normal_(module.weight, mean, std) + if hasattr(module, 'bias') and module.bias is not None: + nn.init.constant_(module.bias, bias) + + +def trunc_normal_init(module: nn.Module, + mean: float = 0, + std: float = 1, + a: float = -2, + b: float = 2, + bias: float = 0) -> None: + if hasattr(module, 'weight') and module.weight is not None: + trunc_normal_(module.weight, mean, std, a, b) # type: ignore + if hasattr(module, 'bias') and module.bias is not None: + nn.init.constant_(module.bias, bias) # type: ignore + + +def uniform_init(module, a=0, b=1, bias=0): + if hasattr(module, 'weight') and module.weight is not None: + nn.init.uniform_(module.weight, a, b) + if hasattr(module, 'bias') and module.bias is not None: + nn.init.constant_(module.bias, bias) + + +def kaiming_init(module, + a=0, + mode='fan_out', + nonlinearity='relu', + bias=0, + distribution='normal'): + assert distribution in ['uniform', 'normal'] + if hasattr(module, 'weight') and module.weight is not None: + if distribution == 'uniform': + nn.init.kaiming_uniform_( + module.weight, a=a, mode=mode, nonlinearity=nonlinearity) + else: + nn.init.kaiming_normal_( + module.weight, a=a, mode=mode, nonlinearity=nonlinearity) + if hasattr(module, 'bias') and module.bias is not None: + nn.init.constant_(module.bias, bias) + + +def caffe2_xavier_init(module, bias=0): + # `XavierFill` in Caffe2 corresponds to `kaiming_uniform_` in PyTorch + # Acknowledgment to FAIR's internal code + kaiming_init( + module, + a=1, + mode='fan_in', + nonlinearity='leaky_relu', + bias=bias, + distribution='uniform') + + +def bias_init_with_prob(prior_prob): + """initialize conv/fc bias value according to a given probability value.""" + bias_init = float(-np.log((1 - prior_prob) / prior_prob)) + return bias_init + + +def _get_bases_name(m): + return [b.__name__ for b in m.__class__.__bases__] + + +class BaseInit(object): + + def __init__(self, *, bias=0, bias_prob=None, layer=None): + self.wholemodule = False + if not isinstance(bias, (int, float)): + raise TypeError(f'bias must be a number, but got a {type(bias)}') + + if bias_prob is not None: + if not isinstance(bias_prob, float): + raise TypeError(f'bias_prob type must be float, \ + but got {type(bias_prob)}') + + if layer is not None: + if not isinstance(layer, (str, list)): + raise TypeError(f'layer must be a str or a list of str, \ + but got a {type(layer)}') + else: + layer = [] + + if bias_prob is not None: + self.bias 
= bias_init_with_prob(bias_prob) + else: + self.bias = bias + self.layer = [layer] if isinstance(layer, str) else layer + + def _get_init_info(self): + info = f'{self.__class__.__name__}, bias={self.bias}' + return info + + +@INITIALIZERS.register_module(name='Constant') +class ConstantInit(BaseInit): + """Initialize module parameters with constant values. + + Args: + val (int | float): the value to fill the weights in the module with + bias (int | float): the value to fill the bias. Defaults to 0. + bias_prob (float, optional): the probability for bias initialization. + Defaults to None. + layer (str | list[str], optional): the layer will be initialized. + Defaults to None. + """ + + def __init__(self, val, **kwargs): + super().__init__(**kwargs) + self.val = val + + def __call__(self, module): + + def init(m): + if self.wholemodule: + constant_init(m, self.val, self.bias) + else: + layername = m.__class__.__name__ + basesname = _get_bases_name(m) + if len(set(self.layer) & set([layername] + basesname)): + constant_init(m, self.val, self.bias) + + module.apply(init) + if hasattr(module, '_params_init_info'): + update_init_info(module, init_info=self._get_init_info()) + + def _get_init_info(self): + info = f'{self.__class__.__name__}: val={self.val}, bias={self.bias}' + return info + + +@INITIALIZERS.register_module(name='Xavier') +class XavierInit(BaseInit): + r"""Initialize module parameters with values according to the method + described in `Understanding the difficulty of training deep feedforward + neural networks - Glorot, X. & Bengio, Y. (2010). + `_ + + Args: + gain (int | float): an optional scaling factor. Defaults to 1. + bias (int | float): the value to fill the bias. Defaults to 0. + bias_prob (float, optional): the probability for bias initialization. + Defaults to None. + distribution (str): distribution either be ``'normal'`` + or ``'uniform'``. Defaults to ``'normal'``. + layer (str | list[str], optional): the layer will be initialized. + Defaults to None. + """ + + def __init__(self, gain=1, distribution='normal', **kwargs): + super().__init__(**kwargs) + self.gain = gain + self.distribution = distribution + + def __call__(self, module): + + def init(m): + if self.wholemodule: + xavier_init(m, self.gain, self.bias, self.distribution) + else: + layername = m.__class__.__name__ + basesname = _get_bases_name(m) + if len(set(self.layer) & set([layername] + basesname)): + xavier_init(m, self.gain, self.bias, self.distribution) + + module.apply(init) + if hasattr(module, '_params_init_info'): + update_init_info(module, init_info=self._get_init_info()) + + def _get_init_info(self): + info = f'{self.__class__.__name__}: gain={self.gain}, ' \ + f'distribution={self.distribution}, bias={self.bias}' + return info + + +@INITIALIZERS.register_module(name='Normal') +class NormalInit(BaseInit): + r"""Initialize module parameters with the values drawn from the normal + distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)`. + + Args: + mean (int | float):the mean of the normal distribution. Defaults to 0. + std (int | float): the standard deviation of the normal distribution. + Defaults to 1. + bias (int | float): the value to fill the bias. Defaults to 0. + bias_prob (float, optional): the probability for bias initialization. + Defaults to None. + layer (str | list[str], optional): the layer will be initialized. + Defaults to None. 
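    A usage sketch (illustrative module; submodules are matched against ``layer``
    by class name and base-class names):

    >>> import torch.nn as nn
    >>> model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.Linear(8, 2))
    >>> NormalInit(mean=0, std=0.01, layer=['Conv2d', 'Linear'])(model)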
+ + """ + + def __init__(self, mean=0, std=1, **kwargs): + super().__init__(**kwargs) + self.mean = mean + self.std = std + + def __call__(self, module): + + def init(m): + if self.wholemodule: + normal_init(m, self.mean, self.std, self.bias) + else: + layername = m.__class__.__name__ + basesname = _get_bases_name(m) + if len(set(self.layer) & set([layername] + basesname)): + normal_init(m, self.mean, self.std, self.bias) + + module.apply(init) + if hasattr(module, '_params_init_info'): + update_init_info(module, init_info=self._get_init_info()) + + def _get_init_info(self): + info = f'{self.__class__.__name__}: mean={self.mean},' \ + f' std={self.std}, bias={self.bias}' + return info + + +@INITIALIZERS.register_module(name='TruncNormal') +class TruncNormalInit(BaseInit): + r"""Initialize module parameters with the values drawn from the normal + distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` with values + outside :math:`[a, b]`. + + Args: + mean (float): the mean of the normal distribution. Defaults to 0. + std (float): the standard deviation of the normal distribution. + Defaults to 1. + a (float): The minimum cutoff value. + b ( float): The maximum cutoff value. + bias (float): the value to fill the bias. Defaults to 0. + bias_prob (float, optional): the probability for bias initialization. + Defaults to None. + layer (str | list[str], optional): the layer will be initialized. + Defaults to None. + + """ + + def __init__(self, + mean: float = 0, + std: float = 1, + a: float = -2, + b: float = 2, + **kwargs) -> None: + super().__init__(**kwargs) + self.mean = mean + self.std = std + self.a = a + self.b = b + + def __call__(self, module: nn.Module) -> None: + + def init(m): + if self.wholemodule: + trunc_normal_init(m, self.mean, self.std, self.a, self.b, + self.bias) + else: + layername = m.__class__.__name__ + basesname = _get_bases_name(m) + if len(set(self.layer) & set([layername] + basesname)): + trunc_normal_init(m, self.mean, self.std, self.a, self.b, + self.bias) + + module.apply(init) + if hasattr(module, '_params_init_info'): + update_init_info(module, init_info=self._get_init_info()) + + def _get_init_info(self): + info = f'{self.__class__.__name__}: a={self.a}, b={self.b},' \ + f' mean={self.mean}, std={self.std}, bias={self.bias}' + return info + + +@INITIALIZERS.register_module(name='Uniform') +class UniformInit(BaseInit): + r"""Initialize module parameters with values drawn from the uniform + distribution :math:`\mathcal{U}(a, b)`. + + Args: + a (int | float): the lower bound of the uniform distribution. + Defaults to 0. + b (int | float): the upper bound of the uniform distribution. + Defaults to 1. + bias (int | float): the value to fill the bias. Defaults to 0. + bias_prob (float, optional): the probability for bias initialization. + Defaults to None. + layer (str | list[str], optional): the layer will be initialized. + Defaults to None. 
+ """ + + def __init__(self, a=0, b=1, **kwargs): + super().__init__(**kwargs) + self.a = a + self.b = b + + def __call__(self, module): + + def init(m): + if self.wholemodule: + uniform_init(m, self.a, self.b, self.bias) + else: + layername = m.__class__.__name__ + basesname = _get_bases_name(m) + if len(set(self.layer) & set([layername] + basesname)): + uniform_init(m, self.a, self.b, self.bias) + + module.apply(init) + if hasattr(module, '_params_init_info'): + update_init_info(module, init_info=self._get_init_info()) + + def _get_init_info(self): + info = f'{self.__class__.__name__}: a={self.a},' \ + f' b={self.b}, bias={self.bias}' + return info + + +@INITIALIZERS.register_module(name='Kaiming') +class KaimingInit(BaseInit): + r"""Initialize module parameters with the values according to the method + described in `Delving deep into rectifiers: Surpassing human-level + performance on ImageNet classification - He, K. et al. (2015). + `_ + + Args: + a (int | float): the negative slope of the rectifier used after this + layer (only used with ``'leaky_relu'``). Defaults to 0. + mode (str): either ``'fan_in'`` or ``'fan_out'``. Choosing + ``'fan_in'`` preserves the magnitude of the variance of the weights + in the forward pass. Choosing ``'fan_out'`` preserves the + magnitudes in the backwards pass. Defaults to ``'fan_out'``. + nonlinearity (str): the non-linear function (`nn.functional` name), + recommended to use only with ``'relu'`` or ``'leaky_relu'`` . + Defaults to 'relu'. + bias (int | float): the value to fill the bias. Defaults to 0. + bias_prob (float, optional): the probability for bias initialization. + Defaults to None. + distribution (str): distribution either be ``'normal'`` or + ``'uniform'``. Defaults to ``'normal'``. + layer (str | list[str], optional): the layer will be initialized. + Defaults to None. + """ + + def __init__(self, + a=0, + mode='fan_out', + nonlinearity='relu', + distribution='normal', + **kwargs): + super().__init__(**kwargs) + self.a = a + self.mode = mode + self.nonlinearity = nonlinearity + self.distribution = distribution + + def __call__(self, module): + + def init(m): + if self.wholemodule: + kaiming_init(m, self.a, self.mode, self.nonlinearity, + self.bias, self.distribution) + else: + layername = m.__class__.__name__ + basesname = _get_bases_name(m) + if len(set(self.layer) & set([layername] + basesname)): + kaiming_init(m, self.a, self.mode, self.nonlinearity, + self.bias, self.distribution) + + module.apply(init) + if hasattr(module, '_params_init_info'): + update_init_info(module, init_info=self._get_init_info()) + + def _get_init_info(self): + info = f'{self.__class__.__name__}: a={self.a}, mode={self.mode}, ' \ + f'nonlinearity={self.nonlinearity}, ' \ + f'distribution ={self.distribution}, bias={self.bias}' + return info + + +@INITIALIZERS.register_module(name='Caffe2Xavier') +class Caffe2XavierInit(KaimingInit): + # `XavierFill` in Caffe2 corresponds to `kaiming_uniform_` in PyTorch + # Acknowledgment to FAIR's internal code + def __init__(self, **kwargs): + super().__init__( + a=1, + mode='fan_in', + nonlinearity='leaky_relu', + distribution='uniform', + **kwargs) + + def __call__(self, module): + super().__call__(module) + + +@INITIALIZERS.register_module(name='Pretrained') +class PretrainedInit(object): + """Initialize module by loading a pretrained model. + + Args: + checkpoint (str): the checkpoint file of the pretrained model should + be load. + prefix (str, optional): the prefix of a sub-module in the pretrained + model. 
it is for loading a part of the pretrained model to + initialize. For example, if we would like to only load the + backbone of a detector model, we can set ``prefix='backbone.'``. + Defaults to None. + map_location (str): map tensors into proper locations. + """ + + def __init__(self, checkpoint, prefix=None, map_location=None): + self.checkpoint = checkpoint + self.prefix = prefix + self.map_location = map_location + + def __call__(self, module): + from annotator.mmpkg.mmcv.runner import (_load_checkpoint_with_prefix, load_checkpoint, + load_state_dict) + logger = get_logger('mmcv') + if self.prefix is None: + print_log(f'load model from: {self.checkpoint}', logger=logger) + load_checkpoint( + module, + self.checkpoint, + map_location=self.map_location, + strict=False, + logger=logger) + else: + print_log( + f'load {self.prefix} in model from: {self.checkpoint}', + logger=logger) + state_dict = _load_checkpoint_with_prefix( + self.prefix, self.checkpoint, map_location=self.map_location) + load_state_dict(module, state_dict, strict=False, logger=logger) + + if hasattr(module, '_params_init_info'): + update_init_info(module, init_info=self._get_init_info()) + + def _get_init_info(self): + info = f'{self.__class__.__name__}: load from {self.checkpoint}' + return info + + +def _initialize(module, cfg, wholemodule=False): + func = build_from_cfg(cfg, INITIALIZERS) + # wholemodule flag is for override mode, there is no layer key in override + # and initializer will give init values for the whole module with the name + # in override. + func.wholemodule = wholemodule + func(module) + + +def _initialize_override(module, override, cfg): + if not isinstance(override, (dict, list)): + raise TypeError(f'override must be a dict or a list of dict, \ + but got {type(override)}') + + override = [override] if isinstance(override, dict) else override + + for override_ in override: + + cp_override = copy.deepcopy(override_) + name = cp_override.pop('name', None) + if name is None: + raise ValueError('`override` must contain the key "name",' + f'but got {cp_override}') + # if override only has name key, it means use args in init_cfg + if not cp_override: + cp_override.update(cfg) + # if override has name key and other args except type key, it will + # raise error + elif 'type' not in cp_override.keys(): + raise ValueError( + f'`override` need "type" key, but got {cp_override}') + + if hasattr(module, name): + _initialize(getattr(module, name), cp_override, wholemodule=True) + else: + raise RuntimeError(f'module did not have attribute {name}, ' + f'but init_cfg is {cp_override}.') + + +def initialize(module, init_cfg): + """Initialize a module. + + Args: + module (``torch.nn.Module``): the module will be initialized. + init_cfg (dict | list[dict]): initialization configuration dict to + define initializer. OpenMMLab has implemented 6 initializers + including ``Constant``, ``Xavier``, ``Normal``, ``Uniform``, + ``Kaiming``, and ``Pretrained``. 
+ Example: + >>> module = nn.Linear(2, 3, bias=True) + >>> init_cfg = dict(type='Constant', layer='Linear', val =1 , bias =2) + >>> initialize(module, init_cfg) + + >>> module = nn.Sequential(nn.Conv1d(3, 1, 3), nn.Linear(1,2)) + >>> # define key ``'layer'`` for initializing layer with different + >>> # configuration + >>> init_cfg = [dict(type='Constant', layer='Conv1d', val=1), + dict(type='Constant', layer='Linear', val=2)] + >>> initialize(module, init_cfg) + + >>> # define key``'override'`` to initialize some specific part in + >>> # module + >>> class FooNet(nn.Module): + >>> def __init__(self): + >>> super().__init__() + >>> self.feat = nn.Conv2d(3, 16, 3) + >>> self.reg = nn.Conv2d(16, 10, 3) + >>> self.cls = nn.Conv2d(16, 5, 3) + >>> model = FooNet() + >>> init_cfg = dict(type='Constant', val=1, bias=2, layer='Conv2d', + >>> override=dict(type='Constant', name='reg', val=3, bias=4)) + >>> initialize(model, init_cfg) + + >>> model = ResNet(depth=50) + >>> # Initialize weights with the pretrained model. + >>> init_cfg = dict(type='Pretrained', + checkpoint='torchvision://resnet50') + >>> initialize(model, init_cfg) + + >>> # Initialize weights of a sub-module with the specific part of + >>> # a pretrained model by using "prefix". + >>> url = 'http://download.openmmlab.com/mmdetection/v2.0/retinanet/'\ + >>> 'retinanet_r50_fpn_1x_coco/'\ + >>> 'retinanet_r50_fpn_1x_coco_20200130-c2398f9e.pth' + >>> init_cfg = dict(type='Pretrained', + checkpoint=url, prefix='backbone.') + """ + if not isinstance(init_cfg, (dict, list)): + raise TypeError(f'init_cfg must be a dict or a list of dict, \ + but got {type(init_cfg)}') + + if isinstance(init_cfg, dict): + init_cfg = [init_cfg] + + for cfg in init_cfg: + # should deeply copy the original config because cfg may be used by + # other modules, e.g., one init_cfg shared by multiple bottleneck + # blocks, the expected cfg will be changed after pop and will change + # the initialization behavior of other modules + cp_cfg = copy.deepcopy(cfg) + override = cp_cfg.pop('override', None) + _initialize(module, cp_cfg) + + if override is not None: + cp_cfg.pop('layer', None) + _initialize_override(module, override, cp_cfg) + else: + # All attributes in module have same initialization. + pass + + +def _no_grad_trunc_normal_(tensor: Tensor, mean: float, std: float, a: float, + b: float) -> Tensor: + # Method based on + # https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf + # Modified from + # https://github.com/pytorch/pytorch/blob/master/torch/nn/init.py + def norm_cdf(x): + # Computes standard normal cumulative distribution function + return (1. + math.erf(x / math.sqrt(2.))) / 2. + + if (mean < a - 2 * std) or (mean > b + 2 * std): + warnings.warn( + 'mean is more than 2 std from [a, b] in nn.init.trunc_normal_. ' + 'The distribution of values may be incorrect.', + stacklevel=2) + + with torch.no_grad(): + # Values are generated by using a truncated uniform distribution and + # then using the inverse CDF for the normal distribution. + # Get upper and lower cdf values + lower = norm_cdf((a - mean) / std) + upper = norm_cdf((b - mean) / std) + + # Uniformly fill tensor with values from [lower, upper], then translate + # to [2lower-1, 2upper-1]. 
+ tensor.uniform_(2 * lower - 1, 2 * upper - 1) + + # Use inverse cdf transform for normal distribution to get truncated + # standard normal + tensor.erfinv_() + + # Transform to proper mean, std + tensor.mul_(std * math.sqrt(2.)) + tensor.add_(mean) + + # Clamp to ensure it's in the proper range + tensor.clamp_(min=a, max=b) + return tensor + + +def trunc_normal_(tensor: Tensor, + mean: float = 0., + std: float = 1., + a: float = -2., + b: float = 2.) -> Tensor: + r"""Fills the input Tensor with values drawn from a truncated + normal distribution. The values are effectively drawn from the + normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` + with values outside :math:`[a, b]` redrawn until they are within + the bounds. The method used for generating the random values works + best when :math:`a \leq \text{mean} \leq b`. + + Modified from + https://github.com/pytorch/pytorch/blob/master/torch/nn/init.py + + Args: + tensor (``torch.Tensor``): an n-dimensional `torch.Tensor`. + mean (float): the mean of the normal distribution. + std (float): the standard deviation of the normal distribution. + a (float): the minimum cutoff value. + b (float): the maximum cutoff value. + """ + return _no_grad_trunc_normal_(tensor, mean, std, a, b) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/vgg.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/vgg.py new file mode 100644 index 0000000000000000000000000000000000000000..8778b649561a45a9652b1a15a26c2d171e58f3e1 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/cnn/vgg.py @@ -0,0 +1,175 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import logging + +import torch.nn as nn + +from .utils import constant_init, kaiming_init, normal_init + + +def conv3x3(in_planes, out_planes, dilation=1): + """3x3 convolution with padding.""" + return nn.Conv2d( + in_planes, + out_planes, + kernel_size=3, + padding=dilation, + dilation=dilation) + + +def make_vgg_layer(inplanes, + planes, + num_blocks, + dilation=1, + with_bn=False, + ceil_mode=False): + layers = [] + for _ in range(num_blocks): + layers.append(conv3x3(inplanes, planes, dilation)) + if with_bn: + layers.append(nn.BatchNorm2d(planes)) + layers.append(nn.ReLU(inplace=True)) + inplanes = planes + layers.append(nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=ceil_mode)) + + return layers + + +class VGG(nn.Module): + """VGG backbone. + + Args: + depth (int): Depth of vgg, from {11, 13, 16, 19}. + with_bn (bool): Use BatchNorm or not. + num_classes (int): number of classes for classification. + num_stages (int): VGG stages, normally 5. + dilations (Sequence[int]): Dilation of each stage. + out_indices (Sequence[int]): Output from which stages. + frozen_stages (int): Stages to be frozen (all param fixed). -1 means + not freezing any parameters. + bn_eval (bool): Whether to set BN layers as eval mode, namely, freeze + running stats (mean and var). + bn_frozen (bool): Whether to freeze weight and bias of BN layers. 
+ """ + + arch_settings = { + 11: (1, 1, 2, 2, 2), + 13: (2, 2, 2, 2, 2), + 16: (2, 2, 3, 3, 3), + 19: (2, 2, 4, 4, 4) + } + + def __init__(self, + depth, + with_bn=False, + num_classes=-1, + num_stages=5, + dilations=(1, 1, 1, 1, 1), + out_indices=(0, 1, 2, 3, 4), + frozen_stages=-1, + bn_eval=True, + bn_frozen=False, + ceil_mode=False, + with_last_pool=True): + super(VGG, self).__init__() + if depth not in self.arch_settings: + raise KeyError(f'invalid depth {depth} for vgg') + assert num_stages >= 1 and num_stages <= 5 + stage_blocks = self.arch_settings[depth] + self.stage_blocks = stage_blocks[:num_stages] + assert len(dilations) == num_stages + assert max(out_indices) <= num_stages + + self.num_classes = num_classes + self.out_indices = out_indices + self.frozen_stages = frozen_stages + self.bn_eval = bn_eval + self.bn_frozen = bn_frozen + + self.inplanes = 3 + start_idx = 0 + vgg_layers = [] + self.range_sub_modules = [] + for i, num_blocks in enumerate(self.stage_blocks): + num_modules = num_blocks * (2 + with_bn) + 1 + end_idx = start_idx + num_modules + dilation = dilations[i] + planes = 64 * 2**i if i < 4 else 512 + vgg_layer = make_vgg_layer( + self.inplanes, + planes, + num_blocks, + dilation=dilation, + with_bn=with_bn, + ceil_mode=ceil_mode) + vgg_layers.extend(vgg_layer) + self.inplanes = planes + self.range_sub_modules.append([start_idx, end_idx]) + start_idx = end_idx + if not with_last_pool: + vgg_layers.pop(-1) + self.range_sub_modules[-1][1] -= 1 + self.module_name = 'features' + self.add_module(self.module_name, nn.Sequential(*vgg_layers)) + + if self.num_classes > 0: + self.classifier = nn.Sequential( + nn.Linear(512 * 7 * 7, 4096), + nn.ReLU(True), + nn.Dropout(), + nn.Linear(4096, 4096), + nn.ReLU(True), + nn.Dropout(), + nn.Linear(4096, num_classes), + ) + + def init_weights(self, pretrained=None): + if isinstance(pretrained, str): + logger = logging.getLogger() + from ..runner import load_checkpoint + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + for m in self.modules(): + if isinstance(m, nn.Conv2d): + kaiming_init(m) + elif isinstance(m, nn.BatchNorm2d): + constant_init(m, 1) + elif isinstance(m, nn.Linear): + normal_init(m, std=0.01) + else: + raise TypeError('pretrained must be a str or None') + + def forward(self, x): + outs = [] + vgg_layers = getattr(self, self.module_name) + for i in range(len(self.stage_blocks)): + for j in range(*self.range_sub_modules[i]): + vgg_layer = vgg_layers[j] + x = vgg_layer(x) + if i in self.out_indices: + outs.append(x) + if self.num_classes > 0: + x = x.view(x.size(0), -1) + x = self.classifier(x) + outs.append(x) + if len(outs) == 1: + return outs[0] + else: + return tuple(outs) + + def train(self, mode=True): + super(VGG, self).train(mode) + if self.bn_eval: + for m in self.modules(): + if isinstance(m, nn.BatchNorm2d): + m.eval() + if self.bn_frozen: + for params in m.parameters(): + params.requires_grad = False + vgg_layers = getattr(self, self.module_name) + if mode and self.frozen_stages >= 0: + for i in range(self.frozen_stages): + for j in range(*self.range_sub_modules[i]): + mod = vgg_layers[j] + mod.eval() + for param in mod.parameters(): + param.requires_grad = False diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/engine/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/engine/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3193b7f664e19ce2458d81c836597fa22e4bb082 --- 
/dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/engine/__init__.py @@ -0,0 +1,8 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .test import (collect_results_cpu, collect_results_gpu, multi_gpu_test, + single_gpu_test) + +__all__ = [ + 'collect_results_cpu', 'collect_results_gpu', 'multi_gpu_test', + 'single_gpu_test' +] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/engine/test.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/engine/test.py new file mode 100644 index 0000000000000000000000000000000000000000..ad5f55c4b181f7ad7bf17ed9003496f7377bbd3e --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/engine/test.py @@ -0,0 +1,202 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp +import pickle +import shutil +import tempfile +import time + +import torch +import torch.distributed as dist + +import annotator.mmpkg.mmcv as mmcv +from annotator.mmpkg.mmcv.runner import get_dist_info + + +def single_gpu_test(model, data_loader): + """Test model with a single gpu. + + This method tests model with a single gpu and displays test progress bar. + + Args: + model (nn.Module): Model to be tested. + data_loader (nn.Dataloader): Pytorch data loader. + + Returns: + list: The prediction results. + """ + model.eval() + results = [] + dataset = data_loader.dataset + prog_bar = mmcv.ProgressBar(len(dataset)) + for data in data_loader: + with torch.no_grad(): + result = model(return_loss=False, **data) + results.extend(result) + + # Assume result has the same length of batch_size + # refer to https://github.com/open-mmlab/mmcv/issues/985 + batch_size = len(result) + for _ in range(batch_size): + prog_bar.update() + return results + + +def multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False): + """Test model with multiple gpus. + + This method tests model with multiple gpus and collects the results + under two different modes: gpu and cpu modes. By setting + ``gpu_collect=True``, it encodes results to gpu tensors and use gpu + communication for results collection. On cpu mode it saves the results on + different gpus to ``tmpdir`` and collects them by the rank 0 worker. + + Args: + model (nn.Module): Model to be tested. + data_loader (nn.Dataloader): Pytorch data loader. + tmpdir (str): Path of directory to save the temporary results from + different gpus under cpu mode. + gpu_collect (bool): Option to use either gpu or cpu to collect results. + + Returns: + list: The prediction results. + """ + model.eval() + results = [] + dataset = data_loader.dataset + rank, world_size = get_dist_info() + if rank == 0: + prog_bar = mmcv.ProgressBar(len(dataset)) + time.sleep(2) # This line can prevent deadlock problem in some cases. + for i, data in enumerate(data_loader): + with torch.no_grad(): + result = model(return_loss=False, **data) + results.extend(result) + + if rank == 0: + batch_size = len(result) + batch_size_all = batch_size * world_size + if batch_size_all + prog_bar.completed > len(dataset): + batch_size_all = len(dataset) - prog_bar.completed + for _ in range(batch_size_all): + prog_bar.update() + + # collect results from all ranks + if gpu_collect: + results = collect_results_gpu(results, len(dataset)) + else: + results = collect_results_cpu(results, len(dataset), tmpdir) + return results + + +def collect_results_cpu(result_part, size, tmpdir=None): + """Collect results under cpu mode. 
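(Illustrative note, not part of the diff.) `single_gpu_test` assumes the model is callable as `model(return_loss=False, **data)` and returns one result per sample in the batch. A toy sketch of that contract; `ToyDataset`, `ToyModel` and the batch key `img` are hypothetical:

```python
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset

from annotator.mmpkg.mmcv.engine import single_gpu_test  # assumed vendored import path


class ToyDataset(Dataset):
    def __len__(self):
        return 8

    def __getitem__(self, idx):
        return {'img': torch.randn(3, 32, 32)}


class ToyModel(nn.Module):
    def forward(self, return_loss=True, img=None):
        # single_gpu_test calls model(return_loss=False, **data) and expects a
        # list with one entry per sample in the batch
        return [float(x.mean()) for x in img]


results = single_gpu_test(ToyModel(), DataLoader(ToyDataset(), batch_size=4))
assert len(results) == 8
```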
+ + On cpu mode, this function will save the results on different gpus to + ``tmpdir`` and collect them by the rank 0 worker. + + Args: + result_part (list): Result list containing result parts + to be collected. + size (int): Size of the results, commonly equal to length of + the results. + tmpdir (str | None): temporal directory for collected results to + store. If set to None, it will create a random temporal directory + for it. + + Returns: + list: The collected results. + """ + rank, world_size = get_dist_info() + # create a tmp dir if it is not specified + if tmpdir is None: + MAX_LEN = 512 + # 32 is whitespace + dir_tensor = torch.full((MAX_LEN, ), + 32, + dtype=torch.uint8, + device='cuda') + if rank == 0: + mmcv.mkdir_or_exist('.dist_test') + tmpdir = tempfile.mkdtemp(dir='.dist_test') + tmpdir = torch.tensor( + bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda') + dir_tensor[:len(tmpdir)] = tmpdir + dist.broadcast(dir_tensor, 0) + tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip() + else: + mmcv.mkdir_or_exist(tmpdir) + # dump the part result to the dir + mmcv.dump(result_part, osp.join(tmpdir, f'part_{rank}.pkl')) + dist.barrier() + # collect all parts + if rank != 0: + return None + else: + # load results of all parts from tmp dir + part_list = [] + for i in range(world_size): + part_file = osp.join(tmpdir, f'part_{i}.pkl') + part_result = mmcv.load(part_file) + # When data is severely insufficient, an empty part_result + # on a certain gpu could makes the overall outputs empty. + if part_result: + part_list.append(part_result) + # sort the results + ordered_results = [] + for res in zip(*part_list): + ordered_results.extend(list(res)) + # the dataloader may pad some samples + ordered_results = ordered_results[:size] + # remove tmp dir + shutil.rmtree(tmpdir) + return ordered_results + + +def collect_results_gpu(result_part, size): + """Collect results under gpu mode. + + On gpu mode, this function will encode results to gpu tensors and use gpu + communication for results collection. + + Args: + result_part (list): Result list containing result parts + to be collected. + size (int): Size of the results, commonly equal to length of + the results. + + Returns: + list: The collected results. + """ + rank, world_size = get_dist_info() + # dump result part to tensor with pickle + part_tensor = torch.tensor( + bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device='cuda') + # gather all result part tensor shape + shape_tensor = torch.tensor(part_tensor.shape, device='cuda') + shape_list = [shape_tensor.clone() for _ in range(world_size)] + dist.all_gather(shape_list, shape_tensor) + # padding result part tensor to max length + shape_max = torch.tensor(shape_list).max() + part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda') + part_send[:shape_tensor[0]] = part_tensor + part_recv_list = [ + part_tensor.new_zeros(shape_max) for _ in range(world_size) + ] + # gather all result part + dist.all_gather(part_recv_list, part_send) + + if rank == 0: + part_list = [] + for recv, shape in zip(part_recv_list, shape_list): + part_result = pickle.loads(recv[:shape[0]].cpu().numpy().tobytes()) + # When data is severely insufficient, an empty part_result + # on a certain gpu could makes the overall outputs empty. 
+ if part_result: + part_list.append(part_result) + # sort the results + ordered_results = [] + for res in zip(*part_list): + ordered_results.extend(list(res)) + # the dataloader may pad some samples + ordered_results = ordered_results[:size] + return ordered_results diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/fileio/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/fileio/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2051b85f7e59bff7bdbaa131849ce8cd31f059a4 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/fileio/__init__.py @@ -0,0 +1,11 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .file_client import BaseStorageBackend, FileClient +from .handlers import BaseFileHandler, JsonHandler, PickleHandler, YamlHandler +from .io import dump, load, register_handler +from .parse import dict_from_file, list_from_file + +__all__ = [ + 'BaseStorageBackend', 'FileClient', 'load', 'dump', 'register_handler', + 'BaseFileHandler', 'JsonHandler', 'PickleHandler', 'YamlHandler', + 'list_from_file', 'dict_from_file' +] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/fileio/file_client.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/fileio/file_client.py new file mode 100644 index 0000000000000000000000000000000000000000..1ed2bf5f41a29000f9a080066497d8f3674fae15 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/fileio/file_client.py @@ -0,0 +1,1148 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import inspect +import os +import os.path as osp +import re +import tempfile +import warnings +from abc import ABCMeta, abstractmethod +from contextlib import contextmanager +from pathlib import Path +from typing import Iterable, Iterator, Optional, Tuple, Union +from urllib.request import urlopen + +import annotator.mmpkg.mmcv as mmcv +from annotator.mmpkg.mmcv.utils.misc import has_method +from annotator.mmpkg.mmcv.utils.path import is_filepath + + +class BaseStorageBackend(metaclass=ABCMeta): + """Abstract class of storage backends. + + All backends need to implement two apis: ``get()`` and ``get_text()``. + ``get()`` reads the file as a byte stream and ``get_text()`` reads the file + as texts. + """ + + # a flag to indicate whether the backend can create a symlink for a file + _allow_symlink = False + + @property + def name(self): + return self.__class__.__name__ + + @property + def allow_symlink(self): + return self._allow_symlink + + @abstractmethod + def get(self, filepath): + pass + + @abstractmethod + def get_text(self, filepath): + pass + + +class CephBackend(BaseStorageBackend): + """Ceph storage backend (for internal use). + + Args: + path_mapping (dict|None): path mapping dict from local path to Petrel + path. When ``path_mapping={'src': 'dst'}``, ``src`` in ``filepath`` + will be replaced by ``dst``. Default: None. + + .. warning:: + :class:`mmcv.fileio.file_client.CephBackend` will be deprecated, + please use :class:`mmcv.fileio.file_client.PetrelBackend` instead. 
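(Illustrative note, not part of the diff.) A storage backend only has to implement `get()` for bytes and `get_text()` for strings. A hypothetical in-memory backend, just to show the contract the abstract base enforces:

```python
from annotator.mmpkg.mmcv.fileio import BaseStorageBackend  # re-exported by fileio/__init__.py


class InMemoryBackend(BaseStorageBackend):
    """Serve bytes from a dict instead of a real storage service (hypothetical)."""

    def __init__(self, store=None):
        self._store = store or {}

    def get(self, filepath):
        # return the raw byte stream, analogous to reading a file with 'rb'
        return self._store[str(filepath)]

    def get_text(self, filepath, encoding='utf-8'):
        return self._store[str(filepath)].decode(encoding)


backend = InMemoryBackend({'a.txt': b'hello'})
assert backend.get_text('a.txt') == 'hello'
```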
+ """ + + def __init__(self, path_mapping=None): + try: + import ceph + except ImportError: + raise ImportError('Please install ceph to enable CephBackend.') + + warnings.warn( + 'CephBackend will be deprecated, please use PetrelBackend instead') + self._client = ceph.S3Client() + assert isinstance(path_mapping, dict) or path_mapping is None + self.path_mapping = path_mapping + + def get(self, filepath): + filepath = str(filepath) + if self.path_mapping is not None: + for k, v in self.path_mapping.items(): + filepath = filepath.replace(k, v) + value = self._client.Get(filepath) + value_buf = memoryview(value) + return value_buf + + def get_text(self, filepath, encoding=None): + raise NotImplementedError + + +class PetrelBackend(BaseStorageBackend): + """Petrel storage backend (for internal use). + + PetrelBackend supports reading and writing data to multiple clusters. + If the file path contains the cluster name, PetrelBackend will read data + from specified cluster or write data to it. Otherwise, PetrelBackend will + access the default cluster. + + Args: + path_mapping (dict, optional): Path mapping dict from local path to + Petrel path. When ``path_mapping={'src': 'dst'}``, ``src`` in + ``filepath`` will be replaced by ``dst``. Default: None. + enable_mc (bool, optional): Whether to enable memcached support. + Default: True. + + Examples: + >>> filepath1 = 's3://path/of/file' + >>> filepath2 = 'cluster-name:s3://path/of/file' + >>> client = PetrelBackend() + >>> client.get(filepath1) # get data from default cluster + >>> client.get(filepath2) # get data from 'cluster-name' cluster + """ + + def __init__(self, + path_mapping: Optional[dict] = None, + enable_mc: bool = True): + try: + from petrel_client import client + except ImportError: + raise ImportError('Please install petrel_client to enable ' + 'PetrelBackend.') + + self._client = client.Client(enable_mc=enable_mc) + assert isinstance(path_mapping, dict) or path_mapping is None + self.path_mapping = path_mapping + + def _map_path(self, filepath: Union[str, Path]) -> str: + """Map ``filepath`` to a string path whose prefix will be replaced by + :attr:`self.path_mapping`. + + Args: + filepath (str): Path to be mapped. + """ + filepath = str(filepath) + if self.path_mapping is not None: + for k, v in self.path_mapping.items(): + filepath = filepath.replace(k, v) + return filepath + + def _format_path(self, filepath: str) -> str: + """Convert a ``filepath`` to standard format of petrel oss. + + If the ``filepath`` is concatenated by ``os.path.join``, in a Windows + environment, the ``filepath`` will be the format of + 's3://bucket_name\\image.jpg'. By invoking :meth:`_format_path`, the + above ``filepath`` will be converted to 's3://bucket_name/image.jpg'. + + Args: + filepath (str): Path to be formatted. + """ + return re.sub(r'\\+', '/', filepath) + + def get(self, filepath: Union[str, Path]) -> memoryview: + """Read data from a given ``filepath`` with 'rb' mode. + + Args: + filepath (str or Path): Path to read data. + + Returns: + memoryview: A memory view of expected bytes object to avoid + copying. The memoryview object can be converted to bytes by + ``value_buf.tobytes()``. + """ + filepath = self._map_path(filepath) + filepath = self._format_path(filepath) + value = self._client.Get(filepath) + value_buf = memoryview(value) + return value_buf + + def get_text(self, + filepath: Union[str, Path], + encoding: str = 'utf-8') -> str: + """Read data from a given ``filepath`` with 'r' mode. 
+ + Args: + filepath (str or Path): Path to read data. + encoding (str): The encoding format used to open the ``filepath``. + Default: 'utf-8'. + + Returns: + str: Expected text reading from ``filepath``. + """ + return str(self.get(filepath), encoding=encoding) + + def put(self, obj: bytes, filepath: Union[str, Path]) -> None: + """Save data to a given ``filepath``. + + Args: + obj (bytes): Data to be saved. + filepath (str or Path): Path to write data. + """ + filepath = self._map_path(filepath) + filepath = self._format_path(filepath) + self._client.put(filepath, obj) + + def put_text(self, + obj: str, + filepath: Union[str, Path], + encoding: str = 'utf-8') -> None: + """Save data to a given ``filepath``. + + Args: + obj (str): Data to be written. + filepath (str or Path): Path to write data. + encoding (str): The encoding format used to encode the ``obj``. + Default: 'utf-8'. + """ + self.put(bytes(obj, encoding=encoding), filepath) + + def remove(self, filepath: Union[str, Path]) -> None: + """Remove a file. + + Args: + filepath (str or Path): Path to be removed. + """ + if not has_method(self._client, 'delete'): + raise NotImplementedError( + ('Current version of Petrel Python SDK has not supported ' + 'the `delete` method, please use a higher version or dev' + ' branch instead.')) + + filepath = self._map_path(filepath) + filepath = self._format_path(filepath) + self._client.delete(filepath) + + def exists(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path exists. + + Args: + filepath (str or Path): Path to be checked whether exists. + + Returns: + bool: Return ``True`` if ``filepath`` exists, ``False`` otherwise. + """ + if not (has_method(self._client, 'contains') + and has_method(self._client, 'isdir')): + raise NotImplementedError( + ('Current version of Petrel Python SDK has not supported ' + 'the `contains` and `isdir` methods, please use a higher' + 'version or dev branch instead.')) + + filepath = self._map_path(filepath) + filepath = self._format_path(filepath) + return self._client.contains(filepath) or self._client.isdir(filepath) + + def isdir(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path is a directory. + + Args: + filepath (str or Path): Path to be checked whether it is a + directory. + + Returns: + bool: Return ``True`` if ``filepath`` points to a directory, + ``False`` otherwise. + """ + if not has_method(self._client, 'isdir'): + raise NotImplementedError( + ('Current version of Petrel Python SDK has not supported ' + 'the `isdir` method, please use a higher version or dev' + ' branch instead.')) + + filepath = self._map_path(filepath) + filepath = self._format_path(filepath) + return self._client.isdir(filepath) + + def isfile(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path is a file. + + Args: + filepath (str or Path): Path to be checked whether it is a file. + + Returns: + bool: Return ``True`` if ``filepath`` points to a file, ``False`` + otherwise. + """ + if not has_method(self._client, 'contains'): + raise NotImplementedError( + ('Current version of Petrel Python SDK has not supported ' + 'the `contains` method, please use a higher version or ' + 'dev branch instead.')) + + filepath = self._map_path(filepath) + filepath = self._format_path(filepath) + return self._client.contains(filepath) + + def join_path(self, filepath: Union[str, Path], + *filepaths: Union[str, Path]) -> str: + """Concatenate all file paths. + + Args: + filepath (str or Path): Path to be concatenated. 
+ + Returns: + str: The result after concatenation. + """ + filepath = self._format_path(self._map_path(filepath)) + if filepath.endswith('/'): + filepath = filepath[:-1] + formatted_paths = [filepath] + for path in filepaths: + formatted_paths.append(self._format_path(self._map_path(path))) + return '/'.join(formatted_paths) + + @contextmanager + def get_local_path(self, filepath: Union[str, Path]) -> Iterable[str]: + """Download a file from ``filepath`` and return a temporary path. + + ``get_local_path`` is decorated by :meth:`contxtlib.contextmanager`. It + can be called with ``with`` statement, and when exists from the + ``with`` statement, the temporary path will be released. + + Args: + filepath (str | Path): Download a file from ``filepath``. + + Examples: + >>> client = PetrelBackend() + >>> # After existing from the ``with`` clause, + >>> # the path will be removed + >>> with client.get_local_path('s3://path/of/your/file') as path: + ... # do something here + + Yields: + Iterable[str]: Only yield one temporary path. + """ + filepath = self._map_path(filepath) + filepath = self._format_path(filepath) + assert self.isfile(filepath) + try: + f = tempfile.NamedTemporaryFile(delete=False) + f.write(self.get(filepath)) + f.close() + yield f.name + finally: + os.remove(f.name) + + def list_dir_or_file(self, + dir_path: Union[str, Path], + list_dir: bool = True, + list_file: bool = True, + suffix: Optional[Union[str, Tuple[str]]] = None, + recursive: bool = False) -> Iterator[str]: + """Scan a directory to find the interested directories or files in + arbitrary order. + + Note: + Petrel has no concept of directories but it simulates the directory + hierarchy in the filesystem through public prefixes. In addition, + if the returned path ends with '/', it means the path is a public + prefix which is a logical directory. + + Note: + :meth:`list_dir_or_file` returns the path relative to ``dir_path``. + In addition, the returned path of directory will not contains the + suffix '/' which is consistent with other backends. + + Args: + dir_path (str | Path): Path of the directory. + list_dir (bool): List the directories. Default: True. + list_file (bool): List the path of files. Default: True. + suffix (str or tuple[str], optional): File suffix + that we are interested in. Default: None. + recursive (bool): If set to True, recursively scan the + directory. Default: False. + + Yields: + Iterable[str]: A relative path to ``dir_path``. 
+ """ + if not has_method(self._client, 'list'): + raise NotImplementedError( + ('Current version of Petrel Python SDK has not supported ' + 'the `list` method, please use a higher version or dev' + ' branch instead.')) + + dir_path = self._map_path(dir_path) + dir_path = self._format_path(dir_path) + if list_dir and suffix is not None: + raise TypeError( + '`list_dir` should be False when `suffix` is not None') + + if (suffix is not None) and not isinstance(suffix, (str, tuple)): + raise TypeError('`suffix` must be a string or tuple of strings') + + # Petrel's simulated directory hierarchy assumes that directory paths + # should end with `/` + if not dir_path.endswith('/'): + dir_path += '/' + + root = dir_path + + def _list_dir_or_file(dir_path, list_dir, list_file, suffix, + recursive): + for path in self._client.list(dir_path): + # the `self.isdir` is not used here to determine whether path + # is a directory, because `self.isdir` relies on + # `self._client.list` + if path.endswith('/'): # a directory path + next_dir_path = self.join_path(dir_path, path) + if list_dir: + # get the relative path and exclude the last + # character '/' + rel_dir = next_dir_path[len(root):-1] + yield rel_dir + if recursive: + yield from _list_dir_or_file(next_dir_path, list_dir, + list_file, suffix, + recursive) + else: # a file path + absolute_path = self.join_path(dir_path, path) + rel_path = absolute_path[len(root):] + if (suffix is None + or rel_path.endswith(suffix)) and list_file: + yield rel_path + + return _list_dir_or_file(dir_path, list_dir, list_file, suffix, + recursive) + + +class MemcachedBackend(BaseStorageBackend): + """Memcached storage backend. + + Attributes: + server_list_cfg (str): Config file for memcached server list. + client_cfg (str): Config file for memcached client. + sys_path (str | None): Additional path to be appended to `sys.path`. + Default: None. + """ + + def __init__(self, server_list_cfg, client_cfg, sys_path=None): + if sys_path is not None: + import sys + sys.path.append(sys_path) + try: + import mc + except ImportError: + raise ImportError( + 'Please install memcached to enable MemcachedBackend.') + + self.server_list_cfg = server_list_cfg + self.client_cfg = client_cfg + self._client = mc.MemcachedClient.GetInstance(self.server_list_cfg, + self.client_cfg) + # mc.pyvector servers as a point which points to a memory cache + self._mc_buffer = mc.pyvector() + + def get(self, filepath): + filepath = str(filepath) + import mc + self._client.Get(filepath, self._mc_buffer) + value_buf = mc.ConvertBuffer(self._mc_buffer) + return value_buf + + def get_text(self, filepath, encoding=None): + raise NotImplementedError + + +class LmdbBackend(BaseStorageBackend): + """Lmdb storage backend. + + Args: + db_path (str): Lmdb database path. + readonly (bool, optional): Lmdb environment parameter. If True, + disallow any write operations. Default: True. + lock (bool, optional): Lmdb environment parameter. If False, when + concurrent access occurs, do not lock the database. Default: False. + readahead (bool, optional): Lmdb environment parameter. If False, + disable the OS filesystem readahead mechanism, which may improve + random read performance when a database is larger than RAM. + Default: False. + + Attributes: + db_path (str): Lmdb database path. 
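(Illustrative note, not part of the diff.) With `LmdbBackend` the "filepath" passed to `get()` is really the LMDB key. A hedged sketch, assuming the `lmdb` package is installed and `/path/to/db` stands in for an existing database:

```python
from annotator.mmpkg.mmcv.fileio.file_client import LmdbBackend  # assumed module path

backend = LmdbBackend('/path/to/db', readonly=True, lock=False, readahead=False)
value_buf = backend.get('sample_0001')  # raw bytes, or None if the key is missing
if value_buf is not None:
    print(f'read {len(value_buf)} bytes')
```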
+ """ + + def __init__(self, + db_path, + readonly=True, + lock=False, + readahead=False, + **kwargs): + try: + import lmdb + except ImportError: + raise ImportError('Please install lmdb to enable LmdbBackend.') + + self.db_path = str(db_path) + self._client = lmdb.open( + self.db_path, + readonly=readonly, + lock=lock, + readahead=readahead, + **kwargs) + + def get(self, filepath): + """Get values according to the filepath. + + Args: + filepath (str | obj:`Path`): Here, filepath is the lmdb key. + """ + filepath = str(filepath) + with self._client.begin(write=False) as txn: + value_buf = txn.get(filepath.encode('ascii')) + return value_buf + + def get_text(self, filepath, encoding=None): + raise NotImplementedError + + +class HardDiskBackend(BaseStorageBackend): + """Raw hard disks storage backend.""" + + _allow_symlink = True + + def get(self, filepath: Union[str, Path]) -> bytes: + """Read data from a given ``filepath`` with 'rb' mode. + + Args: + filepath (str or Path): Path to read data. + + Returns: + bytes: Expected bytes object. + """ + with open(filepath, 'rb') as f: + value_buf = f.read() + return value_buf + + def get_text(self, + filepath: Union[str, Path], + encoding: str = 'utf-8') -> str: + """Read data from a given ``filepath`` with 'r' mode. + + Args: + filepath (str or Path): Path to read data. + encoding (str): The encoding format used to open the ``filepath``. + Default: 'utf-8'. + + Returns: + str: Expected text reading from ``filepath``. + """ + with open(filepath, 'r', encoding=encoding) as f: + value_buf = f.read() + return value_buf + + def put(self, obj: bytes, filepath: Union[str, Path]) -> None: + """Write data to a given ``filepath`` with 'wb' mode. + + Note: + ``put`` will create a directory if the directory of ``filepath`` + does not exist. + + Args: + obj (bytes): Data to be written. + filepath (str or Path): Path to write data. + """ + mmcv.mkdir_or_exist(osp.dirname(filepath)) + with open(filepath, 'wb') as f: + f.write(obj) + + def put_text(self, + obj: str, + filepath: Union[str, Path], + encoding: str = 'utf-8') -> None: + """Write data to a given ``filepath`` with 'w' mode. + + Note: + ``put_text`` will create a directory if the directory of + ``filepath`` does not exist. + + Args: + obj (str): Data to be written. + filepath (str or Path): Path to write data. + encoding (str): The encoding format used to open the ``filepath``. + Default: 'utf-8'. + """ + mmcv.mkdir_or_exist(osp.dirname(filepath)) + with open(filepath, 'w', encoding=encoding) as f: + f.write(obj) + + def remove(self, filepath: Union[str, Path]) -> None: + """Remove a file. + + Args: + filepath (str or Path): Path to be removed. + """ + os.remove(filepath) + + def exists(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path exists. + + Args: + filepath (str or Path): Path to be checked whether exists. + + Returns: + bool: Return ``True`` if ``filepath`` exists, ``False`` otherwise. + """ + return osp.exists(filepath) + + def isdir(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path is a directory. + + Args: + filepath (str or Path): Path to be checked whether it is a + directory. + + Returns: + bool: Return ``True`` if ``filepath`` points to a directory, + ``False`` otherwise. + """ + return osp.isdir(filepath) + + def isfile(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path is a file. + + Args: + filepath (str or Path): Path to be checked whether it is a file. 
+ + Returns: + bool: Return ``True`` if ``filepath`` points to a file, ``False`` + otherwise. + """ + return osp.isfile(filepath) + + def join_path(self, filepath: Union[str, Path], + *filepaths: Union[str, Path]) -> str: + """Concatenate all file paths. + + Join one or more filepath components intelligently. The return value + is the concatenation of filepath and any members of *filepaths. + + Args: + filepath (str or Path): Path to be concatenated. + + Returns: + str: The result of concatenation. + """ + return osp.join(filepath, *filepaths) + + @contextmanager + def get_local_path( + self, filepath: Union[str, Path]) -> Iterable[Union[str, Path]]: + """Only for unified API and do nothing.""" + yield filepath + + def list_dir_or_file(self, + dir_path: Union[str, Path], + list_dir: bool = True, + list_file: bool = True, + suffix: Optional[Union[str, Tuple[str]]] = None, + recursive: bool = False) -> Iterator[str]: + """Scan a directory to find the interested directories or files in + arbitrary order. + + Note: + :meth:`list_dir_or_file` returns the path relative to ``dir_path``. + + Args: + dir_path (str | Path): Path of the directory. + list_dir (bool): List the directories. Default: True. + list_file (bool): List the path of files. Default: True. + suffix (str or tuple[str], optional): File suffix + that we are interested in. Default: None. + recursive (bool): If set to True, recursively scan the + directory. Default: False. + + Yields: + Iterable[str]: A relative path to ``dir_path``. + """ + if list_dir and suffix is not None: + raise TypeError('`suffix` should be None when `list_dir` is True') + + if (suffix is not None) and not isinstance(suffix, (str, tuple)): + raise TypeError('`suffix` must be a string or tuple of strings') + + root = dir_path + + def _list_dir_or_file(dir_path, list_dir, list_file, suffix, + recursive): + for entry in os.scandir(dir_path): + if not entry.name.startswith('.') and entry.is_file(): + rel_path = osp.relpath(entry.path, root) + if (suffix is None + or rel_path.endswith(suffix)) and list_file: + yield rel_path + elif osp.isdir(entry.path): + if list_dir: + rel_dir = osp.relpath(entry.path, root) + yield rel_dir + if recursive: + yield from _list_dir_or_file(entry.path, list_dir, + list_file, suffix, + recursive) + + return _list_dir_or_file(dir_path, list_dir, list_file, suffix, + recursive) + + +class HTTPBackend(BaseStorageBackend): + """HTTP and HTTPS storage bachend.""" + + def get(self, filepath): + value_buf = urlopen(filepath).read() + return value_buf + + def get_text(self, filepath, encoding='utf-8'): + value_buf = urlopen(filepath).read() + return value_buf.decode(encoding) + + @contextmanager + def get_local_path(self, filepath: str) -> Iterable[str]: + """Download a file from ``filepath``. + + ``get_local_path`` is decorated by :meth:`contxtlib.contextmanager`. It + can be called with ``with`` statement, and when exists from the + ``with`` statement, the temporary path will be released. + + Args: + filepath (str): Download a file from ``filepath``. + + Examples: + >>> client = HTTPBackend() + >>> # After existing from the ``with`` clause, + >>> # the path will be removed + >>> with client.get_local_path('http://path/of/your/file') as path: + ... # do something here + """ + try: + f = tempfile.NamedTemporaryFile(delete=False) + f.write(self.get(filepath)) + f.close() + yield f.name + finally: + os.remove(f.name) + + +class FileClient: + """A general file client to access files in different backends. 
+ + The client loads a file or text in a specified backend from its path + and returns it as a binary or text file. There are two ways to choose a + backend, the name of backend and the prefix of path. Although both of them + can be used to choose a storage backend, ``backend`` has a higher priority + that is if they are all set, the storage backend will be chosen by the + backend argument. If they are all `None`, the disk backend will be chosen. + Note that It can also register other backend accessor with a given name, + prefixes, and backend class. In addition, We use the singleton pattern to + avoid repeated object creation. If the arguments are the same, the same + object will be returned. + + Args: + backend (str, optional): The storage backend type. Options are "disk", + "ceph", "memcached", "lmdb", "http" and "petrel". Default: None. + prefix (str, optional): The prefix of the registered storage backend. + Options are "s3", "http", "https". Default: None. + + Examples: + >>> # only set backend + >>> file_client = FileClient(backend='petrel') + >>> # only set prefix + >>> file_client = FileClient(prefix='s3') + >>> # set both backend and prefix but use backend to choose client + >>> file_client = FileClient(backend='petrel', prefix='s3') + >>> # if the arguments are the same, the same object is returned + >>> file_client1 = FileClient(backend='petrel') + >>> file_client1 is file_client + True + + Attributes: + client (:obj:`BaseStorageBackend`): The backend object. + """ + + _backends = { + 'disk': HardDiskBackend, + 'ceph': CephBackend, + 'memcached': MemcachedBackend, + 'lmdb': LmdbBackend, + 'petrel': PetrelBackend, + 'http': HTTPBackend, + } + # This collection is used to record the overridden backends, and when a + # backend appears in the collection, the singleton pattern is disabled for + # that backend, because if the singleton pattern is used, then the object + # returned will be the backend before overwriting + _overridden_backends = set() + _prefix_to_backends = { + 's3': PetrelBackend, + 'http': HTTPBackend, + 'https': HTTPBackend, + } + _overridden_prefixes = set() + + _instances = {} + + def __new__(cls, backend=None, prefix=None, **kwargs): + if backend is None and prefix is None: + backend = 'disk' + if backend is not None and backend not in cls._backends: + raise ValueError( + f'Backend {backend} is not supported. Currently supported ones' + f' are {list(cls._backends.keys())}') + if prefix is not None and prefix not in cls._prefix_to_backends: + raise ValueError( + f'prefix {prefix} is not supported. 
Currently supported ones ' + f'are {list(cls._prefix_to_backends.keys())}') + + # concatenate the arguments to a unique key for determining whether + # objects with the same arguments were created + arg_key = f'{backend}:{prefix}' + for key, value in kwargs.items(): + arg_key += f':{key}:{value}' + + # if a backend was overridden, it will create a new object + if (arg_key in cls._instances + and backend not in cls._overridden_backends + and prefix not in cls._overridden_prefixes): + _instance = cls._instances[arg_key] + else: + # create a new object and put it to _instance + _instance = super().__new__(cls) + if backend is not None: + _instance.client = cls._backends[backend](**kwargs) + else: + _instance.client = cls._prefix_to_backends[prefix](**kwargs) + + cls._instances[arg_key] = _instance + + return _instance + + @property + def name(self): + return self.client.name + + @property + def allow_symlink(self): + return self.client.allow_symlink + + @staticmethod + def parse_uri_prefix(uri: Union[str, Path]) -> Optional[str]: + """Parse the prefix of a uri. + + Args: + uri (str | Path): Uri to be parsed that contains the file prefix. + + Examples: + >>> FileClient.parse_uri_prefix('s3://path/of/your/file') + 's3' + + Returns: + str | None: Return the prefix of uri if the uri contains '://' + else ``None``. + """ + assert is_filepath(uri) + uri = str(uri) + if '://' not in uri: + return None + else: + prefix, _ = uri.split('://') + # In the case of PetrelBackend, the prefix may contains the cluster + # name like clusterName:s3 + if ':' in prefix: + _, prefix = prefix.split(':') + return prefix + + @classmethod + def infer_client(cls, + file_client_args: Optional[dict] = None, + uri: Optional[Union[str, Path]] = None) -> 'FileClient': + """Infer a suitable file client based on the URI and arguments. + + Args: + file_client_args (dict, optional): Arguments to instantiate a + FileClient. Default: None. + uri (str | Path, optional): Uri to be parsed that contains the file + prefix. Default: None. + + Examples: + >>> uri = 's3://path/of/your/file' + >>> file_client = FileClient.infer_client(uri=uri) + >>> file_client_args = {'backend': 'petrel'} + >>> file_client = FileClient.infer_client(file_client_args) + + Returns: + FileClient: Instantiated FileClient object. 
+ """ + assert file_client_args is not None or uri is not None + if file_client_args is None: + file_prefix = cls.parse_uri_prefix(uri) # type: ignore + return cls(prefix=file_prefix) + else: + return cls(**file_client_args) + + @classmethod + def _register_backend(cls, name, backend, force=False, prefixes=None): + if not isinstance(name, str): + raise TypeError('the backend name should be a string, ' + f'but got {type(name)}') + if not inspect.isclass(backend): + raise TypeError( + f'backend should be a class but got {type(backend)}') + if not issubclass(backend, BaseStorageBackend): + raise TypeError( + f'backend {backend} is not a subclass of BaseStorageBackend') + if not force and name in cls._backends: + raise KeyError( + f'{name} is already registered as a storage backend, ' + 'add "force=True" if you want to override it') + + if name in cls._backends and force: + cls._overridden_backends.add(name) + cls._backends[name] = backend + + if prefixes is not None: + if isinstance(prefixes, str): + prefixes = [prefixes] + else: + assert isinstance(prefixes, (list, tuple)) + for prefix in prefixes: + if prefix not in cls._prefix_to_backends: + cls._prefix_to_backends[prefix] = backend + elif (prefix in cls._prefix_to_backends) and force: + cls._overridden_prefixes.add(prefix) + cls._prefix_to_backends[prefix] = backend + else: + raise KeyError( + f'{prefix} is already registered as a storage backend,' + ' add "force=True" if you want to override it') + + @classmethod + def register_backend(cls, name, backend=None, force=False, prefixes=None): + """Register a backend to FileClient. + + This method can be used as a normal class method or a decorator. + + .. code-block:: python + + class NewBackend(BaseStorageBackend): + + def get(self, filepath): + return filepath + + def get_text(self, filepath): + return filepath + + FileClient.register_backend('new', NewBackend) + + or + + .. code-block:: python + + @FileClient.register_backend('new') + class NewBackend(BaseStorageBackend): + + def get(self, filepath): + return filepath + + def get_text(self, filepath): + return filepath + + Args: + name (str): The name of the registered backend. + backend (class, optional): The backend class to be registered, + which must be a subclass of :class:`BaseStorageBackend`. + When this method is used as a decorator, backend is None. + Defaults to None. + force (bool, optional): Whether to override the backend if the name + has already been registered. Defaults to False. + prefixes (str or list[str] or tuple[str], optional): The prefixes + of the registered storage backend. Default: None. + `New in version 1.3.15.` + """ + if backend is not None: + cls._register_backend( + name, backend, force=force, prefixes=prefixes) + return + + def _register(backend_cls): + cls._register_backend( + name, backend_cls, force=force, prefixes=prefixes) + return backend_cls + + return _register + + def get(self, filepath: Union[str, Path]) -> Union[bytes, memoryview]: + """Read data from a given ``filepath`` with 'rb' mode. + + Note: + There are two types of return values for ``get``, one is ``bytes`` + and the other is ``memoryview``. The advantage of using memoryview + is that you can avoid copying, and if you want to convert it to + ``bytes``, you can use ``.tobytes()``. + + Args: + filepath (str or Path): Path to read data. + + Returns: + bytes | memoryview: Expected bytes object or a memory view of the + bytes object. 
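(Illustrative note, not part of the diff.) The methods that follow give one uniform read/write surface regardless of which backend is selected. A small round trip with the default disk backend; the temporary directory is only an example:

```python
import tempfile

from annotator.mmpkg.mmcv.fileio import FileClient  # exported by fileio/__init__.py above

client = FileClient(backend='disk')               # also the fallback when no args are given
path = client.join_path(tempfile.mkdtemp(), 'note.txt')

client.put_text('hello webui', path)              # creates missing parent directories
assert client.get_text(path) == 'hello webui'
assert client.isfile(path) and client.exists(path)

raw = client.get(path)                            # bytes (or a memoryview for some backends)
print(bytes(raw))
```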
+ """ + return self.client.get(filepath) + + def get_text(self, filepath: Union[str, Path], encoding='utf-8') -> str: + """Read data from a given ``filepath`` with 'r' mode. + + Args: + filepath (str or Path): Path to read data. + encoding (str): The encoding format used to open the ``filepath``. + Default: 'utf-8'. + + Returns: + str: Expected text reading from ``filepath``. + """ + return self.client.get_text(filepath, encoding) + + def put(self, obj: bytes, filepath: Union[str, Path]) -> None: + """Write data to a given ``filepath`` with 'wb' mode. + + Note: + ``put`` should create a directory if the directory of ``filepath`` + does not exist. + + Args: + obj (bytes): Data to be written. + filepath (str or Path): Path to write data. + """ + self.client.put(obj, filepath) + + def put_text(self, obj: str, filepath: Union[str, Path]) -> None: + """Write data to a given ``filepath`` with 'w' mode. + + Note: + ``put_text`` should create a directory if the directory of + ``filepath`` does not exist. + + Args: + obj (str): Data to be written. + filepath (str or Path): Path to write data. + encoding (str, optional): The encoding format used to open the + `filepath`. Default: 'utf-8'. + """ + self.client.put_text(obj, filepath) + + def remove(self, filepath: Union[str, Path]) -> None: + """Remove a file. + + Args: + filepath (str, Path): Path to be removed. + """ + self.client.remove(filepath) + + def exists(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path exists. + + Args: + filepath (str or Path): Path to be checked whether exists. + + Returns: + bool: Return ``True`` if ``filepath`` exists, ``False`` otherwise. + """ + return self.client.exists(filepath) + + def isdir(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path is a directory. + + Args: + filepath (str or Path): Path to be checked whether it is a + directory. + + Returns: + bool: Return ``True`` if ``filepath`` points to a directory, + ``False`` otherwise. + """ + return self.client.isdir(filepath) + + def isfile(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path is a file. + + Args: + filepath (str or Path): Path to be checked whether it is a file. + + Returns: + bool: Return ``True`` if ``filepath`` points to a file, ``False`` + otherwise. + """ + return self.client.isfile(filepath) + + def join_path(self, filepath: Union[str, Path], + *filepaths: Union[str, Path]) -> str: + """Concatenate all file paths. + + Join one or more filepath components intelligently. The return value + is the concatenation of filepath and any members of *filepaths. + + Args: + filepath (str or Path): Path to be concatenated. + + Returns: + str: The result of concatenation. + """ + return self.client.join_path(filepath, *filepaths) + + @contextmanager + def get_local_path(self, filepath: Union[str, Path]) -> Iterable[str]: + """Download data from ``filepath`` and write the data to local path. + + ``get_local_path`` is decorated by :meth:`contxtlib.contextmanager`. It + can be called with ``with`` statement, and when exists from the + ``with`` statement, the temporary path will be released. + + Note: + If the ``filepath`` is a local path, just return itself. + + .. warning:: + ``get_local_path`` is an experimental interface that may change in + the future. + + Args: + filepath (str or Path): Path to be read data. + + Examples: + >>> file_client = FileClient(prefix='s3') + >>> with file_client.get_local_path('s3://bucket/abc.jpg') as path: + ... 
# do something here + + Yields: + Iterable[str]: Only yield one path. + """ + with self.client.get_local_path(str(filepath)) as local_path: + yield local_path + + def list_dir_or_file(self, + dir_path: Union[str, Path], + list_dir: bool = True, + list_file: bool = True, + suffix: Optional[Union[str, Tuple[str]]] = None, + recursive: bool = False) -> Iterator[str]: + """Scan a directory to find the interested directories or files in + arbitrary order. + + Note: + :meth:`list_dir_or_file` returns the path relative to ``dir_path``. + + Args: + dir_path (str | Path): Path of the directory. + list_dir (bool): List the directories. Default: True. + list_file (bool): List the path of files. Default: True. + suffix (str or tuple[str], optional): File suffix + that we are interested in. Default: None. + recursive (bool): If set to True, recursively scan the + directory. Default: False. + + Yields: + Iterable[str]: A relative path to ``dir_path``. + """ + yield from self.client.list_dir_or_file(dir_path, list_dir, list_file, + suffix, recursive) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/fileio/handlers/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/fileio/handlers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..aa24d91972837b8756b225f4879bac20436eb72a --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/fileio/handlers/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .base import BaseFileHandler +from .json_handler import JsonHandler +from .pickle_handler import PickleHandler +from .yaml_handler import YamlHandler + +__all__ = ['BaseFileHandler', 'JsonHandler', 'PickleHandler', 'YamlHandler'] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/fileio/handlers/base.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/fileio/handlers/base.py new file mode 100644 index 0000000000000000000000000000000000000000..288878bc57282fbb2f12b32290152ca8e9d3cab0 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/fileio/handlers/base.py @@ -0,0 +1,30 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from abc import ABCMeta, abstractmethod + + +class BaseFileHandler(metaclass=ABCMeta): + # `str_like` is a flag to indicate whether the type of file object is + # str-like object or bytes-like object. Pickle only processes bytes-like + # objects but json only processes str-like object. If it is str-like + # object, `StringIO` will be used to process the buffer. 
+ str_like = True + + @abstractmethod + def load_from_fileobj(self, file, **kwargs): + pass + + @abstractmethod + def dump_to_fileobj(self, obj, file, **kwargs): + pass + + @abstractmethod + def dump_to_str(self, obj, **kwargs): + pass + + def load_from_path(self, filepath, mode='r', **kwargs): + with open(filepath, mode) as f: + return self.load_from_fileobj(f, **kwargs) + + def dump_to_path(self, obj, filepath, mode='w', **kwargs): + with open(filepath, mode) as f: + self.dump_to_fileobj(obj, f, **kwargs) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/fileio/handlers/json_handler.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/fileio/handlers/json_handler.py new file mode 100644 index 0000000000000000000000000000000000000000..18d4f15f74139d20adff18b20be5529c592a66b6 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/fileio/handlers/json_handler.py @@ -0,0 +1,36 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import json + +import numpy as np + +from .base import BaseFileHandler + + +def set_default(obj): + """Set default json values for non-serializable values. + + It helps convert ``set``, ``range`` and ``np.ndarray`` data types to list. + It also converts ``np.generic`` (including ``np.int32``, ``np.float32``, + etc.) into plain numbers of plain python built-in types. + """ + if isinstance(obj, (set, range)): + return list(obj) + elif isinstance(obj, np.ndarray): + return obj.tolist() + elif isinstance(obj, np.generic): + return obj.item() + raise TypeError(f'{type(obj)} is unsupported for json dump') + + +class JsonHandler(BaseFileHandler): + + def load_from_fileobj(self, file): + return json.load(file) + + def dump_to_fileobj(self, obj, file, **kwargs): + kwargs.setdefault('default', set_default) + json.dump(obj, file, **kwargs) + + def dump_to_str(self, obj, **kwargs): + kwargs.setdefault('default', set_default) + return json.dumps(obj, **kwargs) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/fileio/handlers/pickle_handler.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/fileio/handlers/pickle_handler.py new file mode 100644 index 0000000000000000000000000000000000000000..b37c79bed4ef9fd8913715e62dbe3fc5cafdc3aa --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/fileio/handlers/pickle_handler.py @@ -0,0 +1,28 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
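# A small sketch of how set_default lets JsonHandler serialize numpy and set
# values that the stdlib json module rejects (hypothetical data, illustration only):
import numpy as np
from annotator.mmpkg.mmcv.fileio.handlers import JsonHandler

handler = JsonHandler()
obj = {'ids': {1, 2, 3}, 'score': np.float32(0.5), 'hist': np.arange(3)}
print(handler.dump_to_str(obj))
# -> {"ids": [1, 2, 3], "score": 0.5, "hist": [0, 1, 2]}  (set ordering may vary)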
+import pickle + +from .base import BaseFileHandler + + +class PickleHandler(BaseFileHandler): + + str_like = False + + def load_from_fileobj(self, file, **kwargs): + return pickle.load(file, **kwargs) + + def load_from_path(self, filepath, **kwargs): + return super(PickleHandler, self).load_from_path( + filepath, mode='rb', **kwargs) + + def dump_to_str(self, obj, **kwargs): + kwargs.setdefault('protocol', 2) + return pickle.dumps(obj, **kwargs) + + def dump_to_fileobj(self, obj, file, **kwargs): + kwargs.setdefault('protocol', 2) + pickle.dump(obj, file, **kwargs) + + def dump_to_path(self, obj, filepath, **kwargs): + super(PickleHandler, self).dump_to_path( + obj, filepath, mode='wb', **kwargs) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/fileio/handlers/yaml_handler.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/fileio/handlers/yaml_handler.py new file mode 100644 index 0000000000000000000000000000000000000000..c5aa2eea1e8c76f8baf753d1c8c959dee665e543 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/fileio/handlers/yaml_handler.py @@ -0,0 +1,24 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import yaml + +try: + from yaml import CLoader as Loader, CDumper as Dumper +except ImportError: + from yaml import Loader, Dumper + +from .base import BaseFileHandler # isort:skip + + +class YamlHandler(BaseFileHandler): + + def load_from_fileobj(self, file, **kwargs): + kwargs.setdefault('Loader', Loader) + return yaml.load(file, **kwargs) + + def dump_to_fileobj(self, obj, file, **kwargs): + kwargs.setdefault('Dumper', Dumper) + yaml.dump(obj, file, **kwargs) + + def dump_to_str(self, obj, **kwargs): + kwargs.setdefault('Dumper', Dumper) + return yaml.dump(obj, **kwargs) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/fileio/io.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/fileio/io.py new file mode 100644 index 0000000000000000000000000000000000000000..aaefde58aa3ea5b58f86249ce7e1c40c186eb8dd --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/fileio/io.py @@ -0,0 +1,151 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from io import BytesIO, StringIO +from pathlib import Path + +from ..utils import is_list_of, is_str +from .file_client import FileClient +from .handlers import BaseFileHandler, JsonHandler, PickleHandler, YamlHandler + +file_handlers = { + 'json': JsonHandler(), + 'yaml': YamlHandler(), + 'yml': YamlHandler(), + 'pickle': PickleHandler(), + 'pkl': PickleHandler() +} + + +def load(file, file_format=None, file_client_args=None, **kwargs): + """Load data from json/yaml/pickle files. + + This method provides a unified api for loading data from serialized files. + + Note: + In v1.3.16 and later, ``load`` supports loading data from serialized + files those can be storaged in different backends. + + Args: + file (str or :obj:`Path` or file-like object): Filename or a file-like + object. + file_format (str, optional): If not specified, the file format will be + inferred from the file extension, otherwise use the specified one. + Currently supported formats include "json", "yaml/yml" and + "pickle/pkl". + file_client_args (dict, optional): Arguments to instantiate a + FileClient. See :class:`mmcv.fileio.FileClient` for details. + Default: None. 
+ + Examples: + >>> load('/path/of/your/file') # file is storaged in disk + >>> load('https://path/of/your/file') # file is storaged in Internet + >>> load('s3://path/of/your/file') # file is storaged in petrel + + Returns: + The content from the file. + """ + if isinstance(file, Path): + file = str(file) + if file_format is None and is_str(file): + file_format = file.split('.')[-1] + if file_format not in file_handlers: + raise TypeError(f'Unsupported format: {file_format}') + + handler = file_handlers[file_format] + if is_str(file): + file_client = FileClient.infer_client(file_client_args, file) + if handler.str_like: + with StringIO(file_client.get_text(file)) as f: + obj = handler.load_from_fileobj(f, **kwargs) + else: + with BytesIO(file_client.get(file)) as f: + obj = handler.load_from_fileobj(f, **kwargs) + elif hasattr(file, 'read'): + obj = handler.load_from_fileobj(file, **kwargs) + else: + raise TypeError('"file" must be a filepath str or a file-object') + return obj + + +def dump(obj, file=None, file_format=None, file_client_args=None, **kwargs): + """Dump data to json/yaml/pickle strings or files. + + This method provides a unified api for dumping data as strings or to files, + and also supports custom arguments for each file format. + + Note: + In v1.3.16 and later, ``dump`` supports dumping data as strings or to + files which is saved to different backends. + + Args: + obj (any): The python object to be dumped. + file (str or :obj:`Path` or file-like object, optional): If not + specified, then the object is dumped to a str, otherwise to a file + specified by the filename or file-like object. + file_format (str, optional): Same as :func:`load`. + file_client_args (dict, optional): Arguments to instantiate a + FileClient. See :class:`mmcv.fileio.FileClient` for details. + Default: None. + + Examples: + >>> dump('hello world', '/path/of/your/file') # disk + >>> dump('hello world', 's3://path/of/your/file') # ceph or petrel + + Returns: + bool: True for success, False otherwise. + """ + if isinstance(file, Path): + file = str(file) + if file_format is None: + if is_str(file): + file_format = file.split('.')[-1] + elif file is None: + raise ValueError( + 'file_format must be specified since file is None') + if file_format not in file_handlers: + raise TypeError(f'Unsupported format: {file_format}') + + handler = file_handlers[file_format] + if file is None: + return handler.dump_to_str(obj, **kwargs) + elif is_str(file): + file_client = FileClient.infer_client(file_client_args, file) + if handler.str_like: + with StringIO() as f: + handler.dump_to_fileobj(obj, f, **kwargs) + file_client.put_text(f.getvalue(), file) + else: + with BytesIO() as f: + handler.dump_to_fileobj(obj, f, **kwargs) + file_client.put(f.getvalue(), file) + elif hasattr(file, 'write'): + handler.dump_to_fileobj(obj, file, **kwargs) + else: + raise TypeError('"file" must be a filename str or a file-object') + + +def _register_handler(handler, file_formats): + """Register a handler for some file extensions. + + Args: + handler (:obj:`BaseFileHandler`): Handler to be registered. + file_formats (str or list[str]): File formats to be handled by this + handler. 
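# A sketch of the unified load/dump API defined above, assuming local files
# under a hypothetical /tmp path; the file format is inferred from the extension:
from annotator.mmpkg.mmcv.fileio.io import dump, load

data = {'classes': ['cat', 'dog'], 'num_images': 2}
dump(data, '/tmp/meta.json')   # routed to JsonHandler, written via put_text
dump(data, '/tmp/meta.pkl')    # routed to PickleHandler, written via put
assert load('/tmp/meta.json') == load('/tmp/meta.pkl') == data

# With file=None the object is dumped to a string instead of a file:
print(dump(data, file_format='yaml'))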
+ """ + if not isinstance(handler, BaseFileHandler): + raise TypeError( + f'handler must be a child of BaseFileHandler, not {type(handler)}') + if isinstance(file_formats, str): + file_formats = [file_formats] + if not is_list_of(file_formats, str): + raise TypeError('file_formats must be a str or a list of str') + for ext in file_formats: + file_handlers[ext] = handler + + +def register_handler(file_formats, **kwargs): + + def wrap(cls): + _register_handler(cls(**kwargs), file_formats) + return cls + + return wrap diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/fileio/parse.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/fileio/parse.py new file mode 100644 index 0000000000000000000000000000000000000000..f60f0d611b8d75692221d0edd7dc993b0a6445c9 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/fileio/parse.py @@ -0,0 +1,97 @@ +# Copyright (c) OpenMMLab. All rights reserved. + +from io import StringIO + +from .file_client import FileClient + + +def list_from_file(filename, + prefix='', + offset=0, + max_num=0, + encoding='utf-8', + file_client_args=None): + """Load a text file and parse the content as a list of strings. + + Note: + In v1.3.16 and later, ``list_from_file`` supports loading a text file + which can be storaged in different backends and parsing the content as + a list for strings. + + Args: + filename (str): Filename. + prefix (str): The prefix to be inserted to the beginning of each item. + offset (int): The offset of lines. + max_num (int): The maximum number of lines to be read, + zeros and negatives mean no limitation. + encoding (str): Encoding used to open the file. Default utf-8. + file_client_args (dict, optional): Arguments to instantiate a + FileClient. See :class:`mmcv.fileio.FileClient` for details. + Default: None. + + Examples: + >>> list_from_file('/path/of/your/file') # disk + ['hello', 'world'] + >>> list_from_file('s3://path/of/your/file') # ceph or petrel + ['hello', 'world'] + + Returns: + list[str]: A list of strings. + """ + cnt = 0 + item_list = [] + file_client = FileClient.infer_client(file_client_args, filename) + with StringIO(file_client.get_text(filename, encoding)) as f: + for _ in range(offset): + f.readline() + for line in f: + if 0 < max_num <= cnt: + break + item_list.append(prefix + line.rstrip('\n\r')) + cnt += 1 + return item_list + + +def dict_from_file(filename, + key_type=str, + encoding='utf-8', + file_client_args=None): + """Load a text file and parse the content as a dict. + + Each line of the text file will be two or more columns split by + whitespaces or tabs. The first column will be parsed as dict keys, and + the following columns will be parsed as dict values. + + Note: + In v1.3.16 and later, ``dict_from_file`` supports loading a text file + which can be storaged in different backends and parsing the content as + a dict. + + Args: + filename(str): Filename. + key_type(type): Type of the dict keys. str is user by default and + type conversion will be performed if specified. + encoding (str): Encoding used to open the file. Default utf-8. + file_client_args (dict, optional): Arguments to instantiate a + FileClient. See :class:`mmcv.fileio.FileClient` for details. + Default: None. + + Examples: + >>> dict_from_file('/path/of/your/file') # disk + {'key1': 'value1', 'key2': 'value2'} + >>> dict_from_file('s3://path/of/your/file') # ceph or petrel + {'key1': 'value1', 'key2': 'value2'} + + Returns: + dict: The parsed contents. 
+ """ + mapping = {} + file_client = FileClient.infer_client(file_client_args, filename) + with StringIO(file_client.get_text(filename, encoding)) as f: + for line in f: + items = line.rstrip('\n').split() + assert len(items) >= 2 + key = key_type(items[0]) + val = items[1:] if len(items) > 2 else items[1] + mapping[key] = val + return mapping diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/image/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/image/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d0051d609d3de4e7562e3fe638335c66617c4d91 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/image/__init__.py @@ -0,0 +1,28 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .colorspace import (bgr2gray, bgr2hls, bgr2hsv, bgr2rgb, bgr2ycbcr, + gray2bgr, gray2rgb, hls2bgr, hsv2bgr, imconvert, + rgb2bgr, rgb2gray, rgb2ycbcr, ycbcr2bgr, ycbcr2rgb) +from .geometric import (cutout, imcrop, imflip, imflip_, impad, + impad_to_multiple, imrescale, imresize, imresize_like, + imresize_to_multiple, imrotate, imshear, imtranslate, + rescale_size) +from .io import imfrombytes, imread, imwrite, supported_backends, use_backend +from .misc import tensor2imgs +from .photometric import (adjust_brightness, adjust_color, adjust_contrast, + adjust_lighting, adjust_sharpness, auto_contrast, + clahe, imdenormalize, imequalize, iminvert, + imnormalize, imnormalize_, lut_transform, posterize, + solarize) + +__all__ = [ + 'bgr2gray', 'bgr2hls', 'bgr2hsv', 'bgr2rgb', 'gray2bgr', 'gray2rgb', + 'hls2bgr', 'hsv2bgr', 'imconvert', 'rgb2bgr', 'rgb2gray', 'imrescale', + 'imresize', 'imresize_like', 'imresize_to_multiple', 'rescale_size', + 'imcrop', 'imflip', 'imflip_', 'impad', 'impad_to_multiple', 'imrotate', + 'imfrombytes', 'imread', 'imwrite', 'supported_backends', 'use_backend', + 'imdenormalize', 'imnormalize', 'imnormalize_', 'iminvert', 'posterize', + 'solarize', 'rgb2ycbcr', 'bgr2ycbcr', 'ycbcr2rgb', 'ycbcr2bgr', + 'tensor2imgs', 'imshear', 'imtranslate', 'adjust_color', 'imequalize', + 'adjust_brightness', 'adjust_contrast', 'lut_transform', 'clahe', + 'adjust_sharpness', 'auto_contrast', 'cutout', 'adjust_lighting' +] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/image/colorspace.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/image/colorspace.py new file mode 100644 index 0000000000000000000000000000000000000000..814533952fdfda23d67cb6a3073692d8c1156add --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/image/colorspace.py @@ -0,0 +1,306 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import cv2 +import numpy as np + + +def imconvert(img, src, dst): + """Convert an image from the src colorspace to dst colorspace. + + Args: + img (ndarray): The input image. + src (str): The source colorspace, e.g., 'rgb', 'hsv'. + dst (str): The destination colorspace, e.g., 'rgb', 'hsv'. + + Returns: + ndarray: The converted image. + """ + code = getattr(cv2, f'COLOR_{src.upper()}2{dst.upper()}') + out_img = cv2.cvtColor(img, code) + return out_img + + +def bgr2gray(img, keepdim=False): + """Convert a BGR image to grayscale image. + + Args: + img (ndarray): The input image. + keepdim (bool): If False (by default), then return the grayscale image + with 2 dims, otherwise 3 dims. + + Returns: + ndarray: The converted grayscale image. 
+ """ + out_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + if keepdim: + out_img = out_img[..., None] + return out_img + + +def rgb2gray(img, keepdim=False): + """Convert a RGB image to grayscale image. + + Args: + img (ndarray): The input image. + keepdim (bool): If False (by default), then return the grayscale image + with 2 dims, otherwise 3 dims. + + Returns: + ndarray: The converted grayscale image. + """ + out_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) + if keepdim: + out_img = out_img[..., None] + return out_img + + +def gray2bgr(img): + """Convert a grayscale image to BGR image. + + Args: + img (ndarray): The input image. + + Returns: + ndarray: The converted BGR image. + """ + img = img[..., None] if img.ndim == 2 else img + out_img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) + return out_img + + +def gray2rgb(img): + """Convert a grayscale image to RGB image. + + Args: + img (ndarray): The input image. + + Returns: + ndarray: The converted RGB image. + """ + img = img[..., None] if img.ndim == 2 else img + out_img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB) + return out_img + + +def _convert_input_type_range(img): + """Convert the type and range of the input image. + + It converts the input image to np.float32 type and range of [0, 1]. + It is mainly used for pre-processing the input image in colorspace + conversion functions such as rgb2ycbcr and ycbcr2rgb. + + Args: + img (ndarray): The input image. It accepts: + 1. np.uint8 type with range [0, 255]; + 2. np.float32 type with range [0, 1]. + + Returns: + (ndarray): The converted image with type of np.float32 and range of + [0, 1]. + """ + img_type = img.dtype + img = img.astype(np.float32) + if img_type == np.float32: + pass + elif img_type == np.uint8: + img /= 255. + else: + raise TypeError('The img type should be np.float32 or np.uint8, ' + f'but got {img_type}') + return img + + +def _convert_output_type_range(img, dst_type): + """Convert the type and range of the image according to dst_type. + + It converts the image to desired type and range. If `dst_type` is np.uint8, + images will be converted to np.uint8 type with range [0, 255]. If + `dst_type` is np.float32, it converts the image to np.float32 type with + range [0, 1]. + It is mainly used for post-processing images in colorspace conversion + functions such as rgb2ycbcr and ycbcr2rgb. + + Args: + img (ndarray): The image to be converted with np.float32 type and + range [0, 255]. + dst_type (np.uint8 | np.float32): If dst_type is np.uint8, it + converts the image to np.uint8 type with range [0, 255]. If + dst_type is np.float32, it converts the image to np.float32 type + with range [0, 1]. + + Returns: + (ndarray): The converted image with desired type and range. + """ + if dst_type not in (np.uint8, np.float32): + raise TypeError('The dst_type should be np.float32 or np.uint8, ' + f'but got {dst_type}') + if dst_type == np.uint8: + img = img.round() + else: + img /= 255. + return img.astype(dst_type) + + +def rgb2ycbcr(img, y_only=False): + """Convert a RGB image to YCbCr image. + + This function produces the same results as Matlab's `rgb2ycbcr` function. + It implements the ITU-R BT.601 conversion for standard-definition + television. See more details in + https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion. + + It differs from a similar function in cv2.cvtColor: `RGB <-> YCrCb`. + In OpenCV, it implements a JPEG conversion. See more details in + https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion. + + Args: + img (ndarray): The input image. It accepts: + 1. 
np.uint8 type with range [0, 255]; + 2. np.float32 type with range [0, 1]. + y_only (bool): Whether to only return Y channel. Default: False. + + Returns: + ndarray: The converted YCbCr image. The output image has the same type + and range as input image. + """ + img_type = img.dtype + img = _convert_input_type_range(img) + if y_only: + out_img = np.dot(img, [65.481, 128.553, 24.966]) + 16.0 + else: + out_img = np.matmul( + img, [[65.481, -37.797, 112.0], [128.553, -74.203, -93.786], + [24.966, 112.0, -18.214]]) + [16, 128, 128] + out_img = _convert_output_type_range(out_img, img_type) + return out_img + + +def bgr2ycbcr(img, y_only=False): + """Convert a BGR image to YCbCr image. + + The bgr version of rgb2ycbcr. + It implements the ITU-R BT.601 conversion for standard-definition + television. See more details in + https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion. + + It differs from a similar function in cv2.cvtColor: `BGR <-> YCrCb`. + In OpenCV, it implements a JPEG conversion. See more details in + https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion. + + Args: + img (ndarray): The input image. It accepts: + 1. np.uint8 type with range [0, 255]; + 2. np.float32 type with range [0, 1]. + y_only (bool): Whether to only return Y channel. Default: False. + + Returns: + ndarray: The converted YCbCr image. The output image has the same type + and range as input image. + """ + img_type = img.dtype + img = _convert_input_type_range(img) + if y_only: + out_img = np.dot(img, [24.966, 128.553, 65.481]) + 16.0 + else: + out_img = np.matmul( + img, [[24.966, 112.0, -18.214], [128.553, -74.203, -93.786], + [65.481, -37.797, 112.0]]) + [16, 128, 128] + out_img = _convert_output_type_range(out_img, img_type) + return out_img + + +def ycbcr2rgb(img): + """Convert a YCbCr image to RGB image. + + This function produces the same results as Matlab's ycbcr2rgb function. + It implements the ITU-R BT.601 conversion for standard-definition + television. See more details in + https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion. + + It differs from a similar function in cv2.cvtColor: `YCrCb <-> RGB`. + In OpenCV, it implements a JPEG conversion. See more details in + https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion. + + Args: + img (ndarray): The input image. It accepts: + 1. np.uint8 type with range [0, 255]; + 2. np.float32 type with range [0, 1]. + + Returns: + ndarray: The converted RGB image. The output image has the same type + and range as input image. + """ + img_type = img.dtype + img = _convert_input_type_range(img) * 255 + out_img = np.matmul(img, [[0.00456621, 0.00456621, 0.00456621], + [0, -0.00153632, 0.00791071], + [0.00625893, -0.00318811, 0]]) * 255.0 + [ + -222.921, 135.576, -276.836 + ] + out_img = _convert_output_type_range(out_img, img_type) + return out_img + + +def ycbcr2bgr(img): + """Convert a YCbCr image to BGR image. + + The bgr version of ycbcr2rgb. + It implements the ITU-R BT.601 conversion for standard-definition + television. See more details in + https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion. + + It differs from a similar function in cv2.cvtColor: `YCrCb <-> BGR`. + In OpenCV, it implements a JPEG conversion. See more details in + https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion. + + Args: + img (ndarray): The input image. It accepts: + 1. np.uint8 type with range [0, 255]; + 2. np.float32 type with range [0, 1]. + + Returns: + ndarray: The converted BGR image. The output image has the same type + and range as input image. 
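# A sketch of the colorspace helpers above on a random uint8 image (hypothetical
# data); outputs keep the dtype and value range of the input:
import numpy as np
from annotator.mmpkg.mmcv.image.colorspace import bgr2gray, imconvert, rgb2ycbcr, ycbcr2rgb

img = np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8)

gray = bgr2gray(img)                  # (64, 64), uint8
hsv = imconvert(img, 'bgr', 'hsv')    # generic cv2-based conversion
y = rgb2ycbcr(img, y_only=True)       # BT.601 luma channel only

rgb_back = ycbcr2rgb(rgb2ycbcr(img))  # round trip, up to small rounding error
assert rgb_back.dtype == img.dtype == np.uint8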
+ """ + img_type = img.dtype + img = _convert_input_type_range(img) * 255 + out_img = np.matmul(img, [[0.00456621, 0.00456621, 0.00456621], + [0.00791071, -0.00153632, 0], + [0, -0.00318811, 0.00625893]]) * 255.0 + [ + -276.836, 135.576, -222.921 + ] + out_img = _convert_output_type_range(out_img, img_type) + return out_img + + +def convert_color_factory(src, dst): + + code = getattr(cv2, f'COLOR_{src.upper()}2{dst.upper()}') + + def convert_color(img): + out_img = cv2.cvtColor(img, code) + return out_img + + convert_color.__doc__ = f"""Convert a {src.upper()} image to {dst.upper()} + image. + + Args: + img (ndarray or str): The input image. + + Returns: + ndarray: The converted {dst.upper()} image. + """ + + return convert_color + + +bgr2rgb = convert_color_factory('bgr', 'rgb') + +rgb2bgr = convert_color_factory('rgb', 'bgr') + +bgr2hsv = convert_color_factory('bgr', 'hsv') + +hsv2bgr = convert_color_factory('hsv', 'bgr') + +bgr2hls = convert_color_factory('bgr', 'hls') + +hls2bgr = convert_color_factory('hls', 'bgr') diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/image/geometric.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/image/geometric.py new file mode 100644 index 0000000000000000000000000000000000000000..cf97c201cb4e43796c911919d03fb26a07ed817d --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/image/geometric.py @@ -0,0 +1,728 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import numbers + +import cv2 +import numpy as np + +from ..utils import to_2tuple +from .io import imread_backend + +try: + from PIL import Image +except ImportError: + Image = None + + +def _scale_size(size, scale): + """Rescale a size by a ratio. + + Args: + size (tuple[int]): (w, h). + scale (float | tuple(float)): Scaling factor. + + Returns: + tuple[int]: scaled size. + """ + if isinstance(scale, (float, int)): + scale = (scale, scale) + w, h = size + return int(w * float(scale[0]) + 0.5), int(h * float(scale[1]) + 0.5) + + +cv2_interp_codes = { + 'nearest': cv2.INTER_NEAREST, + 'bilinear': cv2.INTER_LINEAR, + 'bicubic': cv2.INTER_CUBIC, + 'area': cv2.INTER_AREA, + 'lanczos': cv2.INTER_LANCZOS4 +} + +if Image is not None: + pillow_interp_codes = { + 'nearest': Image.NEAREST, + 'bilinear': Image.BILINEAR, + 'bicubic': Image.BICUBIC, + 'box': Image.BOX, + 'lanczos': Image.LANCZOS, + 'hamming': Image.HAMMING + } + + +def imresize(img, + size, + return_scale=False, + interpolation='bilinear', + out=None, + backend=None): + """Resize image to a given size. + + Args: + img (ndarray): The input image. + size (tuple[int]): Target size (w, h). + return_scale (bool): Whether to return `w_scale` and `h_scale`. + interpolation (str): Interpolation method, accepted values are + "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2' + backend, "nearest", "bilinear" for 'pillow' backend. + out (ndarray): The output destination. + backend (str | None): The image resize backend type. Options are `cv2`, + `pillow`, `None`. If backend is None, the global imread_backend + specified by ``mmcv.use_backend()`` will be used. Default: None. + + Returns: + tuple | ndarray: (`resized_img`, `w_scale`, `h_scale`) or + `resized_img`. + """ + h, w = img.shape[:2] + if backend is None: + backend = imread_backend + if backend not in ['cv2', 'pillow']: + raise ValueError(f'backend: {backend} is not supported for resize.' 
+ f"Supported backends are 'cv2', 'pillow'") + + if backend == 'pillow': + assert img.dtype == np.uint8, 'Pillow backend only support uint8 type' + pil_image = Image.fromarray(img) + pil_image = pil_image.resize(size, pillow_interp_codes[interpolation]) + resized_img = np.array(pil_image) + else: + resized_img = cv2.resize( + img, size, dst=out, interpolation=cv2_interp_codes[interpolation]) + if not return_scale: + return resized_img + else: + w_scale = size[0] / w + h_scale = size[1] / h + return resized_img, w_scale, h_scale + + +def imresize_to_multiple(img, + divisor, + size=None, + scale_factor=None, + keep_ratio=False, + return_scale=False, + interpolation='bilinear', + out=None, + backend=None): + """Resize image according to a given size or scale factor and then rounds + up the the resized or rescaled image size to the nearest value that can be + divided by the divisor. + + Args: + img (ndarray): The input image. + divisor (int | tuple): Resized image size will be a multiple of + divisor. If divisor is a tuple, divisor should be + (w_divisor, h_divisor). + size (None | int | tuple[int]): Target size (w, h). Default: None. + scale_factor (None | float | tuple[float]): Multiplier for spatial + size. Should match input size if it is a tuple and the 2D style is + (w_scale_factor, h_scale_factor). Default: None. + keep_ratio (bool): Whether to keep the aspect ratio when resizing the + image. Default: False. + return_scale (bool): Whether to return `w_scale` and `h_scale`. + interpolation (str): Interpolation method, accepted values are + "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2' + backend, "nearest", "bilinear" for 'pillow' backend. + out (ndarray): The output destination. + backend (str | None): The image resize backend type. Options are `cv2`, + `pillow`, `None`. If backend is None, the global imread_backend + specified by ``mmcv.use_backend()`` will be used. Default: None. + + Returns: + tuple | ndarray: (`resized_img`, `w_scale`, `h_scale`) or + `resized_img`. + """ + h, w = img.shape[:2] + if size is not None and scale_factor is not None: + raise ValueError('only one of size or scale_factor should be defined') + elif size is None and scale_factor is None: + raise ValueError('one of size or scale_factor should be defined') + elif size is not None: + size = to_2tuple(size) + if keep_ratio: + size = rescale_size((w, h), size, return_scale=False) + else: + size = _scale_size((w, h), scale_factor) + + divisor = to_2tuple(divisor) + size = tuple([int(np.ceil(s / d)) * d for s, d in zip(size, divisor)]) + resized_img, w_scale, h_scale = imresize( + img, + size, + return_scale=True, + interpolation=interpolation, + out=out, + backend=backend) + if return_scale: + return resized_img, w_scale, h_scale + else: + return resized_img + + +def imresize_like(img, + dst_img, + return_scale=False, + interpolation='bilinear', + backend=None): + """Resize image to the same size of a given image. + + Args: + img (ndarray): The input image. + dst_img (ndarray): The target image. + return_scale (bool): Whether to return `w_scale` and `h_scale`. + interpolation (str): Same as :func:`resize`. + backend (str | None): Same as :func:`resize`. + + Returns: + tuple or ndarray: (`resized_img`, `w_scale`, `h_scale`) or + `resized_img`. + """ + h, w = dst_img.shape[:2] + return imresize(img, (w, h), return_scale, interpolation, backend=backend) + + +def rescale_size(old_size, scale, return_scale=False): + """Calculate the new size to be rescaled to. 
+ + Args: + old_size (tuple[int]): The old size (w, h) of image. + scale (float | tuple[int]): The scaling factor or maximum size. + If it is a float number, then the image will be rescaled by this + factor, else if it is a tuple of 2 integers, then the image will + be rescaled as large as possible within the scale. + return_scale (bool): Whether to return the scaling factor besides the + rescaled image size. + + Returns: + tuple[int]: The new rescaled image size. + """ + w, h = old_size + if isinstance(scale, (float, int)): + if scale <= 0: + raise ValueError(f'Invalid scale {scale}, must be positive.') + scale_factor = scale + elif isinstance(scale, tuple): + max_long_edge = max(scale) + max_short_edge = min(scale) + scale_factor = min(max_long_edge / max(h, w), + max_short_edge / min(h, w)) + else: + raise TypeError( + f'Scale must be a number or tuple of int, but got {type(scale)}') + + new_size = _scale_size((w, h), scale_factor) + + if return_scale: + return new_size, scale_factor + else: + return new_size + + +def imrescale(img, + scale, + return_scale=False, + interpolation='bilinear', + backend=None): + """Resize image while keeping the aspect ratio. + + Args: + img (ndarray): The input image. + scale (float | tuple[int]): The scaling factor or maximum size. + If it is a float number, then the image will be rescaled by this + factor, else if it is a tuple of 2 integers, then the image will + be rescaled as large as possible within the scale. + return_scale (bool): Whether to return the scaling factor besides the + rescaled image. + interpolation (str): Same as :func:`resize`. + backend (str | None): Same as :func:`resize`. + + Returns: + ndarray: The rescaled image. + """ + h, w = img.shape[:2] + new_size, scale_factor = rescale_size((w, h), scale, return_scale=True) + rescaled_img = imresize( + img, new_size, interpolation=interpolation, backend=backend) + if return_scale: + return rescaled_img, scale_factor + else: + return rescaled_img + + +def imflip(img, direction='horizontal'): + """Flip an image horizontally or vertically. + + Args: + img (ndarray): Image to be flipped. + direction (str): The flip direction, either "horizontal" or + "vertical" or "diagonal". + + Returns: + ndarray: The flipped image. + """ + assert direction in ['horizontal', 'vertical', 'diagonal'] + if direction == 'horizontal': + return np.flip(img, axis=1) + elif direction == 'vertical': + return np.flip(img, axis=0) + else: + return np.flip(img, axis=(0, 1)) + + +def imflip_(img, direction='horizontal'): + """Inplace flip an image horizontally or vertically. + + Args: + img (ndarray): Image to be flipped. + direction (str): The flip direction, either "horizontal" or + "vertical" or "diagonal". + + Returns: + ndarray: The flipped image (inplace). + """ + assert direction in ['horizontal', 'vertical', 'diagonal'] + if direction == 'horizontal': + return cv2.flip(img, 1, img) + elif direction == 'vertical': + return cv2.flip(img, 0, img) + else: + return cv2.flip(img, -1, img) + + +def imrotate(img, + angle, + center=None, + scale=1.0, + border_value=0, + interpolation='bilinear', + auto_bound=False): + """Rotate an image. + + Args: + img (ndarray): Image to be rotated. + angle (float): Rotation angle in degrees, positive values mean + clockwise rotation. + center (tuple[float], optional): Center point (w, h) of the rotation in + the source image. If not specified, the center of the image will be + used. + scale (float): Isotropic scale factor. + border_value (int): Border value. 
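# A sketch of keep-aspect-ratio rescaling, flipping, and rotation using the
# helpers above (hypothetical random input):
import numpy as np
from annotator.mmpkg.mmcv.image.geometric import imflip, imrescale, imrotate

img = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)

# Rescale as large as possible within a (long edge, short edge) limit
rescaled, scale = imrescale(img, (1000, 600), return_scale=True)
print(rescaled.shape, scale)                   # (600, 800, 3) 1.25

flipped = imflip(img, direction='horizontal')
rotated = imrotate(img, 30, auto_bound=True)   # canvas grows to cover the rotated image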
+ interpolation (str): Same as :func:`resize`. + auto_bound (bool): Whether to adjust the image size to cover the whole + rotated image. + + Returns: + ndarray: The rotated image. + """ + if center is not None and auto_bound: + raise ValueError('`auto_bound` conflicts with `center`') + h, w = img.shape[:2] + if center is None: + center = ((w - 1) * 0.5, (h - 1) * 0.5) + assert isinstance(center, tuple) + + matrix = cv2.getRotationMatrix2D(center, -angle, scale) + if auto_bound: + cos = np.abs(matrix[0, 0]) + sin = np.abs(matrix[0, 1]) + new_w = h * sin + w * cos + new_h = h * cos + w * sin + matrix[0, 2] += (new_w - w) * 0.5 + matrix[1, 2] += (new_h - h) * 0.5 + w = int(np.round(new_w)) + h = int(np.round(new_h)) + rotated = cv2.warpAffine( + img, + matrix, (w, h), + flags=cv2_interp_codes[interpolation], + borderValue=border_value) + return rotated + + +def bbox_clip(bboxes, img_shape): + """Clip bboxes to fit the image shape. + + Args: + bboxes (ndarray): Shape (..., 4*k) + img_shape (tuple[int]): (height, width) of the image. + + Returns: + ndarray: Clipped bboxes. + """ + assert bboxes.shape[-1] % 4 == 0 + cmin = np.empty(bboxes.shape[-1], dtype=bboxes.dtype) + cmin[0::2] = img_shape[1] - 1 + cmin[1::2] = img_shape[0] - 1 + clipped_bboxes = np.maximum(np.minimum(bboxes, cmin), 0) + return clipped_bboxes + + +def bbox_scaling(bboxes, scale, clip_shape=None): + """Scaling bboxes w.r.t the box center. + + Args: + bboxes (ndarray): Shape(..., 4). + scale (float): Scaling factor. + clip_shape (tuple[int], optional): If specified, bboxes that exceed the + boundary will be clipped according to the given shape (h, w). + + Returns: + ndarray: Scaled bboxes. + """ + if float(scale) == 1.0: + scaled_bboxes = bboxes.copy() + else: + w = bboxes[..., 2] - bboxes[..., 0] + 1 + h = bboxes[..., 3] - bboxes[..., 1] + 1 + dw = (w * (scale - 1)) * 0.5 + dh = (h * (scale - 1)) * 0.5 + scaled_bboxes = bboxes + np.stack((-dw, -dh, dw, dh), axis=-1) + if clip_shape is not None: + return bbox_clip(scaled_bboxes, clip_shape) + else: + return scaled_bboxes + + +def imcrop(img, bboxes, scale=1.0, pad_fill=None): + """Crop image patches. + + 3 steps: scale the bboxes -> clip bboxes -> crop and pad. + + Args: + img (ndarray): Image to be cropped. + bboxes (ndarray): Shape (k, 4) or (4, ), location of cropped bboxes. + scale (float, optional): Scale ratio of bboxes, the default value + 1.0 means no padding. + pad_fill (Number | list[Number]): Value to be filled for padding. + Default: None, which means no padding. + + Returns: + list[ndarray] | ndarray: The cropped image patches. + """ + chn = 1 if img.ndim == 2 else img.shape[2] + if pad_fill is not None: + if isinstance(pad_fill, (int, float)): + pad_fill = [pad_fill for _ in range(chn)] + assert len(pad_fill) == chn + + _bboxes = bboxes[None, ...] if bboxes.ndim == 1 else bboxes + scaled_bboxes = bbox_scaling(_bboxes, scale).astype(np.int32) + clipped_bbox = bbox_clip(scaled_bboxes, img.shape) + + patches = [] + for i in range(clipped_bbox.shape[0]): + x1, y1, x2, y2 = tuple(clipped_bbox[i, :]) + if pad_fill is None: + patch = img[y1:y2 + 1, x1:x2 + 1, ...] 
+ else: + _x1, _y1, _x2, _y2 = tuple(scaled_bboxes[i, :]) + if chn == 1: + patch_shape = (_y2 - _y1 + 1, _x2 - _x1 + 1) + else: + patch_shape = (_y2 - _y1 + 1, _x2 - _x1 + 1, chn) + patch = np.array( + pad_fill, dtype=img.dtype) * np.ones( + patch_shape, dtype=img.dtype) + x_start = 0 if _x1 >= 0 else -_x1 + y_start = 0 if _y1 >= 0 else -_y1 + w = x2 - x1 + 1 + h = y2 - y1 + 1 + patch[y_start:y_start + h, x_start:x_start + w, + ...] = img[y1:y1 + h, x1:x1 + w, ...] + patches.append(patch) + + if bboxes.ndim == 1: + return patches[0] + else: + return patches + + +def impad(img, + *, + shape=None, + padding=None, + pad_val=0, + padding_mode='constant'): + """Pad the given image to a certain shape or pad on all sides with + specified padding mode and padding value. + + Args: + img (ndarray): Image to be padded. + shape (tuple[int]): Expected padding shape (h, w). Default: None. + padding (int or tuple[int]): Padding on each border. If a single int is + provided this is used to pad all borders. If tuple of length 2 is + provided this is the padding on left/right and top/bottom + respectively. If a tuple of length 4 is provided this is the + padding for the left, top, right and bottom borders respectively. + Default: None. Note that `shape` and `padding` can not be both + set. + pad_val (Number | Sequence[Number]): Values to be filled in padding + areas when padding_mode is 'constant'. Default: 0. + padding_mode (str): Type of padding. Should be: constant, edge, + reflect or symmetric. Default: constant. + + - constant: pads with a constant value, this value is specified + with pad_val. + - edge: pads with the last value at the edge of the image. + - reflect: pads with reflection of image without repeating the + last value on the edge. For example, padding [1, 2, 3, 4] + with 2 elements on both sides in reflect mode will result + in [3, 2, 1, 2, 3, 4, 3, 2]. + - symmetric: pads with reflection of image repeating the last + value on the edge. For example, padding [1, 2, 3, 4] with + 2 elements on both sides in symmetric mode will result in + [2, 1, 1, 2, 3, 4, 4, 3] + + Returns: + ndarray: The padded image. + """ + + assert (shape is not None) ^ (padding is not None) + if shape is not None: + padding = (0, 0, shape[1] - img.shape[1], shape[0] - img.shape[0]) + + # check pad_val + if isinstance(pad_val, tuple): + assert len(pad_val) == img.shape[-1] + elif not isinstance(pad_val, numbers.Number): + raise TypeError('pad_val must be a int or a tuple. ' + f'But received {type(pad_val)}') + + # check padding + if isinstance(padding, tuple) and len(padding) in [2, 4]: + if len(padding) == 2: + padding = (padding[0], padding[1], padding[0], padding[1]) + elif isinstance(padding, numbers.Number): + padding = (padding, padding, padding, padding) + else: + raise ValueError('Padding must be a int or a 2, or 4 element tuple.' + f'But received {padding}') + + # check padding mode + assert padding_mode in ['constant', 'edge', 'reflect', 'symmetric'] + + border_type = { + 'constant': cv2.BORDER_CONSTANT, + 'edge': cv2.BORDER_REPLICATE, + 'reflect': cv2.BORDER_REFLECT_101, + 'symmetric': cv2.BORDER_REFLECT + } + img = cv2.copyMakeBorder( + img, + padding[1], + padding[3], + padding[0], + padding[2], + border_type[padding_mode], + value=pad_val) + + return img + + +def impad_to_multiple(img, divisor, pad_val=0): + """Pad an image to ensure each edge to be multiple to some number. + + Args: + img (ndarray): Image to be padded. + divisor (int): Padded image edges will be multiple to divisor. 
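# A brief sketch of the padding helper above (hypothetical random input):
import numpy as np
from annotator.mmpkg.mmcv.image.geometric import impad

img = np.random.randint(0, 256, (100, 150, 3), dtype=np.uint8)

# Pad to an exact (h, w) shape; the extra area is added on the right/bottom
padded = impad(img, shape=(128, 160))
print(padded.shape)                    # (128, 160, 3)

# Or pad explicitly: (left, top, right, bottom) with a per-channel fill value
bordered = impad(img, padding=(4, 4, 4, 4), pad_val=(114, 114, 114))
print(bordered.shape)                  # (108, 158, 3)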
+ pad_val (Number | Sequence[Number]): Same as :func:`impad`. + + Returns: + ndarray: The padded image. + """ + pad_h = int(np.ceil(img.shape[0] / divisor)) * divisor + pad_w = int(np.ceil(img.shape[1] / divisor)) * divisor + return impad(img, shape=(pad_h, pad_w), pad_val=pad_val) + + +def cutout(img, shape, pad_val=0): + """Randomly cut out a rectangle from the original img. + + Args: + img (ndarray): Image to be cutout. + shape (int | tuple[int]): Expected cutout shape (h, w). If given as a + int, the value will be used for both h and w. + pad_val (int | float | tuple[int | float]): Values to be filled in the + cut area. Defaults to 0. + + Returns: + ndarray: The cutout image. + """ + + channels = 1 if img.ndim == 2 else img.shape[2] + if isinstance(shape, int): + cut_h, cut_w = shape, shape + else: + assert isinstance(shape, tuple) and len(shape) == 2, \ + f'shape must be a int or a tuple with length 2, but got type ' \ + f'{type(shape)} instead.' + cut_h, cut_w = shape + if isinstance(pad_val, (int, float)): + pad_val = tuple([pad_val] * channels) + elif isinstance(pad_val, tuple): + assert len(pad_val) == channels, \ + 'Expected the num of elements in tuple equals the channels' \ + 'of input image. Found {} vs {}'.format( + len(pad_val), channels) + else: + raise TypeError(f'Invalid type {type(pad_val)} for `pad_val`') + + img_h, img_w = img.shape[:2] + y0 = np.random.uniform(img_h) + x0 = np.random.uniform(img_w) + + y1 = int(max(0, y0 - cut_h / 2.)) + x1 = int(max(0, x0 - cut_w / 2.)) + y2 = min(img_h, y1 + cut_h) + x2 = min(img_w, x1 + cut_w) + + if img.ndim == 2: + patch_shape = (y2 - y1, x2 - x1) + else: + patch_shape = (y2 - y1, x2 - x1, channels) + + img_cutout = img.copy() + patch = np.array( + pad_val, dtype=img.dtype) * np.ones( + patch_shape, dtype=img.dtype) + img_cutout[y1:y2, x1:x2, ...] = patch + + return img_cutout + + +def _get_shear_matrix(magnitude, direction='horizontal'): + """Generate the shear matrix for transformation. + + Args: + magnitude (int | float): The magnitude used for shear. + direction (str): The flip direction, either "horizontal" + or "vertical". + + Returns: + ndarray: The shear matrix with dtype float32. + """ + if direction == 'horizontal': + shear_matrix = np.float32([[1, magnitude, 0], [0, 1, 0]]) + elif direction == 'vertical': + shear_matrix = np.float32([[1, 0, 0], [magnitude, 1, 0]]) + return shear_matrix + + +def imshear(img, + magnitude, + direction='horizontal', + border_value=0, + interpolation='bilinear'): + """Shear an image. + + Args: + img (ndarray): Image to be sheared with format (h, w) + or (h, w, c). + magnitude (int | float): The magnitude used for shear. + direction (str): The flip direction, either "horizontal" + or "vertical". + border_value (int | tuple[int]): Value used in case of a + constant border. + interpolation (str): Same as :func:`resize`. + + Returns: + ndarray: The sheared image. + """ + assert direction in ['horizontal', + 'vertical'], f'Invalid direction: {direction}' + height, width = img.shape[:2] + if img.ndim == 2: + channels = 1 + elif img.ndim == 3: + channels = img.shape[-1] + if isinstance(border_value, int): + border_value = tuple([border_value] * channels) + elif isinstance(border_value, tuple): + assert len(border_value) == channels, \ + 'Expected the num of elements in tuple equals the channels' \ + 'of input image. 
Found {} vs {}'.format( + len(border_value), channels) + else: + raise ValueError( + f'Invalid type {type(border_value)} for `border_value`') + shear_matrix = _get_shear_matrix(magnitude, direction) + sheared = cv2.warpAffine( + img, + shear_matrix, + (width, height), + # Note case when the number elements in `border_value` + # greater than 3 (e.g. shearing masks whose channels large + # than 3) will raise TypeError in `cv2.warpAffine`. + # Here simply slice the first 3 values in `border_value`. + borderValue=border_value[:3], + flags=cv2_interp_codes[interpolation]) + return sheared + + +def _get_translate_matrix(offset, direction='horizontal'): + """Generate the translate matrix. + + Args: + offset (int | float): The offset used for translate. + direction (str): The translate direction, either + "horizontal" or "vertical". + + Returns: + ndarray: The translate matrix with dtype float32. + """ + if direction == 'horizontal': + translate_matrix = np.float32([[1, 0, offset], [0, 1, 0]]) + elif direction == 'vertical': + translate_matrix = np.float32([[1, 0, 0], [0, 1, offset]]) + return translate_matrix + + +def imtranslate(img, + offset, + direction='horizontal', + border_value=0, + interpolation='bilinear'): + """Translate an image. + + Args: + img (ndarray): Image to be translated with format + (h, w) or (h, w, c). + offset (int | float): The offset used for translate. + direction (str): The translate direction, either "horizontal" + or "vertical". + border_value (int | tuple[int]): Value used in case of a + constant border. + interpolation (str): Same as :func:`resize`. + + Returns: + ndarray: The translated image. + """ + assert direction in ['horizontal', + 'vertical'], f'Invalid direction: {direction}' + height, width = img.shape[:2] + if img.ndim == 2: + channels = 1 + elif img.ndim == 3: + channels = img.shape[-1] + if isinstance(border_value, int): + border_value = tuple([border_value] * channels) + elif isinstance(border_value, tuple): + assert len(border_value) == channels, \ + 'Expected the num of elements in tuple equals the channels' \ + 'of input image. Found {} vs {}'.format( + len(border_value), channels) + else: + raise ValueError( + f'Invalid type {type(border_value)} for `border_value`.') + translate_matrix = _get_translate_matrix(offset, direction) + translated = cv2.warpAffine( + img, + translate_matrix, + (width, height), + # Note case when the number elements in `border_value` + # greater than 3 (e.g. translating masks whose channels + # large than 3) will raise TypeError in `cv2.warpAffine`. + # Here simply slice the first 3 values in `border_value`. + borderValue=border_value[:3], + flags=cv2_interp_codes[interpolation]) + return translated diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/image/io.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/image/io.py new file mode 100644 index 0000000000000000000000000000000000000000..4e8f1877978840aede93774d86643b129751db13 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/image/io.py @@ -0,0 +1,258 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
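# A sketch of the remaining geometric augmentations above (hypothetical input):
import numpy as np
from annotator.mmpkg.mmcv.image.geometric import cutout, imshear, imtranslate

img = np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8)

sheared = imshear(img, magnitude=0.3)                    # horizontal shear
shifted = imtranslate(img, offset=10, direction='vertical')
holed = cutout(img, shape=(16, 16), pad_val=(0, 0, 0))   # random 16x16 zero patch
print(sheared.shape, shifted.shape, holed.shape)         # all (64, 64, 3)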
+import io +import os.path as osp +from pathlib import Path + +import cv2 +import numpy as np +from cv2 import (IMREAD_COLOR, IMREAD_GRAYSCALE, IMREAD_IGNORE_ORIENTATION, + IMREAD_UNCHANGED) + +from annotator.mmpkg.mmcv.utils import check_file_exist, is_str, mkdir_or_exist + +try: + from turbojpeg import TJCS_RGB, TJPF_BGR, TJPF_GRAY, TurboJPEG +except ImportError: + TJCS_RGB = TJPF_GRAY = TJPF_BGR = TurboJPEG = None + +try: + from PIL import Image, ImageOps +except ImportError: + Image = None + +try: + import tifffile +except ImportError: + tifffile = None + +jpeg = None +supported_backends = ['cv2', 'turbojpeg', 'pillow', 'tifffile'] + +imread_flags = { + 'color': IMREAD_COLOR, + 'grayscale': IMREAD_GRAYSCALE, + 'unchanged': IMREAD_UNCHANGED, + 'color_ignore_orientation': IMREAD_IGNORE_ORIENTATION | IMREAD_COLOR, + 'grayscale_ignore_orientation': + IMREAD_IGNORE_ORIENTATION | IMREAD_GRAYSCALE +} + +imread_backend = 'cv2' + + +def use_backend(backend): + """Select a backend for image decoding. + + Args: + backend (str): The image decoding backend type. Options are `cv2`, + `pillow`, `turbojpeg` (see https://github.com/lilohuang/PyTurboJPEG) + and `tifffile`. `turbojpeg` is faster but it only supports `.jpeg` + file format. + """ + assert backend in supported_backends + global imread_backend + imread_backend = backend + if imread_backend == 'turbojpeg': + if TurboJPEG is None: + raise ImportError('`PyTurboJPEG` is not installed') + global jpeg + if jpeg is None: + jpeg = TurboJPEG() + elif imread_backend == 'pillow': + if Image is None: + raise ImportError('`Pillow` is not installed') + elif imread_backend == 'tifffile': + if tifffile is None: + raise ImportError('`tifffile` is not installed') + + +def _jpegflag(flag='color', channel_order='bgr'): + channel_order = channel_order.lower() + if channel_order not in ['rgb', 'bgr']: + raise ValueError('channel order must be either "rgb" or "bgr"') + + if flag == 'color': + if channel_order == 'bgr': + return TJPF_BGR + elif channel_order == 'rgb': + return TJCS_RGB + elif flag == 'grayscale': + return TJPF_GRAY + else: + raise ValueError('flag must be "color" or "grayscale"') + + +def _pillow2array(img, flag='color', channel_order='bgr'): + """Convert a pillow image to numpy array. + + Args: + img (:obj:`PIL.Image.Image`): The image loaded using PIL + flag (str): Flags specifying the color type of a loaded image, + candidates are 'color', 'grayscale' and 'unchanged'. + Default to 'color'. + channel_order (str): The channel order of the output image array, + candidates are 'bgr' and 'rgb'. Default to 'bgr'. + + Returns: + np.ndarray: The converted numpy array + """ + channel_order = channel_order.lower() + if channel_order not in ['rgb', 'bgr']: + raise ValueError('channel order must be either "rgb" or "bgr"') + + if flag == 'unchanged': + array = np.array(img) + if array.ndim >= 3 and array.shape[2] >= 3: # color image + array[:, :, :3] = array[:, :, (2, 1, 0)] # RGB to BGR + else: + # Handle exif orientation tag + if flag in ['color', 'grayscale']: + img = ImageOps.exif_transpose(img) + # If the image mode is not 'RGB', convert it to 'RGB' first. + if img.mode != 'RGB': + if img.mode != 'LA': + # Most formats except 'LA' can be directly converted to RGB + img = img.convert('RGB') + else: + # When the mode is 'LA', the default conversion will fill in + # the canvas with black, which sometimes shadows black objects + # in the foreground. 
+ # + # Therefore, a random color (124, 117, 104) is used for canvas + img_rgba = img.convert('RGBA') + img = Image.new('RGB', img_rgba.size, (124, 117, 104)) + img.paste(img_rgba, mask=img_rgba.split()[3]) # 3 is alpha + if flag in ['color', 'color_ignore_orientation']: + array = np.array(img) + if channel_order != 'rgb': + array = array[:, :, ::-1] # RGB to BGR + elif flag in ['grayscale', 'grayscale_ignore_orientation']: + img = img.convert('L') + array = np.array(img) + else: + raise ValueError( + 'flag must be "color", "grayscale", "unchanged", ' + f'"color_ignore_orientation" or "grayscale_ignore_orientation"' + f' but got {flag}') + return array + + +def imread(img_or_path, flag='color', channel_order='bgr', backend=None): + """Read an image. + + Args: + img_or_path (ndarray or str or Path): Either a numpy array or str or + pathlib.Path. If it is a numpy array (loaded image), then + it will be returned as is. + flag (str): Flags specifying the color type of a loaded image, + candidates are `color`, `grayscale`, `unchanged`, + `color_ignore_orientation` and `grayscale_ignore_orientation`. + By default, `cv2` and `pillow` backend would rotate the image + according to its EXIF info unless called with `unchanged` or + `*_ignore_orientation` flags. `turbojpeg` and `tifffile` backend + always ignore image's EXIF info regardless of the flag. + The `turbojpeg` backend only supports `color` and `grayscale`. + channel_order (str): Order of channel, candidates are `bgr` and `rgb`. + backend (str | None): The image decoding backend type. Options are + `cv2`, `pillow`, `turbojpeg`, `tifffile`, `None`. + If backend is None, the global imread_backend specified by + ``mmcv.use_backend()`` will be used. Default: None. + + Returns: + ndarray: Loaded image array. + """ + + if backend is None: + backend = imread_backend + if backend not in supported_backends: + raise ValueError(f'backend: {backend} is not supported. Supported ' + "backends are 'cv2', 'turbojpeg', 'pillow'") + if isinstance(img_or_path, Path): + img_or_path = str(img_or_path) + + if isinstance(img_or_path, np.ndarray): + return img_or_path + elif is_str(img_or_path): + check_file_exist(img_or_path, + f'img file does not exist: {img_or_path}') + if backend == 'turbojpeg': + with open(img_or_path, 'rb') as in_file: + img = jpeg.decode(in_file.read(), + _jpegflag(flag, channel_order)) + if img.shape[-1] == 1: + img = img[:, :, 0] + return img + elif backend == 'pillow': + img = Image.open(img_or_path) + img = _pillow2array(img, flag, channel_order) + return img + elif backend == 'tifffile': + img = tifffile.imread(img_or_path) + return img + else: + flag = imread_flags[flag] if is_str(flag) else flag + img = cv2.imread(img_or_path, flag) + if flag == IMREAD_COLOR and channel_order == 'rgb': + cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img) + return img + else: + raise TypeError('"img" must be a numpy array or a str or ' + 'a pathlib.Path object') + + +def imfrombytes(content, flag='color', channel_order='bgr', backend=None): + """Read an image from bytes. + + Args: + content (bytes): Image bytes got from files or other streams. + flag (str): Same as :func:`imread`. + backend (str | None): The image decoding backend type. Options are + `cv2`, `pillow`, `turbojpeg`, `None`. If backend is None, the + global imread_backend specified by ``mmcv.use_backend()`` will be + used. Default: None. + + Returns: + ndarray: Loaded image array. 
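# A sketch of reading images with the loader above, assuming a hypothetical
# local file /tmp/sample.jpg exists; the global backend defaults to cv2:
from annotator.mmpkg.mmcv.image.io import imread, use_backend

img_bgr = imread('/tmp/sample.jpg')                        # HxWx3, BGR order
img_rgb = imread('/tmp/sample.jpg', channel_order='rgb')   # same image, RGB order
gray = imread('/tmp/sample.jpg', flag='grayscale')         # HxW

use_backend('pillow')             # switch the global decoding backend (requires Pillow)
img_pil = imread('/tmp/sample.jpg')
use_backend('cv2')                # restore the default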
+ """ + + if backend is None: + backend = imread_backend + if backend not in supported_backends: + raise ValueError(f'backend: {backend} is not supported. Supported ' + "backends are 'cv2', 'turbojpeg', 'pillow'") + if backend == 'turbojpeg': + img = jpeg.decode(content, _jpegflag(flag, channel_order)) + if img.shape[-1] == 1: + img = img[:, :, 0] + return img + elif backend == 'pillow': + buff = io.BytesIO(content) + img = Image.open(buff) + img = _pillow2array(img, flag, channel_order) + return img + else: + img_np = np.frombuffer(content, np.uint8) + flag = imread_flags[flag] if is_str(flag) else flag + img = cv2.imdecode(img_np, flag) + if flag == IMREAD_COLOR and channel_order == 'rgb': + cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img) + return img + + +def imwrite(img, file_path, params=None, auto_mkdir=True): + """Write image to file. + + Args: + img (ndarray): Image array to be written. + file_path (str): Image file path. + params (None or list): Same as opencv :func:`imwrite` interface. + auto_mkdir (bool): If the parent folder of `file_path` does not exist, + whether to create it automatically. + + Returns: + bool: Successful or not. + """ + if auto_mkdir: + dir_name = osp.abspath(osp.dirname(file_path)) + mkdir_or_exist(dir_name) + return cv2.imwrite(file_path, img, params) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/image/misc.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/image/misc.py new file mode 100644 index 0000000000000000000000000000000000000000..cd60e66131719ca0627569598809366b9c1ac64d --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/image/misc.py @@ -0,0 +1,44 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import numpy as np + +import annotator.mmpkg.mmcv as mmcv + +try: + import torch +except ImportError: + torch = None + + +def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True): + """Convert tensor to 3-channel images. + + Args: + tensor (torch.Tensor): Tensor that contains multiple images, shape ( + N, C, H, W). + mean (tuple[float], optional): Mean of images. Defaults to (0, 0, 0). + std (tuple[float], optional): Standard deviation of images. + Defaults to (1, 1, 1). + to_rgb (bool, optional): Whether the tensor was converted to RGB + format in the first place. If so, convert it back to BGR. + Defaults to True. + + Returns: + list[np.ndarray]: A list that contains multiple images. + """ + + if torch is None: + raise RuntimeError('pytorch is not installed') + assert torch.is_tensor(tensor) and tensor.ndim == 4 + assert len(mean) == 3 + assert len(std) == 3 + + num_imgs = tensor.size(0) + mean = np.array(mean, dtype=np.float32) + std = np.array(std, dtype=np.float32) + imgs = [] + for img_id in range(num_imgs): + img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0) + img = mmcv.imdenormalize( + img, mean, std, to_bgr=to_rgb).astype(np.uint8) + imgs.append(np.ascontiguousarray(img)) + return imgs diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/image/photometric.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/image/photometric.py new file mode 100644 index 0000000000000000000000000000000000000000..5085d012019c0cbf56f66f421a378278c1a058ae --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/image/photometric.py @@ -0,0 +1,428 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import cv2 +import numpy as np + +from ..utils import is_tuple_of +from .colorspace import bgr2gray, gray2bgr + + +def imnormalize(img, mean, std, to_rgb=True): + """Normalize an image with mean and std. + + Args: + img (ndarray): Image to be normalized. + mean (ndarray): The mean to be used for normalize. + std (ndarray): The std to be used for normalize. + to_rgb (bool): Whether to convert to rgb. + + Returns: + ndarray: The normalized image. + """ + img = img.copy().astype(np.float32) + return imnormalize_(img, mean, std, to_rgb) + + +def imnormalize_(img, mean, std, to_rgb=True): + """Inplace normalize an image with mean and std. + + Args: + img (ndarray): Image to be normalized. + mean (ndarray): The mean to be used for normalize. + std (ndarray): The std to be used for normalize. + to_rgb (bool): Whether to convert to rgb. + + Returns: + ndarray: The normalized image. + """ + # cv2 inplace normalization does not accept uint8 + assert img.dtype != np.uint8 + mean = np.float64(mean.reshape(1, -1)) + stdinv = 1 / np.float64(std.reshape(1, -1)) + if to_rgb: + cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img) # inplace + cv2.subtract(img, mean, img) # inplace + cv2.multiply(img, stdinv, img) # inplace + return img + + +def imdenormalize(img, mean, std, to_bgr=True): + assert img.dtype != np.uint8 + mean = mean.reshape(1, -1).astype(np.float64) + std = std.reshape(1, -1).astype(np.float64) + img = cv2.multiply(img, std) # make a copy + cv2.add(img, mean, img) # inplace + if to_bgr: + cv2.cvtColor(img, cv2.COLOR_RGB2BGR, img) # inplace + return img + + +def iminvert(img): + """Invert (negate) an image. + + Args: + img (ndarray): Image to be inverted. + + Returns: + ndarray: The inverted image. + """ + return np.full_like(img, 255) - img + + +def solarize(img, thr=128): + """Solarize an image (invert all pixel values above a threshold) + + Args: + img (ndarray): Image to be solarized. + thr (int): Threshold for solarizing (0 - 255). + + Returns: + ndarray: The solarized image. + """ + img = np.where(img < thr, img, 255 - img) + return img + + +def posterize(img, bits): + """Posterize an image (reduce the number of bits for each color channel) + + Args: + img (ndarray): Image to be posterized. + bits (int): Number of bits (1 to 8) to use for posterizing. + + Returns: + ndarray: The posterized image. + """ + shift = 8 - bits + img = np.left_shift(np.right_shift(img, shift), shift) + return img + + +def adjust_color(img, alpha=1, beta=None, gamma=0): + r"""It blends the source image and its gray image: + + .. math:: + output = img * alpha + gray\_img * beta + gamma + + Args: + img (ndarray): The input source image. + alpha (int | float): Weight for the source image. Default 1. + beta (int | float): Weight for the converted gray image. + If None, it's assigned the value (1 - `alpha`). + gamma (int | float): Scalar added to each sum. + Same as :func:`cv2.addWeighted`. Default 0. + + Returns: + ndarray: Colored image which has the same size and dtype as input. + """ + gray_img = bgr2gray(img) + gray_img = np.tile(gray_img[..., None], [1, 1, 3]) + if beta is None: + beta = 1 - alpha + colored_img = cv2.addWeighted(img, alpha, gray_img, beta, gamma) + if not colored_img.dtype == np.uint8: + # Note when the dtype of `img` is not the default `np.uint8` + # (e.g. np.float32), the value in `colored_img` got from cv2 + # is not guaranteed to be in range [0, 255], so here clip + # is needed. 
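# --- Editor's illustrative sketch (not part of the diff above) ---
# Round-trip through imnormalize/imdenormalize defined in this file; the input
# is a hypothetical random BGR uint8 array, and the import path assumes the
# vendored layout shown in this diff.
import numpy as np
from annotator.mmpkg.mmcv.image.photometric import imnormalize, imdenormalize

img = np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8)   # BGR, uint8
mean = np.array([123.675, 116.28, 103.53], dtype=np.float32)
std = np.array([58.395, 57.12, 57.375], dtype=np.float32)

norm = imnormalize(img, mean, std, to_rgb=True)          # float32, RGB, zero-centered
restored = imdenormalize(norm, mean, std, to_bgr=True)   # back to BGR in the original range
assert np.allclose(restored, img.astype(np.float32), atol=1e-2)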
+ colored_img = np.clip(colored_img, 0, 255) + return colored_img + + +def imequalize(img): + """Equalize the image histogram. + + This function applies a non-linear mapping to the input image, + in order to create a uniform distribution of grayscale values + in the output image. + + Args: + img (ndarray): Image to be equalized. + + Returns: + ndarray: The equalized image. + """ + + def _scale_channel(im, c): + """Scale the data in the corresponding channel.""" + im = im[:, :, c] + # Compute the histogram of the image channel. + histo = np.histogram(im, 256, (0, 255))[0] + # For computing the step, filter out the nonzeros. + nonzero_histo = histo[histo > 0] + step = (np.sum(nonzero_histo) - nonzero_histo[-1]) // 255 + if not step: + lut = np.array(range(256)) + else: + # Compute the cumulative sum, shifted by step // 2 + # and then normalized by step. + lut = (np.cumsum(histo) + (step // 2)) // step + # Shift lut, prepending with 0. + lut = np.concatenate([[0], lut[:-1]], 0) + # handle potential integer overflow + lut[lut > 255] = 255 + # If step is zero, return the original image. + # Otherwise, index from lut. + return np.where(np.equal(step, 0), im, lut[im]) + + # Scales each channel independently and then stacks + # the result. + s1 = _scale_channel(img, 0) + s2 = _scale_channel(img, 1) + s3 = _scale_channel(img, 2) + equalized_img = np.stack([s1, s2, s3], axis=-1) + return equalized_img.astype(img.dtype) + + +def adjust_brightness(img, factor=1.): + """Adjust image brightness. + + This function controls the brightness of an image. An + enhancement factor of 0.0 gives a black image. + A factor of 1.0 gives the original image. This function + blends the source image and the degenerated black image: + + .. math:: + output = img * factor + degenerated * (1 - factor) + + Args: + img (ndarray): Image to be brightened. + factor (float): A value controls the enhancement. + Factor 1.0 returns the original image, lower + factors mean less color (brightness, contrast, + etc), and higher values more. Default 1. + + Returns: + ndarray: The brightened image. + """ + degenerated = np.zeros_like(img) + # Note manually convert the dtype to np.float32, to + # achieve as close results as PIL.ImageEnhance.Brightness. + # Set beta=1-factor, and gamma=0 + brightened_img = cv2.addWeighted( + img.astype(np.float32), factor, degenerated.astype(np.float32), + 1 - factor, 0) + brightened_img = np.clip(brightened_img, 0, 255) + return brightened_img.astype(img.dtype) + + +def adjust_contrast(img, factor=1.): + """Adjust image contrast. + + This function controls the contrast of an image. An + enhancement factor of 0.0 gives a solid grey + image. A factor of 1.0 gives the original image. It + blends the source image and the degenerated mean image: + + .. math:: + output = img * factor + degenerated * (1 - factor) + + Args: + img (ndarray): Image to be contrasted. BGR order. + factor (float): Same as :func:`mmcv.adjust_brightness`. + + Returns: + ndarray: The contrasted image. + """ + gray_img = bgr2gray(img) + hist = np.histogram(gray_img, 256, (0, 255))[0] + mean = round(np.sum(gray_img) / np.sum(hist)) + degenerated = (np.ones_like(img[..., 0]) * mean).astype(img.dtype) + degenerated = gray2bgr(degenerated) + contrasted_img = cv2.addWeighted( + img.astype(np.float32), factor, degenerated.astype(np.float32), + 1 - factor, 0) + contrasted_img = np.clip(contrasted_img, 0, 255) + return contrasted_img.astype(img.dtype) + + +def auto_contrast(img, cutoff=0): + """Auto adjust image contrast. 
+ + This function maximize (normalize) image contrast by first removing cutoff + percent of the lightest and darkest pixels from the histogram and remapping + the image so that the darkest pixel becomes black (0), and the lightest + becomes white (255). + + Args: + img (ndarray): Image to be contrasted. BGR order. + cutoff (int | float | tuple): The cutoff percent of the lightest and + darkest pixels to be removed. If given as tuple, it shall be + (low, high). Otherwise, the single value will be used for both. + Defaults to 0. + + Returns: + ndarray: The contrasted image. + """ + + def _auto_contrast_channel(im, c, cutoff): + im = im[:, :, c] + # Compute the histogram of the image channel. + histo = np.histogram(im, 256, (0, 255))[0] + # Remove cut-off percent pixels from histo + histo_sum = np.cumsum(histo) + cut_low = histo_sum[-1] * cutoff[0] // 100 + cut_high = histo_sum[-1] - histo_sum[-1] * cutoff[1] // 100 + histo_sum = np.clip(histo_sum, cut_low, cut_high) - cut_low + histo = np.concatenate([[histo_sum[0]], np.diff(histo_sum)], 0) + + # Compute mapping + low, high = np.nonzero(histo)[0][0], np.nonzero(histo)[0][-1] + # If all the values have been cut off, return the origin img + if low >= high: + return im + scale = 255.0 / (high - low) + offset = -low * scale + lut = np.array(range(256)) + lut = lut * scale + offset + lut = np.clip(lut, 0, 255) + return lut[im] + + if isinstance(cutoff, (int, float)): + cutoff = (cutoff, cutoff) + else: + assert isinstance(cutoff, tuple), 'cutoff must be of type int, ' \ + f'float or tuple, but got {type(cutoff)} instead.' + # Auto adjusts contrast for each channel independently and then stacks + # the result. + s1 = _auto_contrast_channel(img, 0, cutoff) + s2 = _auto_contrast_channel(img, 1, cutoff) + s3 = _auto_contrast_channel(img, 2, cutoff) + contrasted_img = np.stack([s1, s2, s3], axis=-1) + return contrasted_img.astype(img.dtype) + + +def adjust_sharpness(img, factor=1., kernel=None): + """Adjust image sharpness. + + This function controls the sharpness of an image. An + enhancement factor of 0.0 gives a blurred image. A + factor of 1.0 gives the original image. And a factor + of 2.0 gives a sharpened image. It blends the source + image and the degenerated mean image: + + .. math:: + output = img * factor + degenerated * (1 - factor) + + Args: + img (ndarray): Image to be sharpened. BGR order. + factor (float): Same as :func:`mmcv.adjust_brightness`. + kernel (np.ndarray, optional): Filter kernel to be applied on the img + to obtain the degenerated img. Defaults to None. + + Note: + No value sanity check is enforced on the kernel set by users. So with + an inappropriate kernel, the ``adjust_sharpness`` may fail to perform + the function its name indicates but end up performing whatever + transform determined by the kernel. + + Returns: + ndarray: The sharpened image. + """ + + if kernel is None: + # adopted from PIL.ImageFilter.SMOOTH + kernel = np.array([[1., 1., 1.], [1., 5., 1.], [1., 1., 1.]]) / 13 + assert isinstance(kernel, np.ndarray), \ + f'kernel must be of type np.ndarray, but got {type(kernel)} instead.' + assert kernel.ndim == 2, \ + f'kernel must have a dimension of 2, but got {kernel.ndim} instead.' 
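# --- Editor's illustrative sketch (not part of the diff above) ---
# The brightness/contrast/sharpness helpers above all blend the source image
# with a degenerated image; a random BGR uint8 array stands in for a real photo
# and the import path assumes the vendored layout shown in this diff.
import numpy as np
from annotator.mmpkg.mmcv.image.photometric import (
    adjust_brightness, adjust_contrast, adjust_sharpness)

img = np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8)

darker = adjust_brightness(img, factor=0.5)   # halfway toward a black image
flatter = adjust_contrast(img, factor=0.5)    # halfway toward the mean-gray image
sharper = adjust_sharpness(img, factor=2.0)   # pushed away from the smoothed image
assert darker.dtype == img.dtype and darker.shape == img.shape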
+ + degenerated = cv2.filter2D(img, -1, kernel) + sharpened_img = cv2.addWeighted( + img.astype(np.float32), factor, degenerated.astype(np.float32), + 1 - factor, 0) + sharpened_img = np.clip(sharpened_img, 0, 255) + return sharpened_img.astype(img.dtype) + + +def adjust_lighting(img, eigval, eigvec, alphastd=0.1, to_rgb=True): + """AlexNet-style PCA jitter. + + This data augmentation is proposed in `ImageNet Classification with Deep + Convolutional Neural Networks + `_. + + Args: + img (ndarray): Image to be adjusted lighting. BGR order. + eigval (ndarray): the eigenvalue of the convariance matrix of pixel + values, respectively. + eigvec (ndarray): the eigenvector of the convariance matrix of pixel + values, respectively. + alphastd (float): The standard deviation for distribution of alpha. + Defaults to 0.1 + to_rgb (bool): Whether to convert img to rgb. + + Returns: + ndarray: The adjusted image. + """ + assert isinstance(eigval, np.ndarray) and isinstance(eigvec, np.ndarray), \ + f'eigval and eigvec should both be of type np.ndarray, got ' \ + f'{type(eigval)} and {type(eigvec)} instead.' + + assert eigval.ndim == 1 and eigvec.ndim == 2 + assert eigvec.shape == (3, eigval.shape[0]) + n_eigval = eigval.shape[0] + assert isinstance(alphastd, float), 'alphastd should be of type float, ' \ + f'got {type(alphastd)} instead.' + + img = img.copy().astype(np.float32) + if to_rgb: + cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img) # inplace + + alpha = np.random.normal(0, alphastd, n_eigval) + alter = eigvec \ + * np.broadcast_to(alpha.reshape(1, n_eigval), (3, n_eigval)) \ + * np.broadcast_to(eigval.reshape(1, n_eigval), (3, n_eigval)) + alter = np.broadcast_to(alter.sum(axis=1).reshape(1, 1, 3), img.shape) + img_adjusted = img + alter + return img_adjusted + + +def lut_transform(img, lut_table): + """Transform array by look-up table. + + The function lut_transform fills the output array with values from the + look-up table. Indices of the entries are taken from the input array. + + Args: + img (ndarray): Image to be transformed. + lut_table (ndarray): look-up table of 256 elements; in case of + multi-channel input array, the table should either have a single + channel (in this case the same table is used for all channels) or + the same number of channels as in the input array. + + Returns: + ndarray: The transformed image. + """ + assert isinstance(img, np.ndarray) + assert 0 <= np.min(img) and np.max(img) <= 255 + assert isinstance(lut_table, np.ndarray) + assert lut_table.shape == (256, ) + + return cv2.LUT(np.array(img, dtype=np.uint8), lut_table) + + +def clahe(img, clip_limit=40.0, tile_grid_size=(8, 8)): + """Use CLAHE method to process the image. + + See `ZUIDERVELD,K. Contrast Limited Adaptive Histogram Equalization[J]. + Graphics Gems, 1994:474-485.` for more information. + + Args: + img (ndarray): Image to be processed. + clip_limit (float): Threshold for contrast limiting. Default: 40.0. + tile_grid_size (tuple[int]): Size of grid for histogram equalization. + Input image will be divided into equally sized rectangular tiles. + It defines the number of tiles in row and column. Default: (8, 8). + + Returns: + ndarray: The processed image. 
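# --- Editor's illustrative sketch (not part of the diff above) ---
# lut_transform expects a 256-entry table and uint8-range data; clahe
# additionally requires a single-channel (2-D) image. The arrays and import
# path below are hypothetical, matching the vendored layout shown in this diff.
import numpy as np
from annotator.mmpkg.mmcv.image.photometric import lut_transform, clahe

gray = np.random.randint(0, 256, (64, 64), dtype=np.uint8)

inv_lut = (255 - np.arange(256)).astype(np.uint8)   # simple inversion table
inverted = lut_transform(gray, inv_lut)             # equivalent to 255 - gray

equalized = clahe(gray, clip_limit=40.0, tile_grid_size=(8, 8))
assert equalized.shape == gray.shape and equalized.dtype == np.uint8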
+ """ + assert isinstance(img, np.ndarray) + assert img.ndim == 2 + assert isinstance(clip_limit, (float, int)) + assert is_tuple_of(tile_grid_size, int) + assert len(tile_grid_size) == 2 + + clahe = cv2.createCLAHE(clip_limit, tile_grid_size) + return clahe.apply(np.array(img, dtype=np.uint8)) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/model_zoo/deprecated.json b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/model_zoo/deprecated.json new file mode 100644 index 0000000000000000000000000000000000000000..25cf6f28caecc22a77e3136fefa6b8dfc0e6cb5b --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/model_zoo/deprecated.json @@ -0,0 +1,6 @@ +{ + "resnet50_caffe": "detectron/resnet50_caffe", + "resnet50_caffe_bgr": "detectron2/resnet50_caffe_bgr", + "resnet101_caffe": "detectron/resnet101_caffe", + "resnet101_caffe_bgr": "detectron2/resnet101_caffe_bgr" +} diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/model_zoo/mmcls.json b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/model_zoo/mmcls.json new file mode 100644 index 0000000000000000000000000000000000000000..bdb311d9fe6d9f317290feedc9e37236c6cf6e8f --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/model_zoo/mmcls.json @@ -0,0 +1,31 @@ +{ + "vgg11": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg11_batch256_imagenet_20210208-4271cd6c.pth", + "vgg13": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg13_batch256_imagenet_20210208-4d1d6080.pth", + "vgg16": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg16_batch256_imagenet_20210208-db26f1a5.pth", + "vgg19": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg19_batch256_imagenet_20210208-e6920e4a.pth", + "vgg11_bn": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg11_bn_batch256_imagenet_20210207-f244902c.pth", + "vgg13_bn": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg13_bn_batch256_imagenet_20210207-1a8b7864.pth", + "vgg16_bn": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg16_bn_batch256_imagenet_20210208-7e55cd29.pth", + "vgg19_bn": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg19_bn_batch256_imagenet_20210208-da620c4f.pth", + "resnet18": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet18_batch256_imagenet_20200708-34ab8f90.pth", + "resnet34": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet34_batch256_imagenet_20200708-32ffb4f7.pth", + "resnet50": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_batch256_imagenet_20200708-cfb998bf.pth", + "resnet101": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet101_batch256_imagenet_20200708-753f3608.pth", + "resnet152": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet152_batch256_imagenet_20200708-ec25b1f9.pth", + "resnet50_v1d": "https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d50_batch256_imagenet_20200708-1ad0ce94.pth", + "resnet101_v1d": "https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d101_batch256_imagenet_20200708-9cb302ef.pth", + "resnet152_v1d": "https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d152_batch256_imagenet_20200708-e79cb6a2.pth", + "resnext50_32x4d": "https://download.openmmlab.com/mmclassification/v0/resnext/resnext50_32x4d_b32x8_imagenet_20210429-56066e27.pth", + "resnext101_32x4d": 
"https://download.openmmlab.com/mmclassification/v0/resnext/resnext101_32x4d_b32x8_imagenet_20210506-e0fa3dd5.pth", + "resnext101_32x8d": "https://download.openmmlab.com/mmclassification/v0/resnext/resnext101_32x8d_b32x8_imagenet_20210506-23a247d5.pth", + "resnext152_32x4d": "https://download.openmmlab.com/mmclassification/v0/resnext/resnext152_32x4d_b32x8_imagenet_20210524-927787be.pth", + "se-resnet50": "https://download.openmmlab.com/mmclassification/v0/se-resnet/se-resnet50_batch256_imagenet_20200804-ae206104.pth", + "se-resnet101": "https://download.openmmlab.com/mmclassification/v0/se-resnet/se-resnet101_batch256_imagenet_20200804-ba5b51d4.pth", + "resnest50": "https://download.openmmlab.com/mmclassification/v0/resnest/resnest50_imagenet_converted-1ebf0afe.pth", + "resnest101": "https://download.openmmlab.com/mmclassification/v0/resnest/resnest101_imagenet_converted-032caa52.pth", + "resnest200": "https://download.openmmlab.com/mmclassification/v0/resnest/resnest200_imagenet_converted-581a60f2.pth", + "resnest269": "https://download.openmmlab.com/mmclassification/v0/resnest/resnest269_imagenet_converted-59930960.pth", + "shufflenet_v1": "https://download.openmmlab.com/mmclassification/v0/shufflenet_v1/shufflenet_v1_batch1024_imagenet_20200804-5d6cec73.pth", + "shufflenet_v2": "https://download.openmmlab.com/mmclassification/v0/shufflenet_v2/shufflenet_v2_batch1024_imagenet_20200812-5bf4721e.pth", + "mobilenet_v2": "https://download.openmmlab.com/mmclassification/v0/mobilenet_v2/mobilenet_v2_batch256_imagenet_20200708-3b2dc3af.pth" +} diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/model_zoo/open_mmlab.json b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/model_zoo/open_mmlab.json new file mode 100644 index 0000000000000000000000000000000000000000..8311db4feef92faa0841c697d75efbee8430c3a0 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/model_zoo/open_mmlab.json @@ -0,0 +1,50 @@ +{ + "vgg16_caffe": "https://download.openmmlab.com/pretrain/third_party/vgg16_caffe-292e1171.pth", + "detectron/resnet50_caffe": "https://download.openmmlab.com/pretrain/third_party/resnet50_caffe-788b5fa3.pth", + "detectron2/resnet50_caffe": "https://download.openmmlab.com/pretrain/third_party/resnet50_msra-5891d200.pth", + "detectron/resnet101_caffe": "https://download.openmmlab.com/pretrain/third_party/resnet101_caffe-3ad79236.pth", + "detectron2/resnet101_caffe": "https://download.openmmlab.com/pretrain/third_party/resnet101_msra-6cc46731.pth", + "detectron2/resnext101_32x8d": "https://download.openmmlab.com/pretrain/third_party/resnext101_32x8d-1516f1aa.pth", + "resnext50_32x4d": "https://download.openmmlab.com/pretrain/third_party/resnext50-32x4d-0ab1a123.pth", + "resnext101_32x4d": "https://download.openmmlab.com/pretrain/third_party/resnext101_32x4d-a5af3160.pth", + "resnext101_64x4d": "https://download.openmmlab.com/pretrain/third_party/resnext101_64x4d-ee2c6f71.pth", + "contrib/resnet50_gn": "https://download.openmmlab.com/pretrain/third_party/resnet50_gn_thangvubk-ad1730dd.pth", + "detectron/resnet50_gn": "https://download.openmmlab.com/pretrain/third_party/resnet50_gn-9186a21c.pth", + "detectron/resnet101_gn": "https://download.openmmlab.com/pretrain/third_party/resnet101_gn-cac0ab98.pth", + "jhu/resnet50_gn_ws": "https://download.openmmlab.com/pretrain/third_party/resnet50_gn_ws-15beedd8.pth", + "jhu/resnet101_gn_ws": "https://download.openmmlab.com/pretrain/third_party/resnet101_gn_ws-3e3c308c.pth", + 
"jhu/resnext50_32x4d_gn_ws": "https://download.openmmlab.com/pretrain/third_party/resnext50_32x4d_gn_ws-0d87ac85.pth", + "jhu/resnext101_32x4d_gn_ws": "https://download.openmmlab.com/pretrain/third_party/resnext101_32x4d_gn_ws-34ac1a9e.pth", + "jhu/resnext50_32x4d_gn": "https://download.openmmlab.com/pretrain/third_party/resnext50_32x4d_gn-c7e8b754.pth", + "jhu/resnext101_32x4d_gn": "https://download.openmmlab.com/pretrain/third_party/resnext101_32x4d_gn-ac3bb84e.pth", + "msra/hrnetv2_w18_small": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w18_small-b5a04e21.pth", + "msra/hrnetv2_w18": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w18-00eb2006.pth", + "msra/hrnetv2_w32": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w32-dc9eeb4f.pth", + "msra/hrnetv2_w40": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w40-ed0b031c.pth", + "msra/hrnetv2_w48": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w48-d2186c55.pth", + "bninception_caffe": "https://download.openmmlab.com/pretrain/third_party/bn_inception_caffe-ed2e8665.pth", + "kin400/i3d_r50_f32s2_k400": "https://download.openmmlab.com/pretrain/third_party/i3d_r50_f32s2_k400-2c57e077.pth", + "kin400/nl3d_r50_f32s2_k400": "https://download.openmmlab.com/pretrain/third_party/nl3d_r50_f32s2_k400-fa7e7caa.pth", + "res2net101_v1d_26w_4s": "https://download.openmmlab.com/pretrain/third_party/res2net101_v1d_26w_4s_mmdetv2-f0a600f9.pth", + "regnetx_400mf": "https://download.openmmlab.com/pretrain/third_party/regnetx_400mf-a5b10d96.pth", + "regnetx_800mf": "https://download.openmmlab.com/pretrain/third_party/regnetx_800mf-1f4be4c7.pth", + "regnetx_1.6gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_1.6gf-5791c176.pth", + "regnetx_3.2gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_3.2gf-c2599b0f.pth", + "regnetx_4.0gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_4.0gf-a88f671e.pth", + "regnetx_6.4gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_6.4gf-006af45d.pth", + "regnetx_8.0gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_8.0gf-3c68abe7.pth", + "regnetx_12gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_12gf-4c2a3350.pth", + "resnet18_v1c": "https://download.openmmlab.com/pretrain/third_party/resnet18_v1c-b5776b93.pth", + "resnet50_v1c": "https://download.openmmlab.com/pretrain/third_party/resnet50_v1c-2cccc1ad.pth", + "resnet101_v1c": "https://download.openmmlab.com/pretrain/third_party/resnet101_v1c-e67eebb6.pth", + "mmedit/vgg16": "https://download.openmmlab.com/mmediting/third_party/vgg_state_dict.pth", + "mmedit/res34_en_nomixup": "https://download.openmmlab.com/mmediting/third_party/model_best_resnet34_En_nomixup.pth", + "mmedit/mobilenet_v2": "https://download.openmmlab.com/mmediting/third_party/mobilenet_v2.pth", + "contrib/mobilenet_v3_large": "https://download.openmmlab.com/pretrain/third_party/mobilenet_v3_large-bc2c3fd3.pth", + "contrib/mobilenet_v3_small": "https://download.openmmlab.com/pretrain/third_party/mobilenet_v3_small-47085aa1.pth", + "resnest50": "https://download.openmmlab.com/pretrain/third_party/resnest50_d2-7497a55b.pth", + "resnest101": "https://download.openmmlab.com/pretrain/third_party/resnest101_d2-f3b931b2.pth", + "resnest200": "https://download.openmmlab.com/pretrain/third_party/resnest200_d2-ca88e41f.pth", + "darknet53": "https://download.openmmlab.com/pretrain/third_party/darknet53-a628ea1b.pth", + "mmdet/mobilenet_v2": 
"https://download.openmmlab.com/mmdetection/v2.0/third_party/mobilenet_v2_batch256_imagenet-ff34753d.pth" +} diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..999e090a458ee148ceca0649f1e3806a40e909bd --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/__init__.py @@ -0,0 +1,81 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .assign_score_withk import assign_score_withk +from .ball_query import ball_query +from .bbox import bbox_overlaps +from .border_align import BorderAlign, border_align +from .box_iou_rotated import box_iou_rotated +from .carafe import CARAFE, CARAFENaive, CARAFEPack, carafe, carafe_naive +from .cc_attention import CrissCrossAttention +from .contour_expand import contour_expand +from .corner_pool import CornerPool +from .correlation import Correlation +from .deform_conv import DeformConv2d, DeformConv2dPack, deform_conv2d +from .deform_roi_pool import (DeformRoIPool, DeformRoIPoolPack, + ModulatedDeformRoIPoolPack, deform_roi_pool) +from .deprecated_wrappers import Conv2d_deprecated as Conv2d +from .deprecated_wrappers import ConvTranspose2d_deprecated as ConvTranspose2d +from .deprecated_wrappers import Linear_deprecated as Linear +from .deprecated_wrappers import MaxPool2d_deprecated as MaxPool2d +from .focal_loss import (SigmoidFocalLoss, SoftmaxFocalLoss, + sigmoid_focal_loss, softmax_focal_loss) +from .furthest_point_sample import (furthest_point_sample, + furthest_point_sample_with_dist) +from .fused_bias_leakyrelu import FusedBiasLeakyReLU, fused_bias_leakyrelu +from .gather_points import gather_points +from .group_points import GroupAll, QueryAndGroup, grouping_operation +from .info import (get_compiler_version, get_compiling_cuda_version, + get_onnxruntime_op_path) +from .iou3d import boxes_iou_bev, nms_bev, nms_normal_bev +from .knn import knn +from .masked_conv import MaskedConv2d, masked_conv2d +from .modulated_deform_conv import (ModulatedDeformConv2d, + ModulatedDeformConv2dPack, + modulated_deform_conv2d) +from .multi_scale_deform_attn import MultiScaleDeformableAttention +from .nms import batched_nms, nms, nms_match, nms_rotated, soft_nms +from .pixel_group import pixel_group +from .point_sample import (SimpleRoIAlign, point_sample, + rel_roi_point_to_rel_img_point) +from .points_in_boxes import (points_in_boxes_all, points_in_boxes_cpu, + points_in_boxes_part) +from .points_sampler import PointsSampler +from .psa_mask import PSAMask +from .roi_align import RoIAlign, roi_align +from .roi_align_rotated import RoIAlignRotated, roi_align_rotated +from .roi_pool import RoIPool, roi_pool +from .roiaware_pool3d import RoIAwarePool3d +from .roipoint_pool3d import RoIPointPool3d +from .saconv import SAConv2d +from .scatter_points import DynamicScatter, dynamic_scatter +from .sync_bn import SyncBatchNorm +from .three_interpolate import three_interpolate +from .three_nn import three_nn +from .tin_shift import TINShift, tin_shift +from .upfirdn2d import upfirdn2d +from .voxelize import Voxelization, voxelization + +__all__ = [ + 'bbox_overlaps', 'CARAFE', 'CARAFENaive', 'CARAFEPack', 'carafe', + 'carafe_naive', 'CornerPool', 'DeformConv2d', 'DeformConv2dPack', + 'deform_conv2d', 'DeformRoIPool', 'DeformRoIPoolPack', + 'ModulatedDeformRoIPoolPack', 'deform_roi_pool', 'SigmoidFocalLoss', + 'SoftmaxFocalLoss', 
'sigmoid_focal_loss', 'softmax_focal_loss', + 'get_compiler_version', 'get_compiling_cuda_version', + 'get_onnxruntime_op_path', 'MaskedConv2d', 'masked_conv2d', + 'ModulatedDeformConv2d', 'ModulatedDeformConv2dPack', + 'modulated_deform_conv2d', 'batched_nms', 'nms', 'soft_nms', 'nms_match', + 'RoIAlign', 'roi_align', 'RoIPool', 'roi_pool', 'SyncBatchNorm', 'Conv2d', + 'ConvTranspose2d', 'Linear', 'MaxPool2d', 'CrissCrossAttention', 'PSAMask', + 'point_sample', 'rel_roi_point_to_rel_img_point', 'SimpleRoIAlign', + 'SAConv2d', 'TINShift', 'tin_shift', 'assign_score_withk', + 'box_iou_rotated', 'RoIPointPool3d', 'nms_rotated', 'knn', 'ball_query', + 'upfirdn2d', 'FusedBiasLeakyReLU', 'fused_bias_leakyrelu', + 'RoIAlignRotated', 'roi_align_rotated', 'pixel_group', 'QueryAndGroup', + 'GroupAll', 'grouping_operation', 'contour_expand', 'three_nn', + 'three_interpolate', 'MultiScaleDeformableAttention', 'BorderAlign', + 'border_align', 'gather_points', 'furthest_point_sample', + 'furthest_point_sample_with_dist', 'PointsSampler', 'Correlation', + 'boxes_iou_bev', 'nms_bev', 'nms_normal_bev', 'Voxelization', + 'voxelization', 'dynamic_scatter', 'DynamicScatter', 'RoIAwarePool3d', + 'points_in_boxes_part', 'points_in_boxes_cpu', 'points_in_boxes_all' +] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/assign_score_withk.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/assign_score_withk.py new file mode 100644 index 0000000000000000000000000000000000000000..4906adaa2cffd1b46912fbe7d4f87ef2f9fa0012 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/assign_score_withk.py @@ -0,0 +1,123 @@ +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', ['assign_score_withk_forward', 'assign_score_withk_backward']) + + +class AssignScoreWithK(Function): + r"""Perform weighted sum to generate output features according to scores. + Modified from `PAConv `_. + + This is a memory-efficient CUDA implementation of assign_scores operation, + which first transform all point features with weight bank, then assemble + neighbor features with ``knn_idx`` and perform weighted sum of ``scores``. + + See the `paper `_ appendix Sec. D for + more detailed descriptions. + + Note: + This implementation assumes using ``neighbor`` kernel input, which is + (point_features - center_features, point_features). + See https://github.com/CVMI-Lab/PAConv/blob/main/scene_seg/model/ + pointnet2/paconv.py#L128 for more details. + """ + + @staticmethod + def forward(ctx, + scores, + point_features, + center_features, + knn_idx, + aggregate='sum'): + """ + Args: + scores (torch.Tensor): (B, npoint, K, M), predicted scores to + aggregate weight matrices in the weight bank. + ``npoint`` is the number of sampled centers. + ``K`` is the number of queried neighbors. + ``M`` is the number of weight matrices in the weight bank. + point_features (torch.Tensor): (B, N, M, out_dim) + Pre-computed point features to be aggregated. + center_features (torch.Tensor): (B, N, M, out_dim) + Pre-computed center features to be aggregated. + knn_idx (torch.Tensor): (B, npoint, K), index of sampled kNN. + We assume the first idx in each row is the idx of the center. + aggregate (str, optional): Aggregation method. + Can be 'sum', 'avg' or 'max'. Defaults: 'sum'. + + Returns: + torch.Tensor: (B, out_dim, npoint, K), the aggregated features. 
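# --- Editor's illustrative sketch (not part of the diff above) ---
# Shape-level example for assign_score_withk. It needs the compiled `_ext`
# CUDA ops bundled with this vendored mmcv and a CUDA device; all sizes and
# random inputs below are arbitrary placeholders.
import torch
from annotator.mmpkg.mmcv.ops import assign_score_withk

B, N, npoint, K, M, out_dim = 2, 128, 32, 16, 8, 64
scores = torch.rand(B, npoint, K, M, device='cuda')
point_features = torch.rand(B, N, M, out_dim, device='cuda')
center_features = torch.rand(B, N, M, out_dim, device='cuda')
knn_idx = torch.randint(0, N, (B, npoint, K), device='cuda')

out = assign_score_withk(scores, point_features, center_features, knn_idx, 'sum')
assert out.shape == (B, out_dim, npoint, K)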
+ """ + agg = {'sum': 0, 'avg': 1, 'max': 2} + + B, N, M, out_dim = point_features.size() + _, npoint, K, _ = scores.size() + + output = point_features.new_zeros((B, out_dim, npoint, K)) + ext_module.assign_score_withk_forward( + point_features.contiguous(), + center_features.contiguous(), + scores.contiguous(), + knn_idx.contiguous(), + output, + B=B, + N0=N, + N1=npoint, + M=M, + K=K, + O=out_dim, + aggregate=agg[aggregate]) + + ctx.save_for_backward(output, point_features, center_features, scores, + knn_idx) + ctx.agg = agg[aggregate] + + return output + + @staticmethod + def backward(ctx, grad_out): + """ + Args: + grad_out (torch.Tensor): (B, out_dim, npoint, K) + + Returns: + grad_scores (torch.Tensor): (B, npoint, K, M) + grad_point_features (torch.Tensor): (B, N, M, out_dim) + grad_center_features (torch.Tensor): (B, N, M, out_dim) + """ + _, point_features, center_features, scores, knn_idx = ctx.saved_tensors + + agg = ctx.agg + + B, N, M, out_dim = point_features.size() + _, npoint, K, _ = scores.size() + + grad_point_features = point_features.new_zeros(point_features.shape) + grad_center_features = center_features.new_zeros(center_features.shape) + grad_scores = scores.new_zeros(scores.shape) + + ext_module.assign_score_withk_backward( + grad_out.contiguous(), + point_features.contiguous(), + center_features.contiguous(), + scores.contiguous(), + knn_idx.contiguous(), + grad_point_features, + grad_center_features, + grad_scores, + B=B, + N0=N, + N1=npoint, + M=M, + K=K, + O=out_dim, + aggregate=agg) + + return grad_scores, grad_point_features, \ + grad_center_features, None, None + + +assign_score_withk = AssignScoreWithK.apply diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/ball_query.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/ball_query.py new file mode 100644 index 0000000000000000000000000000000000000000..d0466847c6e5c1239e359a0397568413ebc1504a --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/ball_query.py @@ -0,0 +1,55 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['ball_query_forward']) + + +class BallQuery(Function): + """Find nearby points in spherical space.""" + + @staticmethod + def forward(ctx, min_radius: float, max_radius: float, sample_num: int, + xyz: torch.Tensor, center_xyz: torch.Tensor) -> torch.Tensor: + """ + Args: + min_radius (float): minimum radius of the balls. + max_radius (float): maximum radius of the balls. + sample_num (int): maximum number of features in the balls. + xyz (Tensor): (B, N, 3) xyz coordinates of the features. + center_xyz (Tensor): (B, npoint, 3) centers of the ball query. + + Returns: + Tensor: (B, npoint, nsample) tensor with the indices of + the features that form the query balls. 
+ """ + assert center_xyz.is_contiguous() + assert xyz.is_contiguous() + assert min_radius < max_radius + + B, N, _ = xyz.size() + npoint = center_xyz.size(1) + idx = xyz.new_zeros(B, npoint, sample_num, dtype=torch.int) + + ext_module.ball_query_forward( + center_xyz, + xyz, + idx, + b=B, + n=N, + m=npoint, + min_radius=min_radius, + max_radius=max_radius, + nsample=sample_num) + if torch.__version__ != 'parrots': + ctx.mark_non_differentiable(idx) + return idx + + @staticmethod + def backward(ctx, a=None): + return None, None, None, None + + +ball_query = BallQuery.apply diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/bbox.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/bbox.py new file mode 100644 index 0000000000000000000000000000000000000000..0c4d58b6c91f652933974f519acd3403a833e906 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/bbox.py @@ -0,0 +1,72 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['bbox_overlaps']) + + +def bbox_overlaps(bboxes1, bboxes2, mode='iou', aligned=False, offset=0): + """Calculate overlap between two set of bboxes. + + If ``aligned`` is ``False``, then calculate the ious between each bbox + of bboxes1 and bboxes2, otherwise the ious between each aligned pair of + bboxes1 and bboxes2. + + Args: + bboxes1 (Tensor): shape (m, 4) in format or empty. + bboxes2 (Tensor): shape (n, 4) in format or empty. + If aligned is ``True``, then m and n must be equal. + mode (str): "iou" (intersection over union) or iof (intersection over + foreground). + + Returns: + ious(Tensor): shape (m, n) if aligned == False else shape (m, 1) + + Example: + >>> bboxes1 = torch.FloatTensor([ + >>> [0, 0, 10, 10], + >>> [10, 10, 20, 20], + >>> [32, 32, 38, 42], + >>> ]) + >>> bboxes2 = torch.FloatTensor([ + >>> [0, 0, 10, 20], + >>> [0, 10, 10, 19], + >>> [10, 10, 20, 20], + >>> ]) + >>> bbox_overlaps(bboxes1, bboxes2) + tensor([[0.5000, 0.0000, 0.0000], + [0.0000, 0.0000, 1.0000], + [0.0000, 0.0000, 0.0000]]) + + Example: + >>> empty = torch.FloatTensor([]) + >>> nonempty = torch.FloatTensor([ + >>> [0, 0, 10, 9], + >>> ]) + >>> assert tuple(bbox_overlaps(empty, nonempty).shape) == (0, 1) + >>> assert tuple(bbox_overlaps(nonempty, empty).shape) == (1, 0) + >>> assert tuple(bbox_overlaps(empty, empty).shape) == (0, 0) + """ + + mode_dict = {'iou': 0, 'iof': 1} + assert mode in mode_dict.keys() + mode_flag = mode_dict[mode] + # Either the boxes are empty or the length of boxes' last dimension is 4 + assert (bboxes1.size(-1) == 4 or bboxes1.size(0) == 0) + assert (bboxes2.size(-1) == 4 or bboxes2.size(0) == 0) + assert offset == 1 or offset == 0 + + rows = bboxes1.size(0) + cols = bboxes2.size(0) + if aligned: + assert rows == cols + + if rows * cols == 0: + return bboxes1.new(rows, 1) if aligned else bboxes1.new(rows, cols) + + if aligned: + ious = bboxes1.new_zeros(rows) + else: + ious = bboxes1.new_zeros((rows, cols)) + ext_module.bbox_overlaps( + bboxes1, bboxes2, ious, mode=mode_flag, aligned=aligned, offset=offset) + return ious diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/border_align.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/border_align.py new file mode 100644 index 0000000000000000000000000000000000000000..ff305be328e9b0a15e1bbb5e6b41beb940f55c81 --- /dev/null +++ 
b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/border_align.py @@ -0,0 +1,109 @@ +# Copyright (c) OpenMMLab. All rights reserved. +# modified from +# https://github.com/Megvii-BaseDetection/cvpods/blob/master/cvpods/layers/border_align.py + +import torch +import torch.nn as nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', ['border_align_forward', 'border_align_backward']) + + +class BorderAlignFunction(Function): + + @staticmethod + def symbolic(g, input, boxes, pool_size): + return g.op( + 'mmcv::MMCVBorderAlign', input, boxes, pool_size_i=pool_size) + + @staticmethod + def forward(ctx, input, boxes, pool_size): + ctx.pool_size = pool_size + ctx.input_shape = input.size() + + assert boxes.ndim == 3, 'boxes must be with shape [B, H*W, 4]' + assert boxes.size(2) == 4, \ + 'the last dimension of boxes must be (x1, y1, x2, y2)' + assert input.size(1) % 4 == 0, \ + 'the channel for input feature must be divisible by factor 4' + + # [B, C//4, H*W, 4] + output_shape = (input.size(0), input.size(1) // 4, boxes.size(1), 4) + output = input.new_zeros(output_shape) + # `argmax_idx` only used for backward + argmax_idx = input.new_zeros(output_shape).to(torch.int) + + ext_module.border_align_forward( + input, boxes, output, argmax_idx, pool_size=ctx.pool_size) + + ctx.save_for_backward(boxes, argmax_idx) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + boxes, argmax_idx = ctx.saved_tensors + grad_input = grad_output.new_zeros(ctx.input_shape) + # complex head architecture may cause grad_output uncontiguous + grad_output = grad_output.contiguous() + ext_module.border_align_backward( + grad_output, + boxes, + argmax_idx, + grad_input, + pool_size=ctx.pool_size) + return grad_input, None, None + + +border_align = BorderAlignFunction.apply + + +class BorderAlign(nn.Module): + r"""Border align pooling layer. + + Applies border_align over the input feature based on predicted bboxes. + The details were described in the paper + `BorderDet: Border Feature for Dense Object Detection + `_. + + For each border line (e.g. top, left, bottom or right) of each box, + border_align does the following: + 1. uniformly samples `pool_size`+1 positions on this line, involving \ + the start and end points. + 2. the corresponding features on these points are computed by \ + bilinear interpolation. + 3. max pooling over all the `pool_size`+1 positions are used for \ + computing pooled feature. + + Args: + pool_size (int): number of positions sampled over the boxes' borders + (e.g. top, bottom, left, right). + + """ + + def __init__(self, pool_size): + super(BorderAlign, self).__init__() + self.pool_size = pool_size + + def forward(self, input, boxes): + """ + Args: + input: Features with shape [N,4C,H,W]. Channels ranged in [0,C), + [C,2C), [2C,3C), [3C,4C) represent the top, left, bottom, + right features respectively. + boxes: Boxes with shape [N,H*W,4]. Coordinate format (x1,y1,x2,y2). + + Returns: + Tensor: Pooled features with shape [N,C,H*W,4]. The order is + (top,left,bottom,right) for the last dimension. 
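# --- Editor's illustrative sketch (not part of the diff above) ---
# BorderAlign needs the compiled `_ext` CUDA ops. The input channel count must
# be divisible by 4 (top/left/bottom/right feature groups) and one box is given
# per spatial location; all values below are hypothetical.
import torch
from annotator.mmpkg.mmcv.ops import BorderAlign

N, C, H, W = 1, 16, 8, 8                          # C = 4 border-feature groups of 4 channels
feats = torch.rand(N, C, H, W, device='cuda')
x1y1 = torch.rand(N, H * W, 2, device='cuda') * 4
wh = torch.rand(N, H * W, 2, device='cuda') * 4
boxes = torch.cat([x1y1, x1y1 + wh], dim=-1)      # (x1, y1, x2, y2) with x1 <= x2, y1 <= y2

pooled = BorderAlign(pool_size=10)(feats, boxes)
assert pooled.shape == (N, C // 4, H * W, 4)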
+ """ + return border_align(input, boxes, self.pool_size) + + def __repr__(self): + s = self.__class__.__name__ + s += f'(pool_size={self.pool_size})' + return s diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/box_iou_rotated.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/box_iou_rotated.py new file mode 100644 index 0000000000000000000000000000000000000000..2d78015e9c2a9e7a52859b4e18f84a9aa63481a0 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/box_iou_rotated.py @@ -0,0 +1,45 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['box_iou_rotated']) + + +def box_iou_rotated(bboxes1, bboxes2, mode='iou', aligned=False): + """Return intersection-over-union (Jaccard index) of boxes. + + Both sets of boxes are expected to be in + (x_center, y_center, width, height, angle) format. + + If ``aligned`` is ``False``, then calculate the ious between each bbox + of bboxes1 and bboxes2, otherwise the ious between each aligned pair of + bboxes1 and bboxes2. + + Arguments: + boxes1 (Tensor): rotated bboxes 1. \ + It has shape (N, 5), indicating (x, y, w, h, theta) for each row. + Note that theta is in radian. + boxes2 (Tensor): rotated bboxes 2. \ + It has shape (M, 5), indicating (x, y, w, h, theta) for each row. + Note that theta is in radian. + mode (str): "iou" (intersection over union) or iof (intersection over + foreground). + + Returns: + ious(Tensor): shape (N, M) if aligned == False else shape (N,) + """ + assert mode in ['iou', 'iof'] + mode_dict = {'iou': 0, 'iof': 1} + mode_flag = mode_dict[mode] + rows = bboxes1.size(0) + cols = bboxes2.size(0) + if aligned: + ious = bboxes1.new_zeros(rows) + else: + ious = bboxes1.new_zeros((rows * cols)) + bboxes1 = bboxes1.contiguous() + bboxes2 = bboxes2.contiguous() + ext_module.box_iou_rotated( + bboxes1, bboxes2, ious, mode_flag=mode_flag, aligned=aligned) + if not aligned: + ious = ious.view(rows, cols) + return ious diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/carafe.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/carafe.py new file mode 100644 index 0000000000000000000000000000000000000000..5154cb3abfccfbbe0a1b2daa67018dbf80aaf6d2 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/carafe.py @@ -0,0 +1,287 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd import Function +from torch.nn.modules.module import Module + +from ..cnn import UPSAMPLE_LAYERS, normal_init, xavier_init +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', [ + 'carafe_naive_forward', 'carafe_naive_backward', 'carafe_forward', + 'carafe_backward' +]) + + +class CARAFENaiveFunction(Function): + + @staticmethod + def symbolic(g, features, masks, kernel_size, group_size, scale_factor): + return g.op( + 'mmcv::MMCVCARAFENaive', + features, + masks, + kernel_size_i=kernel_size, + group_size_i=group_size, + scale_factor_f=scale_factor) + + @staticmethod + def forward(ctx, features, masks, kernel_size, group_size, scale_factor): + assert scale_factor >= 1 + assert masks.size(1) == kernel_size * kernel_size * group_size + assert masks.size(-1) == features.size(-1) * scale_factor + assert masks.size(-2) == features.size(-2) * scale_factor + assert features.size(1) % group_size == 0 + assert (kernel_size - 1) % 2 == 0 and kernel_size >= 1 + ctx.kernel_size = kernel_size + ctx.group_size = group_size + ctx.scale_factor = scale_factor + ctx.feature_size = features.size() + ctx.mask_size = masks.size() + + n, c, h, w = features.size() + output = features.new_zeros((n, c, h * scale_factor, w * scale_factor)) + ext_module.carafe_naive_forward( + features, + masks, + output, + kernel_size=kernel_size, + group_size=group_size, + scale_factor=scale_factor) + + if features.requires_grad or masks.requires_grad: + ctx.save_for_backward(features, masks) + return output + + @staticmethod + def backward(ctx, grad_output): + assert grad_output.is_cuda + + features, masks = ctx.saved_tensors + kernel_size = ctx.kernel_size + group_size = ctx.group_size + scale_factor = ctx.scale_factor + + grad_input = torch.zeros_like(features) + grad_masks = torch.zeros_like(masks) + ext_module.carafe_naive_backward( + grad_output.contiguous(), + features, + masks, + grad_input, + grad_masks, + kernel_size=kernel_size, + group_size=group_size, + scale_factor=scale_factor) + + return grad_input, grad_masks, None, None, None + + +carafe_naive = CARAFENaiveFunction.apply + + +class CARAFENaive(Module): + + def __init__(self, kernel_size, group_size, scale_factor): + super(CARAFENaive, self).__init__() + + assert isinstance(kernel_size, int) and isinstance( + group_size, int) and isinstance(scale_factor, int) + self.kernel_size = kernel_size + self.group_size = group_size + self.scale_factor = scale_factor + + def forward(self, features, masks): + return carafe_naive(features, masks, self.kernel_size, self.group_size, + self.scale_factor) + + +class CARAFEFunction(Function): + + @staticmethod + def symbolic(g, features, masks, kernel_size, group_size, scale_factor): + return g.op( + 'mmcv::MMCVCARAFE', + features, + masks, + kernel_size_i=kernel_size, + group_size_i=group_size, + scale_factor_f=scale_factor) + + @staticmethod + def forward(ctx, features, masks, kernel_size, group_size, scale_factor): + assert scale_factor >= 1 + assert masks.size(1) == kernel_size * kernel_size * group_size + assert masks.size(-1) == features.size(-1) * scale_factor + assert masks.size(-2) == features.size(-2) * scale_factor + assert features.size(1) % group_size == 0 + assert (kernel_size - 1) % 2 == 0 and kernel_size >= 1 + ctx.kernel_size = kernel_size + ctx.group_size = group_size + ctx.scale_factor = scale_factor + ctx.feature_size = features.size() + ctx.mask_size = masks.size() + + n, c, h, w = features.size() + 
output = features.new_zeros((n, c, h * scale_factor, w * scale_factor)) + routput = features.new_zeros(output.size(), requires_grad=False) + rfeatures = features.new_zeros(features.size(), requires_grad=False) + rmasks = masks.new_zeros(masks.size(), requires_grad=False) + ext_module.carafe_forward( + features, + masks, + rfeatures, + routput, + rmasks, + output, + kernel_size=kernel_size, + group_size=group_size, + scale_factor=scale_factor) + + if features.requires_grad or masks.requires_grad: + ctx.save_for_backward(features, masks, rfeatures) + return output + + @staticmethod + def backward(ctx, grad_output): + assert grad_output.is_cuda + + features, masks, rfeatures = ctx.saved_tensors + kernel_size = ctx.kernel_size + group_size = ctx.group_size + scale_factor = ctx.scale_factor + + rgrad_output = torch.zeros_like(grad_output, requires_grad=False) + rgrad_input_hs = torch.zeros_like(grad_output, requires_grad=False) + rgrad_input = torch.zeros_like(features, requires_grad=False) + rgrad_masks = torch.zeros_like(masks, requires_grad=False) + grad_input = torch.zeros_like(features, requires_grad=False) + grad_masks = torch.zeros_like(masks, requires_grad=False) + ext_module.carafe_backward( + grad_output.contiguous(), + rfeatures, + masks, + rgrad_output, + rgrad_input_hs, + rgrad_input, + rgrad_masks, + grad_input, + grad_masks, + kernel_size=kernel_size, + group_size=group_size, + scale_factor=scale_factor) + return grad_input, grad_masks, None, None, None + + +carafe = CARAFEFunction.apply + + +class CARAFE(Module): + """ CARAFE: Content-Aware ReAssembly of FEatures + + Please refer to https://arxiv.org/abs/1905.02188 for more details. + + Args: + kernel_size (int): reassemble kernel size + group_size (int): reassemble group size + scale_factor (int): upsample ratio + + Returns: + upsampled feature map + """ + + def __init__(self, kernel_size, group_size, scale_factor): + super(CARAFE, self).__init__() + + assert isinstance(kernel_size, int) and isinstance( + group_size, int) and isinstance(scale_factor, int) + self.kernel_size = kernel_size + self.group_size = group_size + self.scale_factor = scale_factor + + def forward(self, features, masks): + return carafe(features, masks, self.kernel_size, self.group_size, + self.scale_factor) + + +@UPSAMPLE_LAYERS.register_module(name='carafe') +class CARAFEPack(nn.Module): + """A unified package of CARAFE upsampler that contains: 1) channel + compressor 2) content encoder 3) CARAFE op. + + Official implementation of ICCV 2019 paper + CARAFE: Content-Aware ReAssembly of FEatures + Please refer to https://arxiv.org/abs/1905.02188 for more details. 
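# --- Editor's illustrative sketch (not part of the diff above) ---
# The raw CARAFE op requires the compiled `_ext` CUDA ops. Note how the mask
# channel count and spatial size are tied to kernel_size, group_size and
# scale_factor; all sizes below are arbitrary.
import torch
from annotator.mmpkg.mmcv.ops import CARAFE

kernel_size, group_size, scale = 5, 1, 2
feats = torch.rand(2, 64, 16, 16, device='cuda')
masks = torch.rand(2, kernel_size * kernel_size * group_size,
                   16 * scale, 16 * scale, device='cuda')

upsampled = CARAFE(kernel_size, group_size, scale)(feats, masks)
assert upsampled.shape == (2, 64, 32, 32)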
+ + Args: + channels (int): input feature channels + scale_factor (int): upsample ratio + up_kernel (int): kernel size of CARAFE op + up_group (int): group size of CARAFE op + encoder_kernel (int): kernel size of content encoder + encoder_dilation (int): dilation of content encoder + compressed_channels (int): output channels of channels compressor + + Returns: + upsampled feature map + """ + + def __init__(self, + channels, + scale_factor, + up_kernel=5, + up_group=1, + encoder_kernel=3, + encoder_dilation=1, + compressed_channels=64): + super(CARAFEPack, self).__init__() + self.channels = channels + self.scale_factor = scale_factor + self.up_kernel = up_kernel + self.up_group = up_group + self.encoder_kernel = encoder_kernel + self.encoder_dilation = encoder_dilation + self.compressed_channels = compressed_channels + self.channel_compressor = nn.Conv2d(channels, self.compressed_channels, + 1) + self.content_encoder = nn.Conv2d( + self.compressed_channels, + self.up_kernel * self.up_kernel * self.up_group * + self.scale_factor * self.scale_factor, + self.encoder_kernel, + padding=int((self.encoder_kernel - 1) * self.encoder_dilation / 2), + dilation=self.encoder_dilation, + groups=1) + self.init_weights() + + def init_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + xavier_init(m, distribution='uniform') + normal_init(self.content_encoder, std=0.001) + + def kernel_normalizer(self, mask): + mask = F.pixel_shuffle(mask, self.scale_factor) + n, mask_c, h, w = mask.size() + # use float division explicitly, + # to void inconsistency while exporting to onnx + mask_channel = int(mask_c / float(self.up_kernel**2)) + mask = mask.view(n, mask_channel, -1, h, w) + + mask = F.softmax(mask, dim=2, dtype=mask.dtype) + mask = mask.view(n, mask_c, h, w).contiguous() + + return mask + + def feature_reassemble(self, x, mask): + x = carafe(x, mask, self.up_kernel, self.up_group, self.scale_factor) + return x + + def forward(self, x): + compressed_x = self.channel_compressor(x) + mask = self.content_encoder(compressed_x) + mask = self.kernel_normalizer(mask) + + x = self.feature_reassemble(x, mask) + return x diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/cc_attention.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/cc_attention.py new file mode 100644 index 0000000000000000000000000000000000000000..8982f467185b5d839832baa2e51722613a8b87a2 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/cc_attention.py @@ -0,0 +1,83 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +import torch.nn.functional as F + +from annotator.mmpkg.mmcv.cnn import PLUGIN_LAYERS, Scale + + +def NEG_INF_DIAG(n, device): + """Returns a diagonal matrix of size [n, n]. + + The diagonal are all "-inf". This is for avoiding calculating the + overlapped element in the Criss-Cross twice. + """ + return torch.diag(torch.tensor(float('-inf')).to(device).repeat(n), 0) + + +@PLUGIN_LAYERS.register_module() +class CrissCrossAttention(nn.Module): + """Criss-Cross Attention Module. + + .. note:: + Before v1.3.13, we use a CUDA op. Since v1.3.13, we switch + to a pure PyTorch and equivalent implementation. For more + details, please refer to https://github.com/open-mmlab/mmcv/pull/1201. 
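# --- Editor's illustrative sketch (not part of the diff above) ---
# CARAFEPack predicts its own reassembly masks from the input, so only a
# feature map is needed; the underlying carafe op still requires the compiled
# `_ext` CUDA extension. Sizes are arbitrary.
import torch
from annotator.mmpkg.mmcv.ops import CARAFEPack

upsampler = CARAFEPack(channels=64, scale_factor=2).cuda()
x = torch.rand(2, 64, 16, 16, device='cuda')
y = upsampler(x)
assert y.shape == (2, 64, 32, 32)   # spatial dims doubled, channels preserved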
+ + Speed comparison for one forward pass + + - Input size: [2,512,97,97] + - Device: 1 NVIDIA GeForce RTX 2080 Ti + + +-----------------------+---------------+------------+---------------+ + | |PyTorch version|CUDA version|Relative speed | + +=======================+===============+============+===============+ + |with torch.no_grad() |0.00554402 s |0.0299619 s |5.4x | + +-----------------------+---------------+------------+---------------+ + |no with torch.no_grad()|0.00562803 s |0.0301349 s |5.4x | + +-----------------------+---------------+------------+---------------+ + + Args: + in_channels (int): Channels of the input feature map. + """ + + def __init__(self, in_channels): + super().__init__() + self.query_conv = nn.Conv2d(in_channels, in_channels // 8, 1) + self.key_conv = nn.Conv2d(in_channels, in_channels // 8, 1) + self.value_conv = nn.Conv2d(in_channels, in_channels, 1) + self.gamma = Scale(0.) + self.in_channels = in_channels + + def forward(self, x): + """forward function of Criss-Cross Attention. + + Args: + x (Tensor): Input feature. \ + shape (batch_size, in_channels, height, width) + Returns: + Tensor: Output of the layer, with shape of \ + (batch_size, in_channels, height, width) + """ + B, C, H, W = x.size() + query = self.query_conv(x) + key = self.key_conv(x) + value = self.value_conv(x) + energy_H = torch.einsum('bchw,bciw->bwhi', query, key) + NEG_INF_DIAG( + H, query.device) + energy_H = energy_H.transpose(1, 2) + energy_W = torch.einsum('bchw,bchj->bhwj', query, key) + attn = F.softmax( + torch.cat([energy_H, energy_W], dim=-1), dim=-1) # [B,H,W,(H+W)] + out = torch.einsum('bciw,bhwi->bchw', value, attn[..., :H]) + out += torch.einsum('bchj,bhwj->bchw', value, attn[..., H:]) + + out = self.gamma(out) + x + out = out.contiguous() + + return out + + def __repr__(self): + s = self.__class__.__name__ + s += f'(in_channels={self.in_channels})' + return s diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/contour_expand.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/contour_expand.py new file mode 100644 index 0000000000000000000000000000000000000000..ea1111e1768b5f27e118bf7dbc0d9c70a7afd6d7 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/contour_expand.py @@ -0,0 +1,49 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import numpy as np +import torch + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['contour_expand']) + + +def contour_expand(kernel_mask, internal_kernel_label, min_kernel_area, + kernel_num): + """Expand kernel contours so that foreground pixels are assigned into + instances. + + Arguments: + kernel_mask (np.array or Tensor): The instance kernel mask with + size hxw. + internal_kernel_label (np.array or Tensor): The instance internal + kernel label with size hxw. + min_kernel_area (int): The minimum kernel area. + kernel_num (int): The instance kernel number. + + Returns: + label (list): The instance index map with size hxw. 
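# --- Editor's illustrative sketch (not part of the diff above) ---
# CrissCrossAttention is pure PyTorch in this version, so a CPU tensor is
# enough; importing it from its own submodule avoids loading the compiled ops.
# Shapes below are arbitrary.
import torch
from annotator.mmpkg.mmcv.ops.cc_attention import CrissCrossAttention

attn = CrissCrossAttention(in_channels=32)
x = torch.rand(2, 32, 9, 9)
out = attn(x)
assert out.shape == x.shape   # output keeps (B, C, H, W)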
+ """ + assert isinstance(kernel_mask, (torch.Tensor, np.ndarray)) + assert isinstance(internal_kernel_label, (torch.Tensor, np.ndarray)) + assert isinstance(min_kernel_area, int) + assert isinstance(kernel_num, int) + + if isinstance(kernel_mask, np.ndarray): + kernel_mask = torch.from_numpy(kernel_mask) + if isinstance(internal_kernel_label, np.ndarray): + internal_kernel_label = torch.from_numpy(internal_kernel_label) + + if torch.__version__ == 'parrots': + if kernel_mask.shape[0] == 0 or internal_kernel_label.shape[0] == 0: + label = [] + else: + label = ext_module.contour_expand( + kernel_mask, + internal_kernel_label, + min_kernel_area=min_kernel_area, + kernel_num=kernel_num) + label = label.tolist() + else: + label = ext_module.contour_expand(kernel_mask, internal_kernel_label, + min_kernel_area, kernel_num) + return label diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/corner_pool.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/corner_pool.py new file mode 100644 index 0000000000000000000000000000000000000000..a33d798b43d405e4c86bee4cd6389be21ca9c637 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/corner_pool.py @@ -0,0 +1,161 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from torch import nn +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', [ + 'top_pool_forward', 'top_pool_backward', 'bottom_pool_forward', + 'bottom_pool_backward', 'left_pool_forward', 'left_pool_backward', + 'right_pool_forward', 'right_pool_backward' +]) + +_mode_dict = {'top': 0, 'bottom': 1, 'left': 2, 'right': 3} + + +class TopPoolFunction(Function): + + @staticmethod + def symbolic(g, input): + output = g.op( + 'mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['top'])) + return output + + @staticmethod + def forward(ctx, input): + output = ext_module.top_pool_forward(input) + ctx.save_for_backward(input) + return output + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + output = ext_module.top_pool_backward(input, grad_output) + return output + + +class BottomPoolFunction(Function): + + @staticmethod + def symbolic(g, input): + output = g.op( + 'mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['bottom'])) + return output + + @staticmethod + def forward(ctx, input): + output = ext_module.bottom_pool_forward(input) + ctx.save_for_backward(input) + return output + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + output = ext_module.bottom_pool_backward(input, grad_output) + return output + + +class LeftPoolFunction(Function): + + @staticmethod + def symbolic(g, input): + output = g.op( + 'mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['left'])) + return output + + @staticmethod + def forward(ctx, input): + output = ext_module.left_pool_forward(input) + ctx.save_for_backward(input) + return output + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + output = ext_module.left_pool_backward(input, grad_output) + return output + + +class RightPoolFunction(Function): + + @staticmethod + def symbolic(g, input): + output = g.op( + 'mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['right'])) + return output + + @staticmethod + def forward(ctx, input): + output = ext_module.right_pool_forward(input) + ctx.save_for_backward(input) + return output + + @staticmethod + def backward(ctx, grad_output): + input, = 
ctx.saved_tensors + output = ext_module.right_pool_backward(input, grad_output) + return output + + +class CornerPool(nn.Module): + """Corner Pooling. + + Corner Pooling is a new type of pooling layer that helps a + convolutional network better localize corners of bounding boxes. + + Please refer to https://arxiv.org/abs/1808.01244 for more details. + Code is modified from https://github.com/princeton-vl/CornerNet-Lite. + + Args: + mode(str): Pooling orientation for the pooling layer + + - 'bottom': Bottom Pooling + - 'left': Left Pooling + - 'right': Right Pooling + - 'top': Top Pooling + + Returns: + Feature map after pooling. + """ + + pool_functions = { + 'bottom': BottomPoolFunction, + 'left': LeftPoolFunction, + 'right': RightPoolFunction, + 'top': TopPoolFunction, + } + + cummax_dim_flip = { + 'bottom': (2, False), + 'left': (3, True), + 'right': (3, False), + 'top': (2, True), + } + + def __init__(self, mode): + super(CornerPool, self).__init__() + assert mode in self.pool_functions + self.mode = mode + self.corner_pool = self.pool_functions[mode] + + def forward(self, x): + if torch.__version__ != 'parrots' and torch.__version__ >= '1.5.0': + if torch.onnx.is_in_onnx_export(): + assert torch.__version__ >= '1.7.0', \ + 'When `cummax` serves as an intermediate component whose '\ + 'outputs is used as inputs for another modules, it\'s '\ + 'expected that pytorch version must be >= 1.7.0, '\ + 'otherwise Error appears like: `RuntimeError: tuple '\ + 'appears in op that does not forward tuples, unsupported '\ + 'kind: prim::PythonOp`.' + + dim, flip = self.cummax_dim_flip[self.mode] + if flip: + x = x.flip(dim) + pool_tensor, _ = torch.cummax(x, dim=dim) + if flip: + pool_tensor = pool_tensor.flip(dim) + return pool_tensor + else: + return self.corner_pool.apply(x) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/correlation.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/correlation.py new file mode 100644 index 0000000000000000000000000000000000000000..3d0b79c301b29915dfaf4d2b1846c59be73127d3 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/correlation.py @@ -0,0 +1,196 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
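# A minimal sketch of the CornerPool layer defined in corner_pool.py above:
# on PyTorch >= 1.5 its forward pass reduces to a flipped cumulative maximum,
# so the 'top' mode can be reproduced with plain PyTorch ops (no compiled
# extension needed); shapes below are made up.
feat = torch.randn(2, 8, 16, 16)
dim, flip = CornerPool.cummax_dim_flip['top']  # (2, True): pool along H, scanning upwards
ref = torch.cummax(feat.flip(dim), dim=dim).values.flip(dim)
# CornerPool('top')(feat) takes exactly this code path and returns the same tensor as `ref`.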
+import torch +from torch import Tensor, nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', ['correlation_forward', 'correlation_backward']) + + +class CorrelationFunction(Function): + + @staticmethod + def forward(ctx, + input1, + input2, + kernel_size=1, + max_displacement=1, + stride=1, + padding=1, + dilation=1, + dilation_patch=1): + + ctx.save_for_backward(input1, input2) + + kH, kW = ctx.kernel_size = _pair(kernel_size) + patch_size = max_displacement * 2 + 1 + ctx.patch_size = patch_size + dH, dW = ctx.stride = _pair(stride) + padH, padW = ctx.padding = _pair(padding) + dilationH, dilationW = ctx.dilation = _pair(dilation) + dilation_patchH, dilation_patchW = ctx.dilation_patch = _pair( + dilation_patch) + + output_size = CorrelationFunction._output_size(ctx, input1) + + output = input1.new_zeros(output_size) + + ext_module.correlation_forward( + input1, + input2, + output, + kH=kH, + kW=kW, + patchH=patch_size, + patchW=patch_size, + padH=padH, + padW=padW, + dilationH=dilationH, + dilationW=dilationW, + dilation_patchH=dilation_patchH, + dilation_patchW=dilation_patchW, + dH=dH, + dW=dW) + + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + input1, input2 = ctx.saved_tensors + + kH, kW = ctx.kernel_size + patch_size = ctx.patch_size + padH, padW = ctx.padding + dilationH, dilationW = ctx.dilation + dilation_patchH, dilation_patchW = ctx.dilation_patch + dH, dW = ctx.stride + grad_input1 = torch.zeros_like(input1) + grad_input2 = torch.zeros_like(input2) + + ext_module.correlation_backward( + grad_output, + input1, + input2, + grad_input1, + grad_input2, + kH=kH, + kW=kW, + patchH=patch_size, + patchW=patch_size, + padH=padH, + padW=padW, + dilationH=dilationH, + dilationW=dilationW, + dilation_patchH=dilation_patchH, + dilation_patchW=dilation_patchW, + dH=dH, + dW=dW) + return grad_input1, grad_input2, None, None, None, None, None, None + + @staticmethod + def _output_size(ctx, input1): + iH, iW = input1.size(2), input1.size(3) + batch_size = input1.size(0) + kH, kW = ctx.kernel_size + patch_size = ctx.patch_size + dH, dW = ctx.stride + padH, padW = ctx.padding + dilationH, dilationW = ctx.dilation + dilatedKH = (kH - 1) * dilationH + 1 + dilatedKW = (kW - 1) * dilationW + 1 + + oH = int((iH + 2 * padH - dilatedKH) / dH + 1) + oW = int((iW + 2 * padW - dilatedKW) / dW + 1) + + output_size = (batch_size, patch_size, patch_size, oH, oW) + return output_size + + +class Correlation(nn.Module): + r"""Correlation operator + + This correlation operator works for optical flow correlation computation. + + There are two batched tensors with shape :math:`(N, C, H, W)`, + and the correlation output's shape is :math:`(N, max\_displacement \times + 2 + 1, max\_displacement * 2 + 1, H_{out}, W_{out})` + + where + + .. math:: + H_{out} = \left\lfloor\frac{H_{in} + 2 \times padding - + dilation \times (kernel\_size - 1) - 1} + {stride} + 1\right\rfloor + + .. math:: + W_{out} = \left\lfloor\frac{W_{in} + 2 \times padding - dilation + \times (kernel\_size - 1) - 1} + {stride} + 1\right\rfloor + + the correlation item :math:`(N_i, dy, dx)` is formed by taking the sliding + window convolution between input1 and shifted input2, + + .. 
math:: + Corr(N_i, dx, dy) = + \sum_{c=0}^{C-1} + input1(N_i, c) \star + \mathcal{S}(input2(N_i, c), dy, dx) + + where :math:`\star` is the valid 2d sliding window convolution operator, + and :math:`\mathcal{S}` means shifting the input features (auto-complete + zero marginal), and :math:`dx, dy` are shifting distance, :math:`dx, dy \in + [-max\_displacement \times dilation\_patch, max\_displacement \times + dilation\_patch]`. + + Args: + kernel_size (int): The size of sliding window i.e. local neighborhood + representing the center points and involved in correlation + computation. Defaults to 1. + max_displacement (int): The radius for computing correlation volume, + but the actual working space can be dilated by dilation_patch. + Defaults to 1. + stride (int): The stride of the sliding blocks in the input spatial + dimensions. Defaults to 1. + padding (int): Zero padding added to all four sides of the input1. + Defaults to 0. + dilation (int): The spacing of local neighborhood that will involved + in correlation. Defaults to 1. + dilation_patch (int): The spacing between position need to compute + correlation. Defaults to 1. + """ + + def __init__(self, + kernel_size: int = 1, + max_displacement: int = 1, + stride: int = 1, + padding: int = 0, + dilation: int = 1, + dilation_patch: int = 1) -> None: + super().__init__() + self.kernel_size = kernel_size + self.max_displacement = max_displacement + self.stride = stride + self.padding = padding + self.dilation = dilation + self.dilation_patch = dilation_patch + + def forward(self, input1: Tensor, input2: Tensor) -> Tensor: + return CorrelationFunction.apply(input1, input2, self.kernel_size, + self.max_displacement, self.stride, + self.padding, self.dilation, + self.dilation_patch) + + def __repr__(self) -> str: + s = self.__class__.__name__ + s += f'(kernel_size={self.kernel_size}, ' + s += f'max_displacement={self.max_displacement}, ' + s += f'stride={self.stride}, ' + s += f'padding={self.padding}, ' + s += f'dilation={self.dilation}, ' + s += f'dilation_patch={self.dilation_patch})' + return s diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/deform_conv.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/deform_conv.py new file mode 100644 index 0000000000000000000000000000000000000000..3de3aae1e7b2258360aef3ad9eb3a351f080f10f --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/deform_conv.py @@ -0,0 +1,405 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
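# A minimal usage sketch for the Correlation module defined above, assuming a
# CUDA build of the `_ext` extension. With the defaults (kernel_size=1,
# stride=1, padding=0, dilation=1) and max_displacement=3, the cost volume has
# 2 * max_displacement + 1 = 7 displacement bins per axis.
corr = Correlation(max_displacement=3)
out = corr(torch.randn(2, 16, 32, 32).cuda(), torch.randn(2, 16, 32, 32).cuda())
# out.shape == (2, 7, 7, 32, 32)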
+from typing import Tuple, Union + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch import Tensor +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair, _single + +from annotator.mmpkg.mmcv.utils import deprecated_api_warning +from ..cnn import CONV_LAYERS +from ..utils import ext_loader, print_log + +ext_module = ext_loader.load_ext('_ext', [ + 'deform_conv_forward', 'deform_conv_backward_input', + 'deform_conv_backward_parameters' +]) + + +class DeformConv2dFunction(Function): + + @staticmethod + def symbolic(g, + input, + offset, + weight, + stride, + padding, + dilation, + groups, + deform_groups, + bias=False, + im2col_step=32): + return g.op( + 'mmcv::MMCVDeformConv2d', + input, + offset, + weight, + stride_i=stride, + padding_i=padding, + dilation_i=dilation, + groups_i=groups, + deform_groups_i=deform_groups, + bias_i=bias, + im2col_step_i=im2col_step) + + @staticmethod + def forward(ctx, + input, + offset, + weight, + stride=1, + padding=0, + dilation=1, + groups=1, + deform_groups=1, + bias=False, + im2col_step=32): + if input is not None and input.dim() != 4: + raise ValueError( + f'Expected 4D tensor as input, got {input.dim()}D tensor \ + instead.') + assert bias is False, 'Only support bias is False.' + ctx.stride = _pair(stride) + ctx.padding = _pair(padding) + ctx.dilation = _pair(dilation) + ctx.groups = groups + ctx.deform_groups = deform_groups + ctx.im2col_step = im2col_step + + # When pytorch version >= 1.6.0, amp is adopted for fp16 mode; + # amp won't cast the type of model (float32), but "offset" is cast + # to float16 by nn.Conv2d automatically, leading to the type + # mismatch with input (when it is float32) or weight. + # The flag for whether to use fp16 or amp is the type of "offset", + # we cast weight and input to temporarily support fp16 and amp + # whatever the pytorch version is. 
+ input = input.type_as(offset) + weight = weight.type_as(input) + ctx.save_for_backward(input, offset, weight) + + output = input.new_empty( + DeformConv2dFunction._output_size(ctx, input, weight)) + + ctx.bufs_ = [input.new_empty(0), input.new_empty(0)] # columns, ones + + cur_im2col_step = min(ctx.im2col_step, input.size(0)) + assert (input.size(0) % + cur_im2col_step) == 0, 'im2col step must divide batchsize' + ext_module.deform_conv_forward( + input, + weight, + offset, + output, + ctx.bufs_[0], + ctx.bufs_[1], + kW=weight.size(3), + kH=weight.size(2), + dW=ctx.stride[1], + dH=ctx.stride[0], + padW=ctx.padding[1], + padH=ctx.padding[0], + dilationW=ctx.dilation[1], + dilationH=ctx.dilation[0], + group=ctx.groups, + deformable_group=ctx.deform_groups, + im2col_step=cur_im2col_step) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + input, offset, weight = ctx.saved_tensors + + grad_input = grad_offset = grad_weight = None + + cur_im2col_step = min(ctx.im2col_step, input.size(0)) + assert (input.size(0) % cur_im2col_step + ) == 0, 'batch size must be divisible by im2col_step' + + grad_output = grad_output.contiguous() + if ctx.needs_input_grad[0] or ctx.needs_input_grad[1]: + grad_input = torch.zeros_like(input) + grad_offset = torch.zeros_like(offset) + ext_module.deform_conv_backward_input( + input, + offset, + grad_output, + grad_input, + grad_offset, + weight, + ctx.bufs_[0], + kW=weight.size(3), + kH=weight.size(2), + dW=ctx.stride[1], + dH=ctx.stride[0], + padW=ctx.padding[1], + padH=ctx.padding[0], + dilationW=ctx.dilation[1], + dilationH=ctx.dilation[0], + group=ctx.groups, + deformable_group=ctx.deform_groups, + im2col_step=cur_im2col_step) + + if ctx.needs_input_grad[2]: + grad_weight = torch.zeros_like(weight) + ext_module.deform_conv_backward_parameters( + input, + offset, + grad_output, + grad_weight, + ctx.bufs_[0], + ctx.bufs_[1], + kW=weight.size(3), + kH=weight.size(2), + dW=ctx.stride[1], + dH=ctx.stride[0], + padW=ctx.padding[1], + padH=ctx.padding[0], + dilationW=ctx.dilation[1], + dilationH=ctx.dilation[0], + group=ctx.groups, + deformable_group=ctx.deform_groups, + scale=1, + im2col_step=cur_im2col_step) + + return grad_input, grad_offset, grad_weight, \ + None, None, None, None, None, None, None + + @staticmethod + def _output_size(ctx, input, weight): + channels = weight.size(0) + output_size = (input.size(0), channels) + for d in range(input.dim() - 2): + in_size = input.size(d + 2) + pad = ctx.padding[d] + kernel = ctx.dilation[d] * (weight.size(d + 2) - 1) + 1 + stride_ = ctx.stride[d] + output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1, ) + if not all(map(lambda s: s > 0, output_size)): + raise ValueError( + 'convolution input is too small (output would be ' + + 'x'.join(map(str, output_size)) + ')') + return output_size + + +deform_conv2d = DeformConv2dFunction.apply + + +class DeformConv2d(nn.Module): + r"""Deformable 2D convolution. + + Applies a deformable 2D convolution over an input signal composed of + several input planes. DeformConv2d was described in the paper + `Deformable Convolutional Networks + `_ + + Note: + The argument ``im2col_step`` was added in version 1.3.17, which means + number of samples processed by the ``im2col_cuda_kernel`` per call. + It enables users to define ``batch_size`` and ``im2col_step`` more + flexibly and solved `issue mmcv#1440 + `_. + + Args: + in_channels (int): Number of channels in the input image. 
+ out_channels (int): Number of channels produced by the convolution. + kernel_size(int, tuple): Size of the convolving kernel. + stride(int, tuple): Stride of the convolution. Default: 1. + padding (int or tuple): Zero-padding added to both sides of the input. + Default: 0. + dilation (int or tuple): Spacing between kernel elements. Default: 1. + groups (int): Number of blocked connections from input. + channels to output channels. Default: 1. + deform_groups (int): Number of deformable group partitions. + bias (bool): If True, adds a learnable bias to the output. + Default: False. + im2col_step (int): Number of samples processed by im2col_cuda_kernel + per call. It will work when ``batch_size`` > ``im2col_step``, but + ``batch_size`` must be divisible by ``im2col_step``. Default: 32. + `New in version 1.3.17.` + """ + + @deprecated_api_warning({'deformable_groups': 'deform_groups'}, + cls_name='DeformConv2d') + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: Union[int, Tuple[int, ...]], + stride: Union[int, Tuple[int, ...]] = 1, + padding: Union[int, Tuple[int, ...]] = 0, + dilation: Union[int, Tuple[int, ...]] = 1, + groups: int = 1, + deform_groups: int = 1, + bias: bool = False, + im2col_step: int = 32) -> None: + super(DeformConv2d, self).__init__() + + assert not bias, \ + f'bias={bias} is not supported in DeformConv2d.' + assert in_channels % groups == 0, \ + f'in_channels {in_channels} cannot be divisible by groups {groups}' + assert out_channels % groups == 0, \ + f'out_channels {out_channels} cannot be divisible by groups \ + {groups}' + + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = _pair(kernel_size) + self.stride = _pair(stride) + self.padding = _pair(padding) + self.dilation = _pair(dilation) + self.groups = groups + self.deform_groups = deform_groups + self.im2col_step = im2col_step + # enable compatibility with nn.Conv2d + self.transposed = False + self.output_padding = _single(0) + + # only weight, no bias + self.weight = nn.Parameter( + torch.Tensor(out_channels, in_channels // self.groups, + *self.kernel_size)) + + self.reset_parameters() + + def reset_parameters(self): + # switch the initialization of `self.weight` to the standard kaiming + # method described in `Delving deep into rectifiers: Surpassing + # human-level performance on ImageNet classification` - He, K. et al. + # (2015), using a uniform distribution + nn.init.kaiming_uniform_(self.weight, nonlinearity='relu') + + def forward(self, x: Tensor, offset: Tensor) -> Tensor: + """Deformable Convolutional forward function. + + Args: + x (Tensor): Input feature, shape (B, C_in, H_in, W_in) + offset (Tensor): Offset for deformable convolution, shape + (B, deform_groups*kernel_size[0]*kernel_size[1]*2, + H_out, W_out), H_out, W_out are equal to the output's. + + An offset is like `[y0, x0, y1, x1, y2, x2, ..., y8, x8]`. + The spatial arrangement is like: + + .. code:: text + + (x0, y0) (x1, y1) (x2, y2) + (x3, y3) (x4, y4) (x5, y5) + (x6, y6) (x7, y7) (x8, y8) + + Returns: + Tensor: Output of the layer. 
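        Example:
            A minimal call, assuming the CUDA ``_ext`` ops are built; a zero
            offset makes the layer sample on the regular grid, i.e. behave
            like an ordinary convolution::

                >>> dcn = DeformConv2d(16, 32, kernel_size=3, padding=1).cuda()
                >>> x = torch.randn(2, 16, 64, 64).cuda()
                >>> offset = torch.zeros(2, 1 * 2 * 3 * 3, 64, 64).cuda()
                >>> out = dcn(x, offset)  # -> (2, 32, 64, 64)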
+ """ + # To fix an assert error in deform_conv_cuda.cpp:128 + # input image is smaller than kernel + input_pad = (x.size(2) < self.kernel_size[0]) or (x.size(3) < + self.kernel_size[1]) + if input_pad: + pad_h = max(self.kernel_size[0] - x.size(2), 0) + pad_w = max(self.kernel_size[1] - x.size(3), 0) + x = F.pad(x, (0, pad_w, 0, pad_h), 'constant', 0).contiguous() + offset = F.pad(offset, (0, pad_w, 0, pad_h), 'constant', 0) + offset = offset.contiguous() + out = deform_conv2d(x, offset, self.weight, self.stride, self.padding, + self.dilation, self.groups, self.deform_groups, + False, self.im2col_step) + if input_pad: + out = out[:, :, :out.size(2) - pad_h, :out.size(3) - + pad_w].contiguous() + return out + + def __repr__(self): + s = self.__class__.__name__ + s += f'(in_channels={self.in_channels},\n' + s += f'out_channels={self.out_channels},\n' + s += f'kernel_size={self.kernel_size},\n' + s += f'stride={self.stride},\n' + s += f'padding={self.padding},\n' + s += f'dilation={self.dilation},\n' + s += f'groups={self.groups},\n' + s += f'deform_groups={self.deform_groups},\n' + # bias is not supported in DeformConv2d. + s += 'bias=False)' + return s + + +@CONV_LAYERS.register_module('DCN') +class DeformConv2dPack(DeformConv2d): + """A Deformable Conv Encapsulation that acts as normal Conv layers. + + The offset tensor is like `[y0, x0, y1, x1, y2, x2, ..., y8, x8]`. + The spatial arrangement is like: + + .. code:: text + + (x0, y0) (x1, y1) (x2, y2) + (x3, y3) (x4, y4) (x5, y5) + (x6, y6) (x7, y7) (x8, y8) + + Args: + in_channels (int): Same as nn.Conv2d. + out_channels (int): Same as nn.Conv2d. + kernel_size (int or tuple[int]): Same as nn.Conv2d. + stride (int or tuple[int]): Same as nn.Conv2d. + padding (int or tuple[int]): Same as nn.Conv2d. + dilation (int or tuple[int]): Same as nn.Conv2d. + groups (int): Same as nn.Conv2d. + bias (bool or str): If specified as `auto`, it will be decided by the + norm_cfg. Bias will be set as True if norm_cfg is None, otherwise + False. + """ + + _version = 2 + + def __init__(self, *args, **kwargs): + super(DeformConv2dPack, self).__init__(*args, **kwargs) + self.conv_offset = nn.Conv2d( + self.in_channels, + self.deform_groups * 2 * self.kernel_size[0] * self.kernel_size[1], + kernel_size=self.kernel_size, + stride=_pair(self.stride), + padding=_pair(self.padding), + dilation=_pair(self.dilation), + bias=True) + self.init_offset() + + def init_offset(self): + self.conv_offset.weight.data.zero_() + self.conv_offset.bias.data.zero_() + + def forward(self, x): + offset = self.conv_offset(x) + return deform_conv2d(x, offset, self.weight, self.stride, self.padding, + self.dilation, self.groups, self.deform_groups, + False, self.im2col_step) + + def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, + missing_keys, unexpected_keys, error_msgs): + version = local_metadata.get('version', None) + + if version is None or version < 2: + # the key is different in early versions + # In version < 2, DeformConvPack loads previous benchmark models. 
+ if (prefix + 'conv_offset.weight' not in state_dict + and prefix[:-1] + '_offset.weight' in state_dict): + state_dict[prefix + 'conv_offset.weight'] = state_dict.pop( + prefix[:-1] + '_offset.weight') + if (prefix + 'conv_offset.bias' not in state_dict + and prefix[:-1] + '_offset.bias' in state_dict): + state_dict[prefix + + 'conv_offset.bias'] = state_dict.pop(prefix[:-1] + + '_offset.bias') + + if version is not None and version > 1: + print_log( + f'DeformConv2dPack {prefix.rstrip(".")} is upgraded to ' + 'version 2.', + logger='root') + + super()._load_from_state_dict(state_dict, prefix, local_metadata, + strict, missing_keys, unexpected_keys, + error_msgs) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/deform_roi_pool.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/deform_roi_pool.py new file mode 100644 index 0000000000000000000000000000000000000000..cc245ba91fee252226ba22e76bb94a35db9a629b --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/deform_roi_pool.py @@ -0,0 +1,204 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from torch import nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', ['deform_roi_pool_forward', 'deform_roi_pool_backward']) + + +class DeformRoIPoolFunction(Function): + + @staticmethod + def symbolic(g, input, rois, offset, output_size, spatial_scale, + sampling_ratio, gamma): + return g.op( + 'mmcv::MMCVDeformRoIPool', + input, + rois, + offset, + pooled_height_i=output_size[0], + pooled_width_i=output_size[1], + spatial_scale_f=spatial_scale, + sampling_ratio_f=sampling_ratio, + gamma_f=gamma) + + @staticmethod + def forward(ctx, + input, + rois, + offset, + output_size, + spatial_scale=1.0, + sampling_ratio=0, + gamma=0.1): + if offset is None: + offset = input.new_zeros(0) + ctx.output_size = _pair(output_size) + ctx.spatial_scale = float(spatial_scale) + ctx.sampling_ratio = int(sampling_ratio) + ctx.gamma = float(gamma) + + assert rois.size(1) == 5, 'RoI must be (idx, x1, y1, x2, y2)!' 
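        # Each row of `rois` is (batch_index, x1, y1, x2, y2); the box
        # coordinates are given at input-image scale and are mapped onto this
        # feature map by `spatial_scale` inside the CUDA kernel.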
+ + output_shape = (rois.size(0), input.size(1), ctx.output_size[0], + ctx.output_size[1]) + output = input.new_zeros(output_shape) + + ext_module.deform_roi_pool_forward( + input, + rois, + offset, + output, + pooled_height=ctx.output_size[0], + pooled_width=ctx.output_size[1], + spatial_scale=ctx.spatial_scale, + sampling_ratio=ctx.sampling_ratio, + gamma=ctx.gamma) + + ctx.save_for_backward(input, rois, offset) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + input, rois, offset = ctx.saved_tensors + grad_input = grad_output.new_zeros(input.shape) + grad_offset = grad_output.new_zeros(offset.shape) + + ext_module.deform_roi_pool_backward( + grad_output, + input, + rois, + offset, + grad_input, + grad_offset, + pooled_height=ctx.output_size[0], + pooled_width=ctx.output_size[1], + spatial_scale=ctx.spatial_scale, + sampling_ratio=ctx.sampling_ratio, + gamma=ctx.gamma) + if grad_offset.numel() == 0: + grad_offset = None + return grad_input, None, grad_offset, None, None, None, None + + +deform_roi_pool = DeformRoIPoolFunction.apply + + +class DeformRoIPool(nn.Module): + + def __init__(self, + output_size, + spatial_scale=1.0, + sampling_ratio=0, + gamma=0.1): + super(DeformRoIPool, self).__init__() + self.output_size = _pair(output_size) + self.spatial_scale = float(spatial_scale) + self.sampling_ratio = int(sampling_ratio) + self.gamma = float(gamma) + + def forward(self, input, rois, offset=None): + return deform_roi_pool(input, rois, offset, self.output_size, + self.spatial_scale, self.sampling_ratio, + self.gamma) + + +class DeformRoIPoolPack(DeformRoIPool): + + def __init__(self, + output_size, + output_channels, + deform_fc_channels=1024, + spatial_scale=1.0, + sampling_ratio=0, + gamma=0.1): + super(DeformRoIPoolPack, self).__init__(output_size, spatial_scale, + sampling_ratio, gamma) + + self.output_channels = output_channels + self.deform_fc_channels = deform_fc_channels + + self.offset_fc = nn.Sequential( + nn.Linear( + self.output_size[0] * self.output_size[1] * + self.output_channels, self.deform_fc_channels), + nn.ReLU(inplace=True), + nn.Linear(self.deform_fc_channels, self.deform_fc_channels), + nn.ReLU(inplace=True), + nn.Linear(self.deform_fc_channels, + self.output_size[0] * self.output_size[1] * 2)) + self.offset_fc[-1].weight.data.zero_() + self.offset_fc[-1].bias.data.zero_() + + def forward(self, input, rois): + assert input.size(1) == self.output_channels + x = deform_roi_pool(input, rois, None, self.output_size, + self.spatial_scale, self.sampling_ratio, + self.gamma) + rois_num = rois.size(0) + offset = self.offset_fc(x.view(rois_num, -1)) + offset = offset.view(rois_num, 2, self.output_size[0], + self.output_size[1]) + return deform_roi_pool(input, rois, offset, self.output_size, + self.spatial_scale, self.sampling_ratio, + self.gamma) + + +class ModulatedDeformRoIPoolPack(DeformRoIPool): + + def __init__(self, + output_size, + output_channels, + deform_fc_channels=1024, + spatial_scale=1.0, + sampling_ratio=0, + gamma=0.1): + super(ModulatedDeformRoIPoolPack, + self).__init__(output_size, spatial_scale, sampling_ratio, gamma) + + self.output_channels = output_channels + self.deform_fc_channels = deform_fc_channels + + self.offset_fc = nn.Sequential( + nn.Linear( + self.output_size[0] * self.output_size[1] * + self.output_channels, self.deform_fc_channels), + nn.ReLU(inplace=True), + nn.Linear(self.deform_fc_channels, self.deform_fc_channels), + nn.ReLU(inplace=True), + nn.Linear(self.deform_fc_channels, + 
self.output_size[0] * self.output_size[1] * 2)) + self.offset_fc[-1].weight.data.zero_() + self.offset_fc[-1].bias.data.zero_() + + self.mask_fc = nn.Sequential( + nn.Linear( + self.output_size[0] * self.output_size[1] * + self.output_channels, self.deform_fc_channels), + nn.ReLU(inplace=True), + nn.Linear(self.deform_fc_channels, + self.output_size[0] * self.output_size[1] * 1), + nn.Sigmoid()) + self.mask_fc[2].weight.data.zero_() + self.mask_fc[2].bias.data.zero_() + + def forward(self, input, rois): + assert input.size(1) == self.output_channels + x = deform_roi_pool(input, rois, None, self.output_size, + self.spatial_scale, self.sampling_ratio, + self.gamma) + rois_num = rois.size(0) + offset = self.offset_fc(x.view(rois_num, -1)) + offset = offset.view(rois_num, 2, self.output_size[0], + self.output_size[1]) + mask = self.mask_fc(x.view(rois_num, -1)) + mask = mask.view(rois_num, 1, self.output_size[0], self.output_size[1]) + d = deform_roi_pool(input, rois, offset, self.output_size, + self.spatial_scale, self.sampling_ratio, + self.gamma) + return d * mask diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/deprecated_wrappers.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/deprecated_wrappers.py new file mode 100644 index 0000000000000000000000000000000000000000..a2e593df9ee57637038683d7a1efaa347b2b69e7 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/deprecated_wrappers.py @@ -0,0 +1,43 @@ +# Copyright (c) OpenMMLab. All rights reserved. +# This file is for backward compatibility. +# Module wrappers for empty tensor have been moved to mmcv.cnn.bricks. +import warnings + +from ..cnn.bricks.wrappers import Conv2d, ConvTranspose2d, Linear, MaxPool2d + + +class Conv2d_deprecated(Conv2d): + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + warnings.warn( + 'Importing Conv2d wrapper from "mmcv.ops" will be deprecated in' + ' the future. Please import them from "mmcv.cnn" instead') + + +class ConvTranspose2d_deprecated(ConvTranspose2d): + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + warnings.warn( + 'Importing ConvTranspose2d wrapper from "mmcv.ops" will be ' + 'deprecated in the future. Please import them from "mmcv.cnn" ' + 'instead') + + +class MaxPool2d_deprecated(MaxPool2d): + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + warnings.warn( + 'Importing MaxPool2d wrapper from "mmcv.ops" will be deprecated in' + ' the future. Please import them from "mmcv.cnn" instead') + + +class Linear_deprecated(Linear): + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + warnings.warn( + 'Importing Linear wrapper from "mmcv.ops" will be deprecated in' + ' the future. Please import them from "mmcv.cnn" instead') diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/focal_loss.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/focal_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..763bc93bd2575c49ca8ccf20996bbd92d1e0d1a4 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/focal_loss.py @@ -0,0 +1,212 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
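# A minimal usage sketch for the deformable RoI pooling layers defined in
# deform_roi_pool.py above, assuming the CUDA `_ext` ops are built (shapes are
# made up).
pool = DeformRoIPoolPack(output_size=7, output_channels=16).cuda()
feats = torch.randn(1, 16, 32, 32).cuda()
rois = torch.tensor([[0., 4., 4., 20., 20.]]).cuda()  # (batch_index, x1, y1, x2, y2)
out = pool(feats, rois)  # -> (1, 16, 7, 7)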
+import torch +import torch.nn as nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', [ + 'sigmoid_focal_loss_forward', 'sigmoid_focal_loss_backward', + 'softmax_focal_loss_forward', 'softmax_focal_loss_backward' +]) + + +class SigmoidFocalLossFunction(Function): + + @staticmethod + def symbolic(g, input, target, gamma, alpha, weight, reduction): + return g.op( + 'mmcv::MMCVSigmoidFocalLoss', + input, + target, + gamma_f=gamma, + alpha_f=alpha, + weight_f=weight, + reduction_s=reduction) + + @staticmethod + def forward(ctx, + input, + target, + gamma=2.0, + alpha=0.25, + weight=None, + reduction='mean'): + + assert isinstance(target, (torch.LongTensor, torch.cuda.LongTensor)) + assert input.dim() == 2 + assert target.dim() == 1 + assert input.size(0) == target.size(0) + if weight is None: + weight = input.new_empty(0) + else: + assert weight.dim() == 1 + assert input.size(1) == weight.size(0) + ctx.reduction_dict = {'none': 0, 'mean': 1, 'sum': 2} + assert reduction in ctx.reduction_dict.keys() + + ctx.gamma = float(gamma) + ctx.alpha = float(alpha) + ctx.reduction = ctx.reduction_dict[reduction] + + output = input.new_zeros(input.size()) + + ext_module.sigmoid_focal_loss_forward( + input, target, weight, output, gamma=ctx.gamma, alpha=ctx.alpha) + if ctx.reduction == ctx.reduction_dict['mean']: + output = output.sum() / input.size(0) + elif ctx.reduction == ctx.reduction_dict['sum']: + output = output.sum() + ctx.save_for_backward(input, target, weight) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + input, target, weight = ctx.saved_tensors + + grad_input = input.new_zeros(input.size()) + + ext_module.sigmoid_focal_loss_backward( + input, + target, + weight, + grad_input, + gamma=ctx.gamma, + alpha=ctx.alpha) + + grad_input *= grad_output + if ctx.reduction == ctx.reduction_dict['mean']: + grad_input /= input.size(0) + return grad_input, None, None, None, None, None + + +sigmoid_focal_loss = SigmoidFocalLossFunction.apply + + +class SigmoidFocalLoss(nn.Module): + + def __init__(self, gamma, alpha, weight=None, reduction='mean'): + super(SigmoidFocalLoss, self).__init__() + self.gamma = gamma + self.alpha = alpha + self.register_buffer('weight', weight) + self.reduction = reduction + + def forward(self, input, target): + return sigmoid_focal_loss(input, target, self.gamma, self.alpha, + self.weight, self.reduction) + + def __repr__(self): + s = self.__class__.__name__ + s += f'(gamma={self.gamma}, ' + s += f'alpha={self.alpha}, ' + s += f'reduction={self.reduction})' + return s + + +class SoftmaxFocalLossFunction(Function): + + @staticmethod + def symbolic(g, input, target, gamma, alpha, weight, reduction): + return g.op( + 'mmcv::MMCVSoftmaxFocalLoss', + input, + target, + gamma_f=gamma, + alpha_f=alpha, + weight_f=weight, + reduction_s=reduction) + + @staticmethod + def forward(ctx, + input, + target, + gamma=2.0, + alpha=0.25, + weight=None, + reduction='mean'): + + assert isinstance(target, (torch.LongTensor, torch.cuda.LongTensor)) + assert input.dim() == 2 + assert target.dim() == 1 + assert input.size(0) == target.size(0) + if weight is None: + weight = input.new_empty(0) + else: + assert weight.dim() == 1 + assert input.size(1) == weight.size(0) + ctx.reduction_dict = {'none': 0, 'mean': 1, 'sum': 2} + assert reduction in ctx.reduction_dict.keys() + + ctx.gamma = float(gamma) + ctx.alpha = float(alpha) + 
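        # Store the reduction mode, then compute a numerically stable softmax
        # of the logits in Python (subtract the per-row max, exponentiate,
        # normalise) before handing the probabilities to the CUDA softmax
        # focal loss kernel.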
ctx.reduction = ctx.reduction_dict[reduction] + + channel_stats, _ = torch.max(input, dim=1) + input_softmax = input - channel_stats.unsqueeze(1).expand_as(input) + input_softmax.exp_() + + channel_stats = input_softmax.sum(dim=1) + input_softmax /= channel_stats.unsqueeze(1).expand_as(input) + + output = input.new_zeros(input.size(0)) + ext_module.softmax_focal_loss_forward( + input_softmax, + target, + weight, + output, + gamma=ctx.gamma, + alpha=ctx.alpha) + + if ctx.reduction == ctx.reduction_dict['mean']: + output = output.sum() / input.size(0) + elif ctx.reduction == ctx.reduction_dict['sum']: + output = output.sum() + ctx.save_for_backward(input_softmax, target, weight) + return output + + @staticmethod + def backward(ctx, grad_output): + input_softmax, target, weight = ctx.saved_tensors + buff = input_softmax.new_zeros(input_softmax.size(0)) + grad_input = input_softmax.new_zeros(input_softmax.size()) + + ext_module.softmax_focal_loss_backward( + input_softmax, + target, + weight, + buff, + grad_input, + gamma=ctx.gamma, + alpha=ctx.alpha) + + grad_input *= grad_output + if ctx.reduction == ctx.reduction_dict['mean']: + grad_input /= input_softmax.size(0) + return grad_input, None, None, None, None, None + + +softmax_focal_loss = SoftmaxFocalLossFunction.apply + + +class SoftmaxFocalLoss(nn.Module): + + def __init__(self, gamma, alpha, weight=None, reduction='mean'): + super(SoftmaxFocalLoss, self).__init__() + self.gamma = gamma + self.alpha = alpha + self.register_buffer('weight', weight) + self.reduction = reduction + + def forward(self, input, target): + return softmax_focal_loss(input, target, self.gamma, self.alpha, + self.weight, self.reduction) + + def __repr__(self): + s = self.__class__.__name__ + s += f'(gamma={self.gamma}, ' + s += f'alpha={self.alpha}, ' + s += f'reduction={self.reduction})' + return s diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/furthest_point_sample.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/furthest_point_sample.py new file mode 100644 index 0000000000000000000000000000000000000000..374b7a878f1972c183941af28ba1df216ac1a60f --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/furthest_point_sample.py @@ -0,0 +1,83 @@ +import torch +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', [ + 'furthest_point_sampling_forward', + 'furthest_point_sampling_with_dist_forward' +]) + + +class FurthestPointSampling(Function): + """Uses iterative furthest point sampling to select a set of features whose + corresponding points have the furthest distance.""" + + @staticmethod + def forward(ctx, points_xyz: torch.Tensor, + num_points: int) -> torch.Tensor: + """ + Args: + points_xyz (Tensor): (B, N, 3) where N > num_points. + num_points (int): Number of points in the sampled set. + + Returns: + Tensor: (B, num_points) indices of the sampled points. 
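        Example:
            A minimal call, assuming the CUDA ``_ext`` ops are built::

                >>> xyz = torch.randn(2, 1024, 3).cuda()
                >>> idx = furthest_point_sample(xyz, 128)  # (2, 128) int32 indices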
+ """ + assert points_xyz.is_contiguous() + + B, N = points_xyz.size()[:2] + output = torch.cuda.IntTensor(B, num_points) + temp = torch.cuda.FloatTensor(B, N).fill_(1e10) + + ext_module.furthest_point_sampling_forward( + points_xyz, + temp, + output, + b=B, + n=N, + m=num_points, + ) + if torch.__version__ != 'parrots': + ctx.mark_non_differentiable(output) + return output + + @staticmethod + def backward(xyz, a=None): + return None, None + + +class FurthestPointSamplingWithDist(Function): + """Uses iterative furthest point sampling to select a set of features whose + corresponding points have the furthest distance.""" + + @staticmethod + def forward(ctx, points_dist: torch.Tensor, + num_points: int) -> torch.Tensor: + """ + Args: + points_dist (Tensor): (B, N, N) Distance between each point pair. + num_points (int): Number of points in the sampled set. + + Returns: + Tensor: (B, num_points) indices of the sampled points. + """ + assert points_dist.is_contiguous() + + B, N, _ = points_dist.size() + output = points_dist.new_zeros([B, num_points], dtype=torch.int32) + temp = points_dist.new_zeros([B, N]).fill_(1e10) + + ext_module.furthest_point_sampling_with_dist_forward( + points_dist, temp, output, b=B, n=N, m=num_points) + if torch.__version__ != 'parrots': + ctx.mark_non_differentiable(output) + return output + + @staticmethod + def backward(xyz, a=None): + return None, None + + +furthest_point_sample = FurthestPointSampling.apply +furthest_point_sample_with_dist = FurthestPointSamplingWithDist.apply diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/fused_bias_leakyrelu.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/fused_bias_leakyrelu.py new file mode 100644 index 0000000000000000000000000000000000000000..6d12508469c6c8fa1884debece44c58d158cb6fa --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/fused_bias_leakyrelu.py @@ -0,0 +1,268 @@ +# modified from https://github.com/rosinality/stylegan2-pytorch/blob/master/op/fused_act.py # noqa:E501 + +# Copyright (c) 2021, NVIDIA Corporation. All rights reserved. +# NVIDIA Source Code License for StyleGAN2 with Adaptive Discriminator +# Augmentation (ADA) +# ======================================================================= + +# 1. Definitions + +# "Licensor" means any person or entity that distributes its Work. + +# "Software" means the original work of authorship made available under +# this License. + +# "Work" means the Software and any additions to or derivative works of +# the Software that are made available under this License. + +# The terms "reproduce," "reproduction," "derivative works," and +# "distribution" have the meaning as provided under U.S. copyright law; +# provided, however, that for the purposes of this License, derivative +# works shall not include works that remain separable from, or merely +# link (or bind by name) to the interfaces of, the Work. + +# Works, including the Software, are "made available" under this License +# by including in or with the Work either (a) a copyright notice +# referencing the applicability of this License to the Work, or (b) a +# copy of this License. + +# 2. License Grants + +# 2.1 Copyright Grant. 
Subject to the terms and conditions of this +# License, each Licensor grants to you a perpetual, worldwide, +# non-exclusive, royalty-free, copyright license to reproduce, +# prepare derivative works of, publicly display, publicly perform, +# sublicense and distribute its Work and any resulting derivative +# works in any form. + +# 3. Limitations + +# 3.1 Redistribution. You may reproduce or distribute the Work only +# if (a) you do so under this License, (b) you include a complete +# copy of this License with your distribution, and (c) you retain +# without modification any copyright, patent, trademark, or +# attribution notices that are present in the Work. + +# 3.2 Derivative Works. You may specify that additional or different +# terms apply to the use, reproduction, and distribution of your +# derivative works of the Work ("Your Terms") only if (a) Your Terms +# provide that the use limitation in Section 3.3 applies to your +# derivative works, and (b) you identify the specific derivative +# works that are subject to Your Terms. Notwithstanding Your Terms, +# this License (including the redistribution requirements in Section +# 3.1) will continue to apply to the Work itself. + +# 3.3 Use Limitation. The Work and any derivative works thereof only +# may be used or intended for use non-commercially. Notwithstanding +# the foregoing, NVIDIA and its affiliates may use the Work and any +# derivative works commercially. As used herein, "non-commercially" +# means for research or evaluation purposes only. + +# 3.4 Patent Claims. If you bring or threaten to bring a patent claim +# against any Licensor (including any claim, cross-claim or +# counterclaim in a lawsuit) to enforce any patents that you allege +# are infringed by any Work, then your rights under this License from +# such Licensor (including the grant in Section 2.1) will terminate +# immediately. + +# 3.5 Trademarks. This License does not grant any rights to use any +# Licensor’s or its affiliates’ names, logos, or trademarks, except +# as necessary to reproduce the notices described in this License. + +# 3.6 Termination. If you violate any term of this License, then your +# rights under this License (including the grant in Section 2.1) will +# terminate immediately. + +# 4. Disclaimer of Warranty. + +# THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR +# NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER +# THIS LICENSE. + +# 5. Limitation of Liability. + +# EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL +# THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE +# SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, +# INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF +# OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK +# (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, +# LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER +# COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGES. 
+ +# ======================================================================= + +import torch +import torch.nn.functional as F +from torch import nn +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['fused_bias_leakyrelu']) + + +class FusedBiasLeakyReLUFunctionBackward(Function): + """Calculate second order deviation. + + This function is to compute the second order deviation for the fused leaky + relu operation. + """ + + @staticmethod + def forward(ctx, grad_output, out, negative_slope, scale): + ctx.save_for_backward(out) + ctx.negative_slope = negative_slope + ctx.scale = scale + + empty = grad_output.new_empty(0) + + grad_input = ext_module.fused_bias_leakyrelu( + grad_output, + empty, + out, + act=3, + grad=1, + alpha=negative_slope, + scale=scale) + + dim = [0] + + if grad_input.ndim > 2: + dim += list(range(2, grad_input.ndim)) + + grad_bias = grad_input.sum(dim).detach() + + return grad_input, grad_bias + + @staticmethod + def backward(ctx, gradgrad_input, gradgrad_bias): + out, = ctx.saved_tensors + + # The second order deviation, in fact, contains two parts, while the + # the first part is zero. Thus, we direct consider the second part + # which is similar with the first order deviation in implementation. + gradgrad_out = ext_module.fused_bias_leakyrelu( + gradgrad_input, + gradgrad_bias.to(out.dtype), + out, + act=3, + grad=1, + alpha=ctx.negative_slope, + scale=ctx.scale) + + return gradgrad_out, None, None, None + + +class FusedBiasLeakyReLUFunction(Function): + + @staticmethod + def forward(ctx, input, bias, negative_slope, scale): + empty = input.new_empty(0) + + out = ext_module.fused_bias_leakyrelu( + input, + bias, + empty, + act=3, + grad=0, + alpha=negative_slope, + scale=scale) + ctx.save_for_backward(out) + ctx.negative_slope = negative_slope + ctx.scale = scale + + return out + + @staticmethod + def backward(ctx, grad_output): + out, = ctx.saved_tensors + + grad_input, grad_bias = FusedBiasLeakyReLUFunctionBackward.apply( + grad_output, out, ctx.negative_slope, ctx.scale) + + return grad_input, grad_bias, None, None + + +class FusedBiasLeakyReLU(nn.Module): + """Fused bias leaky ReLU. + + This function is introduced in the StyleGAN2: + http://arxiv.org/abs/1912.04958 + + The bias term comes from the convolution operation. In addition, to keep + the variance of the feature map or gradients unchanged, they also adopt a + scale similarly with Kaiming initialization. However, since the + :math:`1+{alpha}^2` : is too small, we can just ignore it. Therefore, the + final scale is just :math:`\sqrt{2}`:. Of course, you may change it with # noqa: W605, E501 + your own scale. + + TODO: Implement the CPU version. + + Args: + channel (int): The channel number of the feature map. + negative_slope (float, optional): Same as nn.LeakyRelu. + Defaults to 0.2. + scale (float, optional): A scalar to adjust the variance of the feature + map. Defaults to 2**0.5. + """ + + def __init__(self, num_channels, negative_slope=0.2, scale=2**0.5): + super(FusedBiasLeakyReLU, self).__init__() + + self.bias = nn.Parameter(torch.zeros(num_channels)) + self.negative_slope = negative_slope + self.scale = scale + + def forward(self, input): + return fused_bias_leakyrelu(input, self.bias, self.negative_slope, + self.scale) + + +def fused_bias_leakyrelu(input, bias, negative_slope=0.2, scale=2**0.5): + """Fused bias leaky ReLU function. 
+ + This function is introduced in the StyleGAN2: + http://arxiv.org/abs/1912.04958 + + The bias term comes from the convolution operation. In addition, to keep + the variance of the feature map or gradients unchanged, they also adopt a + scale similarly with Kaiming initialization. However, since the + :math:`1+{alpha}^2` : is too small, we can just ignore it. Therefore, the + final scale is just :math:`\sqrt{2}`:. Of course, you may change it with # noqa: W605, E501 + your own scale. + + Args: + input (torch.Tensor): Input feature map. + bias (nn.Parameter): The bias from convolution operation. + negative_slope (float, optional): Same as nn.LeakyRelu. + Defaults to 0.2. + scale (float, optional): A scalar to adjust the variance of the feature + map. Defaults to 2**0.5. + + Returns: + torch.Tensor: Feature map after non-linear activation. + """ + + if not input.is_cuda: + return bias_leakyrelu_ref(input, bias, negative_slope, scale) + + return FusedBiasLeakyReLUFunction.apply(input, bias.to(input.dtype), + negative_slope, scale) + + +def bias_leakyrelu_ref(x, bias, negative_slope=0.2, scale=2**0.5): + + if bias is not None: + assert bias.ndim == 1 + assert bias.shape[0] == x.shape[1] + x = x + bias.reshape([-1 if i == 1 else 1 for i in range(x.ndim)]) + + x = F.leaky_relu(x, negative_slope) + if scale != 1: + x = x * scale + + return x diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/gather_points.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/gather_points.py new file mode 100644 index 0000000000000000000000000000000000000000..f52f1677d8ea0facafc56a3672d37adb44677ff3 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/gather_points.py @@ -0,0 +1,57 @@ +import torch +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', ['gather_points_forward', 'gather_points_backward']) + + +class GatherPoints(Function): + """Gather points with given index.""" + + @staticmethod + def forward(ctx, features: torch.Tensor, + indices: torch.Tensor) -> torch.Tensor: + """ + Args: + features (Tensor): (B, C, N) features to gather. + indices (Tensor): (B, M) where M is the number of points. + + Returns: + Tensor: (B, C, M) where M is the number of points. 
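        Example:
            A minimal call, assuming the CUDA ``_ext`` ops are built::

                >>> feats = torch.randn(2, 32, 1024).cuda()
                >>> idx = torch.randint(0, 1024, (2, 128)).int().cuda()
                >>> new_feats = gather_points(feats, idx)  # -> (2, 32, 128)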
+ """ + assert features.is_contiguous() + assert indices.is_contiguous() + + B, npoint = indices.size() + _, C, N = features.size() + output = torch.cuda.FloatTensor(B, C, npoint) + + ext_module.gather_points_forward( + features, indices, output, b=B, c=C, n=N, npoints=npoint) + + ctx.for_backwards = (indices, C, N) + if torch.__version__ != 'parrots': + ctx.mark_non_differentiable(indices) + return output + + @staticmethod + def backward(ctx, grad_out): + idx, C, N = ctx.for_backwards + B, npoint = idx.size() + + grad_features = torch.cuda.FloatTensor(B, C, N).zero_() + grad_out_data = grad_out.data.contiguous() + ext_module.gather_points_backward( + grad_out_data, + idx, + grad_features.data, + b=B, + c=C, + n=N, + npoints=npoint) + return grad_features, None + + +gather_points = GatherPoints.apply diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/group_points.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/group_points.py new file mode 100644 index 0000000000000000000000000000000000000000..6c3ec9d758ebe4e1c2205882af4be154008253a5 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/group_points.py @@ -0,0 +1,224 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from typing import Tuple + +import torch +from torch import nn as nn +from torch.autograd import Function + +from ..utils import ext_loader +from .ball_query import ball_query +from .knn import knn + +ext_module = ext_loader.load_ext( + '_ext', ['group_points_forward', 'group_points_backward']) + + +class QueryAndGroup(nn.Module): + """Groups points with a ball query of radius. + + Args: + max_radius (float): The maximum radius of the balls. + If None is given, we will use kNN sampling instead of ball query. + sample_num (int): Maximum number of features to gather in the ball. + min_radius (float, optional): The minimum radius of the balls. + Default: 0. + use_xyz (bool, optional): Whether to use xyz. + Default: True. + return_grouped_xyz (bool, optional): Whether to return grouped xyz. + Default: False. + normalize_xyz (bool, optional): Whether to normalize xyz. + Default: False. + uniform_sample (bool, optional): Whether to sample uniformly. + Default: False + return_unique_cnt (bool, optional): Whether to return the count of + unique samples. Default: False. + return_grouped_idx (bool, optional): Whether to return grouped idx. + Default: False. + """ + + def __init__(self, + max_radius, + sample_num, + min_radius=0, + use_xyz=True, + return_grouped_xyz=False, + normalize_xyz=False, + uniform_sample=False, + return_unique_cnt=False, + return_grouped_idx=False): + super().__init__() + self.max_radius = max_radius + self.min_radius = min_radius + self.sample_num = sample_num + self.use_xyz = use_xyz + self.return_grouped_xyz = return_grouped_xyz + self.normalize_xyz = normalize_xyz + self.uniform_sample = uniform_sample + self.return_unique_cnt = return_unique_cnt + self.return_grouped_idx = return_grouped_idx + if self.return_unique_cnt: + assert self.uniform_sample, \ + 'uniform_sample should be True when ' \ + 'returning the count of unique samples' + if self.max_radius is None: + assert not self.normalize_xyz, \ + 'can not normalize grouped xyz when max_radius is None' + + def forward(self, points_xyz, center_xyz, features=None): + """ + Args: + points_xyz (Tensor): (B, N, 3) xyz coordinates of the features. + center_xyz (Tensor): (B, npoint, 3) coordinates of the centriods. 
+ features (Tensor): (B, C, N) Descriptors of the features. + + Returns: + Tensor: (B, 3 + C, npoint, sample_num) Grouped feature. + """ + # if self.max_radius is None, we will perform kNN instead of ball query + # idx is of shape [B, npoint, sample_num] + if self.max_radius is None: + idx = knn(self.sample_num, points_xyz, center_xyz, False) + idx = idx.transpose(1, 2).contiguous() + else: + idx = ball_query(self.min_radius, self.max_radius, self.sample_num, + points_xyz, center_xyz) + + if self.uniform_sample: + unique_cnt = torch.zeros((idx.shape[0], idx.shape[1])) + for i_batch in range(idx.shape[0]): + for i_region in range(idx.shape[1]): + unique_ind = torch.unique(idx[i_batch, i_region, :]) + num_unique = unique_ind.shape[0] + unique_cnt[i_batch, i_region] = num_unique + sample_ind = torch.randint( + 0, + num_unique, (self.sample_num - num_unique, ), + dtype=torch.long) + all_ind = torch.cat((unique_ind, unique_ind[sample_ind])) + idx[i_batch, i_region, :] = all_ind + + xyz_trans = points_xyz.transpose(1, 2).contiguous() + # (B, 3, npoint, sample_num) + grouped_xyz = grouping_operation(xyz_trans, idx) + grouped_xyz_diff = grouped_xyz - \ + center_xyz.transpose(1, 2).unsqueeze(-1) # relative offsets + if self.normalize_xyz: + grouped_xyz_diff /= self.max_radius + + if features is not None: + grouped_features = grouping_operation(features, idx) + if self.use_xyz: + # (B, C + 3, npoint, sample_num) + new_features = torch.cat([grouped_xyz_diff, grouped_features], + dim=1) + else: + new_features = grouped_features + else: + assert (self.use_xyz + ), 'Cannot have not features and not use xyz as a feature!' + new_features = grouped_xyz_diff + + ret = [new_features] + if self.return_grouped_xyz: + ret.append(grouped_xyz) + if self.return_unique_cnt: + ret.append(unique_cnt) + if self.return_grouped_idx: + ret.append(idx) + if len(ret) == 1: + return ret[0] + else: + return tuple(ret) + + +class GroupAll(nn.Module): + """Group xyz with feature. + + Args: + use_xyz (bool): Whether to use xyz. + """ + + def __init__(self, use_xyz: bool = True): + super().__init__() + self.use_xyz = use_xyz + + def forward(self, + xyz: torch.Tensor, + new_xyz: torch.Tensor, + features: torch.Tensor = None): + """ + Args: + xyz (Tensor): (B, N, 3) xyz coordinates of the features. + new_xyz (Tensor): new xyz coordinates of the features. + features (Tensor): (B, C, N) features to group. + + Returns: + Tensor: (B, C + 3, 1, N) Grouped feature. + """ + grouped_xyz = xyz.transpose(1, 2).unsqueeze(2) + if features is not None: + grouped_features = features.unsqueeze(2) + if self.use_xyz: + # (B, 3 + C, 1, N) + new_features = torch.cat([grouped_xyz, grouped_features], + dim=1) + else: + new_features = grouped_features + else: + new_features = grouped_xyz + + return new_features + + +class GroupingOperation(Function): + """Group feature with given index.""" + + @staticmethod + def forward(ctx, features: torch.Tensor, + indices: torch.Tensor) -> torch.Tensor: + """ + Args: + features (Tensor): (B, C, N) tensor of features to group. + indices (Tensor): (B, npoint, nsample) the indices of + features to group with. + + Returns: + Tensor: (B, C, npoint, nsample) Grouped features. 
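        Example:
            A minimal call, assuming the CUDA ``_ext`` ops are built::

                >>> feats = torch.randn(2, 32, 1024).cuda()
                >>> idx = torch.randint(0, 1024, (2, 64, 16)).int().cuda()
                >>> grouped = grouping_operation(feats, idx)  # -> (2, 32, 64, 16)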
+ """ + features = features.contiguous() + indices = indices.contiguous() + + B, nfeatures, nsample = indices.size() + _, C, N = features.size() + output = torch.cuda.FloatTensor(B, C, nfeatures, nsample) + + ext_module.group_points_forward(B, C, N, nfeatures, nsample, features, + indices, output) + + ctx.for_backwards = (indices, N) + return output + + @staticmethod + def backward(ctx, + grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Args: + grad_out (Tensor): (B, C, npoint, nsample) tensor of the gradients + of the output from forward. + + Returns: + Tensor: (B, C, N) gradient of the features. + """ + idx, N = ctx.for_backwards + + B, C, npoint, nsample = grad_out.size() + grad_features = torch.cuda.FloatTensor(B, C, N).zero_() + + grad_out_data = grad_out.data.contiguous() + ext_module.group_points_backward(B, C, N, npoint, nsample, + grad_out_data, idx, + grad_features.data) + return grad_features, None + + +grouping_operation = GroupingOperation.apply diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/info.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/info.py new file mode 100644 index 0000000000000000000000000000000000000000..29f2e5598ae2bb5866ccd15a7d3b4de33c0cd14d --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/info.py @@ -0,0 +1,36 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import glob +import os + +import torch + +if torch.__version__ == 'parrots': + import parrots + + def get_compiler_version(): + return 'GCC ' + parrots.version.compiler + + def get_compiling_cuda_version(): + return parrots.version.cuda +else: + from ..utils import ext_loader + ext_module = ext_loader.load_ext( + '_ext', ['get_compiler_version', 'get_compiling_cuda_version']) + + def get_compiler_version(): + return ext_module.get_compiler_version() + + def get_compiling_cuda_version(): + return ext_module.get_compiling_cuda_version() + + +def get_onnxruntime_op_path(): + wildcard = os.path.join( + os.path.abspath(os.path.dirname(os.path.dirname(__file__))), + '_ext_ort.*.so') + + paths = glob.glob(wildcard) + if len(paths) > 0: + return paths[0] + else: + return '' diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/iou3d.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/iou3d.py new file mode 100644 index 0000000000000000000000000000000000000000..6fc71979190323f44c09f8b7e1761cf49cd2d76b --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/iou3d.py @@ -0,0 +1,85 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', [ + 'iou3d_boxes_iou_bev_forward', 'iou3d_nms_forward', + 'iou3d_nms_normal_forward' +]) + + +def boxes_iou_bev(boxes_a, boxes_b): + """Calculate boxes IoU in the Bird's Eye View. + + Args: + boxes_a (torch.Tensor): Input boxes a with shape (M, 5). + boxes_b (torch.Tensor): Input boxes b with shape (N, 5). + + Returns: + ans_iou (torch.Tensor): IoU result with shape (M, N). + """ + ans_iou = boxes_a.new_zeros( + torch.Size((boxes_a.shape[0], boxes_b.shape[0]))) + + ext_module.iou3d_boxes_iou_bev_forward(boxes_a.contiguous(), + boxes_b.contiguous(), ans_iou) + + return ans_iou + + +def nms_bev(boxes, scores, thresh, pre_max_size=None, post_max_size=None): + """NMS function GPU implementation (for BEV boxes). 
The overlap of two + boxes for IoU calculation is defined as the exact overlapping area of the + two boxes. In this function, one can also set ``pre_max_size`` and + ``post_max_size``. + + Args: + boxes (torch.Tensor): Input boxes with the shape of [N, 5] + ([x1, y1, x2, y2, ry]). + scores (torch.Tensor): Scores of boxes with the shape of [N]. + thresh (float): Overlap threshold of NMS. + pre_max_size (int, optional): Max size of boxes before NMS. + Default: None. + post_max_size (int, optional): Max size of boxes after NMS. + Default: None. + + Returns: + torch.Tensor: Indexes after NMS. + """ + assert boxes.size(1) == 5, 'Input boxes shape should be [N, 5]' + order = scores.sort(0, descending=True)[1] + + if pre_max_size is not None: + order = order[:pre_max_size] + boxes = boxes[order].contiguous() + + keep = torch.zeros(boxes.size(0), dtype=torch.long) + num_out = ext_module.iou3d_nms_forward(boxes, keep, thresh) + keep = order[keep[:num_out].cuda(boxes.device)].contiguous() + if post_max_size is not None: + keep = keep[:post_max_size] + return keep + + +def nms_normal_bev(boxes, scores, thresh): + """Normal NMS function GPU implementation (for BEV boxes). The overlap of + two boxes for IoU calculation is defined as the exact overlapping area of + the two boxes WITH their yaw angle set to 0. + + Args: + boxes (torch.Tensor): Input boxes with shape (N, 5). + scores (torch.Tensor): Scores of predicted boxes with shape (N). + thresh (float): Overlap threshold of NMS. + + Returns: + torch.Tensor: Remaining indices with scores in descending order. + """ + assert boxes.shape[1] == 5, 'Input boxes shape should be [N, 5]' + order = scores.sort(0, descending=True)[1] + + boxes = boxes[order].contiguous() + + keep = torch.zeros(boxes.size(0), dtype=torch.long) + num_out = ext_module.iou3d_nms_normal_forward(boxes, keep, thresh) + return order[keep[:num_out].cuda(boxes.device)].contiguous() diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/knn.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/knn.py new file mode 100644 index 0000000000000000000000000000000000000000..f335785036669fc19239825b0aae6dde3f73bf92 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/knn.py @@ -0,0 +1,77 @@ +import torch +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['knn_forward']) + + +class KNN(Function): + r"""KNN (CUDA) based on heap data structure. + Modified from `PAConv `_. + + Find k-nearest points. + """ + + @staticmethod + def forward(ctx, + k: int, + xyz: torch.Tensor, + center_xyz: torch.Tensor = None, + transposed: bool = False) -> torch.Tensor: + """ + Args: + k (int): number of nearest neighbors. + xyz (Tensor): (B, N, 3) if transposed == False, else (B, 3, N). + xyz coordinates of the features. + center_xyz (Tensor, optional): (B, npoint, 3) if transposed == + False, else (B, 3, npoint). centers of the knn query. + Default: None. + transposed (bool, optional): whether the input tensors are + transposed. Should not explicitly use this keyword when + calling knn (=KNN.apply), just add the fourth param. + Default: False. + + Returns: + Tensor: (B, k, npoint) tensor with the indices of + the features that form k-nearest neighbours. 
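+
+        Example:
+            Illustrative sketch only, not taken from the upstream docs; the
+            sizes are assumptions and the op runs on GPU tensors.
+
+            >>> xyz = torch.randn(2, 1024, 3).cuda()        # (B, N, 3)
+            >>> centers = torch.randn(2, 256, 3).cuda()     # (B, npoint, 3)
+            >>> idx = knn(16, xyz, centers)                 # (B, 16, 256) indices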
+ """ + assert (k > 0) & (k < 100), 'k should be in range(0, 100)' + + if center_xyz is None: + center_xyz = xyz + + if transposed: + xyz = xyz.transpose(2, 1).contiguous() + center_xyz = center_xyz.transpose(2, 1).contiguous() + + assert xyz.is_contiguous() # [B, N, 3] + assert center_xyz.is_contiguous() # [B, npoint, 3] + + center_xyz_device = center_xyz.get_device() + assert center_xyz_device == xyz.get_device(), \ + 'center_xyz and xyz should be put on the same device' + if torch.cuda.current_device() != center_xyz_device: + torch.cuda.set_device(center_xyz_device) + + B, npoint, _ = center_xyz.shape + N = xyz.shape[1] + + idx = center_xyz.new_zeros((B, npoint, k)).int() + dist2 = center_xyz.new_zeros((B, npoint, k)).float() + + ext_module.knn_forward( + xyz, center_xyz, idx, dist2, b=B, n=N, m=npoint, nsample=k) + # idx shape to [B, k, npoint] + idx = idx.transpose(2, 1).contiguous() + if torch.__version__ != 'parrots': + ctx.mark_non_differentiable(idx) + return idx + + @staticmethod + def backward(ctx, a=None): + return None, None, None + + +knn = KNN.apply diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/masked_conv.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/masked_conv.py new file mode 100644 index 0000000000000000000000000000000000000000..cd514cc204c1d571ea5dc7e74b038c0f477a008b --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/masked_conv.py @@ -0,0 +1,111 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import math + +import torch +import torch.nn as nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', ['masked_im2col_forward', 'masked_col2im_forward']) + + +class MaskedConv2dFunction(Function): + + @staticmethod + def symbolic(g, features, mask, weight, bias, padding, stride): + return g.op( + 'mmcv::MMCVMaskedConv2d', + features, + mask, + weight, + bias, + padding_i=padding, + stride_i=stride) + + @staticmethod + def forward(ctx, features, mask, weight, bias, padding=0, stride=1): + assert mask.dim() == 3 and mask.size(0) == 1 + assert features.dim() == 4 and features.size(0) == 1 + assert features.size()[2:] == mask.size()[1:] + pad_h, pad_w = _pair(padding) + stride_h, stride_w = _pair(stride) + if stride_h != 1 or stride_w != 1: + raise ValueError( + 'Stride could not only be 1 in masked_conv2d currently.') + out_channel, in_channel, kernel_h, kernel_w = weight.size() + + batch_size = features.size(0) + out_h = int( + math.floor((features.size(2) + 2 * pad_h - + (kernel_h - 1) - 1) / stride_h + 1)) + out_w = int( + math.floor((features.size(3) + 2 * pad_w - + (kernel_h - 1) - 1) / stride_w + 1)) + mask_inds = torch.nonzero(mask[0] > 0, as_tuple=False) + output = features.new_zeros(batch_size, out_channel, out_h, out_w) + if mask_inds.numel() > 0: + mask_h_idx = mask_inds[:, 0].contiguous() + mask_w_idx = mask_inds[:, 1].contiguous() + data_col = features.new_zeros(in_channel * kernel_h * kernel_w, + mask_inds.size(0)) + ext_module.masked_im2col_forward( + features, + mask_h_idx, + mask_w_idx, + data_col, + kernel_h=kernel_h, + kernel_w=kernel_w, + pad_h=pad_h, + pad_w=pad_w) + + masked_output = torch.addmm(1, bias[:, None], 1, + weight.view(out_channel, -1), data_col) + ext_module.masked_col2im_forward( + masked_output, + mask_h_idx, + mask_w_idx, + output, + height=out_h, + width=out_w, + 
channels=out_channel) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + return (None, ) * 5 + + +masked_conv2d = MaskedConv2dFunction.apply + + +class MaskedConv2d(nn.Conv2d): + """A MaskedConv2d which inherits the official Conv2d. + + The masked forward doesn't implement the backward function and only + supports the stride parameter to be 1 currently. + """ + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + bias=True): + super(MaskedConv2d, + self).__init__(in_channels, out_channels, kernel_size, stride, + padding, dilation, groups, bias) + + def forward(self, input, mask=None): + if mask is None: # fallback to the normal Conv2d + return super(MaskedConv2d, self).forward(input) + else: + return masked_conv2d(input, mask, self.weight, self.bias, + self.padding) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/merge_cells.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/merge_cells.py new file mode 100644 index 0000000000000000000000000000000000000000..48ca8cc0a8aca8432835bd760c0403a3c35b34cf --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/merge_cells.py @@ -0,0 +1,149 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from abc import abstractmethod + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ..cnn import ConvModule + + +class BaseMergeCell(nn.Module): + """The basic class for cells used in NAS-FPN and NAS-FCOS. + + BaseMergeCell takes 2 inputs. After applying convolution + on them, they are resized to the target size. Then, + they go through binary_op, which depends on the type of cell. + If with_out_conv is True, the result of output will go through + another convolution layer. + + Args: + in_channels (int): number of input channels in out_conv layer. + out_channels (int): number of output channels in out_conv layer. + with_out_conv (bool): Whether to use out_conv layer + out_conv_cfg (dict): Config dict for convolution layer, which should + contain "groups", "kernel_size", "padding", "bias" to build + out_conv layer. + out_norm_cfg (dict): Config dict for normalization layer in out_conv. + out_conv_order (tuple): The order of conv/norm/activation layers in + out_conv. + with_input1_conv (bool): Whether to use convolution on input1. + with_input2_conv (bool): Whether to use convolution on input2. + input_conv_cfg (dict): Config dict for building input1_conv layer and + input2_conv layer, which is expected to contain the type of + convolution. + Default: None, which means using conv2d. + input_norm_cfg (dict): Config dict for normalization layer in + input1_conv and input2_conv layer. Default: None. + upsample_mode (str): Interpolation method used to resize the output + of input1_conv and input2_conv to target size. Currently, we + support ['nearest', 'bilinear']. Default: 'nearest'. 
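+
+    Example:
+        Illustrative sketch only, not taken from the upstream docs; channel
+        and spatial sizes are assumptions. ``SumCell`` below resizes the
+        smaller input to the larger one's resolution before adding them:
+
+        >>> cell = SumCell(in_channels=256, out_channels=256)
+        >>> x1 = torch.randn(1, 256, 16, 16)
+        >>> x2 = torch.randn(1, 256, 32, 32)
+        >>> out = cell(x1, x2)  # x1 upsampled to 32x32, fused: (1, 256, 32, 32)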
+ """ + + def __init__(self, + fused_channels=256, + out_channels=256, + with_out_conv=True, + out_conv_cfg=dict( + groups=1, kernel_size=3, padding=1, bias=True), + out_norm_cfg=None, + out_conv_order=('act', 'conv', 'norm'), + with_input1_conv=False, + with_input2_conv=False, + input_conv_cfg=None, + input_norm_cfg=None, + upsample_mode='nearest'): + super(BaseMergeCell, self).__init__() + assert upsample_mode in ['nearest', 'bilinear'] + self.with_out_conv = with_out_conv + self.with_input1_conv = with_input1_conv + self.with_input2_conv = with_input2_conv + self.upsample_mode = upsample_mode + + if self.with_out_conv: + self.out_conv = ConvModule( + fused_channels, + out_channels, + **out_conv_cfg, + norm_cfg=out_norm_cfg, + order=out_conv_order) + + self.input1_conv = self._build_input_conv( + out_channels, input_conv_cfg, + input_norm_cfg) if with_input1_conv else nn.Sequential() + self.input2_conv = self._build_input_conv( + out_channels, input_conv_cfg, + input_norm_cfg) if with_input2_conv else nn.Sequential() + + def _build_input_conv(self, channel, conv_cfg, norm_cfg): + return ConvModule( + channel, + channel, + 3, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + bias=True) + + @abstractmethod + def _binary_op(self, x1, x2): + pass + + def _resize(self, x, size): + if x.shape[-2:] == size: + return x + elif x.shape[-2:] < size: + return F.interpolate(x, size=size, mode=self.upsample_mode) + else: + assert x.shape[-2] % size[-2] == 0 and x.shape[-1] % size[-1] == 0 + kernel_size = x.shape[-1] // size[-1] + x = F.max_pool2d(x, kernel_size=kernel_size, stride=kernel_size) + return x + + def forward(self, x1, x2, out_size=None): + assert x1.shape[:2] == x2.shape[:2] + assert out_size is None or len(out_size) == 2 + if out_size is None: # resize to larger one + out_size = max(x1.size()[2:], x2.size()[2:]) + + x1 = self.input1_conv(x1) + x2 = self.input2_conv(x2) + + x1 = self._resize(x1, out_size) + x2 = self._resize(x2, out_size) + + x = self._binary_op(x1, x2) + if self.with_out_conv: + x = self.out_conv(x) + return x + + +class SumCell(BaseMergeCell): + + def __init__(self, in_channels, out_channels, **kwargs): + super(SumCell, self).__init__(in_channels, out_channels, **kwargs) + + def _binary_op(self, x1, x2): + return x1 + x2 + + +class ConcatCell(BaseMergeCell): + + def __init__(self, in_channels, out_channels, **kwargs): + super(ConcatCell, self).__init__(in_channels * 2, out_channels, + **kwargs) + + def _binary_op(self, x1, x2): + ret = torch.cat([x1, x2], dim=1) + return ret + + +class GlobalPoolingCell(BaseMergeCell): + + def __init__(self, in_channels=None, out_channels=None, **kwargs): + super().__init__(in_channels, out_channels, **kwargs) + self.global_pool = nn.AdaptiveAvgPool2d((1, 1)) + + def _binary_op(self, x1, x2): + x2_att = self.global_pool(x2).sigmoid() + return x2 + x2_att * x1 diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/modulated_deform_conv.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/modulated_deform_conv.py new file mode 100644 index 0000000000000000000000000000000000000000..f97278361d5262b1a87432dc5e3eb842b39ceb10 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/modulated_deform_conv.py @@ -0,0 +1,282 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import math + +import torch +import torch.nn as nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair, _single + +from annotator.mmpkg.mmcv.utils import deprecated_api_warning +from ..cnn import CONV_LAYERS +from ..utils import ext_loader, print_log + +ext_module = ext_loader.load_ext( + '_ext', + ['modulated_deform_conv_forward', 'modulated_deform_conv_backward']) + + +class ModulatedDeformConv2dFunction(Function): + + @staticmethod + def symbolic(g, input, offset, mask, weight, bias, stride, padding, + dilation, groups, deform_groups): + input_tensors = [input, offset, mask, weight] + if bias is not None: + input_tensors.append(bias) + return g.op( + 'mmcv::MMCVModulatedDeformConv2d', + *input_tensors, + stride_i=stride, + padding_i=padding, + dilation_i=dilation, + groups_i=groups, + deform_groups_i=deform_groups) + + @staticmethod + def forward(ctx, + input, + offset, + mask, + weight, + bias=None, + stride=1, + padding=0, + dilation=1, + groups=1, + deform_groups=1): + if input is not None and input.dim() != 4: + raise ValueError( + f'Expected 4D tensor as input, got {input.dim()}D tensor \ + instead.') + ctx.stride = _pair(stride) + ctx.padding = _pair(padding) + ctx.dilation = _pair(dilation) + ctx.groups = groups + ctx.deform_groups = deform_groups + ctx.with_bias = bias is not None + if not ctx.with_bias: + bias = input.new_empty(0) # fake tensor + # When pytorch version >= 1.6.0, amp is adopted for fp16 mode; + # amp won't cast the type of model (float32), but "offset" is cast + # to float16 by nn.Conv2d automatically, leading to the type + # mismatch with input (when it is float32) or weight. + # The flag for whether to use fp16 or amp is the type of "offset", + # we cast weight and input to temporarily support fp16 and amp + # whatever the pytorch version is. 
+ input = input.type_as(offset) + weight = weight.type_as(input) + ctx.save_for_backward(input, offset, mask, weight, bias) + output = input.new_empty( + ModulatedDeformConv2dFunction._output_size(ctx, input, weight)) + ctx._bufs = [input.new_empty(0), input.new_empty(0)] + ext_module.modulated_deform_conv_forward( + input, + weight, + bias, + ctx._bufs[0], + offset, + mask, + output, + ctx._bufs[1], + kernel_h=weight.size(2), + kernel_w=weight.size(3), + stride_h=ctx.stride[0], + stride_w=ctx.stride[1], + pad_h=ctx.padding[0], + pad_w=ctx.padding[1], + dilation_h=ctx.dilation[0], + dilation_w=ctx.dilation[1], + group=ctx.groups, + deformable_group=ctx.deform_groups, + with_bias=ctx.with_bias) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + input, offset, mask, weight, bias = ctx.saved_tensors + grad_input = torch.zeros_like(input) + grad_offset = torch.zeros_like(offset) + grad_mask = torch.zeros_like(mask) + grad_weight = torch.zeros_like(weight) + grad_bias = torch.zeros_like(bias) + grad_output = grad_output.contiguous() + ext_module.modulated_deform_conv_backward( + input, + weight, + bias, + ctx._bufs[0], + offset, + mask, + ctx._bufs[1], + grad_input, + grad_weight, + grad_bias, + grad_offset, + grad_mask, + grad_output, + kernel_h=weight.size(2), + kernel_w=weight.size(3), + stride_h=ctx.stride[0], + stride_w=ctx.stride[1], + pad_h=ctx.padding[0], + pad_w=ctx.padding[1], + dilation_h=ctx.dilation[0], + dilation_w=ctx.dilation[1], + group=ctx.groups, + deformable_group=ctx.deform_groups, + with_bias=ctx.with_bias) + if not ctx.with_bias: + grad_bias = None + + return (grad_input, grad_offset, grad_mask, grad_weight, grad_bias, + None, None, None, None, None) + + @staticmethod + def _output_size(ctx, input, weight): + channels = weight.size(0) + output_size = (input.size(0), channels) + for d in range(input.dim() - 2): + in_size = input.size(d + 2) + pad = ctx.padding[d] + kernel = ctx.dilation[d] * (weight.size(d + 2) - 1) + 1 + stride_ = ctx.stride[d] + output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1, ) + if not all(map(lambda s: s > 0, output_size)): + raise ValueError( + 'convolution input is too small (output would be ' + + 'x'.join(map(str, output_size)) + ')') + return output_size + + +modulated_deform_conv2d = ModulatedDeformConv2dFunction.apply + + +class ModulatedDeformConv2d(nn.Module): + + @deprecated_api_warning({'deformable_groups': 'deform_groups'}, + cls_name='ModulatedDeformConv2d') + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + deform_groups=1, + bias=True): + super(ModulatedDeformConv2d, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = _pair(kernel_size) + self.stride = _pair(stride) + self.padding = _pair(padding) + self.dilation = _pair(dilation) + self.groups = groups + self.deform_groups = deform_groups + # enable compatibility with nn.Conv2d + self.transposed = False + self.output_padding = _single(0) + + self.weight = nn.Parameter( + torch.Tensor(out_channels, in_channels // groups, + *self.kernel_size)) + if bias: + self.bias = nn.Parameter(torch.Tensor(out_channels)) + else: + self.register_parameter('bias', None) + self.init_weights() + + def init_weights(self): + n = self.in_channels + for k in self.kernel_size: + n *= k + stdv = 1. 
/ math.sqrt(n) + self.weight.data.uniform_(-stdv, stdv) + if self.bias is not None: + self.bias.data.zero_() + + def forward(self, x, offset, mask): + return modulated_deform_conv2d(x, offset, mask, self.weight, self.bias, + self.stride, self.padding, + self.dilation, self.groups, + self.deform_groups) + + +@CONV_LAYERS.register_module('DCNv2') +class ModulatedDeformConv2dPack(ModulatedDeformConv2d): + """A ModulatedDeformable Conv Encapsulation that acts as normal Conv + layers. + + Args: + in_channels (int): Same as nn.Conv2d. + out_channels (int): Same as nn.Conv2d. + kernel_size (int or tuple[int]): Same as nn.Conv2d. + stride (int): Same as nn.Conv2d, while tuple is not supported. + padding (int): Same as nn.Conv2d, while tuple is not supported. + dilation (int): Same as nn.Conv2d, while tuple is not supported. + groups (int): Same as nn.Conv2d. + bias (bool or str): If specified as `auto`, it will be decided by the + norm_cfg. Bias will be set as True if norm_cfg is None, otherwise + False. + """ + + _version = 2 + + def __init__(self, *args, **kwargs): + super(ModulatedDeformConv2dPack, self).__init__(*args, **kwargs) + self.conv_offset = nn.Conv2d( + self.in_channels, + self.deform_groups * 3 * self.kernel_size[0] * self.kernel_size[1], + kernel_size=self.kernel_size, + stride=self.stride, + padding=self.padding, + dilation=self.dilation, + bias=True) + self.init_weights() + + def init_weights(self): + super(ModulatedDeformConv2dPack, self).init_weights() + if hasattr(self, 'conv_offset'): + self.conv_offset.weight.data.zero_() + self.conv_offset.bias.data.zero_() + + def forward(self, x): + out = self.conv_offset(x) + o1, o2, mask = torch.chunk(out, 3, dim=1) + offset = torch.cat((o1, o2), dim=1) + mask = torch.sigmoid(mask) + return modulated_deform_conv2d(x, offset, mask, self.weight, self.bias, + self.stride, self.padding, + self.dilation, self.groups, + self.deform_groups) + + def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, + missing_keys, unexpected_keys, error_msgs): + version = local_metadata.get('version', None) + + if version is None or version < 2: + # the key is different in early versions + # In version < 2, ModulatedDeformConvPack + # loads previous benchmark models. + if (prefix + 'conv_offset.weight' not in state_dict + and prefix[:-1] + '_offset.weight' in state_dict): + state_dict[prefix + 'conv_offset.weight'] = state_dict.pop( + prefix[:-1] + '_offset.weight') + if (prefix + 'conv_offset.bias' not in state_dict + and prefix[:-1] + '_offset.bias' in state_dict): + state_dict[prefix + + 'conv_offset.bias'] = state_dict.pop(prefix[:-1] + + '_offset.bias') + + if version is not None and version > 1: + print_log( + f'ModulatedDeformConvPack {prefix.rstrip(".")} is upgraded to ' + 'version 2.', + logger='root') + + super()._load_from_state_dict(state_dict, prefix, local_metadata, + strict, missing_keys, unexpected_keys, + error_msgs) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/multi_scale_deform_attn.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/multi_scale_deform_attn.py new file mode 100644 index 0000000000000000000000000000000000000000..fe755eaa931565aab77ecc387990328c01447343 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/multi_scale_deform_attn.py @@ -0,0 +1,358 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
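+#
+# Illustrative usage sketch, not from the upstream docs; every size below is
+# an assumption. ``MultiScaleDeformableAttention`` defined in this file
+# attends to a few sampled points per head and per feature level instead of
+# all keys; with batch_first=False the inputs are (num_query, bs, embed_dims):
+#
+#     attn = MultiScaleDeformableAttention(embed_dims=256, num_heads=8,
+#                                          num_levels=2, num_points=4)
+#     spatial_shapes = torch.tensor([[32, 32], [16, 16]])    # (num_levels, 2)
+#     level_start_index = torch.tensor([0, 32 * 32])         # (num_levels,)
+#     num_value = int((spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum())
+#     value = torch.randn(num_value, 2, 256)                 # (num_key, bs, dims)
+#     query = torch.randn(100, 2, 256)                       # (num_query, bs, dims)
+#     reference_points = torch.rand(2, 100, 2, 2)            # (bs, nq, levels, 2)
+#     out = attn(query, value=value, reference_points=reference_points,
+#                spatial_shapes=spatial_shapes,
+#                level_start_index=level_start_index)        # (num_query, bs, 256)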
+import math +import warnings + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd.function import Function, once_differentiable + +from annotator.mmpkg.mmcv import deprecated_api_warning +from annotator.mmpkg.mmcv.cnn import constant_init, xavier_init +from annotator.mmpkg.mmcv.cnn.bricks.registry import ATTENTION +from annotator.mmpkg.mmcv.runner import BaseModule +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', ['ms_deform_attn_backward', 'ms_deform_attn_forward']) + + +class MultiScaleDeformableAttnFunction(Function): + + @staticmethod + def forward(ctx, value, value_spatial_shapes, value_level_start_index, + sampling_locations, attention_weights, im2col_step): + """GPU version of multi-scale deformable attention. + + Args: + value (Tensor): The value has shape + (bs, num_keys, mum_heads, embed_dims//num_heads) + value_spatial_shapes (Tensor): Spatial shape of + each feature map, has shape (num_levels, 2), + last dimension 2 represent (h, w) + sampling_locations (Tensor): The location of sampling points, + has shape + (bs ,num_queries, num_heads, num_levels, num_points, 2), + the last dimension 2 represent (x, y). + attention_weights (Tensor): The weight of sampling points used + when calculate the attention, has shape + (bs ,num_queries, num_heads, num_levels, num_points), + im2col_step (Tensor): The step used in image to column. + + Returns: + Tensor: has shape (bs, num_queries, embed_dims) + """ + + ctx.im2col_step = im2col_step + output = ext_module.ms_deform_attn_forward( + value, + value_spatial_shapes, + value_level_start_index, + sampling_locations, + attention_weights, + im2col_step=ctx.im2col_step) + ctx.save_for_backward(value, value_spatial_shapes, + value_level_start_index, sampling_locations, + attention_weights) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + """GPU version of backward function. + + Args: + grad_output (Tensor): Gradient + of output tensor of forward. + + Returns: + Tuple[Tensor]: Gradient + of input tensors in forward. + """ + value, value_spatial_shapes, value_level_start_index,\ + sampling_locations, attention_weights = ctx.saved_tensors + grad_value = torch.zeros_like(value) + grad_sampling_loc = torch.zeros_like(sampling_locations) + grad_attn_weight = torch.zeros_like(attention_weights) + + ext_module.ms_deform_attn_backward( + value, + value_spatial_shapes, + value_level_start_index, + sampling_locations, + attention_weights, + grad_output.contiguous(), + grad_value, + grad_sampling_loc, + grad_attn_weight, + im2col_step=ctx.im2col_step) + + return grad_value, None, None, \ + grad_sampling_loc, grad_attn_weight, None + + +def multi_scale_deformable_attn_pytorch(value, value_spatial_shapes, + sampling_locations, attention_weights): + """CPU version of multi-scale deformable attention. + + Args: + value (Tensor): The value has shape + (bs, num_keys, mum_heads, embed_dims//num_heads) + value_spatial_shapes (Tensor): Spatial shape of + each feature map, has shape (num_levels, 2), + last dimension 2 represent (h, w) + sampling_locations (Tensor): The location of sampling points, + has shape + (bs ,num_queries, num_heads, num_levels, num_points, 2), + the last dimension 2 represent (x, y). 
+ attention_weights (Tensor): The weight of sampling points used + when calculate the attention, has shape + (bs ,num_queries, num_heads, num_levels, num_points), + + Returns: + Tensor: has shape (bs, num_queries, embed_dims) + """ + + bs, _, num_heads, embed_dims = value.shape + _, num_queries, num_heads, num_levels, num_points, _ =\ + sampling_locations.shape + value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes], + dim=1) + sampling_grids = 2 * sampling_locations - 1 + sampling_value_list = [] + for level, (H_, W_) in enumerate(value_spatial_shapes): + # bs, H_*W_, num_heads, embed_dims -> + # bs, H_*W_, num_heads*embed_dims -> + # bs, num_heads*embed_dims, H_*W_ -> + # bs*num_heads, embed_dims, H_, W_ + value_l_ = value_list[level].flatten(2).transpose(1, 2).reshape( + bs * num_heads, embed_dims, H_, W_) + # bs, num_queries, num_heads, num_points, 2 -> + # bs, num_heads, num_queries, num_points, 2 -> + # bs*num_heads, num_queries, num_points, 2 + sampling_grid_l_ = sampling_grids[:, :, :, + level].transpose(1, 2).flatten(0, 1) + # bs*num_heads, embed_dims, num_queries, num_points + sampling_value_l_ = F.grid_sample( + value_l_, + sampling_grid_l_, + mode='bilinear', + padding_mode='zeros', + align_corners=False) + sampling_value_list.append(sampling_value_l_) + # (bs, num_queries, num_heads, num_levels, num_points) -> + # (bs, num_heads, num_queries, num_levels, num_points) -> + # (bs, num_heads, 1, num_queries, num_levels*num_points) + attention_weights = attention_weights.transpose(1, 2).reshape( + bs * num_heads, 1, num_queries, num_levels * num_points) + output = (torch.stack(sampling_value_list, dim=-2).flatten(-2) * + attention_weights).sum(-1).view(bs, num_heads * embed_dims, + num_queries) + return output.transpose(1, 2).contiguous() + + +@ATTENTION.register_module() +class MultiScaleDeformableAttention(BaseModule): + """An attention module used in Deformable-Detr. + + `Deformable DETR: Deformable Transformers for End-to-End Object Detection. + `_. + + Args: + embed_dims (int): The embedding dimension of Attention. + Default: 256. + num_heads (int): Parallel attention heads. Default: 64. + num_levels (int): The number of feature map used in + Attention. Default: 4. + num_points (int): The number of sampling points for + each query in each head. Default: 4. + im2col_step (int): The step used in image_to_column. + Default: 64. + dropout (float): A Dropout layer on `inp_identity`. + Default: 0.1. + batch_first (bool): Key, Query and Value are shape of + (batch, n, embed_dim) + or (n, batch, embed_dim). Default to False. + norm_cfg (dict): Config dict for normalization layer. + Default: None. + init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization. + Default: None. 
+ """ + + def __init__(self, + embed_dims=256, + num_heads=8, + num_levels=4, + num_points=4, + im2col_step=64, + dropout=0.1, + batch_first=False, + norm_cfg=None, + init_cfg=None): + super().__init__(init_cfg) + if embed_dims % num_heads != 0: + raise ValueError(f'embed_dims must be divisible by num_heads, ' + f'but got {embed_dims} and {num_heads}') + dim_per_head = embed_dims // num_heads + self.norm_cfg = norm_cfg + self.dropout = nn.Dropout(dropout) + self.batch_first = batch_first + + # you'd better set dim_per_head to a power of 2 + # which is more efficient in the CUDA implementation + def _is_power_of_2(n): + if (not isinstance(n, int)) or (n < 0): + raise ValueError( + 'invalid input for _is_power_of_2: {} (type: {})'.format( + n, type(n))) + return (n & (n - 1) == 0) and n != 0 + + if not _is_power_of_2(dim_per_head): + warnings.warn( + "You'd better set embed_dims in " + 'MultiScaleDeformAttention to make ' + 'the dimension of each attention head a power of 2 ' + 'which is more efficient in our CUDA implementation.') + + self.im2col_step = im2col_step + self.embed_dims = embed_dims + self.num_levels = num_levels + self.num_heads = num_heads + self.num_points = num_points + self.sampling_offsets = nn.Linear( + embed_dims, num_heads * num_levels * num_points * 2) + self.attention_weights = nn.Linear(embed_dims, + num_heads * num_levels * num_points) + self.value_proj = nn.Linear(embed_dims, embed_dims) + self.output_proj = nn.Linear(embed_dims, embed_dims) + self.init_weights() + + def init_weights(self): + """Default initialization for Parameters of Module.""" + constant_init(self.sampling_offsets, 0.) + thetas = torch.arange( + self.num_heads, + dtype=torch.float32) * (2.0 * math.pi / self.num_heads) + grid_init = torch.stack([thetas.cos(), thetas.sin()], -1) + grid_init = (grid_init / + grid_init.abs().max(-1, keepdim=True)[0]).view( + self.num_heads, 1, 1, + 2).repeat(1, self.num_levels, self.num_points, 1) + for i in range(self.num_points): + grid_init[:, :, i, :] *= i + 1 + + self.sampling_offsets.bias.data = grid_init.view(-1) + constant_init(self.attention_weights, val=0., bias=0.) + xavier_init(self.value_proj, distribution='uniform', bias=0.) + xavier_init(self.output_proj, distribution='uniform', bias=0.) + self._is_init = True + + @deprecated_api_warning({'residual': 'identity'}, + cls_name='MultiScaleDeformableAttention') + def forward(self, + query, + key=None, + value=None, + identity=None, + query_pos=None, + key_padding_mask=None, + reference_points=None, + spatial_shapes=None, + level_start_index=None, + **kwargs): + """Forward Function of MultiScaleDeformAttention. + + Args: + query (Tensor): Query of Transformer with shape + (num_query, bs, embed_dims). + key (Tensor): The key tensor with shape + `(num_key, bs, embed_dims)`. + value (Tensor): The value tensor with shape + `(num_key, bs, embed_dims)`. + identity (Tensor): The tensor used for addition, with the + same shape as `query`. Default None. If None, + `query` will be used. + query_pos (Tensor): The positional encoding for `query`. + Default: None. + key_pos (Tensor): The positional encoding for `key`. Default + None. + reference_points (Tensor): The normalized reference + points with shape (bs, num_query, num_levels, 2), + all elements is range in [0, 1], top-left (0,0), + bottom-right (1, 1), including padding area. + or (N, Length_{query}, num_levels, 4), add + additional two dimensions is (w, h) to + form reference boxes. 
+ key_padding_mask (Tensor): ByteTensor for `query`, with + shape [bs, num_key]. + spatial_shapes (Tensor): Spatial shape of features in + different levels. With shape (num_levels, 2), + last dimension represents (h, w). + level_start_index (Tensor): The start index of each level. + A tensor has shape ``(num_levels, )`` and can be represented + as [0, h_0*w_0, h_0*w_0+h_1*w_1, ...]. + + Returns: + Tensor: forwarded results with shape [num_query, bs, embed_dims]. + """ + + if value is None: + value = query + + if identity is None: + identity = query + if query_pos is not None: + query = query + query_pos + if not self.batch_first: + # change to (bs, num_query ,embed_dims) + query = query.permute(1, 0, 2) + value = value.permute(1, 0, 2) + + bs, num_query, _ = query.shape + bs, num_value, _ = value.shape + assert (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum() == num_value + + value = self.value_proj(value) + if key_padding_mask is not None: + value = value.masked_fill(key_padding_mask[..., None], 0.0) + value = value.view(bs, num_value, self.num_heads, -1) + sampling_offsets = self.sampling_offsets(query).view( + bs, num_query, self.num_heads, self.num_levels, self.num_points, 2) + attention_weights = self.attention_weights(query).view( + bs, num_query, self.num_heads, self.num_levels * self.num_points) + attention_weights = attention_weights.softmax(-1) + + attention_weights = attention_weights.view(bs, num_query, + self.num_heads, + self.num_levels, + self.num_points) + if reference_points.shape[-1] == 2: + offset_normalizer = torch.stack( + [spatial_shapes[..., 1], spatial_shapes[..., 0]], -1) + sampling_locations = reference_points[:, :, None, :, None, :] \ + + sampling_offsets \ + / offset_normalizer[None, None, None, :, None, :] + elif reference_points.shape[-1] == 4: + sampling_locations = reference_points[:, :, None, :, None, :2] \ + + sampling_offsets / self.num_points \ + * reference_points[:, :, None, :, None, 2:] \ + * 0.5 + else: + raise ValueError( + f'Last dim of reference_points must be' + f' 2 or 4, but get {reference_points.shape[-1]} instead.') + if torch.cuda.is_available() and value.is_cuda: + output = MultiScaleDeformableAttnFunction.apply( + value, spatial_shapes, level_start_index, sampling_locations, + attention_weights, self.im2col_step) + else: + output = multi_scale_deformable_attn_pytorch( + value, spatial_shapes, sampling_locations, attention_weights) + + output = self.output_proj(output) + + if not self.batch_first: + # (num_query, bs ,embed_dims) + output = output.permute(1, 0, 2) + + return self.dropout(output) + identity diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/nms.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/nms.py new file mode 100644 index 0000000000000000000000000000000000000000..908ac66645eef29fb55fce82497eb9f6af1a2667 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/nms.py @@ -0,0 +1,417 @@ +import os + +import numpy as np +import torch + +from annotator.mmpkg.mmcv.utils import deprecated_api_warning +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', ['nms', 'softnms', 'nms_match', 'nms_rotated']) + + +# This function is modified from: https://github.com/pytorch/vision/ +class NMSop(torch.autograd.Function): + + @staticmethod + def forward(ctx, bboxes, scores, iou_threshold, offset, score_threshold, + max_num): + is_filtering_by_score = score_threshold > 0 + if is_filtering_by_score: + valid_mask = scores 
> score_threshold + bboxes, scores = bboxes[valid_mask], scores[valid_mask] + valid_inds = torch.nonzero( + valid_mask, as_tuple=False).squeeze(dim=1) + + inds = ext_module.nms( + bboxes, scores, iou_threshold=float(iou_threshold), offset=offset) + + if max_num > 0: + inds = inds[:max_num] + if is_filtering_by_score: + inds = valid_inds[inds] + return inds + + @staticmethod + def symbolic(g, bboxes, scores, iou_threshold, offset, score_threshold, + max_num): + from ..onnx import is_custom_op_loaded + has_custom_op = is_custom_op_loaded() + # TensorRT nms plugin is aligned with original nms in ONNXRuntime + is_trt_backend = os.environ.get('ONNX_BACKEND') == 'MMCVTensorRT' + if has_custom_op and (not is_trt_backend): + return g.op( + 'mmcv::NonMaxSuppression', + bboxes, + scores, + iou_threshold_f=float(iou_threshold), + offset_i=int(offset)) + else: + from torch.onnx.symbolic_opset9 import select, squeeze, unsqueeze + from ..onnx.onnx_utils.symbolic_helper import _size_helper + + boxes = unsqueeze(g, bboxes, 0) + scores = unsqueeze(g, unsqueeze(g, scores, 0), 0) + + if max_num > 0: + max_num = g.op( + 'Constant', + value_t=torch.tensor(max_num, dtype=torch.long)) + else: + dim = g.op('Constant', value_t=torch.tensor(0)) + max_num = _size_helper(g, bboxes, dim) + max_output_per_class = max_num + iou_threshold = g.op( + 'Constant', + value_t=torch.tensor([iou_threshold], dtype=torch.float)) + score_threshold = g.op( + 'Constant', + value_t=torch.tensor([score_threshold], dtype=torch.float)) + nms_out = g.op('NonMaxSuppression', boxes, scores, + max_output_per_class, iou_threshold, + score_threshold) + return squeeze( + g, + select( + g, nms_out, 1, + g.op( + 'Constant', + value_t=torch.tensor([2], dtype=torch.long))), 1) + + +class SoftNMSop(torch.autograd.Function): + + @staticmethod + def forward(ctx, boxes, scores, iou_threshold, sigma, min_score, method, + offset): + dets = boxes.new_empty((boxes.size(0), 5), device='cpu') + inds = ext_module.softnms( + boxes.cpu(), + scores.cpu(), + dets.cpu(), + iou_threshold=float(iou_threshold), + sigma=float(sigma), + min_score=float(min_score), + method=int(method), + offset=int(offset)) + return dets, inds + + @staticmethod + def symbolic(g, boxes, scores, iou_threshold, sigma, min_score, method, + offset): + from packaging import version + assert version.parse(torch.__version__) >= version.parse('1.7.0') + nms_out = g.op( + 'mmcv::SoftNonMaxSuppression', + boxes, + scores, + iou_threshold_f=float(iou_threshold), + sigma_f=float(sigma), + min_score_f=float(min_score), + method_i=int(method), + offset_i=int(offset), + outputs=2) + return nms_out + + +@deprecated_api_warning({'iou_thr': 'iou_threshold'}) +def nms(boxes, scores, iou_threshold, offset=0, score_threshold=0, max_num=-1): + """Dispatch to either CPU or GPU NMS implementations. + + The input can be either torch tensor or numpy array. GPU NMS will be used + if the input is gpu tensor, otherwise CPU NMS + will be used. The returned type will always be the same as inputs. + + Arguments: + boxes (torch.Tensor or np.ndarray): boxes in shape (N, 4). + scores (torch.Tensor or np.ndarray): scores in shape (N, ). + iou_threshold (float): IoU threshold for NMS. + offset (int, 0 or 1): boxes' width or height is (x2 - x1 + offset). + score_threshold (float): score threshold for NMS. + max_num (int): maximum number of boxes after NMS. + + Returns: + tuple: kept dets(boxes and scores) and indice, which is always the \ + same data type as the input. 
+ + Example: + >>> boxes = np.array([[49.1, 32.4, 51.0, 35.9], + >>> [49.3, 32.9, 51.0, 35.3], + >>> [49.2, 31.8, 51.0, 35.4], + >>> [35.1, 11.5, 39.1, 15.7], + >>> [35.6, 11.8, 39.3, 14.2], + >>> [35.3, 11.5, 39.9, 14.5], + >>> [35.2, 11.7, 39.7, 15.7]], dtype=np.float32) + >>> scores = np.array([0.9, 0.9, 0.5, 0.5, 0.5, 0.4, 0.3],\ + dtype=np.float32) + >>> iou_threshold = 0.6 + >>> dets, inds = nms(boxes, scores, iou_threshold) + >>> assert len(inds) == len(dets) == 3 + """ + assert isinstance(boxes, (torch.Tensor, np.ndarray)) + assert isinstance(scores, (torch.Tensor, np.ndarray)) + is_numpy = False + if isinstance(boxes, np.ndarray): + is_numpy = True + boxes = torch.from_numpy(boxes) + if isinstance(scores, np.ndarray): + scores = torch.from_numpy(scores) + assert boxes.size(1) == 4 + assert boxes.size(0) == scores.size(0) + assert offset in (0, 1) + + if torch.__version__ == 'parrots': + indata_list = [boxes, scores] + indata_dict = { + 'iou_threshold': float(iou_threshold), + 'offset': int(offset) + } + inds = ext_module.nms(*indata_list, **indata_dict) + else: + inds = NMSop.apply(boxes, scores, iou_threshold, offset, + score_threshold, max_num) + dets = torch.cat((boxes[inds], scores[inds].reshape(-1, 1)), dim=1) + if is_numpy: + dets = dets.cpu().numpy() + inds = inds.cpu().numpy() + return dets, inds + + +@deprecated_api_warning({'iou_thr': 'iou_threshold'}) +def soft_nms(boxes, + scores, + iou_threshold=0.3, + sigma=0.5, + min_score=1e-3, + method='linear', + offset=0): + """Dispatch to only CPU Soft NMS implementations. + + The input can be either a torch tensor or numpy array. + The returned type will always be the same as inputs. + + Arguments: + boxes (torch.Tensor or np.ndarray): boxes in shape (N, 4). + scores (torch.Tensor or np.ndarray): scores in shape (N, ). + iou_threshold (float): IoU threshold for NMS. + sigma (float): hyperparameter for gaussian method + min_score (float): score filter threshold + method (str): either 'linear' or 'gaussian' + offset (int, 0 or 1): boxes' width or height is (x2 - x1 + offset). + + Returns: + tuple: kept dets(boxes and scores) and indice, which is always the \ + same data type as the input. 
+ + Example: + >>> boxes = np.array([[4., 3., 5., 3.], + >>> [4., 3., 5., 4.], + >>> [3., 1., 3., 1.], + >>> [3., 1., 3., 1.], + >>> [3., 1., 3., 1.], + >>> [3., 1., 3., 1.]], dtype=np.float32) + >>> scores = np.array([0.9, 0.9, 0.5, 0.5, 0.4, 0.0], dtype=np.float32) + >>> iou_threshold = 0.6 + >>> dets, inds = soft_nms(boxes, scores, iou_threshold, sigma=0.5) + >>> assert len(inds) == len(dets) == 5 + """ + + assert isinstance(boxes, (torch.Tensor, np.ndarray)) + assert isinstance(scores, (torch.Tensor, np.ndarray)) + is_numpy = False + if isinstance(boxes, np.ndarray): + is_numpy = True + boxes = torch.from_numpy(boxes) + if isinstance(scores, np.ndarray): + scores = torch.from_numpy(scores) + assert boxes.size(1) == 4 + assert boxes.size(0) == scores.size(0) + assert offset in (0, 1) + method_dict = {'naive': 0, 'linear': 1, 'gaussian': 2} + assert method in method_dict.keys() + + if torch.__version__ == 'parrots': + dets = boxes.new_empty((boxes.size(0), 5), device='cpu') + indata_list = [boxes.cpu(), scores.cpu(), dets.cpu()] + indata_dict = { + 'iou_threshold': float(iou_threshold), + 'sigma': float(sigma), + 'min_score': min_score, + 'method': method_dict[method], + 'offset': int(offset) + } + inds = ext_module.softnms(*indata_list, **indata_dict) + else: + dets, inds = SoftNMSop.apply(boxes.cpu(), scores.cpu(), + float(iou_threshold), float(sigma), + float(min_score), method_dict[method], + int(offset)) + + dets = dets[:inds.size(0)] + + if is_numpy: + dets = dets.cpu().numpy() + inds = inds.cpu().numpy() + return dets, inds + else: + return dets.to(device=boxes.device), inds.to(device=boxes.device) + + +def batched_nms(boxes, scores, idxs, nms_cfg, class_agnostic=False): + """Performs non-maximum suppression in a batched fashion. + + Modified from https://github.com/pytorch/vision/blob + /505cd6957711af790211896d32b40291bea1bc21/torchvision/ops/boxes.py#L39. + In order to perform NMS independently per class, we add an offset to all + the boxes. The offset is dependent only on the class idx, and is large + enough so that boxes from different classes do not overlap. + + Arguments: + boxes (torch.Tensor): boxes in shape (N, 4). + scores (torch.Tensor): scores in shape (N, ). + idxs (torch.Tensor): each index value correspond to a bbox cluster, + and NMS will not be applied between elements of different idxs, + shape (N, ). + nms_cfg (dict): specify nms type and other parameters like iou_thr. + Possible keys includes the following. + + - iou_thr (float): IoU threshold used for NMS. + - split_thr (float): threshold number of boxes. In some cases the + number of boxes is large (e.g., 200k). To avoid OOM during + training, the users could set `split_thr` to a small value. + If the number of boxes is greater than the threshold, it will + perform NMS on each group of boxes separately and sequentially. + Defaults to 10000. + class_agnostic (bool): if true, nms is class agnostic, + i.e. IoU thresholding happens over all boxes, + regardless of the predicted class. + + Returns: + tuple: kept dets and indice. 
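+
+    Example:
+        Illustrative sketch only, not taken from the upstream docs; the boxes,
+        scores and class indices are made-up values.
+
+        >>> boxes = torch.tensor([[0., 0., 10., 10.],
+        ...                       [1., 1., 11., 11.],
+        ...                       [0., 0., 10., 10.]])
+        >>> scores = torch.tensor([0.9, 0.8, 0.7])
+        >>> idxs = torch.tensor([0, 0, 1])  # per-box class index
+        >>> dets, keep = batched_nms(boxes, scores, idxs,
+        ...                          dict(type='nms', iou_threshold=0.5))
+        >>> # boxes 0 and 2 coincide but belong to different classes, so both
+        >>> # survive; box 1 (IoU ~0.68 with box 0, same class) is suppressed.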
+ """ + nms_cfg_ = nms_cfg.copy() + class_agnostic = nms_cfg_.pop('class_agnostic', class_agnostic) + if class_agnostic: + boxes_for_nms = boxes + else: + max_coordinate = boxes.max() + offsets = idxs.to(boxes) * (max_coordinate + torch.tensor(1).to(boxes)) + boxes_for_nms = boxes + offsets[:, None] + + nms_type = nms_cfg_.pop('type', 'nms') + nms_op = eval(nms_type) + + split_thr = nms_cfg_.pop('split_thr', 10000) + # Won't split to multiple nms nodes when exporting to onnx + if boxes_for_nms.shape[0] < split_thr or torch.onnx.is_in_onnx_export(): + dets, keep = nms_op(boxes_for_nms, scores, **nms_cfg_) + boxes = boxes[keep] + # -1 indexing works abnormal in TensorRT + # This assumes `dets` has 5 dimensions where + # the last dimension is score. + # TODO: more elegant way to handle the dimension issue. + # Some type of nms would reweight the score, such as SoftNMS + scores = dets[:, 4] + else: + max_num = nms_cfg_.pop('max_num', -1) + total_mask = scores.new_zeros(scores.size(), dtype=torch.bool) + # Some type of nms would reweight the score, such as SoftNMS + scores_after_nms = scores.new_zeros(scores.size()) + for id in torch.unique(idxs): + mask = (idxs == id).nonzero(as_tuple=False).view(-1) + dets, keep = nms_op(boxes_for_nms[mask], scores[mask], **nms_cfg_) + total_mask[mask[keep]] = True + scores_after_nms[mask[keep]] = dets[:, -1] + keep = total_mask.nonzero(as_tuple=False).view(-1) + + scores, inds = scores_after_nms[keep].sort(descending=True) + keep = keep[inds] + boxes = boxes[keep] + + if max_num > 0: + keep = keep[:max_num] + boxes = boxes[:max_num] + scores = scores[:max_num] + + return torch.cat([boxes, scores[:, None]], -1), keep + + +def nms_match(dets, iou_threshold): + """Matched dets into different groups by NMS. + + NMS match is Similar to NMS but when a bbox is suppressed, nms match will + record the indice of suppressed bbox and form a group with the indice of + kept bbox. In each group, indice is sorted as score order. + + Arguments: + dets (torch.Tensor | np.ndarray): Det boxes with scores, shape (N, 5). + iou_thr (float): IoU thresh for NMS. + + Returns: + List[torch.Tensor | np.ndarray]: The outer list corresponds different + matched group, the inner Tensor corresponds the indices for a group + in score order. + """ + if dets.shape[0] == 0: + matched = [] + else: + assert dets.shape[-1] == 5, 'inputs dets.shape should be (N, 5), ' \ + f'but get {dets.shape}' + if isinstance(dets, torch.Tensor): + dets_t = dets.detach().cpu() + else: + dets_t = torch.from_numpy(dets) + indata_list = [dets_t] + indata_dict = {'iou_threshold': float(iou_threshold)} + matched = ext_module.nms_match(*indata_list, **indata_dict) + if torch.__version__ == 'parrots': + matched = matched.tolist() + + if isinstance(dets, torch.Tensor): + return [dets.new_tensor(m, dtype=torch.long) for m in matched] + else: + return [np.array(m, dtype=np.int) for m in matched] + + +def nms_rotated(dets, scores, iou_threshold, labels=None): + """Performs non-maximum suppression (NMS) on the rotated boxes according to + their intersection-over-union (IoU). + + Rotated NMS iteratively removes lower scoring rotated boxes which have an + IoU greater than iou_threshold with another (higher scoring) rotated box. + + Args: + boxes (Tensor): Rotated boxes in shape (N, 5). They are expected to \ + be in (x_ctr, y_ctr, width, height, angle_radian) format. + scores (Tensor): scores in shape (N, ). + iou_threshold (float): IoU thresh for NMS. + labels (Tensor): boxes' label in shape (N,). 
+ + Returns: + tuple: kept dets(boxes and scores) and indice, which is always the \ + same data type as the input. + """ + if dets.shape[0] == 0: + return dets, None + multi_label = labels is not None + if multi_label: + dets_wl = torch.cat((dets, labels.unsqueeze(1)), 1) + else: + dets_wl = dets + _, order = scores.sort(0, descending=True) + dets_sorted = dets_wl.index_select(0, order) + + if torch.__version__ == 'parrots': + keep_inds = ext_module.nms_rotated( + dets_wl, + scores, + order, + dets_sorted, + iou_threshold=iou_threshold, + multi_label=multi_label) + else: + keep_inds = ext_module.nms_rotated(dets_wl, scores, order, dets_sorted, + iou_threshold, multi_label) + dets = torch.cat((dets[keep_inds], scores[keep_inds].reshape(-1, 1)), + dim=1) + return dets, keep_inds diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/pixel_group.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/pixel_group.py new file mode 100644 index 0000000000000000000000000000000000000000..2143c75f835a467c802fc3c37ecd3ac0f85bcda4 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/pixel_group.py @@ -0,0 +1,75 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import numpy as np +import torch + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['pixel_group']) + + +def pixel_group(score, mask, embedding, kernel_label, kernel_contour, + kernel_region_num, distance_threshold): + """Group pixels into text instances, which is widely used text detection + methods. + + Arguments: + score (np.array or Tensor): The foreground score with size hxw. + mask (np.array or Tensor): The foreground mask with size hxw. + embedding (np.array or Tensor): The embedding with size hxwxc to + distinguish instances. + kernel_label (np.array or Tensor): The instance kernel index with + size hxw. + kernel_contour (np.array or Tensor): The kernel contour with size hxw. + kernel_region_num (int): The instance kernel region number. + distance_threshold (float): The embedding distance threshold between + kernel and pixel in one instance. + + Returns: + pixel_assignment (List[List[float]]): The instance coordinate list. + Each element consists of averaged confidence, pixel number, and + coordinates (x_i, y_i for all pixels) in order. 
+ """ + assert isinstance(score, (torch.Tensor, np.ndarray)) + assert isinstance(mask, (torch.Tensor, np.ndarray)) + assert isinstance(embedding, (torch.Tensor, np.ndarray)) + assert isinstance(kernel_label, (torch.Tensor, np.ndarray)) + assert isinstance(kernel_contour, (torch.Tensor, np.ndarray)) + assert isinstance(kernel_region_num, int) + assert isinstance(distance_threshold, float) + + if isinstance(score, np.ndarray): + score = torch.from_numpy(score) + if isinstance(mask, np.ndarray): + mask = torch.from_numpy(mask) + if isinstance(embedding, np.ndarray): + embedding = torch.from_numpy(embedding) + if isinstance(kernel_label, np.ndarray): + kernel_label = torch.from_numpy(kernel_label) + if isinstance(kernel_contour, np.ndarray): + kernel_contour = torch.from_numpy(kernel_contour) + + if torch.__version__ == 'parrots': + label = ext_module.pixel_group( + score, + mask, + embedding, + kernel_label, + kernel_contour, + kernel_region_num=kernel_region_num, + distance_threshold=distance_threshold) + label = label.tolist() + label = label[0] + list_index = kernel_region_num + pixel_assignment = [] + for x in range(kernel_region_num): + pixel_assignment.append( + np.array( + label[list_index:list_index + int(label[x])], + dtype=np.float)) + list_index = list_index + int(label[x]) + else: + pixel_assignment = ext_module.pixel_group(score, mask, embedding, + kernel_label, kernel_contour, + kernel_region_num, + distance_threshold) + return pixel_assignment diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/point_sample.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/point_sample.py new file mode 100644 index 0000000000000000000000000000000000000000..08b1617805fa84e1c8afc61f3263b4b86bd2a136 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/point_sample.py @@ -0,0 +1,336 @@ +# Modified from https://github.com/facebookresearch/detectron2/tree/master/projects/PointRend # noqa + +from os import path as osp + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.nn.modules.utils import _pair +from torch.onnx.operators import shape_as_tensor + + +def bilinear_grid_sample(im, grid, align_corners=False): + """Given an input and a flow-field grid, computes the output using input + values and pixel locations from grid. Supported only bilinear interpolation + method to sample the input pixels. + + Args: + im (torch.Tensor): Input feature map, shape (N, C, H, W) + grid (torch.Tensor): Point coordinates, shape (N, Hg, Wg, 2) + align_corners {bool}: If set to True, the extrema (-1 and 1) are + considered as referring to the center points of the input’s + corner pixels. If set to False, they are instead considered as + referring to the corner points of the input’s corner pixels, + making the sampling more resolution agnostic. 
+ Returns: + torch.Tensor: A tensor with sampled points, shape (N, C, Hg, Wg) + """ + n, c, h, w = im.shape + gn, gh, gw, _ = grid.shape + assert n == gn + + x = grid[:, :, :, 0] + y = grid[:, :, :, 1] + + if align_corners: + x = ((x + 1) / 2) * (w - 1) + y = ((y + 1) / 2) * (h - 1) + else: + x = ((x + 1) * w - 1) / 2 + y = ((y + 1) * h - 1) / 2 + + x = x.view(n, -1) + y = y.view(n, -1) + + x0 = torch.floor(x).long() + y0 = torch.floor(y).long() + x1 = x0 + 1 + y1 = y0 + 1 + + wa = ((x1 - x) * (y1 - y)).unsqueeze(1) + wb = ((x1 - x) * (y - y0)).unsqueeze(1) + wc = ((x - x0) * (y1 - y)).unsqueeze(1) + wd = ((x - x0) * (y - y0)).unsqueeze(1) + + # Apply default for grid_sample function zero padding + im_padded = F.pad(im, pad=[1, 1, 1, 1], mode='constant', value=0) + padded_h = h + 2 + padded_w = w + 2 + # save points positions after padding + x0, x1, y0, y1 = x0 + 1, x1 + 1, y0 + 1, y1 + 1 + + # Clip coordinates to padded image size + x0 = torch.where(x0 < 0, torch.tensor(0), x0) + x0 = torch.where(x0 > padded_w - 1, torch.tensor(padded_w - 1), x0) + x1 = torch.where(x1 < 0, torch.tensor(0), x1) + x1 = torch.where(x1 > padded_w - 1, torch.tensor(padded_w - 1), x1) + y0 = torch.where(y0 < 0, torch.tensor(0), y0) + y0 = torch.where(y0 > padded_h - 1, torch.tensor(padded_h - 1), y0) + y1 = torch.where(y1 < 0, torch.tensor(0), y1) + y1 = torch.where(y1 > padded_h - 1, torch.tensor(padded_h - 1), y1) + + im_padded = im_padded.view(n, c, -1) + + x0_y0 = (x0 + y0 * padded_w).unsqueeze(1).expand(-1, c, -1) + x0_y1 = (x0 + y1 * padded_w).unsqueeze(1).expand(-1, c, -1) + x1_y0 = (x1 + y0 * padded_w).unsqueeze(1).expand(-1, c, -1) + x1_y1 = (x1 + y1 * padded_w).unsqueeze(1).expand(-1, c, -1) + + Ia = torch.gather(im_padded, 2, x0_y0) + Ib = torch.gather(im_padded, 2, x0_y1) + Ic = torch.gather(im_padded, 2, x1_y0) + Id = torch.gather(im_padded, 2, x1_y1) + + return (Ia * wa + Ib * wb + Ic * wc + Id * wd).reshape(n, c, gh, gw) + + +def is_in_onnx_export_without_custom_ops(): + from annotator.mmpkg.mmcv.ops import get_onnxruntime_op_path + ort_custom_op_path = get_onnxruntime_op_path() + return torch.onnx.is_in_onnx_export( + ) and not osp.exists(ort_custom_op_path) + + +def normalize(grid): + """Normalize input grid from [-1, 1] to [0, 1] + Args: + grid (Tensor): The grid to be normalize, range [-1, 1]. + Returns: + Tensor: Normalized grid, range [0, 1]. + """ + + return (grid + 1.0) / 2.0 + + +def denormalize(grid): + """Denormalize input grid from range [0, 1] to [-1, 1] + Args: + grid (Tensor): The grid to be denormalize, range [0, 1]. + Returns: + Tensor: Denormalized grid, range [-1, 1]. + """ + + return grid * 2.0 - 1.0 + + +def generate_grid(num_grid, size, device): + """Generate regular square grid of points in [0, 1] x [0, 1] coordinate + space. + + Args: + num_grid (int): The number of grids to sample, one for each region. + size (tuple(int, int)): The side size of the regular grid. + device (torch.device): Desired device of returned tensor. + + Returns: + (torch.Tensor): A tensor of shape (num_grid, size[0]*size[1], 2) that + contains coordinates for the regular grids. + """ + + affine_trans = torch.tensor([[[1., 0., 0.], [0., 1., 0.]]], device=device) + grid = F.affine_grid( + affine_trans, torch.Size((1, 1, *size)), align_corners=False) + grid = normalize(grid) + return grid.view(1, -1, 2).expand(num_grid, -1, -1) + + +def rel_roi_point_to_abs_img_point(rois, rel_roi_points): + """Convert roi based relative point coordinates to image based absolute + point coordinates. 
+ + Args: + rois (Tensor): RoIs or BBoxes, shape (N, 4) or (N, 5) + rel_roi_points (Tensor): Point coordinates inside RoI, relative to + RoI, location, range (0, 1), shape (N, P, 2) + Returns: + Tensor: Image based absolute point coordinates, shape (N, P, 2) + """ + + with torch.no_grad(): + assert rel_roi_points.size(0) == rois.size(0) + assert rois.dim() == 2 + assert rel_roi_points.dim() == 3 + assert rel_roi_points.size(2) == 2 + # remove batch idx + if rois.size(1) == 5: + rois = rois[:, 1:] + abs_img_points = rel_roi_points.clone() + # To avoid an error during exporting to onnx use independent + # variables instead inplace computation + xs = abs_img_points[:, :, 0] * (rois[:, None, 2] - rois[:, None, 0]) + ys = abs_img_points[:, :, 1] * (rois[:, None, 3] - rois[:, None, 1]) + xs += rois[:, None, 0] + ys += rois[:, None, 1] + abs_img_points = torch.stack([xs, ys], dim=2) + return abs_img_points + + +def get_shape_from_feature_map(x): + """Get spatial resolution of input feature map considering exporting to + onnx mode. + + Args: + x (torch.Tensor): Input tensor, shape (N, C, H, W) + Returns: + torch.Tensor: Spatial resolution (width, height), shape (1, 1, 2) + """ + if torch.onnx.is_in_onnx_export(): + img_shape = shape_as_tensor(x)[2:].flip(0).view(1, 1, 2).to( + x.device).float() + else: + img_shape = torch.tensor(x.shape[2:]).flip(0).view(1, 1, 2).to( + x.device).float() + return img_shape + + +def abs_img_point_to_rel_img_point(abs_img_points, img, spatial_scale=1.): + """Convert image based absolute point coordinates to image based relative + coordinates for sampling. + + Args: + abs_img_points (Tensor): Image based absolute point coordinates, + shape (N, P, 2) + img (tuple/Tensor): (height, width) of image or feature map. + spatial_scale (float): Scale points by this factor. Default: 1. + + Returns: + Tensor: Image based relative point coordinates for sampling, + shape (N, P, 2) + """ + + assert (isinstance(img, tuple) and len(img) == 2) or \ + (isinstance(img, torch.Tensor) and len(img.shape) == 4) + + if isinstance(img, tuple): + h, w = img + scale = torch.tensor([w, h], + dtype=torch.float, + device=abs_img_points.device) + scale = scale.view(1, 1, 2) + else: + scale = get_shape_from_feature_map(img) + + return abs_img_points / scale * spatial_scale + + +def rel_roi_point_to_rel_img_point(rois, + rel_roi_points, + img, + spatial_scale=1.): + """Convert roi based relative point coordinates to image based absolute + point coordinates. + + Args: + rois (Tensor): RoIs or BBoxes, shape (N, 4) or (N, 5) + rel_roi_points (Tensor): Point coordinates inside RoI, relative to + RoI, location, range (0, 1), shape (N, P, 2) + img (tuple/Tensor): (height, width) of image or feature map. + spatial_scale (float): Scale points by this factor. Default: 1. + + Returns: + Tensor: Image based relative point coordinates for sampling, + shape (N, P, 2) + """ + + abs_img_point = rel_roi_point_to_abs_img_point(rois, rel_roi_points) + rel_img_point = abs_img_point_to_rel_img_point(abs_img_point, img, + spatial_scale) + + return rel_img_point + + +def point_sample(input, points, align_corners=False, **kwargs): + """A wrapper around :func:`grid_sample` to support 3D point_coords tensors + Unlike :func:`torch.nn.functional.grid_sample` it assumes point_coords to + lie inside ``[0, 1] x [0, 1]`` square. + + Args: + input (Tensor): Feature map, shape (N, C, H, W). + points (Tensor): Image based absolute point coordinates (normalized), + range [0, 1] x [0, 1], shape (N, P, 2) or (N, Hgrid, Wgrid, 2). 
+ align_corners (bool): Whether align_corners. Default: False + + Returns: + Tensor: Features of `point` on `input`, shape (N, C, P) or + (N, C, Hgrid, Wgrid). + """ + + add_dim = False + if points.dim() == 3: + add_dim = True + points = points.unsqueeze(2) + if is_in_onnx_export_without_custom_ops(): + # If custom ops for onnx runtime not compiled use python + # implementation of grid_sample function to make onnx graph + # with supported nodes + output = bilinear_grid_sample( + input, denormalize(points), align_corners=align_corners) + else: + output = F.grid_sample( + input, denormalize(points), align_corners=align_corners, **kwargs) + if add_dim: + output = output.squeeze(3) + return output + + +class SimpleRoIAlign(nn.Module): + + def __init__(self, output_size, spatial_scale, aligned=True): + """Simple RoI align in PointRend, faster than standard RoIAlign. + + Args: + output_size (tuple[int]): h, w + spatial_scale (float): scale the input boxes by this number + aligned (bool): if False, use the legacy implementation in + MMDetection, align_corners=True will be used in F.grid_sample. + If True, align the results more perfectly. + """ + + super(SimpleRoIAlign, self).__init__() + self.output_size = _pair(output_size) + self.spatial_scale = float(spatial_scale) + # to be consistent with other RoI ops + self.use_torchvision = False + self.aligned = aligned + + def forward(self, features, rois): + num_imgs = features.size(0) + num_rois = rois.size(0) + rel_roi_points = generate_grid( + num_rois, self.output_size, device=rois.device) + + if torch.onnx.is_in_onnx_export(): + rel_img_points = rel_roi_point_to_rel_img_point( + rois, rel_roi_points, features, self.spatial_scale) + rel_img_points = rel_img_points.reshape(num_imgs, -1, + *rel_img_points.shape[1:]) + point_feats = point_sample( + features, rel_img_points, align_corners=not self.aligned) + point_feats = point_feats.transpose(1, 2) + else: + point_feats = [] + for batch_ind in range(num_imgs): + # unravel batch dim + feat = features[batch_ind].unsqueeze(0) + inds = (rois[:, 0].long() == batch_ind) + if inds.any(): + rel_img_points = rel_roi_point_to_rel_img_point( + rois[inds], rel_roi_points[inds], feat, + self.spatial_scale).unsqueeze(0) + point_feat = point_sample( + feat, rel_img_points, align_corners=not self.aligned) + point_feat = point_feat.squeeze(0).transpose(0, 1) + point_feats.append(point_feat) + + point_feats = torch.cat(point_feats, dim=0) + + channels = features.size(1) + roi_feats = point_feats.reshape(num_rois, channels, *self.output_size) + + return roi_feats + + def __repr__(self): + format_str = self.__class__.__name__ + format_str += '(output_size={}, spatial_scale={}'.format( + self.output_size, self.spatial_scale) + return format_str diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/points_in_boxes.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/points_in_boxes.py new file mode 100644 index 0000000000000000000000000000000000000000..4003173a53052161dbcd687a2fa1d755642fdab8 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/points_in_boxes.py @@ -0,0 +1,133 @@ +import torch + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', [ + 'points_in_boxes_part_forward', 'points_in_boxes_cpu_forward', + 'points_in_boxes_all_forward' +]) + + +def points_in_boxes_part(points, boxes): + """Find the box in which each point is (CUDA). 
+ + Args: + points (torch.Tensor): [B, M, 3], [x, y, z] in LiDAR/DEPTH coordinate + boxes (torch.Tensor): [B, T, 7], + num_valid_boxes <= T, [x, y, z, x_size, y_size, z_size, rz] in + LiDAR/DEPTH coordinate, (x, y, z) is the bottom center + + Returns: + box_idxs_of_pts (torch.Tensor): (B, M), default background = -1 + """ + assert points.shape[0] == boxes.shape[0], \ + 'Points and boxes should have the same batch size, ' \ + f'but got {points.shape[0]} and {boxes.shape[0]}' + assert boxes.shape[2] == 7, \ + 'boxes dimension should be 7, ' \ + f'but got unexpected shape {boxes.shape[2]}' + assert points.shape[2] == 3, \ + 'points dimension should be 3, ' \ + f'but got unexpected shape {points.shape[2]}' + batch_size, num_points, _ = points.shape + + box_idxs_of_pts = points.new_zeros((batch_size, num_points), + dtype=torch.int).fill_(-1) + + # If manually put the tensor 'points' or 'boxes' on a device + # which is not the current device, some temporary variables + # will be created on the current device in the cuda op, + # and the output will be incorrect. + # Therefore, we force the current device to be the same + # as the device of the tensors if it was not. + # Please refer to https://github.com/open-mmlab/mmdetection3d/issues/305 + # for the incorrect output before the fix. + points_device = points.get_device() + assert points_device == boxes.get_device(), \ + 'Points and boxes should be put on the same device' + if torch.cuda.current_device() != points_device: + torch.cuda.set_device(points_device) + + ext_module.points_in_boxes_part_forward(boxes.contiguous(), + points.contiguous(), + box_idxs_of_pts) + + return box_idxs_of_pts + + +def points_in_boxes_cpu(points, boxes): + """Find all boxes in which each point is (CPU). The CPU version of + :meth:`points_in_boxes_all`. + + Args: + points (torch.Tensor): [B, M, 3], [x, y, z] in + LiDAR/DEPTH coordinate + boxes (torch.Tensor): [B, T, 7], + num_valid_boxes <= T, [x, y, z, x_size, y_size, z_size, rz], + (x, y, z) is the bottom center. + + Returns: + box_idxs_of_pts (torch.Tensor): (B, M, T), default background = 0. + """ + assert points.shape[0] == boxes.shape[0], \ + 'Points and boxes should have the same batch size, ' \ + f'but got {points.shape[0]} and {boxes.shape[0]}' + assert boxes.shape[2] == 7, \ + 'boxes dimension should be 7, ' \ + f'but got unexpected shape {boxes.shape[2]}' + assert points.shape[2] == 3, \ + 'points dimension should be 3, ' \ + f'but got unexpected shape {points.shape[2]}' + batch_size, num_points, _ = points.shape + num_boxes = boxes.shape[1] + + point_indices = points.new_zeros((batch_size, num_boxes, num_points), + dtype=torch.int) + for b in range(batch_size): + ext_module.points_in_boxes_cpu_forward(boxes[b].float().contiguous(), + points[b].float().contiguous(), + point_indices[b]) + point_indices = point_indices.transpose(1, 2) + + return point_indices + + +def points_in_boxes_all(points, boxes): + """Find all boxes in which each point is (CUDA). + + Args: + points (torch.Tensor): [B, M, 3], [x, y, z] in LiDAR/DEPTH coordinate + boxes (torch.Tensor): [B, T, 7], + num_valid_boxes <= T, [x, y, z, x_size, y_size, z_size, rz], + (x, y, z) is the bottom center. + + Returns: + box_idxs_of_pts (torch.Tensor): (B, M, T), default background = 0. 
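+
+    Example (illustrative sketch, not from the upstream source; assumes CUDA
+    tensors and a compiled ``_ext`` extension)::
+
+        >>> points = torch.rand(1, 128, 3).cuda()   # (B, M, 3)
+        >>> boxes = torch.rand(1, 4, 7).cuda()      # (B, T, 7)
+        >>> idx = points_in_boxes_all(points, boxes)  # (B, M, T) = (1, 128, 4)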
+ """ + assert boxes.shape[0] == points.shape[0], \ + 'Points and boxes should have the same batch size, ' \ + f'but got {boxes.shape[0]} and {boxes.shape[0]}' + assert boxes.shape[2] == 7, \ + 'boxes dimension should be 7, ' \ + f'but got unexpected shape {boxes.shape[2]}' + assert points.shape[2] == 3, \ + 'points dimension should be 3, ' \ + f'but got unexpected shape {points.shape[2]}' + batch_size, num_points, _ = points.shape + num_boxes = boxes.shape[1] + + box_idxs_of_pts = points.new_zeros((batch_size, num_points, num_boxes), + dtype=torch.int).fill_(0) + + # Same reason as line 25-32 + points_device = points.get_device() + assert points_device == boxes.get_device(), \ + 'Points and boxes should be put on the same device' + if torch.cuda.current_device() != points_device: + torch.cuda.set_device(points_device) + + ext_module.points_in_boxes_all_forward(boxes.contiguous(), + points.contiguous(), + box_idxs_of_pts) + + return box_idxs_of_pts diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/points_sampler.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/points_sampler.py new file mode 100644 index 0000000000000000000000000000000000000000..ae1a24f939dd0e2934765326363ea51c2f2b4cca --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/points_sampler.py @@ -0,0 +1,177 @@ +from typing import List + +import torch +from torch import nn as nn + +from annotator.mmpkg.mmcv.runner import force_fp32 +from .furthest_point_sample import (furthest_point_sample, + furthest_point_sample_with_dist) + + +def calc_square_dist(point_feat_a, point_feat_b, norm=True): + """Calculating square distance between a and b. + + Args: + point_feat_a (Tensor): (B, N, C) Feature vector of each point. + point_feat_b (Tensor): (B, M, C) Feature vector of each point. + norm (Bool, optional): Whether to normalize the distance. + Default: True. + + Returns: + Tensor: (B, N, M) Distance between each pair points. + """ + num_channel = point_feat_a.shape[-1] + # [bs, n, 1] + a_square = torch.sum(point_feat_a.unsqueeze(dim=2).pow(2), dim=-1) + # [bs, 1, m] + b_square = torch.sum(point_feat_b.unsqueeze(dim=1).pow(2), dim=-1) + + corr_matrix = torch.matmul(point_feat_a, point_feat_b.transpose(1, 2)) + + dist = a_square + b_square - 2 * corr_matrix + if norm: + dist = torch.sqrt(dist) / num_channel + return dist + + +def get_sampler_cls(sampler_type): + """Get the type and mode of points sampler. + + Args: + sampler_type (str): The type of points sampler. + The valid value are "D-FPS", "F-FPS", or "FS". + + Returns: + class: Points sampler type. + """ + sampler_mappings = { + 'D-FPS': DFPSSampler, + 'F-FPS': FFPSSampler, + 'FS': FSSampler, + } + try: + return sampler_mappings[sampler_type] + except KeyError: + raise KeyError( + f'Supported `sampler_type` are {sampler_mappings.keys()}, but got \ + {sampler_type}') + + +class PointsSampler(nn.Module): + """Points sampling. + + Args: + num_point (list[int]): Number of sample points. + fps_mod_list (list[str], optional): Type of FPS method, valid mod + ['F-FPS', 'D-FPS', 'FS'], Default: ['D-FPS']. + F-FPS: using feature distances for FPS. + D-FPS: using Euclidean distances of points for FPS. + FS: using F-FPS and D-FPS simultaneously. + fps_sample_range_list (list[int], optional): + Range of points to apply FPS. Default: [-1]. 
+ """ + + def __init__(self, + num_point: List[int], + fps_mod_list: List[str] = ['D-FPS'], + fps_sample_range_list: List[int] = [-1]): + super().__init__() + # FPS would be applied to different fps_mod in the list, + # so the length of the num_point should be equal to + # fps_mod_list and fps_sample_range_list. + assert len(num_point) == len(fps_mod_list) == len( + fps_sample_range_list) + self.num_point = num_point + self.fps_sample_range_list = fps_sample_range_list + self.samplers = nn.ModuleList() + for fps_mod in fps_mod_list: + self.samplers.append(get_sampler_cls(fps_mod)()) + self.fp16_enabled = False + + @force_fp32() + def forward(self, points_xyz, features): + """ + Args: + points_xyz (Tensor): (B, N, 3) xyz coordinates of the features. + features (Tensor): (B, C, N) Descriptors of the features. + + Returns: + Tensor: (B, npoint, sample_num) Indices of sampled points. + """ + indices = [] + last_fps_end_index = 0 + + for fps_sample_range, sampler, npoint in zip( + self.fps_sample_range_list, self.samplers, self.num_point): + assert fps_sample_range < points_xyz.shape[1] + + if fps_sample_range == -1: + sample_points_xyz = points_xyz[:, last_fps_end_index:] + if features is not None: + sample_features = features[:, :, last_fps_end_index:] + else: + sample_features = None + else: + sample_points_xyz = \ + points_xyz[:, last_fps_end_index:fps_sample_range] + if features is not None: + sample_features = features[:, :, last_fps_end_index: + fps_sample_range] + else: + sample_features = None + + fps_idx = sampler(sample_points_xyz.contiguous(), sample_features, + npoint) + + indices.append(fps_idx + last_fps_end_index) + last_fps_end_index += fps_sample_range + indices = torch.cat(indices, dim=1) + + return indices + + +class DFPSSampler(nn.Module): + """Using Euclidean distances of points for FPS.""" + + def __init__(self): + super().__init__() + + def forward(self, points, features, npoint): + """Sampling points with D-FPS.""" + fps_idx = furthest_point_sample(points.contiguous(), npoint) + return fps_idx + + +class FFPSSampler(nn.Module): + """Using feature distances for FPS.""" + + def __init__(self): + super().__init__() + + def forward(self, points, features, npoint): + """Sampling points with F-FPS.""" + assert features is not None, \ + 'feature input to FFPS_Sampler should not be None' + features_for_fps = torch.cat([points, features.transpose(1, 2)], dim=2) + features_dist = calc_square_dist( + features_for_fps, features_for_fps, norm=False) + fps_idx = furthest_point_sample_with_dist(features_dist, npoint) + return fps_idx + + +class FSSampler(nn.Module): + """Using F-FPS and D-FPS simultaneously.""" + + def __init__(self): + super().__init__() + + def forward(self, points, features, npoint): + """Sampling points with FS_Sampling.""" + assert features is not None, \ + 'feature input to FS_Sampler should not be None' + ffps_sampler = FFPSSampler() + dfps_sampler = DFPSSampler() + fps_idx_ffps = ffps_sampler(points, features, npoint) + fps_idx_dfps = dfps_sampler(points, features, npoint) + fps_idx = torch.cat([fps_idx_ffps, fps_idx_dfps], dim=1) + return fps_idx diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/psa_mask.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/psa_mask.py new file mode 100644 index 0000000000000000000000000000000000000000..cdf14e62b50e8d4dd6856c94333c703bcc4c9ab6 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/psa_mask.py @@ -0,0 +1,92 @@ +# 
Modified from https://github.com/hszhao/semseg/blob/master/lib/psa +from torch import nn +from torch.autograd import Function +from torch.nn.modules.utils import _pair + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', + ['psamask_forward', 'psamask_backward']) + + +class PSAMaskFunction(Function): + + @staticmethod + def symbolic(g, input, psa_type, mask_size): + return g.op( + 'mmcv::MMCVPSAMask', + input, + psa_type_i=psa_type, + mask_size_i=mask_size) + + @staticmethod + def forward(ctx, input, psa_type, mask_size): + ctx.psa_type = psa_type + ctx.mask_size = _pair(mask_size) + ctx.save_for_backward(input) + + h_mask, w_mask = ctx.mask_size + batch_size, channels, h_feature, w_feature = input.size() + assert channels == h_mask * w_mask + output = input.new_zeros( + (batch_size, h_feature * w_feature, h_feature, w_feature)) + + ext_module.psamask_forward( + input, + output, + psa_type=psa_type, + num_=batch_size, + h_feature=h_feature, + w_feature=w_feature, + h_mask=h_mask, + w_mask=w_mask, + half_h_mask=(h_mask - 1) // 2, + half_w_mask=(w_mask - 1) // 2) + return output + + @staticmethod + def backward(ctx, grad_output): + input = ctx.saved_tensors[0] + psa_type = ctx.psa_type + h_mask, w_mask = ctx.mask_size + batch_size, channels, h_feature, w_feature = input.size() + grad_input = grad_output.new_zeros( + (batch_size, channels, h_feature, w_feature)) + ext_module.psamask_backward( + grad_output, + grad_input, + psa_type=psa_type, + num_=batch_size, + h_feature=h_feature, + w_feature=w_feature, + h_mask=h_mask, + w_mask=w_mask, + half_h_mask=(h_mask - 1) // 2, + half_w_mask=(w_mask - 1) // 2) + return grad_input, None, None, None + + +psa_mask = PSAMaskFunction.apply + + +class PSAMask(nn.Module): + + def __init__(self, psa_type, mask_size=None): + super(PSAMask, self).__init__() + assert psa_type in ['collect', 'distribute'] + if psa_type == 'collect': + psa_type_enum = 0 + else: + psa_type_enum = 1 + self.psa_type_enum = psa_type_enum + self.mask_size = mask_size + self.psa_type = psa_type + + def forward(self, input): + return psa_mask(input, self.psa_type_enum, self.mask_size) + + def __repr__(self): + s = self.__class__.__name__ + s += f'(psa_type={self.psa_type}, ' + s += f'mask_size={self.mask_size})' + return s diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/roi_align.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/roi_align.py new file mode 100644 index 0000000000000000000000000000000000000000..0755aefc66e67233ceae0f4b77948301c443e9fb --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/roi_align.py @@ -0,0 +1,223 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
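+"""RoIAlign op.
+
+Illustrative usage sketch, not taken from the upstream sources; it assumes
+``RoIAlign`` is re-exported from ``annotator.mmpkg.mmcv.ops`` (as in upstream
+mmcv) and that the compiled ``_ext`` ops plus CUDA tensors are available::
+
+    import torch
+    from annotator.mmpkg.mmcv.ops import RoIAlign
+
+    feats = torch.rand(2, 256, 64, 64).cuda()
+    # each roi is (batch_idx, x1, y1, x2, y2) in input-image coordinates
+    rois = torch.tensor([[0., 4., 4., 28., 28.],
+                         [1., 10., 10., 40., 40.]], device='cuda')
+    pooled = RoIAlign(output_size=7, spatial_scale=1.0)(feats, rois)
+    # pooled.shape == (2, 256, 7, 7)
+"""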
+import torch +import torch.nn as nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair + +from ..utils import deprecated_api_warning, ext_loader + +ext_module = ext_loader.load_ext('_ext', + ['roi_align_forward', 'roi_align_backward']) + + +class RoIAlignFunction(Function): + + @staticmethod + def symbolic(g, input, rois, output_size, spatial_scale, sampling_ratio, + pool_mode, aligned): + from ..onnx import is_custom_op_loaded + has_custom_op = is_custom_op_loaded() + if has_custom_op: + return g.op( + 'mmcv::MMCVRoiAlign', + input, + rois, + output_height_i=output_size[0], + output_width_i=output_size[1], + spatial_scale_f=spatial_scale, + sampling_ratio_i=sampling_ratio, + mode_s=pool_mode, + aligned_i=aligned) + else: + from torch.onnx.symbolic_opset9 import sub, squeeze + from torch.onnx.symbolic_helper import _slice_helper + from torch.onnx import TensorProtoDataType + # batch_indices = rois[:, 0].long() + batch_indices = _slice_helper( + g, rois, axes=[1], starts=[0], ends=[1]) + batch_indices = squeeze(g, batch_indices, 1) + batch_indices = g.op( + 'Cast', batch_indices, to_i=TensorProtoDataType.INT64) + # rois = rois[:, 1:] + rois = _slice_helper(g, rois, axes=[1], starts=[1], ends=[5]) + if aligned: + # rois -= 0.5/spatial_scale + aligned_offset = g.op( + 'Constant', + value_t=torch.tensor([0.5 / spatial_scale], + dtype=torch.float32)) + rois = sub(g, rois, aligned_offset) + # roi align + return g.op( + 'RoiAlign', + input, + rois, + batch_indices, + output_height_i=output_size[0], + output_width_i=output_size[1], + spatial_scale_f=spatial_scale, + sampling_ratio_i=max(0, sampling_ratio), + mode_s=pool_mode) + + @staticmethod + def forward(ctx, + input, + rois, + output_size, + spatial_scale=1.0, + sampling_ratio=0, + pool_mode='avg', + aligned=True): + ctx.output_size = _pair(output_size) + ctx.spatial_scale = spatial_scale + ctx.sampling_ratio = sampling_ratio + assert pool_mode in ('max', 'avg') + ctx.pool_mode = 0 if pool_mode == 'max' else 1 + ctx.aligned = aligned + ctx.input_shape = input.size() + + assert rois.size(1) == 5, 'RoI must be (idx, x1, y1, x2, y2)!' + + output_shape = (rois.size(0), input.size(1), ctx.output_size[0], + ctx.output_size[1]) + output = input.new_zeros(output_shape) + if ctx.pool_mode == 0: + argmax_y = input.new_zeros(output_shape) + argmax_x = input.new_zeros(output_shape) + else: + argmax_y = input.new_zeros(0) + argmax_x = input.new_zeros(0) + + ext_module.roi_align_forward( + input, + rois, + output, + argmax_y, + argmax_x, + aligned_height=ctx.output_size[0], + aligned_width=ctx.output_size[1], + spatial_scale=ctx.spatial_scale, + sampling_ratio=ctx.sampling_ratio, + pool_mode=ctx.pool_mode, + aligned=ctx.aligned) + + ctx.save_for_backward(rois, argmax_y, argmax_x) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + rois, argmax_y, argmax_x = ctx.saved_tensors + grad_input = grad_output.new_zeros(ctx.input_shape) + # complex head architecture may cause grad_output uncontiguous. 
+ grad_output = grad_output.contiguous() + ext_module.roi_align_backward( + grad_output, + rois, + argmax_y, + argmax_x, + grad_input, + aligned_height=ctx.output_size[0], + aligned_width=ctx.output_size[1], + spatial_scale=ctx.spatial_scale, + sampling_ratio=ctx.sampling_ratio, + pool_mode=ctx.pool_mode, + aligned=ctx.aligned) + return grad_input, None, None, None, None, None, None + + +roi_align = RoIAlignFunction.apply + + +class RoIAlign(nn.Module): + """RoI align pooling layer. + + Args: + output_size (tuple): h, w + spatial_scale (float): scale the input boxes by this number + sampling_ratio (int): number of inputs samples to take for each + output sample. 0 to take samples densely for current models. + pool_mode (str, 'avg' or 'max'): pooling mode in each bin. + aligned (bool): if False, use the legacy implementation in + MMDetection. If True, align the results more perfectly. + use_torchvision (bool): whether to use roi_align from torchvision. + + Note: + The implementation of RoIAlign when aligned=True is modified from + https://github.com/facebookresearch/detectron2/ + + The meaning of aligned=True: + + Given a continuous coordinate c, its two neighboring pixel + indices (in our pixel model) are computed by floor(c - 0.5) and + ceil(c - 0.5). For example, c=1.3 has pixel neighbors with discrete + indices [0] and [1] (which are sampled from the underlying signal + at continuous coordinates 0.5 and 1.5). But the original roi_align + (aligned=False) does not subtract the 0.5 when computing + neighboring pixel indices and therefore it uses pixels with a + slightly incorrect alignment (relative to our pixel model) when + performing bilinear interpolation. + + With `aligned=True`, + we first appropriately scale the ROI and then shift it by -0.5 + prior to calling roi_align. This produces the correct neighbors; + + The difference does not make a difference to the model's + performance if ROIAlign is used together with conv layers. + """ + + @deprecated_api_warning( + { + 'out_size': 'output_size', + 'sample_num': 'sampling_ratio' + }, + cls_name='RoIAlign') + def __init__(self, + output_size, + spatial_scale=1.0, + sampling_ratio=0, + pool_mode='avg', + aligned=True, + use_torchvision=False): + super(RoIAlign, self).__init__() + + self.output_size = _pair(output_size) + self.spatial_scale = float(spatial_scale) + self.sampling_ratio = int(sampling_ratio) + self.pool_mode = pool_mode + self.aligned = aligned + self.use_torchvision = use_torchvision + + def forward(self, input, rois): + """ + Args: + input: NCHW images + rois: Bx5 boxes. First column is the index into N.\ + The other 4 columns are xyxy. + """ + if self.use_torchvision: + from torchvision.ops import roi_align as tv_roi_align + if 'aligned' in tv_roi_align.__code__.co_varnames: + return tv_roi_align(input, rois, self.output_size, + self.spatial_scale, self.sampling_ratio, + self.aligned) + else: + if self.aligned: + rois -= rois.new_tensor([0.] 
+ + [0.5 / self.spatial_scale] * 4) + return tv_roi_align(input, rois, self.output_size, + self.spatial_scale, self.sampling_ratio) + else: + return roi_align(input, rois, self.output_size, self.spatial_scale, + self.sampling_ratio, self.pool_mode, self.aligned) + + def __repr__(self): + s = self.__class__.__name__ + s += f'(output_size={self.output_size}, ' + s += f'spatial_scale={self.spatial_scale}, ' + s += f'sampling_ratio={self.sampling_ratio}, ' + s += f'pool_mode={self.pool_mode}, ' + s += f'aligned={self.aligned}, ' + s += f'use_torchvision={self.use_torchvision})' + return s diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/roi_align_rotated.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/roi_align_rotated.py new file mode 100644 index 0000000000000000000000000000000000000000..0ce4961a3555d4da8bc3e32f1f7d5ad50036587d --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/roi_align_rotated.py @@ -0,0 +1,177 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch.nn as nn +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', ['roi_align_rotated_forward', 'roi_align_rotated_backward']) + + +class RoIAlignRotatedFunction(Function): + + @staticmethod + def symbolic(g, features, rois, out_size, spatial_scale, sample_num, + aligned, clockwise): + if isinstance(out_size, int): + out_h = out_size + out_w = out_size + elif isinstance(out_size, tuple): + assert len(out_size) == 2 + assert isinstance(out_size[0], int) + assert isinstance(out_size[1], int) + out_h, out_w = out_size + else: + raise TypeError( + '"out_size" must be an integer or tuple of integers') + return g.op( + 'mmcv::MMCVRoIAlignRotated', + features, + rois, + output_height_i=out_h, + output_width_i=out_h, + spatial_scale_f=spatial_scale, + sampling_ratio_i=sample_num, + aligned_i=aligned, + clockwise_i=clockwise) + + @staticmethod + def forward(ctx, + features, + rois, + out_size, + spatial_scale, + sample_num=0, + aligned=True, + clockwise=False): + if isinstance(out_size, int): + out_h = out_size + out_w = out_size + elif isinstance(out_size, tuple): + assert len(out_size) == 2 + assert isinstance(out_size[0], int) + assert isinstance(out_size[1], int) + out_h, out_w = out_size + else: + raise TypeError( + '"out_size" must be an integer or tuple of integers') + ctx.spatial_scale = spatial_scale + ctx.sample_num = sample_num + ctx.aligned = aligned + ctx.clockwise = clockwise + ctx.save_for_backward(rois) + ctx.feature_size = features.size() + + batch_size, num_channels, data_height, data_width = features.size() + num_rois = rois.size(0) + + output = features.new_zeros(num_rois, num_channels, out_h, out_w) + ext_module.roi_align_rotated_forward( + features, + rois, + output, + pooled_height=out_h, + pooled_width=out_w, + spatial_scale=spatial_scale, + sample_num=sample_num, + aligned=aligned, + clockwise=clockwise) + return output + + @staticmethod + def backward(ctx, grad_output): + feature_size = ctx.feature_size + spatial_scale = ctx.spatial_scale + aligned = ctx.aligned + clockwise = ctx.clockwise + sample_num = ctx.sample_num + rois = ctx.saved_tensors[0] + assert feature_size is not None + batch_size, num_channels, data_height, data_width = feature_size + + out_w = grad_output.size(3) + out_h = grad_output.size(2) + + grad_input = grad_rois = None + + if ctx.needs_input_grad[0]: + grad_input = rois.new_zeros(batch_size, num_channels, data_height, 
+ data_width) + ext_module.roi_align_rotated_backward( + grad_output.contiguous(), + rois, + grad_input, + pooled_height=out_h, + pooled_width=out_w, + spatial_scale=spatial_scale, + sample_num=sample_num, + aligned=aligned, + clockwise=clockwise) + return grad_input, grad_rois, None, None, None, None, None + + +roi_align_rotated = RoIAlignRotatedFunction.apply + + +class RoIAlignRotated(nn.Module): + """RoI align pooling layer for rotated proposals. + + It accepts a feature map of shape (N, C, H, W) and rois with shape + (n, 6) with each roi decoded as (batch_index, center_x, center_y, + w, h, angle). The angle is in radian. + + Args: + out_size (tuple): h, w + spatial_scale (float): scale the input boxes by this number + sample_num (int): number of inputs samples to take for each + output sample. 0 to take samples densely for current models. + aligned (bool): if False, use the legacy implementation in + MMDetection. If True, align the results more perfectly. + Default: True. + clockwise (bool): If True, the angle in each proposal follows a + clockwise fashion in image space, otherwise, the angle is + counterclockwise. Default: False. + + Note: + The implementation of RoIAlign when aligned=True is modified from + https://github.com/facebookresearch/detectron2/ + + The meaning of aligned=True: + + Given a continuous coordinate c, its two neighboring pixel + indices (in our pixel model) are computed by floor(c - 0.5) and + ceil(c - 0.5). For example, c=1.3 has pixel neighbors with discrete + indices [0] and [1] (which are sampled from the underlying signal + at continuous coordinates 0.5 and 1.5). But the original roi_align + (aligned=False) does not subtract the 0.5 when computing + neighboring pixel indices and therefore it uses pixels with a + slightly incorrect alignment (relative to our pixel model) when + performing bilinear interpolation. + + With `aligned=True`, + we first appropriately scale the ROI and then shift it by -0.5 + prior to calling roi_align. This produces the correct neighbors; + + The difference does not make a difference to the model's + performance if ROIAlign is used together with conv layers. + """ + + def __init__(self, + out_size, + spatial_scale, + sample_num=0, + aligned=True, + clockwise=False): + super(RoIAlignRotated, self).__init__() + + self.out_size = out_size + self.spatial_scale = float(spatial_scale) + self.sample_num = int(sample_num) + self.aligned = aligned + self.clockwise = clockwise + + def forward(self, features, rois): + return RoIAlignRotatedFunction.apply(features, rois, self.out_size, + self.spatial_scale, + self.sample_num, self.aligned, + self.clockwise) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/roi_pool.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/roi_pool.py new file mode 100644 index 0000000000000000000000000000000000000000..d339d8f2941eabc1cbe181a9c6c5ab5ff4ff4e5f --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/roi_pool.py @@ -0,0 +1,86 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
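+"""RoIPool op.
+
+Illustrative usage sketch, not taken from the upstream sources; it assumes
+``RoIPool`` is re-exported from ``annotator.mmpkg.mmcv.ops`` (as in upstream
+mmcv) and that the compiled ``_ext`` ops plus CUDA tensors are available::
+
+    import torch
+    from annotator.mmpkg.mmcv.ops import RoIPool
+
+    feats = torch.rand(1, 64, 32, 32).cuda()
+    rois = torch.tensor([[0., 2., 2., 20., 20.]], device='cuda')
+    pooled = RoIPool(output_size=7, spatial_scale=1.0)(feats, rois)
+    # pooled.shape == (1, 64, 7, 7)
+"""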
+import torch +import torch.nn as nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', + ['roi_pool_forward', 'roi_pool_backward']) + + +class RoIPoolFunction(Function): + + @staticmethod + def symbolic(g, input, rois, output_size, spatial_scale): + return g.op( + 'MaxRoiPool', + input, + rois, + pooled_shape_i=output_size, + spatial_scale_f=spatial_scale) + + @staticmethod + def forward(ctx, input, rois, output_size, spatial_scale=1.0): + ctx.output_size = _pair(output_size) + ctx.spatial_scale = spatial_scale + ctx.input_shape = input.size() + + assert rois.size(1) == 5, 'RoI must be (idx, x1, y1, x2, y2)!' + + output_shape = (rois.size(0), input.size(1), ctx.output_size[0], + ctx.output_size[1]) + output = input.new_zeros(output_shape) + argmax = input.new_zeros(output_shape, dtype=torch.int) + + ext_module.roi_pool_forward( + input, + rois, + output, + argmax, + pooled_height=ctx.output_size[0], + pooled_width=ctx.output_size[1], + spatial_scale=ctx.spatial_scale) + + ctx.save_for_backward(rois, argmax) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + rois, argmax = ctx.saved_tensors + grad_input = grad_output.new_zeros(ctx.input_shape) + + ext_module.roi_pool_backward( + grad_output, + rois, + argmax, + grad_input, + pooled_height=ctx.output_size[0], + pooled_width=ctx.output_size[1], + spatial_scale=ctx.spatial_scale) + + return grad_input, None, None, None + + +roi_pool = RoIPoolFunction.apply + + +class RoIPool(nn.Module): + + def __init__(self, output_size, spatial_scale=1.0): + super(RoIPool, self).__init__() + + self.output_size = _pair(output_size) + self.spatial_scale = float(spatial_scale) + + def forward(self, input, rois): + return roi_pool(input, rois, self.output_size, self.spatial_scale) + + def __repr__(self): + s = self.__class__.__name__ + s += f'(output_size={self.output_size}, ' + s += f'spatial_scale={self.spatial_scale})' + return s diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/roiaware_pool3d.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/roiaware_pool3d.py new file mode 100644 index 0000000000000000000000000000000000000000..8191920ca50b388ef58f577dc986da101662ac53 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/roiaware_pool3d.py @@ -0,0 +1,114 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from torch import nn as nn +from torch.autograd import Function + +import annotator.mmpkg.mmcv as mmcv +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', ['roiaware_pool3d_forward', 'roiaware_pool3d_backward']) + + +class RoIAwarePool3d(nn.Module): + """Encode the geometry-specific features of each 3D proposal. + + Please refer to `PartA2 `_ for more + details. + + Args: + out_size (int or tuple): The size of output features. n or + [n1, n2, n3]. + max_pts_per_voxel (int, optional): The maximum number of points per + voxel. Default: 128. + mode (str, optional): Pooling method of RoIAware, 'max' or 'avg'. + Default: 'max'. 
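+
+    Example (illustrative sketch, not from the upstream source; shapes are
+    arbitrary and a compiled CUDA ``_ext`` extension is assumed)::
+
+        >>> pool = RoIAwarePool3d(out_size=4, max_pts_per_voxel=128,
+        ...                       mode='max')
+        >>> rois = torch.rand(2, 7).cuda()             # (N, 7) boxes
+        >>> pts = torch.rand(1024, 3).cuda()           # (npoints, 3)
+        >>> pts_feature = torch.rand(1024, 16).cuda()  # (npoints, C)
+        >>> out = pool(rois, pts, pts_feature)         # (2, 4, 4, 4, 16)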
+ """ + + def __init__(self, out_size, max_pts_per_voxel=128, mode='max'): + super().__init__() + + self.out_size = out_size + self.max_pts_per_voxel = max_pts_per_voxel + assert mode in ['max', 'avg'] + pool_mapping = {'max': 0, 'avg': 1} + self.mode = pool_mapping[mode] + + def forward(self, rois, pts, pts_feature): + """ + Args: + rois (torch.Tensor): [N, 7], in LiDAR coordinate, + (x, y, z) is the bottom center of rois. + pts (torch.Tensor): [npoints, 3], coordinates of input points. + pts_feature (torch.Tensor): [npoints, C], features of input points. + + Returns: + pooled_features (torch.Tensor): [N, out_x, out_y, out_z, C] + """ + + return RoIAwarePool3dFunction.apply(rois, pts, pts_feature, + self.out_size, + self.max_pts_per_voxel, self.mode) + + +class RoIAwarePool3dFunction(Function): + + @staticmethod + def forward(ctx, rois, pts, pts_feature, out_size, max_pts_per_voxel, + mode): + """ + Args: + rois (torch.Tensor): [N, 7], in LiDAR coordinate, + (x, y, z) is the bottom center of rois. + pts (torch.Tensor): [npoints, 3], coordinates of input points. + pts_feature (torch.Tensor): [npoints, C], features of input points. + out_size (int or tuple): The size of output features. n or + [n1, n2, n3]. + max_pts_per_voxel (int): The maximum number of points per voxel. + Default: 128. + mode (int): Pooling method of RoIAware, 0 (max pool) or 1 (average + pool). + + Returns: + pooled_features (torch.Tensor): [N, out_x, out_y, out_z, C], output + pooled features. + """ + + if isinstance(out_size, int): + out_x = out_y = out_z = out_size + else: + assert len(out_size) == 3 + assert mmcv.is_tuple_of(out_size, int) + out_x, out_y, out_z = out_size + + num_rois = rois.shape[0] + num_channels = pts_feature.shape[-1] + num_pts = pts.shape[0] + + pooled_features = pts_feature.new_zeros( + (num_rois, out_x, out_y, out_z, num_channels)) + argmax = pts_feature.new_zeros( + (num_rois, out_x, out_y, out_z, num_channels), dtype=torch.int) + pts_idx_of_voxels = pts_feature.new_zeros( + (num_rois, out_x, out_y, out_z, max_pts_per_voxel), + dtype=torch.int) + + ext_module.roiaware_pool3d_forward(rois, pts, pts_feature, argmax, + pts_idx_of_voxels, pooled_features, + mode) + + ctx.roiaware_pool3d_for_backward = (pts_idx_of_voxels, argmax, mode, + num_pts, num_channels) + return pooled_features + + @staticmethod + def backward(ctx, grad_out): + ret = ctx.roiaware_pool3d_for_backward + pts_idx_of_voxels, argmax, mode, num_pts, num_channels = ret + + grad_in = grad_out.new_zeros((num_pts, num_channels)) + ext_module.roiaware_pool3d_backward(pts_idx_of_voxels, argmax, + grad_out.contiguous(), grad_in, + mode) + + return None, None, grad_in, None, None, None diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/roipoint_pool3d.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/roipoint_pool3d.py new file mode 100644 index 0000000000000000000000000000000000000000..0a21412c0728431c04b84245bc2e3109eea9aefc --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/roipoint_pool3d.py @@ -0,0 +1,77 @@ +from torch import nn as nn +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['roipoint_pool3d_forward']) + + +class RoIPointPool3d(nn.Module): + """Encode the geometry-specific features of each 3D proposal. + + Please refer to `Paper of PartA2 `_ + for more details. + + Args: + num_sampled_points (int, optional): Number of samples in each roi. + Default: 512. 
+ """ + + def __init__(self, num_sampled_points=512): + super().__init__() + self.num_sampled_points = num_sampled_points + + def forward(self, points, point_features, boxes3d): + """ + Args: + points (torch.Tensor): Input points whose shape is (B, N, C). + point_features (torch.Tensor): Features of input points whose shape + is (B, N, C). + boxes3d (B, M, 7), Input bounding boxes whose shape is (B, M, 7). + + Returns: + pooled_features (torch.Tensor): The output pooled features whose + shape is (B, M, 512, 3 + C). + pooled_empty_flag (torch.Tensor): Empty flag whose shape is (B, M). + """ + return RoIPointPool3dFunction.apply(points, point_features, boxes3d, + self.num_sampled_points) + + +class RoIPointPool3dFunction(Function): + + @staticmethod + def forward(ctx, points, point_features, boxes3d, num_sampled_points=512): + """ + Args: + points (torch.Tensor): Input points whose shape is (B, N, C). + point_features (torch.Tensor): Features of input points whose shape + is (B, N, C). + boxes3d (B, M, 7), Input bounding boxes whose shape is (B, M, 7). + num_sampled_points (int, optional): The num of sampled points. + Default: 512. + + Returns: + pooled_features (torch.Tensor): The output pooled features whose + shape is (B, M, 512, 3 + C). + pooled_empty_flag (torch.Tensor): Empty flag whose shape is (B, M). + """ + assert len(points.shape) == 3 and points.shape[2] == 3 + batch_size, boxes_num, feature_len = points.shape[0], boxes3d.shape[ + 1], point_features.shape[2] + pooled_boxes3d = boxes3d.view(batch_size, -1, 7) + pooled_features = point_features.new_zeros( + (batch_size, boxes_num, num_sampled_points, 3 + feature_len)) + pooled_empty_flag = point_features.new_zeros( + (batch_size, boxes_num)).int() + + ext_module.roipoint_pool3d_forward(points.contiguous(), + pooled_boxes3d.contiguous(), + point_features.contiguous(), + pooled_features, pooled_empty_flag) + + return pooled_features, pooled_empty_flag + + @staticmethod + def backward(ctx, grad_out): + raise NotImplementedError diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/saconv.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/saconv.py new file mode 100644 index 0000000000000000000000000000000000000000..9d7be88c428ea2b9af2c32c60a86dddd13988ce8 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/saconv.py @@ -0,0 +1,145 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +import torch.nn.functional as F + +from annotator.mmpkg.mmcv.cnn import CONV_LAYERS, ConvAWS2d, constant_init +from annotator.mmpkg.mmcv.ops.deform_conv import deform_conv2d +from annotator.mmpkg.mmcv.utils import TORCH_VERSION, digit_version + + +@CONV_LAYERS.register_module(name='SAC') +class SAConv2d(ConvAWS2d): + """SAC (Switchable Atrous Convolution) + + This is an implementation of SAC in DetectoRS + (https://arxiv.org/pdf/2006.02334.pdf). + + Args: + in_channels (int): Number of channels in the input image + out_channels (int): Number of channels produced by the convolution + kernel_size (int or tuple): Size of the convolving kernel + stride (int or tuple, optional): Stride of the convolution. Default: 1 + padding (int or tuple, optional): Zero-padding added to both sides of + the input. Default: 0 + padding_mode (string, optional): ``'zeros'``, ``'reflect'``, + ``'replicate'`` or ``'circular'``. Default: ``'zeros'`` + dilation (int or tuple, optional): Spacing between kernel elements. 
+ Default: 1 + groups (int, optional): Number of blocked connections from input + channels to output channels. Default: 1 + bias (bool, optional): If ``True``, adds a learnable bias to the + output. Default: ``True`` + use_deform: If ``True``, replace convolution with deformable + convolution. Default: ``False``. + """ + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + bias=True, + use_deform=False): + super().__init__( + in_channels, + out_channels, + kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + bias=bias) + self.use_deform = use_deform + self.switch = nn.Conv2d( + self.in_channels, 1, kernel_size=1, stride=stride, bias=True) + self.weight_diff = nn.Parameter(torch.Tensor(self.weight.size())) + self.pre_context = nn.Conv2d( + self.in_channels, self.in_channels, kernel_size=1, bias=True) + self.post_context = nn.Conv2d( + self.out_channels, self.out_channels, kernel_size=1, bias=True) + if self.use_deform: + self.offset_s = nn.Conv2d( + self.in_channels, + 18, + kernel_size=3, + padding=1, + stride=stride, + bias=True) + self.offset_l = nn.Conv2d( + self.in_channels, + 18, + kernel_size=3, + padding=1, + stride=stride, + bias=True) + self.init_weights() + + def init_weights(self): + constant_init(self.switch, 0, bias=1) + self.weight_diff.data.zero_() + constant_init(self.pre_context, 0) + constant_init(self.post_context, 0) + if self.use_deform: + constant_init(self.offset_s, 0) + constant_init(self.offset_l, 0) + + def forward(self, x): + # pre-context + avg_x = F.adaptive_avg_pool2d(x, output_size=1) + avg_x = self.pre_context(avg_x) + avg_x = avg_x.expand_as(x) + x = x + avg_x + # switch + avg_x = F.pad(x, pad=(2, 2, 2, 2), mode='reflect') + avg_x = F.avg_pool2d(avg_x, kernel_size=5, stride=1, padding=0) + switch = self.switch(avg_x) + # sac + weight = self._get_weight(self.weight) + zero_bias = torch.zeros( + self.out_channels, device=weight.device, dtype=weight.dtype) + + if self.use_deform: + offset = self.offset_s(avg_x) + out_s = deform_conv2d(x, offset, weight, self.stride, self.padding, + self.dilation, self.groups, 1) + else: + if (TORCH_VERSION == 'parrots' + or digit_version(TORCH_VERSION) < digit_version('1.5.0')): + out_s = super().conv2d_forward(x, weight) + elif digit_version(TORCH_VERSION) >= digit_version('1.8.0'): + # bias is a required argument of _conv_forward in torch 1.8.0 + out_s = super()._conv_forward(x, weight, zero_bias) + else: + out_s = super()._conv_forward(x, weight) + ori_p = self.padding + ori_d = self.dilation + self.padding = tuple(3 * p for p in self.padding) + self.dilation = tuple(3 * d for d in self.dilation) + weight = weight + self.weight_diff + if self.use_deform: + offset = self.offset_l(avg_x) + out_l = deform_conv2d(x, offset, weight, self.stride, self.padding, + self.dilation, self.groups, 1) + else: + if (TORCH_VERSION == 'parrots' + or digit_version(TORCH_VERSION) < digit_version('1.5.0')): + out_l = super().conv2d_forward(x, weight) + elif digit_version(TORCH_VERSION) >= digit_version('1.8.0'): + # bias is a required argument of _conv_forward in torch 1.8.0 + out_l = super()._conv_forward(x, weight, zero_bias) + else: + out_l = super()._conv_forward(x, weight) + + out = switch * out_s + (1 - switch) * out_l + self.padding = ori_p + self.dilation = ori_d + # post-context + avg_x = F.adaptive_avg_pool2d(out, output_size=1) + avg_x = self.post_context(avg_x) + avg_x = avg_x.expand_as(out) + out = out + avg_x + return out diff 
--git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/scatter_points.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/scatter_points.py new file mode 100644 index 0000000000000000000000000000000000000000..2b8aa4169e9f6ca4a6f845ce17d6d1e4db416bb8 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/scatter_points.py @@ -0,0 +1,135 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from torch import nn +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', + ['dynamic_point_to_voxel_forward', 'dynamic_point_to_voxel_backward']) + + +class _DynamicScatter(Function): + + @staticmethod + def forward(ctx, feats, coors, reduce_type='max'): + """convert kitti points(N, >=3) to voxels. + + Args: + feats (torch.Tensor): [N, C]. Points features to be reduced + into voxels. + coors (torch.Tensor): [N, ndim]. Corresponding voxel coordinates + (specifically multi-dim voxel index) of each points. + reduce_type (str, optional): Reduce op. support 'max', 'sum' and + 'mean'. Default: 'max'. + + Returns: + voxel_feats (torch.Tensor): [M, C]. Reduced features, input + features that shares the same voxel coordinates are reduced to + one row. + voxel_coors (torch.Tensor): [M, ndim]. Voxel coordinates. + """ + results = ext_module.dynamic_point_to_voxel_forward( + feats, coors, reduce_type) + (voxel_feats, voxel_coors, point2voxel_map, + voxel_points_count) = results + ctx.reduce_type = reduce_type + ctx.save_for_backward(feats, voxel_feats, point2voxel_map, + voxel_points_count) + ctx.mark_non_differentiable(voxel_coors) + return voxel_feats, voxel_coors + + @staticmethod + def backward(ctx, grad_voxel_feats, grad_voxel_coors=None): + (feats, voxel_feats, point2voxel_map, + voxel_points_count) = ctx.saved_tensors + grad_feats = torch.zeros_like(feats) + # TODO: whether to use index put or use cuda_backward + # To use index put, need point to voxel index + ext_module.dynamic_point_to_voxel_backward( + grad_feats, grad_voxel_feats.contiguous(), feats, voxel_feats, + point2voxel_map, voxel_points_count, ctx.reduce_type) + return grad_feats, None, None + + +dynamic_scatter = _DynamicScatter.apply + + +class DynamicScatter(nn.Module): + """Scatters points into voxels, used in the voxel encoder with dynamic + voxelization. + + Note: + The CPU and GPU implementation get the same output, but have numerical + difference after summation and division (e.g., 5e-7). + + Args: + voxel_size (list): list [x, y, z] size of three dimension. + point_cloud_range (list): The coordinate range of points, [x_min, + y_min, z_min, x_max, y_max, z_max]. + average_points (bool): whether to use avg pooling to scatter points + into voxel. + """ + + def __init__(self, voxel_size, point_cloud_range, average_points: bool): + super().__init__() + + self.voxel_size = voxel_size + self.point_cloud_range = point_cloud_range + self.average_points = average_points + + def forward_single(self, points, coors): + """Scatters points into voxels. + + Args: + points (torch.Tensor): Points to be reduced into voxels. + coors (torch.Tensor): Corresponding voxel coordinates (specifically + multi-dim voxel index) of each points. + + Returns: + voxel_feats (torch.Tensor): Reduced features, input features that + shares the same voxel coordinates are reduced to one row. + voxel_coors (torch.Tensor): Voxel coordinates. 
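+
+        Example (illustrative sketch, not from the upstream source; the voxel
+        size, range and integer dtype of ``coors`` are assumptions)::
+
+            >>> scatter = DynamicScatter(voxel_size=[0.1, 0.1, 0.1],
+            ...                          point_cloud_range=[0, -40, -3, 70, 40, 1],
+            ...                          average_points=False)
+            >>> feats = torch.rand(100, 4).cuda()                    # (N, C)
+            >>> coors = torch.randint(0, 10, (100, 3)).int().cuda()  # (N, 3)
+            >>> voxel_feats, voxel_coors = scatter(feats, coors)
+            >>> # dispatches to forward_single because coors has 3 columns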
+ """ + reduce = 'mean' if self.average_points else 'max' + return dynamic_scatter(points.contiguous(), coors.contiguous(), reduce) + + def forward(self, points, coors): + """Scatters points/features into voxels. + + Args: + points (torch.Tensor): Points to be reduced into voxels. + coors (torch.Tensor): Corresponding voxel coordinates (specifically + multi-dim voxel index) of each points. + + Returns: + voxel_feats (torch.Tensor): Reduced features, input features that + shares the same voxel coordinates are reduced to one row. + voxel_coors (torch.Tensor): Voxel coordinates. + """ + if coors.size(-1) == 3: + return self.forward_single(points, coors) + else: + batch_size = coors[-1, 0] + 1 + voxels, voxel_coors = [], [] + for i in range(batch_size): + inds = torch.where(coors[:, 0] == i) + voxel, voxel_coor = self.forward_single( + points[inds], coors[inds][:, 1:]) + coor_pad = nn.functional.pad( + voxel_coor, (1, 0), mode='constant', value=i) + voxel_coors.append(coor_pad) + voxels.append(voxel) + features = torch.cat(voxels, dim=0) + feature_coors = torch.cat(voxel_coors, dim=0) + + return features, feature_coors + + def __repr__(self): + s = self.__class__.__name__ + '(' + s += 'voxel_size=' + str(self.voxel_size) + s += ', point_cloud_range=' + str(self.point_cloud_range) + s += ', average_points=' + str(self.average_points) + s += ')' + return s diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/sync_bn.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/sync_bn.py new file mode 100644 index 0000000000000000000000000000000000000000..46db9200f9eafbad662a04e71f60a099a3178346 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/sync_bn.py @@ -0,0 +1,279 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import torch +import torch.distributed as dist +import torch.nn.functional as F +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.module import Module +from torch.nn.parameter import Parameter + +from annotator.mmpkg.mmcv.cnn import NORM_LAYERS +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', [ + 'sync_bn_forward_mean', 'sync_bn_forward_var', 'sync_bn_forward_output', + 'sync_bn_backward_param', 'sync_bn_backward_data' +]) + + +class SyncBatchNormFunction(Function): + + @staticmethod + def symbolic(g, input, running_mean, running_var, weight, bias, momentum, + eps, group, group_size, stats_mode): + return g.op( + 'mmcv::MMCVSyncBatchNorm', + input, + running_mean, + running_var, + weight, + bias, + momentum_f=momentum, + eps_f=eps, + group_i=group, + group_size_i=group_size, + stats_mode=stats_mode) + + @staticmethod + def forward(self, input, running_mean, running_var, weight, bias, momentum, + eps, group, group_size, stats_mode): + self.momentum = momentum + self.eps = eps + self.group = group + self.group_size = group_size + self.stats_mode = stats_mode + + assert isinstance( + input, (torch.HalfTensor, torch.FloatTensor, + torch.cuda.HalfTensor, torch.cuda.FloatTensor)), \ + f'only support Half or Float Tensor, but {input.type()}' + output = torch.zeros_like(input) + input3d = input.flatten(start_dim=2) + output3d = output.view_as(input3d) + num_channels = input3d.size(1) + + # ensure mean/var/norm/std are initialized as zeros + # ``torch.empty()`` does not guarantee that + mean = torch.zeros( + num_channels, dtype=torch.float, device=input3d.device) + var = torch.zeros( + num_channels, dtype=torch.float, device=input3d.device) + norm = torch.zeros_like( + input3d, dtype=torch.float, device=input3d.device) + std = torch.zeros( + num_channels, dtype=torch.float, device=input3d.device) + + batch_size = input3d.size(0) + if batch_size > 0: + ext_module.sync_bn_forward_mean(input3d, mean) + batch_flag = torch.ones([1], device=mean.device, dtype=mean.dtype) + else: + # skip updating mean and leave it as zeros when the input is empty + batch_flag = torch.zeros([1], device=mean.device, dtype=mean.dtype) + + # synchronize mean and the batch flag + vec = torch.cat([mean, batch_flag]) + if self.stats_mode == 'N': + vec *= batch_size + if self.group_size > 1: + dist.all_reduce(vec, group=self.group) + total_batch = vec[-1].detach() + mean = vec[:num_channels] + + if self.stats_mode == 'default': + mean = mean / self.group_size + elif self.stats_mode == 'N': + mean = mean / total_batch.clamp(min=1) + else: + raise NotImplementedError + + # leave var as zeros when the input is empty + if batch_size > 0: + ext_module.sync_bn_forward_var(input3d, mean, var) + + if self.stats_mode == 'N': + var *= batch_size + if self.group_size > 1: + dist.all_reduce(var, group=self.group) + + if self.stats_mode == 'default': + var /= self.group_size + elif self.stats_mode == 'N': + var /= total_batch.clamp(min=1) + else: + raise NotImplementedError + + # if the total batch size over all the ranks is zero, + # we should not update the statistics in the current batch + update_flag = total_batch.clamp(max=1) + momentum = update_flag * self.momentum + ext_module.sync_bn_forward_output( + input3d, + mean, + var, + weight, + bias, + running_mean, + running_var, + norm, + std, + output3d, + eps=self.eps, + momentum=momentum, + group_size=self.group_size) + self.save_for_backward(norm, std, weight) + return output + + 
@staticmethod + @once_differentiable + def backward(self, grad_output): + norm, std, weight = self.saved_tensors + grad_weight = torch.zeros_like(weight) + grad_bias = torch.zeros_like(weight) + grad_input = torch.zeros_like(grad_output) + grad_output3d = grad_output.flatten(start_dim=2) + grad_input3d = grad_input.view_as(grad_output3d) + + batch_size = grad_input3d.size(0) + if batch_size > 0: + ext_module.sync_bn_backward_param(grad_output3d, norm, grad_weight, + grad_bias) + + # all reduce + if self.group_size > 1: + dist.all_reduce(grad_weight, group=self.group) + dist.all_reduce(grad_bias, group=self.group) + grad_weight /= self.group_size + grad_bias /= self.group_size + + if batch_size > 0: + ext_module.sync_bn_backward_data(grad_output3d, weight, + grad_weight, grad_bias, norm, std, + grad_input3d) + + return grad_input, None, None, grad_weight, grad_bias, \ + None, None, None, None, None + + +@NORM_LAYERS.register_module(name='MMSyncBN') +class SyncBatchNorm(Module): + """Synchronized Batch Normalization. + + Args: + num_features (int): number of features/chennels in input tensor + eps (float, optional): a value added to the denominator for numerical + stability. Defaults to 1e-5. + momentum (float, optional): the value used for the running_mean and + running_var computation. Defaults to 0.1. + affine (bool, optional): whether to use learnable affine parameters. + Defaults to True. + track_running_stats (bool, optional): whether to track the running + mean and variance during training. When set to False, this + module does not track such statistics, and initializes statistics + buffers ``running_mean`` and ``running_var`` as ``None``. When + these buffers are ``None``, this module always uses batch + statistics in both training and eval modes. Defaults to True. + group (int, optional): synchronization of stats happen within + each process group individually. By default it is synchronization + across the whole world. Defaults to None. + stats_mode (str, optional): The statistical mode. Available options + includes ``'default'`` and ``'N'``. Defaults to 'default'. + When ``stats_mode=='default'``, it computes the overall statistics + using those from each worker with equal weight, i.e., the + statistics are synchronized and simply divied by ``group``. This + mode will produce inaccurate statistics when empty tensors occur. + When ``stats_mode=='N'``, it compute the overall statistics using + the total number of batches in each worker ignoring the number of + group, i.e., the statistics are synchronized and then divied by + the total batch ``N``. This mode is beneficial when empty tensors + occur during training, as it average the total mean by the real + number of batch. 
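+
+    Example (illustrative sketch, not from the upstream source; assumes the
+    default process group has been initialized with
+    ``torch.distributed.init_process_group`` and the ``_ext`` ops are built)::
+
+        >>> sync_bn = SyncBatchNorm(64).cuda()
+        >>> x = torch.rand(4, 64, 32, 32).cuda()
+        >>> y = sync_bn(x)   # same shape as ``x``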
+ """ + + def __init__(self, + num_features, + eps=1e-5, + momentum=0.1, + affine=True, + track_running_stats=True, + group=None, + stats_mode='default'): + super(SyncBatchNorm, self).__init__() + self.num_features = num_features + self.eps = eps + self.momentum = momentum + self.affine = affine + self.track_running_stats = track_running_stats + group = dist.group.WORLD if group is None else group + self.group = group + self.group_size = dist.get_world_size(group) + assert stats_mode in ['default', 'N'], \ + f'"stats_mode" only accepts "default" and "N", got "{stats_mode}"' + self.stats_mode = stats_mode + if self.affine: + self.weight = Parameter(torch.Tensor(num_features)) + self.bias = Parameter(torch.Tensor(num_features)) + else: + self.register_parameter('weight', None) + self.register_parameter('bias', None) + if self.track_running_stats: + self.register_buffer('running_mean', torch.zeros(num_features)) + self.register_buffer('running_var', torch.ones(num_features)) + self.register_buffer('num_batches_tracked', + torch.tensor(0, dtype=torch.long)) + else: + self.register_buffer('running_mean', None) + self.register_buffer('running_var', None) + self.register_buffer('num_batches_tracked', None) + self.reset_parameters() + + def reset_running_stats(self): + if self.track_running_stats: + self.running_mean.zero_() + self.running_var.fill_(1) + self.num_batches_tracked.zero_() + + def reset_parameters(self): + self.reset_running_stats() + if self.affine: + self.weight.data.uniform_() # pytorch use ones_() + self.bias.data.zero_() + + def forward(self, input): + if input.dim() < 2: + raise ValueError( + f'expected at least 2D input, got {input.dim()}D input') + if self.momentum is None: + exponential_average_factor = 0.0 + else: + exponential_average_factor = self.momentum + + if self.training and self.track_running_stats: + if self.num_batches_tracked is not None: + self.num_batches_tracked += 1 + if self.momentum is None: # use cumulative moving average + exponential_average_factor = 1.0 / float( + self.num_batches_tracked) + else: # use exponential moving average + exponential_average_factor = self.momentum + + if self.training or not self.track_running_stats: + return SyncBatchNormFunction.apply( + input, self.running_mean, self.running_var, self.weight, + self.bias, exponential_average_factor, self.eps, self.group, + self.group_size, self.stats_mode) + else: + return F.batch_norm(input, self.running_mean, self.running_var, + self.weight, self.bias, False, + exponential_average_factor, self.eps) + + def __repr__(self): + s = self.__class__.__name__ + s += f'({self.num_features}, ' + s += f'eps={self.eps}, ' + s += f'momentum={self.momentum}, ' + s += f'affine={self.affine}, ' + s += f'track_running_stats={self.track_running_stats}, ' + s += f'group_size={self.group_size},' + s += f'stats_mode={self.stats_mode})' + return s diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/three_interpolate.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/three_interpolate.py new file mode 100644 index 0000000000000000000000000000000000000000..203f47f05d58087e034fb3cd8cd6a09233947b4a --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/three_interpolate.py @@ -0,0 +1,68 @@ +from typing import Tuple + +import torch +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', ['three_interpolate_forward', 'three_interpolate_backward']) + + +class 
ThreeInterpolate(Function): + """Performs weighted linear interpolation on 3 features. + + Please refer to `Paper of PointNet++ `_ + for more details. + """ + + @staticmethod + def forward(ctx, features: torch.Tensor, indices: torch.Tensor, + weight: torch.Tensor) -> torch.Tensor: + """ + Args: + features (Tensor): (B, C, M) Features descriptors to be + interpolated + indices (Tensor): (B, n, 3) index three nearest neighbors + of the target features in features + weight (Tensor): (B, n, 3) weights of interpolation + + Returns: + Tensor: (B, C, N) tensor of the interpolated features + """ + assert features.is_contiguous() + assert indices.is_contiguous() + assert weight.is_contiguous() + + B, c, m = features.size() + n = indices.size(1) + ctx.three_interpolate_for_backward = (indices, weight, m) + output = torch.cuda.FloatTensor(B, c, n) + + ext_module.three_interpolate_forward( + features, indices, weight, output, b=B, c=c, m=m, n=n) + return output + + @staticmethod + def backward( + ctx, grad_out: torch.Tensor + ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + """ + Args: + grad_out (Tensor): (B, C, N) tensor with gradients of outputs + + Returns: + Tensor: (B, C, M) tensor with gradients of features + """ + idx, weight, m = ctx.three_interpolate_for_backward + B, c, n = grad_out.size() + + grad_features = torch.cuda.FloatTensor(B, c, m).zero_() + grad_out_data = grad_out.data.contiguous() + + ext_module.three_interpolate_backward( + grad_out_data, idx, weight, grad_features.data, b=B, c=c, n=n, m=m) + return grad_features, None, None + + +three_interpolate = ThreeInterpolate.apply diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/three_nn.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/three_nn.py new file mode 100644 index 0000000000000000000000000000000000000000..2b01047a129989cd5545a0a86f23a487f4a13ce1 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/three_nn.py @@ -0,0 +1,51 @@ +from typing import Tuple + +import torch +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['three_nn_forward']) + + +class ThreeNN(Function): + """Find the top-3 nearest neighbors of the target set from the source set. + + Please refer to `Paper of PointNet++ `_ + for more details. + """ + + @staticmethod + def forward(ctx, target: torch.Tensor, + source: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Args: + target (Tensor): shape (B, N, 3), points set that needs to + find the nearest neighbors. + source (Tensor): shape (B, M, 3), points set that is used + to find the nearest neighbors of points in target set. + + Returns: + Tensor: shape (B, N, 3), L2 distance of each point in target + set to their corresponding nearest neighbors. 
+ """ + target = target.contiguous() + source = source.contiguous() + + B, N, _ = target.size() + m = source.size(1) + dist2 = torch.cuda.FloatTensor(B, N, 3) + idx = torch.cuda.IntTensor(B, N, 3) + + ext_module.three_nn_forward(target, source, dist2, idx, b=B, n=N, m=m) + if torch.__version__ != 'parrots': + ctx.mark_non_differentiable(idx) + + return torch.sqrt(dist2), idx + + @staticmethod + def backward(ctx, a=None, b=None): + return None, None + + +three_nn = ThreeNN.apply diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/tin_shift.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/tin_shift.py new file mode 100644 index 0000000000000000000000000000000000000000..472c9fcfe45a124e819b7ed5653e585f94a8811e --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/tin_shift.py @@ -0,0 +1,68 @@ +# Copyright (c) OpenMMLab. All rights reserved. +# Code reference from "Temporal Interlacing Network" +# https://github.com/deepcs233/TIN/blob/master/cuda_shift/rtc_wrap.py +# Hao Shao, Shengju Qian, Yu Liu +# shaoh19@mails.tsinghua.edu.cn, sjqian@cse.cuhk.edu.hk, yuliu@ee.cuhk.edu.hk + +import torch +import torch.nn as nn +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', + ['tin_shift_forward', 'tin_shift_backward']) + + +class TINShiftFunction(Function): + + @staticmethod + def forward(ctx, input, shift): + C = input.size(2) + num_segments = shift.size(1) + if C // num_segments <= 0 or C % num_segments != 0: + raise ValueError('C should be a multiple of num_segments, ' + f'but got C={C} and num_segments={num_segments}.') + + ctx.save_for_backward(shift) + + out = torch.zeros_like(input) + ext_module.tin_shift_forward(input, shift, out) + + return out + + @staticmethod + def backward(ctx, grad_output): + + shift = ctx.saved_tensors[0] + data_grad_input = grad_output.new(*grad_output.size()).zero_() + shift_grad_input = shift.new(*shift.size()).zero_() + ext_module.tin_shift_backward(grad_output, shift, data_grad_input) + + return data_grad_input, shift_grad_input + + +tin_shift = TINShiftFunction.apply + + +class TINShift(nn.Module): + """Temporal Interlace Shift. + + Temporal Interlace shift is a differentiable temporal-wise frame shifting + which is proposed in "Temporal Interlacing Network" + + Please refer to https://arxiv.org/abs/2001.06499 for more details. + Code is modified from https://github.com/mit-han-lab/temporal-shift-module + """ + + def forward(self, input, shift): + """Perform temporal interlace shift. + + Args: + input (Tensor): Feature map with shape [N, num_segments, C, H * W]. + shift (Tensor): Shift tensor with shape [N, num_segments]. + + Returns: + Feature map after temporal interlace shift. + """ + return tin_shift(input, shift) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/upfirdn2d.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/upfirdn2d.py new file mode 100644 index 0000000000000000000000000000000000000000..751db20a344e1164748d8d4d8b2f775247925eab --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/upfirdn2d.py @@ -0,0 +1,330 @@ +# modified from https://github.com/rosinality/stylegan2-pytorch/blob/master/op/upfirdn2d.py # noqa:E501 + +# Copyright (c) 2021, NVIDIA Corporation. All rights reserved. 
+# NVIDIA Source Code License for StyleGAN2 with Adaptive Discriminator +# Augmentation (ADA) +# ======================================================================= + +# 1. Definitions + +# "Licensor" means any person or entity that distributes its Work. + +# "Software" means the original work of authorship made available under +# this License. + +# "Work" means the Software and any additions to or derivative works of +# the Software that are made available under this License. + +# The terms "reproduce," "reproduction," "derivative works," and +# "distribution" have the meaning as provided under U.S. copyright law; +# provided, however, that for the purposes of this License, derivative +# works shall not include works that remain separable from, or merely +# link (or bind by name) to the interfaces of, the Work. + +# Works, including the Software, are "made available" under this License +# by including in or with the Work either (a) a copyright notice +# referencing the applicability of this License to the Work, or (b) a +# copy of this License. + +# 2. License Grants + +# 2.1 Copyright Grant. Subject to the terms and conditions of this +# License, each Licensor grants to you a perpetual, worldwide, +# non-exclusive, royalty-free, copyright license to reproduce, +# prepare derivative works of, publicly display, publicly perform, +# sublicense and distribute its Work and any resulting derivative +# works in any form. + +# 3. Limitations + +# 3.1 Redistribution. You may reproduce or distribute the Work only +# if (a) you do so under this License, (b) you include a complete +# copy of this License with your distribution, and (c) you retain +# without modification any copyright, patent, trademark, or +# attribution notices that are present in the Work. + +# 3.2 Derivative Works. You may specify that additional or different +# terms apply to the use, reproduction, and distribution of your +# derivative works of the Work ("Your Terms") only if (a) Your Terms +# provide that the use limitation in Section 3.3 applies to your +# derivative works, and (b) you identify the specific derivative +# works that are subject to Your Terms. Notwithstanding Your Terms, +# this License (including the redistribution requirements in Section +# 3.1) will continue to apply to the Work itself. + +# 3.3 Use Limitation. The Work and any derivative works thereof only +# may be used or intended for use non-commercially. Notwithstanding +# the foregoing, NVIDIA and its affiliates may use the Work and any +# derivative works commercially. As used herein, "non-commercially" +# means for research or evaluation purposes only. + +# 3.4 Patent Claims. If you bring or threaten to bring a patent claim +# against any Licensor (including any claim, cross-claim or +# counterclaim in a lawsuit) to enforce any patents that you allege +# are infringed by any Work, then your rights under this License from +# such Licensor (including the grant in Section 2.1) will terminate +# immediately. + +# 3.5 Trademarks. This License does not grant any rights to use any +# Licensor’s or its affiliates’ names, logos, or trademarks, except +# as necessary to reproduce the notices described in this License. + +# 3.6 Termination. If you violate any term of this License, then your +# rights under this License (including the grant in Section 2.1) will +# terminate immediately. + +# 4. Disclaimer of Warranty. 
+ +# THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR +# NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER +# THIS LICENSE. + +# 5. Limitation of Liability. + +# EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL +# THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE +# SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, +# INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF +# OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK +# (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, +# LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER +# COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGES. + +# ======================================================================= + +import torch +from torch.autograd import Function +from torch.nn import functional as F + +from annotator.mmpkg.mmcv.utils import to_2tuple +from ..utils import ext_loader + +upfirdn2d_ext = ext_loader.load_ext('_ext', ['upfirdn2d']) + + +class UpFirDn2dBackward(Function): + + @staticmethod + def forward(ctx, grad_output, kernel, grad_kernel, up, down, pad, g_pad, + in_size, out_size): + + up_x, up_y = up + down_x, down_y = down + g_pad_x0, g_pad_x1, g_pad_y0, g_pad_y1 = g_pad + + grad_output = grad_output.reshape(-1, out_size[0], out_size[1], 1) + + grad_input = upfirdn2d_ext.upfirdn2d( + grad_output, + grad_kernel, + up_x=down_x, + up_y=down_y, + down_x=up_x, + down_y=up_y, + pad_x0=g_pad_x0, + pad_x1=g_pad_x1, + pad_y0=g_pad_y0, + pad_y1=g_pad_y1) + grad_input = grad_input.view(in_size[0], in_size[1], in_size[2], + in_size[3]) + + ctx.save_for_backward(kernel) + + pad_x0, pad_x1, pad_y0, pad_y1 = pad + + ctx.up_x = up_x + ctx.up_y = up_y + ctx.down_x = down_x + ctx.down_y = down_y + ctx.pad_x0 = pad_x0 + ctx.pad_x1 = pad_x1 + ctx.pad_y0 = pad_y0 + ctx.pad_y1 = pad_y1 + ctx.in_size = in_size + ctx.out_size = out_size + + return grad_input + + @staticmethod + def backward(ctx, gradgrad_input): + kernel, = ctx.saved_tensors + + gradgrad_input = gradgrad_input.reshape(-1, ctx.in_size[2], + ctx.in_size[3], 1) + + gradgrad_out = upfirdn2d_ext.upfirdn2d( + gradgrad_input, + kernel, + up_x=ctx.up_x, + up_y=ctx.up_y, + down_x=ctx.down_x, + down_y=ctx.down_y, + pad_x0=ctx.pad_x0, + pad_x1=ctx.pad_x1, + pad_y0=ctx.pad_y0, + pad_y1=ctx.pad_y1) + # gradgrad_out = gradgrad_out.view(ctx.in_size[0], ctx.out_size[0], + # ctx.out_size[1], ctx.in_size[3]) + gradgrad_out = gradgrad_out.view(ctx.in_size[0], ctx.in_size[1], + ctx.out_size[0], ctx.out_size[1]) + + return gradgrad_out, None, None, None, None, None, None, None, None + + +class UpFirDn2d(Function): + + @staticmethod + def forward(ctx, input, kernel, up, down, pad): + up_x, up_y = up + down_x, down_y = down + pad_x0, pad_x1, pad_y0, pad_y1 = pad + + kernel_h, kernel_w = kernel.shape + batch, channel, in_h, in_w = input.shape + ctx.in_size = input.shape + + input = input.reshape(-1, in_h, in_w, 1) + + ctx.save_for_backward(kernel, torch.flip(kernel, [0, 1])) + + out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h) // down_y + 1 + out_w = (in_w * up_x + pad_x0 + pad_x1 - kernel_w) // down_x + 1 + ctx.out_size = (out_h, out_w) + + ctx.up = (up_x, up_y) + ctx.down = (down_x, down_y) + ctx.pad = (pad_x0, pad_x1, pad_y0, pad_y1) + + 
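# (editor note, descriptive comment added for clarity; not in the original diff)
# The `g_pad_*` values computed below are the padding used by the backward
# pass: UpFirDn2dBackward re-runs upfirdn2d with the up/down factors swapped
# and a flipped kernel, and these offsets make that gradient output line up
# with the original input size.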
g_pad_x0 = kernel_w - pad_x0 - 1 + g_pad_y0 = kernel_h - pad_y0 - 1 + g_pad_x1 = in_w * up_x - out_w * down_x + pad_x0 - up_x + 1 + g_pad_y1 = in_h * up_y - out_h * down_y + pad_y0 - up_y + 1 + + ctx.g_pad = (g_pad_x0, g_pad_x1, g_pad_y0, g_pad_y1) + + out = upfirdn2d_ext.upfirdn2d( + input, + kernel, + up_x=up_x, + up_y=up_y, + down_x=down_x, + down_y=down_y, + pad_x0=pad_x0, + pad_x1=pad_x1, + pad_y0=pad_y0, + pad_y1=pad_y1) + # out = out.view(major, out_h, out_w, minor) + out = out.view(-1, channel, out_h, out_w) + + return out + + @staticmethod + def backward(ctx, grad_output): + kernel, grad_kernel = ctx.saved_tensors + + grad_input = UpFirDn2dBackward.apply( + grad_output, + kernel, + grad_kernel, + ctx.up, + ctx.down, + ctx.pad, + ctx.g_pad, + ctx.in_size, + ctx.out_size, + ) + + return grad_input, None, None, None, None + + +def upfirdn2d(input, kernel, up=1, down=1, pad=(0, 0)): + """UpFRIDn for 2d features. + + UpFIRDn is short for upsample, apply FIR filter and downsample. More + details can be found in: + https://www.mathworks.com/help/signal/ref/upfirdn.html + + Args: + input (Tensor): Tensor with shape of (n, c, h, w). + kernel (Tensor): Filter kernel. + up (int | tuple[int], optional): Upsampling factor. If given a number, + we will use this factor for the both height and width side. + Defaults to 1. + down (int | tuple[int], optional): Downsampling factor. If given a + number, we will use this factor for the both height and width side. + Defaults to 1. + pad (tuple[int], optional): Padding for tensors, (x_pad, y_pad) or + (x_pad_0, x_pad_1, y_pad_0, y_pad_1). Defaults to (0, 0). + + Returns: + Tensor: Tensor after UpFIRDn. + """ + if input.device.type == 'cpu': + if len(pad) == 2: + pad = (pad[0], pad[1], pad[0], pad[1]) + + up = to_2tuple(up) + + down = to_2tuple(down) + + out = upfirdn2d_native(input, kernel, up[0], up[1], down[0], down[1], + pad[0], pad[1], pad[2], pad[3]) + else: + _up = to_2tuple(up) + + _down = to_2tuple(down) + + if len(pad) == 4: + _pad = pad + elif len(pad) == 2: + _pad = (pad[0], pad[1], pad[0], pad[1]) + + out = UpFirDn2d.apply(input, kernel, _up, _down, _pad) + + return out + + +def upfirdn2d_native(input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1, + pad_y0, pad_y1): + _, channel, in_h, in_w = input.shape + input = input.reshape(-1, in_h, in_w, 1) + + _, in_h, in_w, minor = input.shape + kernel_h, kernel_w = kernel.shape + + out = input.view(-1, in_h, 1, in_w, 1, minor) + out = F.pad(out, [0, 0, 0, up_x - 1, 0, 0, 0, up_y - 1]) + out = out.view(-1, in_h * up_y, in_w * up_x, minor) + + out = F.pad( + out, + [0, 0, + max(pad_x0, 0), + max(pad_x1, 0), + max(pad_y0, 0), + max(pad_y1, 0)]) + out = out[:, + max(-pad_y0, 0):out.shape[1] - max(-pad_y1, 0), + max(-pad_x0, 0):out.shape[2] - max(-pad_x1, 0), :, ] + + out = out.permute(0, 3, 1, 2) + out = out.reshape( + [-1, 1, in_h * up_y + pad_y0 + pad_y1, in_w * up_x + pad_x0 + pad_x1]) + w = torch.flip(kernel, [0, 1]).view(1, 1, kernel_h, kernel_w) + out = F.conv2d(out, w) + out = out.reshape( + -1, + minor, + in_h * up_y + pad_y0 + pad_y1 - kernel_h + 1, + in_w * up_x + pad_x0 + pad_x1 - kernel_w + 1, + ) + out = out.permute(0, 2, 3, 1) + out = out[:, ::down_y, ::down_x, :] + + out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h) // down_y + 1 + out_w = (in_w * up_x + pad_x0 + pad_x1 - kernel_w) // down_x + 1 + + return out.view(-1, channel, out_h, out_w) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/voxelize.py 
b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/voxelize.py new file mode 100644 index 0000000000000000000000000000000000000000..ca3226a4fbcbfe58490fa2ea8e1c16b531214121 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/ops/voxelize.py @@ -0,0 +1,132 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from torch import nn +from torch.autograd import Function +from torch.nn.modules.utils import _pair + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', ['dynamic_voxelize_forward', 'hard_voxelize_forward']) + + +class _Voxelization(Function): + + @staticmethod + def forward(ctx, + points, + voxel_size, + coors_range, + max_points=35, + max_voxels=20000): + """Convert kitti points(N, >=3) to voxels. + + Args: + points (torch.Tensor): [N, ndim]. Points[:, :3] contain xyz points + and points[:, 3:] contain other information like reflectivity. + voxel_size (tuple or float): The size of voxel with the shape of + [3]. + coors_range (tuple or float): The coordinate range of voxel with + the shape of [6]. + max_points (int, optional): maximum points contained in a voxel. if + max_points=-1, it means using dynamic_voxelize. Default: 35. + max_voxels (int, optional): maximum voxels this function create. + for second, 20000 is a good choice. Users should shuffle points + before call this function because max_voxels may drop points. + Default: 20000. + + Returns: + voxels_out (torch.Tensor): Output voxels with the shape of [M, + max_points, ndim]. Only contain points and returned when + max_points != -1. + coors_out (torch.Tensor): Output coordinates with the shape of + [M, 3]. + num_points_per_voxel_out (torch.Tensor): Num points per voxel with + the shape of [M]. Only returned when max_points != -1. + """ + if max_points == -1 or max_voxels == -1: + coors = points.new_zeros(size=(points.size(0), 3), dtype=torch.int) + ext_module.dynamic_voxelize_forward(points, coors, voxel_size, + coors_range, 3) + return coors + else: + voxels = points.new_zeros( + size=(max_voxels, max_points, points.size(1))) + coors = points.new_zeros(size=(max_voxels, 3), dtype=torch.int) + num_points_per_voxel = points.new_zeros( + size=(max_voxels, ), dtype=torch.int) + voxel_num = ext_module.hard_voxelize_forward( + points, voxels, coors, num_points_per_voxel, voxel_size, + coors_range, max_points, max_voxels, 3) + # select the valid voxels + voxels_out = voxels[:voxel_num] + coors_out = coors[:voxel_num] + num_points_per_voxel_out = num_points_per_voxel[:voxel_num] + return voxels_out, coors_out, num_points_per_voxel_out + + +voxelization = _Voxelization.apply + + +class Voxelization(nn.Module): + """Convert kitti points(N, >=3) to voxels. + + Please refer to `PVCNN `_ for more + details. + + Args: + voxel_size (tuple or float): The size of voxel with the shape of [3]. + point_cloud_range (tuple or float): The coordinate range of voxel with + the shape of [6]. + max_num_points (int): maximum points contained in a voxel. if + max_points=-1, it means using dynamic_voxelize. + max_voxels (int, optional): maximum voxels this function create. + for second, 20000 is a good choice. Users should shuffle points + before call this function because max_voxels may drop points. + Default: 20000. 
+ """ + + def __init__(self, + voxel_size, + point_cloud_range, + max_num_points, + max_voxels=20000): + super().__init__() + + self.voxel_size = voxel_size + self.point_cloud_range = point_cloud_range + self.max_num_points = max_num_points + if isinstance(max_voxels, tuple): + self.max_voxels = max_voxels + else: + self.max_voxels = _pair(max_voxels) + + point_cloud_range = torch.tensor( + point_cloud_range, dtype=torch.float32) + voxel_size = torch.tensor(voxel_size, dtype=torch.float32) + grid_size = (point_cloud_range[3:] - + point_cloud_range[:3]) / voxel_size + grid_size = torch.round(grid_size).long() + input_feat_shape = grid_size[:2] + self.grid_size = grid_size + # the origin shape is as [x-len, y-len, z-len] + # [w, h, d] -> [d, h, w] + self.pcd_shape = [*input_feat_shape, 1][::-1] + + def forward(self, input): + if self.training: + max_voxels = self.max_voxels[0] + else: + max_voxels = self.max_voxels[1] + + return voxelization(input, self.voxel_size, self.point_cloud_range, + self.max_num_points, max_voxels) + + def __repr__(self): + s = self.__class__.__name__ + '(' + s += 'voxel_size=' + str(self.voxel_size) + s += ', point_cloud_range=' + str(self.point_cloud_range) + s += ', max_num_points=' + str(self.max_num_points) + s += ', max_voxels=' + str(self.max_voxels) + s += ')' + return s diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/parallel/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/parallel/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2ed2c17ad357742e423beeaf4d35db03fe9af469 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/parallel/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .collate import collate +from .data_container import DataContainer +from .data_parallel import MMDataParallel +from .distributed import MMDistributedDataParallel +from .registry import MODULE_WRAPPERS +from .scatter_gather import scatter, scatter_kwargs +from .utils import is_module_wrapper + +__all__ = [ + 'collate', 'DataContainer', 'MMDataParallel', 'MMDistributedDataParallel', + 'scatter', 'scatter_kwargs', 'is_module_wrapper', 'MODULE_WRAPPERS' +] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/parallel/_functions.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/parallel/_functions.py new file mode 100644 index 0000000000000000000000000000000000000000..9b5a8a44483ab991411d07122b22a1d027e4be8e --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/parallel/_functions.py @@ -0,0 +1,79 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import torch +from torch.nn.parallel._functions import _get_stream + + +def scatter(input, devices, streams=None): + """Scatters tensor across multiple GPUs.""" + if streams is None: + streams = [None] * len(devices) + + if isinstance(input, list): + chunk_size = (len(input) - 1) // len(devices) + 1 + outputs = [ + scatter(input[i], [devices[i // chunk_size]], + [streams[i // chunk_size]]) for i in range(len(input)) + ] + return outputs + elif isinstance(input, torch.Tensor): + output = input.contiguous() + # TODO: copy to a pinned buffer first (if copying from CPU) + stream = streams[0] if output.numel() > 0 else None + if devices != [-1]: + with torch.cuda.device(devices[0]), torch.cuda.stream(stream): + output = output.cuda(devices[0], non_blocking=True) + else: + # unsqueeze the first dimension thus the tensor's shape is the + # same as those scattered with GPU. + output = output.unsqueeze(0) + return output + else: + raise Exception(f'Unknown type {type(input)}.') + + +def synchronize_stream(output, devices, streams): + if isinstance(output, list): + chunk_size = len(output) // len(devices) + for i in range(len(devices)): + for j in range(chunk_size): + synchronize_stream(output[i * chunk_size + j], [devices[i]], + [streams[i]]) + elif isinstance(output, torch.Tensor): + if output.numel() != 0: + with torch.cuda.device(devices[0]): + main_stream = torch.cuda.current_stream() + main_stream.wait_stream(streams[0]) + output.record_stream(main_stream) + else: + raise Exception(f'Unknown type {type(output)}.') + + +def get_input_device(input): + if isinstance(input, list): + for item in input: + input_device = get_input_device(item) + if input_device != -1: + return input_device + return -1 + elif isinstance(input, torch.Tensor): + return input.get_device() if input.is_cuda else -1 + else: + raise Exception(f'Unknown type {type(input)}.') + + +class Scatter: + + @staticmethod + def forward(target_gpus, input): + input_device = get_input_device(input) + streams = None + if input_device == -1 and target_gpus != [-1]: + # Perform CPU to GPU copies in a background stream + streams = [_get_stream(device) for device in target_gpus] + + outputs = scatter(input, target_gpus, streams) + # Synchronize with the copy stream + if streams is not None: + synchronize_stream(outputs, target_gpus, streams) + + return tuple(outputs) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/parallel/collate.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/parallel/collate.py new file mode 100644 index 0000000000000000000000000000000000000000..ad749197df21b0d74297548be5f66a696adebf7f --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/parallel/collate.py @@ -0,0 +1,84 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from collections.abc import Mapping, Sequence + +import torch +import torch.nn.functional as F +from torch.utils.data.dataloader import default_collate + +from .data_container import DataContainer + + +def collate(batch, samples_per_gpu=1): + """Puts each data field into a tensor/DataContainer with outer dimension + batch size. + + Extend default_collate to add support for + :type:`~mmcv.parallel.DataContainer`. There are 3 cases. + + 1. cpu_only = True, e.g., meta data + 2. cpu_only = False, stack = True, e.g., images tensors + 3. 
cpu_only = False, stack = False, e.g., gt bboxes + """ + + if not isinstance(batch, Sequence): + raise TypeError(f'{batch.dtype} is not supported.') + + if isinstance(batch[0], DataContainer): + stacked = [] + if batch[0].cpu_only: + for i in range(0, len(batch), samples_per_gpu): + stacked.append( + [sample.data for sample in batch[i:i + samples_per_gpu]]) + return DataContainer( + stacked, batch[0].stack, batch[0].padding_value, cpu_only=True) + elif batch[0].stack: + for i in range(0, len(batch), samples_per_gpu): + assert isinstance(batch[i].data, torch.Tensor) + + if batch[i].pad_dims is not None: + ndim = batch[i].dim() + assert ndim > batch[i].pad_dims + max_shape = [0 for _ in range(batch[i].pad_dims)] + for dim in range(1, batch[i].pad_dims + 1): + max_shape[dim - 1] = batch[i].size(-dim) + for sample in batch[i:i + samples_per_gpu]: + for dim in range(0, ndim - batch[i].pad_dims): + assert batch[i].size(dim) == sample.size(dim) + for dim in range(1, batch[i].pad_dims + 1): + max_shape[dim - 1] = max(max_shape[dim - 1], + sample.size(-dim)) + padded_samples = [] + for sample in batch[i:i + samples_per_gpu]: + pad = [0 for _ in range(batch[i].pad_dims * 2)] + for dim in range(1, batch[i].pad_dims + 1): + pad[2 * dim - + 1] = max_shape[dim - 1] - sample.size(-dim) + padded_samples.append( + F.pad( + sample.data, pad, value=sample.padding_value)) + stacked.append(default_collate(padded_samples)) + elif batch[i].pad_dims is None: + stacked.append( + default_collate([ + sample.data + for sample in batch[i:i + samples_per_gpu] + ])) + else: + raise ValueError( + 'pad_dims should be either None or integers (1-3)') + + else: + for i in range(0, len(batch), samples_per_gpu): + stacked.append( + [sample.data for sample in batch[i:i + samples_per_gpu]]) + return DataContainer(stacked, batch[0].stack, batch[0].padding_value) + elif isinstance(batch[0], Sequence): + transposed = zip(*batch) + return [collate(samples, samples_per_gpu) for samples in transposed] + elif isinstance(batch[0], Mapping): + return { + key: collate([d[key] for d in batch], samples_per_gpu) + for key in batch[0] + } + else: + return default_collate(batch) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/parallel/data_container.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/parallel/data_container.py new file mode 100644 index 0000000000000000000000000000000000000000..cedb0d32a51a1f575a622b38de2cee3ab4757821 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/parallel/data_container.py @@ -0,0 +1,89 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import functools + +import torch + + +def assert_tensor_type(func): + + @functools.wraps(func) + def wrapper(*args, **kwargs): + if not isinstance(args[0].data, torch.Tensor): + raise AttributeError( + f'{args[0].__class__.__name__} has no attribute ' + f'{func.__name__} for type {args[0].datatype}') + return func(*args, **kwargs) + + return wrapper + + +class DataContainer: + """A container for any type of objects. + + Typically tensors will be stacked in the collate function and sliced along + some dimension in the scatter function. This behavior has some limitations. + 1. All tensors have to be the same size. + 2. Types are limited (numpy array or Tensor). + + We design `DataContainer` and `MMDataParallel` to overcome these + limitations. The behavior can be either of the following. 
+ + - copy to GPU, pad all tensors to the same size and stack them + - copy to GPU without stacking + - leave the objects as is and pass it to the model + - pad_dims specifies the number of last few dimensions to do padding + """ + + def __init__(self, + data, + stack=False, + padding_value=0, + cpu_only=False, + pad_dims=2): + self._data = data + self._cpu_only = cpu_only + self._stack = stack + self._padding_value = padding_value + assert pad_dims in [None, 1, 2, 3] + self._pad_dims = pad_dims + + def __repr__(self): + return f'{self.__class__.__name__}({repr(self.data)})' + + def __len__(self): + return len(self._data) + + @property + def data(self): + return self._data + + @property + def datatype(self): + if isinstance(self.data, torch.Tensor): + return self.data.type() + else: + return type(self.data) + + @property + def cpu_only(self): + return self._cpu_only + + @property + def stack(self): + return self._stack + + @property + def padding_value(self): + return self._padding_value + + @property + def pad_dims(self): + return self._pad_dims + + @assert_tensor_type + def size(self, *args, **kwargs): + return self.data.size(*args, **kwargs) + + @assert_tensor_type + def dim(self): + return self.data.dim() diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/parallel/data_parallel.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/parallel/data_parallel.py new file mode 100644 index 0000000000000000000000000000000000000000..79b5f69b654cf647dc7ae9174223781ab5c607d2 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/parallel/data_parallel.py @@ -0,0 +1,89 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from itertools import chain + +from torch.nn.parallel import DataParallel + +from .scatter_gather import scatter_kwargs + + +class MMDataParallel(DataParallel): + """The DataParallel module that supports DataContainer. + + MMDataParallel has two main differences with PyTorch DataParallel: + + - It supports a custom type :class:`DataContainer` which allows more + flexible control of input data during both GPU and CPU inference. + - It implement two more APIs ``train_step()`` and ``val_step()``. + + Args: + module (:class:`nn.Module`): Module to be encapsulated. + device_ids (list[int]): Device IDS of modules to be scattered to. + Defaults to None when GPU is not available. + output_device (str | int): Device ID for output. Defaults to None. + dim (int): Dimension used to scatter the data. Defaults to 0. + """ + + def __init__(self, *args, dim=0, **kwargs): + super(MMDataParallel, self).__init__(*args, dim=dim, **kwargs) + self.dim = dim + + def forward(self, *inputs, **kwargs): + """Override the original forward function. + + The main difference lies in the CPU inference where the data in + :class:`DataContainers` will still be gathered. 
+ """ + if not self.device_ids: + # We add the following line thus the module could gather and + # convert data containers as those in GPU inference + inputs, kwargs = self.scatter(inputs, kwargs, [-1]) + return self.module(*inputs[0], **kwargs[0]) + else: + return super().forward(*inputs, **kwargs) + + def scatter(self, inputs, kwargs, device_ids): + return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) + + def train_step(self, *inputs, **kwargs): + if not self.device_ids: + # We add the following line thus the module could gather and + # convert data containers as those in GPU inference + inputs, kwargs = self.scatter(inputs, kwargs, [-1]) + return self.module.train_step(*inputs[0], **kwargs[0]) + + assert len(self.device_ids) == 1, \ + ('MMDataParallel only supports single GPU training, if you need to' + ' train with multiple GPUs, please use MMDistributedDataParallel' + 'instead.') + + for t in chain(self.module.parameters(), self.module.buffers()): + if t.device != self.src_device_obj: + raise RuntimeError( + 'module must have its parameters and buffers ' + f'on device {self.src_device_obj} (device_ids[0]) but ' + f'found one of them on device: {t.device}') + + inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) + return self.module.train_step(*inputs[0], **kwargs[0]) + + def val_step(self, *inputs, **kwargs): + if not self.device_ids: + # We add the following line thus the module could gather and + # convert data containers as those in GPU inference + inputs, kwargs = self.scatter(inputs, kwargs, [-1]) + return self.module.val_step(*inputs[0], **kwargs[0]) + + assert len(self.device_ids) == 1, \ + ('MMDataParallel only supports single GPU training, if you need to' + ' train with multiple GPUs, please use MMDistributedDataParallel' + ' instead.') + + for t in chain(self.module.parameters(), self.module.buffers()): + if t.device != self.src_device_obj: + raise RuntimeError( + 'module must have its parameters and buffers ' + f'on device {self.src_device_obj} (device_ids[0]) but ' + f'found one of them on device: {t.device}') + + inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) + return self.module.val_step(*inputs[0], **kwargs[0]) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/parallel/distributed.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/parallel/distributed.py new file mode 100644 index 0000000000000000000000000000000000000000..929c7a451a7443d715ab0cceef530c53eff44cb9 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/parallel/distributed.py @@ -0,0 +1,112 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from torch.nn.parallel.distributed import (DistributedDataParallel, + _find_tensors) + +from annotator.mmpkg.mmcv import print_log +from annotator.mmpkg.mmcv.utils import TORCH_VERSION, digit_version +from .scatter_gather import scatter_kwargs + + +class MMDistributedDataParallel(DistributedDataParallel): + """The DDP module that supports DataContainer. + + MMDDP has two main differences with PyTorch DDP: + + - It supports a custom type :class:`DataContainer` which allows more + flexible control of input data. + - It implement two APIs ``train_step()`` and ``val_step()``. 
+ """ + + def to_kwargs(self, inputs, kwargs, device_id): + # Use `self.to_kwargs` instead of `self.scatter` in pytorch1.8 + # to move all tensors to device_id + return scatter_kwargs(inputs, kwargs, [device_id], dim=self.dim) + + def scatter(self, inputs, kwargs, device_ids): + return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) + + def train_step(self, *inputs, **kwargs): + """train_step() API for module wrapped by DistributedDataParallel. + + This method is basically the same as + ``DistributedDataParallel.forward()``, while replacing + ``self.module.forward()`` with ``self.module.train_step()``. + It is compatible with PyTorch 1.1 - 1.5. + """ + + # In PyTorch >= 1.7, ``reducer._rebuild_buckets()`` is moved from the + # end of backward to the beginning of forward. + if ('parrots' not in TORCH_VERSION + and digit_version(TORCH_VERSION) >= digit_version('1.7') + and self.reducer._rebuild_buckets()): + print_log( + 'Reducer buckets have been rebuilt in this iteration.', + logger='mmcv') + + if getattr(self, 'require_forward_param_sync', True): + self._sync_params() + if self.device_ids: + inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) + if len(self.device_ids) == 1: + output = self.module.train_step(*inputs[0], **kwargs[0]) + else: + outputs = self.parallel_apply( + self._module_copies[:len(inputs)], inputs, kwargs) + output = self.gather(outputs, self.output_device) + else: + output = self.module.train_step(*inputs, **kwargs) + + if torch.is_grad_enabled() and getattr( + self, 'require_backward_grad_sync', True): + if self.find_unused_parameters: + self.reducer.prepare_for_backward(list(_find_tensors(output))) + else: + self.reducer.prepare_for_backward([]) + else: + if ('parrots' not in TORCH_VERSION + and digit_version(TORCH_VERSION) > digit_version('1.2')): + self.require_forward_param_sync = False + return output + + def val_step(self, *inputs, **kwargs): + """val_step() API for module wrapped by DistributedDataParallel. + + This method is basically the same as + ``DistributedDataParallel.forward()``, while replacing + ``self.module.forward()`` with ``self.module.val_step()``. + It is compatible with PyTorch 1.1 - 1.5. + """ + # In PyTorch >= 1.7, ``reducer._rebuild_buckets()`` is moved from the + # end of backward to the beginning of forward. 
+ if ('parrots' not in TORCH_VERSION + and digit_version(TORCH_VERSION) >= digit_version('1.7') + and self.reducer._rebuild_buckets()): + print_log( + 'Reducer buckets have been rebuilt in this iteration.', + logger='mmcv') + + if getattr(self, 'require_forward_param_sync', True): + self._sync_params() + if self.device_ids: + inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) + if len(self.device_ids) == 1: + output = self.module.val_step(*inputs[0], **kwargs[0]) + else: + outputs = self.parallel_apply( + self._module_copies[:len(inputs)], inputs, kwargs) + output = self.gather(outputs, self.output_device) + else: + output = self.module.val_step(*inputs, **kwargs) + + if torch.is_grad_enabled() and getattr( + self, 'require_backward_grad_sync', True): + if self.find_unused_parameters: + self.reducer.prepare_for_backward(list(_find_tensors(output))) + else: + self.reducer.prepare_for_backward([]) + else: + if ('parrots' not in TORCH_VERSION + and digit_version(TORCH_VERSION) > digit_version('1.2')): + self.require_forward_param_sync = False + return output diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/parallel/distributed_deprecated.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/parallel/distributed_deprecated.py new file mode 100644 index 0000000000000000000000000000000000000000..be60a37041fc6a76deae1851dde30448eaff054f --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/parallel/distributed_deprecated.py @@ -0,0 +1,70 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.distributed as dist +import torch.nn as nn +from torch._utils import (_flatten_dense_tensors, _take_tensors, + _unflatten_dense_tensors) + +from annotator.mmpkg.mmcv.utils import TORCH_VERSION, digit_version +from .registry import MODULE_WRAPPERS +from .scatter_gather import scatter_kwargs + + +@MODULE_WRAPPERS.register_module() +class MMDistributedDataParallel(nn.Module): + + def __init__(self, + module, + dim=0, + broadcast_buffers=True, + bucket_cap_mb=25): + super(MMDistributedDataParallel, self).__init__() + self.module = module + self.dim = dim + self.broadcast_buffers = broadcast_buffers + + self.broadcast_bucket_size = bucket_cap_mb * 1024 * 1024 + self._sync_params() + + def _dist_broadcast_coalesced(self, tensors, buffer_size): + for tensors in _take_tensors(tensors, buffer_size): + flat_tensors = _flatten_dense_tensors(tensors) + dist.broadcast(flat_tensors, 0) + for tensor, synced in zip( + tensors, _unflatten_dense_tensors(flat_tensors, tensors)): + tensor.copy_(synced) + + def _sync_params(self): + module_states = list(self.module.state_dict().values()) + if len(module_states) > 0: + self._dist_broadcast_coalesced(module_states, + self.broadcast_bucket_size) + if self.broadcast_buffers: + if (TORCH_VERSION != 'parrots' + and digit_version(TORCH_VERSION) < digit_version('1.0')): + buffers = [b.data for b in self.module._all_buffers()] + else: + buffers = [b.data for b in self.module.buffers()] + if len(buffers) > 0: + self._dist_broadcast_coalesced(buffers, + self.broadcast_bucket_size) + + def scatter(self, inputs, kwargs, device_ids): + return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) + + def forward(self, *inputs, **kwargs): + inputs, kwargs = self.scatter(inputs, kwargs, + [torch.cuda.current_device()]) + return self.module(*inputs[0], **kwargs[0]) + + def train_step(self, *inputs, **kwargs): + inputs, kwargs = self.scatter(inputs, kwargs, + 
[torch.cuda.current_device()]) + output = self.module.train_step(*inputs[0], **kwargs[0]) + return output + + def val_step(self, *inputs, **kwargs): + inputs, kwargs = self.scatter(inputs, kwargs, + [torch.cuda.current_device()]) + output = self.module.val_step(*inputs[0], **kwargs[0]) + return output diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/parallel/registry.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/parallel/registry.py new file mode 100644 index 0000000000000000000000000000000000000000..6ce151e5f890691e8b583e5d50b492801bae82bd --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/parallel/registry.py @@ -0,0 +1,8 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from torch.nn.parallel import DataParallel, DistributedDataParallel + +from annotator.mmpkg.mmcv.utils import Registry + +MODULE_WRAPPERS = Registry('module wrapper') +MODULE_WRAPPERS.register_module(module=DataParallel) +MODULE_WRAPPERS.register_module(module=DistributedDataParallel) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/parallel/scatter_gather.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/parallel/scatter_gather.py new file mode 100644 index 0000000000000000000000000000000000000000..900ff88566f8f14830590459dc4fd16d4b382e47 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/parallel/scatter_gather.py @@ -0,0 +1,59 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from torch.nn.parallel._functions import Scatter as OrigScatter + +from ._functions import Scatter +from .data_container import DataContainer + + +def scatter(inputs, target_gpus, dim=0): + """Scatter inputs to target gpus. + + The only difference from original :func:`scatter` is to add support for + :type:`~mmcv.parallel.DataContainer`. + """ + + def scatter_map(obj): + if isinstance(obj, torch.Tensor): + if target_gpus != [-1]: + return OrigScatter.apply(target_gpus, None, dim, obj) + else: + # for CPU inference we use self-implemented scatter + return Scatter.forward(target_gpus, obj) + if isinstance(obj, DataContainer): + if obj.cpu_only: + return obj.data + else: + return Scatter.forward(target_gpus, obj.data) + if isinstance(obj, tuple) and len(obj) > 0: + return list(zip(*map(scatter_map, obj))) + if isinstance(obj, list) and len(obj) > 0: + out = list(map(list, zip(*map(scatter_map, obj)))) + return out + if isinstance(obj, dict) and len(obj) > 0: + out = list(map(type(obj), zip(*map(scatter_map, obj.items())))) + return out + return [obj for targets in target_gpus] + + # After scatter_map is called, a scatter_map cell will exist. This cell + # has a reference to the actual function scatter_map, which has references + # to a closure that has a reference to the scatter_map cell (because the + # fn is recursive). 
To avoid this reference cycle, we set the function to + # None, clearing the cell + try: + return scatter_map(inputs) + finally: + scatter_map = None + + +def scatter_kwargs(inputs, kwargs, target_gpus, dim=0): + """Scatter with support for kwargs dictionary.""" + inputs = scatter(inputs, target_gpus, dim) if inputs else [] + kwargs = scatter(kwargs, target_gpus, dim) if kwargs else [] + if len(inputs) < len(kwargs): + inputs.extend([() for _ in range(len(kwargs) - len(inputs))]) + elif len(kwargs) < len(inputs): + kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))]) + inputs = tuple(inputs) + kwargs = tuple(kwargs) + return inputs, kwargs diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/parallel/utils.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/parallel/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..0f5712cb42c38a2e8563bf563efb6681383cab9b --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/parallel/utils.py @@ -0,0 +1,20 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .registry import MODULE_WRAPPERS + + +def is_module_wrapper(module): + """Check if a module is a module wrapper. + + The following 3 modules in MMCV (and their subclasses) are regarded as + module wrappers: DataParallel, DistributedDataParallel, + MMDistributedDataParallel (the deprecated version). You may add you own + module wrapper by registering it to mmcv.parallel.MODULE_WRAPPERS. + + Args: + module (nn.Module): The module to be checked. + + Returns: + bool: True if the input module is a module wrapper. + """ + module_wrappers = tuple(MODULE_WRAPPERS.module_dict.values()) + return isinstance(module, module_wrappers) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..52e4b48d383a84a055dcd7f6236f6e8e58eab924 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/__init__.py @@ -0,0 +1,47 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
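# --- Illustrative usage sketch (editor-added; not part of the file above) ---
# A small, hedged example of how DataContainer and collate (defined in the
# parallel package above) cooperate: tensors wrapped with stack=True are
# zero-padded along their last `pad_dims` dimensions to a common shape and
# then stacked, one group per GPU. The shapes below are arbitrary.
import torch
from annotator.mmpkg.mmcv.parallel import DataContainer, collate

samples = [DataContainer(torch.rand(3, h, w), stack=True, pad_dims=2)
           for h, w in [(4, 6), (5, 5)]]
batch = collate(samples, samples_per_gpu=2)
# batch.data == [tensor of shape (2, 3, 5, 6)]: both samples were padded up
# to the per-dimension maximum (5, 6) before being stacked by default_collate.
# --- end of sketch ---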
+from .base_module import BaseModule, ModuleList, Sequential +from .base_runner import BaseRunner +from .builder import RUNNERS, build_runner +from .checkpoint import (CheckpointLoader, _load_checkpoint, + _load_checkpoint_with_prefix, load_checkpoint, + load_state_dict, save_checkpoint, weights_to_cpu) +from .default_constructor import DefaultRunnerConstructor +from .dist_utils import (allreduce_grads, allreduce_params, get_dist_info, + init_dist, master_only) +from .epoch_based_runner import EpochBasedRunner, Runner +from .fp16_utils import LossScaler, auto_fp16, force_fp32, wrap_fp16_model +from .hooks import (HOOKS, CheckpointHook, ClosureHook, DistEvalHook, + DistSamplerSeedHook, DvcliveLoggerHook, EMAHook, EvalHook, + Fp16OptimizerHook, GradientCumulativeFp16OptimizerHook, + GradientCumulativeOptimizerHook, Hook, IterTimerHook, + LoggerHook, LrUpdaterHook, MlflowLoggerHook, + NeptuneLoggerHook, OptimizerHook, PaviLoggerHook, + SyncBuffersHook, TensorboardLoggerHook, TextLoggerHook, + WandbLoggerHook) +from .iter_based_runner import IterBasedRunner, IterLoader +from .log_buffer import LogBuffer +from .optimizer import (OPTIMIZER_BUILDERS, OPTIMIZERS, + DefaultOptimizerConstructor, build_optimizer, + build_optimizer_constructor) +from .priority import Priority, get_priority +from .utils import get_host_info, get_time_str, obj_from_dict, set_random_seed + +__all__ = [ + 'BaseRunner', 'Runner', 'EpochBasedRunner', 'IterBasedRunner', 'LogBuffer', + 'HOOKS', 'Hook', 'CheckpointHook', 'ClosureHook', 'LrUpdaterHook', + 'OptimizerHook', 'IterTimerHook', 'DistSamplerSeedHook', 'LoggerHook', + 'PaviLoggerHook', 'TextLoggerHook', 'TensorboardLoggerHook', + 'NeptuneLoggerHook', 'WandbLoggerHook', 'MlflowLoggerHook', + 'DvcliveLoggerHook', '_load_checkpoint', 'load_state_dict', + 'load_checkpoint', 'weights_to_cpu', 'save_checkpoint', 'Priority', + 'get_priority', 'get_host_info', 'get_time_str', 'obj_from_dict', + 'init_dist', 'get_dist_info', 'master_only', 'OPTIMIZER_BUILDERS', + 'OPTIMIZERS', 'DefaultOptimizerConstructor', 'build_optimizer', + 'build_optimizer_constructor', 'IterLoader', 'set_random_seed', + 'auto_fp16', 'force_fp32', 'wrap_fp16_model', 'Fp16OptimizerHook', + 'SyncBuffersHook', 'EMAHook', 'build_runner', 'RUNNERS', 'allreduce_grads', + 'allreduce_params', 'LossScaler', 'CheckpointLoader', 'BaseModule', + '_load_checkpoint_with_prefix', 'EvalHook', 'DistEvalHook', 'Sequential', + 'ModuleList', 'GradientCumulativeOptimizerHook', + 'GradientCumulativeFp16OptimizerHook', 'DefaultRunnerConstructor' +] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/base_module.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/base_module.py new file mode 100644 index 0000000000000000000000000000000000000000..72e1164dfc442056cdc386050177f011b4e9900f --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/base_module.py @@ -0,0 +1,195 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import copy +import warnings +from abc import ABCMeta +from collections import defaultdict +from logging import FileHandler + +import torch.nn as nn + +from annotator.mmpkg.mmcv.runner.dist_utils import master_only +from annotator.mmpkg.mmcv.utils.logging import get_logger, logger_initialized, print_log + + +class BaseModule(nn.Module, metaclass=ABCMeta): + """Base module for all modules in openmmlab. 
+ + ``BaseModule`` is a wrapper of ``torch.nn.Module`` with additional + functionality of parameter initialization. Compared with + ``torch.nn.Module``, ``BaseModule`` mainly adds three attributes. + + - ``init_cfg``: the config to control the initialization. + - ``init_weights``: The function of parameter + initialization and recording initialization + information. + - ``_params_init_info``: Used to track the parameter + initialization information. This attribute only + exists during executing the ``init_weights``. + + Args: + init_cfg (dict, optional): Initialization config dict. + """ + + def __init__(self, init_cfg=None): + """Initialize BaseModule, inherited from `torch.nn.Module`""" + + # NOTE init_cfg can be defined in different levels, but init_cfg + # in low levels has a higher priority. + + super(BaseModule, self).__init__() + # define default value of init_cfg instead of hard code + # in init_weights() function + self._is_init = False + + self.init_cfg = copy.deepcopy(init_cfg) + + # Backward compatibility in derived classes + # if pretrained is not None: + # warnings.warn('DeprecationWarning: pretrained is a deprecated \ + # key, please consider using init_cfg') + # self.init_cfg = dict(type='Pretrained', checkpoint=pretrained) + + @property + def is_init(self): + return self._is_init + + def init_weights(self): + """Initialize the weights.""" + + is_top_level_module = False + # check if it is top-level module + if not hasattr(self, '_params_init_info'): + # The `_params_init_info` is used to record the initialization + # information of the parameters + # the key should be the obj:`nn.Parameter` of model and the value + # should be a dict containing + # - init_info (str): The string that describes the initialization. + # - tmp_mean_value (FloatTensor): The mean of the parameter, + # which indicates whether the parameter has been modified. + # this attribute would be deleted after all parameters + # is initialized. + self._params_init_info = defaultdict(dict) + is_top_level_module = True + + # Initialize the `_params_init_info`, + # When detecting the `tmp_mean_value` of + # the corresponding parameter is changed, update related + # initialization information + for name, param in self.named_parameters(): + self._params_init_info[param][ + 'init_info'] = f'The value is the same before and ' \ + f'after calling `init_weights` ' \ + f'of {self.__class__.__name__} ' + self._params_init_info[param][ + 'tmp_mean_value'] = param.data.mean() + + # pass `params_init_info` to all submodules + # All submodules share the same `params_init_info`, + # so it will be updated when parameters are + # modified at any level of the model. 
+ for sub_module in self.modules(): + sub_module._params_init_info = self._params_init_info + + # Get the initialized logger, if not exist, + # create a logger named `mmcv` + logger_names = list(logger_initialized.keys()) + logger_name = logger_names[0] if logger_names else 'mmcv' + + from ..cnn import initialize + from ..cnn.utils.weight_init import update_init_info + module_name = self.__class__.__name__ + if not self._is_init: + if self.init_cfg: + print_log( + f'initialize {module_name} with init_cfg {self.init_cfg}', + logger=logger_name) + initialize(self, self.init_cfg) + if isinstance(self.init_cfg, dict): + # prevent the parameters of + # the pre-trained model + # from being overwritten by + # the `init_weights` + if self.init_cfg['type'] == 'Pretrained': + return + + for m in self.children(): + if hasattr(m, 'init_weights'): + m.init_weights() + # users may overload the `init_weights` + update_init_info( + m, + init_info=f'Initialized by ' + f'user-defined `init_weights`' + f' in {m.__class__.__name__} ') + + self._is_init = True + else: + warnings.warn(f'init_weights of {self.__class__.__name__} has ' + f'been called more than once.') + + if is_top_level_module: + self._dump_init_info(logger_name) + + for sub_module in self.modules(): + del sub_module._params_init_info + + @master_only + def _dump_init_info(self, logger_name): + """Dump the initialization information to a file named + `initialization.log.json` in workdir. + + Args: + logger_name (str): The name of logger. + """ + + logger = get_logger(logger_name) + + with_file_handler = False + # dump the information to the logger file if there is a `FileHandler` + for handler in logger.handlers: + if isinstance(handler, FileHandler): + handler.stream.write( + 'Name of parameter - Initialization information\n') + for name, param in self.named_parameters(): + handler.stream.write( + f'\n{name} - {param.shape}: ' + f"\n{self._params_init_info[param]['init_info']} \n") + handler.stream.flush() + with_file_handler = True + if not with_file_handler: + for name, param in self.named_parameters(): + print_log( + f'\n{name} - {param.shape}: ' + f"\n{self._params_init_info[param]['init_info']} \n ", + logger=logger_name) + + def __repr__(self): + s = super().__repr__() + if self.init_cfg: + s += f'\ninit_cfg={self.init_cfg}' + return s + + +class Sequential(BaseModule, nn.Sequential): + """Sequential module in openmmlab. + + Args: + init_cfg (dict, optional): Initialization config dict. + """ + + def __init__(self, *args, init_cfg=None): + BaseModule.__init__(self, init_cfg) + nn.Sequential.__init__(self, *args) + + +class ModuleList(BaseModule, nn.ModuleList): + """ModuleList in openmmlab. + + Args: + modules (iterable, optional): an iterable of modules to add. + init_cfg (dict, optional): Initialization config dict. + """ + + def __init__(self, modules=None, init_cfg=None): + BaseModule.__init__(self, init_cfg) + nn.ModuleList.__init__(self, modules) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/base_runner.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/base_runner.py new file mode 100644 index 0000000000000000000000000000000000000000..a75a7d5db9f281fda10008636b24e2b98d9336a0 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/base_runner.py @@ -0,0 +1,542 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
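# --- Illustrative usage sketch (editor-added; not part of the file above) ---
# A hedged sketch of the BaseModule pattern defined in base_module.py above:
# a subclass stores `init_cfg`, and `init_weights()` later applies it through
# the bundled cnn initializers while recording per-parameter init info. The
# layer sizes and the 'Normal' initializer config are illustrative assumptions.
import torch.nn as nn
from annotator.mmpkg.mmcv.runner import BaseModule

class ToyHead(BaseModule):
    def __init__(self, init_cfg=dict(type='Normal', layer='Conv2d', std=0.01)):
        super().__init__(init_cfg=init_cfg)
        self.conv = nn.Conv2d(16, 4, kernel_size=1)

head = ToyHead()
head.init_weights()   # applies init_cfg to self.conv and logs what was initialized
# --- end of sketch ---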
+import copy +import logging +import os.path as osp +import warnings +from abc import ABCMeta, abstractmethod + +import torch +from torch.optim import Optimizer + +import annotator.mmpkg.mmcv as mmcv +from ..parallel import is_module_wrapper +from .checkpoint import load_checkpoint +from .dist_utils import get_dist_info +from .hooks import HOOKS, Hook +from .log_buffer import LogBuffer +from .priority import Priority, get_priority +from .utils import get_time_str + + +class BaseRunner(metaclass=ABCMeta): + """The base class of Runner, a training helper for PyTorch. + + All subclasses should implement the following APIs: + + - ``run()`` + - ``train()`` + - ``val()`` + - ``save_checkpoint()`` + + Args: + model (:obj:`torch.nn.Module`): The model to be run. + batch_processor (callable): A callable method that process a data + batch. The interface of this method should be + `batch_processor(model, data, train_mode) -> dict` + optimizer (dict or :obj:`torch.optim.Optimizer`): It can be either an + optimizer (in most cases) or a dict of optimizers (in models that + requires more than one optimizer, e.g., GAN). + work_dir (str, optional): The working directory to save checkpoints + and logs. Defaults to None. + logger (:obj:`logging.Logger`): Logger used during training. + Defaults to None. (The default value is just for backward + compatibility) + meta (dict | None): A dict records some import information such as + environment info and seed, which will be logged in logger hook. + Defaults to None. + max_epochs (int, optional): Total training epochs. + max_iters (int, optional): Total training iterations. + """ + + def __init__(self, + model, + batch_processor=None, + optimizer=None, + work_dir=None, + logger=None, + meta=None, + max_iters=None, + max_epochs=None): + if batch_processor is not None: + if not callable(batch_processor): + raise TypeError('batch_processor must be callable, ' + f'but got {type(batch_processor)}') + warnings.warn('batch_processor is deprecated, please implement ' + 'train_step() and val_step() in the model instead.') + # raise an error is `batch_processor` is not None and + # `model.train_step()` exists. 
+ if is_module_wrapper(model): + _model = model.module + else: + _model = model + if hasattr(_model, 'train_step') or hasattr(_model, 'val_step'): + raise RuntimeError( + 'batch_processor and model.train_step()/model.val_step() ' + 'cannot be both available.') + else: + assert hasattr(model, 'train_step') + + # check the type of `optimizer` + if isinstance(optimizer, dict): + for name, optim in optimizer.items(): + if not isinstance(optim, Optimizer): + raise TypeError( + f'optimizer must be a dict of torch.optim.Optimizers, ' + f'but optimizer["{name}"] is a {type(optim)}') + elif not isinstance(optimizer, Optimizer) and optimizer is not None: + raise TypeError( + f'optimizer must be a torch.optim.Optimizer object ' + f'or dict or None, but got {type(optimizer)}') + + # check the type of `logger` + if not isinstance(logger, logging.Logger): + raise TypeError(f'logger must be a logging.Logger object, ' + f'but got {type(logger)}') + + # check the type of `meta` + if meta is not None and not isinstance(meta, dict): + raise TypeError( + f'meta must be a dict or None, but got {type(meta)}') + + self.model = model + self.batch_processor = batch_processor + self.optimizer = optimizer + self.logger = logger + self.meta = meta + # create work_dir + if mmcv.is_str(work_dir): + self.work_dir = osp.abspath(work_dir) + mmcv.mkdir_or_exist(self.work_dir) + elif work_dir is None: + self.work_dir = None + else: + raise TypeError('"work_dir" must be a str or None') + + # get model name from the model class + if hasattr(self.model, 'module'): + self._model_name = self.model.module.__class__.__name__ + else: + self._model_name = self.model.__class__.__name__ + + self._rank, self._world_size = get_dist_info() + self.timestamp = get_time_str() + self.mode = None + self._hooks = [] + self._epoch = 0 + self._iter = 0 + self._inner_iter = 0 + + if max_epochs is not None and max_iters is not None: + raise ValueError( + 'Only one of `max_epochs` or `max_iters` can be set.') + + self._max_epochs = max_epochs + self._max_iters = max_iters + # TODO: Redesign LogBuffer, it is not flexible and elegant enough + self.log_buffer = LogBuffer() + + @property + def model_name(self): + """str: Name of the model, usually the module class name.""" + return self._model_name + + @property + def rank(self): + """int: Rank of current process. (distributed training)""" + return self._rank + + @property + def world_size(self): + """int: Number of processes participating in the job. + (distributed training)""" + return self._world_size + + @property + def hooks(self): + """list[:obj:`Hook`]: A list of registered hooks.""" + return self._hooks + + @property + def epoch(self): + """int: Current epoch.""" + return self._epoch + + @property + def iter(self): + """int: Current iteration.""" + return self._iter + + @property + def inner_iter(self): + """int: Iteration in an epoch.""" + return self._inner_iter + + @property + def max_epochs(self): + """int: Maximum training epochs.""" + return self._max_epochs + + @property + def max_iters(self): + """int: Maximum training iterations.""" + return self._max_iters + + @abstractmethod + def train(self): + pass + + @abstractmethod + def val(self): + pass + + @abstractmethod + def run(self, data_loaders, workflow, **kwargs): + pass + + @abstractmethod + def save_checkpoint(self, + out_dir, + filename_tmpl, + save_optimizer=True, + meta=None, + create_symlink=True): + pass + + def current_lr(self): + """Get current learning rates. 
+ + Returns: + list[float] | dict[str, list[float]]: Current learning rates of all + param groups. If the runner has a dict of optimizers, this + method will return a dict. + """ + if isinstance(self.optimizer, torch.optim.Optimizer): + lr = [group['lr'] for group in self.optimizer.param_groups] + elif isinstance(self.optimizer, dict): + lr = dict() + for name, optim in self.optimizer.items(): + lr[name] = [group['lr'] for group in optim.param_groups] + else: + raise RuntimeError( + 'lr is not applicable because optimizer does not exist.') + return lr + + def current_momentum(self): + """Get current momentums. + + Returns: + list[float] | dict[str, list[float]]: Current momentums of all + param groups. If the runner has a dict of optimizers, this + method will return a dict. + """ + + def _get_momentum(optimizer): + momentums = [] + for group in optimizer.param_groups: + if 'momentum' in group.keys(): + momentums.append(group['momentum']) + elif 'betas' in group.keys(): + momentums.append(group['betas'][0]) + else: + momentums.append(0) + return momentums + + if self.optimizer is None: + raise RuntimeError( + 'momentum is not applicable because optimizer does not exist.') + elif isinstance(self.optimizer, torch.optim.Optimizer): + momentums = _get_momentum(self.optimizer) + elif isinstance(self.optimizer, dict): + momentums = dict() + for name, optim in self.optimizer.items(): + momentums[name] = _get_momentum(optim) + return momentums + + def register_hook(self, hook, priority='NORMAL'): + """Register a hook into the hook list. + + The hook will be inserted into a priority queue, with the specified + priority (See :class:`Priority` for details of priorities). + For hooks with the same priority, they will be triggered in the same + order as they are registered. + + Args: + hook (:obj:`Hook`): The hook to be registered. + priority (int or str or :obj:`Priority`): Hook priority. + Lower value means higher priority. + """ + assert isinstance(hook, Hook) + if hasattr(hook, 'priority'): + raise ValueError('"priority" is a reserved attribute for hooks') + priority = get_priority(priority) + hook.priority = priority + # insert the hook to a sorted list + inserted = False + for i in range(len(self._hooks) - 1, -1, -1): + if priority >= self._hooks[i].priority: + self._hooks.insert(i + 1, hook) + inserted = True + break + if not inserted: + self._hooks.insert(0, hook) + + def register_hook_from_cfg(self, hook_cfg): + """Register a hook from its cfg. + + Args: + hook_cfg (dict): Hook config. It should have at least keys 'type' + and 'priority' indicating its type and priority. + + Notes: + The specific hook class to register should not use 'type' and + 'priority' arguments during initialization. + """ + hook_cfg = hook_cfg.copy() + priority = hook_cfg.pop('priority', 'NORMAL') + hook = mmcv.build_from_cfg(hook_cfg, HOOKS) + self.register_hook(hook, priority=priority) + + def call_hook(self, fn_name): + """Call all hooks. + + Args: + fn_name (str): The function name in each hook to be called, such as + "before_train_epoch". 
+ """ + for hook in self._hooks: + getattr(hook, fn_name)(self) + + def get_hook_info(self): + # Get hooks info in each stage + stage_hook_map = {stage: [] for stage in Hook.stages} + for hook in self.hooks: + try: + priority = Priority(hook.priority).name + except ValueError: + priority = hook.priority + classname = hook.__class__.__name__ + hook_info = f'({priority:<12}) {classname:<35}' + for trigger_stage in hook.get_triggered_stages(): + stage_hook_map[trigger_stage].append(hook_info) + + stage_hook_infos = [] + for stage in Hook.stages: + hook_infos = stage_hook_map[stage] + if len(hook_infos) > 0: + info = f'{stage}:\n' + info += '\n'.join(hook_infos) + info += '\n -------------------- ' + stage_hook_infos.append(info) + return '\n'.join(stage_hook_infos) + + def load_checkpoint(self, + filename, + map_location='cpu', + strict=False, + revise_keys=[(r'^module.', '')]): + return load_checkpoint( + self.model, + filename, + map_location, + strict, + self.logger, + revise_keys=revise_keys) + + def resume(self, + checkpoint, + resume_optimizer=True, + map_location='default'): + if map_location == 'default': + if torch.cuda.is_available(): + device_id = torch.cuda.current_device() + checkpoint = self.load_checkpoint( + checkpoint, + map_location=lambda storage, loc: storage.cuda(device_id)) + else: + checkpoint = self.load_checkpoint(checkpoint) + else: + checkpoint = self.load_checkpoint( + checkpoint, map_location=map_location) + + self._epoch = checkpoint['meta']['epoch'] + self._iter = checkpoint['meta']['iter'] + if self.meta is None: + self.meta = {} + self.meta.setdefault('hook_msgs', {}) + # load `last_ckpt`, `best_score`, `best_ckpt`, etc. for hook messages + self.meta['hook_msgs'].update(checkpoint['meta'].get('hook_msgs', {})) + + # Re-calculate the number of iterations when resuming + # models with different number of GPUs + if 'config' in checkpoint['meta']: + config = mmcv.Config.fromstring( + checkpoint['meta']['config'], file_format='.py') + previous_gpu_ids = config.get('gpu_ids', None) + if previous_gpu_ids and len(previous_gpu_ids) > 0 and len( + previous_gpu_ids) != self.world_size: + self._iter = int(self._iter * len(previous_gpu_ids) / + self.world_size) + self.logger.info('the iteration number is changed due to ' + 'change of GPU number') + + # resume meta information meta + self.meta = checkpoint['meta'] + + if 'optimizer' in checkpoint and resume_optimizer: + if isinstance(self.optimizer, Optimizer): + self.optimizer.load_state_dict(checkpoint['optimizer']) + elif isinstance(self.optimizer, dict): + for k in self.optimizer.keys(): + self.optimizer[k].load_state_dict( + checkpoint['optimizer'][k]) + else: + raise TypeError( + 'Optimizer should be dict or torch.optim.Optimizer ' + f'but got {type(self.optimizer)}') + + self.logger.info('resumed epoch %d, iter %d', self.epoch, self.iter) + + def register_lr_hook(self, lr_config): + if lr_config is None: + return + elif isinstance(lr_config, dict): + assert 'policy' in lr_config + policy_type = lr_config.pop('policy') + # If the type of policy is all in lower case, e.g., 'cyclic', + # then its first letter will be capitalized, e.g., to be 'Cyclic'. + # This is for the convenient usage of Lr updater. + # Since this is not applicable for ` + # CosineAnnealingLrUpdater`, + # the string will not be changed if it contains capital letters. 
+ if policy_type == policy_type.lower(): + policy_type = policy_type.title() + hook_type = policy_type + 'LrUpdaterHook' + lr_config['type'] = hook_type + hook = mmcv.build_from_cfg(lr_config, HOOKS) + else: + hook = lr_config + self.register_hook(hook, priority='VERY_HIGH') + + def register_momentum_hook(self, momentum_config): + if momentum_config is None: + return + if isinstance(momentum_config, dict): + assert 'policy' in momentum_config + policy_type = momentum_config.pop('policy') + # If the type of policy is all in lower case, e.g., 'cyclic', + # then its first letter will be capitalized, e.g., to be 'Cyclic'. + # This is for the convenient usage of momentum updater. + # Since this is not applicable for + # `CosineAnnealingMomentumUpdater`, + # the string will not be changed if it contains capital letters. + if policy_type == policy_type.lower(): + policy_type = policy_type.title() + hook_type = policy_type + 'MomentumUpdaterHook' + momentum_config['type'] = hook_type + hook = mmcv.build_from_cfg(momentum_config, HOOKS) + else: + hook = momentum_config + self.register_hook(hook, priority='HIGH') + + def register_optimizer_hook(self, optimizer_config): + if optimizer_config is None: + return + if isinstance(optimizer_config, dict): + optimizer_config.setdefault('type', 'OptimizerHook') + hook = mmcv.build_from_cfg(optimizer_config, HOOKS) + else: + hook = optimizer_config + self.register_hook(hook, priority='ABOVE_NORMAL') + + def register_checkpoint_hook(self, checkpoint_config): + if checkpoint_config is None: + return + if isinstance(checkpoint_config, dict): + checkpoint_config.setdefault('type', 'CheckpointHook') + hook = mmcv.build_from_cfg(checkpoint_config, HOOKS) + else: + hook = checkpoint_config + self.register_hook(hook, priority='NORMAL') + + def register_logger_hooks(self, log_config): + if log_config is None: + return + log_interval = log_config['interval'] + for info in log_config['hooks']: + logger_hook = mmcv.build_from_cfg( + info, HOOKS, default_args=dict(interval=log_interval)) + self.register_hook(logger_hook, priority='VERY_LOW') + + def register_timer_hook(self, timer_config): + if timer_config is None: + return + if isinstance(timer_config, dict): + timer_config_ = copy.deepcopy(timer_config) + hook = mmcv.build_from_cfg(timer_config_, HOOKS) + else: + hook = timer_config + self.register_hook(hook, priority='LOW') + + def register_custom_hooks(self, custom_config): + if custom_config is None: + return + + if not isinstance(custom_config, list): + custom_config = [custom_config] + + for item in custom_config: + if isinstance(item, dict): + self.register_hook_from_cfg(item) + else: + self.register_hook(item, priority='NORMAL') + + def register_profiler_hook(self, profiler_config): + if profiler_config is None: + return + if isinstance(profiler_config, dict): + profiler_config.setdefault('type', 'ProfilerHook') + hook = mmcv.build_from_cfg(profiler_config, HOOKS) + else: + hook = profiler_config + self.register_hook(hook) + + def register_training_hooks(self, + lr_config, + optimizer_config=None, + checkpoint_config=None, + log_config=None, + momentum_config=None, + timer_config=dict(type='IterTimerHook'), + custom_hooks_config=None): + """Register default and custom hooks for training. 
+ + Default and custom hooks include: + + +----------------------+-------------------------+ + | Hooks | Priority | + +======================+=========================+ + | LrUpdaterHook | VERY_HIGH (10) | + +----------------------+-------------------------+ + | MomentumUpdaterHook | HIGH (30) | + +----------------------+-------------------------+ + | OptimizerStepperHook | ABOVE_NORMAL (40) | + +----------------------+-------------------------+ + | CheckpointSaverHook | NORMAL (50) | + +----------------------+-------------------------+ + | IterTimerHook | LOW (70) | + +----------------------+-------------------------+ + | LoggerHook(s) | VERY_LOW (90) | + +----------------------+-------------------------+ + | CustomHook(s) | defaults to NORMAL (50) | + +----------------------+-------------------------+ + + If custom hooks have same priority with default hooks, custom hooks + will be triggered after default hooks. + """ + self.register_lr_hook(lr_config) + self.register_momentum_hook(momentum_config) + self.register_optimizer_hook(optimizer_config) + self.register_checkpoint_hook(checkpoint_config) + self.register_timer_hook(timer_config) + self.register_logger_hooks(log_config) + self.register_custom_hooks(custom_hooks_config) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/builder.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/builder.py new file mode 100644 index 0000000000000000000000000000000000000000..77c96ba0b2f30ead9da23f293c5dc84dd3e4a74f --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/builder.py @@ -0,0 +1,24 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import copy + +from ..utils import Registry + +RUNNERS = Registry('runner') +RUNNER_BUILDERS = Registry('runner builder') + + +def build_runner_constructor(cfg): + return RUNNER_BUILDERS.build(cfg) + + +def build_runner(cfg, default_args=None): + runner_cfg = copy.deepcopy(cfg) + constructor_type = runner_cfg.pop('constructor', + 'DefaultRunnerConstructor') + runner_constructor = build_runner_constructor( + dict( + type=constructor_type, + runner_cfg=runner_cfg, + default_args=default_args)) + runner = runner_constructor() + return runner diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/checkpoint.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/checkpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..d690be1dfe70b1b82eaac8fe4db7022b35d5426c --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/checkpoint.py @@ -0,0 +1,707 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
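# NOTE (editor): illustrative sketch added to this document; it is not part
# of the vendored mmcv source. It ties together pieces introduced in the
# preceding files: build_runner() from builder.py and
# BaseRunner.register_training_hooks() with the priority table shown above.
# All config values are assumptions, and the hook types referenced
# (StepLrUpdaterHook, OptimizerHook, CheckpointHook, TextLoggerHook) are
# assumed to be registered as in upstream mmcv. EpochBasedRunner is added
# later in this diff (epoch_based_runner.py).
def _runner_setup_example(model, optimizer, logger):
    from .builder import build_runner

    runner = build_runner(
        dict(type='EpochBasedRunner', max_epochs=12),
        default_args=dict(
            model=model,
            optimizer=optimizer,
            work_dir='./work_dir',
            logger=logger))
    # Hooks fire in priority order: LR updater (VERY_HIGH), optimizer step
    # (ABOVE_NORMAL), checkpointing (NORMAL), timer (LOW), loggers (VERY_LOW).
    runner.register_training_hooks(
        lr_config=dict(policy='step', step=[8, 11]),
        optimizer_config=dict(grad_clip=None),
        checkpoint_config=dict(interval=1),
        log_config=dict(interval=50, hooks=[dict(type='TextLoggerHook')]))
    return runner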
+import io +import os +import os.path as osp +import pkgutil +import re +import time +import warnings +from collections import OrderedDict +from importlib import import_module +from tempfile import TemporaryDirectory + +import torch +import torchvision +from torch.optim import Optimizer +from torch.utils import model_zoo + +import annotator.mmpkg.mmcv as mmcv +from ..fileio import FileClient +from ..fileio import load as load_file +from ..parallel import is_module_wrapper +from ..utils import mkdir_or_exist +from .dist_utils import get_dist_info + +ENV_MMCV_HOME = 'MMCV_HOME' +ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME' +DEFAULT_CACHE_DIR = '~/.cache' + + +def _get_mmcv_home(): + mmcv_home = os.path.expanduser( + os.getenv( + ENV_MMCV_HOME, + os.path.join( + os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'mmcv'))) + + mkdir_or_exist(mmcv_home) + return mmcv_home + + +def load_state_dict(module, state_dict, strict=False, logger=None): + """Load state_dict to a module. + + This method is modified from :meth:`torch.nn.Module.load_state_dict`. + Default value for ``strict`` is set to ``False`` and the message for + param mismatch will be shown even if strict is False. + + Args: + module (Module): Module that receives the state_dict. + state_dict (OrderedDict): Weights. + strict (bool): whether to strictly enforce that the keys + in :attr:`state_dict` match the keys returned by this module's + :meth:`~torch.nn.Module.state_dict` function. Default: ``False``. + logger (:obj:`logging.Logger`, optional): Logger to log the error + message. If not specified, print function will be used. + """ + unexpected_keys = [] + all_missing_keys = [] + err_msg = [] + + metadata = getattr(state_dict, '_metadata', None) + state_dict = state_dict.copy() + if metadata is not None: + state_dict._metadata = metadata + + # use _load_from_state_dict to enable checkpoint version control + def load(module, prefix=''): + # recursively check parallel module in case that the model has a + # complicated structure, e.g., nn.Module(nn.Module(DDP)) + if is_module_wrapper(module): + module = module.module + local_metadata = {} if metadata is None else metadata.get( + prefix[:-1], {}) + module._load_from_state_dict(state_dict, prefix, local_metadata, True, + all_missing_keys, unexpected_keys, + err_msg) + for name, child in module._modules.items(): + if child is not None: + load(child, prefix + name + '.') + + load(module) + load = None # break load->load reference cycle + + # ignore "num_batches_tracked" of BN layers + missing_keys = [ + key for key in all_missing_keys if 'num_batches_tracked' not in key + ] + + if unexpected_keys: + err_msg.append('unexpected key in source ' + f'state_dict: {", ".join(unexpected_keys)}\n') + if missing_keys: + err_msg.append( + f'missing keys in source state_dict: {", ".join(missing_keys)}\n') + + rank, _ = get_dist_info() + if len(err_msg) > 0 and rank == 0: + err_msg.insert( + 0, 'The model and loaded state dict do not match exactly\n') + err_msg = '\n'.join(err_msg) + if strict: + raise RuntimeError(err_msg) + elif logger is not None: + logger.warning(err_msg) + else: + print(err_msg) + + +def get_torchvision_models(): + model_urls = dict() + for _, name, ispkg in pkgutil.walk_packages(torchvision.models.__path__): + if ispkg: + continue + _zoo = import_module(f'torchvision.models.{name}') + if hasattr(_zoo, 'model_urls'): + _urls = getattr(_zoo, 'model_urls') + model_urls.update(_urls) + return model_urls + + +def get_external_models(): + mmcv_home = _get_mmcv_home() + default_json_path = 
osp.join(mmcv.__path__[0], 'model_zoo/open_mmlab.json') + default_urls = load_file(default_json_path) + assert isinstance(default_urls, dict) + external_json_path = osp.join(mmcv_home, 'open_mmlab.json') + if osp.exists(external_json_path): + external_urls = load_file(external_json_path) + assert isinstance(external_urls, dict) + default_urls.update(external_urls) + + return default_urls + + +def get_mmcls_models(): + mmcls_json_path = osp.join(mmcv.__path__[0], 'model_zoo/mmcls.json') + mmcls_urls = load_file(mmcls_json_path) + + return mmcls_urls + + +def get_deprecated_model_names(): + deprecate_json_path = osp.join(mmcv.__path__[0], + 'model_zoo/deprecated.json') + deprecate_urls = load_file(deprecate_json_path) + assert isinstance(deprecate_urls, dict) + + return deprecate_urls + + +def _process_mmcls_checkpoint(checkpoint): + state_dict = checkpoint['state_dict'] + new_state_dict = OrderedDict() + for k, v in state_dict.items(): + if k.startswith('backbone.'): + new_state_dict[k[9:]] = v + new_checkpoint = dict(state_dict=new_state_dict) + + return new_checkpoint + + +class CheckpointLoader: + """A general checkpoint loader to manage all schemes.""" + + _schemes = {} + + @classmethod + def _register_scheme(cls, prefixes, loader, force=False): + if isinstance(prefixes, str): + prefixes = [prefixes] + else: + assert isinstance(prefixes, (list, tuple)) + for prefix in prefixes: + if (prefix not in cls._schemes) or force: + cls._schemes[prefix] = loader + else: + raise KeyError( + f'{prefix} is already registered as a loader backend, ' + 'add "force=True" if you want to override it') + # sort, longer prefixes take priority + cls._schemes = OrderedDict( + sorted(cls._schemes.items(), key=lambda t: t[0], reverse=True)) + + @classmethod + def register_scheme(cls, prefixes, loader=None, force=False): + """Register a loader to CheckpointLoader. + + This method can be used as a normal class method or a decorator. + + Args: + prefixes (str or list[str] or tuple[str]): + The prefix of the registered loader. + loader (function, optional): The loader function to be registered. + When this method is used as a decorator, loader is None. + Defaults to None. + force (bool, optional): Whether to override the loader + if the prefix has already been registered. Defaults to False. + """ + + if loader is not None: + cls._register_scheme(prefixes, loader, force=force) + return + + def _register(loader_cls): + cls._register_scheme(prefixes, loader_cls, force=force) + return loader_cls + + return _register + + @classmethod + def _get_checkpoint_loader(cls, path): + """Finds a loader that supports the given path. Falls back to the local + loader if no other loader is found. + + Args: + path (str): checkpoint path + + Returns: + loader (function): checkpoint loader + """ + + for p in cls._schemes: + if path.startswith(p): + return cls._schemes[p] + + @classmethod + def load_checkpoint(cls, filename, map_location=None, logger=None): + """load checkpoint through URL scheme path. + + Args: + filename (str): checkpoint file name with given prefix + map_location (str, optional): Same as :func:`torch.load`. + Default: None + logger (:mod:`logging.Logger`, optional): The logger for message. + Default: None + + Returns: + dict or OrderedDict: The loaded checkpoint. 
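        The example below is an editorial illustration, not part of the
        original docstring; the paths are placeholders.

        Example:
            >>> # the loader is picked by matching the registered prefix
            >>> ckpt = CheckpointLoader.load_checkpoint('torchvision://resnet50')
            >>> ckpt = CheckpointLoader.load_checkpoint(
            ...     '/path/to/model.pth', map_location='cpu')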
+ """ + + checkpoint_loader = cls._get_checkpoint_loader(filename) + class_name = checkpoint_loader.__name__ + mmcv.print_log( + f'load checkpoint from {class_name[10:]} path: {filename}', logger) + return checkpoint_loader(filename, map_location) + + +@CheckpointLoader.register_scheme(prefixes='') +def load_from_local(filename, map_location): + """load checkpoint by local file path. + + Args: + filename (str): local checkpoint file path + map_location (str, optional): Same as :func:`torch.load`. + + Returns: + dict or OrderedDict: The loaded checkpoint. + """ + + if not osp.isfile(filename): + raise IOError(f'{filename} is not a checkpoint file') + checkpoint = torch.load(filename, map_location=map_location) + return checkpoint + + +@CheckpointLoader.register_scheme(prefixes=('http://', 'https://')) +def load_from_http(filename, map_location=None, model_dir=None): + """load checkpoint through HTTP or HTTPS scheme path. In distributed + setting, this function only download checkpoint at local rank 0. + + Args: + filename (str): checkpoint file path with modelzoo or + torchvision prefix + map_location (str, optional): Same as :func:`torch.load`. + model_dir (string, optional): directory in which to save the object, + Default: None + + Returns: + dict or OrderedDict: The loaded checkpoint. + """ + rank, world_size = get_dist_info() + rank = int(os.environ.get('LOCAL_RANK', rank)) + if rank == 0: + checkpoint = model_zoo.load_url( + filename, model_dir=model_dir, map_location=map_location) + if world_size > 1: + torch.distributed.barrier() + if rank > 0: + checkpoint = model_zoo.load_url( + filename, model_dir=model_dir, map_location=map_location) + return checkpoint + + +@CheckpointLoader.register_scheme(prefixes='pavi://') +def load_from_pavi(filename, map_location=None): + """load checkpoint through the file path prefixed with pavi. In distributed + setting, this function download ckpt at all ranks to different temporary + directories. + + Args: + filename (str): checkpoint file path with pavi prefix + map_location (str, optional): Same as :func:`torch.load`. + Default: None + + Returns: + dict or OrderedDict: The loaded checkpoint. + """ + assert filename.startswith('pavi://'), \ + f'Expected filename startswith `pavi://`, but get {filename}' + model_path = filename[7:] + + try: + from pavi import modelcloud + except ImportError: + raise ImportError( + 'Please install pavi to load checkpoint from modelcloud.') + + model = modelcloud.get(model_path) + with TemporaryDirectory() as tmp_dir: + downloaded_file = osp.join(tmp_dir, model.name) + model.download(downloaded_file) + checkpoint = torch.load(downloaded_file, map_location=map_location) + return checkpoint + + +@CheckpointLoader.register_scheme(prefixes='s3://') +def load_from_ceph(filename, map_location=None, backend='petrel'): + """load checkpoint through the file path prefixed with s3. In distributed + setting, this function download ckpt at all ranks to different temporary + directories. + + Args: + filename (str): checkpoint file path with s3 prefix + map_location (str, optional): Same as :func:`torch.load`. + backend (str, optional): The storage backend type. Options are 'ceph', + 'petrel'. Default: 'petrel'. + + .. warning:: + :class:`mmcv.fileio.file_client.CephBackend` will be deprecated, + please use :class:`mmcv.fileio.file_client.PetrelBackend` instead. + + Returns: + dict or OrderedDict: The loaded checkpoint. 
+ """ + allowed_backends = ['ceph', 'petrel'] + if backend not in allowed_backends: + raise ValueError(f'Load from Backend {backend} is not supported.') + + if backend == 'ceph': + warnings.warn( + 'CephBackend will be deprecated, please use PetrelBackend instead') + + # CephClient and PetrelBackend have the same prefix 's3://' and the latter + # will be chosen as default. If PetrelBackend can not be instantiated + # successfully, the CephClient will be chosen. + try: + file_client = FileClient(backend=backend) + except ImportError: + allowed_backends.remove(backend) + file_client = FileClient(backend=allowed_backends[0]) + + with io.BytesIO(file_client.get(filename)) as buffer: + checkpoint = torch.load(buffer, map_location=map_location) + return checkpoint + + +@CheckpointLoader.register_scheme(prefixes=('modelzoo://', 'torchvision://')) +def load_from_torchvision(filename, map_location=None): + """load checkpoint through the file path prefixed with modelzoo or + torchvision. + + Args: + filename (str): checkpoint file path with modelzoo or + torchvision prefix + map_location (str, optional): Same as :func:`torch.load`. + + Returns: + dict or OrderedDict: The loaded checkpoint. + """ + model_urls = get_torchvision_models() + if filename.startswith('modelzoo://'): + warnings.warn('The URL scheme of "modelzoo://" is deprecated, please ' + 'use "torchvision://" instead') + model_name = filename[11:] + else: + model_name = filename[14:] + return load_from_http(model_urls[model_name], map_location=map_location) + + +@CheckpointLoader.register_scheme(prefixes=('open-mmlab://', 'openmmlab://')) +def load_from_openmmlab(filename, map_location=None): + """load checkpoint through the file path prefixed with open-mmlab or + openmmlab. + + Args: + filename (str): checkpoint file path with open-mmlab or + openmmlab prefix + map_location (str, optional): Same as :func:`torch.load`. + Default: None + + Returns: + dict or OrderedDict: The loaded checkpoint. + """ + + model_urls = get_external_models() + prefix_str = 'open-mmlab://' + if filename.startswith(prefix_str): + model_name = filename[13:] + else: + model_name = filename[12:] + prefix_str = 'openmmlab://' + + deprecated_urls = get_deprecated_model_names() + if model_name in deprecated_urls: + warnings.warn(f'{prefix_str}{model_name} is deprecated in favor ' + f'of {prefix_str}{deprecated_urls[model_name]}') + model_name = deprecated_urls[model_name] + model_url = model_urls[model_name] + # check if is url + if model_url.startswith(('http://', 'https://')): + checkpoint = load_from_http(model_url, map_location=map_location) + else: + filename = osp.join(_get_mmcv_home(), model_url) + if not osp.isfile(filename): + raise IOError(f'{filename} is not a checkpoint file') + checkpoint = torch.load(filename, map_location=map_location) + return checkpoint + + +@CheckpointLoader.register_scheme(prefixes='mmcls://') +def load_from_mmcls(filename, map_location=None): + """load checkpoint through the file path prefixed with mmcls. + + Args: + filename (str): checkpoint file path with mmcls prefix + map_location (str, optional): Same as :func:`torch.load`. + + Returns: + dict or OrderedDict: The loaded checkpoint. 
+ """ + + model_urls = get_mmcls_models() + model_name = filename[8:] + checkpoint = load_from_http( + model_urls[model_name], map_location=map_location) + checkpoint = _process_mmcls_checkpoint(checkpoint) + return checkpoint + + +def _load_checkpoint(filename, map_location=None, logger=None): + """Load checkpoint from somewhere (modelzoo, file, url). + + Args: + filename (str): Accept local filepath, URL, ``torchvision://xxx``, + ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for + details. + map_location (str, optional): Same as :func:`torch.load`. + Default: None. + logger (:mod:`logging.Logger`, optional): The logger for error message. + Default: None + + Returns: + dict or OrderedDict: The loaded checkpoint. It can be either an + OrderedDict storing model weights or a dict containing other + information, which depends on the checkpoint. + """ + return CheckpointLoader.load_checkpoint(filename, map_location, logger) + + +def _load_checkpoint_with_prefix(prefix, filename, map_location=None): + """Load partial pretrained model with specific prefix. + + Args: + prefix (str): The prefix of sub-module. + filename (str): Accept local filepath, URL, ``torchvision://xxx``, + ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for + details. + map_location (str | None): Same as :func:`torch.load`. Default: None. + + Returns: + dict or OrderedDict: The loaded checkpoint. + """ + + checkpoint = _load_checkpoint(filename, map_location=map_location) + + if 'state_dict' in checkpoint: + state_dict = checkpoint['state_dict'] + else: + state_dict = checkpoint + if not prefix.endswith('.'): + prefix += '.' + prefix_len = len(prefix) + + state_dict = { + k[prefix_len:]: v + for k, v in state_dict.items() if k.startswith(prefix) + } + + assert state_dict, f'{prefix} is not in the pretrained model' + return state_dict + + +def load_checkpoint(model, + filename, + map_location=None, + strict=False, + logger=None, + revise_keys=[(r'^module\.', '')]): + """Load checkpoint from a file or URI. + + Args: + model (Module): Module to load checkpoint. + filename (str): Accept local filepath, URL, ``torchvision://xxx``, + ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for + details. + map_location (str): Same as :func:`torch.load`. + strict (bool): Whether to allow different params for the model and + checkpoint. + logger (:mod:`logging.Logger` or None): The logger for error message. + revise_keys (list): A list of customized keywords to modify the + state_dict in checkpoint. Each item is a (pattern, replacement) + pair of the regular expression operations. Default: strip + the prefix 'module.' by [(r'^module\\.', '')]. + + Returns: + dict or OrderedDict: The loaded checkpoint. + """ + checkpoint = _load_checkpoint(filename, map_location, logger) + # OrderedDict is a subclass of dict + if not isinstance(checkpoint, dict): + raise RuntimeError( + f'No state_dict found in checkpoint file {filename}') + # get state_dict from checkpoint + if 'state_dict' in checkpoint: + state_dict = checkpoint['state_dict'] + else: + state_dict = checkpoint + + # strip prefix of state_dict + metadata = getattr(state_dict, '_metadata', OrderedDict()) + for p, r in revise_keys: + state_dict = OrderedDict( + {re.sub(p, r, k): v + for k, v in state_dict.items()}) + # Keep metadata in state_dict + state_dict._metadata = metadata + + # load state_dict + load_state_dict(model, state_dict, strict, logger) + return checkpoint + + +def weights_to_cpu(state_dict): + """Copy a model state_dict to cpu. 
+ + Args: + state_dict (OrderedDict): Model weights on GPU. + + Returns: + OrderedDict: Model weights on GPU. + """ + state_dict_cpu = OrderedDict() + for key, val in state_dict.items(): + state_dict_cpu[key] = val.cpu() + # Keep metadata in state_dict + state_dict_cpu._metadata = getattr(state_dict, '_metadata', OrderedDict()) + return state_dict_cpu + + +def _save_to_state_dict(module, destination, prefix, keep_vars): + """Saves module state to `destination` dictionary. + + This method is modified from :meth:`torch.nn.Module._save_to_state_dict`. + + Args: + module (nn.Module): The module to generate state_dict. + destination (dict): A dict where state will be stored. + prefix (str): The prefix for parameters and buffers used in this + module. + """ + for name, param in module._parameters.items(): + if param is not None: + destination[prefix + name] = param if keep_vars else param.detach() + for name, buf in module._buffers.items(): + # remove check of _non_persistent_buffers_set to allow nn.BatchNorm2d + if buf is not None: + destination[prefix + name] = buf if keep_vars else buf.detach() + + +def get_state_dict(module, destination=None, prefix='', keep_vars=False): + """Returns a dictionary containing a whole state of the module. + + Both parameters and persistent buffers (e.g. running averages) are + included. Keys are corresponding parameter and buffer names. + + This method is modified from :meth:`torch.nn.Module.state_dict` to + recursively check parallel module in case that the model has a complicated + structure, e.g., nn.Module(nn.Module(DDP)). + + Args: + module (nn.Module): The module to generate state_dict. + destination (OrderedDict): Returned dict for the state of the + module. + prefix (str): Prefix of the key. + keep_vars (bool): Whether to keep the variable property of the + parameters. Default: False. + + Returns: + dict: A dictionary containing a whole state of the module. + """ + # recursively check parallel module in case that the model has a + # complicated structure, e.g., nn.Module(nn.Module(DDP)) + if is_module_wrapper(module): + module = module.module + + # below is the same as torch.nn.Module.state_dict() + if destination is None: + destination = OrderedDict() + destination._metadata = OrderedDict() + destination._metadata[prefix[:-1]] = local_metadata = dict( + version=module._version) + _save_to_state_dict(module, destination, prefix, keep_vars) + for name, child in module._modules.items(): + if child is not None: + get_state_dict( + child, destination, prefix + name + '.', keep_vars=keep_vars) + for hook in module._state_dict_hooks.values(): + hook_result = hook(module, destination, prefix, local_metadata) + if hook_result is not None: + destination = hook_result + return destination + + +def save_checkpoint(model, + filename, + optimizer=None, + meta=None, + file_client_args=None): + """Save checkpoint to file. + + The checkpoint will have 3 fields: ``meta``, ``state_dict`` and + ``optimizer``. By default ``meta`` will contain version and time info. + + Args: + model (Module): Module whose params are to be saved. + filename (str): Checkpoint filename. + optimizer (:obj:`Optimizer`, optional): Optimizer to be saved. + meta (dict, optional): Metadata to be saved in checkpoint. + file_client_args (dict, optional): Arguments to instantiate a + FileClient. See :class:`mmcv.fileio.FileClient` for details. + Default: None. 
+ `New in version 1.3.16.` + """ + if meta is None: + meta = {} + elif not isinstance(meta, dict): + raise TypeError(f'meta must be a dict or None, but got {type(meta)}') + meta.update(mmcv_version=mmcv.__version__, time=time.asctime()) + + if is_module_wrapper(model): + model = model.module + + if hasattr(model, 'CLASSES') and model.CLASSES is not None: + # save class name to the meta + meta.update(CLASSES=model.CLASSES) + + checkpoint = { + 'meta': meta, + 'state_dict': weights_to_cpu(get_state_dict(model)) + } + # save optimizer state dict in the checkpoint + if isinstance(optimizer, Optimizer): + checkpoint['optimizer'] = optimizer.state_dict() + elif isinstance(optimizer, dict): + checkpoint['optimizer'] = {} + for name, optim in optimizer.items(): + checkpoint['optimizer'][name] = optim.state_dict() + + if filename.startswith('pavi://'): + if file_client_args is not None: + raise ValueError( + 'file_client_args should be "None" if filename starts with' + f'"pavi://", but got {file_client_args}') + try: + from pavi import modelcloud + from pavi import exception + except ImportError: + raise ImportError( + 'Please install pavi to load checkpoint from modelcloud.') + model_path = filename[7:] + root = modelcloud.Folder() + model_dir, model_name = osp.split(model_path) + try: + model = modelcloud.get(model_dir) + except exception.NodeNotFoundError: + model = root.create_training_model(model_dir) + with TemporaryDirectory() as tmp_dir: + checkpoint_file = osp.join(tmp_dir, model_name) + with open(checkpoint_file, 'wb') as f: + torch.save(checkpoint, f) + f.flush() + model.create_file(checkpoint_file, name=model_name) + else: + file_client = FileClient.infer_client(file_client_args, filename) + with io.BytesIO() as f: + torch.save(checkpoint, f) + file_client.put(f.getvalue(), filename) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/default_constructor.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/default_constructor.py new file mode 100644 index 0000000000000000000000000000000000000000..bdd7803289d6d70240977fa243d7f4432ccde8f8 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/default_constructor.py @@ -0,0 +1,44 @@ +from .builder import RUNNER_BUILDERS, RUNNERS + + +@RUNNER_BUILDERS.register_module() +class DefaultRunnerConstructor: + """Default constructor for runners. + + Custom existing `Runner` like `EpocBasedRunner` though `RunnerConstructor`. + For example, We can inject some new properties and functions for `Runner`. + + Example: + >>> from annotator.mmpkg.mmcv.runner import RUNNER_BUILDERS, build_runner + >>> # Define a new RunnerReconstructor + >>> @RUNNER_BUILDERS.register_module() + >>> class MyRunnerConstructor: + ... def __init__(self, runner_cfg, default_args=None): + ... if not isinstance(runner_cfg, dict): + ... raise TypeError('runner_cfg should be a dict', + ... f'but got {type(runner_cfg)}') + ... self.runner_cfg = runner_cfg + ... self.default_args = default_args + ... + ... def __call__(self): + ... runner = RUNNERS.build(self.runner_cfg, + ... default_args=self.default_args) + ... # Add new properties for existing runner + ... runner.my_name = 'my_runner' + ... runner.my_function = lambda self: print(self.my_name) + ... ... + >>> # build your runner + >>> runner_cfg = dict(type='EpochBasedRunner', max_epochs=40, + ... 
constructor='MyRunnerConstructor') + >>> runner = build_runner(runner_cfg) + """ + + def __init__(self, runner_cfg, default_args=None): + if not isinstance(runner_cfg, dict): + raise TypeError('runner_cfg should be a dict', + f'but got {type(runner_cfg)}') + self.runner_cfg = runner_cfg + self.default_args = default_args + + def __call__(self): + return RUNNERS.build(self.runner_cfg, default_args=self.default_args) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/dist_utils.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/dist_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..d3a1ef3fda5ceeb31bf15a73779da1b1903ab0fe --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/dist_utils.py @@ -0,0 +1,164 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import functools +import os +import subprocess +from collections import OrderedDict + +import torch +import torch.multiprocessing as mp +from torch import distributed as dist +from torch._utils import (_flatten_dense_tensors, _take_tensors, + _unflatten_dense_tensors) + + +def init_dist(launcher, backend='nccl', **kwargs): + if mp.get_start_method(allow_none=True) is None: + mp.set_start_method('spawn') + if launcher == 'pytorch': + _init_dist_pytorch(backend, **kwargs) + elif launcher == 'mpi': + _init_dist_mpi(backend, **kwargs) + elif launcher == 'slurm': + _init_dist_slurm(backend, **kwargs) + else: + raise ValueError(f'Invalid launcher type: {launcher}') + + +def _init_dist_pytorch(backend, **kwargs): + # TODO: use local_rank instead of rank % num_gpus + rank = int(os.environ['RANK']) + num_gpus = torch.cuda.device_count() + torch.cuda.set_device(rank % num_gpus) + dist.init_process_group(backend=backend, **kwargs) + + +def _init_dist_mpi(backend, **kwargs): + # TODO: use local_rank instead of rank % num_gpus + rank = int(os.environ['OMPI_COMM_WORLD_RANK']) + num_gpus = torch.cuda.device_count() + torch.cuda.set_device(rank % num_gpus) + dist.init_process_group(backend=backend, **kwargs) + + +def _init_dist_slurm(backend, port=None): + """Initialize slurm distributed training environment. + + If argument ``port`` is not specified, then the master port will be system + environment variable ``MASTER_PORT``. If ``MASTER_PORT`` is not in system + environment variable, then a default port ``29500`` will be used. + + Args: + backend (str): Backend of torch.distributed. + port (int, optional): Master port. Defaults to None. 
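    Example (editorial illustration, not part of the original docstring;
    assumes the process was launched with ``srun`` so that the ``SLURM_*``
    variables read below are set, and that GPUs are visible):

        >>> init_dist('slurm', backend='nccl', port=29510)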
+ """ + proc_id = int(os.environ['SLURM_PROCID']) + ntasks = int(os.environ['SLURM_NTASKS']) + node_list = os.environ['SLURM_NODELIST'] + num_gpus = torch.cuda.device_count() + torch.cuda.set_device(proc_id % num_gpus) + addr = subprocess.getoutput( + f'scontrol show hostname {node_list} | head -n1') + # specify master port + if port is not None: + os.environ['MASTER_PORT'] = str(port) + elif 'MASTER_PORT' in os.environ: + pass # use MASTER_PORT in the environment variable + else: + # 29500 is torch.distributed default port + os.environ['MASTER_PORT'] = '29500' + # use MASTER_ADDR in the environment variable if it already exists + if 'MASTER_ADDR' not in os.environ: + os.environ['MASTER_ADDR'] = addr + os.environ['WORLD_SIZE'] = str(ntasks) + os.environ['LOCAL_RANK'] = str(proc_id % num_gpus) + os.environ['RANK'] = str(proc_id) + dist.init_process_group(backend=backend) + + +def get_dist_info(): + if dist.is_available() and dist.is_initialized(): + rank = dist.get_rank() + world_size = dist.get_world_size() + else: + rank = 0 + world_size = 1 + return rank, world_size + + +def master_only(func): + + @functools.wraps(func) + def wrapper(*args, **kwargs): + rank, _ = get_dist_info() + if rank == 0: + return func(*args, **kwargs) + + return wrapper + + +def allreduce_params(params, coalesce=True, bucket_size_mb=-1): + """Allreduce parameters. + + Args: + params (list[torch.Parameters]): List of parameters or buffers of a + model. + coalesce (bool, optional): Whether allreduce parameters as a whole. + Defaults to True. + bucket_size_mb (int, optional): Size of bucket, the unit is MB. + Defaults to -1. + """ + _, world_size = get_dist_info() + if world_size == 1: + return + params = [param.data for param in params] + if coalesce: + _allreduce_coalesced(params, world_size, bucket_size_mb) + else: + for tensor in params: + dist.all_reduce(tensor.div_(world_size)) + + +def allreduce_grads(params, coalesce=True, bucket_size_mb=-1): + """Allreduce gradients. + + Args: + params (list[torch.Parameters]): List of parameters of a model + coalesce (bool, optional): Whether allreduce parameters as a whole. + Defaults to True. + bucket_size_mb (int, optional): Size of bucket, the unit is MB. + Defaults to -1. 
+ """ + grads = [ + param.grad.data for param in params + if param.requires_grad and param.grad is not None + ] + _, world_size = get_dist_info() + if world_size == 1: + return + if coalesce: + _allreduce_coalesced(grads, world_size, bucket_size_mb) + else: + for tensor in grads: + dist.all_reduce(tensor.div_(world_size)) + + +def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): + if bucket_size_mb > 0: + bucket_size_bytes = bucket_size_mb * 1024 * 1024 + buckets = _take_tensors(tensors, bucket_size_bytes) + else: + buckets = OrderedDict() + for tensor in tensors: + tp = tensor.type() + if tp not in buckets: + buckets[tp] = [] + buckets[tp].append(tensor) + buckets = buckets.values() + + for bucket in buckets: + flat_tensors = _flatten_dense_tensors(bucket) + dist.all_reduce(flat_tensors) + flat_tensors.div_(world_size) + for tensor, synced in zip( + bucket, _unflatten_dense_tensors(flat_tensors, bucket)): + tensor.copy_(synced) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/epoch_based_runner.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/epoch_based_runner.py new file mode 100644 index 0000000000000000000000000000000000000000..d4df071e1740baa4aea2951590ac929b3715daa2 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/epoch_based_runner.py @@ -0,0 +1,187 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp +import platform +import shutil +import time +import warnings + +import torch + +import annotator.mmpkg.mmcv as mmcv +from .base_runner import BaseRunner +from .builder import RUNNERS +from .checkpoint import save_checkpoint +from .utils import get_host_info + + +@RUNNERS.register_module() +class EpochBasedRunner(BaseRunner): + """Epoch-based Runner. + + This runner train models epoch by epoch. 
+ """ + + def run_iter(self, data_batch, train_mode, **kwargs): + if self.batch_processor is not None: + outputs = self.batch_processor( + self.model, data_batch, train_mode=train_mode, **kwargs) + elif train_mode: + outputs = self.model.train_step(data_batch, self.optimizer, + **kwargs) + else: + outputs = self.model.val_step(data_batch, self.optimizer, **kwargs) + if not isinstance(outputs, dict): + raise TypeError('"batch_processor()" or "model.train_step()"' + 'and "model.val_step()" must return a dict') + if 'log_vars' in outputs: + self.log_buffer.update(outputs['log_vars'], outputs['num_samples']) + self.outputs = outputs + + def train(self, data_loader, **kwargs): + self.model.train() + self.mode = 'train' + self.data_loader = data_loader + self._max_iters = self._max_epochs * len(self.data_loader) + self.call_hook('before_train_epoch') + time.sleep(2) # Prevent possible deadlock during epoch transition + for i, data_batch in enumerate(self.data_loader): + self._inner_iter = i + self.call_hook('before_train_iter') + self.run_iter(data_batch, train_mode=True, **kwargs) + self.call_hook('after_train_iter') + self._iter += 1 + + self.call_hook('after_train_epoch') + self._epoch += 1 + + @torch.no_grad() + def val(self, data_loader, **kwargs): + self.model.eval() + self.mode = 'val' + self.data_loader = data_loader + self.call_hook('before_val_epoch') + time.sleep(2) # Prevent possible deadlock during epoch transition + for i, data_batch in enumerate(self.data_loader): + self._inner_iter = i + self.call_hook('before_val_iter') + self.run_iter(data_batch, train_mode=False) + self.call_hook('after_val_iter') + + self.call_hook('after_val_epoch') + + def run(self, data_loaders, workflow, max_epochs=None, **kwargs): + """Start running. + + Args: + data_loaders (list[:obj:`DataLoader`]): Dataloaders for training + and validation. + workflow (list[tuple]): A list of (phase, epochs) to specify the + running order and epochs. E.g, [('train', 2), ('val', 1)] means + running 2 epochs for training and 1 epoch for validation, + iteratively. 
+ """ + assert isinstance(data_loaders, list) + assert mmcv.is_list_of(workflow, tuple) + assert len(data_loaders) == len(workflow) + if max_epochs is not None: + warnings.warn( + 'setting max_epochs in run is deprecated, ' + 'please set max_epochs in runner_config', DeprecationWarning) + self._max_epochs = max_epochs + + assert self._max_epochs is not None, ( + 'max_epochs must be specified during instantiation') + + for i, flow in enumerate(workflow): + mode, epochs = flow + if mode == 'train': + self._max_iters = self._max_epochs * len(data_loaders[i]) + break + + work_dir = self.work_dir if self.work_dir is not None else 'NONE' + self.logger.info('Start running, host: %s, work_dir: %s', + get_host_info(), work_dir) + self.logger.info('Hooks will be executed in the following order:\n%s', + self.get_hook_info()) + self.logger.info('workflow: %s, max: %d epochs', workflow, + self._max_epochs) + self.call_hook('before_run') + + while self.epoch < self._max_epochs: + for i, flow in enumerate(workflow): + mode, epochs = flow + if isinstance(mode, str): # self.train() + if not hasattr(self, mode): + raise ValueError( + f'runner has no method named "{mode}" to run an ' + 'epoch') + epoch_runner = getattr(self, mode) + else: + raise TypeError( + 'mode in workflow must be a str, but got {}'.format( + type(mode))) + + for _ in range(epochs): + if mode == 'train' and self.epoch >= self._max_epochs: + break + epoch_runner(data_loaders[i], **kwargs) + + time.sleep(1) # wait for some hooks like loggers to finish + self.call_hook('after_run') + + def save_checkpoint(self, + out_dir, + filename_tmpl='epoch_{}.pth', + save_optimizer=True, + meta=None, + create_symlink=True): + """Save the checkpoint. + + Args: + out_dir (str): The directory that checkpoints are saved. + filename_tmpl (str, optional): The checkpoint filename template, + which contains a placeholder for the epoch number. + Defaults to 'epoch_{}.pth'. + save_optimizer (bool, optional): Whether to save the optimizer to + the checkpoint. Defaults to True. + meta (dict, optional): The meta information to be saved in the + checkpoint. Defaults to None. + create_symlink (bool, optional): Whether to create a symlink + "latest.pth" to point to the latest checkpoint. + Defaults to True. + """ + if meta is None: + meta = {} + elif not isinstance(meta, dict): + raise TypeError( + f'meta should be a dict or None, but got {type(meta)}') + if self.meta is not None: + meta.update(self.meta) + # Note: meta.update(self.meta) should be done before + # meta.update(epoch=self.epoch + 1, iter=self.iter) otherwise + # there will be problems with resumed checkpoints. 
+ # More details in https://github.com/open-mmlab/mmcv/pull/1108 + meta.update(epoch=self.epoch + 1, iter=self.iter) + + filename = filename_tmpl.format(self.epoch + 1) + filepath = osp.join(out_dir, filename) + optimizer = self.optimizer if save_optimizer else None + save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta) + # in some environments, `os.symlink` is not supported, you may need to + # set `create_symlink` to False + if create_symlink: + dst_file = osp.join(out_dir, 'latest.pth') + if platform.system() != 'Windows': + mmcv.symlink(filename, dst_file) + else: + shutil.copy(filepath, dst_file) + + +@RUNNERS.register_module() +class Runner(EpochBasedRunner): + """Deprecated name of EpochBasedRunner.""" + + def __init__(self, *args, **kwargs): + warnings.warn( + 'Runner was deprecated, please use EpochBasedRunner instead') + super().__init__(*args, **kwargs) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/fp16_utils.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/fp16_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..f6b54886519fd2808360b1632e5bebf6563eced2 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/fp16_utils.py @@ -0,0 +1,410 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import functools +import warnings +from collections import abc +from inspect import getfullargspec + +import numpy as np +import torch +import torch.nn as nn + +from annotator.mmpkg.mmcv.utils import TORCH_VERSION, digit_version +from .dist_utils import allreduce_grads as _allreduce_grads + +try: + # If PyTorch version >= 1.6.0, torch.cuda.amp.autocast would be imported + # and used; otherwise, auto fp16 will adopt mmcv's implementation. + # Note that when PyTorch >= 1.6.0, we still cast tensor types to fp16 + # manually, so the behavior may not be consistent with real amp. + from torch.cuda.amp import autocast +except ImportError: + pass + + +def cast_tensor_type(inputs, src_type, dst_type): + """Recursively convert Tensor in inputs from src_type to dst_type. + + Args: + inputs: Inputs that to be casted. + src_type (torch.dtype): Source type.. + dst_type (torch.dtype): Destination type. + + Returns: + The same type with inputs, but all contained Tensors have been cast. + """ + if isinstance(inputs, nn.Module): + return inputs + elif isinstance(inputs, torch.Tensor): + return inputs.to(dst_type) + elif isinstance(inputs, str): + return inputs + elif isinstance(inputs, np.ndarray): + return inputs + elif isinstance(inputs, abc.Mapping): + return type(inputs)({ + k: cast_tensor_type(v, src_type, dst_type) + for k, v in inputs.items() + }) + elif isinstance(inputs, abc.Iterable): + return type(inputs)( + cast_tensor_type(item, src_type, dst_type) for item in inputs) + else: + return inputs + + +def auto_fp16(apply_to=None, out_fp32=False): + """Decorator to enable fp16 training automatically. + + This decorator is useful when you write custom modules and want to support + mixed precision training. If inputs arguments are fp32 tensors, they will + be converted to fp16 automatically. Arguments other than fp32 tensors are + ignored. If you are using PyTorch >= 1.6, torch.cuda.amp is used as the + backend, otherwise, original mmcv implementation will be adopted. + + Args: + apply_to (Iterable, optional): The argument names to be converted. + `None` indicates all arguments. + out_fp32 (bool): Whether to convert the output back to fp32. 
+ + Example: + + >>> import torch.nn as nn + >>> class MyModule1(nn.Module): + >>> + >>> # Convert x and y to fp16 + >>> @auto_fp16() + >>> def forward(self, x, y): + >>> pass + + >>> import torch.nn as nn + >>> class MyModule2(nn.Module): + >>> + >>> # convert pred to fp16 + >>> @auto_fp16(apply_to=('pred', )) + >>> def do_something(self, pred, others): + >>> pass + """ + + def auto_fp16_wrapper(old_func): + + @functools.wraps(old_func) + def new_func(*args, **kwargs): + # check if the module has set the attribute `fp16_enabled`, if not, + # just fallback to the original method. + if not isinstance(args[0], torch.nn.Module): + raise TypeError('@auto_fp16 can only be used to decorate the ' + 'method of nn.Module') + if not (hasattr(args[0], 'fp16_enabled') and args[0].fp16_enabled): + return old_func(*args, **kwargs) + + # get the arg spec of the decorated method + args_info = getfullargspec(old_func) + # get the argument names to be casted + args_to_cast = args_info.args if apply_to is None else apply_to + # convert the args that need to be processed + new_args = [] + # NOTE: default args are not taken into consideration + if args: + arg_names = args_info.args[:len(args)] + for i, arg_name in enumerate(arg_names): + if arg_name in args_to_cast: + new_args.append( + cast_tensor_type(args[i], torch.float, torch.half)) + else: + new_args.append(args[i]) + # convert the kwargs that need to be processed + new_kwargs = {} + if kwargs: + for arg_name, arg_value in kwargs.items(): + if arg_name in args_to_cast: + new_kwargs[arg_name] = cast_tensor_type( + arg_value, torch.float, torch.half) + else: + new_kwargs[arg_name] = arg_value + # apply converted arguments to the decorated method + if (TORCH_VERSION != 'parrots' and + digit_version(TORCH_VERSION) >= digit_version('1.6.0')): + with autocast(enabled=True): + output = old_func(*new_args, **new_kwargs) + else: + output = old_func(*new_args, **new_kwargs) + # cast the results back to fp32 if necessary + if out_fp32: + output = cast_tensor_type(output, torch.half, torch.float) + return output + + return new_func + + return auto_fp16_wrapper + + +def force_fp32(apply_to=None, out_fp16=False): + """Decorator to convert input arguments to fp32 in force. + + This decorator is useful when you write custom modules and want to support + mixed precision training. If there are some inputs that must be processed + in fp32 mode, then this decorator can handle it. If inputs arguments are + fp16 tensors, they will be converted to fp32 automatically. Arguments other + than fp16 tensors are ignored. If you are using PyTorch >= 1.6, + torch.cuda.amp is used as the backend, otherwise, original mmcv + implementation will be adopted. + + Args: + apply_to (Iterable, optional): The argument names to be converted. + `None` indicates all arguments. + out_fp16 (bool): Whether to convert the output back to fp16. + + Example: + + >>> import torch.nn as nn + >>> class MyModule1(nn.Module): + >>> + >>> # Convert x and y to fp32 + >>> @force_fp32() + >>> def loss(self, x, y): + >>> pass + + >>> import torch.nn as nn + >>> class MyModule2(nn.Module): + >>> + >>> # convert pred to fp32 + >>> @force_fp32(apply_to=('pred', )) + >>> def post_process(self, pred, others): + >>> pass + """ + + def force_fp32_wrapper(old_func): + + @functools.wraps(old_func) + def new_func(*args, **kwargs): + # check if the module has set the attribute `fp16_enabled`, if not, + # just fallback to the original method. 
+ if not isinstance(args[0], torch.nn.Module): + raise TypeError('@force_fp32 can only be used to decorate the ' + 'method of nn.Module') + if not (hasattr(args[0], 'fp16_enabled') and args[0].fp16_enabled): + return old_func(*args, **kwargs) + # get the arg spec of the decorated method + args_info = getfullargspec(old_func) + # get the argument names to be casted + args_to_cast = args_info.args if apply_to is None else apply_to + # convert the args that need to be processed + new_args = [] + if args: + arg_names = args_info.args[:len(args)] + for i, arg_name in enumerate(arg_names): + if arg_name in args_to_cast: + new_args.append( + cast_tensor_type(args[i], torch.half, torch.float)) + else: + new_args.append(args[i]) + # convert the kwargs that need to be processed + new_kwargs = dict() + if kwargs: + for arg_name, arg_value in kwargs.items(): + if arg_name in args_to_cast: + new_kwargs[arg_name] = cast_tensor_type( + arg_value, torch.half, torch.float) + else: + new_kwargs[arg_name] = arg_value + # apply converted arguments to the decorated method + if (TORCH_VERSION != 'parrots' and + digit_version(TORCH_VERSION) >= digit_version('1.6.0')): + with autocast(enabled=False): + output = old_func(*new_args, **new_kwargs) + else: + output = old_func(*new_args, **new_kwargs) + # cast the results back to fp32 if necessary + if out_fp16: + output = cast_tensor_type(output, torch.float, torch.half) + return output + + return new_func + + return force_fp32_wrapper + + +def allreduce_grads(params, coalesce=True, bucket_size_mb=-1): + warnings.warning( + '"mmcv.runner.fp16_utils.allreduce_grads" is deprecated, and will be ' + 'removed in v2.8. Please switch to "mmcv.runner.allreduce_grads') + _allreduce_grads(params, coalesce=coalesce, bucket_size_mb=bucket_size_mb) + + +def wrap_fp16_model(model): + """Wrap the FP32 model to FP16. + + If you are using PyTorch >= 1.6, torch.cuda.amp is used as the + backend, otherwise, original mmcv implementation will be adopted. + + For PyTorch >= 1.6, this function will + 1. Set fp16 flag inside the model to True. + + Otherwise: + 1. Convert FP32 model to FP16. + 2. Remain some necessary layers to be FP32, e.g., normalization layers. + 3. Set `fp16_enabled` flag inside the model to True. + + Args: + model (nn.Module): Model in FP32. + """ + if (TORCH_VERSION == 'parrots' + or digit_version(TORCH_VERSION) < digit_version('1.6.0')): + # convert model to fp16 + model.half() + # patch the normalization layers to make it work in fp32 mode + patch_norm_fp32(model) + # set `fp16_enabled` flag + for m in model.modules(): + if hasattr(m, 'fp16_enabled'): + m.fp16_enabled = True + + +def patch_norm_fp32(module): + """Recursively convert normalization layers from FP16 to FP32. + + Args: + module (nn.Module): The modules to be converted in FP16. + + Returns: + nn.Module: The converted module, the normalization layers have been + converted to FP32. + """ + if isinstance(module, (nn.modules.batchnorm._BatchNorm, nn.GroupNorm)): + module.float() + if isinstance(module, nn.GroupNorm) or torch.__version__ < '1.3': + module.forward = patch_forward_method(module.forward, torch.half, + torch.float) + for child in module.children(): + patch_norm_fp32(child) + return module + + +def patch_forward_method(func, src_type, dst_type, convert_output=True): + """Patch the forward method of a module. + + Args: + func (callable): The original forward method. + src_type (torch.dtype): Type of input arguments to be converted from. 
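`force_fp32` is the mirror image, typically applied to loss or post-processing methods that are numerically fragile in half precision. A hedged sketch (the class and method names below are illustrative, not part of the library):

```python
import torch
import torch.nn as nn
from annotator.mmpkg.mmcv.runner.fp16_utils import force_fp32

class TinyLoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.fp16_enabled = True         # usually set by wrap_fp16_model; hard-coded here

    @force_fp32(apply_to=('pred', 'target'))
    def forward(self, pred, target):
        # pred/target have been promoted to fp32, so the reduction below is not
        # computed in half precision even inside an fp16 model
        return ((pred - target) ** 2).mean()

loss = TinyLoss()(torch.randn(4, 8).half(), torch.zeros(4, 8).half())
print(loss.dtype)                        # torch.float32
```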
+ dst_type (torch.dtype): Type of input arguments to be converted to. + convert_output (bool): Whether to convert the output back to src_type. + + Returns: + callable: The patched forward method. + """ + + def new_forward(*args, **kwargs): + output = func(*cast_tensor_type(args, src_type, dst_type), + **cast_tensor_type(kwargs, src_type, dst_type)) + if convert_output: + output = cast_tensor_type(output, dst_type, src_type) + return output + + return new_forward + + +class LossScaler: + """Class that manages loss scaling in mixed precision training which + supports both dynamic or static mode. + + The implementation refers to + https://github.com/NVIDIA/apex/blob/master/apex/fp16_utils/loss_scaler.py. + Indirectly, by supplying ``mode='dynamic'`` for dynamic loss scaling. + It's important to understand how :class:`LossScaler` operates. + Loss scaling is designed to combat the problem of underflowing + gradients encountered at long times when training fp16 networks. + Dynamic loss scaling begins by attempting a very high loss + scale. Ironically, this may result in OVERflowing gradients. + If overflowing gradients are encountered, :class:`FP16_Optimizer` then + skips the update step for this particular iteration/minibatch, + and :class:`LossScaler` adjusts the loss scale to a lower value. + If a certain number of iterations occur without overflowing gradients + detected,:class:`LossScaler` increases the loss scale once more. + In this way :class:`LossScaler` attempts to "ride the edge" of always + using the highest loss scale possible without incurring overflow. + + Args: + init_scale (float): Initial loss scale value, default: 2**32. + scale_factor (float): Factor used when adjusting the loss scale. + Default: 2. + mode (str): Loss scaling mode. 'dynamic' or 'static' + scale_window (int): Number of consecutive iterations without an + overflow to wait before increasing the loss scale. Default: 1000. 
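The interplay of `scale_factor` and `scale_window` described above is easiest to see with a toy driver loop. A minimal sketch (the overflow pattern is fabricated; a real fp16 optimizer hook would derive it from the gradients and skip `optimizer.step()` on overflow):

```python
from annotator.mmpkg.mmcv.runner.fp16_utils import LossScaler

scaler = LossScaler(init_scale=2**16, mode='dynamic', scale_factor=2., scale_window=4)

for step, overflowed in enumerate([False, False, True, False, False, False, False]):
    scaler.update_scale(overflowed)
    print(step, scaler.loss_scale)

# The scale is halved immediately after the overflow at step 2, and is doubled
# again only once `scale_window` iterations have passed since that overflow.
```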
+ """ + + def __init__(self, + init_scale=2**32, + mode='dynamic', + scale_factor=2., + scale_window=1000): + self.cur_scale = init_scale + self.cur_iter = 0 + assert mode in ('dynamic', + 'static'), 'mode can only be dynamic or static' + self.mode = mode + self.last_overflow_iter = -1 + self.scale_factor = scale_factor + self.scale_window = scale_window + + def has_overflow(self, params): + """Check if params contain overflow.""" + if self.mode != 'dynamic': + return False + for p in params: + if p.grad is not None and LossScaler._has_inf_or_nan(p.grad.data): + return True + return False + + def _has_inf_or_nan(x): + """Check if params contain NaN.""" + try: + cpu_sum = float(x.float().sum()) + except RuntimeError as instance: + if 'value cannot be converted' not in instance.args[0]: + raise + return True + else: + if cpu_sum == float('inf') or cpu_sum == -float('inf') \ + or cpu_sum != cpu_sum: + return True + return False + + def update_scale(self, overflow): + """update the current loss scale value when overflow happens.""" + if self.mode != 'dynamic': + return + if overflow: + self.cur_scale = max(self.cur_scale / self.scale_factor, 1) + self.last_overflow_iter = self.cur_iter + else: + if (self.cur_iter - self.last_overflow_iter) % \ + self.scale_window == 0: + self.cur_scale *= self.scale_factor + self.cur_iter += 1 + + def state_dict(self): + """Returns the state of the scaler as a :class:`dict`.""" + return dict( + cur_scale=self.cur_scale, + cur_iter=self.cur_iter, + mode=self.mode, + last_overflow_iter=self.last_overflow_iter, + scale_factor=self.scale_factor, + scale_window=self.scale_window) + + def load_state_dict(self, state_dict): + """Loads the loss_scaler state dict. + + Args: + state_dict (dict): scaler state. + """ + self.cur_scale = state_dict['cur_scale'] + self.cur_iter = state_dict['cur_iter'] + self.mode = state_dict['mode'] + self.last_overflow_iter = state_dict['last_overflow_iter'] + self.scale_factor = state_dict['scale_factor'] + self.scale_window = state_dict['scale_window'] + + @property + def loss_scale(self): + return self.cur_scale diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..915af28cefab14a14c1188ed861161080fd138a3 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/__init__.py @@ -0,0 +1,29 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+from .checkpoint import CheckpointHook +from .closure import ClosureHook +from .ema import EMAHook +from .evaluation import DistEvalHook, EvalHook +from .hook import HOOKS, Hook +from .iter_timer import IterTimerHook +from .logger import (DvcliveLoggerHook, LoggerHook, MlflowLoggerHook, + NeptuneLoggerHook, PaviLoggerHook, TensorboardLoggerHook, + TextLoggerHook, WandbLoggerHook) +from .lr_updater import LrUpdaterHook +from .memory import EmptyCacheHook +from .momentum_updater import MomentumUpdaterHook +from .optimizer import (Fp16OptimizerHook, GradientCumulativeFp16OptimizerHook, + GradientCumulativeOptimizerHook, OptimizerHook) +from .profiler import ProfilerHook +from .sampler_seed import DistSamplerSeedHook +from .sync_buffer import SyncBuffersHook + +__all__ = [ + 'HOOKS', 'Hook', 'CheckpointHook', 'ClosureHook', 'LrUpdaterHook', + 'OptimizerHook', 'Fp16OptimizerHook', 'IterTimerHook', + 'DistSamplerSeedHook', 'EmptyCacheHook', 'LoggerHook', 'MlflowLoggerHook', + 'PaviLoggerHook', 'TextLoggerHook', 'TensorboardLoggerHook', + 'NeptuneLoggerHook', 'WandbLoggerHook', 'DvcliveLoggerHook', + 'MomentumUpdaterHook', 'SyncBuffersHook', 'EMAHook', 'EvalHook', + 'DistEvalHook', 'ProfilerHook', 'GradientCumulativeOptimizerHook', + 'GradientCumulativeFp16OptimizerHook' +] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/checkpoint.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/checkpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..877aa8b84ac48bea0a06f9d0733d74f88be2ecfc --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/checkpoint.py @@ -0,0 +1,167 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp +import warnings + +from annotator.mmpkg.mmcv.fileio import FileClient +from ..dist_utils import allreduce_params, master_only +from .hook import HOOKS, Hook + + +@HOOKS.register_module() +class CheckpointHook(Hook): + """Save checkpoints periodically. + + Args: + interval (int): The saving period. If ``by_epoch=True``, interval + indicates epochs, otherwise it indicates iterations. + Default: -1, which means "never". + by_epoch (bool): Saving checkpoints by epoch or by iteration. + Default: True. + save_optimizer (bool): Whether to save optimizer state_dict in the + checkpoint. It is usually used for resuming experiments. + Default: True. + out_dir (str, optional): The root directory to save checkpoints. If not + specified, ``runner.work_dir`` will be used by default. If + specified, the ``out_dir`` will be the concatenation of ``out_dir`` + and the last level directory of ``runner.work_dir``. + `Changed in version 1.3.16.` + max_keep_ckpts (int, optional): The maximum checkpoints to keep. + In some cases we want only the latest few checkpoints and would + like to delete old ones to save the disk space. + Default: -1, which means unlimited. + save_last (bool, optional): Whether to force the last checkpoint to be + saved regardless of interval. Default: True. + sync_buffer (bool, optional): Whether to synchronize buffers in + different gpus. Default: False. + file_client_args (dict, optional): Arguments to instantiate a + FileClient. See :class:`mmcv.fileio.FileClient` for details. + Default: None. + `New in version 1.3.16.` + + .. warning:: + Before v1.3.16, the ``out_dir`` argument indicates the path where the + checkpoint is stored. 
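For orientation, this is roughly how the hook above is instantiated; the keyword names come straight from the Args list, while the commented-out `register_hook` call is an assumption about the runner API rather than something shown in this diff:

```python
from annotator.mmpkg.mmcv.runner.hooks import CheckpointHook

ckpt_hook = CheckpointHook(
    interval=1,              # save after every epoch (by_epoch=True is the default)
    max_keep_ckpts=3,        # keep only the three newest epoch_*.pth files
    save_optimizer=True,     # include optimizer state so training can resume
    out_dir=None,            # fall back to runner.work_dir in before_run()
)
# runner.register_hook(ckpt_hook)   # registration call assumed from mmcv's runner API
```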
However, since v1.3.16, ``out_dir`` indicates the + root directory and the final path to save checkpoint is the + concatenation of ``out_dir`` and the last level directory of + ``runner.work_dir``. Suppose the value of ``out_dir`` is "/path/of/A" + and the value of ``runner.work_dir`` is "/path/of/B", then the final + path will be "/path/of/A/B". + """ + + def __init__(self, + interval=-1, + by_epoch=True, + save_optimizer=True, + out_dir=None, + max_keep_ckpts=-1, + save_last=True, + sync_buffer=False, + file_client_args=None, + **kwargs): + self.interval = interval + self.by_epoch = by_epoch + self.save_optimizer = save_optimizer + self.out_dir = out_dir + self.max_keep_ckpts = max_keep_ckpts + self.save_last = save_last + self.args = kwargs + self.sync_buffer = sync_buffer + self.file_client_args = file_client_args + + def before_run(self, runner): + if not self.out_dir: + self.out_dir = runner.work_dir + + self.file_client = FileClient.infer_client(self.file_client_args, + self.out_dir) + + # if `self.out_dir` is not equal to `runner.work_dir`, it means that + # `self.out_dir` is set so the final `self.out_dir` is the + # concatenation of `self.out_dir` and the last level directory of + # `runner.work_dir` + if self.out_dir != runner.work_dir: + basename = osp.basename(runner.work_dir.rstrip(osp.sep)) + self.out_dir = self.file_client.join_path(self.out_dir, basename) + + runner.logger.info((f'Checkpoints will be saved to {self.out_dir} by ' + f'{self.file_client.name}.')) + + # disable the create_symlink option because some file backends do not + # allow to create a symlink + if 'create_symlink' in self.args: + if self.args[ + 'create_symlink'] and not self.file_client.allow_symlink: + self.args['create_symlink'] = False + warnings.warn( + ('create_symlink is set as True by the user but is changed' + 'to be False because creating symbolic link is not ' + f'allowed in {self.file_client.name}')) + else: + self.args['create_symlink'] = self.file_client.allow_symlink + + def after_train_epoch(self, runner): + if not self.by_epoch: + return + + # save checkpoint for following cases: + # 1. every ``self.interval`` epochs + # 2. 
reach the last epoch of training + if self.every_n_epochs( + runner, self.interval) or (self.save_last + and self.is_last_epoch(runner)): + runner.logger.info( + f'Saving checkpoint at {runner.epoch + 1} epochs') + if self.sync_buffer: + allreduce_params(runner.model.buffers()) + self._save_checkpoint(runner) + + @master_only + def _save_checkpoint(self, runner): + """Save the current checkpoint and delete unwanted checkpoint.""" + runner.save_checkpoint( + self.out_dir, save_optimizer=self.save_optimizer, **self.args) + if runner.meta is not None: + if self.by_epoch: + cur_ckpt_filename = self.args.get( + 'filename_tmpl', 'epoch_{}.pth').format(runner.epoch + 1) + else: + cur_ckpt_filename = self.args.get( + 'filename_tmpl', 'iter_{}.pth').format(runner.iter + 1) + runner.meta.setdefault('hook_msgs', dict()) + runner.meta['hook_msgs']['last_ckpt'] = self.file_client.join_path( + self.out_dir, cur_ckpt_filename) + # remove other checkpoints + if self.max_keep_ckpts > 0: + if self.by_epoch: + name = 'epoch_{}.pth' + current_ckpt = runner.epoch + 1 + else: + name = 'iter_{}.pth' + current_ckpt = runner.iter + 1 + redundant_ckpts = range( + current_ckpt - self.max_keep_ckpts * self.interval, 0, + -self.interval) + filename_tmpl = self.args.get('filename_tmpl', name) + for _step in redundant_ckpts: + ckpt_path = self.file_client.join_path( + self.out_dir, filename_tmpl.format(_step)) + if self.file_client.isfile(ckpt_path): + self.file_client.remove(ckpt_path) + else: + break + + def after_train_iter(self, runner): + if self.by_epoch: + return + + # save checkpoint for following cases: + # 1. every ``self.interval`` iterations + # 2. reach the last iteration of training + if self.every_n_iters( + runner, self.interval) or (self.save_last + and self.is_last_iter(runner)): + runner.logger.info( + f'Saving checkpoint at {runner.iter + 1} iterations') + if self.sync_buffer: + allreduce_params(runner.model.buffers()) + self._save_checkpoint(runner) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/closure.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/closure.py new file mode 100644 index 0000000000000000000000000000000000000000..b955f81f425be4ac3e6bb3f4aac653887989e872 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/closure.py @@ -0,0 +1,11 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .hook import HOOKS, Hook + + +@HOOKS.register_module() +class ClosureHook(Hook): + + def __init__(self, fn_name, fn): + assert hasattr(self, fn_name) + assert callable(fn) + setattr(self, fn_name, fn) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/ema.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/ema.py new file mode 100644 index 0000000000000000000000000000000000000000..15c7e68088f019802a59e7ae41cc1fe0c7f28f96 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/ema.py @@ -0,0 +1,89 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from ...parallel import is_module_wrapper +from ..hooks.hook import HOOKS, Hook + + +@HOOKS.register_module() +class EMAHook(Hook): + r"""Exponential Moving Average Hook. + + Use Exponential Moving Average on all parameters of model in training + process. All parameters have a ema backup, which update by the formula + as below. EMAHook takes priority over EvalHook and CheckpointSaverHook. + + .. 
math:: + + \text{Xema\_{t+1}} = (1 - \text{momentum}) \times + \text{Xema\_{t}} + \text{momentum} \times X_t + + Args: + momentum (float): The momentum used for updating ema parameter. + Defaults to 0.0002. + interval (int): Update ema parameter every interval iteration. + Defaults to 1. + warm_up (int): During first warm_up steps, we may use smaller momentum + to update ema parameters more slowly. Defaults to 100. + resume_from (str): The checkpoint path. Defaults to None. + """ + + def __init__(self, + momentum=0.0002, + interval=1, + warm_up=100, + resume_from=None): + assert isinstance(interval, int) and interval > 0 + self.warm_up = warm_up + self.interval = interval + assert momentum > 0 and momentum < 1 + self.momentum = momentum**interval + self.checkpoint = resume_from + + def before_run(self, runner): + """To resume model with it's ema parameters more friendly. + + Register ema parameter as ``named_buffer`` to model + """ + model = runner.model + if is_module_wrapper(model): + model = model.module + self.param_ema_buffer = {} + self.model_parameters = dict(model.named_parameters(recurse=True)) + for name, value in self.model_parameters.items(): + # "." is not allowed in module's buffer name + buffer_name = f"ema_{name.replace('.', '_')}" + self.param_ema_buffer[name] = buffer_name + model.register_buffer(buffer_name, value.data.clone()) + self.model_buffers = dict(model.named_buffers(recurse=True)) + if self.checkpoint is not None: + runner.resume(self.checkpoint) + + def after_train_iter(self, runner): + """Update ema parameter every self.interval iterations.""" + curr_step = runner.iter + # We warm up the momentum considering the instability at beginning + momentum = min(self.momentum, + (1 + curr_step) / (self.warm_up + curr_step)) + if curr_step % self.interval != 0: + return + for name, parameter in self.model_parameters.items(): + buffer_name = self.param_ema_buffer[name] + buffer_parameter = self.model_buffers[buffer_name] + buffer_parameter.mul_(1 - momentum).add_(momentum, parameter.data) + + def after_train_epoch(self, runner): + """We load parameter values from ema backup to model before the + EvalHook.""" + self._swap_ema_parameters() + + def before_train_epoch(self, runner): + """We recover model's parameter from ema backup after last epoch's + EvalHook.""" + self._swap_ema_parameters() + + def _swap_ema_parameters(self): + """Swap the parameter of model with parameter in ema_buffer.""" + for name, value in self.model_parameters.items(): + temp = value.data.clone() + ema_buffer = self.model_buffers[self.param_ema_buffer[name]] + value.data.copy_(ema_buffer.data) + ema_buffer.data.copy_(temp) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/evaluation.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..a1dbdfd593bae505a70534226b79791baec6453e --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/evaluation.py @@ -0,0 +1,509 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
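The in-place update in `after_train_iter` above is a plain exponential moving average. A worked one-step example with an exaggerated momentum (the hook's default is 0.0002, and it uses the older positional `add_(alpha, tensor)` overload; the keyword form below computes the same thing):

```python
import torch

momentum = 0.5                      # exaggerated for readability
ema = torch.tensor([1.0])           # the registered ema_* buffer
param = torch.tensor([3.0])         # the live model parameter

ema.mul_(1 - momentum).add_(param, alpha=momentum)   # (1 - m) * ema + m * param
print(ema)                                           # tensor([2.])
```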
+import os.path as osp +import warnings +from math import inf + +import torch.distributed as dist +from torch.nn.modules.batchnorm import _BatchNorm +from torch.utils.data import DataLoader + +from annotator.mmpkg.mmcv.fileio import FileClient +from annotator.mmpkg.mmcv.utils import is_seq_of +from .hook import Hook +from .logger import LoggerHook + + +class EvalHook(Hook): + """Non-Distributed evaluation hook. + + This hook will regularly perform evaluation in a given interval when + performing in non-distributed environment. + + Args: + dataloader (DataLoader): A PyTorch dataloader, whose dataset has + implemented ``evaluate`` function. + start (int | None, optional): Evaluation starting epoch. It enables + evaluation before the training starts if ``start`` <= the resuming + epoch. If None, whether to evaluate is merely decided by + ``interval``. Default: None. + interval (int): Evaluation interval. Default: 1. + by_epoch (bool): Determine perform evaluation by epoch or by iteration. + If set to True, it will perform by epoch. Otherwise, by iteration. + Default: True. + save_best (str, optional): If a metric is specified, it would measure + the best checkpoint during evaluation. The information about best + checkpoint would be saved in ``runner.meta['hook_msgs']`` to keep + best score value and best checkpoint path, which will be also + loaded when resume checkpoint. Options are the evaluation metrics + on the test dataset. e.g., ``bbox_mAP``, ``segm_mAP`` for bbox + detection and instance segmentation. ``AR@100`` for proposal + recall. If ``save_best`` is ``auto``, the first key of the returned + ``OrderedDict`` result will be used. Default: None. + rule (str | None, optional): Comparison rule for best score. If set to + None, it will infer a reasonable rule. Keys such as 'acc', 'top' + .etc will be inferred by 'greater' rule. Keys contain 'loss' will + be inferred by 'less' rule. Options are 'greater', 'less', None. + Default: None. + test_fn (callable, optional): test a model with samples from a + dataloader, and return the test results. If ``None``, the default + test function ``mmcv.engine.single_gpu_test`` will be used. + (default: ``None``) + greater_keys (List[str] | None, optional): Metric keys that will be + inferred by 'greater' comparison rule. If ``None``, + _default_greater_keys will be used. (default: ``None``) + less_keys (List[str] | None, optional): Metric keys that will be + inferred by 'less' comparison rule. If ``None``, _default_less_keys + will be used. (default: ``None``) + out_dir (str, optional): The root directory to save checkpoints. If not + specified, `runner.work_dir` will be used by default. If specified, + the `out_dir` will be the concatenation of `out_dir` and the last + level directory of `runner.work_dir`. + `New in version 1.3.16.` + file_client_args (dict): Arguments to instantiate a FileClient. + See :class:`mmcv.fileio.FileClient` for details. Default: None. + `New in version 1.3.16.` + **eval_kwargs: Evaluation arguments fed into the evaluate function of + the dataset. + + Notes: + If new arguments are added for EvalHook, tools/test.py, + tools/eval_metric.py may be affected. + """ + + # Since the key for determine greater or less is related to the downstream + # tasks, downstream repos may need to overwrite the following inner + # variable accordingly. 
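A small sketch of how `save_best` and the rule inference behave, using a toy dataset that implements the `evaluate` contract the hook expects (all names below are invented for illustration, and the snippet assumes the vendored package, including its `engine` module, imports cleanly):

```python
import torch
from torch.utils.data import DataLoader, Dataset
from annotator.mmpkg.mmcv.runner.hooks import EvalHook

class ToyDataset(Dataset):
    def __len__(self):
        return 4

    def __getitem__(self, idx):
        return torch.zeros(1)

    def evaluate(self, results, logger=None, **kwargs):
        return {'mIoU': 0.5}        # EvalHook reads its metrics from this dict

hook = EvalHook(DataLoader(ToyDataset()), interval=1, save_best='mIoU')
print(hook.rule)                    # 'greater' -- 'mIoU' is one of the default greater keys
```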
+ + rule_map = {'greater': lambda x, y: x > y, 'less': lambda x, y: x < y} + init_value_map = {'greater': -inf, 'less': inf} + _default_greater_keys = [ + 'acc', 'top', 'AR@', 'auc', 'precision', 'mAP', 'mDice', 'mIoU', + 'mAcc', 'aAcc' + ] + _default_less_keys = ['loss'] + + def __init__(self, + dataloader, + start=None, + interval=1, + by_epoch=True, + save_best=None, + rule=None, + test_fn=None, + greater_keys=None, + less_keys=None, + out_dir=None, + file_client_args=None, + **eval_kwargs): + if not isinstance(dataloader, DataLoader): + raise TypeError(f'dataloader must be a pytorch DataLoader, ' + f'but got {type(dataloader)}') + + if interval <= 0: + raise ValueError(f'interval must be a positive number, ' + f'but got {interval}') + + assert isinstance(by_epoch, bool), '``by_epoch`` should be a boolean' + + if start is not None and start < 0: + raise ValueError(f'The evaluation start epoch {start} is smaller ' + f'than 0') + + self.dataloader = dataloader + self.interval = interval + self.start = start + self.by_epoch = by_epoch + + assert isinstance(save_best, str) or save_best is None, \ + '""save_best"" should be a str or None ' \ + f'rather than {type(save_best)}' + self.save_best = save_best + self.eval_kwargs = eval_kwargs + self.initial_flag = True + + if test_fn is None: + from annotator.mmpkg.mmcv.engine import single_gpu_test + self.test_fn = single_gpu_test + else: + self.test_fn = test_fn + + if greater_keys is None: + self.greater_keys = self._default_greater_keys + else: + if not isinstance(greater_keys, (list, tuple)): + greater_keys = (greater_keys, ) + assert is_seq_of(greater_keys, str) + self.greater_keys = greater_keys + + if less_keys is None: + self.less_keys = self._default_less_keys + else: + if not isinstance(less_keys, (list, tuple)): + less_keys = (less_keys, ) + assert is_seq_of(less_keys, str) + self.less_keys = less_keys + + if self.save_best is not None: + self.best_ckpt_path = None + self._init_rule(rule, self.save_best) + + self.out_dir = out_dir + self.file_client_args = file_client_args + + def _init_rule(self, rule, key_indicator): + """Initialize rule, key_indicator, comparison_func, and best score. + + Here is the rule to determine which rule is used for key indicator + when the rule is not specific (note that the key indicator matching + is case-insensitive): + 1. If the key indicator is in ``self.greater_keys``, the rule will be + specified as 'greater'. + 2. Or if the key indicator is in ``self.less_keys``, the rule will be + specified as 'less'. + 3. Or if the key indicator is equal to the substring in any one item + in ``self.greater_keys``, the rule will be specified as 'greater'. + 4. Or if the key indicator is equal to the substring in any one item + in ``self.less_keys``, the rule will be specified as 'less'. + + Args: + rule (str | None): Comparison rule for best score. + key_indicator (str | None): Key indicator to determine the + comparison rule. 
+ """ + if rule not in self.rule_map and rule is not None: + raise KeyError(f'rule must be greater, less or None, ' + f'but got {rule}.') + + if rule is None: + if key_indicator != 'auto': + # `_lc` here means we use the lower case of keys for + # case-insensitive matching + key_indicator_lc = key_indicator.lower() + greater_keys = [key.lower() for key in self.greater_keys] + less_keys = [key.lower() for key in self.less_keys] + + if key_indicator_lc in greater_keys: + rule = 'greater' + elif key_indicator_lc in less_keys: + rule = 'less' + elif any(key in key_indicator_lc for key in greater_keys): + rule = 'greater' + elif any(key in key_indicator_lc for key in less_keys): + rule = 'less' + else: + raise ValueError(f'Cannot infer the rule for key ' + f'{key_indicator}, thus a specific rule ' + f'must be specified.') + self.rule = rule + self.key_indicator = key_indicator + if self.rule is not None: + self.compare_func = self.rule_map[self.rule] + + def before_run(self, runner): + if not self.out_dir: + self.out_dir = runner.work_dir + + self.file_client = FileClient.infer_client(self.file_client_args, + self.out_dir) + + # if `self.out_dir` is not equal to `runner.work_dir`, it means that + # `self.out_dir` is set so the final `self.out_dir` is the + # concatenation of `self.out_dir` and the last level directory of + # `runner.work_dir` + if self.out_dir != runner.work_dir: + basename = osp.basename(runner.work_dir.rstrip(osp.sep)) + self.out_dir = self.file_client.join_path(self.out_dir, basename) + runner.logger.info( + (f'The best checkpoint will be saved to {self.out_dir} by ' + f'{self.file_client.name}')) + + if self.save_best is not None: + if runner.meta is None: + warnings.warn('runner.meta is None. Creating an empty one.') + runner.meta = dict() + runner.meta.setdefault('hook_msgs', dict()) + self.best_ckpt_path = runner.meta['hook_msgs'].get( + 'best_ckpt', None) + + def before_train_iter(self, runner): + """Evaluate the model only at the start of training by iteration.""" + if self.by_epoch or not self.initial_flag: + return + if self.start is not None and runner.iter >= self.start: + self.after_train_iter(runner) + self.initial_flag = False + + def before_train_epoch(self, runner): + """Evaluate the model only at the start of training by epoch.""" + if not (self.by_epoch and self.initial_flag): + return + if self.start is not None and runner.epoch >= self.start: + self.after_train_epoch(runner) + self.initial_flag = False + + def after_train_iter(self, runner): + """Called after every training iter to evaluate the results.""" + if not self.by_epoch and self._should_evaluate(runner): + # Because the priority of EvalHook is higher than LoggerHook, the + # training log and the evaluating log are mixed. Therefore, + # we need to dump the training log and clear it before evaluating + # log is generated. In addition, this problem will only appear in + # `IterBasedRunner` whose `self.by_epoch` is False, because + # `EpochBasedRunner` whose `self.by_epoch` is True calls + # `_do_evaluate` in `after_train_epoch` stage, and at this stage + # the training log has been printed, so it will not cause any + # problem. 
more details at + # https://github.com/open-mmlab/mmsegmentation/issues/694 + for hook in runner._hooks: + if isinstance(hook, LoggerHook): + hook.after_train_iter(runner) + runner.log_buffer.clear() + + self._do_evaluate(runner) + + def after_train_epoch(self, runner): + """Called after every training epoch to evaluate the results.""" + if self.by_epoch and self._should_evaluate(runner): + self._do_evaluate(runner) + + def _do_evaluate(self, runner): + """perform evaluation and save ckpt.""" + results = self.test_fn(runner.model, self.dataloader) + runner.log_buffer.output['eval_iter_num'] = len(self.dataloader) + key_score = self.evaluate(runner, results) + # the key_score may be `None` so it needs to skip the action to save + # the best checkpoint + if self.save_best and key_score: + self._save_ckpt(runner, key_score) + + def _should_evaluate(self, runner): + """Judge whether to perform evaluation. + + Here is the rule to judge whether to perform evaluation: + 1. It will not perform evaluation during the epoch/iteration interval, + which is determined by ``self.interval``. + 2. It will not perform evaluation if the start time is larger than + current time. + 3. It will not perform evaluation when current time is larger than + the start time but during epoch/iteration interval. + + Returns: + bool: The flag indicating whether to perform evaluation. + """ + if self.by_epoch: + current = runner.epoch + check_time = self.every_n_epochs + else: + current = runner.iter + check_time = self.every_n_iters + + if self.start is None: + if not check_time(runner, self.interval): + # No evaluation during the interval. + return False + elif (current + 1) < self.start: + # No evaluation if start is larger than the current time. + return False + else: + # Evaluation only at epochs/iters 3, 5, 7... + # if start==3 and interval==2 + if (current + 1 - self.start) % self.interval: + return False + return True + + def _save_ckpt(self, runner, key_score): + """Save the best checkpoint. + + It will compare the score according to the compare function, write + related information (best score, best checkpoint path) and save the + best checkpoint into ``work_dir``. + """ + if self.by_epoch: + current = f'epoch_{runner.epoch + 1}' + cur_type, cur_time = 'epoch', runner.epoch + 1 + else: + current = f'iter_{runner.iter + 1}' + cur_type, cur_time = 'iter', runner.iter + 1 + + best_score = runner.meta['hook_msgs'].get( + 'best_score', self.init_value_map[self.rule]) + if self.compare_func(key_score, best_score): + best_score = key_score + runner.meta['hook_msgs']['best_score'] = best_score + + if self.best_ckpt_path and self.file_client.isfile( + self.best_ckpt_path): + self.file_client.remove(self.best_ckpt_path) + runner.logger.info( + (f'The previous best checkpoint {self.best_ckpt_path} was ' + 'removed')) + + best_ckpt_name = f'best_{self.key_indicator}_{current}.pth' + self.best_ckpt_path = self.file_client.join_path( + self.out_dir, best_ckpt_name) + runner.meta['hook_msgs']['best_ckpt'] = self.best_ckpt_path + + runner.save_checkpoint( + self.out_dir, best_ckpt_name, create_symlink=False) + runner.logger.info( + f'Now best checkpoint is saved as {best_ckpt_name}.') + runner.logger.info( + f'Best {self.key_indicator} is {best_score:0.4f} ' + f'at {cur_time} {cur_type}.') + + def evaluate(self, runner, results): + """Evaluate the results. + + Args: + runner (:obj:`mmcv.Runner`): The underlined training runner. + results (list): Output results. 
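The `start`/`interval` arithmetic in `_should_evaluate` is easy to get wrong by one; here is a standalone mirror of it for the `by_epoch=True` case (just the same arithmetic, not the hook itself):

```python
def should_evaluate(epoch, start=3, interval=2):
    """Mirror of EvalHook._should_evaluate for by_epoch=True (epoch is 0-based)."""
    if start is None:
        return (epoch + 1) % interval == 0
    if (epoch + 1) < start:
        return False                              # never evaluate before `start`
    return (epoch + 1 - start) % interval == 0

print([e + 1 for e in range(8) if should_evaluate(e)])   # [3, 5, 7]
```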
+ """ + eval_res = self.dataloader.dataset.evaluate( + results, logger=runner.logger, **self.eval_kwargs) + + for name, val in eval_res.items(): + runner.log_buffer.output[name] = val + runner.log_buffer.ready = True + + if self.save_best is not None: + # If the performance of model is pool, the `eval_res` may be an + # empty dict and it will raise exception when `self.save_best` is + # not None. More details at + # https://github.com/open-mmlab/mmdetection/issues/6265. + if not eval_res: + warnings.warn( + 'Since `eval_res` is an empty dict, the behavior to save ' + 'the best checkpoint will be skipped in this evaluation.') + return None + + if self.key_indicator == 'auto': + # infer from eval_results + self._init_rule(self.rule, list(eval_res.keys())[0]) + return eval_res[self.key_indicator] + + return None + + +class DistEvalHook(EvalHook): + """Distributed evaluation hook. + + This hook will regularly perform evaluation in a given interval when + performing in distributed environment. + + Args: + dataloader (DataLoader): A PyTorch dataloader, whose dataset has + implemented ``evaluate`` function. + start (int | None, optional): Evaluation starting epoch. It enables + evaluation before the training starts if ``start`` <= the resuming + epoch. If None, whether to evaluate is merely decided by + ``interval``. Default: None. + interval (int): Evaluation interval. Default: 1. + by_epoch (bool): Determine perform evaluation by epoch or by iteration. + If set to True, it will perform by epoch. Otherwise, by iteration. + default: True. + save_best (str, optional): If a metric is specified, it would measure + the best checkpoint during evaluation. The information about best + checkpoint would be saved in ``runner.meta['hook_msgs']`` to keep + best score value and best checkpoint path, which will be also + loaded when resume checkpoint. Options are the evaluation metrics + on the test dataset. e.g., ``bbox_mAP``, ``segm_mAP`` for bbox + detection and instance segmentation. ``AR@100`` for proposal + recall. If ``save_best`` is ``auto``, the first key of the returned + ``OrderedDict`` result will be used. Default: None. + rule (str | None, optional): Comparison rule for best score. If set to + None, it will infer a reasonable rule. Keys such as 'acc', 'top' + .etc will be inferred by 'greater' rule. Keys contain 'loss' will + be inferred by 'less' rule. Options are 'greater', 'less', None. + Default: None. + test_fn (callable, optional): test a model with samples from a + dataloader in a multi-gpu manner, and return the test results. If + ``None``, the default test function ``mmcv.engine.multi_gpu_test`` + will be used. (default: ``None``) + tmpdir (str | None): Temporary directory to save the results of all + processes. Default: None. + gpu_collect (bool): Whether to use gpu or cpu to collect results. + Default: False. + broadcast_bn_buffer (bool): Whether to broadcast the + buffer(running_mean and running_var) of rank 0 to other rank + before evaluation. Default: True. + out_dir (str, optional): The root directory to save checkpoints. If not + specified, `runner.work_dir` will be used by default. If specified, + the `out_dir` will be the concatenation of `out_dir` and the last + level directory of `runner.work_dir`. + file_client_args (dict): Arguments to instantiate a FileClient. + See :class:`mmcv.fileio.FileClient` for details. Default: None. + **eval_kwargs: Evaluation arguments fed into the evaluate function of + the dataset. 
+ """ + + def __init__(self, + dataloader, + start=None, + interval=1, + by_epoch=True, + save_best=None, + rule=None, + test_fn=None, + greater_keys=None, + less_keys=None, + broadcast_bn_buffer=True, + tmpdir=None, + gpu_collect=False, + out_dir=None, + file_client_args=None, + **eval_kwargs): + + if test_fn is None: + from annotator.mmpkg.mmcv.engine import multi_gpu_test + test_fn = multi_gpu_test + + super().__init__( + dataloader, + start=start, + interval=interval, + by_epoch=by_epoch, + save_best=save_best, + rule=rule, + test_fn=test_fn, + greater_keys=greater_keys, + less_keys=less_keys, + out_dir=out_dir, + file_client_args=file_client_args, + **eval_kwargs) + + self.broadcast_bn_buffer = broadcast_bn_buffer + self.tmpdir = tmpdir + self.gpu_collect = gpu_collect + + def _do_evaluate(self, runner): + """perform evaluation and save ckpt.""" + # Synchronization of BatchNorm's buffer (running_mean + # and running_var) is not supported in the DDP of pytorch, + # which may cause the inconsistent performance of models in + # different ranks, so we broadcast BatchNorm's buffers + # of rank 0 to other ranks to avoid this. + if self.broadcast_bn_buffer: + model = runner.model + for name, module in model.named_modules(): + if isinstance(module, + _BatchNorm) and module.track_running_stats: + dist.broadcast(module.running_var, 0) + dist.broadcast(module.running_mean, 0) + + tmpdir = self.tmpdir + if tmpdir is None: + tmpdir = osp.join(runner.work_dir, '.eval_hook') + + results = self.test_fn( + runner.model, + self.dataloader, + tmpdir=tmpdir, + gpu_collect=self.gpu_collect) + if runner.rank == 0: + print('\n') + runner.log_buffer.output['eval_iter_num'] = len(self.dataloader) + key_score = self.evaluate(runner, results) + # the key_score may be `None` so it needs to skip the action to + # save the best checkpoint + if self.save_best and key_score: + self._save_ckpt(runner, key_score) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/hook.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/hook.py new file mode 100644 index 0000000000000000000000000000000000000000..bd31f985fee739ccb7ac62eefc6cef9f0c0d65d0 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/hook.py @@ -0,0 +1,92 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+from annotator.mmpkg.mmcv.utils import Registry, is_method_overridden + +HOOKS = Registry('hook') + + +class Hook: + stages = ('before_run', 'before_train_epoch', 'before_train_iter', + 'after_train_iter', 'after_train_epoch', 'before_val_epoch', + 'before_val_iter', 'after_val_iter', 'after_val_epoch', + 'after_run') + + def before_run(self, runner): + pass + + def after_run(self, runner): + pass + + def before_epoch(self, runner): + pass + + def after_epoch(self, runner): + pass + + def before_iter(self, runner): + pass + + def after_iter(self, runner): + pass + + def before_train_epoch(self, runner): + self.before_epoch(runner) + + def before_val_epoch(self, runner): + self.before_epoch(runner) + + def after_train_epoch(self, runner): + self.after_epoch(runner) + + def after_val_epoch(self, runner): + self.after_epoch(runner) + + def before_train_iter(self, runner): + self.before_iter(runner) + + def before_val_iter(self, runner): + self.before_iter(runner) + + def after_train_iter(self, runner): + self.after_iter(runner) + + def after_val_iter(self, runner): + self.after_iter(runner) + + def every_n_epochs(self, runner, n): + return (runner.epoch + 1) % n == 0 if n > 0 else False + + def every_n_inner_iters(self, runner, n): + return (runner.inner_iter + 1) % n == 0 if n > 0 else False + + def every_n_iters(self, runner, n): + return (runner.iter + 1) % n == 0 if n > 0 else False + + def end_of_epoch(self, runner): + return runner.inner_iter + 1 == len(runner.data_loader) + + def is_last_epoch(self, runner): + return runner.epoch + 1 == runner._max_epochs + + def is_last_iter(self, runner): + return runner.iter + 1 == runner._max_iters + + def get_triggered_stages(self): + trigger_stages = set() + for stage in Hook.stages: + if is_method_overridden(stage, Hook, self): + trigger_stages.add(stage) + + # some methods will be triggered in multi stages + # use this dict to map method to stages. + method_stages_map = { + 'before_epoch': ['before_train_epoch', 'before_val_epoch'], + 'after_epoch': ['after_train_epoch', 'after_val_epoch'], + 'before_iter': ['before_train_iter', 'before_val_iter'], + 'after_iter': ['after_train_iter', 'after_val_iter'], + } + + for method, map_stages in method_stages_map.items(): + if is_method_overridden(method, Hook, self): + trigger_stages.update(map_stages) + + return [stage for stage in Hook.stages if stage in trigger_stages] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/iter_timer.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/iter_timer.py new file mode 100644 index 0000000000000000000000000000000000000000..cfd5002fe85ffc6992155ac01003878064a1d9be --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/iter_timer.py @@ -0,0 +1,18 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
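Putting the base class above to use, a hook only needs to override the stages it cares about; `get_triggered_stages` then reports exactly those stages back to the runner. A toy sketch (the class name and print statements are illustrative):

```python
from annotator.mmpkg.mmcv.runner.hooks import HOOKS, Hook

@HOOKS.register_module()
class PrintEpochHook(Hook):

    def before_train_epoch(self, runner):
        print(f'starting epoch {runner.epoch + 1}')

    def after_train_epoch(self, runner):
        print(f'finished epoch {runner.epoch + 1}')

print(PrintEpochHook().get_triggered_stages())
# ['before_train_epoch', 'after_train_epoch']
```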
+import time + +from .hook import HOOKS, Hook + + +@HOOKS.register_module() +class IterTimerHook(Hook): + + def before_epoch(self, runner): + self.t = time.time() + + def before_iter(self, runner): + runner.log_buffer.update({'data_time': time.time() - self.t}) + + def after_iter(self, runner): + runner.log_buffer.update({'time': time.time() - self.t}) + self.t = time.time() diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/logger/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/logger/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a0b6b345640a895368ac8a647afef6f24333d90e --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/logger/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .base import LoggerHook +from .dvclive import DvcliveLoggerHook +from .mlflow import MlflowLoggerHook +from .neptune import NeptuneLoggerHook +from .pavi import PaviLoggerHook +from .tensorboard import TensorboardLoggerHook +from .text import TextLoggerHook +from .wandb import WandbLoggerHook + +__all__ = [ + 'LoggerHook', 'MlflowLoggerHook', 'PaviLoggerHook', + 'TensorboardLoggerHook', 'TextLoggerHook', 'WandbLoggerHook', + 'NeptuneLoggerHook', 'DvcliveLoggerHook' +] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/logger/base.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/logger/base.py new file mode 100644 index 0000000000000000000000000000000000000000..f845256729458ced821762a1b8ef881e17ff9955 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/logger/base.py @@ -0,0 +1,166 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import numbers +from abc import ABCMeta, abstractmethod + +import numpy as np +import torch + +from ..hook import Hook + + +class LoggerHook(Hook): + """Base class for logger hooks. + + Args: + interval (int): Logging interval (every k iterations). + ignore_last (bool): Ignore the log of last iterations in each epoch + if less than `interval`. + reset_flag (bool): Whether to clear the output buffer after logging. + by_epoch (bool): Whether EpochBasedRunner is used. + """ + + __metaclass__ = ABCMeta + + def __init__(self, + interval=10, + ignore_last=True, + reset_flag=False, + by_epoch=True): + self.interval = interval + self.ignore_last = ignore_last + self.reset_flag = reset_flag + self.by_epoch = by_epoch + + @abstractmethod + def log(self, runner): + pass + + @staticmethod + def is_scalar(val, include_np=True, include_torch=True): + """Tell the input variable is a scalar or not. + + Args: + val: Input variable. + include_np (bool): Whether include 0-d np.ndarray as a scalar. + include_torch (bool): Whether include 0-d torch.Tensor as a scalar. + + Returns: + bool: True or False. 
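`is_scalar` is what decides which log-buffer entries become numeric tags; a few probes of its behaviour (note that it accepts a single-element tensor, not only a 0-d one):

```python
import numpy as np
import torch
from annotator.mmpkg.mmcv.runner.hooks.logger import LoggerHook

print(LoggerHook.is_scalar(3.14))                   # True  (plain Python number)
print(LoggerHook.is_scalar(np.array(2.0)))          # True  (0-d ndarray)
print(LoggerHook.is_scalar(torch.tensor([2.0])))    # True  (single-element tensor)
print(LoggerHook.is_scalar('mIoU: 0.5'))            # False (strings only pass with allow_text)
```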
+ """ + if isinstance(val, numbers.Number): + return True + elif include_np and isinstance(val, np.ndarray) and val.ndim == 0: + return True + elif include_torch and isinstance(val, torch.Tensor) and len(val) == 1: + return True + else: + return False + + def get_mode(self, runner): + if runner.mode == 'train': + if 'time' in runner.log_buffer.output: + mode = 'train' + else: + mode = 'val' + elif runner.mode == 'val': + mode = 'val' + else: + raise ValueError(f"runner mode should be 'train' or 'val', " + f'but got {runner.mode}') + return mode + + def get_epoch(self, runner): + if runner.mode == 'train': + epoch = runner.epoch + 1 + elif runner.mode == 'val': + # normal val mode + # runner.epoch += 1 has been done before val workflow + epoch = runner.epoch + else: + raise ValueError(f"runner mode should be 'train' or 'val', " + f'but got {runner.mode}') + return epoch + + def get_iter(self, runner, inner_iter=False): + """Get the current training iteration step.""" + if self.by_epoch and inner_iter: + current_iter = runner.inner_iter + 1 + else: + current_iter = runner.iter + 1 + return current_iter + + def get_lr_tags(self, runner): + tags = {} + lrs = runner.current_lr() + if isinstance(lrs, dict): + for name, value in lrs.items(): + tags[f'learning_rate/{name}'] = value[0] + else: + tags['learning_rate'] = lrs[0] + return tags + + def get_momentum_tags(self, runner): + tags = {} + momentums = runner.current_momentum() + if isinstance(momentums, dict): + for name, value in momentums.items(): + tags[f'momentum/{name}'] = value[0] + else: + tags['momentum'] = momentums[0] + return tags + + def get_loggable_tags(self, + runner, + allow_scalar=True, + allow_text=False, + add_mode=True, + tags_to_skip=('time', 'data_time')): + tags = {} + for var, val in runner.log_buffer.output.items(): + if var in tags_to_skip: + continue + if self.is_scalar(val) and not allow_scalar: + continue + if isinstance(val, str) and not allow_text: + continue + if add_mode: + var = f'{self.get_mode(runner)}/{var}' + tags[var] = val + tags.update(self.get_lr_tags(runner)) + tags.update(self.get_momentum_tags(runner)) + return tags + + def before_run(self, runner): + for hook in runner.hooks[::-1]: + if isinstance(hook, LoggerHook): + hook.reset_flag = True + break + + def before_epoch(self, runner): + runner.log_buffer.clear() # clear logs of last epoch + + def after_train_iter(self, runner): + if self.by_epoch and self.every_n_inner_iters(runner, self.interval): + runner.log_buffer.average(self.interval) + elif not self.by_epoch and self.every_n_iters(runner, self.interval): + runner.log_buffer.average(self.interval) + elif self.end_of_epoch(runner) and not self.ignore_last: + # not precise but more stable + runner.log_buffer.average(self.interval) + + if runner.log_buffer.ready: + self.log(runner) + if self.reset_flag: + runner.log_buffer.clear_output() + + def after_train_epoch(self, runner): + if runner.log_buffer.ready: + self.log(runner) + if self.reset_flag: + runner.log_buffer.clear_output() + + def after_val_epoch(self, runner): + runner.log_buffer.average() + self.log(runner) + if self.reset_flag: + runner.log_buffer.clear_output() diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/logger/dvclive.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/logger/dvclive.py new file mode 100644 index 0000000000000000000000000000000000000000..687cdc58c0336c92b1e4f9a410ba67ebaab2bc7a --- /dev/null +++ 
b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/logger/dvclive.py @@ -0,0 +1,58 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from ...dist_utils import master_only +from ..hook import HOOKS +from .base import LoggerHook + + +@HOOKS.register_module() +class DvcliveLoggerHook(LoggerHook): + """Class to log metrics with dvclive. + + It requires `dvclive`_ to be installed. + + Args: + path (str): Directory where dvclive will write TSV log files. + interval (int): Logging interval (every k iterations). + Default 10. + ignore_last (bool): Ignore the log of last iterations in each epoch + if less than `interval`. + Default: True. + reset_flag (bool): Whether to clear the output buffer after logging. + Default: True. + by_epoch (bool): Whether EpochBasedRunner is used. + Default: True. + + .. _dvclive: + https://dvc.org/doc/dvclive + """ + + def __init__(self, + path, + interval=10, + ignore_last=True, + reset_flag=True, + by_epoch=True): + + super(DvcliveLoggerHook, self).__init__(interval, ignore_last, + reset_flag, by_epoch) + self.path = path + self.import_dvclive() + + def import_dvclive(self): + try: + import dvclive + except ImportError: + raise ImportError( + 'Please run "pip install dvclive" to install dvclive') + self.dvclive = dvclive + + @master_only + def before_run(self, runner): + self.dvclive.init(self.path) + + @master_only + def log(self, runner): + tags = self.get_loggable_tags(runner) + if tags: + for k, v in tags.items(): + self.dvclive.log(k, v, step=self.get_iter(runner)) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/logger/mlflow.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/logger/mlflow.py new file mode 100644 index 0000000000000000000000000000000000000000..f9a72592be47b534ce22573775fd5a7e8e86d72d --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/logger/mlflow.py @@ -0,0 +1,78 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from ...dist_utils import master_only +from ..hook import HOOKS +from .base import LoggerHook + + +@HOOKS.register_module() +class MlflowLoggerHook(LoggerHook): + + def __init__(self, + exp_name=None, + tags=None, + log_model=True, + interval=10, + ignore_last=True, + reset_flag=False, + by_epoch=True): + """Class to log metrics and (optionally) a trained model to MLflow. + + It requires `MLflow`_ to be installed. + + Args: + exp_name (str, optional): Name of the experiment to be used. + Default None. + If not None, set the active experiment. + If experiment does not exist, an experiment with provided name + will be created. + tags (dict of str: str, optional): Tags for the current run. + Default None. + If not None, set tags for the current run. + log_model (bool, optional): Whether to log an MLflow artifact. + Default True. + If True, log runner.model as an MLflow artifact + for the current run. + interval (int): Logging interval (every k iterations). + ignore_last (bool): Ignore the log of last iterations in each epoch + if less than `interval`. + reset_flag (bool): Whether to clear the output buffer after logging + by_epoch (bool): Whether EpochBasedRunner is used. + + .. 
_MLflow: + https://www.mlflow.org/docs/latest/index.html + """ + super(MlflowLoggerHook, self).__init__(interval, ignore_last, + reset_flag, by_epoch) + self.import_mlflow() + self.exp_name = exp_name + self.tags = tags + self.log_model = log_model + + def import_mlflow(self): + try: + import mlflow + import mlflow.pytorch as mlflow_pytorch + except ImportError: + raise ImportError( + 'Please run "pip install mlflow" to install mlflow') + self.mlflow = mlflow + self.mlflow_pytorch = mlflow_pytorch + + @master_only + def before_run(self, runner): + super(MlflowLoggerHook, self).before_run(runner) + if self.exp_name is not None: + self.mlflow.set_experiment(self.exp_name) + if self.tags is not None: + self.mlflow.set_tags(self.tags) + + @master_only + def log(self, runner): + tags = self.get_loggable_tags(runner) + if tags: + self.mlflow.log_metrics(tags, step=self.get_iter(runner)) + + @master_only + def after_run(self, runner): + if self.log_model: + self.mlflow_pytorch.log_model(runner.model, 'models') diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/logger/neptune.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/logger/neptune.py new file mode 100644 index 0000000000000000000000000000000000000000..7a38772b0c93a8608f32c6357b8616e77c139dc9 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/logger/neptune.py @@ -0,0 +1,82 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from ...dist_utils import master_only +from ..hook import HOOKS +from .base import LoggerHook + + +@HOOKS.register_module() +class NeptuneLoggerHook(LoggerHook): + """Class to log metrics to NeptuneAI. + + It requires `neptune-client` to be installed. + + Args: + init_kwargs (dict): a dict contains the initialization keys as below: + - project (str): Name of a project in a form of + namespace/project_name. If None, the value of + NEPTUNE_PROJECT environment variable will be taken. + - api_token (str): User’s API token. + If None, the value of NEPTUNE_API_TOKEN environment + variable will be taken. Note: It is strongly recommended + to use NEPTUNE_API_TOKEN environment variable rather than + placing your API token in plain text in your source code. + - name (str, optional, default is 'Untitled'): Editable name of + the run. Name is displayed in the run's Details and in + Runs table as a column. + Check https://docs.neptune.ai/api-reference/neptune#init for + more init arguments. + interval (int): Logging interval (every k iterations). + ignore_last (bool): Ignore the log of last iterations in each epoch + if less than `interval`. + reset_flag (bool): Whether to clear the output buffer after logging + by_epoch (bool): Whether EpochBasedRunner is used. + + .. 
_NeptuneAI: + https://docs.neptune.ai/you-should-know/logging-metadata + """ + + def __init__(self, + init_kwargs=None, + interval=10, + ignore_last=True, + reset_flag=True, + with_step=True, + by_epoch=True): + + super(NeptuneLoggerHook, self).__init__(interval, ignore_last, + reset_flag, by_epoch) + self.import_neptune() + self.init_kwargs = init_kwargs + self.with_step = with_step + + def import_neptune(self): + try: + import neptune.new as neptune + except ImportError: + raise ImportError( + 'Please run "pip install neptune-client" to install neptune') + self.neptune = neptune + self.run = None + + @master_only + def before_run(self, runner): + if self.init_kwargs: + self.run = self.neptune.init(**self.init_kwargs) + else: + self.run = self.neptune.init() + + @master_only + def log(self, runner): + tags = self.get_loggable_tags(runner) + if tags: + for tag_name, tag_value in tags.items(): + if self.with_step: + self.run[tag_name].log( + tag_value, step=self.get_iter(runner)) + else: + tags['global_step'] = self.get_iter(runner) + self.run[tag_name].log(tags) + + @master_only + def after_run(self, runner): + self.run.stop() diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/logger/pavi.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/logger/pavi.py new file mode 100644 index 0000000000000000000000000000000000000000..5d1c4286920361e6b80f135b8d60b250f98f507a --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/logger/pavi.py @@ -0,0 +1,117 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import json +import os +import os.path as osp + +import torch +import yaml + +import annotator.mmpkg.mmcv as mmcv +from ....parallel.utils import is_module_wrapper +from ...dist_utils import master_only +from ..hook import HOOKS +from .base import LoggerHook + + +@HOOKS.register_module() +class PaviLoggerHook(LoggerHook): + + def __init__(self, + init_kwargs=None, + add_graph=False, + add_last_ckpt=False, + interval=10, + ignore_last=True, + reset_flag=False, + by_epoch=True, + img_key='img_info'): + super(PaviLoggerHook, self).__init__(interval, ignore_last, reset_flag, + by_epoch) + self.init_kwargs = init_kwargs + self.add_graph = add_graph + self.add_last_ckpt = add_last_ckpt + self.img_key = img_key + + @master_only + def before_run(self, runner): + super(PaviLoggerHook, self).before_run(runner) + try: + from pavi import SummaryWriter + except ImportError: + raise ImportError('Please run "pip install pavi" to install pavi.') + + self.run_name = runner.work_dir.split('/')[-1] + + if not self.init_kwargs: + self.init_kwargs = dict() + self.init_kwargs['name'] = self.run_name + self.init_kwargs['model'] = runner._model_name + if runner.meta is not None: + if 'config_dict' in runner.meta: + config_dict = runner.meta['config_dict'] + assert isinstance( + config_dict, + dict), ('meta["config_dict"] has to be of a dict, ' + f'but got {type(config_dict)}') + elif 'config_file' in runner.meta: + config_file = runner.meta['config_file'] + config_dict = dict(mmcv.Config.fromfile(config_file)) + else: + config_dict = None + if config_dict is not None: + # 'max_.*iter' is parsed in pavi sdk as the maximum iterations + # to properly set up the progress bar. 
+ config_dict = config_dict.copy() + config_dict.setdefault('max_iter', runner.max_iters) + # non-serializable values are first converted in + # mmcv.dump to json + config_dict = json.loads( + mmcv.dump(config_dict, file_format='json')) + session_text = yaml.dump(config_dict) + self.init_kwargs['session_text'] = session_text + self.writer = SummaryWriter(**self.init_kwargs) + + def get_step(self, runner): + """Get the total training step/epoch.""" + if self.get_mode(runner) == 'val' and self.by_epoch: + return self.get_epoch(runner) + else: + return self.get_iter(runner) + + @master_only + def log(self, runner): + tags = self.get_loggable_tags(runner, add_mode=False) + if tags: + self.writer.add_scalars( + self.get_mode(runner), tags, self.get_step(runner)) + + @master_only + def after_run(self, runner): + if self.add_last_ckpt: + ckpt_path = osp.join(runner.work_dir, 'latest.pth') + if osp.islink(ckpt_path): + ckpt_path = osp.join(runner.work_dir, os.readlink(ckpt_path)) + + if osp.isfile(ckpt_path): + # runner.epoch += 1 has been done before `after_run`. + iteration = runner.epoch if self.by_epoch else runner.iter + return self.writer.add_snapshot_file( + tag=self.run_name, + snapshot_file_path=ckpt_path, + iteration=iteration) + + # flush the buffer and send a task ending signal to Pavi + self.writer.close() + + @master_only + def before_epoch(self, runner): + if runner.epoch == 0 and self.add_graph: + if is_module_wrapper(runner.model): + _model = runner.model.module + else: + _model = runner.model + device = next(_model.parameters()).device + data = next(iter(runner.data_loader)) + image = data[self.img_key][0:1].to(device) + with torch.no_grad(): + self.writer.add_graph(_model, image) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/logger/tensorboard.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/logger/tensorboard.py new file mode 100644 index 0000000000000000000000000000000000000000..7c480a560e90f5b06abb4afaf9597aaf7c1eaa82 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/logger/tensorboard.py @@ -0,0 +1,57 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
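For orientation, the MLflow, Neptune and Pavi logger hooks above are all registered via `@HOOKS.register_module()`, so in OpenMMLab-style training they are usually enabled declaratively from a config rather than constructed by hand. The sketch below is illustrative only: the experiment name, interval value and the `register_logger_hooks` call follow upstream mmcv conventions and are assumptions, not something defined in this diff.

# Hypothetical config snippet enabling two of the logger hooks defined above.
# 'interval' is the logging period in iterations; 'demo_exp' is a placeholder name.
log_config = dict(
    interval=50,
    hooks=[
        dict(type='MlflowLoggerHook', exp_name='demo_exp', log_model=True),
        dict(type='NeptuneLoggerHook'),
    ],
)

# In upstream mmcv, a runner would typically attach these with something like
#     runner.register_logger_hooks(log_config)
# which builds each dict through the HOOKS registry and registers the resulting hook.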
+import os.path as osp + +from annotator.mmpkg.mmcv.utils import TORCH_VERSION, digit_version +from ...dist_utils import master_only +from ..hook import HOOKS +from .base import LoggerHook + + +@HOOKS.register_module() +class TensorboardLoggerHook(LoggerHook): + + def __init__(self, + log_dir=None, + interval=10, + ignore_last=True, + reset_flag=False, + by_epoch=True): + super(TensorboardLoggerHook, self).__init__(interval, ignore_last, + reset_flag, by_epoch) + self.log_dir = log_dir + + @master_only + def before_run(self, runner): + super(TensorboardLoggerHook, self).before_run(runner) + if (TORCH_VERSION == 'parrots' + or digit_version(TORCH_VERSION) < digit_version('1.1')): + try: + from tensorboardX import SummaryWriter + except ImportError: + raise ImportError('Please install tensorboardX to use ' + 'TensorboardLoggerHook.') + else: + try: + from torch.utils.tensorboard import SummaryWriter + except ImportError: + raise ImportError( + 'Please run "pip install future tensorboard" to install ' + 'the dependencies to use torch.utils.tensorboard ' + '(applicable to PyTorch 1.1 or higher)') + + if self.log_dir is None: + self.log_dir = osp.join(runner.work_dir, 'tf_logs') + self.writer = SummaryWriter(self.log_dir) + + @master_only + def log(self, runner): + tags = self.get_loggable_tags(runner, allow_text=True) + for tag, val in tags.items(): + if isinstance(val, str): + self.writer.add_text(tag, val, self.get_iter(runner)) + else: + self.writer.add_scalar(tag, val, self.get_iter(runner)) + + @master_only + def after_run(self, runner): + self.writer.close() diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/logger/text.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/logger/text.py new file mode 100644 index 0000000000000000000000000000000000000000..0b30577469d5f70e544e1ce73816326e38dadb20 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/logger/text.py @@ -0,0 +1,256 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import datetime +import os +import os.path as osp +from collections import OrderedDict + +import torch +import torch.distributed as dist + +import annotator.mmpkg.mmcv as mmcv +from annotator.mmpkg.mmcv.fileio.file_client import FileClient +from annotator.mmpkg.mmcv.utils import is_tuple_of, scandir +from ..hook import HOOKS +from .base import LoggerHook + + +@HOOKS.register_module() +class TextLoggerHook(LoggerHook): + """Logger hook in text. + + In this logger hook, the information will be printed on terminal and + saved in json file. + + Args: + by_epoch (bool, optional): Whether EpochBasedRunner is used. + Default: True. + interval (int, optional): Logging interval (every k iterations). + Default: 10. + ignore_last (bool, optional): Ignore the log of last iterations in each + epoch if less than :attr:`interval`. Default: True. + reset_flag (bool, optional): Whether to clear the output buffer after + logging. Default: False. + interval_exp_name (int, optional): Logging interval for experiment + name. This feature is to help users conveniently get the experiment + information from screen or log file. Default: 1000. + out_dir (str, optional): Logs are saved in ``runner.work_dir`` default. + If ``out_dir`` is specified, logs will be copied to a new directory + which is the concatenation of ``out_dir`` and the last level + directory of ``runner.work_dir``. Default: None. 
+ `New in version 1.3.16.` + out_suffix (str or tuple[str], optional): Those filenames ending with + ``out_suffix`` will be copied to ``out_dir``. + Default: ('.log.json', '.log', '.py'). + `New in version 1.3.16.` + keep_local (bool, optional): Whether to keep local log when + :attr:`out_dir` is specified. If False, the local log will be + removed. Default: True. + `New in version 1.3.16.` + file_client_args (dict, optional): Arguments to instantiate a + FileClient. See :class:`mmcv.fileio.FileClient` for details. + Default: None. + `New in version 1.3.16.` + """ + + def __init__(self, + by_epoch=True, + interval=10, + ignore_last=True, + reset_flag=False, + interval_exp_name=1000, + out_dir=None, + out_suffix=('.log.json', '.log', '.py'), + keep_local=True, + file_client_args=None): + super(TextLoggerHook, self).__init__(interval, ignore_last, reset_flag, + by_epoch) + self.by_epoch = by_epoch + self.time_sec_tot = 0 + self.interval_exp_name = interval_exp_name + + if out_dir is None and file_client_args is not None: + raise ValueError( + 'file_client_args should be "None" when `out_dir` is not' + 'specified.') + self.out_dir = out_dir + + if not (out_dir is None or isinstance(out_dir, str) + or is_tuple_of(out_dir, str)): + raise TypeError('out_dir should be "None" or string or tuple of ' + 'string, but got {out_dir}') + self.out_suffix = out_suffix + + self.keep_local = keep_local + self.file_client_args = file_client_args + if self.out_dir is not None: + self.file_client = FileClient.infer_client(file_client_args, + self.out_dir) + + def before_run(self, runner): + super(TextLoggerHook, self).before_run(runner) + + if self.out_dir is not None: + self.file_client = FileClient.infer_client(self.file_client_args, + self.out_dir) + # The final `self.out_dir` is the concatenation of `self.out_dir` + # and the last level directory of `runner.work_dir` + basename = osp.basename(runner.work_dir.rstrip(osp.sep)) + self.out_dir = self.file_client.join_path(self.out_dir, basename) + runner.logger.info( + (f'Text logs will be saved to {self.out_dir} by ' + f'{self.file_client.name} after the training process.')) + + self.start_iter = runner.iter + self.json_log_path = osp.join(runner.work_dir, + f'{runner.timestamp}.log.json') + if runner.meta is not None: + self._dump_log(runner.meta, runner) + + def _get_max_memory(self, runner): + device = getattr(runner.model, 'output_device', None) + mem = torch.cuda.max_memory_allocated(device=device) + mem_mb = torch.tensor([mem / (1024 * 1024)], + dtype=torch.int, + device=device) + if runner.world_size > 1: + dist.reduce(mem_mb, 0, op=dist.ReduceOp.MAX) + return mem_mb.item() + + def _log_info(self, log_dict, runner): + # print exp name for users to distinguish experiments + # at every ``interval_exp_name`` iterations and the end of each epoch + if runner.meta is not None and 'exp_name' in runner.meta: + if (self.every_n_iters(runner, self.interval_exp_name)) or ( + self.by_epoch and self.end_of_epoch(runner)): + exp_info = f'Exp name: {runner.meta["exp_name"]}' + runner.logger.info(exp_info) + + if log_dict['mode'] == 'train': + if isinstance(log_dict['lr'], dict): + lr_str = [] + for k, val in log_dict['lr'].items(): + lr_str.append(f'lr_{k}: {val:.3e}') + lr_str = ' '.join(lr_str) + else: + lr_str = f'lr: {log_dict["lr"]:.3e}' + + # by epoch: Epoch [4][100/1000] + # by iter: Iter [100/100000] + if self.by_epoch: + log_str = f'Epoch [{log_dict["epoch"]}]' \ + f'[{log_dict["iter"]}/{len(runner.data_loader)}]\t' + else: + log_str = f'Iter 
[{log_dict["iter"]}/{runner.max_iters}]\t' + log_str += f'{lr_str}, ' + + if 'time' in log_dict.keys(): + self.time_sec_tot += (log_dict['time'] * self.interval) + time_sec_avg = self.time_sec_tot / ( + runner.iter - self.start_iter + 1) + eta_sec = time_sec_avg * (runner.max_iters - runner.iter - 1) + eta_str = str(datetime.timedelta(seconds=int(eta_sec))) + log_str += f'eta: {eta_str}, ' + log_str += f'time: {log_dict["time"]:.3f}, ' \ + f'data_time: {log_dict["data_time"]:.3f}, ' + # statistic memory + if torch.cuda.is_available(): + log_str += f'memory: {log_dict["memory"]}, ' + else: + # val/test time + # here 1000 is the length of the val dataloader + # by epoch: Epoch[val] [4][1000] + # by iter: Iter[val] [1000] + if self.by_epoch: + log_str = f'Epoch({log_dict["mode"]}) ' \ + f'[{log_dict["epoch"]}][{log_dict["iter"]}]\t' + else: + log_str = f'Iter({log_dict["mode"]}) [{log_dict["iter"]}]\t' + + log_items = [] + for name, val in log_dict.items(): + # TODO: resolve this hack + # these items have been in log_str + if name in [ + 'mode', 'Epoch', 'iter', 'lr', 'time', 'data_time', + 'memory', 'epoch' + ]: + continue + if isinstance(val, float): + val = f'{val:.4f}' + log_items.append(f'{name}: {val}') + log_str += ', '.join(log_items) + + runner.logger.info(log_str) + + def _dump_log(self, log_dict, runner): + # dump log in json format + json_log = OrderedDict() + for k, v in log_dict.items(): + json_log[k] = self._round_float(v) + # only append log at last line + if runner.rank == 0: + with open(self.json_log_path, 'a+') as f: + mmcv.dump(json_log, f, file_format='json') + f.write('\n') + + def _round_float(self, items): + if isinstance(items, list): + return [self._round_float(item) for item in items] + elif isinstance(items, float): + return round(items, 5) + else: + return items + + def log(self, runner): + if 'eval_iter_num' in runner.log_buffer.output: + # this doesn't modify runner.iter and is regardless of by_epoch + cur_iter = runner.log_buffer.output.pop('eval_iter_num') + else: + cur_iter = self.get_iter(runner, inner_iter=True) + + log_dict = OrderedDict( + mode=self.get_mode(runner), + epoch=self.get_epoch(runner), + iter=cur_iter) + + # only record lr of the first param group + cur_lr = runner.current_lr() + if isinstance(cur_lr, list): + log_dict['lr'] = cur_lr[0] + else: + assert isinstance(cur_lr, dict) + log_dict['lr'] = {} + for k, lr_ in cur_lr.items(): + assert isinstance(lr_, list) + log_dict['lr'].update({k: lr_[0]}) + + if 'time' in runner.log_buffer.output: + # statistic memory + if torch.cuda.is_available(): + log_dict['memory'] = self._get_max_memory(runner) + + log_dict = dict(log_dict, **runner.log_buffer.output) + + self._log_info(log_dict, runner) + self._dump_log(log_dict, runner) + return log_dict + + def after_run(self, runner): + # copy or upload logs to self.out_dir + if self.out_dir is not None: + for filename in scandir(runner.work_dir, self.out_suffix, True): + local_filepath = osp.join(runner.work_dir, filename) + out_filepath = self.file_client.join_path( + self.out_dir, filename) + with open(local_filepath, 'r') as f: + self.file_client.put_text(f.read(), out_filepath) + + runner.logger.info( + (f'The file {local_filepath} has been uploaded to ' + f'{out_filepath}.')) + + if not self.keep_local: + os.remove(local_filepath) + runner.logger.info( + (f'{local_filepath} was removed due to the ' + '`self.keep_local=False`')) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/logger/wandb.py 
b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/logger/wandb.py new file mode 100644 index 0000000000000000000000000000000000000000..9f6808462eb79ab2b04806a5d9f0d3dd079b5ea9 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/logger/wandb.py @@ -0,0 +1,56 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from ...dist_utils import master_only +from ..hook import HOOKS +from .base import LoggerHook + + +@HOOKS.register_module() +class WandbLoggerHook(LoggerHook): + + def __init__(self, + init_kwargs=None, + interval=10, + ignore_last=True, + reset_flag=False, + commit=True, + by_epoch=True, + with_step=True): + super(WandbLoggerHook, self).__init__(interval, ignore_last, + reset_flag, by_epoch) + self.import_wandb() + self.init_kwargs = init_kwargs + self.commit = commit + self.with_step = with_step + + def import_wandb(self): + try: + import wandb + except ImportError: + raise ImportError( + 'Please run "pip install wandb" to install wandb') + self.wandb = wandb + + @master_only + def before_run(self, runner): + super(WandbLoggerHook, self).before_run(runner) + if self.wandb is None: + self.import_wandb() + if self.init_kwargs: + self.wandb.init(**self.init_kwargs) + else: + self.wandb.init() + + @master_only + def log(self, runner): + tags = self.get_loggable_tags(runner) + if tags: + if self.with_step: + self.wandb.log( + tags, step=self.get_iter(runner), commit=self.commit) + else: + tags['global_step'] = self.get_iter(runner) + self.wandb.log(tags, commit=self.commit) + + @master_only + def after_run(self, runner): + self.wandb.join() diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/lr_updater.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/lr_updater.py new file mode 100644 index 0000000000000000000000000000000000000000..b9851d2ca3c4e60b95ad734c19a2484b9ca7c708 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/lr_updater.py @@ -0,0 +1,670 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import numbers +from math import cos, pi + +import annotator.mmpkg.mmcv as mmcv +from .hook import HOOKS, Hook + + +class LrUpdaterHook(Hook): + """LR Scheduler in MMCV. + + Args: + by_epoch (bool): LR changes epoch by epoch + warmup (string): Type of warmup used. 
It can be None(use no warmup), + 'constant', 'linear' or 'exp' + warmup_iters (int): The number of iterations or epochs that warmup + lasts + warmup_ratio (float): LR used at the beginning of warmup equals to + warmup_ratio * initial_lr + warmup_by_epoch (bool): When warmup_by_epoch == True, warmup_iters + means the number of epochs that warmup lasts, otherwise means the + number of iteration that warmup lasts + """ + + def __init__(self, + by_epoch=True, + warmup=None, + warmup_iters=0, + warmup_ratio=0.1, + warmup_by_epoch=False): + # validate the "warmup" argument + if warmup is not None: + if warmup not in ['constant', 'linear', 'exp']: + raise ValueError( + f'"{warmup}" is not a supported type for warming up, valid' + ' types are "constant" and "linear"') + if warmup is not None: + assert warmup_iters > 0, \ + '"warmup_iters" must be a positive integer' + assert 0 < warmup_ratio <= 1.0, \ + '"warmup_ratio" must be in range (0,1]' + + self.by_epoch = by_epoch + self.warmup = warmup + self.warmup_iters = warmup_iters + self.warmup_ratio = warmup_ratio + self.warmup_by_epoch = warmup_by_epoch + + if self.warmup_by_epoch: + self.warmup_epochs = self.warmup_iters + self.warmup_iters = None + else: + self.warmup_epochs = None + + self.base_lr = [] # initial lr for all param groups + self.regular_lr = [] # expected lr if no warming up is performed + + def _set_lr(self, runner, lr_groups): + if isinstance(runner.optimizer, dict): + for k, optim in runner.optimizer.items(): + for param_group, lr in zip(optim.param_groups, lr_groups[k]): + param_group['lr'] = lr + else: + for param_group, lr in zip(runner.optimizer.param_groups, + lr_groups): + param_group['lr'] = lr + + def get_lr(self, runner, base_lr): + raise NotImplementedError + + def get_regular_lr(self, runner): + if isinstance(runner.optimizer, dict): + lr_groups = {} + for k in runner.optimizer.keys(): + _lr_group = [ + self.get_lr(runner, _base_lr) + for _base_lr in self.base_lr[k] + ] + lr_groups.update({k: _lr_group}) + + return lr_groups + else: + return [self.get_lr(runner, _base_lr) for _base_lr in self.base_lr] + + def get_warmup_lr(self, cur_iters): + + def _get_warmup_lr(cur_iters, regular_lr): + if self.warmup == 'constant': + warmup_lr = [_lr * self.warmup_ratio for _lr in regular_lr] + elif self.warmup == 'linear': + k = (1 - cur_iters / self.warmup_iters) * (1 - + self.warmup_ratio) + warmup_lr = [_lr * (1 - k) for _lr in regular_lr] + elif self.warmup == 'exp': + k = self.warmup_ratio**(1 - cur_iters / self.warmup_iters) + warmup_lr = [_lr * k for _lr in regular_lr] + return warmup_lr + + if isinstance(self.regular_lr, dict): + lr_groups = {} + for key, regular_lr in self.regular_lr.items(): + lr_groups[key] = _get_warmup_lr(cur_iters, regular_lr) + return lr_groups + else: + return _get_warmup_lr(cur_iters, self.regular_lr) + + def before_run(self, runner): + # NOTE: when resuming from a checkpoint, if 'initial_lr' is not saved, + # it will be set according to the optimizer params + if isinstance(runner.optimizer, dict): + self.base_lr = {} + for k, optim in runner.optimizer.items(): + for group in optim.param_groups: + group.setdefault('initial_lr', group['lr']) + _base_lr = [ + group['initial_lr'] for group in optim.param_groups + ] + self.base_lr.update({k: _base_lr}) + else: + for group in runner.optimizer.param_groups: + group.setdefault('initial_lr', group['lr']) + self.base_lr = [ + group['initial_lr'] for group in runner.optimizer.param_groups + ] + + def before_train_epoch(self, runner): + if self.warmup_iters 
is None: + epoch_len = len(runner.data_loader) + self.warmup_iters = self.warmup_epochs * epoch_len + + if not self.by_epoch: + return + + self.regular_lr = self.get_regular_lr(runner) + self._set_lr(runner, self.regular_lr) + + def before_train_iter(self, runner): + cur_iter = runner.iter + if not self.by_epoch: + self.regular_lr = self.get_regular_lr(runner) + if self.warmup is None or cur_iter >= self.warmup_iters: + self._set_lr(runner, self.regular_lr) + else: + warmup_lr = self.get_warmup_lr(cur_iter) + self._set_lr(runner, warmup_lr) + elif self.by_epoch: + if self.warmup is None or cur_iter > self.warmup_iters: + return + elif cur_iter == self.warmup_iters: + self._set_lr(runner, self.regular_lr) + else: + warmup_lr = self.get_warmup_lr(cur_iter) + self._set_lr(runner, warmup_lr) + + +@HOOKS.register_module() +class FixedLrUpdaterHook(LrUpdaterHook): + + def __init__(self, **kwargs): + super(FixedLrUpdaterHook, self).__init__(**kwargs) + + def get_lr(self, runner, base_lr): + return base_lr + + +@HOOKS.register_module() +class StepLrUpdaterHook(LrUpdaterHook): + """Step LR scheduler with min_lr clipping. + + Args: + step (int | list[int]): Step to decay the LR. If an int value is given, + regard it as the decay interval. If a list is given, decay LR at + these steps. + gamma (float, optional): Decay LR ratio. Default: 0.1. + min_lr (float, optional): Minimum LR value to keep. If LR after decay + is lower than `min_lr`, it will be clipped to this value. If None + is given, we don't perform lr clipping. Default: None. + """ + + def __init__(self, step, gamma=0.1, min_lr=None, **kwargs): + if isinstance(step, list): + assert mmcv.is_list_of(step, int) + assert all([s > 0 for s in step]) + elif isinstance(step, int): + assert step > 0 + else: + raise TypeError('"step" must be a list or integer') + self.step = step + self.gamma = gamma + self.min_lr = min_lr + super(StepLrUpdaterHook, self).__init__(**kwargs) + + def get_lr(self, runner, base_lr): + progress = runner.epoch if self.by_epoch else runner.iter + + # calculate exponential term + if isinstance(self.step, int): + exp = progress // self.step + else: + exp = len(self.step) + for i, s in enumerate(self.step): + if progress < s: + exp = i + break + + lr = base_lr * (self.gamma**exp) + if self.min_lr is not None: + # clip to a minimum value + lr = max(lr, self.min_lr) + return lr + + +@HOOKS.register_module() +class ExpLrUpdaterHook(LrUpdaterHook): + + def __init__(self, gamma, **kwargs): + self.gamma = gamma + super(ExpLrUpdaterHook, self).__init__(**kwargs) + + def get_lr(self, runner, base_lr): + progress = runner.epoch if self.by_epoch else runner.iter + return base_lr * self.gamma**progress + + +@HOOKS.register_module() +class PolyLrUpdaterHook(LrUpdaterHook): + + def __init__(self, power=1., min_lr=0., **kwargs): + self.power = power + self.min_lr = min_lr + super(PolyLrUpdaterHook, self).__init__(**kwargs) + + def get_lr(self, runner, base_lr): + if self.by_epoch: + progress = runner.epoch + max_progress = runner.max_epochs + else: + progress = runner.iter + max_progress = runner.max_iters + coeff = (1 - progress / max_progress)**self.power + return (base_lr - self.min_lr) * coeff + self.min_lr + + +@HOOKS.register_module() +class InvLrUpdaterHook(LrUpdaterHook): + + def __init__(self, gamma, power=1., **kwargs): + self.gamma = gamma + self.power = power + super(InvLrUpdaterHook, self).__init__(**kwargs) + + def get_lr(self, runner, base_lr): + progress = runner.epoch if self.by_epoch else runner.iter + return base_lr * (1 + 
self.gamma * progress)**(-self.power) + + +@HOOKS.register_module() +class CosineAnnealingLrUpdaterHook(LrUpdaterHook): + + def __init__(self, min_lr=None, min_lr_ratio=None, **kwargs): + assert (min_lr is None) ^ (min_lr_ratio is None) + self.min_lr = min_lr + self.min_lr_ratio = min_lr_ratio + super(CosineAnnealingLrUpdaterHook, self).__init__(**kwargs) + + def get_lr(self, runner, base_lr): + if self.by_epoch: + progress = runner.epoch + max_progress = runner.max_epochs + else: + progress = runner.iter + max_progress = runner.max_iters + + if self.min_lr_ratio is not None: + target_lr = base_lr * self.min_lr_ratio + else: + target_lr = self.min_lr + return annealing_cos(base_lr, target_lr, progress / max_progress) + + +@HOOKS.register_module() +class FlatCosineAnnealingLrUpdaterHook(LrUpdaterHook): + """Flat + Cosine lr schedule. + + Modified from https://github.com/fastai/fastai/blob/master/fastai/callback/schedule.py#L128 # noqa: E501 + + Args: + start_percent (float): When to start annealing the learning rate + after the percentage of the total training steps. + The value should be in range [0, 1). + Default: 0.75 + min_lr (float, optional): The minimum lr. Default: None. + min_lr_ratio (float, optional): The ratio of minimum lr to the base lr. + Either `min_lr` or `min_lr_ratio` should be specified. + Default: None. + """ + + def __init__(self, + start_percent=0.75, + min_lr=None, + min_lr_ratio=None, + **kwargs): + assert (min_lr is None) ^ (min_lr_ratio is None) + if start_percent < 0 or start_percent > 1 or not isinstance( + start_percent, float): + raise ValueError( + 'expected float between 0 and 1 start_percent, but ' + f'got {start_percent}') + self.start_percent = start_percent + self.min_lr = min_lr + self.min_lr_ratio = min_lr_ratio + super(FlatCosineAnnealingLrUpdaterHook, self).__init__(**kwargs) + + def get_lr(self, runner, base_lr): + if self.by_epoch: + start = round(runner.max_epochs * self.start_percent) + progress = runner.epoch - start + max_progress = runner.max_epochs - start + else: + start = round(runner.max_iters * self.start_percent) + progress = runner.iter - start + max_progress = runner.max_iters - start + + if self.min_lr_ratio is not None: + target_lr = base_lr * self.min_lr_ratio + else: + target_lr = self.min_lr + + if progress < 0: + return base_lr + else: + return annealing_cos(base_lr, target_lr, progress / max_progress) + + +@HOOKS.register_module() +class CosineRestartLrUpdaterHook(LrUpdaterHook): + """Cosine annealing with restarts learning rate scheme. + + Args: + periods (list[int]): Periods for each cosine anneling cycle. + restart_weights (list[float], optional): Restart weights at each + restart iteration. Default: [1]. + min_lr (float, optional): The minimum lr. Default: None. + min_lr_ratio (float, optional): The ratio of minimum lr to the base lr. + Either `min_lr` or `min_lr_ratio` should be specified. + Default: None. + """ + + def __init__(self, + periods, + restart_weights=[1], + min_lr=None, + min_lr_ratio=None, + **kwargs): + assert (min_lr is None) ^ (min_lr_ratio is None) + self.periods = periods + self.min_lr = min_lr + self.min_lr_ratio = min_lr_ratio + self.restart_weights = restart_weights + assert (len(self.periods) == len(self.restart_weights) + ), 'periods and restart_weights should have the same length.' 
+ super(CosineRestartLrUpdaterHook, self).__init__(**kwargs) + + self.cumulative_periods = [ + sum(self.periods[0:i + 1]) for i in range(0, len(self.periods)) + ] + + def get_lr(self, runner, base_lr): + if self.by_epoch: + progress = runner.epoch + else: + progress = runner.iter + + if self.min_lr_ratio is not None: + target_lr = base_lr * self.min_lr_ratio + else: + target_lr = self.min_lr + + idx = get_position_from_periods(progress, self.cumulative_periods) + current_weight = self.restart_weights[idx] + nearest_restart = 0 if idx == 0 else self.cumulative_periods[idx - 1] + current_periods = self.periods[idx] + + alpha = min((progress - nearest_restart) / current_periods, 1) + return annealing_cos(base_lr, target_lr, alpha, current_weight) + + +def get_position_from_periods(iteration, cumulative_periods): + """Get the position from a period list. + + It will return the index of the right-closest number in the period list. + For example, the cumulative_periods = [100, 200, 300, 400], + if iteration == 50, return 0; + if iteration == 210, return 2; + if iteration == 300, return 3. + + Args: + iteration (int): Current iteration. + cumulative_periods (list[int]): Cumulative period list. + + Returns: + int: The position of the right-closest number in the period list. + """ + for i, period in enumerate(cumulative_periods): + if iteration < period: + return i + raise ValueError(f'Current iteration {iteration} exceeds ' + f'cumulative_periods {cumulative_periods}') + + +@HOOKS.register_module() +class CyclicLrUpdaterHook(LrUpdaterHook): + """Cyclic LR Scheduler. + + Implement the cyclical learning rate policy (CLR) described in + https://arxiv.org/pdf/1506.01186.pdf + + Different from the original paper, we use cosine annealing rather than + triangular policy inside a cycle. This improves the performance in the + 3D detection area. + + Args: + by_epoch (bool): Whether to update LR by epoch. + target_ratio (tuple[float]): Relative ratio of the highest LR and the + lowest LR to the initial LR. + cyclic_times (int): Number of cycles during training + step_ratio_up (float): The ratio of the increasing process of LR in + the total cycle. + anneal_strategy (str): {'cos', 'linear'} + Specifies the annealing strategy: 'cos' for cosine annealing, + 'linear' for linear annealing. Default: 'cos'. 
+ """ + + def __init__(self, + by_epoch=False, + target_ratio=(10, 1e-4), + cyclic_times=1, + step_ratio_up=0.4, + anneal_strategy='cos', + **kwargs): + if isinstance(target_ratio, float): + target_ratio = (target_ratio, target_ratio / 1e5) + elif isinstance(target_ratio, tuple): + target_ratio = (target_ratio[0], target_ratio[0] / 1e5) \ + if len(target_ratio) == 1 else target_ratio + else: + raise ValueError('target_ratio should be either float ' + f'or tuple, got {type(target_ratio)}') + + assert len(target_ratio) == 2, \ + '"target_ratio" must be list or tuple of two floats' + assert 0 <= step_ratio_up < 1.0, \ + '"step_ratio_up" must be in range [0,1)' + + self.target_ratio = target_ratio + self.cyclic_times = cyclic_times + self.step_ratio_up = step_ratio_up + self.lr_phases = [] # init lr_phases + # validate anneal_strategy + if anneal_strategy not in ['cos', 'linear']: + raise ValueError('anneal_strategy must be one of "cos" or ' + f'"linear", instead got {anneal_strategy}') + elif anneal_strategy == 'cos': + self.anneal_func = annealing_cos + elif anneal_strategy == 'linear': + self.anneal_func = annealing_linear + + assert not by_epoch, \ + 'currently only support "by_epoch" = False' + super(CyclicLrUpdaterHook, self).__init__(by_epoch, **kwargs) + + def before_run(self, runner): + super(CyclicLrUpdaterHook, self).before_run(runner) + # initiate lr_phases + # total lr_phases are separated as up and down + max_iter_per_phase = runner.max_iters // self.cyclic_times + iter_up_phase = int(self.step_ratio_up * max_iter_per_phase) + self.lr_phases.append( + [0, iter_up_phase, max_iter_per_phase, 1, self.target_ratio[0]]) + self.lr_phases.append([ + iter_up_phase, max_iter_per_phase, max_iter_per_phase, + self.target_ratio[0], self.target_ratio[1] + ]) + + def get_lr(self, runner, base_lr): + curr_iter = runner.iter + for (start_iter, end_iter, max_iter_per_phase, start_ratio, + end_ratio) in self.lr_phases: + curr_iter %= max_iter_per_phase + if start_iter <= curr_iter < end_iter: + progress = curr_iter - start_iter + return self.anneal_func(base_lr * start_ratio, + base_lr * end_ratio, + progress / (end_iter - start_iter)) + + +@HOOKS.register_module() +class OneCycleLrUpdaterHook(LrUpdaterHook): + """One Cycle LR Scheduler. + + The 1cycle learning rate policy changes the learning rate after every + batch. The one cycle learning rate policy is described in + https://arxiv.org/pdf/1708.07120.pdf + + Args: + max_lr (float or list): Upper learning rate boundaries in the cycle + for each parameter group. + total_steps (int, optional): The total number of steps in the cycle. + Note that if a value is not provided here, it will be the max_iter + of runner. Default: None. + pct_start (float): The percentage of the cycle (in number of steps) + spent increasing the learning rate. + Default: 0.3 + anneal_strategy (str): {'cos', 'linear'} + Specifies the annealing strategy: 'cos' for cosine annealing, + 'linear' for linear annealing. + Default: 'cos' + div_factor (float): Determines the initial learning rate via + initial_lr = max_lr/div_factor + Default: 25 + final_div_factor (float): Determines the minimum learning rate via + min_lr = initial_lr/final_div_factor + Default: 1e4 + three_phase (bool): If three_phase is True, use a third phase of the + schedule to annihilate the learning rate according to + final_div_factor instead of modifying the second phase (the first + two phases will be symmetrical about the step indicated by + pct_start). 
+ Default: False + """ + + def __init__(self, + max_lr, + total_steps=None, + pct_start=0.3, + anneal_strategy='cos', + div_factor=25, + final_div_factor=1e4, + three_phase=False, + **kwargs): + # validate by_epoch, currently only support by_epoch = False + if 'by_epoch' not in kwargs: + kwargs['by_epoch'] = False + else: + assert not kwargs['by_epoch'], \ + 'currently only support "by_epoch" = False' + if not isinstance(max_lr, (numbers.Number, list, dict)): + raise ValueError('the type of max_lr must be the one of list or ' + f'dict, but got {type(max_lr)}') + self._max_lr = max_lr + if total_steps is not None: + if not isinstance(total_steps, int): + raise ValueError('the type of total_steps must be int, but' + f'got {type(total_steps)}') + self.total_steps = total_steps + # validate pct_start + if pct_start < 0 or pct_start > 1 or not isinstance(pct_start, float): + raise ValueError('expected float between 0 and 1 pct_start, but ' + f'got {pct_start}') + self.pct_start = pct_start + # validate anneal_strategy + if anneal_strategy not in ['cos', 'linear']: + raise ValueError('anneal_strategy must be one of "cos" or ' + f'"linear", instead got {anneal_strategy}') + elif anneal_strategy == 'cos': + self.anneal_func = annealing_cos + elif anneal_strategy == 'linear': + self.anneal_func = annealing_linear + self.div_factor = div_factor + self.final_div_factor = final_div_factor + self.three_phase = three_phase + self.lr_phases = [] # init lr_phases + super(OneCycleLrUpdaterHook, self).__init__(**kwargs) + + def before_run(self, runner): + if hasattr(self, 'total_steps'): + total_steps = self.total_steps + else: + total_steps = runner.max_iters + if total_steps < runner.max_iters: + raise ValueError( + 'The total steps must be greater than or equal to max ' + f'iterations {runner.max_iters} of runner, but total steps ' + f'is {total_steps}.') + + if isinstance(runner.optimizer, dict): + self.base_lr = {} + for k, optim in runner.optimizer.items(): + _max_lr = format_param(k, optim, self._max_lr) + self.base_lr[k] = [lr / self.div_factor for lr in _max_lr] + for group, lr in zip(optim.param_groups, self.base_lr[k]): + group.setdefault('initial_lr', lr) + else: + k = type(runner.optimizer).__name__ + _max_lr = format_param(k, runner.optimizer, self._max_lr) + self.base_lr = [lr / self.div_factor for lr in _max_lr] + for group, lr in zip(runner.optimizer.param_groups, self.base_lr): + group.setdefault('initial_lr', lr) + + if self.three_phase: + self.lr_phases.append( + [float(self.pct_start * total_steps) - 1, 1, self.div_factor]) + self.lr_phases.append([ + float(2 * self.pct_start * total_steps) - 2, self.div_factor, 1 + ]) + self.lr_phases.append( + [total_steps - 1, 1, 1 / self.final_div_factor]) + else: + self.lr_phases.append( + [float(self.pct_start * total_steps) - 1, 1, self.div_factor]) + self.lr_phases.append( + [total_steps - 1, self.div_factor, 1 / self.final_div_factor]) + + def get_lr(self, runner, base_lr): + curr_iter = runner.iter + start_iter = 0 + for i, (end_iter, start_lr, end_lr) in enumerate(self.lr_phases): + if curr_iter <= end_iter: + pct = (curr_iter - start_iter) / (end_iter - start_iter) + lr = self.anneal_func(base_lr * start_lr, base_lr * end_lr, + pct) + break + start_iter = end_iter + return lr + + +def annealing_cos(start, end, factor, weight=1): + """Calculate annealing cos learning rate. + + Cosine anneal from `weight * start + (1 - weight) * end` to `end` as + percentage goes from 0.0 to 1.0. 
+ + Args: + start (float): The starting learning rate of the cosine annealing. + end (float): The ending learning rate of the cosine annealing. + factor (float): The coefficient of `pi` when calculating the current + percentage. Range from 0.0 to 1.0. + weight (float, optional): The combination factor of `start` and `end` + when calculating the actual starting learning rate. Defaults to 1. + """ + cos_out = cos(pi * factor) + 1 + return end + 0.5 * weight * (start - end) * cos_out + + + def annealing_linear(start, end, factor): + """Calculate annealing linear learning rate. + + Linear anneal from `start` to `end` as percentage goes from 0.0 to 1.0. + + Args: + start (float): The starting learning rate of the linear annealing. + end (float): The ending learning rate of the linear annealing. + factor (float): The current percentage of the annealing process. + Range from 0.0 to 1.0. + """ + return start + (end - start) * factor + + + def format_param(name, optim, param): + if isinstance(param, numbers.Number): + return [param] * len(optim.param_groups) + elif isinstance(param, (list, tuple)): # multi param groups + if len(param) != len(optim.param_groups): + raise ValueError(f'expected {len(optim.param_groups)} ' + f'values for {name}, got {len(param)}') + return param + else: # multi optimizers + if name not in param: + raise KeyError(f'{name} is not found in {param.keys()}') + return param[name] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/memory.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/memory.py new file mode 100644 index 0000000000000000000000000000000000000000..70cf9a838fb314e3bd3c07aadbc00921a81e83ed --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/memory.py @@ -0,0 +1,25 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch + +from .hook import HOOKS, Hook + + +@HOOKS.register_module() +class EmptyCacheHook(Hook): + + def __init__(self, before_epoch=False, after_epoch=True, after_iter=False): + self._before_epoch = before_epoch + self._after_epoch = after_epoch + self._after_iter = after_iter + + def after_iter(self, runner): + if self._after_iter: + torch.cuda.empty_cache() + + def before_epoch(self, runner): + if self._before_epoch: + torch.cuda.empty_cache() + + def after_epoch(self, runner): + if self._after_epoch: + torch.cuda.empty_cache() diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/momentum_updater.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/momentum_updater.py new file mode 100644 index 0000000000000000000000000000000000000000..cdc70246280c2318f51034bb6b66eade7b478b79 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/momentum_updater.py @@ -0,0 +1,493 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
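To make the annealing helpers that close lr_updater.py above easier to follow, here is a small self-contained restatement of `annealing_cos` and `annealing_linear` with a few endpoint checks. It simply repeats the formulas from the diff for illustration; the example learning rates and tolerance are arbitrary placeholder values.

from math import cos, pi


def annealing_cos(start, end, factor, weight=1):
    # cos(pi * factor) + 1 falls from 2 to 0 as factor goes from 0 to 1, so the
    # result moves from weight * start + (1 - weight) * end down to end.
    cos_out = cos(pi * factor) + 1
    return end + 0.5 * weight * (start - end) * cos_out


def annealing_linear(start, end, factor):
    # Straight-line interpolation between start and end.
    return start + (end - start) * factor


# Both schedules share the same endpoints; only the shape in between differs.
assert abs(annealing_cos(0.1, 0.0, 0.0) - 0.1) < 1e-12    # factor=0 -> start
assert abs(annealing_cos(0.1, 0.0, 1.0) - 0.0) < 1e-12    # factor=1 -> end
assert abs(annealing_linear(0.1, 0.0, 0.5) - 0.05) < 1e-12  # halfway point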
+import annotator.mmpkg.mmcv as mmcv +from .hook import HOOKS, Hook +from .lr_updater import annealing_cos, annealing_linear, format_param + + +class MomentumUpdaterHook(Hook): + + def __init__(self, + by_epoch=True, + warmup=None, + warmup_iters=0, + warmup_ratio=0.9): + # validate the "warmup" argument + if warmup is not None: + if warmup not in ['constant', 'linear', 'exp']: + raise ValueError( + f'"{warmup}" is not a supported type for warming up, valid' + ' types are "constant" and "linear"') + if warmup is not None: + assert warmup_iters > 0, \ + '"warmup_iters" must be a positive integer' + assert 0 < warmup_ratio <= 1.0, \ + '"warmup_momentum" must be in range (0,1]' + + self.by_epoch = by_epoch + self.warmup = warmup + self.warmup_iters = warmup_iters + self.warmup_ratio = warmup_ratio + + self.base_momentum = [] # initial momentum for all param groups + self.regular_momentum = [ + ] # expected momentum if no warming up is performed + + def _set_momentum(self, runner, momentum_groups): + if isinstance(runner.optimizer, dict): + for k, optim in runner.optimizer.items(): + for param_group, mom in zip(optim.param_groups, + momentum_groups[k]): + if 'momentum' in param_group.keys(): + param_group['momentum'] = mom + elif 'betas' in param_group.keys(): + param_group['betas'] = (mom, param_group['betas'][1]) + else: + for param_group, mom in zip(runner.optimizer.param_groups, + momentum_groups): + if 'momentum' in param_group.keys(): + param_group['momentum'] = mom + elif 'betas' in param_group.keys(): + param_group['betas'] = (mom, param_group['betas'][1]) + + def get_momentum(self, runner, base_momentum): + raise NotImplementedError + + def get_regular_momentum(self, runner): + if isinstance(runner.optimizer, dict): + momentum_groups = {} + for k in runner.optimizer.keys(): + _momentum_group = [ + self.get_momentum(runner, _base_momentum) + for _base_momentum in self.base_momentum[k] + ] + momentum_groups.update({k: _momentum_group}) + return momentum_groups + else: + return [ + self.get_momentum(runner, _base_momentum) + for _base_momentum in self.base_momentum + ] + + def get_warmup_momentum(self, cur_iters): + + def _get_warmup_momentum(cur_iters, regular_momentum): + if self.warmup == 'constant': + warmup_momentum = [ + _momentum / self.warmup_ratio + for _momentum in self.regular_momentum + ] + elif self.warmup == 'linear': + k = (1 - cur_iters / self.warmup_iters) * (1 - + self.warmup_ratio) + warmup_momentum = [ + _momentum / (1 - k) for _momentum in self.regular_mom + ] + elif self.warmup == 'exp': + k = self.warmup_ratio**(1 - cur_iters / self.warmup_iters) + warmup_momentum = [ + _momentum / k for _momentum in self.regular_mom + ] + return warmup_momentum + + if isinstance(self.regular_momentum, dict): + momentum_groups = {} + for key, regular_momentum in self.regular_momentum.items(): + momentum_groups[key] = _get_warmup_momentum( + cur_iters, regular_momentum) + return momentum_groups + else: + return _get_warmup_momentum(cur_iters, self.regular_momentum) + + def before_run(self, runner): + # NOTE: when resuming from a checkpoint, + # if 'initial_momentum' is not saved, + # it will be set according to the optimizer params + if isinstance(runner.optimizer, dict): + self.base_momentum = {} + for k, optim in runner.optimizer.items(): + for group in optim.param_groups: + if 'momentum' in group.keys(): + group.setdefault('initial_momentum', group['momentum']) + else: + group.setdefault('initial_momentum', group['betas'][0]) + _base_momentum = [ + group['initial_momentum'] for 
group in optim.param_groups + ] + self.base_momentum.update({k: _base_momentum}) + else: + for group in runner.optimizer.param_groups: + if 'momentum' in group.keys(): + group.setdefault('initial_momentum', group['momentum']) + else: + group.setdefault('initial_momentum', group['betas'][0]) + self.base_momentum = [ + group['initial_momentum'] + for group in runner.optimizer.param_groups + ] + + def before_train_epoch(self, runner): + if not self.by_epoch: + return + self.regular_mom = self.get_regular_momentum(runner) + self._set_momentum(runner, self.regular_mom) + + def before_train_iter(self, runner): + cur_iter = runner.iter + if not self.by_epoch: + self.regular_mom = self.get_regular_momentum(runner) + if self.warmup is None or cur_iter >= self.warmup_iters: + self._set_momentum(runner, self.regular_mom) + else: + warmup_momentum = self.get_warmup_momentum(cur_iter) + self._set_momentum(runner, warmup_momentum) + elif self.by_epoch: + if self.warmup is None or cur_iter > self.warmup_iters: + return + elif cur_iter == self.warmup_iters: + self._set_momentum(runner, self.regular_mom) + else: + warmup_momentum = self.get_warmup_momentum(cur_iter) + self._set_momentum(runner, warmup_momentum) + + +@HOOKS.register_module() +class StepMomentumUpdaterHook(MomentumUpdaterHook): + """Step momentum scheduler with min value clipping. + + Args: + step (int | list[int]): Step to decay the momentum. If an int value is + given, regard it as the decay interval. If a list is given, decay + momentum at these steps. + gamma (float, optional): Decay momentum ratio. Default: 0.5. + min_momentum (float, optional): Minimum momentum value to keep. If + momentum after decay is lower than this value, it will be clipped + accordingly. If None is given, we don't perform lr clipping. + Default: None. 
+ """ + + def __init__(self, step, gamma=0.5, min_momentum=None, **kwargs): + if isinstance(step, list): + assert mmcv.is_list_of(step, int) + assert all([s > 0 for s in step]) + elif isinstance(step, int): + assert step > 0 + else: + raise TypeError('"step" must be a list or integer') + self.step = step + self.gamma = gamma + self.min_momentum = min_momentum + super(StepMomentumUpdaterHook, self).__init__(**kwargs) + + def get_momentum(self, runner, base_momentum): + progress = runner.epoch if self.by_epoch else runner.iter + + # calculate exponential term + if isinstance(self.step, int): + exp = progress // self.step + else: + exp = len(self.step) + for i, s in enumerate(self.step): + if progress < s: + exp = i + break + + momentum = base_momentum * (self.gamma**exp) + if self.min_momentum is not None: + # clip to a minimum value + momentum = max(momentum, self.min_momentum) + return momentum + + +@HOOKS.register_module() +class CosineAnnealingMomentumUpdaterHook(MomentumUpdaterHook): + + def __init__(self, min_momentum=None, min_momentum_ratio=None, **kwargs): + assert (min_momentum is None) ^ (min_momentum_ratio is None) + self.min_momentum = min_momentum + self.min_momentum_ratio = min_momentum_ratio + super(CosineAnnealingMomentumUpdaterHook, self).__init__(**kwargs) + + def get_momentum(self, runner, base_momentum): + if self.by_epoch: + progress = runner.epoch + max_progress = runner.max_epochs + else: + progress = runner.iter + max_progress = runner.max_iters + if self.min_momentum_ratio is not None: + target_momentum = base_momentum * self.min_momentum_ratio + else: + target_momentum = self.min_momentum + return annealing_cos(base_momentum, target_momentum, + progress / max_progress) + + +@HOOKS.register_module() +class CyclicMomentumUpdaterHook(MomentumUpdaterHook): + """Cyclic momentum Scheduler. + + Implement the cyclical momentum scheduler policy described in + https://arxiv.org/pdf/1708.07120.pdf + + This momentum scheduler usually used together with the CyclicLRUpdater + to improve the performance in the 3D detection area. + + Attributes: + target_ratio (tuple[float]): Relative ratio of the lowest momentum and + the highest momentum to the initial momentum. + cyclic_times (int): Number of cycles during training + step_ratio_up (float): The ratio of the increasing process of momentum + in the total cycle. + by_epoch (bool): Whether to update momentum by epoch. 
+ """ + + def __init__(self, + by_epoch=False, + target_ratio=(0.85 / 0.95, 1), + cyclic_times=1, + step_ratio_up=0.4, + **kwargs): + if isinstance(target_ratio, float): + target_ratio = (target_ratio, target_ratio / 1e5) + elif isinstance(target_ratio, tuple): + target_ratio = (target_ratio[0], target_ratio[0] / 1e5) \ + if len(target_ratio) == 1 else target_ratio + else: + raise ValueError('target_ratio should be either float ' + f'or tuple, got {type(target_ratio)}') + + assert len(target_ratio) == 2, \ + '"target_ratio" must be list or tuple of two floats' + assert 0 <= step_ratio_up < 1.0, \ + '"step_ratio_up" must be in range [0,1)' + + self.target_ratio = target_ratio + self.cyclic_times = cyclic_times + self.step_ratio_up = step_ratio_up + self.momentum_phases = [] # init momentum_phases + # currently only support by_epoch=False + assert not by_epoch, \ + 'currently only support "by_epoch" = False' + super(CyclicMomentumUpdaterHook, self).__init__(by_epoch, **kwargs) + + def before_run(self, runner): + super(CyclicMomentumUpdaterHook, self).before_run(runner) + # initiate momentum_phases + # total momentum_phases are separated as up and down + max_iter_per_phase = runner.max_iters // self.cyclic_times + iter_up_phase = int(self.step_ratio_up * max_iter_per_phase) + self.momentum_phases.append( + [0, iter_up_phase, max_iter_per_phase, 1, self.target_ratio[0]]) + self.momentum_phases.append([ + iter_up_phase, max_iter_per_phase, max_iter_per_phase, + self.target_ratio[0], self.target_ratio[1] + ]) + + def get_momentum(self, runner, base_momentum): + curr_iter = runner.iter + for (start_iter, end_iter, max_iter_per_phase, start_ratio, + end_ratio) in self.momentum_phases: + curr_iter %= max_iter_per_phase + if start_iter <= curr_iter < end_iter: + progress = curr_iter - start_iter + return annealing_cos(base_momentum * start_ratio, + base_momentum * end_ratio, + progress / (end_iter - start_iter)) + + +@HOOKS.register_module() +class OneCycleMomentumUpdaterHook(MomentumUpdaterHook): + """OneCycle momentum Scheduler. + + This momentum scheduler usually used together with the OneCycleLrUpdater + to improve the performance. + + Args: + base_momentum (float or list): Lower momentum boundaries in the cycle + for each parameter group. Note that momentum is cycled inversely + to learning rate; at the peak of a cycle, momentum is + 'base_momentum' and learning rate is 'max_lr'. + Default: 0.85 + max_momentum (float or list): Upper momentum boundaries in the cycle + for each parameter group. Functionally, + it defines the cycle amplitude (max_momentum - base_momentum). + Note that momentum is cycled inversely + to learning rate; at the start of a cycle, momentum is + 'max_momentum' and learning rate is 'base_lr' + Default: 0.95 + pct_start (float): The percentage of the cycle (in number of steps) + spent increasing the learning rate. + Default: 0.3 + anneal_strategy (str): {'cos', 'linear'} + Specifies the annealing strategy: 'cos' for cosine annealing, + 'linear' for linear annealing. + Default: 'cos' + three_phase (bool): If three_phase is True, use a third phase of the + schedule to annihilate the learning rate according to + final_div_factor instead of modifying the second phase (the first + two phases will be symmetrical about the step indicated by + pct_start). 
+ Default: False + """ + + def __init__(self, + base_momentum=0.85, + max_momentum=0.95, + pct_start=0.3, + anneal_strategy='cos', + three_phase=False, + **kwargs): + # validate by_epoch, currently only support by_epoch=False + if 'by_epoch' not in kwargs: + kwargs['by_epoch'] = False + else: + assert not kwargs['by_epoch'], \ + 'currently only support "by_epoch" = False' + if not isinstance(base_momentum, (float, list, dict)): + raise ValueError('base_momentum must be the type among of float,' + 'list or dict.') + self._base_momentum = base_momentum + if not isinstance(max_momentum, (float, list, dict)): + raise ValueError('max_momentum must be the type among of float,' + 'list or dict.') + self._max_momentum = max_momentum + # validate pct_start + if pct_start < 0 or pct_start > 1 or not isinstance(pct_start, float): + raise ValueError('Expected float between 0 and 1 pct_start, but ' + f'got {pct_start}') + self.pct_start = pct_start + # validate anneal_strategy + if anneal_strategy not in ['cos', 'linear']: + raise ValueError('anneal_strategy must by one of "cos" or ' + f'"linear", instead got {anneal_strategy}') + elif anneal_strategy == 'cos': + self.anneal_func = annealing_cos + elif anneal_strategy == 'linear': + self.anneal_func = annealing_linear + self.three_phase = three_phase + self.momentum_phases = [] # init momentum_phases + super(OneCycleMomentumUpdaterHook, self).__init__(**kwargs) + + def before_run(self, runner): + if isinstance(runner.optimizer, dict): + for k, optim in runner.optimizer.items(): + if ('momentum' not in optim.defaults + and 'betas' not in optim.defaults): + raise ValueError('optimizer must support momentum with' + 'option enabled') + self.use_beta1 = 'betas' in optim.defaults + _base_momentum = format_param(k, optim, self._base_momentum) + _max_momentum = format_param(k, optim, self._max_momentum) + for group, b_momentum, m_momentum in zip( + optim.param_groups, _base_momentum, _max_momentum): + if self.use_beta1: + _, beta2 = group['betas'] + group['betas'] = (m_momentum, beta2) + else: + group['momentum'] = m_momentum + group['base_momentum'] = b_momentum + group['max_momentum'] = m_momentum + else: + optim = runner.optimizer + if ('momentum' not in optim.defaults + and 'betas' not in optim.defaults): + raise ValueError('optimizer must support momentum with' + 'option enabled') + self.use_beta1 = 'betas' in optim.defaults + k = type(optim).__name__ + _base_momentum = format_param(k, optim, self._base_momentum) + _max_momentum = format_param(k, optim, self._max_momentum) + for group, b_momentum, m_momentum in zip(optim.param_groups, + _base_momentum, + _max_momentum): + if self.use_beta1: + _, beta2 = group['betas'] + group['betas'] = (m_momentum, beta2) + else: + group['momentum'] = m_momentum + group['base_momentum'] = b_momentum + group['max_momentum'] = m_momentum + + if self.three_phase: + self.momentum_phases.append({ + 'end_iter': + float(self.pct_start * runner.max_iters) - 1, + 'start_momentum': + 'max_momentum', + 'end_momentum': + 'base_momentum' + }) + self.momentum_phases.append({ + 'end_iter': + float(2 * self.pct_start * runner.max_iters) - 2, + 'start_momentum': + 'base_momentum', + 'end_momentum': + 'max_momentum' + }) + self.momentum_phases.append({ + 'end_iter': runner.max_iters - 1, + 'start_momentum': 'max_momentum', + 'end_momentum': 'max_momentum' + }) + else: + self.momentum_phases.append({ + 'end_iter': + float(self.pct_start * runner.max_iters) - 1, + 'start_momentum': + 'max_momentum', + 'end_momentum': + 'base_momentum' + }) 
+ self.momentum_phases.append({ + 'end_iter': runner.max_iters - 1, + 'start_momentum': 'base_momentum', + 'end_momentum': 'max_momentum' + }) + + def _set_momentum(self, runner, momentum_groups): + if isinstance(runner.optimizer, dict): + for k, optim in runner.optimizer.items(): + for param_group, mom in zip(optim.param_groups, + momentum_groups[k]): + if 'momentum' in param_group.keys(): + param_group['momentum'] = mom + elif 'betas' in param_group.keys(): + param_group['betas'] = (mom, param_group['betas'][1]) + else: + for param_group, mom in zip(runner.optimizer.param_groups, + momentum_groups): + if 'momentum' in param_group.keys(): + param_group['momentum'] = mom + elif 'betas' in param_group.keys(): + param_group['betas'] = (mom, param_group['betas'][1]) + + def get_momentum(self, runner, param_group): + curr_iter = runner.iter + start_iter = 0 + for i, phase in enumerate(self.momentum_phases): + end_iter = phase['end_iter'] + if curr_iter <= end_iter or i == len(self.momentum_phases) - 1: + pct = (curr_iter - start_iter) / (end_iter - start_iter) + momentum = self.anneal_func( + param_group[phase['start_momentum']], + param_group[phase['end_momentum']], pct) + break + start_iter = end_iter + return momentum + + def get_regular_momentum(self, runner): + if isinstance(runner.optimizer, dict): + momentum_groups = {} + for k, optim in runner.optimizer.items(): + _momentum_group = [ + self.get_momentum(runner, param_group) + for param_group in optim.param_groups + ] + momentum_groups.update({k: _momentum_group}) + return momentum_groups + else: + momentum_groups = [] + for param_group in runner.optimizer.param_groups: + momentum_groups.append(self.get_momentum(runner, param_group)) + return momentum_groups diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/optimizer.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/optimizer.py new file mode 100644 index 0000000000000000000000000000000000000000..580a183639a5d95c04ecae9c619afb795a169e9e --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/optimizer.py @@ -0,0 +1,508 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import copy +from collections import defaultdict +from itertools import chain + +from torch.nn.utils import clip_grad + +from annotator.mmpkg.mmcv.utils import TORCH_VERSION, _BatchNorm, digit_version +from ..dist_utils import allreduce_grads +from ..fp16_utils import LossScaler, wrap_fp16_model +from .hook import HOOKS, Hook + +try: + # If PyTorch version >= 1.6.0, torch.cuda.amp.GradScaler would be imported + # and used; otherwise, auto fp16 will adopt mmcv's implementation. 
+ from torch.cuda.amp import GradScaler +except ImportError: + pass + + +@HOOKS.register_module() +class OptimizerHook(Hook): + + def __init__(self, grad_clip=None): + self.grad_clip = grad_clip + + def clip_grads(self, params): + params = list( + filter(lambda p: p.requires_grad and p.grad is not None, params)) + if len(params) > 0: + return clip_grad.clip_grad_norm_(params, **self.grad_clip) + + def after_train_iter(self, runner): + runner.optimizer.zero_grad() + runner.outputs['loss'].backward() + if self.grad_clip is not None: + grad_norm = self.clip_grads(runner.model.parameters()) + if grad_norm is not None: + # Add grad norm to the logger + runner.log_buffer.update({'grad_norm': float(grad_norm)}, + runner.outputs['num_samples']) + runner.optimizer.step() + + +@HOOKS.register_module() +class GradientCumulativeOptimizerHook(OptimizerHook): + """Optimizer Hook implements multi-iters gradient cumulating. + + Args: + cumulative_iters (int, optional): Num of gradient cumulative iters. + The optimizer will step every `cumulative_iters` iters. + Defaults to 1. + + Examples: + >>> # Use cumulative_iters to simulate a large batch size + >>> # It is helpful when the hardware cannot handle a large batch size. + >>> loader = DataLoader(data, batch_size=64) + >>> optim_hook = GradientCumulativeOptimizerHook(cumulative_iters=4) + >>> # almost equals to + >>> loader = DataLoader(data, batch_size=256) + >>> optim_hook = OptimizerHook() + """ + + def __init__(self, cumulative_iters=1, **kwargs): + super(GradientCumulativeOptimizerHook, self).__init__(**kwargs) + + assert isinstance(cumulative_iters, int) and cumulative_iters > 0, \ + f'cumulative_iters only accepts positive int, but got ' \ + f'{type(cumulative_iters)} instead.' + + self.cumulative_iters = cumulative_iters + self.divisible_iters = 0 + self.remainder_iters = 0 + self.initialized = False + + def has_batch_norm(self, module): + if isinstance(module, _BatchNorm): + return True + for m in module.children(): + if self.has_batch_norm(m): + return True + return False + + def _init(self, runner): + if runner.iter % self.cumulative_iters != 0: + runner.logger.warning( + 'Resume iter number is not divisible by cumulative_iters in ' + 'GradientCumulativeOptimizerHook, which means the gradient of ' + 'some iters is lost and the result may be influenced slightly.' 
+ ) + + if self.has_batch_norm(runner.model) and self.cumulative_iters > 1: + runner.logger.warning( + 'GradientCumulativeOptimizerHook may slightly decrease ' + 'performance if the model has BatchNorm layers.') + + residual_iters = runner.max_iters - runner.iter + + self.divisible_iters = ( + residual_iters // self.cumulative_iters * self.cumulative_iters) + self.remainder_iters = residual_iters - self.divisible_iters + + self.initialized = True + + def after_train_iter(self, runner): + if not self.initialized: + self._init(runner) + + if runner.iter < self.divisible_iters: + loss_factor = self.cumulative_iters + else: + loss_factor = self.remainder_iters + loss = runner.outputs['loss'] + loss = loss / loss_factor + loss.backward() + + if (self.every_n_iters(runner, self.cumulative_iters) + or self.is_last_iter(runner)): + + if self.grad_clip is not None: + grad_norm = self.clip_grads(runner.model.parameters()) + if grad_norm is not None: + # Add grad norm to the logger + runner.log_buffer.update({'grad_norm': float(grad_norm)}, + runner.outputs['num_samples']) + runner.optimizer.step() + runner.optimizer.zero_grad() + + +if (TORCH_VERSION != 'parrots' + and digit_version(TORCH_VERSION) >= digit_version('1.6.0')): + + @HOOKS.register_module() + class Fp16OptimizerHook(OptimizerHook): + """FP16 optimizer hook (using PyTorch's implementation). + + If you are using PyTorch >= 1.6, torch.cuda.amp is used as the backend, + to take care of the optimization procedure. + + Args: + loss_scale (float | str | dict): Scale factor configuration. + If loss_scale is a float, static loss scaling will be used with + the specified scale. If loss_scale is a string, it must be + 'dynamic', then dynamic loss scaling will be used. + It can also be a dict containing arguments of GradScalar. + Defaults to 512. For Pytorch >= 1.6, mmcv uses official + implementation of GradScaler. If you use a dict version of + loss_scale to create GradScaler, please refer to: + https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.GradScaler + for the parameters. + + Examples: + >>> loss_scale = dict( + ... init_scale=65536.0, + ... growth_factor=2.0, + ... backoff_factor=0.5, + ... growth_interval=2000 + ... 
) + >>> optimizer_hook = Fp16OptimizerHook(loss_scale=loss_scale) + """ + + def __init__(self, + grad_clip=None, + coalesce=True, + bucket_size_mb=-1, + loss_scale=512., + distributed=True): + self.grad_clip = grad_clip + self.coalesce = coalesce + self.bucket_size_mb = bucket_size_mb + self.distributed = distributed + self._scale_update_param = None + if loss_scale == 'dynamic': + self.loss_scaler = GradScaler() + elif isinstance(loss_scale, float): + self._scale_update_param = loss_scale + self.loss_scaler = GradScaler(init_scale=loss_scale) + elif isinstance(loss_scale, dict): + self.loss_scaler = GradScaler(**loss_scale) + else: + raise ValueError('loss_scale must be of type float, dict, or ' + f'"dynamic", got {loss_scale}') + + def before_run(self, runner): + """Preparing steps before Mixed Precision Training.""" + # wrap model mode to fp16 + wrap_fp16_model(runner.model) + # resume from state dict + if 'fp16' in runner.meta and 'loss_scaler' in runner.meta['fp16']: + scaler_state_dict = runner.meta['fp16']['loss_scaler'] + self.loss_scaler.load_state_dict(scaler_state_dict) + + def copy_grads_to_fp32(self, fp16_net, fp32_weights): + """Copy gradients from fp16 model to fp32 weight copy.""" + for fp32_param, fp16_param in zip(fp32_weights, + fp16_net.parameters()): + if fp16_param.grad is not None: + if fp32_param.grad is None: + fp32_param.grad = fp32_param.data.new( + fp32_param.size()) + fp32_param.grad.copy_(fp16_param.grad) + + def copy_params_to_fp16(self, fp16_net, fp32_weights): + """Copy updated params from fp32 weight copy to fp16 model.""" + for fp16_param, fp32_param in zip(fp16_net.parameters(), + fp32_weights): + fp16_param.data.copy_(fp32_param.data) + + def after_train_iter(self, runner): + """Backward optimization steps for Mixed Precision Training. For + dynamic loss scaling, please refer to + https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.GradScaler. + + 1. Scale the loss by a scale factor. + 2. Backward the loss to obtain the gradients. + 3. Unscale the optimizer’s gradient tensors. + 4. Call optimizer.step() and update scale factor. + 5. Save loss_scaler state_dict for resume purpose. + """ + # clear grads of last iteration + runner.model.zero_grad() + runner.optimizer.zero_grad() + + self.loss_scaler.scale(runner.outputs['loss']).backward() + self.loss_scaler.unscale_(runner.optimizer) + # grad clip + if self.grad_clip is not None: + grad_norm = self.clip_grads(runner.model.parameters()) + if grad_norm is not None: + # Add grad norm to the logger + runner.log_buffer.update({'grad_norm': float(grad_norm)}, + runner.outputs['num_samples']) + # backward and update scaler + self.loss_scaler.step(runner.optimizer) + self.loss_scaler.update(self._scale_update_param) + + # save state_dict of loss_scaler + runner.meta.setdefault( + 'fp16', {})['loss_scaler'] = self.loss_scaler.state_dict() + + @HOOKS.register_module() + class GradientCumulativeFp16OptimizerHook(GradientCumulativeOptimizerHook, + Fp16OptimizerHook): + """Fp16 optimizer Hook (using PyTorch's implementation) implements + multi-iters gradient cumulating. + + If you are using PyTorch >= 1.6, torch.cuda.amp is used as the backend, + to take care of the optimization procedure. 
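The three hooks defined so far are usually built from a runner's `optimizer_config`, but constructing them directly shows how the pieces fit together. A hedged sketch (the `max_norm`/`norm_type` values are illustrative and are forwarded to `torch.nn.utils.clip_grad_norm_`; the commented registration call assumes an already-built runner):

```python
from annotator.mmpkg.mmcv.runner.hooks.optimizer import (
    OptimizerHook, GradientCumulativeOptimizerHook, Fp16OptimizerHook)

# Plain optimizer step with gradient clipping.
optimizer_hook = OptimizerHook(grad_clip=dict(max_norm=35, norm_type=2))

# Accumulate gradients over 4 iterations to emulate a 4x larger batch size.
cumulative_hook = GradientCumulativeOptimizerHook(
    cumulative_iters=4, grad_clip=dict(max_norm=35, norm_type=2))

# Mixed-precision training via torch.cuda.amp with dynamic loss scaling.
fp16_hook = Fp16OptimizerHook(loss_scale='dynamic',
                              grad_clip=dict(max_norm=35, norm_type=2))

# runner.register_hook(fp16_hook, priority='ABOVE_NORMAL')  # illustrative; runner assumed
```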
+ """ + + def __init__(self, *args, **kwargs): + super(GradientCumulativeFp16OptimizerHook, + self).__init__(*args, **kwargs) + + def after_train_iter(self, runner): + if not self.initialized: + self._init(runner) + + if runner.iter < self.divisible_iters: + loss_factor = self.cumulative_iters + else: + loss_factor = self.remainder_iters + loss = runner.outputs['loss'] + loss = loss / loss_factor + + self.loss_scaler.scale(loss).backward() + + if (self.every_n_iters(runner, self.cumulative_iters) + or self.is_last_iter(runner)): + + # copy fp16 grads in the model to fp32 params in the optimizer + self.loss_scaler.unscale_(runner.optimizer) + + if self.grad_clip is not None: + grad_norm = self.clip_grads(runner.model.parameters()) + if grad_norm is not None: + # Add grad norm to the logger + runner.log_buffer.update( + {'grad_norm': float(grad_norm)}, + runner.outputs['num_samples']) + + # backward and update scaler + self.loss_scaler.step(runner.optimizer) + self.loss_scaler.update(self._scale_update_param) + + # save state_dict of loss_scaler + runner.meta.setdefault( + 'fp16', {})['loss_scaler'] = self.loss_scaler.state_dict() + + # clear grads + runner.model.zero_grad() + runner.optimizer.zero_grad() + +else: + + @HOOKS.register_module() + class Fp16OptimizerHook(OptimizerHook): + """FP16 optimizer hook (mmcv's implementation). + + The steps of fp16 optimizer is as follows. + 1. Scale the loss value. + 2. BP in the fp16 model. + 2. Copy gradients from fp16 model to fp32 weights. + 3. Update fp32 weights. + 4. Copy updated parameters from fp32 weights to fp16 model. + + Refer to https://arxiv.org/abs/1710.03740 for more details. + + Args: + loss_scale (float | str | dict): Scale factor configuration. + If loss_scale is a float, static loss scaling will be used with + the specified scale. If loss_scale is a string, it must be + 'dynamic', then dynamic loss scaling will be used. + It can also be a dict containing arguments of LossScaler. + Defaults to 512. + """ + + def __init__(self, + grad_clip=None, + coalesce=True, + bucket_size_mb=-1, + loss_scale=512., + distributed=True): + self.grad_clip = grad_clip + self.coalesce = coalesce + self.bucket_size_mb = bucket_size_mb + self.distributed = distributed + if loss_scale == 'dynamic': + self.loss_scaler = LossScaler(mode='dynamic') + elif isinstance(loss_scale, float): + self.loss_scaler = LossScaler( + init_scale=loss_scale, mode='static') + elif isinstance(loss_scale, dict): + self.loss_scaler = LossScaler(**loss_scale) + else: + raise ValueError('loss_scale must be of type float, dict, or ' + f'"dynamic", got {loss_scale}') + + def before_run(self, runner): + """Preparing steps before Mixed Precision Training. + + 1. Make a master copy of fp32 weights for optimization. + 2. Convert the main model from fp32 to fp16. 
+ """ + # keep a copy of fp32 weights + old_groups = runner.optimizer.param_groups + runner.optimizer.param_groups = copy.deepcopy( + runner.optimizer.param_groups) + state = defaultdict(dict) + p_map = { + old_p: p + for old_p, p in zip( + chain(*(g['params'] for g in old_groups)), + chain(*(g['params'] + for g in runner.optimizer.param_groups))) + } + for k, v in runner.optimizer.state.items(): + state[p_map[k]] = v + runner.optimizer.state = state + # convert model to fp16 + wrap_fp16_model(runner.model) + # resume from state dict + if 'fp16' in runner.meta and 'loss_scaler' in runner.meta['fp16']: + scaler_state_dict = runner.meta['fp16']['loss_scaler'] + self.loss_scaler.load_state_dict(scaler_state_dict) + + def copy_grads_to_fp32(self, fp16_net, fp32_weights): + """Copy gradients from fp16 model to fp32 weight copy.""" + for fp32_param, fp16_param in zip(fp32_weights, + fp16_net.parameters()): + if fp16_param.grad is not None: + if fp32_param.grad is None: + fp32_param.grad = fp32_param.data.new( + fp32_param.size()) + fp32_param.grad.copy_(fp16_param.grad) + + def copy_params_to_fp16(self, fp16_net, fp32_weights): + """Copy updated params from fp32 weight copy to fp16 model.""" + for fp16_param, fp32_param in zip(fp16_net.parameters(), + fp32_weights): + fp16_param.data.copy_(fp32_param.data) + + def after_train_iter(self, runner): + """Backward optimization steps for Mixed Precision Training. For + dynamic loss scaling, please refer `loss_scalar.py` + + 1. Scale the loss by a scale factor. + 2. Backward the loss to obtain the gradients (fp16). + 3. Copy gradients from the model to the fp32 weight copy. + 4. Scale the gradients back and update the fp32 weight copy. + 5. Copy back the params from fp32 weight copy to the fp16 model. + 6. Save loss_scaler state_dict for resume purpose. 
+ """ + # clear grads of last iteration + runner.model.zero_grad() + runner.optimizer.zero_grad() + # scale the loss value + scaled_loss = runner.outputs['loss'] * self.loss_scaler.loss_scale + scaled_loss.backward() + # copy fp16 grads in the model to fp32 params in the optimizer + + fp32_weights = [] + for param_group in runner.optimizer.param_groups: + fp32_weights += param_group['params'] + self.copy_grads_to_fp32(runner.model, fp32_weights) + # allreduce grads + if self.distributed: + allreduce_grads(fp32_weights, self.coalesce, + self.bucket_size_mb) + + has_overflow = self.loss_scaler.has_overflow(fp32_weights) + # if has overflow, skip this iteration + if not has_overflow: + # scale the gradients back + for param in fp32_weights: + if param.grad is not None: + param.grad.div_(self.loss_scaler.loss_scale) + if self.grad_clip is not None: + grad_norm = self.clip_grads(fp32_weights) + if grad_norm is not None: + # Add grad norm to the logger + runner.log_buffer.update( + {'grad_norm': float(grad_norm)}, + runner.outputs['num_samples']) + # update fp32 params + runner.optimizer.step() + # copy fp32 params to the fp16 model + self.copy_params_to_fp16(runner.model, fp32_weights) + self.loss_scaler.update_scale(has_overflow) + if has_overflow: + runner.logger.warning('Check overflow, downscale loss scale ' + f'to {self.loss_scaler.cur_scale}') + + # save state_dict of loss_scaler + runner.meta.setdefault( + 'fp16', {})['loss_scaler'] = self.loss_scaler.state_dict() + + @HOOKS.register_module() + class GradientCumulativeFp16OptimizerHook(GradientCumulativeOptimizerHook, + Fp16OptimizerHook): + """Fp16 optimizer Hook (using mmcv implementation) implements multi- + iters gradient cumulating.""" + + def __init__(self, *args, **kwargs): + super(GradientCumulativeFp16OptimizerHook, + self).__init__(*args, **kwargs) + + def after_train_iter(self, runner): + if not self.initialized: + self._init(runner) + + if runner.iter < self.divisible_iters: + loss_factor = self.cumulative_iters + else: + loss_factor = self.remainder_iters + + loss = runner.outputs['loss'] + loss = loss / loss_factor + + # scale the loss value + scaled_loss = loss * self.loss_scaler.loss_scale + scaled_loss.backward() + + if (self.every_n_iters(runner, self.cumulative_iters) + or self.is_last_iter(runner)): + + # copy fp16 grads in the model to fp32 params in the optimizer + fp32_weights = [] + for param_group in runner.optimizer.param_groups: + fp32_weights += param_group['params'] + self.copy_grads_to_fp32(runner.model, fp32_weights) + # allreduce grads + if self.distributed: + allreduce_grads(fp32_weights, self.coalesce, + self.bucket_size_mb) + + has_overflow = self.loss_scaler.has_overflow(fp32_weights) + # if has overflow, skip this iteration + if not has_overflow: + # scale the gradients back + for param in fp32_weights: + if param.grad is not None: + param.grad.div_(self.loss_scaler.loss_scale) + if self.grad_clip is not None: + grad_norm = self.clip_grads(fp32_weights) + if grad_norm is not None: + # Add grad norm to the logger + runner.log_buffer.update( + {'grad_norm': float(grad_norm)}, + runner.outputs['num_samples']) + # update fp32 params + runner.optimizer.step() + # copy fp32 params to the fp16 model + self.copy_params_to_fp16(runner.model, fp32_weights) + else: + runner.logger.warning( + 'Check overflow, downscale loss scale ' + f'to {self.loss_scaler.cur_scale}') + + self.loss_scaler.update_scale(has_overflow) + + # save state_dict of loss_scaler + runner.meta.setdefault( + 'fp16', {})['loss_scaler'] = 
self.loss_scaler.state_dict() + + # clear grads + runner.model.zero_grad() + runner.optimizer.zero_grad() diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/profiler.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/profiler.py new file mode 100644 index 0000000000000000000000000000000000000000..b70236997eec59c2209ef351ae38863b4112d0ec --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/profiler.py @@ -0,0 +1,180 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import warnings +from typing import Callable, List, Optional, Union + +import torch + +from ..dist_utils import master_only +from .hook import HOOKS, Hook + + +@HOOKS.register_module() +class ProfilerHook(Hook): + """Profiler to analyze performance during training. + + PyTorch Profiler is a tool that allows the collection of the performance + metrics during the training. More details on Profiler can be found at + https://pytorch.org/docs/1.8.1/profiler.html#torch.profiler.profile + + Args: + by_epoch (bool): Profile performance by epoch or by iteration. + Default: True. + profile_iters (int): Number of iterations for profiling. + If ``by_epoch=True``, profile_iters indicates that they are the + first profile_iters epochs at the beginning of the + training, otherwise it indicates the first profile_iters + iterations. Default: 1. + activities (list[str]): List of activity groups (CPU, CUDA) to use in + profiling. Default: ['cpu', 'cuda']. + schedule (dict, optional): Config of generating the callable schedule. + if schedule is None, profiler will not add step markers into the + trace and table view. Default: None. + on_trace_ready (callable, dict): Either a handler or a dict of generate + handler. Default: None. + record_shapes (bool): Save information about operator's input shapes. + Default: False. + profile_memory (bool): Track tensor memory allocation/deallocation. + Default: False. + with_stack (bool): Record source information (file and line number) + for the ops. Default: False. + with_flops (bool): Use formula to estimate the FLOPS of specific + operators (matrix multiplication and 2D convolution). + Default: False. + json_trace_path (str, optional): Exports the collected trace in Chrome + JSON format. Default: None. + + Example: + >>> runner = ... # instantiate a Runner + >>> # tensorboard trace + >>> trace_config = dict(type='tb_trace', dir_name='work_dir') + >>> profiler_config = dict(on_trace_ready=trace_config) + >>> runner.register_profiler_hook(profiler_config) + >>> runner.run(data_loaders=[trainloader], workflow=[('train', 1)]) + """ + + def __init__(self, + by_epoch: bool = True, + profile_iters: int = 1, + activities: List[str] = ['cpu', 'cuda'], + schedule: Optional[dict] = None, + on_trace_ready: Optional[Union[Callable, dict]] = None, + record_shapes: bool = False, + profile_memory: bool = False, + with_stack: bool = False, + with_flops: bool = False, + json_trace_path: Optional[str] = None) -> None: + try: + from torch import profiler # torch version >= 1.8.1 + except ImportError: + raise ImportError('profiler is the new feature of torch1.8.1, ' + f'but your version is {torch.__version__}') + + assert isinstance(by_epoch, bool), '``by_epoch`` should be a boolean.' 
+ self.by_epoch = by_epoch + + if profile_iters < 1: + raise ValueError('profile_iters should be greater than 0, but got ' + f'{profile_iters}') + self.profile_iters = profile_iters + + if not isinstance(activities, list): + raise ValueError( + f'activities should be list, but got {type(activities)}') + self.activities = [] + for activity in activities: + activity = activity.lower() + if activity == 'cpu': + self.activities.append(profiler.ProfilerActivity.CPU) + elif activity == 'cuda': + self.activities.append(profiler.ProfilerActivity.CUDA) + else: + raise ValueError( + f'activity should be "cpu" or "cuda", but got {activity}') + + if schedule is not None: + self.schedule = profiler.schedule(**schedule) + else: + self.schedule = None + + self.on_trace_ready = on_trace_ready + self.record_shapes = record_shapes + self.profile_memory = profile_memory + self.with_stack = with_stack + self.with_flops = with_flops + self.json_trace_path = json_trace_path + + @master_only + def before_run(self, runner): + if self.by_epoch and runner.max_epochs < self.profile_iters: + raise ValueError('self.profile_iters should not be greater than ' + f'{runner.max_epochs}') + + if not self.by_epoch and runner.max_iters < self.profile_iters: + raise ValueError('self.profile_iters should not be greater than ' + f'{runner.max_iters}') + + if callable(self.on_trace_ready): # handler + _on_trace_ready = self.on_trace_ready + elif isinstance(self.on_trace_ready, dict): # config of handler + trace_cfg = self.on_trace_ready.copy() + trace_type = trace_cfg.pop('type') # log_trace handler + if trace_type == 'log_trace': + + def _log_handler(prof): + print(prof.key_averages().table(**trace_cfg)) + + _on_trace_ready = _log_handler + elif trace_type == 'tb_trace': # tensorboard_trace handler + try: + import torch_tb_profiler # noqa: F401 + except ImportError: + raise ImportError('please run "pip install ' + 'torch-tb-profiler" to install ' + 'torch_tb_profiler') + _on_trace_ready = torch.profiler.tensorboard_trace_handler( + **trace_cfg) + else: + raise ValueError('trace_type should be "log_trace" or ' + f'"tb_trace", but got {trace_type}') + elif self.on_trace_ready is None: + _on_trace_ready = None # type: ignore + else: + raise ValueError('on_trace_ready should be handler, dict or None, ' + f'but got {type(self.on_trace_ready)}') + + if runner.max_epochs > 1: + warnings.warn(f'profiler will profile {runner.max_epochs} epochs ' + 'instead of 1 epoch. Since profiler will slow down ' + 'the training, it is recommended to train 1 epoch ' + 'with ProfilerHook and adjust your setting according' + ' to the profiler summary. 
During normal training ' + '(epoch > 1), you may disable the ProfilerHook.') + + self.profiler = torch.profiler.profile( + activities=self.activities, + schedule=self.schedule, + on_trace_ready=_on_trace_ready, + record_shapes=self.record_shapes, + profile_memory=self.profile_memory, + with_stack=self.with_stack, + with_flops=self.with_flops) + + self.profiler.__enter__() + runner.logger.info('profiler is profiling...') + + @master_only + def after_train_epoch(self, runner): + if self.by_epoch and runner.epoch == self.profile_iters - 1: + runner.logger.info('profiler may take a few minutes...') + self.profiler.__exit__(None, None, None) + if self.json_trace_path is not None: + self.profiler.export_chrome_trace(self.json_trace_path) + + @master_only + def after_train_iter(self, runner): + self.profiler.step() + if not self.by_epoch and runner.iter == self.profile_iters - 1: + runner.logger.info('profiler may take a few minutes...') + self.profiler.__exit__(None, None, None) + if self.json_trace_path is not None: + self.profiler.export_chrome_trace(self.json_trace_path) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/sampler_seed.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/sampler_seed.py new file mode 100644 index 0000000000000000000000000000000000000000..ee0dc6bdd8df5775857028aaed5444c0f59caf80 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/sampler_seed.py @@ -0,0 +1,20 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .hook import HOOKS, Hook + + +@HOOKS.register_module() +class DistSamplerSeedHook(Hook): + """Data-loading sampler for distributed training. + + When distributed training, it is only useful in conjunction with + :obj:`EpochBasedRunner`, while :obj:`IterBasedRunner` achieves the same + purpose with :obj:`IterLoader`. + """ + + def before_epoch(self, runner): + if hasattr(runner.data_loader.sampler, 'set_epoch'): + # in case the data loader uses `SequentialSampler` in Pytorch + runner.data_loader.sampler.set_epoch(runner.epoch) + elif hasattr(runner.data_loader.batch_sampler.sampler, 'set_epoch'): + # batch sampler in pytorch warps the sampler as its attributes. + runner.data_loader.batch_sampler.sampler.set_epoch(runner.epoch) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/sync_buffer.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/sync_buffer.py new file mode 100644 index 0000000000000000000000000000000000000000..6376b7ff894280cb2782243b25e8973650591577 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/hooks/sync_buffer.py @@ -0,0 +1,22 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from ..dist_utils import allreduce_params +from .hook import HOOKS, Hook + + +@HOOKS.register_module() +class SyncBuffersHook(Hook): + """Synchronize model buffers such as running_mean and running_var in BN at + the end of each epoch. + + Args: + distributed (bool): Whether distributed training is used. It is + effective only for distributed training. Defaults to True. 
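Tying the ProfilerHook pieces above together: `on_trace_ready` may be a callable or a small config dict, resolved in `before_run` to either a stdout table (`log_trace`, extra keys are passed to `key_averages().table()`) or a TensorBoard trace handler (`tb_trace`). A hedged construction sketch (requires torch >= 1.8.1; `sort_by`, `row_limit`, and the output directory are illustrative; the registration call assumes an existing runner):

```python
from annotator.mmpkg.mmcv.runner.hooks.profiler import ProfilerHook

# Print an operator table to stdout after the first 10 iterations.
profiler_hook = ProfilerHook(
    by_epoch=False,
    profile_iters=10,
    activities=['cpu', 'cuda'],
    on_trace_ready=dict(type='log_trace',
                        sort_by='self_cuda_time_total', row_limit=20))

# Alternatively, write a TensorBoard trace (needs `pip install torch-tb-profiler`):
# profiler_hook = ProfilerHook(on_trace_ready=dict(type='tb_trace', dir_name='work_dir'))

# runner.register_hook(profiler_hook)  # illustrative; a runner instance is assumed
```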
+ """ + + def __init__(self, distributed=True): + self.distributed = distributed + + def after_epoch(self, runner): + """All-reduce model buffers at the end of each epoch.""" + if self.distributed: + allreduce_params(runner.model.buffers()) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/iter_based_runner.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/iter_based_runner.py new file mode 100644 index 0000000000000000000000000000000000000000..e93849ba8a0960d958c76151d5bdd406e4b795a4 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/iter_based_runner.py @@ -0,0 +1,273 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp +import platform +import shutil +import time +import warnings + +import torch +from torch.optim import Optimizer + +import annotator.mmpkg.mmcv as mmcv +from .base_runner import BaseRunner +from .builder import RUNNERS +from .checkpoint import save_checkpoint +from .hooks import IterTimerHook +from .utils import get_host_info + + +class IterLoader: + + def __init__(self, dataloader): + self._dataloader = dataloader + self.iter_loader = iter(self._dataloader) + self._epoch = 0 + + @property + def epoch(self): + return self._epoch + + def __next__(self): + try: + data = next(self.iter_loader) + except StopIteration: + self._epoch += 1 + if hasattr(self._dataloader.sampler, 'set_epoch'): + self._dataloader.sampler.set_epoch(self._epoch) + time.sleep(2) # Prevent possible deadlock during epoch transition + self.iter_loader = iter(self._dataloader) + data = next(self.iter_loader) + + return data + + def __len__(self): + return len(self._dataloader) + + +@RUNNERS.register_module() +class IterBasedRunner(BaseRunner): + """Iteration-based Runner. + + This runner train models iteration by iteration. + """ + + def train(self, data_loader, **kwargs): + self.model.train() + self.mode = 'train' + self.data_loader = data_loader + self._epoch = data_loader.epoch + data_batch = next(data_loader) + self.call_hook('before_train_iter') + outputs = self.model.train_step(data_batch, self.optimizer, **kwargs) + if not isinstance(outputs, dict): + raise TypeError('model.train_step() must return a dict') + if 'log_vars' in outputs: + self.log_buffer.update(outputs['log_vars'], outputs['num_samples']) + self.outputs = outputs + self.call_hook('after_train_iter') + self._inner_iter += 1 + self._iter += 1 + + @torch.no_grad() + def val(self, data_loader, **kwargs): + self.model.eval() + self.mode = 'val' + self.data_loader = data_loader + data_batch = next(data_loader) + self.call_hook('before_val_iter') + outputs = self.model.val_step(data_batch, **kwargs) + if not isinstance(outputs, dict): + raise TypeError('model.val_step() must return a dict') + if 'log_vars' in outputs: + self.log_buffer.update(outputs['log_vars'], outputs['num_samples']) + self.outputs = outputs + self.call_hook('after_val_iter') + self._inner_iter += 1 + + def run(self, data_loaders, workflow, max_iters=None, **kwargs): + """Start running. + + Args: + data_loaders (list[:obj:`DataLoader`]): Dataloaders for training + and validation. + workflow (list[tuple]): A list of (phase, iters) to specify the + running order and iterations. E.g, [('train', 10000), + ('val', 1000)] means running 10000 iterations for training and + 1000 iterations for validation, iteratively. 
+ """ + assert isinstance(data_loaders, list) + assert mmcv.is_list_of(workflow, tuple) + assert len(data_loaders) == len(workflow) + if max_iters is not None: + warnings.warn( + 'setting max_iters in run is deprecated, ' + 'please set max_iters in runner_config', DeprecationWarning) + self._max_iters = max_iters + assert self._max_iters is not None, ( + 'max_iters must be specified during instantiation') + + work_dir = self.work_dir if self.work_dir is not None else 'NONE' + self.logger.info('Start running, host: %s, work_dir: %s', + get_host_info(), work_dir) + self.logger.info('Hooks will be executed in the following order:\n%s', + self.get_hook_info()) + self.logger.info('workflow: %s, max: %d iters', workflow, + self._max_iters) + self.call_hook('before_run') + + iter_loaders = [IterLoader(x) for x in data_loaders] + + self.call_hook('before_epoch') + + while self.iter < self._max_iters: + for i, flow in enumerate(workflow): + self._inner_iter = 0 + mode, iters = flow + if not isinstance(mode, str) or not hasattr(self, mode): + raise ValueError( + 'runner has no method named "{}" to run a workflow'. + format(mode)) + iter_runner = getattr(self, mode) + for _ in range(iters): + if mode == 'train' and self.iter >= self._max_iters: + break + iter_runner(iter_loaders[i], **kwargs) + + time.sleep(1) # wait for some hooks like loggers to finish + self.call_hook('after_epoch') + self.call_hook('after_run') + + def resume(self, + checkpoint, + resume_optimizer=True, + map_location='default'): + """Resume model from checkpoint. + + Args: + checkpoint (str): Checkpoint to resume from. + resume_optimizer (bool, optional): Whether resume the optimizer(s) + if the checkpoint file includes optimizer(s). Default to True. + map_location (str, optional): Same as :func:`torch.load`. + Default to 'default'. + """ + if map_location == 'default': + device_id = torch.cuda.current_device() + checkpoint = self.load_checkpoint( + checkpoint, + map_location=lambda storage, loc: storage.cuda(device_id)) + else: + checkpoint = self.load_checkpoint( + checkpoint, map_location=map_location) + + self._epoch = checkpoint['meta']['epoch'] + self._iter = checkpoint['meta']['iter'] + self._inner_iter = checkpoint['meta']['iter'] + if 'optimizer' in checkpoint and resume_optimizer: + if isinstance(self.optimizer, Optimizer): + self.optimizer.load_state_dict(checkpoint['optimizer']) + elif isinstance(self.optimizer, dict): + for k in self.optimizer.keys(): + self.optimizer[k].load_state_dict( + checkpoint['optimizer'][k]) + else: + raise TypeError( + 'Optimizer should be dict or torch.optim.Optimizer ' + f'but got {type(self.optimizer)}') + + self.logger.info(f'resumed from epoch: {self.epoch}, iter {self.iter}') + + def save_checkpoint(self, + out_dir, + filename_tmpl='iter_{}.pth', + meta=None, + save_optimizer=True, + create_symlink=True): + """Save checkpoint to file. + + Args: + out_dir (str): Directory to save checkpoint files. + filename_tmpl (str, optional): Checkpoint file template. + Defaults to 'iter_{}.pth'. + meta (dict, optional): Metadata to be saved in checkpoint. + Defaults to None. + save_optimizer (bool, optional): Whether save optimizer. + Defaults to True. + create_symlink (bool, optional): Whether create symlink to the + latest checkpoint file. Defaults to True. 
+ """ + if meta is None: + meta = {} + elif not isinstance(meta, dict): + raise TypeError( + f'meta should be a dict or None, but got {type(meta)}') + if self.meta is not None: + meta.update(self.meta) + # Note: meta.update(self.meta) should be done before + # meta.update(epoch=self.epoch + 1, iter=self.iter) otherwise + # there will be problems with resumed checkpoints. + # More details in https://github.com/open-mmlab/mmcv/pull/1108 + meta.update(epoch=self.epoch + 1, iter=self.iter) + + filename = filename_tmpl.format(self.iter + 1) + filepath = osp.join(out_dir, filename) + optimizer = self.optimizer if save_optimizer else None + save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta) + # in some environments, `os.symlink` is not supported, you may need to + # set `create_symlink` to False + if create_symlink: + dst_file = osp.join(out_dir, 'latest.pth') + if platform.system() != 'Windows': + mmcv.symlink(filename, dst_file) + else: + shutil.copy(filepath, dst_file) + + def register_training_hooks(self, + lr_config, + optimizer_config=None, + checkpoint_config=None, + log_config=None, + momentum_config=None, + custom_hooks_config=None): + """Register default hooks for iter-based training. + + Checkpoint hook, optimizer stepper hook and logger hooks will be set to + `by_epoch=False` by default. + + Default hooks include: + + +----------------------+-------------------------+ + | Hooks | Priority | + +======================+=========================+ + | LrUpdaterHook | VERY_HIGH (10) | + +----------------------+-------------------------+ + | MomentumUpdaterHook | HIGH (30) | + +----------------------+-------------------------+ + | OptimizerStepperHook | ABOVE_NORMAL (40) | + +----------------------+-------------------------+ + | CheckpointSaverHook | NORMAL (50) | + +----------------------+-------------------------+ + | IterTimerHook | LOW (70) | + +----------------------+-------------------------+ + | LoggerHook(s) | VERY_LOW (90) | + +----------------------+-------------------------+ + | CustomHook(s) | defaults to NORMAL (50) | + +----------------------+-------------------------+ + + If custom hooks have same priority with default hooks, custom hooks + will be triggered after default hooks. + """ + if checkpoint_config is not None: + checkpoint_config.setdefault('by_epoch', False) + if lr_config is not None: + lr_config.setdefault('by_epoch', False) + if log_config is not None: + for info in log_config['hooks']: + info.setdefault('by_epoch', False) + super(IterBasedRunner, self).register_training_hooks( + lr_config=lr_config, + momentum_config=momentum_config, + optimizer_config=optimizer_config, + checkpoint_config=checkpoint_config, + log_config=log_config, + timer_config=IterTimerHook(), + custom_hooks_config=custom_hooks_config) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/log_buffer.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/log_buffer.py new file mode 100644 index 0000000000000000000000000000000000000000..d949e2941c5400088c7cd8a1dc893d8b233ae785 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/log_buffer.py @@ -0,0 +1,41 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+from collections import OrderedDict + +import numpy as np + + +class LogBuffer: + + def __init__(self): + self.val_history = OrderedDict() + self.n_history = OrderedDict() + self.output = OrderedDict() + self.ready = False + + def clear(self): + self.val_history.clear() + self.n_history.clear() + self.clear_output() + + def clear_output(self): + self.output.clear() + self.ready = False + + def update(self, vars, count=1): + assert isinstance(vars, dict) + for key, var in vars.items(): + if key not in self.val_history: + self.val_history[key] = [] + self.n_history[key] = [] + self.val_history[key].append(var) + self.n_history[key].append(count) + + def average(self, n=0): + """Average latest n values or all values.""" + assert n >= 0 + for key in self.val_history: + values = np.array(self.val_history[key][-n:]) + nums = np.array(self.n_history[key][-n:]) + avg = np.sum(values * nums) / np.sum(nums) + self.output[key] = avg + self.ready = True diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/optimizer/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/optimizer/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..53c34d0470992cbc374f29681fdd00dc0e57968d --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/optimizer/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .builder import (OPTIMIZER_BUILDERS, OPTIMIZERS, build_optimizer, + build_optimizer_constructor) +from .default_constructor import DefaultOptimizerConstructor + +__all__ = [ + 'OPTIMIZER_BUILDERS', 'OPTIMIZERS', 'DefaultOptimizerConstructor', + 'build_optimizer', 'build_optimizer_constructor' +] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/optimizer/builder.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/optimizer/builder.py new file mode 100644 index 0000000000000000000000000000000000000000..f9234eed8f1f186d9d8dfda34562157ee39bdb3a --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/optimizer/builder.py @@ -0,0 +1,44 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
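`LogBuffer.average(n)` above computes a count-weighted mean over the last `n` updates, which is why `update()` records both the value and the sample count. A quick check of that behaviour using the class as defined above:

```python
from annotator.mmpkg.mmcv.runner.log_buffer import LogBuffer

buf = LogBuffer()
buf.update({'loss': 2.0}, count=8)    # a batch of 8 samples with mean loss 2.0
buf.update({'loss': 1.0}, count=2)    # a batch of 2 samples with mean loss 1.0
buf.average()
print(buf.output['loss'])             # (2.0*8 + 1.0*2) / 10 = 1.8, not the plain mean 1.5
```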
+import copy +import inspect + +import torch + +from ...utils import Registry, build_from_cfg + +OPTIMIZERS = Registry('optimizer') +OPTIMIZER_BUILDERS = Registry('optimizer builder') + + +def register_torch_optimizers(): + torch_optimizers = [] + for module_name in dir(torch.optim): + if module_name.startswith('__'): + continue + _optim = getattr(torch.optim, module_name) + if inspect.isclass(_optim) and issubclass(_optim, + torch.optim.Optimizer): + OPTIMIZERS.register_module()(_optim) + torch_optimizers.append(module_name) + return torch_optimizers + + +TORCH_OPTIMIZERS = register_torch_optimizers() + + +def build_optimizer_constructor(cfg): + return build_from_cfg(cfg, OPTIMIZER_BUILDERS) + + +def build_optimizer(model, cfg): + optimizer_cfg = copy.deepcopy(cfg) + constructor_type = optimizer_cfg.pop('constructor', + 'DefaultOptimizerConstructor') + paramwise_cfg = optimizer_cfg.pop('paramwise_cfg', None) + optim_constructor = build_optimizer_constructor( + dict( + type=constructor_type, + optimizer_cfg=optimizer_cfg, + paramwise_cfg=paramwise_cfg)) + optimizer = optim_constructor(model) + return optimizer diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/optimizer/default_constructor.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/optimizer/default_constructor.py new file mode 100644 index 0000000000000000000000000000000000000000..de2ae39cb6378cc17c098f5324f5d5c321879b91 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/optimizer/default_constructor.py @@ -0,0 +1,249 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import warnings + +import torch +from torch.nn import GroupNorm, LayerNorm + +from annotator.mmpkg.mmcv.utils import _BatchNorm, _InstanceNorm, build_from_cfg, is_list_of +from annotator.mmpkg.mmcv.utils.ext_loader import check_ops_exist +from .builder import OPTIMIZER_BUILDERS, OPTIMIZERS + + +@OPTIMIZER_BUILDERS.register_module() +class DefaultOptimizerConstructor: + """Default constructor for optimizers. + + By default each parameter share the same optimizer settings, and we + provide an argument ``paramwise_cfg`` to specify parameter-wise settings. + It is a dict and may contain the following fields: + + - ``custom_keys`` (dict): Specified parameters-wise settings by keys. If + one of the keys in ``custom_keys`` is a substring of the name of one + parameter, then the setting of the parameter will be specified by + ``custom_keys[key]`` and other setting like ``bias_lr_mult`` etc. will + be ignored. It should be noted that the aforementioned ``key`` is the + longest key that is a substring of the name of the parameter. If there + are multiple matched keys with the same length, then the key with lower + alphabet order will be chosen. + ``custom_keys[key]`` should be a dict and may contain fields ``lr_mult`` + and ``decay_mult``. See Example 2 below. + - ``bias_lr_mult`` (float): It will be multiplied to the learning + rate for all bias parameters (except for those in normalization + layers and offset layers of DCN). + - ``bias_decay_mult`` (float): It will be multiplied to the weight + decay for all bias parameters (except for those in + normalization layers, depthwise conv layers, offset layers of DCN). + - ``norm_decay_mult`` (float): It will be multiplied to the weight + decay for all weight and bias parameters of normalization + layers. 
+ - ``dwconv_decay_mult`` (float): It will be multiplied to the weight + decay for all weight and bias parameters of depthwise conv + layers. + - ``dcn_offset_lr_mult`` (float): It will be multiplied to the learning + rate for parameters of offset layer in the deformable convs + of a model. + - ``bypass_duplicate`` (bool): If true, the duplicate parameters + would not be added into optimizer. Default: False. + + Note: + 1. If the option ``dcn_offset_lr_mult`` is used, the constructor will + override the effect of ``bias_lr_mult`` in the bias of offset + layer. So be careful when using both ``bias_lr_mult`` and + ``dcn_offset_lr_mult``. If you wish to apply both of them to the + offset layer in deformable convs, set ``dcn_offset_lr_mult`` + to the original ``dcn_offset_lr_mult`` * ``bias_lr_mult``. + 2. If the option ``dcn_offset_lr_mult`` is used, the constructor will + apply it to all the DCN layers in the model. So be careful when + the model contains multiple DCN layers in places other than + backbone. + + Args: + model (:obj:`nn.Module`): The model with parameters to be optimized. + optimizer_cfg (dict): The config dict of the optimizer. + Positional fields are + + - `type`: class name of the optimizer. + + Optional fields are + + - any arguments of the corresponding optimizer type, e.g., + lr, weight_decay, momentum, etc. + paramwise_cfg (dict, optional): Parameter-wise options. + + Example 1: + >>> model = torch.nn.modules.Conv1d(1, 1, 1) + >>> optimizer_cfg = dict(type='SGD', lr=0.01, momentum=0.9, + >>> weight_decay=0.0001) + >>> paramwise_cfg = dict(norm_decay_mult=0.) + >>> optim_builder = DefaultOptimizerConstructor( + >>> optimizer_cfg, paramwise_cfg) + >>> optimizer = optim_builder(model) + + Example 2: + >>> # assume model have attribute model.backbone and model.cls_head + >>> optimizer_cfg = dict(type='SGD', lr=0.01, weight_decay=0.95) + >>> paramwise_cfg = dict(custom_keys={ + '.backbone': dict(lr_mult=0.1, decay_mult=0.9)}) + >>> optim_builder = DefaultOptimizerConstructor( + >>> optimizer_cfg, paramwise_cfg) + >>> optimizer = optim_builder(model) + >>> # Then the `lr` and `weight_decay` for model.backbone is + >>> # (0.01 * 0.1, 0.95 * 0.9). `lr` and `weight_decay` for + >>> # model.cls_head is (0.01, 0.95). 
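Since `register_torch_optimizers()` in the builder above pre-registers every `torch.optim` optimizer, `build_optimizer` can resolve them by name and hand the parameter-wise rules to `DefaultOptimizerConstructor`. A sketch combining the two (the model and the multipliers are illustrative; note that `paramwise_cfg` lives inside the optimizer config dict, from which `build_optimizer` pops it):

```python
import torch
from annotator.mmpkg.mmcv.runner.optimizer import build_optimizer

model = torch.nn.Sequential(torch.nn.Conv2d(3, 8, 3), torch.nn.BatchNorm2d(8))

optimizer = build_optimizer(
    model,
    dict(
        type='SGD', lr=0.01, momentum=0.9, weight_decay=1e-4,
        paramwise_cfg=dict(
            norm_decay_mult=0.0,   # disable weight decay on the BatchNorm params
            bias_lr_mult=2.0)))    # double the LR for (non-norm) bias parameters

for group in optimizer.param_groups:
    print(group['lr'], group['weight_decay'])
```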
+ """ + + def __init__(self, optimizer_cfg, paramwise_cfg=None): + if not isinstance(optimizer_cfg, dict): + raise TypeError('optimizer_cfg should be a dict', + f'but got {type(optimizer_cfg)}') + self.optimizer_cfg = optimizer_cfg + self.paramwise_cfg = {} if paramwise_cfg is None else paramwise_cfg + self.base_lr = optimizer_cfg.get('lr', None) + self.base_wd = optimizer_cfg.get('weight_decay', None) + self._validate_cfg() + + def _validate_cfg(self): + if not isinstance(self.paramwise_cfg, dict): + raise TypeError('paramwise_cfg should be None or a dict, ' + f'but got {type(self.paramwise_cfg)}') + + if 'custom_keys' in self.paramwise_cfg: + if not isinstance(self.paramwise_cfg['custom_keys'], dict): + raise TypeError( + 'If specified, custom_keys must be a dict, ' + f'but got {type(self.paramwise_cfg["custom_keys"])}') + if self.base_wd is None: + for key in self.paramwise_cfg['custom_keys']: + if 'decay_mult' in self.paramwise_cfg['custom_keys'][key]: + raise ValueError('base_wd should not be None') + + # get base lr and weight decay + # weight_decay must be explicitly specified if mult is specified + if ('bias_decay_mult' in self.paramwise_cfg + or 'norm_decay_mult' in self.paramwise_cfg + or 'dwconv_decay_mult' in self.paramwise_cfg): + if self.base_wd is None: + raise ValueError('base_wd should not be None') + + def _is_in(self, param_group, param_group_list): + assert is_list_of(param_group_list, dict) + param = set(param_group['params']) + param_set = set() + for group in param_group_list: + param_set.update(set(group['params'])) + + return not param.isdisjoint(param_set) + + def add_params(self, params, module, prefix='', is_dcn_module=None): + """Add all parameters of module to the params list. + + The parameters of the given module will be added to the list of param + groups, with specific rules defined by paramwise_cfg. + + Args: + params (list[dict]): A list of param groups, it will be modified + in place. + module (nn.Module): The module to be added. + prefix (str): The prefix of the module + is_dcn_module (int|float|None): If the current module is a + submodule of DCN, `is_dcn_module` will be passed to + control conv_offset layer's learning rate. Defaults to None. + """ + # get param-wise options + custom_keys = self.paramwise_cfg.get('custom_keys', {}) + # first sort with alphabet order and then sort with reversed len of str + sorted_keys = sorted(sorted(custom_keys.keys()), key=len, reverse=True) + + bias_lr_mult = self.paramwise_cfg.get('bias_lr_mult', 1.) + bias_decay_mult = self.paramwise_cfg.get('bias_decay_mult', 1.) + norm_decay_mult = self.paramwise_cfg.get('norm_decay_mult', 1.) + dwconv_decay_mult = self.paramwise_cfg.get('dwconv_decay_mult', 1.) + bypass_duplicate = self.paramwise_cfg.get('bypass_duplicate', False) + dcn_offset_lr_mult = self.paramwise_cfg.get('dcn_offset_lr_mult', 1.) + + # special rules for norm layers and depth-wise conv layers + is_norm = isinstance(module, + (_BatchNorm, _InstanceNorm, GroupNorm, LayerNorm)) + is_dwconv = ( + isinstance(module, torch.nn.Conv2d) + and module.in_channels == module.groups) + + for name, param in module.named_parameters(recurse=False): + param_group = {'params': [param]} + if not param.requires_grad: + params.append(param_group) + continue + if bypass_duplicate and self._is_in(param_group, params): + warnings.warn(f'{prefix} is duplicate. 
It is skipped since ' + f'bypass_duplicate={bypass_duplicate}') + continue + # if the parameter match one of the custom keys, ignore other rules + is_custom = False + for key in sorted_keys: + if key in f'{prefix}.{name}': + is_custom = True + lr_mult = custom_keys[key].get('lr_mult', 1.) + param_group['lr'] = self.base_lr * lr_mult + if self.base_wd is not None: + decay_mult = custom_keys[key].get('decay_mult', 1.) + param_group['weight_decay'] = self.base_wd * decay_mult + break + + if not is_custom: + # bias_lr_mult affects all bias parameters + # except for norm.bias dcn.conv_offset.bias + if name == 'bias' and not (is_norm or is_dcn_module): + param_group['lr'] = self.base_lr * bias_lr_mult + + if (prefix.find('conv_offset') != -1 and is_dcn_module + and isinstance(module, torch.nn.Conv2d)): + # deal with both dcn_offset's bias & weight + param_group['lr'] = self.base_lr * dcn_offset_lr_mult + + # apply weight decay policies + if self.base_wd is not None: + # norm decay + if is_norm: + param_group[ + 'weight_decay'] = self.base_wd * norm_decay_mult + # depth-wise conv + elif is_dwconv: + param_group[ + 'weight_decay'] = self.base_wd * dwconv_decay_mult + # bias lr and decay + elif name == 'bias' and not is_dcn_module: + # TODO: current bias_decay_mult will have affect on DCN + param_group[ + 'weight_decay'] = self.base_wd * bias_decay_mult + params.append(param_group) + + if check_ops_exist(): + from annotator.mmpkg.mmcv.ops import DeformConv2d, ModulatedDeformConv2d + is_dcn_module = isinstance(module, + (DeformConv2d, ModulatedDeformConv2d)) + else: + is_dcn_module = False + for child_name, child_mod in module.named_children(): + child_prefix = f'{prefix}.{child_name}' if prefix else child_name + self.add_params( + params, + child_mod, + prefix=child_prefix, + is_dcn_module=is_dcn_module) + + def __call__(self, model): + if hasattr(model, 'module'): + model = model.module + + optimizer_cfg = self.optimizer_cfg.copy() + # if no paramwise option is specified, just use the global setting + if not self.paramwise_cfg: + optimizer_cfg['params'] = model.parameters() + return build_from_cfg(optimizer_cfg, OPTIMIZERS) + + # set param-wise lr and weight decay recursively + params = [] + self.add_params(params, model) + optimizer_cfg['params'] = params + + return build_from_cfg(optimizer_cfg, OPTIMIZERS) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/priority.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/priority.py new file mode 100644 index 0000000000000000000000000000000000000000..64cc4e3a05f8d5b89ab6eb32461e6e80f1d62e67 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/priority.py @@ -0,0 +1,60 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from enum import Enum + + +class Priority(Enum): + """Hook priority levels. 
+ + +--------------+------------+ + | Level | Value | + +==============+============+ + | HIGHEST | 0 | + +--------------+------------+ + | VERY_HIGH | 10 | + +--------------+------------+ + | HIGH | 30 | + +--------------+------------+ + | ABOVE_NORMAL | 40 | + +--------------+------------+ + | NORMAL | 50 | + +--------------+------------+ + | BELOW_NORMAL | 60 | + +--------------+------------+ + | LOW | 70 | + +--------------+------------+ + | VERY_LOW | 90 | + +--------------+------------+ + | LOWEST | 100 | + +--------------+------------+ + """ + + HIGHEST = 0 + VERY_HIGH = 10 + HIGH = 30 + ABOVE_NORMAL = 40 + NORMAL = 50 + BELOW_NORMAL = 60 + LOW = 70 + VERY_LOW = 90 + LOWEST = 100 + + +def get_priority(priority): + """Get priority value. + + Args: + priority (int or str or :obj:`Priority`): Priority. + + Returns: + int: The priority value. + """ + if isinstance(priority, int): + if priority < 0 or priority > 100: + raise ValueError('priority must be between 0 and 100') + return priority + elif isinstance(priority, Priority): + return priority.value + elif isinstance(priority, str): + return Priority[priority.upper()].value + else: + raise TypeError('priority must be an integer or Priority enum value') diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/utils.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..11bbc523e9a009119531c5eb903a93fe40cc5bca --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/runner/utils.py @@ -0,0 +1,93 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os +import random +import sys +import time +import warnings +from getpass import getuser +from socket import gethostname + +import numpy as np +import torch + +import annotator.mmpkg.mmcv as mmcv + + +def get_host_info(): + """Get hostname and username. + + Return empty string if exception raised, e.g. ``getpass.getuser()`` will + lead to error in docker container + """ + host = '' + try: + host = f'{getuser()}@{gethostname()}' + except Exception as e: + warnings.warn(f'Host or user not found: {str(e)}') + finally: + return host + + +def get_time_str(): + return time.strftime('%Y%m%d_%H%M%S', time.localtime()) + + +def obj_from_dict(info, parent=None, default_args=None): + """Initialize an object from dict. + + The dict must contain the key "type", which indicates the object type, it + can be either a string or type, such as "list" or ``list``. Remaining + fields are treated as the arguments for constructing the object. + + Args: + info (dict): Object types and arguments. + parent (:class:`module`): Module which may containing expected object + classes. + default_args (dict, optional): Default arguments for initializing the + object. + + Returns: + any type: Object built from the dict. 
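`get_priority` below the table normalises ints, strings, and `Priority` members to a single numeric value, which is how hooks registered with string priorities end up ordered. A quick illustration:

```python
from annotator.mmpkg.mmcv.runner.priority import Priority, get_priority

print(get_priority('ABOVE_NORMAL'))     # 40
print(get_priority(Priority.VERY_LOW))  # 90
print(get_priority(55))                 # 55 (integers must stay within 0..100)
```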
+ """ + assert isinstance(info, dict) and 'type' in info + assert isinstance(default_args, dict) or default_args is None + args = info.copy() + obj_type = args.pop('type') + if mmcv.is_str(obj_type): + if parent is not None: + obj_type = getattr(parent, obj_type) + else: + obj_type = sys.modules[obj_type] + elif not isinstance(obj_type, type): + raise TypeError('type must be a str or valid type, but ' + f'got {type(obj_type)}') + if default_args is not None: + for name, value in default_args.items(): + args.setdefault(name, value) + return obj_type(**args) + + +def set_random_seed(seed, deterministic=False, use_rank_shift=False): + """Set random seed. + + Args: + seed (int): Seed to be used. + deterministic (bool): Whether to set the deterministic option for + CUDNN backend, i.e., set `torch.backends.cudnn.deterministic` + to True and `torch.backends.cudnn.benchmark` to False. + Default: False. + rank_shift (bool): Whether to add rank number to the random seed to + have different random seed in different threads. Default: False. + """ + if use_rank_shift: + rank, _ = mmcv.runner.get_dist_info() + seed += rank + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + os.environ['PYTHONHASHSEED'] = str(seed) + if deterministic: + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..378a0068432a371af364de9d73785901c0f83383 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/__init__.py @@ -0,0 +1,69 @@ +# flake8: noqa +# Copyright (c) OpenMMLab. All rights reserved. 
+from .config import Config, ConfigDict, DictAction +from .misc import (check_prerequisites, concat_list, deprecated_api_warning, + has_method, import_modules_from_strings, is_list_of, + is_method_overridden, is_seq_of, is_str, is_tuple_of, + iter_cast, list_cast, requires_executable, requires_package, + slice_list, to_1tuple, to_2tuple, to_3tuple, to_4tuple, + to_ntuple, tuple_cast) +from .path import (check_file_exist, fopen, is_filepath, mkdir_or_exist, + scandir, symlink) +from .progressbar import (ProgressBar, track_iter_progress, + track_parallel_progress, track_progress) +from .testing import (assert_attrs_equal, assert_dict_contains_subset, + assert_dict_has_keys, assert_is_norm_layer, + assert_keys_equal, assert_params_all_zeros, + check_python_script) +from .timer import Timer, TimerError, check_time +from .version_utils import digit_version, get_git_hash + +try: + import torch +except ImportError: + __all__ = [ + 'Config', 'ConfigDict', 'DictAction', 'is_str', 'iter_cast', + 'list_cast', 'tuple_cast', 'is_seq_of', 'is_list_of', 'is_tuple_of', + 'slice_list', 'concat_list', 'check_prerequisites', 'requires_package', + 'requires_executable', 'is_filepath', 'fopen', 'check_file_exist', + 'mkdir_or_exist', 'symlink', 'scandir', 'ProgressBar', + 'track_progress', 'track_iter_progress', 'track_parallel_progress', + 'Timer', 'TimerError', 'check_time', 'deprecated_api_warning', + 'digit_version', 'get_git_hash', 'import_modules_from_strings', + 'assert_dict_contains_subset', 'assert_attrs_equal', + 'assert_dict_has_keys', 'assert_keys_equal', 'check_python_script', + 'to_1tuple', 'to_2tuple', 'to_3tuple', 'to_4tuple', 'to_ntuple', + 'is_method_overridden', 'has_method' + ] +else: + from .env import collect_env + from .logging import get_logger, print_log + from .parrots_jit import jit, skip_no_elena + from .parrots_wrapper import ( + TORCH_VERSION, BuildExtension, CppExtension, CUDAExtension, DataLoader, + PoolDataLoader, SyncBatchNorm, _AdaptiveAvgPoolNd, _AdaptiveMaxPoolNd, + _AvgPoolNd, _BatchNorm, _ConvNd, _ConvTransposeMixin, _InstanceNorm, + _MaxPoolNd, get_build_config, is_rocm_pytorch, _get_cuda_home) + from .registry import Registry, build_from_cfg + from .trace import is_jit_tracing + __all__ = [ + 'Config', 'ConfigDict', 'DictAction', 'collect_env', 'get_logger', + 'print_log', 'is_str', 'iter_cast', 'list_cast', 'tuple_cast', + 'is_seq_of', 'is_list_of', 'is_tuple_of', 'slice_list', 'concat_list', + 'check_prerequisites', 'requires_package', 'requires_executable', + 'is_filepath', 'fopen', 'check_file_exist', 'mkdir_or_exist', + 'symlink', 'scandir', 'ProgressBar', 'track_progress', + 'track_iter_progress', 'track_parallel_progress', 'Registry', + 'build_from_cfg', 'Timer', 'TimerError', 'check_time', 'SyncBatchNorm', + '_AdaptiveAvgPoolNd', '_AdaptiveMaxPoolNd', '_AvgPoolNd', '_BatchNorm', + '_ConvNd', '_ConvTransposeMixin', '_InstanceNorm', '_MaxPoolNd', + 'get_build_config', 'BuildExtension', 'CppExtension', 'CUDAExtension', + 'DataLoader', 'PoolDataLoader', 'TORCH_VERSION', + 'deprecated_api_warning', 'digit_version', 'get_git_hash', + 'import_modules_from_strings', 'jit', 'skip_no_elena', + 'assert_dict_contains_subset', 'assert_attrs_equal', + 'assert_dict_has_keys', 'assert_keys_equal', 'assert_is_norm_layer', + 'assert_params_all_zeros', 'check_python_script', + 'is_method_overridden', 'is_jit_tracing', 'is_rocm_pytorch', + '_get_cuda_home', 'has_method' + ] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/config.py 
b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/config.py new file mode 100644 index 0000000000000000000000000000000000000000..e2f7551f95cbf5d8ffa225bba7325632b5e7f01b --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/config.py @@ -0,0 +1,688 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import ast +import copy +import os +import os.path as osp +import platform +import shutil +import sys +import tempfile +import uuid +import warnings +from argparse import Action, ArgumentParser +from collections import abc +from importlib import import_module + +from addict import Dict +from yapf.yapflib.yapf_api import FormatCode + +from .misc import import_modules_from_strings +from .path import check_file_exist + +if platform.system() == 'Windows': + import regex as re +else: + import re + +BASE_KEY = '_base_' +DELETE_KEY = '_delete_' +DEPRECATION_KEY = '_deprecation_' +RESERVED_KEYS = ['filename', 'text', 'pretty_text'] + + +class ConfigDict(Dict): + + def __missing__(self, name): + raise KeyError(name) + + def __getattr__(self, name): + try: + value = super(ConfigDict, self).__getattr__(name) + except KeyError: + ex = AttributeError(f"'{self.__class__.__name__}' object has no " + f"attribute '{name}'") + except Exception as e: + ex = e + else: + return value + raise ex + + +def add_args(parser, cfg, prefix=''): + for k, v in cfg.items(): + if isinstance(v, str): + parser.add_argument('--' + prefix + k) + elif isinstance(v, int): + parser.add_argument('--' + prefix + k, type=int) + elif isinstance(v, float): + parser.add_argument('--' + prefix + k, type=float) + elif isinstance(v, bool): + parser.add_argument('--' + prefix + k, action='store_true') + elif isinstance(v, dict): + add_args(parser, v, prefix + k + '.') + elif isinstance(v, abc.Iterable): + parser.add_argument('--' + prefix + k, type=type(v[0]), nargs='+') + else: + print(f'cannot parse key {prefix + k} of type {type(v)}') + return parser + + +class Config: + """A facility for config and config files. + + It supports common file formats as configs: python/json/yaml. The interface + is the same as a dict object and also allows access config values as + attributes. 
+ + Example: + >>> cfg = Config(dict(a=1, b=dict(b1=[0, 1]))) + >>> cfg.a + 1 + >>> cfg.b + {'b1': [0, 1]} + >>> cfg.b.b1 + [0, 1] + >>> cfg = Config.fromfile('tests/data/config/a.py') + >>> cfg.filename + "/home/kchen/projects/mmcv/tests/data/config/a.py" + >>> cfg.item4 + 'test' + >>> cfg + "Config [path: /home/kchen/projects/mmcv/tests/data/config/a.py]: " + "{'item1': [1, 2], 'item2': {'a': 0}, 'item3': True, 'item4': 'test'}" + """ + + @staticmethod + def _validate_py_syntax(filename): + with open(filename, 'r', encoding='utf-8') as f: + # Setting encoding explicitly to resolve coding issue on windows + content = f.read() + try: + ast.parse(content) + except SyntaxError as e: + raise SyntaxError('There are syntax errors in config ' + f'file {filename}: {e}') + + @staticmethod + def _substitute_predefined_vars(filename, temp_config_name): + file_dirname = osp.dirname(filename) + file_basename = osp.basename(filename) + file_basename_no_extension = osp.splitext(file_basename)[0] + file_extname = osp.splitext(filename)[1] + support_templates = dict( + fileDirname=file_dirname, + fileBasename=file_basename, + fileBasenameNoExtension=file_basename_no_extension, + fileExtname=file_extname) + with open(filename, 'r', encoding='utf-8') as f: + # Setting encoding explicitly to resolve coding issue on windows + config_file = f.read() + for key, value in support_templates.items(): + regexp = r'\{\{\s*' + str(key) + r'\s*\}\}' + value = value.replace('\\', '/') + config_file = re.sub(regexp, value, config_file) + with open(temp_config_name, 'w', encoding='utf-8') as tmp_config_file: + tmp_config_file.write(config_file) + + @staticmethod + def _pre_substitute_base_vars(filename, temp_config_name): + """Substitute base variable placehoders to string, so that parsing + would work.""" + with open(filename, 'r', encoding='utf-8') as f: + # Setting encoding explicitly to resolve coding issue on windows + config_file = f.read() + base_var_dict = {} + regexp = r'\{\{\s*' + BASE_KEY + r'\.([\w\.]+)\s*\}\}' + base_vars = set(re.findall(regexp, config_file)) + for base_var in base_vars: + randstr = f'_{base_var}_{uuid.uuid4().hex.lower()[:6]}' + base_var_dict[randstr] = base_var + regexp = r'\{\{\s*' + BASE_KEY + r'\.' 
+ base_var + r'\s*\}\}' + config_file = re.sub(regexp, f'"{randstr}"', config_file) + with open(temp_config_name, 'w', encoding='utf-8') as tmp_config_file: + tmp_config_file.write(config_file) + return base_var_dict + + @staticmethod + def _substitute_base_vars(cfg, base_var_dict, base_cfg): + """Substitute variable strings to their actual values.""" + cfg = copy.deepcopy(cfg) + + if isinstance(cfg, dict): + for k, v in cfg.items(): + if isinstance(v, str) and v in base_var_dict: + new_v = base_cfg + for new_k in base_var_dict[v].split('.'): + new_v = new_v[new_k] + cfg[k] = new_v + elif isinstance(v, (list, tuple, dict)): + cfg[k] = Config._substitute_base_vars( + v, base_var_dict, base_cfg) + elif isinstance(cfg, tuple): + cfg = tuple( + Config._substitute_base_vars(c, base_var_dict, base_cfg) + for c in cfg) + elif isinstance(cfg, list): + cfg = [ + Config._substitute_base_vars(c, base_var_dict, base_cfg) + for c in cfg + ] + elif isinstance(cfg, str) and cfg in base_var_dict: + new_v = base_cfg + for new_k in base_var_dict[cfg].split('.'): + new_v = new_v[new_k] + cfg = new_v + + return cfg + + @staticmethod + def _file2dict(filename, use_predefined_variables=True): + filename = osp.abspath(osp.expanduser(filename)) + check_file_exist(filename) + fileExtname = osp.splitext(filename)[1] + if fileExtname not in ['.py', '.json', '.yaml', '.yml']: + raise IOError('Only py/yml/yaml/json type are supported now!') + + with tempfile.TemporaryDirectory() as temp_config_dir: + temp_config_file = tempfile.NamedTemporaryFile( + dir=temp_config_dir, suffix=fileExtname) + if platform.system() == 'Windows': + temp_config_file.close() + temp_config_name = osp.basename(temp_config_file.name) + # Substitute predefined variables + if use_predefined_variables: + Config._substitute_predefined_vars(filename, + temp_config_file.name) + else: + shutil.copyfile(filename, temp_config_file.name) + # Substitute base variables from placeholders to strings + base_var_dict = Config._pre_substitute_base_vars( + temp_config_file.name, temp_config_file.name) + + if filename.endswith('.py'): + temp_module_name = osp.splitext(temp_config_name)[0] + sys.path.insert(0, temp_config_dir) + Config._validate_py_syntax(filename) + mod = import_module(temp_module_name) + sys.path.pop(0) + cfg_dict = { + name: value + for name, value in mod.__dict__.items() + if not name.startswith('__') + } + # delete imported module + del sys.modules[temp_module_name] + elif filename.endswith(('.yml', '.yaml', '.json')): + import annotator.mmpkg.mmcv as mmcv + cfg_dict = mmcv.load(temp_config_file.name) + # close temp file + temp_config_file.close() + + # check deprecation information + if DEPRECATION_KEY in cfg_dict: + deprecation_info = cfg_dict.pop(DEPRECATION_KEY) + warning_msg = f'The config file {filename} will be deprecated ' \ + 'in the future.' + if 'expected' in deprecation_info: + warning_msg += f' Please use {deprecation_info["expected"]} ' \ + 'instead.' 
+ if 'reference' in deprecation_info: + warning_msg += ' More information can be found at ' \ + f'{deprecation_info["reference"]}' + warnings.warn(warning_msg) + + cfg_text = filename + '\n' + with open(filename, 'r', encoding='utf-8') as f: + # Setting encoding explicitly to resolve coding issue on windows + cfg_text += f.read() + + if BASE_KEY in cfg_dict: + cfg_dir = osp.dirname(filename) + base_filename = cfg_dict.pop(BASE_KEY) + base_filename = base_filename if isinstance( + base_filename, list) else [base_filename] + + cfg_dict_list = list() + cfg_text_list = list() + for f in base_filename: + _cfg_dict, _cfg_text = Config._file2dict(osp.join(cfg_dir, f)) + cfg_dict_list.append(_cfg_dict) + cfg_text_list.append(_cfg_text) + + base_cfg_dict = dict() + for c in cfg_dict_list: + duplicate_keys = base_cfg_dict.keys() & c.keys() + if len(duplicate_keys) > 0: + raise KeyError('Duplicate key is not allowed among bases. ' + f'Duplicate keys: {duplicate_keys}') + base_cfg_dict.update(c) + + # Substitute base variables from strings to their actual values + cfg_dict = Config._substitute_base_vars(cfg_dict, base_var_dict, + base_cfg_dict) + + base_cfg_dict = Config._merge_a_into_b(cfg_dict, base_cfg_dict) + cfg_dict = base_cfg_dict + + # merge cfg_text + cfg_text_list.append(cfg_text) + cfg_text = '\n'.join(cfg_text_list) + + return cfg_dict, cfg_text + + @staticmethod + def _merge_a_into_b(a, b, allow_list_keys=False): + """merge dict ``a`` into dict ``b`` (non-inplace). + + Values in ``a`` will overwrite ``b``. ``b`` is copied first to avoid + in-place modifications. + + Args: + a (dict): The source dict to be merged into ``b``. + b (dict): The origin dict to be fetch keys from ``a``. + allow_list_keys (bool): If True, int string keys (e.g. '0', '1') + are allowed in source ``a`` and will replace the element of the + corresponding index in b if b is a list. Default: False. + + Returns: + dict: The modified dict of ``b`` using ``a``. + + Examples: + # Normally merge a into b. + >>> Config._merge_a_into_b( + ... dict(obj=dict(a=2)), dict(obj=dict(a=1))) + {'obj': {'a': 2}} + + # Delete b first and merge a into b. + >>> Config._merge_a_into_b( + ... dict(obj=dict(_delete_=True, a=2)), dict(obj=dict(a=1))) + {'obj': {'a': 2}} + + # b is a list + >>> Config._merge_a_into_b( + ... {'0': dict(a=2)}, [dict(a=1), dict(b=2)], True) + [{'a': 2}, {'b': 2}] + """ + b = b.copy() + for k, v in a.items(): + if allow_list_keys and k.isdigit() and isinstance(b, list): + k = int(k) + if len(b) <= k: + raise KeyError(f'Index {k} exceeds the length of list {b}') + b[k] = Config._merge_a_into_b(v, b[k], allow_list_keys) + elif isinstance(v, + dict) and k in b and not v.pop(DELETE_KEY, False): + allowed_types = (dict, list) if allow_list_keys else dict + if not isinstance(b[k], allowed_types): + raise TypeError( + f'{k}={v} in child config cannot inherit from base ' + f'because {k} is a dict in the child config but is of ' + f'type {type(b[k])} in base config. 
You may set ' + f'`{DELETE_KEY}=True` to ignore the base config') + b[k] = Config._merge_a_into_b(v, b[k], allow_list_keys) + else: + b[k] = v + return b + + @staticmethod + def fromfile(filename, + use_predefined_variables=True, + import_custom_modules=True): + cfg_dict, cfg_text = Config._file2dict(filename, + use_predefined_variables) + if import_custom_modules and cfg_dict.get('custom_imports', None): + import_modules_from_strings(**cfg_dict['custom_imports']) + return Config(cfg_dict, cfg_text=cfg_text, filename=filename) + + @staticmethod + def fromstring(cfg_str, file_format): + """Generate config from config str. + + Args: + cfg_str (str): Config str. + file_format (str): Config file format corresponding to the + config str. Only py/yml/yaml/json type are supported now! + + Returns: + obj:`Config`: Config obj. + """ + if file_format not in ['.py', '.json', '.yaml', '.yml']: + raise IOError('Only py/yml/yaml/json type are supported now!') + if file_format != '.py' and 'dict(' in cfg_str: + # check if users specify a wrong suffix for python + warnings.warn( + 'Please check "file_format", the file format may be .py') + with tempfile.NamedTemporaryFile( + 'w', encoding='utf-8', suffix=file_format, + delete=False) as temp_file: + temp_file.write(cfg_str) + # on windows, previous implementation cause error + # see PR 1077 for details + cfg = Config.fromfile(temp_file.name) + os.remove(temp_file.name) + return cfg + + @staticmethod + def auto_argparser(description=None): + """Generate argparser from config file automatically (experimental)""" + partial_parser = ArgumentParser(description=description) + partial_parser.add_argument('config', help='config file path') + cfg_file = partial_parser.parse_known_args()[0].config + cfg = Config.fromfile(cfg_file) + parser = ArgumentParser(description=description) + parser.add_argument('config', help='config file path') + add_args(parser, cfg) + return parser, cfg + + def __init__(self, cfg_dict=None, cfg_text=None, filename=None): + if cfg_dict is None: + cfg_dict = dict() + elif not isinstance(cfg_dict, dict): + raise TypeError('cfg_dict must be a dict, but ' + f'got {type(cfg_dict)}') + for key in cfg_dict: + if key in RESERVED_KEYS: + raise KeyError(f'{key} is reserved for config file') + + super(Config, self).__setattr__('_cfg_dict', ConfigDict(cfg_dict)) + super(Config, self).__setattr__('_filename', filename) + if cfg_text: + text = cfg_text + elif filename: + with open(filename, 'r') as f: + text = f.read() + else: + text = '' + super(Config, self).__setattr__('_text', text) + + @property + def filename(self): + return self._filename + + @property + def text(self): + return self._text + + @property + def pretty_text(self): + + indent = 4 + + def _indent(s_, num_spaces): + s = s_.split('\n') + if len(s) == 1: + return s_ + first = s.pop(0) + s = [(num_spaces * ' ') + line for line in s] + s = '\n'.join(s) + s = first + '\n' + s + return s + + def _format_basic_types(k, v, use_mapping=False): + if isinstance(v, str): + v_str = f"'{v}'" + else: + v_str = str(v) + + if use_mapping: + k_str = f"'{k}'" if isinstance(k, str) else str(k) + attr_str = f'{k_str}: {v_str}' + else: + attr_str = f'{str(k)}={v_str}' + attr_str = _indent(attr_str, indent) + + return attr_str + + def _format_list(k, v, use_mapping=False): + # check if all items in the list are dict + if all(isinstance(_, dict) for _ in v): + v_str = '[\n' + v_str += '\n'.join( + f'dict({_indent(_format_dict(v_), indent)}),' + for v_ in v).rstrip(',') + if use_mapping: + k_str = f"'{k}'" if 
isinstance(k, str) else str(k) + attr_str = f'{k_str}: {v_str}' + else: + attr_str = f'{str(k)}={v_str}' + attr_str = _indent(attr_str, indent) + ']' + else: + attr_str = _format_basic_types(k, v, use_mapping) + return attr_str + + def _contain_invalid_identifier(dict_str): + contain_invalid_identifier = False + for key_name in dict_str: + contain_invalid_identifier |= \ + (not str(key_name).isidentifier()) + return contain_invalid_identifier + + def _format_dict(input_dict, outest_level=False): + r = '' + s = [] + + use_mapping = _contain_invalid_identifier(input_dict) + if use_mapping: + r += '{' + for idx, (k, v) in enumerate(input_dict.items()): + is_last = idx >= len(input_dict) - 1 + end = '' if outest_level or is_last else ',' + if isinstance(v, dict): + v_str = '\n' + _format_dict(v) + if use_mapping: + k_str = f"'{k}'" if isinstance(k, str) else str(k) + attr_str = f'{k_str}: dict({v_str}' + else: + attr_str = f'{str(k)}=dict({v_str}' + attr_str = _indent(attr_str, indent) + ')' + end + elif isinstance(v, list): + attr_str = _format_list(k, v, use_mapping) + end + else: + attr_str = _format_basic_types(k, v, use_mapping) + end + + s.append(attr_str) + r += '\n'.join(s) + if use_mapping: + r += '}' + return r + + cfg_dict = self._cfg_dict.to_dict() + text = _format_dict(cfg_dict, outest_level=True) + # copied from setup.cfg + yapf_style = dict( + based_on_style='pep8', + blank_line_before_nested_class_or_def=True, + split_before_expression_after_opening_paren=True) + text, _ = FormatCode(text, style_config=yapf_style, verify=True) + + return text + + def __repr__(self): + return f'Config (path: {self.filename}): {self._cfg_dict.__repr__()}' + + def __len__(self): + return len(self._cfg_dict) + + def __getattr__(self, name): + return getattr(self._cfg_dict, name) + + def __getitem__(self, name): + return self._cfg_dict.__getitem__(name) + + def __setattr__(self, name, value): + if isinstance(value, dict): + value = ConfigDict(value) + self._cfg_dict.__setattr__(name, value) + + def __setitem__(self, name, value): + if isinstance(value, dict): + value = ConfigDict(value) + self._cfg_dict.__setitem__(name, value) + + def __iter__(self): + return iter(self._cfg_dict) + + def __getstate__(self): + return (self._cfg_dict, self._filename, self._text) + + def __setstate__(self, state): + _cfg_dict, _filename, _text = state + super(Config, self).__setattr__('_cfg_dict', _cfg_dict) + super(Config, self).__setattr__('_filename', _filename) + super(Config, self).__setattr__('_text', _text) + + def dump(self, file=None): + cfg_dict = super(Config, self).__getattribute__('_cfg_dict').to_dict() + if self.filename.endswith('.py'): + if file is None: + return self.pretty_text + else: + with open(file, 'w', encoding='utf-8') as f: + f.write(self.pretty_text) + else: + import annotator.mmpkg.mmcv as mmcv + if file is None: + file_format = self.filename.split('.')[-1] + return mmcv.dump(cfg_dict, file_format=file_format) + else: + mmcv.dump(cfg_dict, file) + + def merge_from_dict(self, options, allow_list_keys=True): + """Merge list into cfg_dict. + + Merge the dict parsed by MultipleKVAction into this cfg. + + Examples: + >>> options = {'model.backbone.depth': 50, + ... 'model.backbone.with_cp':True} + >>> cfg = Config(dict(model=dict(backbone=dict(type='ResNet')))) + >>> cfg.merge_from_dict(options) + >>> cfg_dict = super(Config, self).__getattribute__('_cfg_dict') + >>> assert cfg_dict == dict( + ... 
model=dict(backbone=dict(depth=50, with_cp=True))) + + # Merge list element + >>> cfg = Config(dict(pipeline=[ + ... dict(type='LoadImage'), dict(type='LoadAnnotations')])) + >>> options = dict(pipeline={'0': dict(type='SelfLoadImage')}) + >>> cfg.merge_from_dict(options, allow_list_keys=True) + >>> cfg_dict = super(Config, self).__getattribute__('_cfg_dict') + >>> assert cfg_dict == dict(pipeline=[ + ... dict(type='SelfLoadImage'), dict(type='LoadAnnotations')]) + + Args: + options (dict): dict of configs to merge from. + allow_list_keys (bool): If True, int string keys (e.g. '0', '1') + are allowed in ``options`` and will replace the element of the + corresponding index in the config if the config is a list. + Default: True. + """ + option_cfg_dict = {} + for full_key, v in options.items(): + d = option_cfg_dict + key_list = full_key.split('.') + for subkey in key_list[:-1]: + d.setdefault(subkey, ConfigDict()) + d = d[subkey] + subkey = key_list[-1] + d[subkey] = v + + cfg_dict = super(Config, self).__getattribute__('_cfg_dict') + super(Config, self).__setattr__( + '_cfg_dict', + Config._merge_a_into_b( + option_cfg_dict, cfg_dict, allow_list_keys=allow_list_keys)) + + +class DictAction(Action): + """ + argparse action to split an argument into KEY=VALUE form + on the first = and append to a dictionary. List options can + be passed as comma separated values, i.e 'KEY=V1,V2,V3', or with explicit + brackets, i.e. 'KEY=[V1,V2,V3]'. It also support nested brackets to build + list/tuple values. e.g. 'KEY=[(V1,V2),(V3,V4)]' + """ + + @staticmethod + def _parse_int_float_bool(val): + try: + return int(val) + except ValueError: + pass + try: + return float(val) + except ValueError: + pass + if val.lower() in ['true', 'false']: + return True if val.lower() == 'true' else False + return val + + @staticmethod + def _parse_iterable(val): + """Parse iterable values in the string. + + All elements inside '()' or '[]' are treated as iterable values. + + Args: + val (str): Value string. + + Returns: + list | tuple: The expanded list or tuple from the string. + + Examples: + >>> DictAction._parse_iterable('1,2,3') + [1, 2, 3] + >>> DictAction._parse_iterable('[a, b, c]') + ['a', 'b', 'c'] + >>> DictAction._parse_iterable('[(1, 2, 3), [a, b], c]') + [(1, 2, 3), ['a', 'b'], 'c'] + """ + + def find_next_comma(string): + """Find the position of next comma in the string. + + If no ',' is found in the string, return the string length. All + chars inside '()' and '[]' are treated as one element and thus ',' + inside these brackets are ignored. + """ + assert (string.count('(') == string.count(')')) and ( + string.count('[') == string.count(']')), \ + f'Imbalanced brackets exist in {string}' + end = len(string) + for idx, char in enumerate(string): + pre = string[:idx] + # The string before this ',' is balanced + if ((char == ',') and (pre.count('(') == pre.count(')')) + and (pre.count('[') == pre.count(']'))): + end = idx + break + return end + + # Strip ' and " characters and replace whitespace. 
+ val = val.strip('\'\"').replace(' ', '') + is_tuple = False + if val.startswith('(') and val.endswith(')'): + is_tuple = True + val = val[1:-1] + elif val.startswith('[') and val.endswith(']'): + val = val[1:-1] + elif ',' not in val: + # val is a single value + return DictAction._parse_int_float_bool(val) + + values = [] + while len(val) > 0: + comma_idx = find_next_comma(val) + element = DictAction._parse_iterable(val[:comma_idx]) + values.append(element) + val = val[comma_idx + 1:] + if is_tuple: + values = tuple(values) + return values + + def __call__(self, parser, namespace, values, option_string=None): + options = {} + for kv in values: + key, val = kv.split('=', maxsplit=1) + options[key] = self._parse_iterable(val) + setattr(namespace, self.dest, options) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/env.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/env.py new file mode 100644 index 0000000000000000000000000000000000000000..a0c6e64a63f8a3ed813b749c134823a0ef69964c --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/env.py @@ -0,0 +1,95 @@ +# Copyright (c) OpenMMLab. All rights reserved. +"""This file holding some environment constant for sharing by other files.""" + +import os.path as osp +import subprocess +import sys +from collections import defaultdict + +import cv2 +import torch + +import annotator.mmpkg.mmcv as mmcv +from .parrots_wrapper import get_build_config + + +def collect_env(): + """Collect the information of the running environments. + + Returns: + dict: The environment information. The following fields are contained. + + - sys.platform: The variable of ``sys.platform``. + - Python: Python version. + - CUDA available: Bool, indicating if CUDA is available. + - GPU devices: Device type of each GPU. + - CUDA_HOME (optional): The env var ``CUDA_HOME``. + - NVCC (optional): NVCC version. + - GCC: GCC version, "n/a" if GCC is not installed. + - PyTorch: PyTorch version. + - PyTorch compiling details: The output of \ + ``torch.__config__.show()``. + - TorchVision (optional): TorchVision version. + - OpenCV: OpenCV version. + - MMCV: MMCV version. + - MMCV Compiler: The GCC version for compiling MMCV ops. + - MMCV CUDA Compiler: The CUDA version for compiling MMCV ops. 
+ """ + env_info = {} + env_info['sys.platform'] = sys.platform + env_info['Python'] = sys.version.replace('\n', '') + + cuda_available = torch.cuda.is_available() + env_info['CUDA available'] = cuda_available + + if cuda_available: + devices = defaultdict(list) + for k in range(torch.cuda.device_count()): + devices[torch.cuda.get_device_name(k)].append(str(k)) + for name, device_ids in devices.items(): + env_info['GPU ' + ','.join(device_ids)] = name + + from annotator.mmpkg.mmcv.utils.parrots_wrapper import _get_cuda_home + CUDA_HOME = _get_cuda_home() + env_info['CUDA_HOME'] = CUDA_HOME + + if CUDA_HOME is not None and osp.isdir(CUDA_HOME): + try: + nvcc = osp.join(CUDA_HOME, 'bin/nvcc') + nvcc = subprocess.check_output( + f'"{nvcc}" -V | tail -n1', shell=True) + nvcc = nvcc.decode('utf-8').strip() + except subprocess.SubprocessError: + nvcc = 'Not Available' + env_info['NVCC'] = nvcc + + try: + gcc = subprocess.check_output('gcc --version | head -n1', shell=True) + gcc = gcc.decode('utf-8').strip() + env_info['GCC'] = gcc + except subprocess.CalledProcessError: # gcc is unavailable + env_info['GCC'] = 'n/a' + + env_info['PyTorch'] = torch.__version__ + env_info['PyTorch compiling details'] = get_build_config() + + try: + import torchvision + env_info['TorchVision'] = torchvision.__version__ + except ModuleNotFoundError: + pass + + env_info['OpenCV'] = cv2.__version__ + + env_info['MMCV'] = mmcv.__version__ + + try: + from annotator.mmpkg.mmcv.ops import get_compiler_version, get_compiling_cuda_version + except ModuleNotFoundError: + env_info['MMCV Compiler'] = 'n/a' + env_info['MMCV CUDA Compiler'] = 'n/a' + else: + env_info['MMCV Compiler'] = get_compiler_version() + env_info['MMCV CUDA Compiler'] = get_compiling_cuda_version() + + return env_info diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/ext_loader.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/ext_loader.py new file mode 100644 index 0000000000000000000000000000000000000000..08132d2c1b9a1c28880e4bab4d4fa1ba39d9d083 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/ext_loader.py @@ -0,0 +1,71 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import importlib +import os +import pkgutil +import warnings +from collections import namedtuple + +import torch + +if torch.__version__ != 'parrots': + + def load_ext(name, funcs): + ext = importlib.import_module('mmcv.' 
+ name) + for fun in funcs: + assert hasattr(ext, fun), f'{fun} miss in module {name}' + return ext +else: + from parrots import extension + from parrots.base import ParrotsException + + has_return_value_ops = [ + 'nms', + 'softnms', + 'nms_match', + 'nms_rotated', + 'top_pool_forward', + 'top_pool_backward', + 'bottom_pool_forward', + 'bottom_pool_backward', + 'left_pool_forward', + 'left_pool_backward', + 'right_pool_forward', + 'right_pool_backward', + 'fused_bias_leakyrelu', + 'upfirdn2d', + 'ms_deform_attn_forward', + 'pixel_group', + 'contour_expand', + ] + + def get_fake_func(name, e): + + def fake_func(*args, **kwargs): + warnings.warn(f'{name} is not supported in parrots now') + raise e + + return fake_func + + def load_ext(name, funcs): + ExtModule = namedtuple('ExtModule', funcs) + ext_list = [] + lib_root = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) + for fun in funcs: + try: + ext_fun = extension.load(fun, name, lib_dir=lib_root) + except ParrotsException as e: + if 'No element registered' not in e.message: + warnings.warn(e.message) + ext_fun = get_fake_func(fun, e) + ext_list.append(ext_fun) + else: + if fun in has_return_value_ops: + ext_list.append(ext_fun.op) + else: + ext_list.append(ext_fun.op_) + return ExtModule(*ext_list) + + +def check_ops_exist(): + ext_loader = pkgutil.find_loader('mmcv._ext') + return ext_loader is not None diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/logging.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/logging.py new file mode 100644 index 0000000000000000000000000000000000000000..4aa0e04bb9b3ab2a4bfbc4def50404ccbac2c6e6 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/logging.py @@ -0,0 +1,110 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import logging + +import torch.distributed as dist + +logger_initialized = {} + + +def get_logger(name, log_file=None, log_level=logging.INFO, file_mode='w'): + """Initialize and get a logger by name. + + If the logger has not been initialized, this method will initialize the + logger by adding one or two handlers, otherwise the initialized logger will + be directly returned. During initialization, a StreamHandler will always be + added. If `log_file` is specified and the process rank is 0, a FileHandler + will also be added. + + Args: + name (str): Logger name. + log_file (str | None): The log filename. If specified, a FileHandler + will be added to the logger. + log_level (int): The logger level. Note that only the process of + rank 0 is affected, and other processes will set the level to + "Error" thus be silent most of the time. + file_mode (str): The file mode used in opening log file. + Defaults to 'w'. + + Returns: + logging.Logger: The expected logger. + """ + logger = logging.getLogger(name) + if name in logger_initialized: + return logger + # handle hierarchical names + # e.g., logger "a" is initialized, then logger "a.b" will skip the + # initialization since it is a child of "a". + for logger_name in logger_initialized: + if name.startswith(logger_name): + return logger + + # handle duplicate logs to the console + # Starting in 1.8.0, PyTorch DDP attaches a StreamHandler (NOTSET) + # to the root logger. As logger.propagate is True by default, this root + # level handler causes logging messages from rank>0 processes to + # unexpectedly show up on the console, creating much unwanted clutter. 
+ # To fix this issue, we set the root logger's StreamHandler, if any, to log + at the ERROR level. + for handler in logger.root.handlers: + if type(handler) is logging.StreamHandler: + handler.setLevel(logging.ERROR) + + stream_handler = logging.StreamHandler() + handlers = [stream_handler] + + if dist.is_available() and dist.is_initialized(): + rank = dist.get_rank() + else: + rank = 0 + + # only rank 0 will add a FileHandler + if rank == 0 and log_file is not None: + # Here, the default behaviour of the official logger is 'a'. Thus, we + # provide an interface to change the file mode to the default + # behaviour. + file_handler = logging.FileHandler(log_file, file_mode) + handlers.append(file_handler) + + formatter = logging.Formatter( + '%(asctime)s - %(name)s - %(levelname)s - %(message)s') + for handler in handlers: + handler.setFormatter(formatter) + handler.setLevel(log_level) + logger.addHandler(handler) + + if rank == 0: + logger.setLevel(log_level) + else: + logger.setLevel(logging.ERROR) + + logger_initialized[name] = True + + return logger + + +def print_log(msg, logger=None, level=logging.INFO): + """Print a log message. + + Args: + msg (str): The message to be logged. + logger (logging.Logger | str | None): The logger to be used. + Some special loggers are: + - "silent": no message will be printed. + - other str: the logger obtained with `get_root_logger(logger)`. + - None: The `print()` method will be used to print log messages. + level (int): Logging level. Only available when `logger` is a Logger + object or "root". + """ + if logger is None: + print(msg) + elif isinstance(logger, logging.Logger): + logger.log(level, msg) + elif logger == 'silent': + pass + elif isinstance(logger, str): + _logger = get_logger(logger) + _logger.log(level, msg) + else: + raise TypeError( + 'logger should be either a logging.Logger object, str, ' + f'"silent" or None, but got {type(logger)}') diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/misc.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/misc.py new file mode 100644 index 0000000000000000000000000000000000000000..2c58d0d7fee9fe3d4519270ad8c1e998d0d8a18c --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/misc.py @@ -0,0 +1,377 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import collections.abc +import functools +import itertools +import subprocess +import warnings +from collections import abc +from importlib import import_module +from inspect import getfullargspec +from itertools import repeat + + +# From PyTorch internals +def _ntuple(n): + + def parse(x): + if isinstance(x, collections.abc.Iterable): + return x + return tuple(repeat(x, n)) + + return parse + + +to_1tuple = _ntuple(1) +to_2tuple = _ntuple(2) +to_3tuple = _ntuple(3) +to_4tuple = _ntuple(4) +to_ntuple = _ntuple + + +def is_str(x): + """Whether the input is a string instance. + + Note: This method is deprecated since python 2 is no longer supported. + """ + return isinstance(x, str) + + +def import_modules_from_strings(imports, allow_failed_imports=False): + """Import modules from the given list of strings. + + Args: + imports (list | str | None): The given module names to be imported. + allow_failed_imports (bool): If True, the failed imports will return + None. Otherwise, an ImportError is raised. Default: False. + + Returns: + list[module] | module | None: The imported modules. + + Examples: + >>> osp, sys = import_modules_from_strings( + ...
['os.path', 'sys']) + >>> import os.path as osp_ + >>> import sys as sys_ + >>> assert osp == osp_ + >>> assert sys == sys_ + """ + if not imports: + return + single_import = False + if isinstance(imports, str): + single_import = True + imports = [imports] + if not isinstance(imports, list): + raise TypeError( + f'custom_imports must be a list but got type {type(imports)}') + imported = [] + for imp in imports: + if not isinstance(imp, str): + raise TypeError( + f'{imp} is of type {type(imp)} and cannot be imported.') + try: + imported_tmp = import_module(imp) + except ImportError: + if allow_failed_imports: + warnings.warn(f'{imp} failed to import and is ignored.', + UserWarning) + imported_tmp = None + else: + raise ImportError + imported.append(imported_tmp) + if single_import: + imported = imported[0] + return imported + + +def iter_cast(inputs, dst_type, return_type=None): + """Cast elements of an iterable object into some type. + + Args: + inputs (Iterable): The input object. + dst_type (type): Destination type. + return_type (type, optional): If specified, the output object will be + converted to this type, otherwise an iterator. + + Returns: + iterator or specified type: The converted object. + """ + if not isinstance(inputs, abc.Iterable): + raise TypeError('inputs must be an iterable object') + if not isinstance(dst_type, type): + raise TypeError('"dst_type" must be a valid type') + + out_iterable = map(dst_type, inputs) + + if return_type is None: + return out_iterable + else: + return return_type(out_iterable) + + +def list_cast(inputs, dst_type): + """Cast elements of an iterable object into a list of some type. + + A partial method of :func:`iter_cast`. + """ + return iter_cast(inputs, dst_type, return_type=list) + + +def tuple_cast(inputs, dst_type): + """Cast elements of an iterable object into a tuple of some type. + + A partial method of :func:`iter_cast`. + """ + return iter_cast(inputs, dst_type, return_type=tuple) + + +def is_seq_of(seq, expected_type, seq_type=None): + """Check whether it is a sequence of some type. + + Args: + seq (Sequence): The sequence to be checked. + expected_type (type): Expected type of sequence items. + seq_type (type, optional): Expected sequence type. + + Returns: + bool: Whether the sequence is valid. + """ + if seq_type is None: + exp_seq_type = abc.Sequence + else: + assert isinstance(seq_type, type) + exp_seq_type = seq_type + if not isinstance(seq, exp_seq_type): + return False + for item in seq: + if not isinstance(item, expected_type): + return False + return True + + +def is_list_of(seq, expected_type): + """Check whether it is a list of some type. + + A partial method of :func:`is_seq_of`. + """ + return is_seq_of(seq, expected_type, seq_type=list) + + +def is_tuple_of(seq, expected_type): + """Check whether it is a tuple of some type. + + A partial method of :func:`is_seq_of`. + """ + return is_seq_of(seq, expected_type, seq_type=tuple) + + +def slice_list(in_list, lens): + """Slice a list into several sub lists by a list of given length. + + Args: + in_list (list): The list to be sliced. + lens(int or list): The expected length of each out list. + + Returns: + list: A list of sliced list. 
+ """ + if isinstance(lens, int): + assert len(in_list) % lens == 0 + lens = [lens] * int(len(in_list) / lens) + if not isinstance(lens, list): + raise TypeError('"indices" must be an integer or a list of integers') + elif sum(lens) != len(in_list): + raise ValueError('sum of lens and list length does not ' + f'match: {sum(lens)} != {len(in_list)}') + out_list = [] + idx = 0 + for i in range(len(lens)): + out_list.append(in_list[idx:idx + lens[i]]) + idx += lens[i] + return out_list + + +def concat_list(in_list): + """Concatenate a list of list into a single list. + + Args: + in_list (list): The list of list to be merged. + + Returns: + list: The concatenated flat list. + """ + return list(itertools.chain(*in_list)) + + +def check_prerequisites( + prerequisites, + checker, + msg_tmpl='Prerequisites "{}" are required in method "{}" but not ' + 'found, please install them first.'): # yapf: disable + """A decorator factory to check if prerequisites are satisfied. + + Args: + prerequisites (str of list[str]): Prerequisites to be checked. + checker (callable): The checker method that returns True if a + prerequisite is meet, False otherwise. + msg_tmpl (str): The message template with two variables. + + Returns: + decorator: A specific decorator. + """ + + def wrap(func): + + @functools.wraps(func) + def wrapped_func(*args, **kwargs): + requirements = [prerequisites] if isinstance( + prerequisites, str) else prerequisites + missing = [] + for item in requirements: + if not checker(item): + missing.append(item) + if missing: + print(msg_tmpl.format(', '.join(missing), func.__name__)) + raise RuntimeError('Prerequisites not meet.') + else: + return func(*args, **kwargs) + + return wrapped_func + + return wrap + + +def _check_py_package(package): + try: + import_module(package) + except ImportError: + return False + else: + return True + + +def _check_executable(cmd): + if subprocess.call(f'which {cmd}', shell=True) != 0: + return False + else: + return True + + +def requires_package(prerequisites): + """A decorator to check if some python packages are installed. + + Example: + >>> @requires_package('numpy') + >>> func(arg1, args): + >>> return numpy.zeros(1) + array([0.]) + >>> @requires_package(['numpy', 'non_package']) + >>> func(arg1, args): + >>> return numpy.zeros(1) + ImportError + """ + return check_prerequisites(prerequisites, checker=_check_py_package) + + +def requires_executable(prerequisites): + """A decorator to check if some executable files are installed. + + Example: + >>> @requires_executable('ffmpeg') + >>> func(arg1, args): + >>> print(1) + 1 + """ + return check_prerequisites(prerequisites, checker=_check_executable) + + +def deprecated_api_warning(name_dict, cls_name=None): + """A decorator to check if some arguments are deprecate and try to replace + deprecate src_arg_name to dst_arg_name. + + Args: + name_dict(dict): + key (str): Deprecate argument names. + val (str): Expected argument names. + + Returns: + func: New function. 
+ """ + + def api_warning_wrapper(old_func): + + @functools.wraps(old_func) + def new_func(*args, **kwargs): + # get the arg spec of the decorated method + args_info = getfullargspec(old_func) + # get name of the function + func_name = old_func.__name__ + if cls_name is not None: + func_name = f'{cls_name}.{func_name}' + if args: + arg_names = args_info.args[:len(args)] + for src_arg_name, dst_arg_name in name_dict.items(): + if src_arg_name in arg_names: + warnings.warn( + f'"{src_arg_name}" is deprecated in ' + f'`{func_name}`, please use "{dst_arg_name}" ' + 'instead') + arg_names[arg_names.index(src_arg_name)] = dst_arg_name + if kwargs: + for src_arg_name, dst_arg_name in name_dict.items(): + if src_arg_name in kwargs: + + assert dst_arg_name not in kwargs, ( + f'The expected behavior is to replace ' + f'the deprecated key `{src_arg_name}` to ' + f'new key `{dst_arg_name}`, but got them ' + f'in the arguments at the same time, which ' + f'is confusing. `{src_arg_name} will be ' + f'deprecated in the future, please ' + f'use `{dst_arg_name}` instead.') + + warnings.warn( + f'"{src_arg_name}" is deprecated in ' + f'`{func_name}`, please use "{dst_arg_name}" ' + 'instead') + kwargs[dst_arg_name] = kwargs.pop(src_arg_name) + + # apply converted arguments to the decorated method + output = old_func(*args, **kwargs) + return output + + return new_func + + return api_warning_wrapper + + +def is_method_overridden(method, base_class, derived_class): + """Check if a method of base class is overridden in derived class. + + Args: + method (str): the method name to check. + base_class (type): the class of the base class. + derived_class (type | Any): the class or instance of the derived class. + """ + assert isinstance(base_class, type), \ + "base_class doesn't accept instance, Please pass class instead." + + if not isinstance(derived_class, type): + derived_class = derived_class.__class__ + + base_method = getattr(base_class, method) + derived_method = getattr(derived_class, method) + return derived_method != base_method + + +def has_method(obj: object, method: str) -> bool: + """Check whether the object has a method. + + Args: + method (str): The method name to check. + obj (object): The object to check. + + Returns: + bool: True if the object has the method else False. + """ + return hasattr(obj, method) and callable(getattr(obj, method)) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/parrots_jit.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/parrots_jit.py new file mode 100644 index 0000000000000000000000000000000000000000..61873f6dbb9b10ed972c90aa8faa321e3cb3249e --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/parrots_jit.py @@ -0,0 +1,41 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import os + +from .parrots_wrapper import TORCH_VERSION + +parrots_jit_option = os.getenv('PARROTS_JIT_OPTION') + +if TORCH_VERSION == 'parrots' and parrots_jit_option == 'ON': + from parrots.jit import pat as jit +else: + + def jit(func=None, + check_input=None, + full_shape=True, + derivate=False, + coderize=False, + optimize=False): + + def wrapper(func): + + def wrapper_inner(*args, **kargs): + return func(*args, **kargs) + + return wrapper_inner + + if func is None: + return wrapper + else: + return func + + +if TORCH_VERSION == 'parrots': + from parrots.utils.tester import skip_no_elena +else: + + def skip_no_elena(func): + + def wrapper(*args, **kargs): + return func(*args, **kargs) + + return wrapper diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/parrots_wrapper.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/parrots_wrapper.py new file mode 100644 index 0000000000000000000000000000000000000000..93c97640d4b9ed088ca82cfe03e6efebfcfa9dbf --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/parrots_wrapper.py @@ -0,0 +1,107 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from functools import partial + +import torch + +TORCH_VERSION = torch.__version__ + + +def is_rocm_pytorch() -> bool: + is_rocm = False + if TORCH_VERSION != 'parrots': + try: + from torch.utils.cpp_extension import ROCM_HOME + is_rocm = True if ((torch.version.hip is not None) and + (ROCM_HOME is not None)) else False + except ImportError: + pass + return is_rocm + + +def _get_cuda_home(): + if TORCH_VERSION == 'parrots': + from parrots.utils.build_extension import CUDA_HOME + else: + if is_rocm_pytorch(): + from torch.utils.cpp_extension import ROCM_HOME + CUDA_HOME = ROCM_HOME + else: + from torch.utils.cpp_extension import CUDA_HOME + return CUDA_HOME + + +def get_build_config(): + if TORCH_VERSION == 'parrots': + from parrots.config import get_build_info + return get_build_info() + else: + return torch.__config__.show() + + +def _get_conv(): + if TORCH_VERSION == 'parrots': + from parrots.nn.modules.conv import _ConvNd, _ConvTransposeMixin + else: + from torch.nn.modules.conv import _ConvNd, _ConvTransposeMixin + return _ConvNd, _ConvTransposeMixin + + +def _get_dataloader(): + if TORCH_VERSION == 'parrots': + from torch.utils.data import DataLoader, PoolDataLoader + else: + from torch.utils.data import DataLoader + PoolDataLoader = DataLoader + return DataLoader, PoolDataLoader + + +def _get_extension(): + if TORCH_VERSION == 'parrots': + from parrots.utils.build_extension import BuildExtension, Extension + CppExtension = partial(Extension, cuda=False) + CUDAExtension = partial(Extension, cuda=True) + else: + from torch.utils.cpp_extension import (BuildExtension, CppExtension, + CUDAExtension) + return BuildExtension, CppExtension, CUDAExtension + + +def _get_pool(): + if TORCH_VERSION == 'parrots': + from parrots.nn.modules.pool import (_AdaptiveAvgPoolNd, + _AdaptiveMaxPoolNd, _AvgPoolNd, + _MaxPoolNd) + else: + from torch.nn.modules.pooling import (_AdaptiveAvgPoolNd, + _AdaptiveMaxPoolNd, _AvgPoolNd, + _MaxPoolNd) + return _AdaptiveAvgPoolNd, _AdaptiveMaxPoolNd, _AvgPoolNd, _MaxPoolNd + + +def _get_norm(): + if TORCH_VERSION == 'parrots': + from parrots.nn.modules.batchnorm import _BatchNorm, _InstanceNorm + SyncBatchNorm_ = torch.nn.SyncBatchNorm2d + else: + from torch.nn.modules.instancenorm import _InstanceNorm + from torch.nn.modules.batchnorm import _BatchNorm + SyncBatchNorm_ = 
torch.nn.SyncBatchNorm + return _BatchNorm, _InstanceNorm, SyncBatchNorm_ + + +_ConvNd, _ConvTransposeMixin = _get_conv() +DataLoader, PoolDataLoader = _get_dataloader() +BuildExtension, CppExtension, CUDAExtension = _get_extension() +_BatchNorm, _InstanceNorm, SyncBatchNorm_ = _get_norm() +_AdaptiveAvgPoolNd, _AdaptiveMaxPoolNd, _AvgPoolNd, _MaxPoolNd = _get_pool() + + +class SyncBatchNorm(SyncBatchNorm_): + + def _check_input_dim(self, input): + if TORCH_VERSION == 'parrots': + if input.dim() < 2: + raise ValueError( + f'expected at least 2D input (got {input.dim()}D input)') + else: + super()._check_input_dim(input) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/path.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/path.py new file mode 100644 index 0000000000000000000000000000000000000000..7dab4b3041413b1432b0f434b8b14783097d33c6 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/path.py @@ -0,0 +1,101 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os +import os.path as osp +from pathlib import Path + +from .misc import is_str + + +def is_filepath(x): + return is_str(x) or isinstance(x, Path) + + +def fopen(filepath, *args, **kwargs): + if is_str(filepath): + return open(filepath, *args, **kwargs) + elif isinstance(filepath, Path): + return filepath.open(*args, **kwargs) + raise ValueError('`filepath` should be a string or a Path') + + +def check_file_exist(filename, msg_tmpl='file "{}" does not exist'): + if not osp.isfile(filename): + raise FileNotFoundError(msg_tmpl.format(filename)) + + +def mkdir_or_exist(dir_name, mode=0o777): + if dir_name == '': + return + dir_name = osp.expanduser(dir_name) + os.makedirs(dir_name, mode=mode, exist_ok=True) + + +def symlink(src, dst, overwrite=True, **kwargs): + if os.path.lexists(dst) and overwrite: + os.remove(dst) + os.symlink(src, dst, **kwargs) + + +def scandir(dir_path, suffix=None, recursive=False, case_sensitive=True): + """Scan a directory to find the interested files. + + Args: + dir_path (str | obj:`Path`): Path of the directory. + suffix (str | tuple(str), optional): File suffix that we are + interested in. Default: None. + recursive (bool, optional): If set to True, recursively scan the + directory. Default: False. + case_sensitive (bool, optional) : If set to False, ignore the case of + suffix. Default: True. + + Returns: + A generator for all the interested files with relative paths. 
+ """ + if isinstance(dir_path, (str, Path)): + dir_path = str(dir_path) + else: + raise TypeError('"dir_path" must be a string or Path object') + + if (suffix is not None) and not isinstance(suffix, (str, tuple)): + raise TypeError('"suffix" must be a string or tuple of strings') + + if suffix is not None and not case_sensitive: + suffix = suffix.lower() if isinstance(suffix, str) else tuple( + item.lower() for item in suffix) + + root = dir_path + + def _scandir(dir_path, suffix, recursive, case_sensitive): + for entry in os.scandir(dir_path): + if not entry.name.startswith('.') and entry.is_file(): + rel_path = osp.relpath(entry.path, root) + _rel_path = rel_path if case_sensitive else rel_path.lower() + if suffix is None or _rel_path.endswith(suffix): + yield rel_path + elif recursive and os.path.isdir(entry.path): + # scan recursively if entry.path is a directory + yield from _scandir(entry.path, suffix, recursive, + case_sensitive) + + return _scandir(dir_path, suffix, recursive, case_sensitive) + + +def find_vcs_root(path, markers=('.git', )): + """Finds the root directory (including itself) of specified markers. + + Args: + path (str): Path of directory or file. + markers (list[str], optional): List of file or directory names. + + Returns: + The directory contained one of the markers or None if not found. + """ + if osp.isfile(path): + path = osp.dirname(path) + + prev, cur = None, osp.abspath(osp.expanduser(path)) + while cur != prev: + if any(osp.exists(osp.join(cur, marker)) for marker in markers): + return cur + prev, cur = cur, osp.split(cur)[0] + return None diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/progressbar.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/progressbar.py new file mode 100644 index 0000000000000000000000000000000000000000..0062f670dd94fa9da559ab26ef85517dcf5211c7 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/progressbar.py @@ -0,0 +1,208 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import sys +from collections.abc import Iterable +from multiprocessing import Pool +from shutil import get_terminal_size + +from .timer import Timer + + +class ProgressBar: + """A progress bar which can print the progress.""" + + def __init__(self, task_num=0, bar_width=50, start=True, file=sys.stdout): + self.task_num = task_num + self.bar_width = bar_width + self.completed = 0 + self.file = file + if start: + self.start() + + @property + def terminal_width(self): + width, _ = get_terminal_size() + return width + + def start(self): + if self.task_num > 0: + self.file.write(f'[{" " * self.bar_width}] 0/{self.task_num}, ' + 'elapsed: 0s, ETA:') + else: + self.file.write('completed: 0, elapsed: 0s') + self.file.flush() + self.timer = Timer() + + def update(self, num_tasks=1): + assert num_tasks > 0 + self.completed += num_tasks + elapsed = self.timer.since_start() + if elapsed > 0: + fps = self.completed / elapsed + else: + fps = float('inf') + if self.task_num > 0: + percentage = self.completed / float(self.task_num) + eta = int(elapsed * (1 - percentage) / percentage + 0.5) + msg = f'\r[{{}}] {self.completed}/{self.task_num}, ' \ + f'{fps:.1f} task/s, elapsed: {int(elapsed + 0.5)}s, ' \ + f'ETA: {eta:5}s' + + bar_width = min(self.bar_width, + int(self.terminal_width - len(msg)) + 2, + int(self.terminal_width * 0.6)) + bar_width = max(2, bar_width) + mark_width = int(bar_width * percentage) + bar_chars = '>' * mark_width + ' ' * (bar_width - mark_width) + self.file.write(msg.format(bar_chars)) + else: + self.file.write( + f'completed: {self.completed}, elapsed: {int(elapsed + 0.5)}s,' + f' {fps:.1f} tasks/s') + self.file.flush() + + +def track_progress(func, tasks, bar_width=50, file=sys.stdout, **kwargs): + """Track the progress of tasks execution with a progress bar. + + Tasks are done with a simple for-loop. + + Args: + func (callable): The function to be applied to each task. + tasks (list or tuple[Iterable, int]): A list of tasks or + (tasks, total num). + bar_width (int): Width of progress bar. + + Returns: + list: The task results. + """ + if isinstance(tasks, tuple): + assert len(tasks) == 2 + assert isinstance(tasks[0], Iterable) + assert isinstance(tasks[1], int) + task_num = tasks[1] + tasks = tasks[0] + elif isinstance(tasks, Iterable): + task_num = len(tasks) + else: + raise TypeError( + '"tasks" must be an iterable object or a (iterator, int) tuple') + prog_bar = ProgressBar(task_num, bar_width, file=file) + results = [] + for task in tasks: + results.append(func(task, **kwargs)) + prog_bar.update() + prog_bar.file.write('\n') + return results + + +def init_pool(process_num, initializer=None, initargs=None): + if initializer is None: + return Pool(process_num) + elif initargs is None: + return Pool(process_num, initializer) + else: + if not isinstance(initargs, tuple): + raise TypeError('"initargs" must be a tuple') + return Pool(process_num, initializer, initargs) + + +def track_parallel_progress(func, + tasks, + nproc, + initializer=None, + initargs=None, + bar_width=50, + chunksize=1, + skip_first=False, + keep_order=True, + file=sys.stdout): + """Track the progress of parallel task execution with a progress bar. + + The built-in :mod:`multiprocessing` module is used for process pools and + tasks are done with :func:`Pool.map` or :func:`Pool.imap_unordered`. + + Args: + func (callable): The function to be applied to each task. + tasks (list or tuple[Iterable, int]): A list of tasks or + (tasks, total num). + nproc (int): Process (worker) number. 
+ initializer (None or callable): Refer to :class:`multiprocessing.Pool` + for details. + initargs (None or tuple): Refer to :class:`multiprocessing.Pool` for + details. + chunksize (int): Refer to :class:`multiprocessing.Pool` for details. + bar_width (int): Width of progress bar. + skip_first (bool): Whether to skip the first sample for each worker + when estimating fps, since the initialization step may takes + longer. + keep_order (bool): If True, :func:`Pool.imap` is used, otherwise + :func:`Pool.imap_unordered` is used. + + Returns: + list: The task results. + """ + if isinstance(tasks, tuple): + assert len(tasks) == 2 + assert isinstance(tasks[0], Iterable) + assert isinstance(tasks[1], int) + task_num = tasks[1] + tasks = tasks[0] + elif isinstance(tasks, Iterable): + task_num = len(tasks) + else: + raise TypeError( + '"tasks" must be an iterable object or a (iterator, int) tuple') + pool = init_pool(nproc, initializer, initargs) + start = not skip_first + task_num -= nproc * chunksize * int(skip_first) + prog_bar = ProgressBar(task_num, bar_width, start, file=file) + results = [] + if keep_order: + gen = pool.imap(func, tasks, chunksize) + else: + gen = pool.imap_unordered(func, tasks, chunksize) + for result in gen: + results.append(result) + if skip_first: + if len(results) < nproc * chunksize: + continue + elif len(results) == nproc * chunksize: + prog_bar.start() + continue + prog_bar.update() + prog_bar.file.write('\n') + pool.close() + pool.join() + return results + + +def track_iter_progress(tasks, bar_width=50, file=sys.stdout): + """Track the progress of tasks iteration or enumeration with a progress + bar. + + Tasks are yielded with a simple for-loop. + + Args: + tasks (list or tuple[Iterable, int]): A list of tasks or + (tasks, total num). + bar_width (int): Width of progress bar. + + Yields: + list: The task results. + """ + if isinstance(tasks, tuple): + assert len(tasks) == 2 + assert isinstance(tasks[0], Iterable) + assert isinstance(tasks[1], int) + task_num = tasks[1] + tasks = tasks[0] + elif isinstance(tasks, Iterable): + task_num = len(tasks) + else: + raise TypeError( + '"tasks" must be an iterable object or a (iterator, int) tuple') + prog_bar = ProgressBar(task_num, bar_width, file=file) + for task in tasks: + yield task + prog_bar.update() + prog_bar.file.write('\n') diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/registry.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/registry.py new file mode 100644 index 0000000000000000000000000000000000000000..fa9df39bc9f3d8d568361e7250ab35468f2b74e0 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/registry.py @@ -0,0 +1,315 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import inspect +import warnings +from functools import partial + +from .misc import is_seq_of + + +def build_from_cfg(cfg, registry, default_args=None): + """Build a module from config dict. + + Args: + cfg (dict): Config dict. It should at least contain the key "type". + registry (:obj:`Registry`): The registry to search the type from. + default_args (dict, optional): Default initialization arguments. + + Returns: + object: The constructed object. 
+ """ + if not isinstance(cfg, dict): + raise TypeError(f'cfg must be a dict, but got {type(cfg)}') + if 'type' not in cfg: + if default_args is None or 'type' not in default_args: + raise KeyError( + '`cfg` or `default_args` must contain the key "type", ' + f'but got {cfg}\n{default_args}') + if not isinstance(registry, Registry): + raise TypeError('registry must be an mmcv.Registry object, ' + f'but got {type(registry)}') + if not (isinstance(default_args, dict) or default_args is None): + raise TypeError('default_args must be a dict or None, ' + f'but got {type(default_args)}') + + args = cfg.copy() + + if default_args is not None: + for name, value in default_args.items(): + args.setdefault(name, value) + + obj_type = args.pop('type') + if isinstance(obj_type, str): + obj_cls = registry.get(obj_type) + if obj_cls is None: + raise KeyError( + f'{obj_type} is not in the {registry.name} registry') + elif inspect.isclass(obj_type): + obj_cls = obj_type + else: + raise TypeError( + f'type must be a str or valid type, but got {type(obj_type)}') + try: + return obj_cls(**args) + except Exception as e: + # Normal TypeError does not print class name. + raise type(e)(f'{obj_cls.__name__}: {e}') + + +class Registry: + """A registry to map strings to classes. + + Registered object could be built from registry. + Example: + >>> MODELS = Registry('models') + >>> @MODELS.register_module() + >>> class ResNet: + >>> pass + >>> resnet = MODELS.build(dict(type='ResNet')) + + Please refer to + https://mmcv.readthedocs.io/en/latest/understand_mmcv/registry.html for + advanced usage. + + Args: + name (str): Registry name. + build_func(func, optional): Build function to construct instance from + Registry, func:`build_from_cfg` is used if neither ``parent`` or + ``build_func`` is specified. If ``parent`` is specified and + ``build_func`` is not given, ``build_func`` will be inherited + from ``parent``. Default: None. + parent (Registry, optional): Parent registry. The class registered in + children registry could be built from parent. Default: None. + scope (str, optional): The scope of registry. It is the key to search + for children registry. If not specified, scope will be the name of + the package where class is defined, e.g. mmdet, mmcls, mmseg. + Default: None. + """ + + def __init__(self, name, build_func=None, parent=None, scope=None): + self._name = name + self._module_dict = dict() + self._children = dict() + self._scope = self.infer_scope() if scope is None else scope + + # self.build_func will be set with the following priority: + # 1. build_func + # 2. parent.build_func + # 3. build_from_cfg + if build_func is None: + if parent is not None: + self.build_func = parent.build_func + else: + self.build_func = build_from_cfg + else: + self.build_func = build_func + if parent is not None: + assert isinstance(parent, Registry) + parent._add_children(self) + self.parent = parent + else: + self.parent = None + + def __len__(self): + return len(self._module_dict) + + def __contains__(self, key): + return self.get(key) is not None + + def __repr__(self): + format_str = self.__class__.__name__ + \ + f'(name={self._name}, ' \ + f'items={self._module_dict})' + return format_str + + @staticmethod + def infer_scope(): + """Infer the scope of registry. + + The name of the package where registry is defined will be returned. + + Example: + # in mmdet/models/backbone/resnet.py + >>> MODELS = Registry('models') + >>> @MODELS.register_module() + >>> class ResNet: + >>> pass + The scope of ``ResNet`` will be ``mmdet``. 
+ + + Returns: + scope (str): The inferred scope name. + """ + # inspect.stack() trace where this function is called, the index-2 + # indicates the frame where `infer_scope()` is called + filename = inspect.getmodule(inspect.stack()[2][0]).__name__ + split_filename = filename.split('.') + return split_filename[0] + + @staticmethod + def split_scope_key(key): + """Split scope and key. + + The first scope will be split from key. + + Examples: + >>> Registry.split_scope_key('mmdet.ResNet') + 'mmdet', 'ResNet' + >>> Registry.split_scope_key('ResNet') + None, 'ResNet' + + Return: + scope (str, None): The first scope. + key (str): The remaining key. + """ + split_index = key.find('.') + if split_index != -1: + return key[:split_index], key[split_index + 1:] + else: + return None, key + + @property + def name(self): + return self._name + + @property + def scope(self): + return self._scope + + @property + def module_dict(self): + return self._module_dict + + @property + def children(self): + return self._children + + def get(self, key): + """Get the registry record. + + Args: + key (str): The class name in string format. + + Returns: + class: The corresponding class. + """ + scope, real_key = self.split_scope_key(key) + if scope is None or scope == self._scope: + # get from self + if real_key in self._module_dict: + return self._module_dict[real_key] + else: + # get from self._children + if scope in self._children: + return self._children[scope].get(real_key) + else: + # goto root + parent = self.parent + while parent.parent is not None: + parent = parent.parent + return parent.get(key) + + def build(self, *args, **kwargs): + return self.build_func(*args, **kwargs, registry=self) + + def _add_children(self, registry): + """Add children for a registry. + + The ``registry`` will be added as children based on its scope. + The parent registry could build objects from children registry. + + Example: + >>> models = Registry('models') + >>> mmdet_models = Registry('models', parent=models) + >>> @mmdet_models.register_module() + >>> class ResNet: + >>> pass + >>> resnet = models.build(dict(type='mmdet.ResNet')) + """ + + assert isinstance(registry, Registry) + assert registry.scope is not None + assert registry.scope not in self.children, \ + f'scope {registry.scope} exists in {self.name} registry' + self.children[registry.scope] = registry + + def _register_module(self, module_class, module_name=None, force=False): + if not inspect.isclass(module_class): + raise TypeError('module must be a class, ' + f'but got {type(module_class)}') + + if module_name is None: + module_name = module_class.__name__ + if isinstance(module_name, str): + module_name = [module_name] + for name in module_name: + if not force and name in self._module_dict: + raise KeyError(f'{name} is already registered ' + f'in {self.name}') + self._module_dict[name] = module_class + + def deprecated_register_module(self, cls=None, force=False): + warnings.warn( + 'The old API of register_module(module, force=False) ' + 'is deprecated and will be removed, please use the new API ' + 'register_module(name=None, force=False, module=None) instead.') + if cls is None: + return partial(self.deprecated_register_module, force=force) + self._register_module(cls, force=force) + return cls + + def register_module(self, name=None, force=False, module=None): + """Register a module. + + A record will be added to `self._module_dict`, whose key is the class + name or the specified name, and value is the class itself. 
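Editor's note: a hedged sketch of the parent/child scope lookup implemented by `get` and `_add_children` above; the registry and class names are purely illustrative.

root = Registry('models', scope='root')
det = Registry('models', parent=root, scope='det')

@det.register_module()
class FasterRCNN:
    pass

# 'det.FasterRCNN' is split into scope 'det' and key 'FasterRCNN',
# then resolved through the child registry registered under that scope.
assert root.get('det.FasterRCNN') is FasterRCNN
detector = root.build(dict(type='det.FasterRCNN'))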
+ It can be used as a decorator or a normal function. + + Example: + >>> backbones = Registry('backbone') + >>> @backbones.register_module() + >>> class ResNet: + >>> pass + + >>> backbones = Registry('backbone') + >>> @backbones.register_module(name='mnet') + >>> class MobileNet: + >>> pass + + >>> backbones = Registry('backbone') + >>> class ResNet: + >>> pass + >>> backbones.register_module(ResNet) + + Args: + name (str | None): The module name to be registered. If not + specified, the class name will be used. + force (bool, optional): Whether to override an existing class with + the same name. Default: False. + module (type): Module class to be registered. + """ + if not isinstance(force, bool): + raise TypeError(f'force must be a boolean, but got {type(force)}') + # NOTE: This is a walkaround to be compatible with the old api, + # while it may introduce unexpected bugs. + if isinstance(name, type): + return self.deprecated_register_module(name, force=force) + + # raise the error ahead of time + if not (name is None or isinstance(name, str) or is_seq_of(name, str)): + raise TypeError( + 'name must be either of None, an instance of str or a sequence' + f' of str, but got {type(name)}') + + # use it as a normal method: x.register_module(module=SomeClass) + if module is not None: + self._register_module( + module_class=module, module_name=name, force=force) + return module + + # use it as a decorator: @x.register_module() + def _register(cls): + self._register_module( + module_class=cls, module_name=name, force=force) + return cls + + return _register diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/testing.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/testing.py new file mode 100644 index 0000000000000000000000000000000000000000..a27f936da8ec14bac18562ede0a79d476d82f797 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/testing.py @@ -0,0 +1,140 @@ +# Copyright (c) Open-MMLab. +import sys +from collections.abc import Iterable +from runpy import run_path +from shlex import split +from typing import Any, Dict, List +from unittest.mock import patch + + +def check_python_script(cmd): + """Run the python cmd script with `__main__`. The difference between + `os.system` is that, this function exectues code in the current process, so + that it can be tracked by coverage tools. Currently it supports two forms: + + - ./tests/data/scripts/hello.py zz + - python tests/data/scripts/hello.py zz + """ + args = split(cmd) + if args[0] == 'python': + args = args[1:] + with patch.object(sys, 'argv', args): + run_path(args[0], run_name='__main__') + + +def _any(judge_result): + """Since built-in ``any`` works only when the element of iterable is not + iterable, implement the function.""" + if not isinstance(judge_result, Iterable): + return judge_result + + try: + for element in judge_result: + if _any(element): + return True + except TypeError: + # Maybe encounter the case: torch.tensor(True) | torch.tensor(False) + if judge_result: + return True + return False + + +def assert_dict_contains_subset(dict_obj: Dict[Any, Any], + expected_subset: Dict[Any, Any]) -> bool: + """Check if the dict_obj contains the expected_subset. + + Args: + dict_obj (Dict[Any, Any]): Dict object to be checked. + expected_subset (Dict[Any, Any]): Subset expected to be contained in + dict_obj. + + Returns: + bool: Whether the dict_obj contains the expected_subset. 
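Editor's note: the three call forms of `register_module` described above, shown with placeholder classes.

backbones = Registry('backbone')

@backbones.register_module()              # key defaults to the class name
class ResNet:
    pass

@backbones.register_module(name='mnet')   # explicit key
class MobileNet:
    pass

class VGG:
    pass

backbones.register_module(module=VGG)     # plain function call, no decorator

assert 'ResNet' in backbones and 'mnet' in backbones and 'VGG' in backbones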
+ """ + + for key, value in expected_subset.items(): + if key not in dict_obj.keys() or _any(dict_obj[key] != value): + return False + return True + + +def assert_attrs_equal(obj: Any, expected_attrs: Dict[str, Any]) -> bool: + """Check if attribute of class object is correct. + + Args: + obj (object): Class object to be checked. + expected_attrs (Dict[str, Any]): Dict of the expected attrs. + + Returns: + bool: Whether the attribute of class object is correct. + """ + for attr, value in expected_attrs.items(): + if not hasattr(obj, attr) or _any(getattr(obj, attr) != value): + return False + return True + + +def assert_dict_has_keys(obj: Dict[str, Any], + expected_keys: List[str]) -> bool: + """Check if the obj has all the expected_keys. + + Args: + obj (Dict[str, Any]): Object to be checked. + expected_keys (List[str]): Keys expected to contained in the keys of + the obj. + + Returns: + bool: Whether the obj has the expected keys. + """ + return set(expected_keys).issubset(set(obj.keys())) + + +def assert_keys_equal(result_keys: List[str], target_keys: List[str]) -> bool: + """Check if target_keys is equal to result_keys. + + Args: + result_keys (List[str]): Result keys to be checked. + target_keys (List[str]): Target keys to be checked. + + Returns: + bool: Whether target_keys is equal to result_keys. + """ + return set(result_keys) == set(target_keys) + + +def assert_is_norm_layer(module) -> bool: + """Check if the module is a norm layer. + + Args: + module (nn.Module): The module to be checked. + + Returns: + bool: Whether the module is a norm layer. + """ + from .parrots_wrapper import _BatchNorm, _InstanceNorm + from torch.nn import GroupNorm, LayerNorm + norm_layer_candidates = (_BatchNorm, _InstanceNorm, GroupNorm, LayerNorm) + return isinstance(module, norm_layer_candidates) + + +def assert_params_all_zeros(module) -> bool: + """Check if the parameters of the module is all zeros. + + Args: + module (nn.Module): The module to be checked. + + Returns: + bool: Whether the parameters of the module is all zeros. + """ + weight_data = module.weight.data + is_weight_zero = weight_data.allclose( + weight_data.new_zeros(weight_data.size())) + + if hasattr(module, 'bias') and module.bias is not None: + bias_data = module.bias.data + is_bias_zero = bias_data.allclose( + bias_data.new_zeros(bias_data.size())) + else: + is_bias_zero = True + + return is_weight_zero and is_bias_zero diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/timer.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/timer.py new file mode 100644 index 0000000000000000000000000000000000000000..0435c1250ebb63e0d881d7022979a76b2dcc7298 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/timer.py @@ -0,0 +1,118 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from time import time + + +class TimerError(Exception): + + def __init__(self, message): + self.message = message + super(TimerError, self).__init__(message) + + +class Timer: + """A flexible Timer class. 
+ + :Example: + + >>> import time + >>> import annotator.mmpkg.mmcv as mmcv + >>> with mmcv.Timer(): + >>> # simulate a code block that will run for 1s + >>> time.sleep(1) + 1.000 + >>> with mmcv.Timer(print_tmpl='it takes {:.1f} seconds'): + >>> # simulate a code block that will run for 1s + >>> time.sleep(1) + it takes 1.0 seconds + >>> timer = mmcv.Timer() + >>> time.sleep(0.5) + >>> print(timer.since_start()) + 0.500 + >>> time.sleep(0.5) + >>> print(timer.since_last_check()) + 0.500 + >>> print(timer.since_start()) + 1.000 + """ + + def __init__(self, start=True, print_tmpl=None): + self._is_running = False + self.print_tmpl = print_tmpl if print_tmpl else '{:.3f}' + if start: + self.start() + + @property + def is_running(self): + """bool: indicate whether the timer is running""" + return self._is_running + + def __enter__(self): + self.start() + return self + + def __exit__(self, type, value, traceback): + print(self.print_tmpl.format(self.since_last_check())) + self._is_running = False + + def start(self): + """Start the timer.""" + if not self._is_running: + self._t_start = time() + self._is_running = True + self._t_last = time() + + def since_start(self): + """Total time since the timer is started. + + Returns (float): Time in seconds. + """ + if not self._is_running: + raise TimerError('timer is not running') + self._t_last = time() + return self._t_last - self._t_start + + def since_last_check(self): + """Time since the last checking. + + Either :func:`since_start` or :func:`since_last_check` is a checking + operation. + + Returns (float): Time in seconds. + """ + if not self._is_running: + raise TimerError('timer is not running') + dur = time() - self._t_last + self._t_last = time() + return dur + + +_g_timers = {} # global timers + + +def check_time(timer_id): + """Add check points in a single line. + + This method is suitable for running a task on a list of items. A timer will + be registered when the method is called for the first time. + + :Example: + + >>> import time + >>> import annotator.mmpkg.mmcv as mmcv + >>> for i in range(1, 6): + >>> # simulate a code block + >>> time.sleep(i) + >>> mmcv.check_time('task1') + 2.000 + 3.000 + 4.000 + 5.000 + + Args: + timer_id (str): Timer identifier. + """ + if timer_id not in _g_timers: + _g_timers[timer_id] = Timer() + return 0 + else: + return _g_timers[timer_id].since_last_check() diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/trace.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/trace.py new file mode 100644 index 0000000000000000000000000000000000000000..51f6e3cab4ac7bbdf561583d7463a5f2897960e7 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/trace.py @@ -0,0 +1,23 @@ +import warnings + +import torch + +from annotator.mmpkg.mmcv.utils import digit_version + + +def is_jit_tracing() -> bool: + if (torch.__version__ != 'parrots' + and digit_version(torch.__version__) >= digit_version('1.6.0')): + on_trace = torch.jit.is_tracing() + # In PyTorch 1.6, torch.jit.is_tracing has a bug. + # Refers to https://github.com/pytorch/pytorch/issues/42448 + if isinstance(on_trace, bool): + return on_trace + else: + return torch._C._is_tracing() + else: + warnings.warn( + 'torch.jit.is_tracing is only supported after v1.6.0. ' + 'Therefore is_tracing returns False automatically. 
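Editor's note: a brief usage sketch of `Timer` and `check_time` from the timer module above.

import time
from annotator.mmpkg.mmcv.utils.timer import Timer, check_time

with Timer(print_tmpl='block took {:.3f} s'):
    time.sleep(0.2)                 # prints roughly "block took 0.200 s"

timer = Timer()
time.sleep(0.1)
print(timer.since_start())          # ~0.1, total time since start()
print(timer.since_last_check())     # ~0.0, time since the previous check

for step in range(3):
    time.sleep(0.05)
    print(check_time('demo_task'))  # 0 on the first call, then per-iteration durations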
Please ' + 'set on_trace manually if you are using trace.', UserWarning) + return False diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/version_utils.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/version_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..963c45a2e8a86a88413ab6c18c22481fb9831985 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/utils/version_utils.py @@ -0,0 +1,90 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os +import subprocess +import warnings + +from packaging.version import parse + + +def digit_version(version_str: str, length: int = 4): + """Convert a version string into a tuple of integers. + + This method is usually used for comparing two versions. For pre-release + versions: alpha < beta < rc. + + Args: + version_str (str): The version string. + length (int): The maximum number of version levels. Default: 4. + + Returns: + tuple[int]: The version info in digits (integers). + """ + assert 'parrots' not in version_str + version = parse(version_str) + assert version.release, f'failed to parse version {version_str}' + release = list(version.release) + release = release[:length] + if len(release) < length: + release = release + [0] * (length - len(release)) + if version.is_prerelease: + mapping = {'a': -3, 'b': -2, 'rc': -1} + val = -4 + # version.pre can be None + if version.pre: + if version.pre[0] not in mapping: + warnings.warn(f'unknown prerelease version {version.pre[0]}, ' + 'version checking may go wrong') + else: + val = mapping[version.pre[0]] + release.extend([val, version.pre[-1]]) + else: + release.extend([val, 0]) + + elif version.is_postrelease: + release.extend([1, version.post]) + else: + release.extend([0, 0]) + return tuple(release) + + +def _minimal_ext_cmd(cmd): + # construct minimal environment + env = {} + for k in ['SYSTEMROOT', 'PATH', 'HOME']: + v = os.environ.get(k) + if v is not None: + env[k] = v + # LANGUAGE is used on win32 + env['LANGUAGE'] = 'C' + env['LANG'] = 'C' + env['LC_ALL'] = 'C' + out = subprocess.Popen( + cmd, stdout=subprocess.PIPE, env=env).communicate()[0] + return out + + +def get_git_hash(fallback='unknown', digits=None): + """Get the git hash of the current repo. + + Args: + fallback (str, optional): The fallback string when git hash is + unavailable. Defaults to 'unknown'. + digits (int, optional): kept digits of the hash. Defaults to None, + meaning all digits are kept. + + Returns: + str: Git commit hash. + """ + + if digits is not None and not isinstance(digits, int): + raise TypeError('digits must be None or an integer') + + try: + out = _minimal_ext_cmd(['git', 'rev-parse', 'HEAD']) + sha = out.strip().decode('ascii') + if digits is not None: + sha = sha[:digits] + except OSError: + sha = fallback + + return sha diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/version.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/version.py new file mode 100644 index 0000000000000000000000000000000000000000..1cce4e50bd692d4002e3cac3c545a3fb2efe95d0 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/version.py @@ -0,0 +1,35 @@ +# Copyright (c) OpenMMLab. All rights reserved. +__version__ = '1.3.17' + + +def parse_version_info(version_str: str, length: int = 4) -> tuple: + """Parse a version string into a tuple. + + Args: + version_str (str): The version string. 
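Editor's note: the version helpers above produce tuples that compare correctly across pre-releases; a few illustrative checks follow (alpha < beta < rc < final).

from annotator.mmpkg.mmcv.utils.version_utils import digit_version, get_git_hash

assert digit_version('1.3.17') == (1, 3, 17, 0, 0, 0)
assert digit_version('1.6.0a1') < digit_version('1.6.0b1') < digit_version('1.6.0rc1') < digit_version('1.6.0')

# First 7 hex digits of HEAD, or the fallback if the git binary is unavailable.
sha = get_git_hash(fallback='unknown', digits=7)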
+ length (int): The maximum number of version levels. Default: 4. + + Returns: + tuple[int | str]: The version info, e.g., "1.3.0" is parsed into + (1, 3, 0, 0, 0, 0), and "2.0.0rc1" is parsed into + (2, 0, 0, 0, 'rc', 1) (when length is set to 4). + """ + from packaging.version import parse + version = parse(version_str) + assert version.release, f'failed to parse version {version_str}' + release = list(version.release) + release = release[:length] + if len(release) < length: + release = release + [0] * (length - len(release)) + if version.is_prerelease: + release.extend(list(version.pre)) + elif version.is_postrelease: + release.extend(list(version.post)) + else: + release.extend([0, 0]) + return tuple(release) + + +version_info = tuple(int(x) for x in __version__.split('.')[:3]) + +__all__ = ['__version__', 'version_info', 'parse_version_info'] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/video/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/video/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..73199b01dec52820dc6ca0139903536344d5a1eb --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/video/__init__.py @@ -0,0 +1,11 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .io import Cache, VideoReader, frames2video +from .optflow import (dequantize_flow, flow_from_bytes, flow_warp, flowread, + flowwrite, quantize_flow, sparse_flow_from_bytes) +from .processing import concat_video, convert_video, cut_video, resize_video + +__all__ = [ + 'Cache', 'VideoReader', 'frames2video', 'convert_video', 'resize_video', + 'cut_video', 'concat_video', 'flowread', 'flowwrite', 'quantize_flow', + 'dequantize_flow', 'flow_warp', 'flow_from_bytes', 'sparse_flow_from_bytes' +] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/video/io.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/video/io.py new file mode 100644 index 0000000000000000000000000000000000000000..06ae9b8ae4404ec7822fd49c01c183a0be0cbf35 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/video/io.py @@ -0,0 +1,318 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp +from collections import OrderedDict + +import cv2 +from cv2 import (CAP_PROP_FOURCC, CAP_PROP_FPS, CAP_PROP_FRAME_COUNT, + CAP_PROP_FRAME_HEIGHT, CAP_PROP_FRAME_WIDTH, + CAP_PROP_POS_FRAMES, VideoWriter_fourcc) + +from annotator.mmpkg.mmcv.utils import (check_file_exist, mkdir_or_exist, scandir, + track_progress) + + +class Cache: + + def __init__(self, capacity): + self._cache = OrderedDict() + self._capacity = int(capacity) + if capacity <= 0: + raise ValueError('capacity must be a positive integer') + + @property + def capacity(self): + return self._capacity + + @property + def size(self): + return len(self._cache) + + def put(self, key, val): + if key in self._cache: + return + if len(self._cache) >= self.capacity: + self._cache.popitem(last=False) + self._cache[key] = val + + def get(self, key, default=None): + val = self._cache[key] if key in self._cache else default + return val + + +class VideoReader: + """Video class with similar usage to a list object. + + This video warpper class provides convenient apis to access frames. + There exists an issue of OpenCV's VideoCapture class that jumping to a + certain frame may be inaccurate. It is fixed in this class by checking + the position after jumping each time. 
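Editor's note: the `Cache` above evicts in insertion order once full; a quick illustration with throwaway keys.

from annotator.mmpkg.mmcv.video import Cache

cache = Cache(capacity=2)
cache.put('a', 1)
cache.put('b', 2)
cache.put('c', 3)                 # oldest entry 'a' is dropped (popitem(last=False))
assert cache.get('a') is None     # falls back to the default
assert cache.get('b') == 2 and cache.size == 2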
+ Cache is used when decoding videos. So if the same frame is visited for + the second time, there is no need to decode again if it is stored in the + cache. + + :Example: + + >>> import annotator.mmpkg.mmcv as mmcv + >>> v = mmcv.VideoReader('sample.mp4') + >>> len(v) # get the total frame number with `len()` + 120 + >>> for img in v: # v is iterable + >>> mmcv.imshow(img) + >>> v[5] # get the 6th frame + """ + + def __init__(self, filename, cache_capacity=10): + # Check whether the video path is a url + if not filename.startswith(('https://', 'http://')): + check_file_exist(filename, 'Video file not found: ' + filename) + self._vcap = cv2.VideoCapture(filename) + assert cache_capacity > 0 + self._cache = Cache(cache_capacity) + self._position = 0 + # get basic info + self._width = int(self._vcap.get(CAP_PROP_FRAME_WIDTH)) + self._height = int(self._vcap.get(CAP_PROP_FRAME_HEIGHT)) + self._fps = self._vcap.get(CAP_PROP_FPS) + self._frame_cnt = int(self._vcap.get(CAP_PROP_FRAME_COUNT)) + self._fourcc = self._vcap.get(CAP_PROP_FOURCC) + + @property + def vcap(self): + """:obj:`cv2.VideoCapture`: The raw VideoCapture object.""" + return self._vcap + + @property + def opened(self): + """bool: Indicate whether the video is opened.""" + return self._vcap.isOpened() + + @property + def width(self): + """int: Width of video frames.""" + return self._width + + @property + def height(self): + """int: Height of video frames.""" + return self._height + + @property + def resolution(self): + """tuple: Video resolution (width, height).""" + return (self._width, self._height) + + @property + def fps(self): + """float: FPS of the video.""" + return self._fps + + @property + def frame_cnt(self): + """int: Total frames of the video.""" + return self._frame_cnt + + @property + def fourcc(self): + """str: "Four character code" of the video.""" + return self._fourcc + + @property + def position(self): + """int: Current cursor position, indicating frame decoded.""" + return self._position + + def _get_real_position(self): + return int(round(self._vcap.get(CAP_PROP_POS_FRAMES))) + + def _set_real_position(self, frame_id): + self._vcap.set(CAP_PROP_POS_FRAMES, frame_id) + pos = self._get_real_position() + for _ in range(frame_id - pos): + self._vcap.read() + self._position = frame_id + + def read(self): + """Read the next frame. + + If the next frame have been decoded before and in the cache, then + return it directly, otherwise decode, cache and return it. + + Returns: + ndarray or None: Return the frame if successful, otherwise None. + """ + # pos = self._position + if self._cache: + img = self._cache.get(self._position) + if img is not None: + ret = True + else: + if self._position != self._get_real_position(): + self._set_real_position(self._position) + ret, img = self._vcap.read() + if ret: + self._cache.put(self._position, img) + else: + ret, img = self._vcap.read() + if ret: + self._position += 1 + return img + + def get_frame(self, frame_id): + """Get frame by index. + + Args: + frame_id (int): Index of the expected frame, 0-based. + + Returns: + ndarray or None: Return the frame if successful, otherwise None. 
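Editor's note: a hypothetical decode session with `VideoReader`; 'sample.mp4' is a placeholder and OpenCV must be able to open it.

from annotator.mmpkg.mmcv.video import VideoReader

v = VideoReader('sample.mp4', cache_capacity=10)
print(v.resolution, v.fps, v.frame_cnt)
first = v.read()               # decodes frame 0 and stores it in the cache
second = v.read()              # decodes frame 1
first_again = v.get_frame(0)   # served from the cache, no seek or re-decode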
+ """ + if frame_id < 0 or frame_id >= self._frame_cnt: + raise IndexError( + f'"frame_id" must be between 0 and {self._frame_cnt - 1}') + if frame_id == self._position: + return self.read() + if self._cache: + img = self._cache.get(frame_id) + if img is not None: + self._position = frame_id + 1 + return img + self._set_real_position(frame_id) + ret, img = self._vcap.read() + if ret: + if self._cache: + self._cache.put(self._position, img) + self._position += 1 + return img + + def current_frame(self): + """Get the current frame (frame that is just visited). + + Returns: + ndarray or None: If the video is fresh, return None, otherwise + return the frame. + """ + if self._position == 0: + return None + return self._cache.get(self._position - 1) + + def cvt2frames(self, + frame_dir, + file_start=0, + filename_tmpl='{:06d}.jpg', + start=0, + max_num=0, + show_progress=True): + """Convert a video to frame images. + + Args: + frame_dir (str): Output directory to store all the frame images. + file_start (int): Filenames will start from the specified number. + filename_tmpl (str): Filename template with the index as the + placeholder. + start (int): The starting frame index. + max_num (int): Maximum number of frames to be written. + show_progress (bool): Whether to show a progress bar. + """ + mkdir_or_exist(frame_dir) + if max_num == 0: + task_num = self.frame_cnt - start + else: + task_num = min(self.frame_cnt - start, max_num) + if task_num <= 0: + raise ValueError('start must be less than total frame number') + if start > 0: + self._set_real_position(start) + + def write_frame(file_idx): + img = self.read() + if img is None: + return + filename = osp.join(frame_dir, filename_tmpl.format(file_idx)) + cv2.imwrite(filename, img) + + if show_progress: + track_progress(write_frame, range(file_start, + file_start + task_num)) + else: + for i in range(task_num): + write_frame(file_start + i) + + def __len__(self): + return self.frame_cnt + + def __getitem__(self, index): + if isinstance(index, slice): + return [ + self.get_frame(i) + for i in range(*index.indices(self.frame_cnt)) + ] + # support negative indexing + if index < 0: + index += self.frame_cnt + if index < 0: + raise IndexError('index out of range') + return self.get_frame(index) + + def __iter__(self): + self._set_real_position(0) + return self + + def __next__(self): + img = self.read() + if img is not None: + return img + else: + raise StopIteration + + next = __next__ + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + self._vcap.release() + + +def frames2video(frame_dir, + video_file, + fps=30, + fourcc='XVID', + filename_tmpl='{:06d}.jpg', + start=0, + end=0, + show_progress=True): + """Read the frame images from a directory and join them as a video. + + Args: + frame_dir (str): The directory containing video frames. + video_file (str): Output filename. + fps (float): FPS of the output video. + fourcc (str): Fourcc of the output video, this should be compatible + with the output file type. + filename_tmpl (str): Filename template with the index as the variable. + start (int): Starting frame index. + end (int): Ending frame index. + show_progress (bool): Whether to show a progress bar. 
+ """ + if end == 0: + ext = filename_tmpl.split('.')[-1] + end = len([name for name in scandir(frame_dir, ext)]) + first_file = osp.join(frame_dir, filename_tmpl.format(start)) + check_file_exist(first_file, 'The start frame not found: ' + first_file) + img = cv2.imread(first_file) + height, width = img.shape[:2] + resolution = (width, height) + vwriter = cv2.VideoWriter(video_file, VideoWriter_fourcc(*fourcc), fps, + resolution) + + def write_frame(file_idx): + filename = osp.join(frame_dir, filename_tmpl.format(file_idx)) + img = cv2.imread(filename) + vwriter.write(img) + + if show_progress: + track_progress(write_frame, range(start, end)) + else: + for i in range(start, end): + write_frame(i) + vwriter.release() diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/video/optflow.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/video/optflow.py new file mode 100644 index 0000000000000000000000000000000000000000..7bd78970dce8faf30bce0d5f2ec278b994fdd623 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/video/optflow.py @@ -0,0 +1,254 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import warnings + +import cv2 +import numpy as np + +from annotator.mmpkg.mmcv.arraymisc import dequantize, quantize +from annotator.mmpkg.mmcv.image import imread, imwrite +from annotator.mmpkg.mmcv.utils import is_str + + +def flowread(flow_or_path, quantize=False, concat_axis=0, *args, **kwargs): + """Read an optical flow map. + + Args: + flow_or_path (ndarray or str): A flow map or filepath. + quantize (bool): whether to read quantized pair, if set to True, + remaining args will be passed to :func:`dequantize_flow`. + concat_axis (int): The axis that dx and dy are concatenated, + can be either 0 or 1. Ignored if quantize is False. + + Returns: + ndarray: Optical flow represented as a (h, w, 2) numpy array + """ + if isinstance(flow_or_path, np.ndarray): + if (flow_or_path.ndim != 3) or (flow_or_path.shape[-1] != 2): + raise ValueError(f'Invalid flow with shape {flow_or_path.shape}') + return flow_or_path + elif not is_str(flow_or_path): + raise TypeError(f'"flow_or_path" must be a filename or numpy array, ' + f'not {type(flow_or_path)}') + + if not quantize: + with open(flow_or_path, 'rb') as f: + try: + header = f.read(4).decode('utf-8') + except Exception: + raise IOError(f'Invalid flow file: {flow_or_path}') + else: + if header != 'PIEH': + raise IOError(f'Invalid flow file: {flow_or_path}, ' + 'header does not contain PIEH') + + w = np.fromfile(f, np.int32, 1).squeeze() + h = np.fromfile(f, np.int32, 1).squeeze() + flow = np.fromfile(f, np.float32, w * h * 2).reshape((h, w, 2)) + else: + assert concat_axis in [0, 1] + cat_flow = imread(flow_or_path, flag='unchanged') + if cat_flow.ndim != 2: + raise IOError( + f'{flow_or_path} is not a valid quantized flow file, ' + f'its dimension is {cat_flow.ndim}.') + assert cat_flow.shape[concat_axis] % 2 == 0 + dx, dy = np.split(cat_flow, 2, axis=concat_axis) + flow = dequantize_flow(dx, dy, *args, **kwargs) + + return flow.astype(np.float32) + + +def flowwrite(flow, filename, quantize=False, concat_axis=0, *args, **kwargs): + """Write optical flow to file. + + If the flow is not quantized, it will be saved as a .flo file losslessly, + otherwise a jpeg image which is lossy but of much smaller size. (dx and dy + will be concatenated horizontally into a single image if quantize is True.) + + Args: + flow (ndarray): (h, w, 2) array of optical flow. 
+ filename (str): Output filepath. + quantize (bool): Whether to quantize the flow and save it to 2 jpeg + images. If set to True, remaining args will be passed to + :func:`quantize_flow`. + concat_axis (int): The axis that dx and dy are concatenated, + can be either 0 or 1. Ignored if quantize is False. + """ + if not quantize: + with open(filename, 'wb') as f: + f.write('PIEH'.encode('utf-8')) + np.array([flow.shape[1], flow.shape[0]], dtype=np.int32).tofile(f) + flow = flow.astype(np.float32) + flow.tofile(f) + f.flush() + else: + assert concat_axis in [0, 1] + dx, dy = quantize_flow(flow, *args, **kwargs) + dxdy = np.concatenate((dx, dy), axis=concat_axis) + imwrite(dxdy, filename) + + +def quantize_flow(flow, max_val=0.02, norm=True): + """Quantize flow to [0, 255]. + + After this step, the size of flow will be much smaller, and can be + dumped as jpeg images. + + Args: + flow (ndarray): (h, w, 2) array of optical flow. + max_val (float): Maximum value of flow, values beyond + [-max_val, max_val] will be truncated. + norm (bool): Whether to divide flow values by image width/height. + + Returns: + tuple[ndarray]: Quantized dx and dy. + """ + h, w, _ = flow.shape + dx = flow[..., 0] + dy = flow[..., 1] + if norm: + dx = dx / w # avoid inplace operations + dy = dy / h + # use 255 levels instead of 256 to make sure 0 is 0 after dequantization. + flow_comps = [ + quantize(d, -max_val, max_val, 255, np.uint8) for d in [dx, dy] + ] + return tuple(flow_comps) + + +def dequantize_flow(dx, dy, max_val=0.02, denorm=True): + """Recover from quantized flow. + + Args: + dx (ndarray): Quantized dx. + dy (ndarray): Quantized dy. + max_val (float): Maximum value used when quantizing. + denorm (bool): Whether to multiply flow values with width/height. + + Returns: + ndarray: Dequantized flow. + """ + assert dx.shape == dy.shape + assert dx.ndim == 2 or (dx.ndim == 3 and dx.shape[-1] == 1) + + dx, dy = [dequantize(d, -max_val, max_val, 255) for d in [dx, dy]] + + if denorm: + dx *= dx.shape[1] + dy *= dx.shape[0] + flow = np.dstack((dx, dy)) + return flow + + +def flow_warp(img, flow, filling_value=0, interpolate_mode='nearest'): + """Use flow to warp img. + + Args: + img (ndarray, float or uint8): Image to be warped. + flow (ndarray, float): Optical Flow. + filling_value (int): The missing pixels will be set with filling_value. + interpolate_mode (str): bilinear -> Bilinear Interpolation; + nearest -> Nearest Neighbor. + + Returns: + ndarray: Warped image with the same shape of img + """ + warnings.warn('This function is just for prototyping and cannot ' + 'guarantee the computational efficiency.') + assert flow.ndim == 3, 'Flow must be in 3D arrays.' 
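Editor's note: quantization is lossy; a sketch of the expected round-trip error, assuming the usual behavior of `quantize`/`dequantize` from `arraymisc`.

import numpy as np
from annotator.mmpkg.mmcv.video import quantize_flow, dequantize_flow

flow = (np.random.rand(8, 8, 2).astype(np.float32) - 0.5) * 0.01
dx, dy = quantize_flow(flow, max_val=0.02, norm=False)
assert dx.dtype == np.uint8 and dx.shape == (8, 8)
restored = dequantize_flow(dx, dy, max_val=0.02, denorm=False)
assert np.allclose(restored, flow, atol=1e-3)   # error bounded by one quantization level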
+ height = flow.shape[0] + width = flow.shape[1] + channels = img.shape[2] + + output = np.ones( + (height, width, channels), dtype=img.dtype) * filling_value + + grid = np.indices((height, width)).swapaxes(0, 1).swapaxes(1, 2) + dx = grid[:, :, 0] + flow[:, :, 1] + dy = grid[:, :, 1] + flow[:, :, 0] + sx = np.floor(dx).astype(int) + sy = np.floor(dy).astype(int) + valid = (sx >= 0) & (sx < height - 1) & (sy >= 0) & (sy < width - 1) + + if interpolate_mode == 'nearest': + output[valid, :] = img[dx[valid].round().astype(int), + dy[valid].round().astype(int), :] + elif interpolate_mode == 'bilinear': + # dirty walkround for integer positions + eps_ = 1e-6 + dx, dy = dx + eps_, dy + eps_ + left_top_ = img[np.floor(dx[valid]).astype(int), + np.floor(dy[valid]).astype(int), :] * ( + np.ceil(dx[valid]) - dx[valid])[:, None] * ( + np.ceil(dy[valid]) - dy[valid])[:, None] + left_down_ = img[np.ceil(dx[valid]).astype(int), + np.floor(dy[valid]).astype(int), :] * ( + dx[valid] - np.floor(dx[valid]))[:, None] * ( + np.ceil(dy[valid]) - dy[valid])[:, None] + right_top_ = img[np.floor(dx[valid]).astype(int), + np.ceil(dy[valid]).astype(int), :] * ( + np.ceil(dx[valid]) - dx[valid])[:, None] * ( + dy[valid] - np.floor(dy[valid]))[:, None] + right_down_ = img[np.ceil(dx[valid]).astype(int), + np.ceil(dy[valid]).astype(int), :] * ( + dx[valid] - np.floor(dx[valid]))[:, None] * ( + dy[valid] - np.floor(dy[valid]))[:, None] + output[valid, :] = left_top_ + left_down_ + right_top_ + right_down_ + else: + raise NotImplementedError( + 'We only support interpolation modes of nearest and bilinear, ' + f'but got {interpolate_mode}.') + return output.astype(img.dtype) + + +def flow_from_bytes(content): + """Read dense optical flow from bytes. + + .. note:: + This load optical flow function works for FlyingChairs, FlyingThings3D, + Sintel, FlyingChairsOcc datasets, but cannot load the data from + ChairsSDHom. + + Args: + content (bytes): Optical flow bytes got from files or other streams. + + Returns: + ndarray: Loaded optical flow with the shape (H, W, 2). + """ + + # header in first 4 bytes + header = content[:4] + if header.decode('utf-8') != 'PIEH': + raise Exception('Flow file header does not contain PIEH') + # width in second 4 bytes + width = np.frombuffer(content[4:], np.int32, 1).squeeze() + # height in third 4 bytes + height = np.frombuffer(content[8:], np.int32, 1).squeeze() + # after first 12 bytes, all bytes are flow + flow = np.frombuffer(content[12:], np.float32, width * height * 2).reshape( + (height, width, 2)) + + return flow + + +def sparse_flow_from_bytes(content): + """Read the optical flow in KITTI datasets from bytes. + + This function is modified from RAFT load the `KITTI datasets + `_. + + Args: + content (bytes): Optical flow bytes got from files or other streams. + + Returns: + Tuple(ndarray, ndarray): Loaded optical flow with the shape (H, W, 2) + and flow valid mask with the shape (H, W). 
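Editor's note: an illustrative call to `flow_warp` with a constant one-pixel shift; values are arbitrary, and the function itself warns that it is only a prototype.

import numpy as np
from annotator.mmpkg.mmcv.video import flow_warp

img = np.random.rand(16, 16, 3).astype(np.float32)
flow = np.zeros((16, 16, 2), dtype=np.float32)
flow[..., 0] = 1.0                                  # shift by one pixel along x
warped = flow_warp(img, flow, filling_value=0, interpolate_mode='nearest')
assert warped.shape == img.shape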
+ """ # nopa + + content = np.frombuffer(content, np.uint8) + flow = cv2.imdecode(content, cv2.IMREAD_ANYDEPTH | cv2.IMREAD_COLOR) + flow = flow[:, :, ::-1].astype(np.float32) + # flow shape (H, W, 2) valid shape (H, W) + flow, valid = flow[:, :, :2], flow[:, :, 2] + flow = (flow - 2**15) / 64.0 + return flow, valid diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/video/processing.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/video/processing.py new file mode 100644 index 0000000000000000000000000000000000000000..2b93a59215d56b6e5ba05f48bca3527772f0c744 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/video/processing.py @@ -0,0 +1,160 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os +import os.path as osp +import subprocess +import tempfile + +from annotator.mmpkg.mmcv.utils import requires_executable + + +@requires_executable('ffmpeg') +def convert_video(in_file, + out_file, + print_cmd=False, + pre_options='', + **kwargs): + """Convert a video with ffmpeg. + + This provides a general api to ffmpeg, the executed command is:: + + `ffmpeg -y -i ` + + Options(kwargs) are mapped to ffmpeg commands with the following rules: + + - key=val: "-key val" + - key=True: "-key" + - key=False: "" + + Args: + in_file (str): Input video filename. + out_file (str): Output video filename. + pre_options (str): Options appears before "-i ". + print_cmd (bool): Whether to print the final ffmpeg command. + """ + options = [] + for k, v in kwargs.items(): + if isinstance(v, bool): + if v: + options.append(f'-{k}') + elif k == 'log_level': + assert v in [ + 'quiet', 'panic', 'fatal', 'error', 'warning', 'info', + 'verbose', 'debug', 'trace' + ] + options.append(f'-loglevel {v}') + else: + options.append(f'-{k} {v}') + cmd = f'ffmpeg -y {pre_options} -i {in_file} {" ".join(options)} ' \ + f'{out_file}' + if print_cmd: + print(cmd) + subprocess.call(cmd, shell=True) + + +@requires_executable('ffmpeg') +def resize_video(in_file, + out_file, + size=None, + ratio=None, + keep_ar=False, + log_level='info', + print_cmd=False): + """Resize a video. + + Args: + in_file (str): Input video filename. + out_file (str): Output video filename. + size (tuple): Expected size (w, h), eg, (320, 240) or (320, -1). + ratio (tuple or float): Expected resize ratio, (2, 0.5) means + (w*2, h*0.5). + keep_ar (bool): Whether to keep original aspect ratio. + log_level (str): Logging level of ffmpeg. + print_cmd (bool): Whether to print the final ffmpeg command. + """ + if size is None and ratio is None: + raise ValueError('expected size or ratio must be specified') + if size is not None and ratio is not None: + raise ValueError('size and ratio cannot be specified at the same time') + options = {'log_level': log_level} + if size: + if not keep_ar: + options['vf'] = f'scale={size[0]}:{size[1]}' + else: + options['vf'] = f'scale=w={size[0]}:h={size[1]}:' \ + 'force_original_aspect_ratio=decrease' + else: + if not isinstance(ratio, tuple): + ratio = (ratio, ratio) + options['vf'] = f'scale="trunc(iw*{ratio[0]}):trunc(ih*{ratio[1]})"' + convert_video(in_file, out_file, print_cmd, **options) + + +@requires_executable('ffmpeg') +def cut_video(in_file, + out_file, + start=None, + end=None, + vcodec=None, + acodec=None, + log_level='info', + print_cmd=False): + """Cut a clip from a video. + + Args: + in_file (str): Input video filename. + out_file (str): Output video filename. + start (None or float): Start time (in seconds). 
+ end (None or float): End time (in seconds). + vcodec (None or str): Output video codec, None for unchanged. + acodec (None or str): Output audio codec, None for unchanged. + log_level (str): Logging level of ffmpeg. + print_cmd (bool): Whether to print the final ffmpeg command. + """ + options = {'log_level': log_level} + if vcodec is None: + options['vcodec'] = 'copy' + if acodec is None: + options['acodec'] = 'copy' + if start: + options['ss'] = start + else: + start = 0 + if end: + options['t'] = end - start + convert_video(in_file, out_file, print_cmd, **options) + + +@requires_executable('ffmpeg') +def concat_video(video_list, + out_file, + vcodec=None, + acodec=None, + log_level='info', + print_cmd=False): + """Concatenate multiple videos into a single one. + + Args: + video_list (list): A list of video filenames + out_file (str): Output video filename + vcodec (None or str): Output video codec, None for unchanged + acodec (None or str): Output audio codec, None for unchanged + log_level (str): Logging level of ffmpeg. + print_cmd (bool): Whether to print the final ffmpeg command. + """ + tmp_filehandler, tmp_filename = tempfile.mkstemp(suffix='.txt', text=True) + with open(tmp_filename, 'w') as f: + for filename in video_list: + f.write(f'file {osp.abspath(filename)}\n') + options = {'log_level': log_level} + if vcodec is None: + options['vcodec'] = 'copy' + if acodec is None: + options['acodec'] = 'copy' + convert_video( + tmp_filename, + out_file, + print_cmd, + pre_options='-f concat -safe 0', + **options) + os.close(tmp_filehandler) + os.remove(tmp_filename) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/visualization/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/visualization/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..835df136bdcf69348281d22914d41aa84cdf92b1 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/visualization/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .color import Color, color_val +from .image import imshow, imshow_bboxes, imshow_det_bboxes +from .optflow import flow2rgb, flowshow, make_color_wheel + +__all__ = [ + 'Color', 'color_val', 'imshow', 'imshow_bboxes', 'imshow_det_bboxes', + 'flowshow', 'flow2rgb', 'make_color_wheel' +] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/visualization/color.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/visualization/color.py new file mode 100644 index 0000000000000000000000000000000000000000..48379a283e48570f226426510270de8e15323c8d --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/visualization/color.py @@ -0,0 +1,51 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from enum import Enum + +import numpy as np + +from annotator.mmpkg.mmcv.utils import is_str + + +class Color(Enum): + """An enum that defines common colors. + + Contains red, green, blue, cyan, yellow, magenta, white and black. + """ + red = (0, 0, 255) + green = (0, 255, 0) + blue = (255, 0, 0) + cyan = (255, 255, 0) + yellow = (0, 255, 255) + magenta = (255, 0, 255) + white = (255, 255, 255) + black = (0, 0, 0) + + +def color_val(color): + """Convert various input to color tuples. + + Args: + color (:obj:`Color`/str/tuple/int/ndarray): Color inputs + + Returns: + tuple[int]: A tuple of 3 integers indicating BGR channels. 
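Editor's note: cutting two clips and joining them with the helpers above; paths are illustrative and ffmpeg is required.

from annotator.mmpkg.mmcv.video import cut_video, concat_video

cut_video('in.mp4', 'clip_a.mp4', start=5, end=15, log_level='error')    # -ss 5 -t 10
cut_video('in.mp4', 'clip_b.mp4', start=30, end=40, log_level='error')
concat_video(['clip_a.mp4', 'clip_b.mp4'], 'joined.mp4', log_level='error')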
+ """ + if is_str(color): + return Color[color].value + elif isinstance(color, Color): + return color.value + elif isinstance(color, tuple): + assert len(color) == 3 + for channel in color: + assert 0 <= channel <= 255 + return color + elif isinstance(color, int): + assert 0 <= color <= 255 + return color, color, color + elif isinstance(color, np.ndarray): + assert color.ndim == 1 and color.size == 3 + assert np.all((color >= 0) & (color <= 255)) + color = color.astype(np.uint8) + return tuple(color) + else: + raise TypeError(f'Invalid type for color: {type(color)}') diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/visualization/image.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/visualization/image.py new file mode 100644 index 0000000000000000000000000000000000000000..378de2104f6554389fcb2e6a3904283345fd74b0 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/visualization/image.py @@ -0,0 +1,152 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import cv2 +import numpy as np + +from annotator.mmpkg.mmcv.image import imread, imwrite +from .color import color_val + + +def imshow(img, win_name='', wait_time=0): + """Show an image. + + Args: + img (str or ndarray): The image to be displayed. + win_name (str): The window name. + wait_time (int): Value of waitKey param. + """ + cv2.imshow(win_name, imread(img)) + if wait_time == 0: # prevent from hanging if windows was closed + while True: + ret = cv2.waitKey(1) + + closed = cv2.getWindowProperty(win_name, cv2.WND_PROP_VISIBLE) < 1 + # if user closed window or if some key pressed + if closed or ret != -1: + break + else: + ret = cv2.waitKey(wait_time) + + +def imshow_bboxes(img, + bboxes, + colors='green', + top_k=-1, + thickness=1, + show=True, + win_name='', + wait_time=0, + out_file=None): + """Draw bboxes on an image. + + Args: + img (str or ndarray): The image to be displayed. + bboxes (list or ndarray): A list of ndarray of shape (k, 4). + colors (list[str or tuple or Color]): A list of colors. + top_k (int): Plot the first k bboxes only if set positive. + thickness (int): Thickness of lines. + show (bool): Whether to show the image. + win_name (str): The window name. + wait_time (int): Value of waitKey param. + out_file (str, optional): The filename to write the image. + + Returns: + ndarray: The image with bboxes drawn on it. + """ + img = imread(img) + img = np.ascontiguousarray(img) + + if isinstance(bboxes, np.ndarray): + bboxes = [bboxes] + if not isinstance(colors, list): + colors = [colors for _ in range(len(bboxes))] + colors = [color_val(c) for c in colors] + assert len(bboxes) == len(colors) + + for i, _bboxes in enumerate(bboxes): + _bboxes = _bboxes.astype(np.int32) + if top_k <= 0: + _top_k = _bboxes.shape[0] + else: + _top_k = min(top_k, _bboxes.shape[0]) + for j in range(_top_k): + left_top = (_bboxes[j, 0], _bboxes[j, 1]) + right_bottom = (_bboxes[j, 2], _bboxes[j, 3]) + cv2.rectangle( + img, left_top, right_bottom, colors[i], thickness=thickness) + + if show: + imshow(img, win_name, wait_time) + if out_file is not None: + imwrite(img, out_file) + return img + + +def imshow_det_bboxes(img, + bboxes, + labels, + class_names=None, + score_thr=0, + bbox_color='green', + text_color='green', + thickness=1, + font_scale=0.5, + show=True, + win_name='', + wait_time=0, + out_file=None): + """Draw bboxes and class labels (with scores) on an image. + + Args: + img (str or ndarray): The image to be displayed. 
+ bboxes (ndarray): Bounding boxes (with scores), shaped (n, 4) or + (n, 5). + labels (ndarray): Labels of bboxes. + class_names (list[str]): Names of each classes. + score_thr (float): Minimum score of bboxes to be shown. + bbox_color (str or tuple or :obj:`Color`): Color of bbox lines. + text_color (str or tuple or :obj:`Color`): Color of texts. + thickness (int): Thickness of lines. + font_scale (float): Font scales of texts. + show (bool): Whether to show the image. + win_name (str): The window name. + wait_time (int): Value of waitKey param. + out_file (str or None): The filename to write the image. + + Returns: + ndarray: The image with bboxes drawn on it. + """ + assert bboxes.ndim == 2 + assert labels.ndim == 1 + assert bboxes.shape[0] == labels.shape[0] + assert bboxes.shape[1] == 4 or bboxes.shape[1] == 5 + img = imread(img) + img = np.ascontiguousarray(img) + + if score_thr > 0: + assert bboxes.shape[1] == 5 + scores = bboxes[:, -1] + inds = scores > score_thr + bboxes = bboxes[inds, :] + labels = labels[inds] + + bbox_color = color_val(bbox_color) + text_color = color_val(text_color) + + for bbox, label in zip(bboxes, labels): + bbox_int = bbox.astype(np.int32) + left_top = (bbox_int[0], bbox_int[1]) + right_bottom = (bbox_int[2], bbox_int[3]) + cv2.rectangle( + img, left_top, right_bottom, bbox_color, thickness=thickness) + label_text = class_names[ + label] if class_names is not None else f'cls {label}' + if len(bbox) > 4: + label_text += f'|{bbox[-1]:.02f}' + cv2.putText(img, label_text, (bbox_int[0], bbox_int[1] - 2), + cv2.FONT_HERSHEY_COMPLEX, font_scale, text_color) + + if show: + imshow(img, win_name, wait_time) + if out_file is not None: + imwrite(img, out_file) + return img diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/visualization/optflow.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/visualization/optflow.py new file mode 100644 index 0000000000000000000000000000000000000000..b4c3ce980f9f6c74c85fe714aca1623a08ae7a8d --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmcv/visualization/optflow.py @@ -0,0 +1,112 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from __future__ import division + +import numpy as np + +from annotator.mmpkg.mmcv.image import rgb2bgr +from annotator.mmpkg.mmcv.video import flowread +from .image import imshow + + +def flowshow(flow, win_name='', wait_time=0): + """Show optical flow. + + Args: + flow (ndarray or str): The optical flow to be displayed. + win_name (str): The window name. + wait_time (int): Value of waitKey param. + """ + flow = flowread(flow) + flow_img = flow2rgb(flow) + imshow(rgb2bgr(flow_img), win_name, wait_time) + + +def flow2rgb(flow, color_wheel=None, unknown_thr=1e6): + """Convert flow map to RGB image. + + Args: + flow (ndarray): Array of optical flow. + color_wheel (ndarray or None): Color wheel used to map flow field to + RGB colorspace. Default color wheel will be used if not specified. + unknown_thr (str): Values above this threshold will be marked as + unknown and thus ignored. + + Returns: + ndarray: RGB image that can be visualized. 
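Editor's note: drawing thresholded detection boxes without opening a window; the class names, output path, and box values are illustrative, and OpenCV is required.

import numpy as np
from annotator.mmpkg.mmcv.visualization import imshow_det_bboxes

img = np.zeros((128, 128, 3), dtype=np.uint8)
bboxes = np.array([[10, 10, 60, 60, 0.9],
                   [30, 30, 100, 100, 0.4]], dtype=np.float32)
labels = np.array([0, 1])
drawn = imshow_det_bboxes(img, bboxes, labels, class_names=['cat', 'dog'],
                          score_thr=0.5, show=False, out_file='det.png')
# only the first box survives the 0.5 score threshold; it is labelled 'cat|0.90'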
+ """ + assert flow.ndim == 3 and flow.shape[-1] == 2 + if color_wheel is None: + color_wheel = make_color_wheel() + assert color_wheel.ndim == 2 and color_wheel.shape[1] == 3 + num_bins = color_wheel.shape[0] + + dx = flow[:, :, 0].copy() + dy = flow[:, :, 1].copy() + + ignore_inds = ( + np.isnan(dx) | np.isnan(dy) | (np.abs(dx) > unknown_thr) | + (np.abs(dy) > unknown_thr)) + dx[ignore_inds] = 0 + dy[ignore_inds] = 0 + + rad = np.sqrt(dx**2 + dy**2) + if np.any(rad > np.finfo(float).eps): + max_rad = np.max(rad) + dx /= max_rad + dy /= max_rad + + rad = np.sqrt(dx**2 + dy**2) + angle = np.arctan2(-dy, -dx) / np.pi + + bin_real = (angle + 1) / 2 * (num_bins - 1) + bin_left = np.floor(bin_real).astype(int) + bin_right = (bin_left + 1) % num_bins + w = (bin_real - bin_left.astype(np.float32))[..., None] + flow_img = (1 - + w) * color_wheel[bin_left, :] + w * color_wheel[bin_right, :] + small_ind = rad <= 1 + flow_img[small_ind] = 1 - rad[small_ind, None] * (1 - flow_img[small_ind]) + flow_img[np.logical_not(small_ind)] *= 0.75 + + flow_img[ignore_inds, :] = 0 + + return flow_img + + +def make_color_wheel(bins=None): + """Build a color wheel. + + Args: + bins(list or tuple, optional): Specify the number of bins for each + color range, corresponding to six ranges: red -> yellow, + yellow -> green, green -> cyan, cyan -> blue, blue -> magenta, + magenta -> red. [15, 6, 4, 11, 13, 6] is used for default + (see Middlebury). + + Returns: + ndarray: Color wheel of shape (total_bins, 3). + """ + if bins is None: + bins = [15, 6, 4, 11, 13, 6] + assert len(bins) == 6 + + RY, YG, GC, CB, BM, MR = tuple(bins) + + ry = [1, np.arange(RY) / RY, 0] + yg = [1 - np.arange(YG) / YG, 1, 0] + gc = [0, 1, np.arange(GC) / GC] + cb = [0, 1 - np.arange(CB) / CB, 1] + bm = [np.arange(BM) / BM, 0, 1] + mr = [1, 0, 1 - np.arange(MR) / MR] + + num_bins = RY + YG + GC + CB + BM + MR + + color_wheel = np.zeros((3, num_bins), dtype=np.float32) + + col = 0 + for i, color in enumerate([ry, yg, gc, cb, bm, mr]): + for j in range(3): + color_wheel[j, col:col + bins[i]] = color[j] + col += bins[i] + + return color_wheel.T diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/apis/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/apis/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..170724be38de42daf2bc1a1910e181d68818f165 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/apis/__init__.py @@ -0,0 +1,9 @@ +from .inference import inference_segmentor, init_segmentor, show_result_pyplot +from .test import multi_gpu_test, single_gpu_test +from .train import get_root_logger, set_random_seed, train_segmentor + +__all__ = [ + 'get_root_logger', 'set_random_seed', 'train_segmentor', 'init_segmentor', + 'inference_segmentor', 'multi_gpu_test', 'single_gpu_test', + 'show_result_pyplot' +] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/apis/inference.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/apis/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..515e459ff6e66e955624fedaf32d2076be750563 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/apis/inference.py @@ -0,0 +1,138 @@ +import matplotlib.pyplot as plt +import annotator.mmpkg.mmcv as mmcv +import torch +from annotator.mmpkg.mmcv.parallel import collate, scatter +from annotator.mmpkg.mmcv.runner import load_checkpoint + +from 
annotator.mmpkg.mmseg.datasets.pipelines import Compose +from annotator.mmpkg.mmseg.models import build_segmentor +from modules import devices + + +def init_segmentor(config, checkpoint=None, device=devices.get_device_for("controlnet")): + """Initialize a segmentor from config file. + + Args: + config (str or :obj:`mmcv.Config`): Config file path or the config + object. + checkpoint (str, optional): Checkpoint path. If left as None, the model + will not load any weights. + device (str, optional) CPU/CUDA device option. Default 'cuda:0'. + Use 'cpu' for loading model on CPU. + Returns: + nn.Module: The constructed segmentor. + """ + if isinstance(config, str): + config = mmcv.Config.fromfile(config) + elif not isinstance(config, mmcv.Config): + raise TypeError('config must be a filename or Config object, ' + 'but got {}'.format(type(config))) + config.model.pretrained = None + config.model.train_cfg = None + model = build_segmentor(config.model, test_cfg=config.get('test_cfg')) + if checkpoint is not None: + checkpoint = load_checkpoint(model, checkpoint, map_location='cpu') + model.CLASSES = checkpoint['meta']['CLASSES'] + model.PALETTE = checkpoint['meta']['PALETTE'] + model.cfg = config # save the config in the model for convenience + model.to(device) + model.eval() + return model + + +class LoadImage: + """A simple pipeline to load image.""" + + def __call__(self, results): + """Call function to load images into results. + + Args: + results (dict): A result dict contains the file name + of the image to be read. + + Returns: + dict: ``results`` will be returned containing loaded image. + """ + + if isinstance(results['img'], str): + results['filename'] = results['img'] + results['ori_filename'] = results['img'] + else: + results['filename'] = None + results['ori_filename'] = None + img = mmcv.imread(results['img']) + results['img'] = img + results['img_shape'] = img.shape + results['ori_shape'] = img.shape + return results + + +def inference_segmentor(model, img): + """Inference image(s) with the segmentor. + + Args: + model (nn.Module): The loaded segmentor. + imgs (str/ndarray or list[str/ndarray]): Either image files or loaded + images. + + Returns: + (list[Tensor]): The segmentation result. + """ + cfg = model.cfg + device = next(model.parameters()).device # model device + # build the data pipeline + test_pipeline = [LoadImage()] + cfg.data.test.pipeline[1:] + test_pipeline = Compose(test_pipeline) + # prepare data + data = dict(img=img) + data = test_pipeline(data) + data = collate([data], samples_per_gpu=1) + if next(model.parameters()).is_cuda: + # scatter to specified GPU + data = scatter(data, [device])[0] + else: + data['img'][0] = data['img'][0].to(devices.get_device_for("controlnet")) + data['img_metas'] = [i.data[0] for i in data['img_metas']] + + # forward the model + with torch.no_grad(): + result = model(return_loss=False, rescale=True, **data) + return result + + +def show_result_pyplot(model, + img, + result, + palette=None, + fig_size=(15, 10), + opacity=0.5, + title='', + block=True): + """Visualize the segmentation results on the image. + + Args: + model (nn.Module): The loaded segmentor. + img (str or np.ndarray): Image filename or loaded image. + result (list): The segmentation result. + palette (list[list[int]]] | None): The palette of segmentation + map. If None is given, random palette will be generated. + Default: None + fig_size (tuple): Figure size of the pyplot figure. + opacity(float): Opacity of painted segmentation map. + Default 0.5. 
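Editor's note: a hedged end-to-end sketch of the inference API above; the config, checkpoint, and image paths are placeholders, and this vendored module only runs inside the webui environment (it imports `modules.devices`).

from annotator.mmpkg.mmseg.apis import (init_segmentor, inference_segmentor,
                                        show_result_pyplot)

model = init_segmentor('configs/upernet_r50.py', 'upernet_r50.pth', device='cpu')
result = inference_segmentor(model, 'demo.png')    # list with one (H, W) label map
vis = show_result_pyplot(model, 'demo.png', result, opacity=0.5)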
+ Must be in (0, 1] range. + title (str): The title of pyplot figure. + Default is ''. + block (bool): Whether to block the pyplot figure. + Default is True. + """ + if hasattr(model, 'module'): + model = model.module + img = model.show_result( + img, result, palette=palette, show=False, opacity=opacity) + # plt.figure(figsize=fig_size) + # plt.imshow(mmcv.bgr2rgb(img)) + # plt.title(title) + # plt.tight_layout() + # plt.show(block=block) + return mmcv.bgr2rgb(img) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/apis/test.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/apis/test.py new file mode 100644 index 0000000000000000000000000000000000000000..f9954e6a3709afdbf6a2027b213afcad644c47d7 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/apis/test.py @@ -0,0 +1,238 @@ +import os.path as osp +import pickle +import shutil +import tempfile + +import annotator.mmpkg.mmcv as mmcv +import numpy as np +import torch +import torch.distributed as dist +from annotator.mmpkg.mmcv.image import tensor2imgs +from annotator.mmpkg.mmcv.runner import get_dist_info + + +def np2tmp(array, temp_file_name=None): + """Save ndarray to local numpy file. + + Args: + array (ndarray): Ndarray to save. + temp_file_name (str): Numpy file name. If 'temp_file_name=None', this + function will generate a file name with tempfile.NamedTemporaryFile + to save ndarray. Default: None. + + Returns: + str: The numpy file name. + """ + + if temp_file_name is None: + temp_file_name = tempfile.NamedTemporaryFile( + suffix='.npy', delete=False).name + np.save(temp_file_name, array) + return temp_file_name + + +def single_gpu_test(model, + data_loader, + show=False, + out_dir=None, + efficient_test=False, + opacity=0.5): + """Test with single GPU. + + Args: + model (nn.Module): Model to be tested. + data_loader (utils.data.Dataloader): Pytorch data loader. + show (bool): Whether show results during inference. Default: False. + out_dir (str, optional): If specified, the results will be dumped into + the directory to save output results. + efficient_test (bool): Whether save the results as local numpy files to + save CPU memory during evaluation. Default: False. + opacity(float): Opacity of painted segmentation map. + Default 0.5. + Must be in (0, 1] range. + Returns: + list: The prediction results. 
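As a quick illustration of np2tmp above, it round-trips an array through an auto-named temporary .npy file and returns the path (the file name below is made up):

import numpy as np
seg = np.zeros((512, 512), dtype=np.uint8)
path = np2tmp(seg)          # e.g. '/tmp/tmpab12cd.npy', chosen by tempfile
restored = np.load(path)    # array equal to seg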
+ """ + + model.eval() + results = [] + dataset = data_loader.dataset + prog_bar = mmcv.ProgressBar(len(dataset)) + for i, data in enumerate(data_loader): + with torch.no_grad(): + result = model(return_loss=False, **data) + + if show or out_dir: + img_tensor = data['img'][0] + img_metas = data['img_metas'][0].data[0] + imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg']) + assert len(imgs) == len(img_metas) + + for img, img_meta in zip(imgs, img_metas): + h, w, _ = img_meta['img_shape'] + img_show = img[:h, :w, :] + + ori_h, ori_w = img_meta['ori_shape'][:-1] + img_show = mmcv.imresize(img_show, (ori_w, ori_h)) + + if out_dir: + out_file = osp.join(out_dir, img_meta['ori_filename']) + else: + out_file = None + + model.module.show_result( + img_show, + result, + palette=dataset.PALETTE, + show=show, + out_file=out_file, + opacity=opacity) + + if isinstance(result, list): + if efficient_test: + result = [np2tmp(_) for _ in result] + results.extend(result) + else: + if efficient_test: + result = np2tmp(result) + results.append(result) + + batch_size = len(result) + for _ in range(batch_size): + prog_bar.update() + return results + + +def multi_gpu_test(model, + data_loader, + tmpdir=None, + gpu_collect=False, + efficient_test=False): + """Test model with multiple gpus. + + This method tests model with multiple gpus and collects the results + under two different modes: gpu and cpu modes. By setting 'gpu_collect=True' + it encodes results to gpu tensors and use gpu communication for results + collection. On cpu mode it saves the results on different gpus to 'tmpdir' + and collects them by the rank 0 worker. + + Args: + model (nn.Module): Model to be tested. + data_loader (utils.data.Dataloader): Pytorch data loader. + tmpdir (str): Path of directory to save the temporary results from + different gpus under cpu mode. + gpu_collect (bool): Option to use either gpu or cpu to collect results. + efficient_test (bool): Whether save the results as local numpy files to + save CPU memory during evaluation. Default: False. + + Returns: + list: The prediction results. 
+ """ + + model.eval() + results = [] + dataset = data_loader.dataset + rank, world_size = get_dist_info() + if rank == 0: + prog_bar = mmcv.ProgressBar(len(dataset)) + for i, data in enumerate(data_loader): + with torch.no_grad(): + result = model(return_loss=False, rescale=True, **data) + + if isinstance(result, list): + if efficient_test: + result = [np2tmp(_) for _ in result] + results.extend(result) + else: + if efficient_test: + result = np2tmp(result) + results.append(result) + + if rank == 0: + batch_size = data['img'][0].size(0) + for _ in range(batch_size * world_size): + prog_bar.update() + + # collect results from all ranks + if gpu_collect: + results = collect_results_gpu(results, len(dataset)) + else: + results = collect_results_cpu(results, len(dataset), tmpdir) + return results + + +def collect_results_cpu(result_part, size, tmpdir=None): + """Collect results with CPU.""" + rank, world_size = get_dist_info() + # create a tmp dir if it is not specified + if tmpdir is None: + MAX_LEN = 512 + # 32 is whitespace + dir_tensor = torch.full((MAX_LEN, ), + 32, + dtype=torch.uint8, + device='cuda') + if rank == 0: + tmpdir = tempfile.mkdtemp() + tmpdir = torch.tensor( + bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda') + dir_tensor[:len(tmpdir)] = tmpdir + dist.broadcast(dir_tensor, 0) + tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip() + else: + mmcv.mkdir_or_exist(tmpdir) + # dump the part result to the dir + mmcv.dump(result_part, osp.join(tmpdir, 'part_{}.pkl'.format(rank))) + dist.barrier() + # collect all parts + if rank != 0: + return None + else: + # load results of all parts from tmp dir + part_list = [] + for i in range(world_size): + part_file = osp.join(tmpdir, 'part_{}.pkl'.format(i)) + part_list.append(mmcv.load(part_file)) + # sort the results + ordered_results = [] + for res in zip(*part_list): + ordered_results.extend(list(res)) + # the dataloader may pad some samples + ordered_results = ordered_results[:size] + # remove tmp dir + shutil.rmtree(tmpdir) + return ordered_results + + +def collect_results_gpu(result_part, size): + """Collect results with GPU.""" + rank, world_size = get_dist_info() + # dump result part to tensor with pickle + part_tensor = torch.tensor( + bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device='cuda') + # gather all result part tensor shape + shape_tensor = torch.tensor(part_tensor.shape, device='cuda') + shape_list = [shape_tensor.clone() for _ in range(world_size)] + dist.all_gather(shape_list, shape_tensor) + # padding result part tensor to max length + shape_max = torch.tensor(shape_list).max() + part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda') + part_send[:shape_tensor[0]] = part_tensor + part_recv_list = [ + part_tensor.new_zeros(shape_max) for _ in range(world_size) + ] + # gather all result part + dist.all_gather(part_recv_list, part_send) + + if rank == 0: + part_list = [] + for recv, shape in zip(part_recv_list, shape_list): + part_list.append( + pickle.loads(recv[:shape[0]].cpu().numpy().tobytes())) + # sort the results + ordered_results = [] + for res in zip(*part_list): + ordered_results.extend(list(res)) + # the dataloader may pad some samples + ordered_results = ordered_results[:size] + return ordered_results diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/apis/train.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/apis/train.py new file mode 100644 index 
0000000000000000000000000000000000000000..f0a87d65c72e4581c96b41aebf879905510c9d22 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/apis/train.py @@ -0,0 +1,116 @@ +import random +import warnings + +import numpy as np +import torch +from annotator.mmpkg.mmcv.parallel import MMDataParallel, MMDistributedDataParallel +from annotator.mmpkg.mmcv.runner import build_optimizer, build_runner + +from annotator.mmpkg.mmseg.core import DistEvalHook, EvalHook +from annotator.mmpkg.mmseg.datasets import build_dataloader, build_dataset +from annotator.mmpkg.mmseg.utils import get_root_logger + + +def set_random_seed(seed, deterministic=False): + """Set random seed. + + Args: + seed (int): Seed to be used. + deterministic (bool): Whether to set the deterministic option for + CUDNN backend, i.e., set `torch.backends.cudnn.deterministic` + to True and `torch.backends.cudnn.benchmark` to False. + Default: False. + """ + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + if deterministic: + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + + +def train_segmentor(model, + dataset, + cfg, + distributed=False, + validate=False, + timestamp=None, + meta=None): + """Launch segmentor training.""" + logger = get_root_logger(cfg.log_level) + + # prepare data loaders + dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset] + data_loaders = [ + build_dataloader( + ds, + cfg.data.samples_per_gpu, + cfg.data.workers_per_gpu, + # cfg.gpus will be ignored if distributed + len(cfg.gpu_ids), + dist=distributed, + seed=cfg.seed, + drop_last=True) for ds in dataset + ] + + # put model on gpus + if distributed: + find_unused_parameters = cfg.get('find_unused_parameters', False) + # Sets the `find_unused_parameters` parameter in + # torch.nn.parallel.DistributedDataParallel + model = MMDistributedDataParallel( + model.cuda(), + device_ids=[torch.cuda.current_device()], + broadcast_buffers=False, + find_unused_parameters=find_unused_parameters) + else: + model = MMDataParallel( + model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids) + + # build runner + optimizer = build_optimizer(model, cfg.optimizer) + + if cfg.get('runner') is None: + cfg.runner = {'type': 'IterBasedRunner', 'max_iters': cfg.total_iters} + warnings.warn( + 'config is now expected to have a `runner` section, ' + 'please set `runner` in your config.', UserWarning) + + runner = build_runner( + cfg.runner, + default_args=dict( + model=model, + batch_processor=None, + optimizer=optimizer, + work_dir=cfg.work_dir, + logger=logger, + meta=meta)) + + # register hooks + runner.register_training_hooks(cfg.lr_config, cfg.optimizer_config, + cfg.checkpoint_config, cfg.log_config, + cfg.get('momentum_config', None)) + + # an ugly walkaround to make the .log and .log.json filenames the same + runner.timestamp = timestamp + + # register eval hooks + if validate: + val_dataset = build_dataset(cfg.data.val, dict(test_mode=True)) + val_dataloader = build_dataloader( + val_dataset, + samples_per_gpu=1, + workers_per_gpu=cfg.data.workers_per_gpu, + dist=distributed, + shuffle=False) + eval_cfg = cfg.get('evaluation', {}) + eval_cfg['by_epoch'] = cfg.runner['type'] != 'IterBasedRunner' + eval_hook = DistEvalHook if distributed else EvalHook + runner.register_hook(eval_hook(val_dataloader, **eval_cfg), priority='LOW') + + if cfg.resume_from: + runner.resume(cfg.resume_from) + elif cfg.load_from: + runner.load_checkpoint(cfg.load_from) + 
runner.run(data_loaders, cfg.workflow) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/core/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/core/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..965605587211b7bf0bd6bc3acdbb33dd49cab023 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/core/__init__.py @@ -0,0 +1,3 @@ +from .evaluation import * # noqa: F401, F403 +from .seg import * # noqa: F401, F403 +from .utils import * # noqa: F401, F403 diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/core/evaluation/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/core/evaluation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f7cc4b23413a0639e9de00eeb0bf600632d2c6cd --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/core/evaluation/__init__.py @@ -0,0 +1,8 @@ +from .class_names import get_classes, get_palette +from .eval_hooks import DistEvalHook, EvalHook +from .metrics import eval_metrics, mean_dice, mean_fscore, mean_iou + +__all__ = [ + 'EvalHook', 'DistEvalHook', 'mean_dice', 'mean_iou', 'mean_fscore', + 'eval_metrics', 'get_classes', 'get_palette' +] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/core/evaluation/class_names.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/core/evaluation/class_names.py new file mode 100644 index 0000000000000000000000000000000000000000..532c5fd78946ede66d747ec8e7b72dbb66471aac --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/core/evaluation/class_names.py @@ -0,0 +1,152 @@ +import annotator.mmpkg.mmcv as mmcv + + +def cityscapes_classes(): + """Cityscapes class names for external use.""" + return [ + 'road', 'sidewalk', 'building', 'wall', 'fence', 'pole', + 'traffic light', 'traffic sign', 'vegetation', 'terrain', 'sky', + 'person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle', + 'bicycle' + ] + + +def ade_classes(): + """ADE20K class names for external use.""" + return [ + 'wall', 'building', 'sky', 'floor', 'tree', 'ceiling', 'road', 'bed ', + 'windowpane', 'grass', 'cabinet', 'sidewalk', 'person', 'earth', + 'door', 'table', 'mountain', 'plant', 'curtain', 'chair', 'car', + 'water', 'painting', 'sofa', 'shelf', 'house', 'sea', 'mirror', 'rug', + 'field', 'armchair', 'seat', 'fence', 'desk', 'rock', 'wardrobe', + 'lamp', 'bathtub', 'railing', 'cushion', 'base', 'box', 'column', + 'signboard', 'chest of drawers', 'counter', 'sand', 'sink', + 'skyscraper', 'fireplace', 'refrigerator', 'grandstand', 'path', + 'stairs', 'runway', 'case', 'pool table', 'pillow', 'screen door', + 'stairway', 'river', 'bridge', 'bookcase', 'blind', 'coffee table', + 'toilet', 'flower', 'book', 'hill', 'bench', 'countertop', 'stove', + 'palm', 'kitchen island', 'computer', 'swivel chair', 'boat', 'bar', + 'arcade machine', 'hovel', 'bus', 'towel', 'light', 'truck', 'tower', + 'chandelier', 'awning', 'streetlight', 'booth', 'television receiver', + 'airplane', 'dirt track', 'apparel', 'pole', 'land', 'bannister', + 'escalator', 'ottoman', 'bottle', 'buffet', 'poster', 'stage', 'van', + 'ship', 'fountain', 'conveyer belt', 'canopy', 'washer', 'plaything', + 'swimming pool', 'stool', 'barrel', 'basket', 'waterfall', 'tent', + 'bag', 'minibike', 'cradle', 'oven', 'ball', 'food', 'step', 'tank', + 'trade name', 
'microwave', 'pot', 'animal', 'bicycle', 'lake', + 'dishwasher', 'screen', 'blanket', 'sculpture', 'hood', 'sconce', + 'vase', 'traffic light', 'tray', 'ashcan', 'fan', 'pier', 'crt screen', + 'plate', 'monitor', 'bulletin board', 'shower', 'radiator', 'glass', + 'clock', 'flag' + ] + + +def voc_classes(): + """Pascal VOC class names for external use.""" + return [ + 'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', + 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', + 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', + 'tvmonitor' + ] + + +def cityscapes_palette(): + """Cityscapes palette for external use.""" + return [[128, 64, 128], [244, 35, 232], [70, 70, 70], [102, 102, 156], + [190, 153, 153], [153, 153, 153], [250, 170, 30], [220, 220, 0], + [107, 142, 35], [152, 251, 152], [70, 130, 180], [220, 20, 60], + [255, 0, 0], [0, 0, 142], [0, 0, 70], [0, 60, 100], [0, 80, 100], + [0, 0, 230], [119, 11, 32]] + + +def ade_palette(): + """ADE20K palette for external use.""" + return [[120, 120, 120], [180, 120, 120], [6, 230, 230], [80, 50, 50], + [4, 200, 3], [120, 120, 80], [140, 140, 140], [204, 5, 255], + [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7], + [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82], + [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3], + [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255], + [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220], + [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224], + [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255], + [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7], + [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153], + [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255], + [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0], + [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255], + [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255], + [11, 200, 200], [255, 82, 0], [0, 255, 245], [0, 61, 255], + [0, 255, 112], [0, 255, 133], [255, 0, 0], [255, 163, 0], + [255, 102, 0], [194, 255, 0], [0, 143, 255], [51, 255, 0], + [0, 82, 255], [0, 255, 41], [0, 255, 173], [10, 0, 255], + [173, 255, 0], [0, 255, 153], [255, 92, 0], [255, 0, 255], + [255, 0, 245], [255, 0, 102], [255, 173, 0], [255, 0, 20], + [255, 184, 184], [0, 31, 255], [0, 255, 61], [0, 71, 255], + [255, 0, 204], [0, 255, 194], [0, 255, 82], [0, 10, 255], + [0, 112, 255], [51, 0, 255], [0, 194, 255], [0, 122, 255], + [0, 255, 163], [255, 153, 0], [0, 255, 10], [255, 112, 0], + [143, 255, 0], [82, 0, 255], [163, 255, 0], [255, 235, 0], + [8, 184, 170], [133, 0, 255], [0, 255, 92], [184, 0, 255], + [255, 0, 31], [0, 184, 255], [0, 214, 255], [255, 0, 112], + [92, 255, 0], [0, 224, 255], [112, 224, 255], [70, 184, 160], + [163, 0, 255], [153, 0, 255], [71, 255, 0], [255, 0, 163], + [255, 204, 0], [255, 0, 143], [0, 255, 235], [133, 255, 0], + [255, 0, 235], [245, 0, 255], [255, 0, 122], [255, 245, 0], + [10, 190, 212], [214, 255, 0], [0, 204, 255], [20, 0, 255], + [255, 255, 0], [0, 153, 255], [0, 41, 255], [0, 255, 204], + [41, 0, 255], [41, 255, 0], [173, 0, 255], [0, 245, 255], + [71, 0, 255], [122, 0, 255], [0, 255, 184], [0, 92, 255], + [184, 255, 0], [0, 133, 255], [255, 214, 0], [25, 194, 194], + [102, 255, 0], [92, 0, 255]] + + +def voc_palette(): + """Pascal VOC palette for external use.""" + return [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0], [0, 0, 128], + [128, 0, 128], [0, 128, 128], [128, 128, 128], [64, 0, 0], + [192, 
0, 0], [64, 128, 0], [192, 128, 0], [64, 0, 128], + [192, 0, 128], [64, 128, 128], [192, 128, 128], [0, 64, 0], + [128, 64, 0], [0, 192, 0], [128, 192, 0], [0, 64, 128]] + + +dataset_aliases = { + 'cityscapes': ['cityscapes'], + 'ade': ['ade', 'ade20k'], + 'voc': ['voc', 'pascal_voc', 'voc12', 'voc12aug'] +} + + +def get_classes(dataset): + """Get class names of a dataset.""" + alias2name = {} + for name, aliases in dataset_aliases.items(): + for alias in aliases: + alias2name[alias] = name + + if mmcv.is_str(dataset): + if dataset in alias2name: + labels = eval(alias2name[dataset] + '_classes()') + else: + raise ValueError(f'Unrecognized dataset: {dataset}') + else: + raise TypeError(f'dataset must a str, but got {type(dataset)}') + return labels + + +def get_palette(dataset): + """Get class palette (RGB) of a dataset.""" + alias2name = {} + for name, aliases in dataset_aliases.items(): + for alias in aliases: + alias2name[alias] = name + + if mmcv.is_str(dataset): + if dataset in alias2name: + labels = eval(alias2name[dataset] + '_palette()') + else: + raise ValueError(f'Unrecognized dataset: {dataset}') + else: + raise TypeError(f'dataset must a str, but got {type(dataset)}') + return labels diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/core/evaluation/eval_hooks.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/core/evaluation/eval_hooks.py new file mode 100644 index 0000000000000000000000000000000000000000..408e9670f61d1b118477562b341adc644c52799a --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/core/evaluation/eval_hooks.py @@ -0,0 +1,109 @@ +import os.path as osp + +from annotator.mmpkg.mmcv.runner import DistEvalHook as _DistEvalHook +from annotator.mmpkg.mmcv.runner import EvalHook as _EvalHook + + +class EvalHook(_EvalHook): + """Single GPU EvalHook, with efficient test support. + + Args: + by_epoch (bool): Determine perform evaluation by epoch or by iteration. + If set to True, it will perform by epoch. Otherwise, by iteration. + Default: False. + efficient_test (bool): Whether save the results as local numpy files to + save CPU memory during evaluation. Default: False. + Returns: + list: The prediction results. + """ + + greater_keys = ['mIoU', 'mAcc', 'aAcc'] + + def __init__(self, *args, by_epoch=False, efficient_test=False, **kwargs): + super().__init__(*args, by_epoch=by_epoch, **kwargs) + self.efficient_test = efficient_test + + def after_train_iter(self, runner): + """After train epoch hook. + + Override default ``single_gpu_test``. + """ + if self.by_epoch or not self.every_n_iters(runner, self.interval): + return + from annotator.mmpkg.mmseg.apis import single_gpu_test + runner.log_buffer.clear() + results = single_gpu_test( + runner.model, + self.dataloader, + show=False, + efficient_test=self.efficient_test) + self.evaluate(runner, results) + + def after_train_epoch(self, runner): + """After train epoch hook. + + Override default ``single_gpu_test``. + """ + if not self.by_epoch or not self.every_n_epochs(runner, self.interval): + return + from annotator.mmpkg.mmseg.apis import single_gpu_test + runner.log_buffer.clear() + results = single_gpu_test(runner.model, self.dataloader, show=False) + self.evaluate(runner, results) + + +class DistEvalHook(_DistEvalHook): + """Distributed EvalHook, with efficient test support. + + Args: + by_epoch (bool): Determine perform evaluation by epoch or by iteration. + If set to True, it will perform by epoch. Otherwise, by iteration. 
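The alias table above means both canonical names and their aliases resolve to the same lookups, for example:

classes = get_classes('ade20k')        # alias of 'ade' -> 150 ADE20K class names
palette = get_palette('cityscapes')    # 19 RGB triplets
assert len(classes) == 150 and len(palette) == 19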
+ Default: False. + efficient_test (bool): Whether save the results as local numpy files to + save CPU memory during evaluation. Default: False. + Returns: + list: The prediction results. + """ + + greater_keys = ['mIoU', 'mAcc', 'aAcc'] + + def __init__(self, *args, by_epoch=False, efficient_test=False, **kwargs): + super().__init__(*args, by_epoch=by_epoch, **kwargs) + self.efficient_test = efficient_test + + def after_train_iter(self, runner): + """After train epoch hook. + + Override default ``multi_gpu_test``. + """ + if self.by_epoch or not self.every_n_iters(runner, self.interval): + return + from annotator.mmpkg.mmseg.apis import multi_gpu_test + runner.log_buffer.clear() + results = multi_gpu_test( + runner.model, + self.dataloader, + tmpdir=osp.join(runner.work_dir, '.eval_hook'), + gpu_collect=self.gpu_collect, + efficient_test=self.efficient_test) + if runner.rank == 0: + print('\n') + self.evaluate(runner, results) + + def after_train_epoch(self, runner): + """After train epoch hook. + + Override default ``multi_gpu_test``. + """ + if not self.by_epoch or not self.every_n_epochs(runner, self.interval): + return + from annotator.mmpkg.mmseg.apis import multi_gpu_test + runner.log_buffer.clear() + results = multi_gpu_test( + runner.model, + self.dataloader, + tmpdir=osp.join(runner.work_dir, '.eval_hook'), + gpu_collect=self.gpu_collect) + if runner.rank == 0: + print('\n') + self.evaluate(runner, results) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/core/evaluation/metrics.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/core/evaluation/metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..8ede737624a0ba6e6365639f7019ac2527052cfd --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/core/evaluation/metrics.py @@ -0,0 +1,326 @@ +from collections import OrderedDict + +import annotator.mmpkg.mmcv as mmcv +import numpy as np +import torch + + +def f_score(precision, recall, beta=1): + """calcuate the f-score value. + + Args: + precision (float | torch.Tensor): The precision value. + recall (float | torch.Tensor): The recall value. + beta (int): Determines the weight of recall in the combined score. + Default: False. + + Returns: + [torch.tensor]: The f-score value. + """ + score = (1 + beta**2) * (precision * recall) / ( + (beta**2 * precision) + recall) + return score + + +def intersect_and_union(pred_label, + label, + num_classes, + ignore_index, + label_map=dict(), + reduce_zero_label=False): + """Calculate intersection and Union. + + Args: + pred_label (ndarray | str): Prediction segmentation map + or predict result filename. + label (ndarray | str): Ground truth segmentation map + or label filename. + num_classes (int): Number of categories. + ignore_index (int): Index that will be ignored in evaluation. + label_map (dict): Mapping old labels to new labels. The parameter will + work only when label is str. Default: dict(). + reduce_zero_label (bool): Whether ignore zero label. The parameter will + work only when label is str. Default: False. + + Returns: + torch.Tensor: The intersection of prediction and ground truth + histogram on all classes. + torch.Tensor: The union of prediction and ground truth histogram on + all classes. + torch.Tensor: The prediction histogram on all classes. + torch.Tensor: The ground truth histogram on all classes. 
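f_score above is the standard F-beta score; with the default beta=1 it reduces to the harmonic mean of precision and recall:

f_score(0.5, 1.0)          # 2 * 0.5 * 1.0 / (0.5 + 1.0) = 0.666...
f_score(0.5, 1.0, beta=2)  # recall weighted more heavily -> 0.833...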
+ """ + + if isinstance(pred_label, str): + pred_label = torch.from_numpy(np.load(pred_label)) + else: + pred_label = torch.from_numpy((pred_label)) + + if isinstance(label, str): + label = torch.from_numpy( + mmcv.imread(label, flag='unchanged', backend='pillow')) + else: + label = torch.from_numpy(label) + + if label_map is not None: + for old_id, new_id in label_map.items(): + label[label == old_id] = new_id + if reduce_zero_label: + label[label == 0] = 255 + label = label - 1 + label[label == 254] = 255 + + mask = (label != ignore_index) + pred_label = pred_label[mask] + label = label[mask] + + intersect = pred_label[pred_label == label] + area_intersect = torch.histc( + intersect.float(), bins=(num_classes), min=0, max=num_classes - 1) + area_pred_label = torch.histc( + pred_label.float(), bins=(num_classes), min=0, max=num_classes - 1) + area_label = torch.histc( + label.float(), bins=(num_classes), min=0, max=num_classes - 1) + area_union = area_pred_label + area_label - area_intersect + return area_intersect, area_union, area_pred_label, area_label + + +def total_intersect_and_union(results, + gt_seg_maps, + num_classes, + ignore_index, + label_map=dict(), + reduce_zero_label=False): + """Calculate Total Intersection and Union. + + Args: + results (list[ndarray] | list[str]): List of prediction segmentation + maps or list of prediction result filenames. + gt_seg_maps (list[ndarray] | list[str]): list of ground truth + segmentation maps or list of label filenames. + num_classes (int): Number of categories. + ignore_index (int): Index that will be ignored in evaluation. + label_map (dict): Mapping old labels to new labels. Default: dict(). + reduce_zero_label (bool): Whether ignore zero label. Default: False. + + Returns: + ndarray: The intersection of prediction and ground truth histogram + on all classes. + ndarray: The union of prediction and ground truth histogram on all + classes. + ndarray: The prediction histogram on all classes. + ndarray: The ground truth histogram on all classes. + """ + num_imgs = len(results) + assert len(gt_seg_maps) == num_imgs + total_area_intersect = torch.zeros((num_classes, ), dtype=torch.float64) + total_area_union = torch.zeros((num_classes, ), dtype=torch.float64) + total_area_pred_label = torch.zeros((num_classes, ), dtype=torch.float64) + total_area_label = torch.zeros((num_classes, ), dtype=torch.float64) + for i in range(num_imgs): + area_intersect, area_union, area_pred_label, area_label = \ + intersect_and_union( + results[i], gt_seg_maps[i], num_classes, ignore_index, + label_map, reduce_zero_label) + total_area_intersect += area_intersect + total_area_union += area_union + total_area_pred_label += area_pred_label + total_area_label += area_label + return total_area_intersect, total_area_union, total_area_pred_label, \ + total_area_label + + +def mean_iou(results, + gt_seg_maps, + num_classes, + ignore_index, + nan_to_num=None, + label_map=dict(), + reduce_zero_label=False): + """Calculate Mean Intersection and Union (mIoU) + + Args: + results (list[ndarray] | list[str]): List of prediction segmentation + maps or list of prediction result filenames. + gt_seg_maps (list[ndarray] | list[str]): list of ground truth + segmentation maps or list of label filenames. + num_classes (int): Number of categories. + ignore_index (int): Index that will be ignored in evaluation. + nan_to_num (int, optional): If specified, NaN values will be replaced + by the numbers defined by the user. Default: None. + label_map (dict): Mapping old labels to new labels. 
Default: dict(). + reduce_zero_label (bool): Whether ignore zero label. Default: False. + + Returns: + dict[str, float | ndarray]: + float: Overall accuracy on all images. + ndarray: Per category accuracy, shape (num_classes, ). + ndarray: Per category IoU, shape (num_classes, ). + """ + iou_result = eval_metrics( + results=results, + gt_seg_maps=gt_seg_maps, + num_classes=num_classes, + ignore_index=ignore_index, + metrics=['mIoU'], + nan_to_num=nan_to_num, + label_map=label_map, + reduce_zero_label=reduce_zero_label) + return iou_result + + +def mean_dice(results, + gt_seg_maps, + num_classes, + ignore_index, + nan_to_num=None, + label_map=dict(), + reduce_zero_label=False): + """Calculate Mean Dice (mDice) + + Args: + results (list[ndarray] | list[str]): List of prediction segmentation + maps or list of prediction result filenames. + gt_seg_maps (list[ndarray] | list[str]): list of ground truth + segmentation maps or list of label filenames. + num_classes (int): Number of categories. + ignore_index (int): Index that will be ignored in evaluation. + nan_to_num (int, optional): If specified, NaN values will be replaced + by the numbers defined by the user. Default: None. + label_map (dict): Mapping old labels to new labels. Default: dict(). + reduce_zero_label (bool): Whether ignore zero label. Default: False. + + Returns: + dict[str, float | ndarray]: Default metrics. + float: Overall accuracy on all images. + ndarray: Per category accuracy, shape (num_classes, ). + ndarray: Per category dice, shape (num_classes, ). + """ + + dice_result = eval_metrics( + results=results, + gt_seg_maps=gt_seg_maps, + num_classes=num_classes, + ignore_index=ignore_index, + metrics=['mDice'], + nan_to_num=nan_to_num, + label_map=label_map, + reduce_zero_label=reduce_zero_label) + return dice_result + + +def mean_fscore(results, + gt_seg_maps, + num_classes, + ignore_index, + nan_to_num=None, + label_map=dict(), + reduce_zero_label=False, + beta=1): + """Calculate Mean Intersection and Union (mIoU) + + Args: + results (list[ndarray] | list[str]): List of prediction segmentation + maps or list of prediction result filenames. + gt_seg_maps (list[ndarray] | list[str]): list of ground truth + segmentation maps or list of label filenames. + num_classes (int): Number of categories. + ignore_index (int): Index that will be ignored in evaluation. + nan_to_num (int, optional): If specified, NaN values will be replaced + by the numbers defined by the user. Default: None. + label_map (dict): Mapping old labels to new labels. Default: dict(). + reduce_zero_label (bool): Whether ignore zero label. Default: False. + beta (int): Determines the weight of recall in the combined score. + Default: False. + + + Returns: + dict[str, float | ndarray]: Default metrics. + float: Overall accuracy on all images. + ndarray: Per category recall, shape (num_classes, ). + ndarray: Per category precision, shape (num_classes, ). + ndarray: Per category f-score, shape (num_classes, ). 
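Because the metric helpers above accept raw ndarrays as well as filenames, a toy sanity check looks like this (arrays are illustrative only):

import numpy as np
pred = [np.array([[0, 1], [1, 1]])]
gt = [np.array([[0, 1], [0, 1]])]
res = mean_iou(pred, gt, num_classes=2, ignore_index=255)
# res is an OrderedDict with 'aAcc' plus per-class 'IoU' and 'Acc' arrays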
+ """ + fscore_result = eval_metrics( + results=results, + gt_seg_maps=gt_seg_maps, + num_classes=num_classes, + ignore_index=ignore_index, + metrics=['mFscore'], + nan_to_num=nan_to_num, + label_map=label_map, + reduce_zero_label=reduce_zero_label, + beta=beta) + return fscore_result + + +def eval_metrics(results, + gt_seg_maps, + num_classes, + ignore_index, + metrics=['mIoU'], + nan_to_num=None, + label_map=dict(), + reduce_zero_label=False, + beta=1): + """Calculate evaluation metrics + Args: + results (list[ndarray] | list[str]): List of prediction segmentation + maps or list of prediction result filenames. + gt_seg_maps (list[ndarray] | list[str]): list of ground truth + segmentation maps or list of label filenames. + num_classes (int): Number of categories. + ignore_index (int): Index that will be ignored in evaluation. + metrics (list[str] | str): Metrics to be evaluated, 'mIoU' and 'mDice'. + nan_to_num (int, optional): If specified, NaN values will be replaced + by the numbers defined by the user. Default: None. + label_map (dict): Mapping old labels to new labels. Default: dict(). + reduce_zero_label (bool): Whether ignore zero label. Default: False. + Returns: + float: Overall accuracy on all images. + ndarray: Per category accuracy, shape (num_classes, ). + ndarray: Per category evaluation metrics, shape (num_classes, ). + """ + if isinstance(metrics, str): + metrics = [metrics] + allowed_metrics = ['mIoU', 'mDice', 'mFscore'] + if not set(metrics).issubset(set(allowed_metrics)): + raise KeyError('metrics {} is not supported'.format(metrics)) + + total_area_intersect, total_area_union, total_area_pred_label, \ + total_area_label = total_intersect_and_union( + results, gt_seg_maps, num_classes, ignore_index, label_map, + reduce_zero_label) + all_acc = total_area_intersect.sum() / total_area_label.sum() + ret_metrics = OrderedDict({'aAcc': all_acc}) + for metric in metrics: + if metric == 'mIoU': + iou = total_area_intersect / total_area_union + acc = total_area_intersect / total_area_label + ret_metrics['IoU'] = iou + ret_metrics['Acc'] = acc + elif metric == 'mDice': + dice = 2 * total_area_intersect / ( + total_area_pred_label + total_area_label) + acc = total_area_intersect / total_area_label + ret_metrics['Dice'] = dice + ret_metrics['Acc'] = acc + elif metric == 'mFscore': + precision = total_area_intersect / total_area_pred_label + recall = total_area_intersect / total_area_label + f_value = torch.tensor( + [f_score(x[0], x[1], beta) for x in zip(precision, recall)]) + ret_metrics['Fscore'] = f_value + ret_metrics['Precision'] = precision + ret_metrics['Recall'] = recall + + ret_metrics = { + metric: value.numpy() + for metric, value in ret_metrics.items() + } + if nan_to_num is not None: + ret_metrics = OrderedDict({ + metric: np.nan_to_num(metric_value, nan=nan_to_num) + for metric, metric_value in ret_metrics.items() + }) + return ret_metrics diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/core/seg/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/core/seg/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..93bc129b685e4a3efca2cc891729981b2865900d --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/core/seg/__init__.py @@ -0,0 +1,4 @@ +from .builder import build_pixel_sampler +from .sampler import BasePixelSampler, OHEMPixelSampler + +__all__ = ['build_pixel_sampler', 'BasePixelSampler', 'OHEMPixelSampler'] diff --git 
a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/core/seg/builder.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/core/seg/builder.py new file mode 100644 index 0000000000000000000000000000000000000000..f8fff6375622282f85b3acf15af1a7d27fb9c426 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/core/seg/builder.py @@ -0,0 +1,8 @@ +from annotator.mmpkg.mmcv.utils import Registry, build_from_cfg + +PIXEL_SAMPLERS = Registry('pixel sampler') + + +def build_pixel_sampler(cfg, **default_args): + """Build pixel sampler for segmentation map.""" + return build_from_cfg(cfg, PIXEL_SAMPLERS, default_args) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/core/seg/sampler/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/core/seg/sampler/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..332b242c03d1c5e80d4577df442a9a037b1816e1 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/core/seg/sampler/__init__.py @@ -0,0 +1,4 @@ +from .base_pixel_sampler import BasePixelSampler +from .ohem_pixel_sampler import OHEMPixelSampler + +__all__ = ['BasePixelSampler', 'OHEMPixelSampler'] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/core/seg/sampler/base_pixel_sampler.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/core/seg/sampler/base_pixel_sampler.py new file mode 100644 index 0000000000000000000000000000000000000000..b75b1566c9f18169cee51d4b55d75e0357b69c57 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/core/seg/sampler/base_pixel_sampler.py @@ -0,0 +1,12 @@ +from abc import ABCMeta, abstractmethod + + +class BasePixelSampler(metaclass=ABCMeta): + """Base class of pixel sampler.""" + + def __init__(self, **kwargs): + pass + + @abstractmethod + def sample(self, seg_logit, seg_label): + """Placeholder for sample function.""" diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/core/seg/sampler/ohem_pixel_sampler.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/core/seg/sampler/ohem_pixel_sampler.py new file mode 100644 index 0000000000000000000000000000000000000000..88bb10d44026ba9f21756eaea9e550841cd59b9f --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/core/seg/sampler/ohem_pixel_sampler.py @@ -0,0 +1,76 @@ +import torch +import torch.nn.functional as F + +from ..builder import PIXEL_SAMPLERS +from .base_pixel_sampler import BasePixelSampler + + +@PIXEL_SAMPLERS.register_module() +class OHEMPixelSampler(BasePixelSampler): + """Online Hard Example Mining Sampler for segmentation. + + Args: + context (nn.Module): The context of sampler, subclass of + :obj:`BaseDecodeHead`. + thresh (float, optional): The threshold for hard example selection. + Below which, are prediction with low confidence. If not + specified, the hard examples will be pixels of top ``min_kept`` + loss. Default: None. + min_kept (int, optional): The minimum number of predictions to keep. + Default: 100000. + """ + + def __init__(self, context, thresh=None, min_kept=100000): + super(OHEMPixelSampler, self).__init__() + self.context = context + assert min_kept > 1 + self.thresh = thresh + self.min_kept = min_kept + + def sample(self, seg_logit, seg_label): + """Sample pixels that have high loss or with low prediction confidence. 
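Because the sampler registers itself with PIXEL_SAMPLERS, a decode head normally builds it from config through build_pixel_sampler above; a sketch, where decode_head, seg_logit and seg_label are placeholders:

cfg = dict(type='OHEMPixelSampler', thresh=0.7, min_kept=100000)
sampler = build_pixel_sampler(cfg, context=decode_head)
seg_weight = sampler.sample(seg_logit, seg_label)   # (N, H, W) weights of 0/1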
+ + Args: + seg_logit (torch.Tensor): segmentation logits, shape (N, C, H, W) + seg_label (torch.Tensor): segmentation label, shape (N, 1, H, W) + + Returns: + torch.Tensor: segmentation weight, shape (N, H, W) + """ + with torch.no_grad(): + assert seg_logit.shape[2:] == seg_label.shape[2:] + assert seg_label.shape[1] == 1 + seg_label = seg_label.squeeze(1).long() + batch_kept = self.min_kept * seg_label.size(0) + valid_mask = seg_label != self.context.ignore_index + seg_weight = seg_logit.new_zeros(size=seg_label.size()) + valid_seg_weight = seg_weight[valid_mask] + if self.thresh is not None: + seg_prob = F.softmax(seg_logit, dim=1) + + tmp_seg_label = seg_label.clone().unsqueeze(1) + tmp_seg_label[tmp_seg_label == self.context.ignore_index] = 0 + seg_prob = seg_prob.gather(1, tmp_seg_label).squeeze(1) + sort_prob, sort_indices = seg_prob[valid_mask].sort() + + if sort_prob.numel() > 0: + min_threshold = sort_prob[min(batch_kept, + sort_prob.numel() - 1)] + else: + min_threshold = 0.0 + threshold = max(min_threshold, self.thresh) + valid_seg_weight[seg_prob[valid_mask] < threshold] = 1. + else: + losses = self.context.loss_decode( + seg_logit, + seg_label, + weight=None, + ignore_index=self.context.ignore_index, + reduction_override='none') + # faster than topk according to https://github.com/pytorch/pytorch/issues/22812 # noqa + _, sort_indices = losses[valid_mask].sort(descending=True) + valid_seg_weight[sort_indices[:batch_kept]] = 1. + + seg_weight[valid_mask] = valid_seg_weight + + return seg_weight diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/core/utils/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/core/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f2678b321c295bcceaef945111ac3524be19d6e4 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/core/utils/__init__.py @@ -0,0 +1,3 @@ +from .misc import add_prefix + +__all__ = ['add_prefix'] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/core/utils/misc.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/core/utils/misc.py new file mode 100644 index 0000000000000000000000000000000000000000..eb862a82bd47c8624db3dd5c6fb6ad8a03b62466 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/core/utils/misc.py @@ -0,0 +1,17 @@ +def add_prefix(inputs, prefix): + """Add prefix for dict. + + Args: + inputs (dict): The input dict with str keys. + prefix (str): The prefix to add. + + Returns: + + dict: The dict with keys updated with ``prefix``. 
+ """ + + outputs = dict() + for name, value in inputs.items(): + outputs[f'{prefix}.{name}'] = value + + return outputs diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ebeaef4a28ef655e43578552a8aef6b77f13a636 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/__init__.py @@ -0,0 +1,19 @@ +from .ade import ADE20KDataset +from .builder import DATASETS, PIPELINES, build_dataloader, build_dataset +from .chase_db1 import ChaseDB1Dataset +from .cityscapes import CityscapesDataset +from .custom import CustomDataset +from .dataset_wrappers import ConcatDataset, RepeatDataset +from .drive import DRIVEDataset +from .hrf import HRFDataset +from .pascal_context import PascalContextDataset, PascalContextDataset59 +from .stare import STAREDataset +from .voc import PascalVOCDataset + +__all__ = [ + 'CustomDataset', 'build_dataloader', 'ConcatDataset', 'RepeatDataset', + 'DATASETS', 'build_dataset', 'PIPELINES', 'CityscapesDataset', + 'PascalVOCDataset', 'ADE20KDataset', 'PascalContextDataset', + 'PascalContextDataset59', 'ChaseDB1Dataset', 'DRIVEDataset', 'HRFDataset', + 'STAREDataset' +] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/ade.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/ade.py new file mode 100644 index 0000000000000000000000000000000000000000..5913e43775ed4920b6934c855eb5a37c54218ebf --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/ade.py @@ -0,0 +1,84 @@ +from .builder import DATASETS +from .custom import CustomDataset + + +@DATASETS.register_module() +class ADE20KDataset(CustomDataset): + """ADE20K dataset. + + In segmentation map annotation for ADE20K, 0 stands for background, which + is not included in 150 categories. ``reduce_zero_label`` is fixed to True. + The ``img_suffix`` is fixed to '.jpg' and ``seg_map_suffix`` is fixed to + '.png'. 
+ """ + CLASSES = ( + 'wall', 'building', 'sky', 'floor', 'tree', 'ceiling', 'road', 'bed ', + 'windowpane', 'grass', 'cabinet', 'sidewalk', 'person', 'earth', + 'door', 'table', 'mountain', 'plant', 'curtain', 'chair', 'car', + 'water', 'painting', 'sofa', 'shelf', 'house', 'sea', 'mirror', 'rug', + 'field', 'armchair', 'seat', 'fence', 'desk', 'rock', 'wardrobe', + 'lamp', 'bathtub', 'railing', 'cushion', 'base', 'box', 'column', + 'signboard', 'chest of drawers', 'counter', 'sand', 'sink', + 'skyscraper', 'fireplace', 'refrigerator', 'grandstand', 'path', + 'stairs', 'runway', 'case', 'pool table', 'pillow', 'screen door', + 'stairway', 'river', 'bridge', 'bookcase', 'blind', 'coffee table', + 'toilet', 'flower', 'book', 'hill', 'bench', 'countertop', 'stove', + 'palm', 'kitchen island', 'computer', 'swivel chair', 'boat', 'bar', + 'arcade machine', 'hovel', 'bus', 'towel', 'light', 'truck', 'tower', + 'chandelier', 'awning', 'streetlight', 'booth', 'television receiver', + 'airplane', 'dirt track', 'apparel', 'pole', 'land', 'bannister', + 'escalator', 'ottoman', 'bottle', 'buffet', 'poster', 'stage', 'van', + 'ship', 'fountain', 'conveyer belt', 'canopy', 'washer', 'plaything', + 'swimming pool', 'stool', 'barrel', 'basket', 'waterfall', 'tent', + 'bag', 'minibike', 'cradle', 'oven', 'ball', 'food', 'step', 'tank', + 'trade name', 'microwave', 'pot', 'animal', 'bicycle', 'lake', + 'dishwasher', 'screen', 'blanket', 'sculpture', 'hood', 'sconce', + 'vase', 'traffic light', 'tray', 'ashcan', 'fan', 'pier', 'crt screen', + 'plate', 'monitor', 'bulletin board', 'shower', 'radiator', 'glass', + 'clock', 'flag') + + PALETTE = [[120, 120, 120], [180, 120, 120], [6, 230, 230], [80, 50, 50], + [4, 200, 3], [120, 120, 80], [140, 140, 140], [204, 5, 255], + [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7], + [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82], + [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3], + [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255], + [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220], + [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224], + [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255], + [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7], + [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153], + [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255], + [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0], + [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255], + [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255], + [11, 200, 200], [255, 82, 0], [0, 255, 245], [0, 61, 255], + [0, 255, 112], [0, 255, 133], [255, 0, 0], [255, 163, 0], + [255, 102, 0], [194, 255, 0], [0, 143, 255], [51, 255, 0], + [0, 82, 255], [0, 255, 41], [0, 255, 173], [10, 0, 255], + [173, 255, 0], [0, 255, 153], [255, 92, 0], [255, 0, 255], + [255, 0, 245], [255, 0, 102], [255, 173, 0], [255, 0, 20], + [255, 184, 184], [0, 31, 255], [0, 255, 61], [0, 71, 255], + [255, 0, 204], [0, 255, 194], [0, 255, 82], [0, 10, 255], + [0, 112, 255], [51, 0, 255], [0, 194, 255], [0, 122, 255], + [0, 255, 163], [255, 153, 0], [0, 255, 10], [255, 112, 0], + [143, 255, 0], [82, 0, 255], [163, 255, 0], [255, 235, 0], + [8, 184, 170], [133, 0, 255], [0, 255, 92], [184, 0, 255], + [255, 0, 31], [0, 184, 255], [0, 214, 255], [255, 0, 112], + [92, 255, 0], [0, 224, 255], [112, 224, 255], [70, 184, 160], + [163, 0, 255], [153, 0, 255], [71, 255, 0], [255, 0, 163], + [255, 204, 0], [255, 0, 143], [0, 255, 235], [133, 255, 0], + 
[255, 0, 235], [245, 0, 255], [255, 0, 122], [255, 245, 0], + [10, 190, 212], [214, 255, 0], [0, 204, 255], [20, 0, 255], + [255, 255, 0], [0, 153, 255], [0, 41, 255], [0, 255, 204], + [41, 0, 255], [41, 255, 0], [173, 0, 255], [0, 245, 255], + [71, 0, 255], [122, 0, 255], [0, 255, 184], [0, 92, 255], + [184, 255, 0], [0, 133, 255], [255, 214, 0], [25, 194, 194], + [102, 255, 0], [92, 0, 255]] + + def __init__(self, **kwargs): + super(ADE20KDataset, self).__init__( + img_suffix='.jpg', + seg_map_suffix='.png', + reduce_zero_label=True, + **kwargs) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/builder.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/builder.py new file mode 100644 index 0000000000000000000000000000000000000000..6cf8b4d9d32d4464905507cd54a84eb534f38bb6 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/builder.py @@ -0,0 +1,169 @@ +import copy +import platform +import random +from functools import partial + +import numpy as np +from annotator.mmpkg.mmcv.parallel import collate +from annotator.mmpkg.mmcv.runner import get_dist_info +from annotator.mmpkg.mmcv.utils import Registry, build_from_cfg +from annotator.mmpkg.mmcv.utils.parrots_wrapper import DataLoader, PoolDataLoader +from torch.utils.data import DistributedSampler + +if platform.system() != 'Windows': + # https://github.com/pytorch/pytorch/issues/973 + import resource + rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) + hard_limit = rlimit[1] + soft_limit = min(4096, hard_limit) + resource.setrlimit(resource.RLIMIT_NOFILE, (soft_limit, hard_limit)) + +DATASETS = Registry('dataset') +PIPELINES = Registry('pipeline') + + +def _concat_dataset(cfg, default_args=None): + """Build :obj:`ConcatDataset by.""" + from .dataset_wrappers import ConcatDataset + img_dir = cfg['img_dir'] + ann_dir = cfg.get('ann_dir', None) + split = cfg.get('split', None) + num_img_dir = len(img_dir) if isinstance(img_dir, (list, tuple)) else 1 + if ann_dir is not None: + num_ann_dir = len(ann_dir) if isinstance(ann_dir, (list, tuple)) else 1 + else: + num_ann_dir = 0 + if split is not None: + num_split = len(split) if isinstance(split, (list, tuple)) else 1 + else: + num_split = 0 + if num_img_dir > 1: + assert num_img_dir == num_ann_dir or num_ann_dir == 0 + assert num_img_dir == num_split or num_split == 0 + else: + assert num_split == num_ann_dir or num_ann_dir <= 1 + num_dset = max(num_split, num_img_dir) + + datasets = [] + for i in range(num_dset): + data_cfg = copy.deepcopy(cfg) + if isinstance(img_dir, (list, tuple)): + data_cfg['img_dir'] = img_dir[i] + if isinstance(ann_dir, (list, tuple)): + data_cfg['ann_dir'] = ann_dir[i] + if isinstance(split, (list, tuple)): + data_cfg['split'] = split[i] + datasets.append(build_dataset(data_cfg, default_args)) + + return ConcatDataset(datasets) + + +def build_dataset(cfg, default_args=None): + """Build datasets.""" + from .dataset_wrappers import ConcatDataset, RepeatDataset + if isinstance(cfg, (list, tuple)): + dataset = ConcatDataset([build_dataset(c, default_args) for c in cfg]) + elif cfg['type'] == 'RepeatDataset': + dataset = RepeatDataset( + build_dataset(cfg['dataset'], default_args), cfg['times']) + elif isinstance(cfg.get('img_dir'), (list, tuple)) or isinstance( + cfg.get('split', None), (list, tuple)): + dataset = _concat_dataset(cfg, default_args) + else: + dataset = build_from_cfg(cfg, DATASETS, default_args) + + return dataset + + +def 
build_dataloader(dataset, + samples_per_gpu, + workers_per_gpu, + num_gpus=1, + dist=True, + shuffle=True, + seed=None, + drop_last=False, + pin_memory=True, + dataloader_type='PoolDataLoader', + **kwargs): + """Build PyTorch DataLoader. + + In distributed training, each GPU/process has a dataloader. + In non-distributed training, there is only one dataloader for all GPUs. + + Args: + dataset (Dataset): A PyTorch dataset. + samples_per_gpu (int): Number of training samples on each GPU, i.e., + batch size of each GPU. + workers_per_gpu (int): How many subprocesses to use for data loading + for each GPU. + num_gpus (int): Number of GPUs. Only used in non-distributed training. + dist (bool): Distributed training/test or not. Default: True. + shuffle (bool): Whether to shuffle the data at every epoch. + Default: True. + seed (int | None): Seed to be used. Default: None. + drop_last (bool): Whether to drop the last incomplete batch in epoch. + Default: False + pin_memory (bool): Whether to use pin_memory in DataLoader. + Default: True + dataloader_type (str): Type of dataloader. Default: 'PoolDataLoader' + kwargs: any keyword argument to be used to initialize DataLoader + + Returns: + DataLoader: A PyTorch dataloader. + """ + rank, world_size = get_dist_info() + if dist: + sampler = DistributedSampler( + dataset, world_size, rank, shuffle=shuffle) + shuffle = False + batch_size = samples_per_gpu + num_workers = workers_per_gpu + else: + sampler = None + batch_size = num_gpus * samples_per_gpu + num_workers = num_gpus * workers_per_gpu + + init_fn = partial( + worker_init_fn, num_workers=num_workers, rank=rank, + seed=seed) if seed is not None else None + + assert dataloader_type in ( + 'DataLoader', + 'PoolDataLoader'), f'unsupported dataloader {dataloader_type}' + + if dataloader_type == 'PoolDataLoader': + dataloader = PoolDataLoader + elif dataloader_type == 'DataLoader': + dataloader = DataLoader + + data_loader = dataloader( + dataset, + batch_size=batch_size, + sampler=sampler, + num_workers=num_workers, + collate_fn=partial(collate, samples_per_gpu=samples_per_gpu), + pin_memory=pin_memory, + shuffle=shuffle, + worker_init_fn=init_fn, + drop_last=drop_last, + **kwargs) + + return data_loader + + +def worker_init_fn(worker_id, num_workers, rank, seed): + """Worker init func for dataloader. + + The seed of each worker equals to num_worker * rank + worker_id + user_seed + + Args: + worker_id (int): Worker id. + num_workers (int): Number of workers. + rank (int): The rank of current process. + seed (int): The random seed to use. + """ + + worker_seed = num_workers * rank + worker_id + seed + np.random.seed(worker_seed) + random.seed(worker_seed) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/chase_db1.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/chase_db1.py new file mode 100644 index 0000000000000000000000000000000000000000..8bc29bea14704a4407f83474610cbc3bef32c708 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/chase_db1.py @@ -0,0 +1,27 @@ +import os.path as osp + +from .builder import DATASETS +from .custom import CustomDataset + + +@DATASETS.register_module() +class ChaseDB1Dataset(CustomDataset): + """Chase_db1 dataset. + + In segmentation map annotation for Chase_db1, 0 stands for background, + which is included in 2 categories. ``reduce_zero_label`` is fixed to False. + The ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to + '_1stHO.png'. 
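A sketch of a non-distributed call to build_dataloader above, where dataset is a placeholder produced by build_dataset; when a seed is given, each worker is seeded as num_workers * rank + worker_id + seed:

loader = build_dataloader(
    dataset,
    samples_per_gpu=2,
    workers_per_gpu=2,
    num_gpus=1,
    dist=False,
    shuffle=True,
    seed=42,
    dataloader_type='DataLoader')   # 'PoolDataLoader' is the default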
+ """ + + CLASSES = ('background', 'vessel') + + PALETTE = [[120, 120, 120], [6, 230, 230]] + + def __init__(self, **kwargs): + super(ChaseDB1Dataset, self).__init__( + img_suffix='.png', + seg_map_suffix='_1stHO.png', + reduce_zero_label=False, + **kwargs) + assert osp.exists(self.img_dir) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/cityscapes.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..38f80e8043d25178cf5dac18911241c74be4e3ac --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/cityscapes.py @@ -0,0 +1,217 @@ +import os.path as osp +import tempfile + +import annotator.mmpkg.mmcv as mmcv +import numpy as np +from annotator.mmpkg.mmcv.utils import print_log +from PIL import Image + +from .builder import DATASETS +from .custom import CustomDataset + + +@DATASETS.register_module() +class CityscapesDataset(CustomDataset): + """Cityscapes dataset. + + The ``img_suffix`` is fixed to '_leftImg8bit.png' and ``seg_map_suffix`` is + fixed to '_gtFine_labelTrainIds.png' for Cityscapes dataset. + """ + + CLASSES = ('road', 'sidewalk', 'building', 'wall', 'fence', 'pole', + 'traffic light', 'traffic sign', 'vegetation', 'terrain', 'sky', + 'person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle', + 'bicycle') + + PALETTE = [[128, 64, 128], [244, 35, 232], [70, 70, 70], [102, 102, 156], + [190, 153, 153], [153, 153, 153], [250, 170, 30], [220, 220, 0], + [107, 142, 35], [152, 251, 152], [70, 130, 180], [220, 20, 60], + [255, 0, 0], [0, 0, 142], [0, 0, 70], [0, 60, 100], + [0, 80, 100], [0, 0, 230], [119, 11, 32]] + + def __init__(self, **kwargs): + super(CityscapesDataset, self).__init__( + img_suffix='_leftImg8bit.png', + seg_map_suffix='_gtFine_labelTrainIds.png', + **kwargs) + + @staticmethod + def _convert_to_label_id(result): + """Convert trainId to id for cityscapes.""" + if isinstance(result, str): + result = np.load(result) + import cityscapesscripts.helpers.labels as CSLabels + result_copy = result.copy() + for trainId, label in CSLabels.trainId2label.items(): + result_copy[result == trainId] = label.id + + return result_copy + + def results2img(self, results, imgfile_prefix, to_label_id): + """Write the segmentation results to images. + + Args: + results (list[list | tuple | ndarray]): Testing results of the + dataset. + imgfile_prefix (str): The filename prefix of the png files. + If the prefix is "somepath/xxx", + the png files will be named "somepath/xxx.png". + to_label_id (bool): whether convert output to label_id for + submission + + Returns: + list[str: str]: result txt files which contains corresponding + semantic segmentation images. 
+ """ + mmcv.mkdir_or_exist(imgfile_prefix) + result_files = [] + prog_bar = mmcv.ProgressBar(len(self)) + for idx in range(len(self)): + result = results[idx] + if to_label_id: + result = self._convert_to_label_id(result) + filename = self.img_infos[idx]['filename'] + basename = osp.splitext(osp.basename(filename))[0] + + png_filename = osp.join(imgfile_prefix, f'{basename}.png') + + output = Image.fromarray(result.astype(np.uint8)).convert('P') + import cityscapesscripts.helpers.labels as CSLabels + palette = np.zeros((len(CSLabels.id2label), 3), dtype=np.uint8) + for label_id, label in CSLabels.id2label.items(): + palette[label_id] = label.color + + output.putpalette(palette) + output.save(png_filename) + result_files.append(png_filename) + prog_bar.update() + + return result_files + + def format_results(self, results, imgfile_prefix=None, to_label_id=True): + """Format the results into dir (standard format for Cityscapes + evaluation). + + Args: + results (list): Testing results of the dataset. + imgfile_prefix (str | None): The prefix of images files. It + includes the file path and the prefix of filename, e.g., + "a/b/prefix". If not specified, a temp file will be created. + Default: None. + to_label_id (bool): whether convert output to label_id for + submission. Default: False + + Returns: + tuple: (result_files, tmp_dir), result_files is a list containing + the image paths, tmp_dir is the temporal directory created + for saving json/png files when img_prefix is not specified. + """ + + assert isinstance(results, list), 'results must be a list' + assert len(results) == len(self), ( + 'The length of results is not equal to the dataset len: ' + f'{len(results)} != {len(self)}') + + if imgfile_prefix is None: + tmp_dir = tempfile.TemporaryDirectory() + imgfile_prefix = tmp_dir.name + else: + tmp_dir = None + result_files = self.results2img(results, imgfile_prefix, to_label_id) + + return result_files, tmp_dir + + def evaluate(self, + results, + metric='mIoU', + logger=None, + imgfile_prefix=None, + efficient_test=False): + """Evaluation in Cityscapes/default protocol. + + Args: + results (list): Testing results of the dataset. + metric (str | list[str]): Metrics to be evaluated. + logger (logging.Logger | None | str): Logger used for printing + related information during evaluation. Default: None. + imgfile_prefix (str | None): The prefix of output image file, + for cityscapes evaluation only. It includes the file path and + the prefix of filename, e.g., "a/b/prefix". + If results are evaluated with cityscapes protocol, it would be + the prefix of output png files. The output files would be + png images under folder "a/b/prefix/xxx.png", where "xxx" is + the image name of cityscapes. If not specified, a temp file + will be created for evaluation. + Default: None. + + Returns: + dict[str, float]: Cityscapes/default metrics. + """ + + eval_results = dict() + metrics = metric.copy() if isinstance(metric, list) else [metric] + if 'cityscapes' in metrics: + eval_results.update( + self._evaluate_cityscapes(results, logger, imgfile_prefix)) + metrics.remove('cityscapes') + if len(metrics) > 0: + eval_results.update( + super(CityscapesDataset, + self).evaluate(results, metrics, logger, efficient_test)) + + return eval_results + + def _evaluate_cityscapes(self, results, logger, imgfile_prefix): + """Evaluation in Cityscapes protocol. + + Args: + results (list): Testing results of the dataset. 
+ logger (logging.Logger | str | None): Logger used for printing + related information during evaluation. Default: None. + imgfile_prefix (str | None): The prefix of output image file + + Returns: + dict[str: float]: Cityscapes evaluation results. + """ + try: + import cityscapesscripts.evaluation.evalPixelLevelSemanticLabeling as CSEval # noqa + except ImportError: + raise ImportError('Please run "pip install cityscapesscripts" to ' + 'install cityscapesscripts first.') + msg = 'Evaluating in Cityscapes style' + if logger is None: + msg = '\n' + msg + print_log(msg, logger=logger) + + result_files, tmp_dir = self.format_results(results, imgfile_prefix) + + if tmp_dir is None: + result_dir = imgfile_prefix + else: + result_dir = tmp_dir.name + + eval_results = dict() + print_log(f'Evaluating results under {result_dir} ...', logger=logger) + + CSEval.args.evalInstLevelScore = True + CSEval.args.predictionPath = osp.abspath(result_dir) + CSEval.args.evalPixelAccuracy = True + CSEval.args.JSONOutput = False + + seg_map_list = [] + pred_list = [] + + # when evaluating with official cityscapesscripts, + # **_gtFine_labelIds.png is used + for seg_map in mmcv.scandir( + self.ann_dir, 'gtFine_labelIds.png', recursive=True): + seg_map_list.append(osp.join(self.ann_dir, seg_map)) + pred_list.append(CSEval.getPrediction(CSEval.args, seg_map)) + + eval_results.update( + CSEval.evaluateImgLists(pred_list, seg_map_list, CSEval.args)) + + if tmp_dir is not None: + tmp_dir.cleanup() + + return eval_results diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/custom.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/custom.py new file mode 100644 index 0000000000000000000000000000000000000000..3a626976c7fa88c3d1c1e871ef621422acc1be83 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/custom.py @@ -0,0 +1,403 @@ +import os +import os.path as osp +from collections import OrderedDict +from functools import reduce + +import annotator.mmpkg.mmcv as mmcv +import numpy as np +from annotator.mmpkg.mmcv.utils import print_log +from torch.utils.data import Dataset + +from annotator.mmpkg.mmseg.core import eval_metrics +from annotator.mmpkg.mmseg.utils import get_root_logger +from .builder import DATASETS +from .pipelines import Compose + + +@DATASETS.register_module() +class CustomDataset(Dataset): + """Custom dataset for semantic segmentation. An example of file structure + is as followed. + + .. code-block:: none + + ├── data + │ ├── my_dataset + │ │ ├── img_dir + │ │ │ ├── train + │ │ │ │ ├── xxx{img_suffix} + │ │ │ │ ├── yyy{img_suffix} + │ │ │ │ ├── zzz{img_suffix} + │ │ │ ├── val + │ │ ├── ann_dir + │ │ │ ├── train + │ │ │ │ ├── xxx{seg_map_suffix} + │ │ │ │ ├── yyy{seg_map_suffix} + │ │ │ │ ├── zzz{seg_map_suffix} + │ │ │ ├── val + + The img/gt_semantic_seg pair of CustomDataset should be of the same + except suffix. A valid img/gt_semantic_seg filename pair should be like + ``xxx{img_suffix}`` and ``xxx{seg_map_suffix}`` (extension is also included + in the suffix). If split is given, then ``xxx`` is specified in txt file. + Otherwise, all files in ``img_dir/``and ``ann_dir`` will be loaded. + Please refer to ``docs/tutorials/new_dataset.md`` for more details. + + + Args: + pipeline (list[dict]): Processing pipeline + img_dir (str): Path to image directory + img_suffix (str): Suffix of images. Default: '.jpg' + ann_dir (str, optional): Path to annotation directory. 
Default: None + seg_map_suffix (str): Suffix of segmentation maps. Default: '.png' + split (str, optional): Split txt file. If split is specified, only + file with suffix in the splits will be loaded. Otherwise, all + images in img_dir/ann_dir will be loaded. Default: None + data_root (str, optional): Data root for img_dir/ann_dir. Default: + None. + test_mode (bool): If test_mode=True, gt wouldn't be loaded. + ignore_index (int): The label index to be ignored. Default: 255 + reduce_zero_label (bool): Whether to mark label zero as ignored. + Default: False + classes (str | Sequence[str], optional): Specify classes to load. + If is None, ``cls.CLASSES`` will be used. Default: None. + palette (Sequence[Sequence[int]]] | np.ndarray | None): + The palette of segmentation map. If None is given, and + self.PALETTE is None, random palette will be generated. + Default: None + """ + + CLASSES = None + + PALETTE = None + + def __init__(self, + pipeline, + img_dir, + img_suffix='.jpg', + ann_dir=None, + seg_map_suffix='.png', + split=None, + data_root=None, + test_mode=False, + ignore_index=255, + reduce_zero_label=False, + classes=None, + palette=None): + self.pipeline = Compose(pipeline) + self.img_dir = img_dir + self.img_suffix = img_suffix + self.ann_dir = ann_dir + self.seg_map_suffix = seg_map_suffix + self.split = split + self.data_root = data_root + self.test_mode = test_mode + self.ignore_index = ignore_index + self.reduce_zero_label = reduce_zero_label + self.label_map = None + self.CLASSES, self.PALETTE = self.get_classes_and_palette( + classes, palette) + + # join paths if data_root is specified + if self.data_root is not None: + if not osp.isabs(self.img_dir): + self.img_dir = osp.join(self.data_root, self.img_dir) + if not (self.ann_dir is None or osp.isabs(self.ann_dir)): + self.ann_dir = osp.join(self.data_root, self.ann_dir) + if not (self.split is None or osp.isabs(self.split)): + self.split = osp.join(self.data_root, self.split) + + # load annotations + self.img_infos = self.load_annotations(self.img_dir, self.img_suffix, + self.ann_dir, + self.seg_map_suffix, self.split) + + def __len__(self): + """Total number of samples of data.""" + return len(self.img_infos) + + def load_annotations(self, img_dir, img_suffix, ann_dir, seg_map_suffix, + split): + """Load annotation from directory. + + Args: + img_dir (str): Path to image directory + img_suffix (str): Suffix of images. + ann_dir (str|None): Path to annotation directory. + seg_map_suffix (str|None): Suffix of segmentation maps. + split (str|None): Split txt file. If split is specified, only file + with suffix in the splits will be loaded. Otherwise, all images + in img_dir/ann_dir will be loaded. Default: None + + Returns: + list[dict]: All image info of dataset. + """ + + img_infos = [] + if split is not None: + with open(split) as f: + for line in f: + img_name = line.strip() + img_info = dict(filename=img_name + img_suffix) + if ann_dir is not None: + seg_map = img_name + seg_map_suffix + img_info['ann'] = dict(seg_map=seg_map) + img_infos.append(img_info) + else: + for img in mmcv.scandir(img_dir, img_suffix, recursive=True): + img_info = dict(filename=img) + if ann_dir is not None: + seg_map = img.replace(img_suffix, seg_map_suffix) + img_info['ann'] = dict(seg_map=seg_map) + img_infos.append(img_info) + + print_log(f'Loaded {len(img_infos)} images', logger=get_root_logger()) + return img_infos + + def get_ann_info(self, idx): + """Get annotation by index. + + Args: + idx (int): Index of data. 
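# --- Illustrative sketch, not part of this diff: a minimal config for the CustomDataset
# above, matching the directory layout in its docstring. The paths are hypothetical; the
# pipeline entries refer to the LoadImageFromFile/LoadAnnotations transforms added later
# in this diff.
dataset_cfg = dict(
    type='CustomDataset',
    data_root='data/my_dataset',
    img_dir='img_dir/train',
    ann_dir='ann_dir/train',
    img_suffix='.jpg',
    seg_map_suffix='.png',
    pipeline=[
        dict(type='LoadImageFromFile'),
        dict(type='LoadAnnotations'),
    ],
)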
+ + Returns: + dict: Annotation info of specified index. + """ + + return self.img_infos[idx]['ann'] + + def pre_pipeline(self, results): + """Prepare results dict for pipeline.""" + results['seg_fields'] = [] + results['img_prefix'] = self.img_dir + results['seg_prefix'] = self.ann_dir + if self.custom_classes: + results['label_map'] = self.label_map + + def __getitem__(self, idx): + """Get training/test data after pipeline. + + Args: + idx (int): Index of data. + + Returns: + dict: Training/test data (with annotation if `test_mode` is set + False). + """ + + if self.test_mode: + return self.prepare_test_img(idx) + else: + return self.prepare_train_img(idx) + + def prepare_train_img(self, idx): + """Get training data and annotations after pipeline. + + Args: + idx (int): Index of data. + + Returns: + dict: Training data and annotation after pipeline with new keys + introduced by pipeline. + """ + + img_info = self.img_infos[idx] + ann_info = self.get_ann_info(idx) + results = dict(img_info=img_info, ann_info=ann_info) + self.pre_pipeline(results) + return self.pipeline(results) + + def prepare_test_img(self, idx): + """Get testing data after pipeline. + + Args: + idx (int): Index of data. + + Returns: + dict: Testing data after pipeline with new keys introduced by + pipeline. + """ + + img_info = self.img_infos[idx] + results = dict(img_info=img_info) + self.pre_pipeline(results) + return self.pipeline(results) + + def format_results(self, results, **kwargs): + """Place holder to format result to dataset specific output.""" + + def get_gt_seg_maps(self, efficient_test=False): + """Get ground truth segmentation maps for evaluation.""" + gt_seg_maps = [] + for img_info in self.img_infos: + seg_map = osp.join(self.ann_dir, img_info['ann']['seg_map']) + if efficient_test: + gt_seg_map = seg_map + else: + gt_seg_map = mmcv.imread( + seg_map, flag='unchanged', backend='pillow') + gt_seg_maps.append(gt_seg_map) + return gt_seg_maps + + def get_classes_and_palette(self, classes=None, palette=None): + """Get class names of current dataset. + + Args: + classes (Sequence[str] | str | None): If classes is None, use + default CLASSES defined by builtin dataset. If classes is a + string, take it as a file name. The file contains the name of + classes where each line contains one class name. If classes is + a tuple or list, override the CLASSES defined by the dataset. + palette (Sequence[Sequence[int]]] | np.ndarray | None): + The palette of segmentation map. If None is given, random + palette will be generated. Default: None + """ + if classes is None: + self.custom_classes = False + return self.CLASSES, self.PALETTE + + self.custom_classes = True + if isinstance(classes, str): + # take it as a file path + class_names = mmcv.list_from_file(classes) + elif isinstance(classes, (tuple, list)): + class_names = classes + else: + raise ValueError(f'Unsupported type {type(classes)} of classes.') + + if self.CLASSES: + if not set(classes).issubset(self.CLASSES): + raise ValueError('classes is not a subset of CLASSES.') + + # dictionary, its keys are the old label ids and its values + # are the new label ids. + # used for changing pixel labels in load_annotations. 
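# --- Illustrative sketch, not part of this diff: what the label_map built just below
# looks like for a toy class list. Classes dropped from the custom subset map to -1;
# kept classes map to their new index.
CLASSES = ('road', 'car', 'person')
classes = ('person', 'road')  # hypothetical custom subset in a new order
label_map = {}
for i, c in enumerate(CLASSES):
    label_map[i] = classes.index(c) if c in classes else -1
print(label_map)  # {0: 1, 1: -1, 2: 0}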
+ self.label_map = {} + for i, c in enumerate(self.CLASSES): + if c not in class_names: + self.label_map[i] = -1 + else: + self.label_map[i] = classes.index(c) + + palette = self.get_palette_for_custom_classes(class_names, palette) + + return class_names, palette + + def get_palette_for_custom_classes(self, class_names, palette=None): + + if self.label_map is not None: + # return subset of palette + palette = [] + for old_id, new_id in sorted( + self.label_map.items(), key=lambda x: x[1]): + if new_id != -1: + palette.append(self.PALETTE[old_id]) + palette = type(self.PALETTE)(palette) + + elif palette is None: + if self.PALETTE is None: + palette = np.random.randint(0, 255, size=(len(class_names), 3)) + else: + palette = self.PALETTE + + return palette + + def evaluate(self, + results, + metric='mIoU', + logger=None, + efficient_test=False, + **kwargs): + """Evaluate the dataset. + + Args: + results (list): Testing results of the dataset. + metric (str | list[str]): Metrics to be evaluated. 'mIoU', + 'mDice' and 'mFscore' are supported. + logger (logging.Logger | None | str): Logger used for printing + related information during evaluation. Default: None. + + Returns: + dict[str, float]: Default metrics. + """ + + if isinstance(metric, str): + metric = [metric] + allowed_metrics = ['mIoU', 'mDice', 'mFscore'] + if not set(metric).issubset(set(allowed_metrics)): + raise KeyError('metric {} is not supported'.format(metric)) + eval_results = {} + gt_seg_maps = self.get_gt_seg_maps(efficient_test) + if self.CLASSES is None: + num_classes = len( + reduce(np.union1d, [np.unique(_) for _ in gt_seg_maps])) + else: + num_classes = len(self.CLASSES) + ret_metrics = eval_metrics( + results, + gt_seg_maps, + num_classes, + self.ignore_index, + metric, + label_map=self.label_map, + reduce_zero_label=self.reduce_zero_label) + + if self.CLASSES is None: + class_names = tuple(range(num_classes)) + else: + class_names = self.CLASSES + + # summary table + ret_metrics_summary = OrderedDict({ + ret_metric: np.round(np.nanmean(ret_metric_value) * 100, 2) + for ret_metric, ret_metric_value in ret_metrics.items() + }) + + # each class table + ret_metrics.pop('aAcc', None) + ret_metrics_class = OrderedDict({ + ret_metric: np.round(ret_metric_value * 100, 2) + for ret_metric, ret_metric_value in ret_metrics.items() + }) + ret_metrics_class.update({'Class': class_names}) + ret_metrics_class.move_to_end('Class', last=False) + + try: + from prettytable import PrettyTable + # for logger + class_table_data = PrettyTable() + for key, val in ret_metrics_class.items(): + class_table_data.add_column(key, val) + + summary_table_data = PrettyTable() + for key, val in ret_metrics_summary.items(): + if key == 'aAcc': + summary_table_data.add_column(key, [val]) + else: + summary_table_data.add_column('m' + key, [val]) + + print_log('per class results:', logger) + print_log('\n' + class_table_data.get_string(), logger=logger) + print_log('Summary:', logger) + print_log('\n' + summary_table_data.get_string(), logger=logger) + except ImportError: # prettytable is not installed + pass + + # each metric dict + for key, value in ret_metrics_summary.items(): + if key == 'aAcc': + eval_results[key] = value / 100.0 + else: + eval_results['m' + key] = value / 100.0 + + ret_metrics_class.pop('Class', None) + for key, value in ret_metrics_class.items(): + eval_results.update({ + key + '.' 
+ str(name): value[idx] / 100.0 + for idx, name in enumerate(class_names) + }) + + if mmcv.is_list_of(results, str): + for file_name in results: + os.remove(file_name) + return eval_results diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/dataset_wrappers.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/dataset_wrappers.py new file mode 100644 index 0000000000000000000000000000000000000000..d6a5e957ec3b44465432617cf6e8f0b86a8a5efa --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/dataset_wrappers.py @@ -0,0 +1,50 @@ +from torch.utils.data.dataset import ConcatDataset as _ConcatDataset + +from .builder import DATASETS + + +@DATASETS.register_module() +class ConcatDataset(_ConcatDataset): + """A wrapper of concatenated dataset. + + Same as :obj:`torch.utils.data.dataset.ConcatDataset`, but + concat the group flag for image aspect ratio. + + Args: + datasets (list[:obj:`Dataset`]): A list of datasets. + """ + + def __init__(self, datasets): + super(ConcatDataset, self).__init__(datasets) + self.CLASSES = datasets[0].CLASSES + self.PALETTE = datasets[0].PALETTE + + +@DATASETS.register_module() +class RepeatDataset(object): + """A wrapper of repeated dataset. + + The length of repeated dataset will be `times` larger than the original + dataset. This is useful when the data loading time is long but the dataset + is small. Using RepeatDataset can reduce the data loading time between + epochs. + + Args: + dataset (:obj:`Dataset`): The dataset to be repeated. + times (int): Repeat times. + """ + + def __init__(self, dataset, times): + self.dataset = dataset + self.times = times + self.CLASSES = dataset.CLASSES + self.PALETTE = dataset.PALETTE + self._ori_len = len(self.dataset) + + def __getitem__(self, idx): + """Get item from original dataset.""" + return self.dataset[idx % self._ori_len] + + def __len__(self): + """The length is multiplied by ``times``""" + return self.times * self._ori_len diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/drive.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/drive.py new file mode 100644 index 0000000000000000000000000000000000000000..3cbfda8ae74bdf26c5aef197ff2866a7c7ad0cfd --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/drive.py @@ -0,0 +1,27 @@ +import os.path as osp + +from .builder import DATASETS +from .custom import CustomDataset + + +@DATASETS.register_module() +class DRIVEDataset(CustomDataset): + """DRIVE dataset. + + In segmentation map annotation for DRIVE, 0 stands for background, which is + included in 2 categories. ``reduce_zero_label`` is fixed to False. The + ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to + '_manual1.png'. 
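# --- Illustrative sketch, not part of this diff: RepeatDataset indexing as defined
# above -- the reported length is times * len(dataset) and indices wrap with modulo.
# TinyDataset is a hypothetical stand-in with the CLASSES/PALETTE attributes that
# RepeatDataset reads.
class TinyDataset:
    CLASSES = ('background', 'vessel')
    PALETTE = [[120, 120, 120], [6, 230, 230]]

    def __init__(self):
        self.items = ['a', 'b', 'c']

    def __len__(self):
        return len(self.items)

    def __getitem__(self, idx):
        return self.items[idx]

ds, times = TinyDataset(), 2
print(times * len(ds))   # 6, as RepeatDataset.__len__ would report
print(ds[4 % len(ds)])   # 'b', as RepeatDataset.__getitem__(4) would return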
+ """ + + CLASSES = ('background', 'vessel') + + PALETTE = [[120, 120, 120], [6, 230, 230]] + + def __init__(self, **kwargs): + super(DRIVEDataset, self).__init__( + img_suffix='.png', + seg_map_suffix='_manual1.png', + reduce_zero_label=False, + **kwargs) + assert osp.exists(self.img_dir) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/hrf.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/hrf.py new file mode 100644 index 0000000000000000000000000000000000000000..923203b51377f9344277fc561803d7a78bd2c684 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/hrf.py @@ -0,0 +1,27 @@ +import os.path as osp + +from .builder import DATASETS +from .custom import CustomDataset + + +@DATASETS.register_module() +class HRFDataset(CustomDataset): + """HRF dataset. + + In segmentation map annotation for HRF, 0 stands for background, which is + included in 2 categories. ``reduce_zero_label`` is fixed to False. The + ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to + '.png'. + """ + + CLASSES = ('background', 'vessel') + + PALETTE = [[120, 120, 120], [6, 230, 230]] + + def __init__(self, **kwargs): + super(HRFDataset, self).__init__( + img_suffix='.png', + seg_map_suffix='.png', + reduce_zero_label=False, + **kwargs) + assert osp.exists(self.img_dir) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/pascal_context.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..541a63c66a13fb16fd52921e755715ad8d078fdd --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/pascal_context.py @@ -0,0 +1,103 @@ +import os.path as osp + +from .builder import DATASETS +from .custom import CustomDataset + + +@DATASETS.register_module() +class PascalContextDataset(CustomDataset): + """PascalContext dataset. + + In segmentation map annotation for PascalContext, 0 stands for background, + which is included in 60 categories. ``reduce_zero_label`` is fixed to + False. The ``img_suffix`` is fixed to '.jpg' and ``seg_map_suffix`` is + fixed to '.png'. + + Args: + split (str): Split txt file for PascalContext. 
+ """ + + CLASSES = ('background', 'aeroplane', 'bag', 'bed', 'bedclothes', 'bench', + 'bicycle', 'bird', 'boat', 'book', 'bottle', 'building', 'bus', + 'cabinet', 'car', 'cat', 'ceiling', 'chair', 'cloth', + 'computer', 'cow', 'cup', 'curtain', 'dog', 'door', 'fence', + 'floor', 'flower', 'food', 'grass', 'ground', 'horse', + 'keyboard', 'light', 'motorbike', 'mountain', 'mouse', 'person', + 'plate', 'platform', 'pottedplant', 'road', 'rock', 'sheep', + 'shelves', 'sidewalk', 'sign', 'sky', 'snow', 'sofa', 'table', + 'track', 'train', 'tree', 'truck', 'tvmonitor', 'wall', 'water', + 'window', 'wood') + + PALETTE = [[120, 120, 120], [180, 120, 120], [6, 230, 230], [80, 50, 50], + [4, 200, 3], [120, 120, 80], [140, 140, 140], [204, 5, 255], + [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7], + [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82], + [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3], + [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255], + [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220], + [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224], + [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255], + [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7], + [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153], + [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255], + [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0], + [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255], + [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255]] + + def __init__(self, split, **kwargs): + super(PascalContextDataset, self).__init__( + img_suffix='.jpg', + seg_map_suffix='.png', + split=split, + reduce_zero_label=False, + **kwargs) + assert osp.exists(self.img_dir) and self.split is not None + + +@DATASETS.register_module() +class PascalContextDataset59(CustomDataset): + """PascalContext dataset. + + In segmentation map annotation for PascalContext, 0 stands for background, + which is included in 60 categories. ``reduce_zero_label`` is fixed to + False. The ``img_suffix`` is fixed to '.jpg' and ``seg_map_suffix`` is + fixed to '.png'. + + Args: + split (str): Split txt file for PascalContext. 
+ """ + + CLASSES = ('aeroplane', 'bag', 'bed', 'bedclothes', 'bench', 'bicycle', + 'bird', 'boat', 'book', 'bottle', 'building', 'bus', 'cabinet', + 'car', 'cat', 'ceiling', 'chair', 'cloth', 'computer', 'cow', + 'cup', 'curtain', 'dog', 'door', 'fence', 'floor', 'flower', + 'food', 'grass', 'ground', 'horse', 'keyboard', 'light', + 'motorbike', 'mountain', 'mouse', 'person', 'plate', 'platform', + 'pottedplant', 'road', 'rock', 'sheep', 'shelves', 'sidewalk', + 'sign', 'sky', 'snow', 'sofa', 'table', 'track', 'train', + 'tree', 'truck', 'tvmonitor', 'wall', 'water', 'window', 'wood') + + PALETTE = [[180, 120, 120], [6, 230, 230], [80, 50, 50], [4, 200, 3], + [120, 120, 80], [140, 140, 140], [204, 5, 255], [230, 230, 230], + [4, 250, 7], [224, 5, 255], [235, 255, 7], [150, 5, 61], + [120, 120, 70], [8, 255, 51], [255, 6, 82], [143, 255, 140], + [204, 255, 4], [255, 51, 7], [204, 70, 3], [0, 102, 200], + [61, 230, 250], [255, 6, 51], [11, 102, 255], [255, 7, 71], + [255, 9, 224], [9, 7, 230], [220, 220, 220], [255, 9, 92], + [112, 9, 255], [8, 255, 214], [7, 255, 224], [255, 184, 6], + [10, 255, 71], [255, 41, 10], [7, 255, 255], [224, 255, 8], + [102, 8, 255], [255, 61, 6], [255, 194, 7], [255, 122, 8], + [0, 255, 20], [255, 8, 41], [255, 5, 153], [6, 51, 255], + [235, 12, 255], [160, 150, 20], [0, 163, 255], [140, 140, 140], + [250, 10, 15], [20, 255, 0], [31, 255, 0], [255, 31, 0], + [255, 224, 0], [153, 255, 0], [0, 0, 255], [255, 71, 0], + [0, 235, 255], [0, 173, 255], [31, 0, 255]] + + def __init__(self, split, **kwargs): + super(PascalContextDataset59, self).__init__( + img_suffix='.jpg', + seg_map_suffix='.png', + split=split, + reduce_zero_label=True, + **kwargs) + assert osp.exists(self.img_dir) and self.split is not None diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/pipelines/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/pipelines/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8b9046b07bb4ddea7a707a392b42e72db7c9df67 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/pipelines/__init__.py @@ -0,0 +1,16 @@ +from .compose import Compose +from .formating import (Collect, ImageToTensor, ToDataContainer, ToTensor, + Transpose, to_tensor) +from .loading import LoadAnnotations, LoadImageFromFile +from .test_time_aug import MultiScaleFlipAug +from .transforms import (CLAHE, AdjustGamma, Normalize, Pad, + PhotoMetricDistortion, RandomCrop, RandomFlip, + RandomRotate, Rerange, Resize, RGB2Gray, SegRescale) + +__all__ = [ + 'Compose', 'to_tensor', 'ToTensor', 'ImageToTensor', 'ToDataContainer', + 'Transpose', 'Collect', 'LoadAnnotations', 'LoadImageFromFile', + 'MultiScaleFlipAug', 'Resize', 'RandomFlip', 'Pad', 'RandomCrop', + 'Normalize', 'SegRescale', 'PhotoMetricDistortion', 'RandomRotate', + 'AdjustGamma', 'CLAHE', 'Rerange', 'RGB2Gray' +] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/pipelines/compose.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/pipelines/compose.py new file mode 100644 index 0000000000000000000000000000000000000000..1683e533237ce6420e4a53e477513853d6b33b3e --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/pipelines/compose.py @@ -0,0 +1,51 @@ +import collections + +from annotator.mmpkg.mmcv.utils import build_from_cfg + +from ..builder import PIPELINES + + 
+@PIPELINES.register_module() +class Compose(object): + """Compose multiple transforms sequentially. + + Args: + transforms (Sequence[dict | callable]): Sequence of transform object or + config dict to be composed. + """ + + def __init__(self, transforms): + assert isinstance(transforms, collections.abc.Sequence) + self.transforms = [] + for transform in transforms: + if isinstance(transform, dict): + transform = build_from_cfg(transform, PIPELINES) + self.transforms.append(transform) + elif callable(transform): + self.transforms.append(transform) + else: + raise TypeError('transform must be callable or a dict') + + def __call__(self, data): + """Call function to apply transforms sequentially. + + Args: + data (dict): A result dict contains the data to transform. + + Returns: + dict: Transformed data. + """ + + for t in self.transforms: + data = t(data) + if data is None: + return None + return data + + def __repr__(self): + format_string = self.__class__.__name__ + '(' + for t in self.transforms: + format_string += '\n' + format_string += f' {t}' + format_string += '\n)' + return format_string diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/pipelines/formating.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/pipelines/formating.py new file mode 100644 index 0000000000000000000000000000000000000000..82e2e08ff819506bb7a7693be189017d473e677f --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/pipelines/formating.py @@ -0,0 +1,288 @@ +from collections.abc import Sequence + +import annotator.mmpkg.mmcv as mmcv +import numpy as np +import torch +from annotator.mmpkg.mmcv.parallel import DataContainer as DC + +from ..builder import PIPELINES + + +def to_tensor(data): + """Convert objects of various python types to :obj:`torch.Tensor`. + + Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`, + :class:`Sequence`, :class:`int` and :class:`float`. + + Args: + data (torch.Tensor | numpy.ndarray | Sequence | int | float): Data to + be converted. + """ + + if isinstance(data, torch.Tensor): + return data + elif isinstance(data, np.ndarray): + return torch.from_numpy(data) + elif isinstance(data, Sequence) and not mmcv.is_str(data): + return torch.tensor(data) + elif isinstance(data, int): + return torch.LongTensor([data]) + elif isinstance(data, float): + return torch.FloatTensor([data]) + else: + raise TypeError(f'type {type(data)} cannot be converted to tensor.') + + +@PIPELINES.register_module() +class ToTensor(object): + """Convert some results to :obj:`torch.Tensor` by given keys. + + Args: + keys (Sequence[str]): Keys that need to be converted to Tensor. + """ + + def __init__(self, keys): + self.keys = keys + + def __call__(self, results): + """Call function to convert data in results to :obj:`torch.Tensor`. + + Args: + results (dict): Result dict contains the data to convert. + + Returns: + dict: The result dict contains the data converted + to :obj:`torch.Tensor`. + """ + + for key in self.keys: + results[key] = to_tensor(results[key]) + return results + + def __repr__(self): + return self.__class__.__name__ + f'(keys={self.keys})' + + +@PIPELINES.register_module() +class ImageToTensor(object): + """Convert image to :obj:`torch.Tensor` by given keys. + + The dimension order of input image is (H, W, C). The pipeline will convert + it to (C, H, W). If only 2 dimension (H, W) is given, the output would be + (1, H, W). 
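# --- Illustrative sketch, not part of this diff: using Compose above with a plain
# callable. Config dicts such as dict(type='LoadAnnotations') are built through
# build_from_cfg the same way; a bare function keeps the example self-contained,
# assuming this repo's annotator package is importable.
from annotator.mmpkg.mmseg.datasets.pipelines import Compose


def add_dummy_scale(results):
    results['scale_factor'] = 1.0
    return results


pipe = Compose([add_dummy_scale])
print(pipe({'img_shape': (512, 512, 3)}))  # {'img_shape': (512, 512, 3), 'scale_factor': 1.0}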
+ + Args: + keys (Sequence[str]): Key of images to be converted to Tensor. + """ + + def __init__(self, keys): + self.keys = keys + + def __call__(self, results): + """Call function to convert image in results to :obj:`torch.Tensor` and + transpose the channel order. + + Args: + results (dict): Result dict contains the image data to convert. + + Returns: + dict: The result dict contains the image converted + to :obj:`torch.Tensor` and transposed to (C, H, W) order. + """ + + for key in self.keys: + img = results[key] + if len(img.shape) < 3: + img = np.expand_dims(img, -1) + results[key] = to_tensor(img.transpose(2, 0, 1)) + return results + + def __repr__(self): + return self.__class__.__name__ + f'(keys={self.keys})' + + +@PIPELINES.register_module() +class Transpose(object): + """Transpose some results by given keys. + + Args: + keys (Sequence[str]): Keys of results to be transposed. + order (Sequence[int]): Order of transpose. + """ + + def __init__(self, keys, order): + self.keys = keys + self.order = order + + def __call__(self, results): + """Call function to convert image in results to :obj:`torch.Tensor` and + transpose the channel order. + + Args: + results (dict): Result dict contains the image data to convert. + + Returns: + dict: The result dict contains the image converted + to :obj:`torch.Tensor` and transposed to (C, H, W) order. + """ + + for key in self.keys: + results[key] = results[key].transpose(self.order) + return results + + def __repr__(self): + return self.__class__.__name__ + \ + f'(keys={self.keys}, order={self.order})' + + +@PIPELINES.register_module() +class ToDataContainer(object): + """Convert results to :obj:`mmcv.DataContainer` by given fields. + + Args: + fields (Sequence[dict]): Each field is a dict like + ``dict(key='xxx', **kwargs)``. The ``key`` in result will + be converted to :obj:`mmcv.DataContainer` with ``**kwargs``. + Default: ``(dict(key='img', stack=True), + dict(key='gt_semantic_seg'))``. + """ + + def __init__(self, + fields=(dict(key='img', + stack=True), dict(key='gt_semantic_seg'))): + self.fields = fields + + def __call__(self, results): + """Call function to convert data in results to + :obj:`mmcv.DataContainer`. + + Args: + results (dict): Result dict contains the data to convert. + + Returns: + dict: The result dict contains the data converted to + :obj:`mmcv.DataContainer`. + """ + + for field in self.fields: + field = field.copy() + key = field.pop('key') + results[key] = DC(results[key], **field) + return results + + def __repr__(self): + return self.__class__.__name__ + f'(fields={self.fields})' + + +@PIPELINES.register_module() +class DefaultFormatBundle(object): + """Default formatting bundle. + + It simplifies the pipeline of formatting common fields, including "img" + and "gt_semantic_seg". These fields are formatted as follows. + + - img: (1)transpose, (2)to tensor, (3)to DataContainer (stack=True) + - gt_semantic_seg: (1)unsqueeze dim-0 (2)to tensor, + (3)to DataContainer (stack=True) + """ + + def __call__(self, results): + """Call function to transform and format common fields in results. + + Args: + results (dict): Result dict contains the data to convert. + + Returns: + dict: The result dict contains the data that is formatted with + default bundle. 
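# --- Illustrative sketch, not part of this diff: the (H, W, C) -> (C, H, W) conversion
# that ImageToTensor and DefaultFormatBundle above apply, traced on a dummy image.
import numpy as np
import torch

img = np.zeros((64, 48, 3), dtype=np.uint8)        # (H, W, C)
if img.ndim < 3:
    img = np.expand_dims(img, -1)                   # grayscale input gets a channel axis
tensor = torch.from_numpy(np.ascontiguousarray(img.transpose(2, 0, 1)))
print(tensor.shape)  # torch.Size([3, 64, 48])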
+ """ + + if 'img' in results: + img = results['img'] + if len(img.shape) < 3: + img = np.expand_dims(img, -1) + img = np.ascontiguousarray(img.transpose(2, 0, 1)) + results['img'] = DC(to_tensor(img), stack=True) + if 'gt_semantic_seg' in results: + # convert to long + results['gt_semantic_seg'] = DC( + to_tensor(results['gt_semantic_seg'][None, + ...].astype(np.int64)), + stack=True) + return results + + def __repr__(self): + return self.__class__.__name__ + + +@PIPELINES.register_module() +class Collect(object): + """Collect data from the loader relevant to the specific task. + + This is usually the last stage of the data loader pipeline. Typically keys + is set to some subset of "img", "gt_semantic_seg". + + The "img_meta" item is always populated. The contents of the "img_meta" + dictionary depends on "meta_keys". By default this includes: + + - "img_shape": shape of the image input to the network as a tuple + (h, w, c). Note that images may be zero padded on the bottom/right + if the batch tensor is larger than this shape. + + - "scale_factor": a float indicating the preprocessing scale + + - "flip": a boolean indicating if image flip transform was used + + - "filename": path to the image file + + - "ori_shape": original shape of the image as a tuple (h, w, c) + + - "pad_shape": image shape after padding + + - "img_norm_cfg": a dict of normalization information: + - mean - per channel mean subtraction + - std - per channel std divisor + - to_rgb - bool indicating if bgr was converted to rgb + + Args: + keys (Sequence[str]): Keys of results to be collected in ``data``. + meta_keys (Sequence[str], optional): Meta keys to be converted to + ``mmcv.DataContainer`` and collected in ``data[img_metas]``. + Default: ``('filename', 'ori_filename', 'ori_shape', 'img_shape', + 'pad_shape', 'scale_factor', 'flip', 'flip_direction', + 'img_norm_cfg')`` + """ + + def __init__(self, + keys, + meta_keys=('filename', 'ori_filename', 'ori_shape', + 'img_shape', 'pad_shape', 'scale_factor', 'flip', + 'flip_direction', 'img_norm_cfg')): + self.keys = keys + self.meta_keys = meta_keys + + def __call__(self, results): + """Call function to collect keys in results. The keys in ``meta_keys`` + will be converted to :obj:mmcv.DataContainer. + + Args: + results (dict): Result dict contains the data to collect. + + Returns: + dict: The result dict contains the following keys + - keys in``self.keys`` + - ``img_metas`` + """ + + data = {} + img_meta = {} + for key in self.meta_keys: + img_meta[key] = results[key] + data['img_metas'] = DC(img_meta, cpu_only=True) + for key in self.keys: + data[key] = results[key] + return data + + def __repr__(self): + return self.__class__.__name__ + \ + f'(keys={self.keys}, meta_keys={self.meta_keys})' diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/pipelines/loading.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/pipelines/loading.py new file mode 100644 index 0000000000000000000000000000000000000000..3ad8c2cb67cb1d2b593217fb1fb2e0ca5834c24f --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/pipelines/loading.py @@ -0,0 +1,153 @@ +import os.path as osp + +import annotator.mmpkg.mmcv as mmcv +import numpy as np + +from ..builder import PIPELINES + + +@PIPELINES.register_module() +class LoadImageFromFile(object): + """Load an image from file. + + Required keys are "img_prefix" and "img_info" (a dict that must contain the + key "filename"). 
Added or updated keys are "filename", "img", "img_shape", + "ori_shape" (same as `img_shape`), "pad_shape" (same as `img_shape`), + "scale_factor" (1.0) and "img_norm_cfg" (means=0 and stds=1). + + Args: + to_float32 (bool): Whether to convert the loaded image to a float32 + numpy array. If set to False, the loaded image is an uint8 array. + Defaults to False. + color_type (str): The flag argument for :func:`mmcv.imfrombytes`. + Defaults to 'color'. + file_client_args (dict): Arguments to instantiate a FileClient. + See :class:`mmcv.fileio.FileClient` for details. + Defaults to ``dict(backend='disk')``. + imdecode_backend (str): Backend for :func:`mmcv.imdecode`. Default: + 'cv2' + """ + + def __init__(self, + to_float32=False, + color_type='color', + file_client_args=dict(backend='disk'), + imdecode_backend='cv2'): + self.to_float32 = to_float32 + self.color_type = color_type + self.file_client_args = file_client_args.copy() + self.file_client = None + self.imdecode_backend = imdecode_backend + + def __call__(self, results): + """Call functions to load image and get image meta information. + + Args: + results (dict): Result dict from :obj:`mmseg.CustomDataset`. + + Returns: + dict: The dict contains loaded image and meta information. + """ + + if self.file_client is None: + self.file_client = mmcv.FileClient(**self.file_client_args) + + if results.get('img_prefix') is not None: + filename = osp.join(results['img_prefix'], + results['img_info']['filename']) + else: + filename = results['img_info']['filename'] + img_bytes = self.file_client.get(filename) + img = mmcv.imfrombytes( + img_bytes, flag=self.color_type, backend=self.imdecode_backend) + if self.to_float32: + img = img.astype(np.float32) + + results['filename'] = filename + results['ori_filename'] = results['img_info']['filename'] + results['img'] = img + results['img_shape'] = img.shape + results['ori_shape'] = img.shape + # Set initial values for default meta_keys + results['pad_shape'] = img.shape + results['scale_factor'] = 1.0 + num_channels = 1 if len(img.shape) < 3 else img.shape[2] + results['img_norm_cfg'] = dict( + mean=np.zeros(num_channels, dtype=np.float32), + std=np.ones(num_channels, dtype=np.float32), + to_rgb=False) + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(to_float32={self.to_float32},' + repr_str += f"color_type='{self.color_type}'," + repr_str += f"imdecode_backend='{self.imdecode_backend}')" + return repr_str + + +@PIPELINES.register_module() +class LoadAnnotations(object): + """Load annotations for semantic segmentation. + + Args: + reduce_zero_label (bool): Whether reduce all label value by 1. + Usually used for datasets where 0 is background label. + Default: False. + file_client_args (dict): Arguments to instantiate a FileClient. + See :class:`mmcv.fileio.FileClient` for details. + Defaults to ``dict(backend='disk')``. + imdecode_backend (str): Backend for :func:`mmcv.imdecode`. Default: + 'pillow' + """ + + def __init__(self, + reduce_zero_label=False, + file_client_args=dict(backend='disk'), + imdecode_backend='pillow'): + self.reduce_zero_label = reduce_zero_label + self.file_client_args = file_client_args.copy() + self.file_client = None + self.imdecode_backend = imdecode_backend + + def __call__(self, results): + """Call function to load multiple types annotations. + + Args: + results (dict): Result dict from :obj:`mmseg.CustomDataset`. + + Returns: + dict: The dict contains loaded semantic segmentation annotations. 
+ """ + + if self.file_client is None: + self.file_client = mmcv.FileClient(**self.file_client_args) + + if results.get('seg_prefix', None) is not None: + filename = osp.join(results['seg_prefix'], + results['ann_info']['seg_map']) + else: + filename = results['ann_info']['seg_map'] + img_bytes = self.file_client.get(filename) + gt_semantic_seg = mmcv.imfrombytes( + img_bytes, flag='unchanged', + backend=self.imdecode_backend).squeeze().astype(np.uint8) + # modify if custom classes + if results.get('label_map', None) is not None: + for old_id, new_id in results['label_map'].items(): + gt_semantic_seg[gt_semantic_seg == old_id] = new_id + # reduce zero_label + if self.reduce_zero_label: + # avoid using underflow conversion + gt_semantic_seg[gt_semantic_seg == 0] = 255 + gt_semantic_seg = gt_semantic_seg - 1 + gt_semantic_seg[gt_semantic_seg == 254] = 255 + results['gt_semantic_seg'] = gt_semantic_seg + results['seg_fields'].append('gt_semantic_seg') + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(reduce_zero_label={self.reduce_zero_label},' + repr_str += f"imdecode_backend='{self.imdecode_backend}')" + return repr_str diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/pipelines/test_time_aug.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/pipelines/test_time_aug.py new file mode 100644 index 0000000000000000000000000000000000000000..fb781d928ed71aceb1abcaef44d3889c00d2261e --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/pipelines/test_time_aug.py @@ -0,0 +1,133 @@ +import warnings + +import annotator.mmpkg.mmcv as mmcv + +from ..builder import PIPELINES +from .compose import Compose + + +@PIPELINES.register_module() +class MultiScaleFlipAug(object): + """Test-time augmentation with multiple scales and flipping. + + An example configuration is as followed: + + .. code-block:: + + img_scale=(2048, 1024), + img_ratios=[0.5, 1.0], + flip=True, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ] + + After MultiScaleFLipAug with above configuration, the results are wrapped + into lists of the same length as followed: + + .. code-block:: + + dict( + img=[...], + img_shape=[...], + scale=[(1024, 512), (1024, 512), (2048, 1024), (2048, 1024)] + flip=[False, True, False, True] + ... + ) + + Args: + transforms (list[dict]): Transforms to apply in each augmentation. + img_scale (None | tuple | list[tuple]): Images scales for resizing. + img_ratios (float | list[float]): Image ratios for resizing + flip (bool): Whether apply flip augmentation. Default: False. + flip_direction (str | list[str]): Flip augmentation directions, + options are "horizontal" and "vertical". If flip_direction is list, + multiple flip augmentations will be applied. + It has no effect when flip == False. Default: "horizontal". 
+ """ + + def __init__(self, + transforms, + img_scale, + img_ratios=None, + flip=False, + flip_direction='horizontal'): + self.transforms = Compose(transforms) + if img_ratios is not None: + img_ratios = img_ratios if isinstance(img_ratios, + list) else [img_ratios] + assert mmcv.is_list_of(img_ratios, float) + if img_scale is None: + # mode 1: given img_scale=None and a range of image ratio + self.img_scale = None + assert mmcv.is_list_of(img_ratios, float) + elif isinstance(img_scale, tuple) and mmcv.is_list_of( + img_ratios, float): + assert len(img_scale) == 2 + # mode 2: given a scale and a range of image ratio + self.img_scale = [(int(img_scale[0] * ratio), + int(img_scale[1] * ratio)) + for ratio in img_ratios] + else: + # mode 3: given multiple scales + self.img_scale = img_scale if isinstance(img_scale, + list) else [img_scale] + assert mmcv.is_list_of(self.img_scale, tuple) or self.img_scale is None + self.flip = flip + self.img_ratios = img_ratios + self.flip_direction = flip_direction if isinstance( + flip_direction, list) else [flip_direction] + assert mmcv.is_list_of(self.flip_direction, str) + if not self.flip and self.flip_direction != ['horizontal']: + warnings.warn( + 'flip_direction has no effect when flip is set to False') + if (self.flip + and not any([t['type'] == 'RandomFlip' for t in transforms])): + warnings.warn( + 'flip has no effect when RandomFlip is not in transforms') + + def __call__(self, results): + """Call function to apply test time augment transforms on results. + + Args: + results (dict): Result dict contains the data to transform. + + Returns: + dict[str: list]: The augmented data, where each value is wrapped + into a list. + """ + + aug_data = [] + if self.img_scale is None and mmcv.is_list_of(self.img_ratios, float): + h, w = results['img'].shape[:2] + img_scale = [(int(w * ratio), int(h * ratio)) + for ratio in self.img_ratios] + else: + img_scale = self.img_scale + flip_aug = [False, True] if self.flip else [False] + for scale in img_scale: + for flip in flip_aug: + for direction in self.flip_direction: + _results = results.copy() + _results['scale'] = scale + _results['flip'] = flip + _results['flip_direction'] = direction + data = self.transforms(_results) + aug_data.append(data) + # list of dict to dict of list + aug_data_dict = {key: [] for key in aug_data[0]} + for data in aug_data: + for key, val in data.items(): + aug_data_dict[key].append(val) + return aug_data_dict + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(transforms={self.transforms}, ' + repr_str += f'img_scale={self.img_scale}, flip={self.flip})' + repr_str += f'flip_direction={self.flip_direction}' + return repr_str diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/pipelines/transforms.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/pipelines/transforms.py new file mode 100644 index 0000000000000000000000000000000000000000..842763db97685dd9280424204d62ee65993fdd5a --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/pipelines/transforms.py @@ -0,0 +1,889 @@ +import annotator.mmpkg.mmcv as mmcv +import numpy as np +from annotator.mmpkg.mmcv.utils import deprecated_api_warning, is_tuple_of +from numpy import random + +from ..builder import PIPELINES + + +@PIPELINES.register_module() +class Resize(object): + """Resize images & seg. + + This transform resizes the input image to some scale. 
If the input dict + contains the key "scale", then the scale in the input dict is used, + otherwise the specified scale in the init method is used. + + ``img_scale`` can be None, a tuple (single-scale) or a list of tuple + (multi-scale). There are 4 multiscale modes: + + - ``ratio_range is not None``: + 1. When img_scale is None, img_scale is the shape of image in results + (img_scale = results['img'].shape[:2]) and the image is resized based + on the original size. (mode 1) + 2. When img_scale is a tuple (single-scale), randomly sample a ratio from + the ratio range and multiply it with the image scale. (mode 2) + + - ``ratio_range is None and multiscale_mode == "range"``: randomly sample a + scale from the a range. (mode 3) + + - ``ratio_range is None and multiscale_mode == "value"``: randomly sample a + scale from multiple scales. (mode 4) + + Args: + img_scale (tuple or list[tuple]): Images scales for resizing. + multiscale_mode (str): Either "range" or "value". + ratio_range (tuple[float]): (min_ratio, max_ratio) + keep_ratio (bool): Whether to keep the aspect ratio when resizing the + image. + """ + + def __init__(self, + img_scale=None, + multiscale_mode='range', + ratio_range=None, + keep_ratio=True): + if img_scale is None: + self.img_scale = None + else: + if isinstance(img_scale, list): + self.img_scale = img_scale + else: + self.img_scale = [img_scale] + assert mmcv.is_list_of(self.img_scale, tuple) + + if ratio_range is not None: + # mode 1: given img_scale=None and a range of image ratio + # mode 2: given a scale and a range of image ratio + assert self.img_scale is None or len(self.img_scale) == 1 + else: + # mode 3 and 4: given multiple scales or a range of scales + assert multiscale_mode in ['value', 'range'] + + self.multiscale_mode = multiscale_mode + self.ratio_range = ratio_range + self.keep_ratio = keep_ratio + + @staticmethod + def random_select(img_scales): + """Randomly select an img_scale from given candidates. + + Args: + img_scales (list[tuple]): Images scales for selection. + + Returns: + (tuple, int): Returns a tuple ``(img_scale, scale_dix)``, + where ``img_scale`` is the selected image scale and + ``scale_idx`` is the selected index in the given candidates. + """ + + assert mmcv.is_list_of(img_scales, tuple) + scale_idx = np.random.randint(len(img_scales)) + img_scale = img_scales[scale_idx] + return img_scale, scale_idx + + @staticmethod + def random_sample(img_scales): + """Randomly sample an img_scale when ``multiscale_mode=='range'``. + + Args: + img_scales (list[tuple]): Images scale range for sampling. + There must be two tuples in img_scales, which specify the lower + and upper bound of image scales. + + Returns: + (tuple, None): Returns a tuple ``(img_scale, None)``, where + ``img_scale`` is sampled scale and None is just a placeholder + to be consistent with :func:`random_select`. + """ + + assert mmcv.is_list_of(img_scales, tuple) and len(img_scales) == 2 + img_scale_long = [max(s) for s in img_scales] + img_scale_short = [min(s) for s in img_scales] + long_edge = np.random.randint( + min(img_scale_long), + max(img_scale_long) + 1) + short_edge = np.random.randint( + min(img_scale_short), + max(img_scale_short) + 1) + img_scale = (long_edge, short_edge) + return img_scale, None + + @staticmethod + def random_sample_ratio(img_scale, ratio_range): + """Randomly sample an img_scale when ``ratio_range`` is specified. + + A ratio will be randomly sampled from the range specified by + ``ratio_range``. 
Then it would be multiplied with ``img_scale`` to + generate sampled scale. + + Args: + img_scale (tuple): Images scale base to multiply with ratio. + ratio_range (tuple[float]): The minimum and maximum ratio to scale + the ``img_scale``. + + Returns: + (tuple, None): Returns a tuple ``(scale, None)``, where + ``scale`` is sampled ratio multiplied with ``img_scale`` and + None is just a placeholder to be consistent with + :func:`random_select`. + """ + + assert isinstance(img_scale, tuple) and len(img_scale) == 2 + min_ratio, max_ratio = ratio_range + assert min_ratio <= max_ratio + ratio = np.random.random_sample() * (max_ratio - min_ratio) + min_ratio + scale = int(img_scale[0] * ratio), int(img_scale[1] * ratio) + return scale, None + + def _random_scale(self, results): + """Randomly sample an img_scale according to ``ratio_range`` and + ``multiscale_mode``. + + If ``ratio_range`` is specified, a ratio will be sampled and be + multiplied with ``img_scale``. + If multiple scales are specified by ``img_scale``, a scale will be + sampled according to ``multiscale_mode``. + Otherwise, single scale will be used. + + Args: + results (dict): Result dict from :obj:`dataset`. + + Returns: + dict: Two new keys 'scale` and 'scale_idx` are added into + ``results``, which would be used by subsequent pipelines. + """ + + if self.ratio_range is not None: + if self.img_scale is None: + h, w = results['img'].shape[:2] + scale, scale_idx = self.random_sample_ratio((w, h), + self.ratio_range) + else: + scale, scale_idx = self.random_sample_ratio( + self.img_scale[0], self.ratio_range) + elif len(self.img_scale) == 1: + scale, scale_idx = self.img_scale[0], 0 + elif self.multiscale_mode == 'range': + scale, scale_idx = self.random_sample(self.img_scale) + elif self.multiscale_mode == 'value': + scale, scale_idx = self.random_select(self.img_scale) + else: + raise NotImplementedError + + results['scale'] = scale + results['scale_idx'] = scale_idx + + def _resize_img(self, results): + """Resize images with ``results['scale']``.""" + if self.keep_ratio: + img, scale_factor = mmcv.imrescale( + results['img'], results['scale'], return_scale=True) + # the w_scale and h_scale has minor difference + # a real fix should be done in the mmcv.imrescale in the future + new_h, new_w = img.shape[:2] + h, w = results['img'].shape[:2] + w_scale = new_w / w + h_scale = new_h / h + else: + img, w_scale, h_scale = mmcv.imresize( + results['img'], results['scale'], return_scale=True) + scale_factor = np.array([w_scale, h_scale, w_scale, h_scale], + dtype=np.float32) + results['img'] = img + results['img_shape'] = img.shape + results['pad_shape'] = img.shape # in case that there is no padding + results['scale_factor'] = scale_factor + results['keep_ratio'] = self.keep_ratio + + def _resize_seg(self, results): + """Resize semantic segmentation map with ``results['scale']``.""" + for key in results.get('seg_fields', []): + if self.keep_ratio: + gt_seg = mmcv.imrescale( + results[key], results['scale'], interpolation='nearest') + else: + gt_seg = mmcv.imresize( + results[key], results['scale'], interpolation='nearest') + results[key] = gt_seg + + def __call__(self, results): + """Call function to resize images, bounding boxes, masks, semantic + segmentation map. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Resized results, 'img_shape', 'pad_shape', 'scale_factor', + 'keep_ratio' keys are added into result dict. 
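# --- Illustrative sketch, not part of this diff: the scale computed by
# random_sample_ratio above, with the random draw pinned to a fixed ratio for clarity.
img_scale, ratio_range = (2048, 1024), (0.5, 2.0)
ratio = 0.75  # random_sample_ratio draws this uniformly from ratio_range
scale = (int(img_scale[0] * ratio), int(img_scale[1] * ratio))
print(scale)  # (1536, 768)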
+ """ + + if 'scale' not in results: + self._random_scale(results) + self._resize_img(results) + self._resize_seg(results) + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += (f'(img_scale={self.img_scale}, ' + f'multiscale_mode={self.multiscale_mode}, ' + f'ratio_range={self.ratio_range}, ' + f'keep_ratio={self.keep_ratio})') + return repr_str + + +@PIPELINES.register_module() +class RandomFlip(object): + """Flip the image & seg. + + If the input dict contains the key "flip", then the flag will be used, + otherwise it will be randomly decided by a ratio specified in the init + method. + + Args: + prob (float, optional): The flipping probability. Default: None. + direction(str, optional): The flipping direction. Options are + 'horizontal' and 'vertical'. Default: 'horizontal'. + """ + + @deprecated_api_warning({'flip_ratio': 'prob'}, cls_name='RandomFlip') + def __init__(self, prob=None, direction='horizontal'): + self.prob = prob + self.direction = direction + if prob is not None: + assert prob >= 0 and prob <= 1 + assert direction in ['horizontal', 'vertical'] + + def __call__(self, results): + """Call function to flip bounding boxes, masks, semantic segmentation + maps. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Flipped results, 'flip', 'flip_direction' keys are added into + result dict. + """ + + if 'flip' not in results: + flip = True if np.random.rand() < self.prob else False + results['flip'] = flip + if 'flip_direction' not in results: + results['flip_direction'] = self.direction + if results['flip']: + # flip image + results['img'] = mmcv.imflip( + results['img'], direction=results['flip_direction']) + + # flip segs + for key in results.get('seg_fields', []): + # use copy() to make numpy stride positive + results[key] = mmcv.imflip( + results[key], direction=results['flip_direction']).copy() + return results + + def __repr__(self): + return self.__class__.__name__ + f'(prob={self.prob})' + + +@PIPELINES.register_module() +class Pad(object): + """Pad the image & mask. + + There are two padding modes: (1) pad to a fixed size and (2) pad to the + minimum size that is divisible by some number. + Added keys are "pad_shape", "pad_fixed_size", "pad_size_divisor", + + Args: + size (tuple, optional): Fixed padding size. + size_divisor (int, optional): The divisor of padded size. + pad_val (float, optional): Padding value. Default: 0. + seg_pad_val (float, optional): Padding value of segmentation map. + Default: 255. 
+ """ + + def __init__(self, + size=None, + size_divisor=None, + pad_val=0, + seg_pad_val=255): + self.size = size + self.size_divisor = size_divisor + self.pad_val = pad_val + self.seg_pad_val = seg_pad_val + # only one of size and size_divisor should be valid + assert size is not None or size_divisor is not None + assert size is None or size_divisor is None + + def _pad_img(self, results): + """Pad images according to ``self.size``.""" + if self.size is not None: + padded_img = mmcv.impad( + results['img'], shape=self.size, pad_val=self.pad_val) + elif self.size_divisor is not None: + padded_img = mmcv.impad_to_multiple( + results['img'], self.size_divisor, pad_val=self.pad_val) + results['img'] = padded_img + results['pad_shape'] = padded_img.shape + results['pad_fixed_size'] = self.size + results['pad_size_divisor'] = self.size_divisor + + def _pad_seg(self, results): + """Pad masks according to ``results['pad_shape']``.""" + for key in results.get('seg_fields', []): + results[key] = mmcv.impad( + results[key], + shape=results['pad_shape'][:2], + pad_val=self.seg_pad_val) + + def __call__(self, results): + """Call function to pad images, masks, semantic segmentation maps. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Updated result dict. + """ + + self._pad_img(results) + self._pad_seg(results) + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(size={self.size}, size_divisor={self.size_divisor}, ' \ + f'pad_val={self.pad_val})' + return repr_str + + +@PIPELINES.register_module() +class Normalize(object): + """Normalize the image. + + Added key is "img_norm_cfg". + + Args: + mean (sequence): Mean values of 3 channels. + std (sequence): Std values of 3 channels. + to_rgb (bool): Whether to convert the image from BGR to RGB, + default is true. + """ + + def __init__(self, mean, std, to_rgb=True): + self.mean = np.array(mean, dtype=np.float32) + self.std = np.array(std, dtype=np.float32) + self.to_rgb = to_rgb + + def __call__(self, results): + """Call function to normalize images. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Normalized results, 'img_norm_cfg' key is added into + result dict. + """ + + results['img'] = mmcv.imnormalize(results['img'], self.mean, self.std, + self.to_rgb) + results['img_norm_cfg'] = dict( + mean=self.mean, std=self.std, to_rgb=self.to_rgb) + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(mean={self.mean}, std={self.std}, to_rgb=' \ + f'{self.to_rgb})' + return repr_str + + +@PIPELINES.register_module() +class Rerange(object): + """Rerange the image pixel value. + + Args: + min_value (float or int): Minimum value of the reranged image. + Default: 0. + max_value (float or int): Maximum value of the reranged image. + Default: 255. + """ + + def __init__(self, min_value=0, max_value=255): + assert isinstance(min_value, float) or isinstance(min_value, int) + assert isinstance(max_value, float) or isinstance(max_value, int) + assert min_value < max_value + self.min_value = min_value + self.max_value = max_value + + def __call__(self, results): + """Call function to rerange images. + + Args: + results (dict): Result dict from loading pipeline. + Returns: + dict: Reranged results. 
+ """ + + img = results['img'] + img_min_value = np.min(img) + img_max_value = np.max(img) + + assert img_min_value < img_max_value + # rerange to [0, 1] + img = (img - img_min_value) / (img_max_value - img_min_value) + # rerange to [min_value, max_value] + img = img * (self.max_value - self.min_value) + self.min_value + results['img'] = img + + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(min_value={self.min_value}, max_value={self.max_value})' + return repr_str + + +@PIPELINES.register_module() +class CLAHE(object): + """Use CLAHE method to process the image. + + See `ZUIDERVELD,K. Contrast Limited Adaptive Histogram Equalization[J]. + Graphics Gems, 1994:474-485.` for more information. + + Args: + clip_limit (float): Threshold for contrast limiting. Default: 40.0. + tile_grid_size (tuple[int]): Size of grid for histogram equalization. + Input image will be divided into equally sized rectangular tiles. + It defines the number of tiles in row and column. Default: (8, 8). + """ + + def __init__(self, clip_limit=40.0, tile_grid_size=(8, 8)): + assert isinstance(clip_limit, (float, int)) + self.clip_limit = clip_limit + assert is_tuple_of(tile_grid_size, int) + assert len(tile_grid_size) == 2 + self.tile_grid_size = tile_grid_size + + def __call__(self, results): + """Call function to Use CLAHE method process images. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Processed results. + """ + + for i in range(results['img'].shape[2]): + results['img'][:, :, i] = mmcv.clahe( + np.array(results['img'][:, :, i], dtype=np.uint8), + self.clip_limit, self.tile_grid_size) + + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(clip_limit={self.clip_limit}, '\ + f'tile_grid_size={self.tile_grid_size})' + return repr_str + + +@PIPELINES.register_module() +class RandomCrop(object): + """Random crop the image & seg. + + Args: + crop_size (tuple): Expected size after cropping, (h, w). + cat_max_ratio (float): The maximum ratio that single category could + occupy. + """ + + def __init__(self, crop_size, cat_max_ratio=1., ignore_index=255): + assert crop_size[0] > 0 and crop_size[1] > 0 + self.crop_size = crop_size + self.cat_max_ratio = cat_max_ratio + self.ignore_index = ignore_index + + def get_crop_bbox(self, img): + """Randomly get a crop bounding box.""" + margin_h = max(img.shape[0] - self.crop_size[0], 0) + margin_w = max(img.shape[1] - self.crop_size[1], 0) + offset_h = np.random.randint(0, margin_h + 1) + offset_w = np.random.randint(0, margin_w + 1) + crop_y1, crop_y2 = offset_h, offset_h + self.crop_size[0] + crop_x1, crop_x2 = offset_w, offset_w + self.crop_size[1] + + return crop_y1, crop_y2, crop_x1, crop_x2 + + def crop(self, img, crop_bbox): + """Crop from ``img``""" + crop_y1, crop_y2, crop_x1, crop_x2 = crop_bbox + img = img[crop_y1:crop_y2, crop_x1:crop_x2, ...] + return img + + def __call__(self, results): + """Call function to randomly crop images, semantic segmentation maps. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Randomly cropped results, 'img_shape' key in result dict is + updated according to crop size. 
+ """ + + img = results['img'] + crop_bbox = self.get_crop_bbox(img) + if self.cat_max_ratio < 1.: + # Repeat 10 times + for _ in range(10): + seg_temp = self.crop(results['gt_semantic_seg'], crop_bbox) + labels, cnt = np.unique(seg_temp, return_counts=True) + cnt = cnt[labels != self.ignore_index] + if len(cnt) > 1 and np.max(cnt) / np.sum( + cnt) < self.cat_max_ratio: + break + crop_bbox = self.get_crop_bbox(img) + + # crop the image + img = self.crop(img, crop_bbox) + img_shape = img.shape + results['img'] = img + results['img_shape'] = img_shape + + # crop semantic seg + for key in results.get('seg_fields', []): + results[key] = self.crop(results[key], crop_bbox) + + return results + + def __repr__(self): + return self.__class__.__name__ + f'(crop_size={self.crop_size})' + + +@PIPELINES.register_module() +class RandomRotate(object): + """Rotate the image & seg. + + Args: + prob (float): The rotation probability. + degree (float, tuple[float]): Range of degrees to select from. If + degree is a number instead of tuple like (min, max), + the range of degree will be (``-degree``, ``+degree``) + pad_val (float, optional): Padding value of image. Default: 0. + seg_pad_val (float, optional): Padding value of segmentation map. + Default: 255. + center (tuple[float], optional): Center point (w, h) of the rotation in + the source image. If not specified, the center of the image will be + used. Default: None. + auto_bound (bool): Whether to adjust the image size to cover the whole + rotated image. Default: False + """ + + def __init__(self, + prob, + degree, + pad_val=0, + seg_pad_val=255, + center=None, + auto_bound=False): + self.prob = prob + assert prob >= 0 and prob <= 1 + if isinstance(degree, (float, int)): + assert degree > 0, f'degree {degree} should be positive' + self.degree = (-degree, degree) + else: + self.degree = degree + assert len(self.degree) == 2, f'degree {self.degree} should be a ' \ + f'tuple of (min, max)' + self.pal_val = pad_val + self.seg_pad_val = seg_pad_val + self.center = center + self.auto_bound = auto_bound + + def __call__(self, results): + """Call function to rotate image, semantic segmentation maps. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Rotated results. + """ + + rotate = True if np.random.rand() < self.prob else False + degree = np.random.uniform(min(*self.degree), max(*self.degree)) + if rotate: + # rotate image + results['img'] = mmcv.imrotate( + results['img'], + angle=degree, + border_value=self.pal_val, + center=self.center, + auto_bound=self.auto_bound) + + # rotate segs + for key in results.get('seg_fields', []): + results[key] = mmcv.imrotate( + results[key], + angle=degree, + border_value=self.seg_pad_val, + center=self.center, + auto_bound=self.auto_bound, + interpolation='nearest') + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(prob={self.prob}, ' \ + f'degree={self.degree}, ' \ + f'pad_val={self.pal_val}, ' \ + f'seg_pad_val={self.seg_pad_val}, ' \ + f'center={self.center}, ' \ + f'auto_bound={self.auto_bound})' + return repr_str + + +@PIPELINES.register_module() +class RGB2Gray(object): + """Convert RGB image to grayscale image. + + This transform calculate the weighted mean of input image channels with + ``weights`` and then expand the channels to ``out_channels``. When + ``out_channels`` is None, the number of output channels is the same as + input channels. + + Args: + out_channels (int): Expected number of output channels after + transforming. 
Default: None. + weights (tuple[float]): The weights to calculate the weighted mean. + Default: (0.299, 0.587, 0.114). + """ + + def __init__(self, out_channels=None, weights=(0.299, 0.587, 0.114)): + assert out_channels is None or out_channels > 0 + self.out_channels = out_channels + assert isinstance(weights, tuple) + for item in weights: + assert isinstance(item, (float, int)) + self.weights = weights + + def __call__(self, results): + """Call function to convert RGB image to grayscale image. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Result dict with grayscale image. + """ + img = results['img'] + assert len(img.shape) == 3 + assert img.shape[2] == len(self.weights) + weights = np.array(self.weights).reshape((1, 1, -1)) + img = (img * weights).sum(2, keepdims=True) + if self.out_channels is None: + img = img.repeat(weights.shape[2], axis=2) + else: + img = img.repeat(self.out_channels, axis=2) + + results['img'] = img + results['img_shape'] = img.shape + + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(out_channels={self.out_channels}, ' \ + f'weights={self.weights})' + return repr_str + + +@PIPELINES.register_module() +class AdjustGamma(object): + """Using gamma correction to process the image. + + Args: + gamma (float or int): Gamma value used in gamma correction. + Default: 1.0. + """ + + def __init__(self, gamma=1.0): + assert isinstance(gamma, float) or isinstance(gamma, int) + assert gamma > 0 + self.gamma = gamma + inv_gamma = 1.0 / gamma + self.table = np.array([(i / 255.0)**inv_gamma * 255 + for i in np.arange(256)]).astype('uint8') + + def __call__(self, results): + """Call function to process the image with gamma correction. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Processed results. + """ + + results['img'] = mmcv.lut_transform( + np.array(results['img'], dtype=np.uint8), self.table) + + return results + + def __repr__(self): + return self.__class__.__name__ + f'(gamma={self.gamma})' + + +@PIPELINES.register_module() +class SegRescale(object): + """Rescale semantic segmentation maps. + + Args: + scale_factor (float): The scale factor of the final output. + """ + + def __init__(self, scale_factor=1): + self.scale_factor = scale_factor + + def __call__(self, results): + """Call function to scale the semantic segmentation map. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Result dict with semantic segmentation map scaled. + """ + for key in results.get('seg_fields', []): + if self.scale_factor != 1: + results[key] = mmcv.imrescale( + results[key], self.scale_factor, interpolation='nearest') + return results + + def __repr__(self): + return self.__class__.__name__ + f'(scale_factor={self.scale_factor})' + + +@PIPELINES.register_module() +class PhotoMetricDistortion(object): + """Apply photometric distortion to image sequentially, every transformation + is applied with a probability of 0.5. The position of random contrast is in + second or second to last. + + 1. random brightness + 2. random contrast (mode 0) + 3. convert color from BGR to HSV + 4. random saturation + 5. random hue + 6. convert color from HSV to BGR + 7. random contrast (mode 1) + + Args: + brightness_delta (int): delta of brightness. + contrast_range (tuple): range of contrast. + saturation_range (tuple): range of saturation. + hue_delta (int): delta of hue. 
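# --- Illustrative aside, not part of the vendored file above ---
# Sketch of the gamma lookup table built by AdjustGamma and how it is applied;
# plain fancy indexing into the table has the same effect as mmcv.lut_transform
# for a uint8 image. gamma=2.2 is just an example value.
import numpy as np

gamma = 2.2
inv_gamma = 1.0 / gamma
table = np.array([(i / 255.0) ** inv_gamma * 255
                  for i in np.arange(256)]).astype('uint8')

img = np.random.randint(0, 256, (4, 4, 3), dtype=np.uint8)
corrected = table[img]            # per-pixel gamma-corrected uint8 image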
+ """ + + def __init__(self, + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18): + self.brightness_delta = brightness_delta + self.contrast_lower, self.contrast_upper = contrast_range + self.saturation_lower, self.saturation_upper = saturation_range + self.hue_delta = hue_delta + + def convert(self, img, alpha=1, beta=0): + """Multiple with alpha and add beat with clip.""" + img = img.astype(np.float32) * alpha + beta + img = np.clip(img, 0, 255) + return img.astype(np.uint8) + + def brightness(self, img): + """Brightness distortion.""" + if random.randint(2): + return self.convert( + img, + beta=random.uniform(-self.brightness_delta, + self.brightness_delta)) + return img + + def contrast(self, img): + """Contrast distortion.""" + if random.randint(2): + return self.convert( + img, + alpha=random.uniform(self.contrast_lower, self.contrast_upper)) + return img + + def saturation(self, img): + """Saturation distortion.""" + if random.randint(2): + img = mmcv.bgr2hsv(img) + img[:, :, 1] = self.convert( + img[:, :, 1], + alpha=random.uniform(self.saturation_lower, + self.saturation_upper)) + img = mmcv.hsv2bgr(img) + return img + + def hue(self, img): + """Hue distortion.""" + if random.randint(2): + img = mmcv.bgr2hsv(img) + img[:, :, + 0] = (img[:, :, 0].astype(int) + + random.randint(-self.hue_delta, self.hue_delta)) % 180 + img = mmcv.hsv2bgr(img) + return img + + def __call__(self, results): + """Call function to perform photometric distortion on images. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Result dict with images distorted. + """ + + img = results['img'] + # random brightness + img = self.brightness(img) + + # mode == 0 --> do random contrast first + # mode == 1 --> do random contrast last + mode = random.randint(2) + if mode == 1: + img = self.contrast(img) + + # random saturation + img = self.saturation(img) + + # random hue + img = self.hue(img) + + # random contrast + if mode == 0: + img = self.contrast(img) + + results['img'] = img + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += (f'(brightness_delta={self.brightness_delta}, ' + f'contrast_range=({self.contrast_lower}, ' + f'{self.contrast_upper}), ' + f'saturation_range=({self.saturation_lower}, ' + f'{self.saturation_upper}), ' + f'hue_delta={self.hue_delta})') + return repr_str diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/stare.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/stare.py new file mode 100644 index 0000000000000000000000000000000000000000..cbd14e0920e7f6a73baff1432e5a32ccfdb0dfae --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/stare.py @@ -0,0 +1,27 @@ +import os.path as osp + +from .builder import DATASETS +from .custom import CustomDataset + + +@DATASETS.register_module() +class STAREDataset(CustomDataset): + """STARE dataset. + + In segmentation map annotation for STARE, 0 stands for background, which is + included in 2 categories. ``reduce_zero_label`` is fixed to False. The + ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to + '.ah.png'. 
+ """ + + CLASSES = ('background', 'vessel') + + PALETTE = [[120, 120, 120], [6, 230, 230]] + + def __init__(self, **kwargs): + super(STAREDataset, self).__init__( + img_suffix='.png', + seg_map_suffix='.ah.png', + reduce_zero_label=False, + **kwargs) + assert osp.exists(self.img_dir) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/voc.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/voc.py new file mode 100644 index 0000000000000000000000000000000000000000..a8855203b14ee0dc4da9099a2945d4aedcffbcd6 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/datasets/voc.py @@ -0,0 +1,29 @@ +import os.path as osp + +from .builder import DATASETS +from .custom import CustomDataset + + +@DATASETS.register_module() +class PascalVOCDataset(CustomDataset): + """Pascal VOC dataset. + + Args: + split (str): Split txt file for Pascal VOC. + """ + + CLASSES = ('background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', + 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', + 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', + 'train', 'tvmonitor') + + PALETTE = [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0], [0, 0, 128], + [128, 0, 128], [0, 128, 128], [128, 128, 128], [64, 0, 0], + [192, 0, 0], [64, 128, 0], [192, 128, 0], [64, 0, 128], + [192, 0, 128], [64, 128, 128], [192, 128, 128], [0, 64, 0], + [128, 64, 0], [0, 192, 0], [128, 192, 0], [0, 64, 128]] + + def __init__(self, split, **kwargs): + super(PascalVOCDataset, self).__init__( + img_suffix='.jpg', seg_map_suffix='.png', split=split, **kwargs) + assert osp.exists(self.img_dir) and self.split is not None diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3cf93f8bec9cf0cef0a3bd76ca3ca92eb188f535 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/__init__.py @@ -0,0 +1,12 @@ +from .backbones import * # noqa: F401,F403 +from .builder import (BACKBONES, HEADS, LOSSES, SEGMENTORS, build_backbone, + build_head, build_loss, build_segmentor) +from .decode_heads import * # noqa: F401,F403 +from .losses import * # noqa: F401,F403 +from .necks import * # noqa: F401,F403 +from .segmentors import * # noqa: F401,F403 + +__all__ = [ + 'BACKBONES', 'HEADS', 'LOSSES', 'SEGMENTORS', 'build_backbone', + 'build_head', 'build_loss', 'build_segmentor' +] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/backbones/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/backbones/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a1116c00a17c8bd9ed7f18743baee22b3b7d3f8d --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/backbones/__init__.py @@ -0,0 +1,16 @@ +from .cgnet import CGNet +# from .fast_scnn import FastSCNN +from .hrnet import HRNet +from .mobilenet_v2 import MobileNetV2 +from .mobilenet_v3 import MobileNetV3 +from .resnest import ResNeSt +from .resnet import ResNet, ResNetV1c, ResNetV1d +from .resnext import ResNeXt +from .unet import UNet +from .vit import VisionTransformer + +__all__ = [ + 'ResNet', 'ResNetV1c', 'ResNetV1d', 'ResNeXt', 'HRNet', + 'ResNeSt', 'MobileNetV2', 'UNet', 'CGNet', 'MobileNetV3', + 'VisionTransformer' +] diff 
--git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/backbones/cgnet.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/backbones/cgnet.py new file mode 100644 index 0000000000000000000000000000000000000000..45c235e2e7fcef21e933ecb3ff88a37fa953abe6 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/backbones/cgnet.py @@ -0,0 +1,367 @@ +import torch +import torch.nn as nn +import torch.utils.checkpoint as cp +from annotator.mmpkg.mmcv.cnn import (ConvModule, build_conv_layer, build_norm_layer, + constant_init, kaiming_init) +from annotator.mmpkg.mmcv.runner import load_checkpoint +from annotator.mmpkg.mmcv.utils.parrots_wrapper import _BatchNorm + +from annotator.mmpkg.mmseg.utils import get_root_logger +from ..builder import BACKBONES + + +class GlobalContextExtractor(nn.Module): + """Global Context Extractor for CGNet. + + This class is employed to refine the joint feature of both local feature + and surrounding context. + + Args: + channel (int): Number of input feature channels. + reduction (int): Reductions for global context extractor. Default: 16. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + """ + + def __init__(self, channel, reduction=16, with_cp=False): + super(GlobalContextExtractor, self).__init__() + self.channel = channel + self.reduction = reduction + assert reduction >= 1 and channel >= reduction + self.with_cp = with_cp + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.fc = nn.Sequential( + nn.Linear(channel, channel // reduction), nn.ReLU(inplace=True), + nn.Linear(channel // reduction, channel), nn.Sigmoid()) + + def forward(self, x): + + def _inner_forward(x): + num_batch, num_channel = x.size()[:2] + y = self.avg_pool(x).view(num_batch, num_channel) + y = self.fc(y).view(num_batch, num_channel, 1, 1) + return x * y + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + return out + + +class ContextGuidedBlock(nn.Module): + """Context Guided Block for CGNet. + + This class consists of four components: local feature extractor, + surrounding feature extractor, joint feature extractor and global + context extractor. + + Args: + in_channels (int): Number of input feature channels. + out_channels (int): Number of output feature channels. + dilation (int): Dilation rate for surrounding context extractor. + Default: 2. + reduction (int): Reduction for global context extractor. Default: 16. + skip_connect (bool): Add input to output or not. Default: True. + downsample (bool): Downsample the input to 1/2 or not. Default: False. + conv_cfg (dict): Config dict for convolution layer. + Default: None, which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN', requires_grad=True). + act_cfg (dict): Config dict for activation layer. + Default: dict(type='PReLU'). + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. 
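# --- Illustrative aside, not part of the vendored file above ---
# A stripped-down PyTorch sketch of the squeeze-and-excitation style gating
# that GlobalContextExtractor performs: global average pool, a small MLP with
# a sigmoid, then per-channel reweighting. Channel/reduction values are examples.
import torch
import torch.nn as nn

channel, reduction = 32, 16
avg_pool = nn.AdaptiveAvgPool2d(1)
fc = nn.Sequential(
    nn.Linear(channel, channel // reduction), nn.ReLU(inplace=True),
    nn.Linear(channel // reduction, channel), nn.Sigmoid())

x = torch.randn(2, channel, 16, 16)
y = avg_pool(x).view(2, channel)          # global context vector per channel
y = fc(y).view(2, channel, 1, 1)          # per-channel gate in (0, 1)
out = x * y                               # reweighted joint feature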
+ """ + + def __init__(self, + in_channels, + out_channels, + dilation=2, + reduction=16, + skip_connect=True, + downsample=False, + conv_cfg=None, + norm_cfg=dict(type='BN', requires_grad=True), + act_cfg=dict(type='PReLU'), + with_cp=False): + super(ContextGuidedBlock, self).__init__() + self.with_cp = with_cp + self.downsample = downsample + + channels = out_channels if downsample else out_channels // 2 + if 'type' in act_cfg and act_cfg['type'] == 'PReLU': + act_cfg['num_parameters'] = channels + kernel_size = 3 if downsample else 1 + stride = 2 if downsample else 1 + padding = (kernel_size - 1) // 2 + + self.conv1x1 = ConvModule( + in_channels, + channels, + kernel_size, + stride, + padding, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + + self.f_loc = build_conv_layer( + conv_cfg, + channels, + channels, + kernel_size=3, + padding=1, + groups=channels, + bias=False) + self.f_sur = build_conv_layer( + conv_cfg, + channels, + channels, + kernel_size=3, + padding=dilation, + groups=channels, + dilation=dilation, + bias=False) + + self.bn = build_norm_layer(norm_cfg, 2 * channels)[1] + self.activate = nn.PReLU(2 * channels) + + if downsample: + self.bottleneck = build_conv_layer( + conv_cfg, + 2 * channels, + out_channels, + kernel_size=1, + bias=False) + + self.skip_connect = skip_connect and not downsample + self.f_glo = GlobalContextExtractor(out_channels, reduction, with_cp) + + def forward(self, x): + + def _inner_forward(x): + out = self.conv1x1(x) + loc = self.f_loc(out) + sur = self.f_sur(out) + + joi_feat = torch.cat([loc, sur], 1) # the joint feature + joi_feat = self.bn(joi_feat) + joi_feat = self.activate(joi_feat) + if self.downsample: + joi_feat = self.bottleneck(joi_feat) # channel = out_channels + # f_glo is employed to refine the joint feature + out = self.f_glo(joi_feat) + + if self.skip_connect: + return x + out + else: + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + return out + + +class InputInjection(nn.Module): + """Downsampling module for CGNet.""" + + def __init__(self, num_downsampling): + super(InputInjection, self).__init__() + self.pool = nn.ModuleList() + for i in range(num_downsampling): + self.pool.append(nn.AvgPool2d(3, stride=2, padding=1)) + + def forward(self, x): + for pool in self.pool: + x = pool(x) + return x + + +@BACKBONES.register_module() +class CGNet(nn.Module): + """CGNet backbone. + + A Light-weight Context Guided Network for Semantic Segmentation + arXiv: https://arxiv.org/abs/1811.08201 + + Args: + in_channels (int): Number of input image channels. Normally 3. + num_channels (tuple[int]): Numbers of feature channels at each stages. + Default: (32, 64, 128). + num_blocks (tuple[int]): Numbers of CG blocks at stage 1 and stage 2. + Default: (3, 21). + dilations (tuple[int]): Dilation rate for surrounding context + extractors at stage 1 and stage 2. Default: (2, 4). + reductions (tuple[int]): Reductions for global context extractors at + stage 1 and stage 2. Default: (8, 16). + conv_cfg (dict): Config dict for convolution layer. + Default: None, which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN', requires_grad=True). + act_cfg (dict): Config dict for activation layer. + Default: dict(type='PReLU'). + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. Default: False. 
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + """ + + def __init__(self, + in_channels=3, + num_channels=(32, 64, 128), + num_blocks=(3, 21), + dilations=(2, 4), + reductions=(8, 16), + conv_cfg=None, + norm_cfg=dict(type='BN', requires_grad=True), + act_cfg=dict(type='PReLU'), + norm_eval=False, + with_cp=False): + + super(CGNet, self).__init__() + self.in_channels = in_channels + self.num_channels = num_channels + assert isinstance(self.num_channels, tuple) and len( + self.num_channels) == 3 + self.num_blocks = num_blocks + assert isinstance(self.num_blocks, tuple) and len(self.num_blocks) == 2 + self.dilations = dilations + assert isinstance(self.dilations, tuple) and len(self.dilations) == 2 + self.reductions = reductions + assert isinstance(self.reductions, tuple) and len(self.reductions) == 2 + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + if 'type' in self.act_cfg and self.act_cfg['type'] == 'PReLU': + self.act_cfg['num_parameters'] = num_channels[0] + self.norm_eval = norm_eval + self.with_cp = with_cp + + cur_channels = in_channels + self.stem = nn.ModuleList() + for i in range(3): + self.stem.append( + ConvModule( + cur_channels, + num_channels[0], + 3, + 2 if i == 0 else 1, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + cur_channels = num_channels[0] + + self.inject_2x = InputInjection(1) # down-sample for Input, factor=2 + self.inject_4x = InputInjection(2) # down-sample for Input, factor=4 + + cur_channels += in_channels + self.norm_prelu_0 = nn.Sequential( + build_norm_layer(norm_cfg, cur_channels)[1], + nn.PReLU(cur_channels)) + + # stage 1 + self.level1 = nn.ModuleList() + for i in range(num_blocks[0]): + self.level1.append( + ContextGuidedBlock( + cur_channels if i == 0 else num_channels[1], + num_channels[1], + dilations[0], + reductions[0], + downsample=(i == 0), + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + with_cp=with_cp)) # CG block + + cur_channels = 2 * num_channels[1] + in_channels + self.norm_prelu_1 = nn.Sequential( + build_norm_layer(norm_cfg, cur_channels)[1], + nn.PReLU(cur_channels)) + + # stage 2 + self.level2 = nn.ModuleList() + for i in range(num_blocks[1]): + self.level2.append( + ContextGuidedBlock( + cur_channels if i == 0 else num_channels[2], + num_channels[2], + dilations[1], + reductions[1], + downsample=(i == 0), + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + with_cp=with_cp)) # CG block + + cur_channels = 2 * num_channels[2] + self.norm_prelu_2 = nn.Sequential( + build_norm_layer(norm_cfg, cur_channels)[1], + nn.PReLU(cur_channels)) + + def forward(self, x): + output = [] + + # stage 0 + inp_2x = self.inject_2x(x) + inp_4x = self.inject_4x(x) + for layer in self.stem: + x = layer(x) + x = self.norm_prelu_0(torch.cat([x, inp_2x], 1)) + output.append(x) + + # stage 1 + for i, layer in enumerate(self.level1): + x = layer(x) + if i == 0: + down1 = x + x = self.norm_prelu_1(torch.cat([x, down1, inp_4x], 1)) + output.append(x) + + # stage 2 + for i, layer in enumerate(self.level2): + x = layer(x) + if i == 0: + down2 = x + x = self.norm_prelu_2(torch.cat([down2, x], 1)) + output.append(x) + + return output + + def init_weights(self, pretrained=None): + """Initialize the weights in backbone. + + Args: + pretrained (str, optional): Path to pre-trained weights. + Defaults to None. 
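# --- Illustrative aside, not part of the vendored file above ---
# Pure-Python bookkeeping of how CGNet's channel counts evolve with the default
# num_channels=(32, 64, 128) and in_channels=3, matching the concatenations in
# the forward pass above (values derived from the code, shown for orientation).
in_channels, num_channels = 3, (32, 64, 128)

stage0 = num_channels[0] + in_channels        # stem output + injected input -> 35
stage1 = 2 * num_channels[1] + in_channels    # CG block concat + injected input -> 131
stage2 = 2 * num_channels[2]                  # CG block concat only -> 256
print(stage0, stage1, stage2)                 # 35 131 256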
+ """ + if isinstance(pretrained, str): + logger = get_root_logger() + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + for m in self.modules(): + if isinstance(m, (nn.Conv2d, nn.Linear)): + kaiming_init(m) + elif isinstance(m, (_BatchNorm, nn.GroupNorm)): + constant_init(m, 1) + elif isinstance(m, nn.PReLU): + constant_init(m, 0) + else: + raise TypeError('pretrained must be a str or None') + + def train(self, mode=True): + """Convert the model into training mode will keeping the normalization + layer freezed.""" + super(CGNet, self).train(mode) + if mode and self.norm_eval: + for m in self.modules(): + # trick: eval have effect on BatchNorm only + if isinstance(m, _BatchNorm): + m.eval() diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/backbones/fast_scnn.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/backbones/fast_scnn.py new file mode 100644 index 0000000000000000000000000000000000000000..417114417ebc830ea11ae7216aa12d8f7a79e5cb --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/backbones/fast_scnn.py @@ -0,0 +1,375 @@ +import torch +import torch.nn as nn +from annotator.mmpkg.mmcv.cnn import (ConvModule, DepthwiseSeparableConvModule, constant_init, + kaiming_init) +from torch.nn.modules.batchnorm import _BatchNorm + +from annotator.mmpkg.mmseg.models.decode_heads.psp_head import PPM +from annotator.mmpkg.mmseg.ops import resize +from ..builder import BACKBONES +from ..utils.inverted_residual import InvertedResidual + + +class LearningToDownsample(nn.Module): + """Learning to downsample module. + + Args: + in_channels (int): Number of input channels. + dw_channels (tuple[int]): Number of output channels of the first and + the second depthwise conv (dwconv) layers. + out_channels (int): Number of output channels of the whole + 'learning to downsample' module. + conv_cfg (dict | None): Config of conv layers. Default: None + norm_cfg (dict | None): Config of norm layers. Default: + dict(type='BN') + act_cfg (dict): Config of activation layers. Default: + dict(type='ReLU') + """ + + def __init__(self, + in_channels, + dw_channels, + out_channels, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU')): + super(LearningToDownsample, self).__init__() + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + dw_channels1 = dw_channels[0] + dw_channels2 = dw_channels[1] + + self.conv = ConvModule( + in_channels, + dw_channels1, + 3, + stride=2, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.dsconv1 = DepthwiseSeparableConvModule( + dw_channels1, + dw_channels2, + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg) + self.dsconv2 = DepthwiseSeparableConvModule( + dw_channels2, + out_channels, + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg) + + def forward(self, x): + x = self.conv(x) + x = self.dsconv1(x) + x = self.dsconv2(x) + return x + + +class GlobalFeatureExtractor(nn.Module): + """Global feature extractor module. + + Args: + in_channels (int): Number of input channels of the GFE module. + Default: 64 + block_channels (tuple[int]): Tuple of ints. Each int specifies the + number of output channels of each Inverted Residual module. + Default: (64, 96, 128) + out_channels(int): Number of output channels of the GFE module. 
+ Default: 128 + expand_ratio (int): Adjusts number of channels of the hidden layer + in InvertedResidual by this amount. + Default: 6 + num_blocks (tuple[int]): Tuple of ints. Each int specifies the + number of times each Inverted Residual module is repeated. + The repeated Inverted Residual modules are called a 'group'. + Default: (3, 3, 3) + strides (tuple[int]): Tuple of ints. Each int specifies + the downsampling factor of each 'group'. + Default: (2, 2, 1) + pool_scales (tuple[int]): Tuple of ints. Each int specifies + the parameter required in 'global average pooling' within PPM. + Default: (1, 2, 3, 6) + conv_cfg (dict | None): Config of conv layers. Default: None + norm_cfg (dict | None): Config of norm layers. Default: + dict(type='BN') + act_cfg (dict): Config of activation layers. Default: + dict(type='ReLU') + align_corners (bool): align_corners argument of F.interpolate. + Default: False + """ + + def __init__(self, + in_channels=64, + block_channels=(64, 96, 128), + out_channels=128, + expand_ratio=6, + num_blocks=(3, 3, 3), + strides=(2, 2, 1), + pool_scales=(1, 2, 3, 6), + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + align_corners=False): + super(GlobalFeatureExtractor, self).__init__() + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + assert len(block_channels) == len(num_blocks) == 3 + self.bottleneck1 = self._make_layer(in_channels, block_channels[0], + num_blocks[0], strides[0], + expand_ratio) + self.bottleneck2 = self._make_layer(block_channels[0], + block_channels[1], num_blocks[1], + strides[1], expand_ratio) + self.bottleneck3 = self._make_layer(block_channels[1], + block_channels[2], num_blocks[2], + strides[2], expand_ratio) + self.ppm = PPM( + pool_scales, + block_channels[2], + block_channels[2] // 4, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + align_corners=align_corners) + self.out = ConvModule( + block_channels[2] * 2, + out_channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def _make_layer(self, + in_channels, + out_channels, + blocks, + stride=1, + expand_ratio=6): + layers = [ + InvertedResidual( + in_channels, + out_channels, + stride, + expand_ratio, + norm_cfg=self.norm_cfg) + ] + for i in range(1, blocks): + layers.append( + InvertedResidual( + out_channels, + out_channels, + 1, + expand_ratio, + norm_cfg=self.norm_cfg)) + return nn.Sequential(*layers) + + def forward(self, x): + x = self.bottleneck1(x) + x = self.bottleneck2(x) + x = self.bottleneck3(x) + x = torch.cat([x, *self.ppm(x)], dim=1) + x = self.out(x) + return x + + +class FeatureFusionModule(nn.Module): + """Feature fusion module. + + Args: + higher_in_channels (int): Number of input channels of the + higher-resolution branch. + lower_in_channels (int): Number of input channels of the + lower-resolution branch. + out_channels (int): Number of output channels. + conv_cfg (dict | None): Config of conv layers. Default: None + norm_cfg (dict | None): Config of norm layers. Default: + dict(type='BN') + act_cfg (dict): Config of activation layers. Default: + dict(type='ReLU') + align_corners (bool): align_corners argument of F.interpolate. 
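# --- Illustrative aside, not part of the vendored file above ---
# The stacking rule implemented by _make_layer above (and reused by
# MobileNetV2): only the first block of a group applies the group's stride,
# the remaining blocks use stride 1.
def group_strides(num_blocks, stride):
    return [stride if i == 0 else 1 for i in range(num_blocks)]

# Fast-SCNN defaults: num_blocks=(3, 3, 3), strides=(2, 2, 1)
print([group_strides(n, s) for n, s in zip((3, 3, 3), (2, 2, 1))])
# [[2, 1, 1], [2, 1, 1], [1, 1, 1]]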
+ Default: False + """ + + def __init__(self, + higher_in_channels, + lower_in_channels, + out_channels, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + align_corners=False): + super(FeatureFusionModule, self).__init__() + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.align_corners = align_corners + self.dwconv = ConvModule( + lower_in_channels, + out_channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.conv_lower_res = ConvModule( + out_channels, + out_channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=None) + self.conv_higher_res = ConvModule( + higher_in_channels, + out_channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=None) + self.relu = nn.ReLU(True) + + def forward(self, higher_res_feature, lower_res_feature): + lower_res_feature = resize( + lower_res_feature, + size=higher_res_feature.size()[2:], + mode='bilinear', + align_corners=self.align_corners) + lower_res_feature = self.dwconv(lower_res_feature) + lower_res_feature = self.conv_lower_res(lower_res_feature) + + higher_res_feature = self.conv_higher_res(higher_res_feature) + out = higher_res_feature + lower_res_feature + return self.relu(out) + + +@BACKBONES.register_module() +class FastSCNN(nn.Module): + """Fast-SCNN Backbone. + + Args: + in_channels (int): Number of input image channels. Default: 3. + downsample_dw_channels (tuple[int]): Number of output channels after + the first conv layer & the second conv layer in + Learning-To-Downsample (LTD) module. + Default: (32, 48). + global_in_channels (int): Number of input channels of + Global Feature Extractor(GFE). + Equal to number of output channels of LTD. + Default: 64. + global_block_channels (tuple[int]): Tuple of integers that describe + the output channels for each of the MobileNet-v2 bottleneck + residual blocks in GFE. + Default: (64, 96, 128). + global_block_strides (tuple[int]): Tuple of integers + that describe the strides (downsampling factors) for each of the + MobileNet-v2 bottleneck residual blocks in GFE. + Default: (2, 2, 1). + global_out_channels (int): Number of output channels of GFE. + Default: 128. + higher_in_channels (int): Number of input channels of the higher + resolution branch in FFM. + Equal to global_in_channels. + Default: 64. + lower_in_channels (int): Number of input channels of the lower + resolution branch in FFM. + Equal to global_out_channels. + Default: 128. + fusion_out_channels (int): Number of output channels of FFM. + Default: 128. + out_indices (tuple): Tuple of indices of list + [higher_res_features, lower_res_features, fusion_output]. + Often set to (0,1,2) to enable aux. heads. + Default: (0, 1, 2). + conv_cfg (dict | None): Config of conv layers. Default: None + norm_cfg (dict | None): Config of norm layers. Default: + dict(type='BN') + act_cfg (dict): Config of activation layers. Default: + dict(type='ReLU') + align_corners (bool): align_corners argument of F.interpolate. 
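# --- Illustrative aside, not part of the vendored file above ---
# The core fusion step of FeatureFusionModule: bilinearly resize the
# low-resolution branch to the high-resolution branch, then sum and apply ReLU.
# The 1x1 convolutions of the real module are omitted here for brevity.
import torch
import torch.nn.functional as F

higher = torch.randn(1, 128, 64, 64)
lower = torch.randn(1, 128, 16, 16)
lower_up = F.interpolate(lower, size=higher.shape[2:], mode='bilinear',
                         align_corners=False)
fused = torch.relu(higher + lower_up)     # shape (1, 128, 64, 64)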
+ Default: False + """ + + def __init__(self, + in_channels=3, + downsample_dw_channels=(32, 48), + global_in_channels=64, + global_block_channels=(64, 96, 128), + global_block_strides=(2, 2, 1), + global_out_channels=128, + higher_in_channels=64, + lower_in_channels=128, + fusion_out_channels=128, + out_indices=(0, 1, 2), + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + align_corners=False): + + super(FastSCNN, self).__init__() + if global_in_channels != higher_in_channels: + raise AssertionError('Global Input Channels must be the same \ + with Higher Input Channels!') + elif global_out_channels != lower_in_channels: + raise AssertionError('Global Output Channels must be the same \ + with Lower Input Channels!') + + self.in_channels = in_channels + self.downsample_dw_channels1 = downsample_dw_channels[0] + self.downsample_dw_channels2 = downsample_dw_channels[1] + self.global_in_channels = global_in_channels + self.global_block_channels = global_block_channels + self.global_block_strides = global_block_strides + self.global_out_channels = global_out_channels + self.higher_in_channels = higher_in_channels + self.lower_in_channels = lower_in_channels + self.fusion_out_channels = fusion_out_channels + self.out_indices = out_indices + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.align_corners = align_corners + self.learning_to_downsample = LearningToDownsample( + in_channels, + downsample_dw_channels, + global_in_channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.global_feature_extractor = GlobalFeatureExtractor( + global_in_channels, + global_block_channels, + global_out_channels, + strides=self.global_block_strides, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + align_corners=self.align_corners) + self.feature_fusion = FeatureFusionModule( + higher_in_channels, + lower_in_channels, + fusion_out_channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + align_corners=self.align_corners) + + def init_weights(self, pretrained=None): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + kaiming_init(m) + elif isinstance(m, (_BatchNorm, nn.GroupNorm)): + constant_init(m, 1) + + def forward(self, x): + higher_res_features = self.learning_to_downsample(x) + lower_res_features = self.global_feature_extractor(higher_res_features) + fusion_output = self.feature_fusion(higher_res_features, + lower_res_features) + + outs = [higher_res_features, lower_res_features, fusion_output] + outs = [outs[i] for i in self.out_indices] + return tuple(outs) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/backbones/hrnet.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/backbones/hrnet.py new file mode 100644 index 0000000000000000000000000000000000000000..8d77fd6eadeec25a6b84619f0d7efa7c577b0464 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/backbones/hrnet.py @@ -0,0 +1,555 @@ +import torch.nn as nn +from annotator.mmpkg.mmcv.cnn import (build_conv_layer, build_norm_layer, constant_init, + kaiming_init) +from annotator.mmpkg.mmcv.runner import load_checkpoint +from annotator.mmpkg.mmcv.utils.parrots_wrapper import _BatchNorm + +from annotator.mmpkg.mmseg.ops import Upsample, resize +from annotator.mmpkg.mmseg.utils import get_root_logger +from ..builder import BACKBONES +from .resnet import BasicBlock, Bottleneck + + +class 
HRModule(nn.Module): + """High-Resolution Module for HRNet. + + In this module, every branch has 4 BasicBlocks/Bottlenecks. Fusion/Exchange + is in this module. + """ + + def __init__(self, + num_branches, + blocks, + num_blocks, + in_channels, + num_channels, + multiscale_output=True, + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN', requires_grad=True)): + super(HRModule, self).__init__() + self._check_branches(num_branches, num_blocks, in_channels, + num_channels) + + self.in_channels = in_channels + self.num_branches = num_branches + + self.multiscale_output = multiscale_output + self.norm_cfg = norm_cfg + self.conv_cfg = conv_cfg + self.with_cp = with_cp + self.branches = self._make_branches(num_branches, blocks, num_blocks, + num_channels) + self.fuse_layers = self._make_fuse_layers() + self.relu = nn.ReLU(inplace=False) + + def _check_branches(self, num_branches, num_blocks, in_channels, + num_channels): + """Check branches configuration.""" + if num_branches != len(num_blocks): + error_msg = f'NUM_BRANCHES({num_branches}) <> NUM_BLOCKS(' \ + f'{len(num_blocks)})' + raise ValueError(error_msg) + + if num_branches != len(num_channels): + error_msg = f'NUM_BRANCHES({num_branches}) <> NUM_CHANNELS(' \ + f'{len(num_channels)})' + raise ValueError(error_msg) + + if num_branches != len(in_channels): + error_msg = f'NUM_BRANCHES({num_branches}) <> NUM_INCHANNELS(' \ + f'{len(in_channels)})' + raise ValueError(error_msg) + + def _make_one_branch(self, + branch_index, + block, + num_blocks, + num_channels, + stride=1): + """Build one branch.""" + downsample = None + if stride != 1 or \ + self.in_channels[branch_index] != \ + num_channels[branch_index] * block.expansion: + downsample = nn.Sequential( + build_conv_layer( + self.conv_cfg, + self.in_channels[branch_index], + num_channels[branch_index] * block.expansion, + kernel_size=1, + stride=stride, + bias=False), + build_norm_layer(self.norm_cfg, num_channels[branch_index] * + block.expansion)[1]) + + layers = [] + layers.append( + block( + self.in_channels[branch_index], + num_channels[branch_index], + stride, + downsample=downsample, + with_cp=self.with_cp, + norm_cfg=self.norm_cfg, + conv_cfg=self.conv_cfg)) + self.in_channels[branch_index] = \ + num_channels[branch_index] * block.expansion + for i in range(1, num_blocks[branch_index]): + layers.append( + block( + self.in_channels[branch_index], + num_channels[branch_index], + with_cp=self.with_cp, + norm_cfg=self.norm_cfg, + conv_cfg=self.conv_cfg)) + + return nn.Sequential(*layers) + + def _make_branches(self, num_branches, block, num_blocks, num_channels): + """Build multiple branch.""" + branches = [] + + for i in range(num_branches): + branches.append( + self._make_one_branch(i, block, num_blocks, num_channels)) + + return nn.ModuleList(branches) + + def _make_fuse_layers(self): + """Build fuse layer.""" + if self.num_branches == 1: + return None + + num_branches = self.num_branches + in_channels = self.in_channels + fuse_layers = [] + num_out_branches = num_branches if self.multiscale_output else 1 + for i in range(num_out_branches): + fuse_layer = [] + for j in range(num_branches): + if j > i: + fuse_layer.append( + nn.Sequential( + build_conv_layer( + self.conv_cfg, + in_channels[j], + in_channels[i], + kernel_size=1, + stride=1, + padding=0, + bias=False), + build_norm_layer(self.norm_cfg, in_channels[i])[1], + # we set align_corners=False for HRNet + Upsample( + scale_factor=2**(j - i), + mode='bilinear', + align_corners=False))) + elif j == i: + 
fuse_layer.append(None) + else: + conv_downsamples = [] + for k in range(i - j): + if k == i - j - 1: + conv_downsamples.append( + nn.Sequential( + build_conv_layer( + self.conv_cfg, + in_channels[j], + in_channels[i], + kernel_size=3, + stride=2, + padding=1, + bias=False), + build_norm_layer(self.norm_cfg, + in_channels[i])[1])) + else: + conv_downsamples.append( + nn.Sequential( + build_conv_layer( + self.conv_cfg, + in_channels[j], + in_channels[j], + kernel_size=3, + stride=2, + padding=1, + bias=False), + build_norm_layer(self.norm_cfg, + in_channels[j])[1], + nn.ReLU(inplace=False))) + fuse_layer.append(nn.Sequential(*conv_downsamples)) + fuse_layers.append(nn.ModuleList(fuse_layer)) + + return nn.ModuleList(fuse_layers) + + def forward(self, x): + """Forward function.""" + if self.num_branches == 1: + return [self.branches[0](x[0])] + + for i in range(self.num_branches): + x[i] = self.branches[i](x[i]) + + x_fuse = [] + for i in range(len(self.fuse_layers)): + y = 0 + for j in range(self.num_branches): + if i == j: + y += x[j] + elif j > i: + y = y + resize( + self.fuse_layers[i][j](x[j]), + size=x[i].shape[2:], + mode='bilinear', + align_corners=False) + else: + y += self.fuse_layers[i][j](x[j]) + x_fuse.append(self.relu(y)) + return x_fuse + + +@BACKBONES.register_module() +class HRNet(nn.Module): + """HRNet backbone. + + High-Resolution Representations for Labeling Pixels and Regions + arXiv: https://arxiv.org/abs/1904.04514 + + Args: + extra (dict): detailed configuration for each stage of HRNet. + in_channels (int): Number of input image channels. Normally 3. + conv_cfg (dict): dictionary to construct and config conv layer. + norm_cfg (dict): dictionary to construct and config norm layer. + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. + zero_init_residual (bool): whether to use zero init for last norm layer + in resblocks to let them behave as identity. + + Example: + >>> from annotator.mmpkg.mmseg.models import HRNet + >>> import torch + >>> extra = dict( + >>> stage1=dict( + >>> num_modules=1, + >>> num_branches=1, + >>> block='BOTTLENECK', + >>> num_blocks=(4, ), + >>> num_channels=(64, )), + >>> stage2=dict( + >>> num_modules=1, + >>> num_branches=2, + >>> block='BASIC', + >>> num_blocks=(4, 4), + >>> num_channels=(32, 64)), + >>> stage3=dict( + >>> num_modules=4, + >>> num_branches=3, + >>> block='BASIC', + >>> num_blocks=(4, 4, 4), + >>> num_channels=(32, 64, 128)), + >>> stage4=dict( + >>> num_modules=3, + >>> num_branches=4, + >>> block='BASIC', + >>> num_blocks=(4, 4, 4, 4), + >>> num_channels=(32, 64, 128, 256))) + >>> self = HRNet(extra, in_channels=1) + >>> self.eval() + >>> inputs = torch.rand(1, 1, 32, 32) + >>> level_outputs = self.forward(inputs) + >>> for level_out in level_outputs: + ... 
print(tuple(level_out.shape)) + (1, 32, 8, 8) + (1, 64, 4, 4) + (1, 128, 2, 2) + (1, 256, 1, 1) + """ + + blocks_dict = {'BASIC': BasicBlock, 'BOTTLENECK': Bottleneck} + + def __init__(self, + extra, + in_channels=3, + conv_cfg=None, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=False, + with_cp=False, + zero_init_residual=False): + super(HRNet, self).__init__() + self.extra = extra + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.norm_eval = norm_eval + self.with_cp = with_cp + self.zero_init_residual = zero_init_residual + + # stem net + self.norm1_name, norm1 = build_norm_layer(self.norm_cfg, 64, postfix=1) + self.norm2_name, norm2 = build_norm_layer(self.norm_cfg, 64, postfix=2) + + self.conv1 = build_conv_layer( + self.conv_cfg, + in_channels, + 64, + kernel_size=3, + stride=2, + padding=1, + bias=False) + + self.add_module(self.norm1_name, norm1) + self.conv2 = build_conv_layer( + self.conv_cfg, + 64, + 64, + kernel_size=3, + stride=2, + padding=1, + bias=False) + + self.add_module(self.norm2_name, norm2) + self.relu = nn.ReLU(inplace=True) + + # stage 1 + self.stage1_cfg = self.extra['stage1'] + num_channels = self.stage1_cfg['num_channels'][0] + block_type = self.stage1_cfg['block'] + num_blocks = self.stage1_cfg['num_blocks'][0] + + block = self.blocks_dict[block_type] + stage1_out_channels = num_channels * block.expansion + self.layer1 = self._make_layer(block, 64, num_channels, num_blocks) + + # stage 2 + self.stage2_cfg = self.extra['stage2'] + num_channels = self.stage2_cfg['num_channels'] + block_type = self.stage2_cfg['block'] + + block = self.blocks_dict[block_type] + num_channels = [channel * block.expansion for channel in num_channels] + self.transition1 = self._make_transition_layer([stage1_out_channels], + num_channels) + self.stage2, pre_stage_channels = self._make_stage( + self.stage2_cfg, num_channels) + + # stage 3 + self.stage3_cfg = self.extra['stage3'] + num_channels = self.stage3_cfg['num_channels'] + block_type = self.stage3_cfg['block'] + + block = self.blocks_dict[block_type] + num_channels = [channel * block.expansion for channel in num_channels] + self.transition2 = self._make_transition_layer(pre_stage_channels, + num_channels) + self.stage3, pre_stage_channels = self._make_stage( + self.stage3_cfg, num_channels) + + # stage 4 + self.stage4_cfg = self.extra['stage4'] + num_channels = self.stage4_cfg['num_channels'] + block_type = self.stage4_cfg['block'] + + block = self.blocks_dict[block_type] + num_channels = [channel * block.expansion for channel in num_channels] + self.transition3 = self._make_transition_layer(pre_stage_channels, + num_channels) + self.stage4, pre_stage_channels = self._make_stage( + self.stage4_cfg, num_channels) + + @property + def norm1(self): + """nn.Module: the normalization layer named "norm1" """ + return getattr(self, self.norm1_name) + + @property + def norm2(self): + """nn.Module: the normalization layer named "norm2" """ + return getattr(self, self.norm2_name) + + def _make_transition_layer(self, num_channels_pre_layer, + num_channels_cur_layer): + """Make transition layer.""" + num_branches_cur = len(num_channels_cur_layer) + num_branches_pre = len(num_channels_pre_layer) + + transition_layers = [] + for i in range(num_branches_cur): + if i < num_branches_pre: + if num_channels_cur_layer[i] != num_channels_pre_layer[i]: + transition_layers.append( + nn.Sequential( + build_conv_layer( + self.conv_cfg, + num_channels_pre_layer[i], + num_channels_cur_layer[i], + kernel_size=3, + stride=1, + padding=1, 
+ bias=False), + build_norm_layer(self.norm_cfg, + num_channels_cur_layer[i])[1], + nn.ReLU(inplace=True))) + else: + transition_layers.append(None) + else: + conv_downsamples = [] + for j in range(i + 1 - num_branches_pre): + in_channels = num_channels_pre_layer[-1] + out_channels = num_channels_cur_layer[i] \ + if j == i - num_branches_pre else in_channels + conv_downsamples.append( + nn.Sequential( + build_conv_layer( + self.conv_cfg, + in_channels, + out_channels, + kernel_size=3, + stride=2, + padding=1, + bias=False), + build_norm_layer(self.norm_cfg, out_channels)[1], + nn.ReLU(inplace=True))) + transition_layers.append(nn.Sequential(*conv_downsamples)) + + return nn.ModuleList(transition_layers) + + def _make_layer(self, block, inplanes, planes, blocks, stride=1): + """Make each layer.""" + downsample = None + if stride != 1 or inplanes != planes * block.expansion: + downsample = nn.Sequential( + build_conv_layer( + self.conv_cfg, + inplanes, + planes * block.expansion, + kernel_size=1, + stride=stride, + bias=False), + build_norm_layer(self.norm_cfg, planes * block.expansion)[1]) + + layers = [] + layers.append( + block( + inplanes, + planes, + stride, + downsample=downsample, + with_cp=self.with_cp, + norm_cfg=self.norm_cfg, + conv_cfg=self.conv_cfg)) + inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append( + block( + inplanes, + planes, + with_cp=self.with_cp, + norm_cfg=self.norm_cfg, + conv_cfg=self.conv_cfg)) + + return nn.Sequential(*layers) + + def _make_stage(self, layer_config, in_channels, multiscale_output=True): + """Make each stage.""" + num_modules = layer_config['num_modules'] + num_branches = layer_config['num_branches'] + num_blocks = layer_config['num_blocks'] + num_channels = layer_config['num_channels'] + block = self.blocks_dict[layer_config['block']] + + hr_modules = [] + for i in range(num_modules): + # multi_scale_output is only used for the last module + if not multiscale_output and i == num_modules - 1: + reset_multiscale_output = False + else: + reset_multiscale_output = True + + hr_modules.append( + HRModule( + num_branches, + block, + num_blocks, + in_channels, + num_channels, + reset_multiscale_output, + with_cp=self.with_cp, + norm_cfg=self.norm_cfg, + conv_cfg=self.conv_cfg)) + + return nn.Sequential(*hr_modules), in_channels + + def init_weights(self, pretrained=None): + """Initialize the weights in backbone. + + Args: + pretrained (str, optional): Path to pre-trained weights. + Defaults to None. 
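# --- Illustrative aside, not part of the vendored file above ---
# Orientation sketch for HRNet's parallel branches, assuming the example
# 'extra' config above with BASIC blocks (expansion 1): the four branches keep
# channel widths (32, 64, 128, 256) while their resolutions differ by factors
# of 2 on top of the stem's 1/4 downsampling. Input size is an example value.
num_channels = (32, 64, 128, 256)
input_hw = (512, 1024)
branch_hw = [(input_hw[0] // (4 * 2**i), input_hw[1] // (4 * 2**i))
             for i in range(len(num_channels))]
print(list(zip(num_channels, branch_hw)))
# [(32, (128, 256)), (64, (64, 128)), (128, (32, 64)), (256, (16, 32))]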
+ """ + if isinstance(pretrained, str): + logger = get_root_logger() + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + for m in self.modules(): + if isinstance(m, nn.Conv2d): + kaiming_init(m) + elif isinstance(m, (_BatchNorm, nn.GroupNorm)): + constant_init(m, 1) + + if self.zero_init_residual: + for m in self.modules(): + if isinstance(m, Bottleneck): + constant_init(m.norm3, 0) + elif isinstance(m, BasicBlock): + constant_init(m.norm2, 0) + else: + raise TypeError('pretrained must be a str or None') + + def forward(self, x): + """Forward function.""" + + x = self.conv1(x) + x = self.norm1(x) + x = self.relu(x) + x = self.conv2(x) + x = self.norm2(x) + x = self.relu(x) + x = self.layer1(x) + + x_list = [] + for i in range(self.stage2_cfg['num_branches']): + if self.transition1[i] is not None: + x_list.append(self.transition1[i](x)) + else: + x_list.append(x) + y_list = self.stage2(x_list) + + x_list = [] + for i in range(self.stage3_cfg['num_branches']): + if self.transition2[i] is not None: + x_list.append(self.transition2[i](y_list[-1])) + else: + x_list.append(y_list[i]) + y_list = self.stage3(x_list) + + x_list = [] + for i in range(self.stage4_cfg['num_branches']): + if self.transition3[i] is not None: + x_list.append(self.transition3[i](y_list[-1])) + else: + x_list.append(y_list[i]) + y_list = self.stage4(x_list) + + return y_list + + def train(self, mode=True): + """Convert the model into training mode will keeping the normalization + layer freezed.""" + super(HRNet, self).train(mode) + if mode and self.norm_eval: + for m in self.modules(): + # trick: eval have effect on BatchNorm only + if isinstance(m, _BatchNorm): + m.eval() diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/backbones/mobilenet_v2.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/backbones/mobilenet_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..7b5b6cd6d04c9da04669550d7f1fd24381460bf3 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/backbones/mobilenet_v2.py @@ -0,0 +1,180 @@ +import logging + +import torch.nn as nn +from annotator.mmpkg.mmcv.cnn import ConvModule, constant_init, kaiming_init +from annotator.mmpkg.mmcv.runner import load_checkpoint +from torch.nn.modules.batchnorm import _BatchNorm + +from ..builder import BACKBONES +from ..utils import InvertedResidual, make_divisible + + +@BACKBONES.register_module() +class MobileNetV2(nn.Module): + """MobileNetV2 backbone. + + Args: + widen_factor (float): Width multiplier, multiply number of + channels in each layer by this amount. Default: 1.0. + strides (Sequence[int], optional): Strides of the first block of each + layer. If not specified, default config in ``arch_setting`` will + be used. + dilations (Sequence[int]): Dilation of each layer. + out_indices (None or Sequence[int]): Output from which stages. + Default: (7, ). + frozen_stages (int): Stages to be frozen (all param fixed). + Default: -1, which means not freezing any parameters. + conv_cfg (dict): Config dict for convolution layer. + Default: None, which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict): Config dict for activation layer. + Default: dict(type='ReLU6'). + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. Default: False. 
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + """ + + # Parameters to build layers. 3 parameters are needed to construct a + # layer, from left to right: expand_ratio, channel, num_blocks. + arch_settings = [[1, 16, 1], [6, 24, 2], [6, 32, 3], [6, 64, 4], + [6, 96, 3], [6, 160, 3], [6, 320, 1]] + + def __init__(self, + widen_factor=1., + strides=(1, 2, 2, 2, 1, 2, 1), + dilations=(1, 1, 1, 1, 1, 1, 1), + out_indices=(1, 2, 4, 6), + frozen_stages=-1, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU6'), + norm_eval=False, + with_cp=False): + super(MobileNetV2, self).__init__() + self.widen_factor = widen_factor + self.strides = strides + self.dilations = dilations + assert len(strides) == len(dilations) == len(self.arch_settings) + self.out_indices = out_indices + for index in out_indices: + if index not in range(0, 7): + raise ValueError('the item in out_indices must in ' + f'range(0, 8). But received {index}') + + if frozen_stages not in range(-1, 7): + raise ValueError('frozen_stages must be in range(-1, 7). ' + f'But received {frozen_stages}') + self.out_indices = out_indices + self.frozen_stages = frozen_stages + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.norm_eval = norm_eval + self.with_cp = with_cp + + self.in_channels = make_divisible(32 * widen_factor, 8) + + self.conv1 = ConvModule( + in_channels=3, + out_channels=self.in_channels, + kernel_size=3, + stride=2, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + self.layers = [] + + for i, layer_cfg in enumerate(self.arch_settings): + expand_ratio, channel, num_blocks = layer_cfg + stride = self.strides[i] + dilation = self.dilations[i] + out_channels = make_divisible(channel * widen_factor, 8) + inverted_res_layer = self.make_layer( + out_channels=out_channels, + num_blocks=num_blocks, + stride=stride, + dilation=dilation, + expand_ratio=expand_ratio) + layer_name = f'layer{i + 1}' + self.add_module(layer_name, inverted_res_layer) + self.layers.append(layer_name) + + def make_layer(self, out_channels, num_blocks, stride, dilation, + expand_ratio): + """Stack InvertedResidual blocks to build a layer for MobileNetV2. + + Args: + out_channels (int): out_channels of block. + num_blocks (int): Number of blocks. + stride (int): Stride of the first block. + dilation (int): Dilation of the first block. + expand_ratio (int): Expand the number of channels of the + hidden layer in InvertedResidual by this ratio. 
+ """ + layers = [] + for i in range(num_blocks): + layers.append( + InvertedResidual( + self.in_channels, + out_channels, + stride if i == 0 else 1, + expand_ratio=expand_ratio, + dilation=dilation if i == 0 else 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + with_cp=self.with_cp)) + self.in_channels = out_channels + + return nn.Sequential(*layers) + + def init_weights(self, pretrained=None): + if isinstance(pretrained, str): + logger = logging.getLogger() + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + for m in self.modules(): + if isinstance(m, nn.Conv2d): + kaiming_init(m) + elif isinstance(m, (_BatchNorm, nn.GroupNorm)): + constant_init(m, 1) + else: + raise TypeError('pretrained must be a str or None') + + def forward(self, x): + x = self.conv1(x) + + outs = [] + for i, layer_name in enumerate(self.layers): + layer = getattr(self, layer_name) + x = layer(x) + if i in self.out_indices: + outs.append(x) + + if len(outs) == 1: + return outs[0] + else: + return tuple(outs) + + def _freeze_stages(self): + if self.frozen_stages >= 0: + for param in self.conv1.parameters(): + param.requires_grad = False + for i in range(1, self.frozen_stages + 1): + layer = getattr(self, f'layer{i}') + layer.eval() + for param in layer.parameters(): + param.requires_grad = False + + def train(self, mode=True): + super(MobileNetV2, self).train(mode) + self._freeze_stages() + if mode and self.norm_eval: + for m in self.modules(): + if isinstance(m, _BatchNorm): + m.eval() diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/backbones/mobilenet_v3.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/backbones/mobilenet_v3.py new file mode 100644 index 0000000000000000000000000000000000000000..e3c22bdd22356a600454f14c2ed12e7ef72c8ca1 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/backbones/mobilenet_v3.py @@ -0,0 +1,255 @@ +import logging + +import annotator.mmpkg.mmcv as mmcv +import torch.nn as nn +from annotator.mmpkg.mmcv.cnn import ConvModule, constant_init, kaiming_init +from annotator.mmpkg.mmcv.cnn.bricks import Conv2dAdaptivePadding +from annotator.mmpkg.mmcv.runner import load_checkpoint +from torch.nn.modules.batchnorm import _BatchNorm + +from ..builder import BACKBONES +from ..utils import InvertedResidualV3 as InvertedResidual + + +@BACKBONES.register_module() +class MobileNetV3(nn.Module): + """MobileNetV3 backbone. + + This backbone is the improved implementation of `Searching for MobileNetV3 + `_. + + Args: + arch (str): Architecture of mobilnetv3, from {'small', 'large'}. + Default: 'small'. + conv_cfg (dict): Config dict for convolution layer. + Default: None, which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN'). + out_indices (tuple[int]): Output from which layer. + Default: (0, 1, 12). + frozen_stages (int): Stages to be frozen (all param fixed). + Default: -1, which means not freezing any parameters. + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. Default: False. + with_cp (bool): Use checkpoint or not. Using checkpoint will save + some memory while slowing down the training speed. + Default: False. 
+ """ + # Parameters to build each block: + # [kernel size, mid channels, out channels, with_se, act type, stride] + arch_settings = { + 'small': [[3, 16, 16, True, 'ReLU', 2], # block0 layer1 os=4 + [3, 72, 24, False, 'ReLU', 2], # block1 layer2 os=8 + [3, 88, 24, False, 'ReLU', 1], + [5, 96, 40, True, 'HSwish', 2], # block2 layer4 os=16 + [5, 240, 40, True, 'HSwish', 1], + [5, 240, 40, True, 'HSwish', 1], + [5, 120, 48, True, 'HSwish', 1], # block3 layer7 os=16 + [5, 144, 48, True, 'HSwish', 1], + [5, 288, 96, True, 'HSwish', 2], # block4 layer9 os=32 + [5, 576, 96, True, 'HSwish', 1], + [5, 576, 96, True, 'HSwish', 1]], + 'large': [[3, 16, 16, False, 'ReLU', 1], # block0 layer1 os=2 + [3, 64, 24, False, 'ReLU', 2], # block1 layer2 os=4 + [3, 72, 24, False, 'ReLU', 1], + [5, 72, 40, True, 'ReLU', 2], # block2 layer4 os=8 + [5, 120, 40, True, 'ReLU', 1], + [5, 120, 40, True, 'ReLU', 1], + [3, 240, 80, False, 'HSwish', 2], # block3 layer7 os=16 + [3, 200, 80, False, 'HSwish', 1], + [3, 184, 80, False, 'HSwish', 1], + [3, 184, 80, False, 'HSwish', 1], + [3, 480, 112, True, 'HSwish', 1], # block4 layer11 os=16 + [3, 672, 112, True, 'HSwish', 1], + [5, 672, 160, True, 'HSwish', 2], # block5 layer13 os=32 + [5, 960, 160, True, 'HSwish', 1], + [5, 960, 160, True, 'HSwish', 1]] + } # yapf: disable + + def __init__(self, + arch='small', + conv_cfg=None, + norm_cfg=dict(type='BN'), + out_indices=(0, 1, 12), + frozen_stages=-1, + reduction_factor=1, + norm_eval=False, + with_cp=False): + super(MobileNetV3, self).__init__() + assert arch in self.arch_settings + assert isinstance(reduction_factor, int) and reduction_factor > 0 + assert mmcv.is_tuple_of(out_indices, int) + for index in out_indices: + if index not in range(0, len(self.arch_settings[arch]) + 2): + raise ValueError( + 'the item in out_indices must in ' + f'range(0, {len(self.arch_settings[arch])+2}). ' + f'But received {index}') + + if frozen_stages not in range(-1, len(self.arch_settings[arch]) + 2): + raise ValueError('frozen_stages must be in range(-1, ' + f'{len(self.arch_settings[arch])+2}). 
' + f'But received {frozen_stages}') + self.arch = arch + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.out_indices = out_indices + self.frozen_stages = frozen_stages + self.reduction_factor = reduction_factor + self.norm_eval = norm_eval + self.with_cp = with_cp + self.layers = self._make_layer() + + def _make_layer(self): + layers = [] + + # build the first layer (layer0) + in_channels = 16 + layer = ConvModule( + in_channels=3, + out_channels=in_channels, + kernel_size=3, + stride=2, + padding=1, + conv_cfg=dict(type='Conv2dAdaptivePadding'), + norm_cfg=self.norm_cfg, + act_cfg=dict(type='HSwish')) + self.add_module('layer0', layer) + layers.append('layer0') + + layer_setting = self.arch_settings[self.arch] + for i, params in enumerate(layer_setting): + (kernel_size, mid_channels, out_channels, with_se, act, + stride) = params + + if self.arch == 'large' and i >= 12 or self.arch == 'small' and \ + i >= 8: + mid_channels = mid_channels // self.reduction_factor + out_channels = out_channels // self.reduction_factor + + if with_se: + se_cfg = dict( + channels=mid_channels, + ratio=4, + act_cfg=(dict(type='ReLU'), + dict(type='HSigmoid', bias=3.0, divisor=6.0))) + else: + se_cfg = None + + layer = InvertedResidual( + in_channels=in_channels, + out_channels=out_channels, + mid_channels=mid_channels, + kernel_size=kernel_size, + stride=stride, + se_cfg=se_cfg, + with_expand_conv=(in_channels != mid_channels), + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=dict(type=act), + with_cp=self.with_cp) + in_channels = out_channels + layer_name = 'layer{}'.format(i + 1) + self.add_module(layer_name, layer) + layers.append(layer_name) + + # build the last layer + # block5 layer12 os=32 for small model + # block6 layer16 os=32 for large model + layer = ConvModule( + in_channels=in_channels, + out_channels=576 if self.arch == 'small' else 960, + kernel_size=1, + stride=1, + dilation=4, + padding=0, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=dict(type='HSwish')) + layer_name = 'layer{}'.format(len(layer_setting) + 1) + self.add_module(layer_name, layer) + layers.append(layer_name) + + # next, convert backbone MobileNetV3 to a semantic segmentation version + if self.arch == 'small': + self.layer4.depthwise_conv.conv.stride = (1, 1) + self.layer9.depthwise_conv.conv.stride = (1, 1) + for i in range(4, len(layers)): + layer = getattr(self, layers[i]) + if isinstance(layer, InvertedResidual): + modified_module = layer.depthwise_conv.conv + else: + modified_module = layer.conv + + if i < 9: + modified_module.dilation = (2, 2) + pad = 2 + else: + modified_module.dilation = (4, 4) + pad = 4 + + if not isinstance(modified_module, Conv2dAdaptivePadding): + # Adjust padding + pad *= (modified_module.kernel_size[0] - 1) // 2 + modified_module.padding = (pad, pad) + else: + self.layer7.depthwise_conv.conv.stride = (1, 1) + self.layer13.depthwise_conv.conv.stride = (1, 1) + for i in range(7, len(layers)): + layer = getattr(self, layers[i]) + if isinstance(layer, InvertedResidual): + modified_module = layer.depthwise_conv.conv + else: + modified_module = layer.conv + + if i < 13: + modified_module.dilation = (2, 2) + pad = 2 + else: + modified_module.dilation = (4, 4) + pad = 4 + + if not isinstance(modified_module, Conv2dAdaptivePadding): + # Adjust padding + pad *= (modified_module.kernel_size[0] - 1) // 2 + modified_module.padding = (pad, pad) + + return layers + + def init_weights(self, pretrained=None): + if isinstance(pretrained, str): + logger = logging.getLogger() 
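+            # non-strict load: keys missing from or unexpected in the checkpoint are tolerated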
+ load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + for m in self.modules(): + if isinstance(m, nn.Conv2d): + kaiming_init(m) + elif isinstance(m, nn.BatchNorm2d): + constant_init(m, 1) + else: + raise TypeError('pretrained must be a str or None') + + def forward(self, x): + outs = [] + for i, layer_name in enumerate(self.layers): + layer = getattr(self, layer_name) + x = layer(x) + if i in self.out_indices: + outs.append(x) + return outs + + def _freeze_stages(self): + for i in range(self.frozen_stages + 1): + layer = getattr(self, f'layer{i}') + layer.eval() + for param in layer.parameters(): + param.requires_grad = False + + def train(self, mode=True): + super(MobileNetV3, self).train(mode) + self._freeze_stages() + if mode and self.norm_eval: + for m in self.modules(): + if isinstance(m, _BatchNorm): + m.eval() diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/backbones/resnest.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/backbones/resnest.py new file mode 100644 index 0000000000000000000000000000000000000000..076ef62195bac2a9660261446b5756c3880dfdf2 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/backbones/resnest.py @@ -0,0 +1,314 @@ +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.checkpoint as cp +from annotator.mmpkg.mmcv.cnn import build_conv_layer, build_norm_layer + +from ..builder import BACKBONES +from ..utils import ResLayer +from .resnet import Bottleneck as _Bottleneck +from .resnet import ResNetV1d + + +class RSoftmax(nn.Module): + """Radix Softmax module in ``SplitAttentionConv2d``. + + Args: + radix (int): Radix of input. + groups (int): Groups of input. + """ + + def __init__(self, radix, groups): + super().__init__() + self.radix = radix + self.groups = groups + + def forward(self, x): + batch = x.size(0) + if self.radix > 1: + x = x.view(batch, self.groups, self.radix, -1).transpose(1, 2) + x = F.softmax(x, dim=1) + x = x.reshape(batch, -1) + else: + x = torch.sigmoid(x) + return x + + +class SplitAttentionConv2d(nn.Module): + """Split-Attention Conv2d in ResNeSt. + + Args: + in_channels (int): Same as nn.Conv2d. + out_channels (int): Same as nn.Conv2d. + kernel_size (int | tuple[int]): Same as nn.Conv2d. + stride (int | tuple[int]): Same as nn.Conv2d. + padding (int | tuple[int]): Same as nn.Conv2d. + dilation (int | tuple[int]): Same as nn.Conv2d. + groups (int): Same as nn.Conv2d. + radix (int): Radix of SpltAtConv2d. Default: 2 + reduction_factor (int): Reduction factor of inter_channels. Default: 4. + conv_cfg (dict): Config dict for convolution layer. Default: None, + which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. Default: None. + dcn (dict): Config dict for DCN. Default: None. 
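+
+    A minimal usage sketch (the sizes below are illustrative only):
+
+    Example:
+        >>> import torch
+        >>> conv = SplitAttentionConv2d(64, 64, kernel_size=3, padding=1, radix=2)
+        >>> out = conv(torch.rand(1, 64, 8, 8))  # same spatial size, 64 channels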
+ """ + + def __init__(self, + in_channels, + channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + radix=2, + reduction_factor=4, + conv_cfg=None, + norm_cfg=dict(type='BN'), + dcn=None): + super(SplitAttentionConv2d, self).__init__() + inter_channels = max(in_channels * radix // reduction_factor, 32) + self.radix = radix + self.groups = groups + self.channels = channels + self.with_dcn = dcn is not None + self.dcn = dcn + fallback_on_stride = False + if self.with_dcn: + fallback_on_stride = self.dcn.pop('fallback_on_stride', False) + if self.with_dcn and not fallback_on_stride: + assert conv_cfg is None, 'conv_cfg must be None for DCN' + conv_cfg = dcn + self.conv = build_conv_layer( + conv_cfg, + in_channels, + channels * radix, + kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups * radix, + bias=False) + self.norm0_name, norm0 = build_norm_layer( + norm_cfg, channels * radix, postfix=0) + self.add_module(self.norm0_name, norm0) + self.relu = nn.ReLU(inplace=True) + self.fc1 = build_conv_layer( + None, channels, inter_channels, 1, groups=self.groups) + self.norm1_name, norm1 = build_norm_layer( + norm_cfg, inter_channels, postfix=1) + self.add_module(self.norm1_name, norm1) + self.fc2 = build_conv_layer( + None, inter_channels, channels * radix, 1, groups=self.groups) + self.rsoftmax = RSoftmax(radix, groups) + + @property + def norm0(self): + """nn.Module: the normalization layer named "norm0" """ + return getattr(self, self.norm0_name) + + @property + def norm1(self): + """nn.Module: the normalization layer named "norm1" """ + return getattr(self, self.norm1_name) + + def forward(self, x): + x = self.conv(x) + x = self.norm0(x) + x = self.relu(x) + + batch, rchannel = x.shape[:2] + batch = x.size(0) + if self.radix > 1: + splits = x.view(batch, self.radix, -1, *x.shape[2:]) + gap = splits.sum(dim=1) + else: + gap = x + gap = F.adaptive_avg_pool2d(gap, 1) + gap = self.fc1(gap) + + gap = self.norm1(gap) + gap = self.relu(gap) + + atten = self.fc2(gap) + atten = self.rsoftmax(atten).view(batch, -1, 1, 1) + + if self.radix > 1: + attens = atten.view(batch, self.radix, -1, *atten.shape[2:]) + out = torch.sum(attens * splits, dim=1) + else: + out = atten * x + return out.contiguous() + + +class Bottleneck(_Bottleneck): + """Bottleneck block for ResNeSt. + + Args: + inplane (int): Input planes of this block. + planes (int): Middle planes of this block. + groups (int): Groups of conv2. + width_per_group (int): Width per group of conv2. 64x4d indicates + ``groups=64, width_per_group=4`` and 32x8d indicates + ``groups=32, width_per_group=8``. + radix (int): Radix of SpltAtConv2d. Default: 2 + reduction_factor (int): Reduction factor of inter_channels in + SplitAttentionConv2d. Default: 4. + avg_down_stride (bool): Whether to use average pool for stride in + Bottleneck. Default: True. + kwargs (dict): Key word arguments for base class. 
+ """ + expansion = 4 + + def __init__(self, + inplanes, + planes, + groups=1, + base_width=4, + base_channels=64, + radix=2, + reduction_factor=4, + avg_down_stride=True, + **kwargs): + """Bottleneck block for ResNeSt.""" + super(Bottleneck, self).__init__(inplanes, planes, **kwargs) + + if groups == 1: + width = self.planes + else: + width = math.floor(self.planes * + (base_width / base_channels)) * groups + + self.avg_down_stride = avg_down_stride and self.conv2_stride > 1 + + self.norm1_name, norm1 = build_norm_layer( + self.norm_cfg, width, postfix=1) + self.norm3_name, norm3 = build_norm_layer( + self.norm_cfg, self.planes * self.expansion, postfix=3) + + self.conv1 = build_conv_layer( + self.conv_cfg, + self.inplanes, + width, + kernel_size=1, + stride=self.conv1_stride, + bias=False) + self.add_module(self.norm1_name, norm1) + self.with_modulated_dcn = False + self.conv2 = SplitAttentionConv2d( + width, + width, + kernel_size=3, + stride=1 if self.avg_down_stride else self.conv2_stride, + padding=self.dilation, + dilation=self.dilation, + groups=groups, + radix=radix, + reduction_factor=reduction_factor, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + dcn=self.dcn) + delattr(self, self.norm2_name) + + if self.avg_down_stride: + self.avd_layer = nn.AvgPool2d(3, self.conv2_stride, padding=1) + + self.conv3 = build_conv_layer( + self.conv_cfg, + width, + self.planes * self.expansion, + kernel_size=1, + bias=False) + self.add_module(self.norm3_name, norm3) + + def forward(self, x): + + def _inner_forward(x): + identity = x + + out = self.conv1(x) + out = self.norm1(out) + out = self.relu(out) + + if self.with_plugins: + out = self.forward_plugin(out, self.after_conv1_plugin_names) + + out = self.conv2(out) + + if self.avg_down_stride: + out = self.avd_layer(out) + + if self.with_plugins: + out = self.forward_plugin(out, self.after_conv2_plugin_names) + + out = self.conv3(out) + out = self.norm3(out) + + if self.with_plugins: + out = self.forward_plugin(out, self.after_conv3_plugin_names) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + out = self.relu(out) + + return out + + +@BACKBONES.register_module() +class ResNeSt(ResNetV1d): + """ResNeSt backbone. + + Args: + groups (int): Number of groups of Bottleneck. Default: 1 + base_width (int): Base width of Bottleneck. Default: 4 + radix (int): Radix of SpltAtConv2d. Default: 2 + reduction_factor (int): Reduction factor of inter_channels in + SplitAttentionConv2d. Default: 4. + avg_down_stride (bool): Whether to use average pool for stride in + Bottleneck. Default: True. + kwargs (dict): Keyword arguments for ResNet. 
+ """ + + arch_settings = { + 50: (Bottleneck, (3, 4, 6, 3)), + 101: (Bottleneck, (3, 4, 23, 3)), + 152: (Bottleneck, (3, 8, 36, 3)), + 200: (Bottleneck, (3, 24, 36, 3)) + } + + def __init__(self, + groups=1, + base_width=4, + radix=2, + reduction_factor=4, + avg_down_stride=True, + **kwargs): + self.groups = groups + self.base_width = base_width + self.radix = radix + self.reduction_factor = reduction_factor + self.avg_down_stride = avg_down_stride + super(ResNeSt, self).__init__(**kwargs) + + def make_res_layer(self, **kwargs): + """Pack all blocks in a stage into a ``ResLayer``.""" + return ResLayer( + groups=self.groups, + base_width=self.base_width, + base_channels=self.base_channels, + radix=self.radix, + reduction_factor=self.reduction_factor, + avg_down_stride=self.avg_down_stride, + **kwargs) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/backbones/resnet.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/backbones/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..b3304dc5238110adcf21fa4c0a4e230158894fea --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/backbones/resnet.py @@ -0,0 +1,688 @@ +import torch.nn as nn +import torch.utils.checkpoint as cp +from annotator.mmpkg.mmcv.cnn import (build_conv_layer, build_norm_layer, build_plugin_layer, + constant_init, kaiming_init) +from annotator.mmpkg.mmcv.runner import load_checkpoint +from annotator.mmpkg.mmcv.utils.parrots_wrapper import _BatchNorm + +from annotator.mmpkg.mmseg.utils import get_root_logger +from ..builder import BACKBONES +from ..utils import ResLayer + + +class BasicBlock(nn.Module): + """Basic block for ResNet.""" + + expansion = 1 + + def __init__(self, + inplanes, + planes, + stride=1, + dilation=1, + downsample=None, + style='pytorch', + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + dcn=None, + plugins=None): + super(BasicBlock, self).__init__() + assert dcn is None, 'Not implemented yet.' + assert plugins is None, 'Not implemented yet.' + + self.norm1_name, norm1 = build_norm_layer(norm_cfg, planes, postfix=1) + self.norm2_name, norm2 = build_norm_layer(norm_cfg, planes, postfix=2) + + self.conv1 = build_conv_layer( + conv_cfg, + inplanes, + planes, + 3, + stride=stride, + padding=dilation, + dilation=dilation, + bias=False) + self.add_module(self.norm1_name, norm1) + self.conv2 = build_conv_layer( + conv_cfg, planes, planes, 3, padding=1, bias=False) + self.add_module(self.norm2_name, norm2) + + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + self.dilation = dilation + self.with_cp = with_cp + + @property + def norm1(self): + """nn.Module: normalization layer after the first convolution layer""" + return getattr(self, self.norm1_name) + + @property + def norm2(self): + """nn.Module: normalization layer after the second convolution layer""" + return getattr(self, self.norm2_name) + + def forward(self, x): + """Forward function.""" + + def _inner_forward(x): + identity = x + + out = self.conv1(x) + out = self.norm1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.norm2(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + """Bottleneck block for ResNet. 
+ + If style is "pytorch", the stride-two layer is the 3x3 conv layer, if it is + "caffe", the stride-two layer is the first 1x1 conv layer. + """ + + expansion = 4 + + def __init__(self, + inplanes, + planes, + stride=1, + dilation=1, + downsample=None, + style='pytorch', + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + dcn=None, + plugins=None): + super(Bottleneck, self).__init__() + assert style in ['pytorch', 'caffe'] + assert dcn is None or isinstance(dcn, dict) + assert plugins is None or isinstance(plugins, list) + if plugins is not None: + allowed_position = ['after_conv1', 'after_conv2', 'after_conv3'] + assert all(p['position'] in allowed_position for p in plugins) + + self.inplanes = inplanes + self.planes = planes + self.stride = stride + self.dilation = dilation + self.style = style + self.with_cp = with_cp + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.dcn = dcn + self.with_dcn = dcn is not None + self.plugins = plugins + self.with_plugins = plugins is not None + + if self.with_plugins: + # collect plugins for conv1/conv2/conv3 + self.after_conv1_plugins = [ + plugin['cfg'] for plugin in plugins + if plugin['position'] == 'after_conv1' + ] + self.after_conv2_plugins = [ + plugin['cfg'] for plugin in plugins + if plugin['position'] == 'after_conv2' + ] + self.after_conv3_plugins = [ + plugin['cfg'] for plugin in plugins + if plugin['position'] == 'after_conv3' + ] + + if self.style == 'pytorch': + self.conv1_stride = 1 + self.conv2_stride = stride + else: + self.conv1_stride = stride + self.conv2_stride = 1 + + self.norm1_name, norm1 = build_norm_layer(norm_cfg, planes, postfix=1) + self.norm2_name, norm2 = build_norm_layer(norm_cfg, planes, postfix=2) + self.norm3_name, norm3 = build_norm_layer( + norm_cfg, planes * self.expansion, postfix=3) + + self.conv1 = build_conv_layer( + conv_cfg, + inplanes, + planes, + kernel_size=1, + stride=self.conv1_stride, + bias=False) + self.add_module(self.norm1_name, norm1) + fallback_on_stride = False + if self.with_dcn: + fallback_on_stride = dcn.pop('fallback_on_stride', False) + if not self.with_dcn or fallback_on_stride: + self.conv2 = build_conv_layer( + conv_cfg, + planes, + planes, + kernel_size=3, + stride=self.conv2_stride, + padding=dilation, + dilation=dilation, + bias=False) + else: + assert self.conv_cfg is None, 'conv_cfg must be None for DCN' + self.conv2 = build_conv_layer( + dcn, + planes, + planes, + kernel_size=3, + stride=self.conv2_stride, + padding=dilation, + dilation=dilation, + bias=False) + + self.add_module(self.norm2_name, norm2) + self.conv3 = build_conv_layer( + conv_cfg, + planes, + planes * self.expansion, + kernel_size=1, + bias=False) + self.add_module(self.norm3_name, norm3) + + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + + if self.with_plugins: + self.after_conv1_plugin_names = self.make_block_plugins( + planes, self.after_conv1_plugins) + self.after_conv2_plugin_names = self.make_block_plugins( + planes, self.after_conv2_plugins) + self.after_conv3_plugin_names = self.make_block_plugins( + planes * self.expansion, self.after_conv3_plugins) + + def make_block_plugins(self, in_channels, plugins): + """make plugins for block. + + Args: + in_channels (int): Input channels of plugin. + plugins (list[dict]): List of plugins cfg to build. + + Returns: + list[str]: List of the names of plugin. 
+ """ + assert isinstance(plugins, list) + plugin_names = [] + for plugin in plugins: + plugin = plugin.copy() + name, layer = build_plugin_layer( + plugin, + in_channels=in_channels, + postfix=plugin.pop('postfix', '')) + assert not hasattr(self, name), f'duplicate plugin {name}' + self.add_module(name, layer) + plugin_names.append(name) + return plugin_names + + def forward_plugin(self, x, plugin_names): + """Forward function for plugins.""" + out = x + for name in plugin_names: + out = getattr(self, name)(x) + return out + + @property + def norm1(self): + """nn.Module: normalization layer after the first convolution layer""" + return getattr(self, self.norm1_name) + + @property + def norm2(self): + """nn.Module: normalization layer after the second convolution layer""" + return getattr(self, self.norm2_name) + + @property + def norm3(self): + """nn.Module: normalization layer after the third convolution layer""" + return getattr(self, self.norm3_name) + + def forward(self, x): + """Forward function.""" + + def _inner_forward(x): + identity = x + + out = self.conv1(x) + out = self.norm1(out) + out = self.relu(out) + + if self.with_plugins: + out = self.forward_plugin(out, self.after_conv1_plugin_names) + + out = self.conv2(out) + out = self.norm2(out) + out = self.relu(out) + + if self.with_plugins: + out = self.forward_plugin(out, self.after_conv2_plugin_names) + + out = self.conv3(out) + out = self.norm3(out) + + if self.with_plugins: + out = self.forward_plugin(out, self.after_conv3_plugin_names) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + out = self.relu(out) + + return out + + +@BACKBONES.register_module() +class ResNet(nn.Module): + """ResNet backbone. + + Args: + depth (int): Depth of resnet, from {18, 34, 50, 101, 152}. + in_channels (int): Number of input image channels. Default" 3. + stem_channels (int): Number of stem channels. Default: 64. + base_channels (int): Number of base channels of res layer. Default: 64. + num_stages (int): Resnet stages, normally 4. + strides (Sequence[int]): Strides of the first block of each stage. + dilations (Sequence[int]): Dilation of each stage. + out_indices (Sequence[int]): Output from which stages. + style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two + layer is the 3x3 conv layer, otherwise the stride-two layer is + the first 1x1 conv layer. + deep_stem (bool): Replace 7x7 conv in input stem with 3 3x3 conv + avg_down (bool): Use AvgPool instead of stride conv when + downsampling in the bottleneck. + frozen_stages (int): Stages to be frozen (stop grad and set eval mode). + -1 means not freezing any parameters. + norm_cfg (dict): Dictionary to construct and config norm layer. + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. + plugins (list[dict]): List of plugins for stages, each dict contains: + + - cfg (dict, required): Cfg dict to build plugin. + + - position (str, required): Position inside block to insert plugin, + options: 'after_conv1', 'after_conv2', 'after_conv3'. + + - stages (tuple[bool], optional): Stages to apply plugin, length + should be same as 'num_stages' + multi_grid (Sequence[int]|None): Multi grid dilation rates of last + stage. 
Default: None + contract_dilation (bool): Whether contract first dilation of each layer + Default: False + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. + zero_init_residual (bool): Whether to use zero init for last norm layer + in resblocks to let them behave as identity. + + Example: + >>> from annotator.mmpkg.mmseg.models import ResNet + >>> import torch + >>> self = ResNet(depth=18) + >>> self.eval() + >>> inputs = torch.rand(1, 3, 32, 32) + >>> level_outputs = self.forward(inputs) + >>> for level_out in level_outputs: + ... print(tuple(level_out.shape)) + (1, 64, 8, 8) + (1, 128, 4, 4) + (1, 256, 2, 2) + (1, 512, 1, 1) + """ + + arch_settings = { + 18: (BasicBlock, (2, 2, 2, 2)), + 34: (BasicBlock, (3, 4, 6, 3)), + 50: (Bottleneck, (3, 4, 6, 3)), + 101: (Bottleneck, (3, 4, 23, 3)), + 152: (Bottleneck, (3, 8, 36, 3)) + } + + def __init__(self, + depth, + in_channels=3, + stem_channels=64, + base_channels=64, + num_stages=4, + strides=(1, 2, 2, 2), + dilations=(1, 1, 1, 1), + out_indices=(0, 1, 2, 3), + style='pytorch', + deep_stem=False, + avg_down=False, + frozen_stages=-1, + conv_cfg=None, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=False, + dcn=None, + stage_with_dcn=(False, False, False, False), + plugins=None, + multi_grid=None, + contract_dilation=False, + with_cp=False, + zero_init_residual=True): + super(ResNet, self).__init__() + if depth not in self.arch_settings: + raise KeyError(f'invalid depth {depth} for resnet') + self.depth = depth + self.stem_channels = stem_channels + self.base_channels = base_channels + self.num_stages = num_stages + assert num_stages >= 1 and num_stages <= 4 + self.strides = strides + self.dilations = dilations + assert len(strides) == len(dilations) == num_stages + self.out_indices = out_indices + assert max(out_indices) < num_stages + self.style = style + self.deep_stem = deep_stem + self.avg_down = avg_down + self.frozen_stages = frozen_stages + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.with_cp = with_cp + self.norm_eval = norm_eval + self.dcn = dcn + self.stage_with_dcn = stage_with_dcn + if dcn is not None: + assert len(stage_with_dcn) == num_stages + self.plugins = plugins + self.multi_grid = multi_grid + self.contract_dilation = contract_dilation + self.zero_init_residual = zero_init_residual + self.block, stage_blocks = self.arch_settings[depth] + self.stage_blocks = stage_blocks[:num_stages] + self.inplanes = stem_channels + + self._make_stem_layer(in_channels, stem_channels) + + self.res_layers = [] + for i, num_blocks in enumerate(self.stage_blocks): + stride = strides[i] + dilation = dilations[i] + dcn = self.dcn if self.stage_with_dcn[i] else None + if plugins is not None: + stage_plugins = self.make_stage_plugins(plugins, i) + else: + stage_plugins = None + # multi grid is applied to last layer only + stage_multi_grid = multi_grid if i == len( + self.stage_blocks) - 1 else None + planes = base_channels * 2**i + res_layer = self.make_res_layer( + block=self.block, + inplanes=self.inplanes, + planes=planes, + num_blocks=num_blocks, + stride=stride, + dilation=dilation, + style=self.style, + avg_down=self.avg_down, + with_cp=with_cp, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + dcn=dcn, + plugins=stage_plugins, + multi_grid=stage_multi_grid, + contract_dilation=contract_dilation) + self.inplanes = planes * self.block.expansion + layer_name = f'layer{i+1}' + self.add_module(layer_name, res_layer) + self.res_layers.append(layer_name) + + 
self._freeze_stages() + + self.feat_dim = self.block.expansion * base_channels * 2**( + len(self.stage_blocks) - 1) + + def make_stage_plugins(self, plugins, stage_idx): + """make plugins for ResNet 'stage_idx'th stage . + + Currently we support to insert 'context_block', + 'empirical_attention_block', 'nonlocal_block' into the backbone like + ResNet/ResNeXt. They could be inserted after conv1/conv2/conv3 of + Bottleneck. + + An example of plugins format could be : + >>> plugins=[ + ... dict(cfg=dict(type='xxx', arg1='xxx'), + ... stages=(False, True, True, True), + ... position='after_conv2'), + ... dict(cfg=dict(type='yyy'), + ... stages=(True, True, True, True), + ... position='after_conv3'), + ... dict(cfg=dict(type='zzz', postfix='1'), + ... stages=(True, True, True, True), + ... position='after_conv3'), + ... dict(cfg=dict(type='zzz', postfix='2'), + ... stages=(True, True, True, True), + ... position='after_conv3') + ... ] + >>> self = ResNet(depth=18) + >>> stage_plugins = self.make_stage_plugins(plugins, 0) + >>> assert len(stage_plugins) == 3 + + Suppose 'stage_idx=0', the structure of blocks in the stage would be: + conv1-> conv2->conv3->yyy->zzz1->zzz2 + Suppose 'stage_idx=1', the structure of blocks in the stage would be: + conv1-> conv2->xxx->conv3->yyy->zzz1->zzz2 + + If stages is missing, the plugin would be applied to all stages. + + Args: + plugins (list[dict]): List of plugins cfg to build. The postfix is + required if multiple same type plugins are inserted. + stage_idx (int): Index of stage to build + + Returns: + list[dict]: Plugins for current stage + """ + stage_plugins = [] + for plugin in plugins: + plugin = plugin.copy() + stages = plugin.pop('stages', None) + assert stages is None or len(stages) == self.num_stages + # whether to insert plugin into current stage + if stages is None or stages[stage_idx]: + stage_plugins.append(plugin) + + return stage_plugins + + def make_res_layer(self, **kwargs): + """Pack all blocks in a stage into a ``ResLayer``.""" + return ResLayer(**kwargs) + + @property + def norm1(self): + """nn.Module: the normalization layer named "norm1" """ + return getattr(self, self.norm1_name) + + def _make_stem_layer(self, in_channels, stem_channels): + """Make stem layer for ResNet.""" + if self.deep_stem: + self.stem = nn.Sequential( + build_conv_layer( + self.conv_cfg, + in_channels, + stem_channels // 2, + kernel_size=3, + stride=2, + padding=1, + bias=False), + build_norm_layer(self.norm_cfg, stem_channels // 2)[1], + nn.ReLU(inplace=True), + build_conv_layer( + self.conv_cfg, + stem_channels // 2, + stem_channels // 2, + kernel_size=3, + stride=1, + padding=1, + bias=False), + build_norm_layer(self.norm_cfg, stem_channels // 2)[1], + nn.ReLU(inplace=True), + build_conv_layer( + self.conv_cfg, + stem_channels // 2, + stem_channels, + kernel_size=3, + stride=1, + padding=1, + bias=False), + build_norm_layer(self.norm_cfg, stem_channels)[1], + nn.ReLU(inplace=True)) + else: + self.conv1 = build_conv_layer( + self.conv_cfg, + in_channels, + stem_channels, + kernel_size=7, + stride=2, + padding=3, + bias=False) + self.norm1_name, norm1 = build_norm_layer( + self.norm_cfg, stem_channels, postfix=1) + self.add_module(self.norm1_name, norm1) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + + def _freeze_stages(self): + """Freeze stages param and norm stats.""" + if self.frozen_stages >= 0: + if self.deep_stem: + self.stem.eval() + for param in self.stem.parameters(): + param.requires_grad = 
False + else: + self.norm1.eval() + for m in [self.conv1, self.norm1]: + for param in m.parameters(): + param.requires_grad = False + + for i in range(1, self.frozen_stages + 1): + m = getattr(self, f'layer{i}') + m.eval() + for param in m.parameters(): + param.requires_grad = False + + def init_weights(self, pretrained=None): + """Initialize the weights in backbone. + + Args: + pretrained (str, optional): Path to pre-trained weights. + Defaults to None. + """ + if isinstance(pretrained, str): + logger = get_root_logger() + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + for m in self.modules(): + if isinstance(m, nn.Conv2d): + kaiming_init(m) + elif isinstance(m, (_BatchNorm, nn.GroupNorm)): + constant_init(m, 1) + + if self.dcn is not None: + for m in self.modules(): + if isinstance(m, Bottleneck) and hasattr( + m, 'conv2_offset'): + constant_init(m.conv2_offset, 0) + + if self.zero_init_residual: + for m in self.modules(): + if isinstance(m, Bottleneck): + constant_init(m.norm3, 0) + elif isinstance(m, BasicBlock): + constant_init(m.norm2, 0) + else: + raise TypeError('pretrained must be a str or None') + + def forward(self, x): + """Forward function.""" + if self.deep_stem: + x = self.stem(x) + else: + x = self.conv1(x) + x = self.norm1(x) + x = self.relu(x) + x = self.maxpool(x) + outs = [] + for i, layer_name in enumerate(self.res_layers): + res_layer = getattr(self, layer_name) + x = res_layer(x) + if i in self.out_indices: + outs.append(x) + return tuple(outs) + + def train(self, mode=True): + """Convert the model into training mode while keep normalization layer + freezed.""" + super(ResNet, self).train(mode) + self._freeze_stages() + if mode and self.norm_eval: + for m in self.modules(): + # trick: eval have effect on BatchNorm only + if isinstance(m, _BatchNorm): + m.eval() + + +@BACKBONES.register_module() +class ResNetV1c(ResNet): + """ResNetV1c variant described in [1]_. + + Compared with default ResNet(ResNetV1b), ResNetV1c replaces the 7x7 conv + in the input stem with three 3x3 convs. + + References: + .. [1] https://arxiv.org/pdf/1812.01187.pdf + """ + + def __init__(self, **kwargs): + super(ResNetV1c, self).__init__( + deep_stem=True, avg_down=False, **kwargs) + + +@BACKBONES.register_module() +class ResNetV1d(ResNet): + """ResNetV1d variant described in [1]_. + + Compared with default ResNet(ResNetV1b), ResNetV1d replaces the 7x7 conv in + the input stem with three 3x3 convs. And in the downsampling block, a 2x2 + avg_pool with stride 2 is added before conv, whose stride is changed to 1. + """ + + def __init__(self, **kwargs): + super(ResNetV1d, self).__init__( + deep_stem=True, avg_down=True, **kwargs) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/backbones/resnext.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/backbones/resnext.py new file mode 100644 index 0000000000000000000000000000000000000000..be0194da1714e8431309a9dd8a42afebdbc1baf5 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/backbones/resnext.py @@ -0,0 +1,145 @@ +import math + +from annotator.mmpkg.mmcv.cnn import build_conv_layer, build_norm_layer + +from ..builder import BACKBONES +from ..utils import ResLayer +from .resnet import Bottleneck as _Bottleneck +from .resnet import ResNet + + +class Bottleneck(_Bottleneck): + """Bottleneck block for ResNeXt. 
+ + If style is "pytorch", the stride-two layer is the 3x3 conv layer, if it is + "caffe", the stride-two layer is the first 1x1 conv layer. + """ + + def __init__(self, + inplanes, + planes, + groups=1, + base_width=4, + base_channels=64, + **kwargs): + super(Bottleneck, self).__init__(inplanes, planes, **kwargs) + + if groups == 1: + width = self.planes + else: + width = math.floor(self.planes * + (base_width / base_channels)) * groups + + self.norm1_name, norm1 = build_norm_layer( + self.norm_cfg, width, postfix=1) + self.norm2_name, norm2 = build_norm_layer( + self.norm_cfg, width, postfix=2) + self.norm3_name, norm3 = build_norm_layer( + self.norm_cfg, self.planes * self.expansion, postfix=3) + + self.conv1 = build_conv_layer( + self.conv_cfg, + self.inplanes, + width, + kernel_size=1, + stride=self.conv1_stride, + bias=False) + self.add_module(self.norm1_name, norm1) + fallback_on_stride = False + self.with_modulated_dcn = False + if self.with_dcn: + fallback_on_stride = self.dcn.pop('fallback_on_stride', False) + if not self.with_dcn or fallback_on_stride: + self.conv2 = build_conv_layer( + self.conv_cfg, + width, + width, + kernel_size=3, + stride=self.conv2_stride, + padding=self.dilation, + dilation=self.dilation, + groups=groups, + bias=False) + else: + assert self.conv_cfg is None, 'conv_cfg must be None for DCN' + self.conv2 = build_conv_layer( + self.dcn, + width, + width, + kernel_size=3, + stride=self.conv2_stride, + padding=self.dilation, + dilation=self.dilation, + groups=groups, + bias=False) + + self.add_module(self.norm2_name, norm2) + self.conv3 = build_conv_layer( + self.conv_cfg, + width, + self.planes * self.expansion, + kernel_size=1, + bias=False) + self.add_module(self.norm3_name, norm3) + + +@BACKBONES.register_module() +class ResNeXt(ResNet): + """ResNeXt backbone. + + Args: + depth (int): Depth of resnet, from {18, 34, 50, 101, 152}. + in_channels (int): Number of input image channels. Normally 3. + num_stages (int): Resnet stages, normally 4. + groups (int): Group of resnext. + base_width (int): Base width of resnext. + strides (Sequence[int]): Strides of the first block of each stage. + dilations (Sequence[int]): Dilation of each stage. + out_indices (Sequence[int]): Output from which stages. + style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two + layer is the 3x3 conv layer, otherwise the stride-two layer is + the first 1x1 conv layer. + frozen_stages (int): Stages to be frozen (all param fixed). -1 means + not freezing any parameters. + norm_cfg (dict): dictionary to construct and config norm layer. + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. + zero_init_residual (bool): whether to use zero init for last norm layer + in resblocks to let them behave as identity. + + Example: + >>> from annotator.mmpkg.mmseg.models import ResNeXt + >>> import torch + >>> self = ResNeXt(depth=50) + >>> self.eval() + >>> inputs = torch.rand(1, 3, 32, 32) + >>> level_outputs = self.forward(inputs) + >>> for level_out in level_outputs: + ... 
print(tuple(level_out.shape)) + (1, 256, 8, 8) + (1, 512, 4, 4) + (1, 1024, 2, 2) + (1, 2048, 1, 1) + """ + + arch_settings = { + 50: (Bottleneck, (3, 4, 6, 3)), + 101: (Bottleneck, (3, 4, 23, 3)), + 152: (Bottleneck, (3, 8, 36, 3)) + } + + def __init__(self, groups=1, base_width=4, **kwargs): + self.groups = groups + self.base_width = base_width + super(ResNeXt, self).__init__(**kwargs) + + def make_res_layer(self, **kwargs): + """Pack all blocks in a stage into a ``ResLayer``""" + return ResLayer( + groups=self.groups, + base_width=self.base_width, + base_channels=self.base_channels, + **kwargs) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/backbones/unet.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/backbones/unet.py new file mode 100644 index 0000000000000000000000000000000000000000..3d19902ba273af02f8c9ce60f6632634633c1101 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/backbones/unet.py @@ -0,0 +1,429 @@ +import torch.nn as nn +import torch.utils.checkpoint as cp +from annotator.mmpkg.mmcv.cnn import (UPSAMPLE_LAYERS, ConvModule, build_activation_layer, + build_norm_layer, constant_init, kaiming_init) +from annotator.mmpkg.mmcv.runner import load_checkpoint +from annotator.mmpkg.mmcv.utils.parrots_wrapper import _BatchNorm + +from annotator.mmpkg.mmseg.utils import get_root_logger +from ..builder import BACKBONES +from ..utils import UpConvBlock + + +class BasicConvBlock(nn.Module): + """Basic convolutional block for UNet. + + This module consists of several plain convolutional layers. + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + num_convs (int): Number of convolutional layers. Default: 2. + stride (int): Whether use stride convolution to downsample + the input feature map. If stride=2, it only uses stride convolution + in the first convolutional layer to downsample the input feature + map. Options are 1 or 2. Default: 1. + dilation (int): Whether use dilated convolution to expand the + receptive field. Set dilation rate of each convolutional layer and + the dilation rate of the first convolutional layer is always 1. + Default: 1. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + conv_cfg (dict | None): Config dict for convolution layer. + Default: None. + norm_cfg (dict | None): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict | None): Config dict for activation layer in ConvModule. + Default: dict(type='ReLU'). + dcn (bool): Use deformable convolution in convolutional layer or not. + Default: None. + plugins (dict): plugins for convolutional layers. Default: None. + """ + + def __init__(self, + in_channels, + out_channels, + num_convs=2, + stride=1, + dilation=1, + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + dcn=None, + plugins=None): + super(BasicConvBlock, self).__init__() + assert dcn is None, 'Not implemented yet.' + assert plugins is None, 'Not implemented yet.' 
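+        # only the first conv applies the requested stride (for downsampling) and keeps
+        # dilation 1; the remaining convs use stride 1 with the requested dilation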
+ + self.with_cp = with_cp + convs = [] + for i in range(num_convs): + convs.append( + ConvModule( + in_channels=in_channels if i == 0 else out_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride if i == 0 else 1, + dilation=1 if i == 0 else dilation, + padding=1 if i == 0 else dilation, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + + self.convs = nn.Sequential(*convs) + + def forward(self, x): + """Forward function.""" + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(self.convs, x) + else: + out = self.convs(x) + return out + + +@UPSAMPLE_LAYERS.register_module() +class DeconvModule(nn.Module): + """Deconvolution upsample module in decoder for UNet (2X upsample). + + This module uses deconvolution to upsample feature map in the decoder + of UNet. + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + norm_cfg (dict | None): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict | None): Config dict for activation layer in ConvModule. + Default: dict(type='ReLU'). + kernel_size (int): Kernel size of the convolutional layer. Default: 4. + """ + + def __init__(self, + in_channels, + out_channels, + with_cp=False, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + *, + kernel_size=4, + scale_factor=2): + super(DeconvModule, self).__init__() + + assert (kernel_size - scale_factor >= 0) and\ + (kernel_size - scale_factor) % 2 == 0,\ + f'kernel_size should be greater than or equal to scale_factor '\ + f'and (kernel_size - scale_factor) should be even numbers, '\ + f'while the kernel size is {kernel_size} and scale_factor is '\ + f'{scale_factor}.' + + stride = scale_factor + padding = (kernel_size - scale_factor) // 2 + self.with_cp = with_cp + deconv = nn.ConvTranspose2d( + in_channels, + out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding) + + norm_name, norm = build_norm_layer(norm_cfg, out_channels) + activate = build_activation_layer(act_cfg) + self.deconv_upsamping = nn.Sequential(deconv, norm, activate) + + def forward(self, x): + """Forward function.""" + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(self.deconv_upsamping, x) + else: + out = self.deconv_upsamping(x) + return out + + +@UPSAMPLE_LAYERS.register_module() +class InterpConv(nn.Module): + """Interpolation upsample module in decoder for UNet. + + This module uses interpolation to upsample feature map in the decoder + of UNet. It consists of one interpolation upsample layer and one + convolutional layer. It can be one interpolation upsample layer followed + by one convolutional layer (conv_first=False) or one convolutional layer + followed by one interpolation upsample layer (conv_first=True). + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + norm_cfg (dict | None): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict | None): Config dict for activation layer in ConvModule. + Default: dict(type='ReLU'). + conv_cfg (dict | None): Config dict for convolution layer. + Default: None. + conv_first (bool): Whether convolutional layer or interpolation + upsample layer first. Default: False. 
It means interpolation + upsample layer followed by one convolutional layer. + kernel_size (int): Kernel size of the convolutional layer. Default: 1. + stride (int): Stride of the convolutional layer. Default: 1. + padding (int): Padding of the convolutional layer. Default: 1. + upsample_cfg (dict): Interpolation config of the upsample layer. + Default: dict( + scale_factor=2, mode='bilinear', align_corners=False). + """ + + def __init__(self, + in_channels, + out_channels, + with_cp=False, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + *, + conv_cfg=None, + conv_first=False, + kernel_size=1, + stride=1, + padding=0, + upsample_cfg=dict( + scale_factor=2, mode='bilinear', align_corners=False)): + super(InterpConv, self).__init__() + + self.with_cp = with_cp + conv = ConvModule( + in_channels, + out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + upsample = nn.Upsample(**upsample_cfg) + if conv_first: + self.interp_upsample = nn.Sequential(conv, upsample) + else: + self.interp_upsample = nn.Sequential(upsample, conv) + + def forward(self, x): + """Forward function.""" + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(self.interp_upsample, x) + else: + out = self.interp_upsample(x) + return out + + +@BACKBONES.register_module() +class UNet(nn.Module): + """UNet backbone. + U-Net: Convolutional Networks for Biomedical Image Segmentation. + https://arxiv.org/pdf/1505.04597.pdf + + Args: + in_channels (int): Number of input image channels. Default" 3. + base_channels (int): Number of base channels of each stage. + The output channels of the first stage. Default: 64. + num_stages (int): Number of stages in encoder, normally 5. Default: 5. + strides (Sequence[int 1 | 2]): Strides of each stage in encoder. + len(strides) is equal to num_stages. Normally the stride of the + first stage in encoder is 1. If strides[i]=2, it uses stride + convolution to downsample in the correspondence encoder stage. + Default: (1, 1, 1, 1, 1). + enc_num_convs (Sequence[int]): Number of convolutional layers in the + convolution block of the correspondence encoder stage. + Default: (2, 2, 2, 2, 2). + dec_num_convs (Sequence[int]): Number of convolutional layers in the + convolution block of the correspondence decoder stage. + Default: (2, 2, 2, 2). + downsamples (Sequence[int]): Whether use MaxPool to downsample the + feature map after the first stage of encoder + (stages: [1, num_stages)). If the correspondence encoder stage use + stride convolution (strides[i]=2), it will never use MaxPool to + downsample, even downsamples[i-1]=True. + Default: (True, True, True, True). + enc_dilations (Sequence[int]): Dilation rate of each stage in encoder. + Default: (1, 1, 1, 1, 1). + dec_dilations (Sequence[int]): Dilation rate of each stage in decoder. + Default: (1, 1, 1, 1). + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + conv_cfg (dict | None): Config dict for convolution layer. + Default: None. + norm_cfg (dict | None): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict | None): Config dict for activation layer in ConvModule. + Default: dict(type='ReLU'). + upsample_cfg (dict): The upsample config of the upsample module in + decoder. Default: dict(type='InterpConv'). + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). 
Note: Effect on Batch Norm + and its variants only. Default: False. + dcn (bool): Use deformable convolution in convolutional layer or not. + Default: None. + plugins (dict): plugins for convolutional layers. Default: None. + + Notice: + The input image size should be divisible by the whole downsample rate + of the encoder. More detail of the whole downsample rate can be found + in UNet._check_input_divisible. + + """ + + def __init__(self, + in_channels=3, + base_channels=64, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1), + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + upsample_cfg=dict(type='InterpConv'), + norm_eval=False, + dcn=None, + plugins=None): + super(UNet, self).__init__() + assert dcn is None, 'Not implemented yet.' + assert plugins is None, 'Not implemented yet.' + assert len(strides) == num_stages, \ + 'The length of strides should be equal to num_stages, '\ + f'while the strides is {strides}, the length of '\ + f'strides is {len(strides)}, and the num_stages is '\ + f'{num_stages}.' + assert len(enc_num_convs) == num_stages, \ + 'The length of enc_num_convs should be equal to num_stages, '\ + f'while the enc_num_convs is {enc_num_convs}, the length of '\ + f'enc_num_convs is {len(enc_num_convs)}, and the num_stages is '\ + f'{num_stages}.' + assert len(dec_num_convs) == (num_stages-1), \ + 'The length of dec_num_convs should be equal to (num_stages-1), '\ + f'while the dec_num_convs is {dec_num_convs}, the length of '\ + f'dec_num_convs is {len(dec_num_convs)}, and the num_stages is '\ + f'{num_stages}.' + assert len(downsamples) == (num_stages-1), \ + 'The length of downsamples should be equal to (num_stages-1), '\ + f'while the downsamples is {downsamples}, the length of '\ + f'downsamples is {len(downsamples)}, and the num_stages is '\ + f'{num_stages}.' + assert len(enc_dilations) == num_stages, \ + 'The length of enc_dilations should be equal to num_stages, '\ + f'while the enc_dilations is {enc_dilations}, the length of '\ + f'enc_dilations is {len(enc_dilations)}, and the num_stages is '\ + f'{num_stages}.' + assert len(dec_dilations) == (num_stages-1), \ + 'The length of dec_dilations should be equal to (num_stages-1), '\ + f'while the dec_dilations is {dec_dilations}, the length of '\ + f'dec_dilations is {len(dec_dilations)}, and the num_stages is '\ + f'{num_stages}.' 
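+        # encoder stage i doubles the channels (base_channels * 2**i); decoder block i
+        # upsamples and fuses the skip connection from encoder stage i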
+ self.num_stages = num_stages + self.strides = strides + self.downsamples = downsamples + self.norm_eval = norm_eval + self.base_channels = base_channels + + self.encoder = nn.ModuleList() + self.decoder = nn.ModuleList() + + for i in range(num_stages): + enc_conv_block = [] + if i != 0: + if strides[i] == 1 and downsamples[i - 1]: + enc_conv_block.append(nn.MaxPool2d(kernel_size=2)) + upsample = (strides[i] != 1 or downsamples[i - 1]) + self.decoder.append( + UpConvBlock( + conv_block=BasicConvBlock, + in_channels=base_channels * 2**i, + skip_channels=base_channels * 2**(i - 1), + out_channels=base_channels * 2**(i - 1), + num_convs=dec_num_convs[i - 1], + stride=1, + dilation=dec_dilations[i - 1], + with_cp=with_cp, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + upsample_cfg=upsample_cfg if upsample else None, + dcn=None, + plugins=None)) + + enc_conv_block.append( + BasicConvBlock( + in_channels=in_channels, + out_channels=base_channels * 2**i, + num_convs=enc_num_convs[i], + stride=strides[i], + dilation=enc_dilations[i], + with_cp=with_cp, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + dcn=None, + plugins=None)) + self.encoder.append((nn.Sequential(*enc_conv_block))) + in_channels = base_channels * 2**i + + def forward(self, x): + self._check_input_divisible(x) + enc_outs = [] + for enc in self.encoder: + x = enc(x) + enc_outs.append(x) + dec_outs = [x] + for i in reversed(range(len(self.decoder))): + x = self.decoder[i](enc_outs[i], x) + dec_outs.append(x) + + return dec_outs + + def train(self, mode=True): + """Convert the model into training mode while keep normalization layer + freezed.""" + super(UNet, self).train(mode) + if mode and self.norm_eval: + for m in self.modules(): + # trick: eval have effect on BatchNorm only + if isinstance(m, _BatchNorm): + m.eval() + + def _check_input_divisible(self, x): + h, w = x.shape[-2:] + whole_downsample_rate = 1 + for i in range(1, self.num_stages): + if self.strides[i] == 2 or self.downsamples[i - 1]: + whole_downsample_rate *= 2 + assert (h % whole_downsample_rate == 0) \ + and (w % whole_downsample_rate == 0),\ + f'The input image size {(h, w)} should be divisible by the whole '\ + f'downsample rate {whole_downsample_rate}, when num_stages is '\ + f'{self.num_stages}, strides is {self.strides}, and downsamples '\ + f'is {self.downsamples}.' + + def init_weights(self, pretrained=None): + """Initialize the weights in backbone. + + Args: + pretrained (str, optional): Path to pre-trained weights. + Defaults to None. 
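[editor's sketch] A quick sketch of the rule enforced by _check_input_divisible under the default configuration: with strides=(1, 1, 1, 1, 1) and downsamples=(True, True, True, True) the feature map is halved four times, so both H and W must be divisible by 16.

strides = (1, 1, 1, 1, 1)
downsamples = (True, True, True, True)

whole_downsample_rate = 1
for i in range(1, len(strides)):
    if strides[i] == 2 or downsamples[i - 1]:
        whole_downsample_rate *= 2

print(whole_downsample_rate)  # 16 -> a 512x512 input passes the check, 500x500 does not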
+ """ + if isinstance(pretrained, str): + logger = get_root_logger() + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + for m in self.modules(): + if isinstance(m, nn.Conv2d): + kaiming_init(m) + elif isinstance(m, (_BatchNorm, nn.GroupNorm)): + constant_init(m, 1) + else: + raise TypeError('pretrained must be a str or None') diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/backbones/vit.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/backbones/vit.py new file mode 100644 index 0000000000000000000000000000000000000000..ab1a393741b21c8185f4204946b751b1913ef98c --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/backbones/vit.py @@ -0,0 +1,459 @@ +"""Modified from https://github.com/rwightman/pytorch-image- +models/blob/master/timm/models/vision_transformer.py.""" + +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.checkpoint as cp +from annotator.mmpkg.mmcv.cnn import (Conv2d, Linear, build_activation_layer, build_norm_layer, + constant_init, kaiming_init, normal_init) +from annotator.mmpkg.mmcv.runner import _load_checkpoint +from annotator.mmpkg.mmcv.utils.parrots_wrapper import _BatchNorm + +from annotator.mmpkg.mmseg.utils import get_root_logger +from ..builder import BACKBONES +from ..utils import DropPath, trunc_normal_ + + +class Mlp(nn.Module): + """MLP layer for Encoder block. + + Args: + in_features(int): Input dimension for the first fully + connected layer. + hidden_features(int): Output dimension for the first fully + connected layer. + out_features(int): Output dementsion for the second fully + connected layer. + act_cfg(dict): Config dict for activation layer. + Default: dict(type='GELU'). + drop(float): Drop rate for the dropout layer. Dropout rate has + to be between 0 and 1. Default: 0. + """ + + def __init__(self, + in_features, + hidden_features=None, + out_features=None, + act_cfg=dict(type='GELU'), + drop=0.): + super(Mlp, self).__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = Linear(in_features, hidden_features) + self.act = build_activation_layer(act_cfg) + self.fc2 = Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +class Attention(nn.Module): + """Attention layer for Encoder block. + + Args: + dim (int): Dimension for the input vector. + num_heads (int): Number of parallel attention heads. + qkv_bias (bool): Enable bias for qkv if True. Default: False. + qk_scale (float): Override default qk scale of head_dim ** -0.5 if set. + attn_drop (float): Drop rate for attention output weights. + Default: 0. + proj_drop (float): Drop rate for output weights. Default: 0. 
+ """ + + def __init__(self, + dim, + num_heads=8, + qkv_bias=False, + qk_scale=None, + attn_drop=0., + proj_drop=0.): + super(Attention, self).__init__() + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = qk_scale or head_dim**-0.5 + + self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + + def forward(self, x): + b, n, c = x.shape + qkv = self.qkv(x).reshape(b, n, 3, self.num_heads, + c // self.num_heads).permute(2, 0, 3, 1, 4) + q, k, v = qkv[0], qkv[1], qkv[2] + + attn = (q @ k.transpose(-2, -1)) * self.scale + attn = attn.softmax(dim=-1) + attn = self.attn_drop(attn) + + x = (attn @ v).transpose(1, 2).reshape(b, n, c) + x = self.proj(x) + x = self.proj_drop(x) + return x + + +class Block(nn.Module): + """Implements encoder block with residual connection. + + Args: + dim (int): The feature dimension. + num_heads (int): Number of parallel attention heads. + mlp_ratio (int): Ratio of mlp hidden dim to embedding dim. + qk_scale (float): Override default qk scale of head_dim ** -0.5 if set. + drop (float): Drop rate for mlp output weights. Default: 0. + attn_drop (float): Drop rate for attention output weights. + Default: 0. + proj_drop (float): Drop rate for attn layer output weights. + Default: 0. + drop_path (float): Drop rate for paths of model. + Default: 0. + act_cfg (dict): Config dict for activation layer. + Default: dict(type='GELU'). + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='LN', requires_grad=True). + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + """ + + def __init__(self, + dim, + num_heads, + mlp_ratio=4, + qkv_bias=False, + qk_scale=None, + drop=0., + attn_drop=0., + proj_drop=0., + drop_path=0., + act_cfg=dict(type='GELU'), + norm_cfg=dict(type='LN', eps=1e-6), + with_cp=False): + super(Block, self).__init__() + self.with_cp = with_cp + _, self.norm1 = build_norm_layer(norm_cfg, dim) + self.attn = Attention(dim, num_heads, qkv_bias, qk_scale, attn_drop, + proj_drop) + self.drop_path = DropPath( + drop_path) if drop_path > 0. else nn.Identity() + _, self.norm2 = build_norm_layer(norm_cfg, dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp( + in_features=dim, + hidden_features=mlp_hidden_dim, + act_cfg=act_cfg, + drop=drop) + + def forward(self, x): + + def _inner_forward(x): + out = x + self.drop_path(self.attn(self.norm1(x))) + out = out + self.drop_path(self.mlp(self.norm2(out))) + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + return out + + +class PatchEmbed(nn.Module): + """Image to Patch Embedding. + + Args: + img_size (int | tuple): Input image size. + default: 224. + patch_size (int): Width and height for a patch. + default: 16. + in_channels (int): Input channels for images. Default: 3. + embed_dim (int): The embedding dimension. Default: 768. 
+ """ + + def __init__(self, + img_size=224, + patch_size=16, + in_channels=3, + embed_dim=768): + super(PatchEmbed, self).__init__() + if isinstance(img_size, int): + self.img_size = (img_size, img_size) + elif isinstance(img_size, tuple): + self.img_size = img_size + else: + raise TypeError('img_size must be type of int or tuple') + h, w = self.img_size + self.patch_size = (patch_size, patch_size) + self.num_patches = (h // patch_size) * (w // patch_size) + self.proj = Conv2d( + in_channels, embed_dim, kernel_size=patch_size, stride=patch_size) + + def forward(self, x): + return self.proj(x).flatten(2).transpose(1, 2) + + +@BACKBONES.register_module() +class VisionTransformer(nn.Module): + """Vision transformer backbone. + + A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for + Image Recognition at Scale` - https://arxiv.org/abs/2010.11929 + + Args: + img_size (tuple): input image size. Default: (224, 224). + patch_size (int, tuple): patch size. Default: 16. + in_channels (int): number of input channels. Default: 3. + embed_dim (int): embedding dimension. Default: 768. + depth (int): depth of transformer. Default: 12. + num_heads (int): number of attention heads. Default: 12. + mlp_ratio (int): ratio of mlp hidden dim to embedding dim. + Default: 4. + out_indices (list | tuple | int): Output from which stages. + Default: -1. + qkv_bias (bool): enable bias for qkv if True. Default: True. + qk_scale (float): override default qk scale of head_dim ** -0.5 if set. + drop_rate (float): dropout rate. Default: 0. + attn_drop_rate (float): attention dropout rate. Default: 0. + drop_path_rate (float): Rate of DropPath. Default: 0. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='LN', eps=1e-6, requires_grad=True). + act_cfg (dict): Config dict for activation layer. + Default: dict(type='GELU'). + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. Default: False. + final_norm (bool): Whether to add a additional layer to normalize + final feature map. Default: False. + interpolate_mode (str): Select the interpolate mode for position + embeding vector resize. Default: bicubic. + with_cls_token (bool): If concatenating class token into image tokens + as transformer input. Default: True. + with_cp (bool): Use checkpoint or not. Using checkpoint + will save some memory while slowing down the training speed. + Default: False. 
+ """ + + def __init__(self, + img_size=(224, 224), + patch_size=16, + in_channels=3, + embed_dim=768, + depth=12, + num_heads=12, + mlp_ratio=4, + out_indices=11, + qkv_bias=True, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0., + norm_cfg=dict(type='LN', eps=1e-6, requires_grad=True), + act_cfg=dict(type='GELU'), + norm_eval=False, + final_norm=False, + with_cls_token=True, + interpolate_mode='bicubic', + with_cp=False): + super(VisionTransformer, self).__init__() + self.img_size = img_size + self.patch_size = patch_size + self.features = self.embed_dim = embed_dim + self.patch_embed = PatchEmbed( + img_size=img_size, + patch_size=patch_size, + in_channels=in_channels, + embed_dim=embed_dim) + + self.with_cls_token = with_cls_token + self.cls_token = nn.Parameter(torch.zeros(1, 1, self.embed_dim)) + self.pos_embed = nn.Parameter( + torch.zeros(1, self.patch_embed.num_patches + 1, embed_dim)) + self.pos_drop = nn.Dropout(p=drop_rate) + + if isinstance(out_indices, int): + self.out_indices = [out_indices] + elif isinstance(out_indices, list) or isinstance(out_indices, tuple): + self.out_indices = out_indices + else: + raise TypeError('out_indices must be type of int, list or tuple') + + dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth) + ] # stochastic depth decay rule + self.blocks = nn.ModuleList([ + Block( + dim=embed_dim, + num_heads=num_heads, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=dpr[i], + attn_drop=attn_drop_rate, + act_cfg=act_cfg, + norm_cfg=norm_cfg, + with_cp=with_cp) for i in range(depth) + ]) + + self.interpolate_mode = interpolate_mode + self.final_norm = final_norm + if final_norm: + _, self.norm = build_norm_layer(norm_cfg, embed_dim) + + self.norm_eval = norm_eval + self.with_cp = with_cp + + def init_weights(self, pretrained=None): + if isinstance(pretrained, str): + logger = get_root_logger() + checkpoint = _load_checkpoint(pretrained, logger=logger) + if 'state_dict' in checkpoint: + state_dict = checkpoint['state_dict'] + else: + state_dict = checkpoint + + if 'pos_embed' in state_dict.keys(): + if self.pos_embed.shape != state_dict['pos_embed'].shape: + logger.info(msg=f'Resize the pos_embed shape from \ +{state_dict["pos_embed"].shape} to {self.pos_embed.shape}') + h, w = self.img_size + pos_size = int( + math.sqrt(state_dict['pos_embed'].shape[1] - 1)) + state_dict['pos_embed'] = self.resize_pos_embed( + state_dict['pos_embed'], (h, w), (pos_size, pos_size), + self.patch_size, self.interpolate_mode) + + self.load_state_dict(state_dict, False) + + elif pretrained is None: + # We only implement the 'jax_impl' initialization implemented at + # https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py#L353 # noqa: E501 + trunc_normal_(self.pos_embed, std=.02) + trunc_normal_(self.cls_token, std=.02) + for n, m in self.named_modules(): + if isinstance(m, Linear): + trunc_normal_(m.weight, std=.02) + if m.bias is not None: + if 'mlp' in n: + normal_init(m.bias, std=1e-6) + else: + constant_init(m.bias, 0) + elif isinstance(m, Conv2d): + kaiming_init(m.weight, mode='fan_in') + if m.bias is not None: + constant_init(m.bias, 0) + elif isinstance(m, (_BatchNorm, nn.GroupNorm, nn.LayerNorm)): + constant_init(m.bias, 0) + constant_init(m.weight, 1.0) + else: + raise TypeError('pretrained must be a str or None') + + def _pos_embeding(self, img, patched_img, pos_embed): + """Positiong embeding method. 
+ + Resize the pos_embed, if the input image size doesn't match + the training size. + Args: + img (torch.Tensor): The inference image tensor, the shape + must be [B, C, H, W]. + patched_img (torch.Tensor): The patched image, it should be + shape of [B, L1, C]. + pos_embed (torch.Tensor): The pos_embed weighs, it should be + shape of [B, L2, c]. + Return: + torch.Tensor: The pos encoded image feature. + """ + assert patched_img.ndim == 3 and pos_embed.ndim == 3, \ + 'the shapes of patched_img and pos_embed must be [B, L, C]' + x_len, pos_len = patched_img.shape[1], pos_embed.shape[1] + if x_len != pos_len: + if pos_len == (self.img_size[0] // self.patch_size) * ( + self.img_size[1] // self.patch_size) + 1: + pos_h = self.img_size[0] // self.patch_size + pos_w = self.img_size[1] // self.patch_size + else: + raise ValueError( + 'Unexpected shape of pos_embed, got {}.'.format( + pos_embed.shape)) + pos_embed = self.resize_pos_embed(pos_embed, img.shape[2:], + (pos_h, pos_w), self.patch_size, + self.interpolate_mode) + return self.pos_drop(patched_img + pos_embed) + + @staticmethod + def resize_pos_embed(pos_embed, input_shpae, pos_shape, patch_size, mode): + """Resize pos_embed weights. + + Resize pos_embed using bicubic interpolate method. + Args: + pos_embed (torch.Tensor): pos_embed weights. + input_shpae (tuple): Tuple for (input_h, intput_w). + pos_shape (tuple): Tuple for (pos_h, pos_w). + patch_size (int): Patch size. + Return: + torch.Tensor: The resized pos_embed of shape [B, L_new, C] + """ + assert pos_embed.ndim == 3, 'shape of pos_embed must be [B, L, C]' + input_h, input_w = input_shpae + pos_h, pos_w = pos_shape + cls_token_weight = pos_embed[:, 0] + pos_embed_weight = pos_embed[:, (-1 * pos_h * pos_w):] + pos_embed_weight = pos_embed_weight.reshape( + 1, pos_h, pos_w, pos_embed.shape[2]).permute(0, 3, 1, 2) + pos_embed_weight = F.interpolate( + pos_embed_weight, + size=[input_h // patch_size, input_w // patch_size], + align_corners=False, + mode=mode) + cls_token_weight = cls_token_weight.unsqueeze(1) + pos_embed_weight = torch.flatten(pos_embed_weight, 2).transpose(1, 2) + pos_embed = torch.cat((cls_token_weight, pos_embed_weight), dim=1) + return pos_embed + + def forward(self, inputs): + B = inputs.shape[0] + + x = self.patch_embed(inputs) + + cls_tokens = self.cls_token.expand(B, -1, -1) + x = torch.cat((cls_tokens, x), dim=1) + x = self._pos_embeding(inputs, x, self.pos_embed) + + if not self.with_cls_token: + # Remove class token for transformer input + x = x[:, 1:] + + outs = [] + for i, blk in enumerate(self.blocks): + x = blk(x) + if i == len(self.blocks) - 1: + if self.final_norm: + x = self.norm(x) + if i in self.out_indices: + if self.with_cls_token: + # Remove class token and reshape token for decoder head + out = x[:, 1:] + else: + out = x + B, _, C = out.shape + out = out.reshape(B, inputs.shape[2] // self.patch_size, + inputs.shape[3] // self.patch_size, + C).permute(0, 3, 1, 2) + outs.append(out) + + return tuple(outs) + + def train(self, mode=True): + super(VisionTransformer, self).train(mode) + if mode and self.norm_eval: + for m in self.modules(): + if isinstance(m, nn.LayerNorm): + m.eval() diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/builder.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/builder.py new file mode 100644 index 0000000000000000000000000000000000000000..fd29ff66d523b854c739b580137db6f4155fc550 --- /dev/null +++ 
b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/builder.py @@ -0,0 +1,46 @@ +import warnings + +from annotator.mmpkg.mmcv.cnn import MODELS as MMCV_MODELS +from annotator.mmpkg.mmcv.utils import Registry + +MODELS = Registry('models', parent=MMCV_MODELS) + +BACKBONES = MODELS +NECKS = MODELS +HEADS = MODELS +LOSSES = MODELS +SEGMENTORS = MODELS + + +def build_backbone(cfg): + """Build backbone.""" + return BACKBONES.build(cfg) + + +def build_neck(cfg): + """Build neck.""" + return NECKS.build(cfg) + + +def build_head(cfg): + """Build head.""" + return HEADS.build(cfg) + + +def build_loss(cfg): + """Build loss.""" + return LOSSES.build(cfg) + + +def build_segmentor(cfg, train_cfg=None, test_cfg=None): + """Build segmentor.""" + if train_cfg is not None or test_cfg is not None: + warnings.warn( + 'train_cfg and test_cfg is deprecated, ' + 'please specify them in model', UserWarning) + assert cfg.get('train_cfg') is None or train_cfg is None, \ + 'train_cfg specified in both outer field and model field ' + assert cfg.get('test_cfg') is None or test_cfg is None, \ + 'test_cfg specified in both outer field and model field ' + return SEGMENTORS.build( + cfg, default_args=dict(train_cfg=train_cfg, test_cfg=test_cfg)) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ac66d3cfe0ea04af45c0f3594bf135841c3812e3 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/__init__.py @@ -0,0 +1,28 @@ +from .ann_head import ANNHead +from .apc_head import APCHead +from .aspp_head import ASPPHead +from .cc_head import CCHead +from .da_head import DAHead +from .dm_head import DMHead +from .dnl_head import DNLHead +from .ema_head import EMAHead +from .enc_head import EncHead +from .fcn_head import FCNHead +from .fpn_head import FPNHead +from .gc_head import GCHead +from .lraspp_head import LRASPPHead +from .nl_head import NLHead +from .ocr_head import OCRHead +# from .point_head import PointHead +from .psa_head import PSAHead +from .psp_head import PSPHead +from .sep_aspp_head import DepthwiseSeparableASPPHead +from .sep_fcn_head import DepthwiseSeparableFCNHead +from .uper_head import UPerHead + +__all__ = [ + 'FCNHead', 'PSPHead', 'ASPPHead', 'PSAHead', 'NLHead', 'GCHead', 'CCHead', + 'UPerHead', 'DepthwiseSeparableASPPHead', 'ANNHead', 'DAHead', 'OCRHead', + 'EncHead', 'DepthwiseSeparableFCNHead', 'FPNHead', 'EMAHead', 'DNLHead', + 'APCHead', 'DMHead', 'LRASPPHead' +] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/ann_head.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/ann_head.py new file mode 100644 index 0000000000000000000000000000000000000000..958c88e0ca4b9acdaf146b836462b9a101b2cdad --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/ann_head.py @@ -0,0 +1,245 @@ +import torch +import torch.nn as nn +from annotator.mmpkg.mmcv.cnn import ConvModule + +from ..builder import HEADS +from ..utils import SelfAttentionBlock as _SelfAttentionBlock +from .decode_head import BaseDecodeHead + + +class PPMConcat(nn.ModuleList): + """Pyramid Pooling Module that only concat the features of each layer. 
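[editor's sketch] A hypothetical usage sketch for this builder, assuming the vendored annotator.mmpkg package is importable and that importing the backbones subpackage triggers registration (the usual mmsegmentation convention); real configs typically carry more fields than shown here.

from annotator.mmpkg.mmseg.models import backbones  # noqa: F401  (assumption: importing this registers UNet etc.)
from annotator.mmpkg.mmseg.models.builder import build_backbone

backbone = build_backbone(dict(type='UNet'))  # all-default 5-stage UNet from unet.py above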
+ + Args: + pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module. + """ + + def __init__(self, pool_scales=(1, 3, 6, 8)): + super(PPMConcat, self).__init__( + [nn.AdaptiveAvgPool2d(pool_scale) for pool_scale in pool_scales]) + + def forward(self, feats): + """Forward function.""" + ppm_outs = [] + for ppm in self: + ppm_out = ppm(feats) + ppm_outs.append(ppm_out.view(*feats.shape[:2], -1)) + concat_outs = torch.cat(ppm_outs, dim=2) + return concat_outs + + +class SelfAttentionBlock(_SelfAttentionBlock): + """Make a ANN used SelfAttentionBlock. + + Args: + low_in_channels (int): Input channels of lower level feature, + which is the key feature for self-attention. + high_in_channels (int): Input channels of higher level feature, + which is the query feature for self-attention. + channels (int): Output channels of key/query transform. + out_channels (int): Output channels. + share_key_query (bool): Whether share projection weight between key + and query projection. + query_scale (int): The scale of query feature map. + key_pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module of key feature. + conv_cfg (dict|None): Config of conv layers. + norm_cfg (dict|None): Config of norm layers. + act_cfg (dict|None): Config of activation layers. + """ + + def __init__(self, low_in_channels, high_in_channels, channels, + out_channels, share_key_query, query_scale, key_pool_scales, + conv_cfg, norm_cfg, act_cfg): + key_psp = PPMConcat(key_pool_scales) + if query_scale > 1: + query_downsample = nn.MaxPool2d(kernel_size=query_scale) + else: + query_downsample = None + super(SelfAttentionBlock, self).__init__( + key_in_channels=low_in_channels, + query_in_channels=high_in_channels, + channels=channels, + out_channels=out_channels, + share_key_query=share_key_query, + query_downsample=query_downsample, + key_downsample=key_psp, + key_query_num_convs=1, + key_query_norm=True, + value_out_num_convs=1, + value_out_norm=False, + matmul_norm=True, + with_out=True, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + + +class AFNB(nn.Module): + """Asymmetric Fusion Non-local Block(AFNB) + + Args: + low_in_channels (int): Input channels of lower level feature, + which is the key feature for self-attention. + high_in_channels (int): Input channels of higher level feature, + which is the query feature for self-attention. + channels (int): Output channels of key/query transform. + out_channels (int): Output channels. + and query projection. + query_scales (tuple[int]): The scales of query feature map. + Default: (1,) + key_pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module of key feature. + conv_cfg (dict|None): Config of conv layers. + norm_cfg (dict|None): Config of norm layers. + act_cfg (dict|None): Config of activation layers. 
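[editor's sketch] A standalone sketch of PPMConcat with the default pool scales (1, 3, 6, 8), assuming a 256-channel feature map: each scale contributes scale**2 pooled positions, 1 + 9 + 36 + 64 = 110 in total.

import torch
import torch.nn as nn

feats = torch.randn(2, 256, 32, 32)
outs = [nn.AdaptiveAvgPool2d(s)(feats).view(2, 256, -1) for s in (1, 3, 6, 8)]
print(torch.cat(outs, dim=2).shape)  # torch.Size([2, 256, 110])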
+ """ + + def __init__(self, low_in_channels, high_in_channels, channels, + out_channels, query_scales, key_pool_scales, conv_cfg, + norm_cfg, act_cfg): + super(AFNB, self).__init__() + self.stages = nn.ModuleList() + for query_scale in query_scales: + self.stages.append( + SelfAttentionBlock( + low_in_channels=low_in_channels, + high_in_channels=high_in_channels, + channels=channels, + out_channels=out_channels, + share_key_query=False, + query_scale=query_scale, + key_pool_scales=key_pool_scales, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + self.bottleneck = ConvModule( + out_channels + high_in_channels, + out_channels, + 1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=None) + + def forward(self, low_feats, high_feats): + """Forward function.""" + priors = [stage(high_feats, low_feats) for stage in self.stages] + context = torch.stack(priors, dim=0).sum(dim=0) + output = self.bottleneck(torch.cat([context, high_feats], 1)) + return output + + +class APNB(nn.Module): + """Asymmetric Pyramid Non-local Block (APNB) + + Args: + in_channels (int): Input channels of key/query feature, + which is the key feature for self-attention. + channels (int): Output channels of key/query transform. + out_channels (int): Output channels. + query_scales (tuple[int]): The scales of query feature map. + Default: (1,) + key_pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module of key feature. + conv_cfg (dict|None): Config of conv layers. + norm_cfg (dict|None): Config of norm layers. + act_cfg (dict|None): Config of activation layers. + """ + + def __init__(self, in_channels, channels, out_channels, query_scales, + key_pool_scales, conv_cfg, norm_cfg, act_cfg): + super(APNB, self).__init__() + self.stages = nn.ModuleList() + for query_scale in query_scales: + self.stages.append( + SelfAttentionBlock( + low_in_channels=in_channels, + high_in_channels=in_channels, + channels=channels, + out_channels=out_channels, + share_key_query=True, + query_scale=query_scale, + key_pool_scales=key_pool_scales, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + self.bottleneck = ConvModule( + 2 * in_channels, + out_channels, + 1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + + def forward(self, feats): + """Forward function.""" + priors = [stage(feats, feats) for stage in self.stages] + context = torch.stack(priors, dim=0).sum(dim=0) + output = self.bottleneck(torch.cat([context, feats], 1)) + return output + + +@HEADS.register_module() +class ANNHead(BaseDecodeHead): + """Asymmetric Non-local Neural Networks for Semantic Segmentation. + + This head is the implementation of `ANNNet + `_. + + Args: + project_channels (int): Projection channels for Nonlocal. + query_scales (tuple[int]): The scales of query feature map. + Default: (1,) + key_pool_scales (tuple[int]): The pooling scales of key feature map. + Default: (1, 3, 6, 8). 
+ """ + + def __init__(self, + project_channels, + query_scales=(1, ), + key_pool_scales=(1, 3, 6, 8), + **kwargs): + super(ANNHead, self).__init__( + input_transform='multiple_select', **kwargs) + assert len(self.in_channels) == 2 + low_in_channels, high_in_channels = self.in_channels + self.project_channels = project_channels + self.fusion = AFNB( + low_in_channels=low_in_channels, + high_in_channels=high_in_channels, + out_channels=high_in_channels, + channels=project_channels, + query_scales=query_scales, + key_pool_scales=key_pool_scales, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.bottleneck = ConvModule( + high_in_channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.context = APNB( + in_channels=self.channels, + out_channels=self.channels, + channels=project_channels, + query_scales=query_scales, + key_pool_scales=key_pool_scales, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, inputs): + """Forward function.""" + low_feats, high_feats = self._transform_inputs(inputs) + output = self.fusion(low_feats, high_feats) + output = self.dropout(output) + output = self.bottleneck(output) + output = self.context(output) + output = self.cls_seg(output) + + return output diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/apc_head.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/apc_head.py new file mode 100644 index 0000000000000000000000000000000000000000..4f363dba391c3eb6fb5a4d61c145fd4976a5717d --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/apc_head.py @@ -0,0 +1,158 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from annotator.mmpkg.mmcv.cnn import ConvModule + +from annotator.mmpkg.mmseg.ops import resize +from ..builder import HEADS +from .decode_head import BaseDecodeHead + + +class ACM(nn.Module): + """Adaptive Context Module used in APCNet. + + Args: + pool_scale (int): Pooling scale used in Adaptive Context + Module to extract region features. + fusion (bool): Add one conv to fuse residual feature. + in_channels (int): Input channels. + channels (int): Channels after modules, before conv_seg. + conv_cfg (dict | None): Config of conv layers. + norm_cfg (dict | None): Config of norm layers. + act_cfg (dict): Config of activation layers. 
+ """ + + def __init__(self, pool_scale, fusion, in_channels, channels, conv_cfg, + norm_cfg, act_cfg): + super(ACM, self).__init__() + self.pool_scale = pool_scale + self.fusion = fusion + self.in_channels = in_channels + self.channels = channels + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.pooled_redu_conv = ConvModule( + self.in_channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + self.input_redu_conv = ConvModule( + self.in_channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + self.global_info = ConvModule( + self.channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + self.gla = nn.Conv2d(self.channels, self.pool_scale**2, 1, 1, 0) + + self.residual_conv = ConvModule( + self.channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + if self.fusion: + self.fusion_conv = ConvModule( + self.channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, x): + """Forward function.""" + pooled_x = F.adaptive_avg_pool2d(x, self.pool_scale) + # [batch_size, channels, h, w] + x = self.input_redu_conv(x) + # [batch_size, channels, pool_scale, pool_scale] + pooled_x = self.pooled_redu_conv(pooled_x) + batch_size = x.size(0) + # [batch_size, pool_scale * pool_scale, channels] + pooled_x = pooled_x.view(batch_size, self.channels, + -1).permute(0, 2, 1).contiguous() + # [batch_size, h * w, pool_scale * pool_scale] + affinity_matrix = self.gla(x + resize( + self.global_info(F.adaptive_avg_pool2d(x, 1)), size=x.shape[2:]) + ).permute(0, 2, 3, 1).reshape( + batch_size, -1, self.pool_scale**2) + affinity_matrix = F.sigmoid(affinity_matrix) + # [batch_size, h * w, channels] + z_out = torch.matmul(affinity_matrix, pooled_x) + # [batch_size, channels, h * w] + z_out = z_out.permute(0, 2, 1).contiguous() + # [batch_size, channels, h, w] + z_out = z_out.view(batch_size, self.channels, x.size(2), x.size(3)) + z_out = self.residual_conv(z_out) + z_out = F.relu(z_out + x) + if self.fusion: + z_out = self.fusion_conv(z_out) + + return z_out + + +@HEADS.register_module() +class APCHead(BaseDecodeHead): + """Adaptive Pyramid Context Network for Semantic Segmentation. + + This head is the implementation of + `APCNet `_. + + Args: + pool_scales (tuple[int]): Pooling scales used in Adaptive Context + Module. Default: (1, 2, 3, 6). + fusion (bool): Add one conv to fuse residual feature. 
+ """ + + def __init__(self, pool_scales=(1, 2, 3, 6), fusion=True, **kwargs): + super(APCHead, self).__init__(**kwargs) + assert isinstance(pool_scales, (list, tuple)) + self.pool_scales = pool_scales + self.fusion = fusion + acm_modules = [] + for pool_scale in self.pool_scales: + acm_modules.append( + ACM(pool_scale, + self.fusion, + self.in_channels, + self.channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + self.acm_modules = nn.ModuleList(acm_modules) + self.bottleneck = ConvModule( + self.in_channels + len(pool_scales) * self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + acm_outs = [x] + for acm_module in self.acm_modules: + acm_outs.append(acm_module(x)) + acm_outs = torch.cat(acm_outs, dim=1) + output = self.bottleneck(acm_outs) + output = self.cls_seg(output) + return output diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/aspp_head.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/aspp_head.py new file mode 100644 index 0000000000000000000000000000000000000000..3c0aadb2b097a604d96ba1c99c05663b7884b6e0 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/aspp_head.py @@ -0,0 +1,107 @@ +import torch +import torch.nn as nn +from annotator.mmpkg.mmcv.cnn import ConvModule + +from annotator.mmpkg.mmseg.ops import resize +from ..builder import HEADS +from .decode_head import BaseDecodeHead + + +class ASPPModule(nn.ModuleList): + """Atrous Spatial Pyramid Pooling (ASPP) Module. + + Args: + dilations (tuple[int]): Dilation rate of each layer. + in_channels (int): Input channels. + channels (int): Channels after modules, before conv_seg. + conv_cfg (dict|None): Config of conv layers. + norm_cfg (dict|None): Config of norm layers. + act_cfg (dict): Config of activation layers. + """ + + def __init__(self, dilations, in_channels, channels, conv_cfg, norm_cfg, + act_cfg): + super(ASPPModule, self).__init__() + self.dilations = dilations + self.in_channels = in_channels + self.channels = channels + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + for dilation in dilations: + self.append( + ConvModule( + self.in_channels, + self.channels, + 1 if dilation == 1 else 3, + dilation=dilation, + padding=0 if dilation == 1 else dilation, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + + def forward(self, x): + """Forward function.""" + aspp_outs = [] + for aspp_module in self: + aspp_outs.append(aspp_module(x)) + + return aspp_outs + + +@HEADS.register_module() +class ASPPHead(BaseDecodeHead): + """Rethinking Atrous Convolution for Semantic Image Segmentation. + + This head is the implementation of `DeepLabV3 + `_. + + Args: + dilations (tuple[int]): Dilation rates for ASPP module. + Default: (1, 6, 12, 18). 
+ """ + + def __init__(self, dilations=(1, 6, 12, 18), **kwargs): + super(ASPPHead, self).__init__(**kwargs) + assert isinstance(dilations, (list, tuple)) + self.dilations = dilations + self.image_pool = nn.Sequential( + nn.AdaptiveAvgPool2d(1), + ConvModule( + self.in_channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + self.aspp_modules = ASPPModule( + dilations, + self.in_channels, + self.channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.bottleneck = ConvModule( + (len(dilations) + 1) * self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + aspp_outs = [ + resize( + self.image_pool(x), + size=x.size()[2:], + mode='bilinear', + align_corners=self.align_corners) + ] + aspp_outs.extend(self.aspp_modules(x)) + aspp_outs = torch.cat(aspp_outs, dim=1) + output = self.bottleneck(aspp_outs) + output = self.cls_seg(output) + return output diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/cascade_decode_head.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/cascade_decode_head.py new file mode 100644 index 0000000000000000000000000000000000000000..d02122ca0e68743b1bf7a893afae96042f23838c --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/cascade_decode_head.py @@ -0,0 +1,57 @@ +from abc import ABCMeta, abstractmethod + +from .decode_head import BaseDecodeHead + + +class BaseCascadeDecodeHead(BaseDecodeHead, metaclass=ABCMeta): + """Base class for cascade decode head used in + :class:`CascadeEncoderDecoder.""" + + def __init__(self, *args, **kwargs): + super(BaseCascadeDecodeHead, self).__init__(*args, **kwargs) + + @abstractmethod + def forward(self, inputs, prev_output): + """Placeholder of forward function.""" + pass + + def forward_train(self, inputs, prev_output, img_metas, gt_semantic_seg, + train_cfg): + """Forward function for training. + Args: + inputs (list[Tensor]): List of multi-level img features. + prev_output (Tensor): The output of previous decode head. + img_metas (list[dict]): List of image info dict where each dict + has: 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:Collect`. + gt_semantic_seg (Tensor): Semantic segmentation masks + used if the architecture supports semantic segmentation task. + train_cfg (dict): The training config. + + Returns: + dict[str, Tensor]: a dictionary of loss components + """ + seg_logits = self.forward(inputs, prev_output) + losses = self.losses(seg_logits, gt_semantic_seg) + + return losses + + def forward_test(self, inputs, prev_output, img_metas, test_cfg): + """Forward function for testing. + + Args: + inputs (list[Tensor]): List of multi-level img features. + prev_output (Tensor): The output of previous decode head. + img_metas (list[dict]): List of image info dict where each dict + has: 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:Collect`. + test_cfg (dict): The testing config. 
+ + Returns: + Tensor: Output segmentation map. + """ + return self.forward(inputs, prev_output) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/cc_head.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/cc_head.py new file mode 100644 index 0000000000000000000000000000000000000000..1f4f5b052445a4071952aa04274274da7d897c2c --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/cc_head.py @@ -0,0 +1,45 @@ +import torch + +from ..builder import HEADS +from .fcn_head import FCNHead + +try: + try: + from mmcv.ops import CrissCrossAttention + except ImportError: + from annotator.mmpkg.mmcv.ops import CrissCrossAttention +except ModuleNotFoundError: + CrissCrossAttention = None + + +@HEADS.register_module() +class CCHead(FCNHead): + """CCNet: Criss-Cross Attention for Semantic Segmentation. + + This head is the implementation of `CCNet + `_. + + Args: + recurrence (int): Number of recurrence of Criss Cross Attention + module. Default: 2. + """ + + def __init__(self, recurrence=2, **kwargs): + if CrissCrossAttention is None: + raise RuntimeError('Please install mmcv-full for ' + 'CrissCrossAttention ops') + super(CCHead, self).__init__(num_convs=2, **kwargs) + self.recurrence = recurrence + self.cca = CrissCrossAttention(self.channels) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + output = self.convs[0](x) + for _ in range(self.recurrence): + output = self.cca(output) + output = self.convs[1](output) + if self.concat_input: + output = self.conv_cat(torch.cat([x, output], dim=1)) + output = self.cls_seg(output) + return output diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/da_head.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/da_head.py new file mode 100644 index 0000000000000000000000000000000000000000..b0b7616501c04cc0faf92accac9d3fdb6807f9e1 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/da_head.py @@ -0,0 +1,178 @@ +import torch +import torch.nn.functional as F +from annotator.mmpkg.mmcv.cnn import ConvModule, Scale +from torch import nn + +from annotator.mmpkg.mmseg.core import add_prefix +from ..builder import HEADS +from ..utils import SelfAttentionBlock as _SelfAttentionBlock +from .decode_head import BaseDecodeHead + + +class PAM(_SelfAttentionBlock): + """Position Attention Module (PAM) + + Args: + in_channels (int): Input channels of key/query feature. + channels (int): Output channels of key/query transform. 
+ """ + + def __init__(self, in_channels, channels): + super(PAM, self).__init__( + key_in_channels=in_channels, + query_in_channels=in_channels, + channels=channels, + out_channels=in_channels, + share_key_query=False, + query_downsample=None, + key_downsample=None, + key_query_num_convs=1, + key_query_norm=False, + value_out_num_convs=1, + value_out_norm=False, + matmul_norm=False, + with_out=False, + conv_cfg=None, + norm_cfg=None, + act_cfg=None) + + self.gamma = Scale(0) + + def forward(self, x): + """Forward function.""" + out = super(PAM, self).forward(x, x) + + out = self.gamma(out) + x + return out + + +class CAM(nn.Module): + """Channel Attention Module (CAM)""" + + def __init__(self): + super(CAM, self).__init__() + self.gamma = Scale(0) + + def forward(self, x): + """Forward function.""" + batch_size, channels, height, width = x.size() + proj_query = x.view(batch_size, channels, -1) + proj_key = x.view(batch_size, channels, -1).permute(0, 2, 1) + energy = torch.bmm(proj_query, proj_key) + energy_new = torch.max( + energy, -1, keepdim=True)[0].expand_as(energy) - energy + attention = F.softmax(energy_new, dim=-1) + proj_value = x.view(batch_size, channels, -1) + + out = torch.bmm(attention, proj_value) + out = out.view(batch_size, channels, height, width) + + out = self.gamma(out) + x + return out + + +@HEADS.register_module() +class DAHead(BaseDecodeHead): + """Dual Attention Network for Scene Segmentation. + + This head is the implementation of `DANet + `_. + + Args: + pam_channels (int): The channels of Position Attention Module(PAM). + """ + + def __init__(self, pam_channels, **kwargs): + super(DAHead, self).__init__(**kwargs) + self.pam_channels = pam_channels + self.pam_in_conv = ConvModule( + self.in_channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.pam = PAM(self.channels, pam_channels) + self.pam_out_conv = ConvModule( + self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.pam_conv_seg = nn.Conv2d( + self.channels, self.num_classes, kernel_size=1) + + self.cam_in_conv = ConvModule( + self.in_channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.cam = CAM() + self.cam_out_conv = ConvModule( + self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.cam_conv_seg = nn.Conv2d( + self.channels, self.num_classes, kernel_size=1) + + def pam_cls_seg(self, feat): + """PAM feature classification.""" + if self.dropout is not None: + feat = self.dropout(feat) + output = self.pam_conv_seg(feat) + return output + + def cam_cls_seg(self, feat): + """CAM feature classification.""" + if self.dropout is not None: + feat = self.dropout(feat) + output = self.cam_conv_seg(feat) + return output + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + pam_feat = self.pam_in_conv(x) + pam_feat = self.pam(pam_feat) + pam_feat = self.pam_out_conv(pam_feat) + pam_out = self.pam_cls_seg(pam_feat) + + cam_feat = self.cam_in_conv(x) + cam_feat = self.cam(cam_feat) + cam_feat = self.cam_out_conv(cam_feat) + cam_out = self.cam_cls_seg(cam_feat) + + feat_sum = pam_feat + cam_feat + pam_cam_out = self.cls_seg(feat_sum) + + return pam_cam_out, pam_out, cam_out + + def forward_test(self, inputs, img_metas, test_cfg): + """Forward function for testing, only 
``pam_cam`` is used.""" + return self.forward(inputs)[0] + + def losses(self, seg_logit, seg_label): + """Compute ``pam_cam``, ``pam``, ``cam`` loss.""" + pam_cam_seg_logit, pam_seg_logit, cam_seg_logit = seg_logit + loss = dict() + loss.update( + add_prefix( + super(DAHead, self).losses(pam_cam_seg_logit, seg_label), + 'pam_cam')) + loss.update( + add_prefix( + super(DAHead, self).losses(pam_seg_logit, seg_label), 'pam')) + loss.update( + add_prefix( + super(DAHead, self).losses(cam_seg_logit, seg_label), 'cam')) + return loss diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/decode_head.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/decode_head.py new file mode 100644 index 0000000000000000000000000000000000000000..a74c89f2ef1274ffe947995722576ab2c78eaec1 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/decode_head.py @@ -0,0 +1,234 @@ +from abc import ABCMeta, abstractmethod + +import torch +import torch.nn as nn +from annotator.mmpkg.mmcv.cnn import normal_init +from annotator.mmpkg.mmcv.runner import auto_fp16, force_fp32 + +from annotator.mmpkg.mmseg.core import build_pixel_sampler +from annotator.mmpkg.mmseg.ops import resize +from ..builder import build_loss +from ..losses import accuracy + + +class BaseDecodeHead(nn.Module, metaclass=ABCMeta): + """Base class for BaseDecodeHead. + + Args: + in_channels (int|Sequence[int]): Input channels. + channels (int): Channels after modules, before conv_seg. + num_classes (int): Number of classes. + dropout_ratio (float): Ratio of dropout layer. Default: 0.1. + conv_cfg (dict|None): Config of conv layers. Default: None. + norm_cfg (dict|None): Config of norm layers. Default: None. + act_cfg (dict): Config of activation layers. + Default: dict(type='ReLU') + in_index (int|Sequence[int]): Input feature index. Default: -1 + input_transform (str|None): Transformation type of input features. + Options: 'resize_concat', 'multiple_select', None. + 'resize_concat': Multiple feature maps will be resize to the + same size as first one and than concat together. + Usually used in FCN head of HRNet. + 'multiple_select': Multiple feature maps will be bundle into + a list and passed into decode head. + None: Only one select feature map is allowed. + Default: None. + loss_decode (dict): Config of decode loss. + Default: dict(type='CrossEntropyLoss'). + ignore_index (int | None): The label index to be ignored. When using + masked BCE loss, ignore_index should be set to None. Default: 255 + sampler (dict|None): The config of segmentation map sampler. + Default: None. + align_corners (bool): align_corners argument of F.interpolate. + Default: False. 
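[editor's sketch] A standalone shape sketch of the channel-attention energy computed in CAM.forward above, assuming a [2, 512, 32, 32] feature map; random tensors stand in for real features.

import torch
import torch.nn.functional as F

x = torch.randn(2, 512, 32, 32)
b, c = x.shape[:2]
q = x.view(b, c, -1)                               # [b, c, h*w]
k = x.view(b, c, -1).permute(0, 2, 1)              # [b, h*w, c]
energy = torch.bmm(q, k)                           # [b, c, c] channel-to-channel affinities
energy_new = energy.max(-1, keepdim=True)[0].expand_as(energy) - energy
attention = F.softmax(energy_new, dim=-1)
print(attention.shape)                             # torch.Size([2, 512, 512])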
+ """ + + def __init__(self, + in_channels, + channels, + *, + num_classes, + dropout_ratio=0.1, + conv_cfg=None, + norm_cfg=None, + act_cfg=dict(type='ReLU'), + in_index=-1, + input_transform=None, + loss_decode=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0), + ignore_index=255, + sampler=None, + align_corners=False): + super(BaseDecodeHead, self).__init__() + self._init_inputs(in_channels, in_index, input_transform) + self.channels = channels + self.num_classes = num_classes + self.dropout_ratio = dropout_ratio + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.in_index = in_index + self.loss_decode = build_loss(loss_decode) + self.ignore_index = ignore_index + self.align_corners = align_corners + if sampler is not None: + self.sampler = build_pixel_sampler(sampler, context=self) + else: + self.sampler = None + + self.conv_seg = nn.Conv2d(channels, num_classes, kernel_size=1) + if dropout_ratio > 0: + self.dropout = nn.Dropout2d(dropout_ratio) + else: + self.dropout = None + self.fp16_enabled = False + + def extra_repr(self): + """Extra repr.""" + s = f'input_transform={self.input_transform}, ' \ + f'ignore_index={self.ignore_index}, ' \ + f'align_corners={self.align_corners}' + return s + + def _init_inputs(self, in_channels, in_index, input_transform): + """Check and initialize input transforms. + + The in_channels, in_index and input_transform must match. + Specifically, when input_transform is None, only single feature map + will be selected. So in_channels and in_index must be of type int. + When input_transform + + Args: + in_channels (int|Sequence[int]): Input channels. + in_index (int|Sequence[int]): Input feature index. + input_transform (str|None): Transformation type of input features. + Options: 'resize_concat', 'multiple_select', None. + 'resize_concat': Multiple feature maps will be resize to the + same size as first one and than concat together. + Usually used in FCN head of HRNet. + 'multiple_select': Multiple feature maps will be bundle into + a list and passed into decode head. + None: Only one select feature map is allowed. + """ + + if input_transform is not None: + assert input_transform in ['resize_concat', 'multiple_select'] + self.input_transform = input_transform + self.in_index = in_index + if input_transform is not None: + assert isinstance(in_channels, (list, tuple)) + assert isinstance(in_index, (list, tuple)) + assert len(in_channels) == len(in_index) + if input_transform == 'resize_concat': + self.in_channels = sum(in_channels) + else: + self.in_channels = in_channels + else: + assert isinstance(in_channels, int) + assert isinstance(in_index, int) + self.in_channels = in_channels + + def init_weights(self): + """Initialize weights of classification layer.""" + normal_init(self.conv_seg, mean=0, std=0.01) + + def _transform_inputs(self, inputs): + """Transform inputs for decoder. + + Args: + inputs (list[Tensor]): List of multi-level img features. 
+ + Returns: + Tensor: The transformed inputs + """ + + if self.input_transform == 'resize_concat': + inputs = [inputs[i] for i in self.in_index] + upsampled_inputs = [ + resize( + input=x, + size=inputs[0].shape[2:], + mode='bilinear', + align_corners=self.align_corners) for x in inputs + ] + inputs = torch.cat(upsampled_inputs, dim=1) + elif self.input_transform == 'multiple_select': + inputs = [inputs[i] for i in self.in_index] + else: + inputs = inputs[self.in_index] + + return inputs + + @auto_fp16() + @abstractmethod + def forward(self, inputs): + """Placeholder of forward function.""" + pass + + def forward_train(self, inputs, img_metas, gt_semantic_seg, train_cfg): + """Forward function for training. + Args: + inputs (list[Tensor]): List of multi-level img features. + img_metas (list[dict]): List of image info dict where each dict + has: 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:Collect`. + gt_semantic_seg (Tensor): Semantic segmentation masks + used if the architecture supports semantic segmentation task. + train_cfg (dict): The training config. + + Returns: + dict[str, Tensor]: a dictionary of loss components + """ + seg_logits = self.forward(inputs) + losses = self.losses(seg_logits, gt_semantic_seg) + return losses + + def forward_test(self, inputs, img_metas, test_cfg): + """Forward function for testing. + + Args: + inputs (list[Tensor]): List of multi-level img features. + img_metas (list[dict]): List of image info dict where each dict + has: 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:Collect`. + test_cfg (dict): The testing config. + + Returns: + Tensor: Output segmentation map. + """ + return self.forward(inputs) + + def cls_seg(self, feat): + """Classify each pixel.""" + if self.dropout is not None: + feat = self.dropout(feat) + output = self.conv_seg(feat) + return output + + @force_fp32(apply_to=('seg_logit', )) + def losses(self, seg_logit, seg_label): + """Compute segmentation loss.""" + loss = dict() + seg_logit = resize( + input=seg_logit, + size=seg_label.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + if self.sampler is not None: + seg_weight = self.sampler.sample(seg_logit, seg_label) + else: + seg_weight = None + seg_label = seg_label.squeeze(1) + loss['loss_seg'] = self.loss_decode( + seg_logit, + seg_label, + weight=seg_weight, + ignore_index=self.ignore_index) + loss['acc_seg'] = accuracy(seg_logit, seg_label) + return loss diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/dm_head.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/dm_head.py new file mode 100644 index 0000000000000000000000000000000000000000..de6d0f6390d96c1eef4242cdc9aed91ec7714c6a --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/dm_head.py @@ -0,0 +1,140 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from annotator.mmpkg.mmcv.cnn import ConvModule, build_activation_layer, build_norm_layer + +from ..builder import HEADS +from .decode_head import BaseDecodeHead + + +class DCM(nn.Module): + """Dynamic Convolutional Module used in DMNet. 
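[editor's sketch] A small sketch of the 'resize_concat' input transform described above, assuming two feature levels with 256 and 512 channels: both maps are resized to the first map's resolution and concatenated along the channel dimension.

import torch
import torch.nn.functional as F

feats = [torch.randn(1, 256, 64, 64), torch.randn(1, 512, 32, 32)]
upsampled = [F.interpolate(f, size=feats[0].shape[2:], mode='bilinear',
                           align_corners=False) for f in feats]
print(torch.cat(upsampled, dim=1).shape)  # torch.Size([1, 768, 64, 64])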
+ + Args: + filter_size (int): The filter size of generated convolution kernel + used in Dynamic Convolutional Module. + fusion (bool): Add one conv to fuse DCM output feature. + in_channels (int): Input channels. + channels (int): Channels after modules, before conv_seg. + conv_cfg (dict | None): Config of conv layers. + norm_cfg (dict | None): Config of norm layers. + act_cfg (dict): Config of activation layers. + """ + + def __init__(self, filter_size, fusion, in_channels, channels, conv_cfg, + norm_cfg, act_cfg): + super(DCM, self).__init__() + self.filter_size = filter_size + self.fusion = fusion + self.in_channels = in_channels + self.channels = channels + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.filter_gen_conv = nn.Conv2d(self.in_channels, self.channels, 1, 1, + 0) + + self.input_redu_conv = ConvModule( + self.in_channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + if self.norm_cfg is not None: + self.norm = build_norm_layer(self.norm_cfg, self.channels)[1] + else: + self.norm = None + self.activate = build_activation_layer(self.act_cfg) + + if self.fusion: + self.fusion_conv = ConvModule( + self.channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, x): + """Forward function.""" + generated_filter = self.filter_gen_conv( + F.adaptive_avg_pool2d(x, self.filter_size)) + x = self.input_redu_conv(x) + b, c, h, w = x.shape + # [1, b * c, h, w], c = self.channels + x = x.view(1, b * c, h, w) + # [b * c, 1, filter_size, filter_size] + generated_filter = generated_filter.view(b * c, 1, self.filter_size, + self.filter_size) + pad = (self.filter_size - 1) // 2 + if (self.filter_size - 1) % 2 == 0: + p2d = (pad, pad, pad, pad) + else: + p2d = (pad + 1, pad, pad + 1, pad) + x = F.pad(input=x, pad=p2d, mode='constant', value=0) + # [1, b * c, h, w] + output = F.conv2d(input=x, weight=generated_filter, groups=b * c) + # [b, c, h, w] + output = output.view(b, c, h, w) + if self.norm is not None: + output = self.norm(output) + output = self.activate(output) + + if self.fusion: + output = self.fusion_conv(output) + + return output + + +@HEADS.register_module() +class DMHead(BaseDecodeHead): + """Dynamic Multi-scale Filters for Semantic Segmentation. + + This head is the implementation of + `DMNet `_. + + Args: + filter_sizes (tuple[int]): The size of generated convolutional filters + used in Dynamic Convolutional Module. Default: (1, 3, 5, 7). + fusion (bool): Add one conv to fuse DCM output feature. 
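[editor's sketch] A standalone sketch of the per-sample dynamic convolution trick used in DCM.forward, assuming filter_size=3 and 64 channels: the batch is folded into the channel dimension so a single grouped conv applies a different generated kernel to every feature map.

import torch
import torch.nn.functional as F

b, c, h, w, filter_size = 2, 64, 16, 16, 3
x = torch.randn(b, c, h, w).view(1, b * c, h, w)           # fold batch into channels
filters = torch.randn(b * c, 1, filter_size, filter_size)  # stands in for filter_gen_conv output
pad = (filter_size - 1) // 2
out = F.conv2d(F.pad(x, (pad, pad, pad, pad)), filters, groups=b * c)
print(out.view(b, c, h, w).shape)  # torch.Size([2, 64, 16, 16])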
+ """ + + def __init__(self, filter_sizes=(1, 3, 5, 7), fusion=False, **kwargs): + super(DMHead, self).__init__(**kwargs) + assert isinstance(filter_sizes, (list, tuple)) + self.filter_sizes = filter_sizes + self.fusion = fusion + dcm_modules = [] + for filter_size in self.filter_sizes: + dcm_modules.append( + DCM(filter_size, + self.fusion, + self.in_channels, + self.channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + self.dcm_modules = nn.ModuleList(dcm_modules) + self.bottleneck = ConvModule( + self.in_channels + len(filter_sizes) * self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + dcm_outs = [x] + for dcm_module in self.dcm_modules: + dcm_outs.append(dcm_module(x)) + dcm_outs = torch.cat(dcm_outs, dim=1) + output = self.bottleneck(dcm_outs) + output = self.cls_seg(output) + return output diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/dnl_head.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/dnl_head.py new file mode 100644 index 0000000000000000000000000000000000000000..b3bb1de1499ad043cc51b2269b4d970d07c16076 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/dnl_head.py @@ -0,0 +1,131 @@ +import torch +from annotator.mmpkg.mmcv.cnn import NonLocal2d +from torch import nn + +from ..builder import HEADS +from .fcn_head import FCNHead + + +class DisentangledNonLocal2d(NonLocal2d): + """Disentangled Non-Local Blocks. + + Args: + temperature (float): Temperature to adjust attention. Default: 0.05 + """ + + def __init__(self, *arg, temperature, **kwargs): + super().__init__(*arg, **kwargs) + self.temperature = temperature + self.conv_mask = nn.Conv2d(self.in_channels, 1, kernel_size=1) + + def embedded_gaussian(self, theta_x, phi_x): + """Embedded gaussian with temperature.""" + + # NonLocal2d pairwise_weight: [N, HxW, HxW] + pairwise_weight = torch.matmul(theta_x, phi_x) + if self.use_scale: + # theta_x.shape[-1] is `self.inter_channels` + pairwise_weight /= theta_x.shape[-1]**0.5 + pairwise_weight /= self.temperature + pairwise_weight = pairwise_weight.softmax(dim=-1) + return pairwise_weight + + def forward(self, x): + # x: [N, C, H, W] + n = x.size(0) + + # g_x: [N, HxW, C] + g_x = self.g(x).view(n, self.inter_channels, -1) + g_x = g_x.permute(0, 2, 1) + + # theta_x: [N, HxW, C], phi_x: [N, C, HxW] + if self.mode == 'gaussian': + theta_x = x.view(n, self.in_channels, -1) + theta_x = theta_x.permute(0, 2, 1) + if self.sub_sample: + phi_x = self.phi(x).view(n, self.in_channels, -1) + else: + phi_x = x.view(n, self.in_channels, -1) + elif self.mode == 'concatenation': + theta_x = self.theta(x).view(n, self.inter_channels, -1, 1) + phi_x = self.phi(x).view(n, self.inter_channels, 1, -1) + else: + theta_x = self.theta(x).view(n, self.inter_channels, -1) + theta_x = theta_x.permute(0, 2, 1) + phi_x = self.phi(x).view(n, self.inter_channels, -1) + + # subtract mean + theta_x -= theta_x.mean(dim=-2, keepdim=True) + phi_x -= phi_x.mean(dim=-1, keepdim=True) + + pairwise_func = getattr(self, self.mode) + # pairwise_weight: [N, HxW, HxW] + pairwise_weight = pairwise_func(theta_x, phi_x) + + # y: [N, HxW, C] + y = torch.matmul(pairwise_weight, g_x) + # y: [N, C, H, W] + y = y.permute(0, 2, 1).contiguous().reshape(n, self.inter_channels, + 
*x.size()[2:]) + + # unary_mask: [N, 1, HxW] + unary_mask = self.conv_mask(x) + unary_mask = unary_mask.view(n, 1, -1) + unary_mask = unary_mask.softmax(dim=-1) + # unary_x: [N, 1, C] + unary_x = torch.matmul(unary_mask, g_x) + # unary_x: [N, C, 1, 1] + unary_x = unary_x.permute(0, 2, 1).contiguous().reshape( + n, self.inter_channels, 1, 1) + + output = x + self.conv_out(y + unary_x) + + return output + + +@HEADS.register_module() +class DNLHead(FCNHead): + """Disentangled Non-Local Neural Networks. + + This head is the implementation of `DNLNet + `_. + + Args: + reduction (int): Reduction factor of projection transform. Default: 2. + use_scale (bool): Whether to scale pairwise_weight by + sqrt(1/inter_channels). Default: False. + mode (str): The nonlocal mode. Options are 'embedded_gaussian', + 'dot_product'. Default: 'embedded_gaussian.'. + temperature (float): Temperature to adjust attention. Default: 0.05 + """ + + def __init__(self, + reduction=2, + use_scale=True, + mode='embedded_gaussian', + temperature=0.05, + **kwargs): + super(DNLHead, self).__init__(num_convs=2, **kwargs) + self.reduction = reduction + self.use_scale = use_scale + self.mode = mode + self.temperature = temperature + self.dnl_block = DisentangledNonLocal2d( + in_channels=self.channels, + reduction=self.reduction, + use_scale=self.use_scale, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + mode=self.mode, + temperature=self.temperature) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + output = self.convs[0](x) + output = self.dnl_block(output) + output = self.convs[1](output) + if self.concat_input: + output = self.conv_cat(torch.cat([x, output], dim=1)) + output = self.cls_seg(output) + return output diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/ema_head.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/ema_head.py new file mode 100644 index 0000000000000000000000000000000000000000..aaebae7b25579cabcd3967da765568a282869a49 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/ema_head.py @@ -0,0 +1,168 @@ +import math + +import torch +import torch.distributed as dist +import torch.nn as nn +import torch.nn.functional as F +from annotator.mmpkg.mmcv.cnn import ConvModule + +from ..builder import HEADS +from .decode_head import BaseDecodeHead + + +def reduce_mean(tensor): + """Reduce mean when distributed training.""" + if not (dist.is_available() and dist.is_initialized()): + return tensor + tensor = tensor.clone() + dist.all_reduce(tensor.div_(dist.get_world_size()), op=dist.ReduceOp.SUM) + return tensor + + +class EMAModule(nn.Module): + """Expectation Maximization Attention Module used in EMANet. + + Args: + channels (int): Channels of the whole module. + num_bases (int): Number of bases. + num_stages (int): Number of the EM iterations. + """ + + def __init__(self, channels, num_bases, num_stages, momentum): + super(EMAModule, self).__init__() + assert num_stages >= 1, 'num_stages must be at least 1!' + self.num_bases = num_bases + self.num_stages = num_stages + self.momentum = momentum + + bases = torch.zeros(1, channels, self.num_bases) + bases.normal_(0, math.sqrt(2. 
/ self.num_bases)) + # [1, channels, num_bases] + bases = F.normalize(bases, dim=1, p=2) + self.register_buffer('bases', bases) + + def forward(self, feats): + """Forward function.""" + batch_size, channels, height, width = feats.size() + # [batch_size, channels, height*width] + feats = feats.view(batch_size, channels, height * width) + # [batch_size, channels, num_bases] + bases = self.bases.repeat(batch_size, 1, 1) + + with torch.no_grad(): + for i in range(self.num_stages): + # [batch_size, height*width, num_bases] + attention = torch.einsum('bcn,bck->bnk', feats, bases) + attention = F.softmax(attention, dim=2) + # l1 norm + attention_normed = F.normalize(attention, dim=1, p=1) + # [batch_size, channels, num_bases] + bases = torch.einsum('bcn,bnk->bck', feats, attention_normed) + # l2 norm + bases = F.normalize(bases, dim=1, p=2) + + feats_recon = torch.einsum('bck,bnk->bcn', bases, attention) + feats_recon = feats_recon.view(batch_size, channels, height, width) + + if self.training: + bases = bases.mean(dim=0, keepdim=True) + bases = reduce_mean(bases) + # l2 norm + bases = F.normalize(bases, dim=1, p=2) + self.bases = (1 - + self.momentum) * self.bases + self.momentum * bases + + return feats_recon + + +@HEADS.register_module() +class EMAHead(BaseDecodeHead): + """Expectation Maximization Attention Networks for Semantic Segmentation. + + This head is the implementation of `EMANet + `_. + + Args: + ema_channels (int): EMA module channels + num_bases (int): Number of bases. + num_stages (int): Number of the EM iterations. + concat_input (bool): Whether concat the input and output of convs + before classification layer. Default: True + momentum (float): Momentum to update the base. Default: 0.1. + """ + + def __init__(self, + ema_channels, + num_bases, + num_stages, + concat_input=True, + momentum=0.1, + **kwargs): + super(EMAHead, self).__init__(**kwargs) + self.ema_channels = ema_channels + self.num_bases = num_bases + self.num_stages = num_stages + self.concat_input = concat_input + self.momentum = momentum + self.ema_module = EMAModule(self.ema_channels, self.num_bases, + self.num_stages, self.momentum) + + self.ema_in_conv = ConvModule( + self.in_channels, + self.ema_channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + # project (0, inf) -> (-inf, inf) + self.ema_mid_conv = ConvModule( + self.ema_channels, + self.ema_channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=None, + act_cfg=None) + for param in self.ema_mid_conv.parameters(): + param.requires_grad = False + + self.ema_out_conv = ConvModule( + self.ema_channels, + self.ema_channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=None) + self.bottleneck = ConvModule( + self.ema_channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + if self.concat_input: + self.conv_cat = ConvModule( + self.in_channels + self.channels, + self.channels, + kernel_size=3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + feats = self.ema_in_conv(x) + identity = feats + feats = self.ema_mid_conv(feats) + recon = self.ema_module(feats) + recon = F.relu(recon, inplace=True) + recon = self.ema_out_conv(recon) + output = F.relu(identity + recon, inplace=True) + output = self.bottleneck(output) + if self.concat_input: + output = self.conv_cat(torch.cat([x, output], dim=1)) + 
output = self.cls_seg(output) + return output diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/enc_head.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/enc_head.py new file mode 100644 index 0000000000000000000000000000000000000000..4c2a22a90b26f3264f63234694f0f290a7891ea2 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/enc_head.py @@ -0,0 +1,187 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from annotator.mmpkg.mmcv.cnn import ConvModule, build_norm_layer + +from annotator.mmpkg.mmseg.ops import Encoding, resize +from ..builder import HEADS, build_loss +from .decode_head import BaseDecodeHead + + +class EncModule(nn.Module): + """Encoding Module used in EncNet. + + Args: + in_channels (int): Input channels. + num_codes (int): Number of code words. + conv_cfg (dict|None): Config of conv layers. + norm_cfg (dict|None): Config of norm layers. + act_cfg (dict): Config of activation layers. + """ + + def __init__(self, in_channels, num_codes, conv_cfg, norm_cfg, act_cfg): + super(EncModule, self).__init__() + self.encoding_project = ConvModule( + in_channels, + in_channels, + 1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + # TODO: resolve this hack + # change to 1d + if norm_cfg is not None: + encoding_norm_cfg = norm_cfg.copy() + if encoding_norm_cfg['type'] in ['BN', 'IN']: + encoding_norm_cfg['type'] += '1d' + else: + encoding_norm_cfg['type'] = encoding_norm_cfg['type'].replace( + '2d', '1d') + else: + # fallback to BN1d + encoding_norm_cfg = dict(type='BN1d') + self.encoding = nn.Sequential( + Encoding(channels=in_channels, num_codes=num_codes), + build_norm_layer(encoding_norm_cfg, num_codes)[1], + nn.ReLU(inplace=True)) + self.fc = nn.Sequential( + nn.Linear(in_channels, in_channels), nn.Sigmoid()) + + def forward(self, x): + """Forward function.""" + encoding_projection = self.encoding_project(x) + encoding_feat = self.encoding(encoding_projection).mean(dim=1) + batch_size, channels, _, _ = x.size() + gamma = self.fc(encoding_feat) + y = gamma.view(batch_size, channels, 1, 1) + output = F.relu_(x + x * y) + return encoding_feat, output + + +@HEADS.register_module() +class EncHead(BaseDecodeHead): + """Context Encoding for Semantic Segmentation. + + This head is the implementation of `EncNet + `_. + + Args: + num_codes (int): Number of code words. Default: 32. + use_se_loss (bool): Whether use Semantic Encoding Loss (SE-loss) to + regularize the training. Default: True. + add_lateral (bool): Whether use lateral connection to fuse features. + Default: False. + loss_se_decode (dict): Config of decode loss. + Default: dict(type='CrossEntropyLoss', use_sigmoid=True). 
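+
+ When ``use_se_loss`` is True, ``forward`` additionally returns image-level
+ class logits from ``se_layer``; these are supervised in ``losses`` with the
+ multi-hot labels produced by ``_convert_to_onehot_labels``.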
+ """ + + def __init__(self, + num_codes=32, + use_se_loss=True, + add_lateral=False, + loss_se_decode=dict( + type='CrossEntropyLoss', + use_sigmoid=True, + loss_weight=0.2), + **kwargs): + super(EncHead, self).__init__( + input_transform='multiple_select', **kwargs) + self.use_se_loss = use_se_loss + self.add_lateral = add_lateral + self.num_codes = num_codes + self.bottleneck = ConvModule( + self.in_channels[-1], + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + if add_lateral: + self.lateral_convs = nn.ModuleList() + for in_channels in self.in_channels[:-1]: # skip the last one + self.lateral_convs.append( + ConvModule( + in_channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + self.fusion = ConvModule( + len(self.in_channels) * self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.enc_module = EncModule( + self.channels, + num_codes=num_codes, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + if self.use_se_loss: + self.loss_se_decode = build_loss(loss_se_decode) + self.se_layer = nn.Linear(self.channels, self.num_classes) + + def forward(self, inputs): + """Forward function.""" + inputs = self._transform_inputs(inputs) + feat = self.bottleneck(inputs[-1]) + if self.add_lateral: + laterals = [ + resize( + lateral_conv(inputs[i]), + size=feat.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + for i, lateral_conv in enumerate(self.lateral_convs) + ] + feat = self.fusion(torch.cat([feat, *laterals], 1)) + encode_feat, output = self.enc_module(feat) + output = self.cls_seg(output) + if self.use_se_loss: + se_output = self.se_layer(encode_feat) + return output, se_output + else: + return output + + def forward_test(self, inputs, img_metas, test_cfg): + """Forward function for testing, ignore se_loss.""" + if self.use_se_loss: + return self.forward(inputs)[0] + else: + return self.forward(inputs) + + @staticmethod + def _convert_to_onehot_labels(seg_label, num_classes): + """Convert segmentation label to onehot. + + Args: + seg_label (Tensor): Segmentation label of shape (N, H, W). + num_classes (int): Number of classes. + + Returns: + Tensor: Onehot labels of shape (N, num_classes). 
+ """ + + batch_size = seg_label.size(0) + onehot_labels = seg_label.new_zeros((batch_size, num_classes)) + for i in range(batch_size): + hist = seg_label[i].float().histc( + bins=num_classes, min=0, max=num_classes - 1) + onehot_labels[i] = hist > 0 + return onehot_labels + + def losses(self, seg_logit, seg_label): + """Compute segmentation and semantic encoding loss.""" + seg_logit, se_seg_logit = seg_logit + loss = dict() + loss.update(super(EncHead, self).losses(seg_logit, seg_label)) + se_loss = self.loss_se_decode( + se_seg_logit, + self._convert_to_onehot_labels(seg_label, self.num_classes)) + loss['loss_se'] = se_loss + return loss diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/fcn_head.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/fcn_head.py new file mode 100644 index 0000000000000000000000000000000000000000..c4583c57246e8e3b1d15d240b943d046afa5cba5 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/fcn_head.py @@ -0,0 +1,81 @@ +import torch +import torch.nn as nn +from annotator.mmpkg.mmcv.cnn import ConvModule + +from ..builder import HEADS +from .decode_head import BaseDecodeHead + + +@HEADS.register_module() +class FCNHead(BaseDecodeHead): + """Fully Convolution Networks for Semantic Segmentation. + + This head is implemented of `FCNNet `_. + + Args: + num_convs (int): Number of convs in the head. Default: 2. + kernel_size (int): The kernel size for convs in the head. Default: 3. + concat_input (bool): Whether concat the input and output of convs + before classification layer. + dilation (int): The dilation rate for convs in the head. Default: 1. + """ + + def __init__(self, + num_convs=2, + kernel_size=3, + concat_input=True, + dilation=1, + **kwargs): + assert num_convs >= 0 and dilation > 0 and isinstance(dilation, int) + self.num_convs = num_convs + self.concat_input = concat_input + self.kernel_size = kernel_size + super(FCNHead, self).__init__(**kwargs) + if num_convs == 0: + assert self.in_channels == self.channels + + conv_padding = (kernel_size // 2) * dilation + convs = [] + convs.append( + ConvModule( + self.in_channels, + self.channels, + kernel_size=kernel_size, + padding=conv_padding, + dilation=dilation, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + for i in range(num_convs - 1): + convs.append( + ConvModule( + self.channels, + self.channels, + kernel_size=kernel_size, + padding=conv_padding, + dilation=dilation, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + if num_convs == 0: + self.convs = nn.Identity() + else: + self.convs = nn.Sequential(*convs) + if self.concat_input: + self.conv_cat = ConvModule( + self.in_channels + self.channels, + self.channels, + kernel_size=kernel_size, + padding=kernel_size // 2, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + output = self.convs(x) + if self.concat_input: + output = self.conv_cat(torch.cat([x, output], dim=1)) + output = self.cls_seg(output) + return output diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/fpn_head.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/fpn_head.py new file mode 100644 index 
0000000000000000000000000000000000000000..1a9ba39eebc406bfa422dc98eeaa32a800008a83 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/fpn_head.py @@ -0,0 +1,68 @@ +import numpy as np +import torch.nn as nn +from annotator.mmpkg.mmcv.cnn import ConvModule + +from annotator.mmpkg.mmseg.ops import resize +from ..builder import HEADS +from .decode_head import BaseDecodeHead + + +@HEADS.register_module() +class FPNHead(BaseDecodeHead): + """Panoptic Feature Pyramid Networks. + + This head is the implementation of `Semantic FPN + `_. + + Args: + feature_strides (tuple[int]): The strides for input feature maps. + stack_lateral. All strides suppose to be power of 2. The first + one is of largest resolution. + """ + + def __init__(self, feature_strides, **kwargs): + super(FPNHead, self).__init__( + input_transform='multiple_select', **kwargs) + assert len(feature_strides) == len(self.in_channels) + assert min(feature_strides) == feature_strides[0] + self.feature_strides = feature_strides + + self.scale_heads = nn.ModuleList() + for i in range(len(feature_strides)): + head_length = max( + 1, + int(np.log2(feature_strides[i]) - np.log2(feature_strides[0]))) + scale_head = [] + for k in range(head_length): + scale_head.append( + ConvModule( + self.in_channels[i] if k == 0 else self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + if feature_strides[i] != feature_strides[0]: + scale_head.append( + nn.Upsample( + scale_factor=2, + mode='bilinear', + align_corners=self.align_corners)) + self.scale_heads.append(nn.Sequential(*scale_head)) + + def forward(self, inputs): + + x = self._transform_inputs(inputs) + + output = self.scale_heads[0](x[0]) + for i in range(1, len(self.feature_strides)): + # non inplace + output = output + resize( + self.scale_heads[i](x[i]), + size=output.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + + output = self.cls_seg(output) + return output diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/gc_head.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/gc_head.py new file mode 100644 index 0000000000000000000000000000000000000000..6342811f67e4affac7886c8fc745a28abcc32c55 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/gc_head.py @@ -0,0 +1,47 @@ +import torch +from annotator.mmpkg.mmcv.cnn import ContextBlock + +from ..builder import HEADS +from .fcn_head import FCNHead + + +@HEADS.register_module() +class GCHead(FCNHead): + """GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond. + + This head is the implementation of `GCNet + `_. + + Args: + ratio (float): Multiplier of channels ratio. Default: 1/4. + pooling_type (str): The pooling type of context aggregation. + Options are 'att', 'avg'. Default: 'avg'. + fusion_types (tuple[str]): The fusion type for feature fusion. + Options are 'channel_add', 'channel_mul'. 
Default: ('channel_add',) + """ + + def __init__(self, + ratio=1 / 4., + pooling_type='att', + fusion_types=('channel_add', ), + **kwargs): + super(GCHead, self).__init__(num_convs=2, **kwargs) + self.ratio = ratio + self.pooling_type = pooling_type + self.fusion_types = fusion_types + self.gc_block = ContextBlock( + in_channels=self.channels, + ratio=self.ratio, + pooling_type=self.pooling_type, + fusion_types=self.fusion_types) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + output = self.convs[0](x) + output = self.gc_block(output) + output = self.convs[1](output) + if self.concat_input: + output = self.conv_cat(torch.cat([x, output], dim=1)) + output = self.cls_seg(output) + return output diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/lraspp_head.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/lraspp_head.py new file mode 100644 index 0000000000000000000000000000000000000000..b29d80e77d05cc0c12118e335e266a73bda99ed0 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/lraspp_head.py @@ -0,0 +1,90 @@ +import torch +import torch.nn as nn +from annotator.mmpkg.mmcv import is_tuple_of +from annotator.mmpkg.mmcv.cnn import ConvModule + +from annotator.mmpkg.mmseg.ops import resize +from ..builder import HEADS +from .decode_head import BaseDecodeHead + + +@HEADS.register_module() +class LRASPPHead(BaseDecodeHead): + """Lite R-ASPP (LRASPP) head is proposed in Searching for MobileNetV3. + + This head is the improved implementation of `Searching for MobileNetV3 + `_. + + Args: + branch_channels (tuple[int]): The number of output channels in every + each branch. Default: (32, 64). + """ + + def __init__(self, branch_channels=(32, 64), **kwargs): + super(LRASPPHead, self).__init__(**kwargs) + if self.input_transform != 'multiple_select': + raise ValueError('in Lite R-ASPP (LRASPP) head, input_transform ' + f'must be \'multiple_select\'. 
But received ' + f'\'{self.input_transform}\'') + assert is_tuple_of(branch_channels, int) + assert len(branch_channels) == len(self.in_channels) - 1 + self.branch_channels = branch_channels + + self.convs = nn.Sequential() + self.conv_ups = nn.Sequential() + for i in range(len(branch_channels)): + self.convs.add_module( + f'conv{i}', + nn.Conv2d( + self.in_channels[i], branch_channels[i], 1, bias=False)) + self.conv_ups.add_module( + f'conv_up{i}', + ConvModule( + self.channels + branch_channels[i], + self.channels, + 1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + bias=False)) + + self.conv_up_input = nn.Conv2d(self.channels, self.channels, 1) + + self.aspp_conv = ConvModule( + self.in_channels[-1], + self.channels, + 1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + bias=False) + self.image_pool = nn.Sequential( + nn.AvgPool2d(kernel_size=49, stride=(16, 20)), + ConvModule( + self.in_channels[2], + self.channels, + 1, + act_cfg=dict(type='Sigmoid'), + bias=False)) + + def forward(self, inputs): + """Forward function.""" + inputs = self._transform_inputs(inputs) + + x = inputs[-1] + + x = self.aspp_conv(x) * resize( + self.image_pool(x), + size=x.size()[2:], + mode='bilinear', + align_corners=self.align_corners) + x = self.conv_up_input(x) + + for i in range(len(self.branch_channels) - 1, -1, -1): + x = resize( + x, + size=inputs[i].size()[2:], + mode='bilinear', + align_corners=self.align_corners) + x = torch.cat([x, self.convs[i](inputs[i])], 1) + x = self.conv_ups[i](x) + + return self.cls_seg(x) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/nl_head.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/nl_head.py new file mode 100644 index 0000000000000000000000000000000000000000..5990df1b8b0d57cfa772ec1b6b6be20a8f667ce7 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/nl_head.py @@ -0,0 +1,49 @@ +import torch +from annotator.mmpkg.mmcv.cnn import NonLocal2d + +from ..builder import HEADS +from .fcn_head import FCNHead + + +@HEADS.register_module() +class NLHead(FCNHead): + """Non-local Neural Networks. + + This head is the implementation of `NLNet + `_. + + Args: + reduction (int): Reduction factor of projection transform. Default: 2. + use_scale (bool): Whether to scale pairwise_weight by + sqrt(1/inter_channels). Default: True. + mode (str): The nonlocal mode. Options are 'embedded_gaussian', + 'dot_product'. Default: 'embedded_gaussian.'. 
+ """ + + def __init__(self, + reduction=2, + use_scale=True, + mode='embedded_gaussian', + **kwargs): + super(NLHead, self).__init__(num_convs=2, **kwargs) + self.reduction = reduction + self.use_scale = use_scale + self.mode = mode + self.nl_block = NonLocal2d( + in_channels=self.channels, + reduction=self.reduction, + use_scale=self.use_scale, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + mode=self.mode) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + output = self.convs[0](x) + output = self.nl_block(output) + output = self.convs[1](output) + if self.concat_input: + output = self.conv_cat(torch.cat([x, output], dim=1)) + output = self.cls_seg(output) + return output diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/ocr_head.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/ocr_head.py new file mode 100644 index 0000000000000000000000000000000000000000..c46d10e5baff54e182af0426a1ecfea9ca190a9f --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/ocr_head.py @@ -0,0 +1,127 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from annotator.mmpkg.mmcv.cnn import ConvModule + +from annotator.mmpkg.mmseg.ops import resize +from ..builder import HEADS +from ..utils import SelfAttentionBlock as _SelfAttentionBlock +from .cascade_decode_head import BaseCascadeDecodeHead + + +class SpatialGatherModule(nn.Module): + """Aggregate the context features according to the initial predicted + probability distribution. + + Employ the soft-weighted method to aggregate the context. + """ + + def __init__(self, scale): + super(SpatialGatherModule, self).__init__() + self.scale = scale + + def forward(self, feats, probs): + """Forward function.""" + batch_size, num_classes, height, width = probs.size() + channels = feats.size(1) + probs = probs.view(batch_size, num_classes, -1) + feats = feats.view(batch_size, channels, -1) + # [batch_size, height*width, num_classes] + feats = feats.permute(0, 2, 1) + # [batch_size, channels, height*width] + probs = F.softmax(self.scale * probs, dim=2) + # [batch_size, channels, num_classes] + ocr_context = torch.matmul(probs, feats) + ocr_context = ocr_context.permute(0, 2, 1).contiguous().unsqueeze(3) + return ocr_context + + +class ObjectAttentionBlock(_SelfAttentionBlock): + """Make a OCR used SelfAttentionBlock.""" + + def __init__(self, in_channels, channels, scale, conv_cfg, norm_cfg, + act_cfg): + if scale > 1: + query_downsample = nn.MaxPool2d(kernel_size=scale) + else: + query_downsample = None + super(ObjectAttentionBlock, self).__init__( + key_in_channels=in_channels, + query_in_channels=in_channels, + channels=channels, + out_channels=in_channels, + share_key_query=False, + query_downsample=query_downsample, + key_downsample=None, + key_query_num_convs=2, + key_query_norm=True, + value_out_num_convs=1, + value_out_norm=True, + matmul_norm=True, + with_out=True, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + self.bottleneck = ConvModule( + in_channels * 2, + in_channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, query_feats, key_feats): + """Forward function.""" + context = super(ObjectAttentionBlock, + self).forward(query_feats, key_feats) + output = self.bottleneck(torch.cat([context, query_feats], dim=1)) + if self.query_downsample is not None: + output = 
resize(query_feats) + + return output + + +@HEADS.register_module() +class OCRHead(BaseCascadeDecodeHead): + """Object-Contextual Representations for Semantic Segmentation. + + This head is the implementation of `OCRNet + `_. + + Args: + ocr_channels (int): The intermediate channels of OCR block. + scale (int): The scale of probability map in SpatialGatherModule in + Default: 1. + """ + + def __init__(self, ocr_channels, scale=1, **kwargs): + super(OCRHead, self).__init__(**kwargs) + self.ocr_channels = ocr_channels + self.scale = scale + self.object_context_block = ObjectAttentionBlock( + self.channels, + self.ocr_channels, + self.scale, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.spatial_gather_module = SpatialGatherModule(self.scale) + + self.bottleneck = ConvModule( + self.in_channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, inputs, prev_output): + """Forward function.""" + x = self._transform_inputs(inputs) + feats = self.bottleneck(x) + context = self.spatial_gather_module(feats, prev_output) + object_context = self.object_context_block(feats, context) + output = self.cls_seg(object_context) + + return output diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/point_head.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/point_head.py new file mode 100644 index 0000000000000000000000000000000000000000..c6782763e30386d99115977ebe5a4d9291bae8d9 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/point_head.py @@ -0,0 +1,354 @@ +# Modified from https://github.com/facebookresearch/detectron2/tree/master/projects/PointRend/point_head/point_head.py # noqa + +import torch +import torch.nn as nn + +try: + from mmcv.cnn import ConvModule, normal_init + from mmcv.ops import point_sample +except ImportError: + from annotator.mmpkg.mmcv.cnn import ConvModule, normal_init + from annotator.mmpkg.mmcv.ops import point_sample + +from annotator.mmpkg.mmseg.models.builder import HEADS +from annotator.mmpkg.mmseg.ops import resize +from ..losses import accuracy +from .cascade_decode_head import BaseCascadeDecodeHead + + +def calculate_uncertainty(seg_logits): + """Estimate uncertainty based on seg logits. + + For each location of the prediction ``seg_logits`` we estimate + uncertainty as the difference between top first and top second + predicted logits. + + Args: + seg_logits (Tensor): Semantic segmentation logits, + shape (batch_size, num_classes, height, width). + + Returns: + scores (Tensor): T uncertainty scores with the most uncertain + locations having the highest uncertainty score, shape ( + batch_size, 1, height, width) + """ + top2_scores = torch.topk(seg_logits, k=2, dim=1)[0] + return (top2_scores[:, 1] - top2_scores[:, 0]).unsqueeze(1) + + +@HEADS.register_module() +class PointHead(BaseCascadeDecodeHead): + """A mask point head use in PointRend. + + ``PointHead`` use shared multi-layer perceptron (equivalent to + nn.Conv1d) to predict the logit of input points. The fine-grained feature + and coarse feature will be concatenate together for predication. + + Args: + num_fcs (int): Number of fc layers in the head. Default: 3. + in_channels (int): Number of input channels. Default: 256. + fc_channels (int): Number of fc channels. Default: 256. + num_classes (int): Number of classes for logits. Default: 80. 
+ class_agnostic (bool): Whether use class agnostic classification. + If so, the output channels of logits will be 1. Default: False. + coarse_pred_each_layer (bool): Whether concatenate coarse feature with + the output of each fc layer. Default: True. + conv_cfg (dict|None): Dictionary to construct and config conv layer. + Default: dict(type='Conv1d')) + norm_cfg (dict|None): Dictionary to construct and config norm layer. + Default: None. + loss_point (dict): Dictionary to construct and config loss layer of + point head. Default: dict(type='CrossEntropyLoss', use_mask=True, + loss_weight=1.0). + """ + + def __init__(self, + num_fcs=3, + coarse_pred_each_layer=True, + conv_cfg=dict(type='Conv1d'), + norm_cfg=None, + act_cfg=dict(type='ReLU', inplace=False), + **kwargs): + super(PointHead, self).__init__( + input_transform='multiple_select', + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + **kwargs) + + self.num_fcs = num_fcs + self.coarse_pred_each_layer = coarse_pred_each_layer + + fc_in_channels = sum(self.in_channels) + self.num_classes + fc_channels = self.channels + self.fcs = nn.ModuleList() + for k in range(num_fcs): + fc = ConvModule( + fc_in_channels, + fc_channels, + kernel_size=1, + stride=1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + self.fcs.append(fc) + fc_in_channels = fc_channels + fc_in_channels += self.num_classes if self.coarse_pred_each_layer \ + else 0 + self.fc_seg = nn.Conv1d( + fc_in_channels, + self.num_classes, + kernel_size=1, + stride=1, + padding=0) + if self.dropout_ratio > 0: + self.dropout = nn.Dropout(self.dropout_ratio) + delattr(self, 'conv_seg') + + def init_weights(self): + """Initialize weights of classification layer.""" + normal_init(self.fc_seg, std=0.001) + + def cls_seg(self, feat): + """Classify each pixel with fc.""" + if self.dropout is not None: + feat = self.dropout(feat) + output = self.fc_seg(feat) + return output + + def forward(self, fine_grained_point_feats, coarse_point_feats): + x = torch.cat([fine_grained_point_feats, coarse_point_feats], dim=1) + for fc in self.fcs: + x = fc(x) + if self.coarse_pred_each_layer: + x = torch.cat((x, coarse_point_feats), dim=1) + return self.cls_seg(x) + + def _get_fine_grained_point_feats(self, x, points): + """Sample from fine grained features. + + Args: + x (list[Tensor]): Feature pyramid from by neck or backbone. + points (Tensor): Point coordinates, shape (batch_size, + num_points, 2). + + Returns: + fine_grained_feats (Tensor): Sampled fine grained feature, + shape (batch_size, sum(channels of x), num_points). + """ + + fine_grained_feats_list = [ + point_sample(_, points, align_corners=self.align_corners) + for _ in x + ] + if len(fine_grained_feats_list) > 1: + fine_grained_feats = torch.cat(fine_grained_feats_list, dim=1) + else: + fine_grained_feats = fine_grained_feats_list[0] + + return fine_grained_feats + + def _get_coarse_point_feats(self, prev_output, points): + """Sample from fine grained features. + + Args: + prev_output (list[Tensor]): Prediction of previous decode head. + points (Tensor): Point coordinates, shape (batch_size, + num_points, 2). + + Returns: + coarse_feats (Tensor): Sampled coarse feature, shape (batch_size, + num_classes, num_points). + """ + + coarse_feats = point_sample( + prev_output, points, align_corners=self.align_corners) + + return coarse_feats + + def forward_train(self, inputs, prev_output, img_metas, gt_semantic_seg, + train_cfg): + """Forward function for training. 
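+
+ In outline: uncertain points are sampled from the coarse prediction
+ (``get_points_train``), fine-grained and coarse features are gathered at
+ those points, and the point-wise loss is computed against labels sampled
+ at the same normalized coordinates.
+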
+ Args: + inputs (list[Tensor]): List of multi-level img features. + prev_output (Tensor): The output of previous decode head. + img_metas (list[dict]): List of image info dict where each dict + has: 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:Collect`. + gt_semantic_seg (Tensor): Semantic segmentation masks + used if the architecture supports semantic segmentation task. + train_cfg (dict): The training config. + + Returns: + dict[str, Tensor]: a dictionary of loss components + """ + x = self._transform_inputs(inputs) + with torch.no_grad(): + points = self.get_points_train( + prev_output, calculate_uncertainty, cfg=train_cfg) + fine_grained_point_feats = self._get_fine_grained_point_feats( + x, points) + coarse_point_feats = self._get_coarse_point_feats(prev_output, points) + point_logits = self.forward(fine_grained_point_feats, + coarse_point_feats) + point_label = point_sample( + gt_semantic_seg.float(), + points, + mode='nearest', + align_corners=self.align_corners) + point_label = point_label.squeeze(1).long() + + losses = self.losses(point_logits, point_label) + + return losses + + def forward_test(self, inputs, prev_output, img_metas, test_cfg): + """Forward function for testing. + + Args: + inputs (list[Tensor]): List of multi-level img features. + prev_output (Tensor): The output of previous decode head. + img_metas (list[dict]): List of image info dict where each dict + has: 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:Collect`. + test_cfg (dict): The testing config. + + Returns: + Tensor: Output segmentation map. + """ + + x = self._transform_inputs(inputs) + refined_seg_logits = prev_output.clone() + for _ in range(test_cfg.subdivision_steps): + refined_seg_logits = resize( + refined_seg_logits, + scale_factor=test_cfg.scale_factor, + mode='bilinear', + align_corners=self.align_corners) + batch_size, channels, height, width = refined_seg_logits.shape + point_indices, points = self.get_points_test( + refined_seg_logits, calculate_uncertainty, cfg=test_cfg) + fine_grained_point_feats = self._get_fine_grained_point_feats( + x, points) + coarse_point_feats = self._get_coarse_point_feats( + prev_output, points) + point_logits = self.forward(fine_grained_point_feats, + coarse_point_feats) + + point_indices = point_indices.unsqueeze(1).expand(-1, channels, -1) + refined_seg_logits = refined_seg_logits.reshape( + batch_size, channels, height * width) + refined_seg_logits = refined_seg_logits.scatter_( + 2, point_indices, point_logits) + refined_seg_logits = refined_seg_logits.view( + batch_size, channels, height, width) + + return refined_seg_logits + + def losses(self, point_logits, point_label): + """Compute segmentation loss.""" + loss = dict() + loss['loss_point'] = self.loss_decode( + point_logits, point_label, ignore_index=self.ignore_index) + loss['acc_point'] = accuracy(point_logits, point_label) + return loss + + def get_points_train(self, seg_logits, uncertainty_func, cfg): + """Sample points for training. + + Sample points in [0, 1] x [0, 1] coordinate space based on their + uncertainty. The uncertainties are calculated for each point using + 'uncertainty_func' function that takes point's logit prediction as + input. 
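+
+ Following the PointRend sampling scheme, ``num_points * oversample_ratio``
+ candidates are drawn uniformly, the ``importance_sample_ratio * num_points``
+ most uncertain candidates are kept, and the remaining points are re-drawn
+ uniformly at random.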
+ + Args: + seg_logits (Tensor): Semantic segmentation logits, shape ( + batch_size, num_classes, height, width). + uncertainty_func (func): uncertainty calculation function. + cfg (dict): Training config of point head. + + Returns: + point_coords (Tensor): A tensor of shape (batch_size, num_points, + 2) that contains the coordinates of ``num_points`` sampled + points. + """ + num_points = cfg.num_points + oversample_ratio = cfg.oversample_ratio + importance_sample_ratio = cfg.importance_sample_ratio + assert oversample_ratio >= 1 + assert 0 <= importance_sample_ratio <= 1 + batch_size = seg_logits.shape[0] + num_sampled = int(num_points * oversample_ratio) + point_coords = torch.rand( + batch_size, num_sampled, 2, device=seg_logits.device) + point_logits = point_sample(seg_logits, point_coords) + # It is crucial to calculate uncertainty based on the sampled + # prediction value for the points. Calculating uncertainties of the + # coarse predictions first and sampling them for points leads to + # incorrect results. To illustrate this: assume uncertainty func( + # logits)=-abs(logits), a sampled point between two coarse + # predictions with -1 and 1 logits has 0 logits, and therefore 0 + # uncertainty value. However, if we calculate uncertainties for the + # coarse predictions first, both will have -1 uncertainty, + # and sampled point will get -1 uncertainty. + point_uncertainties = uncertainty_func(point_logits) + num_uncertain_points = int(importance_sample_ratio * num_points) + num_random_points = num_points - num_uncertain_points + idx = torch.topk( + point_uncertainties[:, 0, :], k=num_uncertain_points, dim=1)[1] + shift = num_sampled * torch.arange( + batch_size, dtype=torch.long, device=seg_logits.device) + idx += shift[:, None] + point_coords = point_coords.view(-1, 2)[idx.view(-1), :].view( + batch_size, num_uncertain_points, 2) + if num_random_points > 0: + rand_point_coords = torch.rand( + batch_size, num_random_points, 2, device=seg_logits.device) + point_coords = torch.cat((point_coords, rand_point_coords), dim=1) + return point_coords + + def get_points_test(self, seg_logits, uncertainty_func, cfg): + """Sample points for testing. + + Find ``num_points`` most uncertain points from ``uncertainty_map``. + + Args: + seg_logits (Tensor): A tensor of shape (batch_size, num_classes, + height, width) for class-specific or class-agnostic prediction. + uncertainty_func (func): uncertainty calculation function. + cfg (dict): Testing config of point head. + + Returns: + point_indices (Tensor): A tensor of shape (batch_size, num_points) + that contains indices from [0, height x width) of the most + uncertain points. + point_coords (Tensor): A tensor of shape (batch_size, num_points, + 2) that contains [0, 1] x [0, 1] normalized coordinates of the + most uncertain points from the ``height x width`` grid . 
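+
+ Example:
+ For a ``4 x 4`` grid (``h_step = w_step = 0.25``), flat index 6
+ falls in row ``6 // 4 = 1`` and column ``6 % 4 = 2``, giving the
+ normalized cell center ``(x, y) = (0.625, 0.375)``.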
+ """ + + num_points = cfg.subdivision_num_points + uncertainty_map = uncertainty_func(seg_logits) + batch_size, _, height, width = uncertainty_map.shape + h_step = 1.0 / height + w_step = 1.0 / width + + uncertainty_map = uncertainty_map.view(batch_size, height * width) + num_points = min(height * width, num_points) + point_indices = uncertainty_map.topk(num_points, dim=1)[1] + point_coords = torch.zeros( + batch_size, + num_points, + 2, + dtype=torch.float, + device=seg_logits.device) + point_coords[:, :, 0] = w_step / 2.0 + (point_indices % + width).float() * w_step + point_coords[:, :, 1] = h_step / 2.0 + (point_indices // + width).float() * h_step + return point_indices, point_coords diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/psa_head.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/psa_head.py new file mode 100644 index 0000000000000000000000000000000000000000..ba6fe3a8b8f8dc7c4e4d3b9bc09e9642c0b3732f --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/psa_head.py @@ -0,0 +1,199 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from annotator.mmpkg.mmcv.cnn import ConvModule + +from annotator.mmpkg.mmseg.ops import resize +from ..builder import HEADS +from .decode_head import BaseDecodeHead + +try: + try: + from mmcv.ops import PSAMask + except ImportError: + from annotator.mmpkg.mmcv.ops import PSAMask +except ModuleNotFoundError: + PSAMask = None + + +@HEADS.register_module() +class PSAHead(BaseDecodeHead): + """Point-wise Spatial Attention Network for Scene Parsing. + + This head is the implementation of `PSANet + `_. + + Args: + mask_size (tuple[int]): The PSA mask size. It usually equals input + size. + psa_type (str): The type of psa module. Options are 'collect', + 'distribute', 'bi-direction'. Default: 'bi-direction' + compact (bool): Whether use compact map for 'collect' mode. + Default: True. + shrink_factor (int): The downsample factors of psa mask. Default: 2. + normalization_factor (float): The normalize factor of attention. + psa_softmax (bool): Whether use softmax for attention. 
+ """ + + def __init__(self, + mask_size, + psa_type='bi-direction', + compact=False, + shrink_factor=2, + normalization_factor=1.0, + psa_softmax=True, + **kwargs): + if PSAMask is None: + raise RuntimeError('Please install mmcv-full for PSAMask ops') + super(PSAHead, self).__init__(**kwargs) + assert psa_type in ['collect', 'distribute', 'bi-direction'] + self.psa_type = psa_type + self.compact = compact + self.shrink_factor = shrink_factor + self.mask_size = mask_size + mask_h, mask_w = mask_size + self.psa_softmax = psa_softmax + if normalization_factor is None: + normalization_factor = mask_h * mask_w + self.normalization_factor = normalization_factor + + self.reduce = ConvModule( + self.in_channels, + self.channels, + kernel_size=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.attention = nn.Sequential( + ConvModule( + self.channels, + self.channels, + kernel_size=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg), + nn.Conv2d( + self.channels, mask_h * mask_w, kernel_size=1, bias=False)) + if psa_type == 'bi-direction': + self.reduce_p = ConvModule( + self.in_channels, + self.channels, + kernel_size=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.attention_p = nn.Sequential( + ConvModule( + self.channels, + self.channels, + kernel_size=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg), + nn.Conv2d( + self.channels, mask_h * mask_w, kernel_size=1, bias=False)) + self.psamask_collect = PSAMask('collect', mask_size) + self.psamask_distribute = PSAMask('distribute', mask_size) + else: + self.psamask = PSAMask(psa_type, mask_size) + self.proj = ConvModule( + self.channels * (2 if psa_type == 'bi-direction' else 1), + self.in_channels, + kernel_size=1, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.bottleneck = ConvModule( + self.in_channels * 2, + self.channels, + kernel_size=3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + identity = x + align_corners = self.align_corners + if self.psa_type in ['collect', 'distribute']: + out = self.reduce(x) + n, c, h, w = out.size() + if self.shrink_factor != 1: + if h % self.shrink_factor and w % self.shrink_factor: + h = (h - 1) // self.shrink_factor + 1 + w = (w - 1) // self.shrink_factor + 1 + align_corners = True + else: + h = h // self.shrink_factor + w = w // self.shrink_factor + align_corners = False + out = resize( + out, + size=(h, w), + mode='bilinear', + align_corners=align_corners) + y = self.attention(out) + if self.compact: + if self.psa_type == 'collect': + y = y.view(n, h * w, + h * w).transpose(1, 2).view(n, h * w, h, w) + else: + y = self.psamask(y) + if self.psa_softmax: + y = F.softmax(y, dim=1) + out = torch.bmm( + out.view(n, c, h * w), y.view(n, h * w, h * w)).view( + n, c, h, w) * (1.0 / self.normalization_factor) + else: + x_col = self.reduce(x) + x_dis = self.reduce_p(x) + n, c, h, w = x_col.size() + if self.shrink_factor != 1: + if h % self.shrink_factor and w % self.shrink_factor: + h = (h - 1) // self.shrink_factor + 1 + w = (w - 1) // self.shrink_factor + 1 + align_corners = True + else: + h = h // self.shrink_factor + w = w // self.shrink_factor + align_corners = False + x_col = resize( + x_col, + size=(h, w), + mode='bilinear', + align_corners=align_corners) + x_dis = resize( + x_dis, + size=(h, w), + 
mode='bilinear', + align_corners=align_corners) + y_col = self.attention(x_col) + y_dis = self.attention_p(x_dis) + if self.compact: + y_dis = y_dis.view(n, h * w, + h * w).transpose(1, 2).view(n, h * w, h, w) + else: + y_col = self.psamask_collect(y_col) + y_dis = self.psamask_distribute(y_dis) + if self.psa_softmax: + y_col = F.softmax(y_col, dim=1) + y_dis = F.softmax(y_dis, dim=1) + x_col = torch.bmm( + x_col.view(n, c, h * w), y_col.view(n, h * w, h * w)).view( + n, c, h, w) * (1.0 / self.normalization_factor) + x_dis = torch.bmm( + x_dis.view(n, c, h * w), y_dis.view(n, h * w, h * w)).view( + n, c, h, w) * (1.0 / self.normalization_factor) + out = torch.cat([x_col, x_dis], 1) + out = self.proj(out) + out = resize( + out, + size=identity.shape[2:], + mode='bilinear', + align_corners=align_corners) + out = self.bottleneck(torch.cat((identity, out), dim=1)) + out = self.cls_seg(out) + return out diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/psp_head.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/psp_head.py new file mode 100644 index 0000000000000000000000000000000000000000..2a88d807bfe11fe224305f8de745cde3aa739db0 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/psp_head.py @@ -0,0 +1,101 @@ +import torch +import torch.nn as nn +from annotator.mmpkg.mmcv.cnn import ConvModule + +from annotator.mmpkg.mmseg.ops import resize +from ..builder import HEADS +from .decode_head import BaseDecodeHead + + +class PPM(nn.ModuleList): + """Pooling Pyramid Module used in PSPNet. + + Args: + pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module. + in_channels (int): Input channels. + channels (int): Channels after modules, before conv_seg. + conv_cfg (dict|None): Config of conv layers. + norm_cfg (dict|None): Config of norm layers. + act_cfg (dict): Config of activation layers. + align_corners (bool): align_corners argument of F.interpolate. + """ + + def __init__(self, pool_scales, in_channels, channels, conv_cfg, norm_cfg, + act_cfg, align_corners): + super(PPM, self).__init__() + self.pool_scales = pool_scales + self.align_corners = align_corners + self.in_channels = in_channels + self.channels = channels + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + for pool_scale in pool_scales: + self.append( + nn.Sequential( + nn.AdaptiveAvgPool2d(pool_scale), + ConvModule( + self.in_channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg))) + + def forward(self, x): + """Forward function.""" + ppm_outs = [] + for ppm in self: + ppm_out = ppm(x) + upsampled_ppm_out = resize( + ppm_out, + size=x.size()[2:], + mode='bilinear', + align_corners=self.align_corners) + ppm_outs.append(upsampled_ppm_out) + return ppm_outs + + +@HEADS.register_module() +class PSPHead(BaseDecodeHead): + """Pyramid Scene Parsing Network. + + This head is the implementation of + `PSPNet `_. + + Args: + pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module. Default: (1, 2, 3, 6). 
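+
+ For an input of shape ``(N, in_channels, H, W)`` every pooling scale yields
+ a ``channels``-dim map that is upsampled back to ``(H, W)``, so the
+ bottleneck consumes ``in_channels + len(pool_scales) * channels`` channels.
+
+ Example (illustrative values, not a prescribed config):
+ >>> head = PSPHead(in_channels=2048, channels=512,
+ ... num_classes=19, pool_scales=(1, 2, 3, 6))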
+ """ + + def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs): + super(PSPHead, self).__init__(**kwargs) + assert isinstance(pool_scales, (list, tuple)) + self.pool_scales = pool_scales + self.psp_modules = PPM( + self.pool_scales, + self.in_channels, + self.channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + align_corners=self.align_corners) + self.bottleneck = ConvModule( + self.in_channels + len(pool_scales) * self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + psp_outs = [x] + psp_outs.extend(self.psp_modules(x)) + psp_outs = torch.cat(psp_outs, dim=1) + output = self.bottleneck(psp_outs) + output = self.cls_seg(output) + return output diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/sep_aspp_head.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/sep_aspp_head.py new file mode 100644 index 0000000000000000000000000000000000000000..a23970699df7afd86f483316be3c8d1a34d43c18 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/sep_aspp_head.py @@ -0,0 +1,101 @@ +import torch +import torch.nn as nn +from annotator.mmpkg.mmcv.cnn import ConvModule, DepthwiseSeparableConvModule + +from annotator.mmpkg.mmseg.ops import resize +from ..builder import HEADS +from .aspp_head import ASPPHead, ASPPModule + + +class DepthwiseSeparableASPPModule(ASPPModule): + """Atrous Spatial Pyramid Pooling (ASPP) Module with depthwise separable + conv.""" + + def __init__(self, **kwargs): + super(DepthwiseSeparableASPPModule, self).__init__(**kwargs) + for i, dilation in enumerate(self.dilations): + if dilation > 1: + self[i] = DepthwiseSeparableConvModule( + self.in_channels, + self.channels, + 3, + dilation=dilation, + padding=dilation, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + +@HEADS.register_module() +class DepthwiseSeparableASPPHead(ASPPHead): + """Encoder-Decoder with Atrous Separable Convolution for Semantic Image + Segmentation. + + This head is the implementation of `DeepLabV3+ + `_. + + Args: + c1_in_channels (int): The input channels of c1 decoder. If is 0, + the no decoder will be used. + c1_channels (int): The intermediate channels of c1 decoder. 
+ """ + + def __init__(self, c1_in_channels, c1_channels, **kwargs): + super(DepthwiseSeparableASPPHead, self).__init__(**kwargs) + assert c1_in_channels >= 0 + self.aspp_modules = DepthwiseSeparableASPPModule( + dilations=self.dilations, + in_channels=self.in_channels, + channels=self.channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + if c1_in_channels > 0: + self.c1_bottleneck = ConvModule( + c1_in_channels, + c1_channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + else: + self.c1_bottleneck = None + self.sep_bottleneck = nn.Sequential( + DepthwiseSeparableConvModule( + self.channels + c1_channels, + self.channels, + 3, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg), + DepthwiseSeparableConvModule( + self.channels, + self.channels, + 3, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + aspp_outs = [ + resize( + self.image_pool(x), + size=x.size()[2:], + mode='bilinear', + align_corners=self.align_corners) + ] + aspp_outs.extend(self.aspp_modules(x)) + aspp_outs = torch.cat(aspp_outs, dim=1) + output = self.bottleneck(aspp_outs) + if self.c1_bottleneck is not None: + c1_output = self.c1_bottleneck(inputs[0]) + output = resize( + input=output, + size=c1_output.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + output = torch.cat([output, c1_output], dim=1) + output = self.sep_bottleneck(output) + output = self.cls_seg(output) + return output diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/sep_fcn_head.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/sep_fcn_head.py new file mode 100644 index 0000000000000000000000000000000000000000..3ea198ab8a96919dfb6974fd73b1476aa488aef2 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/sep_fcn_head.py @@ -0,0 +1,51 @@ +from annotator.mmpkg.mmcv.cnn import DepthwiseSeparableConvModule + +from ..builder import HEADS +from .fcn_head import FCNHead + + +@HEADS.register_module() +class DepthwiseSeparableFCNHead(FCNHead): + """Depthwise-Separable Fully Convolutional Network for Semantic + Segmentation. + + This head is implemented according to Fast-SCNN paper. + Args: + in_channels(int): Number of output channels of FFM. + channels(int): Number of middle-stage channels in the decode head. + concat_input(bool): Whether to concatenate original decode input into + the result of several consecutive convolution layers. + Default: True. + num_classes(int): Used to determine the dimension of + final prediction tensor. + in_index(int): Correspond with 'out_indices' in FastSCNN backbone. + norm_cfg (dict | None): Config of norm layers. + align_corners (bool): align_corners argument of F.interpolate. + Default: False. + loss_decode(dict): Config of loss type and some + relevant additional options. 
+ """ + + def __init__(self, **kwargs): + super(DepthwiseSeparableFCNHead, self).__init__(**kwargs) + self.convs[0] = DepthwiseSeparableConvModule( + self.in_channels, + self.channels, + kernel_size=self.kernel_size, + padding=self.kernel_size // 2, + norm_cfg=self.norm_cfg) + for i in range(1, self.num_convs): + self.convs[i] = DepthwiseSeparableConvModule( + self.channels, + self.channels, + kernel_size=self.kernel_size, + padding=self.kernel_size // 2, + norm_cfg=self.norm_cfg) + + if self.concat_input: + self.conv_cat = DepthwiseSeparableConvModule( + self.in_channels + self.channels, + self.channels, + kernel_size=self.kernel_size, + padding=self.kernel_size // 2, + norm_cfg=self.norm_cfg) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/uper_head.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/uper_head.py new file mode 100644 index 0000000000000000000000000000000000000000..952473578c1f5b903f5fc7f9d13a4e40ea5dec87 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/decode_heads/uper_head.py @@ -0,0 +1,126 @@ +import torch +import torch.nn as nn +from annotator.mmpkg.mmcv.cnn import ConvModule + +from annotator.mmpkg.mmseg.ops import resize +from ..builder import HEADS +from .decode_head import BaseDecodeHead +from .psp_head import PPM + + +@HEADS.register_module() +class UPerHead(BaseDecodeHead): + """Unified Perceptual Parsing for Scene Understanding. + + This head is the implementation of `UPerNet + `_. + + Args: + pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module applied on the last feature. Default: (1, 2, 3, 6). + """ + + def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs): + super(UPerHead, self).__init__( + input_transform='multiple_select', **kwargs) + # PSP Module + self.psp_modules = PPM( + pool_scales, + self.in_channels[-1], + self.channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + align_corners=self.align_corners) + self.bottleneck = ConvModule( + self.in_channels[-1] + len(pool_scales) * self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + # FPN Module + self.lateral_convs = nn.ModuleList() + self.fpn_convs = nn.ModuleList() + for in_channels in self.in_channels[:-1]: # skip the top layer + l_conv = ConvModule( + in_channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + inplace=False) + fpn_conv = ConvModule( + self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + inplace=False) + self.lateral_convs.append(l_conv) + self.fpn_convs.append(fpn_conv) + + self.fpn_bottleneck = ConvModule( + len(self.in_channels) * self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def psp_forward(self, inputs): + """Forward function of PSP module.""" + x = inputs[-1] + psp_outs = [x] + psp_outs.extend(self.psp_modules(x)) + psp_outs = torch.cat(psp_outs, dim=1) + output = self.bottleneck(psp_outs) + + return output + + def forward(self, inputs): + """Forward function.""" + + inputs = self._transform_inputs(inputs) + + # build laterals + laterals = [ + lateral_conv(inputs[i]) + for i, lateral_conv in enumerate(self.lateral_convs) + ] + + laterals.append(self.psp_forward(inputs)) + + # build top-down path + 
used_backbone_levels = len(laterals) + for i in range(used_backbone_levels - 1, 0, -1): + prev_shape = laterals[i - 1].shape[2:] + laterals[i - 1] += resize( + laterals[i], + size=prev_shape, + mode='bilinear', + align_corners=self.align_corners) + + # build outputs + fpn_outs = [ + self.fpn_convs[i](laterals[i]) + for i in range(used_backbone_levels - 1) + ] + # append psp feature + fpn_outs.append(laterals[-1]) + + for i in range(used_backbone_levels - 1, 0, -1): + fpn_outs[i] = resize( + fpn_outs[i], + size=fpn_outs[0].shape[2:], + mode='bilinear', + align_corners=self.align_corners) + fpn_outs = torch.cat(fpn_outs, dim=1) + output = self.fpn_bottleneck(fpn_outs) + output = self.cls_seg(output) + return output diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/losses/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/losses/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..beca72045694273d63465bac2f27dbc6672271db --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/losses/__init__.py @@ -0,0 +1,12 @@ +from .accuracy import Accuracy, accuracy +from .cross_entropy_loss import (CrossEntropyLoss, binary_cross_entropy, + cross_entropy, mask_cross_entropy) +from .dice_loss import DiceLoss +from .lovasz_loss import LovaszLoss +from .utils import reduce_loss, weight_reduce_loss, weighted_loss + +__all__ = [ + 'accuracy', 'Accuracy', 'cross_entropy', 'binary_cross_entropy', + 'mask_cross_entropy', 'CrossEntropyLoss', 'reduce_loss', + 'weight_reduce_loss', 'weighted_loss', 'LovaszLoss', 'DiceLoss' +] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/losses/accuracy.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/losses/accuracy.py new file mode 100644 index 0000000000000000000000000000000000000000..c0fd2e7e74a0f721c4a814c09d6e453e5956bb38 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/losses/accuracy.py @@ -0,0 +1,78 @@ +import torch.nn as nn + + +def accuracy(pred, target, topk=1, thresh=None): + """Calculate accuracy according to the prediction and target. + + Args: + pred (torch.Tensor): The model prediction, shape (N, num_class, ...) + target (torch.Tensor): The target of each prediction, shape (N, , ...) + topk (int | tuple[int], optional): If the predictions in ``topk`` + matches the target, the predictions will be regarded as + correct ones. Defaults to 1. + thresh (float, optional): If not None, predictions with scores under + this threshold are considered incorrect. Default to None. + + Returns: + float | tuple[float]: If the input ``topk`` is a single integer, + the function will return a single float as accuracy. If + ``topk`` is a tuple containing multiple integers, the + function will return a tuple containing accuracies of + each ``topk`` number. + """ + assert isinstance(topk, (int, tuple)) + if isinstance(topk, int): + topk = (topk, ) + return_single = True + else: + return_single = False + + maxk = max(topk) + if pred.size(0) == 0: + accu = [pred.new_tensor(0.) for i in range(len(topk))] + return accu[0] if return_single else accu + assert pred.ndim == target.ndim + 1 + assert pred.size(0) == target.size(0) + assert maxk <= pred.size(1), \ + f'maxk {maxk} exceeds pred dimension {pred.size(1)}' + pred_value, pred_label = pred.topk(maxk, dim=1) + # transpose to shape (maxk, N, ...) 
+ pred_label = pred_label.transpose(0, 1) + correct = pred_label.eq(target.unsqueeze(0).expand_as(pred_label)) + if thresh is not None: + # Only prediction values larger than thresh are counted as correct + correct = correct & (pred_value > thresh).t() + res = [] + for k in topk: + correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) + res.append(correct_k.mul_(100.0 / target.numel())) + return res[0] if return_single else res + + +class Accuracy(nn.Module): + """Accuracy calculation module.""" + + def __init__(self, topk=(1, ), thresh=None): + """Module to calculate the accuracy. + + Args: + topk (tuple, optional): The criterion used to calculate the + accuracy. Defaults to (1,). + thresh (float, optional): If not None, predictions with scores + under this threshold are considered incorrect. Default to None. + """ + super().__init__() + self.topk = topk + self.thresh = thresh + + def forward(self, pred, target): + """Forward function to calculate accuracy. + + Args: + pred (torch.Tensor): Prediction of models. + target (torch.Tensor): Target for each prediction. + + Returns: + tuple[float]: The accuracies under different topk criterions. + """ + return accuracy(pred, target, self.topk, self.thresh) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/losses/cross_entropy_loss.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/losses/cross_entropy_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..42c0790c98616bb69621deed55547fc04c7392ef --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/losses/cross_entropy_loss.py @@ -0,0 +1,198 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ..builder import LOSSES +from .utils import get_class_weight, weight_reduce_loss + + +def cross_entropy(pred, + label, + weight=None, + class_weight=None, + reduction='mean', + avg_factor=None, + ignore_index=-100): + """The wrapper function for :func:`F.cross_entropy`""" + # class_weight is a manual rescaling weight given to each class. + # If given, has to be a Tensor of size C element-wise losses + loss = F.cross_entropy( + pred, + label, + weight=class_weight, + reduction='none', + ignore_index=ignore_index) + + # apply weights and do the reduction + if weight is not None: + weight = weight.float() + loss = weight_reduce_loss( + loss, weight=weight, reduction=reduction, avg_factor=avg_factor) + + return loss + + +def _expand_onehot_labels(labels, label_weights, target_shape, ignore_index): + """Expand onehot labels to match the size of prediction.""" + bin_labels = labels.new_zeros(target_shape) + valid_mask = (labels >= 0) & (labels != ignore_index) + inds = torch.nonzero(valid_mask, as_tuple=True) + + if inds[0].numel() > 0: + if labels.dim() == 3: + bin_labels[inds[0], labels[valid_mask], inds[1], inds[2]] = 1 + else: + bin_labels[inds[0], labels[valid_mask]] = 1 + + valid_mask = valid_mask.unsqueeze(1).expand(target_shape).float() + if label_weights is None: + bin_label_weights = valid_mask + else: + bin_label_weights = label_weights.unsqueeze(1).expand(target_shape) + bin_label_weights *= valid_mask + + return bin_labels, bin_label_weights + + +def binary_cross_entropy(pred, + label, + weight=None, + reduction='mean', + avg_factor=None, + class_weight=None, + ignore_index=255): + """Calculate the binary CrossEntropy loss. + + Args: + pred (torch.Tensor): The prediction with shape (N, 1). 
+ label (torch.Tensor): The learning label of the prediction. + weight (torch.Tensor, optional): Sample-wise loss weight. + reduction (str, optional): The method used to reduce the loss. + Options are "none", "mean" and "sum". + avg_factor (int, optional): Average factor that is used to average + the loss. Defaults to None. + class_weight (list[float], optional): The weight for each class. + ignore_index (int | None): The label index to be ignored. Default: 255 + + Returns: + torch.Tensor: The calculated loss + """ + if pred.dim() != label.dim(): + assert (pred.dim() == 2 and label.dim() == 1) or ( + pred.dim() == 4 and label.dim() == 3), \ + 'Only pred shape [N, C], label shape [N] or pred shape [N, C, ' \ + 'H, W], label shape [N, H, W] are supported' + label, weight = _expand_onehot_labels(label, weight, pred.shape, + ignore_index) + + # weighted element-wise losses + if weight is not None: + weight = weight.float() + loss = F.binary_cross_entropy_with_logits( + pred, label.float(), pos_weight=class_weight, reduction='none') + # do the reduction for the weighted loss + loss = weight_reduce_loss( + loss, weight, reduction=reduction, avg_factor=avg_factor) + + return loss + + +def mask_cross_entropy(pred, + target, + label, + reduction='mean', + avg_factor=None, + class_weight=None, + ignore_index=None): + """Calculate the CrossEntropy loss for masks. + + Args: + pred (torch.Tensor): The prediction with shape (N, C), C is the number + of classes. + target (torch.Tensor): The learning label of the prediction. + label (torch.Tensor): ``label`` indicates the class label of the mask' + corresponding object. This will be used to select the mask in the + of the class which the object belongs to when the mask prediction + if not class-agnostic. + reduction (str, optional): The method used to reduce the loss. + Options are "none", "mean" and "sum". + avg_factor (int, optional): Average factor that is used to average + the loss. Defaults to None. + class_weight (list[float], optional): The weight for each class. + ignore_index (None): Placeholder, to be consistent with other loss. + Default: None. + + Returns: + torch.Tensor: The calculated loss + """ + assert ignore_index is None, 'BCE loss does not support ignore_index' + # TODO: handle these two reserved arguments + assert reduction == 'mean' and avg_factor is None + num_rois = pred.size()[0] + inds = torch.arange(0, num_rois, dtype=torch.long, device=pred.device) + pred_slice = pred[inds, label].squeeze(1) + return F.binary_cross_entropy_with_logits( + pred_slice, target, weight=class_weight, reduction='mean')[None] + + +@LOSSES.register_module() +class CrossEntropyLoss(nn.Module): + """CrossEntropyLoss. + + Args: + use_sigmoid (bool, optional): Whether the prediction uses sigmoid + of softmax. Defaults to False. + use_mask (bool, optional): Whether to use mask cross entropy loss. + Defaults to False. + reduction (str, optional): . Defaults to 'mean'. + Options are "none", "mean" and "sum". + class_weight (list[float] | str, optional): Weight of each class. If in + str format, read them from a file. Defaults to None. + loss_weight (float, optional): Weight of the loss. Defaults to 1.0. 
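+
+    Example (illustrative sketch; the tensor shapes below are assumed values):
+        >>> import torch
+        >>> loss_fn = CrossEntropyLoss(use_sigmoid=False, loss_weight=1.0)
+        >>> seg_logit = torch.randn(2, 19, 32, 32)          # (N, C, H, W)
+        >>> seg_label = torch.randint(0, 19, (2, 32, 32))   # (N, H, W)
+        >>> loss = loss_fn(seg_logit, seg_label)            # scalar tensor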
+ """ + + def __init__(self, + use_sigmoid=False, + use_mask=False, + reduction='mean', + class_weight=None, + loss_weight=1.0): + super(CrossEntropyLoss, self).__init__() + assert (use_sigmoid is False) or (use_mask is False) + self.use_sigmoid = use_sigmoid + self.use_mask = use_mask + self.reduction = reduction + self.loss_weight = loss_weight + self.class_weight = get_class_weight(class_weight) + + if self.use_sigmoid: + self.cls_criterion = binary_cross_entropy + elif self.use_mask: + self.cls_criterion = mask_cross_entropy + else: + self.cls_criterion = cross_entropy + + def forward(self, + cls_score, + label, + weight=None, + avg_factor=None, + reduction_override=None, + **kwargs): + """Forward function.""" + assert reduction_override in (None, 'none', 'mean', 'sum') + reduction = ( + reduction_override if reduction_override else self.reduction) + if self.class_weight is not None: + class_weight = cls_score.new_tensor(self.class_weight) + else: + class_weight = None + loss_cls = self.loss_weight * self.cls_criterion( + cls_score, + label, + weight, + class_weight=class_weight, + reduction=reduction, + avg_factor=avg_factor, + **kwargs) + return loss_cls diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/losses/dice_loss.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/losses/dice_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..27a77b962d7d8b3079c7d6cd9db52280c6fb4970 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/losses/dice_loss.py @@ -0,0 +1,119 @@ +"""Modified from https://github.com/LikeLy-Journey/SegmenTron/blob/master/ +segmentron/solver/loss.py (Apache-2.0 License)""" +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ..builder import LOSSES +from .utils import get_class_weight, weighted_loss + + +@weighted_loss +def dice_loss(pred, + target, + valid_mask, + smooth=1, + exponent=2, + class_weight=None, + ignore_index=255): + assert pred.shape[0] == target.shape[0] + total_loss = 0 + num_classes = pred.shape[1] + for i in range(num_classes): + if i != ignore_index: + dice_loss = binary_dice_loss( + pred[:, i], + target[..., i], + valid_mask=valid_mask, + smooth=smooth, + exponent=exponent) + if class_weight is not None: + dice_loss *= class_weight[i] + total_loss += dice_loss + return total_loss / num_classes + + +@weighted_loss +def binary_dice_loss(pred, target, valid_mask, smooth=1, exponent=2, **kwards): + assert pred.shape[0] == target.shape[0] + pred = pred.reshape(pred.shape[0], -1) + target = target.reshape(target.shape[0], -1) + valid_mask = valid_mask.reshape(valid_mask.shape[0], -1) + + num = torch.sum(torch.mul(pred, target) * valid_mask, dim=1) * 2 + smooth + den = torch.sum(pred.pow(exponent) + target.pow(exponent), dim=1) + smooth + + return 1 - num / den + + +@LOSSES.register_module() +class DiceLoss(nn.Module): + """DiceLoss. + + This loss is proposed in `V-Net: Fully Convolutional Neural Networks for + Volumetric Medical Image Segmentation `_. + + Args: + loss_type (str, optional): Binary or multi-class loss. + Default: 'multi_class'. Options are "binary" and "multi_class". + smooth (float): A float number to smooth loss, and avoid NaN error. + Default: 1 + exponent (float): An float number to calculate denominator + value: \\sum{x^exponent} + \\sum{y^exponent}. Default: 2. + reduction (str, optional): The method used to reduce the loss. Options + are "none", "mean" and "sum". 
This parameter only works when + per_image is True. Default: 'mean'. + class_weight (list[float] | str, optional): Weight of each class. If in + str format, read them from a file. Defaults to None. + loss_weight (float, optional): Weight of the loss. Default to 1.0. + ignore_index (int | None): The label index to be ignored. Default: 255. + """ + + def __init__(self, + smooth=1, + exponent=2, + reduction='mean', + class_weight=None, + loss_weight=1.0, + ignore_index=255, + **kwards): + super(DiceLoss, self).__init__() + self.smooth = smooth + self.exponent = exponent + self.reduction = reduction + self.class_weight = get_class_weight(class_weight) + self.loss_weight = loss_weight + self.ignore_index = ignore_index + + def forward(self, + pred, + target, + avg_factor=None, + reduction_override=None, + **kwards): + assert reduction_override in (None, 'none', 'mean', 'sum') + reduction = ( + reduction_override if reduction_override else self.reduction) + if self.class_weight is not None: + class_weight = pred.new_tensor(self.class_weight) + else: + class_weight = None + + pred = F.softmax(pred, dim=1) + num_classes = pred.shape[1] + one_hot_target = F.one_hot( + torch.clamp(target.long(), 0, num_classes - 1), + num_classes=num_classes) + valid_mask = (target != self.ignore_index).long() + + loss = self.loss_weight * dice_loss( + pred, + one_hot_target, + valid_mask=valid_mask, + reduction=reduction, + avg_factor=avg_factor, + smooth=self.smooth, + exponent=self.exponent, + class_weight=class_weight, + ignore_index=self.ignore_index) + return loss diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/losses/lovasz_loss.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/losses/lovasz_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..50f0f70fd432316b081a0114c28df61d320b5a47 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/losses/lovasz_loss.py @@ -0,0 +1,303 @@ +"""Modified from https://github.com/bermanmaxim/LovaszSoftmax/blob/master/pytor +ch/lovasz_losses.py Lovasz-Softmax and Jaccard hinge loss in PyTorch Maxim +Berman 2018 ESAT-PSI KU Leuven (MIT License)""" + +import annotator.mmpkg.mmcv as mmcv +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ..builder import LOSSES +from .utils import get_class_weight, weight_reduce_loss + + +def lovasz_grad(gt_sorted): + """Computes gradient of the Lovasz extension w.r.t sorted errors. + + See Alg. 1 in paper. + """ + p = len(gt_sorted) + gts = gt_sorted.sum() + intersection = gts - gt_sorted.float().cumsum(0) + union = gts + (1 - gt_sorted).float().cumsum(0) + jaccard = 1. 
- intersection / union + if p > 1: # cover 1-pixel case + jaccard[1:p] = jaccard[1:p] - jaccard[0:-1] + return jaccard + + +def flatten_binary_logits(logits, labels, ignore_index=None): + """Flattens predictions in the batch (binary case) Remove labels equal to + 'ignore_index'.""" + logits = logits.view(-1) + labels = labels.view(-1) + if ignore_index is None: + return logits, labels + valid = (labels != ignore_index) + vlogits = logits[valid] + vlabels = labels[valid] + return vlogits, vlabels + + +def flatten_probs(probs, labels, ignore_index=None): + """Flattens predictions in the batch.""" + if probs.dim() == 3: + # assumes output of a sigmoid layer + B, H, W = probs.size() + probs = probs.view(B, 1, H, W) + B, C, H, W = probs.size() + probs = probs.permute(0, 2, 3, 1).contiguous().view(-1, C) # B*H*W, C=P,C + labels = labels.view(-1) + if ignore_index is None: + return probs, labels + valid = (labels != ignore_index) + vprobs = probs[valid.nonzero().squeeze()] + vlabels = labels[valid] + return vprobs, vlabels + + +def lovasz_hinge_flat(logits, labels): + """Binary Lovasz hinge loss. + + Args: + logits (torch.Tensor): [P], logits at each prediction + (between -infty and +infty). + labels (torch.Tensor): [P], binary ground truth labels (0 or 1). + + Returns: + torch.Tensor: The calculated loss. + """ + if len(labels) == 0: + # only void pixels, the gradients should be 0 + return logits.sum() * 0. + signs = 2. * labels.float() - 1. + errors = (1. - logits * signs) + errors_sorted, perm = torch.sort(errors, dim=0, descending=True) + perm = perm.data + gt_sorted = labels[perm] + grad = lovasz_grad(gt_sorted) + loss = torch.dot(F.relu(errors_sorted), grad) + return loss + + +def lovasz_hinge(logits, + labels, + classes='present', + per_image=False, + class_weight=None, + reduction='mean', + avg_factor=None, + ignore_index=255): + """Binary Lovasz hinge loss. + + Args: + logits (torch.Tensor): [B, H, W], logits at each pixel + (between -infty and +infty). + labels (torch.Tensor): [B, H, W], binary ground truth masks (0 or 1). + classes (str | list[int], optional): Placeholder, to be consistent with + other loss. Default: None. + per_image (bool, optional): If per_image is True, compute the loss per + image instead of per batch. Default: False. + class_weight (list[float], optional): Placeholder, to be consistent + with other loss. Default: None. + reduction (str, optional): The method used to reduce the loss. Options + are "none", "mean" and "sum". This parameter only works when + per_image is True. Default: 'mean'. + avg_factor (int, optional): Average factor that is used to average + the loss. This parameter only works when per_image is True. + Default: None. + ignore_index (int | None): The label index to be ignored. Default: 255. + + Returns: + torch.Tensor: The calculated loss. + """ + if per_image: + loss = [ + lovasz_hinge_flat(*flatten_binary_logits( + logit.unsqueeze(0), label.unsqueeze(0), ignore_index)) + for logit, label in zip(logits, labels) + ] + loss = weight_reduce_loss( + torch.stack(loss), None, reduction, avg_factor) + else: + loss = lovasz_hinge_flat( + *flatten_binary_logits(logits, labels, ignore_index)) + return loss + + +def lovasz_softmax_flat(probs, labels, classes='present', class_weight=None): + """Multi-class Lovasz-Softmax loss. + + Args: + probs (torch.Tensor): [P, C], class probabilities at each prediction + (between 0 and 1). + labels (torch.Tensor): [P], ground truth labels (between 0 and C - 1). 
+ classes (str | list[int], optional): Classes chosen to calculate loss. + 'all' for all classes, 'present' for classes present in labels, or + a list of classes to average. Default: 'present'. + class_weight (list[float], optional): The weight for each class. + Default: None. + + Returns: + torch.Tensor: The calculated loss. + """ + if probs.numel() == 0: + # only void pixels, the gradients should be 0 + return probs * 0. + C = probs.size(1) + losses = [] + class_to_sum = list(range(C)) if classes in ['all', 'present'] else classes + for c in class_to_sum: + fg = (labels == c).float() # foreground for class c + if (classes == 'present' and fg.sum() == 0): + continue + if C == 1: + if len(classes) > 1: + raise ValueError('Sigmoid output possible only with 1 class') + class_pred = probs[:, 0] + else: + class_pred = probs[:, c] + errors = (fg - class_pred).abs() + errors_sorted, perm = torch.sort(errors, 0, descending=True) + perm = perm.data + fg_sorted = fg[perm] + loss = torch.dot(errors_sorted, lovasz_grad(fg_sorted)) + if class_weight is not None: + loss *= class_weight[c] + losses.append(loss) + return torch.stack(losses).mean() + + +def lovasz_softmax(probs, + labels, + classes='present', + per_image=False, + class_weight=None, + reduction='mean', + avg_factor=None, + ignore_index=255): + """Multi-class Lovasz-Softmax loss. + + Args: + probs (torch.Tensor): [B, C, H, W], class probabilities at each + prediction (between 0 and 1). + labels (torch.Tensor): [B, H, W], ground truth labels (between 0 and + C - 1). + classes (str | list[int], optional): Classes chosen to calculate loss. + 'all' for all classes, 'present' for classes present in labels, or + a list of classes to average. Default: 'present'. + per_image (bool, optional): If per_image is True, compute the loss per + image instead of per batch. Default: False. + class_weight (list[float], optional): The weight for each class. + Default: None. + reduction (str, optional): The method used to reduce the loss. Options + are "none", "mean" and "sum". This parameter only works when + per_image is True. Default: 'mean'. + avg_factor (int, optional): Average factor that is used to average + the loss. This parameter only works when per_image is True. + Default: None. + ignore_index (int | None): The label index to be ignored. Default: 255. + + Returns: + torch.Tensor: The calculated loss. + """ + + if per_image: + loss = [ + lovasz_softmax_flat( + *flatten_probs( + prob.unsqueeze(0), label.unsqueeze(0), ignore_index), + classes=classes, + class_weight=class_weight) + for prob, label in zip(probs, labels) + ] + loss = weight_reduce_loss( + torch.stack(loss), None, reduction, avg_factor) + else: + loss = lovasz_softmax_flat( + *flatten_probs(probs, labels, ignore_index), + classes=classes, + class_weight=class_weight) + return loss + + +@LOSSES.register_module() +class LovaszLoss(nn.Module): + """LovaszLoss. + + This loss is proposed in `The Lovasz-Softmax loss: A tractable surrogate + for the optimization of the intersection-over-union measure in neural + networks `_. + + Args: + loss_type (str, optional): Binary or multi-class loss. + Default: 'multi_class'. Options are "binary" and "multi_class". + classes (str | list[int], optional): Classes chosen to calculate loss. + 'all' for all classes, 'present' for classes present in labels, or + a list of classes to average. Default: 'present'. + per_image (bool, optional): If per_image is True, compute the loss per + image instead of per batch. Default: False. 
+ reduction (str, optional): The method used to reduce the loss. Options + are "none", "mean" and "sum". This parameter only works when + per_image is True. Default: 'mean'. + class_weight (list[float] | str, optional): Weight of each class. If in + str format, read them from a file. Defaults to None. + loss_weight (float, optional): Weight of the loss. Defaults to 1.0. + """ + + def __init__(self, + loss_type='multi_class', + classes='present', + per_image=False, + reduction='mean', + class_weight=None, + loss_weight=1.0): + super(LovaszLoss, self).__init__() + assert loss_type in ('binary', 'multi_class'), "loss_type should be \ + 'binary' or 'multi_class'." + + if loss_type == 'binary': + self.cls_criterion = lovasz_hinge + else: + self.cls_criterion = lovasz_softmax + assert classes in ('all', 'present') or mmcv.is_list_of(classes, int) + if not per_image: + assert reduction == 'none', "reduction should be 'none' when \ + per_image is False." + + self.classes = classes + self.per_image = per_image + self.reduction = reduction + self.loss_weight = loss_weight + self.class_weight = get_class_weight(class_weight) + + def forward(self, + cls_score, + label, + weight=None, + avg_factor=None, + reduction_override=None, + **kwargs): + """Forward function.""" + assert reduction_override in (None, 'none', 'mean', 'sum') + reduction = ( + reduction_override if reduction_override else self.reduction) + if self.class_weight is not None: + class_weight = cls_score.new_tensor(self.class_weight) + else: + class_weight = None + + # if multi-class loss, transform logits to probs + if self.cls_criterion == lovasz_softmax: + cls_score = F.softmax(cls_score, dim=1) + + loss_cls = self.loss_weight * self.cls_criterion( + cls_score, + label, + self.classes, + self.per_image, + class_weight=class_weight, + reduction=reduction, + avg_factor=avg_factor, + **kwargs) + return loss_cls diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/losses/utils.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/losses/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..2afb477a153ba9dead71066fa66ee024482afd82 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/losses/utils.py @@ -0,0 +1,121 @@ +import functools + +import annotator.mmpkg.mmcv as mmcv +import numpy as np +import torch.nn.functional as F + + +def get_class_weight(class_weight): + """Get class weight for loss function. + + Args: + class_weight (list[float] | str | None): If class_weight is a str, + take it as a file name and read from it. + """ + if isinstance(class_weight, str): + # take it as a file path + if class_weight.endswith('.npy'): + class_weight = np.load(class_weight) + else: + # pkl, json or yaml + class_weight = mmcv.load(class_weight) + + return class_weight + + +def reduce_loss(loss, reduction): + """Reduce loss as specified. + + Args: + loss (Tensor): Elementwise loss tensor. + reduction (str): Options are "none", "mean" and "sum". + + Return: + Tensor: Reduced loss tensor. + """ + reduction_enum = F._Reduction.get_enum(reduction) + # none: 0, elementwise_mean:1, sum: 2 + if reduction_enum == 0: + return loss + elif reduction_enum == 1: + return loss.mean() + elif reduction_enum == 2: + return loss.sum() + + +def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None): + """Apply element-wise weight and reduce loss. + + Args: + loss (Tensor): Element-wise loss. 
+ weight (Tensor): Element-wise weights. + reduction (str): Same as built-in losses of PyTorch. + avg_factor (float): Avarage factor when computing the mean of losses. + + Returns: + Tensor: Processed loss values. + """ + # if weight is specified, apply element-wise weight + if weight is not None: + assert weight.dim() == loss.dim() + if weight.dim() > 1: + assert weight.size(1) == 1 or weight.size(1) == loss.size(1) + loss = loss * weight + + # if avg_factor is not specified, just reduce the loss + if avg_factor is None: + loss = reduce_loss(loss, reduction) + else: + # if reduction is mean, then average the loss by avg_factor + if reduction == 'mean': + loss = loss.sum() / avg_factor + # if reduction is 'none', then do nothing, otherwise raise an error + elif reduction != 'none': + raise ValueError('avg_factor can not be used with reduction="sum"') + return loss + + +def weighted_loss(loss_func): + """Create a weighted version of a given loss function. + + To use this decorator, the loss function must have the signature like + `loss_func(pred, target, **kwargs)`. The function only needs to compute + element-wise loss without any reduction. This decorator will add weight + and reduction arguments to the function. The decorated function will have + the signature like `loss_func(pred, target, weight=None, reduction='mean', + avg_factor=None, **kwargs)`. + + :Example: + + >>> import torch + >>> @weighted_loss + >>> def l1_loss(pred, target): + >>> return (pred - target).abs() + + >>> pred = torch.Tensor([0, 2, 3]) + >>> target = torch.Tensor([1, 1, 1]) + >>> weight = torch.Tensor([1, 0, 1]) + + >>> l1_loss(pred, target) + tensor(1.3333) + >>> l1_loss(pred, target, weight) + tensor(1.) + >>> l1_loss(pred, target, reduction='none') + tensor([1., 1., 2.]) + >>> l1_loss(pred, target, weight, avg_factor=2) + tensor(1.5000) + """ + + @functools.wraps(loss_func) + def wrapper(pred, + target, + weight=None, + reduction='mean', + avg_factor=None, + **kwargs): + # get element-wise loss + loss = loss_func(pred, target, **kwargs) + loss = weight_reduce_loss(loss, weight, reduction, avg_factor) + return loss + + return wrapper diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/necks/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/necks/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9b9d3d5b3fe80247642d962edd6fb787537d01d6 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/necks/__init__.py @@ -0,0 +1,4 @@ +from .fpn import FPN +from .multilevel_neck import MultiLevelNeck + +__all__ = ['FPN', 'MultiLevelNeck'] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/necks/fpn.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/necks/fpn.py new file mode 100644 index 0000000000000000000000000000000000000000..ba47bbe1a0225587315627ac288e5ddf6497a244 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/necks/fpn.py @@ -0,0 +1,212 @@ +import torch.nn as nn +import torch.nn.functional as F +from annotator.mmpkg.mmcv.cnn import ConvModule, xavier_init + +from ..builder import NECKS + + +@NECKS.register_module() +class FPN(nn.Module): + """Feature Pyramid Network. + + This is an implementation of - Feature Pyramid Networks for Object + Detection (https://arxiv.org/abs/1612.03144) + + Args: + in_channels (List[int]): Number of input channels per scale. 
+ out_channels (int): Number of output channels (used at each scale) + num_outs (int): Number of output scales. + start_level (int): Index of the start input backbone level used to + build the feature pyramid. Default: 0. + end_level (int): Index of the end input backbone level (exclusive) to + build the feature pyramid. Default: -1, which means the last level. + add_extra_convs (bool | str): If bool, it decides whether to add conv + layers on top of the original feature maps. Default to False. + If True, its actual mode is specified by `extra_convs_on_inputs`. + If str, it specifies the source feature map of the extra convs. + Only the following options are allowed + + - 'on_input': Last feat map of neck inputs (i.e. backbone feature). + - 'on_lateral': Last feature map after lateral convs. + - 'on_output': The last output feature map after fpn convs. + extra_convs_on_inputs (bool, deprecated): Whether to apply extra convs + on the original feature from the backbone. If True, + it is equivalent to `add_extra_convs='on_input'`. If False, it is + equivalent to set `add_extra_convs='on_output'`. Default to True. + relu_before_extra_convs (bool): Whether to apply relu before the extra + conv. Default: False. + no_norm_on_lateral (bool): Whether to apply norm on lateral. + Default: False. + conv_cfg (dict): Config dict for convolution layer. Default: None. + norm_cfg (dict): Config dict for normalization layer. Default: None. + act_cfg (str): Config dict for activation layer in ConvModule. + Default: None. + upsample_cfg (dict): Config dict for interpolate layer. + Default: `dict(mode='nearest')` + + Example: + >>> import torch + >>> in_channels = [2, 3, 5, 7] + >>> scales = [340, 170, 84, 43] + >>> inputs = [torch.rand(1, c, s, s) + ... for c, s in zip(in_channels, scales)] + >>> self = FPN(in_channels, 11, len(in_channels)).eval() + >>> outputs = self.forward(inputs) + >>> for i in range(len(outputs)): + ... 
print(f'outputs[{i}].shape = {outputs[i].shape}') + outputs[0].shape = torch.Size([1, 11, 340, 340]) + outputs[1].shape = torch.Size([1, 11, 170, 170]) + outputs[2].shape = torch.Size([1, 11, 84, 84]) + outputs[3].shape = torch.Size([1, 11, 43, 43]) + """ + + def __init__(self, + in_channels, + out_channels, + num_outs, + start_level=0, + end_level=-1, + add_extra_convs=False, + extra_convs_on_inputs=False, + relu_before_extra_convs=False, + no_norm_on_lateral=False, + conv_cfg=None, + norm_cfg=None, + act_cfg=None, + upsample_cfg=dict(mode='nearest')): + super(FPN, self).__init__() + assert isinstance(in_channels, list) + self.in_channels = in_channels + self.out_channels = out_channels + self.num_ins = len(in_channels) + self.num_outs = num_outs + self.relu_before_extra_convs = relu_before_extra_convs + self.no_norm_on_lateral = no_norm_on_lateral + self.fp16_enabled = False + self.upsample_cfg = upsample_cfg.copy() + + if end_level == -1: + self.backbone_end_level = self.num_ins + assert num_outs >= self.num_ins - start_level + else: + # if end_level < inputs, no extra level is allowed + self.backbone_end_level = end_level + assert end_level <= len(in_channels) + assert num_outs == end_level - start_level + self.start_level = start_level + self.end_level = end_level + self.add_extra_convs = add_extra_convs + assert isinstance(add_extra_convs, (str, bool)) + if isinstance(add_extra_convs, str): + # Extra_convs_source choices: 'on_input', 'on_lateral', 'on_output' + assert add_extra_convs in ('on_input', 'on_lateral', 'on_output') + elif add_extra_convs: # True + if extra_convs_on_inputs: + # For compatibility with previous release + # TODO: deprecate `extra_convs_on_inputs` + self.add_extra_convs = 'on_input' + else: + self.add_extra_convs = 'on_output' + + self.lateral_convs = nn.ModuleList() + self.fpn_convs = nn.ModuleList() + + for i in range(self.start_level, self.backbone_end_level): + l_conv = ConvModule( + in_channels[i], + out_channels, + 1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg if not self.no_norm_on_lateral else None, + act_cfg=act_cfg, + inplace=False) + fpn_conv = ConvModule( + out_channels, + out_channels, + 3, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + inplace=False) + + self.lateral_convs.append(l_conv) + self.fpn_convs.append(fpn_conv) + + # add extra conv layers (e.g., RetinaNet) + extra_levels = num_outs - self.backbone_end_level + self.start_level + if self.add_extra_convs and extra_levels >= 1: + for i in range(extra_levels): + if i == 0 and self.add_extra_convs == 'on_input': + in_channels = self.in_channels[self.backbone_end_level - 1] + else: + in_channels = out_channels + extra_fpn_conv = ConvModule( + in_channels, + out_channels, + 3, + stride=2, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + inplace=False) + self.fpn_convs.append(extra_fpn_conv) + + # default init_weights for conv(msra) and norm in ConvModule + def init_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + xavier_init(m, distribution='uniform') + + def forward(self, inputs): + assert len(inputs) == len(self.in_channels) + + # build laterals + laterals = [ + lateral_conv(inputs[i + self.start_level]) + for i, lateral_conv in enumerate(self.lateral_convs) + ] + + # build top-down path + used_backbone_levels = len(laterals) + for i in range(used_backbone_levels - 1, 0, -1): + # In some cases, fixing `scale factor` (e.g. 2) is preferred, but + # it cannot co-exist with `size` in `F.interpolate`. 
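+            # so: upsample by the configured factor when one is given,
+            # otherwise upsample to the exact size of the next finer lateral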
+ if 'scale_factor' in self.upsample_cfg: + laterals[i - 1] += F.interpolate(laterals[i], + **self.upsample_cfg) + else: + prev_shape = laterals[i - 1].shape[2:] + laterals[i - 1] += F.interpolate( + laterals[i], size=prev_shape, **self.upsample_cfg) + + # build outputs + # part 1: from original levels + outs = [ + self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels) + ] + # part 2: add extra levels + if self.num_outs > len(outs): + # use max pool to get more levels on top of outputs + # (e.g., Faster R-CNN, Mask R-CNN) + if not self.add_extra_convs: + for i in range(self.num_outs - used_backbone_levels): + outs.append(F.max_pool2d(outs[-1], 1, stride=2)) + # add conv layers on top of original feature maps (RetinaNet) + else: + if self.add_extra_convs == 'on_input': + extra_source = inputs[self.backbone_end_level - 1] + elif self.add_extra_convs == 'on_lateral': + extra_source = laterals[-1] + elif self.add_extra_convs == 'on_output': + extra_source = outs[-1] + else: + raise NotImplementedError + outs.append(self.fpn_convs[used_backbone_levels](extra_source)) + for i in range(used_backbone_levels + 1, self.num_outs): + if self.relu_before_extra_convs: + outs.append(self.fpn_convs[i](F.relu(outs[-1]))) + else: + outs.append(self.fpn_convs[i](outs[-1])) + return tuple(outs) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/necks/multilevel_neck.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/necks/multilevel_neck.py new file mode 100644 index 0000000000000000000000000000000000000000..0b86c073cd1a72354d2426846125e80f7ab20dbc --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/necks/multilevel_neck.py @@ -0,0 +1,70 @@ +import torch.nn as nn +import torch.nn.functional as F +from annotator.mmpkg.mmcv.cnn import ConvModule + +from ..builder import NECKS + + +@NECKS.register_module() +class MultiLevelNeck(nn.Module): + """MultiLevelNeck. + + A neck structure connect vit backbone and decoder_heads. + Args: + in_channels (List[int]): Number of input channels per scale. + out_channels (int): Number of output channels (used at each scale). + scales (List[int]): Scale factors for each input feature map. + norm_cfg (dict): Config dict for normalization layer. Default: None. + act_cfg (dict): Config dict for activation layer in ConvModule. + Default: None. 
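+
+    Example (illustrative sketch; the ViT-like channel count and input size
+    below are assumed values, not defaults):
+        >>> import torch
+        >>> neck = MultiLevelNeck([768], 768, scales=[0.5, 1, 2, 4])
+        >>> outs = neck([torch.rand(1, 768, 32, 32)])
+        >>> # four 768-channel maps at 16x16, 32x32, 64x64 and 128x128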
+ """ + + def __init__(self, + in_channels, + out_channels, + scales=[0.5, 1, 2, 4], + norm_cfg=None, + act_cfg=None): + super(MultiLevelNeck, self).__init__() + assert isinstance(in_channels, list) + self.in_channels = in_channels + self.out_channels = out_channels + self.scales = scales + self.num_outs = len(scales) + self.lateral_convs = nn.ModuleList() + self.convs = nn.ModuleList() + for in_channel in in_channels: + self.lateral_convs.append( + ConvModule( + in_channel, + out_channels, + kernel_size=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + for _ in range(self.num_outs): + self.convs.append( + ConvModule( + out_channels, + out_channels, + kernel_size=3, + padding=1, + stride=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + + def forward(self, inputs): + assert len(inputs) == len(self.in_channels) + print(inputs[0].shape) + inputs = [ + lateral_conv(inputs[i]) + for i, lateral_conv in enumerate(self.lateral_convs) + ] + # for len(inputs) not equal to self.num_outs + if len(inputs) == 1: + inputs = [inputs[0] for _ in range(self.num_outs)] + outs = [] + for i in range(self.num_outs): + x_resize = F.interpolate( + inputs[i], scale_factor=self.scales[i], mode='bilinear') + outs.append(self.convs[i](x_resize)) + return tuple(outs) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/segmentors/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/segmentors/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..dca2f09405330743c476e190896bee39c45498ea --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/segmentors/__init__.py @@ -0,0 +1,5 @@ +from .base import BaseSegmentor +from .cascade_encoder_decoder import CascadeEncoderDecoder +from .encoder_decoder import EncoderDecoder + +__all__ = ['BaseSegmentor', 'EncoderDecoder', 'CascadeEncoderDecoder'] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/segmentors/base.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/segmentors/base.py new file mode 100644 index 0000000000000000000000000000000000000000..a12d8beb8ea40bfa234197eddb4d3ef40dbfeb6f --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/segmentors/base.py @@ -0,0 +1,273 @@ +import logging +import warnings +from abc import ABCMeta, abstractmethod +from collections import OrderedDict + +import annotator.mmpkg.mmcv as mmcv +import numpy as np +import torch +import torch.distributed as dist +import torch.nn as nn +from annotator.mmpkg.mmcv.runner import auto_fp16 + + +class BaseSegmentor(nn.Module): + """Base class for segmentors.""" + + __metaclass__ = ABCMeta + + def __init__(self): + super(BaseSegmentor, self).__init__() + self.fp16_enabled = False + + @property + def with_neck(self): + """bool: whether the segmentor has neck""" + return hasattr(self, 'neck') and self.neck is not None + + @property + def with_auxiliary_head(self): + """bool: whether the segmentor has auxiliary head""" + return hasattr(self, + 'auxiliary_head') and self.auxiliary_head is not None + + @property + def with_decode_head(self): + """bool: whether the segmentor has decode head""" + return hasattr(self, 'decode_head') and self.decode_head is not None + + @abstractmethod + def extract_feat(self, imgs): + """Placeholder for extract features from images.""" + pass + + @abstractmethod + def encode_decode(self, img, img_metas): + """Placeholder for encode images with 
backbone and decode into a + semantic segmentation map of the same size as input.""" + pass + + @abstractmethod + def forward_train(self, imgs, img_metas, **kwargs): + """Placeholder for Forward function for training.""" + pass + + @abstractmethod + def simple_test(self, img, img_meta, **kwargs): + """Placeholder for single image test.""" + pass + + @abstractmethod + def aug_test(self, imgs, img_metas, **kwargs): + """Placeholder for augmentation test.""" + pass + + def init_weights(self, pretrained=None): + """Initialize the weights in segmentor. + + Args: + pretrained (str, optional): Path to pre-trained weights. + Defaults to None. + """ + if pretrained is not None: + logger = logging.getLogger() + logger.info(f'load model from: {pretrained}') + + def forward_test(self, imgs, img_metas, **kwargs): + """ + Args: + imgs (List[Tensor]): the outer list indicates test-time + augmentations and inner Tensor should have a shape NxCxHxW, + which contains all images in the batch. + img_metas (List[List[dict]]): the outer list indicates test-time + augs (multiscale, flip, etc.) and the inner list indicates + images in a batch. + """ + for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]: + if not isinstance(var, list): + raise TypeError(f'{name} must be a list, but got ' + f'{type(var)}') + + num_augs = len(imgs) + if num_augs != len(img_metas): + raise ValueError(f'num of augmentations ({len(imgs)}) != ' + f'num of image meta ({len(img_metas)})') + # all images in the same aug batch all of the same ori_shape and pad + # shape + for img_meta in img_metas: + ori_shapes = [_['ori_shape'] for _ in img_meta] + assert all(shape == ori_shapes[0] for shape in ori_shapes) + img_shapes = [_['img_shape'] for _ in img_meta] + assert all(shape == img_shapes[0] for shape in img_shapes) + pad_shapes = [_['pad_shape'] for _ in img_meta] + assert all(shape == pad_shapes[0] for shape in pad_shapes) + + if num_augs == 1: + return self.simple_test(imgs[0], img_metas[0], **kwargs) + else: + return self.aug_test(imgs, img_metas, **kwargs) + + @auto_fp16(apply_to=('img', )) + def forward(self, img, img_metas, return_loss=True, **kwargs): + """Calls either :func:`forward_train` or :func:`forward_test` depending + on whether ``return_loss`` is ``True``. + + Note this setting will change the expected inputs. When + ``return_loss=True``, img and img_meta are single-nested (i.e. Tensor + and List[dict]), and when ``resturn_loss=False``, img and img_meta + should be double nested (i.e. List[Tensor], List[List[dict]]), with + the outer list indicating test time augmentations. + """ + if return_loss: + return self.forward_train(img, img_metas, **kwargs) + else: + return self.forward_test(img, img_metas, **kwargs) + + def train_step(self, data_batch, optimizer, **kwargs): + """The iteration step during training. + + This method defines an iteration step during training, except for the + back propagation and optimizer updating, which are done in an optimizer + hook. Note that in some complicated cases or models, the whole process + including back propagation and optimizer updating is also defined in + this method, such as GAN. + + Args: + data (dict): The output of dataloader. + optimizer (:obj:`torch.optim.Optimizer` | dict): The optimizer of + runner is passed to ``train_step()``. This argument is unused + and reserved. + + Returns: + dict: It should contain at least 3 keys: ``loss``, ``log_vars``, + ``num_samples``. + ``loss`` is a tensor for back propagation, which can be a + weighted sum of multiple losses. 
+ ``log_vars`` contains all the variables to be sent to the + logger. + ``num_samples`` indicates the batch size (when the model is + DDP, it means the batch size on each GPU), which is used for + averaging the logs. + """ + losses = self(**data_batch) + loss, log_vars = self._parse_losses(losses) + + outputs = dict( + loss=loss, + log_vars=log_vars, + num_samples=len(data_batch['img_metas'])) + + return outputs + + def val_step(self, data_batch, **kwargs): + """The iteration step during validation. + + This method shares the same signature as :func:`train_step`, but used + during val epochs. Note that the evaluation after training epochs is + not implemented with this method, but an evaluation hook. + """ + output = self(**data_batch, **kwargs) + return output + + @staticmethod + def _parse_losses(losses): + """Parse the raw outputs (losses) of the network. + + Args: + losses (dict): Raw output of the network, which usually contain + losses and other necessary information. + + Returns: + tuple[Tensor, dict]: (loss, log_vars), loss is the loss tensor + which may be a weighted sum of all losses, log_vars contains + all the variables to be sent to the logger. + """ + log_vars = OrderedDict() + for loss_name, loss_value in losses.items(): + if isinstance(loss_value, torch.Tensor): + log_vars[loss_name] = loss_value.mean() + elif isinstance(loss_value, list): + log_vars[loss_name] = sum(_loss.mean() for _loss in loss_value) + else: + raise TypeError( + f'{loss_name} is not a tensor or list of tensors') + + loss = sum(_value for _key, _value in log_vars.items() + if 'loss' in _key) + + log_vars['loss'] = loss + for loss_name, loss_value in log_vars.items(): + # reduce loss when distributed training + if dist.is_available() and dist.is_initialized(): + loss_value = loss_value.data.clone() + dist.all_reduce(loss_value.div_(dist.get_world_size())) + log_vars[loss_name] = loss_value.item() + + return loss, log_vars + + def show_result(self, + img, + result, + palette=None, + win_name='', + show=False, + wait_time=0, + out_file=None, + opacity=0.5): + """Draw `result` over `img`. + + Args: + img (str or Tensor): The image to be displayed. + result (Tensor): The semantic segmentation results to draw over + `img`. + palette (list[list[int]]] | np.ndarray | None): The palette of + segmentation map. If None is given, random palette will be + generated. Default: None + win_name (str): The window name. + wait_time (int): Value of waitKey param. + Default: 0. + show (bool): Whether to show the image. + Default: False. + out_file (str or None): The filename to write the image. + Default: None. + opacity(float): Opacity of painted segmentation map. + Default 0.5. + Must be in (0, 1] range. 
+ Returns: + img (Tensor): Only if not `show` or `out_file` + """ + img = mmcv.imread(img) + img = img.copy() + seg = result[0] + if palette is None: + if self.PALETTE is None: + palette = np.random.randint( + 0, 255, size=(len(self.CLASSES), 3)) + else: + palette = self.PALETTE + palette = np.array(palette) + assert palette.shape[0] == len(self.CLASSES) + assert palette.shape[1] == 3 + assert len(palette.shape) == 2 + assert 0 < opacity <= 1.0 + color_seg = np.zeros((seg.shape[0], seg.shape[1], 3), dtype=np.uint8) + for label, color in enumerate(palette): + color_seg[seg == label, :] = color + # convert to BGR + color_seg = color_seg[..., ::-1] + + img = img * (1 - opacity) + color_seg * opacity + img = img.astype(np.uint8) + # if out_file specified, do not show image in window + if out_file is not None: + show = False + + if show: + mmcv.imshow(img, win_name, wait_time) + if out_file is not None: + mmcv.imwrite(img, out_file) + + if not (show or out_file): + warnings.warn('show==False and out_file is not specified, only ' + 'result image will be returned') + return img diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/segmentors/cascade_encoder_decoder.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/segmentors/cascade_encoder_decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..74547f0fb01da9fe32c1d142768eb788b7e8673c --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/segmentors/cascade_encoder_decoder.py @@ -0,0 +1,98 @@ +from torch import nn + +from annotator.mmpkg.mmseg.core import add_prefix +from annotator.mmpkg.mmseg.ops import resize +from .. import builder +from ..builder import SEGMENTORS +from .encoder_decoder import EncoderDecoder + + +@SEGMENTORS.register_module() +class CascadeEncoderDecoder(EncoderDecoder): + """Cascade Encoder Decoder segmentors. + + CascadeEncoderDecoder almost the same as EncoderDecoder, while decoders of + CascadeEncoderDecoder are cascaded. The output of previous decoder_head + will be the input of next decoder_head. + """ + + def __init__(self, + num_stages, + backbone, + decode_head, + neck=None, + auxiliary_head=None, + train_cfg=None, + test_cfg=None, + pretrained=None): + self.num_stages = num_stages + super(CascadeEncoderDecoder, self).__init__( + backbone=backbone, + decode_head=decode_head, + neck=neck, + auxiliary_head=auxiliary_head, + train_cfg=train_cfg, + test_cfg=test_cfg, + pretrained=pretrained) + + def _init_decode_head(self, decode_head): + """Initialize ``decode_head``""" + assert isinstance(decode_head, list) + assert len(decode_head) == self.num_stages + self.decode_head = nn.ModuleList() + for i in range(self.num_stages): + self.decode_head.append(builder.build_head(decode_head[i])) + self.align_corners = self.decode_head[-1].align_corners + self.num_classes = self.decode_head[-1].num_classes + + def init_weights(self, pretrained=None): + """Initialize the weights in backbone and heads. + + Args: + pretrained (str, optional): Path to pre-trained weights. + Defaults to None. 
+ """ + self.backbone.init_weights(pretrained=pretrained) + for i in range(self.num_stages): + self.decode_head[i].init_weights() + if self.with_auxiliary_head: + if isinstance(self.auxiliary_head, nn.ModuleList): + for aux_head in self.auxiliary_head: + aux_head.init_weights() + else: + self.auxiliary_head.init_weights() + + def encode_decode(self, img, img_metas): + """Encode images with backbone and decode into a semantic segmentation + map of the same size as input.""" + x = self.extract_feat(img) + out = self.decode_head[0].forward_test(x, img_metas, self.test_cfg) + for i in range(1, self.num_stages): + out = self.decode_head[i].forward_test(x, out, img_metas, + self.test_cfg) + out = resize( + input=out, + size=img.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + return out + + def _decode_head_forward_train(self, x, img_metas, gt_semantic_seg): + """Run forward function and calculate loss for decode head in + training.""" + losses = dict() + + loss_decode = self.decode_head[0].forward_train( + x, img_metas, gt_semantic_seg, self.train_cfg) + + losses.update(add_prefix(loss_decode, 'decode_0')) + + for i in range(1, self.num_stages): + # forward test again, maybe unnecessary for most methods. + prev_outputs = self.decode_head[i - 1].forward_test( + x, img_metas, self.test_cfg) + loss_decode = self.decode_head[i].forward_train( + x, prev_outputs, img_metas, gt_semantic_seg, self.train_cfg) + losses.update(add_prefix(loss_decode, f'decode_{i}')) + + return losses diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/segmentors/encoder_decoder.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/segmentors/encoder_decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..30c25f35a15e65e45f9221a3f19ace8579f73301 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/segmentors/encoder_decoder.py @@ -0,0 +1,298 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from annotator.mmpkg.mmseg.core import add_prefix +from annotator.mmpkg.mmseg.ops import resize +from .. import builder +from ..builder import SEGMENTORS +from .base import BaseSegmentor + + +@SEGMENTORS.register_module() +class EncoderDecoder(BaseSegmentor): + """Encoder Decoder segmentors. + + EncoderDecoder typically consists of backbone, decode_head, auxiliary_head. + Note that auxiliary_head is only used for deep supervision during training, + which could be dumped during inference. 
+ """ + + def __init__(self, + backbone, + decode_head, + neck=None, + auxiliary_head=None, + train_cfg=None, + test_cfg=None, + pretrained=None): + super(EncoderDecoder, self).__init__() + self.backbone = builder.build_backbone(backbone) + if neck is not None: + self.neck = builder.build_neck(neck) + self._init_decode_head(decode_head) + self._init_auxiliary_head(auxiliary_head) + + self.train_cfg = train_cfg + self.test_cfg = test_cfg + + self.init_weights(pretrained=pretrained) + + assert self.with_decode_head + + def _init_decode_head(self, decode_head): + """Initialize ``decode_head``""" + self.decode_head = builder.build_head(decode_head) + self.align_corners = self.decode_head.align_corners + self.num_classes = self.decode_head.num_classes + + def _init_auxiliary_head(self, auxiliary_head): + """Initialize ``auxiliary_head``""" + if auxiliary_head is not None: + if isinstance(auxiliary_head, list): + self.auxiliary_head = nn.ModuleList() + for head_cfg in auxiliary_head: + self.auxiliary_head.append(builder.build_head(head_cfg)) + else: + self.auxiliary_head = builder.build_head(auxiliary_head) + + def init_weights(self, pretrained=None): + """Initialize the weights in backbone and heads. + + Args: + pretrained (str, optional): Path to pre-trained weights. + Defaults to None. + """ + + super(EncoderDecoder, self).init_weights(pretrained) + self.backbone.init_weights(pretrained=pretrained) + self.decode_head.init_weights() + if self.with_auxiliary_head: + if isinstance(self.auxiliary_head, nn.ModuleList): + for aux_head in self.auxiliary_head: + aux_head.init_weights() + else: + self.auxiliary_head.init_weights() + + def extract_feat(self, img): + """Extract features from images.""" + x = self.backbone(img) + if self.with_neck: + x = self.neck(x) + return x + + def encode_decode(self, img, img_metas): + """Encode images with backbone and decode into a semantic segmentation + map of the same size as input.""" + x = self.extract_feat(img) + out = self._decode_head_forward_test(x, img_metas) + out = resize( + input=out, + size=img.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + return out + + def _decode_head_forward_train(self, x, img_metas, gt_semantic_seg): + """Run forward function and calculate loss for decode head in + training.""" + losses = dict() + loss_decode = self.decode_head.forward_train(x, img_metas, + gt_semantic_seg, + self.train_cfg) + + losses.update(add_prefix(loss_decode, 'decode')) + return losses + + def _decode_head_forward_test(self, x, img_metas): + """Run forward function and calculate loss for decode head in + inference.""" + seg_logits = self.decode_head.forward_test(x, img_metas, self.test_cfg) + return seg_logits + + def _auxiliary_head_forward_train(self, x, img_metas, gt_semantic_seg): + """Run forward function and calculate loss for auxiliary head in + training.""" + losses = dict() + if isinstance(self.auxiliary_head, nn.ModuleList): + for idx, aux_head in enumerate(self.auxiliary_head): + loss_aux = aux_head.forward_train(x, img_metas, + gt_semantic_seg, + self.train_cfg) + losses.update(add_prefix(loss_aux, f'aux_{idx}')) + else: + loss_aux = self.auxiliary_head.forward_train( + x, img_metas, gt_semantic_seg, self.train_cfg) + losses.update(add_prefix(loss_aux, 'aux')) + + return losses + + def forward_dummy(self, img): + """Dummy forward function.""" + seg_logit = self.encode_decode(img, None) + + return seg_logit + + def forward_train(self, img, img_metas, gt_semantic_seg): + """Forward function for training. 
+ + Args: + img (Tensor): Input images. + img_metas (list[dict]): List of image info dict where each dict + has: 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:Collect`. + gt_semantic_seg (Tensor): Semantic segmentation masks + used if the architecture supports semantic segmentation task. + + Returns: + dict[str, Tensor]: a dictionary of loss components + """ + + x = self.extract_feat(img) + + losses = dict() + + loss_decode = self._decode_head_forward_train(x, img_metas, + gt_semantic_seg) + losses.update(loss_decode) + + if self.with_auxiliary_head: + loss_aux = self._auxiliary_head_forward_train( + x, img_metas, gt_semantic_seg) + losses.update(loss_aux) + + return losses + + # TODO refactor + def slide_inference(self, img, img_meta, rescale): + """Inference by sliding-window with overlap. + + If h_crop > h_img or w_crop > w_img, the small patch will be used to + decode without padding. + """ + + h_stride, w_stride = self.test_cfg.stride + h_crop, w_crop = self.test_cfg.crop_size + batch_size, _, h_img, w_img = img.size() + num_classes = self.num_classes + h_grids = max(h_img - h_crop + h_stride - 1, 0) // h_stride + 1 + w_grids = max(w_img - w_crop + w_stride - 1, 0) // w_stride + 1 + preds = img.new_zeros((batch_size, num_classes, h_img, w_img)) + count_mat = img.new_zeros((batch_size, 1, h_img, w_img)) + for h_idx in range(h_grids): + for w_idx in range(w_grids): + y1 = h_idx * h_stride + x1 = w_idx * w_stride + y2 = min(y1 + h_crop, h_img) + x2 = min(x1 + w_crop, w_img) + y1 = max(y2 - h_crop, 0) + x1 = max(x2 - w_crop, 0) + crop_img = img[:, :, y1:y2, x1:x2] + crop_seg_logit = self.encode_decode(crop_img, img_meta) + preds += F.pad(crop_seg_logit, + (int(x1), int(preds.shape[3] - x2), int(y1), + int(preds.shape[2] - y2))) + + count_mat[:, :, y1:y2, x1:x2] += 1 + assert (count_mat == 0).sum() == 0 + if torch.onnx.is_in_onnx_export(): + # cast count_mat to constant while exporting to ONNX + count_mat = torch.from_numpy( + count_mat.cpu().detach().numpy()).to(device=img.device) + preds = preds / count_mat + if rescale: + preds = resize( + preds, + size=img_meta[0]['ori_shape'][:2], + mode='bilinear', + align_corners=self.align_corners, + warning=False) + return preds + + def whole_inference(self, img, img_meta, rescale): + """Inference with full image.""" + + seg_logit = self.encode_decode(img, img_meta) + if rescale: + # support dynamic shape for onnx + if torch.onnx.is_in_onnx_export(): + size = img.shape[2:] + else: + size = img_meta[0]['ori_shape'][:2] + seg_logit = resize( + seg_logit, + size=size, + mode='bilinear', + align_corners=self.align_corners, + warning=False) + + return seg_logit + + def inference(self, img, img_meta, rescale): + """Inference with slide/whole style. + + Args: + img (Tensor): The input image of shape (N, 3, H, W). + img_meta (dict): Image info dict where each dict has: 'img_shape', + 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:Collect`. + rescale (bool): Whether rescale back to original shape. + + Returns: + Tensor: The output segmentation map. 
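`slide_inference` above tiles the image with overlapping crops, pastes each crop's logits back onto a full-size canvas, and divides by how many windows covered each pixel. A stripped-down NumPy sketch of just that bookkeeping, with a constant stand-in for `encode_decode` so the averaged result is trivially all ones:

import numpy as np

h_img, w_img, num_classes = 6, 6, 3
h_crop, w_crop, h_stride, w_stride = 4, 4, 2, 2

preds = np.zeros((num_classes, h_img, w_img))
count_mat = np.zeros((1, h_img, w_img))

h_grids = max(h_img - h_crop + h_stride - 1, 0) // h_stride + 1
w_grids = max(w_img - w_crop + w_stride - 1, 0) // w_stride + 1

for h_idx in range(h_grids):
    for w_idx in range(w_grids):
        y1, x1 = h_idx * h_stride, w_idx * w_stride
        y2, x2 = min(y1 + h_crop, h_img), min(x1 + w_crop, w_img)
        y1, x1 = max(y2 - h_crop, 0), max(x2 - w_crop, 0)        # clamp at the border
        crop_logit = np.ones((num_classes, y2 - y1, x2 - x1))    # stand-in for encode_decode
        preds[:, y1:y2, x1:x2] += crop_logit
        count_mat[:, y1:y2, x1:x2] += 1

assert (count_mat == 0).sum() == 0    # every pixel covered at least once
preds /= count_mat                    # average overlapping predictions
print(h_grids, w_grids, preds.min(), preds.max())   # 2 2 1.0 1.0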
+ """ + + assert self.test_cfg.mode in ['slide', 'whole'] + ori_shape = img_meta[0]['ori_shape'] + assert all(_['ori_shape'] == ori_shape for _ in img_meta) + if self.test_cfg.mode == 'slide': + seg_logit = self.slide_inference(img, img_meta, rescale) + else: + seg_logit = self.whole_inference(img, img_meta, rescale) + output = F.softmax(seg_logit, dim=1) + flip = img_meta[0]['flip'] + if flip: + flip_direction = img_meta[0]['flip_direction'] + assert flip_direction in ['horizontal', 'vertical'] + if flip_direction == 'horizontal': + output = output.flip(dims=(3, )) + elif flip_direction == 'vertical': + output = output.flip(dims=(2, )) + + return output + + def simple_test(self, img, img_meta, rescale=True): + """Simple test with single image.""" + seg_logit = self.inference(img, img_meta, rescale) + seg_pred = seg_logit.argmax(dim=1) + if torch.onnx.is_in_onnx_export(): + # our inference backend only support 4D output + seg_pred = seg_pred.unsqueeze(0) + return seg_pred + seg_pred = seg_pred.cpu().numpy() + # unravel batch dim + seg_pred = list(seg_pred) + return seg_pred + + def aug_test(self, imgs, img_metas, rescale=True): + """Test with augmentations. + + Only rescale=True is supported. + """ + # aug_test rescale all imgs back to ori_shape for now + assert rescale + # to save memory, we get augmented seg logit inplace + seg_logit = self.inference(imgs[0], img_metas[0], rescale) + for i in range(1, len(imgs)): + cur_seg_logit = self.inference(imgs[i], img_metas[i], rescale) + seg_logit += cur_seg_logit + seg_logit /= len(imgs) + seg_pred = seg_logit.argmax(dim=1) + seg_pred = seg_pred.cpu().numpy() + # unravel batch dim + seg_pred = list(seg_pred) + return seg_pred diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/utils/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3d3bdd349b9f2ae499a2fcb2ac1d2e3c77befebe --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/utils/__init__.py @@ -0,0 +1,13 @@ +from .drop import DropPath +from .inverted_residual import InvertedResidual, InvertedResidualV3 +from .make_divisible import make_divisible +from .res_layer import ResLayer +from .se_layer import SELayer +from .self_attention_block import SelfAttentionBlock +from .up_conv_block import UpConvBlock +from .weight_init import trunc_normal_ + +__all__ = [ + 'ResLayer', 'SelfAttentionBlock', 'make_divisible', 'InvertedResidual', + 'UpConvBlock', 'InvertedResidualV3', 'SELayer', 'DropPath', 'trunc_normal_' +] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/utils/drop.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/utils/drop.py new file mode 100644 index 0000000000000000000000000000000000000000..4520b0ff407d2a95a864086bdbca0065f222aa63 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/utils/drop.py @@ -0,0 +1,31 @@ +"""Modified from https://github.com/rwightman/pytorch-image- +models/blob/master/timm/models/layers/drop.py.""" + +import torch +from torch import nn + + +class DropPath(nn.Module): + """Drop paths (Stochastic Depth) per sample (when applied in main path of + residual blocks). + + Args: + drop_prob (float): Drop rate for paths of model. Dropout rate has + to be between 0 and 1. Default: 0. 
+ """ + + def __init__(self, drop_prob=0.): + super(DropPath, self).__init__() + self.drop_prob = drop_prob + self.keep_prob = 1 - drop_prob + + def forward(self, x): + if self.drop_prob == 0. or not self.training: + return x + shape = (x.shape[0], ) + (1, ) * ( + x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets + random_tensor = self.keep_prob + torch.rand( + shape, dtype=x.dtype, device=x.device) + random_tensor.floor_() # binarize + output = x.div(self.keep_prob) * random_tensor + return output diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/utils/inverted_residual.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/utils/inverted_residual.py new file mode 100644 index 0000000000000000000000000000000000000000..2df5ebd7c94c0a66b0d05ef9e200ddbeabfa79f6 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/utils/inverted_residual.py @@ -0,0 +1,208 @@ +from annotator.mmpkg.mmcv.cnn import ConvModule +from torch import nn +from torch.utils import checkpoint as cp + +from .se_layer import SELayer + + +class InvertedResidual(nn.Module): + """InvertedResidual block for MobileNetV2. + + Args: + in_channels (int): The input channels of the InvertedResidual block. + out_channels (int): The output channels of the InvertedResidual block. + stride (int): Stride of the middle (first) 3x3 convolution. + expand_ratio (int): Adjusts number of channels of the hidden layer + in InvertedResidual by this amount. + dilation (int): Dilation rate of depthwise conv. Default: 1 + conv_cfg (dict): Config dict for convolution layer. + Default: None, which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict): Config dict for activation layer. + Default: dict(type='ReLU6'). + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + + Returns: + Tensor: The output tensor. + """ + + def __init__(self, + in_channels, + out_channels, + stride, + expand_ratio, + dilation=1, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU6'), + with_cp=False): + super(InvertedResidual, self).__init__() + self.stride = stride + assert stride in [1, 2], f'stride must in [1, 2]. ' \ + f'But received {stride}.' + self.with_cp = with_cp + self.use_res_connect = self.stride == 1 and in_channels == out_channels + hidden_dim = int(round(in_channels * expand_ratio)) + + layers = [] + if expand_ratio != 1: + layers.append( + ConvModule( + in_channels=in_channels, + out_channels=hidden_dim, + kernel_size=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + layers.extend([ + ConvModule( + in_channels=hidden_dim, + out_channels=hidden_dim, + kernel_size=3, + stride=stride, + padding=dilation, + dilation=dilation, + groups=hidden_dim, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg), + ConvModule( + in_channels=hidden_dim, + out_channels=out_channels, + kernel_size=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=None) + ]) + self.conv = nn.Sequential(*layers) + + def forward(self, x): + + def _inner_forward(x): + if self.use_res_connect: + return x + self.conv(x) + else: + return self.conv(x) + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + return out + + +class InvertedResidualV3(nn.Module): + """Inverted Residual Block for MobileNetV3. 
+ + Args: + in_channels (int): The input channels of this Module. + out_channels (int): The output channels of this Module. + mid_channels (int): The input channels of the depthwise convolution. + kernel_size (int): The kernel size of the depthwise convolution. + Default: 3. + stride (int): The stride of the depthwise convolution. Default: 1. + se_cfg (dict): Config dict for se layer. Default: None, which means no + se layer. + with_expand_conv (bool): Use expand conv or not. If set False, + mid_channels must be the same with in_channels. Default: True. + conv_cfg (dict): Config dict for convolution layer. Default: None, + which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict): Config dict for activation layer. + Default: dict(type='ReLU'). + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + + Returns: + Tensor: The output tensor. + """ + + def __init__(self, + in_channels, + out_channels, + mid_channels, + kernel_size=3, + stride=1, + se_cfg=None, + with_expand_conv=True, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + with_cp=False): + super(InvertedResidualV3, self).__init__() + self.with_res_shortcut = (stride == 1 and in_channels == out_channels) + assert stride in [1, 2] + self.with_cp = with_cp + self.with_se = se_cfg is not None + self.with_expand_conv = with_expand_conv + + if self.with_se: + assert isinstance(se_cfg, dict) + if not self.with_expand_conv: + assert mid_channels == in_channels + + if self.with_expand_conv: + self.expand_conv = ConvModule( + in_channels=in_channels, + out_channels=mid_channels, + kernel_size=1, + stride=1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + self.depthwise_conv = ConvModule( + in_channels=mid_channels, + out_channels=mid_channels, + kernel_size=kernel_size, + stride=stride, + padding=kernel_size // 2, + groups=mid_channels, + conv_cfg=dict( + type='Conv2dAdaptivePadding') if stride == 2 else conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + + if self.with_se: + self.se = SELayer(**se_cfg) + + self.linear_conv = ConvModule( + in_channels=mid_channels, + out_channels=out_channels, + kernel_size=1, + stride=1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=None) + + def forward(self, x): + + def _inner_forward(x): + out = x + + if self.with_expand_conv: + out = self.expand_conv(out) + + out = self.depthwise_conv(out) + + if self.with_se: + out = self.se(out) + + out = self.linear_conv(out) + + if self.with_res_shortcut: + return x + out + else: + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + return out diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/utils/make_divisible.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/utils/make_divisible.py new file mode 100644 index 0000000000000000000000000000000000000000..75ad756052529f52fe83bb95dd1f0ecfc9a13078 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/utils/make_divisible.py @@ -0,0 +1,27 @@ +def make_divisible(value, divisor, min_value=None, min_ratio=0.9): + """Make divisible function. + + This function rounds the channel number to the nearest value that can be + divisible by the divisor. It is taken from the original tf repo. 
It ensures + that all layers have a channel number that is divisible by divisor. It can + be seen here: https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py # noqa + + Args: + value (int): The original channel number. + divisor (int): The divisor to fully divide the channel number. + min_value (int): The minimum value of the output channel. + Default: None, means that the minimum value equal to the divisor. + min_ratio (float): The minimum ratio of the rounded channel number to + the original channel number. Default: 0.9. + + Returns: + int: The modified output channel number. + """ + + if min_value is None: + min_value = divisor + new_value = max(min_value, int(value + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than (1-min_ratio). + if new_value < min_ratio * value: + new_value += divisor + return new_value diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/utils/res_layer.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/utils/res_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..d41075a57356b4fd802bc4ff199e55e63678b589 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/utils/res_layer.py @@ -0,0 +1,94 @@ +from annotator.mmpkg.mmcv.cnn import build_conv_layer, build_norm_layer +from torch import nn as nn + + +class ResLayer(nn.Sequential): + """ResLayer to build ResNet style backbone. + + Args: + block (nn.Module): block used to build ResLayer. + inplanes (int): inplanes of block. + planes (int): planes of block. + num_blocks (int): number of blocks. + stride (int): stride of the first block. Default: 1 + avg_down (bool): Use AvgPool instead of stride conv when + downsampling in the bottleneck. Default: False + conv_cfg (dict): dictionary to construct and config conv layer. + Default: None + norm_cfg (dict): dictionary to construct and config norm layer. + Default: dict(type='BN') + multi_grid (int | None): Multi grid dilation rates of last + stage. 
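Worked examples of the rounding in `make_divisible` just shown: the value is rounded to the nearest multiple of `divisor` (never below `min_value`), then bumped up one step if that would fall below `min_ratio` of the original value. The function body is repeated here only so the snippet runs standalone:

def make_divisible(value, divisor, min_value=None, min_ratio=0.9):
    # Same logic as the function above.
    if min_value is None:
        min_value = divisor
    new_value = max(min_value, int(value + divisor / 2) // divisor * divisor)
    if new_value < min_ratio * value:
        new_value += divisor
    return new_value

print(make_divisible(32, 8))   # 32  (already divisible)
print(make_divisible(37, 8))   # 40  (rounds to the nearest multiple of 8)
print(make_divisible(19, 8))   # 24  (16 would drop below 90% of 19, so bump up)
print(make_divisible(3, 8))    # 8   (clamped to min_value=divisor)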
Default: None + contract_dilation (bool): Whether contract first dilation of each layer + Default: False + """ + + def __init__(self, + block, + inplanes, + planes, + num_blocks, + stride=1, + dilation=1, + avg_down=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + multi_grid=None, + contract_dilation=False, + **kwargs): + self.block = block + + downsample = None + if stride != 1 or inplanes != planes * block.expansion: + downsample = [] + conv_stride = stride + if avg_down: + conv_stride = 1 + downsample.append( + nn.AvgPool2d( + kernel_size=stride, + stride=stride, + ceil_mode=True, + count_include_pad=False)) + downsample.extend([ + build_conv_layer( + conv_cfg, + inplanes, + planes * block.expansion, + kernel_size=1, + stride=conv_stride, + bias=False), + build_norm_layer(norm_cfg, planes * block.expansion)[1] + ]) + downsample = nn.Sequential(*downsample) + + layers = [] + if multi_grid is None: + if dilation > 1 and contract_dilation: + first_dilation = dilation // 2 + else: + first_dilation = dilation + else: + first_dilation = multi_grid[0] + layers.append( + block( + inplanes=inplanes, + planes=planes, + stride=stride, + dilation=first_dilation, + downsample=downsample, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + **kwargs)) + inplanes = planes * block.expansion + for i in range(1, num_blocks): + layers.append( + block( + inplanes=inplanes, + planes=planes, + stride=1, + dilation=dilation if multi_grid is None else multi_grid[i], + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + **kwargs)) + super(ResLayer, self).__init__(*layers) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/utils/se_layer.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/utils/se_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..42ab005e1fe2211e9ecb651d31de128cf95cfec7 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/utils/se_layer.py @@ -0,0 +1,57 @@ +import annotator.mmpkg.mmcv as mmcv +import torch.nn as nn +from annotator.mmpkg.mmcv.cnn import ConvModule + +from .make_divisible import make_divisible + + +class SELayer(nn.Module): + """Squeeze-and-Excitation Module. + + Args: + channels (int): The input (and output) channels of the SE layer. + ratio (int): Squeeze ratio in SELayer, the intermediate channel will be + ``int(channels/ratio)``. Default: 16. + conv_cfg (None or dict): Config dict for convolution layer. + Default: None, which means using conv2d. + act_cfg (dict or Sequence[dict]): Config dict for activation layer. + If act_cfg is a dict, two activation layers will be configured + by this dict. If act_cfg is a sequence of dicts, the first + activation layer will be configured by the first dict and the + second activation layer will be configured by the second dict. + Default: (dict(type='ReLU'), dict(type='HSigmoid', bias=3.0, + divisor=6.0)). 
+ """ + + def __init__(self, + channels, + ratio=16, + conv_cfg=None, + act_cfg=(dict(type='ReLU'), + dict(type='HSigmoid', bias=3.0, divisor=6.0))): + super(SELayer, self).__init__() + if isinstance(act_cfg, dict): + act_cfg = (act_cfg, act_cfg) + assert len(act_cfg) == 2 + assert mmcv.is_tuple_of(act_cfg, dict) + self.global_avgpool = nn.AdaptiveAvgPool2d(1) + self.conv1 = ConvModule( + in_channels=channels, + out_channels=make_divisible(channels // ratio, 8), + kernel_size=1, + stride=1, + conv_cfg=conv_cfg, + act_cfg=act_cfg[0]) + self.conv2 = ConvModule( + in_channels=make_divisible(channels // ratio, 8), + out_channels=channels, + kernel_size=1, + stride=1, + conv_cfg=conv_cfg, + act_cfg=act_cfg[1]) + + def forward(self, x): + out = self.global_avgpool(x) + out = self.conv1(out) + out = self.conv2(out) + return x * out diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/utils/self_attention_block.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/utils/self_attention_block.py new file mode 100644 index 0000000000000000000000000000000000000000..a342e2b29ad53916c98d0342bde8f0f6cb10197a --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/utils/self_attention_block.py @@ -0,0 +1,159 @@ +import torch +from annotator.mmpkg.mmcv.cnn import ConvModule, constant_init +from torch import nn as nn +from torch.nn import functional as F + + +class SelfAttentionBlock(nn.Module): + """General self-attention block/non-local block. + + Please refer to https://arxiv.org/abs/1706.03762 for details about key, + query and value. + + Args: + key_in_channels (int): Input channels of key feature. + query_in_channels (int): Input channels of query feature. + channels (int): Output channels of key/query transform. + out_channels (int): Output channels. + share_key_query (bool): Whether share projection weight between key + and query projection. + query_downsample (nn.Module): Query downsample module. + key_downsample (nn.Module): Key downsample module. + key_query_num_convs (int): Number of convs for key/query projection. + value_num_convs (int): Number of convs for value projection. + matmul_norm (bool): Whether normalize attention map with sqrt of + channels + with_out (bool): Whether use out projection. + conv_cfg (dict|None): Config of conv layers. + norm_cfg (dict|None): Config of norm layers. + act_cfg (dict|None): Config of activation layers. 
+ """ + + def __init__(self, key_in_channels, query_in_channels, channels, + out_channels, share_key_query, query_downsample, + key_downsample, key_query_num_convs, value_out_num_convs, + key_query_norm, value_out_norm, matmul_norm, with_out, + conv_cfg, norm_cfg, act_cfg): + super(SelfAttentionBlock, self).__init__() + if share_key_query: + assert key_in_channels == query_in_channels + self.key_in_channels = key_in_channels + self.query_in_channels = query_in_channels + self.out_channels = out_channels + self.channels = channels + self.share_key_query = share_key_query + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.key_project = self.build_project( + key_in_channels, + channels, + num_convs=key_query_num_convs, + use_conv_module=key_query_norm, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + if share_key_query: + self.query_project = self.key_project + else: + self.query_project = self.build_project( + query_in_channels, + channels, + num_convs=key_query_num_convs, + use_conv_module=key_query_norm, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + self.value_project = self.build_project( + key_in_channels, + channels if with_out else out_channels, + num_convs=value_out_num_convs, + use_conv_module=value_out_norm, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + if with_out: + self.out_project = self.build_project( + channels, + out_channels, + num_convs=value_out_num_convs, + use_conv_module=value_out_norm, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + else: + self.out_project = None + + self.query_downsample = query_downsample + self.key_downsample = key_downsample + self.matmul_norm = matmul_norm + + self.init_weights() + + def init_weights(self): + """Initialize weight of later layer.""" + if self.out_project is not None: + if not isinstance(self.out_project, ConvModule): + constant_init(self.out_project, 0) + + def build_project(self, in_channels, channels, num_convs, use_conv_module, + conv_cfg, norm_cfg, act_cfg): + """Build projection layer for key/query/value/out.""" + if use_conv_module: + convs = [ + ConvModule( + in_channels, + channels, + 1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + ] + for _ in range(num_convs - 1): + convs.append( + ConvModule( + channels, + channels, + 1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + else: + convs = [nn.Conv2d(in_channels, channels, 1)] + for _ in range(num_convs - 1): + convs.append(nn.Conv2d(channels, channels, 1)) + if len(convs) > 1: + convs = nn.Sequential(*convs) + else: + convs = convs[0] + return convs + + def forward(self, query_feats, key_feats): + """Forward function.""" + batch_size = query_feats.size(0) + query = self.query_project(query_feats) + if self.query_downsample is not None: + query = self.query_downsample(query) + query = query.reshape(*query.shape[:2], -1) + query = query.permute(0, 2, 1).contiguous() + + key = self.key_project(key_feats) + value = self.value_project(key_feats) + if self.key_downsample is not None: + key = self.key_downsample(key) + value = self.key_downsample(value) + key = key.reshape(*key.shape[:2], -1) + value = value.reshape(*value.shape[:2], -1) + value = value.permute(0, 2, 1).contiguous() + + sim_map = torch.matmul(query, key) + if self.matmul_norm: + sim_map = (self.channels**-.5) * sim_map + sim_map = F.softmax(sim_map, dim=-1) + + context = torch.matmul(sim_map, value) + context = context.permute(0, 2, 1).contiguous() + context = 
context.reshape(batch_size, -1, *query_feats.shape[2:]) + if self.out_project is not None: + context = self.out_project(context) + return context diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/utils/up_conv_block.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/utils/up_conv_block.py new file mode 100644 index 0000000000000000000000000000000000000000..86328011a9704d17e9f9d0d54994719ead5caa56 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/utils/up_conv_block.py @@ -0,0 +1,101 @@ +import torch +import torch.nn as nn +from annotator.mmpkg.mmcv.cnn import ConvModule, build_upsample_layer + + +class UpConvBlock(nn.Module): + """Upsample convolution block in decoder for UNet. + + This upsample convolution block consists of one upsample module + followed by one convolution block. The upsample module expands the + high-level low-resolution feature map and the convolution block fuses + the upsampled high-level low-resolution feature map and the low-level + high-resolution feature map from encoder. + + Args: + conv_block (nn.Sequential): Sequential of convolutional layers. + in_channels (int): Number of input channels of the high-level + skip_channels (int): Number of input channels of the low-level + high-resolution feature map from encoder. + out_channels (int): Number of output channels. + num_convs (int): Number of convolutional layers in the conv_block. + Default: 2. + stride (int): Stride of convolutional layer in conv_block. Default: 1. + dilation (int): Dilation rate of convolutional layer in conv_block. + Default: 1. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + conv_cfg (dict | None): Config dict for convolution layer. + Default: None. + norm_cfg (dict | None): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict | None): Config dict for activation layer in ConvModule. + Default: dict(type='ReLU'). + upsample_cfg (dict): The upsample config of the upsample module in + decoder. Default: dict(type='InterpConv'). If the size of + high-level feature map is the same as that of skip feature map + (low-level feature map from encoder), it does not need upsample the + high-level feature map and the upsample_cfg is None. + dcn (bool): Use deformable convolution in convolutional layer or not. + Default: None. + plugins (dict): plugins for convolutional layers. Default: None. + """ + + def __init__(self, + conv_block, + in_channels, + skip_channels, + out_channels, + num_convs=2, + stride=1, + dilation=1, + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + upsample_cfg=dict(type='InterpConv'), + dcn=None, + plugins=None): + super(UpConvBlock, self).__init__() + assert dcn is None, 'Not implemented yet.' + assert plugins is None, 'Not implemented yet.' 
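The tensor shapes in `SelfAttentionBlock.forward` above follow the standard non-local attention pattern: the query is flattened to (N, H*W, C'), the key to (N, C', H*W), so the similarity map is (N, H*W, H*W) and the context is folded back to (N, C', H, W). A minimal shape check with 1x1-conv projections standing in for `build_project` (illustrative sizes only):

import torch
import torch.nn as nn
import torch.nn.functional as F

n, c_in, c_mid, h, w = 2, 32, 16, 8, 8
query_project = nn.Conv2d(c_in, c_mid, 1)
key_project = nn.Conv2d(c_in, c_mid, 1)
value_project = nn.Conv2d(c_in, c_mid, 1)

feats = torch.randn(n, c_in, h, w)
query = query_project(feats).reshape(n, c_mid, -1).permute(0, 2, 1)  # (N, HW, C')
key = key_project(feats).reshape(n, c_mid, -1)                       # (N, C', HW)
value = value_project(feats).reshape(n, c_mid, -1).permute(0, 2, 1)  # (N, HW, C')

sim = F.softmax((c_mid ** -0.5) * torch.matmul(query, key), dim=-1)  # (N, HW, HW)
context = torch.matmul(sim, value).permute(0, 2, 1).reshape(n, c_mid, h, w)
print(sim.shape, context.shape)  # torch.Size([2, 64, 64]) torch.Size([2, 16, 8, 8])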
+ + self.conv_block = conv_block( + in_channels=2 * skip_channels, + out_channels=out_channels, + num_convs=num_convs, + stride=stride, + dilation=dilation, + with_cp=with_cp, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + dcn=None, + plugins=None) + if upsample_cfg is not None: + self.upsample = build_upsample_layer( + cfg=upsample_cfg, + in_channels=in_channels, + out_channels=skip_channels, + with_cp=with_cp, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + else: + self.upsample = ConvModule( + in_channels, + skip_channels, + kernel_size=1, + stride=1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + + def forward(self, skip, x): + """Forward function.""" + + x = self.upsample(x) + out = torch.cat([skip, x], dim=1) + out = self.conv_block(out) + + return out diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/utils/weight_init.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/utils/weight_init.py new file mode 100644 index 0000000000000000000000000000000000000000..38141ba3d61f64ddfc0a31574b4648cbad96d7dd --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/models/utils/weight_init.py @@ -0,0 +1,62 @@ +"""Modified from https://github.com/rwightman/pytorch-image- +models/blob/master/timm/models/layers/drop.py.""" + +import math +import warnings + +import torch + + +def _no_grad_trunc_normal_(tensor, mean, std, a, b): + """Reference: https://people.sc.fsu.edu/~jburkardt/presentations + /truncated_normal.pdf""" + + def norm_cdf(x): + # Computes standard normal cumulative distribution function + return (1. + math.erf(x / math.sqrt(2.))) / 2. + + if (mean < a - 2 * std) or (mean > b + 2 * std): + warnings.warn( + 'mean is more than 2 std from [a, b] in nn.init.trunc_normal_. ' + 'The distribution of values may be incorrect.', + stacklevel=2) + + with torch.no_grad(): + # Values are generated by using a truncated uniform distribution and + # then using the inverse CDF for the normal distribution. + # Get upper and lower cdf values + lower_bound = norm_cdf((a - mean) / std) + upper_bound = norm_cdf((b - mean) / std) + + # Uniformly fill tensor with values from [l, u], then translate to + # [2l-1, 2u-1]. + tensor.uniform_(2 * lower_bound - 1, 2 * upper_bound - 1) + + # Use inverse cdf transform for normal distribution to get truncated + # standard normal + tensor.erfinv_() + + # Transform to proper mean, std + tensor.mul_(std * math.sqrt(2.)) + tensor.add_(mean) + + # Clamp to ensure it's in the proper range + tensor.clamp_(min=a, max=b) + return tensor + + +def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.): + r"""Fills the input Tensor with values drawn from a truncated + normal distribution. The values are effectively drawn from the + normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` + with values outside :math:`[a, b]` redrawn until they are within + the bounds. The method used for generating the random values works + best when :math:`a \leq \text{mean} \leq b`. 
+ Args: + tensor (``torch.Tensor``): an n-dimensional `torch.Tensor` + mean (float): the mean of the normal distribution + std (float): the standard deviation of the normal distribution + a (float): the minimum cutoff value + b (float): the maximum cutoff value + """ + return _no_grad_trunc_normal_(tensor, mean, std, a, b) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/ops/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/ops/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..bec51c75b9363a9a19e9fb5c35f4e7dbd6f7751c --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/ops/__init__.py @@ -0,0 +1,4 @@ +from .encoding import Encoding +from .wrappers import Upsample, resize + +__all__ = ['Upsample', 'resize', 'Encoding'] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/ops/encoding.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/ops/encoding.py new file mode 100644 index 0000000000000000000000000000000000000000..7eb3629a6426550b8e4c537ee1ff4341893e489e --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/ops/encoding.py @@ -0,0 +1,74 @@ +import torch +from torch import nn +from torch.nn import functional as F + + +class Encoding(nn.Module): + """Encoding Layer: a learnable residual encoder. + + Input is of shape (batch_size, channels, height, width). + Output is of shape (batch_size, num_codes, channels). + + Args: + channels: dimension of the features or feature channels + num_codes: number of code words + """ + + def __init__(self, channels, num_codes): + super(Encoding, self).__init__() + # init codewords and smoothing factor + self.channels, self.num_codes = channels, num_codes + std = 1. 
/ ((num_codes * channels)**0.5) + # [num_codes, channels] + self.codewords = nn.Parameter( + torch.empty(num_codes, channels, + dtype=torch.float).uniform_(-std, std), + requires_grad=True) + # [num_codes] + self.scale = nn.Parameter( + torch.empty(num_codes, dtype=torch.float).uniform_(-1, 0), + requires_grad=True) + + @staticmethod + def scaled_l2(x, codewords, scale): + num_codes, channels = codewords.size() + batch_size = x.size(0) + reshaped_scale = scale.view((1, 1, num_codes)) + expanded_x = x.unsqueeze(2).expand( + (batch_size, x.size(1), num_codes, channels)) + reshaped_codewords = codewords.view((1, 1, num_codes, channels)) + + scaled_l2_norm = reshaped_scale * ( + expanded_x - reshaped_codewords).pow(2).sum(dim=3) + return scaled_l2_norm + + @staticmethod + def aggregate(assignment_weights, x, codewords): + num_codes, channels = codewords.size() + reshaped_codewords = codewords.view((1, 1, num_codes, channels)) + batch_size = x.size(0) + + expanded_x = x.unsqueeze(2).expand( + (batch_size, x.size(1), num_codes, channels)) + encoded_feat = (assignment_weights.unsqueeze(3) * + (expanded_x - reshaped_codewords)).sum(dim=1) + return encoded_feat + + def forward(self, x): + assert x.dim() == 4 and x.size(1) == self.channels + # [batch_size, channels, height, width] + batch_size = x.size(0) + # [batch_size, height x width, channels] + x = x.view(batch_size, self.channels, -1).transpose(1, 2).contiguous() + # assignment_weights: [batch_size, channels, num_codes] + assignment_weights = F.softmax( + self.scaled_l2(x, self.codewords, self.scale), dim=2) + # aggregate + encoded_feat = self.aggregate(assignment_weights, x, self.codewords) + return encoded_feat + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(Nx{self.channels}xHxW =>Nx{self.num_codes}' \ + f'x{self.channels})' + return repr_str diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/ops/wrappers.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/ops/wrappers.py new file mode 100644 index 0000000000000000000000000000000000000000..0ed9a0cb8d7c0e0ec2748dd89c652756653cac78 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/ops/wrappers.py @@ -0,0 +1,50 @@ +import warnings + +import torch.nn as nn +import torch.nn.functional as F + + +def resize(input, + size=None, + scale_factor=None, + mode='nearest', + align_corners=None, + warning=True): + if warning: + if size is not None and align_corners: + input_h, input_w = tuple(int(x) for x in input.shape[2:]) + output_h, output_w = tuple(int(x) for x in size) + if output_h > input_h or output_w > output_h: + if ((output_h > 1 and output_w > 1 and input_h > 1 + and input_w > 1) and (output_h - 1) % (input_h - 1) + and (output_w - 1) % (input_w - 1)): + warnings.warn( + f'When align_corners={align_corners}, ' + 'the output would more aligned if ' + f'input size {(input_h, input_w)} is `x+1` and ' + f'out size {(output_h, output_w)} is `nx+1`') + return F.interpolate(input, size, scale_factor, mode, align_corners) + + +class Upsample(nn.Module): + + def __init__(self, + size=None, + scale_factor=None, + mode='nearest', + align_corners=None): + super(Upsample, self).__init__() + self.size = size + if isinstance(scale_factor, tuple): + self.scale_factor = tuple(float(factor) for factor in scale_factor) + else: + self.scale_factor = float(scale_factor) if scale_factor else None + self.mode = mode + self.align_corners = align_corners + + def forward(self, x): + if not self.size: + 
size = [int(t * self.scale_factor) for t in x.shape[-2:]] + else: + size = self.size + return resize(x, size, None, self.mode, self.align_corners) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/utils/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ac489e2dbbc0e6fa87f5088b4edcc20f8cadc1a6 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/utils/__init__.py @@ -0,0 +1,4 @@ +from .collect_env import collect_env +from .logger import get_root_logger + +__all__ = ['get_root_logger', 'collect_env'] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/utils/collect_env.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/utils/collect_env.py new file mode 100644 index 0000000000000000000000000000000000000000..015d5a6b4f3ff31859cca36584879f646b3864d4 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/utils/collect_env.py @@ -0,0 +1,17 @@ +from annotator.mmpkg.mmcv.utils import collect_env as collect_base_env +from annotator.mmpkg.mmcv.utils import get_git_hash + +import annotator.mmpkg.mmseg as mmseg + + +def collect_env(): + """Collect the information of the running environments.""" + env_info = collect_base_env() + env_info['MMSegmentation'] = f'{mmseg.__version__}+{get_git_hash()[:7]}' + + return env_info + + +if __name__ == '__main__': + for name, val in collect_env().items(): + print('{}: {}'.format(name, val)) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/utils/logger.py b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/utils/logger.py new file mode 100644 index 0000000000000000000000000000000000000000..0c37733358e3e21479b41f54220bfe34b482009c --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/mmpkg/mmseg/utils/logger.py @@ -0,0 +1,27 @@ +import logging + +from annotator.mmpkg.mmcv.utils import get_logger + + +def get_root_logger(log_file=None, log_level=logging.INFO): + """Get the root logger. + + The logger will be initialized if it has not been initialized. By default a + StreamHandler will be added. If `log_file` is specified, a FileHandler will + also be added. The name of the root logger is the top-level package name, + e.g., "mmseg". + + Args: + log_file (str | None): The log filename. If specified, a FileHandler + will be added to the root logger. + log_level (int): The root logger level. Note that only the process of + rank 0 is affected, while other processes will set the level to + "Error" and be silent most of the time. + + Returns: + logging.Logger: The root logger. 
+ """ + + logger = get_logger(name='mmseg', log_file=log_file, log_level=log_level) + + return logger diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/LICENSE b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..16a9d56a3d4c15e4f34ac5426459c58487b01520 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022 Caroline Chan + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..24cd4d887cccdfb20aec1210a4e0e05bf15e15cc --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/__init__.py @@ -0,0 +1,45 @@ +import os +from modules import devices +from annotator.annotator_path import models_path +from .api import make_detectron2_model, semantic_run + + +class OneformerDetector: + model_dir = os.path.join(models_path, "oneformer") + configs = { + "coco": { + "name": "150_16_swin_l_oneformer_coco_100ep.pth", + "config": 'configs/coco/oneformer_swin_large_IN21k_384_bs16_100ep.yaml' + }, + "ade20k": { + "name": "250_16_swin_l_oneformer_ade20k_160k.pth", + "config": 'configs/ade20k/oneformer_swin_large_IN21k_384_bs16_160k.yaml' + } + } + + def __init__(self, config): + self.model = None + self.metadata = None + self.config = config + self.device = devices.get_device_for("controlnet") + + def load_model(self): + remote_model_path = "https://huggingface.co/lllyasviel/Annotators/resolve/main/" + self.config["name"] + modelpath = os.path.join(self.model_dir, self.config["name"]) + if not os.path.exists(modelpath): + from modules.modelloader import load_file_from_url + load_file_from_url(remote_model_path, model_dir=self.model_dir) + config = os.path.join(os.path.dirname(__file__), self.config["config"]) + model, self.metadata = make_detectron2_model(config, modelpath) + self.model = model + + def unload_model(self): + if self.model is not None: + self.model.model.cpu() + + def __call__(self, img): + if self.model is None: + self.load_model() + + self.model.model.to(self.device) + return semantic_run(img, self.model, self.metadata) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/api.py 
b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/api.py new file mode 100644 index 0000000000000000000000000000000000000000..59e4439f10d537949180b8a9d1b2a0ee347b8ff3 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/api.py @@ -0,0 +1,39 @@ +import os +os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" + +import torch + +from annotator.oneformer.detectron2.config import get_cfg +from annotator.oneformer.detectron2.projects.deeplab import add_deeplab_config +from annotator.oneformer.detectron2.data import MetadataCatalog + +from annotator.oneformer.oneformer import ( + add_oneformer_config, + add_common_config, + add_swin_config, + add_dinat_config, +) + +from annotator.oneformer.oneformer.demo.defaults import DefaultPredictor +from annotator.oneformer.oneformer.demo.visualizer import Visualizer, ColorMode + + +def make_detectron2_model(config_path, ckpt_path): + cfg = get_cfg() + add_deeplab_config(cfg) + add_common_config(cfg) + add_swin_config(cfg) + add_oneformer_config(cfg) + add_dinat_config(cfg) + cfg.merge_from_file(config_path) + cfg.MODEL.WEIGHTS = ckpt_path + cfg.freeze() + metadata = MetadataCatalog.get(cfg.DATASETS.TEST_PANOPTIC[0] if len(cfg.DATASETS.TEST_PANOPTIC) else "__unused") + return DefaultPredictor(cfg), metadata + + +def semantic_run(img, predictor, metadata): + predictions = predictor(img[:, :, ::-1], "semantic") # Predictor of OneFormer must use BGR image !!! + visualizer_map = Visualizer(img, is_img=False, metadata=metadata, instance_mode=ColorMode.IMAGE) + out_map = visualizer_map.draw_sem_seg(predictions["sem_seg"].argmax(dim=0).cpu(), alpha=1, is_text=False).get_image() + return out_map diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/configs/ade20k/Base-ADE20K-UnifiedSegmentation.yaml b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/configs/ade20k/Base-ADE20K-UnifiedSegmentation.yaml new file mode 100644 index 0000000000000000000000000000000000000000..31eab45b878433fc844a13dbdd54f97c936d9b89 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/configs/ade20k/Base-ADE20K-UnifiedSegmentation.yaml @@ -0,0 +1,68 @@ +MODEL: + BACKBONE: + FREEZE_AT: 0 + NAME: "build_resnet_backbone" + WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" + PIXEL_MEAN: [123.675, 116.280, 103.530] + PIXEL_STD: [58.395, 57.120, 57.375] + RESNETS: + DEPTH: 50 + STEM_TYPE: "basic" # not used + STEM_OUT_CHANNELS: 64 + STRIDE_IN_1X1: False + OUT_FEATURES: ["res2", "res3", "res4", "res5"] + # NORM: "SyncBN" + RES5_MULTI_GRID: [1, 1, 1] # not used +DATASETS: + TRAIN: ("ade20k_panoptic_train",) + TEST_PANOPTIC: ("ade20k_panoptic_val",) + TEST_INSTANCE: ("ade20k_instance_val",) + TEST_SEMANTIC: ("ade20k_sem_seg_val",) +SOLVER: + IMS_PER_BATCH: 16 + BASE_LR: 0.0001 + MAX_ITER: 160000 + WARMUP_FACTOR: 1.0 + WARMUP_ITERS: 0 + WEIGHT_DECAY: 0.05 + OPTIMIZER: "ADAMW" + LR_SCHEDULER_NAME: "WarmupPolyLR" + BACKBONE_MULTIPLIER: 0.1 + CLIP_GRADIENTS: + ENABLED: True + CLIP_TYPE: "full_model" + CLIP_VALUE: 0.01 + NORM_TYPE: 2.0 + AMP: + ENABLED: True +INPUT: + MIN_SIZE_TRAIN: !!python/object/apply:eval ["[int(x * 0.1 * 512) for x in range(5, 21)]"] + MIN_SIZE_TRAIN_SAMPLING: "choice" + MIN_SIZE_TEST: 512 + MAX_SIZE_TRAIN: 2048 + MAX_SIZE_TEST: 2048 + CROP: + ENABLED: True + TYPE: "absolute" + SIZE: (512, 512) + SINGLE_CATEGORY_MAX_AREA: 1.0 + COLOR_AUG_SSD: True + SIZE_DIVISIBILITY: 512 # used in dataset mapper + FORMAT: "RGB" + DATASET_MAPPER_NAME: 
"oneformer_unified" + MAX_SEQ_LEN: 77 + TASK_SEQ_LEN: 77 + TASK_PROB: + SEMANTIC: 0.33 + INSTANCE: 0.66 +TEST: + EVAL_PERIOD: 5000 + AUG: + ENABLED: False + MIN_SIZES: [256, 384, 512, 640, 768, 896] + MAX_SIZE: 3584 + FLIP: True +DATALOADER: + FILTER_EMPTY_ANNOTATIONS: True + NUM_WORKERS: 4 +VERSION: 2 \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/configs/ade20k/oneformer_R50_bs16_160k.yaml b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/configs/ade20k/oneformer_R50_bs16_160k.yaml new file mode 100644 index 0000000000000000000000000000000000000000..770ffc81907f8d7c7520e079b1c46060707254b8 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/configs/ade20k/oneformer_R50_bs16_160k.yaml @@ -0,0 +1,58 @@ +_BASE_: Base-ADE20K-UnifiedSegmentation.yaml +MODEL: + META_ARCHITECTURE: "OneFormer" + SEM_SEG_HEAD: + NAME: "OneFormerHead" + IGNORE_VALUE: 255 + NUM_CLASSES: 150 + LOSS_WEIGHT: 1.0 + CONVS_DIM: 256 + MASK_DIM: 256 + NORM: "GN" + # pixel decoder + PIXEL_DECODER_NAME: "MSDeformAttnPixelDecoder" + IN_FEATURES: ["res2", "res3", "res4", "res5"] + DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES: ["res3", "res4", "res5"] + COMMON_STRIDE: 4 + TRANSFORMER_ENC_LAYERS: 6 + ONE_FORMER: + TRANSFORMER_DECODER_NAME: "ContrastiveMultiScaleMaskedTransformerDecoder" + TRANSFORMER_IN_FEATURE: "multi_scale_pixel_decoder" + DEEP_SUPERVISION: True + NO_OBJECT_WEIGHT: 0.1 + CLASS_WEIGHT: 2.0 + MASK_WEIGHT: 5.0 + DICE_WEIGHT: 5.0 + CONTRASTIVE_WEIGHT: 0.5 + CONTRASTIVE_TEMPERATURE: 0.07 + HIDDEN_DIM: 256 + NUM_OBJECT_QUERIES: 150 + USE_TASK_NORM: True + NHEADS: 8 + DROPOUT: 0.1 + DIM_FEEDFORWARD: 2048 + ENC_LAYERS: 0 + PRE_NORM: False + ENFORCE_INPUT_PROJ: False + SIZE_DIVISIBILITY: 32 + CLASS_DEC_LAYERS: 2 + DEC_LAYERS: 10 # 9 decoder layers, add one for the loss on learnable query + TRAIN_NUM_POINTS: 12544 + OVERSAMPLE_RATIO: 3.0 + IMPORTANCE_SAMPLE_RATIO: 0.75 + TEXT_ENCODER: + WIDTH: 256 + CONTEXT_LENGTH: 77 + NUM_LAYERS: 6 + VOCAB_SIZE: 49408 + PROJ_NUM_LAYERS: 2 + N_CTX: 16 + TEST: + SEMANTIC_ON: True + INSTANCE_ON: True + PANOPTIC_ON: True + OVERLAP_THRESHOLD: 0.8 + OBJECT_MASK_THRESHOLD: 0.8 + TASK: "panoptic" +TEST: + DETECTIONS_PER_IMAGE: 150 diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/configs/ade20k/oneformer_swin_large_IN21k_384_bs16_160k.yaml b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/configs/ade20k/oneformer_swin_large_IN21k_384_bs16_160k.yaml new file mode 100644 index 0000000000000000000000000000000000000000..69c44ade144e4504077c0fe04fa8bb3491a679ed --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/configs/ade20k/oneformer_swin_large_IN21k_384_bs16_160k.yaml @@ -0,0 +1,40 @@ +_BASE_: oneformer_R50_bs16_160k.yaml +MODEL: + BACKBONE: + NAME: "D2SwinTransformer" + SWIN: + EMBED_DIM: 192 + DEPTHS: [2, 2, 18, 2] + NUM_HEADS: [6, 12, 24, 48] + WINDOW_SIZE: 12 + APE: False + DROP_PATH_RATE: 0.3 + PATCH_NORM: True + PRETRAIN_IMG_SIZE: 384 + WEIGHTS: "swin_large_patch4_window12_384_22k.pkl" + PIXEL_MEAN: [123.675, 116.280, 103.530] + PIXEL_STD: [58.395, 57.120, 57.375] + ONE_FORMER: + NUM_OBJECT_QUERIES: 250 +INPUT: + MIN_SIZE_TRAIN: !!python/object/apply:eval ["[int(x * 0.1 * 640) for x in range(5, 21)]"] + MIN_SIZE_TRAIN_SAMPLING: "choice" + MIN_SIZE_TEST: 640 + MAX_SIZE_TRAIN: 2560 + MAX_SIZE_TEST: 2560 + CROP: + ENABLED: True + TYPE: "absolute" + SIZE: (640, 640) + SINGLE_CATEGORY_MAX_AREA: 1.0 + 
COLOR_AUG_SSD: True + SIZE_DIVISIBILITY: 640 # used in dataset mapper + FORMAT: "RGB" +TEST: + DETECTIONS_PER_IMAGE: 250 + EVAL_PERIOD: 5000 + AUG: + ENABLED: False + MIN_SIZES: [320, 480, 640, 800, 960, 1120] + MAX_SIZE: 4480 + FLIP: True diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/configs/coco/Base-COCO-UnifiedSegmentation.yaml b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/configs/coco/Base-COCO-UnifiedSegmentation.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ccd24f348f9bc7d60dcdc4b74d887708e57cb8a8 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/configs/coco/Base-COCO-UnifiedSegmentation.yaml @@ -0,0 +1,54 @@ +MODEL: + BACKBONE: + FREEZE_AT: 0 + NAME: "build_resnet_backbone" + WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" + PIXEL_MEAN: [123.675, 116.280, 103.530] + PIXEL_STD: [58.395, 57.120, 57.375] + RESNETS: + DEPTH: 50 + STEM_TYPE: "basic" # not used + STEM_OUT_CHANNELS: 64 + STRIDE_IN_1X1: False + OUT_FEATURES: ["res2", "res3", "res4", "res5"] + # NORM: "SyncBN" + RES5_MULTI_GRID: [1, 1, 1] # not used +DATASETS: + TRAIN: ("coco_2017_train_panoptic_with_sem_seg",) + TEST_PANOPTIC: ("coco_2017_val_panoptic_with_sem_seg",) # to evaluate instance and semantic performance as well + TEST_INSTANCE: ("coco_2017_val",) + TEST_SEMANTIC: ("coco_2017_val_panoptic_with_sem_seg",) +SOLVER: + IMS_PER_BATCH: 16 + BASE_LR: 0.0001 + STEPS: (327778, 355092) + MAX_ITER: 368750 + WARMUP_FACTOR: 1.0 + WARMUP_ITERS: 10 + WEIGHT_DECAY: 0.05 + OPTIMIZER: "ADAMW" + BACKBONE_MULTIPLIER: 0.1 + CLIP_GRADIENTS: + ENABLED: True + CLIP_TYPE: "full_model" + CLIP_VALUE: 0.01 + NORM_TYPE: 2.0 + AMP: + ENABLED: True +INPUT: + IMAGE_SIZE: 1024 + MIN_SCALE: 0.1 + MAX_SCALE: 2.0 + FORMAT: "RGB" + DATASET_MAPPER_NAME: "coco_unified_lsj" + MAX_SEQ_LEN: 77 + TASK_SEQ_LEN: 77 + TASK_PROB: + SEMANTIC: 0.33 + INSTANCE: 0.66 +TEST: + EVAL_PERIOD: 5000 +DATALOADER: + FILTER_EMPTY_ANNOTATIONS: True + NUM_WORKERS: 4 +VERSION: 2 diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/configs/coco/oneformer_R50_bs16_50ep.yaml b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/configs/coco/oneformer_R50_bs16_50ep.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f768c8fa8b5e4fc1121e65e050053e0d8870cd73 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/configs/coco/oneformer_R50_bs16_50ep.yaml @@ -0,0 +1,59 @@ +_BASE_: Base-COCO-UnifiedSegmentation.yaml +MODEL: + META_ARCHITECTURE: "OneFormer" + SEM_SEG_HEAD: + NAME: "OneFormerHead" + IGNORE_VALUE: 255 + NUM_CLASSES: 133 + LOSS_WEIGHT: 1.0 + CONVS_DIM: 256 + MASK_DIM: 256 + NORM: "GN" + # pixel decoder + PIXEL_DECODER_NAME: "MSDeformAttnPixelDecoder" + IN_FEATURES: ["res2", "res3", "res4", "res5"] + DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES: ["res3", "res4", "res5"] + COMMON_STRIDE: 4 + TRANSFORMER_ENC_LAYERS: 6 + ONE_FORMER: + TRANSFORMER_DECODER_NAME: "ContrastiveMultiScaleMaskedTransformerDecoder" + TRANSFORMER_IN_FEATURE: "multi_scale_pixel_decoder" + DEEP_SUPERVISION: True + NO_OBJECT_WEIGHT: 0.1 + CLASS_WEIGHT: 2.0 + MASK_WEIGHT: 5.0 + DICE_WEIGHT: 5.0 + CONTRASTIVE_WEIGHT: 0.5 + CONTRASTIVE_TEMPERATURE: 0.07 + HIDDEN_DIM: 256 + NUM_OBJECT_QUERIES: 150 + USE_TASK_NORM: True + NHEADS: 8 + DROPOUT: 0.1 + DIM_FEEDFORWARD: 2048 + ENC_LAYERS: 0 + PRE_NORM: False + ENFORCE_INPUT_PROJ: False + SIZE_DIVISIBILITY: 32 + CLASS_DEC_LAYERS: 2 + 
DEC_LAYERS: 10 # 9 decoder layers, add one for the loss on learnable query + TRAIN_NUM_POINTS: 12544 + OVERSAMPLE_RATIO: 3.0 + IMPORTANCE_SAMPLE_RATIO: 0.75 + TEXT_ENCODER: + WIDTH: 256 + CONTEXT_LENGTH: 77 + NUM_LAYERS: 6 + VOCAB_SIZE: 49408 + PROJ_NUM_LAYERS: 2 + N_CTX: 16 + TEST: + SEMANTIC_ON: True + INSTANCE_ON: True + PANOPTIC_ON: True + DETECTION_ON: False + OVERLAP_THRESHOLD: 0.8 + OBJECT_MASK_THRESHOLD: 0.8 + TASK: "panoptic" +TEST: + DETECTIONS_PER_IMAGE: 150 diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/configs/coco/oneformer_swin_large_IN21k_384_bs16_100ep.yaml b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/configs/coco/oneformer_swin_large_IN21k_384_bs16_100ep.yaml new file mode 100644 index 0000000000000000000000000000000000000000..faae655317c52d90b9f756417f8b1a1adcbe78f2 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/configs/coco/oneformer_swin_large_IN21k_384_bs16_100ep.yaml @@ -0,0 +1,25 @@ +_BASE_: oneformer_R50_bs16_50ep.yaml +MODEL: + BACKBONE: + NAME: "D2SwinTransformer" + SWIN: + EMBED_DIM: 192 + DEPTHS: [2, 2, 18, 2] + NUM_HEADS: [6, 12, 24, 48] + WINDOW_SIZE: 12 + APE: False + DROP_PATH_RATE: 0.3 + PATCH_NORM: True + PRETRAIN_IMG_SIZE: 384 + WEIGHTS: "swin_large_patch4_window12_384_22k.pkl" + PIXEL_MEAN: [123.675, 116.280, 103.530] + PIXEL_STD: [58.395, 57.120, 57.375] + ONE_FORMER: + NUM_OBJECT_QUERIES: 150 +SOLVER: + STEPS: (655556, 735184) + MAX_ITER: 737500 + AMP: + ENABLED: False +TEST: + DETECTIONS_PER_IMAGE: 150 diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..bdd994b49294485c27610772f97f177741f5518f --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/__init__.py @@ -0,0 +1,10 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +from .utils.env import setup_environment + +setup_environment() + + +# This line will be programatically read/write by setup.py. +# Leave them at the bottom of this file and don't touch them. +__version__ = "0.6" diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/checkpoint/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/checkpoint/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..99da0469ae7e169d8970e4b642fed3f870076860 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/checkpoint/__init__.py @@ -0,0 +1,10 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. +# File: + + +from . 
import catalog as _UNUSED # register the handler +from .detection_checkpoint import DetectionCheckpointer +from fvcore.common.checkpoint import Checkpointer, PeriodicCheckpointer + +__all__ = ["Checkpointer", "PeriodicCheckpointer", "DetectionCheckpointer"] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/checkpoint/c2_model_loading.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/checkpoint/c2_model_loading.py new file mode 100644 index 0000000000000000000000000000000000000000..c6de2a3c830089aa7a0d27df96bb4a45fc5a7b0d --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/checkpoint/c2_model_loading.py @@ -0,0 +1,412 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import copy +import logging +import re +from typing import Dict, List +import torch +from tabulate import tabulate + + +def convert_basic_c2_names(original_keys): + """ + Apply some basic name conversion to names in C2 weights. + It only deals with typical backbone models. + + Args: + original_keys (list[str]): + Returns: + list[str]: The same number of strings matching those in original_keys. + """ + layer_keys = copy.deepcopy(original_keys) + layer_keys = [ + {"pred_b": "linear_b", "pred_w": "linear_w"}.get(k, k) for k in layer_keys + ] # some hard-coded mappings + + layer_keys = [k.replace("_", ".") for k in layer_keys] + layer_keys = [re.sub("\\.b$", ".bias", k) for k in layer_keys] + layer_keys = [re.sub("\\.w$", ".weight", k) for k in layer_keys] + # Uniform both bn and gn names to "norm" + layer_keys = [re.sub("bn\\.s$", "norm.weight", k) for k in layer_keys] + layer_keys = [re.sub("bn\\.bias$", "norm.bias", k) for k in layer_keys] + layer_keys = [re.sub("bn\\.rm", "norm.running_mean", k) for k in layer_keys] + layer_keys = [re.sub("bn\\.running.mean$", "norm.running_mean", k) for k in layer_keys] + layer_keys = [re.sub("bn\\.riv$", "norm.running_var", k) for k in layer_keys] + layer_keys = [re.sub("bn\\.running.var$", "norm.running_var", k) for k in layer_keys] + layer_keys = [re.sub("bn\\.gamma$", "norm.weight", k) for k in layer_keys] + layer_keys = [re.sub("bn\\.beta$", "norm.bias", k) for k in layer_keys] + layer_keys = [re.sub("gn\\.s$", "norm.weight", k) for k in layer_keys] + layer_keys = [re.sub("gn\\.bias$", "norm.bias", k) for k in layer_keys] + + # stem + layer_keys = [re.sub("^res\\.conv1\\.norm\\.", "conv1.norm.", k) for k in layer_keys] + # to avoid mis-matching with "conv1" in other components (e.g. 
detection head) + layer_keys = [re.sub("^conv1\\.", "stem.conv1.", k) for k in layer_keys] + + # layer1-4 is used by torchvision, however we follow the C2 naming strategy (res2-5) + # layer_keys = [re.sub("^res2.", "layer1.", k) for k in layer_keys] + # layer_keys = [re.sub("^res3.", "layer2.", k) for k in layer_keys] + # layer_keys = [re.sub("^res4.", "layer3.", k) for k in layer_keys] + # layer_keys = [re.sub("^res5.", "layer4.", k) for k in layer_keys] + + # blocks + layer_keys = [k.replace(".branch1.", ".shortcut.") for k in layer_keys] + layer_keys = [k.replace(".branch2a.", ".conv1.") for k in layer_keys] + layer_keys = [k.replace(".branch2b.", ".conv2.") for k in layer_keys] + layer_keys = [k.replace(".branch2c.", ".conv3.") for k in layer_keys] + + # DensePose substitutions + layer_keys = [re.sub("^body.conv.fcn", "body_conv_fcn", k) for k in layer_keys] + layer_keys = [k.replace("AnnIndex.lowres", "ann_index_lowres") for k in layer_keys] + layer_keys = [k.replace("Index.UV.lowres", "index_uv_lowres") for k in layer_keys] + layer_keys = [k.replace("U.lowres", "u_lowres") for k in layer_keys] + layer_keys = [k.replace("V.lowres", "v_lowres") for k in layer_keys] + return layer_keys + + +def convert_c2_detectron_names(weights): + """ + Map Caffe2 Detectron weight names to Detectron2 names. + + Args: + weights (dict): name -> tensor + + Returns: + dict: detectron2 names -> tensor + dict: detectron2 names -> C2 names + """ + logger = logging.getLogger(__name__) + logger.info("Renaming Caffe2 weights ......") + original_keys = sorted(weights.keys()) + layer_keys = copy.deepcopy(original_keys) + + layer_keys = convert_basic_c2_names(layer_keys) + + # -------------------------------------------------------------------------- + # RPN hidden representation conv + # -------------------------------------------------------------------------- + # FPN case + # In the C2 model, the RPN hidden layer conv is defined for FPN level 2 and then + # shared for all other levels, hence the appearance of "fpn2" + layer_keys = [ + k.replace("conv.rpn.fpn2", "proposal_generator.rpn_head.conv") for k in layer_keys + ] + # Non-FPN case + layer_keys = [k.replace("conv.rpn", "proposal_generator.rpn_head.conv") for k in layer_keys] + + # -------------------------------------------------------------------------- + # RPN box transformation conv + # -------------------------------------------------------------------------- + # FPN case (see note above about "fpn2") + layer_keys = [ + k.replace("rpn.bbox.pred.fpn2", "proposal_generator.rpn_head.anchor_deltas") + for k in layer_keys + ] + layer_keys = [ + k.replace("rpn.cls.logits.fpn2", "proposal_generator.rpn_head.objectness_logits") + for k in layer_keys + ] + # Non-FPN case + layer_keys = [ + k.replace("rpn.bbox.pred", "proposal_generator.rpn_head.anchor_deltas") for k in layer_keys + ] + layer_keys = [ + k.replace("rpn.cls.logits", "proposal_generator.rpn_head.objectness_logits") + for k in layer_keys + ] + + # -------------------------------------------------------------------------- + # Fast R-CNN box head + # -------------------------------------------------------------------------- + layer_keys = [re.sub("^bbox\\.pred", "bbox_pred", k) for k in layer_keys] + layer_keys = [re.sub("^cls\\.score", "cls_score", k) for k in layer_keys] + layer_keys = [re.sub("^fc6\\.", "box_head.fc1.", k) for k in layer_keys] + layer_keys = [re.sub("^fc7\\.", "box_head.fc2.", k) for k in layer_keys] + # 4conv1fc head tensor names: head_conv1_w, head_conv1_gn_s + layer_keys = 
[re.sub("^head\\.conv", "box_head.conv", k) for k in layer_keys] + + # -------------------------------------------------------------------------- + # FPN lateral and output convolutions + # -------------------------------------------------------------------------- + def fpn_map(name): + """ + Look for keys with the following patterns: + 1) Starts with "fpn.inner." + Example: "fpn.inner.res2.2.sum.lateral.weight" + Meaning: These are lateral pathway convolutions + 2) Starts with "fpn.res" + Example: "fpn.res2.2.sum.weight" + Meaning: These are FPN output convolutions + """ + splits = name.split(".") + norm = ".norm" if "norm" in splits else "" + if name.startswith("fpn.inner."): + # splits example: ['fpn', 'inner', 'res2', '2', 'sum', 'lateral', 'weight'] + stage = int(splits[2][len("res") :]) + return "fpn_lateral{}{}.{}".format(stage, norm, splits[-1]) + elif name.startswith("fpn.res"): + # splits example: ['fpn', 'res2', '2', 'sum', 'weight'] + stage = int(splits[1][len("res") :]) + return "fpn_output{}{}.{}".format(stage, norm, splits[-1]) + return name + + layer_keys = [fpn_map(k) for k in layer_keys] + + # -------------------------------------------------------------------------- + # Mask R-CNN mask head + # -------------------------------------------------------------------------- + # roi_heads.StandardROIHeads case + layer_keys = [k.replace(".[mask].fcn", "mask_head.mask_fcn") for k in layer_keys] + layer_keys = [re.sub("^\\.mask\\.fcn", "mask_head.mask_fcn", k) for k in layer_keys] + layer_keys = [k.replace("mask.fcn.logits", "mask_head.predictor") for k in layer_keys] + # roi_heads.Res5ROIHeads case + layer_keys = [k.replace("conv5.mask", "mask_head.deconv") for k in layer_keys] + + # -------------------------------------------------------------------------- + # Keypoint R-CNN head + # -------------------------------------------------------------------------- + # interestingly, the keypoint head convs have blob names that are simply "conv_fcnX" + layer_keys = [k.replace("conv.fcn", "roi_heads.keypoint_head.conv_fcn") for k in layer_keys] + layer_keys = [ + k.replace("kps.score.lowres", "roi_heads.keypoint_head.score_lowres") for k in layer_keys + ] + layer_keys = [k.replace("kps.score.", "roi_heads.keypoint_head.score.") for k in layer_keys] + + # -------------------------------------------------------------------------- + # Done with replacements + # -------------------------------------------------------------------------- + assert len(set(layer_keys)) == len(layer_keys) + assert len(original_keys) == len(layer_keys) + + new_weights = {} + new_keys_to_original_keys = {} + for orig, renamed in zip(original_keys, layer_keys): + new_keys_to_original_keys[renamed] = orig + if renamed.startswith("bbox_pred.") or renamed.startswith("mask_head.predictor."): + # remove the meaningless prediction weight for background class + new_start_idx = 4 if renamed.startswith("bbox_pred.") else 1 + new_weights[renamed] = weights[orig][new_start_idx:] + logger.info( + "Remove prediction weight for background class in {}. 
The shape changes from " + "{} to {}.".format( + renamed, tuple(weights[orig].shape), tuple(new_weights[renamed].shape) + ) + ) + elif renamed.startswith("cls_score."): + # move weights of bg class from original index 0 to last index + logger.info( + "Move classification weights for background class in {} from index 0 to " + "index {}.".format(renamed, weights[orig].shape[0] - 1) + ) + new_weights[renamed] = torch.cat([weights[orig][1:], weights[orig][:1]]) + else: + new_weights[renamed] = weights[orig] + + return new_weights, new_keys_to_original_keys + + +# Note the current matching is not symmetric. +# it assumes model_state_dict will have longer names. +def align_and_update_state_dicts(model_state_dict, ckpt_state_dict, c2_conversion=True): + """ + Match names between the two state-dict, and returns a new chkpt_state_dict with names + converted to match model_state_dict with heuristics. The returned dict can be later + loaded with fvcore checkpointer. + If `c2_conversion==True`, `ckpt_state_dict` is assumed to be a Caffe2 + model and will be renamed at first. + + Strategy: suppose that the models that we will create will have prefixes appended + to each of its keys, for example due to an extra level of nesting that the original + pre-trained weights from ImageNet won't contain. For example, model.state_dict() + might return backbone[0].body.res2.conv1.weight, while the pre-trained model contains + res2.conv1.weight. We thus want to match both parameters together. + For that, we look for each model weight, look among all loaded keys if there is one + that is a suffix of the current weight name, and use it if that's the case. + If multiple matches exist, take the one with longest size + of the corresponding name. For example, for the same model as before, the pretrained + weight file can contain both res2.conv1.weight, as well as conv1.weight. In this case, + we want to match backbone[0].body.conv1.weight to conv1.weight, and + backbone[0].body.res2.conv1.weight to res2.conv1.weight. + """ + model_keys = sorted(model_state_dict.keys()) + if c2_conversion: + ckpt_state_dict, original_keys = convert_c2_detectron_names(ckpt_state_dict) + # original_keys: the name in the original dict (before renaming) + else: + original_keys = {x: x for x in ckpt_state_dict.keys()} + ckpt_keys = sorted(ckpt_state_dict.keys()) + + def match(a, b): + # Matched ckpt_key should be a complete (starts with '.') suffix. + # For example, roi_heads.mesh_head.whatever_conv1 does not match conv1, + # but matches whatever_conv1 or mesh_head.whatever_conv1. + return a == b or a.endswith("." 
+ b) + + # get a matrix of string matches, where each (i, j) entry correspond to the size of the + # ckpt_key string, if it matches + match_matrix = [len(j) if match(i, j) else 0 for i in model_keys for j in ckpt_keys] + match_matrix = torch.as_tensor(match_matrix).view(len(model_keys), len(ckpt_keys)) + # use the matched one with longest size in case of multiple matches + max_match_size, idxs = match_matrix.max(1) + # remove indices that correspond to no-match + idxs[max_match_size == 0] = -1 + + logger = logging.getLogger(__name__) + # matched_pairs (matched checkpoint key --> matched model key) + matched_keys = {} + result_state_dict = {} + for idx_model, idx_ckpt in enumerate(idxs.tolist()): + if idx_ckpt == -1: + continue + key_model = model_keys[idx_model] + key_ckpt = ckpt_keys[idx_ckpt] + value_ckpt = ckpt_state_dict[key_ckpt] + shape_in_model = model_state_dict[key_model].shape + + if shape_in_model != value_ckpt.shape: + logger.warning( + "Shape of {} in checkpoint is {}, while shape of {} in model is {}.".format( + key_ckpt, value_ckpt.shape, key_model, shape_in_model + ) + ) + logger.warning( + "{} will not be loaded. Please double check and see if this is desired.".format( + key_ckpt + ) + ) + continue + + assert key_model not in result_state_dict + result_state_dict[key_model] = value_ckpt + if key_ckpt in matched_keys: # already added to matched_keys + logger.error( + "Ambiguity found for {} in checkpoint!" + "It matches at least two keys in the model ({} and {}).".format( + key_ckpt, key_model, matched_keys[key_ckpt] + ) + ) + raise ValueError("Cannot match one checkpoint key to multiple keys in the model.") + + matched_keys[key_ckpt] = key_model + + # logging: + matched_model_keys = sorted(matched_keys.values()) + if len(matched_model_keys) == 0: + logger.warning("No weights in checkpoint matched with model.") + return ckpt_state_dict + common_prefix = _longest_common_prefix(matched_model_keys) + rev_matched_keys = {v: k for k, v in matched_keys.items()} + original_keys = {k: original_keys[rev_matched_keys[k]] for k in matched_model_keys} + + model_key_groups = _group_keys_by_module(matched_model_keys, original_keys) + table = [] + memo = set() + for key_model in matched_model_keys: + if key_model in memo: + continue + if key_model in model_key_groups: + group = model_key_groups[key_model] + memo |= set(group) + shapes = [tuple(model_state_dict[k].shape) for k in group] + table.append( + ( + _longest_common_prefix([k[len(common_prefix) :] for k in group]) + "*", + _group_str([original_keys[k] for k in group]), + " ".join([str(x).replace(" ", "") for x in shapes]), + ) + ) + else: + key_checkpoint = original_keys[key_model] + shape = str(tuple(model_state_dict[key_model].shape)) + table.append((key_model[len(common_prefix) :], key_checkpoint, shape)) + table_str = tabulate( + table, tablefmt="pipe", headers=["Names in Model", "Names in Checkpoint", "Shapes"] + ) + logger.info( + "Following weights matched with " + + (f"submodule {common_prefix[:-1]}" if common_prefix else "model") + + ":\n" + + table_str + ) + + unmatched_ckpt_keys = [k for k in ckpt_keys if k not in set(matched_keys.keys())] + for k in unmatched_ckpt_keys: + result_state_dict[k] = ckpt_state_dict[k] + return result_state_dict + + +def _group_keys_by_module(keys: List[str], original_names: Dict[str, str]): + """ + Params in the same submodule are grouped together. 
+ + Args: + keys: names of all parameters + original_names: mapping from parameter name to their name in the checkpoint + + Returns: + dict[name -> all other names in the same group] + """ + + def _submodule_name(key): + pos = key.rfind(".") + if pos < 0: + return None + prefix = key[: pos + 1] + return prefix + + all_submodules = [_submodule_name(k) for k in keys] + all_submodules = [x for x in all_submodules if x] + all_submodules = sorted(all_submodules, key=len) + + ret = {} + for prefix in all_submodules: + group = [k for k in keys if k.startswith(prefix)] + if len(group) <= 1: + continue + original_name_lcp = _longest_common_prefix_str([original_names[k] for k in group]) + if len(original_name_lcp) == 0: + # don't group weights if original names don't share prefix + continue + + for k in group: + if k in ret: + continue + ret[k] = group + return ret + + +def _longest_common_prefix(names: List[str]) -> str: + """ + ["abc.zfg", "abc.zef"] -> "abc." + """ + names = [n.split(".") for n in names] + m1, m2 = min(names), max(names) + ret = [a for a, b in zip(m1, m2) if a == b] + ret = ".".join(ret) + "." if len(ret) else "" + return ret + + +def _longest_common_prefix_str(names: List[str]) -> str: + m1, m2 = min(names), max(names) + lcp = [] + for a, b in zip(m1, m2): + if a == b: + lcp.append(a) + else: + break + lcp = "".join(lcp) + return lcp + + +def _group_str(names: List[str]) -> str: + """ + Turn "common1", "common2", "common3" into "common{1,2,3}" + """ + lcp = _longest_common_prefix_str(names) + rest = [x[len(lcp) :] for x in names] + rest = "{" + ",".join(rest) + "}" + ret = lcp + rest + + # add some simplification for BN specifically + ret = ret.replace("bn_{beta,running_mean,running_var,gamma}", "bn_*") + ret = ret.replace("bn_beta,bn_running_mean,bn_running_var,bn_gamma", "bn_*") + return ret diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/checkpoint/catalog.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/checkpoint/catalog.py new file mode 100644 index 0000000000000000000000000000000000000000..b5641858fea4936ad10b07a4237faba78dda77ff --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/checkpoint/catalog.py @@ -0,0 +1,115 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import logging + +from annotator.oneformer.detectron2.utils.file_io import PathHandler, PathManager + + +class ModelCatalog(object): + """ + Store mappings from names to third-party models. + """ + + S3_C2_DETECTRON_PREFIX = "https://dl.fbaipublicfiles.com/detectron" + + # MSRA models have STRIDE_IN_1X1=True. False otherwise. + # NOTE: all BN models here have fused BN into an affine layer. + # As a result, you should only load them to a model with "FrozenBN". + # Loading them to a model with regular BN or SyncBN is wrong. + # Even when loaded to FrozenBN, it is still different from affine by an epsilon, + # which should be negligible for training. + # NOTE: all models here uses PIXEL_STD=[1,1,1] + # NOTE: Most of the BN models here are no longer used. We use the + # re-converted pre-trained models under detectron2 model zoo instead. 
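# For example, given the mappings below, ModelCatalog.get("ImageNetPretrained/MSRA/R-50")
# resolves to "https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl",
# and the ModelCatalogHandler registered at the bottom of this file lets the same entry
# be referenced as "catalog://ImageNetPretrained/MSRA/R-50" through PathManager.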
+ C2_IMAGENET_MODELS = { + "MSRA/R-50": "ImageNetPretrained/MSRA/R-50.pkl", + "MSRA/R-101": "ImageNetPretrained/MSRA/R-101.pkl", + "FAIR/R-50-GN": "ImageNetPretrained/47261647/R-50-GN.pkl", + "FAIR/R-101-GN": "ImageNetPretrained/47592356/R-101-GN.pkl", + "FAIR/X-101-32x8d": "ImageNetPretrained/20171220/X-101-32x8d.pkl", + "FAIR/X-101-64x4d": "ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl", + "FAIR/X-152-32x8d-IN5k": "ImageNetPretrained/25093814/X-152-32x8d-IN5k.pkl", + } + + C2_DETECTRON_PATH_FORMAT = ( + "{prefix}/{url}/output/train/{dataset}/{type}/model_final.pkl" # noqa B950 + ) + + C2_DATASET_COCO = "coco_2014_train%3Acoco_2014_valminusminival" + C2_DATASET_COCO_KEYPOINTS = "keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival" + + # format: {model_name} -> part of the url + C2_DETECTRON_MODELS = { + "35857197/e2e_faster_rcnn_R-50-C4_1x": "35857197/12_2017_baselines/e2e_faster_rcnn_R-50-C4_1x.yaml.01_33_49.iAX0mXvW", # noqa B950 + "35857345/e2e_faster_rcnn_R-50-FPN_1x": "35857345/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_1x.yaml.01_36_30.cUF7QR7I", # noqa B950 + "35857890/e2e_faster_rcnn_R-101-FPN_1x": "35857890/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_1x.yaml.01_38_50.sNxI7sX7", # noqa B950 + "36761737/e2e_faster_rcnn_X-101-32x8d-FPN_1x": "36761737/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml.06_31_39.5MIHi1fZ", # noqa B950 + "35858791/e2e_mask_rcnn_R-50-C4_1x": "35858791/12_2017_baselines/e2e_mask_rcnn_R-50-C4_1x.yaml.01_45_57.ZgkA7hPB", # noqa B950 + "35858933/e2e_mask_rcnn_R-50-FPN_1x": "35858933/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml.01_48_14.DzEQe4wC", # noqa B950 + "35861795/e2e_mask_rcnn_R-101-FPN_1x": "35861795/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_1x.yaml.02_31_37.KqyEK4tT", # noqa B950 + "36761843/e2e_mask_rcnn_X-101-32x8d-FPN_1x": "36761843/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml.06_35_59.RZotkLKI", # noqa B950 + "48616381/e2e_mask_rcnn_R-50-FPN_2x_gn": "GN/48616381/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_2x_gn_0416.13_23_38.bTlTI97Q", # noqa B950 + "37697547/e2e_keypoint_rcnn_R-50-FPN_1x": "37697547/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_1x.yaml.08_42_54.kdzV35ao", # noqa B950 + "35998355/rpn_R-50-C4_1x": "35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L", # noqa B950 + "35998814/rpn_R-50-FPN_1x": "35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179", # noqa B950 + "36225147/fast_R-50-FPN_1x": "36225147/12_2017_baselines/fast_rcnn_R-50-FPN_1x.yaml.08_39_09.L3obSdQ2", # noqa B950 + } + + @staticmethod + def get(name): + if name.startswith("Caffe2Detectron/COCO"): + return ModelCatalog._get_c2_detectron_baseline(name) + if name.startswith("ImageNetPretrained/"): + return ModelCatalog._get_c2_imagenet_pretrained(name) + raise RuntimeError("model not present in the catalog: {}".format(name)) + + @staticmethod + def _get_c2_imagenet_pretrained(name): + prefix = ModelCatalog.S3_C2_DETECTRON_PREFIX + name = name[len("ImageNetPretrained/") :] + name = ModelCatalog.C2_IMAGENET_MODELS[name] + url = "/".join([prefix, name]) + return url + + @staticmethod + def _get_c2_detectron_baseline(name): + name = name[len("Caffe2Detectron/COCO/") :] + url = ModelCatalog.C2_DETECTRON_MODELS[name] + if "keypoint_rcnn" in name: + dataset = ModelCatalog.C2_DATASET_COCO_KEYPOINTS + else: + dataset = ModelCatalog.C2_DATASET_COCO + + if "35998355/rpn_R-50-C4_1x" in name: + # this one model is somehow different from others .. 
+ type = "rpn" + else: + type = "generalized_rcnn" + + # Detectron C2 models are stored in the structure defined in `C2_DETECTRON_PATH_FORMAT`. + url = ModelCatalog.C2_DETECTRON_PATH_FORMAT.format( + prefix=ModelCatalog.S3_C2_DETECTRON_PREFIX, url=url, type=type, dataset=dataset + ) + return url + + +class ModelCatalogHandler(PathHandler): + """ + Resolve URL like catalog://. + """ + + PREFIX = "catalog://" + + def _get_supported_prefixes(self): + return [self.PREFIX] + + def _get_local_path(self, path, **kwargs): + logger = logging.getLogger(__name__) + catalog_path = ModelCatalog.get(path[len(self.PREFIX) :]) + logger.info("Catalog entry {} points to {}".format(path, catalog_path)) + return PathManager.get_local_path(catalog_path, **kwargs) + + def _open(self, path, mode="r", **kwargs): + return PathManager.open(self._get_local_path(path), mode, **kwargs) + + +PathManager.register_handler(ModelCatalogHandler()) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/checkpoint/detection_checkpoint.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/checkpoint/detection_checkpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..7d411e54bd5e004504423ba052db6f85ec511f72 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/checkpoint/detection_checkpoint.py @@ -0,0 +1,145 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import logging +import os +import pickle +from urllib.parse import parse_qs, urlparse +import torch +from fvcore.common.checkpoint import Checkpointer +from torch.nn.parallel import DistributedDataParallel + +import annotator.oneformer.detectron2.utils.comm as comm +from annotator.oneformer.detectron2.utils.file_io import PathManager + +from .c2_model_loading import align_and_update_state_dicts + + +class DetectionCheckpointer(Checkpointer): + """ + Same as :class:`Checkpointer`, but is able to: + 1. handle models in detectron & detectron2 model zoo, and apply conversions for legacy models. + 2. correctly load checkpoints that are only available on the master worker + """ + + def __init__(self, model, save_dir="", *, save_to_disk=None, **checkpointables): + is_main_process = comm.is_main_process() + super().__init__( + model, + save_dir, + save_to_disk=is_main_process if save_to_disk is None else save_to_disk, + **checkpointables, + ) + self.path_manager = PathManager + self._parsed_url_during_load = None + + def load(self, path, *args, **kwargs): + assert self._parsed_url_during_load is None + need_sync = False + logger = logging.getLogger(__name__) + logger.info("[DetectionCheckpointer] Loading from {} ...".format(path)) + + if path and isinstance(self.model, DistributedDataParallel): + path = self.path_manager.get_local_path(path) + has_file = os.path.isfile(path) + all_has_file = comm.all_gather(has_file) + if not all_has_file[0]: + raise OSError(f"File {path} not found on main worker.") + if not all(all_has_file): + logger.warning( + f"Not all workers can read checkpoint {path}. " + "Training may fail to fully resume." + ) + # TODO: broadcast the checkpoint file contents from main + # worker, and load from it instead. 
+ need_sync = True + if not has_file: + path = None # don't load if not readable + + if path: + parsed_url = urlparse(path) + self._parsed_url_during_load = parsed_url + path = parsed_url._replace(query="").geturl() # remove query from filename + path = self.path_manager.get_local_path(path) + + self.logger.setLevel('CRITICAL') + ret = super().load(path, *args, **kwargs) + + if need_sync: + logger.info("Broadcasting model states from main worker ...") + self.model._sync_params_and_buffers() + self._parsed_url_during_load = None # reset to None + return ret + + def _load_file(self, filename): + if filename.endswith(".pkl"): + with PathManager.open(filename, "rb") as f: + data = pickle.load(f, encoding="latin1") + if "model" in data and "__author__" in data: + # file is in Detectron2 model zoo format + self.logger.info("Reading a file from '{}'".format(data["__author__"])) + return data + else: + # assume file is from Caffe2 / Detectron1 model zoo + if "blobs" in data: + # Detection models have "blobs", but ImageNet models don't + data = data["blobs"] + data = {k: v for k, v in data.items() if not k.endswith("_momentum")} + return {"model": data, "__author__": "Caffe2", "matching_heuristics": True} + elif filename.endswith(".pyth"): + # assume file is from pycls; no one else seems to use the ".pyth" extension + with PathManager.open(filename, "rb") as f: + data = torch.load(f) + assert ( + "model_state" in data + ), f"Cannot load .pyth file {filename}; pycls checkpoints must contain 'model_state'." + model_state = { + k: v + for k, v in data["model_state"].items() + if not k.endswith("num_batches_tracked") + } + return {"model": model_state, "__author__": "pycls", "matching_heuristics": True} + + loaded = self._torch_load(filename) + if "model" not in loaded: + loaded = {"model": loaded} + assert self._parsed_url_during_load is not None, "`_load_file` must be called inside `load`" + parsed_url = self._parsed_url_during_load + queries = parse_qs(parsed_url.query) + if queries.pop("matching_heuristics", "False") == ["True"]: + loaded["matching_heuristics"] = True + if len(queries) > 0: + raise ValueError( + f"Unsupported query remaining: f{queries}, orginal filename: {parsed_url.geturl()}" + ) + return loaded + + def _torch_load(self, f): + return super()._load_file(f) + + def _load_model(self, checkpoint): + if checkpoint.get("matching_heuristics", False): + self._convert_ndarray_to_tensor(checkpoint["model"]) + # convert weights by name-matching heuristics + checkpoint["model"] = align_and_update_state_dicts( + self.model.state_dict(), + checkpoint["model"], + c2_conversion=checkpoint.get("__author__", None) == "Caffe2", + ) + # for non-caffe2 models, use standard ways to load it + incompatible = super()._load_model(checkpoint) + + model_buffers = dict(self.model.named_buffers(recurse=False)) + for k in ["pixel_mean", "pixel_std"]: + # Ignore missing key message about pixel_mean/std. + # Though they may be missing in old checkpoints, they will be correctly + # initialized from config anyway. + if k in model_buffers: + try: + incompatible.missing_keys.remove(k) + except ValueError: + pass + for k in incompatible.unexpected_keys[:]: + # Ignore unexpected keys about cell anchors. They exist in old checkpoints + # but now they are non-persistent buffers and will not be in new checkpoints. 
+ if "anchor_generator.cell_anchors" in k: + incompatible.unexpected_keys.remove(k) + return incompatible diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/config/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/config/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a78ed118685fcfd869f7a72caf6b94621530196a --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/config/__init__.py @@ -0,0 +1,24 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from .compat import downgrade_config, upgrade_config +from .config import CfgNode, get_cfg, global_cfg, set_global_cfg, configurable +from .instantiate import instantiate +from .lazy import LazyCall, LazyConfig + +__all__ = [ + "CfgNode", + "get_cfg", + "global_cfg", + "set_global_cfg", + "downgrade_config", + "upgrade_config", + "configurable", + "instantiate", + "LazyCall", + "LazyConfig", +] + + +from annotator.oneformer.detectron2.utils.env import fixup_module_metadata + +fixup_module_metadata(__name__, globals(), __all__) +del fixup_module_metadata diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/config/compat.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/config/compat.py new file mode 100644 index 0000000000000000000000000000000000000000..11a08c439bf14defd880e37a938fab8a08e68eeb --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/config/compat.py @@ -0,0 +1,229 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +""" +Backward compatibility of configs. + +Instructions to bump version: ++ It's not needed to bump version if new keys are added. + It's only needed when backward-incompatible changes happen + (i.e., some existing keys disappear, or the meaning of a key changes) ++ To bump version, do the following: + 1. Increment _C.VERSION in defaults.py + 2. Add a converter in this file. + + Each ConverterVX has a function "upgrade" which in-place upgrades config from X-1 to X, + and a function "downgrade" which in-place downgrades config from X to X-1 + + In each function, VERSION is left unchanged. + + Each converter assumes that its input has the relevant keys + (i.e., the input is not a partial config). + 3. Run the tests (test_config.py) to make sure the upgrade & downgrade + functions are consistent. +""" + +import logging +from typing import List, Optional, Tuple + +from .config import CfgNode as CN +from .defaults import _C + +__all__ = ["upgrade_config", "downgrade_config"] + + +def upgrade_config(cfg: CN, to_version: Optional[int] = None) -> CN: + """ + Upgrade a config from its current version to a newer version. + + Args: + cfg (CfgNode): + to_version (int): defaults to the latest version. + """ + cfg = cfg.clone() + if to_version is None: + to_version = _C.VERSION + + assert cfg.VERSION <= to_version, "Cannot upgrade from v{} to v{}!".format( + cfg.VERSION, to_version + ) + for k in range(cfg.VERSION, to_version): + converter = globals()["ConverterV" + str(k + 1)] + converter.upgrade(cfg) + cfg.VERSION = k + 1 + return cfg + + +def downgrade_config(cfg: CN, to_version: int) -> CN: + """ + Downgrade a config from its current version to an older version. + + Args: + cfg (CfgNode): + to_version (int): + + Note: + A general downgrade of arbitrary configs is not always possible due to the + different functionalities in different versions. 
+ The purpose of downgrade is only to recover the defaults in old versions, + allowing it to load an old partial yaml config. + Therefore, the implementation only needs to fill in the default values + in the old version when a general downgrade is not possible. + """ + cfg = cfg.clone() + assert cfg.VERSION >= to_version, "Cannot downgrade from v{} to v{}!".format( + cfg.VERSION, to_version + ) + for k in range(cfg.VERSION, to_version, -1): + converter = globals()["ConverterV" + str(k)] + converter.downgrade(cfg) + cfg.VERSION = k - 1 + return cfg + + +def guess_version(cfg: CN, filename: str) -> int: + """ + Guess the version of a partial config where the VERSION field is not specified. + Returns the version, or the latest if cannot make a guess. + + This makes it easier for users to migrate. + """ + logger = logging.getLogger(__name__) + + def _has(name: str) -> bool: + cur = cfg + for n in name.split("."): + if n not in cur: + return False + cur = cur[n] + return True + + # Most users' partial configs have "MODEL.WEIGHT", so guess on it + ret = None + if _has("MODEL.WEIGHT") or _has("TEST.AUG_ON"): + ret = 1 + + if ret is not None: + logger.warning("Config '{}' has no VERSION. Assuming it to be v{}.".format(filename, ret)) + else: + ret = _C.VERSION + logger.warning( + "Config '{}' has no VERSION. Assuming it to be compatible with latest v{}.".format( + filename, ret + ) + ) + return ret + + +def _rename(cfg: CN, old: str, new: str) -> None: + old_keys = old.split(".") + new_keys = new.split(".") + + def _set(key_seq: List[str], val: str) -> None: + cur = cfg + for k in key_seq[:-1]: + if k not in cur: + cur[k] = CN() + cur = cur[k] + cur[key_seq[-1]] = val + + def _get(key_seq: List[str]) -> CN: + cur = cfg + for k in key_seq: + cur = cur[k] + return cur + + def _del(key_seq: List[str]) -> None: + cur = cfg + for k in key_seq[:-1]: + cur = cur[k] + del cur[key_seq[-1]] + if len(cur) == 0 and len(key_seq) > 1: + _del(key_seq[:-1]) + + _set(new_keys, _get(old_keys)) + _del(old_keys) + + +class _RenameConverter: + """ + A converter that handles simple rename. + """ + + RENAME: List[Tuple[str, str]] = [] # list of tuples of (old name, new name) + + @classmethod + def upgrade(cls, cfg: CN) -> None: + for old, new in cls.RENAME: + _rename(cfg, old, new) + + @classmethod + def downgrade(cls, cfg: CN) -> None: + for old, new in cls.RENAME[::-1]: + _rename(cfg, new, old) + + +class ConverterV1(_RenameConverter): + RENAME = [("MODEL.RPN_HEAD.NAME", "MODEL.RPN.HEAD_NAME")] + + +class ConverterV2(_RenameConverter): + """ + A large bulk of rename, before public release. 
+ """ + + RENAME = [ + ("MODEL.WEIGHT", "MODEL.WEIGHTS"), + ("MODEL.PANOPTIC_FPN.SEMANTIC_LOSS_SCALE", "MODEL.SEM_SEG_HEAD.LOSS_WEIGHT"), + ("MODEL.PANOPTIC_FPN.RPN_LOSS_SCALE", "MODEL.RPN.LOSS_WEIGHT"), + ("MODEL.PANOPTIC_FPN.INSTANCE_LOSS_SCALE", "MODEL.PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT"), + ("MODEL.PANOPTIC_FPN.COMBINE_ON", "MODEL.PANOPTIC_FPN.COMBINE.ENABLED"), + ( + "MODEL.PANOPTIC_FPN.COMBINE_OVERLAP_THRESHOLD", + "MODEL.PANOPTIC_FPN.COMBINE.OVERLAP_THRESH", + ), + ( + "MODEL.PANOPTIC_FPN.COMBINE_STUFF_AREA_LIMIT", + "MODEL.PANOPTIC_FPN.COMBINE.STUFF_AREA_LIMIT", + ), + ( + "MODEL.PANOPTIC_FPN.COMBINE_INSTANCES_CONFIDENCE_THRESHOLD", + "MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH", + ), + ("MODEL.ROI_HEADS.SCORE_THRESH", "MODEL.ROI_HEADS.SCORE_THRESH_TEST"), + ("MODEL.ROI_HEADS.NMS", "MODEL.ROI_HEADS.NMS_THRESH_TEST"), + ("MODEL.RETINANET.INFERENCE_SCORE_THRESHOLD", "MODEL.RETINANET.SCORE_THRESH_TEST"), + ("MODEL.RETINANET.INFERENCE_TOPK_CANDIDATES", "MODEL.RETINANET.TOPK_CANDIDATES_TEST"), + ("MODEL.RETINANET.INFERENCE_NMS_THRESHOLD", "MODEL.RETINANET.NMS_THRESH_TEST"), + ("TEST.DETECTIONS_PER_IMG", "TEST.DETECTIONS_PER_IMAGE"), + ("TEST.AUG_ON", "TEST.AUG.ENABLED"), + ("TEST.AUG_MIN_SIZES", "TEST.AUG.MIN_SIZES"), + ("TEST.AUG_MAX_SIZE", "TEST.AUG.MAX_SIZE"), + ("TEST.AUG_FLIP", "TEST.AUG.FLIP"), + ] + + @classmethod + def upgrade(cls, cfg: CN) -> None: + super().upgrade(cfg) + + if cfg.MODEL.META_ARCHITECTURE == "RetinaNet": + _rename( + cfg, "MODEL.RETINANET.ANCHOR_ASPECT_RATIOS", "MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS" + ) + _rename(cfg, "MODEL.RETINANET.ANCHOR_SIZES", "MODEL.ANCHOR_GENERATOR.SIZES") + del cfg["MODEL"]["RPN"]["ANCHOR_SIZES"] + del cfg["MODEL"]["RPN"]["ANCHOR_ASPECT_RATIOS"] + else: + _rename(cfg, "MODEL.RPN.ANCHOR_ASPECT_RATIOS", "MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS") + _rename(cfg, "MODEL.RPN.ANCHOR_SIZES", "MODEL.ANCHOR_GENERATOR.SIZES") + del cfg["MODEL"]["RETINANET"]["ANCHOR_SIZES"] + del cfg["MODEL"]["RETINANET"]["ANCHOR_ASPECT_RATIOS"] + del cfg["MODEL"]["RETINANET"]["ANCHOR_STRIDES"] + + @classmethod + def downgrade(cls, cfg: CN) -> None: + super().downgrade(cfg) + + _rename(cfg, "MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS", "MODEL.RPN.ANCHOR_ASPECT_RATIOS") + _rename(cfg, "MODEL.ANCHOR_GENERATOR.SIZES", "MODEL.RPN.ANCHOR_SIZES") + cfg.MODEL.RETINANET.ANCHOR_ASPECT_RATIOS = cfg.MODEL.RPN.ANCHOR_ASPECT_RATIOS + cfg.MODEL.RETINANET.ANCHOR_SIZES = cfg.MODEL.RPN.ANCHOR_SIZES + cfg.MODEL.RETINANET.ANCHOR_STRIDES = [] # this is not used anywhere in any version diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/config/config.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/config/config.py new file mode 100644 index 0000000000000000000000000000000000000000..c5b1303422481dc7adb3ee5221377770e0c01a81 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/config/config.py @@ -0,0 +1,265 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + +import functools +import inspect +import logging +from fvcore.common.config import CfgNode as _CfgNode + +from annotator.oneformer.detectron2.utils.file_io import PathManager + + +class CfgNode(_CfgNode): + """ + The same as `fvcore.common.config.CfgNode`, but different in: + + 1. Use unsafe yaml loading by default. + Note that this may lead to arbitrary code execution: you must not + load a config file from untrusted sources before manually inspecting + the content of the file. + 2. 
Support config versioning. + When attempting to merge an old config, it will convert the old config automatically. + + .. automethod:: clone + .. automethod:: freeze + .. automethod:: defrost + .. automethod:: is_frozen + .. automethod:: load_yaml_with_base + .. automethod:: merge_from_list + .. automethod:: merge_from_other_cfg + """ + + @classmethod + def _open_cfg(cls, filename): + return PathManager.open(filename, "r") + + # Note that the default value of allow_unsafe is changed to True + def merge_from_file(self, cfg_filename: str, allow_unsafe: bool = True) -> None: + """ + Load content from the given config file and merge it into self. + + Args: + cfg_filename: config filename + allow_unsafe: allow unsafe yaml syntax + """ + assert PathManager.isfile(cfg_filename), f"Config file '{cfg_filename}' does not exist!" + loaded_cfg = self.load_yaml_with_base(cfg_filename, allow_unsafe=allow_unsafe) + loaded_cfg = type(self)(loaded_cfg) + + # defaults.py needs to import CfgNode + from .defaults import _C + + latest_ver = _C.VERSION + assert ( + latest_ver == self.VERSION + ), "CfgNode.merge_from_file is only allowed on a config object of latest version!" + + logger = logging.getLogger(__name__) + + loaded_ver = loaded_cfg.get("VERSION", None) + if loaded_ver is None: + from .compat import guess_version + + loaded_ver = guess_version(loaded_cfg, cfg_filename) + assert loaded_ver <= self.VERSION, "Cannot merge a v{} config into a v{} config.".format( + loaded_ver, self.VERSION + ) + + if loaded_ver == self.VERSION: + self.merge_from_other_cfg(loaded_cfg) + else: + # compat.py needs to import CfgNode + from .compat import upgrade_config, downgrade_config + + logger.warning( + "Loading an old v{} config file '{}' by automatically upgrading to v{}. " + "See docs/CHANGELOG.md for instructions to update your files.".format( + loaded_ver, cfg_filename, self.VERSION + ) + ) + # To convert, first obtain a full config at an old version + old_self = downgrade_config(self, to_version=loaded_ver) + old_self.merge_from_other_cfg(loaded_cfg) + new_config = upgrade_config(old_self) + self.clear() + self.update(new_config) + + def dump(self, *args, **kwargs): + """ + Returns: + str: a yaml string representation of the config + """ + # to make it show up in docs + return super().dump(*args, **kwargs) + + +global_cfg = CfgNode() + + +def get_cfg() -> CfgNode: + """ + Get a copy of the default config. + + Returns: + a detectron2 CfgNode instance. + """ + from .defaults import _C + + return _C.clone() + + +def set_global_cfg(cfg: CfgNode) -> None: + """ + Let the global config point to the given cfg. + + Assume that the given "cfg" has the key "KEY", after calling + `set_global_cfg(cfg)`, the key can be accessed by: + :: + from annotator.oneformer.detectron2.config import global_cfg + print(global_cfg.KEY) + + By using a hacky global config, you can access these configs anywhere, + without having to pass the config object or the values deep into the code. + This is a hacky feature introduced for quick prototyping / research exploration. + """ + global global_cfg + global_cfg.clear() + global_cfg.update(cfg) + + +def configurable(init_func=None, *, from_config=None): + """ + Decorate a function or a class's __init__ method so that it can be called + with a :class:`CfgNode` object using a :func:`from_config` function that translates + :class:`CfgNode` to arguments. 
+ + Examples: + :: + # Usage 1: Decorator on __init__: + class A: + @configurable + def __init__(self, a, b=2, c=3): + pass + + @classmethod + def from_config(cls, cfg): # 'cfg' must be the first argument + # Returns kwargs to be passed to __init__ + return {"a": cfg.A, "b": cfg.B} + + a1 = A(a=1, b=2) # regular construction + a2 = A(cfg) # construct with a cfg + a3 = A(cfg, b=3, c=4) # construct with extra overwrite + + # Usage 2: Decorator on any function. Needs an extra from_config argument: + @configurable(from_config=lambda cfg: {"a: cfg.A, "b": cfg.B}) + def a_func(a, b=2, c=3): + pass + + a1 = a_func(a=1, b=2) # regular call + a2 = a_func(cfg) # call with a cfg + a3 = a_func(cfg, b=3, c=4) # call with extra overwrite + + Args: + init_func (callable): a class's ``__init__`` method in usage 1. The + class must have a ``from_config`` classmethod which takes `cfg` as + the first argument. + from_config (callable): the from_config function in usage 2. It must take `cfg` + as its first argument. + """ + + if init_func is not None: + assert ( + inspect.isfunction(init_func) + and from_config is None + and init_func.__name__ == "__init__" + ), "Incorrect use of @configurable. Check API documentation for examples." + + @functools.wraps(init_func) + def wrapped(self, *args, **kwargs): + try: + from_config_func = type(self).from_config + except AttributeError as e: + raise AttributeError( + "Class with @configurable must have a 'from_config' classmethod." + ) from e + if not inspect.ismethod(from_config_func): + raise TypeError("Class with @configurable must have a 'from_config' classmethod.") + + if _called_with_cfg(*args, **kwargs): + explicit_args = _get_args_from_config(from_config_func, *args, **kwargs) + init_func(self, **explicit_args) + else: + init_func(self, *args, **kwargs) + + return wrapped + + else: + if from_config is None: + return configurable # @configurable() is made equivalent to @configurable + assert inspect.isfunction( + from_config + ), "from_config argument of configurable must be a function!" + + def wrapper(orig_func): + @functools.wraps(orig_func) + def wrapped(*args, **kwargs): + if _called_with_cfg(*args, **kwargs): + explicit_args = _get_args_from_config(from_config, *args, **kwargs) + return orig_func(**explicit_args) + else: + return orig_func(*args, **kwargs) + + wrapped.from_config = from_config + return wrapped + + return wrapper + + +def _get_args_from_config(from_config_func, *args, **kwargs): + """ + Use `from_config` to obtain explicit arguments. 
+ + Returns: + dict: arguments to be used for cls.__init__ + """ + signature = inspect.signature(from_config_func) + if list(signature.parameters.keys())[0] != "cfg": + if inspect.isfunction(from_config_func): + name = from_config_func.__name__ + else: + name = f"{from_config_func.__self__}.from_config" + raise TypeError(f"{name} must take 'cfg' as the first argument!") + support_var_arg = any( + param.kind in [param.VAR_POSITIONAL, param.VAR_KEYWORD] + for param in signature.parameters.values() + ) + if support_var_arg: # forward all arguments to from_config, if from_config accepts them + ret = from_config_func(*args, **kwargs) + else: + # forward supported arguments to from_config + supported_arg_names = set(signature.parameters.keys()) + extra_kwargs = {} + for name in list(kwargs.keys()): + if name not in supported_arg_names: + extra_kwargs[name] = kwargs.pop(name) + ret = from_config_func(*args, **kwargs) + # forward the other arguments to __init__ + ret.update(extra_kwargs) + return ret + + +def _called_with_cfg(*args, **kwargs): + """ + Returns: + bool: whether the arguments contain CfgNode and should be considered + forwarded to from_config. + """ + from omegaconf import DictConfig + + if len(args) and isinstance(args[0], (_CfgNode, DictConfig)): + return True + if isinstance(kwargs.pop("cfg", None), (_CfgNode, DictConfig)): + return True + # `from_config`'s first argument is forced to be "cfg". + # So the above check covers all cases. + return False diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/config/defaults.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/config/defaults.py new file mode 100644 index 0000000000000000000000000000000000000000..ffb79e763f076c9ae982c727309e19b8e0ef170f --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/config/defaults.py @@ -0,0 +1,650 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from .config import CfgNode as CN + +# NOTE: given the new config system +# (https://detectron2.readthedocs.io/en/latest/tutorials/lazyconfigs.html), +# we will stop adding new functionalities to default CfgNode. + +# ----------------------------------------------------------------------------- +# Convention about Training / Test specific parameters +# ----------------------------------------------------------------------------- +# Whenever an argument can be either used for training or for testing, the +# corresponding name will be post-fixed by a _TRAIN for a training parameter, +# or _TEST for a test-specific parameter. +# For example, the number of images during training will be +# IMAGES_PER_BATCH_TRAIN, while the number of images for testing will be +# IMAGES_PER_BATCH_TEST + +# ----------------------------------------------------------------------------- +# Config definition +# ----------------------------------------------------------------------------- + +_C = CN() + +# The version number, to upgrade from old configs to new ones if any +# changes happen. It's recommended to keep a VERSION in your config file. +_C.VERSION = 2 + +_C.MODEL = CN() +_C.MODEL.LOAD_PROPOSALS = False +_C.MODEL.MASK_ON = False +_C.MODEL.KEYPOINT_ON = False +_C.MODEL.DEVICE = "cuda" +_C.MODEL.META_ARCHITECTURE = "GeneralizedRCNN" + +# Path (a file path, or URL like detectron2://.., https://..) to a checkpoint file +# to be loaded to the model. You can find available models in the model zoo. 
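# e.g. "detectron2://ImageNetPretrained/torchvision/R-50.pkl" (as used by the OneFormer base
# config earlier in this diff), or a "catalog://..." entry resolved by checkpoint/catalog.py.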
+_C.MODEL.WEIGHTS = "" + +# Values to be used for image normalization (BGR order, since INPUT.FORMAT defaults to BGR). +# To train on images of different number of channels, just set different mean & std. +# Default values are the mean pixel value from ImageNet: [103.53, 116.28, 123.675] +_C.MODEL.PIXEL_MEAN = [103.530, 116.280, 123.675] +# When using pre-trained models in Detectron1 or any MSRA models, +# std has been absorbed into its conv1 weights, so the std needs to be set 1. +# Otherwise, you can use [57.375, 57.120, 58.395] (ImageNet std) +_C.MODEL.PIXEL_STD = [1.0, 1.0, 1.0] + + +# ----------------------------------------------------------------------------- +# INPUT +# ----------------------------------------------------------------------------- +_C.INPUT = CN() +# By default, {MIN,MAX}_SIZE options are used in transforms.ResizeShortestEdge. +# Please refer to ResizeShortestEdge for detailed definition. +# Size of the smallest side of the image during training +_C.INPUT.MIN_SIZE_TRAIN = (800,) +# Sample size of smallest side by choice or random selection from range give by +# INPUT.MIN_SIZE_TRAIN +_C.INPUT.MIN_SIZE_TRAIN_SAMPLING = "choice" +# Maximum size of the side of the image during training +_C.INPUT.MAX_SIZE_TRAIN = 1333 +# Size of the smallest side of the image during testing. Set to zero to disable resize in testing. +_C.INPUT.MIN_SIZE_TEST = 800 +# Maximum size of the side of the image during testing +_C.INPUT.MAX_SIZE_TEST = 1333 +# Mode for flipping images used in data augmentation during training +# choose one of ["horizontal, "vertical", "none"] +_C.INPUT.RANDOM_FLIP = "horizontal" + +# `True` if cropping is used for data augmentation during training +_C.INPUT.CROP = CN({"ENABLED": False}) +# Cropping type. See documentation of `detectron2.data.transforms.RandomCrop` for explanation. +_C.INPUT.CROP.TYPE = "relative_range" +# Size of crop in range (0, 1] if CROP.TYPE is "relative" or "relative_range" and in number of +# pixels if CROP.TYPE is "absolute" +_C.INPUT.CROP.SIZE = [0.9, 0.9] + + +# Whether the model needs RGB, YUV, HSV etc. +# Should be one of the modes defined here, as we use PIL to read the image: +# https://pillow.readthedocs.io/en/stable/handbook/concepts.html#concept-modes +# with BGR being the one exception. One can set image format to BGR, we will +# internally use RGB for conversion and flip the channels over +_C.INPUT.FORMAT = "BGR" +# The ground truth mask format that the model will use. +# Mask R-CNN supports either "polygon" or "bitmask" as ground truth. +_C.INPUT.MASK_FORMAT = "polygon" # alternative: "bitmask" + + +# ----------------------------------------------------------------------------- +# Dataset +# ----------------------------------------------------------------------------- +_C.DATASETS = CN() +# List of the dataset names for training. Must be registered in DatasetCatalog +# Samples from these datasets will be merged and used as one dataset. +_C.DATASETS.TRAIN = () +# List of the pre-computed proposal files for training, which must be consistent +# with datasets listed in DATASETS.TRAIN. +_C.DATASETS.PROPOSAL_FILES_TRAIN = () +# Number of top scoring precomputed proposals to keep for training +_C.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TRAIN = 2000 +# List of the dataset names for testing. Must be registered in DatasetCatalog +_C.DATASETS.TEST = () +# List of the pre-computed proposal files for test, which must be consistent +# with datasets listed in DATASETS.TEST. 
+_C.DATASETS.PROPOSAL_FILES_TEST = () +# Number of top scoring precomputed proposals to keep for test +_C.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TEST = 1000 + +# ----------------------------------------------------------------------------- +# DataLoader +# ----------------------------------------------------------------------------- +_C.DATALOADER = CN() +# Number of data loading threads +_C.DATALOADER.NUM_WORKERS = 4 +# If True, each batch should contain only images for which the aspect ratio +# is compatible. This groups portrait images together, and landscape images +# are not batched with portrait images. +_C.DATALOADER.ASPECT_RATIO_GROUPING = True +# Options: TrainingSampler, RepeatFactorTrainingSampler +_C.DATALOADER.SAMPLER_TRAIN = "TrainingSampler" +# Repeat threshold for RepeatFactorTrainingSampler +_C.DATALOADER.REPEAT_THRESHOLD = 0.0 +# Tf True, when working on datasets that have instance annotations, the +# training dataloader will filter out images without associated annotations +_C.DATALOADER.FILTER_EMPTY_ANNOTATIONS = True + +# ---------------------------------------------------------------------------- # +# Backbone options +# ---------------------------------------------------------------------------- # +_C.MODEL.BACKBONE = CN() + +_C.MODEL.BACKBONE.NAME = "build_resnet_backbone" +# Freeze the first several stages so they are not trained. +# There are 5 stages in ResNet. The first is a convolution, and the following +# stages are each group of residual blocks. +_C.MODEL.BACKBONE.FREEZE_AT = 2 + + +# ---------------------------------------------------------------------------- # +# FPN options +# ---------------------------------------------------------------------------- # +_C.MODEL.FPN = CN() +# Names of the input feature maps to be used by FPN +# They must have contiguous power of 2 strides +# e.g., ["res2", "res3", "res4", "res5"] +_C.MODEL.FPN.IN_FEATURES = [] +_C.MODEL.FPN.OUT_CHANNELS = 256 + +# Options: "" (no norm), "GN" +_C.MODEL.FPN.NORM = "" + +# Types for fusing the FPN top-down and lateral features. Can be either "sum" or "avg" +_C.MODEL.FPN.FUSE_TYPE = "sum" + + +# ---------------------------------------------------------------------------- # +# Proposal generator options +# ---------------------------------------------------------------------------- # +_C.MODEL.PROPOSAL_GENERATOR = CN() +# Current proposal generators include "RPN", "RRPN" and "PrecomputedProposals" +_C.MODEL.PROPOSAL_GENERATOR.NAME = "RPN" +# Proposal height and width both need to be greater than MIN_SIZE +# (a the scale used during training or inference) +_C.MODEL.PROPOSAL_GENERATOR.MIN_SIZE = 0 + + +# ---------------------------------------------------------------------------- # +# Anchor generator options +# ---------------------------------------------------------------------------- # +_C.MODEL.ANCHOR_GENERATOR = CN() +# The generator can be any name in the ANCHOR_GENERATOR registry +_C.MODEL.ANCHOR_GENERATOR.NAME = "DefaultAnchorGenerator" +# Anchor sizes (i.e. sqrt of area) in absolute pixels w.r.t. the network input. +# Format: list[list[float]]. SIZES[i] specifies the list of sizes to use for +# IN_FEATURES[i]; len(SIZES) must be equal to len(IN_FEATURES) or 1. +# When len(SIZES) == 1, SIZES[0] is used for all IN_FEATURES. +_C.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64, 128, 256, 512]] +# Anchor aspect ratios. For each area given in `SIZES`, anchors with different aspect +# ratios are generated by an anchor generator. +# Format: list[list[float]]. 
ASPECT_RATIOS[i] specifies the list of aspect ratios (H/W) +# to use for IN_FEATURES[i]; len(ASPECT_RATIOS) == len(IN_FEATURES) must be true, +# or len(ASPECT_RATIOS) == 1 is true and aspect ratio list ASPECT_RATIOS[0] is used +# for all IN_FEATURES. +_C.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.5, 1.0, 2.0]] +# Anchor angles. +# list[list[float]], the angle in degrees, for each input feature map. +# ANGLES[i] specifies the list of angles for IN_FEATURES[i]. +_C.MODEL.ANCHOR_GENERATOR.ANGLES = [[-90, 0, 90]] +# Relative offset between the center of the first anchor and the top-left corner of the image +# Value has to be in [0, 1). Recommend to use 0.5, which means half stride. +# The value is not expected to affect model accuracy. +_C.MODEL.ANCHOR_GENERATOR.OFFSET = 0.0 + +# ---------------------------------------------------------------------------- # +# RPN options +# ---------------------------------------------------------------------------- # +_C.MODEL.RPN = CN() +_C.MODEL.RPN.HEAD_NAME = "StandardRPNHead" # used by RPN_HEAD_REGISTRY + +# Names of the input feature maps to be used by RPN +# e.g., ["p2", "p3", "p4", "p5", "p6"] for FPN +_C.MODEL.RPN.IN_FEATURES = ["res4"] +# Remove RPN anchors that go outside the image by BOUNDARY_THRESH pixels +# Set to -1 or a large value, e.g. 100000, to disable pruning anchors +_C.MODEL.RPN.BOUNDARY_THRESH = -1 +# IOU overlap ratios [BG_IOU_THRESHOLD, FG_IOU_THRESHOLD] +# Minimum overlap required between an anchor and ground-truth box for the +# (anchor, gt box) pair to be a positive example (IoU >= FG_IOU_THRESHOLD +# ==> positive RPN example: 1) +# Maximum overlap allowed between an anchor and ground-truth box for the +# (anchor, gt box) pair to be a negative examples (IoU < BG_IOU_THRESHOLD +# ==> negative RPN example: 0) +# Anchors with overlap in between (BG_IOU_THRESHOLD <= IoU < FG_IOU_THRESHOLD) +# are ignored (-1) +_C.MODEL.RPN.IOU_THRESHOLDS = [0.3, 0.7] +_C.MODEL.RPN.IOU_LABELS = [0, -1, 1] +# Number of regions per image used to train RPN +_C.MODEL.RPN.BATCH_SIZE_PER_IMAGE = 256 +# Target fraction of foreground (positive) examples per RPN minibatch +_C.MODEL.RPN.POSITIVE_FRACTION = 0.5 +# Options are: "smooth_l1", "giou", "diou", "ciou" +_C.MODEL.RPN.BBOX_REG_LOSS_TYPE = "smooth_l1" +_C.MODEL.RPN.BBOX_REG_LOSS_WEIGHT = 1.0 +# Weights on (dx, dy, dw, dh) for normalizing RPN anchor regression targets +_C.MODEL.RPN.BBOX_REG_WEIGHTS = (1.0, 1.0, 1.0, 1.0) +# The transition point from L1 to L2 loss. Set to 0.0 to make the loss simply L1. +_C.MODEL.RPN.SMOOTH_L1_BETA = 0.0 +_C.MODEL.RPN.LOSS_WEIGHT = 1.0 +# Number of top scoring RPN proposals to keep before applying NMS +# When FPN is used, this is *per FPN level* (not total) +_C.MODEL.RPN.PRE_NMS_TOPK_TRAIN = 12000 +_C.MODEL.RPN.PRE_NMS_TOPK_TEST = 6000 +# Number of top scoring RPN proposals to keep after applying NMS +# When FPN is used, this limit is applied per level and then again to the union +# of proposals from all levels +# NOTE: When FPN is used, the meaning of this config is different from Detectron1. +# It means per-batch topk in Detectron1, but per-image topk here. +# See the "find_top_rpn_proposals" function for details. +_C.MODEL.RPN.POST_NMS_TOPK_TRAIN = 2000 +_C.MODEL.RPN.POST_NMS_TOPK_TEST = 1000 +# NMS threshold used on RPN proposals +_C.MODEL.RPN.NMS_THRESH = 0.7 +# Set this to -1 to use the same number of output channels as input channels. 
+_C.MODEL.RPN.CONV_DIMS = [-1] + +# ---------------------------------------------------------------------------- # +# ROI HEADS options +# ---------------------------------------------------------------------------- # +_C.MODEL.ROI_HEADS = CN() +_C.MODEL.ROI_HEADS.NAME = "Res5ROIHeads" +# Number of foreground classes +_C.MODEL.ROI_HEADS.NUM_CLASSES = 80 +# Names of the input feature maps to be used by ROI heads +# Currently all heads (box, mask, ...) use the same input feature map list +# e.g., ["p2", "p3", "p4", "p5"] is commonly used for FPN +_C.MODEL.ROI_HEADS.IN_FEATURES = ["res4"] +# IOU overlap ratios [IOU_THRESHOLD] +# Overlap threshold for an RoI to be considered background (if < IOU_THRESHOLD) +# Overlap threshold for an RoI to be considered foreground (if >= IOU_THRESHOLD) +_C.MODEL.ROI_HEADS.IOU_THRESHOLDS = [0.5] +_C.MODEL.ROI_HEADS.IOU_LABELS = [0, 1] +# RoI minibatch size *per image* (number of regions of interest [ROIs]) during training +# Total number of RoIs per training minibatch = +# ROI_HEADS.BATCH_SIZE_PER_IMAGE * SOLVER.IMS_PER_BATCH +# E.g., a common configuration is: 512 * 16 = 8192 +_C.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512 +# Target fraction of RoI minibatch that is labeled foreground (i.e. class > 0) +_C.MODEL.ROI_HEADS.POSITIVE_FRACTION = 0.25 + +# Only used on test mode + +# Minimum score threshold (assuming scores in a [0, 1] range); a value chosen to +# balance obtaining high recall with not having too many low precision +# detections that will slow down inference post processing steps (like NMS) +# A default threshold of 0.0 increases AP by ~0.2-0.3 but significantly slows down +# inference. +_C.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.05 +# Overlap threshold used for non-maximum suppression (suppress boxes with +# IoU >= this threshold) +_C.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.5 +# If True, augment proposals with ground-truth boxes before sampling proposals to +# train ROI heads. +_C.MODEL.ROI_HEADS.PROPOSAL_APPEND_GT = True + +# ---------------------------------------------------------------------------- # +# Box Head +# ---------------------------------------------------------------------------- # +_C.MODEL.ROI_BOX_HEAD = CN() +# C4 don't use head name option +# Options for non-C4 models: FastRCNNConvFCHead, +_C.MODEL.ROI_BOX_HEAD.NAME = "" +# Options are: "smooth_l1", "giou", "diou", "ciou" +_C.MODEL.ROI_BOX_HEAD.BBOX_REG_LOSS_TYPE = "smooth_l1" +# The final scaling coefficient on the box regression loss, used to balance the magnitude of its +# gradients with other losses in the model. See also `MODEL.ROI_KEYPOINT_HEAD.LOSS_WEIGHT`. +_C.MODEL.ROI_BOX_HEAD.BBOX_REG_LOSS_WEIGHT = 1.0 +# Default weights on (dx, dy, dw, dh) for normalizing bbox regression targets +# These are empirically chosen to approximately lead to unit variance targets +_C.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10.0, 10.0, 5.0, 5.0) +# The transition point from L1 to L2 loss. Set to 0.0 to make the loss simply L1. +_C.MODEL.ROI_BOX_HEAD.SMOOTH_L1_BETA = 0.0 +_C.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION = 14 +_C.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO = 0 +# Type of pooling operation applied to the incoming feature map for each RoI +_C.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignV2" + +_C.MODEL.ROI_BOX_HEAD.NUM_FC = 0 +# Hidden layer dimension for FC layers in the RoI box head +_C.MODEL.ROI_BOX_HEAD.FC_DIM = 1024 +_C.MODEL.ROI_BOX_HEAD.NUM_CONV = 0 +# Channel dimension for Conv layers in the RoI box head +_C.MODEL.ROI_BOX_HEAD.CONV_DIM = 256 +# Normalization method for the convolution layers. 
+# Options: "" (no norm), "GN", "SyncBN". +_C.MODEL.ROI_BOX_HEAD.NORM = "" +# Whether to use class agnostic for bbox regression +_C.MODEL.ROI_BOX_HEAD.CLS_AGNOSTIC_BBOX_REG = False +# If true, RoI heads use bounding boxes predicted by the box head rather than proposal boxes. +_C.MODEL.ROI_BOX_HEAD.TRAIN_ON_PRED_BOXES = False + +# Federated loss can be used to improve the training of LVIS +_C.MODEL.ROI_BOX_HEAD.USE_FED_LOSS = False +# Sigmoid cross entrophy is used with federated loss +_C.MODEL.ROI_BOX_HEAD.USE_SIGMOID_CE = False +# The power value applied to image_count when calcualting frequency weight +_C.MODEL.ROI_BOX_HEAD.FED_LOSS_FREQ_WEIGHT_POWER = 0.5 +# Number of classes to keep in total +_C.MODEL.ROI_BOX_HEAD.FED_LOSS_NUM_CLASSES = 50 + +# ---------------------------------------------------------------------------- # +# Cascaded Box Head +# ---------------------------------------------------------------------------- # +_C.MODEL.ROI_BOX_CASCADE_HEAD = CN() +# The number of cascade stages is implicitly defined by the length of the following two configs. +_C.MODEL.ROI_BOX_CASCADE_HEAD.BBOX_REG_WEIGHTS = ( + (10.0, 10.0, 5.0, 5.0), + (20.0, 20.0, 10.0, 10.0), + (30.0, 30.0, 15.0, 15.0), +) +_C.MODEL.ROI_BOX_CASCADE_HEAD.IOUS = (0.5, 0.6, 0.7) + + +# ---------------------------------------------------------------------------- # +# Mask Head +# ---------------------------------------------------------------------------- # +_C.MODEL.ROI_MASK_HEAD = CN() +_C.MODEL.ROI_MASK_HEAD.NAME = "MaskRCNNConvUpsampleHead" +_C.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION = 14 +_C.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO = 0 +_C.MODEL.ROI_MASK_HEAD.NUM_CONV = 0 # The number of convs in the mask head +_C.MODEL.ROI_MASK_HEAD.CONV_DIM = 256 +# Normalization method for the convolution layers. +# Options: "" (no norm), "GN", "SyncBN". +_C.MODEL.ROI_MASK_HEAD.NORM = "" +# Whether to use class agnostic for mask prediction +_C.MODEL.ROI_MASK_HEAD.CLS_AGNOSTIC_MASK = False +# Type of pooling operation applied to the incoming feature map for each RoI +_C.MODEL.ROI_MASK_HEAD.POOLER_TYPE = "ROIAlignV2" + + +# ---------------------------------------------------------------------------- # +# Keypoint Head +# ---------------------------------------------------------------------------- # +_C.MODEL.ROI_KEYPOINT_HEAD = CN() +_C.MODEL.ROI_KEYPOINT_HEAD.NAME = "KRCNNConvDeconvUpsampleHead" +_C.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION = 14 +_C.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO = 0 +_C.MODEL.ROI_KEYPOINT_HEAD.CONV_DIMS = tuple(512 for _ in range(8)) +_C.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS = 17 # 17 is the number of keypoints in COCO. + +# Images with too few (or no) keypoints are excluded from training. +_C.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE = 1 +# Normalize by the total number of visible keypoints in the minibatch if True. +# Otherwise, normalize by the total number of keypoints that could ever exist +# in the minibatch. +# The keypoint softmax loss is only calculated on visible keypoints. +# Since the number of visible keypoints can vary significantly between +# minibatches, this has the effect of up-weighting the importance of +# minibatches with few visible keypoints. (Imagine the extreme case of +# only one visible keypoint versus N: in the case of N, each one +# contributes 1/N to the gradient compared to the single keypoint +# determining the gradient direction). 
Instead, we can normalize the +# loss by the total number of keypoints, if it were the case that all +# keypoints were visible in a full minibatch. (Returning to the example, +# this means that the one visible keypoint contributes as much as each +# of the N keypoints.) +_C.MODEL.ROI_KEYPOINT_HEAD.NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS = True +# Multi-task loss weight to use for keypoints +# Recommended values: +# - use 1.0 if NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS is True +# - use 4.0 if NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS is False +_C.MODEL.ROI_KEYPOINT_HEAD.LOSS_WEIGHT = 1.0 +# Type of pooling operation applied to the incoming feature map for each RoI +_C.MODEL.ROI_KEYPOINT_HEAD.POOLER_TYPE = "ROIAlignV2" + +# ---------------------------------------------------------------------------- # +# Semantic Segmentation Head +# ---------------------------------------------------------------------------- # +_C.MODEL.SEM_SEG_HEAD = CN() +_C.MODEL.SEM_SEG_HEAD.NAME = "SemSegFPNHead" +_C.MODEL.SEM_SEG_HEAD.IN_FEATURES = ["p2", "p3", "p4", "p5"] +# Label in the semantic segmentation ground truth that is ignored, i.e., no loss is calculated for +# the correposnding pixel. +_C.MODEL.SEM_SEG_HEAD.IGNORE_VALUE = 255 +# Number of classes in the semantic segmentation head +_C.MODEL.SEM_SEG_HEAD.NUM_CLASSES = 54 +# Number of channels in the 3x3 convs inside semantic-FPN heads. +_C.MODEL.SEM_SEG_HEAD.CONVS_DIM = 128 +# Outputs from semantic-FPN heads are up-scaled to the COMMON_STRIDE stride. +_C.MODEL.SEM_SEG_HEAD.COMMON_STRIDE = 4 +# Normalization method for the convolution layers. Options: "" (no norm), "GN". +_C.MODEL.SEM_SEG_HEAD.NORM = "GN" +_C.MODEL.SEM_SEG_HEAD.LOSS_WEIGHT = 1.0 + +_C.MODEL.PANOPTIC_FPN = CN() +# Scaling of all losses from instance detection / segmentation head. +_C.MODEL.PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT = 1.0 + +# options when combining instance & semantic segmentation outputs +_C.MODEL.PANOPTIC_FPN.COMBINE = CN({"ENABLED": True}) # "COMBINE.ENABLED" is deprecated & not used +_C.MODEL.PANOPTIC_FPN.COMBINE.OVERLAP_THRESH = 0.5 +_C.MODEL.PANOPTIC_FPN.COMBINE.STUFF_AREA_LIMIT = 4096 +_C.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = 0.5 + + +# ---------------------------------------------------------------------------- # +# RetinaNet Head +# ---------------------------------------------------------------------------- # +_C.MODEL.RETINANET = CN() + +# This is the number of foreground classes. +_C.MODEL.RETINANET.NUM_CLASSES = 80 + +_C.MODEL.RETINANET.IN_FEATURES = ["p3", "p4", "p5", "p6", "p7"] + +# Convolutions to use in the cls and bbox tower +# NOTE: this doesn't include the last conv for logits +_C.MODEL.RETINANET.NUM_CONVS = 4 + +# IoU overlap ratio [bg, fg] for labeling anchors. +# Anchors with < bg are labeled negative (0) +# Anchors with >= bg and < fg are ignored (-1) +# Anchors with >= fg are labeled positive (1) +_C.MODEL.RETINANET.IOU_THRESHOLDS = [0.4, 0.5] +_C.MODEL.RETINANET.IOU_LABELS = [0, -1, 1] + +# Prior prob for rare case (i.e. foreground) at the beginning of training. +# This is used to set the bias for the logits layer of the classifier subnet. +# This improves training stability in the case of heavy class imbalance. 
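Editorial aside: the comment above motivates PRIOR_PROB (set to 0.01 just below). As a sketch of the conventional focal-loss initialization it refers to — the exact initialization code is not part of this diff — the prior is turned into a bias on the classification logits so that every anchor initially scores roughly PRIOR_PROB:

import math

def prior_prob_bias(prior_prob=0.01):
    # choose bias b such that sigmoid(b) == prior_prob at the start of training
    return -math.log((1.0 - prior_prob) / prior_prob)

print(round(prior_prob_bias(0.01), 3))  # -4.595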
+_C.MODEL.RETINANET.PRIOR_PROB = 0.01 + +# Inference cls score threshold, only anchors with score > INFERENCE_TH are +# considered for inference (to improve speed) +_C.MODEL.RETINANET.SCORE_THRESH_TEST = 0.05 +# Select topk candidates before NMS +_C.MODEL.RETINANET.TOPK_CANDIDATES_TEST = 1000 +_C.MODEL.RETINANET.NMS_THRESH_TEST = 0.5 + +# Weights on (dx, dy, dw, dh) for normalizing Retinanet anchor regression targets +_C.MODEL.RETINANET.BBOX_REG_WEIGHTS = (1.0, 1.0, 1.0, 1.0) + +# Loss parameters +_C.MODEL.RETINANET.FOCAL_LOSS_GAMMA = 2.0 +_C.MODEL.RETINANET.FOCAL_LOSS_ALPHA = 0.25 +_C.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA = 0.1 +# Options are: "smooth_l1", "giou", "diou", "ciou" +_C.MODEL.RETINANET.BBOX_REG_LOSS_TYPE = "smooth_l1" + +# One of BN, SyncBN, FrozenBN, GN +# Only supports GN until unshared norm is implemented +_C.MODEL.RETINANET.NORM = "" + + +# ---------------------------------------------------------------------------- # +# ResNe[X]t options (ResNets = {ResNet, ResNeXt} +# Note that parts of a resnet may be used for both the backbone and the head +# These options apply to both +# ---------------------------------------------------------------------------- # +_C.MODEL.RESNETS = CN() + +_C.MODEL.RESNETS.DEPTH = 50 +_C.MODEL.RESNETS.OUT_FEATURES = ["res4"] # res4 for C4 backbone, res2..5 for FPN backbone + +# Number of groups to use; 1 ==> ResNet; > 1 ==> ResNeXt +_C.MODEL.RESNETS.NUM_GROUPS = 1 + +# Options: FrozenBN, GN, "SyncBN", "BN" +_C.MODEL.RESNETS.NORM = "FrozenBN" + +# Baseline width of each group. +# Scaling this parameters will scale the width of all bottleneck layers. +_C.MODEL.RESNETS.WIDTH_PER_GROUP = 64 + +# Place the stride 2 conv on the 1x1 filter +# Use True only for the original MSRA ResNet; use False for C2 and Torch models +_C.MODEL.RESNETS.STRIDE_IN_1X1 = True + +# Apply dilation in stage "res5" +_C.MODEL.RESNETS.RES5_DILATION = 1 + +# Output width of res2. Scaling this parameters will scale the width of all 1x1 convs in ResNet +# For R18 and R34, this needs to be set to 64 +_C.MODEL.RESNETS.RES2_OUT_CHANNELS = 256 +_C.MODEL.RESNETS.STEM_OUT_CHANNELS = 64 + +# Apply Deformable Convolution in stages +# Specify if apply deform_conv on Res2, Res3, Res4, Res5 +_C.MODEL.RESNETS.DEFORM_ON_PER_STAGE = [False, False, False, False] +# Use True to use modulated deform_conv (DeformableV2, https://arxiv.org/abs/1811.11168); +# Use False for DeformableV1. +_C.MODEL.RESNETS.DEFORM_MODULATED = False +# Number of groups in deformable conv. +_C.MODEL.RESNETS.DEFORM_NUM_GROUPS = 1 + + +# ---------------------------------------------------------------------------- # +# Solver +# ---------------------------------------------------------------------------- # +_C.SOLVER = CN() + +# Options: WarmupMultiStepLR, WarmupCosineLR. +# See detectron2/solver/build.py for definition. +_C.SOLVER.LR_SCHEDULER_NAME = "WarmupMultiStepLR" + +_C.SOLVER.MAX_ITER = 40000 + +_C.SOLVER.BASE_LR = 0.001 +# The end lr, only used by WarmupCosineLR +_C.SOLVER.BASE_LR_END = 0.0 + +_C.SOLVER.MOMENTUM = 0.9 + +_C.SOLVER.NESTEROV = False + +_C.SOLVER.WEIGHT_DECAY = 0.0001 +# The weight decay that's applied to parameters of normalization layers +# (typically the affine transformation) +_C.SOLVER.WEIGHT_DECAY_NORM = 0.0 + +_C.SOLVER.GAMMA = 0.1 +# The iteration number to decrease learning rate by GAMMA. 
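Editorial aside: a rough sketch (not detectron2's actual WarmupMultiStepLR implementation) of how the SOLVER values in this block — BASE_LR, GAMMA, STEPS, WARMUP_FACTOR and WARMUP_ITERS — combine into a per-iteration learning rate:

def lr_at(it, base_lr=0.001, gamma=0.1, steps=(30000,),
          warmup_factor=1.0 / 1000, warmup_iters=1000):
    # linear warmup from base_lr * warmup_factor up to base_lr
    if it < warmup_iters:
        alpha = it / warmup_iters
        warmup = warmup_factor * (1 - alpha) + alpha
    else:
        warmup = 1.0
    # multiply by GAMMA once for every milestone in STEPS already reached
    decay = gamma ** sum(it >= s for s in steps)
    return base_lr * warmup * decay

assert lr_at(0) == 0.001 * (1.0 / 1000)    # start of warmup
assert lr_at(20000) == 0.001               # after warmup, before the first step
assert abs(lr_at(35000) - 0.0001) < 1e-12  # decayed once past the 30k milestone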
+_C.SOLVER.STEPS = (30000,) +# Number of decays in WarmupStepWithFixedGammaLR schedule +_C.SOLVER.NUM_DECAYS = 3 + +_C.SOLVER.WARMUP_FACTOR = 1.0 / 1000 +_C.SOLVER.WARMUP_ITERS = 1000 +_C.SOLVER.WARMUP_METHOD = "linear" +# Whether to rescale the interval for the learning schedule after warmup +_C.SOLVER.RESCALE_INTERVAL = False + +# Save a checkpoint after every this number of iterations +_C.SOLVER.CHECKPOINT_PERIOD = 5000 + +# Number of images per batch across all machines. This is also the number +# of training images per step (i.e. per iteration). If we use 16 GPUs +# and IMS_PER_BATCH = 32, each GPU will see 2 images per batch. +# May be adjusted automatically if REFERENCE_WORLD_SIZE is set. +_C.SOLVER.IMS_PER_BATCH = 16 + +# The reference number of workers (GPUs) this config is meant to train with. +# It takes no effect when set to 0. +# With a non-zero value, it will be used by DefaultTrainer to compute a desired +# per-worker batch size, and then scale the other related configs (total batch size, +# learning rate, etc) to match the per-worker batch size. +# See documentation of `DefaultTrainer.auto_scale_workers` for details: +_C.SOLVER.REFERENCE_WORLD_SIZE = 0 + +# Detectron v1 (and previous detection code) used a 2x higher LR and 0 WD for +# biases. This is not useful (at least for recent models). You should avoid +# changing these and they exist only to reproduce Detectron v1 training if +# desired. +_C.SOLVER.BIAS_LR_FACTOR = 1.0 +_C.SOLVER.WEIGHT_DECAY_BIAS = None # None means following WEIGHT_DECAY + +# Gradient clipping +_C.SOLVER.CLIP_GRADIENTS = CN({"ENABLED": False}) +# Type of gradient clipping, currently 2 values are supported: +# - "value": the absolute values of elements of each gradients are clipped +# - "norm": the norm of the gradient for each parameter is clipped thus +# affecting all elements in the parameter +_C.SOLVER.CLIP_GRADIENTS.CLIP_TYPE = "value" +# Maximum absolute value used for clipping gradients +_C.SOLVER.CLIP_GRADIENTS.CLIP_VALUE = 1.0 +# Floating point number p for L-p norm to be used with the "norm" +# gradient clipping type; for L-inf, please specify .inf +_C.SOLVER.CLIP_GRADIENTS.NORM_TYPE = 2.0 + +# Enable automatic mixed precision for training +# Note that this does not change model's inference behavior. +# To use AMP in inference, run inference under autocast() +_C.SOLVER.AMP = CN({"ENABLED": False}) + +# ---------------------------------------------------------------------------- # +# Specific test options +# ---------------------------------------------------------------------------- # +_C.TEST = CN() +# For end-to-end tests to verify the expected accuracy. +# Each item is [task, metric, value, tolerance] +# e.g.: [['bbox', 'AP', 38.5, 0.2]] +_C.TEST.EXPECTED_RESULTS = [] +# The period (in terms of steps) to evaluate the model during training. +# Set to 0 to disable. +_C.TEST.EVAL_PERIOD = 0 +# The sigmas used to calculate keypoint OKS. See http://cocodataset.org/#keypoints-eval +# When empty, it will use the defaults in COCO. +# Otherwise it should be a list[float] with the same length as ROI_KEYPOINT_HEAD.NUM_KEYPOINTS. +_C.TEST.KEYPOINT_OKS_SIGMAS = [] +# Maximum number of detections to return per image during inference (100 is +# based on the limit established for the COCO dataset). 
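Editorial aside: these defaults are normally consumed through a cloned CfgNode rather than by mutating _C directly. A hedged usage sketch — it assumes the vendored package re-exports the usual detectron2 get_cfg() helper under the import path used elsewhere in this diff:

from annotator.oneformer.detectron2.config import get_cfg

cfg = get_cfg()  # a copy of the _C defaults defined above
cfg.merge_from_list([
    "MODEL.ROI_HEADS.NUM_CLASSES", 3,
    "SOLVER.BASE_LR", 0.00025,
    "TEST.DETECTIONS_PER_IMAGE", 200,
])
cfg.freeze()
print(cfg.SOLVER.BASE_LR, cfg.TEST.DETECTIONS_PER_IMAGE)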
+_C.TEST.DETECTIONS_PER_IMAGE = 100 + +_C.TEST.AUG = CN({"ENABLED": False}) +_C.TEST.AUG.MIN_SIZES = (400, 500, 600, 700, 800, 900, 1000, 1100, 1200) +_C.TEST.AUG.MAX_SIZE = 4000 +_C.TEST.AUG.FLIP = True + +_C.TEST.PRECISE_BN = CN({"ENABLED": False}) +_C.TEST.PRECISE_BN.NUM_ITER = 200 + +# ---------------------------------------------------------------------------- # +# Misc options +# ---------------------------------------------------------------------------- # +# Directory where output files are written +_C.OUTPUT_DIR = "./output" +# Set seed to negative to fully randomize everything. +# Set seed to positive to use a fixed seed. Note that a fixed seed increases +# reproducibility but does not guarantee fully deterministic behavior. +# Disabling all parallelism further increases reproducibility. +_C.SEED = -1 +# Benchmark different cudnn algorithms. +# If input images have very different sizes, this option will have large overhead +# for about 10k iterations. It usually hurts total time, but can benefit for certain models. +# If input images have the same or similar sizes, benchmark is often helpful. +_C.CUDNN_BENCHMARK = False +# The period (in terms of steps) for minibatch visualization at train time. +# Set to 0 to disable. +_C.VIS_PERIOD = 0 + +# global config is for quick hack purposes. +# You can set them in command line or config files, +# and access it with: +# +# from annotator.oneformer.detectron2.config import global_cfg +# print(global_cfg.HACK) +# +# Do not commit any configs into it. +_C.GLOBAL = CN() +_C.GLOBAL.HACK = 1.0 diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/config/instantiate.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/config/instantiate.py new file mode 100644 index 0000000000000000000000000000000000000000..26d191b03f800dae5620128957d137cd4fdb1728 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/config/instantiate.py @@ -0,0 +1,88 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +import collections.abc as abc +import dataclasses +import logging +from typing import Any + +from annotator.oneformer.detectron2.utils.registry import _convert_target_to_string, locate + +__all__ = ["dump_dataclass", "instantiate"] + + +def dump_dataclass(obj: Any): + """ + Dump a dataclass recursively into a dict that can be later instantiated. + + Args: + obj: a dataclass object + + Returns: + dict + """ + assert dataclasses.is_dataclass(obj) and not isinstance( + obj, type + ), "dump_dataclass() requires an instance of a dataclass." + ret = {"_target_": _convert_target_to_string(type(obj))} + for f in dataclasses.fields(obj): + v = getattr(obj, f.name) + if dataclasses.is_dataclass(v): + v = dump_dataclass(v) + if isinstance(v, (list, tuple)): + v = [dump_dataclass(x) if dataclasses.is_dataclass(x) else x for x in v] + ret[f.name] = v + return ret + + +def instantiate(cfg): + """ + Recursively instantiate objects defined in dictionaries by + "_target_" and arguments. 
+ + Args: + cfg: a dict-like object with "_target_" that defines the caller, and + other keys that define the arguments + + Returns: + object instantiated by cfg + """ + from omegaconf import ListConfig, DictConfig, OmegaConf + + if isinstance(cfg, ListConfig): + lst = [instantiate(x) for x in cfg] + return ListConfig(lst, flags={"allow_objects": True}) + if isinstance(cfg, list): + # Specialize for list, because many classes take + # list[objects] as arguments, such as ResNet, DatasetMapper + return [instantiate(x) for x in cfg] + + # If input is a DictConfig backed by dataclasses (i.e. omegaconf's structured config), + # instantiate it to the actual dataclass. + if isinstance(cfg, DictConfig) and dataclasses.is_dataclass(cfg._metadata.object_type): + return OmegaConf.to_object(cfg) + + if isinstance(cfg, abc.Mapping) and "_target_" in cfg: + # conceptually equivalent to hydra.utils.instantiate(cfg) with _convert_=all, + # but faster: https://github.com/facebookresearch/hydra/issues/1200 + cfg = {k: instantiate(v) for k, v in cfg.items()} + cls = cfg.pop("_target_") + cls = instantiate(cls) + + if isinstance(cls, str): + cls_name = cls + cls = locate(cls_name) + assert cls is not None, cls_name + else: + try: + cls_name = cls.__module__ + "." + cls.__qualname__ + except Exception: + # target could be anything, so the above could fail + cls_name = str(cls) + assert callable(cls), f"_target_ {cls} does not define a callable object" + try: + return cls(**cfg) + except TypeError: + logger = logging.getLogger(__name__) + logger.error(f"Error when instantiating {cls_name}!") + raise + return cfg # return as-is if don't know what to do diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/config/lazy.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/config/lazy.py new file mode 100644 index 0000000000000000000000000000000000000000..72a3e5c036f9f78a2cdf3ef0975639da3299d694 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/config/lazy.py @@ -0,0 +1,435 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +import ast +import builtins +import collections.abc as abc +import importlib +import inspect +import logging +import os +import uuid +from contextlib import contextmanager +from copy import deepcopy +from dataclasses import is_dataclass +from typing import List, Tuple, Union +import yaml +from omegaconf import DictConfig, ListConfig, OmegaConf, SCMode + +from annotator.oneformer.detectron2.utils.file_io import PathManager +from annotator.oneformer.detectron2.utils.registry import _convert_target_to_string + +__all__ = ["LazyCall", "LazyConfig"] + + +class LazyCall: + """ + Wrap a callable so that when it's called, the call will not be executed, + but returns a dict that describes the call. + + LazyCall object has to be called with only keyword arguments. Positional + arguments are not yet supported. + + Examples: + :: + from annotator.oneformer.detectron2.config import instantiate, LazyCall + + layer_cfg = LazyCall(nn.Conv2d)(in_channels=32, out_channels=32) + layer_cfg.out_channels = 64 # can edit it afterwards + layer = instantiate(layer_cfg) + """ + + def __init__(self, target): + if not (callable(target) or isinstance(target, (str, abc.Mapping))): + raise TypeError( + f"target of LazyCall must be a callable or defines a callable! 
Got {target}" + ) + self._target = target + + def __call__(self, **kwargs): + if is_dataclass(self._target): + # omegaconf object cannot hold dataclass type + # https://github.com/omry/omegaconf/issues/784 + target = _convert_target_to_string(self._target) + else: + target = self._target + kwargs["_target_"] = target + + return DictConfig(content=kwargs, flags={"allow_objects": True}) + + +def _visit_dict_config(cfg, func): + """ + Apply func recursively to all DictConfig in cfg. + """ + if isinstance(cfg, DictConfig): + func(cfg) + for v in cfg.values(): + _visit_dict_config(v, func) + elif isinstance(cfg, ListConfig): + for v in cfg: + _visit_dict_config(v, func) + + +def _validate_py_syntax(filename): + # see also https://github.com/open-mmlab/mmcv/blob/master/mmcv/utils/config.py + with PathManager.open(filename, "r") as f: + content = f.read() + try: + ast.parse(content) + except SyntaxError as e: + raise SyntaxError(f"Config file {filename} has syntax error!") from e + + +def _cast_to_config(obj): + # if given a dict, return DictConfig instead + if isinstance(obj, dict): + return DictConfig(obj, flags={"allow_objects": True}) + return obj + + +_CFG_PACKAGE_NAME = "detectron2._cfg_loader" +""" +A namespace to put all imported config into. +""" + + +def _random_package_name(filename): + # generate a random package name when loading config files + return _CFG_PACKAGE_NAME + str(uuid.uuid4())[:4] + "." + os.path.basename(filename) + + +@contextmanager +def _patch_import(): + """ + Enhance relative import statements in config files, so that they: + 1. locate files purely based on relative location, regardless of packages. + e.g. you can import file without having __init__ + 2. do not cache modules globally; modifications of module states has no side effect + 3. support other storage system through PathManager, so config files can be in the cloud + 4. imported dict are turned into omegaconf.DictConfig automatically + """ + old_import = builtins.__import__ + + def find_relative_file(original_file, relative_import_path, level): + # NOTE: "from . import x" is not handled. Because then it's unclear + # if such import should produce `x` as a python module or DictConfig. + # This can be discussed further if needed. + relative_import_err = """ +Relative import of directories is not allowed within config files. +Within a config file, relative import can only import other config files. +""".replace( + "\n", " " + ) + if not len(relative_import_path): + raise ImportError(relative_import_err) + + cur_file = os.path.dirname(original_file) + for _ in range(level - 1): + cur_file = os.path.dirname(cur_file) + cur_name = relative_import_path.lstrip(".") + for part in cur_name.split("."): + cur_file = os.path.join(cur_file, part) + if not cur_file.endswith(".py"): + cur_file += ".py" + if not PathManager.isfile(cur_file): + cur_file_no_suffix = cur_file[: -len(".py")] + if PathManager.isdir(cur_file_no_suffix): + raise ImportError(f"Cannot import from {cur_file_no_suffix}." + relative_import_err) + else: + raise ImportError( + f"Cannot import name {relative_import_path} from " + f"{original_file}: {cur_file} does not exist." 
+ ) + return cur_file + + def new_import(name, globals=None, locals=None, fromlist=(), level=0): + if ( + # Only deal with relative imports inside config files + level != 0 + and globals is not None + and (globals.get("__package__", "") or "").startswith(_CFG_PACKAGE_NAME) + ): + cur_file = find_relative_file(globals["__file__"], name, level) + _validate_py_syntax(cur_file) + spec = importlib.machinery.ModuleSpec( + _random_package_name(cur_file), None, origin=cur_file + ) + module = importlib.util.module_from_spec(spec) + module.__file__ = cur_file + with PathManager.open(cur_file) as f: + content = f.read() + exec(compile(content, cur_file, "exec"), module.__dict__) + for name in fromlist: # turn imported dict into DictConfig automatically + val = _cast_to_config(module.__dict__[name]) + module.__dict__[name] = val + return module + return old_import(name, globals, locals, fromlist=fromlist, level=level) + + builtins.__import__ = new_import + yield new_import + builtins.__import__ = old_import + + +class LazyConfig: + """ + Provide methods to save, load, and overrides an omegaconf config object + which may contain definition of lazily-constructed objects. + """ + + @staticmethod + def load_rel(filename: str, keys: Union[None, str, Tuple[str, ...]] = None): + """ + Similar to :meth:`load()`, but load path relative to the caller's + source file. + + This has the same functionality as a relative import, except that this method + accepts filename as a string, so more characters are allowed in the filename. + """ + caller_frame = inspect.stack()[1] + caller_fname = caller_frame[0].f_code.co_filename + assert caller_fname != "", "load_rel Unable to find caller" + caller_dir = os.path.dirname(caller_fname) + filename = os.path.join(caller_dir, filename) + return LazyConfig.load(filename, keys) + + @staticmethod + def load(filename: str, keys: Union[None, str, Tuple[str, ...]] = None): + """ + Load a config file. + + Args: + filename: absolute path or relative path w.r.t. the current working directory + keys: keys to load and return. If not given, return all keys + (whose values are config objects) in a dict. + """ + has_keys = keys is not None + filename = filename.replace("/./", "/") # redundant + if os.path.splitext(filename)[1] not in [".py", ".yaml", ".yml"]: + raise ValueError(f"Config file {filename} has to be a python or yaml file.") + if filename.endswith(".py"): + _validate_py_syntax(filename) + + with _patch_import(): + # Record the filename + module_namespace = { + "__file__": filename, + "__package__": _random_package_name(filename), + } + with PathManager.open(filename) as f: + content = f.read() + # Compile first with filename to: + # 1. make filename appears in stacktrace + # 2. 
make load_rel able to find its parent's (possibly remote) location + exec(compile(content, filename, "exec"), module_namespace) + + ret = module_namespace + else: + with PathManager.open(filename) as f: + obj = yaml.unsafe_load(f) + ret = OmegaConf.create(obj, flags={"allow_objects": True}) + + if has_keys: + if isinstance(keys, str): + return _cast_to_config(ret[keys]) + else: + return tuple(_cast_to_config(ret[a]) for a in keys) + else: + if filename.endswith(".py"): + # when not specified, only load those that are config objects + ret = DictConfig( + { + name: _cast_to_config(value) + for name, value in ret.items() + if isinstance(value, (DictConfig, ListConfig, dict)) + and not name.startswith("_") + }, + flags={"allow_objects": True}, + ) + return ret + + @staticmethod + def save(cfg, filename: str): + """ + Save a config object to a yaml file. + Note that when the config dictionary contains complex objects (e.g. lambda), + it can't be saved to yaml. In that case we will print an error and + attempt to save to a pkl file instead. + + Args: + cfg: an omegaconf config object + filename: yaml file name to save the config file + """ + logger = logging.getLogger(__name__) + try: + cfg = deepcopy(cfg) + except Exception: + pass + else: + # if it's deep-copyable, then... + def _replace_type_by_name(x): + if "_target_" in x and callable(x._target_): + try: + x._target_ = _convert_target_to_string(x._target_) + except AttributeError: + pass + + # not necessary, but makes yaml looks nicer + _visit_dict_config(cfg, _replace_type_by_name) + + save_pkl = False + try: + dict = OmegaConf.to_container( + cfg, + # Do not resolve interpolation when saving, i.e. do not turn ${a} into + # actual values when saving. + resolve=False, + # Save structures (dataclasses) in a format that can be instantiated later. + # Without this option, the type information of the dataclass will be erased. + structured_config_mode=SCMode.INSTANTIATE, + ) + dumped = yaml.dump(dict, default_flow_style=None, allow_unicode=True, width=9999) + with PathManager.open(filename, "w") as f: + f.write(dumped) + + try: + _ = yaml.unsafe_load(dumped) # test that it is loadable + except Exception: + logger.warning( + "The config contains objects that cannot serialize to a valid yaml. " + f"{filename} is human-readable but cannot be loaded." + ) + save_pkl = True + except Exception: + logger.exception("Unable to serialize the config to yaml. Error:") + save_pkl = True + + if save_pkl: + new_filename = filename + ".pkl" + # try: + # # retry by pickle + # with PathManager.open(new_filename, "wb") as f: + # cloudpickle.dump(cfg, f) + # logger.warning(f"Config is saved using cloudpickle at {new_filename}.") + # except Exception: + # pass + + @staticmethod + def apply_overrides(cfg, overrides: List[str]): + """ + In-place override contents of cfg. + + Args: + cfg: an omegaconf config object + overrides: list of strings in the format of "a=b" to override configs. + See https://hydra.cc/docs/next/advanced/override_grammar/basic/ + for syntax. + + Returns: + the cfg object + """ + + def safe_update(cfg, key, value): + parts = key.split(".") + for idx in range(1, len(parts)): + prefix = ".".join(parts[:idx]) + v = OmegaConf.select(cfg, prefix, default=None) + if v is None: + break + if not OmegaConf.is_config(v): + raise KeyError( + f"Trying to update key {key}, but {prefix} " + f"is not a config, but has type {type(v)}." 
+ ) + OmegaConf.update(cfg, key, value, merge=True) + + try: + from hydra.core.override_parser.overrides_parser import OverridesParser + + has_hydra = True + except ImportError: + has_hydra = False + + if has_hydra: + parser = OverridesParser.create() + overrides = parser.parse_overrides(overrides) + for o in overrides: + key = o.key_or_group + value = o.value() + if o.is_delete(): + # TODO support this + raise NotImplementedError("deletion is not yet a supported override") + safe_update(cfg, key, value) + else: + # Fallback. Does not support all the features and error checking like hydra. + for o in overrides: + key, value = o.split("=") + try: + value = eval(value, {}) + except NameError: + pass + safe_update(cfg, key, value) + return cfg + + # @staticmethod + # def to_py(cfg, prefix: str = "cfg."): + # """ + # Try to convert a config object into Python-like psuedo code. + # + # Note that perfect conversion is not always possible. So the returned + # results are mainly meant to be human-readable, and not meant to be executed. + # + # Args: + # cfg: an omegaconf config object + # prefix: root name for the resulting code (default: "cfg.") + # + # + # Returns: + # str of formatted Python code + # """ + # import black + # + # cfg = OmegaConf.to_container(cfg, resolve=True) + # + # def _to_str(obj, prefix=None, inside_call=False): + # if prefix is None: + # prefix = [] + # if isinstance(obj, abc.Mapping) and "_target_" in obj: + # # Dict representing a function call + # target = _convert_target_to_string(obj.pop("_target_")) + # args = [] + # for k, v in sorted(obj.items()): + # args.append(f"{k}={_to_str(v, inside_call=True)}") + # args = ", ".join(args) + # call = f"{target}({args})" + # return "".join(prefix) + call + # elif isinstance(obj, abc.Mapping) and not inside_call: + # # Dict that is not inside a call is a list of top-level config objects that we + # # render as one object per line with dot separated prefixes + # key_list = [] + # for k, v in sorted(obj.items()): + # if isinstance(v, abc.Mapping) and "_target_" not in v: + # key_list.append(_to_str(v, prefix=prefix + [k + "."])) + # else: + # key = "".join(prefix) + k + # key_list.append(f"{key}={_to_str(v)}") + # return "\n".join(key_list) + # elif isinstance(obj, abc.Mapping): + # # Dict that is inside a call is rendered as a regular dict + # return ( + # "{" + # + ",".join( + # f"{repr(k)}: {_to_str(v, inside_call=inside_call)}" + # for k, v in sorted(obj.items()) + # ) + # + "}" + # ) + # elif isinstance(obj, list): + # return "[" + ",".join(_to_str(x, inside_call=inside_call) for x in obj) + "]" + # else: + # return repr(obj) + # + # py_str = _to_str(cfg, prefix=[prefix]) + # try: + # return black.format_str(py_str, mode=black.Mode()) + # except black.InvalidInput: + # return py_str diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..259f669b78bd05815cb8d3351fd6c5fc9a1b85a1 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/__init__.py @@ -0,0 +1,19 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from . 
import transforms # isort:skip + +from .build import ( + build_batch_data_loader, + build_detection_test_loader, + build_detection_train_loader, + get_detection_dataset_dicts, + load_proposals_into_dataset, + print_instances_class_histogram, +) +from .catalog import DatasetCatalog, MetadataCatalog, Metadata +from .common import DatasetFromList, MapDataset, ToIterableDataset +from .dataset_mapper import DatasetMapper + +# ensure the builtin datasets are registered +from . import datasets, samplers # isort:skip + +__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/benchmark.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/benchmark.py new file mode 100644 index 0000000000000000000000000000000000000000..bfd650582c83cd032b4fe76303517cdfd9a2a8b4 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/benchmark.py @@ -0,0 +1,225 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import logging +import numpy as np +from itertools import count +from typing import List, Tuple +import torch +import tqdm +from fvcore.common.timer import Timer + +from annotator.oneformer.detectron2.utils import comm + +from .build import build_batch_data_loader +from .common import DatasetFromList, MapDataset +from .samplers import TrainingSampler + +logger = logging.getLogger(__name__) + + +class _EmptyMapDataset(torch.utils.data.Dataset): + """ + Map anything to emptiness. + """ + + def __init__(self, dataset): + self.ds = dataset + + def __len__(self): + return len(self.ds) + + def __getitem__(self, idx): + _ = self.ds[idx] + return [0] + + +def iter_benchmark( + iterator, num_iter: int, warmup: int = 5, max_time_seconds: float = 60 +) -> Tuple[float, List[float]]: + """ + Benchmark an iterator/iterable for `num_iter` iterations with an extra + `warmup` iterations of warmup. + End early if `max_time_seconds` time is spent on iterations. + + Returns: + float: average time (seconds) per iteration + list[float]: time spent on each iteration. Sometimes useful for further analysis. + """ + num_iter, warmup = int(num_iter), int(warmup) + + iterator = iter(iterator) + for _ in range(warmup): + next(iterator) + timer = Timer() + all_times = [] + for curr_iter in tqdm.trange(num_iter): + start = timer.seconds() + if start > max_time_seconds: + num_iter = curr_iter + break + next(iterator) + all_times.append(timer.seconds() - start) + avg = timer.seconds() / num_iter + return avg, all_times + + +class DataLoaderBenchmark: + """ + Some common benchmarks that help understand perf bottleneck of a standard dataloader + made of dataset, mapper and sampler. 
+ """ + + def __init__( + self, + dataset, + *, + mapper, + sampler=None, + total_batch_size, + num_workers=0, + max_time_seconds: int = 90, + ): + """ + Args: + max_time_seconds (int): maximum time to spent for each benchmark + other args: same as in `build.py:build_detection_train_loader` + """ + if isinstance(dataset, list): + dataset = DatasetFromList(dataset, copy=False, serialize=True) + if sampler is None: + sampler = TrainingSampler(len(dataset)) + + self.dataset = dataset + self.mapper = mapper + self.sampler = sampler + self.total_batch_size = total_batch_size + self.num_workers = num_workers + self.per_gpu_batch_size = self.total_batch_size // comm.get_world_size() + + self.max_time_seconds = max_time_seconds + + def _benchmark(self, iterator, num_iter, warmup, msg=None): + avg, all_times = iter_benchmark(iterator, num_iter, warmup, self.max_time_seconds) + if msg is not None: + self._log_time(msg, avg, all_times) + return avg, all_times + + def _log_time(self, msg, avg, all_times, distributed=False): + percentiles = [np.percentile(all_times, k, interpolation="nearest") for k in [1, 5, 95, 99]] + if not distributed: + logger.info( + f"{msg}: avg={1.0/avg:.1f} it/s, " + f"p1={percentiles[0]:.2g}s, p5={percentiles[1]:.2g}s, " + f"p95={percentiles[2]:.2g}s, p99={percentiles[3]:.2g}s." + ) + return + avg_per_gpu = comm.all_gather(avg) + percentiles_per_gpu = comm.all_gather(percentiles) + if comm.get_rank() > 0: + return + for idx, avg, percentiles in zip(count(), avg_per_gpu, percentiles_per_gpu): + logger.info( + f"GPU{idx} {msg}: avg={1.0/avg:.1f} it/s, " + f"p1={percentiles[0]:.2g}s, p5={percentiles[1]:.2g}s, " + f"p95={percentiles[2]:.2g}s, p99={percentiles[3]:.2g}s." + ) + + def benchmark_dataset(self, num_iter, warmup=5): + """ + Benchmark the speed of taking raw samples from the dataset. + """ + + def loader(): + while True: + for k in self.sampler: + yield self.dataset[k] + + self._benchmark(loader(), num_iter, warmup, "Dataset Alone") + + def benchmark_mapper(self, num_iter, warmup=5): + """ + Benchmark the speed of taking raw samples from the dataset and map + them in a single process. + """ + + def loader(): + while True: + for k in self.sampler: + yield self.mapper(self.dataset[k]) + + self._benchmark(loader(), num_iter, warmup, "Single Process Mapper (sec/sample)") + + def benchmark_workers(self, num_iter, warmup=10): + """ + Benchmark the dataloader by tuning num_workers to [0, 1, self.num_workers]. + """ + candidates = [0, 1] + if self.num_workers not in candidates: + candidates.append(self.num_workers) + + dataset = MapDataset(self.dataset, self.mapper) + for n in candidates: + loader = build_batch_data_loader( + dataset, + self.sampler, + self.total_batch_size, + num_workers=n, + ) + self._benchmark( + iter(loader), + num_iter * max(n, 1), + warmup * max(n, 1), + f"DataLoader ({n} workers, bs={self.per_gpu_batch_size})", + ) + del loader + + def benchmark_IPC(self, num_iter, warmup=10): + """ + Benchmark the dataloader where each worker outputs nothing. This + eliminates the IPC overhead compared to the regular dataloader. + + PyTorch multiprocessing's IPC only optimizes for torch tensors. + Large numpy arrays or other data structure may incur large IPC overhead. 
+ """ + n = self.num_workers + dataset = _EmptyMapDataset(MapDataset(self.dataset, self.mapper)) + loader = build_batch_data_loader( + dataset, self.sampler, self.total_batch_size, num_workers=n + ) + self._benchmark( + iter(loader), + num_iter * max(n, 1), + warmup * max(n, 1), + f"DataLoader ({n} workers, bs={self.per_gpu_batch_size}) w/o comm", + ) + + def benchmark_distributed(self, num_iter, warmup=10): + """ + Benchmark the dataloader in each distributed worker, and log results of + all workers. This helps understand the final performance as well as + the variances among workers. + + It also prints startup time (first iter) of the dataloader. + """ + gpu = comm.get_world_size() + dataset = MapDataset(self.dataset, self.mapper) + n = self.num_workers + loader = build_batch_data_loader( + dataset, self.sampler, self.total_batch_size, num_workers=n + ) + + timer = Timer() + loader = iter(loader) + next(loader) + startup_time = timer.seconds() + logger.info("Dataloader startup time: {:.2f} seconds".format(startup_time)) + + comm.synchronize() + + avg, all_times = self._benchmark(loader, num_iter * max(n, 1), warmup * max(n, 1)) + del loader + self._log_time( + f"DataLoader ({gpu} GPUs x {n} workers, total bs={self.total_batch_size})", + avg, + all_times, + True, + ) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/build.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/build.py new file mode 100644 index 0000000000000000000000000000000000000000..d03137a9aabfc4a056dd671d4c3d0ba6f349fe03 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/build.py @@ -0,0 +1,556 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import itertools +import logging +import numpy as np +import operator +import pickle +from typing import Any, Callable, Dict, List, Optional, Union +import torch +import torch.utils.data as torchdata +from tabulate import tabulate +from termcolor import colored + +from annotator.oneformer.detectron2.config import configurable +from annotator.oneformer.detectron2.structures import BoxMode +from annotator.oneformer.detectron2.utils.comm import get_world_size +from annotator.oneformer.detectron2.utils.env import seed_all_rng +from annotator.oneformer.detectron2.utils.file_io import PathManager +from annotator.oneformer.detectron2.utils.logger import _log_api_usage, log_first_n + +from .catalog import DatasetCatalog, MetadataCatalog +from .common import AspectRatioGroupedDataset, DatasetFromList, MapDataset, ToIterableDataset +from .dataset_mapper import DatasetMapper +from .detection_utils import check_metadata_consistency +from .samplers import ( + InferenceSampler, + RandomSubsetTrainingSampler, + RepeatFactorTrainingSampler, + TrainingSampler, +) + +""" +This file contains the default logic to build a dataloader for training or testing. +""" + +__all__ = [ + "build_batch_data_loader", + "build_detection_train_loader", + "build_detection_test_loader", + "get_detection_dataset_dicts", + "load_proposals_into_dataset", + "print_instances_class_histogram", +] + + +def filter_images_with_only_crowd_annotations(dataset_dicts): + """ + Filter out images with none annotations or only crowd annotations + (i.e., images without non-crowd annotations). + A common training-time preprocessing on COCO dataset. + + Args: + dataset_dicts (list[dict]): annotations in Detectron2 Dataset format. + + Returns: + list[dict]: the same format, but filtered. 
+ """ + num_before = len(dataset_dicts) + + def valid(anns): + for ann in anns: + if ann.get("iscrowd", 0) == 0: + return True + return False + + dataset_dicts = [x for x in dataset_dicts if valid(x["annotations"])] + num_after = len(dataset_dicts) + logger = logging.getLogger(__name__) + logger.info( + "Removed {} images with no usable annotations. {} images left.".format( + num_before - num_after, num_after + ) + ) + return dataset_dicts + + +def filter_images_with_few_keypoints(dataset_dicts, min_keypoints_per_image): + """ + Filter out images with too few number of keypoints. + + Args: + dataset_dicts (list[dict]): annotations in Detectron2 Dataset format. + + Returns: + list[dict]: the same format as dataset_dicts, but filtered. + """ + num_before = len(dataset_dicts) + + def visible_keypoints_in_image(dic): + # Each keypoints field has the format [x1, y1, v1, ...], where v is visibility + annotations = dic["annotations"] + return sum( + (np.array(ann["keypoints"][2::3]) > 0).sum() + for ann in annotations + if "keypoints" in ann + ) + + dataset_dicts = [ + x for x in dataset_dicts if visible_keypoints_in_image(x) >= min_keypoints_per_image + ] + num_after = len(dataset_dicts) + logger = logging.getLogger(__name__) + logger.info( + "Removed {} images with fewer than {} keypoints.".format( + num_before - num_after, min_keypoints_per_image + ) + ) + return dataset_dicts + + +def load_proposals_into_dataset(dataset_dicts, proposal_file): + """ + Load precomputed object proposals into the dataset. + + The proposal file should be a pickled dict with the following keys: + + - "ids": list[int] or list[str], the image ids + - "boxes": list[np.ndarray], each is an Nx4 array of boxes corresponding to the image id + - "objectness_logits": list[np.ndarray], each is an N sized array of objectness scores + corresponding to the boxes. + - "bbox_mode": the BoxMode of the boxes array. Defaults to ``BoxMode.XYXY_ABS``. + + Args: + dataset_dicts (list[dict]): annotations in Detectron2 Dataset format. + proposal_file (str): file path of pre-computed proposals, in pkl format. + + Returns: + list[dict]: the same format as dataset_dicts, but added proposal field. + """ + logger = logging.getLogger(__name__) + logger.info("Loading proposals from: {}".format(proposal_file)) + + with PathManager.open(proposal_file, "rb") as f: + proposals = pickle.load(f, encoding="latin1") + + # Rename the key names in D1 proposal files + rename_keys = {"indexes": "ids", "scores": "objectness_logits"} + for key in rename_keys: + if key in proposals: + proposals[rename_keys[key]] = proposals.pop(key) + + # Fetch the indexes of all proposals that are in the dataset + # Convert image_id to str since they could be int. 
+ img_ids = set({str(record["image_id"]) for record in dataset_dicts}) + id_to_index = {str(id): i for i, id in enumerate(proposals["ids"]) if str(id) in img_ids} + + # Assuming default bbox_mode of precomputed proposals are 'XYXY_ABS' + bbox_mode = BoxMode(proposals["bbox_mode"]) if "bbox_mode" in proposals else BoxMode.XYXY_ABS + + for record in dataset_dicts: + # Get the index of the proposal + i = id_to_index[str(record["image_id"])] + + boxes = proposals["boxes"][i] + objectness_logits = proposals["objectness_logits"][i] + # Sort the proposals in descending order of the scores + inds = objectness_logits.argsort()[::-1] + record["proposal_boxes"] = boxes[inds] + record["proposal_objectness_logits"] = objectness_logits[inds] + record["proposal_bbox_mode"] = bbox_mode + + return dataset_dicts + + +def print_instances_class_histogram(dataset_dicts, class_names): + """ + Args: + dataset_dicts (list[dict]): list of dataset dicts. + class_names (list[str]): list of class names (zero-indexed). + """ + num_classes = len(class_names) + hist_bins = np.arange(num_classes + 1) + histogram = np.zeros((num_classes,), dtype=np.int) + for entry in dataset_dicts: + annos = entry["annotations"] + classes = np.asarray( + [x["category_id"] for x in annos if not x.get("iscrowd", 0)], dtype=np.int + ) + if len(classes): + assert classes.min() >= 0, f"Got an invalid category_id={classes.min()}" + assert ( + classes.max() < num_classes + ), f"Got an invalid category_id={classes.max()} for a dataset of {num_classes} classes" + histogram += np.histogram(classes, bins=hist_bins)[0] + + N_COLS = min(6, len(class_names) * 2) + + def short_name(x): + # make long class names shorter. useful for lvis + if len(x) > 13: + return x[:11] + ".." + return x + + data = list( + itertools.chain(*[[short_name(class_names[i]), int(v)] for i, v in enumerate(histogram)]) + ) + total_num_instances = sum(data[1::2]) + data.extend([None] * (N_COLS - (len(data) % N_COLS))) + if num_classes > 1: + data.extend(["total", total_num_instances]) + data = itertools.zip_longest(*[data[i::N_COLS] for i in range(N_COLS)]) + table = tabulate( + data, + headers=["category", "#instances"] * (N_COLS // 2), + tablefmt="pipe", + numalign="left", + stralign="center", + ) + log_first_n( + logging.INFO, + "Distribution of instances among all {} categories:\n".format(num_classes) + + colored(table, "cyan"), + key="message", + ) + + +def get_detection_dataset_dicts( + names, + filter_empty=True, + min_keypoints=0, + proposal_files=None, + check_consistency=True, +): + """ + Load and prepare dataset dicts for instance detection/segmentation and semantic segmentation. + + Args: + names (str or list[str]): a dataset name or a list of dataset names + filter_empty (bool): whether to filter out images without instance annotations + min_keypoints (int): filter out images with fewer keypoints than + `min_keypoints`. Set to 0 to do nothing. + proposal_files (list[str]): if given, a list of object proposal files + that match each dataset in `names`. + check_consistency (bool): whether to check if datasets have consistent metadata. + + Returns: + list[dict]: a list of dicts following the standard dataset dict format. + """ + if isinstance(names, str): + names = [names] + assert len(names), names + dataset_dicts = [DatasetCatalog.get(dataset_name) for dataset_name in names] + + if isinstance(dataset_dicts[0], torchdata.Dataset): + if len(dataset_dicts) > 1: + # ConcatDataset does not work for iterable style dataset. 
+ # We could support concat for iterable as well, but it's often + # not a good idea to concat iterables anyway. + return torchdata.ConcatDataset(dataset_dicts) + return dataset_dicts[0] + + for dataset_name, dicts in zip(names, dataset_dicts): + assert len(dicts), "Dataset '{}' is empty!".format(dataset_name) + + if proposal_files is not None: + assert len(names) == len(proposal_files) + # load precomputed proposals from proposal files + dataset_dicts = [ + load_proposals_into_dataset(dataset_i_dicts, proposal_file) + for dataset_i_dicts, proposal_file in zip(dataset_dicts, proposal_files) + ] + + dataset_dicts = list(itertools.chain.from_iterable(dataset_dicts)) + + has_instances = "annotations" in dataset_dicts[0] + if filter_empty and has_instances: + dataset_dicts = filter_images_with_only_crowd_annotations(dataset_dicts) + if min_keypoints > 0 and has_instances: + dataset_dicts = filter_images_with_few_keypoints(dataset_dicts, min_keypoints) + + if check_consistency and has_instances: + try: + class_names = MetadataCatalog.get(names[0]).thing_classes + check_metadata_consistency("thing_classes", names) + print_instances_class_histogram(dataset_dicts, class_names) + except AttributeError: # class names are not available for this dataset + pass + + assert len(dataset_dicts), "No valid data found in {}.".format(",".join(names)) + return dataset_dicts + + +def build_batch_data_loader( + dataset, + sampler, + total_batch_size, + *, + aspect_ratio_grouping=False, + num_workers=0, + collate_fn=None, +): + """ + Build a batched dataloader. The main differences from `torch.utils.data.DataLoader` are: + 1. support aspect ratio grouping options + 2. use no "batch collation", because this is common for detection training + + Args: + dataset (torch.utils.data.Dataset): a pytorch map-style or iterable dataset. + sampler (torch.utils.data.sampler.Sampler or None): a sampler that produces indices. + Must be provided iff. ``dataset`` is a map-style dataset. + total_batch_size, aspect_ratio_grouping, num_workers, collate_fn: see + :func:`build_detection_train_loader`. + + Returns: + iterable[list]. Length of each list is the batch size of the current + GPU. Each element in the list comes from the dataset. 
+ """ + world_size = get_world_size() + assert ( + total_batch_size > 0 and total_batch_size % world_size == 0 + ), "Total batch size ({}) must be divisible by the number of gpus ({}).".format( + total_batch_size, world_size + ) + batch_size = total_batch_size // world_size + + if isinstance(dataset, torchdata.IterableDataset): + assert sampler is None, "sampler must be None if dataset is IterableDataset" + else: + dataset = ToIterableDataset(dataset, sampler) + + if aspect_ratio_grouping: + data_loader = torchdata.DataLoader( + dataset, + num_workers=num_workers, + collate_fn=operator.itemgetter(0), # don't batch, but yield individual elements + worker_init_fn=worker_init_reset_seed, + ) # yield individual mapped dict + data_loader = AspectRatioGroupedDataset(data_loader, batch_size) + if collate_fn is None: + return data_loader + return MapDataset(data_loader, collate_fn) + else: + return torchdata.DataLoader( + dataset, + batch_size=batch_size, + drop_last=True, + num_workers=num_workers, + collate_fn=trivial_batch_collator if collate_fn is None else collate_fn, + worker_init_fn=worker_init_reset_seed, + ) + + +def _train_loader_from_config(cfg, mapper=None, *, dataset=None, sampler=None): + if dataset is None: + dataset = get_detection_dataset_dicts( + cfg.DATASETS.TRAIN, + filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS, + min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE + if cfg.MODEL.KEYPOINT_ON + else 0, + proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None, + ) + _log_api_usage("dataset." + cfg.DATASETS.TRAIN[0]) + + if mapper is None: + mapper = DatasetMapper(cfg, True) + + if sampler is None: + sampler_name = cfg.DATALOADER.SAMPLER_TRAIN + logger = logging.getLogger(__name__) + if isinstance(dataset, torchdata.IterableDataset): + logger.info("Not using any sampler since the dataset is IterableDataset.") + sampler = None + else: + logger.info("Using training sampler {}".format(sampler_name)) + if sampler_name == "TrainingSampler": + sampler = TrainingSampler(len(dataset)) + elif sampler_name == "RepeatFactorTrainingSampler": + repeat_factors = RepeatFactorTrainingSampler.repeat_factors_from_category_frequency( + dataset, cfg.DATALOADER.REPEAT_THRESHOLD + ) + sampler = RepeatFactorTrainingSampler(repeat_factors) + elif sampler_name == "RandomSubsetTrainingSampler": + sampler = RandomSubsetTrainingSampler( + len(dataset), cfg.DATALOADER.RANDOM_SUBSET_RATIO + ) + else: + raise ValueError("Unknown training sampler: {}".format(sampler_name)) + + return { + "dataset": dataset, + "sampler": sampler, + "mapper": mapper, + "total_batch_size": cfg.SOLVER.IMS_PER_BATCH, + "aspect_ratio_grouping": cfg.DATALOADER.ASPECT_RATIO_GROUPING, + "num_workers": cfg.DATALOADER.NUM_WORKERS, + } + + +@configurable(from_config=_train_loader_from_config) +def build_detection_train_loader( + dataset, + *, + mapper, + sampler=None, + total_batch_size, + aspect_ratio_grouping=True, + num_workers=0, + collate_fn=None, +): + """ + Build a dataloader for object detection with some default features. + + Args: + dataset (list or torch.utils.data.Dataset): a list of dataset dicts, + or a pytorch dataset (either map-style or iterable). It can be obtained + by using :func:`DatasetCatalog.get` or :func:`get_detection_dataset_dicts`. + mapper (callable): a callable which takes a sample (dict) from dataset and + returns the format to be consumed by the model. + When using cfg, the default choice is ``DatasetMapper(cfg, is_train=True)``. 
+ sampler (torch.utils.data.sampler.Sampler or None): a sampler that produces + indices to be applied on ``dataset``. + If ``dataset`` is map-style, the default sampler is a :class:`TrainingSampler`, + which coordinates an infinite random shuffle sequence across all workers. + Sampler must be None if ``dataset`` is iterable. + total_batch_size (int): total batch size across all workers. + aspect_ratio_grouping (bool): whether to group images with similar + aspect ratio for efficiency. When enabled, it requires each + element in dataset be a dict with keys "width" and "height". + num_workers (int): number of parallel data loading workers + collate_fn: a function that determines how to do batching, same as the argument of + `torch.utils.data.DataLoader`. Defaults to do no collation and return a list of + data. No collation is OK for small batch size and simple data structures. + If your batch size is large and each sample contains too many small tensors, + it's more efficient to collate them in data loader. + + Returns: + torch.utils.data.DataLoader: + a dataloader. Each output from it is a ``list[mapped_element]`` of length + ``total_batch_size / num_workers``, where ``mapped_element`` is produced + by the ``mapper``. + """ + if isinstance(dataset, list): + dataset = DatasetFromList(dataset, copy=False) + if mapper is not None: + dataset = MapDataset(dataset, mapper) + + if isinstance(dataset, torchdata.IterableDataset): + assert sampler is None, "sampler must be None if dataset is IterableDataset" + else: + if sampler is None: + sampler = TrainingSampler(len(dataset)) + assert isinstance(sampler, torchdata.Sampler), f"Expect a Sampler but got {type(sampler)}" + return build_batch_data_loader( + dataset, + sampler, + total_batch_size, + aspect_ratio_grouping=aspect_ratio_grouping, + num_workers=num_workers, + collate_fn=collate_fn, + ) + + +def _test_loader_from_config(cfg, dataset_name, mapper=None): + """ + Uses the given `dataset_name` argument (instead of the names in cfg), because the + standard practice is to evaluate each test set individually (not combining them). + """ + if isinstance(dataset_name, str): + dataset_name = [dataset_name] + + dataset = get_detection_dataset_dicts( + dataset_name, + filter_empty=False, + proposal_files=[ + cfg.DATASETS.PROPOSAL_FILES_TEST[list(cfg.DATASETS.TEST).index(x)] for x in dataset_name + ] + if cfg.MODEL.LOAD_PROPOSALS + else None, + ) + if mapper is None: + mapper = DatasetMapper(cfg, False) + return { + "dataset": dataset, + "mapper": mapper, + "num_workers": cfg.DATALOADER.NUM_WORKERS, + "sampler": InferenceSampler(len(dataset)) + if not isinstance(dataset, torchdata.IterableDataset) + else None, + } + + +@configurable(from_config=_test_loader_from_config) +def build_detection_test_loader( + dataset: Union[List[Any], torchdata.Dataset], + *, + mapper: Callable[[Dict[str, Any]], Any], + sampler: Optional[torchdata.Sampler] = None, + batch_size: int = 1, + num_workers: int = 0, + collate_fn: Optional[Callable[[List[Any]], Any]] = None, +) -> torchdata.DataLoader: + """ + Similar to `build_detection_train_loader`, with default batch size = 1, + and sampler = :class:`InferenceSampler`. This sampler coordinates all workers + to produce the exact set of all samples. + + Args: + dataset: a list of dataset dicts, + or a pytorch dataset (either map-style or iterable). They can be obtained + by using :func:`DatasetCatalog.get` or :func:`get_detection_dataset_dicts`. 
+ mapper: a callable which takes a sample (dict) from dataset + and returns the format to be consumed by the model. + When using cfg, the default choice is ``DatasetMapper(cfg, is_train=False)``. + sampler: a sampler that produces + indices to be applied on ``dataset``. Default to :class:`InferenceSampler`, + which splits the dataset across all workers. Sampler must be None + if `dataset` is iterable. + batch_size: the batch size of the data loader to be created. + Default to 1 image per worker since this is the standard when reporting + inference time in papers. + num_workers: number of parallel data loading workers + collate_fn: same as the argument of `torch.utils.data.DataLoader`. + Defaults to do no collation and return a list of data. + + Returns: + DataLoader: a torch DataLoader, that loads the given detection + dataset, with test-time transformation and batching. + + Examples: + :: + data_loader = build_detection_test_loader( + DatasetRegistry.get("my_test"), + mapper=DatasetMapper(...)) + + # or, instantiate with a CfgNode: + data_loader = build_detection_test_loader(cfg, "my_test") + """ + if isinstance(dataset, list): + dataset = DatasetFromList(dataset, copy=False) + if mapper is not None: + dataset = MapDataset(dataset, mapper) + if isinstance(dataset, torchdata.IterableDataset): + assert sampler is None, "sampler must be None if dataset is IterableDataset" + else: + if sampler is None: + sampler = InferenceSampler(len(dataset)) + return torchdata.DataLoader( + dataset, + batch_size=batch_size, + sampler=sampler, + drop_last=False, + num_workers=num_workers, + collate_fn=trivial_batch_collator if collate_fn is None else collate_fn, + ) + + +def trivial_batch_collator(batch): + """ + A batch collator that does nothing. + """ + return batch + + +def worker_init_reset_seed(worker_id): + initial_seed = torch.initial_seed() % 2**31 + seed_all_rng(initial_seed + worker_id) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/catalog.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/catalog.py new file mode 100644 index 0000000000000000000000000000000000000000..4f5209b5583d01258437bdc9b52a3dd716bdbbf6 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/catalog.py @@ -0,0 +1,236 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import copy +import logging +import types +from collections import UserDict +from typing import List + +from annotator.oneformer.detectron2.utils.logger import log_first_n + +__all__ = ["DatasetCatalog", "MetadataCatalog", "Metadata"] + + +class _DatasetCatalog(UserDict): + """ + A global dictionary that stores information about the datasets and how to obtain them. + + It contains a mapping from strings + (which are names that identify a dataset, e.g. "coco_2014_train") + to a function which parses the dataset and returns the samples in the + format of `list[dict]`. + + The returned dicts should be in Detectron2 Dataset format (See DATASETS.md for details) + if used with the data loader functionalities in `data/build.py,data/detection_transform.py`. + + The purpose of having this catalog is to make it easy to choose + different datasets, by just using the strings in the config. + """ + + def register(self, name, func): + """ + Args: + name (str): the name that identifies a dataset, e.g. "coco_2014_train". + func (callable): a callable which takes no arguments and returns a list of dicts. 
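# --- Editor's illustrative sketch (not part of the patch): registering and
# retrieving a toy dataset through DatasetCatalog. The dataset name and dict
# contents are hypothetical.
from annotator.oneformer.detectron2.data import DatasetCatalog


def _load_my_toy_dataset():
    # Must take no arguments and return list[dict] in Detectron2 Dataset format.
    return [{"file_name": "0001.jpg", "image_id": 1, "height": 480, "width": 640}]


DatasetCatalog.register("my_toy_train", _load_my_toy_dataset)
dicts = DatasetCatalog.get("my_toy_train")    # calls the registered function
assert "my_toy_train" in DatasetCatalog.list()
DatasetCatalog.remove("my_toy_train")         # alias of pop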
+ It must return the same results if called multiple times. + """ + assert callable(func), "You must register a function with `DatasetCatalog.register`!" + assert name not in self, "Dataset '{}' is already registered!".format(name) + self[name] = func + + def get(self, name): + """ + Call the registered function and return its results. + + Args: + name (str): the name that identifies a dataset, e.g. "coco_2014_train". + + Returns: + list[dict]: dataset annotations. + """ + try: + f = self[name] + except KeyError as e: + raise KeyError( + "Dataset '{}' is not registered! Available datasets are: {}".format( + name, ", ".join(list(self.keys())) + ) + ) from e + return f() + + def list(self) -> List[str]: + """ + List all registered datasets. + + Returns: + list[str] + """ + return list(self.keys()) + + def remove(self, name): + """ + Alias of ``pop``. + """ + self.pop(name) + + def __str__(self): + return "DatasetCatalog(registered datasets: {})".format(", ".join(self.keys())) + + __repr__ = __str__ + + +DatasetCatalog = _DatasetCatalog() +DatasetCatalog.__doc__ = ( + _DatasetCatalog.__doc__ + + """ + .. automethod:: detectron2.data.catalog.DatasetCatalog.register + .. automethod:: detectron2.data.catalog.DatasetCatalog.get +""" +) + + +class Metadata(types.SimpleNamespace): + """ + A class that supports simple attribute setter/getter. + It is intended for storing metadata of a dataset and make it accessible globally. + + Examples: + :: + # somewhere when you load the data: + MetadataCatalog.get("mydataset").thing_classes = ["person", "dog"] + + # somewhere when you print statistics or visualize: + classes = MetadataCatalog.get("mydataset").thing_classes + """ + + # the name of the dataset + # set default to N/A so that `self.name` in the errors will not trigger getattr again + name: str = "N/A" + + _RENAMED = { + "class_names": "thing_classes", + "dataset_id_to_contiguous_id": "thing_dataset_id_to_contiguous_id", + "stuff_class_names": "stuff_classes", + } + + def __getattr__(self, key): + if key in self._RENAMED: + log_first_n( + logging.WARNING, + "Metadata '{}' was renamed to '{}'!".format(key, self._RENAMED[key]), + n=10, + ) + return getattr(self, self._RENAMED[key]) + + # "name" exists in every metadata + if len(self.__dict__) > 1: + raise AttributeError( + "Attribute '{}' does not exist in the metadata of dataset '{}'. Available " + "keys are {}.".format(key, self.name, str(self.__dict__.keys())) + ) + else: + raise AttributeError( + f"Attribute '{key}' does not exist in the metadata of dataset '{self.name}': " + "metadata is empty." + ) + + def __setattr__(self, key, val): + if key in self._RENAMED: + log_first_n( + logging.WARNING, + "Metadata '{}' was renamed to '{}'!".format(key, self._RENAMED[key]), + n=10, + ) + setattr(self, self._RENAMED[key], val) + + # Ensure that metadata of the same name stays consistent + try: + oldval = getattr(self, key) + assert oldval == val, ( + "Attribute '{}' in the metadata of '{}' cannot be set " + "to a different value!\n{} != {}".format(key, self.name, oldval, val) + ) + except AttributeError: + super().__setattr__(key, val) + + def as_dict(self): + """ + Returns all the metadata as a dict. + Note that modifications to the returned dict will not reflect on the Metadata object. + """ + return copy.copy(self.__dict__) + + def set(self, **kwargs): + """ + Set multiple metadata with kwargs. 
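# --- Editor's illustrative sketch (not part of the patch): storing and reading
# dataset metadata. The dataset name "my_toy_train" is hypothetical.
from annotator.oneformer.detectron2.data import MetadataCatalog

meta = MetadataCatalog.get("my_toy_train")     # created lazily if not present
meta.set(thing_classes=["person", "dog"], evaluator_type="coco")
assert MetadataCatalog.get("my_toy_train").thing_classes == ["person", "dog"]
# Re-setting an existing key to a *different* value raises an AssertionError,
# which keeps metadata of the same name consistent across the program.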
+ """ + for k, v in kwargs.items(): + setattr(self, k, v) + return self + + def get(self, key, default=None): + """ + Access an attribute and return its value if exists. + Otherwise return default. + """ + try: + return getattr(self, key) + except AttributeError: + return default + + +class _MetadataCatalog(UserDict): + """ + MetadataCatalog is a global dictionary that provides access to + :class:`Metadata` of a given dataset. + + The metadata associated with a certain name is a singleton: once created, the + metadata will stay alive and will be returned by future calls to ``get(name)``. + + It's like global variables, so don't abuse it. + It's meant for storing knowledge that's constant and shared across the execution + of the program, e.g.: the class names in COCO. + """ + + def get(self, name): + """ + Args: + name (str): name of a dataset (e.g. coco_2014_train). + + Returns: + Metadata: The :class:`Metadata` instance associated with this name, + or create an empty one if none is available. + """ + assert len(name) + r = super().get(name, None) + if r is None: + r = self[name] = Metadata(name=name) + return r + + def list(self): + """ + List all registered metadata. + + Returns: + list[str]: keys (names of datasets) of all registered metadata + """ + return list(self.keys()) + + def remove(self, name): + """ + Alias of ``pop``. + """ + self.pop(name) + + def __str__(self): + return "MetadataCatalog(registered metadata: {})".format(", ".join(self.keys())) + + __repr__ = __str__ + + +MetadataCatalog = _MetadataCatalog() +MetadataCatalog.__doc__ = ( + _MetadataCatalog.__doc__ + + """ + .. automethod:: detectron2.data.catalog.MetadataCatalog.get +""" +) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/common.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/common.py new file mode 100644 index 0000000000000000000000000000000000000000..aa69a6a6546030aee818b195a0fbb399d5b776f6 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/common.py @@ -0,0 +1,301 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import contextlib +import copy +import itertools +import logging +import numpy as np +import pickle +import random +from typing import Callable, Union +import torch +import torch.utils.data as data +from torch.utils.data.sampler import Sampler + +from annotator.oneformer.detectron2.utils.serialize import PicklableWrapper + +__all__ = ["MapDataset", "DatasetFromList", "AspectRatioGroupedDataset", "ToIterableDataset"] + +logger = logging.getLogger(__name__) + + +def _shard_iterator_dataloader_worker(iterable): + # Shard the iterable if we're currently inside pytorch dataloader worker. + worker_info = data.get_worker_info() + if worker_info is None or worker_info.num_workers == 1: + # do nothing + yield from iterable + else: + yield from itertools.islice(iterable, worker_info.id, None, worker_info.num_workers) + + +class _MapIterableDataset(data.IterableDataset): + """ + Map a function over elements in an IterableDataset. + + Similar to pytorch's MapIterDataPipe, but support filtering when map_func + returns None. + + This class is not public-facing. Will be called by `MapDataset`. 
+ """ + + def __init__(self, dataset, map_func): + self._dataset = dataset + self._map_func = PicklableWrapper(map_func) # wrap so that a lambda will work + + def __len__(self): + return len(self._dataset) + + def __iter__(self): + for x in map(self._map_func, self._dataset): + if x is not None: + yield x + + +class MapDataset(data.Dataset): + """ + Map a function over the elements in a dataset. + """ + + def __init__(self, dataset, map_func): + """ + Args: + dataset: a dataset where map function is applied. Can be either + map-style or iterable dataset. When given an iterable dataset, + the returned object will also be an iterable dataset. + map_func: a callable which maps the element in dataset. map_func can + return None to skip the data (e.g. in case of errors). + How None is handled depends on the style of `dataset`. + If `dataset` is map-style, it randomly tries other elements. + If `dataset` is iterable, it skips the data and tries the next. + """ + self._dataset = dataset + self._map_func = PicklableWrapper(map_func) # wrap so that a lambda will work + + self._rng = random.Random(42) + self._fallback_candidates = set(range(len(dataset))) + + def __new__(cls, dataset, map_func): + is_iterable = isinstance(dataset, data.IterableDataset) + if is_iterable: + return _MapIterableDataset(dataset, map_func) + else: + return super().__new__(cls) + + def __getnewargs__(self): + return self._dataset, self._map_func + + def __len__(self): + return len(self._dataset) + + def __getitem__(self, idx): + retry_count = 0 + cur_idx = int(idx) + + while True: + data = self._map_func(self._dataset[cur_idx]) + if data is not None: + self._fallback_candidates.add(cur_idx) + return data + + # _map_func fails for this idx, use a random new index from the pool + retry_count += 1 + self._fallback_candidates.discard(cur_idx) + cur_idx = self._rng.sample(self._fallback_candidates, k=1)[0] + + if retry_count >= 3: + logger = logging.getLogger(__name__) + logger.warning( + "Failed to apply `_map_func` for idx: {}, retry count: {}".format( + idx, retry_count + ) + ) + + +class _TorchSerializedList(object): + """ + A list-like object whose items are serialized and stored in a torch tensor. When + launching a process that uses TorchSerializedList with "fork" start method, + the subprocess can read the same buffer without triggering copy-on-access. When + launching a process that uses TorchSerializedList with "spawn/forkserver" start + method, the list will be pickled by a special ForkingPickler registered by PyTorch + that moves data to shared memory. In both cases, this allows parent and child + processes to share RAM for the list data, hence avoids the issue in + https://github.com/pytorch/pytorch/issues/13246. + + See also https://ppwwyyxx.com/blog/2022/Demystify-RAM-Usage-in-Multiprocess-DataLoader/ + on how it works. 
+ """ + + def __init__(self, lst: list): + self._lst = lst + + def _serialize(data): + buffer = pickle.dumps(data, protocol=-1) + return np.frombuffer(buffer, dtype=np.uint8) + + logger.info( + "Serializing {} elements to byte tensors and concatenating them all ...".format( + len(self._lst) + ) + ) + self._lst = [_serialize(x) for x in self._lst] + self._addr = np.asarray([len(x) for x in self._lst], dtype=np.int64) + self._addr = torch.from_numpy(np.cumsum(self._addr)) + self._lst = torch.from_numpy(np.concatenate(self._lst)) + logger.info("Serialized dataset takes {:.2f} MiB".format(len(self._lst) / 1024**2)) + + def __len__(self): + return len(self._addr) + + def __getitem__(self, idx): + start_addr = 0 if idx == 0 else self._addr[idx - 1].item() + end_addr = self._addr[idx].item() + bytes = memoryview(self._lst[start_addr:end_addr].numpy()) + + # @lint-ignore PYTHONPICKLEISBAD + return pickle.loads(bytes) + + +_DEFAULT_DATASET_FROM_LIST_SERIALIZE_METHOD = _TorchSerializedList + + +@contextlib.contextmanager +def set_default_dataset_from_list_serialize_method(new): + """ + Context manager for using custom serialize function when creating DatasetFromList + """ + + global _DEFAULT_DATASET_FROM_LIST_SERIALIZE_METHOD + orig = _DEFAULT_DATASET_FROM_LIST_SERIALIZE_METHOD + _DEFAULT_DATASET_FROM_LIST_SERIALIZE_METHOD = new + yield + _DEFAULT_DATASET_FROM_LIST_SERIALIZE_METHOD = orig + + +class DatasetFromList(data.Dataset): + """ + Wrap a list to a torch Dataset. It produces elements of the list as data. + """ + + def __init__( + self, + lst: list, + copy: bool = True, + serialize: Union[bool, Callable] = True, + ): + """ + Args: + lst (list): a list which contains elements to produce. + copy (bool): whether to deepcopy the element when producing it, + so that the result can be modified in place without affecting the + source in the list. + serialize (bool or callable): whether to serialize the stroage to other + backend. If `True`, the default serialize method will be used, if given + a callable, the callable will be used as serialize method. + """ + self._lst = lst + self._copy = copy + if not isinstance(serialize, (bool, Callable)): + raise TypeError(f"Unsupported type for argument `serailzie`: {serialize}") + self._serialize = serialize is not False + + if self._serialize: + serialize_method = ( + serialize + if isinstance(serialize, Callable) + else _DEFAULT_DATASET_FROM_LIST_SERIALIZE_METHOD + ) + logger.info(f"Serializing the dataset using: {serialize_method}") + self._lst = serialize_method(self._lst) + + def __len__(self): + return len(self._lst) + + def __getitem__(self, idx): + if self._copy and not self._serialize: + return copy.deepcopy(self._lst[idx]) + else: + return self._lst[idx] + + +class ToIterableDataset(data.IterableDataset): + """ + Convert an old indices-based (also called map-style) dataset + to an iterable-style dataset. + """ + + def __init__(self, dataset: data.Dataset, sampler: Sampler, shard_sampler: bool = True): + """ + Args: + dataset: an old-style dataset with ``__getitem__`` + sampler: a cheap iterable that produces indices to be applied on ``dataset``. + shard_sampler: whether to shard the sampler based on the current pytorch data loader + worker id. When an IterableDataset is forked by pytorch's DataLoader into multiple + workers, it is responsible for sharding its data based on worker id so that workers + don't produce identical data. 
+ + Most samplers (like our TrainingSampler) do not shard based on dataloader worker id + and this argument should be set to True. But certain samplers may be already + sharded, in that case this argument should be set to False. + """ + assert not isinstance(dataset, data.IterableDataset), dataset + assert isinstance(sampler, Sampler), sampler + self.dataset = dataset + self.sampler = sampler + self.shard_sampler = shard_sampler + + def __iter__(self): + if not self.shard_sampler: + sampler = self.sampler + else: + # With map-style dataset, `DataLoader(dataset, sampler)` runs the + # sampler in main process only. But `DataLoader(ToIterableDataset(dataset, sampler))` + # will run sampler in every of the N worker. So we should only keep 1/N of the ids on + # each worker. The assumption is that sampler is cheap to iterate so it's fine to + # discard ids in workers. + sampler = _shard_iterator_dataloader_worker(self.sampler) + for idx in sampler: + yield self.dataset[idx] + + def __len__(self): + return len(self.sampler) + + +class AspectRatioGroupedDataset(data.IterableDataset): + """ + Batch data that have similar aspect ratio together. + In this implementation, images whose aspect ratio < (or >) 1 will + be batched together. + This improves training speed because the images then need less padding + to form a batch. + + It assumes the underlying dataset produces dicts with "width" and "height" keys. + It will then produce a list of original dicts with length = batch_size, + all with similar aspect ratios. + """ + + def __init__(self, dataset, batch_size): + """ + Args: + dataset: an iterable. Each element must be a dict with keys + "width" and "height", which will be used to batch data. + batch_size (int): + """ + self.dataset = dataset + self.batch_size = batch_size + self._buckets = [[] for _ in range(2)] + # Hard-coded two aspect ratio groups: w > h and w < h. + # Can add support for more aspect ratio groups, but doesn't seem useful + + def __iter__(self): + for d in self.dataset: + w, h = d["width"], d["height"] + bucket_id = 0 if w > h else 1 + bucket = self._buckets[bucket_id] + bucket.append(d) + if len(bucket) == self.batch_size: + data = bucket[:] + # Clear bucket first, because code after yield is not + # guaranteed to execute + del bucket[:] + yield data diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/dataset_mapper.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/dataset_mapper.py new file mode 100644 index 0000000000000000000000000000000000000000..3bb6bb1057a68bfb12e55872f391065f02023ed3 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/dataset_mapper.py @@ -0,0 +1,191 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import copy +import logging +import numpy as np +from typing import List, Optional, Union +import torch + +from annotator.oneformer.detectron2.config import configurable + +from . import detection_utils as utils +from . import transforms as T + +""" +This file contains the default mapping that's applied to "dataset dicts". +""" + +__all__ = ["DatasetMapper"] + + +class DatasetMapper: + """ + A callable which takes a dataset dict in Detectron2 Dataset format, + and map it into a format used by the model. + + This is the default callable to be used to map your dataset dict into training data. + You may need to follow it to implement your own one for customized logic, + such as a different way to read or transform images. 
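# --- Editor's illustrative sketch (not part of the patch): constructing a
# DatasetMapper explicitly, without a cfg. The augmentation choices and the
# image format are assumptions for illustration, not defaults taken from this
# file.
from annotator.oneformer.detectron2.data import DatasetMapper
from annotator.oneformer.detectron2.data import transforms as T

mapper = DatasetMapper(
    is_train=True,
    augmentations=[
        T.ResizeShortestEdge(short_edge_length=800, max_size=1333),
        T.RandomFlip(horizontal=True),
    ],
    image_format="BGR",
    use_instance_mask=False,
)
# The train loader calls mapper(dataset_dict) for every dict; each dict must
# contain a readable "file_name" plus any "annotations" to be transformed.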
+ See :doc:`/tutorials/data_loading` for details. + + The callable currently does the following: + + 1. Read the image from "file_name" + 2. Applies cropping/geometric transforms to the image and annotations + 3. Prepare data and annotations to Tensor and :class:`Instances` + """ + + @configurable + def __init__( + self, + is_train: bool, + *, + augmentations: List[Union[T.Augmentation, T.Transform]], + image_format: str, + use_instance_mask: bool = False, + use_keypoint: bool = False, + instance_mask_format: str = "polygon", + keypoint_hflip_indices: Optional[np.ndarray] = None, + precomputed_proposal_topk: Optional[int] = None, + recompute_boxes: bool = False, + ): + """ + NOTE: this interface is experimental. + + Args: + is_train: whether it's used in training or inference + augmentations: a list of augmentations or deterministic transforms to apply + image_format: an image format supported by :func:`detection_utils.read_image`. + use_instance_mask: whether to process instance segmentation annotations, if available + use_keypoint: whether to process keypoint annotations if available + instance_mask_format: one of "polygon" or "bitmask". Process instance segmentation + masks into this format. + keypoint_hflip_indices: see :func:`detection_utils.create_keypoint_hflip_indices` + precomputed_proposal_topk: if given, will load pre-computed + proposals from dataset_dict and keep the top k proposals for each image. + recompute_boxes: whether to overwrite bounding box annotations + by computing tight bounding boxes from instance mask annotations. + """ + if recompute_boxes: + assert use_instance_mask, "recompute_boxes requires instance masks" + # fmt: off + self.is_train = is_train + self.augmentations = T.AugmentationList(augmentations) + self.image_format = image_format + self.use_instance_mask = use_instance_mask + self.instance_mask_format = instance_mask_format + self.use_keypoint = use_keypoint + self.keypoint_hflip_indices = keypoint_hflip_indices + self.proposal_topk = precomputed_proposal_topk + self.recompute_boxes = recompute_boxes + # fmt: on + logger = logging.getLogger(__name__) + mode = "training" if is_train else "inference" + logger.info(f"[DatasetMapper] Augmentations used in {mode}: {augmentations}") + + @classmethod + def from_config(cls, cfg, is_train: bool = True): + augs = utils.build_augmentation(cfg, is_train) + if cfg.INPUT.CROP.ENABLED and is_train: + augs.insert(0, T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE)) + recompute_boxes = cfg.MODEL.MASK_ON + else: + recompute_boxes = False + + ret = { + "is_train": is_train, + "augmentations": augs, + "image_format": cfg.INPUT.FORMAT, + "use_instance_mask": cfg.MODEL.MASK_ON, + "instance_mask_format": cfg.INPUT.MASK_FORMAT, + "use_keypoint": cfg.MODEL.KEYPOINT_ON, + "recompute_boxes": recompute_boxes, + } + + if cfg.MODEL.KEYPOINT_ON: + ret["keypoint_hflip_indices"] = utils.create_keypoint_hflip_indices(cfg.DATASETS.TRAIN) + + if cfg.MODEL.LOAD_PROPOSALS: + ret["precomputed_proposal_topk"] = ( + cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TRAIN + if is_train + else cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TEST + ) + return ret + + def _transform_annotations(self, dataset_dict, transforms, image_shape): + # USER: Modify this if you want to keep them for some reason. 
+ for anno in dataset_dict["annotations"]: + if not self.use_instance_mask: + anno.pop("segmentation", None) + if not self.use_keypoint: + anno.pop("keypoints", None) + + # USER: Implement additional transformations if you have other types of data + annos = [ + utils.transform_instance_annotations( + obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices + ) + for obj in dataset_dict.pop("annotations") + if obj.get("iscrowd", 0) == 0 + ] + instances = utils.annotations_to_instances( + annos, image_shape, mask_format=self.instance_mask_format + ) + + # After transforms such as cropping are applied, the bounding box may no longer + # tightly bound the object. As an example, imagine a triangle object + # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight + # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to + # the intersection of original bounding box and the cropping box. + if self.recompute_boxes: + instances.gt_boxes = instances.gt_masks.get_bounding_boxes() + dataset_dict["instances"] = utils.filter_empty_instances(instances) + + def __call__(self, dataset_dict): + """ + Args: + dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. + + Returns: + dict: a format that builtin models in detectron2 accept + """ + dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below + # USER: Write your own image loading if it's not from a file + image = utils.read_image(dataset_dict["file_name"], format=self.image_format) + utils.check_image_size(dataset_dict, image) + + # USER: Remove if you don't do semantic/panoptic segmentation. + if "sem_seg_file_name" in dataset_dict: + sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2) + else: + sem_seg_gt = None + + aug_input = T.AugInput(image, sem_seg=sem_seg_gt) + transforms = self.augmentations(aug_input) + image, sem_seg_gt = aug_input.image, aug_input.sem_seg + + image_shape = image.shape[:2] # h, w + # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, + # but not efficient on large generic data structures due to the use of pickle & mp.Queue. + # Therefore it's important to use torch.Tensor. + dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) + if sem_seg_gt is not None: + dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long")) + + # USER: Remove if you don't use pre-computed proposals. + # Most users would not need this feature. + if self.proposal_topk is not None: + utils.transform_proposals( + dataset_dict, image_shape, transforms, proposal_topk=self.proposal_topk + ) + + if not self.is_train: + # USER: Modify this if you want to keep them for some reason. 
+ dataset_dict.pop("annotations", None) + dataset_dict.pop("sem_seg_file_name", None) + return dataset_dict + + if "annotations" in dataset_dict: + self._transform_annotations(dataset_dict, transforms, image_shape) + + return dataset_dict diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/datasets/README.md b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/datasets/README.md new file mode 100644 index 0000000000000000000000000000000000000000..9fb3e4f7afec17137c95c78be6ef06d520ec8032 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/datasets/README.md @@ -0,0 +1,9 @@ + + +### Common Datasets + +The dataset implemented here do not need to load the data into the final format. +It should provide the minimal data structure needed to use the dataset, so it can be very efficient. + +For example, for an image dataset, just provide the file names and labels, but don't read the images. +Let the downstream decide how to read. diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/datasets/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/datasets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a44bedc15e5f0e762fc4d77efd6f1b07c6ff77d0 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/datasets/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from .coco import load_coco_json, load_sem_seg, register_coco_instances, convert_to_coco_json +from .coco_panoptic import register_coco_panoptic, register_coco_panoptic_separated +from .lvis import load_lvis_json, register_lvis_instances, get_lvis_instances_meta +from .pascal_voc import load_voc_instances, register_pascal_voc +from . import builtin as _builtin # ensure the builtin datasets are registered + + +__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/datasets/builtin.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/datasets/builtin.py new file mode 100644 index 0000000000000000000000000000000000000000..39bbb1feec64f76705ba32c46f19f89f71be2ca7 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/datasets/builtin.py @@ -0,0 +1,259 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + + +""" +This file registers pre-defined datasets at hard-coded paths, and their metadata. + +We hard-code metadata for common datasets. This will enable: +1. Consistency check when loading the datasets +2. Use models on these standard datasets directly and run demos, + without having to download the dataset annotations + +We hard-code some paths to the dataset that's assumed to +exist in "./datasets/". + +Users SHOULD NOT use this file to create new dataset / metadata for new dataset. +To add new dataset, refer to the tutorial "docs/DATASETS.md". 
+""" + +import os + +from annotator.oneformer.detectron2.data import DatasetCatalog, MetadataCatalog + +from .builtin_meta import ADE20K_SEM_SEG_CATEGORIES, _get_builtin_metadata +from .cityscapes import load_cityscapes_instances, load_cityscapes_semantic +from .cityscapes_panoptic import register_all_cityscapes_panoptic +from .coco import load_sem_seg, register_coco_instances +from .coco_panoptic import register_coco_panoptic, register_coco_panoptic_separated +from .lvis import get_lvis_instances_meta, register_lvis_instances +from .pascal_voc import register_pascal_voc + +# ==== Predefined datasets and splits for COCO ========== + +_PREDEFINED_SPLITS_COCO = {} +_PREDEFINED_SPLITS_COCO["coco"] = { + "coco_2014_train": ("coco/train2014", "coco/annotations/instances_train2014.json"), + "coco_2014_val": ("coco/val2014", "coco/annotations/instances_val2014.json"), + "coco_2014_minival": ("coco/val2014", "coco/annotations/instances_minival2014.json"), + "coco_2014_valminusminival": ( + "coco/val2014", + "coco/annotations/instances_valminusminival2014.json", + ), + "coco_2017_train": ("coco/train2017", "coco/annotations/instances_train2017.json"), + "coco_2017_val": ("coco/val2017", "coco/annotations/instances_val2017.json"), + "coco_2017_test": ("coco/test2017", "coco/annotations/image_info_test2017.json"), + "coco_2017_test-dev": ("coco/test2017", "coco/annotations/image_info_test-dev2017.json"), + "coco_2017_val_100": ("coco/val2017", "coco/annotations/instances_val2017_100.json"), +} + +_PREDEFINED_SPLITS_COCO["coco_person"] = { + "keypoints_coco_2014_train": ( + "coco/train2014", + "coco/annotations/person_keypoints_train2014.json", + ), + "keypoints_coco_2014_val": ("coco/val2014", "coco/annotations/person_keypoints_val2014.json"), + "keypoints_coco_2014_minival": ( + "coco/val2014", + "coco/annotations/person_keypoints_minival2014.json", + ), + "keypoints_coco_2014_valminusminival": ( + "coco/val2014", + "coco/annotations/person_keypoints_valminusminival2014.json", + ), + "keypoints_coco_2017_train": ( + "coco/train2017", + "coco/annotations/person_keypoints_train2017.json", + ), + "keypoints_coco_2017_val": ("coco/val2017", "coco/annotations/person_keypoints_val2017.json"), + "keypoints_coco_2017_val_100": ( + "coco/val2017", + "coco/annotations/person_keypoints_val2017_100.json", + ), +} + + +_PREDEFINED_SPLITS_COCO_PANOPTIC = { + "coco_2017_train_panoptic": ( + # This is the original panoptic annotation directory + "coco/panoptic_train2017", + "coco/annotations/panoptic_train2017.json", + # This directory contains semantic annotations that are + # converted from panoptic annotations. + # It is used by PanopticFPN. + # You can use the script at detectron2/datasets/prepare_panoptic_fpn.py + # to create these directories. + "coco/panoptic_stuff_train2017", + ), + "coco_2017_val_panoptic": ( + "coco/panoptic_val2017", + "coco/annotations/panoptic_val2017.json", + "coco/panoptic_stuff_val2017", + ), + "coco_2017_val_100_panoptic": ( + "coco/panoptic_val2017_100", + "coco/annotations/panoptic_val2017_100.json", + "coco/panoptic_stuff_val2017_100", + ), +} + + +def register_all_coco(root): + for dataset_name, splits_per_dataset in _PREDEFINED_SPLITS_COCO.items(): + for key, (image_root, json_file) in splits_per_dataset.items(): + # Assume pre-defined datasets live in `./datasets`. 
+ register_coco_instances( + key, + _get_builtin_metadata(dataset_name), + os.path.join(root, json_file) if "://" not in json_file else json_file, + os.path.join(root, image_root), + ) + + for ( + prefix, + (panoptic_root, panoptic_json, semantic_root), + ) in _PREDEFINED_SPLITS_COCO_PANOPTIC.items(): + prefix_instances = prefix[: -len("_panoptic")] + instances_meta = MetadataCatalog.get(prefix_instances) + image_root, instances_json = instances_meta.image_root, instances_meta.json_file + # The "separated" version of COCO panoptic segmentation dataset, + # e.g. used by Panoptic FPN + register_coco_panoptic_separated( + prefix, + _get_builtin_metadata("coco_panoptic_separated"), + image_root, + os.path.join(root, panoptic_root), + os.path.join(root, panoptic_json), + os.path.join(root, semantic_root), + instances_json, + ) + # The "standard" version of COCO panoptic segmentation dataset, + # e.g. used by Panoptic-DeepLab + register_coco_panoptic( + prefix, + _get_builtin_metadata("coco_panoptic_standard"), + image_root, + os.path.join(root, panoptic_root), + os.path.join(root, panoptic_json), + instances_json, + ) + + +# ==== Predefined datasets and splits for LVIS ========== + + +_PREDEFINED_SPLITS_LVIS = { + "lvis_v1": { + "lvis_v1_train": ("coco/", "lvis/lvis_v1_train.json"), + "lvis_v1_val": ("coco/", "lvis/lvis_v1_val.json"), + "lvis_v1_test_dev": ("coco/", "lvis/lvis_v1_image_info_test_dev.json"), + "lvis_v1_test_challenge": ("coco/", "lvis/lvis_v1_image_info_test_challenge.json"), + }, + "lvis_v0.5": { + "lvis_v0.5_train": ("coco/", "lvis/lvis_v0.5_train.json"), + "lvis_v0.5_val": ("coco/", "lvis/lvis_v0.5_val.json"), + "lvis_v0.5_val_rand_100": ("coco/", "lvis/lvis_v0.5_val_rand_100.json"), + "lvis_v0.5_test": ("coco/", "lvis/lvis_v0.5_image_info_test.json"), + }, + "lvis_v0.5_cocofied": { + "lvis_v0.5_train_cocofied": ("coco/", "lvis/lvis_v0.5_train_cocofied.json"), + "lvis_v0.5_val_cocofied": ("coco/", "lvis/lvis_v0.5_val_cocofied.json"), + }, +} + + +def register_all_lvis(root): + for dataset_name, splits_per_dataset in _PREDEFINED_SPLITS_LVIS.items(): + for key, (image_root, json_file) in splits_per_dataset.items(): + register_lvis_instances( + key, + get_lvis_instances_meta(dataset_name), + os.path.join(root, json_file) if "://" not in json_file else json_file, + os.path.join(root, image_root), + ) + + +# ==== Predefined splits for raw cityscapes images =========== +_RAW_CITYSCAPES_SPLITS = { + "cityscapes_fine_{task}_train": ("cityscapes/leftImg8bit/train/", "cityscapes/gtFine/train/"), + "cityscapes_fine_{task}_val": ("cityscapes/leftImg8bit/val/", "cityscapes/gtFine/val/"), + "cityscapes_fine_{task}_test": ("cityscapes/leftImg8bit/test/", "cityscapes/gtFine/test/"), +} + + +def register_all_cityscapes(root): + for key, (image_dir, gt_dir) in _RAW_CITYSCAPES_SPLITS.items(): + meta = _get_builtin_metadata("cityscapes") + image_dir = os.path.join(root, image_dir) + gt_dir = os.path.join(root, gt_dir) + + inst_key = key.format(task="instance_seg") + DatasetCatalog.register( + inst_key, + lambda x=image_dir, y=gt_dir: load_cityscapes_instances( + x, y, from_json=True, to_polygons=True + ), + ) + MetadataCatalog.get(inst_key).set( + image_dir=image_dir, gt_dir=gt_dir, evaluator_type="cityscapes_instance", **meta + ) + + sem_key = key.format(task="sem_seg") + DatasetCatalog.register( + sem_key, lambda x=image_dir, y=gt_dir: load_cityscapes_semantic(x, y) + ) + MetadataCatalog.get(sem_key).set( + image_dir=image_dir, + gt_dir=gt_dir, + evaluator_type="cityscapes_sem_seg", + 
ignore_label=255, + **meta, + ) + + +# ==== Predefined splits for PASCAL VOC =========== +def register_all_pascal_voc(root): + SPLITS = [ + ("voc_2007_trainval", "VOC2007", "trainval"), + ("voc_2007_train", "VOC2007", "train"), + ("voc_2007_val", "VOC2007", "val"), + ("voc_2007_test", "VOC2007", "test"), + ("voc_2012_trainval", "VOC2012", "trainval"), + ("voc_2012_train", "VOC2012", "train"), + ("voc_2012_val", "VOC2012", "val"), + ] + for name, dirname, split in SPLITS: + year = 2007 if "2007" in name else 2012 + register_pascal_voc(name, os.path.join(root, dirname), split, year) + MetadataCatalog.get(name).evaluator_type = "pascal_voc" + + +def register_all_ade20k(root): + root = os.path.join(root, "ADEChallengeData2016") + for name, dirname in [("train", "training"), ("val", "validation")]: + image_dir = os.path.join(root, "images", dirname) + gt_dir = os.path.join(root, "annotations_detectron2", dirname) + name = f"ade20k_sem_seg_{name}" + DatasetCatalog.register( + name, lambda x=image_dir, y=gt_dir: load_sem_seg(y, x, gt_ext="png", image_ext="jpg") + ) + MetadataCatalog.get(name).set( + stuff_classes=ADE20K_SEM_SEG_CATEGORIES[:], + image_root=image_dir, + sem_seg_root=gt_dir, + evaluator_type="sem_seg", + ignore_label=255, + ) + + +# True for open source; +# Internally at fb, we register them elsewhere +if __name__.endswith(".builtin"): + # Assume pre-defined datasets live in `./datasets`. + _root = os.path.expanduser(os.getenv("DETECTRON2_DATASETS", "datasets")) + register_all_coco(_root) + register_all_lvis(_root) + register_all_cityscapes(_root) + register_all_cityscapes_panoptic(_root) + register_all_pascal_voc(_root) + register_all_ade20k(_root) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/datasets/builtin_meta.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/datasets/builtin_meta.py new file mode 100644 index 0000000000000000000000000000000000000000..63c7a1a31b31dd89b82011effee26471faccacf5 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/datasets/builtin_meta.py @@ -0,0 +1,350 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + +""" +Note: +For your custom dataset, there is no need to hard-code metadata anywhere in the code. +For example, for COCO-format dataset, metadata will be obtained automatically +when calling `load_coco_json`. For other dataset, metadata may also be obtained in other ways +during loading. + +However, we hard-coded metadata for a few common dataset here. +The only goal is to allow users who don't have these dataset to use pre-trained models. +Users don't have to download a COCO json (which contains metadata), in order to visualize a +COCO model (with correct class names and colors). 
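# --- Editor's illustrative sketch (not part of the patch): once the datasets
# submodule has been imported (upstream detectron2 does this when its data
# package is imported, and the vendored copy is assumed to behave the same),
# the hard-coded metadata is reachable through MetadataCatalog even when the
# COCO files are not on disk.
from annotator.oneformer.detectron2.data import MetadataCatalog

coco_meta = MetadataCatalog.get("coco_2017_train")
print(len(coco_meta.thing_classes))   # 80 thing classes
print(coco_meta.thing_classes[:3])    # ['person', 'bicycle', 'car']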
+""" + + +# All coco categories, together with their nice-looking visualization colors +# It's from https://github.com/cocodataset/panopticapi/blob/master/panoptic_coco_categories.json +COCO_CATEGORIES = [ + {"color": [220, 20, 60], "isthing": 1, "id": 1, "name": "person"}, + {"color": [119, 11, 32], "isthing": 1, "id": 2, "name": "bicycle"}, + {"color": [0, 0, 142], "isthing": 1, "id": 3, "name": "car"}, + {"color": [0, 0, 230], "isthing": 1, "id": 4, "name": "motorcycle"}, + {"color": [106, 0, 228], "isthing": 1, "id": 5, "name": "airplane"}, + {"color": [0, 60, 100], "isthing": 1, "id": 6, "name": "bus"}, + {"color": [0, 80, 100], "isthing": 1, "id": 7, "name": "train"}, + {"color": [0, 0, 70], "isthing": 1, "id": 8, "name": "truck"}, + {"color": [0, 0, 192], "isthing": 1, "id": 9, "name": "boat"}, + {"color": [250, 170, 30], "isthing": 1, "id": 10, "name": "traffic light"}, + {"color": [100, 170, 30], "isthing": 1, "id": 11, "name": "fire hydrant"}, + {"color": [220, 220, 0], "isthing": 1, "id": 13, "name": "stop sign"}, + {"color": [175, 116, 175], "isthing": 1, "id": 14, "name": "parking meter"}, + {"color": [250, 0, 30], "isthing": 1, "id": 15, "name": "bench"}, + {"color": [165, 42, 42], "isthing": 1, "id": 16, "name": "bird"}, + {"color": [255, 77, 255], "isthing": 1, "id": 17, "name": "cat"}, + {"color": [0, 226, 252], "isthing": 1, "id": 18, "name": "dog"}, + {"color": [182, 182, 255], "isthing": 1, "id": 19, "name": "horse"}, + {"color": [0, 82, 0], "isthing": 1, "id": 20, "name": "sheep"}, + {"color": [120, 166, 157], "isthing": 1, "id": 21, "name": "cow"}, + {"color": [110, 76, 0], "isthing": 1, "id": 22, "name": "elephant"}, + {"color": [174, 57, 255], "isthing": 1, "id": 23, "name": "bear"}, + {"color": [199, 100, 0], "isthing": 1, "id": 24, "name": "zebra"}, + {"color": [72, 0, 118], "isthing": 1, "id": 25, "name": "giraffe"}, + {"color": [255, 179, 240], "isthing": 1, "id": 27, "name": "backpack"}, + {"color": [0, 125, 92], "isthing": 1, "id": 28, "name": "umbrella"}, + {"color": [209, 0, 151], "isthing": 1, "id": 31, "name": "handbag"}, + {"color": [188, 208, 182], "isthing": 1, "id": 32, "name": "tie"}, + {"color": [0, 220, 176], "isthing": 1, "id": 33, "name": "suitcase"}, + {"color": [255, 99, 164], "isthing": 1, "id": 34, "name": "frisbee"}, + {"color": [92, 0, 73], "isthing": 1, "id": 35, "name": "skis"}, + {"color": [133, 129, 255], "isthing": 1, "id": 36, "name": "snowboard"}, + {"color": [78, 180, 255], "isthing": 1, "id": 37, "name": "sports ball"}, + {"color": [0, 228, 0], "isthing": 1, "id": 38, "name": "kite"}, + {"color": [174, 255, 243], "isthing": 1, "id": 39, "name": "baseball bat"}, + {"color": [45, 89, 255], "isthing": 1, "id": 40, "name": "baseball glove"}, + {"color": [134, 134, 103], "isthing": 1, "id": 41, "name": "skateboard"}, + {"color": [145, 148, 174], "isthing": 1, "id": 42, "name": "surfboard"}, + {"color": [255, 208, 186], "isthing": 1, "id": 43, "name": "tennis racket"}, + {"color": [197, 226, 255], "isthing": 1, "id": 44, "name": "bottle"}, + {"color": [171, 134, 1], "isthing": 1, "id": 46, "name": "wine glass"}, + {"color": [109, 63, 54], "isthing": 1, "id": 47, "name": "cup"}, + {"color": [207, 138, 255], "isthing": 1, "id": 48, "name": "fork"}, + {"color": [151, 0, 95], "isthing": 1, "id": 49, "name": "knife"}, + {"color": [9, 80, 61], "isthing": 1, "id": 50, "name": "spoon"}, + {"color": [84, 105, 51], "isthing": 1, "id": 51, "name": "bowl"}, + {"color": [74, 65, 105], "isthing": 1, "id": 52, "name": "banana"}, + {"color": [166, 196, 
102], "isthing": 1, "id": 53, "name": "apple"}, + {"color": [208, 195, 210], "isthing": 1, "id": 54, "name": "sandwich"}, + {"color": [255, 109, 65], "isthing": 1, "id": 55, "name": "orange"}, + {"color": [0, 143, 149], "isthing": 1, "id": 56, "name": "broccoli"}, + {"color": [179, 0, 194], "isthing": 1, "id": 57, "name": "carrot"}, + {"color": [209, 99, 106], "isthing": 1, "id": 58, "name": "hot dog"}, + {"color": [5, 121, 0], "isthing": 1, "id": 59, "name": "pizza"}, + {"color": [227, 255, 205], "isthing": 1, "id": 60, "name": "donut"}, + {"color": [147, 186, 208], "isthing": 1, "id": 61, "name": "cake"}, + {"color": [153, 69, 1], "isthing": 1, "id": 62, "name": "chair"}, + {"color": [3, 95, 161], "isthing": 1, "id": 63, "name": "couch"}, + {"color": [163, 255, 0], "isthing": 1, "id": 64, "name": "potted plant"}, + {"color": [119, 0, 170], "isthing": 1, "id": 65, "name": "bed"}, + {"color": [0, 182, 199], "isthing": 1, "id": 67, "name": "dining table"}, + {"color": [0, 165, 120], "isthing": 1, "id": 70, "name": "toilet"}, + {"color": [183, 130, 88], "isthing": 1, "id": 72, "name": "tv"}, + {"color": [95, 32, 0], "isthing": 1, "id": 73, "name": "laptop"}, + {"color": [130, 114, 135], "isthing": 1, "id": 74, "name": "mouse"}, + {"color": [110, 129, 133], "isthing": 1, "id": 75, "name": "remote"}, + {"color": [166, 74, 118], "isthing": 1, "id": 76, "name": "keyboard"}, + {"color": [219, 142, 185], "isthing": 1, "id": 77, "name": "cell phone"}, + {"color": [79, 210, 114], "isthing": 1, "id": 78, "name": "microwave"}, + {"color": [178, 90, 62], "isthing": 1, "id": 79, "name": "oven"}, + {"color": [65, 70, 15], "isthing": 1, "id": 80, "name": "toaster"}, + {"color": [127, 167, 115], "isthing": 1, "id": 81, "name": "sink"}, + {"color": [59, 105, 106], "isthing": 1, "id": 82, "name": "refrigerator"}, + {"color": [142, 108, 45], "isthing": 1, "id": 84, "name": "book"}, + {"color": [196, 172, 0], "isthing": 1, "id": 85, "name": "clock"}, + {"color": [95, 54, 80], "isthing": 1, "id": 86, "name": "vase"}, + {"color": [128, 76, 255], "isthing": 1, "id": 87, "name": "scissors"}, + {"color": [201, 57, 1], "isthing": 1, "id": 88, "name": "teddy bear"}, + {"color": [246, 0, 122], "isthing": 1, "id": 89, "name": "hair drier"}, + {"color": [191, 162, 208], "isthing": 1, "id": 90, "name": "toothbrush"}, + {"color": [255, 255, 128], "isthing": 0, "id": 92, "name": "banner"}, + {"color": [147, 211, 203], "isthing": 0, "id": 93, "name": "blanket"}, + {"color": [150, 100, 100], "isthing": 0, "id": 95, "name": "bridge"}, + {"color": [168, 171, 172], "isthing": 0, "id": 100, "name": "cardboard"}, + {"color": [146, 112, 198], "isthing": 0, "id": 107, "name": "counter"}, + {"color": [210, 170, 100], "isthing": 0, "id": 109, "name": "curtain"}, + {"color": [92, 136, 89], "isthing": 0, "id": 112, "name": "door-stuff"}, + {"color": [218, 88, 184], "isthing": 0, "id": 118, "name": "floor-wood"}, + {"color": [241, 129, 0], "isthing": 0, "id": 119, "name": "flower"}, + {"color": [217, 17, 255], "isthing": 0, "id": 122, "name": "fruit"}, + {"color": [124, 74, 181], "isthing": 0, "id": 125, "name": "gravel"}, + {"color": [70, 70, 70], "isthing": 0, "id": 128, "name": "house"}, + {"color": [255, 228, 255], "isthing": 0, "id": 130, "name": "light"}, + {"color": [154, 208, 0], "isthing": 0, "id": 133, "name": "mirror-stuff"}, + {"color": [193, 0, 92], "isthing": 0, "id": 138, "name": "net"}, + {"color": [76, 91, 113], "isthing": 0, "id": 141, "name": "pillow"}, + {"color": [255, 180, 195], "isthing": 0, "id": 144, "name": 
"platform"}, + {"color": [106, 154, 176], "isthing": 0, "id": 145, "name": "playingfield"}, + {"color": [230, 150, 140], "isthing": 0, "id": 147, "name": "railroad"}, + {"color": [60, 143, 255], "isthing": 0, "id": 148, "name": "river"}, + {"color": [128, 64, 128], "isthing": 0, "id": 149, "name": "road"}, + {"color": [92, 82, 55], "isthing": 0, "id": 151, "name": "roof"}, + {"color": [254, 212, 124], "isthing": 0, "id": 154, "name": "sand"}, + {"color": [73, 77, 174], "isthing": 0, "id": 155, "name": "sea"}, + {"color": [255, 160, 98], "isthing": 0, "id": 156, "name": "shelf"}, + {"color": [255, 255, 255], "isthing": 0, "id": 159, "name": "snow"}, + {"color": [104, 84, 109], "isthing": 0, "id": 161, "name": "stairs"}, + {"color": [169, 164, 131], "isthing": 0, "id": 166, "name": "tent"}, + {"color": [225, 199, 255], "isthing": 0, "id": 168, "name": "towel"}, + {"color": [137, 54, 74], "isthing": 0, "id": 171, "name": "wall-brick"}, + {"color": [135, 158, 223], "isthing": 0, "id": 175, "name": "wall-stone"}, + {"color": [7, 246, 231], "isthing": 0, "id": 176, "name": "wall-tile"}, + {"color": [107, 255, 200], "isthing": 0, "id": 177, "name": "wall-wood"}, + {"color": [58, 41, 149], "isthing": 0, "id": 178, "name": "water-other"}, + {"color": [183, 121, 142], "isthing": 0, "id": 180, "name": "window-blind"}, + {"color": [255, 73, 97], "isthing": 0, "id": 181, "name": "window-other"}, + {"color": [107, 142, 35], "isthing": 0, "id": 184, "name": "tree-merged"}, + {"color": [190, 153, 153], "isthing": 0, "id": 185, "name": "fence-merged"}, + {"color": [146, 139, 141], "isthing": 0, "id": 186, "name": "ceiling-merged"}, + {"color": [70, 130, 180], "isthing": 0, "id": 187, "name": "sky-other-merged"}, + {"color": [134, 199, 156], "isthing": 0, "id": 188, "name": "cabinet-merged"}, + {"color": [209, 226, 140], "isthing": 0, "id": 189, "name": "table-merged"}, + {"color": [96, 36, 108], "isthing": 0, "id": 190, "name": "floor-other-merged"}, + {"color": [96, 96, 96], "isthing": 0, "id": 191, "name": "pavement-merged"}, + {"color": [64, 170, 64], "isthing": 0, "id": 192, "name": "mountain-merged"}, + {"color": [152, 251, 152], "isthing": 0, "id": 193, "name": "grass-merged"}, + {"color": [208, 229, 228], "isthing": 0, "id": 194, "name": "dirt-merged"}, + {"color": [206, 186, 171], "isthing": 0, "id": 195, "name": "paper-merged"}, + {"color": [152, 161, 64], "isthing": 0, "id": 196, "name": "food-other-merged"}, + {"color": [116, 112, 0], "isthing": 0, "id": 197, "name": "building-other-merged"}, + {"color": [0, 114, 143], "isthing": 0, "id": 198, "name": "rock-merged"}, + {"color": [102, 102, 156], "isthing": 0, "id": 199, "name": "wall-other-merged"}, + {"color": [250, 141, 255], "isthing": 0, "id": 200, "name": "rug-merged"}, +] + +# fmt: off +COCO_PERSON_KEYPOINT_NAMES = ( + "nose", + "left_eye", "right_eye", + "left_ear", "right_ear", + "left_shoulder", "right_shoulder", + "left_elbow", "right_elbow", + "left_wrist", "right_wrist", + "left_hip", "right_hip", + "left_knee", "right_knee", + "left_ankle", "right_ankle", +) +# fmt: on + +# Pairs of keypoints that should be exchanged under horizontal flipping +COCO_PERSON_KEYPOINT_FLIP_MAP = ( + ("left_eye", "right_eye"), + ("left_ear", "right_ear"), + ("left_shoulder", "right_shoulder"), + ("left_elbow", "right_elbow"), + ("left_wrist", "right_wrist"), + ("left_hip", "right_hip"), + ("left_knee", "right_knee"), + ("left_ankle", "right_ankle"), +) + +# rules for pairs of keypoints to draw a line between, and the line color to use. 
+KEYPOINT_CONNECTION_RULES = [ + # face + ("left_ear", "left_eye", (102, 204, 255)), + ("right_ear", "right_eye", (51, 153, 255)), + ("left_eye", "nose", (102, 0, 204)), + ("nose", "right_eye", (51, 102, 255)), + # upper-body + ("left_shoulder", "right_shoulder", (255, 128, 0)), + ("left_shoulder", "left_elbow", (153, 255, 204)), + ("right_shoulder", "right_elbow", (128, 229, 255)), + ("left_elbow", "left_wrist", (153, 255, 153)), + ("right_elbow", "right_wrist", (102, 255, 224)), + # lower-body + ("left_hip", "right_hip", (255, 102, 0)), + ("left_hip", "left_knee", (255, 255, 77)), + ("right_hip", "right_knee", (153, 255, 204)), + ("left_knee", "left_ankle", (191, 255, 128)), + ("right_knee", "right_ankle", (255, 195, 77)), +] + +# All Cityscapes categories, together with their nice-looking visualization colors +# It's from https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/helpers/labels.py # noqa +CITYSCAPES_CATEGORIES = [ + {"color": (128, 64, 128), "isthing": 0, "id": 7, "trainId": 0, "name": "road"}, + {"color": (244, 35, 232), "isthing": 0, "id": 8, "trainId": 1, "name": "sidewalk"}, + {"color": (70, 70, 70), "isthing": 0, "id": 11, "trainId": 2, "name": "building"}, + {"color": (102, 102, 156), "isthing": 0, "id": 12, "trainId": 3, "name": "wall"}, + {"color": (190, 153, 153), "isthing": 0, "id": 13, "trainId": 4, "name": "fence"}, + {"color": (153, 153, 153), "isthing": 0, "id": 17, "trainId": 5, "name": "pole"}, + {"color": (250, 170, 30), "isthing": 0, "id": 19, "trainId": 6, "name": "traffic light"}, + {"color": (220, 220, 0), "isthing": 0, "id": 20, "trainId": 7, "name": "traffic sign"}, + {"color": (107, 142, 35), "isthing": 0, "id": 21, "trainId": 8, "name": "vegetation"}, + {"color": (152, 251, 152), "isthing": 0, "id": 22, "trainId": 9, "name": "terrain"}, + {"color": (70, 130, 180), "isthing": 0, "id": 23, "trainId": 10, "name": "sky"}, + {"color": (220, 20, 60), "isthing": 1, "id": 24, "trainId": 11, "name": "person"}, + {"color": (255, 0, 0), "isthing": 1, "id": 25, "trainId": 12, "name": "rider"}, + {"color": (0, 0, 142), "isthing": 1, "id": 26, "trainId": 13, "name": "car"}, + {"color": (0, 0, 70), "isthing": 1, "id": 27, "trainId": 14, "name": "truck"}, + {"color": (0, 60, 100), "isthing": 1, "id": 28, "trainId": 15, "name": "bus"}, + {"color": (0, 80, 100), "isthing": 1, "id": 31, "trainId": 16, "name": "train"}, + {"color": (0, 0, 230), "isthing": 1, "id": 32, "trainId": 17, "name": "motorcycle"}, + {"color": (119, 11, 32), "isthing": 1, "id": 33, "trainId": 18, "name": "bicycle"}, +] + +# fmt: off +ADE20K_SEM_SEG_CATEGORIES = [ + "wall", "building", "sky", "floor", "tree", "ceiling", "road, route", "bed", "window ", "grass", "cabinet", "sidewalk, pavement", "person", "earth, ground", "door", "table", "mountain, mount", "plant", "curtain", "chair", "car", "water", "painting, picture", "sofa", "shelf", "house", "sea", "mirror", "rug", "field", "armchair", "seat", "fence", "desk", "rock, stone", "wardrobe, closet, press", "lamp", "tub", "rail", "cushion", "base, pedestal, stand", "box", "column, pillar", "signboard, sign", "chest of drawers, chest, bureau, dresser", "counter", "sand", "sink", "skyscraper", "fireplace", "refrigerator, icebox", "grandstand, covered stand", "path", "stairs", "runway", "case, display case, showcase, vitrine", "pool table, billiard table, snooker table", "pillow", "screen door, screen", "stairway, staircase", "river", "bridge, span", "bookcase", "blind, screen", "coffee table", "toilet, can, commode, crapper, pot, 
potty, stool, throne", "flower", "book", "hill", "bench", "countertop", "stove", "palm, palm tree", "kitchen island", "computer", "swivel chair", "boat", "bar", "arcade machine", "hovel, hut, hutch, shack, shanty", "bus", "towel", "light", "truck", "tower", "chandelier", "awning, sunshade, sunblind", "street lamp", "booth", "tv", "plane", "dirt track", "clothes", "pole", "land, ground, soil", "bannister, banister, balustrade, balusters, handrail", "escalator, moving staircase, moving stairway", "ottoman, pouf, pouffe, puff, hassock", "bottle", "buffet, counter, sideboard", "poster, posting, placard, notice, bill, card", "stage", "van", "ship", "fountain", "conveyer belt, conveyor belt, conveyer, conveyor, transporter", "canopy", "washer, automatic washer, washing machine", "plaything, toy", "pool", "stool", "barrel, cask", "basket, handbasket", "falls", "tent", "bag", "minibike, motorbike", "cradle", "oven", "ball", "food, solid food", "step, stair", "tank, storage tank", "trade name", "microwave", "pot", "animal", "bicycle", "lake", "dishwasher", "screen", "blanket, cover", "sculpture", "hood, exhaust hood", "sconce", "vase", "traffic light", "tray", "trash can", "fan", "pier", "crt screen", "plate", "monitor", "bulletin board", "shower", "radiator", "glass, drinking glass", "clock", "flag", # noqa +] +# After processed by `prepare_ade20k_sem_seg.py`, id 255 means ignore +# fmt: on + + +def _get_coco_instances_meta(): + thing_ids = [k["id"] for k in COCO_CATEGORIES if k["isthing"] == 1] + thing_colors = [k["color"] for k in COCO_CATEGORIES if k["isthing"] == 1] + assert len(thing_ids) == 80, len(thing_ids) + # Mapping from the incontiguous COCO category id to an id in [0, 79] + thing_dataset_id_to_contiguous_id = {k: i for i, k in enumerate(thing_ids)} + thing_classes = [k["name"] for k in COCO_CATEGORIES if k["isthing"] == 1] + ret = { + "thing_dataset_id_to_contiguous_id": thing_dataset_id_to_contiguous_id, + "thing_classes": thing_classes, + "thing_colors": thing_colors, + } + return ret + + +def _get_coco_panoptic_separated_meta(): + """ + Returns metadata for "separated" version of the panoptic segmentation dataset. + """ + stuff_ids = [k["id"] for k in COCO_CATEGORIES if k["isthing"] == 0] + assert len(stuff_ids) == 53, len(stuff_ids) + + # For semantic segmentation, this mapping maps from contiguous stuff id + # (in [0, 53], used in models) to ids in the dataset (used for processing results) + # The id 0 is mapped to an extra category "thing". 
+ stuff_dataset_id_to_contiguous_id = {k: i + 1 for i, k in enumerate(stuff_ids)} + # When converting COCO panoptic annotations to semantic annotations + # We label the "thing" category to 0 + stuff_dataset_id_to_contiguous_id[0] = 0 + + # 54 names for COCO stuff categories (including "things") + stuff_classes = ["things"] + [ + k["name"].replace("-other", "").replace("-merged", "") + for k in COCO_CATEGORIES + if k["isthing"] == 0 + ] + + # NOTE: I randomly picked a color for things + stuff_colors = [[82, 18, 128]] + [k["color"] for k in COCO_CATEGORIES if k["isthing"] == 0] + ret = { + "stuff_dataset_id_to_contiguous_id": stuff_dataset_id_to_contiguous_id, + "stuff_classes": stuff_classes, + "stuff_colors": stuff_colors, + } + ret.update(_get_coco_instances_meta()) + return ret + + +def _get_builtin_metadata(dataset_name): + if dataset_name == "coco": + return _get_coco_instances_meta() + if dataset_name == "coco_panoptic_separated": + return _get_coco_panoptic_separated_meta() + elif dataset_name == "coco_panoptic_standard": + meta = {} + # The following metadata maps contiguous id from [0, #thing categories + + # #stuff categories) to their names and colors. We have to replica of the + # same name and color under "thing_*" and "stuff_*" because the current + # visualization function in D2 handles thing and class classes differently + # due to some heuristic used in Panoptic FPN. We keep the same naming to + # enable reusing existing visualization functions. + thing_classes = [k["name"] for k in COCO_CATEGORIES] + thing_colors = [k["color"] for k in COCO_CATEGORIES] + stuff_classes = [k["name"] for k in COCO_CATEGORIES] + stuff_colors = [k["color"] for k in COCO_CATEGORIES] + + meta["thing_classes"] = thing_classes + meta["thing_colors"] = thing_colors + meta["stuff_classes"] = stuff_classes + meta["stuff_colors"] = stuff_colors + + # Convert category id for training: + # category id: like semantic segmentation, it is the class id for each + # pixel. Since there are some classes not used in evaluation, the category + # id is not always contiguous and thus we have two set of category ids: + # - original category id: category id in the original dataset, mainly + # used for evaluation. + # - contiguous category id: [0, #classes), in order to train the linear + # softmax classifier. 
+ thing_dataset_id_to_contiguous_id = {} + stuff_dataset_id_to_contiguous_id = {} + + for i, cat in enumerate(COCO_CATEGORIES): + if cat["isthing"]: + thing_dataset_id_to_contiguous_id[cat["id"]] = i + else: + stuff_dataset_id_to_contiguous_id[cat["id"]] = i + + meta["thing_dataset_id_to_contiguous_id"] = thing_dataset_id_to_contiguous_id + meta["stuff_dataset_id_to_contiguous_id"] = stuff_dataset_id_to_contiguous_id + + return meta + elif dataset_name == "coco_person": + return { + "thing_classes": ["person"], + "keypoint_names": COCO_PERSON_KEYPOINT_NAMES, + "keypoint_flip_map": COCO_PERSON_KEYPOINT_FLIP_MAP, + "keypoint_connection_rules": KEYPOINT_CONNECTION_RULES, + } + elif dataset_name == "cityscapes": + # fmt: off + CITYSCAPES_THING_CLASSES = [ + "person", "rider", "car", "truck", + "bus", "train", "motorcycle", "bicycle", + ] + CITYSCAPES_STUFF_CLASSES = [ + "road", "sidewalk", "building", "wall", "fence", "pole", "traffic light", + "traffic sign", "vegetation", "terrain", "sky", "person", "rider", "car", + "truck", "bus", "train", "motorcycle", "bicycle", + ] + # fmt: on + return { + "thing_classes": CITYSCAPES_THING_CLASSES, + "stuff_classes": CITYSCAPES_STUFF_CLASSES, + } + raise KeyError("No built-in metadata for dataset {}".format(dataset_name)) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/datasets/cityscapes.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/datasets/cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..f646be9da15914c2ea5e34e478fda3cfb5fb309f --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/datasets/cityscapes.py @@ -0,0 +1,329 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import functools +import json +import logging +import multiprocessing as mp +import numpy as np +import os +from itertools import chain +import annotator.oneformer.pycocotools.mask as mask_util +from PIL import Image + +from annotator.oneformer.detectron2.structures import BoxMode +from annotator.oneformer.detectron2.utils.comm import get_world_size +from annotator.oneformer.detectron2.utils.file_io import PathManager +from annotator.oneformer.detectron2.utils.logger import setup_logger + +try: + import cv2 # noqa +except ImportError: + # OpenCV is an optional dependency at the moment + pass + + +logger = logging.getLogger(__name__) + + +def _get_cityscapes_files(image_dir, gt_dir): + files = [] + # scan through the directory + cities = PathManager.ls(image_dir) + logger.info(f"{len(cities)} cities found in '{image_dir}'.") + for city in cities: + city_img_dir = os.path.join(image_dir, city) + city_gt_dir = os.path.join(gt_dir, city) + for basename in PathManager.ls(city_img_dir): + image_file = os.path.join(city_img_dir, basename) + + suffix = "leftImg8bit.png" + assert basename.endswith(suffix), basename + basename = basename[: -len(suffix)] + + instance_file = os.path.join(city_gt_dir, basename + "gtFine_instanceIds.png") + label_file = os.path.join(city_gt_dir, basename + "gtFine_labelIds.png") + json_file = os.path.join(city_gt_dir, basename + "gtFine_polygons.json") + + files.append((image_file, instance_file, label_file, json_file)) + assert len(files), "No images found in {}".format(image_dir) + for f in files[0]: + assert PathManager.isfile(f), f + return files + + +def load_cityscapes_instances(image_dir, gt_dir, from_json=True, to_polygons=True): + """ + Args: + image_dir (str): path to the raw dataset. 
e.g., "~/cityscapes/leftImg8bit/train". + gt_dir (str): path to the raw annotations. e.g., "~/cityscapes/gtFine/train". + from_json (bool): whether to read annotations from the raw json file or the png files. + to_polygons (bool): whether to represent the segmentation as polygons + (COCO's format) instead of masks (cityscapes's format). + + Returns: + list[dict]: a list of dicts in Detectron2 standard format. (See + `Using Custom Datasets `_ ) + """ + if from_json: + assert to_polygons, ( + "Cityscapes's json annotations are in polygon format. " + "Converting to mask format is not supported now." + ) + files = _get_cityscapes_files(image_dir, gt_dir) + + logger.info("Preprocessing cityscapes annotations ...") + # This is still not fast: all workers will execute duplicate works and will + # take up to 10m on a 8GPU server. + pool = mp.Pool(processes=max(mp.cpu_count() // get_world_size() // 2, 4)) + + ret = pool.map( + functools.partial(_cityscapes_files_to_dict, from_json=from_json, to_polygons=to_polygons), + files, + ) + logger.info("Loaded {} images from {}".format(len(ret), image_dir)) + + # Map cityscape ids to contiguous ids + from cityscapesscripts.helpers.labels import labels + + labels = [l for l in labels if l.hasInstances and not l.ignoreInEval] + dataset_id_to_contiguous_id = {l.id: idx for idx, l in enumerate(labels)} + for dict_per_image in ret: + for anno in dict_per_image["annotations"]: + anno["category_id"] = dataset_id_to_contiguous_id[anno["category_id"]] + return ret + + +def load_cityscapes_semantic(image_dir, gt_dir): + """ + Args: + image_dir (str): path to the raw dataset. e.g., "~/cityscapes/leftImg8bit/train". + gt_dir (str): path to the raw annotations. e.g., "~/cityscapes/gtFine/train". + + Returns: + list[dict]: a list of dict, each has "file_name" and + "sem_seg_file_name". + """ + ret = [] + # gt_dir is small and contain many small files. make sense to fetch to local first + gt_dir = PathManager.get_local_path(gt_dir) + for image_file, _, label_file, json_file in _get_cityscapes_files(image_dir, gt_dir): + label_file = label_file.replace("labelIds", "labelTrainIds") + + with PathManager.open(json_file, "r") as f: + jsonobj = json.load(f) + ret.append( + { + "file_name": image_file, + "sem_seg_file_name": label_file, + "height": jsonobj["imgHeight"], + "width": jsonobj["imgWidth"], + } + ) + assert len(ret), f"No images found in {image_dir}!" + assert PathManager.isfile( + ret[0]["sem_seg_file_name"] + ), "Please generate labelTrainIds.png with cityscapesscripts/preparation/createTrainIdLabelImgs.py" # noqa + return ret + + +def _cityscapes_files_to_dict(files, from_json, to_polygons): + """ + Parse cityscapes annotation files to a instance segmentation dataset dict. + + Args: + files (tuple): consists of (image_file, instance_id_file, label_id_file, json_file) + from_json (bool): whether to read annotations from the raw json file or the png files. + to_polygons (bool): whether to represent the segmentation as polygons + (COCO's format) instead of masks (cityscapes's format). + + Returns: + A dict in Detectron2 Dataset format. 
+ """ + from cityscapesscripts.helpers.labels import id2label, name2label + + image_file, instance_id_file, _, json_file = files + + annos = [] + + if from_json: + from shapely.geometry import MultiPolygon, Polygon + + with PathManager.open(json_file, "r") as f: + jsonobj = json.load(f) + ret = { + "file_name": image_file, + "image_id": os.path.basename(image_file), + "height": jsonobj["imgHeight"], + "width": jsonobj["imgWidth"], + } + + # `polygons_union` contains the union of all valid polygons. + polygons_union = Polygon() + + # CityscapesScripts draw the polygons in sequential order + # and each polygon *overwrites* existing ones. See + # (https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/preparation/json2instanceImg.py) # noqa + # We use reverse order, and each polygon *avoids* early ones. + # This will resolve the ploygon overlaps in the same way as CityscapesScripts. + for obj in jsonobj["objects"][::-1]: + if "deleted" in obj: # cityscapes data format specific + continue + label_name = obj["label"] + + try: + label = name2label[label_name] + except KeyError: + if label_name.endswith("group"): # crowd area + label = name2label[label_name[: -len("group")]] + else: + raise + if label.id < 0: # cityscapes data format + continue + + # Cityscapes's raw annotations uses integer coordinates + # Therefore +0.5 here + poly_coord = np.asarray(obj["polygon"], dtype="f4") + 0.5 + # CityscapesScript uses PIL.ImageDraw.polygon to rasterize + # polygons for evaluation. This function operates in integer space + # and draws each pixel whose center falls into the polygon. + # Therefore it draws a polygon which is 0.5 "fatter" in expectation. + # We therefore dilate the input polygon by 0.5 as our input. + poly = Polygon(poly_coord).buffer(0.5, resolution=4) + + if not label.hasInstances or label.ignoreInEval: + # even if we won't store the polygon it still contributes to overlaps resolution + polygons_union = polygons_union.union(poly) + continue + + # Take non-overlapping part of the polygon + poly_wo_overlaps = poly.difference(polygons_union) + if poly_wo_overlaps.is_empty: + continue + polygons_union = polygons_union.union(poly) + + anno = {} + anno["iscrowd"] = label_name.endswith("group") + anno["category_id"] = label.id + + if isinstance(poly_wo_overlaps, Polygon): + poly_list = [poly_wo_overlaps] + elif isinstance(poly_wo_overlaps, MultiPolygon): + poly_list = poly_wo_overlaps.geoms + else: + raise NotImplementedError("Unknown geometric structure {}".format(poly_wo_overlaps)) + + poly_coord = [] + for poly_el in poly_list: + # COCO API can work only with exterior boundaries now, hence we store only them. + # TODO: store both exterior and interior boundaries once other parts of the + # codebase support holes in polygons. 
+ poly_coord.append(list(chain(*poly_el.exterior.coords))) + anno["segmentation"] = poly_coord + (xmin, ymin, xmax, ymax) = poly_wo_overlaps.bounds + + anno["bbox"] = (xmin, ymin, xmax, ymax) + anno["bbox_mode"] = BoxMode.XYXY_ABS + + annos.append(anno) + else: + # See also the official annotation parsing scripts at + # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/instances2dict.py # noqa + with PathManager.open(instance_id_file, "rb") as f: + inst_image = np.asarray(Image.open(f), order="F") + # ids < 24 are stuff labels (filtering them first is about 5% faster) + flattened_ids = np.unique(inst_image[inst_image >= 24]) + + ret = { + "file_name": image_file, + "image_id": os.path.basename(image_file), + "height": inst_image.shape[0], + "width": inst_image.shape[1], + } + + for instance_id in flattened_ids: + # For non-crowd annotations, instance_id // 1000 is the label_id + # Crowd annotations have <1000 instance ids + label_id = instance_id // 1000 if instance_id >= 1000 else instance_id + label = id2label[label_id] + if not label.hasInstances or label.ignoreInEval: + continue + + anno = {} + anno["iscrowd"] = instance_id < 1000 + anno["category_id"] = label.id + + mask = np.asarray(inst_image == instance_id, dtype=np.uint8, order="F") + + inds = np.nonzero(mask) + ymin, ymax = inds[0].min(), inds[0].max() + xmin, xmax = inds[1].min(), inds[1].max() + anno["bbox"] = (xmin, ymin, xmax, ymax) + if xmax <= xmin or ymax <= ymin: + continue + anno["bbox_mode"] = BoxMode.XYXY_ABS + if to_polygons: + # This conversion comes from D4809743 and D5171122, + # when Mask-RCNN was first developed. + contours = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[ + -2 + ] + polygons = [c.reshape(-1).tolist() for c in contours if len(c) >= 3] + # opencv's can produce invalid polygons + if len(polygons) == 0: + continue + anno["segmentation"] = polygons + else: + anno["segmentation"] = mask_util.encode(mask[:, :, None])[0] + annos.append(anno) + ret["annotations"] = annos + return ret + + +if __name__ == "__main__": + """ + Test the cityscapes dataset loader. 
+ + Usage: + python -m detectron2.data.datasets.cityscapes \ + cityscapes/leftImg8bit/train cityscapes/gtFine/train + """ + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("image_dir") + parser.add_argument("gt_dir") + parser.add_argument("--type", choices=["instance", "semantic"], default="instance") + args = parser.parse_args() + from annotator.oneformer.detectron2.data.catalog import Metadata + from annotator.oneformer.detectron2.utils.visualizer import Visualizer + from cityscapesscripts.helpers.labels import labels + + logger = setup_logger(name=__name__) + + dirname = "cityscapes-data-vis" + os.makedirs(dirname, exist_ok=True) + + if args.type == "instance": + dicts = load_cityscapes_instances( + args.image_dir, args.gt_dir, from_json=True, to_polygons=True + ) + logger.info("Done loading {} samples.".format(len(dicts))) + + thing_classes = [k.name for k in labels if k.hasInstances and not k.ignoreInEval] + meta = Metadata().set(thing_classes=thing_classes) + + else: + dicts = load_cityscapes_semantic(args.image_dir, args.gt_dir) + logger.info("Done loading {} samples.".format(len(dicts))) + + stuff_classes = [k.name for k in labels if k.trainId != 255] + stuff_colors = [k.color for k in labels if k.trainId != 255] + meta = Metadata().set(stuff_classes=stuff_classes, stuff_colors=stuff_colors) + + for d in dicts: + img = np.array(Image.open(PathManager.open(d["file_name"], "rb"))) + visualizer = Visualizer(img, metadata=meta) + vis = visualizer.draw_dataset_dict(d) + # cv2.imshow("a", vis.get_image()[:, :, ::-1]) + # cv2.waitKey() + fpath = os.path.join(dirname, os.path.basename(d["file_name"])) + vis.save(fpath) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/datasets/cityscapes_panoptic.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/datasets/cityscapes_panoptic.py new file mode 100644 index 0000000000000000000000000000000000000000..7ce9ec48f673dadf3f5b4ae0592fc82415d9f925 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/datasets/cityscapes_panoptic.py @@ -0,0 +1,187 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import json +import logging +import os + +from annotator.oneformer.detectron2.data import DatasetCatalog, MetadataCatalog +from annotator.oneformer.detectron2.data.datasets.builtin_meta import CITYSCAPES_CATEGORIES +from annotator.oneformer.detectron2.utils.file_io import PathManager + +""" +This file contains functions to register the Cityscapes panoptic dataset to the DatasetCatalog. 
+""" + + +logger = logging.getLogger(__name__) + + +def get_cityscapes_panoptic_files(image_dir, gt_dir, json_info): + files = [] + # scan through the directory + cities = PathManager.ls(image_dir) + logger.info(f"{len(cities)} cities found in '{image_dir}'.") + image_dict = {} + for city in cities: + city_img_dir = os.path.join(image_dir, city) + for basename in PathManager.ls(city_img_dir): + image_file = os.path.join(city_img_dir, basename) + + suffix = "_leftImg8bit.png" + assert basename.endswith(suffix), basename + basename = os.path.basename(basename)[: -len(suffix)] + + image_dict[basename] = image_file + + for ann in json_info["annotations"]: + image_file = image_dict.get(ann["image_id"], None) + assert image_file is not None, "No image {} found for annotation {}".format( + ann["image_id"], ann["file_name"] + ) + label_file = os.path.join(gt_dir, ann["file_name"]) + segments_info = ann["segments_info"] + + files.append((image_file, label_file, segments_info)) + + assert len(files), "No images found in {}".format(image_dir) + assert PathManager.isfile(files[0][0]), files[0][0] + assert PathManager.isfile(files[0][1]), files[0][1] + return files + + +def load_cityscapes_panoptic(image_dir, gt_dir, gt_json, meta): + """ + Args: + image_dir (str): path to the raw dataset. e.g., "~/cityscapes/leftImg8bit/train". + gt_dir (str): path to the raw annotations. e.g., + "~/cityscapes/gtFine/cityscapes_panoptic_train". + gt_json (str): path to the json file. e.g., + "~/cityscapes/gtFine/cityscapes_panoptic_train.json". + meta (dict): dictionary containing "thing_dataset_id_to_contiguous_id" + and "stuff_dataset_id_to_contiguous_id" to map category ids to + contiguous ids for training. + + Returns: + list[dict]: a list of dicts in Detectron2 standard format. (See + `Using Custom Datasets `_ ) + """ + + def _convert_category_id(segment_info, meta): + if segment_info["category_id"] in meta["thing_dataset_id_to_contiguous_id"]: + segment_info["category_id"] = meta["thing_dataset_id_to_contiguous_id"][ + segment_info["category_id"] + ] + else: + segment_info["category_id"] = meta["stuff_dataset_id_to_contiguous_id"][ + segment_info["category_id"] + ] + return segment_info + + assert os.path.exists( + gt_json + ), "Please run `python cityscapesscripts/preparation/createPanopticImgs.py` to generate label files." # noqa + with open(gt_json) as f: + json_info = json.load(f) + files = get_cityscapes_panoptic_files(image_dir, gt_dir, json_info) + ret = [] + for image_file, label_file, segments_info in files: + sem_label_file = ( + image_file.replace("leftImg8bit", "gtFine").split(".")[0] + "_labelTrainIds.png" + ) + segments_info = [_convert_category_id(x, meta) for x in segments_info] + ret.append( + { + "file_name": image_file, + "image_id": "_".join( + os.path.splitext(os.path.basename(image_file))[0].split("_")[:3] + ), + "sem_seg_file_name": sem_label_file, + "pan_seg_file_name": label_file, + "segments_info": segments_info, + } + ) + assert len(ret), f"No images found in {image_dir}!" 
+ assert PathManager.isfile( + ret[0]["sem_seg_file_name"] + ), "Please generate labelTrainIds.png with cityscapesscripts/preparation/createTrainIdLabelImgs.py" # noqa + assert PathManager.isfile( + ret[0]["pan_seg_file_name"] + ), "Please generate panoptic annotation with python cityscapesscripts/preparation/createPanopticImgs.py" # noqa + return ret + + +_RAW_CITYSCAPES_PANOPTIC_SPLITS = { + "cityscapes_fine_panoptic_train": ( + "cityscapes/leftImg8bit/train", + "cityscapes/gtFine/cityscapes_panoptic_train", + "cityscapes/gtFine/cityscapes_panoptic_train.json", + ), + "cityscapes_fine_panoptic_val": ( + "cityscapes/leftImg8bit/val", + "cityscapes/gtFine/cityscapes_panoptic_val", + "cityscapes/gtFine/cityscapes_panoptic_val.json", + ), + # "cityscapes_fine_panoptic_test": not supported yet +} + + +def register_all_cityscapes_panoptic(root): + meta = {} + # The following metadata maps contiguous id from [0, #thing categories + + # #stuff categories) to their names and colors. We have to replica of the + # same name and color under "thing_*" and "stuff_*" because the current + # visualization function in D2 handles thing and class classes differently + # due to some heuristic used in Panoptic FPN. We keep the same naming to + # enable reusing existing visualization functions. + thing_classes = [k["name"] for k in CITYSCAPES_CATEGORIES] + thing_colors = [k["color"] for k in CITYSCAPES_CATEGORIES] + stuff_classes = [k["name"] for k in CITYSCAPES_CATEGORIES] + stuff_colors = [k["color"] for k in CITYSCAPES_CATEGORIES] + + meta["thing_classes"] = thing_classes + meta["thing_colors"] = thing_colors + meta["stuff_classes"] = stuff_classes + meta["stuff_colors"] = stuff_colors + + # There are three types of ids in cityscapes panoptic segmentation: + # (1) category id: like semantic segmentation, it is the class id for each + # pixel. Since there are some classes not used in evaluation, the category + # id is not always contiguous and thus we have two set of category ids: + # - original category id: category id in the original dataset, mainly + # used for evaluation. + # - contiguous category id: [0, #classes), in order to train the classifier + # (2) instance id: this id is used to differentiate different instances from + # the same category. For "stuff" classes, the instance id is always 0; for + # "thing" classes, the instance id starts from 1 and 0 is reserved for + # ignored instances (e.g. crowd annotation). + # (3) panoptic id: this is the compact id that encode both category and + # instance id by: category_id * 1000 + instance_id. 
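The panoptic id convention in (3) can be checked with a small worked example, here using the Cityscapes "car" category (dataset id 26) purely for illustration:

```python
# panoptic id = category_id * 1000 + instance_id
category_id, instance_id = 26, 3          # e.g. the third "car" instance in an image
panoptic_id = category_id * 1000 + instance_id
print(panoptic_id)                        # 26003

# Decoding is the inverse: integer division and modulo.
print(panoptic_id // 1000)                # 26 -> category id
print(panoptic_id % 1000)                 # 3  -> instance id
```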
+ thing_dataset_id_to_contiguous_id = {} + stuff_dataset_id_to_contiguous_id = {} + + for k in CITYSCAPES_CATEGORIES: + if k["isthing"] == 1: + thing_dataset_id_to_contiguous_id[k["id"]] = k["trainId"] + else: + stuff_dataset_id_to_contiguous_id[k["id"]] = k["trainId"] + + meta["thing_dataset_id_to_contiguous_id"] = thing_dataset_id_to_contiguous_id + meta["stuff_dataset_id_to_contiguous_id"] = stuff_dataset_id_to_contiguous_id + + for key, (image_dir, gt_dir, gt_json) in _RAW_CITYSCAPES_PANOPTIC_SPLITS.items(): + image_dir = os.path.join(root, image_dir) + gt_dir = os.path.join(root, gt_dir) + gt_json = os.path.join(root, gt_json) + + DatasetCatalog.register( + key, lambda x=image_dir, y=gt_dir, z=gt_json: load_cityscapes_panoptic(x, y, z, meta) + ) + MetadataCatalog.get(key).set( + panoptic_root=gt_dir, + image_root=image_dir, + panoptic_json=gt_json, + gt_dir=gt_dir.replace("cityscapes_panoptic_", ""), + evaluator_type="cityscapes_panoptic_seg", + ignore_label=255, + label_divisor=1000, + **meta, + ) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/datasets/coco.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/datasets/coco.py new file mode 100644 index 0000000000000000000000000000000000000000..1a7cdba855979f9453904b1d6f0aedd47dd81200 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/datasets/coco.py @@ -0,0 +1,539 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import contextlib +import datetime +import io +import json +import logging +import numpy as np +import os +import shutil +import annotator.oneformer.pycocotools.mask as mask_util +from fvcore.common.timer import Timer +from iopath.common.file_io import file_lock +from PIL import Image + +from annotator.oneformer.detectron2.structures import Boxes, BoxMode, PolygonMasks, RotatedBoxes +from annotator.oneformer.detectron2.utils.file_io import PathManager + +from .. import DatasetCatalog, MetadataCatalog + +""" +This file contains functions to parse COCO-format annotations into dicts in "Detectron2 format". +""" + + +logger = logging.getLogger(__name__) + +__all__ = ["load_coco_json", "load_sem_seg", "convert_to_coco_json", "register_coco_instances"] + + +def load_coco_json(json_file, image_root, dataset_name=None, extra_annotation_keys=None): + """ + Load a json file with COCO's instances annotation format. + Currently supports instance detection, instance segmentation, + and person keypoints annotations. + + Args: + json_file (str): full path to the json file in COCO instances annotation format. + image_root (str or path-like): the directory where the images in this json file exists. + dataset_name (str or None): the name of the dataset (e.g., coco_2017_train). + When provided, this function will also do the following: + + * Put "thing_classes" into the metadata associated with this dataset. + * Map the category ids into a contiguous range (needed by standard dataset format), + and add "thing_dataset_id_to_contiguous_id" to the metadata associated + with this dataset. + + This option should usually be provided, unless users need to load + the original json content and apply more processing manually. + extra_annotation_keys (list[str]): list of per-annotation keys that should also be + loaded into the dataset dict (besides "iscrowd", "bbox", "keypoints", + "category_id", "segmentation"). The values for these keys will be returned as-is. 
+ For example, the densepose annotations are loaded in this way. + + Returns: + list[dict]: a list of dicts in Detectron2 standard dataset dicts format (See + `Using Custom Datasets `_ ) when `dataset_name` is not None. + If `dataset_name` is None, the returned `category_ids` may be + incontiguous and may not conform to the Detectron2 standard format. + + Notes: + 1. This function does not read the image files. + The results do not have the "image" field. + """ + from annotator.oneformer.pycocotools.coco import COCO + + timer = Timer() + json_file = PathManager.get_local_path(json_file) + with contextlib.redirect_stdout(io.StringIO()): + coco_api = COCO(json_file) + if timer.seconds() > 1: + logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds())) + + id_map = None + if dataset_name is not None: + meta = MetadataCatalog.get(dataset_name) + cat_ids = sorted(coco_api.getCatIds()) + cats = coco_api.loadCats(cat_ids) + # The categories in a custom json file may not be sorted. + thing_classes = [c["name"] for c in sorted(cats, key=lambda x: x["id"])] + meta.thing_classes = thing_classes + + # In COCO, certain category ids are artificially removed, + # and by convention they are always ignored. + # We deal with COCO's id issue and translate + # the category ids to contiguous ids in [0, 80). + + # It works by looking at the "categories" field in the json, therefore + # if users' own json also have incontiguous ids, we'll + # apply this mapping as well but print a warning. + if not (min(cat_ids) == 1 and max(cat_ids) == len(cat_ids)): + if "coco" not in dataset_name: + logger.warning( + """ +Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you. +""" + ) + id_map = {v: i for i, v in enumerate(cat_ids)} + meta.thing_dataset_id_to_contiguous_id = id_map + + # sort indices for reproducible results + img_ids = sorted(coco_api.imgs.keys()) + # imgs is a list of dicts, each looks something like: + # {'license': 4, + # 'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg', + # 'file_name': 'COCO_val2014_000000001268.jpg', + # 'height': 427, + # 'width': 640, + # 'date_captured': '2013-11-17 05:57:24', + # 'id': 1268} + imgs = coco_api.loadImgs(img_ids) + # anns is a list[list[dict]], where each dict is an annotation + # record for an object. The inner list enumerates the objects in an image + # and the outer list enumerates over images. Example of anns[0]: + # [{'segmentation': [[192.81, + # 247.09, + # ... + # 219.03, + # 249.06]], + # 'area': 1035.749, + # 'iscrowd': 0, + # 'image_id': 1268, + # 'bbox': [192.81, 224.8, 74.73, 33.43], + # 'category_id': 16, + # 'id': 42986}, + # ...] + anns = [coco_api.imgToAnns[img_id] for img_id in img_ids] + total_num_valid_anns = sum([len(x) for x in anns]) + total_num_anns = len(coco_api.anns) + if total_num_valid_anns < total_num_anns: + logger.warning( + f"{json_file} contains {total_num_anns} annotations, but only " + f"{total_num_valid_anns} of them match to images in the file." + ) + + if "minival" not in json_file: + # The popular valminusminival & minival annotations for COCO2014 contain this bug. + # However the ratio of buggy annotations there is tiny and does not affect accuracy. + # Therefore we explicitly white-list them. 
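The contiguous id translation described above amounts to enumerating the sorted category ids. A small sketch with a handful of real COCO ids (id 12 is one of the ids the dataset skips) shows both directions of the mapping; the reverse dictionary is what convert_to_coco_dict later uses to restore the original ids:

```python
cat_ids = sorted([1, 2, 3, 11, 13])              # note the gap: 12 is unused in COCO
id_map = {v: i for i, v in enumerate(cat_ids)}   # dataset id -> contiguous id
print(id_map)                                    # {1: 0, 2: 1, 3: 2, 11: 3, 13: 4}

reverse_id_mapping = {v: k for k, v in id_map.items()}
print(reverse_id_mapping[4])                     # 13 -> back to the original dataset id
```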
+ ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image] + assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique!".format( + json_file + ) + + imgs_anns = list(zip(imgs, anns)) + logger.info("Loaded {} images in COCO format from {}".format(len(imgs_anns), json_file)) + + dataset_dicts = [] + + ann_keys = ["iscrowd", "bbox", "keypoints", "category_id"] + (extra_annotation_keys or []) + + num_instances_without_valid_segmentation = 0 + + for (img_dict, anno_dict_list) in imgs_anns: + record = {} + record["file_name"] = os.path.join(image_root, img_dict["file_name"]) + record["height"] = img_dict["height"] + record["width"] = img_dict["width"] + image_id = record["image_id"] = img_dict["id"] + + objs = [] + for anno in anno_dict_list: + # Check that the image_id in this annotation is the same as + # the image_id we're looking at. + # This fails only when the data parsing logic or the annotation file is buggy. + + # The original COCO valminusminival2014 & minival2014 annotation files + # actually contains bugs that, together with certain ways of using COCO API, + # can trigger this assertion. + assert anno["image_id"] == image_id + + assert anno.get("ignore", 0) == 0, '"ignore" in COCO json file is not supported.' + + obj = {key: anno[key] for key in ann_keys if key in anno} + if "bbox" in obj and len(obj["bbox"]) == 0: + raise ValueError( + f"One annotation of image {image_id} contains empty 'bbox' value! " + "This json does not have valid COCO format." + ) + + segm = anno.get("segmentation", None) + if segm: # either list[list[float]] or dict(RLE) + if isinstance(segm, dict): + if isinstance(segm["counts"], list): + # convert to compressed RLE + segm = mask_util.frPyObjects(segm, *segm["size"]) + else: + # filter out invalid polygons (< 3 points) + segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6] + if len(segm) == 0: + num_instances_without_valid_segmentation += 1 + continue # ignore this instance + obj["segmentation"] = segm + + keypts = anno.get("keypoints", None) + if keypts: # list[int] + for idx, v in enumerate(keypts): + if idx % 3 != 2: + # COCO's segmentation coordinates are floating points in [0, H or W], + # but keypoint coordinates are integers in [0, H-1 or W-1] + # Therefore we assume the coordinates are "pixel indices" and + # add 0.5 to convert to floating point coordinates. + keypts[idx] = v + 0.5 + obj["keypoints"] = keypts + + obj["bbox_mode"] = BoxMode.XYWH_ABS + if id_map: + annotation_category_id = obj["category_id"] + try: + obj["category_id"] = id_map[annotation_category_id] + except KeyError as e: + raise KeyError( + f"Encountered category_id={annotation_category_id} " + "but this id does not exist in 'categories' of the json file." + ) from e + objs.append(obj) + record["annotations"] = objs + dataset_dicts.append(record) + + if num_instances_without_valid_segmentation > 0: + logger.warning( + "Filtered out {} instances without valid segmentation. ".format( + num_instances_without_valid_segmentation + ) + + "There might be issues in your dataset generation process. Please " + "check https://detectron2.readthedocs.io/en/latest/tutorials/datasets.html carefully" + ) + return dataset_dicts + + +def load_sem_seg(gt_root, image_root, gt_ext="png", image_ext="jpg"): + """ + Load semantic segmentation datasets. All files under "gt_root" with "gt_ext" extension are + treated as ground truth annotations and all files under "image_root" with "image_ext" extension + as input images. 
Ground truth and input images are matched using file paths relative to + "gt_root" and "image_root" respectively without taking into account file extensions. + This works for COCO as well as some other datasets. + + Args: + gt_root (str): full path to ground truth semantic segmentation files. Semantic segmentation + annotations are stored as images with integer values in pixels that represent + corresponding semantic labels. + image_root (str): the directory where the input images are. + gt_ext (str): file extension for ground truth annotations. + image_ext (str): file extension for input images. + + Returns: + list[dict]: + a list of dicts in detectron2 standard format without instance-level + annotation. + + Notes: + 1. This function does not read the image and ground truth files. + The results do not have the "image" and "sem_seg" fields. + """ + + # We match input images with ground truth based on their relative filepaths (without file + # extensions) starting from 'image_root' and 'gt_root' respectively. + def file2id(folder_path, file_path): + # extract relative path starting from `folder_path` + image_id = os.path.normpath(os.path.relpath(file_path, start=folder_path)) + # remove file extension + image_id = os.path.splitext(image_id)[0] + return image_id + + input_files = sorted( + (os.path.join(image_root, f) for f in PathManager.ls(image_root) if f.endswith(image_ext)), + key=lambda file_path: file2id(image_root, file_path), + ) + gt_files = sorted( + (os.path.join(gt_root, f) for f in PathManager.ls(gt_root) if f.endswith(gt_ext)), + key=lambda file_path: file2id(gt_root, file_path), + ) + + assert len(gt_files) > 0, "No annotations found in {}.".format(gt_root) + + # Use the intersection, so that val2017_100 annotations can run smoothly with val2017 images + if len(input_files) != len(gt_files): + logger.warn( + "Directory {} and {} has {} and {} files, respectively.".format( + image_root, gt_root, len(input_files), len(gt_files) + ) + ) + input_basenames = [os.path.basename(f)[: -len(image_ext)] for f in input_files] + gt_basenames = [os.path.basename(f)[: -len(gt_ext)] for f in gt_files] + intersect = list(set(input_basenames) & set(gt_basenames)) + # sort, otherwise each worker may obtain a list[dict] in different order + intersect = sorted(intersect) + logger.warn("Will use their intersection of {} files.".format(len(intersect))) + input_files = [os.path.join(image_root, f + image_ext) for f in intersect] + gt_files = [os.path.join(gt_root, f + gt_ext) for f in intersect] + + logger.info( + "Loaded {} images with semantic segmentation from {}".format(len(input_files), image_root) + ) + + dataset_dicts = [] + for (img_path, gt_path) in zip(input_files, gt_files): + record = {} + record["file_name"] = img_path + record["sem_seg_file_name"] = gt_path + dataset_dicts.append(record) + + return dataset_dicts + + +def convert_to_coco_dict(dataset_name): + """ + Convert an instance detection/segmentation or keypoint detection dataset + in detectron2's standard format into COCO json format. + + Generic dataset description can be found here: + https://detectron2.readthedocs.io/tutorials/datasets.html#register-a-dataset + + COCO data format description can be found here: + http://cocodataset.org/#format-data + + Args: + dataset_name (str): + name of the source dataset + Must be registered in DatastCatalog and in detectron2's standard format. 
+ Must have corresponding metadata "thing_classes" + Returns: + coco_dict: serializable dict in COCO json format + """ + + dataset_dicts = DatasetCatalog.get(dataset_name) + metadata = MetadataCatalog.get(dataset_name) + + # unmap the category mapping ids for COCO + if hasattr(metadata, "thing_dataset_id_to_contiguous_id"): + reverse_id_mapping = {v: k for k, v in metadata.thing_dataset_id_to_contiguous_id.items()} + reverse_id_mapper = lambda contiguous_id: reverse_id_mapping[contiguous_id] # noqa + else: + reverse_id_mapper = lambda contiguous_id: contiguous_id # noqa + + categories = [ + {"id": reverse_id_mapper(id), "name": name} + for id, name in enumerate(metadata.thing_classes) + ] + + logger.info("Converting dataset dicts into COCO format") + coco_images = [] + coco_annotations = [] + + for image_id, image_dict in enumerate(dataset_dicts): + coco_image = { + "id": image_dict.get("image_id", image_id), + "width": int(image_dict["width"]), + "height": int(image_dict["height"]), + "file_name": str(image_dict["file_name"]), + } + coco_images.append(coco_image) + + anns_per_image = image_dict.get("annotations", []) + for annotation in anns_per_image: + # create a new dict with only COCO fields + coco_annotation = {} + + # COCO requirement: XYWH box format for axis-align and XYWHA for rotated + bbox = annotation["bbox"] + if isinstance(bbox, np.ndarray): + if bbox.ndim != 1: + raise ValueError(f"bbox has to be 1-dimensional. Got shape={bbox.shape}.") + bbox = bbox.tolist() + if len(bbox) not in [4, 5]: + raise ValueError(f"bbox has to has length 4 or 5. Got {bbox}.") + from_bbox_mode = annotation["bbox_mode"] + to_bbox_mode = BoxMode.XYWH_ABS if len(bbox) == 4 else BoxMode.XYWHA_ABS + bbox = BoxMode.convert(bbox, from_bbox_mode, to_bbox_mode) + + # COCO requirement: instance area + if "segmentation" in annotation: + # Computing areas for instances by counting the pixels + segmentation = annotation["segmentation"] + # TODO: check segmentation type: RLE, BinaryMask or Polygon + if isinstance(segmentation, list): + polygons = PolygonMasks([segmentation]) + area = polygons.area()[0].item() + elif isinstance(segmentation, dict): # RLE + area = mask_util.area(segmentation).item() + else: + raise TypeError(f"Unknown segmentation type {type(segmentation)}!") + else: + # Computing areas using bounding boxes + if to_bbox_mode == BoxMode.XYWH_ABS: + bbox_xy = BoxMode.convert(bbox, to_bbox_mode, BoxMode.XYXY_ABS) + area = Boxes([bbox_xy]).area()[0].item() + else: + area = RotatedBoxes([bbox]).area()[0].item() + + if "keypoints" in annotation: + keypoints = annotation["keypoints"] # list[int] + for idx, v in enumerate(keypoints): + if idx % 3 != 2: + # COCO's segmentation coordinates are floating points in [0, H or W], + # but keypoint coordinates are integers in [0, H-1 or W-1] + # For COCO format consistency we substract 0.5 + # https://github.com/facebookresearch/detectron2/pull/175#issuecomment-551202163 + keypoints[idx] = v - 0.5 + if "num_keypoints" in annotation: + num_keypoints = annotation["num_keypoints"] + else: + num_keypoints = sum(kp > 0 for kp in keypoints[2::3]) + + # COCO requirement: + # linking annotations to images + # "id" field must start with 1 + coco_annotation["id"] = len(coco_annotations) + 1 + coco_annotation["image_id"] = coco_image["id"] + coco_annotation["bbox"] = [round(float(x), 3) for x in bbox] + coco_annotation["area"] = float(area) + coco_annotation["iscrowd"] = int(annotation.get("iscrowd", 0)) + coco_annotation["category_id"] = 
int(reverse_id_mapper(annotation["category_id"])) + + # Add optional fields + if "keypoints" in annotation: + coco_annotation["keypoints"] = keypoints + coco_annotation["num_keypoints"] = num_keypoints + + if "segmentation" in annotation: + seg = coco_annotation["segmentation"] = annotation["segmentation"] + if isinstance(seg, dict): # RLE + counts = seg["counts"] + if not isinstance(counts, str): + # make it json-serializable + seg["counts"] = counts.decode("ascii") + + coco_annotations.append(coco_annotation) + + logger.info( + "Conversion finished, " + f"#images: {len(coco_images)}, #annotations: {len(coco_annotations)}" + ) + + info = { + "date_created": str(datetime.datetime.now()), + "description": "Automatically generated COCO json file for Detectron2.", + } + coco_dict = {"info": info, "images": coco_images, "categories": categories, "licenses": None} + if len(coco_annotations) > 0: + coco_dict["annotations"] = coco_annotations + return coco_dict + + +def convert_to_coco_json(dataset_name, output_file, allow_cached=True): + """ + Converts dataset into COCO format and saves it to a json file. + dataset_name must be registered in DatasetCatalog and in detectron2's standard format. + + Args: + dataset_name: + reference from the config file to the catalogs + must be registered in DatasetCatalog and in detectron2's standard format + output_file: path of json file that will be saved to + allow_cached: if json file is already present then skip conversion + """ + + # TODO: The dataset or the conversion script *may* change, + # a checksum would be useful for validating the cached data + + PathManager.mkdirs(os.path.dirname(output_file)) + with file_lock(output_file): + if PathManager.exists(output_file) and allow_cached: + logger.warning( + f"Using previously cached COCO format annotations at '{output_file}'. " + "You need to clear the cache file if your dataset has been modified." + ) + else: + logger.info(f"Converting annotations of dataset '{dataset_name}' to COCO format ...)") + coco_dict = convert_to_coco_dict(dataset_name) + + logger.info(f"Caching COCO format annotations at '{output_file}' ...") + tmp_file = output_file + ".tmp" + with PathManager.open(tmp_file, "w") as f: + json.dump(coco_dict, f) + shutil.move(tmp_file, output_file) + + +def register_coco_instances(name, metadata, json_file, image_root): + """ + Register a dataset in COCO's json annotation format for + instance detection, instance segmentation and keypoint detection. + (i.e., Type 1 and 2 in http://cocodataset.org/#format-data. + `instances*.json` and `person_keypoints*.json` in the dataset). + + This is an example of how to register a new dataset. + You can do something similar to this function, to register new datasets. + + Args: + name (str): the name that identifies a dataset, e.g. "coco_2014_train". + metadata (dict): extra metadata associated with this dataset. You can + leave it as an empty dict. + json_file (str): path to the json instance annotation file. + image_root (str or path-like): directory which contains all the images. + """ + assert isinstance(name, str), name + assert isinstance(json_file, (str, os.PathLike)), json_file + assert isinstance(image_root, (str, os.PathLike)), image_root + # 1. register a function which returns dicts + DatasetCatalog.register(name, lambda: load_coco_json(json_file, image_root, name)) + + # 2. 
Optionally, add metadata about this dataset, + # since they might be useful in evaluation, visualization or logging + MetadataCatalog.get(name).set( + json_file=json_file, image_root=image_root, evaluator_type="coco", **metadata + ) + + +if __name__ == "__main__": + """ + Test the COCO json dataset loader. + + Usage: + python -m detectron2.data.datasets.coco \ + path/to/json path/to/image_root dataset_name + + "dataset_name" can be "coco_2014_minival_100", or other + pre-registered ones + """ + from annotator.oneformer.detectron2.utils.logger import setup_logger + from annotator.oneformer.detectron2.utils.visualizer import Visualizer + import annotator.oneformer.detectron2.data.datasets # noqa # add pre-defined metadata + import sys + + logger = setup_logger(name=__name__) + assert sys.argv[3] in DatasetCatalog.list() + meta = MetadataCatalog.get(sys.argv[3]) + + dicts = load_coco_json(sys.argv[1], sys.argv[2], sys.argv[3]) + logger.info("Done loading {} samples.".format(len(dicts))) + + dirname = "coco-data-vis" + os.makedirs(dirname, exist_ok=True) + for d in dicts: + img = np.array(Image.open(d["file_name"])) + visualizer = Visualizer(img, metadata=meta) + vis = visualizer.draw_dataset_dict(d) + fpath = os.path.join(dirname, os.path.basename(d["file_name"])) + vis.save(fpath) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/datasets/coco_panoptic.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/datasets/coco_panoptic.py new file mode 100644 index 0000000000000000000000000000000000000000..a7180df512c29665222b1a90323ccfa7e7623137 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/datasets/coco_panoptic.py @@ -0,0 +1,228 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import copy +import json +import os + +from annotator.oneformer.detectron2.data import DatasetCatalog, MetadataCatalog +from annotator.oneformer.detectron2.utils.file_io import PathManager + +from .coco import load_coco_json, load_sem_seg + +__all__ = ["register_coco_panoptic", "register_coco_panoptic_separated"] + + +def load_coco_panoptic_json(json_file, image_dir, gt_dir, meta): + """ + Args: + image_dir (str): path to the raw dataset. e.g., "~/coco/train2017". + gt_dir (str): path to the raw annotations. e.g., "~/coco/panoptic_train2017". + json_file (str): path to the json file. e.g., "~/coco/annotations/panoptic_train2017.json". + + Returns: + list[dict]: a list of dicts in Detectron2 standard format. (See + `Using Custom Datasets `_ ) + """ + + def _convert_category_id(segment_info, meta): + if segment_info["category_id"] in meta["thing_dataset_id_to_contiguous_id"]: + segment_info["category_id"] = meta["thing_dataset_id_to_contiguous_id"][ + segment_info["category_id"] + ] + segment_info["isthing"] = True + else: + segment_info["category_id"] = meta["stuff_dataset_id_to_contiguous_id"][ + segment_info["category_id"] + ] + segment_info["isthing"] = False + return segment_info + + with PathManager.open(json_file) as f: + json_info = json.load(f) + + ret = [] + for ann in json_info["annotations"]: + image_id = int(ann["image_id"]) + # TODO: currently we assume image and label has the same filename but + # different extension, and images have extension ".jpg" for COCO. Need + # to make image extension a user-provided argument if we extend this + # function to support other COCO-like datasets. 
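The filename convention assumed in that comment can be illustrated with hypothetical paths: the panoptic label PNG keeps the annotation's own file name, while the image path is derived by swapping the extension for ".jpg":

```python
import os

ann_file_name = "000000000139.png"        # hypothetical entry from the panoptic json
image_dir = "~/coco/train2017"
gt_dir = "~/coco/panoptic_train2017"

image_file = os.path.join(image_dir, os.path.splitext(ann_file_name)[0] + ".jpg")
label_file = os.path.join(gt_dir, ann_file_name)
print(image_file)   # ~/coco/train2017/000000000139.jpg
print(label_file)   # ~/coco/panoptic_train2017/000000000139.png
```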
+ image_file = os.path.join(image_dir, os.path.splitext(ann["file_name"])[0] + ".jpg") + label_file = os.path.join(gt_dir, ann["file_name"]) + segments_info = [_convert_category_id(x, meta) for x in ann["segments_info"]] + ret.append( + { + "file_name": image_file, + "image_id": image_id, + "pan_seg_file_name": label_file, + "segments_info": segments_info, + } + ) + assert len(ret), f"No images found in {image_dir}!" + assert PathManager.isfile(ret[0]["file_name"]), ret[0]["file_name"] + assert PathManager.isfile(ret[0]["pan_seg_file_name"]), ret[0]["pan_seg_file_name"] + return ret + + +def register_coco_panoptic( + name, metadata, image_root, panoptic_root, panoptic_json, instances_json=None +): + """ + Register a "standard" version of COCO panoptic segmentation dataset named `name`. + The dictionaries in this registered dataset follows detectron2's standard format. + Hence it's called "standard". + + Args: + name (str): the name that identifies a dataset, + e.g. "coco_2017_train_panoptic" + metadata (dict): extra metadata associated with this dataset. + image_root (str): directory which contains all the images + panoptic_root (str): directory which contains panoptic annotation images in COCO format + panoptic_json (str): path to the json panoptic annotation file in COCO format + sem_seg_root (none): not used, to be consistent with + `register_coco_panoptic_separated`. + instances_json (str): path to the json instance annotation file + """ + panoptic_name = name + DatasetCatalog.register( + panoptic_name, + lambda: load_coco_panoptic_json(panoptic_json, image_root, panoptic_root, metadata), + ) + MetadataCatalog.get(panoptic_name).set( + panoptic_root=panoptic_root, + image_root=image_root, + panoptic_json=panoptic_json, + json_file=instances_json, + evaluator_type="coco_panoptic_seg", + ignore_label=255, + label_divisor=1000, + **metadata, + ) + + +def register_coco_panoptic_separated( + name, metadata, image_root, panoptic_root, panoptic_json, sem_seg_root, instances_json +): + """ + Register a "separated" version of COCO panoptic segmentation dataset named `name`. + The annotations in this registered dataset will contain both instance annotations and + semantic annotations, each with its own contiguous ids. Hence it's called "separated". + + It follows the setting used by the PanopticFPN paper: + + 1. The instance annotations directly come from polygons in the COCO + instances annotation task, rather than from the masks in the COCO panoptic annotations. + + The two format have small differences: + Polygons in the instance annotations may have overlaps. + The mask annotations are produced by labeling the overlapped polygons + with depth ordering. + + 2. The semantic annotations are converted from panoptic annotations, where + all "things" are assigned a semantic id of 0. + All semantic categories will therefore have ids in contiguous + range [1, #stuff_categories]. + + This function will also register a pure semantic segmentation dataset + named ``name + '_stuffonly'``. + + Args: + name (str): the name that identifies a dataset, + e.g. "coco_2017_train_panoptic" + metadata (dict): extra metadata associated with this dataset. + image_root (str): directory which contains all the images + panoptic_root (str): directory which contains panoptic annotation images + panoptic_json (str): path to the json panoptic annotation file + sem_seg_root (str): directory which contains all the ground truth segmentation annotations. 
+ instances_json (str): path to the json instance annotation file + """ + panoptic_name = name + "_separated" + DatasetCatalog.register( + panoptic_name, + lambda: merge_to_panoptic( + load_coco_json(instances_json, image_root, panoptic_name), + load_sem_seg(sem_seg_root, image_root), + ), + ) + MetadataCatalog.get(panoptic_name).set( + panoptic_root=panoptic_root, + image_root=image_root, + panoptic_json=panoptic_json, + sem_seg_root=sem_seg_root, + json_file=instances_json, # TODO rename + evaluator_type="coco_panoptic_seg", + ignore_label=255, + **metadata, + ) + + semantic_name = name + "_stuffonly" + DatasetCatalog.register(semantic_name, lambda: load_sem_seg(sem_seg_root, image_root)) + MetadataCatalog.get(semantic_name).set( + sem_seg_root=sem_seg_root, + image_root=image_root, + evaluator_type="sem_seg", + ignore_label=255, + **metadata, + ) + + +def merge_to_panoptic(detection_dicts, sem_seg_dicts): + """ + Create dataset dicts for panoptic segmentation, by + merging two dicts using "file_name" field to match their entries. + + Args: + detection_dicts (list[dict]): lists of dicts for object detection or instance segmentation. + sem_seg_dicts (list[dict]): lists of dicts for semantic segmentation. + + Returns: + list[dict] (one per input image): Each dict contains all (key, value) pairs from dicts in + both detection_dicts and sem_seg_dicts that correspond to the same image. + The function assumes that the same key in different dicts has the same value. + """ + results = [] + sem_seg_file_to_entry = {x["file_name"]: x for x in sem_seg_dicts} + assert len(sem_seg_file_to_entry) > 0 + + for det_dict in detection_dicts: + dic = copy.copy(det_dict) + dic.update(sem_seg_file_to_entry[dic["file_name"]]) + results.append(dic) + return results + + +if __name__ == "__main__": + """ + Test the COCO panoptic dataset loader. + + Usage: + python -m detectron2.data.datasets.coco_panoptic \ + path/to/image_root path/to/panoptic_root path/to/panoptic_json dataset_name 10 + + "dataset_name" can be "coco_2017_train_panoptic", or other + pre-registered ones + """ + from annotator.oneformer.detectron2.utils.logger import setup_logger + from annotator.oneformer.detectron2.utils.visualizer import Visualizer + import annotator.oneformer.detectron2.data.datasets # noqa # add pre-defined metadata + import sys + from PIL import Image + import numpy as np + + logger = setup_logger(name=__name__) + assert sys.argv[4] in DatasetCatalog.list() + meta = MetadataCatalog.get(sys.argv[4]) + + dicts = load_coco_panoptic_json(sys.argv[3], sys.argv[1], sys.argv[2], meta.as_dict()) + logger.info("Done loading {} samples.".format(len(dicts))) + + dirname = "coco-data-vis" + os.makedirs(dirname, exist_ok=True) + num_imgs_to_vis = int(sys.argv[5]) + for i, d in enumerate(dicts): + img = np.array(Image.open(d["file_name"])) + visualizer = Visualizer(img, metadata=meta) + vis = visualizer.draw_dataset_dict(d) + fpath = os.path.join(dirname, os.path.basename(d["file_name"])) + vis.save(fpath) + if i + 1 >= num_imgs_to_vis: + break diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/datasets/lvis.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/datasets/lvis.py new file mode 100644 index 0000000000000000000000000000000000000000..6e1e6ecc657e83d6df57da342b0655177402c514 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/datasets/lvis.py @@ -0,0 +1,241 @@ +# Copyright (c) Facebook, Inc. 
and its affiliates. +import logging +import os +from fvcore.common.timer import Timer + +from annotator.oneformer.detectron2.data import DatasetCatalog, MetadataCatalog +from annotator.oneformer.detectron2.structures import BoxMode +from annotator.oneformer.detectron2.utils.file_io import PathManager + +from .builtin_meta import _get_coco_instances_meta +from .lvis_v0_5_categories import LVIS_CATEGORIES as LVIS_V0_5_CATEGORIES +from .lvis_v1_categories import LVIS_CATEGORIES as LVIS_V1_CATEGORIES +from .lvis_v1_category_image_count import LVIS_CATEGORY_IMAGE_COUNT as LVIS_V1_CATEGORY_IMAGE_COUNT + +""" +This file contains functions to parse LVIS-format annotations into dicts in the +"Detectron2 format". +""" + +logger = logging.getLogger(__name__) + +__all__ = ["load_lvis_json", "register_lvis_instances", "get_lvis_instances_meta"] + + +def register_lvis_instances(name, metadata, json_file, image_root): + """ + Register a dataset in LVIS's json annotation format for instance detection and segmentation. + + Args: + name (str): a name that identifies the dataset, e.g. "lvis_v0.5_train". + metadata (dict): extra metadata associated with this dataset. It can be an empty dict. + json_file (str): path to the json instance annotation file. + image_root (str or path-like): directory which contains all the images. + """ + DatasetCatalog.register(name, lambda: load_lvis_json(json_file, image_root, name)) + MetadataCatalog.get(name).set( + json_file=json_file, image_root=image_root, evaluator_type="lvis", **metadata + ) + + +def load_lvis_json(json_file, image_root, dataset_name=None, extra_annotation_keys=None): + """ + Load a json file in LVIS's annotation format. + + Args: + json_file (str): full path to the LVIS json annotation file. + image_root (str): the directory where the images in this json file exists. + dataset_name (str): the name of the dataset (e.g., "lvis_v0.5_train"). + If provided, this function will put "thing_classes" into the metadata + associated with this dataset. + extra_annotation_keys (list[str]): list of per-annotation keys that should also be + loaded into the dataset dict (besides "bbox", "bbox_mode", "category_id", + "segmentation"). The values for these keys will be returned as-is. + + Returns: + list[dict]: a list of dicts in Detectron2 standard format. (See + `Using Custom Datasets `_ ) + + Notes: + 1. This function does not read the image files. + The results do not have the "image" field. + """ + from lvis import LVIS + + json_file = PathManager.get_local_path(json_file) + + timer = Timer() + lvis_api = LVIS(json_file) + if timer.seconds() > 1: + logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds())) + + if dataset_name is not None: + meta = get_lvis_instances_meta(dataset_name) + MetadataCatalog.get(dataset_name).set(**meta) + + # sort indices for reproducible results + img_ids = sorted(lvis_api.imgs.keys()) + # imgs is a list of dicts, each looks something like: + # {'license': 4, + # 'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg', + # 'file_name': 'COCO_val2014_000000001268.jpg', + # 'height': 427, + # 'width': 640, + # 'date_captured': '2013-11-17 05:57:24', + # 'id': 1268} + imgs = lvis_api.load_imgs(img_ids) + # anns is a list[list[dict]], where each dict is an annotation + # record for an object. The inner list enumerates the objects in an image + # and the outer list enumerates over images. Example of anns[0]: + # [{'segmentation': [[192.81, + # 247.09, + # ... 
+ # 219.03, + # 249.06]], + # 'area': 1035.749, + # 'image_id': 1268, + # 'bbox': [192.81, 224.8, 74.73, 33.43], + # 'category_id': 16, + # 'id': 42986}, + # ...] + anns = [lvis_api.img_ann_map[img_id] for img_id in img_ids] + + # Sanity check that each annotation has a unique id + ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image] + assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique".format( + json_file + ) + + imgs_anns = list(zip(imgs, anns)) + + logger.info("Loaded {} images in the LVIS format from {}".format(len(imgs_anns), json_file)) + + if extra_annotation_keys: + logger.info( + "The following extra annotation keys will be loaded: {} ".format(extra_annotation_keys) + ) + else: + extra_annotation_keys = [] + + def get_file_name(img_root, img_dict): + # Determine the path including the split folder ("train2017", "val2017", "test2017") from + # the coco_url field. Example: + # 'coco_url': 'http://images.cocodataset.org/train2017/000000155379.jpg' + split_folder, file_name = img_dict["coco_url"].split("/")[-2:] + return os.path.join(img_root + split_folder, file_name) + + dataset_dicts = [] + + for (img_dict, anno_dict_list) in imgs_anns: + record = {} + record["file_name"] = get_file_name(image_root, img_dict) + record["height"] = img_dict["height"] + record["width"] = img_dict["width"] + record["not_exhaustive_category_ids"] = img_dict.get("not_exhaustive_category_ids", []) + record["neg_category_ids"] = img_dict.get("neg_category_ids", []) + image_id = record["image_id"] = img_dict["id"] + + objs = [] + for anno in anno_dict_list: + # Check that the image_id in this annotation is the same as + # the image_id we're looking at. + # This fails only when the data parsing logic or the annotation file is buggy. + assert anno["image_id"] == image_id + obj = {"bbox": anno["bbox"], "bbox_mode": BoxMode.XYWH_ABS} + # LVIS data loader can be used to load COCO dataset categories. In this case `meta` + # variable will have a field with COCO-specific category mapping. + if dataset_name is not None and "thing_dataset_id_to_contiguous_id" in meta: + obj["category_id"] = meta["thing_dataset_id_to_contiguous_id"][anno["category_id"]] + else: + obj["category_id"] = anno["category_id"] - 1 # Convert 1-indexed to 0-indexed + segm = anno["segmentation"] # list[list[float]] + # filter out invalid polygons (< 3 points) + valid_segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6] + assert len(segm) == len( + valid_segm + ), "Annotation contains an invalid polygon with < 3 points" + assert len(segm) > 0 + obj["segmentation"] = segm + for extra_ann_key in extra_annotation_keys: + obj[extra_ann_key] = anno[extra_ann_key] + objs.append(obj) + record["annotations"] = objs + dataset_dicts.append(record) + + return dataset_dicts + + +def get_lvis_instances_meta(dataset_name): + """ + Load LVIS metadata. + + Args: + dataset_name (str): LVIS dataset name without the split name (e.g., "lvis_v0.5"). 
+ + Returns: + dict: LVIS metadata with keys: thing_classes + """ + if "cocofied" in dataset_name: + return _get_coco_instances_meta() + if "v0.5" in dataset_name: + return _get_lvis_instances_meta_v0_5() + elif "v1" in dataset_name: + return _get_lvis_instances_meta_v1() + raise ValueError("No built-in metadata for dataset {}".format(dataset_name)) + + +def _get_lvis_instances_meta_v0_5(): + assert len(LVIS_V0_5_CATEGORIES) == 1230 + cat_ids = [k["id"] for k in LVIS_V0_5_CATEGORIES] + assert min(cat_ids) == 1 and max(cat_ids) == len( + cat_ids + ), "Category ids are not in [1, #categories], as expected" + # Ensure that the category list is sorted by id + lvis_categories = sorted(LVIS_V0_5_CATEGORIES, key=lambda x: x["id"]) + thing_classes = [k["synonyms"][0] for k in lvis_categories] + meta = {"thing_classes": thing_classes} + return meta + + +def _get_lvis_instances_meta_v1(): + assert len(LVIS_V1_CATEGORIES) == 1203 + cat_ids = [k["id"] for k in LVIS_V1_CATEGORIES] + assert min(cat_ids) == 1 and max(cat_ids) == len( + cat_ids + ), "Category ids are not in [1, #categories], as expected" + # Ensure that the category list is sorted by id + lvis_categories = sorted(LVIS_V1_CATEGORIES, key=lambda x: x["id"]) + thing_classes = [k["synonyms"][0] for k in lvis_categories] + meta = {"thing_classes": thing_classes, "class_image_count": LVIS_V1_CATEGORY_IMAGE_COUNT} + return meta + + +if __name__ == "__main__": + """ + Test the LVIS json dataset loader. + + Usage: + python -m detectron2.data.datasets.lvis \ + path/to/json path/to/image_root dataset_name vis_limit + """ + import sys + import numpy as np + from annotator.oneformer.detectron2.utils.logger import setup_logger + from PIL import Image + import annotator.oneformer.detectron2.data.datasets # noqa # add pre-defined metadata + from annotator.oneformer.detectron2.utils.visualizer import Visualizer + + logger = setup_logger(name=__name__) + meta = MetadataCatalog.get(sys.argv[3]) + + dicts = load_lvis_json(sys.argv[1], sys.argv[2], sys.argv[3]) + logger.info("Done loading {} samples.".format(len(dicts))) + + dirname = "lvis-data-vis" + os.makedirs(dirname, exist_ok=True) + for d in dicts[: int(sys.argv[4])]: + img = np.array(Image.open(d["file_name"])) + visualizer = Visualizer(img, metadata=meta) + vis = visualizer.draw_dataset_dict(d) + fpath = os.path.join(dirname, os.path.basename(d["file_name"])) + vis.save(fpath) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/datasets/lvis_v0_5_categories.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/datasets/lvis_v0_5_categories.py new file mode 100644 index 0000000000000000000000000000000000000000..d3dab6198da614937b08682f4c9edf52bdf1d236 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/datasets/lvis_v0_5_categories.py @@ -0,0 +1,13 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
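+# Each entry of LVIS_CATEGORIES below is a dict with the keys 'frequency'
+# ('r' = rare, 'c' = common, 'f' = frequent), 'id' (1-indexed and contiguous),
+# 'synset', 'synonyms', 'def' (a short natural-language definition) and 'name'.
+# _get_lvis_instances_meta_v0_5() in lvis.py sorts these entries by 'id' and uses
+# synonyms[0] of each entry as the class label exposed via the "thing_classes" metadata.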
+# Autogen with +# with open("lvis_v0.5_val.json", "r") as f: +# a = json.load(f) +# c = a["categories"] +# for x in c: +# del x["image_count"] +# del x["instance_count"] +# LVIS_CATEGORIES = repr(c) + " # noqa" + +# fmt: off +LVIS_CATEGORIES = [{'frequency': 'r', 'id': 1, 'synset': 'acorn.n.01', 'synonyms': ['acorn'], 'def': 'nut from an oak tree', 'name': 'acorn'}, {'frequency': 'c', 'id': 2, 'synset': 'aerosol.n.02', 'synonyms': ['aerosol_can', 'spray_can'], 'def': 'a dispenser that holds a substance under pressure', 'name': 'aerosol_can'}, {'frequency': 'f', 'id': 3, 'synset': 'air_conditioner.n.01', 'synonyms': ['air_conditioner'], 'def': 'a machine that keeps air cool and dry', 'name': 'air_conditioner'}, {'frequency': 'f', 'id': 4, 'synset': 'airplane.n.01', 'synonyms': ['airplane', 'aeroplane'], 'def': 'an aircraft that has a fixed wing and is powered by propellers or jets', 'name': 'airplane'}, {'frequency': 'c', 'id': 5, 'synset': 'alarm_clock.n.01', 'synonyms': ['alarm_clock'], 'def': 'a clock that wakes a sleeper at some preset time', 'name': 'alarm_clock'}, {'frequency': 'c', 'id': 6, 'synset': 'alcohol.n.01', 'synonyms': ['alcohol', 'alcoholic_beverage'], 'def': 'a liquor or brew containing alcohol as the active agent', 'name': 'alcohol'}, {'frequency': 'r', 'id': 7, 'synset': 'alligator.n.02', 'synonyms': ['alligator', 'gator'], 'def': 'amphibious reptiles related to crocodiles but with shorter broader snouts', 'name': 'alligator'}, {'frequency': 'c', 'id': 8, 'synset': 'almond.n.02', 'synonyms': ['almond'], 'def': 'oval-shaped edible seed of the almond tree', 'name': 'almond'}, {'frequency': 'c', 'id': 9, 'synset': 'ambulance.n.01', 'synonyms': ['ambulance'], 'def': 'a vehicle that takes people to and from hospitals', 'name': 'ambulance'}, {'frequency': 'r', 'id': 10, 'synset': 'amplifier.n.01', 'synonyms': ['amplifier'], 'def': 'electronic equipment that increases strength of signals', 'name': 'amplifier'}, {'frequency': 'c', 'id': 11, 'synset': 'anklet.n.03', 'synonyms': ['anklet', 'ankle_bracelet'], 'def': 'an ornament worn around the ankle', 'name': 'anklet'}, {'frequency': 'f', 'id': 12, 'synset': 'antenna.n.01', 'synonyms': ['antenna', 'aerial', 'transmitting_aerial'], 'def': 'an electrical device that sends or receives radio or television signals', 'name': 'antenna'}, {'frequency': 'f', 'id': 13, 'synset': 'apple.n.01', 'synonyms': ['apple'], 'def': 'fruit with red or yellow or green skin and sweet to tart crisp whitish flesh', 'name': 'apple'}, {'frequency': 'r', 'id': 14, 'synset': 'apple_juice.n.01', 'synonyms': ['apple_juice'], 'def': 'the juice of apples', 'name': 'apple_juice'}, {'frequency': 'r', 'id': 15, 'synset': 'applesauce.n.01', 'synonyms': ['applesauce'], 'def': 'puree of stewed apples usually sweetened and spiced', 'name': 'applesauce'}, {'frequency': 'r', 'id': 16, 'synset': 'apricot.n.02', 'synonyms': ['apricot'], 'def': 'downy yellow to rosy-colored fruit resembling a small peach', 'name': 'apricot'}, {'frequency': 'f', 'id': 17, 'synset': 'apron.n.01', 'synonyms': ['apron'], 'def': 'a garment of cloth that is tied about the waist and worn to protect clothing', 'name': 'apron'}, {'frequency': 'c', 'id': 18, 'synset': 'aquarium.n.01', 'synonyms': ['aquarium', 'fish_tank'], 'def': 'a tank/pool/bowl filled with water for keeping live fish and underwater animals', 'name': 'aquarium'}, {'frequency': 'c', 'id': 19, 'synset': 'armband.n.02', 'synonyms': ['armband'], 'def': 'a band worn around the upper arm', 'name': 'armband'}, {'frequency': 'f', 'id': 20, 
'synset': 'armchair.n.01', 'synonyms': ['armchair'], 'def': 'chair with a support on each side for arms', 'name': 'armchair'}, {'frequency': 'r', 'id': 21, 'synset': 'armoire.n.01', 'synonyms': ['armoire'], 'def': 'a large wardrobe or cabinet', 'name': 'armoire'}, {'frequency': 'r', 'id': 22, 'synset': 'armor.n.01', 'synonyms': ['armor', 'armour'], 'def': 'protective covering made of metal and used in combat', 'name': 'armor'}, {'frequency': 'c', 'id': 23, 'synset': 'artichoke.n.02', 'synonyms': ['artichoke'], 'def': 'a thistlelike flower head with edible fleshy leaves and heart', 'name': 'artichoke'}, {'frequency': 'f', 'id': 24, 'synset': 'ashcan.n.01', 'synonyms': ['trash_can', 'garbage_can', 'wastebin', 'dustbin', 'trash_barrel', 'trash_bin'], 'def': 'a bin that holds rubbish until it is collected', 'name': 'trash_can'}, {'frequency': 'c', 'id': 25, 'synset': 'ashtray.n.01', 'synonyms': ['ashtray'], 'def': "a receptacle for the ash from smokers' cigars or cigarettes", 'name': 'ashtray'}, {'frequency': 'c', 'id': 26, 'synset': 'asparagus.n.02', 'synonyms': ['asparagus'], 'def': 'edible young shoots of the asparagus plant', 'name': 'asparagus'}, {'frequency': 'c', 'id': 27, 'synset': 'atomizer.n.01', 'synonyms': ['atomizer', 'atomiser', 'spray', 'sprayer', 'nebulizer', 'nebuliser'], 'def': 'a dispenser that turns a liquid (such as perfume) into a fine mist', 'name': 'atomizer'}, {'frequency': 'c', 'id': 28, 'synset': 'avocado.n.01', 'synonyms': ['avocado'], 'def': 'a pear-shaped fruit with green or blackish skin and rich yellowish pulp enclosing a single large seed', 'name': 'avocado'}, {'frequency': 'c', 'id': 29, 'synset': 'award.n.02', 'synonyms': ['award', 'accolade'], 'def': 'a tangible symbol signifying approval or distinction', 'name': 'award'}, {'frequency': 'f', 'id': 30, 'synset': 'awning.n.01', 'synonyms': ['awning'], 'def': 'a canopy made of canvas to shelter people or things from rain or sun', 'name': 'awning'}, {'frequency': 'r', 'id': 31, 'synset': 'ax.n.01', 'synonyms': ['ax', 'axe'], 'def': 'an edge tool with a heavy bladed head mounted across a handle', 'name': 'ax'}, {'frequency': 'f', 'id': 32, 'synset': 'baby_buggy.n.01', 'synonyms': ['baby_buggy', 'baby_carriage', 'perambulator', 'pram', 'stroller'], 'def': 'a small vehicle with four wheels in which a baby or child is pushed around', 'name': 'baby_buggy'}, {'frequency': 'c', 'id': 33, 'synset': 'backboard.n.01', 'synonyms': ['basketball_backboard'], 'def': 'a raised vertical board with basket attached; used to play basketball', 'name': 'basketball_backboard'}, {'frequency': 'f', 'id': 34, 'synset': 'backpack.n.01', 'synonyms': ['backpack', 'knapsack', 'packsack', 'rucksack', 'haversack'], 'def': 'a bag carried by a strap on your back or shoulder', 'name': 'backpack'}, {'frequency': 'f', 'id': 35, 'synset': 'bag.n.04', 'synonyms': ['handbag', 'purse', 'pocketbook'], 'def': 'a container used for carrying money and small personal items or accessories', 'name': 'handbag'}, {'frequency': 'f', 'id': 36, 'synset': 'bag.n.06', 'synonyms': ['suitcase', 'baggage', 'luggage'], 'def': 'cases used to carry belongings when traveling', 'name': 'suitcase'}, {'frequency': 'c', 'id': 37, 'synset': 'bagel.n.01', 'synonyms': ['bagel', 'beigel'], 'def': 'glazed yeast-raised doughnut-shaped roll with hard crust', 'name': 'bagel'}, {'frequency': 'r', 'id': 38, 'synset': 'bagpipe.n.01', 'synonyms': ['bagpipe'], 'def': 'a tubular wind instrument; the player blows air into a bag and squeezes it out', 'name': 'bagpipe'}, {'frequency': 'r', 
'id': 39, 'synset': 'baguet.n.01', 'synonyms': ['baguet', 'baguette'], 'def': 'narrow French stick loaf', 'name': 'baguet'}, {'frequency': 'r', 'id': 40, 'synset': 'bait.n.02', 'synonyms': ['bait', 'lure'], 'def': 'something used to lure fish or other animals into danger so they can be trapped or killed', 'name': 'bait'}, {'frequency': 'f', 'id': 41, 'synset': 'ball.n.06', 'synonyms': ['ball'], 'def': 'a spherical object used as a plaything', 'name': 'ball'}, {'frequency': 'r', 'id': 42, 'synset': 'ballet_skirt.n.01', 'synonyms': ['ballet_skirt', 'tutu'], 'def': 'very short skirt worn by ballerinas', 'name': 'ballet_skirt'}, {'frequency': 'f', 'id': 43, 'synset': 'balloon.n.01', 'synonyms': ['balloon'], 'def': 'large tough nonrigid bag filled with gas or heated air', 'name': 'balloon'}, {'frequency': 'c', 'id': 44, 'synset': 'bamboo.n.02', 'synonyms': ['bamboo'], 'def': 'woody tropical grass having hollow woody stems', 'name': 'bamboo'}, {'frequency': 'f', 'id': 45, 'synset': 'banana.n.02', 'synonyms': ['banana'], 'def': 'elongated crescent-shaped yellow fruit with soft sweet flesh', 'name': 'banana'}, {'frequency': 'r', 'id': 46, 'synset': 'band_aid.n.01', 'synonyms': ['Band_Aid'], 'def': 'trade name for an adhesive bandage to cover small cuts or blisters', 'name': 'Band_Aid'}, {'frequency': 'c', 'id': 47, 'synset': 'bandage.n.01', 'synonyms': ['bandage'], 'def': 'a piece of soft material that covers and protects an injured part of the body', 'name': 'bandage'}, {'frequency': 'c', 'id': 48, 'synset': 'bandanna.n.01', 'synonyms': ['bandanna', 'bandana'], 'def': 'large and brightly colored handkerchief; often used as a neckerchief', 'name': 'bandanna'}, {'frequency': 'r', 'id': 49, 'synset': 'banjo.n.01', 'synonyms': ['banjo'], 'def': 'a stringed instrument of the guitar family with a long neck and circular body', 'name': 'banjo'}, {'frequency': 'f', 'id': 50, 'synset': 'banner.n.01', 'synonyms': ['banner', 'streamer'], 'def': 'long strip of cloth or paper used for decoration or advertising', 'name': 'banner'}, {'frequency': 'r', 'id': 51, 'synset': 'barbell.n.01', 'synonyms': ['barbell'], 'def': 'a bar to which heavy discs are attached at each end; used in weightlifting', 'name': 'barbell'}, {'frequency': 'r', 'id': 52, 'synset': 'barge.n.01', 'synonyms': ['barge'], 'def': 'a flatbottom boat for carrying heavy loads (especially on canals)', 'name': 'barge'}, {'frequency': 'f', 'id': 53, 'synset': 'barrel.n.02', 'synonyms': ['barrel', 'cask'], 'def': 'a cylindrical container that holds liquids', 'name': 'barrel'}, {'frequency': 'c', 'id': 54, 'synset': 'barrette.n.01', 'synonyms': ['barrette'], 'def': "a pin for holding women's hair in place", 'name': 'barrette'}, {'frequency': 'c', 'id': 55, 'synset': 'barrow.n.03', 'synonyms': ['barrow', 'garden_cart', 'lawn_cart', 'wheelbarrow'], 'def': 'a cart for carrying small loads; has handles and one or more wheels', 'name': 'barrow'}, {'frequency': 'f', 'id': 56, 'synset': 'base.n.03', 'synonyms': ['baseball_base'], 'def': 'a place that the runner must touch before scoring', 'name': 'baseball_base'}, {'frequency': 'f', 'id': 57, 'synset': 'baseball.n.02', 'synonyms': ['baseball'], 'def': 'a ball used in playing baseball', 'name': 'baseball'}, {'frequency': 'f', 'id': 58, 'synset': 'baseball_bat.n.01', 'synonyms': ['baseball_bat'], 'def': 'an implement used in baseball by the batter', 'name': 'baseball_bat'}, {'frequency': 'f', 'id': 59, 'synset': 'baseball_cap.n.01', 'synonyms': ['baseball_cap', 'jockey_cap', 'golf_cap'], 'def': 'a cap with a 
bill', 'name': 'baseball_cap'}, {'frequency': 'f', 'id': 60, 'synset': 'baseball_glove.n.01', 'synonyms': ['baseball_glove', 'baseball_mitt'], 'def': 'the handwear used by fielders in playing baseball', 'name': 'baseball_glove'}, {'frequency': 'f', 'id': 61, 'synset': 'basket.n.01', 'synonyms': ['basket', 'handbasket'], 'def': 'a container that is usually woven and has handles', 'name': 'basket'}, {'frequency': 'c', 'id': 62, 'synset': 'basket.n.03', 'synonyms': ['basketball_hoop'], 'def': 'metal hoop supporting a net through which players try to throw the basketball', 'name': 'basketball_hoop'}, {'frequency': 'c', 'id': 63, 'synset': 'basketball.n.02', 'synonyms': ['basketball'], 'def': 'an inflated ball used in playing basketball', 'name': 'basketball'}, {'frequency': 'r', 'id': 64, 'synset': 'bass_horn.n.01', 'synonyms': ['bass_horn', 'sousaphone', 'tuba'], 'def': 'the lowest brass wind instrument', 'name': 'bass_horn'}, {'frequency': 'r', 'id': 65, 'synset': 'bat.n.01', 'synonyms': ['bat_(animal)'], 'def': 'nocturnal mouselike mammal with forelimbs modified to form membranous wings', 'name': 'bat_(animal)'}, {'frequency': 'f', 'id': 66, 'synset': 'bath_mat.n.01', 'synonyms': ['bath_mat'], 'def': 'a heavy towel or mat to stand on while drying yourself after a bath', 'name': 'bath_mat'}, {'frequency': 'f', 'id': 67, 'synset': 'bath_towel.n.01', 'synonyms': ['bath_towel'], 'def': 'a large towel; to dry yourself after a bath', 'name': 'bath_towel'}, {'frequency': 'c', 'id': 68, 'synset': 'bathrobe.n.01', 'synonyms': ['bathrobe'], 'def': 'a loose-fitting robe of towelling; worn after a bath or swim', 'name': 'bathrobe'}, {'frequency': 'f', 'id': 69, 'synset': 'bathtub.n.01', 'synonyms': ['bathtub', 'bathing_tub'], 'def': 'a large open container that you fill with water and use to wash the body', 'name': 'bathtub'}, {'frequency': 'r', 'id': 70, 'synset': 'batter.n.02', 'synonyms': ['batter_(food)'], 'def': 'a liquid or semiliquid mixture, as of flour, eggs, and milk, used in cooking', 'name': 'batter_(food)'}, {'frequency': 'c', 'id': 71, 'synset': 'battery.n.02', 'synonyms': ['battery'], 'def': 'a portable device that produces electricity', 'name': 'battery'}, {'frequency': 'r', 'id': 72, 'synset': 'beach_ball.n.01', 'synonyms': ['beachball'], 'def': 'large and light ball; for play at the seaside', 'name': 'beachball'}, {'frequency': 'c', 'id': 73, 'synset': 'bead.n.01', 'synonyms': ['bead'], 'def': 'a small ball with a hole through the middle used for ornamentation, jewellery, etc.', 'name': 'bead'}, {'frequency': 'r', 'id': 74, 'synset': 'beaker.n.01', 'synonyms': ['beaker'], 'def': 'a flatbottomed jar made of glass or plastic; used for chemistry', 'name': 'beaker'}, {'frequency': 'c', 'id': 75, 'synset': 'bean_curd.n.01', 'synonyms': ['bean_curd', 'tofu'], 'def': 'cheeselike food made of curdled soybean milk', 'name': 'bean_curd'}, {'frequency': 'c', 'id': 76, 'synset': 'beanbag.n.01', 'synonyms': ['beanbag'], 'def': 'a bag filled with dried beans or similar items; used in games or to sit on', 'name': 'beanbag'}, {'frequency': 'f', 'id': 77, 'synset': 'beanie.n.01', 'synonyms': ['beanie', 'beany'], 'def': 'a small skullcap; formerly worn by schoolboys and college freshmen', 'name': 'beanie'}, {'frequency': 'f', 'id': 78, 'synset': 'bear.n.01', 'synonyms': ['bear'], 'def': 'large carnivorous or omnivorous mammals with shaggy coats and claws', 'name': 'bear'}, {'frequency': 'f', 'id': 79, 'synset': 'bed.n.01', 'synonyms': ['bed'], 'def': 'a piece of furniture that provides a place to 
sleep', 'name': 'bed'}, {'frequency': 'c', 'id': 80, 'synset': 'bedspread.n.01', 'synonyms': ['bedspread', 'bedcover', 'bed_covering', 'counterpane', 'spread'], 'def': 'decorative cover for a bed', 'name': 'bedspread'}, {'frequency': 'f', 'id': 81, 'synset': 'beef.n.01', 'synonyms': ['cow'], 'def': 'cattle that are reared for their meat', 'name': 'cow'}, {'frequency': 'c', 'id': 82, 'synset': 'beef.n.02', 'synonyms': ['beef_(food)', 'boeuf_(food)'], 'def': 'meat from an adult domestic bovine', 'name': 'beef_(food)'}, {'frequency': 'r', 'id': 83, 'synset': 'beeper.n.01', 'synonyms': ['beeper', 'pager'], 'def': 'an device that beeps when the person carrying it is being paged', 'name': 'beeper'}, {'frequency': 'f', 'id': 84, 'synset': 'beer_bottle.n.01', 'synonyms': ['beer_bottle'], 'def': 'a bottle that holds beer', 'name': 'beer_bottle'}, {'frequency': 'c', 'id': 85, 'synset': 'beer_can.n.01', 'synonyms': ['beer_can'], 'def': 'a can that holds beer', 'name': 'beer_can'}, {'frequency': 'r', 'id': 86, 'synset': 'beetle.n.01', 'synonyms': ['beetle'], 'def': 'insect with hard wing covers', 'name': 'beetle'}, {'frequency': 'f', 'id': 87, 'synset': 'bell.n.01', 'synonyms': ['bell'], 'def': 'a hollow device made of metal that makes a ringing sound when struck', 'name': 'bell'}, {'frequency': 'f', 'id': 88, 'synset': 'bell_pepper.n.02', 'synonyms': ['bell_pepper', 'capsicum'], 'def': 'large bell-shaped sweet pepper in green or red or yellow or orange or black varieties', 'name': 'bell_pepper'}, {'frequency': 'f', 'id': 89, 'synset': 'belt.n.02', 'synonyms': ['belt'], 'def': 'a band to tie or buckle around the body (usually at the waist)', 'name': 'belt'}, {'frequency': 'f', 'id': 90, 'synset': 'belt_buckle.n.01', 'synonyms': ['belt_buckle'], 'def': 'the buckle used to fasten a belt', 'name': 'belt_buckle'}, {'frequency': 'f', 'id': 91, 'synset': 'bench.n.01', 'synonyms': ['bench'], 'def': 'a long seat for more than one person', 'name': 'bench'}, {'frequency': 'c', 'id': 92, 'synset': 'beret.n.01', 'synonyms': ['beret'], 'def': 'a cap with no brim or bill; made of soft cloth', 'name': 'beret'}, {'frequency': 'c', 'id': 93, 'synset': 'bib.n.02', 'synonyms': ['bib'], 'def': 'a napkin tied under the chin of a child while eating', 'name': 'bib'}, {'frequency': 'r', 'id': 94, 'synset': 'bible.n.01', 'synonyms': ['Bible'], 'def': 'the sacred writings of the Christian religions', 'name': 'Bible'}, {'frequency': 'f', 'id': 95, 'synset': 'bicycle.n.01', 'synonyms': ['bicycle', 'bike_(bicycle)'], 'def': 'a wheeled vehicle that has two wheels and is moved by foot pedals', 'name': 'bicycle'}, {'frequency': 'f', 'id': 96, 'synset': 'bill.n.09', 'synonyms': ['visor', 'vizor'], 'def': 'a brim that projects to the front to shade the eyes', 'name': 'visor'}, {'frequency': 'c', 'id': 97, 'synset': 'binder.n.03', 'synonyms': ['binder', 'ring-binder'], 'def': 'holds loose papers or magazines', 'name': 'binder'}, {'frequency': 'c', 'id': 98, 'synset': 'binoculars.n.01', 'synonyms': ['binoculars', 'field_glasses', 'opera_glasses'], 'def': 'an optical instrument designed for simultaneous use by both eyes', 'name': 'binoculars'}, {'frequency': 'f', 'id': 99, 'synset': 'bird.n.01', 'synonyms': ['bird'], 'def': 'animal characterized by feathers and wings', 'name': 'bird'}, {'frequency': 'r', 'id': 100, 'synset': 'bird_feeder.n.01', 'synonyms': ['birdfeeder'], 'def': 'an outdoor device that supplies food for wild birds', 'name': 'birdfeeder'}, {'frequency': 'r', 'id': 101, 'synset': 'birdbath.n.01', 'synonyms': ['birdbath'], 
'def': 'an ornamental basin (usually in a garden) for birds to bathe in', 'name': 'birdbath'}, {'frequency': 'c', 'id': 102, 'synset': 'birdcage.n.01', 'synonyms': ['birdcage'], 'def': 'a cage in which a bird can be kept', 'name': 'birdcage'}, {'frequency': 'c', 'id': 103, 'synset': 'birdhouse.n.01', 'synonyms': ['birdhouse'], 'def': 'a shelter for birds', 'name': 'birdhouse'}, {'frequency': 'f', 'id': 104, 'synset': 'birthday_cake.n.01', 'synonyms': ['birthday_cake'], 'def': 'decorated cake served at a birthday party', 'name': 'birthday_cake'}, {'frequency': 'r', 'id': 105, 'synset': 'birthday_card.n.01', 'synonyms': ['birthday_card'], 'def': 'a card expressing a birthday greeting', 'name': 'birthday_card'}, {'frequency': 'r', 'id': 106, 'synset': 'biscuit.n.01', 'synonyms': ['biscuit_(bread)'], 'def': 'small round bread leavened with baking-powder or soda', 'name': 'biscuit_(bread)'}, {'frequency': 'r', 'id': 107, 'synset': 'black_flag.n.01', 'synonyms': ['pirate_flag'], 'def': 'a flag usually bearing a white skull and crossbones on a black background', 'name': 'pirate_flag'}, {'frequency': 'c', 'id': 108, 'synset': 'black_sheep.n.02', 'synonyms': ['black_sheep'], 'def': 'sheep with a black coat', 'name': 'black_sheep'}, {'frequency': 'c', 'id': 109, 'synset': 'blackboard.n.01', 'synonyms': ['blackboard', 'chalkboard'], 'def': 'sheet of slate; for writing with chalk', 'name': 'blackboard'}, {'frequency': 'f', 'id': 110, 'synset': 'blanket.n.01', 'synonyms': ['blanket'], 'def': 'bedding that keeps a person warm in bed', 'name': 'blanket'}, {'frequency': 'c', 'id': 111, 'synset': 'blazer.n.01', 'synonyms': ['blazer', 'sport_jacket', 'sport_coat', 'sports_jacket', 'sports_coat'], 'def': 'lightweight jacket; often striped in the colors of a club or school', 'name': 'blazer'}, {'frequency': 'f', 'id': 112, 'synset': 'blender.n.01', 'synonyms': ['blender', 'liquidizer', 'liquidiser'], 'def': 'an electrically powered mixer that mix or chop or liquefy foods', 'name': 'blender'}, {'frequency': 'r', 'id': 113, 'synset': 'blimp.n.02', 'synonyms': ['blimp'], 'def': 'a small nonrigid airship used for observation or as a barrage balloon', 'name': 'blimp'}, {'frequency': 'c', 'id': 114, 'synset': 'blinker.n.01', 'synonyms': ['blinker', 'flasher'], 'def': 'a light that flashes on and off; used as a signal or to send messages', 'name': 'blinker'}, {'frequency': 'c', 'id': 115, 'synset': 'blueberry.n.02', 'synonyms': ['blueberry'], 'def': 'sweet edible dark-blue berries of blueberry plants', 'name': 'blueberry'}, {'frequency': 'r', 'id': 116, 'synset': 'boar.n.02', 'synonyms': ['boar'], 'def': 'an uncastrated male hog', 'name': 'boar'}, {'frequency': 'r', 'id': 117, 'synset': 'board.n.09', 'synonyms': ['gameboard'], 'def': 'a flat portable surface (usually rectangular) designed for board games', 'name': 'gameboard'}, {'frequency': 'f', 'id': 118, 'synset': 'boat.n.01', 'synonyms': ['boat', 'ship_(boat)'], 'def': 'a vessel for travel on water', 'name': 'boat'}, {'frequency': 'c', 'id': 119, 'synset': 'bobbin.n.01', 'synonyms': ['bobbin', 'spool', 'reel'], 'def': 'a thing around which thread/tape/film or other flexible materials can be wound', 'name': 'bobbin'}, {'frequency': 'r', 'id': 120, 'synset': 'bobby_pin.n.01', 'synonyms': ['bobby_pin', 'hairgrip'], 'def': 'a flat wire hairpin used to hold bobbed hair in place', 'name': 'bobby_pin'}, {'frequency': 'c', 'id': 121, 'synset': 'boiled_egg.n.01', 'synonyms': ['boiled_egg', 'coddled_egg'], 'def': 'egg cooked briefly in the shell in gently boiling water', 
'name': 'boiled_egg'}, {'frequency': 'r', 'id': 122, 'synset': 'bolo_tie.n.01', 'synonyms': ['bolo_tie', 'bolo', 'bola_tie', 'bola'], 'def': 'a cord fastened around the neck with an ornamental clasp and worn as a necktie', 'name': 'bolo_tie'}, {'frequency': 'c', 'id': 123, 'synset': 'bolt.n.03', 'synonyms': ['deadbolt'], 'def': 'the part of a lock that is engaged or withdrawn with a key', 'name': 'deadbolt'}, {'frequency': 'f', 'id': 124, 'synset': 'bolt.n.06', 'synonyms': ['bolt'], 'def': 'a screw that screws into a nut to form a fastener', 'name': 'bolt'}, {'frequency': 'r', 'id': 125, 'synset': 'bonnet.n.01', 'synonyms': ['bonnet'], 'def': 'a hat tied under the chin', 'name': 'bonnet'}, {'frequency': 'f', 'id': 126, 'synset': 'book.n.01', 'synonyms': ['book'], 'def': 'a written work or composition that has been published', 'name': 'book'}, {'frequency': 'r', 'id': 127, 'synset': 'book_bag.n.01', 'synonyms': ['book_bag'], 'def': 'a bag in which students carry their books', 'name': 'book_bag'}, {'frequency': 'c', 'id': 128, 'synset': 'bookcase.n.01', 'synonyms': ['bookcase'], 'def': 'a piece of furniture with shelves for storing books', 'name': 'bookcase'}, {'frequency': 'c', 'id': 129, 'synset': 'booklet.n.01', 'synonyms': ['booklet', 'brochure', 'leaflet', 'pamphlet'], 'def': 'a small book usually having a paper cover', 'name': 'booklet'}, {'frequency': 'r', 'id': 130, 'synset': 'bookmark.n.01', 'synonyms': ['bookmark', 'bookmarker'], 'def': 'a marker (a piece of paper or ribbon) placed between the pages of a book', 'name': 'bookmark'}, {'frequency': 'r', 'id': 131, 'synset': 'boom.n.04', 'synonyms': ['boom_microphone', 'microphone_boom'], 'def': 'a pole carrying an overhead microphone projected over a film or tv set', 'name': 'boom_microphone'}, {'frequency': 'f', 'id': 132, 'synset': 'boot.n.01', 'synonyms': ['boot'], 'def': 'footwear that covers the whole foot and lower leg', 'name': 'boot'}, {'frequency': 'f', 'id': 133, 'synset': 'bottle.n.01', 'synonyms': ['bottle'], 'def': 'a glass or plastic vessel used for storing drinks or other liquids', 'name': 'bottle'}, {'frequency': 'c', 'id': 134, 'synset': 'bottle_opener.n.01', 'synonyms': ['bottle_opener'], 'def': 'an opener for removing caps or corks from bottles', 'name': 'bottle_opener'}, {'frequency': 'c', 'id': 135, 'synset': 'bouquet.n.01', 'synonyms': ['bouquet'], 'def': 'an arrangement of flowers that is usually given as a present', 'name': 'bouquet'}, {'frequency': 'r', 'id': 136, 'synset': 'bow.n.04', 'synonyms': ['bow_(weapon)'], 'def': 'a weapon for shooting arrows', 'name': 'bow_(weapon)'}, {'frequency': 'f', 'id': 137, 'synset': 'bow.n.08', 'synonyms': ['bow_(decorative_ribbons)'], 'def': 'a decorative interlacing of ribbons', 'name': 'bow_(decorative_ribbons)'}, {'frequency': 'f', 'id': 138, 'synset': 'bow_tie.n.01', 'synonyms': ['bow-tie', 'bowtie'], 'def': "a man's tie that ties in a bow", 'name': 'bow-tie'}, {'frequency': 'f', 'id': 139, 'synset': 'bowl.n.03', 'synonyms': ['bowl'], 'def': 'a dish that is round and open at the top for serving foods', 'name': 'bowl'}, {'frequency': 'r', 'id': 140, 'synset': 'bowl.n.08', 'synonyms': ['pipe_bowl'], 'def': 'a small round container that is open at the top for holding tobacco', 'name': 'pipe_bowl'}, {'frequency': 'c', 'id': 141, 'synset': 'bowler_hat.n.01', 'synonyms': ['bowler_hat', 'bowler', 'derby_hat', 'derby', 'plug_hat'], 'def': 'a felt hat that is round and hard with a narrow brim', 'name': 'bowler_hat'}, {'frequency': 'r', 'id': 142, 'synset': 'bowling_ball.n.01', 
'synonyms': ['bowling_ball'], 'def': 'a large ball with finger holes used in the sport of bowling', 'name': 'bowling_ball'}, {'frequency': 'r', 'id': 143, 'synset': 'bowling_pin.n.01', 'synonyms': ['bowling_pin'], 'def': 'a club-shaped wooden object used in bowling', 'name': 'bowling_pin'}, {'frequency': 'r', 'id': 144, 'synset': 'boxing_glove.n.01', 'synonyms': ['boxing_glove'], 'def': 'large glove coverings the fists of a fighter worn for the sport of boxing', 'name': 'boxing_glove'}, {'frequency': 'c', 'id': 145, 'synset': 'brace.n.06', 'synonyms': ['suspenders'], 'def': 'elastic straps that hold trousers up (usually used in the plural)', 'name': 'suspenders'}, {'frequency': 'f', 'id': 146, 'synset': 'bracelet.n.02', 'synonyms': ['bracelet', 'bangle'], 'def': 'jewelry worn around the wrist for decoration', 'name': 'bracelet'}, {'frequency': 'r', 'id': 147, 'synset': 'brass.n.07', 'synonyms': ['brass_plaque'], 'def': 'a memorial made of brass', 'name': 'brass_plaque'}, {'frequency': 'c', 'id': 148, 'synset': 'brassiere.n.01', 'synonyms': ['brassiere', 'bra', 'bandeau'], 'def': 'an undergarment worn by women to support their breasts', 'name': 'brassiere'}, {'frequency': 'c', 'id': 149, 'synset': 'bread-bin.n.01', 'synonyms': ['bread-bin', 'breadbox'], 'def': 'a container used to keep bread or cake in', 'name': 'bread-bin'}, {'frequency': 'r', 'id': 150, 'synset': 'breechcloth.n.01', 'synonyms': ['breechcloth', 'breechclout', 'loincloth'], 'def': 'a garment that provides covering for the loins', 'name': 'breechcloth'}, {'frequency': 'c', 'id': 151, 'synset': 'bridal_gown.n.01', 'synonyms': ['bridal_gown', 'wedding_gown', 'wedding_dress'], 'def': 'a gown worn by the bride at a wedding', 'name': 'bridal_gown'}, {'frequency': 'c', 'id': 152, 'synset': 'briefcase.n.01', 'synonyms': ['briefcase'], 'def': 'a case with a handle; for carrying papers or files or books', 'name': 'briefcase'}, {'frequency': 'c', 'id': 153, 'synset': 'bristle_brush.n.01', 'synonyms': ['bristle_brush'], 'def': 'a brush that is made with the short stiff hairs of an animal or plant', 'name': 'bristle_brush'}, {'frequency': 'f', 'id': 154, 'synset': 'broccoli.n.01', 'synonyms': ['broccoli'], 'def': 'plant with dense clusters of tight green flower buds', 'name': 'broccoli'}, {'frequency': 'r', 'id': 155, 'synset': 'brooch.n.01', 'synonyms': ['broach'], 'def': 'a decorative pin worn by women', 'name': 'broach'}, {'frequency': 'c', 'id': 156, 'synset': 'broom.n.01', 'synonyms': ['broom'], 'def': 'bundle of straws or twigs attached to a long handle; used for cleaning', 'name': 'broom'}, {'frequency': 'c', 'id': 157, 'synset': 'brownie.n.03', 'synonyms': ['brownie'], 'def': 'square or bar of very rich chocolate cake usually with nuts', 'name': 'brownie'}, {'frequency': 'c', 'id': 158, 'synset': 'brussels_sprouts.n.01', 'synonyms': ['brussels_sprouts'], 'def': 'the small edible cabbage-like buds growing along a stalk', 'name': 'brussels_sprouts'}, {'frequency': 'r', 'id': 159, 'synset': 'bubble_gum.n.01', 'synonyms': ['bubble_gum'], 'def': 'a kind of chewing gum that can be blown into bubbles', 'name': 'bubble_gum'}, {'frequency': 'f', 'id': 160, 'synset': 'bucket.n.01', 'synonyms': ['bucket', 'pail'], 'def': 'a roughly cylindrical vessel that is open at the top', 'name': 'bucket'}, {'frequency': 'r', 'id': 161, 'synset': 'buggy.n.01', 'synonyms': ['horse_buggy'], 'def': 'a small lightweight carriage; drawn by a single horse', 'name': 'horse_buggy'}, {'frequency': 'c', 'id': 162, 'synset': 'bull.n.11', 'synonyms': ['bull'], 
'def': 'mature male cow', 'name': 'bull'}, {'frequency': 'r', 'id': 163, 'synset': 'bulldog.n.01', 'synonyms': ['bulldog'], 'def': 'a thickset short-haired dog with a large head and strong undershot lower jaw', 'name': 'bulldog'}, {'frequency': 'r', 'id': 164, 'synset': 'bulldozer.n.01', 'synonyms': ['bulldozer', 'dozer'], 'def': 'large powerful tractor; a large blade in front flattens areas of ground', 'name': 'bulldozer'}, {'frequency': 'c', 'id': 165, 'synset': 'bullet_train.n.01', 'synonyms': ['bullet_train'], 'def': 'a high-speed passenger train', 'name': 'bullet_train'}, {'frequency': 'c', 'id': 166, 'synset': 'bulletin_board.n.02', 'synonyms': ['bulletin_board', 'notice_board'], 'def': 'a board that hangs on a wall; displays announcements', 'name': 'bulletin_board'}, {'frequency': 'r', 'id': 167, 'synset': 'bulletproof_vest.n.01', 'synonyms': ['bulletproof_vest'], 'def': 'a vest capable of resisting the impact of a bullet', 'name': 'bulletproof_vest'}, {'frequency': 'c', 'id': 168, 'synset': 'bullhorn.n.01', 'synonyms': ['bullhorn', 'megaphone'], 'def': 'a portable loudspeaker with built-in microphone and amplifier', 'name': 'bullhorn'}, {'frequency': 'r', 'id': 169, 'synset': 'bully_beef.n.01', 'synonyms': ['corned_beef', 'corn_beef'], 'def': 'beef cured or pickled in brine', 'name': 'corned_beef'}, {'frequency': 'f', 'id': 170, 'synset': 'bun.n.01', 'synonyms': ['bun', 'roll'], 'def': 'small rounded bread either plain or sweet', 'name': 'bun'}, {'frequency': 'c', 'id': 171, 'synset': 'bunk_bed.n.01', 'synonyms': ['bunk_bed'], 'def': 'beds built one above the other', 'name': 'bunk_bed'}, {'frequency': 'f', 'id': 172, 'synset': 'buoy.n.01', 'synonyms': ['buoy'], 'def': 'a float attached by rope to the seabed to mark channels in a harbor or underwater hazards', 'name': 'buoy'}, {'frequency': 'r', 'id': 173, 'synset': 'burrito.n.01', 'synonyms': ['burrito'], 'def': 'a flour tortilla folded around a filling', 'name': 'burrito'}, {'frequency': 'f', 'id': 174, 'synset': 'bus.n.01', 'synonyms': ['bus_(vehicle)', 'autobus', 'charabanc', 'double-decker', 'motorbus', 'motorcoach'], 'def': 'a vehicle carrying many passengers; used for public transport', 'name': 'bus_(vehicle)'}, {'frequency': 'c', 'id': 175, 'synset': 'business_card.n.01', 'synonyms': ['business_card'], 'def': "a card on which are printed the person's name and business affiliation", 'name': 'business_card'}, {'frequency': 'c', 'id': 176, 'synset': 'butcher_knife.n.01', 'synonyms': ['butcher_knife'], 'def': 'a large sharp knife for cutting or trimming meat', 'name': 'butcher_knife'}, {'frequency': 'c', 'id': 177, 'synset': 'butter.n.01', 'synonyms': ['butter'], 'def': 'an edible emulsion of fat globules made by churning milk or cream; for cooking and table use', 'name': 'butter'}, {'frequency': 'c', 'id': 178, 'synset': 'butterfly.n.01', 'synonyms': ['butterfly'], 'def': 'insect typically having a slender body with knobbed antennae and broad colorful wings', 'name': 'butterfly'}, {'frequency': 'f', 'id': 179, 'synset': 'button.n.01', 'synonyms': ['button'], 'def': 'a round fastener sewn to shirts and coats etc to fit through buttonholes', 'name': 'button'}, {'frequency': 'f', 'id': 180, 'synset': 'cab.n.03', 'synonyms': ['cab_(taxi)', 'taxi', 'taxicab'], 'def': 'a car that takes passengers where they want to go in exchange for money', 'name': 'cab_(taxi)'}, {'frequency': 'r', 'id': 181, 'synset': 'cabana.n.01', 'synonyms': ['cabana'], 'def': 'a small tent used as a dressing room beside the sea or a swimming pool', 'name': 
'cabana'}, {'frequency': 'r', 'id': 182, 'synset': 'cabin_car.n.01', 'synonyms': ['cabin_car', 'caboose'], 'def': 'a car on a freight train for use of the train crew; usually the last car on the train', 'name': 'cabin_car'}, {'frequency': 'f', 'id': 183, 'synset': 'cabinet.n.01', 'synonyms': ['cabinet'], 'def': 'a piece of furniture resembling a cupboard with doors and shelves and drawers', 'name': 'cabinet'}, {'frequency': 'r', 'id': 184, 'synset': 'cabinet.n.03', 'synonyms': ['locker', 'storage_locker'], 'def': 'a storage compartment for clothes and valuables; usually it has a lock', 'name': 'locker'}, {'frequency': 'f', 'id': 185, 'synset': 'cake.n.03', 'synonyms': ['cake'], 'def': 'baked goods made from or based on a mixture of flour, sugar, eggs, and fat', 'name': 'cake'}, {'frequency': 'c', 'id': 186, 'synset': 'calculator.n.02', 'synonyms': ['calculator'], 'def': 'a small machine that is used for mathematical calculations', 'name': 'calculator'}, {'frequency': 'f', 'id': 187, 'synset': 'calendar.n.02', 'synonyms': ['calendar'], 'def': 'a list or register of events (appointments/social events/court cases, etc)', 'name': 'calendar'}, {'frequency': 'c', 'id': 188, 'synset': 'calf.n.01', 'synonyms': ['calf'], 'def': 'young of domestic cattle', 'name': 'calf'}, {'frequency': 'c', 'id': 189, 'synset': 'camcorder.n.01', 'synonyms': ['camcorder'], 'def': 'a portable television camera and videocassette recorder', 'name': 'camcorder'}, {'frequency': 'c', 'id': 190, 'synset': 'camel.n.01', 'synonyms': ['camel'], 'def': 'cud-chewing mammal used as a draft or saddle animal in desert regions', 'name': 'camel'}, {'frequency': 'f', 'id': 191, 'synset': 'camera.n.01', 'synonyms': ['camera'], 'def': 'equipment for taking photographs', 'name': 'camera'}, {'frequency': 'c', 'id': 192, 'synset': 'camera_lens.n.01', 'synonyms': ['camera_lens'], 'def': 'a lens that focuses the image in a camera', 'name': 'camera_lens'}, {'frequency': 'c', 'id': 193, 'synset': 'camper.n.02', 'synonyms': ['camper_(vehicle)', 'camping_bus', 'motor_home'], 'def': 'a recreational vehicle equipped for camping out while traveling', 'name': 'camper_(vehicle)'}, {'frequency': 'f', 'id': 194, 'synset': 'can.n.01', 'synonyms': ['can', 'tin_can'], 'def': 'airtight sealed metal container for food or drink or paint etc.', 'name': 'can'}, {'frequency': 'c', 'id': 195, 'synset': 'can_opener.n.01', 'synonyms': ['can_opener', 'tin_opener'], 'def': 'a device for cutting cans open', 'name': 'can_opener'}, {'frequency': 'r', 'id': 196, 'synset': 'candelabrum.n.01', 'synonyms': ['candelabrum', 'candelabra'], 'def': 'branched candlestick; ornamental; has several lights', 'name': 'candelabrum'}, {'frequency': 'f', 'id': 197, 'synset': 'candle.n.01', 'synonyms': ['candle', 'candlestick'], 'def': 'stick of wax with a wick in the middle', 'name': 'candle'}, {'frequency': 'f', 'id': 198, 'synset': 'candlestick.n.01', 'synonyms': ['candle_holder'], 'def': 'a holder with sockets for candles', 'name': 'candle_holder'}, {'frequency': 'r', 'id': 199, 'synset': 'candy_bar.n.01', 'synonyms': ['candy_bar'], 'def': 'a candy shaped as a bar', 'name': 'candy_bar'}, {'frequency': 'c', 'id': 200, 'synset': 'candy_cane.n.01', 'synonyms': ['candy_cane'], 'def': 'a hard candy in the shape of a rod (usually with stripes)', 'name': 'candy_cane'}, {'frequency': 'c', 'id': 201, 'synset': 'cane.n.01', 'synonyms': ['walking_cane'], 'def': 'a stick that people can lean on to help them walk', 'name': 'walking_cane'}, {'frequency': 'c', 'id': 202, 'synset': 'canister.n.02', 
'synonyms': ['canister', 'cannister'], 'def': 'metal container for storing dry foods such as tea or flour', 'name': 'canister'}, {'frequency': 'r', 'id': 203, 'synset': 'cannon.n.02', 'synonyms': ['cannon'], 'def': 'heavy gun fired from a tank', 'name': 'cannon'}, {'frequency': 'c', 'id': 204, 'synset': 'canoe.n.01', 'synonyms': ['canoe'], 'def': 'small and light boat; pointed at both ends; propelled with a paddle', 'name': 'canoe'}, {'frequency': 'r', 'id': 205, 'synset': 'cantaloup.n.02', 'synonyms': ['cantaloup', 'cantaloupe'], 'def': 'the fruit of a cantaloup vine; small to medium-sized melon with yellowish flesh', 'name': 'cantaloup'}, {'frequency': 'r', 'id': 206, 'synset': 'canteen.n.01', 'synonyms': ['canteen'], 'def': 'a flask for carrying water; used by soldiers or travelers', 'name': 'canteen'}, {'frequency': 'c', 'id': 207, 'synset': 'cap.n.01', 'synonyms': ['cap_(headwear)'], 'def': 'a tight-fitting headwear', 'name': 'cap_(headwear)'}, {'frequency': 'f', 'id': 208, 'synset': 'cap.n.02', 'synonyms': ['bottle_cap', 'cap_(container_lid)'], 'def': 'a top (as for a bottle)', 'name': 'bottle_cap'}, {'frequency': 'r', 'id': 209, 'synset': 'cape.n.02', 'synonyms': ['cape'], 'def': 'a sleeveless garment like a cloak but shorter', 'name': 'cape'}, {'frequency': 'c', 'id': 210, 'synset': 'cappuccino.n.01', 'synonyms': ['cappuccino', 'coffee_cappuccino'], 'def': 'equal parts of espresso and steamed milk', 'name': 'cappuccino'}, {'frequency': 'f', 'id': 211, 'synset': 'car.n.01', 'synonyms': ['car_(automobile)', 'auto_(automobile)', 'automobile'], 'def': 'a motor vehicle with four wheels', 'name': 'car_(automobile)'}, {'frequency': 'f', 'id': 212, 'synset': 'car.n.02', 'synonyms': ['railcar_(part_of_a_train)', 'railway_car_(part_of_a_train)', 'railroad_car_(part_of_a_train)'], 'def': 'a wheeled vehicle adapted to the rails of railroad', 'name': 'railcar_(part_of_a_train)'}, {'frequency': 'r', 'id': 213, 'synset': 'car.n.04', 'synonyms': ['elevator_car'], 'def': 'where passengers ride up and down', 'name': 'elevator_car'}, {'frequency': 'r', 'id': 214, 'synset': 'car_battery.n.01', 'synonyms': ['car_battery', 'automobile_battery'], 'def': 'a battery in a motor vehicle', 'name': 'car_battery'}, {'frequency': 'c', 'id': 215, 'synset': 'card.n.02', 'synonyms': ['identity_card'], 'def': 'a card certifying the identity of the bearer', 'name': 'identity_card'}, {'frequency': 'c', 'id': 216, 'synset': 'card.n.03', 'synonyms': ['card'], 'def': 'a rectangular piece of paper used to send messages (e.g. 
greetings or pictures)', 'name': 'card'}, {'frequency': 'r', 'id': 217, 'synset': 'cardigan.n.01', 'synonyms': ['cardigan'], 'def': 'knitted jacket that is fastened up the front with buttons or a zipper', 'name': 'cardigan'}, {'frequency': 'r', 'id': 218, 'synset': 'cargo_ship.n.01', 'synonyms': ['cargo_ship', 'cargo_vessel'], 'def': 'a ship designed to carry cargo', 'name': 'cargo_ship'}, {'frequency': 'r', 'id': 219, 'synset': 'carnation.n.01', 'synonyms': ['carnation'], 'def': 'plant with pink to purple-red spice-scented usually double flowers', 'name': 'carnation'}, {'frequency': 'c', 'id': 220, 'synset': 'carriage.n.02', 'synonyms': ['horse_carriage'], 'def': 'a vehicle with wheels drawn by one or more horses', 'name': 'horse_carriage'}, {'frequency': 'f', 'id': 221, 'synset': 'carrot.n.01', 'synonyms': ['carrot'], 'def': 'deep orange edible root of the cultivated carrot plant', 'name': 'carrot'}, {'frequency': 'c', 'id': 222, 'synset': 'carryall.n.01', 'synonyms': ['tote_bag'], 'def': 'a capacious bag or basket', 'name': 'tote_bag'}, {'frequency': 'c', 'id': 223, 'synset': 'cart.n.01', 'synonyms': ['cart'], 'def': 'a heavy open wagon usually having two wheels and drawn by an animal', 'name': 'cart'}, {'frequency': 'c', 'id': 224, 'synset': 'carton.n.02', 'synonyms': ['carton'], 'def': 'a box made of cardboard; opens by flaps on top', 'name': 'carton'}, {'frequency': 'c', 'id': 225, 'synset': 'cash_register.n.01', 'synonyms': ['cash_register', 'register_(for_cash_transactions)'], 'def': 'a cashbox with an adding machine to register transactions', 'name': 'cash_register'}, {'frequency': 'r', 'id': 226, 'synset': 'casserole.n.01', 'synonyms': ['casserole'], 'def': 'food cooked and served in a casserole', 'name': 'casserole'}, {'frequency': 'r', 'id': 227, 'synset': 'cassette.n.01', 'synonyms': ['cassette'], 'def': 'a container that holds a magnetic tape used for recording or playing sound or video', 'name': 'cassette'}, {'frequency': 'c', 'id': 228, 'synset': 'cast.n.05', 'synonyms': ['cast', 'plaster_cast', 'plaster_bandage'], 'def': 'bandage consisting of a firm covering that immobilizes broken bones while they heal', 'name': 'cast'}, {'frequency': 'f', 'id': 229, 'synset': 'cat.n.01', 'synonyms': ['cat'], 'def': 'a domestic house cat', 'name': 'cat'}, {'frequency': 'c', 'id': 230, 'synset': 'cauliflower.n.02', 'synonyms': ['cauliflower'], 'def': 'edible compact head of white undeveloped flowers', 'name': 'cauliflower'}, {'frequency': 'r', 'id': 231, 'synset': 'caviar.n.01', 'synonyms': ['caviar', 'caviare'], 'def': "salted roe of sturgeon or other large fish; usually served as an hors d'oeuvre", 'name': 'caviar'}, {'frequency': 'c', 'id': 232, 'synset': 'cayenne.n.02', 'synonyms': ['cayenne_(spice)', 'cayenne_pepper_(spice)', 'red_pepper_(spice)'], 'def': 'ground pods and seeds of pungent red peppers of the genus Capsicum', 'name': 'cayenne_(spice)'}, {'frequency': 'c', 'id': 233, 'synset': 'cd_player.n.01', 'synonyms': ['CD_player'], 'def': 'electronic equipment for playing compact discs (CDs)', 'name': 'CD_player'}, {'frequency': 'c', 'id': 234, 'synset': 'celery.n.01', 'synonyms': ['celery'], 'def': 'widely cultivated herb with aromatic leaf stalks that are eaten raw or cooked', 'name': 'celery'}, {'frequency': 'f', 'id': 235, 'synset': 'cellular_telephone.n.01', 'synonyms': ['cellular_telephone', 'cellular_phone', 'cellphone', 'mobile_phone', 'smart_phone'], 'def': 'a hand-held mobile telephone', 'name': 'cellular_telephone'}, {'frequency': 'r', 'id': 236, 'synset': 
'chain_mail.n.01', 'synonyms': ['chain_mail', 'ring_mail', 'chain_armor', 'chain_armour', 'ring_armor', 'ring_armour'], 'def': '(Middle Ages) flexible armor made of interlinked metal rings', 'name': 'chain_mail'}, {'frequency': 'f', 'id': 237, 'synset': 'chair.n.01', 'synonyms': ['chair'], 'def': 'a seat for one person, with a support for the back', 'name': 'chair'}, {'frequency': 'r', 'id': 238, 'synset': 'chaise_longue.n.01', 'synonyms': ['chaise_longue', 'chaise', 'daybed'], 'def': 'a long chair; for reclining', 'name': 'chaise_longue'}, {'frequency': 'r', 'id': 239, 'synset': 'champagne.n.01', 'synonyms': ['champagne'], 'def': 'a white sparkling wine produced in Champagne or resembling that produced there', 'name': 'champagne'}, {'frequency': 'f', 'id': 240, 'synset': 'chandelier.n.01', 'synonyms': ['chandelier'], 'def': 'branched lighting fixture; often ornate; hangs from the ceiling', 'name': 'chandelier'}, {'frequency': 'r', 'id': 241, 'synset': 'chap.n.04', 'synonyms': ['chap'], 'def': 'leather leggings without a seat; worn over trousers by cowboys to protect their legs', 'name': 'chap'}, {'frequency': 'r', 'id': 242, 'synset': 'checkbook.n.01', 'synonyms': ['checkbook', 'chequebook'], 'def': 'a book issued to holders of checking accounts', 'name': 'checkbook'}, {'frequency': 'r', 'id': 243, 'synset': 'checkerboard.n.01', 'synonyms': ['checkerboard'], 'def': 'a board having 64 squares of two alternating colors', 'name': 'checkerboard'}, {'frequency': 'c', 'id': 244, 'synset': 'cherry.n.03', 'synonyms': ['cherry'], 'def': 'a red fruit with a single hard stone', 'name': 'cherry'}, {'frequency': 'r', 'id': 245, 'synset': 'chessboard.n.01', 'synonyms': ['chessboard'], 'def': 'a checkerboard used to play chess', 'name': 'chessboard'}, {'frequency': 'r', 'id': 246, 'synset': 'chest_of_drawers.n.01', 'synonyms': ['chest_of_drawers_(furniture)', 'bureau_(furniture)', 'chest_(furniture)'], 'def': 'furniture with drawers for keeping clothes', 'name': 'chest_of_drawers_(furniture)'}, {'frequency': 'c', 'id': 247, 'synset': 'chicken.n.02', 'synonyms': ['chicken_(animal)'], 'def': 'a domestic fowl bred for flesh or eggs', 'name': 'chicken_(animal)'}, {'frequency': 'c', 'id': 248, 'synset': 'chicken_wire.n.01', 'synonyms': ['chicken_wire'], 'def': 'a galvanized wire network with a hexagonal mesh; used to build fences', 'name': 'chicken_wire'}, {'frequency': 'r', 'id': 249, 'synset': 'chickpea.n.01', 'synonyms': ['chickpea', 'garbanzo'], 'def': 'the seed of the chickpea plant; usually dried', 'name': 'chickpea'}, {'frequency': 'r', 'id': 250, 'synset': 'chihuahua.n.03', 'synonyms': ['Chihuahua'], 'def': 'an old breed of tiny short-haired dog with protruding eyes from Mexico', 'name': 'Chihuahua'}, {'frequency': 'r', 'id': 251, 'synset': 'chili.n.02', 'synonyms': ['chili_(vegetable)', 'chili_pepper_(vegetable)', 'chilli_(vegetable)', 'chilly_(vegetable)', 'chile_(vegetable)'], 'def': 'very hot and finely tapering pepper of special pungency', 'name': 'chili_(vegetable)'}, {'frequency': 'r', 'id': 252, 'synset': 'chime.n.01', 'synonyms': ['chime', 'gong'], 'def': 'an instrument consisting of a set of bells that are struck with a hammer', 'name': 'chime'}, {'frequency': 'r', 'id': 253, 'synset': 'chinaware.n.01', 'synonyms': ['chinaware'], 'def': 'dishware made of high quality porcelain', 'name': 'chinaware'}, {'frequency': 'c', 'id': 254, 'synset': 'chip.n.04', 'synonyms': ['crisp_(potato_chip)', 'potato_chip'], 'def': 'a thin crisp slice of potato fried in deep fat', 'name': 'crisp_(potato_chip)'}, 
{'frequency': 'r', 'id': 255, 'synset': 'chip.n.06', 'synonyms': ['poker_chip'], 'def': 'a small disk-shaped counter used to represent money when gambling', 'name': 'poker_chip'}, {'frequency': 'c', 'id': 256, 'synset': 'chocolate_bar.n.01', 'synonyms': ['chocolate_bar'], 'def': 'a bar of chocolate candy', 'name': 'chocolate_bar'}, {'frequency': 'c', 'id': 257, 'synset': 'chocolate_cake.n.01', 'synonyms': ['chocolate_cake'], 'def': 'cake containing chocolate', 'name': 'chocolate_cake'}, {'frequency': 'r', 'id': 258, 'synset': 'chocolate_milk.n.01', 'synonyms': ['chocolate_milk'], 'def': 'milk flavored with chocolate syrup', 'name': 'chocolate_milk'}, {'frequency': 'r', 'id': 259, 'synset': 'chocolate_mousse.n.01', 'synonyms': ['chocolate_mousse'], 'def': 'dessert mousse made with chocolate', 'name': 'chocolate_mousse'}, {'frequency': 'f', 'id': 260, 'synset': 'choker.n.03', 'synonyms': ['choker', 'collar', 'neckband'], 'def': 'necklace that fits tightly around the neck', 'name': 'choker'}, {'frequency': 'f', 'id': 261, 'synset': 'chopping_board.n.01', 'synonyms': ['chopping_board', 'cutting_board', 'chopping_block'], 'def': 'a wooden board where meats or vegetables can be cut', 'name': 'chopping_board'}, {'frequency': 'c', 'id': 262, 'synset': 'chopstick.n.01', 'synonyms': ['chopstick'], 'def': 'one of a pair of slender sticks used as oriental tableware to eat food with', 'name': 'chopstick'}, {'frequency': 'f', 'id': 263, 'synset': 'christmas_tree.n.05', 'synonyms': ['Christmas_tree'], 'def': 'an ornamented evergreen used as a Christmas decoration', 'name': 'Christmas_tree'}, {'frequency': 'c', 'id': 264, 'synset': 'chute.n.02', 'synonyms': ['slide'], 'def': 'sloping channel through which things can descend', 'name': 'slide'}, {'frequency': 'r', 'id': 265, 'synset': 'cider.n.01', 'synonyms': ['cider', 'cyder'], 'def': 'a beverage made from juice pressed from apples', 'name': 'cider'}, {'frequency': 'r', 'id': 266, 'synset': 'cigar_box.n.01', 'synonyms': ['cigar_box'], 'def': 'a box for holding cigars', 'name': 'cigar_box'}, {'frequency': 'c', 'id': 267, 'synset': 'cigarette.n.01', 'synonyms': ['cigarette'], 'def': 'finely ground tobacco wrapped in paper; for smoking', 'name': 'cigarette'}, {'frequency': 'c', 'id': 268, 'synset': 'cigarette_case.n.01', 'synonyms': ['cigarette_case', 'cigarette_pack'], 'def': 'a small flat case for holding cigarettes', 'name': 'cigarette_case'}, {'frequency': 'f', 'id': 269, 'synset': 'cistern.n.02', 'synonyms': ['cistern', 'water_tank'], 'def': 'a tank that holds the water used to flush a toilet', 'name': 'cistern'}, {'frequency': 'r', 'id': 270, 'synset': 'clarinet.n.01', 'synonyms': ['clarinet'], 'def': 'a single-reed instrument with a straight tube', 'name': 'clarinet'}, {'frequency': 'r', 'id': 271, 'synset': 'clasp.n.01', 'synonyms': ['clasp'], 'def': 'a fastener (as a buckle or hook) that is used to hold two things together', 'name': 'clasp'}, {'frequency': 'c', 'id': 272, 'synset': 'cleansing_agent.n.01', 'synonyms': ['cleansing_agent', 'cleanser', 'cleaner'], 'def': 'a preparation used in cleaning something', 'name': 'cleansing_agent'}, {'frequency': 'r', 'id': 273, 'synset': 'clementine.n.01', 'synonyms': ['clementine'], 'def': 'a variety of mandarin orange', 'name': 'clementine'}, {'frequency': 'c', 'id': 274, 'synset': 'clip.n.03', 'synonyms': ['clip'], 'def': 'any of various small fasteners used to hold loose articles together', 'name': 'clip'}, {'frequency': 'c', 'id': 275, 'synset': 'clipboard.n.01', 'synonyms': ['clipboard'], 'def': 'a small 
writing board with a clip at the top for holding papers', 'name': 'clipboard'}, {'frequency': 'f', 'id': 276, 'synset': 'clock.n.01', 'synonyms': ['clock', 'timepiece', 'timekeeper'], 'def': 'a timepiece that shows the time of day', 'name': 'clock'}, {'frequency': 'f', 'id': 277, 'synset': 'clock_tower.n.01', 'synonyms': ['clock_tower'], 'def': 'a tower with a large clock visible high up on an outside face', 'name': 'clock_tower'}, {'frequency': 'c', 'id': 278, 'synset': 'clothes_hamper.n.01', 'synonyms': ['clothes_hamper', 'laundry_basket', 'clothes_basket'], 'def': 'a hamper that holds dirty clothes to be washed or wet clothes to be dried', 'name': 'clothes_hamper'}, {'frequency': 'c', 'id': 279, 'synset': 'clothespin.n.01', 'synonyms': ['clothespin', 'clothes_peg'], 'def': 'wood or plastic fastener; for holding clothes on a clothesline', 'name': 'clothespin'}, {'frequency': 'r', 'id': 280, 'synset': 'clutch_bag.n.01', 'synonyms': ['clutch_bag'], 'def': "a woman's strapless purse that is carried in the hand", 'name': 'clutch_bag'}, {'frequency': 'f', 'id': 281, 'synset': 'coaster.n.03', 'synonyms': ['coaster'], 'def': 'a covering (plate or mat) that protects the surface of a table', 'name': 'coaster'}, {'frequency': 'f', 'id': 282, 'synset': 'coat.n.01', 'synonyms': ['coat'], 'def': 'an outer garment that has sleeves and covers the body from shoulder down', 'name': 'coat'}, {'frequency': 'c', 'id': 283, 'synset': 'coat_hanger.n.01', 'synonyms': ['coat_hanger', 'clothes_hanger', 'dress_hanger'], 'def': "a hanger that is shaped like a person's shoulders", 'name': 'coat_hanger'}, {'frequency': 'r', 'id': 284, 'synset': 'coatrack.n.01', 'synonyms': ['coatrack', 'hatrack'], 'def': 'a rack with hooks for temporarily holding coats and hats', 'name': 'coatrack'}, {'frequency': 'c', 'id': 285, 'synset': 'cock.n.04', 'synonyms': ['cock', 'rooster'], 'def': 'adult male chicken', 'name': 'cock'}, {'frequency': 'c', 'id': 286, 'synset': 'coconut.n.02', 'synonyms': ['coconut', 'cocoanut'], 'def': 'large hard-shelled brown oval nut with a fibrous husk', 'name': 'coconut'}, {'frequency': 'r', 'id': 287, 'synset': 'coffee_filter.n.01', 'synonyms': ['coffee_filter'], 'def': 'filter (usually of paper) that passes the coffee and retains the coffee grounds', 'name': 'coffee_filter'}, {'frequency': 'f', 'id': 288, 'synset': 'coffee_maker.n.01', 'synonyms': ['coffee_maker', 'coffee_machine'], 'def': 'a kitchen appliance for brewing coffee automatically', 'name': 'coffee_maker'}, {'frequency': 'f', 'id': 289, 'synset': 'coffee_table.n.01', 'synonyms': ['coffee_table', 'cocktail_table'], 'def': 'low table where magazines can be placed and coffee or cocktails are served', 'name': 'coffee_table'}, {'frequency': 'c', 'id': 290, 'synset': 'coffeepot.n.01', 'synonyms': ['coffeepot'], 'def': 'tall pot in which coffee is brewed', 'name': 'coffeepot'}, {'frequency': 'r', 'id': 291, 'synset': 'coil.n.05', 'synonyms': ['coil'], 'def': 'tubing that is wound in a spiral', 'name': 'coil'}, {'frequency': 'c', 'id': 292, 'synset': 'coin.n.01', 'synonyms': ['coin'], 'def': 'a flat metal piece (usually a disc) used as money', 'name': 'coin'}, {'frequency': 'r', 'id': 293, 'synset': 'colander.n.01', 'synonyms': ['colander', 'cullender'], 'def': 'bowl-shaped strainer; used to wash or drain foods', 'name': 'colander'}, {'frequency': 'c', 'id': 294, 'synset': 'coleslaw.n.01', 'synonyms': ['coleslaw', 'slaw'], 'def': 'basically shredded cabbage', 'name': 'coleslaw'}, {'frequency': 'r', 'id': 295, 'synset': 'coloring_material.n.01', 
'synonyms': ['coloring_material', 'colouring_material'], 'def': 'any material used for its color', 'name': 'coloring_material'}, {'frequency': 'r', 'id': 296, 'synset': 'combination_lock.n.01', 'synonyms': ['combination_lock'], 'def': 'lock that can be opened only by turning dials in a special sequence', 'name': 'combination_lock'}, {'frequency': 'c', 'id': 297, 'synset': 'comforter.n.04', 'synonyms': ['pacifier', 'teething_ring'], 'def': 'device used for an infant to suck or bite on', 'name': 'pacifier'}, {'frequency': 'r', 'id': 298, 'synset': 'comic_book.n.01', 'synonyms': ['comic_book'], 'def': 'a magazine devoted to comic strips', 'name': 'comic_book'}, {'frequency': 'f', 'id': 299, 'synset': 'computer_keyboard.n.01', 'synonyms': ['computer_keyboard', 'keyboard_(computer)'], 'def': 'a keyboard that is a data input device for computers', 'name': 'computer_keyboard'}, {'frequency': 'r', 'id': 300, 'synset': 'concrete_mixer.n.01', 'synonyms': ['concrete_mixer', 'cement_mixer'], 'def': 'a machine with a large revolving drum in which cement/concrete is mixed', 'name': 'concrete_mixer'}, {'frequency': 'f', 'id': 301, 'synset': 'cone.n.01', 'synonyms': ['cone', 'traffic_cone'], 'def': 'a cone-shaped object used to direct traffic', 'name': 'cone'}, {'frequency': 'f', 'id': 302, 'synset': 'control.n.09', 'synonyms': ['control', 'controller'], 'def': 'a mechanism that controls the operation of a machine', 'name': 'control'}, {'frequency': 'r', 'id': 303, 'synset': 'convertible.n.01', 'synonyms': ['convertible_(automobile)'], 'def': 'a car that has top that can be folded or removed', 'name': 'convertible_(automobile)'}, {'frequency': 'r', 'id': 304, 'synset': 'convertible.n.03', 'synonyms': ['sofa_bed'], 'def': 'a sofa that can be converted into a bed', 'name': 'sofa_bed'}, {'frequency': 'c', 'id': 305, 'synset': 'cookie.n.01', 'synonyms': ['cookie', 'cooky', 'biscuit_(cookie)'], 'def': "any of various small flat sweet cakes (`biscuit' is the British term)", 'name': 'cookie'}, {'frequency': 'r', 'id': 306, 'synset': 'cookie_jar.n.01', 'synonyms': ['cookie_jar', 'cooky_jar'], 'def': 'a jar in which cookies are kept (and sometimes money is hidden)', 'name': 'cookie_jar'}, {'frequency': 'r', 'id': 307, 'synset': 'cooking_utensil.n.01', 'synonyms': ['cooking_utensil'], 'def': 'a kitchen utensil made of material that does not melt easily; used for cooking', 'name': 'cooking_utensil'}, {'frequency': 'f', 'id': 308, 'synset': 'cooler.n.01', 'synonyms': ['cooler_(for_food)', 'ice_chest'], 'def': 'an insulated box for storing food often with ice', 'name': 'cooler_(for_food)'}, {'frequency': 'c', 'id': 309, 'synset': 'cork.n.04', 'synonyms': ['cork_(bottle_plug)', 'bottle_cork'], 'def': 'the plug in the mouth of a bottle (especially a wine bottle)', 'name': 'cork_(bottle_plug)'}, {'frequency': 'r', 'id': 310, 'synset': 'corkboard.n.01', 'synonyms': ['corkboard'], 'def': 'a sheet consisting of cork granules', 'name': 'corkboard'}, {'frequency': 'r', 'id': 311, 'synset': 'corkscrew.n.01', 'synonyms': ['corkscrew', 'bottle_screw'], 'def': 'a bottle opener that pulls corks', 'name': 'corkscrew'}, {'frequency': 'c', 'id': 312, 'synset': 'corn.n.03', 'synonyms': ['edible_corn', 'corn', 'maize'], 'def': 'ears of corn that can be prepared and served for human food', 'name': 'edible_corn'}, {'frequency': 'r', 'id': 313, 'synset': 'cornbread.n.01', 'synonyms': ['cornbread'], 'def': 'bread made primarily of cornmeal', 'name': 'cornbread'}, {'frequency': 'c', 'id': 314, 'synset': 'cornet.n.01', 'synonyms': ['cornet', 
'horn', 'trumpet'], 'def': 'a brass musical instrument with a narrow tube and a flared bell and many valves', 'name': 'cornet'}, {'frequency': 'c', 'id': 315, 'synset': 'cornice.n.01', 'synonyms': ['cornice', 'valance', 'valance_board', 'pelmet'], 'def': 'a decorative framework to conceal curtain fixtures at the top of a window casing', 'name': 'cornice'}, {'frequency': 'r', 'id': 316, 'synset': 'cornmeal.n.01', 'synonyms': ['cornmeal'], 'def': 'coarsely ground corn', 'name': 'cornmeal'}, {'frequency': 'r', 'id': 317, 'synset': 'corset.n.01', 'synonyms': ['corset', 'girdle'], 'def': "a woman's close-fitting foundation garment", 'name': 'corset'}, {'frequency': 'r', 'id': 318, 'synset': 'cos.n.02', 'synonyms': ['romaine_lettuce'], 'def': 'lettuce with long dark-green leaves in a loosely packed elongated head', 'name': 'romaine_lettuce'}, {'frequency': 'c', 'id': 319, 'synset': 'costume.n.04', 'synonyms': ['costume'], 'def': 'the attire characteristic of a country or a time or a social class', 'name': 'costume'}, {'frequency': 'r', 'id': 320, 'synset': 'cougar.n.01', 'synonyms': ['cougar', 'puma', 'catamount', 'mountain_lion', 'panther'], 'def': 'large American feline resembling a lion', 'name': 'cougar'}, {'frequency': 'r', 'id': 321, 'synset': 'coverall.n.01', 'synonyms': ['coverall'], 'def': 'a loose-fitting protective garment that is worn over other clothing', 'name': 'coverall'}, {'frequency': 'r', 'id': 322, 'synset': 'cowbell.n.01', 'synonyms': ['cowbell'], 'def': 'a bell hung around the neck of cow so that the cow can be easily located', 'name': 'cowbell'}, {'frequency': 'f', 'id': 323, 'synset': 'cowboy_hat.n.01', 'synonyms': ['cowboy_hat', 'ten-gallon_hat'], 'def': 'a hat with a wide brim and a soft crown; worn by American ranch hands', 'name': 'cowboy_hat'}, {'frequency': 'r', 'id': 324, 'synset': 'crab.n.01', 'synonyms': ['crab_(animal)'], 'def': 'decapod having eyes on short stalks and a broad flattened shell and pincers', 'name': 'crab_(animal)'}, {'frequency': 'c', 'id': 325, 'synset': 'cracker.n.01', 'synonyms': ['cracker'], 'def': 'a thin crisp wafer', 'name': 'cracker'}, {'frequency': 'r', 'id': 326, 'synset': 'crape.n.01', 'synonyms': ['crape', 'crepe', 'French_pancake'], 'def': 'small very thin pancake', 'name': 'crape'}, {'frequency': 'f', 'id': 327, 'synset': 'crate.n.01', 'synonyms': ['crate'], 'def': 'a rugged box (usually made of wood); used for shipping', 'name': 'crate'}, {'frequency': 'r', 'id': 328, 'synset': 'crayon.n.01', 'synonyms': ['crayon', 'wax_crayon'], 'def': 'writing or drawing implement made of a colored stick of composition wax', 'name': 'crayon'}, {'frequency': 'r', 'id': 329, 'synset': 'cream_pitcher.n.01', 'synonyms': ['cream_pitcher'], 'def': 'a small pitcher for serving cream', 'name': 'cream_pitcher'}, {'frequency': 'r', 'id': 330, 'synset': 'credit_card.n.01', 'synonyms': ['credit_card', 'charge_card', 'debit_card'], 'def': 'a card, usually plastic, used to pay for goods and services', 'name': 'credit_card'}, {'frequency': 'c', 'id': 331, 'synset': 'crescent_roll.n.01', 'synonyms': ['crescent_roll', 'croissant'], 'def': 'very rich flaky crescent-shaped roll', 'name': 'crescent_roll'}, {'frequency': 'c', 'id': 332, 'synset': 'crib.n.01', 'synonyms': ['crib', 'cot'], 'def': 'baby bed with high sides made of slats', 'name': 'crib'}, {'frequency': 'c', 'id': 333, 'synset': 'crock.n.03', 'synonyms': ['crock_pot', 'earthenware_jar'], 'def': 'an earthen jar (made of baked clay)', 'name': 'crock_pot'}, {'frequency': 'f', 'id': 334, 'synset': 
'crossbar.n.01', 'synonyms': ['crossbar'], 'def': 'a horizontal bar that goes across something', 'name': 'crossbar'}, {'frequency': 'r', 'id': 335, 'synset': 'crouton.n.01', 'synonyms': ['crouton'], 'def': 'a small piece of toasted or fried bread; served in soup or salads', 'name': 'crouton'}, {'frequency': 'r', 'id': 336, 'synset': 'crow.n.01', 'synonyms': ['crow'], 'def': 'black birds having a raucous call', 'name': 'crow'}, {'frequency': 'c', 'id': 337, 'synset': 'crown.n.04', 'synonyms': ['crown'], 'def': 'an ornamental jeweled headdress signifying sovereignty', 'name': 'crown'}, {'frequency': 'c', 'id': 338, 'synset': 'crucifix.n.01', 'synonyms': ['crucifix'], 'def': 'representation of the cross on which Jesus died', 'name': 'crucifix'}, {'frequency': 'c', 'id': 339, 'synset': 'cruise_ship.n.01', 'synonyms': ['cruise_ship', 'cruise_liner'], 'def': 'a passenger ship used commercially for pleasure cruises', 'name': 'cruise_ship'}, {'frequency': 'c', 'id': 340, 'synset': 'cruiser.n.01', 'synonyms': ['police_cruiser', 'patrol_car', 'police_car', 'squad_car'], 'def': 'a car in which policemen cruise the streets', 'name': 'police_cruiser'}, {'frequency': 'c', 'id': 341, 'synset': 'crumb.n.03', 'synonyms': ['crumb'], 'def': 'small piece of e.g. bread or cake', 'name': 'crumb'}, {'frequency': 'r', 'id': 342, 'synset': 'crutch.n.01', 'synonyms': ['crutch'], 'def': 'a wooden or metal staff that fits under the armpit and reaches to the ground', 'name': 'crutch'}, {'frequency': 'c', 'id': 343, 'synset': 'cub.n.03', 'synonyms': ['cub_(animal)'], 'def': 'the young of certain carnivorous mammals such as the bear or wolf or lion', 'name': 'cub_(animal)'}, {'frequency': 'r', 'id': 344, 'synset': 'cube.n.05', 'synonyms': ['cube', 'square_block'], 'def': 'a block in the (approximate) shape of a cube', 'name': 'cube'}, {'frequency': 'f', 'id': 345, 'synset': 'cucumber.n.02', 'synonyms': ['cucumber', 'cuke'], 'def': 'cylindrical green fruit with thin green rind and white flesh eaten as a vegetable', 'name': 'cucumber'}, {'frequency': 'c', 'id': 346, 'synset': 'cufflink.n.01', 'synonyms': ['cufflink'], 'def': 'jewelry consisting of linked buttons used to fasten the cuffs of a shirt', 'name': 'cufflink'}, {'frequency': 'f', 'id': 347, 'synset': 'cup.n.01', 'synonyms': ['cup'], 'def': 'a small open container usually used for drinking; usually has a handle', 'name': 'cup'}, {'frequency': 'c', 'id': 348, 'synset': 'cup.n.08', 'synonyms': ['trophy_cup'], 'def': 'a metal vessel with handles that is awarded as a trophy to a competition winner', 'name': 'trophy_cup'}, {'frequency': 'c', 'id': 349, 'synset': 'cupcake.n.01', 'synonyms': ['cupcake'], 'def': 'small cake baked in a muffin tin', 'name': 'cupcake'}, {'frequency': 'r', 'id': 350, 'synset': 'curler.n.01', 'synonyms': ['hair_curler', 'hair_roller', 'hair_crimper'], 'def': 'a cylindrical tube around which the hair is wound to curl it', 'name': 'hair_curler'}, {'frequency': 'r', 'id': 351, 'synset': 'curling_iron.n.01', 'synonyms': ['curling_iron'], 'def': 'a cylindrical home appliance that heats hair that has been curled around it', 'name': 'curling_iron'}, {'frequency': 'f', 'id': 352, 'synset': 'curtain.n.01', 'synonyms': ['curtain', 'drapery'], 'def': 'hanging cloth used as a blind (especially for a window)', 'name': 'curtain'}, {'frequency': 'f', 'id': 353, 'synset': 'cushion.n.03', 'synonyms': ['cushion'], 'def': 'a soft bag filled with air or padding such as feathers or foam rubber', 'name': 'cushion'}, {'frequency': 'r', 'id': 354, 'synset': 
'custard.n.01', 'synonyms': ['custard'], 'def': 'sweetened mixture of milk and eggs baked or boiled or frozen', 'name': 'custard'}, {'frequency': 'c', 'id': 355, 'synset': 'cutter.n.06', 'synonyms': ['cutting_tool'], 'def': 'a cutting implement; a tool for cutting', 'name': 'cutting_tool'}, {'frequency': 'r', 'id': 356, 'synset': 'cylinder.n.04', 'synonyms': ['cylinder'], 'def': 'a cylindrical container', 'name': 'cylinder'}, {'frequency': 'r', 'id': 357, 'synset': 'cymbal.n.01', 'synonyms': ['cymbal'], 'def': 'a percussion instrument consisting of a concave brass disk', 'name': 'cymbal'}, {'frequency': 'r', 'id': 358, 'synset': 'dachshund.n.01', 'synonyms': ['dachshund', 'dachsie', 'badger_dog'], 'def': 'small long-bodied short-legged breed of dog having a short sleek coat and long drooping ears', 'name': 'dachshund'}, {'frequency': 'r', 'id': 359, 'synset': 'dagger.n.01', 'synonyms': ['dagger'], 'def': 'a short knife with a pointed blade used for piercing or stabbing', 'name': 'dagger'}, {'frequency': 'r', 'id': 360, 'synset': 'dartboard.n.01', 'synonyms': ['dartboard'], 'def': 'a circular board of wood or cork used as the target in the game of darts', 'name': 'dartboard'}, {'frequency': 'r', 'id': 361, 'synset': 'date.n.08', 'synonyms': ['date_(fruit)'], 'def': 'sweet edible fruit of the date palm with a single long woody seed', 'name': 'date_(fruit)'}, {'frequency': 'f', 'id': 362, 'synset': 'deck_chair.n.01', 'synonyms': ['deck_chair', 'beach_chair'], 'def': 'a folding chair for use outdoors; a wooden frame supports a length of canvas', 'name': 'deck_chair'}, {'frequency': 'c', 'id': 363, 'synset': 'deer.n.01', 'synonyms': ['deer', 'cervid'], 'def': "distinguished from Bovidae by the male's having solid deciduous antlers", 'name': 'deer'}, {'frequency': 'c', 'id': 364, 'synset': 'dental_floss.n.01', 'synonyms': ['dental_floss', 'floss'], 'def': 'a soft thread for cleaning the spaces between the teeth', 'name': 'dental_floss'}, {'frequency': 'f', 'id': 365, 'synset': 'desk.n.01', 'synonyms': ['desk'], 'def': 'a piece of furniture with a writing surface and usually drawers or other compartments', 'name': 'desk'}, {'frequency': 'r', 'id': 366, 'synset': 'detergent.n.01', 'synonyms': ['detergent'], 'def': 'a surface-active chemical widely used in industry and laundering', 'name': 'detergent'}, {'frequency': 'c', 'id': 367, 'synset': 'diaper.n.01', 'synonyms': ['diaper'], 'def': 'garment consisting of a folded cloth drawn up between the legs and fastened at the waist', 'name': 'diaper'}, {'frequency': 'r', 'id': 368, 'synset': 'diary.n.01', 'synonyms': ['diary', 'journal'], 'def': 'a daily written record of (usually personal) experiences and observations', 'name': 'diary'}, {'frequency': 'r', 'id': 369, 'synset': 'die.n.01', 'synonyms': ['die', 'dice'], 'def': 'a small cube with 1 to 6 spots on the six faces; used in gambling', 'name': 'die'}, {'frequency': 'r', 'id': 370, 'synset': 'dinghy.n.01', 'synonyms': ['dinghy', 'dory', 'rowboat'], 'def': 'a small boat of shallow draft with seats and oars with which it is propelled', 'name': 'dinghy'}, {'frequency': 'f', 'id': 371, 'synset': 'dining_table.n.01', 'synonyms': ['dining_table'], 'def': 'a table at which meals are served', 'name': 'dining_table'}, {'frequency': 'r', 'id': 372, 'synset': 'dinner_jacket.n.01', 'synonyms': ['tux', 'tuxedo'], 'def': 'semiformal evening dress for men', 'name': 'tux'}, {'frequency': 'c', 'id': 373, 'synset': 'dish.n.01', 'synonyms': ['dish'], 'def': 'a piece of dishware normally used as a container for 
holding or serving food', 'name': 'dish'}, {'frequency': 'c', 'id': 374, 'synset': 'dish.n.05', 'synonyms': ['dish_antenna'], 'def': 'directional antenna consisting of a parabolic reflector', 'name': 'dish_antenna'}, {'frequency': 'c', 'id': 375, 'synset': 'dishrag.n.01', 'synonyms': ['dishrag', 'dishcloth'], 'def': 'a cloth for washing dishes', 'name': 'dishrag'}, {'frequency': 'c', 'id': 376, 'synset': 'dishtowel.n.01', 'synonyms': ['dishtowel', 'tea_towel'], 'def': 'a towel for drying dishes', 'name': 'dishtowel'}, {'frequency': 'f', 'id': 377, 'synset': 'dishwasher.n.01', 'synonyms': ['dishwasher', 'dishwashing_machine'], 'def': 'a machine for washing dishes', 'name': 'dishwasher'}, {'frequency': 'r', 'id': 378, 'synset': 'dishwasher_detergent.n.01', 'synonyms': ['dishwasher_detergent', 'dishwashing_detergent', 'dishwashing_liquid'], 'def': 'a low-sudsing detergent designed for use in dishwashers', 'name': 'dishwasher_detergent'}, {'frequency': 'r', 'id': 379, 'synset': 'diskette.n.01', 'synonyms': ['diskette', 'floppy', 'floppy_disk'], 'def': 'a small plastic magnetic disk enclosed in a stiff envelope used to store data', 'name': 'diskette'}, {'frequency': 'c', 'id': 380, 'synset': 'dispenser.n.01', 'synonyms': ['dispenser'], 'def': 'a container so designed that the contents can be used in prescribed amounts', 'name': 'dispenser'}, {'frequency': 'c', 'id': 381, 'synset': 'dixie_cup.n.01', 'synonyms': ['Dixie_cup', 'paper_cup'], 'def': 'a disposable cup made of paper; for holding drinks', 'name': 'Dixie_cup'}, {'frequency': 'f', 'id': 382, 'synset': 'dog.n.01', 'synonyms': ['dog'], 'def': 'a common domesticated dog', 'name': 'dog'}, {'frequency': 'f', 'id': 383, 'synset': 'dog_collar.n.01', 'synonyms': ['dog_collar'], 'def': 'a collar for a dog', 'name': 'dog_collar'}, {'frequency': 'c', 'id': 384, 'synset': 'doll.n.01', 'synonyms': ['doll'], 'def': 'a toy replica of a HUMAN (NOT AN ANIMAL)', 'name': 'doll'}, {'frequency': 'r', 'id': 385, 'synset': 'dollar.n.02', 'synonyms': ['dollar', 'dollar_bill', 'one_dollar_bill'], 'def': 'a piece of paper money worth one dollar', 'name': 'dollar'}, {'frequency': 'r', 'id': 386, 'synset': 'dolphin.n.02', 'synonyms': ['dolphin'], 'def': 'any of various small toothed whales with a beaklike snout; larger than porpoises', 'name': 'dolphin'}, {'frequency': 'c', 'id': 387, 'synset': 'domestic_ass.n.01', 'synonyms': ['domestic_ass', 'donkey'], 'def': 'domestic beast of burden descended from the African wild ass; patient but stubborn', 'name': 'domestic_ass'}, {'frequency': 'r', 'id': 388, 'synset': 'domino.n.03', 'synonyms': ['eye_mask'], 'def': 'a mask covering the upper part of the face but with holes for the eyes', 'name': 'eye_mask'}, {'frequency': 'r', 'id': 389, 'synset': 'doorbell.n.01', 'synonyms': ['doorbell', 'buzzer'], 'def': 'a button at an outer door that gives a ringing or buzzing signal when pushed', 'name': 'doorbell'}, {'frequency': 'f', 'id': 390, 'synset': 'doorknob.n.01', 'synonyms': ['doorknob', 'doorhandle'], 'def': "a knob used to open a door (often called `doorhandle' in Great Britain)", 'name': 'doorknob'}, {'frequency': 'c', 'id': 391, 'synset': 'doormat.n.02', 'synonyms': ['doormat', 'welcome_mat'], 'def': 'a mat placed outside an exterior door for wiping the shoes before entering', 'name': 'doormat'}, {'frequency': 'f', 'id': 392, 'synset': 'doughnut.n.02', 'synonyms': ['doughnut', 'donut'], 'def': 'a small ring-shaped friedcake', 'name': 'doughnut'}, {'frequency': 'r', 'id': 393, 'synset': 'dove.n.01', 'synonyms': ['dove'], 
'def': 'any of numerous small pigeons', 'name': 'dove'}, {'frequency': 'r', 'id': 394, 'synset': 'dragonfly.n.01', 'synonyms': ['dragonfly'], 'def': 'slender-bodied non-stinging insect having iridescent wings that are outspread at rest', 'name': 'dragonfly'}, {'frequency': 'f', 'id': 395, 'synset': 'drawer.n.01', 'synonyms': ['drawer'], 'def': 'a boxlike container in a piece of furniture; made so as to slide in and out', 'name': 'drawer'}, {'frequency': 'c', 'id': 396, 'synset': 'drawers.n.01', 'synonyms': ['underdrawers', 'boxers', 'boxershorts'], 'def': 'underpants worn by men', 'name': 'underdrawers'}, {'frequency': 'f', 'id': 397, 'synset': 'dress.n.01', 'synonyms': ['dress', 'frock'], 'def': 'a one-piece garment for a woman; has skirt and bodice', 'name': 'dress'}, {'frequency': 'c', 'id': 398, 'synset': 'dress_hat.n.01', 'synonyms': ['dress_hat', 'high_hat', 'opera_hat', 'silk_hat', 'top_hat'], 'def': "a man's hat with a tall crown; usually covered with silk or with beaver fur", 'name': 'dress_hat'}, {'frequency': 'c', 'id': 399, 'synset': 'dress_suit.n.01', 'synonyms': ['dress_suit'], 'def': 'formalwear consisting of full evening dress for men', 'name': 'dress_suit'}, {'frequency': 'c', 'id': 400, 'synset': 'dresser.n.05', 'synonyms': ['dresser'], 'def': 'a cabinet with shelves', 'name': 'dresser'}, {'frequency': 'c', 'id': 401, 'synset': 'drill.n.01', 'synonyms': ['drill'], 'def': 'a tool with a sharp rotating point for making holes in hard materials', 'name': 'drill'}, {'frequency': 'r', 'id': 402, 'synset': 'drinking_fountain.n.01', 'synonyms': ['drinking_fountain'], 'def': 'a public fountain to provide a jet of drinking water', 'name': 'drinking_fountain'}, {'frequency': 'r', 'id': 403, 'synset': 'drone.n.04', 'synonyms': ['drone'], 'def': 'an aircraft without a pilot that is operated by remote control', 'name': 'drone'}, {'frequency': 'r', 'id': 404, 'synset': 'dropper.n.01', 'synonyms': ['dropper', 'eye_dropper'], 'def': 'pipet consisting of a small tube with a vacuum bulb at one end for drawing liquid in and releasing it a drop at a time', 'name': 'dropper'}, {'frequency': 'c', 'id': 405, 'synset': 'drum.n.01', 'synonyms': ['drum_(musical_instrument)'], 'def': 'a musical percussion instrument; usually consists of a hollow cylinder with a membrane stretched across each end', 'name': 'drum_(musical_instrument)'}, {'frequency': 'r', 'id': 406, 'synset': 'drumstick.n.02', 'synonyms': ['drumstick'], 'def': 'a stick used for playing a drum', 'name': 'drumstick'}, {'frequency': 'f', 'id': 407, 'synset': 'duck.n.01', 'synonyms': ['duck'], 'def': 'small web-footed broad-billed swimming bird', 'name': 'duck'}, {'frequency': 'r', 'id': 408, 'synset': 'duckling.n.02', 'synonyms': ['duckling'], 'def': 'young duck', 'name': 'duckling'}, {'frequency': 'c', 'id': 409, 'synset': 'duct_tape.n.01', 'synonyms': ['duct_tape'], 'def': 'a wide silvery adhesive tape', 'name': 'duct_tape'}, {'frequency': 'f', 'id': 410, 'synset': 'duffel_bag.n.01', 'synonyms': ['duffel_bag', 'duffle_bag', 'duffel', 'duffle'], 'def': 'a large cylindrical bag of heavy cloth', 'name': 'duffel_bag'}, {'frequency': 'r', 'id': 411, 'synset': 'dumbbell.n.01', 'synonyms': ['dumbbell'], 'def': 'an exercising weight with two ball-like ends connected by a short handle', 'name': 'dumbbell'}, {'frequency': 'c', 'id': 412, 'synset': 'dumpster.n.01', 'synonyms': ['dumpster'], 'def': 'a container designed to receive and transport and dump waste', 'name': 'dumpster'}, {'frequency': 'r', 'id': 413, 'synset': 'dustpan.n.02', 
'synonyms': ['dustpan'], 'def': 'a short-handled receptacle into which dust can be swept', 'name': 'dustpan'}, {'frequency': 'r', 'id': 414, 'synset': 'dutch_oven.n.02', 'synonyms': ['Dutch_oven'], 'def': 'iron or earthenware cooking pot; used for stews', 'name': 'Dutch_oven'}, {'frequency': 'c', 'id': 415, 'synset': 'eagle.n.01', 'synonyms': ['eagle'], 'def': 'large birds of prey noted for their broad wings and strong soaring flight', 'name': 'eagle'}, {'frequency': 'f', 'id': 416, 'synset': 'earphone.n.01', 'synonyms': ['earphone', 'earpiece', 'headphone'], 'def': 'device for listening to audio that is held over or inserted into the ear', 'name': 'earphone'}, {'frequency': 'r', 'id': 417, 'synset': 'earplug.n.01', 'synonyms': ['earplug'], 'def': 'a soft plug that is inserted into the ear canal to block sound', 'name': 'earplug'}, {'frequency': 'f', 'id': 418, 'synset': 'earring.n.01', 'synonyms': ['earring'], 'def': 'jewelry to ornament the ear', 'name': 'earring'}, {'frequency': 'c', 'id': 419, 'synset': 'easel.n.01', 'synonyms': ['easel'], 'def': "an upright tripod for displaying something (usually an artist's canvas)", 'name': 'easel'}, {'frequency': 'r', 'id': 420, 'synset': 'eclair.n.01', 'synonyms': ['eclair'], 'def': 'oblong cream puff', 'name': 'eclair'}, {'frequency': 'r', 'id': 421, 'synset': 'eel.n.01', 'synonyms': ['eel'], 'def': 'an elongate fish with fatty flesh', 'name': 'eel'}, {'frequency': 'f', 'id': 422, 'synset': 'egg.n.02', 'synonyms': ['egg', 'eggs'], 'def': 'oval reproductive body of a fowl (especially a hen) used as food', 'name': 'egg'}, {'frequency': 'r', 'id': 423, 'synset': 'egg_roll.n.01', 'synonyms': ['egg_roll', 'spring_roll'], 'def': 'minced vegetables and meat wrapped in a pancake and fried', 'name': 'egg_roll'}, {'frequency': 'c', 'id': 424, 'synset': 'egg_yolk.n.01', 'synonyms': ['egg_yolk', 'yolk_(egg)'], 'def': 'the yellow spherical part of an egg', 'name': 'egg_yolk'}, {'frequency': 'c', 'id': 425, 'synset': 'eggbeater.n.02', 'synonyms': ['eggbeater', 'eggwhisk'], 'def': 'a mixer for beating eggs or whipping cream', 'name': 'eggbeater'}, {'frequency': 'c', 'id': 426, 'synset': 'eggplant.n.01', 'synonyms': ['eggplant', 'aubergine'], 'def': 'egg-shaped vegetable having a shiny skin typically dark purple', 'name': 'eggplant'}, {'frequency': 'r', 'id': 427, 'synset': 'electric_chair.n.01', 'synonyms': ['electric_chair'], 'def': 'a chair-shaped instrument of execution by electrocution', 'name': 'electric_chair'}, {'frequency': 'f', 'id': 428, 'synset': 'electric_refrigerator.n.01', 'synonyms': ['refrigerator'], 'def': 'a refrigerator in which the coolant is pumped around by an electric motor', 'name': 'refrigerator'}, {'frequency': 'f', 'id': 429, 'synset': 'elephant.n.01', 'synonyms': ['elephant'], 'def': 'a common elephant', 'name': 'elephant'}, {'frequency': 'r', 'id': 430, 'synset': 'elk.n.01', 'synonyms': ['elk', 'moose'], 'def': 'large northern deer with enormous flattened antlers in the male', 'name': 'elk'}, {'frequency': 'c', 'id': 431, 'synset': 'envelope.n.01', 'synonyms': ['envelope'], 'def': 'a flat (usually rectangular) container for a letter, thin package, etc.', 'name': 'envelope'}, {'frequency': 'c', 'id': 432, 'synset': 'eraser.n.01', 'synonyms': ['eraser'], 'def': 'an implement used to erase something', 'name': 'eraser'}, {'frequency': 'r', 'id': 433, 'synset': 'escargot.n.01', 'synonyms': ['escargot'], 'def': 'edible snail usually served in the shell with a sauce of melted butter and garlic', 'name': 'escargot'}, {'frequency': 'r', 
'id': 434, 'synset': 'eyepatch.n.01', 'synonyms': ['eyepatch'], 'def': 'a protective cloth covering for an injured eye', 'name': 'eyepatch'}, {'frequency': 'r', 'id': 435, 'synset': 'falcon.n.01', 'synonyms': ['falcon'], 'def': 'birds of prey having long pointed powerful wings adapted for swift flight', 'name': 'falcon'}, {'frequency': 'f', 'id': 436, 'synset': 'fan.n.01', 'synonyms': ['fan'], 'def': 'a device for creating a current of air by movement of a surface or surfaces', 'name': 'fan'}, {'frequency': 'f', 'id': 437, 'synset': 'faucet.n.01', 'synonyms': ['faucet', 'spigot', 'tap'], 'def': 'a regulator for controlling the flow of a liquid from a reservoir', 'name': 'faucet'}, {'frequency': 'r', 'id': 438, 'synset': 'fedora.n.01', 'synonyms': ['fedora'], 'def': 'a hat made of felt with a creased crown', 'name': 'fedora'}, {'frequency': 'r', 'id': 439, 'synset': 'ferret.n.02', 'synonyms': ['ferret'], 'def': 'domesticated albino variety of the European polecat bred for hunting rats and rabbits', 'name': 'ferret'}, {'frequency': 'c', 'id': 440, 'synset': 'ferris_wheel.n.01', 'synonyms': ['Ferris_wheel'], 'def': 'a large wheel with suspended seats that remain upright as the wheel rotates', 'name': 'Ferris_wheel'}, {'frequency': 'r', 'id': 441, 'synset': 'ferry.n.01', 'synonyms': ['ferry', 'ferryboat'], 'def': 'a boat that transports people or vehicles across a body of water and operates on a regular schedule', 'name': 'ferry'}, {'frequency': 'r', 'id': 442, 'synset': 'fig.n.04', 'synonyms': ['fig_(fruit)'], 'def': 'fleshy sweet pear-shaped yellowish or purple fruit eaten fresh or preserved or dried', 'name': 'fig_(fruit)'}, {'frequency': 'c', 'id': 443, 'synset': 'fighter.n.02', 'synonyms': ['fighter_jet', 'fighter_aircraft', 'attack_aircraft'], 'def': 'a high-speed military or naval airplane designed to destroy enemy targets', 'name': 'fighter_jet'}, {'frequency': 'f', 'id': 444, 'synset': 'figurine.n.01', 'synonyms': ['figurine'], 'def': 'a small carved or molded figure', 'name': 'figurine'}, {'frequency': 'c', 'id': 445, 'synset': 'file.n.03', 'synonyms': ['file_cabinet', 'filing_cabinet'], 'def': 'office furniture consisting of a container for keeping papers in order', 'name': 'file_cabinet'}, {'frequency': 'r', 'id': 446, 'synset': 'file.n.04', 'synonyms': ['file_(tool)'], 'def': 'a steel hand tool with small sharp teeth on some or all of its surfaces; used for smoothing wood or metal', 'name': 'file_(tool)'}, {'frequency': 'f', 'id': 447, 'synset': 'fire_alarm.n.02', 'synonyms': ['fire_alarm', 'smoke_alarm'], 'def': 'an alarm that is tripped off by fire or smoke', 'name': 'fire_alarm'}, {'frequency': 'c', 'id': 448, 'synset': 'fire_engine.n.01', 'synonyms': ['fire_engine', 'fire_truck'], 'def': 'large trucks that carry firefighters and equipment to the site of a fire', 'name': 'fire_engine'}, {'frequency': 'c', 'id': 449, 'synset': 'fire_extinguisher.n.01', 'synonyms': ['fire_extinguisher', 'extinguisher'], 'def': 'a manually operated device for extinguishing small fires', 'name': 'fire_extinguisher'}, {'frequency': 'c', 'id': 450, 'synset': 'fire_hose.n.01', 'synonyms': ['fire_hose'], 'def': 'a large hose that carries water from a fire hydrant to the site of the fire', 'name': 'fire_hose'}, {'frequency': 'f', 'id': 451, 'synset': 'fireplace.n.01', 'synonyms': ['fireplace'], 'def': 'an open recess in a wall at the base of a chimney where a fire can be built', 'name': 'fireplace'}, {'frequency': 'f', 'id': 452, 'synset': 'fireplug.n.01', 'synonyms': ['fireplug', 'fire_hydrant', 
'hydrant'], 'def': 'an upright hydrant for drawing water to use in fighting a fire', 'name': 'fireplug'}, {'frequency': 'c', 'id': 453, 'synset': 'fish.n.01', 'synonyms': ['fish'], 'def': 'any of various mostly cold-blooded aquatic vertebrates usually having scales and breathing through gills', 'name': 'fish'}, {'frequency': 'r', 'id': 454, 'synset': 'fish.n.02', 'synonyms': ['fish_(food)'], 'def': 'the flesh of fish used as food', 'name': 'fish_(food)'}, {'frequency': 'r', 'id': 455, 'synset': 'fishbowl.n.02', 'synonyms': ['fishbowl', 'goldfish_bowl'], 'def': 'a transparent bowl in which small fish are kept', 'name': 'fishbowl'}, {'frequency': 'r', 'id': 456, 'synset': 'fishing_boat.n.01', 'synonyms': ['fishing_boat', 'fishing_vessel'], 'def': 'a vessel for fishing', 'name': 'fishing_boat'}, {'frequency': 'c', 'id': 457, 'synset': 'fishing_rod.n.01', 'synonyms': ['fishing_rod', 'fishing_pole'], 'def': 'a rod that is used in fishing to extend the fishing line', 'name': 'fishing_rod'}, {'frequency': 'f', 'id': 458, 'synset': 'flag.n.01', 'synonyms': ['flag'], 'def': 'emblem usually consisting of a rectangular piece of cloth of distinctive design (do not include pole)', 'name': 'flag'}, {'frequency': 'f', 'id': 459, 'synset': 'flagpole.n.02', 'synonyms': ['flagpole', 'flagstaff'], 'def': 'a tall staff or pole on which a flag is raised', 'name': 'flagpole'}, {'frequency': 'c', 'id': 460, 'synset': 'flamingo.n.01', 'synonyms': ['flamingo'], 'def': 'large pink web-footed bird with down-bent bill', 'name': 'flamingo'}, {'frequency': 'c', 'id': 461, 'synset': 'flannel.n.01', 'synonyms': ['flannel'], 'def': 'a soft light woolen fabric; used for clothing', 'name': 'flannel'}, {'frequency': 'r', 'id': 462, 'synset': 'flash.n.10', 'synonyms': ['flash', 'flashbulb'], 'def': 'a lamp for providing momentary light to take a photograph', 'name': 'flash'}, {'frequency': 'c', 'id': 463, 'synset': 'flashlight.n.01', 'synonyms': ['flashlight', 'torch'], 'def': 'a small portable battery-powered electric lamp', 'name': 'flashlight'}, {'frequency': 'r', 'id': 464, 'synset': 'fleece.n.03', 'synonyms': ['fleece'], 'def': 'a soft bulky fabric with deep pile; used chiefly for clothing', 'name': 'fleece'}, {'frequency': 'f', 'id': 465, 'synset': 'flip-flop.n.02', 'synonyms': ['flip-flop_(sandal)'], 'def': 'a backless sandal held to the foot by a thong between two toes', 'name': 'flip-flop_(sandal)'}, {'frequency': 'c', 'id': 466, 'synset': 'flipper.n.01', 'synonyms': ['flipper_(footwear)', 'fin_(footwear)'], 'def': 'a shoe to aid a person in swimming', 'name': 'flipper_(footwear)'}, {'frequency': 'f', 'id': 467, 'synset': 'flower_arrangement.n.01', 'synonyms': ['flower_arrangement', 'floral_arrangement'], 'def': 'a decorative arrangement of flowers', 'name': 'flower_arrangement'}, {'frequency': 'c', 'id': 468, 'synset': 'flute.n.02', 'synonyms': ['flute_glass', 'champagne_flute'], 'def': 'a tall narrow wineglass', 'name': 'flute_glass'}, {'frequency': 'r', 'id': 469, 'synset': 'foal.n.01', 'synonyms': ['foal'], 'def': 'a young horse', 'name': 'foal'}, {'frequency': 'c', 'id': 470, 'synset': 'folding_chair.n.01', 'synonyms': ['folding_chair'], 'def': 'a chair that can be folded flat for storage', 'name': 'folding_chair'}, {'frequency': 'c', 'id': 471, 'synset': 'food_processor.n.01', 'synonyms': ['food_processor'], 'def': 'a kitchen appliance for shredding, blending, chopping, or slicing food', 'name': 'food_processor'}, {'frequency': 'c', 'id': 472, 'synset': 'football.n.02', 'synonyms': ['football_(American)'], 
'def': 'the inflated oblong ball used in playing American football', 'name': 'football_(American)'}, {'frequency': 'r', 'id': 473, 'synset': 'football_helmet.n.01', 'synonyms': ['football_helmet'], 'def': 'a padded helmet with a face mask to protect the head of football players', 'name': 'football_helmet'}, {'frequency': 'c', 'id': 474, 'synset': 'footstool.n.01', 'synonyms': ['footstool', 'footrest'], 'def': 'a low seat or a stool to rest the feet of a seated person', 'name': 'footstool'}, {'frequency': 'f', 'id': 475, 'synset': 'fork.n.01', 'synonyms': ['fork'], 'def': 'cutlery used for serving and eating food', 'name': 'fork'}, {'frequency': 'r', 'id': 476, 'synset': 'forklift.n.01', 'synonyms': ['forklift'], 'def': 'an industrial vehicle with a power operated fork in front that can be inserted under loads to lift and move them', 'name': 'forklift'}, {'frequency': 'r', 'id': 477, 'synset': 'freight_car.n.01', 'synonyms': ['freight_car'], 'def': 'a railway car that carries freight', 'name': 'freight_car'}, {'frequency': 'r', 'id': 478, 'synset': 'french_toast.n.01', 'synonyms': ['French_toast'], 'def': 'bread slice dipped in egg and milk and fried', 'name': 'French_toast'}, {'frequency': 'c', 'id': 479, 'synset': 'freshener.n.01', 'synonyms': ['freshener', 'air_freshener'], 'def': 'anything that freshens', 'name': 'freshener'}, {'frequency': 'f', 'id': 480, 'synset': 'frisbee.n.01', 'synonyms': ['frisbee'], 'def': 'a light, plastic disk propelled with a flip of the wrist for recreation or competition', 'name': 'frisbee'}, {'frequency': 'c', 'id': 481, 'synset': 'frog.n.01', 'synonyms': ['frog', 'toad', 'toad_frog'], 'def': 'a tailless stout-bodied amphibians with long hind limbs for leaping', 'name': 'frog'}, {'frequency': 'c', 'id': 482, 'synset': 'fruit_juice.n.01', 'synonyms': ['fruit_juice'], 'def': 'drink produced by squeezing or crushing fruit', 'name': 'fruit_juice'}, {'frequency': 'r', 'id': 483, 'synset': 'fruit_salad.n.01', 'synonyms': ['fruit_salad'], 'def': 'salad composed of fruits', 'name': 'fruit_salad'}, {'frequency': 'c', 'id': 484, 'synset': 'frying_pan.n.01', 'synonyms': ['frying_pan', 'frypan', 'skillet'], 'def': 'a pan used for frying foods', 'name': 'frying_pan'}, {'frequency': 'r', 'id': 485, 'synset': 'fudge.n.01', 'synonyms': ['fudge'], 'def': 'soft creamy candy', 'name': 'fudge'}, {'frequency': 'r', 'id': 486, 'synset': 'funnel.n.02', 'synonyms': ['funnel'], 'def': 'a cone-shaped utensil used to channel a substance into a container with a small mouth', 'name': 'funnel'}, {'frequency': 'c', 'id': 487, 'synset': 'futon.n.01', 'synonyms': ['futon'], 'def': 'a pad that is used for sleeping on the floor or on a raised frame', 'name': 'futon'}, {'frequency': 'r', 'id': 488, 'synset': 'gag.n.02', 'synonyms': ['gag', 'muzzle'], 'def': "restraint put into a person's mouth to prevent speaking or shouting", 'name': 'gag'}, {'frequency': 'r', 'id': 489, 'synset': 'garbage.n.03', 'synonyms': ['garbage'], 'def': 'a receptacle where waste can be discarded', 'name': 'garbage'}, {'frequency': 'c', 'id': 490, 'synset': 'garbage_truck.n.01', 'synonyms': ['garbage_truck'], 'def': 'a truck for collecting domestic refuse', 'name': 'garbage_truck'}, {'frequency': 'c', 'id': 491, 'synset': 'garden_hose.n.01', 'synonyms': ['garden_hose'], 'def': 'a hose used for watering a lawn or garden', 'name': 'garden_hose'}, {'frequency': 'c', 'id': 492, 'synset': 'gargle.n.01', 'synonyms': ['gargle', 'mouthwash'], 'def': 'a medicated solution used for gargling and rinsing the mouth', 'name': 
'gargle'}, {'frequency': 'r', 'id': 493, 'synset': 'gargoyle.n.02', 'synonyms': ['gargoyle'], 'def': 'an ornament consisting of a grotesquely carved figure of a person or animal', 'name': 'gargoyle'}, {'frequency': 'c', 'id': 494, 'synset': 'garlic.n.02', 'synonyms': ['garlic', 'ail'], 'def': 'aromatic bulb used as seasoning', 'name': 'garlic'}, {'frequency': 'r', 'id': 495, 'synset': 'gasmask.n.01', 'synonyms': ['gasmask', 'respirator', 'gas_helmet'], 'def': 'a protective face mask with a filter', 'name': 'gasmask'}, {'frequency': 'r', 'id': 496, 'synset': 'gazelle.n.01', 'synonyms': ['gazelle'], 'def': 'small swift graceful antelope of Africa and Asia having lustrous eyes', 'name': 'gazelle'}, {'frequency': 'c', 'id': 497, 'synset': 'gelatin.n.02', 'synonyms': ['gelatin', 'jelly'], 'def': 'an edible jelly made with gelatin and used as a dessert or salad base or a coating for foods', 'name': 'gelatin'}, {'frequency': 'r', 'id': 498, 'synset': 'gem.n.02', 'synonyms': ['gemstone'], 'def': 'a crystalline rock that can be cut and polished for jewelry', 'name': 'gemstone'}, {'frequency': 'c', 'id': 499, 'synset': 'giant_panda.n.01', 'synonyms': ['giant_panda', 'panda', 'panda_bear'], 'def': 'large black-and-white herbivorous mammal of bamboo forests of China and Tibet', 'name': 'giant_panda'}, {'frequency': 'c', 'id': 500, 'synset': 'gift_wrap.n.01', 'synonyms': ['gift_wrap'], 'def': 'attractive wrapping paper suitable for wrapping gifts', 'name': 'gift_wrap'}, {'frequency': 'c', 'id': 501, 'synset': 'ginger.n.03', 'synonyms': ['ginger', 'gingerroot'], 'def': 'the root of the common ginger plant; used fresh as a seasoning', 'name': 'ginger'}, {'frequency': 'f', 'id': 502, 'synset': 'giraffe.n.01', 'synonyms': ['giraffe'], 'def': 'tall animal having a spotted coat and small horns and very long neck and legs', 'name': 'giraffe'}, {'frequency': 'c', 'id': 503, 'synset': 'girdle.n.02', 'synonyms': ['cincture', 'sash', 'waistband', 'waistcloth'], 'def': 'a band of material around the waist that strengthens a skirt or trousers', 'name': 'cincture'}, {'frequency': 'f', 'id': 504, 'synset': 'glass.n.02', 'synonyms': ['glass_(drink_container)', 'drinking_glass'], 'def': 'a container for holding liquids while drinking', 'name': 'glass_(drink_container)'}, {'frequency': 'c', 'id': 505, 'synset': 'globe.n.03', 'synonyms': ['globe'], 'def': 'a sphere on which a map (especially of the earth) is represented', 'name': 'globe'}, {'frequency': 'f', 'id': 506, 'synset': 'glove.n.02', 'synonyms': ['glove'], 'def': 'handwear covering the hand', 'name': 'glove'}, {'frequency': 'c', 'id': 507, 'synset': 'goat.n.01', 'synonyms': ['goat'], 'def': 'a common goat', 'name': 'goat'}, {'frequency': 'f', 'id': 508, 'synset': 'goggles.n.01', 'synonyms': ['goggles'], 'def': 'tight-fitting spectacles worn to protect the eyes', 'name': 'goggles'}, {'frequency': 'r', 'id': 509, 'synset': 'goldfish.n.01', 'synonyms': ['goldfish'], 'def': 'small golden or orange-red freshwater fishes used as pond or aquarium pets', 'name': 'goldfish'}, {'frequency': 'r', 'id': 510, 'synset': 'golf_club.n.02', 'synonyms': ['golf_club', 'golf-club'], 'def': 'golf equipment used by a golfer to hit a golf ball', 'name': 'golf_club'}, {'frequency': 'c', 'id': 511, 'synset': 'golfcart.n.01', 'synonyms': ['golfcart'], 'def': 'a small motor vehicle in which golfers can ride between shots', 'name': 'golfcart'}, {'frequency': 'r', 'id': 512, 'synset': 'gondola.n.02', 'synonyms': ['gondola_(boat)'], 'def': 'long narrow flat-bottomed boat propelled by 
sculling; traditionally used on canals of Venice', 'name': 'gondola_(boat)'}, {'frequency': 'c', 'id': 513, 'synset': 'goose.n.01', 'synonyms': ['goose'], 'def': 'loud, web-footed long-necked aquatic birds usually larger than ducks', 'name': 'goose'}, {'frequency': 'r', 'id': 514, 'synset': 'gorilla.n.01', 'synonyms': ['gorilla'], 'def': 'largest ape', 'name': 'gorilla'}, {'frequency': 'r', 'id': 515, 'synset': 'gourd.n.02', 'synonyms': ['gourd'], 'def': 'any of numerous inedible fruits with hard rinds', 'name': 'gourd'}, {'frequency': 'r', 'id': 516, 'synset': 'gown.n.04', 'synonyms': ['surgical_gown', 'scrubs_(surgical_clothing)'], 'def': 'protective garment worn by surgeons during operations', 'name': 'surgical_gown'}, {'frequency': 'f', 'id': 517, 'synset': 'grape.n.01', 'synonyms': ['grape'], 'def': 'any of various juicy fruit with green or purple skins; grow in clusters', 'name': 'grape'}, {'frequency': 'r', 'id': 518, 'synset': 'grasshopper.n.01', 'synonyms': ['grasshopper'], 'def': 'plant-eating insect with hind legs adapted for leaping', 'name': 'grasshopper'}, {'frequency': 'c', 'id': 519, 'synset': 'grater.n.01', 'synonyms': ['grater'], 'def': 'utensil with sharp perforations for shredding foods (as vegetables or cheese)', 'name': 'grater'}, {'frequency': 'c', 'id': 520, 'synset': 'gravestone.n.01', 'synonyms': ['gravestone', 'headstone', 'tombstone'], 'def': 'a stone that is used to mark a grave', 'name': 'gravestone'}, {'frequency': 'r', 'id': 521, 'synset': 'gravy_boat.n.01', 'synonyms': ['gravy_boat', 'gravy_holder'], 'def': 'a dish (often boat-shaped) for serving gravy or sauce', 'name': 'gravy_boat'}, {'frequency': 'c', 'id': 522, 'synset': 'green_bean.n.02', 'synonyms': ['green_bean'], 'def': 'a common bean plant cultivated for its slender green edible pods', 'name': 'green_bean'}, {'frequency': 'c', 'id': 523, 'synset': 'green_onion.n.01', 'synonyms': ['green_onion', 'spring_onion', 'scallion'], 'def': 'a young onion before the bulb has enlarged', 'name': 'green_onion'}, {'frequency': 'r', 'id': 524, 'synset': 'griddle.n.01', 'synonyms': ['griddle'], 'def': 'cooking utensil consisting of a flat heated surface on which food is cooked', 'name': 'griddle'}, {'frequency': 'r', 'id': 525, 'synset': 'grillroom.n.01', 'synonyms': ['grillroom', 'grill_(restaurant)'], 'def': 'a restaurant where food is cooked on a grill', 'name': 'grillroom'}, {'frequency': 'r', 'id': 526, 'synset': 'grinder.n.04', 'synonyms': ['grinder_(tool)'], 'def': 'a machine tool that polishes metal', 'name': 'grinder_(tool)'}, {'frequency': 'r', 'id': 527, 'synset': 'grits.n.01', 'synonyms': ['grits', 'hominy_grits'], 'def': 'coarsely ground corn boiled as a breakfast dish', 'name': 'grits'}, {'frequency': 'c', 'id': 528, 'synset': 'grizzly.n.01', 'synonyms': ['grizzly', 'grizzly_bear'], 'def': 'powerful brownish-yellow bear of the uplands of western North America', 'name': 'grizzly'}, {'frequency': 'c', 'id': 529, 'synset': 'grocery_bag.n.01', 'synonyms': ['grocery_bag'], 'def': "a sack for holding customer's groceries", 'name': 'grocery_bag'}, {'frequency': 'r', 'id': 530, 'synset': 'guacamole.n.01', 'synonyms': ['guacamole'], 'def': 'a dip made of mashed avocado mixed with chopped onions and other seasonings', 'name': 'guacamole'}, {'frequency': 'f', 'id': 531, 'synset': 'guitar.n.01', 'synonyms': ['guitar'], 'def': 'a stringed instrument usually having six strings; played by strumming or plucking', 'name': 'guitar'}, {'frequency': 'c', 'id': 532, 'synset': 'gull.n.02', 'synonyms': ['gull', 'seagull'], 
'def': 'mostly white aquatic bird having long pointed wings and short legs', 'name': 'gull'}, {'frequency': 'c', 'id': 533, 'synset': 'gun.n.01', 'synonyms': ['gun'], 'def': 'a weapon that discharges a bullet at high velocity from a metal tube', 'name': 'gun'}, {'frequency': 'r', 'id': 534, 'synset': 'hair_spray.n.01', 'synonyms': ['hair_spray'], 'def': 'substance sprayed on the hair to hold it in place', 'name': 'hair_spray'}, {'frequency': 'c', 'id': 535, 'synset': 'hairbrush.n.01', 'synonyms': ['hairbrush'], 'def': "a brush used to groom a person's hair", 'name': 'hairbrush'}, {'frequency': 'c', 'id': 536, 'synset': 'hairnet.n.01', 'synonyms': ['hairnet'], 'def': 'a small net that someone wears over their hair to keep it in place', 'name': 'hairnet'}, {'frequency': 'c', 'id': 537, 'synset': 'hairpin.n.01', 'synonyms': ['hairpin'], 'def': "a double pronged pin used to hold women's hair in place", 'name': 'hairpin'}, {'frequency': 'f', 'id': 538, 'synset': 'ham.n.01', 'synonyms': ['ham', 'jambon', 'gammon'], 'def': 'meat cut from the thigh of a hog (usually smoked)', 'name': 'ham'}, {'frequency': 'c', 'id': 539, 'synset': 'hamburger.n.01', 'synonyms': ['hamburger', 'beefburger', 'burger'], 'def': 'a sandwich consisting of a patty of minced beef served on a bun', 'name': 'hamburger'}, {'frequency': 'c', 'id': 540, 'synset': 'hammer.n.02', 'synonyms': ['hammer'], 'def': 'a hand tool with a heavy head and a handle; used to deliver an impulsive force by striking', 'name': 'hammer'}, {'frequency': 'r', 'id': 541, 'synset': 'hammock.n.02', 'synonyms': ['hammock'], 'def': 'a hanging bed of canvas or rope netting (usually suspended between two trees)', 'name': 'hammock'}, {'frequency': 'r', 'id': 542, 'synset': 'hamper.n.02', 'synonyms': ['hamper'], 'def': 'a basket usually with a cover', 'name': 'hamper'}, {'frequency': 'r', 'id': 543, 'synset': 'hamster.n.01', 'synonyms': ['hamster'], 'def': 'short-tailed burrowing rodent with large cheek pouches', 'name': 'hamster'}, {'frequency': 'c', 'id': 544, 'synset': 'hand_blower.n.01', 'synonyms': ['hair_dryer'], 'def': 'a hand-held electric blower that can blow warm air onto the hair', 'name': 'hair_dryer'}, {'frequency': 'r', 'id': 545, 'synset': 'hand_glass.n.01', 'synonyms': ['hand_glass', 'hand_mirror'], 'def': 'a mirror intended to be held in the hand', 'name': 'hand_glass'}, {'frequency': 'f', 'id': 546, 'synset': 'hand_towel.n.01', 'synonyms': ['hand_towel', 'face_towel'], 'def': 'a small towel used to dry the hands or face', 'name': 'hand_towel'}, {'frequency': 'c', 'id': 547, 'synset': 'handcart.n.01', 'synonyms': ['handcart', 'pushcart', 'hand_truck'], 'def': 'wheeled vehicle that can be pushed by a person', 'name': 'handcart'}, {'frequency': 'r', 'id': 548, 'synset': 'handcuff.n.01', 'synonyms': ['handcuff'], 'def': 'shackle that consists of a metal loop that can be locked around the wrist', 'name': 'handcuff'}, {'frequency': 'c', 'id': 549, 'synset': 'handkerchief.n.01', 'synonyms': ['handkerchief'], 'def': 'a square piece of cloth used for wiping the eyes or nose or as a costume accessory', 'name': 'handkerchief'}, {'frequency': 'f', 'id': 550, 'synset': 'handle.n.01', 'synonyms': ['handle', 'grip', 'handgrip'], 'def': 'the appendage to an object that is designed to be held in order to use or move it', 'name': 'handle'}, {'frequency': 'r', 'id': 551, 'synset': 'handsaw.n.01', 'synonyms': ['handsaw', "carpenter's_saw"], 'def': 'a saw used with one hand for cutting wood', 'name': 'handsaw'}, {'frequency': 'r', 'id': 552, 'synset': 
'hardback.n.01', 'synonyms': ['hardback_book', 'hardcover_book'], 'def': 'a book with cardboard or cloth or leather covers', 'name': 'hardback_book'}, {'frequency': 'r', 'id': 553, 'synset': 'harmonium.n.01', 'synonyms': ['harmonium', 'organ_(musical_instrument)', 'reed_organ_(musical_instrument)'], 'def': 'a free-reed instrument in which air is forced through the reeds by bellows', 'name': 'harmonium'}, {'frequency': 'f', 'id': 554, 'synset': 'hat.n.01', 'synonyms': ['hat'], 'def': 'headwear that protects the head from bad weather, sun, or worn for fashion', 'name': 'hat'}, {'frequency': 'r', 'id': 555, 'synset': 'hatbox.n.01', 'synonyms': ['hatbox'], 'def': 'a round piece of luggage for carrying hats', 'name': 'hatbox'}, {'frequency': 'r', 'id': 556, 'synset': 'hatch.n.03', 'synonyms': ['hatch'], 'def': 'a movable barrier covering a hatchway', 'name': 'hatch'}, {'frequency': 'c', 'id': 557, 'synset': 'head_covering.n.01', 'synonyms': ['veil'], 'def': 'a garment that covers the head and face', 'name': 'veil'}, {'frequency': 'f', 'id': 558, 'synset': 'headband.n.01', 'synonyms': ['headband'], 'def': 'a band worn around or over the head', 'name': 'headband'}, {'frequency': 'f', 'id': 559, 'synset': 'headboard.n.01', 'synonyms': ['headboard'], 'def': 'a vertical board or panel forming the head of a bedstead', 'name': 'headboard'}, {'frequency': 'f', 'id': 560, 'synset': 'headlight.n.01', 'synonyms': ['headlight', 'headlamp'], 'def': 'a powerful light with reflector; attached to the front of an automobile or locomotive', 'name': 'headlight'}, {'frequency': 'c', 'id': 561, 'synset': 'headscarf.n.01', 'synonyms': ['headscarf'], 'def': 'a kerchief worn over the head and tied under the chin', 'name': 'headscarf'}, {'frequency': 'r', 'id': 562, 'synset': 'headset.n.01', 'synonyms': ['headset'], 'def': 'receiver consisting of a pair of headphones', 'name': 'headset'}, {'frequency': 'c', 'id': 563, 'synset': 'headstall.n.01', 'synonyms': ['headstall_(for_horses)', 'headpiece_(for_horses)'], 'def': "the band that is the part of a bridle that fits around a horse's head", 'name': 'headstall_(for_horses)'}, {'frequency': 'r', 'id': 564, 'synset': 'hearing_aid.n.02', 'synonyms': ['hearing_aid'], 'def': 'an acoustic device used to direct sound to the ear of a hearing-impaired person', 'name': 'hearing_aid'}, {'frequency': 'c', 'id': 565, 'synset': 'heart.n.02', 'synonyms': ['heart'], 'def': 'a muscular organ; its contractions move the blood through the body', 'name': 'heart'}, {'frequency': 'c', 'id': 566, 'synset': 'heater.n.01', 'synonyms': ['heater', 'warmer'], 'def': 'device that heats water or supplies warmth to a room', 'name': 'heater'}, {'frequency': 'c', 'id': 567, 'synset': 'helicopter.n.01', 'synonyms': ['helicopter'], 'def': 'an aircraft without wings that obtains its lift from the rotation of overhead blades', 'name': 'helicopter'}, {'frequency': 'f', 'id': 568, 'synset': 'helmet.n.02', 'synonyms': ['helmet'], 'def': 'a protective headgear made of hard material to resist blows', 'name': 'helmet'}, {'frequency': 'r', 'id': 569, 'synset': 'heron.n.02', 'synonyms': ['heron'], 'def': 'grey or white wading bird with long neck and long legs and (usually) long bill', 'name': 'heron'}, {'frequency': 'c', 'id': 570, 'synset': 'highchair.n.01', 'synonyms': ['highchair', 'feeding_chair'], 'def': 'a chair for feeding a very young child', 'name': 'highchair'}, {'frequency': 'f', 'id': 571, 'synset': 'hinge.n.01', 'synonyms': ['hinge'], 'def': 'a joint that holds two parts together so that one can swing 
relative to the other', 'name': 'hinge'}, {'frequency': 'r', 'id': 572, 'synset': 'hippopotamus.n.01', 'synonyms': ['hippopotamus'], 'def': 'massive thick-skinned animal living in or around rivers of tropical Africa', 'name': 'hippopotamus'}, {'frequency': 'r', 'id': 573, 'synset': 'hockey_stick.n.01', 'synonyms': ['hockey_stick'], 'def': 'sports implement consisting of a stick used by hockey players to move the puck', 'name': 'hockey_stick'}, {'frequency': 'c', 'id': 574, 'synset': 'hog.n.03', 'synonyms': ['hog', 'pig'], 'def': 'domestic swine', 'name': 'hog'}, {'frequency': 'f', 'id': 575, 'synset': 'home_plate.n.01', 'synonyms': ['home_plate_(baseball)', 'home_base_(baseball)'], 'def': '(baseball) a rubber slab where the batter stands; it must be touched by a base runner in order to score', 'name': 'home_plate_(baseball)'}, {'frequency': 'c', 'id': 576, 'synset': 'honey.n.01', 'synonyms': ['honey'], 'def': 'a sweet yellow liquid produced by bees', 'name': 'honey'}, {'frequency': 'f', 'id': 577, 'synset': 'hood.n.06', 'synonyms': ['fume_hood', 'exhaust_hood'], 'def': 'metal covering leading to a vent that exhausts smoke or fumes', 'name': 'fume_hood'}, {'frequency': 'f', 'id': 578, 'synset': 'hook.n.05', 'synonyms': ['hook'], 'def': 'a curved or bent implement for suspending or pulling something', 'name': 'hook'}, {'frequency': 'f', 'id': 579, 'synset': 'horse.n.01', 'synonyms': ['horse'], 'def': 'a common horse', 'name': 'horse'}, {'frequency': 'f', 'id': 580, 'synset': 'hose.n.03', 'synonyms': ['hose', 'hosepipe'], 'def': 'a flexible pipe for conveying a liquid or gas', 'name': 'hose'}, {'frequency': 'r', 'id': 581, 'synset': 'hot-air_balloon.n.01', 'synonyms': ['hot-air_balloon'], 'def': 'balloon for travel through the air in a basket suspended below a large bag of heated air', 'name': 'hot-air_balloon'}, {'frequency': 'r', 'id': 582, 'synset': 'hot_plate.n.01', 'synonyms': ['hotplate'], 'def': 'a portable electric appliance for heating or cooking or keeping food warm', 'name': 'hotplate'}, {'frequency': 'c', 'id': 583, 'synset': 'hot_sauce.n.01', 'synonyms': ['hot_sauce'], 'def': 'a pungent peppery sauce', 'name': 'hot_sauce'}, {'frequency': 'r', 'id': 584, 'synset': 'hourglass.n.01', 'synonyms': ['hourglass'], 'def': 'a sandglass timer that runs for sixty minutes', 'name': 'hourglass'}, {'frequency': 'r', 'id': 585, 'synset': 'houseboat.n.01', 'synonyms': ['houseboat'], 'def': 'a barge that is designed and equipped for use as a dwelling', 'name': 'houseboat'}, {'frequency': 'r', 'id': 586, 'synset': 'hummingbird.n.01', 'synonyms': ['hummingbird'], 'def': 'tiny American bird having brilliant iridescent plumage and long slender bills', 'name': 'hummingbird'}, {'frequency': 'r', 'id': 587, 'synset': 'hummus.n.01', 'synonyms': ['hummus', 'humus', 'hommos', 'hoummos', 'humous'], 'def': 'a thick spread made from mashed chickpeas', 'name': 'hummus'}, {'frequency': 'c', 'id': 588, 'synset': 'ice_bear.n.01', 'synonyms': ['polar_bear'], 'def': 'white bear of Arctic regions', 'name': 'polar_bear'}, {'frequency': 'c', 'id': 589, 'synset': 'ice_cream.n.01', 'synonyms': ['icecream'], 'def': 'frozen dessert containing cream and sugar and flavoring', 'name': 'icecream'}, {'frequency': 'r', 'id': 590, 'synset': 'ice_lolly.n.01', 'synonyms': ['popsicle'], 'def': 'ice cream or water ice on a small wooden stick', 'name': 'popsicle'}, {'frequency': 'c', 'id': 591, 'synset': 'ice_maker.n.01', 'synonyms': ['ice_maker'], 'def': 'an appliance included in some electric refrigerators for making ice cubes', 
'name': 'ice_maker'}, {'frequency': 'r', 'id': 592, 'synset': 'ice_pack.n.01', 'synonyms': ['ice_pack', 'ice_bag'], 'def': 'a waterproof bag filled with ice: applied to the body (especially the head) to cool or reduce swelling', 'name': 'ice_pack'}, {'frequency': 'r', 'id': 593, 'synset': 'ice_skate.n.01', 'synonyms': ['ice_skate'], 'def': 'skate consisting of a boot with a steel blade fitted to the sole', 'name': 'ice_skate'}, {'frequency': 'r', 'id': 594, 'synset': 'ice_tea.n.01', 'synonyms': ['ice_tea', 'iced_tea'], 'def': 'strong tea served over ice', 'name': 'ice_tea'}, {'frequency': 'c', 'id': 595, 'synset': 'igniter.n.01', 'synonyms': ['igniter', 'ignitor', 'lighter'], 'def': 'a substance or device used to start a fire', 'name': 'igniter'}, {'frequency': 'r', 'id': 596, 'synset': 'incense.n.01', 'synonyms': ['incense'], 'def': 'a substance that produces a fragrant odor when burned', 'name': 'incense'}, {'frequency': 'r', 'id': 597, 'synset': 'inhaler.n.01', 'synonyms': ['inhaler', 'inhalator'], 'def': 'a dispenser that produces a chemical vapor to be inhaled through mouth or nose', 'name': 'inhaler'}, {'frequency': 'c', 'id': 598, 'synset': 'ipod.n.01', 'synonyms': ['iPod'], 'def': 'a pocket-sized device used to play music files', 'name': 'iPod'}, {'frequency': 'c', 'id': 599, 'synset': 'iron.n.04', 'synonyms': ['iron_(for_clothing)', 'smoothing_iron_(for_clothing)'], 'def': 'home appliance consisting of a flat metal base that is heated and used to smooth cloth', 'name': 'iron_(for_clothing)'}, {'frequency': 'r', 'id': 600, 'synset': 'ironing_board.n.01', 'synonyms': ['ironing_board'], 'def': 'narrow padded board on collapsible supports; used for ironing clothes', 'name': 'ironing_board'}, {'frequency': 'f', 'id': 601, 'synset': 'jacket.n.01', 'synonyms': ['jacket'], 'def': 'a waist-length coat', 'name': 'jacket'}, {'frequency': 'r', 'id': 602, 'synset': 'jam.n.01', 'synonyms': ['jam'], 'def': 'preserve of crushed fruit', 'name': 'jam'}, {'frequency': 'f', 'id': 603, 'synset': 'jean.n.01', 'synonyms': ['jean', 'blue_jean', 'denim'], 'def': '(usually plural) close-fitting trousers of heavy denim for manual work or casual wear', 'name': 'jean'}, {'frequency': 'c', 'id': 604, 'synset': 'jeep.n.01', 'synonyms': ['jeep', 'landrover'], 'def': 'a car suitable for traveling over rough terrain', 'name': 'jeep'}, {'frequency': 'r', 'id': 605, 'synset': 'jelly_bean.n.01', 'synonyms': ['jelly_bean', 'jelly_egg'], 'def': 'sugar-glazed jellied candy', 'name': 'jelly_bean'}, {'frequency': 'f', 'id': 606, 'synset': 'jersey.n.03', 'synonyms': ['jersey', 'T-shirt', 'tee_shirt'], 'def': 'a close-fitting pullover shirt', 'name': 'jersey'}, {'frequency': 'c', 'id': 607, 'synset': 'jet.n.01', 'synonyms': ['jet_plane', 'jet-propelled_plane'], 'def': 'an airplane powered by one or more jet engines', 'name': 'jet_plane'}, {'frequency': 'c', 'id': 608, 'synset': 'jewelry.n.01', 'synonyms': ['jewelry', 'jewellery'], 'def': 'an adornment (as a bracelet or ring or necklace) made of precious metals and set with gems (or imitation gems)', 'name': 'jewelry'}, {'frequency': 'r', 'id': 609, 'synset': 'joystick.n.02', 'synonyms': ['joystick'], 'def': 'a control device for computers consisting of a vertical handle that can move freely in two directions', 'name': 'joystick'}, {'frequency': 'r', 'id': 610, 'synset': 'jump_suit.n.01', 'synonyms': ['jumpsuit'], 'def': "one-piece garment fashioned after a parachutist's uniform", 'name': 'jumpsuit'}, {'frequency': 'c', 'id': 611, 'synset': 'kayak.n.01', 'synonyms': 
['kayak'], 'def': 'a small canoe consisting of a light frame made watertight with animal skins', 'name': 'kayak'}, {'frequency': 'r', 'id': 612, 'synset': 'keg.n.02', 'synonyms': ['keg'], 'def': 'small cask or barrel', 'name': 'keg'}, {'frequency': 'r', 'id': 613, 'synset': 'kennel.n.01', 'synonyms': ['kennel', 'doghouse'], 'def': 'outbuilding that serves as a shelter for a dog', 'name': 'kennel'}, {'frequency': 'c', 'id': 614, 'synset': 'kettle.n.01', 'synonyms': ['kettle', 'boiler'], 'def': 'a metal pot for stewing or boiling; usually has a lid', 'name': 'kettle'}, {'frequency': 'f', 'id': 615, 'synset': 'key.n.01', 'synonyms': ['key'], 'def': 'metal instrument used to unlock a lock', 'name': 'key'}, {'frequency': 'r', 'id': 616, 'synset': 'keycard.n.01', 'synonyms': ['keycard'], 'def': 'a plastic card used to gain access typically to a door', 'name': 'keycard'}, {'frequency': 'r', 'id': 617, 'synset': 'kilt.n.01', 'synonyms': ['kilt'], 'def': 'a knee-length pleated tartan skirt worn by men as part of the traditional dress in the Highlands of northern Scotland', 'name': 'kilt'}, {'frequency': 'c', 'id': 618, 'synset': 'kimono.n.01', 'synonyms': ['kimono'], 'def': 'a loose robe; imitated from robes originally worn by Japanese', 'name': 'kimono'}, {'frequency': 'f', 'id': 619, 'synset': 'kitchen_sink.n.01', 'synonyms': ['kitchen_sink'], 'def': 'a sink in a kitchen', 'name': 'kitchen_sink'}, {'frequency': 'c', 'id': 620, 'synset': 'kitchen_table.n.01', 'synonyms': ['kitchen_table'], 'def': 'a table in the kitchen', 'name': 'kitchen_table'}, {'frequency': 'f', 'id': 621, 'synset': 'kite.n.03', 'synonyms': ['kite'], 'def': 'plaything consisting of a light frame covered with tissue paper; flown in wind at end of a string', 'name': 'kite'}, {'frequency': 'c', 'id': 622, 'synset': 'kitten.n.01', 'synonyms': ['kitten', 'kitty'], 'def': 'young domestic cat', 'name': 'kitten'}, {'frequency': 'c', 'id': 623, 'synset': 'kiwi.n.03', 'synonyms': ['kiwi_fruit'], 'def': 'fuzzy brown egg-shaped fruit with slightly tart green flesh', 'name': 'kiwi_fruit'}, {'frequency': 'f', 'id': 624, 'synset': 'knee_pad.n.01', 'synonyms': ['knee_pad'], 'def': 'protective garment consisting of a pad worn by football or baseball or hockey players', 'name': 'knee_pad'}, {'frequency': 'f', 'id': 625, 'synset': 'knife.n.01', 'synonyms': ['knife'], 'def': 'tool with a blade and point used as a cutting instrument', 'name': 'knife'}, {'frequency': 'r', 'id': 626, 'synset': 'knight.n.02', 'synonyms': ['knight_(chess_piece)', 'horse_(chess_piece)'], 'def': 'a chess game piece shaped to resemble the head of a horse', 'name': 'knight_(chess_piece)'}, {'frequency': 'r', 'id': 627, 'synset': 'knitting_needle.n.01', 'synonyms': ['knitting_needle'], 'def': 'needle consisting of a slender rod with pointed ends; usually used in pairs', 'name': 'knitting_needle'}, {'frequency': 'f', 'id': 628, 'synset': 'knob.n.02', 'synonyms': ['knob'], 'def': 'a round handle often found on a door', 'name': 'knob'}, {'frequency': 'r', 'id': 629, 'synset': 'knocker.n.05', 'synonyms': ['knocker_(on_a_door)', 'doorknocker'], 'def': 'a device (usually metal and ornamental) attached by a hinge to a door', 'name': 'knocker_(on_a_door)'}, {'frequency': 'r', 'id': 630, 'synset': 'koala.n.01', 'synonyms': ['koala', 'koala_bear'], 'def': 'sluggish tailless Australian marsupial with grey furry ears and coat', 'name': 'koala'}, {'frequency': 'r', 'id': 631, 'synset': 'lab_coat.n.01', 'synonyms': ['lab_coat', 'laboratory_coat'], 'def': 'a light coat worn to protect 
clothing from substances used while working in a laboratory', 'name': 'lab_coat'}, {'frequency': 'f', 'id': 632, 'synset': 'ladder.n.01', 'synonyms': ['ladder'], 'def': 'steps consisting of two parallel members connected by rungs', 'name': 'ladder'}, {'frequency': 'c', 'id': 633, 'synset': 'ladle.n.01', 'synonyms': ['ladle'], 'def': 'a spoon-shaped vessel with a long handle frequently used to transfer liquids', 'name': 'ladle'}, {'frequency': 'r', 'id': 634, 'synset': 'ladybug.n.01', 'synonyms': ['ladybug', 'ladybeetle', 'ladybird_beetle'], 'def': 'small round bright-colored and spotted beetle, typically red and black', 'name': 'ladybug'}, {'frequency': 'c', 'id': 635, 'synset': 'lamb.n.01', 'synonyms': ['lamb_(animal)'], 'def': 'young sheep', 'name': 'lamb_(animal)'}, {'frequency': 'r', 'id': 636, 'synset': 'lamb_chop.n.01', 'synonyms': ['lamb-chop', 'lambchop'], 'def': 'chop cut from a lamb', 'name': 'lamb-chop'}, {'frequency': 'f', 'id': 637, 'synset': 'lamp.n.02', 'synonyms': ['lamp'], 'def': 'a piece of furniture holding one or more electric light bulbs', 'name': 'lamp'}, {'frequency': 'f', 'id': 638, 'synset': 'lamppost.n.01', 'synonyms': ['lamppost'], 'def': 'a metal post supporting an outdoor lamp (such as a streetlight)', 'name': 'lamppost'}, {'frequency': 'f', 'id': 639, 'synset': 'lampshade.n.01', 'synonyms': ['lampshade'], 'def': 'a protective ornamental shade used to screen a light bulb from direct view', 'name': 'lampshade'}, {'frequency': 'c', 'id': 640, 'synset': 'lantern.n.01', 'synonyms': ['lantern'], 'def': 'light in a transparent protective case', 'name': 'lantern'}, {'frequency': 'f', 'id': 641, 'synset': 'lanyard.n.02', 'synonyms': ['lanyard', 'laniard'], 'def': 'a cord worn around the neck to hold a knife or whistle, etc.', 'name': 'lanyard'}, {'frequency': 'f', 'id': 642, 'synset': 'laptop.n.01', 'synonyms': ['laptop_computer', 'notebook_computer'], 'def': 'a portable computer small enough to use in your lap', 'name': 'laptop_computer'}, {'frequency': 'r', 'id': 643, 'synset': 'lasagna.n.01', 'synonyms': ['lasagna', 'lasagne'], 'def': 'baked dish of layers of lasagna pasta with sauce and cheese and meat or vegetables', 'name': 'lasagna'}, {'frequency': 'c', 'id': 644, 'synset': 'latch.n.02', 'synonyms': ['latch'], 'def': 'a bar that can be lowered or slid into a groove to fasten a door or gate', 'name': 'latch'}, {'frequency': 'r', 'id': 645, 'synset': 'lawn_mower.n.01', 'synonyms': ['lawn_mower'], 'def': 'garden tool for mowing grass on lawns', 'name': 'lawn_mower'}, {'frequency': 'r', 'id': 646, 'synset': 'leather.n.01', 'synonyms': ['leather'], 'def': 'an animal skin made smooth and flexible by removing the hair and then tanning', 'name': 'leather'}, {'frequency': 'c', 'id': 647, 'synset': 'legging.n.01', 'synonyms': ['legging_(clothing)', 'leging_(clothing)', 'leg_covering'], 'def': 'a garment covering the leg (usually extending from the knee to the ankle)', 'name': 'legging_(clothing)'}, {'frequency': 'c', 'id': 648, 'synset': 'lego.n.01', 'synonyms': ['Lego', 'Lego_set'], 'def': "a child's plastic construction set for making models from blocks", 'name': 'Lego'}, {'frequency': 'f', 'id': 649, 'synset': 'lemon.n.01', 'synonyms': ['lemon'], 'def': 'yellow oval fruit with juicy acidic flesh', 'name': 'lemon'}, {'frequency': 'r', 'id': 650, 'synset': 'lemonade.n.01', 'synonyms': ['lemonade'], 'def': 'sweetened beverage of diluted lemon juice', 'name': 'lemonade'}, {'frequency': 'f', 'id': 651, 'synset': 'lettuce.n.02', 'synonyms': ['lettuce'], 'def': 'leafy plant 
commonly eaten in salad or on sandwiches', 'name': 'lettuce'}, {'frequency': 'f', 'id': 652, 'synset': 'license_plate.n.01', 'synonyms': ['license_plate', 'numberplate'], 'def': "a plate mounted on the front and back of car and bearing the car's registration number", 'name': 'license_plate'}, {'frequency': 'f', 'id': 653, 'synset': 'life_buoy.n.01', 'synonyms': ['life_buoy', 'lifesaver', 'life_belt', 'life_ring'], 'def': 'a ring-shaped life preserver used to prevent drowning (NOT a life-jacket or vest)', 'name': 'life_buoy'}, {'frequency': 'f', 'id': 654, 'synset': 'life_jacket.n.01', 'synonyms': ['life_jacket', 'life_vest'], 'def': 'life preserver consisting of a sleeveless jacket of buoyant or inflatable design', 'name': 'life_jacket'}, {'frequency': 'f', 'id': 655, 'synset': 'light_bulb.n.01', 'synonyms': ['lightbulb'], 'def': 'glass bulb or tube shaped electric device that emits light (DO NOT MARK LAMPS AS A WHOLE)', 'name': 'lightbulb'}, {'frequency': 'r', 'id': 656, 'synset': 'lightning_rod.n.02', 'synonyms': ['lightning_rod', 'lightning_conductor'], 'def': 'a metallic conductor that is attached to a high point and leads to the ground', 'name': 'lightning_rod'}, {'frequency': 'c', 'id': 657, 'synset': 'lime.n.06', 'synonyms': ['lime'], 'def': 'the green acidic fruit of any of various lime trees', 'name': 'lime'}, {'frequency': 'r', 'id': 658, 'synset': 'limousine.n.01', 'synonyms': ['limousine'], 'def': 'long luxurious car; usually driven by a chauffeur', 'name': 'limousine'}, {'frequency': 'r', 'id': 659, 'synset': 'linen.n.02', 'synonyms': ['linen_paper'], 'def': 'a high-quality paper made of linen fibers or with a linen finish', 'name': 'linen_paper'}, {'frequency': 'c', 'id': 660, 'synset': 'lion.n.01', 'synonyms': ['lion'], 'def': 'large gregarious predatory cat of Africa and India', 'name': 'lion'}, {'frequency': 'c', 'id': 661, 'synset': 'lip_balm.n.01', 'synonyms': ['lip_balm'], 'def': 'a balm applied to the lips', 'name': 'lip_balm'}, {'frequency': 'c', 'id': 662, 'synset': 'lipstick.n.01', 'synonyms': ['lipstick', 'lip_rouge'], 'def': 'makeup that is used to color the lips', 'name': 'lipstick'}, {'frequency': 'r', 'id': 663, 'synset': 'liquor.n.01', 'synonyms': ['liquor', 'spirits', 'hard_liquor', 'liqueur', 'cordial'], 'def': 'an alcoholic beverage that is distilled rather than fermented', 'name': 'liquor'}, {'frequency': 'r', 'id': 664, 'synset': 'lizard.n.01', 'synonyms': ['lizard'], 'def': 'a reptile with usually two pairs of legs and a tapering tail', 'name': 'lizard'}, {'frequency': 'r', 'id': 665, 'synset': 'loafer.n.02', 'synonyms': ['Loafer_(type_of_shoe)'], 'def': 'a low leather step-in shoe', 'name': 'Loafer_(type_of_shoe)'}, {'frequency': 'f', 'id': 666, 'synset': 'log.n.01', 'synonyms': ['log'], 'def': 'a segment of the trunk of a tree when stripped of branches', 'name': 'log'}, {'frequency': 'c', 'id': 667, 'synset': 'lollipop.n.02', 'synonyms': ['lollipop'], 'def': 'hard candy on a stick', 'name': 'lollipop'}, {'frequency': 'c', 'id': 668, 'synset': 'lotion.n.01', 'synonyms': ['lotion'], 'def': 'any of various cosmetic preparations that are applied to the skin', 'name': 'lotion'}, {'frequency': 'f', 'id': 669, 'synset': 'loudspeaker.n.01', 'synonyms': ['speaker_(stero_equipment)'], 'def': 'electronic device that produces sound often as part of a stereo system', 'name': 'speaker_(stero_equipment)'}, {'frequency': 'c', 'id': 670, 'synset': 'love_seat.n.01', 'synonyms': ['loveseat'], 'def': 'small sofa that seats two people', 'name': 'loveseat'}, {'frequency': 
'r', 'id': 671, 'synset': 'machine_gun.n.01', 'synonyms': ['machine_gun'], 'def': 'a rapidly firing automatic gun', 'name': 'machine_gun'}, {'frequency': 'f', 'id': 672, 'synset': 'magazine.n.02', 'synonyms': ['magazine'], 'def': 'a paperback periodic publication', 'name': 'magazine'}, {'frequency': 'f', 'id': 673, 'synset': 'magnet.n.01', 'synonyms': ['magnet'], 'def': 'a device that attracts iron and produces a magnetic field', 'name': 'magnet'}, {'frequency': 'r', 'id': 674, 'synset': 'mail_slot.n.01', 'synonyms': ['mail_slot'], 'def': 'a slot (usually in a door) through which mail can be delivered', 'name': 'mail_slot'}, {'frequency': 'c', 'id': 675, 'synset': 'mailbox.n.01', 'synonyms': ['mailbox_(at_home)', 'letter_box_(at_home)'], 'def': 'a private box for delivery of mail', 'name': 'mailbox_(at_home)'}, {'frequency': 'r', 'id': 676, 'synset': 'mallet.n.01', 'synonyms': ['mallet'], 'def': 'a sports implement with a long handle and a hammer-like head used to hit a ball', 'name': 'mallet'}, {'frequency': 'r', 'id': 677, 'synset': 'mammoth.n.01', 'synonyms': ['mammoth'], 'def': 'any of numerous extinct elephants widely distributed in the Pleistocene', 'name': 'mammoth'}, {'frequency': 'c', 'id': 678, 'synset': 'mandarin.n.05', 'synonyms': ['mandarin_orange'], 'def': 'a somewhat flat reddish-orange loose skinned citrus of China', 'name': 'mandarin_orange'}, {'frequency': 'c', 'id': 679, 'synset': 'manger.n.01', 'synonyms': ['manger', 'trough'], 'def': 'a container (usually in a barn or stable) from which cattle or horses feed', 'name': 'manger'}, {'frequency': 'f', 'id': 680, 'synset': 'manhole.n.01', 'synonyms': ['manhole'], 'def': 'a hole (usually with a flush cover) through which a person can gain access to an underground structure', 'name': 'manhole'}, {'frequency': 'c', 'id': 681, 'synset': 'map.n.01', 'synonyms': ['map'], 'def': "a diagrammatic representation of the earth's surface (or part of it)", 'name': 'map'}, {'frequency': 'c', 'id': 682, 'synset': 'marker.n.03', 'synonyms': ['marker'], 'def': 'a writing implement for making a mark', 'name': 'marker'}, {'frequency': 'r', 'id': 683, 'synset': 'martini.n.01', 'synonyms': ['martini'], 'def': 'a cocktail made of gin (or vodka) with dry vermouth', 'name': 'martini'}, {'frequency': 'r', 'id': 684, 'synset': 'mascot.n.01', 'synonyms': ['mascot'], 'def': 'a person or animal that is adopted by a team or other group as a symbolic figure', 'name': 'mascot'}, {'frequency': 'c', 'id': 685, 'synset': 'mashed_potato.n.01', 'synonyms': ['mashed_potato'], 'def': 'potato that has been peeled and boiled and then mashed', 'name': 'mashed_potato'}, {'frequency': 'r', 'id': 686, 'synset': 'masher.n.02', 'synonyms': ['masher'], 'def': 'a kitchen utensil used for mashing (e.g. 
potatoes)', 'name': 'masher'}, {'frequency': 'f', 'id': 687, 'synset': 'mask.n.04', 'synonyms': ['mask', 'facemask'], 'def': 'a protective covering worn over the face', 'name': 'mask'}, {'frequency': 'f', 'id': 688, 'synset': 'mast.n.01', 'synonyms': ['mast'], 'def': 'a vertical spar for supporting sails', 'name': 'mast'}, {'frequency': 'c', 'id': 689, 'synset': 'mat.n.03', 'synonyms': ['mat_(gym_equipment)', 'gym_mat'], 'def': 'sports equipment consisting of a piece of thick padding on the floor for gymnastics', 'name': 'mat_(gym_equipment)'}, {'frequency': 'r', 'id': 690, 'synset': 'matchbox.n.01', 'synonyms': ['matchbox'], 'def': 'a box for holding matches', 'name': 'matchbox'}, {'frequency': 'f', 'id': 691, 'synset': 'mattress.n.01', 'synonyms': ['mattress'], 'def': 'a thick pad filled with resilient material used as a bed or part of a bed', 'name': 'mattress'}, {'frequency': 'c', 'id': 692, 'synset': 'measuring_cup.n.01', 'synonyms': ['measuring_cup'], 'def': 'graduated cup used to measure liquid or granular ingredients', 'name': 'measuring_cup'}, {'frequency': 'c', 'id': 693, 'synset': 'measuring_stick.n.01', 'synonyms': ['measuring_stick', 'ruler_(measuring_stick)', 'measuring_rod'], 'def': 'measuring instrument having a sequence of marks at regular intervals', 'name': 'measuring_stick'}, {'frequency': 'c', 'id': 694, 'synset': 'meatball.n.01', 'synonyms': ['meatball'], 'def': 'ground meat formed into a ball and fried or simmered in broth', 'name': 'meatball'}, {'frequency': 'c', 'id': 695, 'synset': 'medicine.n.02', 'synonyms': ['medicine'], 'def': 'something that treats or prevents or alleviates the symptoms of disease', 'name': 'medicine'}, {'frequency': 'r', 'id': 696, 'synset': 'melon.n.01', 'synonyms': ['melon'], 'def': 'fruit of the gourd family having a hard rind and sweet juicy flesh', 'name': 'melon'}, {'frequency': 'f', 'id': 697, 'synset': 'microphone.n.01', 'synonyms': ['microphone'], 'def': 'device for converting sound waves into electrical energy', 'name': 'microphone'}, {'frequency': 'r', 'id': 698, 'synset': 'microscope.n.01', 'synonyms': ['microscope'], 'def': 'magnifier of the image of small objects', 'name': 'microscope'}, {'frequency': 'f', 'id': 699, 'synset': 'microwave.n.02', 'synonyms': ['microwave_oven'], 'def': 'kitchen appliance that cooks food by passing an electromagnetic wave through it', 'name': 'microwave_oven'}, {'frequency': 'r', 'id': 700, 'synset': 'milestone.n.01', 'synonyms': ['milestone', 'milepost'], 'def': 'stone post at side of a road to show distances', 'name': 'milestone'}, {'frequency': 'c', 'id': 701, 'synset': 'milk.n.01', 'synonyms': ['milk'], 'def': 'a white nutritious liquid secreted by mammals and used as food by human beings', 'name': 'milk'}, {'frequency': 'f', 'id': 702, 'synset': 'minivan.n.01', 'synonyms': ['minivan'], 'def': 'a small box-shaped passenger van', 'name': 'minivan'}, {'frequency': 'r', 'id': 703, 'synset': 'mint.n.05', 'synonyms': ['mint_candy'], 'def': 'a candy that is flavored with a mint oil', 'name': 'mint_candy'}, {'frequency': 'f', 'id': 704, 'synset': 'mirror.n.01', 'synonyms': ['mirror'], 'def': 'polished surface that forms images by reflecting light', 'name': 'mirror'}, {'frequency': 'c', 'id': 705, 'synset': 'mitten.n.01', 'synonyms': ['mitten'], 'def': 'glove that encases the thumb separately and the other four fingers together', 'name': 'mitten'}, {'frequency': 'c', 'id': 706, 'synset': 'mixer.n.04', 'synonyms': ['mixer_(kitchen_tool)', 'stand_mixer'], 'def': 'a kitchen utensil that is used for mixing 
foods', 'name': 'mixer_(kitchen_tool)'}, {'frequency': 'c', 'id': 707, 'synset': 'money.n.03', 'synonyms': ['money'], 'def': 'the official currency issued by a government or national bank', 'name': 'money'}, {'frequency': 'f', 'id': 708, 'synset': 'monitor.n.04', 'synonyms': ['monitor_(computer_equipment) computer_monitor'], 'def': 'a computer monitor', 'name': 'monitor_(computer_equipment) computer_monitor'}, {'frequency': 'c', 'id': 709, 'synset': 'monkey.n.01', 'synonyms': ['monkey'], 'def': 'any of various long-tailed primates', 'name': 'monkey'}, {'frequency': 'f', 'id': 710, 'synset': 'motor.n.01', 'synonyms': ['motor'], 'def': 'machine that converts other forms of energy into mechanical energy and so imparts motion', 'name': 'motor'}, {'frequency': 'f', 'id': 711, 'synset': 'motor_scooter.n.01', 'synonyms': ['motor_scooter', 'scooter'], 'def': 'a wheeled vehicle with small wheels and a low-powered engine', 'name': 'motor_scooter'}, {'frequency': 'r', 'id': 712, 'synset': 'motor_vehicle.n.01', 'synonyms': ['motor_vehicle', 'automotive_vehicle'], 'def': 'a self-propelled wheeled vehicle that does not run on rails', 'name': 'motor_vehicle'}, {'frequency': 'r', 'id': 713, 'synset': 'motorboat.n.01', 'synonyms': ['motorboat', 'powerboat'], 'def': 'a boat propelled by an internal-combustion engine', 'name': 'motorboat'}, {'frequency': 'f', 'id': 714, 'synset': 'motorcycle.n.01', 'synonyms': ['motorcycle'], 'def': 'a motor vehicle with two wheels and a strong frame', 'name': 'motorcycle'}, {'frequency': 'f', 'id': 715, 'synset': 'mound.n.01', 'synonyms': ['mound_(baseball)', "pitcher's_mound"], 'def': '(baseball) the slight elevation on which the pitcher stands', 'name': 'mound_(baseball)'}, {'frequency': 'r', 'id': 716, 'synset': 'mouse.n.01', 'synonyms': ['mouse_(animal_rodent)'], 'def': 'a small rodent with pointed snouts and small ears on elongated bodies with slender usually hairless tails', 'name': 'mouse_(animal_rodent)'}, {'frequency': 'f', 'id': 717, 'synset': 'mouse.n.04', 'synonyms': ['mouse_(computer_equipment)', 'computer_mouse'], 'def': 'a computer input device that controls an on-screen pointer', 'name': 'mouse_(computer_equipment)'}, {'frequency': 'f', 'id': 718, 'synset': 'mousepad.n.01', 'synonyms': ['mousepad'], 'def': 'a small portable pad that provides an operating surface for a computer mouse', 'name': 'mousepad'}, {'frequency': 'c', 'id': 719, 'synset': 'muffin.n.01', 'synonyms': ['muffin'], 'def': 'a sweet quick bread baked in a cup-shaped pan', 'name': 'muffin'}, {'frequency': 'f', 'id': 720, 'synset': 'mug.n.04', 'synonyms': ['mug'], 'def': 'with handle and usually cylindrical', 'name': 'mug'}, {'frequency': 'f', 'id': 721, 'synset': 'mushroom.n.02', 'synonyms': ['mushroom'], 'def': 'a common mushroom', 'name': 'mushroom'}, {'frequency': 'r', 'id': 722, 'synset': 'music_stool.n.01', 'synonyms': ['music_stool', 'piano_stool'], 'def': 'a stool for piano players; usually adjustable in height', 'name': 'music_stool'}, {'frequency': 'r', 'id': 723, 'synset': 'musical_instrument.n.01', 'synonyms': ['musical_instrument', 'instrument_(musical)'], 'def': 'any of various devices or contrivances that can be used to produce musical tones or sounds', 'name': 'musical_instrument'}, {'frequency': 'r', 'id': 724, 'synset': 'nailfile.n.01', 'synonyms': ['nailfile'], 'def': 'a small flat file for shaping the nails', 'name': 'nailfile'}, {'frequency': 'r', 'id': 725, 'synset': 'nameplate.n.01', 'synonyms': ['nameplate'], 'def': 'a plate bearing a name', 'name': 'nameplate'}, 
{'frequency': 'f', 'id': 726, 'synset': 'napkin.n.01', 'synonyms': ['napkin', 'table_napkin', 'serviette'], 'def': 'a small piece of table linen or paper that is used to wipe the mouth and to cover the lap in order to protect clothing', 'name': 'napkin'}, {'frequency': 'r', 'id': 727, 'synset': 'neckerchief.n.01', 'synonyms': ['neckerchief'], 'def': 'a kerchief worn around the neck', 'name': 'neckerchief'}, {'frequency': 'f', 'id': 728, 'synset': 'necklace.n.01', 'synonyms': ['necklace'], 'def': 'jewelry consisting of a cord or chain (often bearing gems) worn about the neck as an ornament', 'name': 'necklace'}, {'frequency': 'f', 'id': 729, 'synset': 'necktie.n.01', 'synonyms': ['necktie', 'tie_(necktie)'], 'def': 'neckwear consisting of a long narrow piece of material worn under a collar and tied in knot at the front', 'name': 'necktie'}, {'frequency': 'r', 'id': 730, 'synset': 'needle.n.03', 'synonyms': ['needle'], 'def': 'a sharp pointed implement (usually metal)', 'name': 'needle'}, {'frequency': 'c', 'id': 731, 'synset': 'nest.n.01', 'synonyms': ['nest'], 'def': 'a structure in which animals lay eggs or give birth to their young', 'name': 'nest'}, {'frequency': 'r', 'id': 732, 'synset': 'newsstand.n.01', 'synonyms': ['newsstand'], 'def': 'a stall where newspapers and other periodicals are sold', 'name': 'newsstand'}, {'frequency': 'c', 'id': 733, 'synset': 'nightwear.n.01', 'synonyms': ['nightshirt', 'nightwear', 'sleepwear', 'nightclothes'], 'def': 'garments designed to be worn in bed', 'name': 'nightshirt'}, {'frequency': 'r', 'id': 734, 'synset': 'nosebag.n.01', 'synonyms': ['nosebag_(for_animals)', 'feedbag'], 'def': 'a canvas bag that is used to feed an animal (such as a horse); covers the muzzle and fastens at the top of the head', 'name': 'nosebag_(for_animals)'}, {'frequency': 'r', 'id': 735, 'synset': 'noseband.n.01', 'synonyms': ['noseband_(for_animals)', 'nosepiece_(for_animals)'], 'def': "a strap that is the part of a bridle that goes over the animal's nose", 'name': 'noseband_(for_animals)'}, {'frequency': 'f', 'id': 736, 'synset': 'notebook.n.01', 'synonyms': ['notebook'], 'def': 'a book with blank pages for recording notes or memoranda', 'name': 'notebook'}, {'frequency': 'c', 'id': 737, 'synset': 'notepad.n.01', 'synonyms': ['notepad'], 'def': 'a pad of paper for keeping notes', 'name': 'notepad'}, {'frequency': 'c', 'id': 738, 'synset': 'nut.n.03', 'synonyms': ['nut'], 'def': 'a small metal block (usually square or hexagonal) with internal screw thread to be fitted onto a bolt', 'name': 'nut'}, {'frequency': 'r', 'id': 739, 'synset': 'nutcracker.n.01', 'synonyms': ['nutcracker'], 'def': 'a hand tool used to crack nuts open', 'name': 'nutcracker'}, {'frequency': 'c', 'id': 740, 'synset': 'oar.n.01', 'synonyms': ['oar'], 'def': 'an implement used to propel or steer a boat', 'name': 'oar'}, {'frequency': 'r', 'id': 741, 'synset': 'octopus.n.01', 'synonyms': ['octopus_(food)'], 'def': 'tentacles of octopus prepared as food', 'name': 'octopus_(food)'}, {'frequency': 'r', 'id': 742, 'synset': 'octopus.n.02', 'synonyms': ['octopus_(animal)'], 'def': 'bottom-living cephalopod having a soft oval body with eight long tentacles', 'name': 'octopus_(animal)'}, {'frequency': 'c', 'id': 743, 'synset': 'oil_lamp.n.01', 'synonyms': ['oil_lamp', 'kerosene_lamp', 'kerosine_lamp'], 'def': 'a lamp that burns oil (as kerosine) for light', 'name': 'oil_lamp'}, {'frequency': 'c', 'id': 744, 'synset': 'olive_oil.n.01', 'synonyms': ['olive_oil'], 'def': 'oil from olives', 'name': 'olive_oil'}, 
{'frequency': 'r', 'id': 745, 'synset': 'omelet.n.01', 'synonyms': ['omelet', 'omelette'], 'def': 'beaten eggs cooked until just set; may be folded around e.g. ham or cheese or jelly', 'name': 'omelet'}, {'frequency': 'f', 'id': 746, 'synset': 'onion.n.01', 'synonyms': ['onion'], 'def': 'the bulb of an onion plant', 'name': 'onion'}, {'frequency': 'f', 'id': 747, 'synset': 'orange.n.01', 'synonyms': ['orange_(fruit)'], 'def': 'orange (FRUIT of an orange tree)', 'name': 'orange_(fruit)'}, {'frequency': 'c', 'id': 748, 'synset': 'orange_juice.n.01', 'synonyms': ['orange_juice'], 'def': 'bottled or freshly squeezed juice of oranges', 'name': 'orange_juice'}, {'frequency': 'r', 'id': 749, 'synset': 'oregano.n.01', 'synonyms': ['oregano', 'marjoram'], 'def': 'aromatic Eurasian perennial herb used in cooking and baking', 'name': 'oregano'}, {'frequency': 'c', 'id': 750, 'synset': 'ostrich.n.02', 'synonyms': ['ostrich'], 'def': 'fast-running African flightless bird with two-toed feet; largest living bird', 'name': 'ostrich'}, {'frequency': 'c', 'id': 751, 'synset': 'ottoman.n.03', 'synonyms': ['ottoman', 'pouf', 'pouffe', 'hassock'], 'def': 'thick cushion used as a seat', 'name': 'ottoman'}, {'frequency': 'c', 'id': 752, 'synset': 'overall.n.01', 'synonyms': ['overalls_(clothing)'], 'def': 'work clothing consisting of denim trousers usually with a bib and shoulder straps', 'name': 'overalls_(clothing)'}, {'frequency': 'c', 'id': 753, 'synset': 'owl.n.01', 'synonyms': ['owl'], 'def': 'nocturnal bird of prey with hawk-like beak and claws and large head with front-facing eyes', 'name': 'owl'}, {'frequency': 'c', 'id': 754, 'synset': 'packet.n.03', 'synonyms': ['packet'], 'def': 'a small package or bundle', 'name': 'packet'}, {'frequency': 'r', 'id': 755, 'synset': 'pad.n.03', 'synonyms': ['inkpad', 'inking_pad', 'stamp_pad'], 'def': 'absorbent material saturated with ink used to transfer ink evenly to a rubber stamp', 'name': 'inkpad'}, {'frequency': 'c', 'id': 756, 'synset': 'pad.n.04', 'synonyms': ['pad'], 'def': 'a flat mass of soft material used for protection, stuffing, or comfort', 'name': 'pad'}, {'frequency': 'c', 'id': 757, 'synset': 'paddle.n.04', 'synonyms': ['paddle', 'boat_paddle'], 'def': 'a short light oar used without an oarlock to propel a canoe or small boat', 'name': 'paddle'}, {'frequency': 'c', 'id': 758, 'synset': 'padlock.n.01', 'synonyms': ['padlock'], 'def': 'a detachable, portable lock', 'name': 'padlock'}, {'frequency': 'r', 'id': 759, 'synset': 'paintbox.n.01', 'synonyms': ['paintbox'], 'def': "a box containing a collection of cubes or tubes of artists' paint", 'name': 'paintbox'}, {'frequency': 'c', 'id': 760, 'synset': 'paintbrush.n.01', 'synonyms': ['paintbrush'], 'def': 'a brush used as an applicator to apply paint', 'name': 'paintbrush'}, {'frequency': 'f', 'id': 761, 'synset': 'painting.n.01', 'synonyms': ['painting'], 'def': 'graphic art consisting of an artistic composition made by applying paints to a surface', 'name': 'painting'}, {'frequency': 'c', 'id': 762, 'synset': 'pajama.n.02', 'synonyms': ['pajamas', 'pyjamas'], 'def': 'loose-fitting nightclothes worn for sleeping or lounging', 'name': 'pajamas'}, {'frequency': 'c', 'id': 763, 'synset': 'palette.n.02', 'synonyms': ['palette', 'pallet'], 'def': 'board that provides a flat surface on which artists mix paints and the range of colors used', 'name': 'palette'}, {'frequency': 'f', 'id': 764, 'synset': 'pan.n.01', 'synonyms': ['pan_(for_cooking)', 'cooking_pan'], 'def': 'cooking utensil consisting of a wide 
metal vessel', 'name': 'pan_(for_cooking)'}, {'frequency': 'r', 'id': 765, 'synset': 'pan.n.03', 'synonyms': ['pan_(metal_container)'], 'def': 'shallow container made of metal', 'name': 'pan_(metal_container)'}, {'frequency': 'c', 'id': 766, 'synset': 'pancake.n.01', 'synonyms': ['pancake'], 'def': 'a flat cake of thin batter fried on both sides on a griddle', 'name': 'pancake'}, {'frequency': 'r', 'id': 767, 'synset': 'pantyhose.n.01', 'synonyms': ['pantyhose'], 'def': "a woman's tights consisting of underpants and stockings", 'name': 'pantyhose'}, {'frequency': 'r', 'id': 768, 'synset': 'papaya.n.02', 'synonyms': ['papaya'], 'def': 'large oval melon-like tropical fruit with yellowish flesh', 'name': 'papaya'}, {'frequency': 'r', 'id': 769, 'synset': 'paper_clip.n.01', 'synonyms': ['paperclip'], 'def': 'a wire or plastic clip for holding sheets of paper together', 'name': 'paperclip'}, {'frequency': 'f', 'id': 770, 'synset': 'paper_plate.n.01', 'synonyms': ['paper_plate'], 'def': 'a disposable plate made of cardboard', 'name': 'paper_plate'}, {'frequency': 'f', 'id': 771, 'synset': 'paper_towel.n.01', 'synonyms': ['paper_towel'], 'def': 'a disposable towel made of absorbent paper', 'name': 'paper_towel'}, {'frequency': 'r', 'id': 772, 'synset': 'paperback_book.n.01', 'synonyms': ['paperback_book', 'paper-back_book', 'softback_book', 'soft-cover_book'], 'def': 'a book with paper covers', 'name': 'paperback_book'}, {'frequency': 'r', 'id': 773, 'synset': 'paperweight.n.01', 'synonyms': ['paperweight'], 'def': 'a weight used to hold down a stack of papers', 'name': 'paperweight'}, {'frequency': 'c', 'id': 774, 'synset': 'parachute.n.01', 'synonyms': ['parachute'], 'def': 'rescue equipment consisting of a device that fills with air and retards your fall', 'name': 'parachute'}, {'frequency': 'r', 'id': 775, 'synset': 'parakeet.n.01', 'synonyms': ['parakeet', 'parrakeet', 'parroket', 'paraquet', 'paroquet', 'parroquet'], 'def': 'any of numerous small slender long-tailed parrots', 'name': 'parakeet'}, {'frequency': 'c', 'id': 776, 'synset': 'parasail.n.01', 'synonyms': ['parasail_(sports)'], 'def': 'parachute that will lift a person up into the air when it is towed by a motorboat or a car', 'name': 'parasail_(sports)'}, {'frequency': 'r', 'id': 777, 'synset': 'parchment.n.01', 'synonyms': ['parchment'], 'def': 'a superior paper resembling sheepskin', 'name': 'parchment'}, {'frequency': 'r', 'id': 778, 'synset': 'parka.n.01', 'synonyms': ['parka', 'anorak'], 'def': "a kind of heavy jacket (`windcheater' is a British term)", 'name': 'parka'}, {'frequency': 'f', 'id': 779, 'synset': 'parking_meter.n.01', 'synonyms': ['parking_meter'], 'def': 'a coin-operated timer located next to a parking space', 'name': 'parking_meter'}, {'frequency': 'c', 'id': 780, 'synset': 'parrot.n.01', 'synonyms': ['parrot'], 'def': 'usually brightly colored tropical birds with short hooked beaks and the ability to mimic sounds', 'name': 'parrot'}, {'frequency': 'c', 'id': 781, 'synset': 'passenger_car.n.01', 'synonyms': ['passenger_car_(part_of_a_train)', 'coach_(part_of_a_train)'], 'def': 'a railcar where passengers ride', 'name': 'passenger_car_(part_of_a_train)'}, {'frequency': 'r', 'id': 782, 'synset': 'passenger_ship.n.01', 'synonyms': ['passenger_ship'], 'def': 'a ship built to carry passengers', 'name': 'passenger_ship'}, {'frequency': 'r', 'id': 783, 'synset': 'passport.n.02', 'synonyms': ['passport'], 'def': 'a document issued by a country to a citizen allowing that person to travel abroad and re-enter the home 
country', 'name': 'passport'}, {'frequency': 'f', 'id': 784, 'synset': 'pastry.n.02', 'synonyms': ['pastry'], 'def': 'any of various baked foods made of dough or batter', 'name': 'pastry'}, {'frequency': 'r', 'id': 785, 'synset': 'patty.n.01', 'synonyms': ['patty_(food)'], 'def': 'small flat mass of chopped food', 'name': 'patty_(food)'}, {'frequency': 'c', 'id': 786, 'synset': 'pea.n.01', 'synonyms': ['pea_(food)'], 'def': 'seed of a pea plant used for food', 'name': 'pea_(food)'}, {'frequency': 'c', 'id': 787, 'synset': 'peach.n.03', 'synonyms': ['peach'], 'def': 'downy juicy fruit with sweet yellowish or whitish flesh', 'name': 'peach'}, {'frequency': 'c', 'id': 788, 'synset': 'peanut_butter.n.01', 'synonyms': ['peanut_butter'], 'def': 'a spread made from ground peanuts', 'name': 'peanut_butter'}, {'frequency': 'c', 'id': 789, 'synset': 'pear.n.01', 'synonyms': ['pear'], 'def': 'sweet juicy gritty-textured fruit available in many varieties', 'name': 'pear'}, {'frequency': 'r', 'id': 790, 'synset': 'peeler.n.03', 'synonyms': ['peeler_(tool_for_fruit_and_vegetables)'], 'def': 'a device for peeling vegetables or fruits', 'name': 'peeler_(tool_for_fruit_and_vegetables)'}, {'frequency': 'r', 'id': 791, 'synset': 'pegboard.n.01', 'synonyms': ['pegboard'], 'def': 'a board perforated with regularly spaced holes into which pegs can be fitted', 'name': 'pegboard'}, {'frequency': 'c', 'id': 792, 'synset': 'pelican.n.01', 'synonyms': ['pelican'], 'def': 'large long-winged warm-water seabird having a large bill with a distensible pouch for fish', 'name': 'pelican'}, {'frequency': 'f', 'id': 793, 'synset': 'pen.n.01', 'synonyms': ['pen'], 'def': 'a writing implement with a point from which ink flows', 'name': 'pen'}, {'frequency': 'c', 'id': 794, 'synset': 'pencil.n.01', 'synonyms': ['pencil'], 'def': 'a thin cylindrical pointed writing implement made of wood and graphite', 'name': 'pencil'}, {'frequency': 'r', 'id': 795, 'synset': 'pencil_box.n.01', 'synonyms': ['pencil_box', 'pencil_case'], 'def': 'a box for holding pencils', 'name': 'pencil_box'}, {'frequency': 'r', 'id': 796, 'synset': 'pencil_sharpener.n.01', 'synonyms': ['pencil_sharpener'], 'def': 'a rotary implement for sharpening the point on pencils', 'name': 'pencil_sharpener'}, {'frequency': 'r', 'id': 797, 'synset': 'pendulum.n.01', 'synonyms': ['pendulum'], 'def': 'an apparatus consisting of an object mounted so that it swings freely under the influence of gravity', 'name': 'pendulum'}, {'frequency': 'c', 'id': 798, 'synset': 'penguin.n.01', 'synonyms': ['penguin'], 'def': 'short-legged flightless birds of cold southern regions having webbed feet and wings modified as flippers', 'name': 'penguin'}, {'frequency': 'r', 'id': 799, 'synset': 'pennant.n.02', 'synonyms': ['pennant'], 'def': 'a flag longer than it is wide (and often tapering)', 'name': 'pennant'}, {'frequency': 'r', 'id': 800, 'synset': 'penny.n.02', 'synonyms': ['penny_(coin)'], 'def': 'a coin worth one-hundredth of the value of the basic unit', 'name': 'penny_(coin)'}, {'frequency': 'c', 'id': 801, 'synset': 'pepper.n.03', 'synonyms': ['pepper', 'peppercorn'], 'def': 'pungent seasoning from the berry of the common pepper plant; whole or ground', 'name': 'pepper'}, {'frequency': 'c', 'id': 802, 'synset': 'pepper_mill.n.01', 'synonyms': ['pepper_mill', 'pepper_grinder'], 'def': 'a mill for grinding pepper', 'name': 'pepper_mill'}, {'frequency': 'c', 'id': 803, 'synset': 'perfume.n.02', 'synonyms': ['perfume'], 'def': 'a toiletry that emits and diffuses a fragrant odor', 
'name': 'perfume'}, {'frequency': 'r', 'id': 804, 'synset': 'persimmon.n.02', 'synonyms': ['persimmon'], 'def': 'orange fruit resembling a plum; edible when fully ripe', 'name': 'persimmon'}, {'frequency': 'f', 'id': 805, 'synset': 'person.n.01', 'synonyms': ['baby', 'child', 'boy', 'girl', 'man', 'woman', 'person', 'human'], 'def': 'a human being', 'name': 'baby'}, {'frequency': 'r', 'id': 806, 'synset': 'pet.n.01', 'synonyms': ['pet'], 'def': 'a domesticated animal kept for companionship or amusement', 'name': 'pet'}, {'frequency': 'r', 'id': 807, 'synset': 'petfood.n.01', 'synonyms': ['petfood', 'pet-food'], 'def': 'food prepared for animal pets', 'name': 'petfood'}, {'frequency': 'r', 'id': 808, 'synset': 'pew.n.01', 'synonyms': ['pew_(church_bench)', 'church_bench'], 'def': 'long bench with backs; used in church by the congregation', 'name': 'pew_(church_bench)'}, {'frequency': 'r', 'id': 809, 'synset': 'phonebook.n.01', 'synonyms': ['phonebook', 'telephone_book', 'telephone_directory'], 'def': 'a directory containing an alphabetical list of telephone subscribers and their telephone numbers', 'name': 'phonebook'}, {'frequency': 'c', 'id': 810, 'synset': 'phonograph_record.n.01', 'synonyms': ['phonograph_record', 'phonograph_recording', 'record_(phonograph_recording)'], 'def': 'sound recording consisting of a typically black disk with a continuous groove', 'name': 'phonograph_record'}, {'frequency': 'c', 'id': 811, 'synset': 'piano.n.01', 'synonyms': ['piano'], 'def': 'a keyboard instrument that is played by depressing keys that cause hammers to strike tuned strings and produce sounds', 'name': 'piano'}, {'frequency': 'f', 'id': 812, 'synset': 'pickle.n.01', 'synonyms': ['pickle'], 'def': 'vegetables (especially cucumbers) preserved in brine or vinegar', 'name': 'pickle'}, {'frequency': 'f', 'id': 813, 'synset': 'pickup.n.01', 'synonyms': ['pickup_truck'], 'def': 'a light truck with an open body and low sides and a tailboard', 'name': 'pickup_truck'}, {'frequency': 'c', 'id': 814, 'synset': 'pie.n.01', 'synonyms': ['pie'], 'def': 'dish baked in pastry-lined pan often with a pastry top', 'name': 'pie'}, {'frequency': 'c', 'id': 815, 'synset': 'pigeon.n.01', 'synonyms': ['pigeon'], 'def': 'wild and domesticated birds having a heavy body and short legs', 'name': 'pigeon'}, {'frequency': 'r', 'id': 816, 'synset': 'piggy_bank.n.01', 'synonyms': ['piggy_bank', 'penny_bank'], 'def': "a child's coin bank (often shaped like a pig)", 'name': 'piggy_bank'}, {'frequency': 'f', 'id': 817, 'synset': 'pillow.n.01', 'synonyms': ['pillow'], 'def': 'a cushion to support the head of a sleeping person', 'name': 'pillow'}, {'frequency': 'r', 'id': 818, 'synset': 'pin.n.09', 'synonyms': ['pin_(non_jewelry)'], 'def': 'a small slender (often pointed) piece of wood or metal used to support or fasten or attach things', 'name': 'pin_(non_jewelry)'}, {'frequency': 'f', 'id': 819, 'synset': 'pineapple.n.02', 'synonyms': ['pineapple'], 'def': 'large sweet fleshy tropical fruit with a tuft of stiff leaves', 'name': 'pineapple'}, {'frequency': 'c', 'id': 820, 'synset': 'pinecone.n.01', 'synonyms': ['pinecone'], 'def': 'the seed-producing cone of a pine tree', 'name': 'pinecone'}, {'frequency': 'r', 'id': 821, 'synset': 'ping-pong_ball.n.01', 'synonyms': ['ping-pong_ball'], 'def': 'light hollow ball used in playing table tennis', 'name': 'ping-pong_ball'}, {'frequency': 'r', 'id': 822, 'synset': 'pinwheel.n.03', 'synonyms': ['pinwheel'], 'def': 'a toy consisting of vanes of colored paper or plastic that is pinned to a 
stick and spins when it is pointed into the wind', 'name': 'pinwheel'}, {'frequency': 'r', 'id': 823, 'synset': 'pipe.n.01', 'synonyms': ['tobacco_pipe'], 'def': 'a tube with a small bowl at one end; used for smoking tobacco', 'name': 'tobacco_pipe'}, {'frequency': 'f', 'id': 824, 'synset': 'pipe.n.02', 'synonyms': ['pipe', 'piping'], 'def': 'a long tube made of metal or plastic that is used to carry water or oil or gas etc.', 'name': 'pipe'}, {'frequency': 'r', 'id': 825, 'synset': 'pistol.n.01', 'synonyms': ['pistol', 'handgun'], 'def': 'a firearm that is held and fired with one hand', 'name': 'pistol'}, {'frequency': 'r', 'id': 826, 'synset': 'pita.n.01', 'synonyms': ['pita_(bread)', 'pocket_bread'], 'def': 'usually small round bread that can open into a pocket for filling', 'name': 'pita_(bread)'}, {'frequency': 'f', 'id': 827, 'synset': 'pitcher.n.02', 'synonyms': ['pitcher_(vessel_for_liquid)', 'ewer'], 'def': 'an open vessel with a handle and a spout for pouring', 'name': 'pitcher_(vessel_for_liquid)'}, {'frequency': 'r', 'id': 828, 'synset': 'pitchfork.n.01', 'synonyms': ['pitchfork'], 'def': 'a long-handled hand tool with sharp widely spaced prongs for lifting and pitching hay', 'name': 'pitchfork'}, {'frequency': 'f', 'id': 829, 'synset': 'pizza.n.01', 'synonyms': ['pizza'], 'def': 'Italian open pie made of thin bread dough spread with a spiced mixture of e.g. tomato sauce and cheese', 'name': 'pizza'}, {'frequency': 'f', 'id': 830, 'synset': 'place_mat.n.01', 'synonyms': ['place_mat'], 'def': 'a mat placed on a table for an individual place setting', 'name': 'place_mat'}, {'frequency': 'f', 'id': 831, 'synset': 'plate.n.04', 'synonyms': ['plate'], 'def': 'dish on which food is served or from which food is eaten', 'name': 'plate'}, {'frequency': 'c', 'id': 832, 'synset': 'platter.n.01', 'synonyms': ['platter'], 'def': 'a large shallow dish used for serving food', 'name': 'platter'}, {'frequency': 'r', 'id': 833, 'synset': 'playing_card.n.01', 'synonyms': ['playing_card'], 'def': 'one of a pack of cards that are used to play card games', 'name': 'playing_card'}, {'frequency': 'r', 'id': 834, 'synset': 'playpen.n.01', 'synonyms': ['playpen'], 'def': 'a portable enclosure in which babies may be left to play', 'name': 'playpen'}, {'frequency': 'c', 'id': 835, 'synset': 'pliers.n.01', 'synonyms': ['pliers', 'plyers'], 'def': 'a gripping hand tool with two hinged arms and (usually) serrated jaws', 'name': 'pliers'}, {'frequency': 'r', 'id': 836, 'synset': 'plow.n.01', 'synonyms': ['plow_(farm_equipment)', 'plough_(farm_equipment)'], 'def': 'a farm tool having one or more heavy blades to break the soil and cut a furrow prior to sowing', 'name': 'plow_(farm_equipment)'}, {'frequency': 'r', 'id': 837, 'synset': 'pocket_watch.n.01', 'synonyms': ['pocket_watch'], 'def': 'a watch that is carried in a small watch pocket', 'name': 'pocket_watch'}, {'frequency': 'c', 'id': 838, 'synset': 'pocketknife.n.01', 'synonyms': ['pocketknife'], 'def': 'a knife with a blade that folds into the handle; suitable for carrying in the pocket', 'name': 'pocketknife'}, {'frequency': 'c', 'id': 839, 'synset': 'poker.n.01', 'synonyms': ['poker_(fire_stirring_tool)', 'stove_poker', 'fire_hook'], 'def': 'fire iron consisting of a metal rod with a handle; used to stir a fire', 'name': 'poker_(fire_stirring_tool)'}, {'frequency': 'f', 'id': 840, 'synset': 'pole.n.01', 'synonyms': ['pole', 'post'], 'def': 'a long (usually round) rod of wood or metal or plastic', 'name': 'pole'}, {'frequency': 'r', 'id': 841, 'synset': 
'police_van.n.01', 'synonyms': ['police_van', 'police_wagon', 'paddy_wagon', 'patrol_wagon'], 'def': 'van used by police to transport prisoners', 'name': 'police_van'}, {'frequency': 'f', 'id': 842, 'synset': 'polo_shirt.n.01', 'synonyms': ['polo_shirt', 'sport_shirt'], 'def': 'a shirt with short sleeves designed for comfort and casual wear', 'name': 'polo_shirt'}, {'frequency': 'r', 'id': 843, 'synset': 'poncho.n.01', 'synonyms': ['poncho'], 'def': 'a blanket-like cloak with a hole in the center for the head', 'name': 'poncho'}, {'frequency': 'c', 'id': 844, 'synset': 'pony.n.05', 'synonyms': ['pony'], 'def': 'any of various breeds of small gentle horses usually less than five feet high at the shoulder', 'name': 'pony'}, {'frequency': 'r', 'id': 845, 'synset': 'pool_table.n.01', 'synonyms': ['pool_table', 'billiard_table', 'snooker_table'], 'def': 'game equipment consisting of a heavy table on which pool is played', 'name': 'pool_table'}, {'frequency': 'f', 'id': 846, 'synset': 'pop.n.02', 'synonyms': ['pop_(soda)', 'soda_(pop)', 'tonic', 'soft_drink'], 'def': 'a sweet drink containing carbonated water and flavoring', 'name': 'pop_(soda)'}, {'frequency': 'r', 'id': 847, 'synset': 'portrait.n.02', 'synonyms': ['portrait', 'portrayal'], 'def': 'any likeness of a person, in any medium', 'name': 'portrait'}, {'frequency': 'c', 'id': 848, 'synset': 'postbox.n.01', 'synonyms': ['postbox_(public)', 'mailbox_(public)'], 'def': 'public box for deposit of mail', 'name': 'postbox_(public)'}, {'frequency': 'c', 'id': 849, 'synset': 'postcard.n.01', 'synonyms': ['postcard', 'postal_card', 'mailing-card'], 'def': 'a card for sending messages by post without an envelope', 'name': 'postcard'}, {'frequency': 'f', 'id': 850, 'synset': 'poster.n.01', 'synonyms': ['poster', 'placard'], 'def': 'a sign posted in a public place as an advertisement', 'name': 'poster'}, {'frequency': 'f', 'id': 851, 'synset': 'pot.n.01', 'synonyms': ['pot'], 'def': 'metal or earthenware cooking vessel that is usually round and deep; often has a handle and lid', 'name': 'pot'}, {'frequency': 'f', 'id': 852, 'synset': 'pot.n.04', 'synonyms': ['flowerpot'], 'def': 'a container in which plants are cultivated', 'name': 'flowerpot'}, {'frequency': 'f', 'id': 853, 'synset': 'potato.n.01', 'synonyms': ['potato'], 'def': 'an edible tuber native to South America', 'name': 'potato'}, {'frequency': 'c', 'id': 854, 'synset': 'potholder.n.01', 'synonyms': ['potholder'], 'def': 'an insulated pad for holding hot pots', 'name': 'potholder'}, {'frequency': 'c', 'id': 855, 'synset': 'pottery.n.01', 'synonyms': ['pottery', 'clayware'], 'def': 'ceramic ware made from clay and baked in a kiln', 'name': 'pottery'}, {'frequency': 'c', 'id': 856, 'synset': 'pouch.n.01', 'synonyms': ['pouch'], 'def': 'a small or medium size container for holding or carrying things', 'name': 'pouch'}, {'frequency': 'r', 'id': 857, 'synset': 'power_shovel.n.01', 'synonyms': ['power_shovel', 'excavator', 'digger'], 'def': 'a machine for excavating', 'name': 'power_shovel'}, {'frequency': 'c', 'id': 858, 'synset': 'prawn.n.01', 'synonyms': ['prawn', 'shrimp'], 'def': 'any of various edible decapod crustaceans', 'name': 'prawn'}, {'frequency': 'f', 'id': 859, 'synset': 'printer.n.03', 'synonyms': ['printer', 'printing_machine'], 'def': 'a machine that prints', 'name': 'printer'}, {'frequency': 'c', 'id': 860, 'synset': 'projectile.n.01', 'synonyms': ['projectile_(weapon)', 'missile'], 'def': 'a weapon that is forcibly thrown or projected at a targets', 'name': 
'projectile_(weapon)'}, {'frequency': 'c', 'id': 861, 'synset': 'projector.n.02', 'synonyms': ['projector'], 'def': 'an optical instrument that projects an enlarged image onto a screen', 'name': 'projector'}, {'frequency': 'f', 'id': 862, 'synset': 'propeller.n.01', 'synonyms': ['propeller', 'propellor'], 'def': 'a mechanical device that rotates to push against air or water', 'name': 'propeller'}, {'frequency': 'r', 'id': 863, 'synset': 'prune.n.01', 'synonyms': ['prune'], 'def': 'dried plum', 'name': 'prune'}, {'frequency': 'r', 'id': 864, 'synset': 'pudding.n.01', 'synonyms': ['pudding'], 'def': 'any of various soft thick unsweetened baked dishes', 'name': 'pudding'}, {'frequency': 'r', 'id': 865, 'synset': 'puffer.n.02', 'synonyms': ['puffer_(fish)', 'pufferfish', 'blowfish', 'globefish'], 'def': 'fishes whose elongated spiny body can inflate itself with water or air to form a globe', 'name': 'puffer_(fish)'}, {'frequency': 'r', 'id': 866, 'synset': 'puffin.n.01', 'synonyms': ['puffin'], 'def': 'seabirds having short necks and brightly colored compressed bills', 'name': 'puffin'}, {'frequency': 'r', 'id': 867, 'synset': 'pug.n.01', 'synonyms': ['pug-dog'], 'def': 'small compact smooth-coated breed of Asiatic origin having a tightly curled tail and broad flat wrinkled muzzle', 'name': 'pug-dog'}, {'frequency': 'c', 'id': 868, 'synset': 'pumpkin.n.02', 'synonyms': ['pumpkin'], 'def': 'usually large pulpy deep-yellow round fruit of the squash family maturing in late summer or early autumn', 'name': 'pumpkin'}, {'frequency': 'r', 'id': 869, 'synset': 'punch.n.03', 'synonyms': ['puncher'], 'def': 'a tool for making holes or indentations', 'name': 'puncher'}, {'frequency': 'r', 'id': 870, 'synset': 'puppet.n.01', 'synonyms': ['puppet', 'marionette'], 'def': 'a small figure of a person operated from above with strings by a puppeteer', 'name': 'puppet'}, {'frequency': 'r', 'id': 871, 'synset': 'puppy.n.01', 'synonyms': ['puppy'], 'def': 'a young dog', 'name': 'puppy'}, {'frequency': 'r', 'id': 872, 'synset': 'quesadilla.n.01', 'synonyms': ['quesadilla'], 'def': 'a tortilla that is filled with cheese and heated', 'name': 'quesadilla'}, {'frequency': 'r', 'id': 873, 'synset': 'quiche.n.02', 'synonyms': ['quiche'], 'def': 'a tart filled with rich unsweetened custard; often contains other ingredients (as cheese or ham or seafood or vegetables)', 'name': 'quiche'}, {'frequency': 'f', 'id': 874, 'synset': 'quilt.n.01', 'synonyms': ['quilt', 'comforter'], 'def': 'bedding made of two layers of cloth filled with stuffing and stitched together', 'name': 'quilt'}, {'frequency': 'c', 'id': 875, 'synset': 'rabbit.n.01', 'synonyms': ['rabbit'], 'def': 'any of various burrowing animals of the family Leporidae having long ears and short tails', 'name': 'rabbit'}, {'frequency': 'r', 'id': 876, 'synset': 'racer.n.02', 'synonyms': ['race_car', 'racing_car'], 'def': 'a fast car that competes in races', 'name': 'race_car'}, {'frequency': 'c', 'id': 877, 'synset': 'racket.n.04', 'synonyms': ['racket', 'racquet'], 'def': 'a sports implement used to strike a ball in various games', 'name': 'racket'}, {'frequency': 'r', 'id': 878, 'synset': 'radar.n.01', 'synonyms': ['radar'], 'def': 'measuring instrument in which the echo of a pulse of microwave radiation is used to detect and locate distant objects', 'name': 'radar'}, {'frequency': 'c', 'id': 879, 'synset': 'radiator.n.03', 'synonyms': ['radiator'], 'def': 'a mechanism consisting of a metal honeycomb through which hot fluids circulate', 'name': 'radiator'}, 
{'frequency': 'c', 'id': 880, 'synset': 'radio_receiver.n.01', 'synonyms': ['radio_receiver', 'radio_set', 'radio', 'tuner_(radio)'], 'def': 'an electronic receiver that detects and demodulates and amplifies transmitted radio signals', 'name': 'radio_receiver'}, {'frequency': 'c', 'id': 881, 'synset': 'radish.n.03', 'synonyms': ['radish', 'daikon'], 'def': 'pungent edible root of any of various cultivated radish plants', 'name': 'radish'}, {'frequency': 'c', 'id': 882, 'synset': 'raft.n.01', 'synonyms': ['raft'], 'def': 'a flat float (usually made of logs or planks) that can be used for transport or as a platform for swimmers', 'name': 'raft'}, {'frequency': 'r', 'id': 883, 'synset': 'rag_doll.n.01', 'synonyms': ['rag_doll'], 'def': 'a cloth doll that is stuffed and (usually) painted', 'name': 'rag_doll'}, {'frequency': 'c', 'id': 884, 'synset': 'raincoat.n.01', 'synonyms': ['raincoat', 'waterproof_jacket'], 'def': 'a water-resistant coat', 'name': 'raincoat'}, {'frequency': 'c', 'id': 885, 'synset': 'ram.n.05', 'synonyms': ['ram_(animal)'], 'def': 'uncastrated adult male sheep', 'name': 'ram_(animal)'}, {'frequency': 'c', 'id': 886, 'synset': 'raspberry.n.02', 'synonyms': ['raspberry'], 'def': 'red or black edible aggregate berries usually smaller than the related blackberries', 'name': 'raspberry'}, {'frequency': 'r', 'id': 887, 'synset': 'rat.n.01', 'synonyms': ['rat'], 'def': 'any of various long-tailed rodents similar to but larger than a mouse', 'name': 'rat'}, {'frequency': 'c', 'id': 888, 'synset': 'razorblade.n.01', 'synonyms': ['razorblade'], 'def': 'a blade that has very sharp edge', 'name': 'razorblade'}, {'frequency': 'c', 'id': 889, 'synset': 'reamer.n.01', 'synonyms': ['reamer_(juicer)', 'juicer', 'juice_reamer'], 'def': 'a squeezer with a conical ridged center that is used for squeezing juice from citrus fruit', 'name': 'reamer_(juicer)'}, {'frequency': 'f', 'id': 890, 'synset': 'rearview_mirror.n.01', 'synonyms': ['rearview_mirror'], 'def': 'car mirror that reflects the view out of the rear window', 'name': 'rearview_mirror'}, {'frequency': 'c', 'id': 891, 'synset': 'receipt.n.02', 'synonyms': ['receipt'], 'def': 'an acknowledgment (usually tangible) that payment has been made', 'name': 'receipt'}, {'frequency': 'c', 'id': 892, 'synset': 'recliner.n.01', 'synonyms': ['recliner', 'reclining_chair', 'lounger_(chair)'], 'def': 'an armchair whose back can be lowered and foot can be raised to allow the sitter to recline in it', 'name': 'recliner'}, {'frequency': 'r', 'id': 893, 'synset': 'record_player.n.01', 'synonyms': ['record_player', 'phonograph_(record_player)', 'turntable'], 'def': 'machine in which rotating records cause a stylus to vibrate and the vibrations are amplified acoustically or electronically', 'name': 'record_player'}, {'frequency': 'r', 'id': 894, 'synset': 'red_cabbage.n.02', 'synonyms': ['red_cabbage'], 'def': 'compact head of purplish-red leaves', 'name': 'red_cabbage'}, {'frequency': 'f', 'id': 895, 'synset': 'reflector.n.01', 'synonyms': ['reflector'], 'def': 'device that reflects light, radiation, etc.', 'name': 'reflector'}, {'frequency': 'f', 'id': 896, 'synset': 'remote_control.n.01', 'synonyms': ['remote_control'], 'def': 'a device that can be used to control a machine or apparatus from a distance', 'name': 'remote_control'}, {'frequency': 'c', 'id': 897, 'synset': 'rhinoceros.n.01', 'synonyms': ['rhinoceros'], 'def': 'massive powerful herbivorous odd-toed ungulate of southeast Asia and Africa having very thick skin and one or two horns on the 
snout', 'name': 'rhinoceros'}, {'frequency': 'r', 'id': 898, 'synset': 'rib.n.03', 'synonyms': ['rib_(food)'], 'def': 'cut of meat including one or more ribs', 'name': 'rib_(food)'}, {'frequency': 'r', 'id': 899, 'synset': 'rifle.n.01', 'synonyms': ['rifle'], 'def': 'a shoulder firearm with a long barrel', 'name': 'rifle'}, {'frequency': 'f', 'id': 900, 'synset': 'ring.n.08', 'synonyms': ['ring'], 'def': 'jewelry consisting of a circlet of precious metal (often set with jewels) worn on the finger', 'name': 'ring'}, {'frequency': 'r', 'id': 901, 'synset': 'river_boat.n.01', 'synonyms': ['river_boat'], 'def': 'a boat used on rivers or to ply a river', 'name': 'river_boat'}, {'frequency': 'r', 'id': 902, 'synset': 'road_map.n.02', 'synonyms': ['road_map'], 'def': '(NOT A ROAD) a MAP showing roads (for automobile travel)', 'name': 'road_map'}, {'frequency': 'c', 'id': 903, 'synset': 'robe.n.01', 'synonyms': ['robe'], 'def': 'any loose flowing garment', 'name': 'robe'}, {'frequency': 'c', 'id': 904, 'synset': 'rocking_chair.n.01', 'synonyms': ['rocking_chair'], 'def': 'a chair mounted on rockers', 'name': 'rocking_chair'}, {'frequency': 'r', 'id': 905, 'synset': 'roller_skate.n.01', 'synonyms': ['roller_skate'], 'def': 'a shoe with pairs of rollers (small hard wheels) fixed to the sole', 'name': 'roller_skate'}, {'frequency': 'r', 'id': 906, 'synset': 'rollerblade.n.01', 'synonyms': ['Rollerblade'], 'def': 'an in-line variant of a roller skate', 'name': 'Rollerblade'}, {'frequency': 'c', 'id': 907, 'synset': 'rolling_pin.n.01', 'synonyms': ['rolling_pin'], 'def': 'utensil consisting of a cylinder (usually of wood) with a handle at each end; used to roll out dough', 'name': 'rolling_pin'}, {'frequency': 'r', 'id': 908, 'synset': 'root_beer.n.01', 'synonyms': ['root_beer'], 'def': 'carbonated drink containing extracts of roots and herbs', 'name': 'root_beer'}, {'frequency': 'c', 'id': 909, 'synset': 'router.n.02', 'synonyms': ['router_(computer_equipment)'], 'def': 'a device that forwards data packets between computer networks', 'name': 'router_(computer_equipment)'}, {'frequency': 'f', 'id': 910, 'synset': 'rubber_band.n.01', 'synonyms': ['rubber_band', 'elastic_band'], 'def': 'a narrow band of elastic rubber used to hold things (such as papers) together', 'name': 'rubber_band'}, {'frequency': 'c', 'id': 911, 'synset': 'runner.n.08', 'synonyms': ['runner_(carpet)'], 'def': 'a long narrow carpet', 'name': 'runner_(carpet)'}, {'frequency': 'f', 'id': 912, 'synset': 'sack.n.01', 'synonyms': ['plastic_bag', 'paper_bag'], 'def': "a bag made of paper or plastic for holding customer's purchases", 'name': 'plastic_bag'}, {'frequency': 'f', 'id': 913, 'synset': 'saddle.n.01', 'synonyms': ['saddle_(on_an_animal)'], 'def': 'a seat for the rider of a horse or camel', 'name': 'saddle_(on_an_animal)'}, {'frequency': 'f', 'id': 914, 'synset': 'saddle_blanket.n.01', 'synonyms': ['saddle_blanket', 'saddlecloth', 'horse_blanket'], 'def': 'stable gear consisting of a blanket placed under the saddle', 'name': 'saddle_blanket'}, {'frequency': 'c', 'id': 915, 'synset': 'saddlebag.n.01', 'synonyms': ['saddlebag'], 'def': 'a large bag (or pair of bags) hung over a saddle', 'name': 'saddlebag'}, {'frequency': 'r', 'id': 916, 'synset': 'safety_pin.n.01', 'synonyms': ['safety_pin'], 'def': 'a pin in the form of a clasp; has a guard so the point of the pin will not stick the user', 'name': 'safety_pin'}, {'frequency': 'c', 'id': 917, 'synset': 'sail.n.01', 'synonyms': ['sail'], 'def': 'a large piece of fabric by means of 
which wind is used to propel a sailing vessel', 'name': 'sail'}, {'frequency': 'c', 'id': 918, 'synset': 'salad.n.01', 'synonyms': ['salad'], 'def': 'food mixtures either arranged on a plate or tossed and served with a moist dressing; usually consisting of or including greens', 'name': 'salad'}, {'frequency': 'r', 'id': 919, 'synset': 'salad_plate.n.01', 'synonyms': ['salad_plate', 'salad_bowl'], 'def': 'a plate or bowl for individual servings of salad', 'name': 'salad_plate'}, {'frequency': 'r', 'id': 920, 'synset': 'salami.n.01', 'synonyms': ['salami'], 'def': 'highly seasoned fatty sausage of pork and beef usually dried', 'name': 'salami'}, {'frequency': 'r', 'id': 921, 'synset': 'salmon.n.01', 'synonyms': ['salmon_(fish)'], 'def': 'any of various large food and game fishes of northern waters', 'name': 'salmon_(fish)'}, {'frequency': 'r', 'id': 922, 'synset': 'salmon.n.03', 'synonyms': ['salmon_(food)'], 'def': 'flesh of any of various marine or freshwater fish of the family Salmonidae', 'name': 'salmon_(food)'}, {'frequency': 'r', 'id': 923, 'synset': 'salsa.n.01', 'synonyms': ['salsa'], 'def': 'spicy sauce of tomatoes and onions and chili peppers to accompany Mexican foods', 'name': 'salsa'}, {'frequency': 'f', 'id': 924, 'synset': 'saltshaker.n.01', 'synonyms': ['saltshaker'], 'def': 'a shaker with a perforated top for sprinkling salt', 'name': 'saltshaker'}, {'frequency': 'f', 'id': 925, 'synset': 'sandal.n.01', 'synonyms': ['sandal_(type_of_shoe)'], 'def': 'a shoe consisting of a sole fastened by straps to the foot', 'name': 'sandal_(type_of_shoe)'}, {'frequency': 'f', 'id': 926, 'synset': 'sandwich.n.01', 'synonyms': ['sandwich'], 'def': 'two (or more) slices of bread with a filling between them', 'name': 'sandwich'}, {'frequency': 'r', 'id': 927, 'synset': 'satchel.n.01', 'synonyms': ['satchel'], 'def': 'luggage consisting of a small case with a flat bottom and (usually) a shoulder strap', 'name': 'satchel'}, {'frequency': 'r', 'id': 928, 'synset': 'saucepan.n.01', 'synonyms': ['saucepan'], 'def': 'a deep pan with a handle; used for stewing or boiling', 'name': 'saucepan'}, {'frequency': 'f', 'id': 929, 'synset': 'saucer.n.02', 'synonyms': ['saucer'], 'def': 'a small shallow dish for holding a cup at the table', 'name': 'saucer'}, {'frequency': 'f', 'id': 930, 'synset': 'sausage.n.01', 'synonyms': ['sausage'], 'def': 'highly seasoned minced meat stuffed in casings', 'name': 'sausage'}, {'frequency': 'r', 'id': 931, 'synset': 'sawhorse.n.01', 'synonyms': ['sawhorse', 'sawbuck'], 'def': 'a framework for holding wood that is being sawed', 'name': 'sawhorse'}, {'frequency': 'r', 'id': 932, 'synset': 'sax.n.02', 'synonyms': ['saxophone'], 'def': "a wind instrument with a `J'-shaped form typically made of brass", 'name': 'saxophone'}, {'frequency': 'f', 'id': 933, 'synset': 'scale.n.07', 'synonyms': ['scale_(measuring_instrument)'], 'def': 'a measuring instrument for weighing; shows amount of mass', 'name': 'scale_(measuring_instrument)'}, {'frequency': 'r', 'id': 934, 'synset': 'scarecrow.n.01', 'synonyms': ['scarecrow', 'strawman'], 'def': 'an effigy in the shape of a man to frighten birds away from seeds', 'name': 'scarecrow'}, {'frequency': 'f', 'id': 935, 'synset': 'scarf.n.01', 'synonyms': ['scarf'], 'def': 'a garment worn around the head or neck or shoulders for warmth or decoration', 'name': 'scarf'}, {'frequency': 'c', 'id': 936, 'synset': 'school_bus.n.01', 'synonyms': ['school_bus'], 'def': 'a bus used to transport children to or from school', 'name': 'school_bus'}, 
{'frequency': 'f', 'id': 937, 'synset': 'scissors.n.01', 'synonyms': ['scissors'], 'def': 'a tool having two crossed pivoting blades with looped handles', 'name': 'scissors'}, {'frequency': 'c', 'id': 938, 'synset': 'scoreboard.n.01', 'synonyms': ['scoreboard'], 'def': 'a large board for displaying the score of a contest (and some other information)', 'name': 'scoreboard'}, {'frequency': 'c', 'id': 939, 'synset': 'scrambled_eggs.n.01', 'synonyms': ['scrambled_eggs'], 'def': 'eggs beaten and cooked to a soft firm consistency while stirring', 'name': 'scrambled_eggs'}, {'frequency': 'r', 'id': 940, 'synset': 'scraper.n.01', 'synonyms': ['scraper'], 'def': 'any of various hand tools for scraping', 'name': 'scraper'}, {'frequency': 'r', 'id': 941, 'synset': 'scratcher.n.03', 'synonyms': ['scratcher'], 'def': 'a device used for scratching', 'name': 'scratcher'}, {'frequency': 'c', 'id': 942, 'synset': 'screwdriver.n.01', 'synonyms': ['screwdriver'], 'def': 'a hand tool for driving screws; has a tip that fits into the head of a screw', 'name': 'screwdriver'}, {'frequency': 'c', 'id': 943, 'synset': 'scrub_brush.n.01', 'synonyms': ['scrubbing_brush'], 'def': 'a brush with short stiff bristles for heavy cleaning', 'name': 'scrubbing_brush'}, {'frequency': 'c', 'id': 944, 'synset': 'sculpture.n.01', 'synonyms': ['sculpture'], 'def': 'a three-dimensional work of art', 'name': 'sculpture'}, {'frequency': 'r', 'id': 945, 'synset': 'seabird.n.01', 'synonyms': ['seabird', 'seafowl'], 'def': 'a bird that frequents coastal waters and the open ocean: gulls; pelicans; gannets; cormorants; albatrosses; petrels; etc.', 'name': 'seabird'}, {'frequency': 'r', 'id': 946, 'synset': 'seahorse.n.02', 'synonyms': ['seahorse'], 'def': 'small fish with horse-like heads bent sharply downward and curled tails', 'name': 'seahorse'}, {'frequency': 'r', 'id': 947, 'synset': 'seaplane.n.01', 'synonyms': ['seaplane', 'hydroplane'], 'def': 'an airplane that can land on or take off from water', 'name': 'seaplane'}, {'frequency': 'c', 'id': 948, 'synset': 'seashell.n.01', 'synonyms': ['seashell'], 'def': 'the shell of a marine organism', 'name': 'seashell'}, {'frequency': 'r', 'id': 949, 'synset': 'seedling.n.01', 'synonyms': ['seedling'], 'def': 'young plant or tree grown from a seed', 'name': 'seedling'}, {'frequency': 'c', 'id': 950, 'synset': 'serving_dish.n.01', 'synonyms': ['serving_dish'], 'def': 'a dish used for serving food', 'name': 'serving_dish'}, {'frequency': 'r', 'id': 951, 'synset': 'sewing_machine.n.01', 'synonyms': ['sewing_machine'], 'def': 'a textile machine used as a home appliance for sewing', 'name': 'sewing_machine'}, {'frequency': 'r', 'id': 952, 'synset': 'shaker.n.03', 'synonyms': ['shaker'], 'def': 'a container in which something can be shaken', 'name': 'shaker'}, {'frequency': 'c', 'id': 953, 'synset': 'shampoo.n.01', 'synonyms': ['shampoo'], 'def': 'cleansing agent consisting of soaps or detergents used for washing the hair', 'name': 'shampoo'}, {'frequency': 'r', 'id': 954, 'synset': 'shark.n.01', 'synonyms': ['shark'], 'def': 'typically large carnivorous fishes with sharpe teeth', 'name': 'shark'}, {'frequency': 'r', 'id': 955, 'synset': 'sharpener.n.01', 'synonyms': ['sharpener'], 'def': 'any implement that is used to make something (an edge or a point) sharper', 'name': 'sharpener'}, {'frequency': 'r', 'id': 956, 'synset': 'sharpie.n.03', 'synonyms': ['Sharpie'], 'def': 'a pen with indelible ink that will write on any surface', 'name': 'Sharpie'}, {'frequency': 'r', 'id': 957, 'synset': 
'shaver.n.03', 'synonyms': ['shaver_(electric)', 'electric_shaver', 'electric_razor'], 'def': 'a razor powered by an electric motor', 'name': 'shaver_(electric)'}, {'frequency': 'c', 'id': 958, 'synset': 'shaving_cream.n.01', 'synonyms': ['shaving_cream', 'shaving_soap'], 'def': 'toiletry consisting that forms a rich lather for softening the beard before shaving', 'name': 'shaving_cream'}, {'frequency': 'r', 'id': 959, 'synset': 'shawl.n.01', 'synonyms': ['shawl'], 'def': 'cloak consisting of an oblong piece of cloth used to cover the head and shoulders', 'name': 'shawl'}, {'frequency': 'r', 'id': 960, 'synset': 'shears.n.01', 'synonyms': ['shears'], 'def': 'large scissors with strong blades', 'name': 'shears'}, {'frequency': 'f', 'id': 961, 'synset': 'sheep.n.01', 'synonyms': ['sheep'], 'def': 'woolly usually horned ruminant mammal related to the goat', 'name': 'sheep'}, {'frequency': 'r', 'id': 962, 'synset': 'shepherd_dog.n.01', 'synonyms': ['shepherd_dog', 'sheepdog'], 'def': 'any of various usually long-haired breeds of dog reared to herd and guard sheep', 'name': 'shepherd_dog'}, {'frequency': 'r', 'id': 963, 'synset': 'sherbert.n.01', 'synonyms': ['sherbert', 'sherbet'], 'def': 'a frozen dessert made primarily of fruit juice and sugar', 'name': 'sherbert'}, {'frequency': 'r', 'id': 964, 'synset': 'shield.n.02', 'synonyms': ['shield'], 'def': 'armor carried on the arm to intercept blows', 'name': 'shield'}, {'frequency': 'f', 'id': 965, 'synset': 'shirt.n.01', 'synonyms': ['shirt'], 'def': 'a garment worn on the upper half of the body', 'name': 'shirt'}, {'frequency': 'f', 'id': 966, 'synset': 'shoe.n.01', 'synonyms': ['shoe', 'sneaker_(type_of_shoe)', 'tennis_shoe'], 'def': 'common footwear covering the foot', 'name': 'shoe'}, {'frequency': 'c', 'id': 967, 'synset': 'shopping_bag.n.01', 'synonyms': ['shopping_bag'], 'def': 'a bag made of plastic or strong paper (often with handles); used to transport goods after shopping', 'name': 'shopping_bag'}, {'frequency': 'c', 'id': 968, 'synset': 'shopping_cart.n.01', 'synonyms': ['shopping_cart'], 'def': 'a handcart that holds groceries or other goods while shopping', 'name': 'shopping_cart'}, {'frequency': 'f', 'id': 969, 'synset': 'short_pants.n.01', 'synonyms': ['short_pants', 'shorts_(clothing)', 'trunks_(clothing)'], 'def': 'trousers that end at or above the knee', 'name': 'short_pants'}, {'frequency': 'r', 'id': 970, 'synset': 'shot_glass.n.01', 'synonyms': ['shot_glass'], 'def': 'a small glass adequate to hold a single swallow of whiskey', 'name': 'shot_glass'}, {'frequency': 'c', 'id': 971, 'synset': 'shoulder_bag.n.01', 'synonyms': ['shoulder_bag'], 'def': 'a large handbag that can be carried by a strap looped over the shoulder', 'name': 'shoulder_bag'}, {'frequency': 'c', 'id': 972, 'synset': 'shovel.n.01', 'synonyms': ['shovel'], 'def': 'a hand tool for lifting loose material such as snow, dirt, etc.', 'name': 'shovel'}, {'frequency': 'f', 'id': 973, 'synset': 'shower.n.01', 'synonyms': ['shower_head'], 'def': 'a plumbing fixture that sprays water over you', 'name': 'shower_head'}, {'frequency': 'f', 'id': 974, 'synset': 'shower_curtain.n.01', 'synonyms': ['shower_curtain'], 'def': 'a curtain that keeps water from splashing out of the shower area', 'name': 'shower_curtain'}, {'frequency': 'r', 'id': 975, 'synset': 'shredder.n.01', 'synonyms': ['shredder_(for_paper)'], 'def': 'a device that shreds documents', 'name': 'shredder_(for_paper)'}, {'frequency': 'r', 'id': 976, 'synset': 'sieve.n.01', 'synonyms': ['sieve', 
'screen_(sieve)'], 'def': 'a strainer for separating lumps from powdered material or grading particles', 'name': 'sieve'}, {'frequency': 'f', 'id': 977, 'synset': 'signboard.n.01', 'synonyms': ['signboard'], 'def': 'structure displaying a board on which advertisements can be posted', 'name': 'signboard'}, {'frequency': 'c', 'id': 978, 'synset': 'silo.n.01', 'synonyms': ['silo'], 'def': 'a cylindrical tower used for storing goods', 'name': 'silo'}, {'frequency': 'f', 'id': 979, 'synset': 'sink.n.01', 'synonyms': ['sink'], 'def': 'plumbing fixture consisting of a water basin fixed to a wall or floor and having a drainpipe', 'name': 'sink'}, {'frequency': 'f', 'id': 980, 'synset': 'skateboard.n.01', 'synonyms': ['skateboard'], 'def': 'a board with wheels that is ridden in a standing or crouching position and propelled by foot', 'name': 'skateboard'}, {'frequency': 'c', 'id': 981, 'synset': 'skewer.n.01', 'synonyms': ['skewer'], 'def': 'a long pin for holding meat in position while it is being roasted', 'name': 'skewer'}, {'frequency': 'f', 'id': 982, 'synset': 'ski.n.01', 'synonyms': ['ski'], 'def': 'sports equipment for skiing on snow', 'name': 'ski'}, {'frequency': 'f', 'id': 983, 'synset': 'ski_boot.n.01', 'synonyms': ['ski_boot'], 'def': 'a stiff boot that is fastened to a ski with a ski binding', 'name': 'ski_boot'}, {'frequency': 'f', 'id': 984, 'synset': 'ski_parka.n.01', 'synonyms': ['ski_parka', 'ski_jacket'], 'def': 'a parka to be worn while skiing', 'name': 'ski_parka'}, {'frequency': 'f', 'id': 985, 'synset': 'ski_pole.n.01', 'synonyms': ['ski_pole'], 'def': 'a pole with metal points used as an aid in skiing', 'name': 'ski_pole'}, {'frequency': 'f', 'id': 986, 'synset': 'skirt.n.02', 'synonyms': ['skirt'], 'def': 'a garment hanging from the waist; worn mainly by girls and women', 'name': 'skirt'}, {'frequency': 'c', 'id': 987, 'synset': 'sled.n.01', 'synonyms': ['sled', 'sledge', 'sleigh'], 'def': 'a vehicle or flat object for transportation over snow by sliding or pulled by dogs, etc.', 'name': 'sled'}, {'frequency': 'c', 'id': 988, 'synset': 'sleeping_bag.n.01', 'synonyms': ['sleeping_bag'], 'def': 'large padded bag designed to be slept in outdoors', 'name': 'sleeping_bag'}, {'frequency': 'r', 'id': 989, 'synset': 'sling.n.05', 'synonyms': ['sling_(bandage)', 'triangular_bandage'], 'def': 'bandage to support an injured forearm; slung over the shoulder or neck', 'name': 'sling_(bandage)'}, {'frequency': 'c', 'id': 990, 'synset': 'slipper.n.01', 'synonyms': ['slipper_(footwear)', 'carpet_slipper_(footwear)'], 'def': 'low footwear that can be slipped on and off easily; usually worn indoors', 'name': 'slipper_(footwear)'}, {'frequency': 'r', 'id': 991, 'synset': 'smoothie.n.02', 'synonyms': ['smoothie'], 'def': 'a thick smooth drink consisting of fresh fruit pureed with ice cream or yoghurt or milk', 'name': 'smoothie'}, {'frequency': 'r', 'id': 992, 'synset': 'snake.n.01', 'synonyms': ['snake', 'serpent'], 'def': 'limbless scaly elongate reptile; some are venomous', 'name': 'snake'}, {'frequency': 'f', 'id': 993, 'synset': 'snowboard.n.01', 'synonyms': ['snowboard'], 'def': 'a board that resembles a broad ski or a small surfboard; used in a standing position to slide down snow-covered slopes', 'name': 'snowboard'}, {'frequency': 'c', 'id': 994, 'synset': 'snowman.n.01', 'synonyms': ['snowman'], 'def': 'a figure of a person made of packed snow', 'name': 'snowman'}, {'frequency': 'c', 'id': 995, 'synset': 'snowmobile.n.01', 'synonyms': ['snowmobile'], 'def': 'tracked vehicle for 
travel on snow having skis in front', 'name': 'snowmobile'}, {'frequency': 'f', 'id': 996, 'synset': 'soap.n.01', 'synonyms': ['soap'], 'def': 'a cleansing agent made from the salts of vegetable or animal fats', 'name': 'soap'}, {'frequency': 'f', 'id': 997, 'synset': 'soccer_ball.n.01', 'synonyms': ['soccer_ball'], 'def': "an inflated ball used in playing soccer (called `football' outside of the United States)", 'name': 'soccer_ball'}, {'frequency': 'f', 'id': 998, 'synset': 'sock.n.01', 'synonyms': ['sock'], 'def': 'cloth covering for the foot; worn inside the shoe; reaches to between the ankle and the knee', 'name': 'sock'}, {'frequency': 'r', 'id': 999, 'synset': 'soda_fountain.n.02', 'synonyms': ['soda_fountain'], 'def': 'an apparatus for dispensing soda water', 'name': 'soda_fountain'}, {'frequency': 'r', 'id': 1000, 'synset': 'soda_water.n.01', 'synonyms': ['carbonated_water', 'club_soda', 'seltzer', 'sparkling_water'], 'def': 'effervescent beverage artificially charged with carbon dioxide', 'name': 'carbonated_water'}, {'frequency': 'f', 'id': 1001, 'synset': 'sofa.n.01', 'synonyms': ['sofa', 'couch', 'lounge'], 'def': 'an upholstered seat for more than one person', 'name': 'sofa'}, {'frequency': 'r', 'id': 1002, 'synset': 'softball.n.01', 'synonyms': ['softball'], 'def': 'ball used in playing softball', 'name': 'softball'}, {'frequency': 'c', 'id': 1003, 'synset': 'solar_array.n.01', 'synonyms': ['solar_array', 'solar_battery', 'solar_panel'], 'def': 'electrical device consisting of a large array of connected solar cells', 'name': 'solar_array'}, {'frequency': 'r', 'id': 1004, 'synset': 'sombrero.n.02', 'synonyms': ['sombrero'], 'def': 'a straw hat with a tall crown and broad brim; worn in American southwest and in Mexico', 'name': 'sombrero'}, {'frequency': 'c', 'id': 1005, 'synset': 'soup.n.01', 'synonyms': ['soup'], 'def': 'liquid food especially of meat or fish or vegetable stock often containing pieces of solid food', 'name': 'soup'}, {'frequency': 'r', 'id': 1006, 'synset': 'soup_bowl.n.01', 'synonyms': ['soup_bowl'], 'def': 'a bowl for serving soup', 'name': 'soup_bowl'}, {'frequency': 'c', 'id': 1007, 'synset': 'soupspoon.n.01', 'synonyms': ['soupspoon'], 'def': 'a spoon with a rounded bowl for eating soup', 'name': 'soupspoon'}, {'frequency': 'c', 'id': 1008, 'synset': 'sour_cream.n.01', 'synonyms': ['sour_cream', 'soured_cream'], 'def': 'soured light cream', 'name': 'sour_cream'}, {'frequency': 'r', 'id': 1009, 'synset': 'soya_milk.n.01', 'synonyms': ['soya_milk', 'soybean_milk', 'soymilk'], 'def': 'a milk substitute containing soybean flour and water; used in some infant formulas and in making tofu', 'name': 'soya_milk'}, {'frequency': 'r', 'id': 1010, 'synset': 'space_shuttle.n.01', 'synonyms': ['space_shuttle'], 'def': "a reusable spacecraft with wings for a controlled descent through the Earth's atmosphere", 'name': 'space_shuttle'}, {'frequency': 'r', 'id': 1011, 'synset': 'sparkler.n.02', 'synonyms': ['sparkler_(fireworks)'], 'def': 'a firework that burns slowly and throws out a shower of sparks', 'name': 'sparkler_(fireworks)'}, {'frequency': 'f', 'id': 1012, 'synset': 'spatula.n.02', 'synonyms': ['spatula'], 'def': 'a hand tool with a thin flexible blade used to mix or spread soft substances', 'name': 'spatula'}, {'frequency': 'r', 'id': 1013, 'synset': 'spear.n.01', 'synonyms': ['spear', 'lance'], 'def': 'a long pointed rod used as a tool or weapon', 'name': 'spear'}, {'frequency': 'f', 'id': 1014, 'synset': 'spectacles.n.01', 'synonyms': ['spectacles', 'specs', 
'eyeglasses', 'glasses'], 'def': 'optical instrument consisting of a frame that holds a pair of lenses for correcting defective vision', 'name': 'spectacles'}, {'frequency': 'c', 'id': 1015, 'synset': 'spice_rack.n.01', 'synonyms': ['spice_rack'], 'def': 'a rack for displaying containers filled with spices', 'name': 'spice_rack'}, {'frequency': 'r', 'id': 1016, 'synset': 'spider.n.01', 'synonyms': ['spider'], 'def': 'predatory arachnid with eight legs, two poison fangs, two feelers, and usually two silk-spinning organs at the back end of the body', 'name': 'spider'}, {'frequency': 'c', 'id': 1017, 'synset': 'sponge.n.01', 'synonyms': ['sponge'], 'def': 'a porous mass usable to absorb water typically used for cleaning', 'name': 'sponge'}, {'frequency': 'f', 'id': 1018, 'synset': 'spoon.n.01', 'synonyms': ['spoon'], 'def': 'a piece of cutlery with a shallow bowl-shaped container and a handle', 'name': 'spoon'}, {'frequency': 'c', 'id': 1019, 'synset': 'sportswear.n.01', 'synonyms': ['sportswear', 'athletic_wear', 'activewear'], 'def': 'attire worn for sport or for casual wear', 'name': 'sportswear'}, {'frequency': 'c', 'id': 1020, 'synset': 'spotlight.n.02', 'synonyms': ['spotlight'], 'def': 'a lamp that produces a strong beam of light to illuminate a restricted area; used to focus attention of a stage performer', 'name': 'spotlight'}, {'frequency': 'r', 'id': 1021, 'synset': 'squirrel.n.01', 'synonyms': ['squirrel'], 'def': 'a kind of arboreal rodent having a long bushy tail', 'name': 'squirrel'}, {'frequency': 'c', 'id': 1022, 'synset': 'stapler.n.01', 'synonyms': ['stapler_(stapling_machine)'], 'def': 'a machine that inserts staples into sheets of paper in order to fasten them together', 'name': 'stapler_(stapling_machine)'}, {'frequency': 'r', 'id': 1023, 'synset': 'starfish.n.01', 'synonyms': ['starfish', 'sea_star'], 'def': 'echinoderms characterized by five arms extending from a central disk', 'name': 'starfish'}, {'frequency': 'f', 'id': 1024, 'synset': 'statue.n.01', 'synonyms': ['statue_(sculpture)'], 'def': 'a sculpture representing a human or animal', 'name': 'statue_(sculpture)'}, {'frequency': 'c', 'id': 1025, 'synset': 'steak.n.01', 'synonyms': ['steak_(food)'], 'def': 'a slice of meat cut from the fleshy part of an animal or large fish', 'name': 'steak_(food)'}, {'frequency': 'r', 'id': 1026, 'synset': 'steak_knife.n.01', 'synonyms': ['steak_knife'], 'def': 'a sharp table knife used in eating steak', 'name': 'steak_knife'}, {'frequency': 'r', 'id': 1027, 'synset': 'steamer.n.02', 'synonyms': ['steamer_(kitchen_appliance)'], 'def': 'a cooking utensil that can be used to cook food by steaming it', 'name': 'steamer_(kitchen_appliance)'}, {'frequency': 'f', 'id': 1028, 'synset': 'steering_wheel.n.01', 'synonyms': ['steering_wheel'], 'def': 'a handwheel that is used for steering', 'name': 'steering_wheel'}, {'frequency': 'r', 'id': 1029, 'synset': 'stencil.n.01', 'synonyms': ['stencil'], 'def': 'a sheet of material (metal, plastic, etc.) 
that has been perforated with a pattern; ink or paint can pass through the perforations to create the printed pattern on the surface below', 'name': 'stencil'}, {'frequency': 'r', 'id': 1030, 'synset': 'step_ladder.n.01', 'synonyms': ['stepladder'], 'def': 'a folding portable ladder hinged at the top', 'name': 'stepladder'}, {'frequency': 'c', 'id': 1031, 'synset': 'step_stool.n.01', 'synonyms': ['step_stool'], 'def': 'a stool that has one or two steps that fold under the seat', 'name': 'step_stool'}, {'frequency': 'c', 'id': 1032, 'synset': 'stereo.n.01', 'synonyms': ['stereo_(sound_system)'], 'def': 'electronic device for playing audio', 'name': 'stereo_(sound_system)'}, {'frequency': 'r', 'id': 1033, 'synset': 'stew.n.02', 'synonyms': ['stew'], 'def': 'food prepared by stewing especially meat or fish with vegetables', 'name': 'stew'}, {'frequency': 'r', 'id': 1034, 'synset': 'stirrer.n.02', 'synonyms': ['stirrer'], 'def': 'an implement used for stirring', 'name': 'stirrer'}, {'frequency': 'f', 'id': 1035, 'synset': 'stirrup.n.01', 'synonyms': ['stirrup'], 'def': "support consisting of metal loops into which rider's feet go", 'name': 'stirrup'}, {'frequency': 'c', 'id': 1036, 'synset': 'stocking.n.01', 'synonyms': ['stockings_(leg_wear)'], 'def': 'close-fitting hosiery to cover the foot and leg; come in matched pairs', 'name': 'stockings_(leg_wear)'}, {'frequency': 'f', 'id': 1037, 'synset': 'stool.n.01', 'synonyms': ['stool'], 'def': 'a simple seat without a back or arms', 'name': 'stool'}, {'frequency': 'f', 'id': 1038, 'synset': 'stop_sign.n.01', 'synonyms': ['stop_sign'], 'def': 'a traffic sign to notify drivers that they must come to a complete stop', 'name': 'stop_sign'}, {'frequency': 'f', 'id': 1039, 'synset': 'stoplight.n.01', 'synonyms': ['brake_light'], 'def': 'a red light on the rear of a motor vehicle that signals when the brakes are applied', 'name': 'brake_light'}, {'frequency': 'f', 'id': 1040, 'synset': 'stove.n.01', 'synonyms': ['stove', 'kitchen_stove', 'range_(kitchen_appliance)', 'kitchen_range', 'cooking_stove'], 'def': 'a kitchen appliance used for cooking food', 'name': 'stove'}, {'frequency': 'c', 'id': 1041, 'synset': 'strainer.n.01', 'synonyms': ['strainer'], 'def': 'a filter to retain larger pieces while smaller pieces and liquids pass through', 'name': 'strainer'}, {'frequency': 'f', 'id': 1042, 'synset': 'strap.n.01', 'synonyms': ['strap'], 'def': 'an elongated strip of material for binding things together or holding', 'name': 'strap'}, {'frequency': 'f', 'id': 1043, 'synset': 'straw.n.04', 'synonyms': ['straw_(for_drinking)', 'drinking_straw'], 'def': 'a thin paper or plastic tube used to suck liquids into the mouth', 'name': 'straw_(for_drinking)'}, {'frequency': 'f', 'id': 1044, 'synset': 'strawberry.n.01', 'synonyms': ['strawberry'], 'def': 'sweet fleshy red fruit', 'name': 'strawberry'}, {'frequency': 'f', 'id': 1045, 'synset': 'street_sign.n.01', 'synonyms': ['street_sign'], 'def': 'a sign visible from the street', 'name': 'street_sign'}, {'frequency': 'f', 'id': 1046, 'synset': 'streetlight.n.01', 'synonyms': ['streetlight', 'street_lamp'], 'def': 'a lamp supported on a lamppost; for illuminating a street', 'name': 'streetlight'}, {'frequency': 'r', 'id': 1047, 'synset': 'string_cheese.n.01', 'synonyms': ['string_cheese'], 'def': 'cheese formed in long strings twisted together', 'name': 'string_cheese'}, {'frequency': 'r', 'id': 1048, 'synset': 'stylus.n.02', 'synonyms': ['stylus'], 'def': 'a pointed tool for writing or drawing or engraving', 'name': 
'stylus'}, {'frequency': 'r', 'id': 1049, 'synset': 'subwoofer.n.01', 'synonyms': ['subwoofer'], 'def': 'a loudspeaker that is designed to reproduce very low bass frequencies', 'name': 'subwoofer'}, {'frequency': 'r', 'id': 1050, 'synset': 'sugar_bowl.n.01', 'synonyms': ['sugar_bowl'], 'def': 'a dish in which sugar is served', 'name': 'sugar_bowl'}, {'frequency': 'r', 'id': 1051, 'synset': 'sugarcane.n.01', 'synonyms': ['sugarcane_(plant)'], 'def': 'juicy canes whose sap is a source of molasses and commercial sugar; fresh canes are sometimes chewed for the juice', 'name': 'sugarcane_(plant)'}, {'frequency': 'c', 'id': 1052, 'synset': 'suit.n.01', 'synonyms': ['suit_(clothing)'], 'def': 'a set of garments (usually including a jacket and trousers or skirt) for outerwear all of the same fabric and color', 'name': 'suit_(clothing)'}, {'frequency': 'c', 'id': 1053, 'synset': 'sunflower.n.01', 'synonyms': ['sunflower'], 'def': 'any plant of the genus Helianthus having large flower heads with dark disk florets and showy yellow rays', 'name': 'sunflower'}, {'frequency': 'f', 'id': 1054, 'synset': 'sunglasses.n.01', 'synonyms': ['sunglasses'], 'def': 'spectacles that are darkened or polarized to protect the eyes from the glare of the sun', 'name': 'sunglasses'}, {'frequency': 'c', 'id': 1055, 'synset': 'sunhat.n.01', 'synonyms': ['sunhat'], 'def': 'a hat with a broad brim that protects the face from direct exposure to the sun', 'name': 'sunhat'}, {'frequency': 'r', 'id': 1056, 'synset': 'sunscreen.n.01', 'synonyms': ['sunscreen', 'sunblock'], 'def': 'a cream spread on the skin; contains a chemical to filter out ultraviolet light and so protect from sunburn', 'name': 'sunscreen'}, {'frequency': 'f', 'id': 1057, 'synset': 'surfboard.n.01', 'synonyms': ['surfboard'], 'def': 'a narrow buoyant board for riding surf', 'name': 'surfboard'}, {'frequency': 'c', 'id': 1058, 'synset': 'sushi.n.01', 'synonyms': ['sushi'], 'def': 'rice (with raw fish) wrapped in seaweed', 'name': 'sushi'}, {'frequency': 'c', 'id': 1059, 'synset': 'swab.n.02', 'synonyms': ['mop'], 'def': 'cleaning implement consisting of absorbent material fastened to a handle; for cleaning floors', 'name': 'mop'}, {'frequency': 'c', 'id': 1060, 'synset': 'sweat_pants.n.01', 'synonyms': ['sweat_pants'], 'def': 'loose-fitting trousers with elastic cuffs; worn by athletes', 'name': 'sweat_pants'}, {'frequency': 'c', 'id': 1061, 'synset': 'sweatband.n.02', 'synonyms': ['sweatband'], 'def': 'a band of material tied around the forehead or wrist to absorb sweat', 'name': 'sweatband'}, {'frequency': 'f', 'id': 1062, 'synset': 'sweater.n.01', 'synonyms': ['sweater'], 'def': 'a crocheted or knitted garment covering the upper part of the body', 'name': 'sweater'}, {'frequency': 'f', 'id': 1063, 'synset': 'sweatshirt.n.01', 'synonyms': ['sweatshirt'], 'def': 'cotton knit pullover with long sleeves worn during athletic activity', 'name': 'sweatshirt'}, {'frequency': 'c', 'id': 1064, 'synset': 'sweet_potato.n.02', 'synonyms': ['sweet_potato'], 'def': 'the edible tuberous root of the sweet potato vine', 'name': 'sweet_potato'}, {'frequency': 'f', 'id': 1065, 'synset': 'swimsuit.n.01', 'synonyms': ['swimsuit', 'swimwear', 'bathing_suit', 'swimming_costume', 'bathing_costume', 'swimming_trunks', 'bathing_trunks'], 'def': 'garment worn for swimming', 'name': 'swimsuit'}, {'frequency': 'c', 'id': 1066, 'synset': 'sword.n.01', 'synonyms': ['sword'], 'def': 'a cutting or thrusting weapon that has a long metal blade', 'name': 'sword'}, {'frequency': 'r', 'id': 1067, 
'synset': 'syringe.n.01', 'synonyms': ['syringe'], 'def': 'a medical instrument used to inject or withdraw fluids', 'name': 'syringe'}, {'frequency': 'r', 'id': 1068, 'synset': 'tabasco.n.02', 'synonyms': ['Tabasco_sauce'], 'def': 'very spicy sauce (trade name Tabasco) made from fully-aged red peppers', 'name': 'Tabasco_sauce'}, {'frequency': 'r', 'id': 1069, 'synset': 'table-tennis_table.n.01', 'synonyms': ['table-tennis_table', 'ping-pong_table'], 'def': 'a table used for playing table tennis', 'name': 'table-tennis_table'}, {'frequency': 'f', 'id': 1070, 'synset': 'table.n.02', 'synonyms': ['table'], 'def': 'a piece of furniture having a smooth flat top that is usually supported by one or more vertical legs', 'name': 'table'}, {'frequency': 'c', 'id': 1071, 'synset': 'table_lamp.n.01', 'synonyms': ['table_lamp'], 'def': 'a lamp that sits on a table', 'name': 'table_lamp'}, {'frequency': 'f', 'id': 1072, 'synset': 'tablecloth.n.01', 'synonyms': ['tablecloth'], 'def': 'a covering spread over a dining table', 'name': 'tablecloth'}, {'frequency': 'r', 'id': 1073, 'synset': 'tachometer.n.01', 'synonyms': ['tachometer'], 'def': 'measuring instrument for indicating speed of rotation', 'name': 'tachometer'}, {'frequency': 'r', 'id': 1074, 'synset': 'taco.n.02', 'synonyms': ['taco'], 'def': 'a small tortilla cupped around a filling', 'name': 'taco'}, {'frequency': 'f', 'id': 1075, 'synset': 'tag.n.02', 'synonyms': ['tag'], 'def': 'a label associated with something for the purpose of identification or information', 'name': 'tag'}, {'frequency': 'f', 'id': 1076, 'synset': 'taillight.n.01', 'synonyms': ['taillight', 'rear_light'], 'def': 'lamp (usually red) mounted at the rear of a motor vehicle', 'name': 'taillight'}, {'frequency': 'r', 'id': 1077, 'synset': 'tambourine.n.01', 'synonyms': ['tambourine'], 'def': 'a shallow drum with a single drumhead and with metallic disks in the sides', 'name': 'tambourine'}, {'frequency': 'r', 'id': 1078, 'synset': 'tank.n.01', 'synonyms': ['army_tank', 'armored_combat_vehicle', 'armoured_combat_vehicle'], 'def': 'an enclosed armored military vehicle; has a cannon and moves on caterpillar treads', 'name': 'army_tank'}, {'frequency': 'c', 'id': 1079, 'synset': 'tank.n.02', 'synonyms': ['tank_(storage_vessel)', 'storage_tank'], 'def': 'a large (usually metallic) vessel for holding gases or liquids', 'name': 'tank_(storage_vessel)'}, {'frequency': 'f', 'id': 1080, 'synset': 'tank_top.n.01', 'synonyms': ['tank_top_(clothing)'], 'def': 'a tight-fitting sleeveless shirt with wide shoulder straps and low neck and no front opening', 'name': 'tank_top_(clothing)'}, {'frequency': 'c', 'id': 1081, 'synset': 'tape.n.01', 'synonyms': ['tape_(sticky_cloth_or_paper)'], 'def': 'a long thin piece of cloth or paper as used for binding or fastening', 'name': 'tape_(sticky_cloth_or_paper)'}, {'frequency': 'c', 'id': 1082, 'synset': 'tape.n.04', 'synonyms': ['tape_measure', 'measuring_tape'], 'def': 'measuring instrument consisting of a narrow strip (cloth or metal) marked in inches or centimeters and used for measuring lengths', 'name': 'tape_measure'}, {'frequency': 'c', 'id': 1083, 'synset': 'tapestry.n.02', 'synonyms': ['tapestry'], 'def': 'a heavy textile with a woven design; used for curtains and upholstery', 'name': 'tapestry'}, {'frequency': 'f', 'id': 1084, 'synset': 'tarpaulin.n.01', 'synonyms': ['tarp'], 'def': 'waterproofed canvas', 'name': 'tarp'}, {'frequency': 'c', 'id': 1085, 'synset': 'tartan.n.01', 'synonyms': ['tartan', 'plaid'], 'def': 'a cloth having a 
crisscross design', 'name': 'tartan'}, {'frequency': 'c', 'id': 1086, 'synset': 'tassel.n.01', 'synonyms': ['tassel'], 'def': 'adornment consisting of a bunch of cords fastened at one end', 'name': 'tassel'}, {'frequency': 'r', 'id': 1087, 'synset': 'tea_bag.n.01', 'synonyms': ['tea_bag'], 'def': 'a measured amount of tea in a bag for an individual serving of tea', 'name': 'tea_bag'}, {'frequency': 'c', 'id': 1088, 'synset': 'teacup.n.02', 'synonyms': ['teacup'], 'def': 'a cup from which tea is drunk', 'name': 'teacup'}, {'frequency': 'c', 'id': 1089, 'synset': 'teakettle.n.01', 'synonyms': ['teakettle'], 'def': 'kettle for boiling water to make tea', 'name': 'teakettle'}, {'frequency': 'c', 'id': 1090, 'synset': 'teapot.n.01', 'synonyms': ['teapot'], 'def': 'pot for brewing tea; usually has a spout and handle', 'name': 'teapot'}, {'frequency': 'f', 'id': 1091, 'synset': 'teddy.n.01', 'synonyms': ['teddy_bear'], 'def': "plaything consisting of a child's toy bear (usually plush and stuffed with soft materials)", 'name': 'teddy_bear'}, {'frequency': 'f', 'id': 1092, 'synset': 'telephone.n.01', 'synonyms': ['telephone', 'phone', 'telephone_set'], 'def': 'electronic device for communicating by voice over long distances', 'name': 'telephone'}, {'frequency': 'c', 'id': 1093, 'synset': 'telephone_booth.n.01', 'synonyms': ['telephone_booth', 'phone_booth', 'call_box', 'telephone_box', 'telephone_kiosk'], 'def': 'booth for using a telephone', 'name': 'telephone_booth'}, {'frequency': 'f', 'id': 1094, 'synset': 'telephone_pole.n.01', 'synonyms': ['telephone_pole', 'telegraph_pole', 'telegraph_post'], 'def': 'tall pole supporting telephone wires', 'name': 'telephone_pole'}, {'frequency': 'r', 'id': 1095, 'synset': 'telephoto_lens.n.01', 'synonyms': ['telephoto_lens', 'zoom_lens'], 'def': 'a camera lens that magnifies the image', 'name': 'telephoto_lens'}, {'frequency': 'c', 'id': 1096, 'synset': 'television_camera.n.01', 'synonyms': ['television_camera', 'tv_camera'], 'def': 'television equipment for capturing and recording video', 'name': 'television_camera'}, {'frequency': 'f', 'id': 1097, 'synset': 'television_receiver.n.01', 'synonyms': ['television_set', 'tv', 'tv_set'], 'def': 'an electronic device that receives television signals and displays them on a screen', 'name': 'television_set'}, {'frequency': 'f', 'id': 1098, 'synset': 'tennis_ball.n.01', 'synonyms': ['tennis_ball'], 'def': 'ball about the size of a fist used in playing tennis', 'name': 'tennis_ball'}, {'frequency': 'f', 'id': 1099, 'synset': 'tennis_racket.n.01', 'synonyms': ['tennis_racket'], 'def': 'a racket used to play tennis', 'name': 'tennis_racket'}, {'frequency': 'r', 'id': 1100, 'synset': 'tequila.n.01', 'synonyms': ['tequila'], 'def': 'Mexican liquor made from fermented juices of an agave plant', 'name': 'tequila'}, {'frequency': 'c', 'id': 1101, 'synset': 'thermometer.n.01', 'synonyms': ['thermometer'], 'def': 'measuring instrument for measuring temperature', 'name': 'thermometer'}, {'frequency': 'c', 'id': 1102, 'synset': 'thermos.n.01', 'synonyms': ['thermos_bottle'], 'def': 'vacuum flask that preserves temperature of hot or cold drinks', 'name': 'thermos_bottle'}, {'frequency': 'c', 'id': 1103, 'synset': 'thermostat.n.01', 'synonyms': ['thermostat'], 'def': 'a regulator for automatically regulating temperature by starting or stopping the supply of heat', 'name': 'thermostat'}, {'frequency': 'r', 'id': 1104, 'synset': 'thimble.n.02', 'synonyms': ['thimble'], 'def': 'a small metal cap to protect the finger while sewing; 
can be used as a small container', 'name': 'thimble'}, {'frequency': 'c', 'id': 1105, 'synset': 'thread.n.01', 'synonyms': ['thread', 'yarn'], 'def': 'a fine cord of twisted fibers (of cotton or silk or wool or nylon etc.) used in sewing and weaving', 'name': 'thread'}, {'frequency': 'c', 'id': 1106, 'synset': 'thumbtack.n.01', 'synonyms': ['thumbtack', 'drawing_pin', 'pushpin'], 'def': 'a tack for attaching papers to a bulletin board or drawing board', 'name': 'thumbtack'}, {'frequency': 'c', 'id': 1107, 'synset': 'tiara.n.01', 'synonyms': ['tiara'], 'def': 'a jeweled headdress worn by women on formal occasions', 'name': 'tiara'}, {'frequency': 'c', 'id': 1108, 'synset': 'tiger.n.02', 'synonyms': ['tiger'], 'def': 'large feline of forests in most of Asia having a tawny coat with black stripes', 'name': 'tiger'}, {'frequency': 'c', 'id': 1109, 'synset': 'tights.n.01', 'synonyms': ['tights_(clothing)', 'leotards'], 'def': 'skintight knit hose covering the body from the waist to the feet worn by acrobats and dancers and as stockings by women and girls', 'name': 'tights_(clothing)'}, {'frequency': 'c', 'id': 1110, 'synset': 'timer.n.01', 'synonyms': ['timer', 'stopwatch'], 'def': 'a timepiece that measures a time interval and signals its end', 'name': 'timer'}, {'frequency': 'f', 'id': 1111, 'synset': 'tinfoil.n.01', 'synonyms': ['tinfoil'], 'def': 'foil made of tin or an alloy of tin and lead', 'name': 'tinfoil'}, {'frequency': 'r', 'id': 1112, 'synset': 'tinsel.n.01', 'synonyms': ['tinsel'], 'def': 'a showy decoration that is basically valueless', 'name': 'tinsel'}, {'frequency': 'f', 'id': 1113, 'synset': 'tissue.n.02', 'synonyms': ['tissue_paper'], 'def': 'a soft thin (usually translucent) paper', 'name': 'tissue_paper'}, {'frequency': 'c', 'id': 1114, 'synset': 'toast.n.01', 'synonyms': ['toast_(food)'], 'def': 'slice of bread that has been toasted', 'name': 'toast_(food)'}, {'frequency': 'f', 'id': 1115, 'synset': 'toaster.n.02', 'synonyms': ['toaster'], 'def': 'a kitchen appliance (usually electric) for toasting bread', 'name': 'toaster'}, {'frequency': 'c', 'id': 1116, 'synset': 'toaster_oven.n.01', 'synonyms': ['toaster_oven'], 'def': 'kitchen appliance consisting of a small electric oven for toasting or warming food', 'name': 'toaster_oven'}, {'frequency': 'f', 'id': 1117, 'synset': 'toilet.n.02', 'synonyms': ['toilet'], 'def': 'a plumbing fixture for defecation and urination', 'name': 'toilet'}, {'frequency': 'f', 'id': 1118, 'synset': 'toilet_tissue.n.01', 'synonyms': ['toilet_tissue', 'toilet_paper', 'bathroom_tissue'], 'def': 'a soft thin absorbent paper for use in toilets', 'name': 'toilet_tissue'}, {'frequency': 'f', 'id': 1119, 'synset': 'tomato.n.01', 'synonyms': ['tomato'], 'def': 'mildly acid red or yellow pulpy fruit eaten as a vegetable', 'name': 'tomato'}, {'frequency': 'c', 'id': 1120, 'synset': 'tongs.n.01', 'synonyms': ['tongs'], 'def': 'any of various devices for taking hold of objects; usually have two hinged legs with handles above and pointed hooks below', 'name': 'tongs'}, {'frequency': 'c', 'id': 1121, 'synset': 'toolbox.n.01', 'synonyms': ['toolbox'], 'def': 'a box or chest or cabinet for holding hand tools', 'name': 'toolbox'}, {'frequency': 'f', 'id': 1122, 'synset': 'toothbrush.n.01', 'synonyms': ['toothbrush'], 'def': 'small brush; has long handle; used to clean teeth', 'name': 'toothbrush'}, {'frequency': 'f', 'id': 1123, 'synset': 'toothpaste.n.01', 'synonyms': ['toothpaste'], 'def': 'a dentifrice in the form of a paste', 'name': 'toothpaste'}, 
{'frequency': 'c', 'id': 1124, 'synset': 'toothpick.n.01', 'synonyms': ['toothpick'], 'def': 'pick consisting of a small strip of wood or plastic; used to pick food from between the teeth', 'name': 'toothpick'}, {'frequency': 'c', 'id': 1125, 'synset': 'top.n.09', 'synonyms': ['cover'], 'def': 'covering for a hole (especially a hole in the top of a container)', 'name': 'cover'}, {'frequency': 'c', 'id': 1126, 'synset': 'tortilla.n.01', 'synonyms': ['tortilla'], 'def': 'thin unleavened pancake made from cornmeal or wheat flour', 'name': 'tortilla'}, {'frequency': 'c', 'id': 1127, 'synset': 'tow_truck.n.01', 'synonyms': ['tow_truck'], 'def': 'a truck equipped to hoist and pull wrecked cars (or to remove cars from no-parking zones)', 'name': 'tow_truck'}, {'frequency': 'f', 'id': 1128, 'synset': 'towel.n.01', 'synonyms': ['towel'], 'def': 'a rectangular piece of absorbent cloth (or paper) for drying or wiping', 'name': 'towel'}, {'frequency': 'f', 'id': 1129, 'synset': 'towel_rack.n.01', 'synonyms': ['towel_rack', 'towel_rail', 'towel_bar'], 'def': 'a rack consisting of one or more bars on which towels can be hung', 'name': 'towel_rack'}, {'frequency': 'f', 'id': 1130, 'synset': 'toy.n.03', 'synonyms': ['toy'], 'def': 'a device regarded as providing amusement', 'name': 'toy'}, {'frequency': 'c', 'id': 1131, 'synset': 'tractor.n.01', 'synonyms': ['tractor_(farm_equipment)'], 'def': 'a wheeled vehicle with large wheels; used in farming and other applications', 'name': 'tractor_(farm_equipment)'}, {'frequency': 'f', 'id': 1132, 'synset': 'traffic_light.n.01', 'synonyms': ['traffic_light'], 'def': 'a device to control vehicle traffic often consisting of three or more lights', 'name': 'traffic_light'}, {'frequency': 'r', 'id': 1133, 'synset': 'trail_bike.n.01', 'synonyms': ['dirt_bike'], 'def': 'a lightweight motorcycle equipped with rugged tires and suspension for off-road use', 'name': 'dirt_bike'}, {'frequency': 'c', 'id': 1134, 'synset': 'trailer_truck.n.01', 'synonyms': ['trailer_truck', 'tractor_trailer', 'trucking_rig', 'articulated_lorry', 'semi_truck'], 'def': 'a truck consisting of a tractor and trailer together', 'name': 'trailer_truck'}, {'frequency': 'f', 'id': 1135, 'synset': 'train.n.01', 'synonyms': ['train_(railroad_vehicle)', 'railroad_train'], 'def': 'public or private transport provided by a line of railway cars coupled together and drawn by a locomotive', 'name': 'train_(railroad_vehicle)'}, {'frequency': 'r', 'id': 1136, 'synset': 'trampoline.n.01', 'synonyms': ['trampoline'], 'def': 'gymnastic apparatus consisting of a strong canvas sheet attached with springs to a metal frame', 'name': 'trampoline'}, {'frequency': 'f', 'id': 1137, 'synset': 'tray.n.01', 'synonyms': ['tray'], 'def': 'an open receptacle for holding or displaying or serving articles or food', 'name': 'tray'}, {'frequency': 'r', 'id': 1138, 'synset': 'tree_house.n.01', 'synonyms': ['tree_house'], 'def': '(NOT A TREE) a PLAYHOUSE built in the branches of a tree', 'name': 'tree_house'}, {'frequency': 'r', 'id': 1139, 'synset': 'trench_coat.n.01', 'synonyms': ['trench_coat'], 'def': 'a military style raincoat; belted with deep pockets', 'name': 'trench_coat'}, {'frequency': 'r', 'id': 1140, 'synset': 'triangle.n.05', 'synonyms': ['triangle_(musical_instrument)'], 'def': 'a percussion instrument consisting of a metal bar bent in the shape of an open triangle', 'name': 'triangle_(musical_instrument)'}, {'frequency': 'r', 'id': 1141, 'synset': 'tricycle.n.01', 'synonyms': ['tricycle'], 'def': 'a vehicle with three 
wheels that is moved by foot pedals', 'name': 'tricycle'}, {'frequency': 'c', 'id': 1142, 'synset': 'tripod.n.01', 'synonyms': ['tripod'], 'def': 'a three-legged rack used for support', 'name': 'tripod'}, {'frequency': 'f', 'id': 1143, 'synset': 'trouser.n.01', 'synonyms': ['trousers', 'pants_(clothing)'], 'def': 'a garment extending from the waist to the knee or ankle, covering each leg separately', 'name': 'trousers'}, {'frequency': 'f', 'id': 1144, 'synset': 'truck.n.01', 'synonyms': ['truck'], 'def': 'an automotive vehicle suitable for hauling', 'name': 'truck'}, {'frequency': 'r', 'id': 1145, 'synset': 'truffle.n.03', 'synonyms': ['truffle_(chocolate)', 'chocolate_truffle'], 'def': 'creamy chocolate candy', 'name': 'truffle_(chocolate)'}, {'frequency': 'c', 'id': 1146, 'synset': 'trunk.n.02', 'synonyms': ['trunk'], 'def': 'luggage consisting of a large strong case used when traveling or for storage', 'name': 'trunk'}, {'frequency': 'r', 'id': 1147, 'synset': 'tub.n.02', 'synonyms': ['vat'], 'def': 'a large open vessel for holding or storing liquids', 'name': 'vat'}, {'frequency': 'c', 'id': 1148, 'synset': 'turban.n.01', 'synonyms': ['turban'], 'def': 'a traditional headdress consisting of a long scarf wrapped around the head', 'name': 'turban'}, {'frequency': 'r', 'id': 1149, 'synset': 'turkey.n.01', 'synonyms': ['turkey_(bird)'], 'def': 'large gallinaceous bird with fan-shaped tail; widely domesticated for food', 'name': 'turkey_(bird)'}, {'frequency': 'c', 'id': 1150, 'synset': 'turkey.n.04', 'synonyms': ['turkey_(food)'], 'def': 'flesh of large domesticated fowl usually roasted', 'name': 'turkey_(food)'}, {'frequency': 'r', 'id': 1151, 'synset': 'turnip.n.01', 'synonyms': ['turnip'], 'def': 'widely cultivated plant having a large fleshy edible white or yellow root', 'name': 'turnip'}, {'frequency': 'c', 'id': 1152, 'synset': 'turtle.n.02', 'synonyms': ['turtle'], 'def': 'any of various aquatic and land reptiles having a bony shell and flipper-like limbs for swimming', 'name': 'turtle'}, {'frequency': 'r', 'id': 1153, 'synset': 'turtleneck.n.01', 'synonyms': ['turtleneck_(clothing)', 'polo-neck'], 'def': 'a sweater or jersey with a high close-fitting collar', 'name': 'turtleneck_(clothing)'}, {'frequency': 'r', 'id': 1154, 'synset': 'typewriter.n.01', 'synonyms': ['typewriter'], 'def': 'hand-operated character printer for printing written messages one character at a time', 'name': 'typewriter'}, {'frequency': 'f', 'id': 1155, 'synset': 'umbrella.n.01', 'synonyms': ['umbrella'], 'def': 'a lightweight handheld collapsible canopy', 'name': 'umbrella'}, {'frequency': 'c', 'id': 1156, 'synset': 'underwear.n.01', 'synonyms': ['underwear', 'underclothes', 'underclothing', 'underpants'], 'def': 'undergarment worn next to the skin and under the outer garments', 'name': 'underwear'}, {'frequency': 'r', 'id': 1157, 'synset': 'unicycle.n.01', 'synonyms': ['unicycle'], 'def': 'a vehicle with a single wheel that is driven by pedals', 'name': 'unicycle'}, {'frequency': 'c', 'id': 1158, 'synset': 'urinal.n.01', 'synonyms': ['urinal'], 'def': 'a plumbing fixture (usually attached to the wall) used by men to urinate', 'name': 'urinal'}, {'frequency': 'r', 'id': 1159, 'synset': 'urn.n.01', 'synonyms': ['urn'], 'def': 'a large vase that usually has a pedestal or feet', 'name': 'urn'}, {'frequency': 'c', 'id': 1160, 'synset': 'vacuum.n.04', 'synonyms': ['vacuum_cleaner'], 'def': 'an electrical home appliance that cleans by suction', 'name': 'vacuum_cleaner'}, {'frequency': 'c', 'id': 1161, 'synset': 
'valve.n.03', 'synonyms': ['valve'], 'def': 'control consisting of a mechanical device for controlling the flow of a fluid', 'name': 'valve'}, {'frequency': 'f', 'id': 1162, 'synset': 'vase.n.01', 'synonyms': ['vase'], 'def': 'an open jar of glass or porcelain used as an ornament or to hold flowers', 'name': 'vase'}, {'frequency': 'c', 'id': 1163, 'synset': 'vending_machine.n.01', 'synonyms': ['vending_machine'], 'def': 'a slot machine for selling goods', 'name': 'vending_machine'}, {'frequency': 'f', 'id': 1164, 'synset': 'vent.n.01', 'synonyms': ['vent', 'blowhole', 'air_vent'], 'def': 'a hole for the escape of gas or air', 'name': 'vent'}, {'frequency': 'c', 'id': 1165, 'synset': 'videotape.n.01', 'synonyms': ['videotape'], 'def': 'a video recording made on magnetic tape', 'name': 'videotape'}, {'frequency': 'r', 'id': 1166, 'synset': 'vinegar.n.01', 'synonyms': ['vinegar'], 'def': 'sour-tasting liquid produced usually by oxidation of the alcohol in wine or cider and used as a condiment or food preservative', 'name': 'vinegar'}, {'frequency': 'r', 'id': 1167, 'synset': 'violin.n.01', 'synonyms': ['violin', 'fiddle'], 'def': 'bowed stringed instrument that is the highest member of the violin family', 'name': 'violin'}, {'frequency': 'r', 'id': 1168, 'synset': 'vodka.n.01', 'synonyms': ['vodka'], 'def': 'unaged colorless liquor originating in Russia', 'name': 'vodka'}, {'frequency': 'r', 'id': 1169, 'synset': 'volleyball.n.02', 'synonyms': ['volleyball'], 'def': 'an inflated ball used in playing volleyball', 'name': 'volleyball'}, {'frequency': 'r', 'id': 1170, 'synset': 'vulture.n.01', 'synonyms': ['vulture'], 'def': 'any of various large birds of prey having naked heads and weak claws and feeding chiefly on carrion', 'name': 'vulture'}, {'frequency': 'c', 'id': 1171, 'synset': 'waffle.n.01', 'synonyms': ['waffle'], 'def': 'pancake batter baked in a waffle iron', 'name': 'waffle'}, {'frequency': 'r', 'id': 1172, 'synset': 'waffle_iron.n.01', 'synonyms': ['waffle_iron'], 'def': 'a kitchen appliance for baking waffles', 'name': 'waffle_iron'}, {'frequency': 'c', 'id': 1173, 'synset': 'wagon.n.01', 'synonyms': ['wagon'], 'def': 'any of various kinds of wheeled vehicles drawn by an animal or a tractor', 'name': 'wagon'}, {'frequency': 'c', 'id': 1174, 'synset': 'wagon_wheel.n.01', 'synonyms': ['wagon_wheel'], 'def': 'a wheel of a wagon', 'name': 'wagon_wheel'}, {'frequency': 'c', 'id': 1175, 'synset': 'walking_stick.n.01', 'synonyms': ['walking_stick'], 'def': 'a stick carried in the hand for support in walking', 'name': 'walking_stick'}, {'frequency': 'c', 'id': 1176, 'synset': 'wall_clock.n.01', 'synonyms': ['wall_clock'], 'def': 'a clock mounted on a wall', 'name': 'wall_clock'}, {'frequency': 'f', 'id': 1177, 'synset': 'wall_socket.n.01', 'synonyms': ['wall_socket', 'wall_plug', 'electric_outlet', 'electrical_outlet', 'outlet', 'electric_receptacle'], 'def': 'receptacle providing a place in a wiring system where current can be taken to run electrical devices', 'name': 'wall_socket'}, {'frequency': 'c', 'id': 1178, 'synset': 'wallet.n.01', 'synonyms': ['wallet', 'billfold'], 'def': 'a pocket-size case for holding papers and paper money', 'name': 'wallet'}, {'frequency': 'r', 'id': 1179, 'synset': 'walrus.n.01', 'synonyms': ['walrus'], 'def': 'either of two large northern marine mammals having ivory tusks and tough hide over thick blubber', 'name': 'walrus'}, {'frequency': 'r', 'id': 1180, 'synset': 'wardrobe.n.01', 'synonyms': ['wardrobe'], 'def': 'a tall piece of furniture that provides 
storage space for clothes; has a door and rails or hooks for hanging clothes', 'name': 'wardrobe'}, {'frequency': 'r', 'id': 1181, 'synset': 'wasabi.n.02', 'synonyms': ['wasabi'], 'def': 'the thick green root of the wasabi plant that the Japanese use in cooking and that tastes like strong horseradish', 'name': 'wasabi'}, {'frequency': 'c', 'id': 1182, 'synset': 'washer.n.03', 'synonyms': ['automatic_washer', 'washing_machine'], 'def': 'a home appliance for washing clothes and linens automatically', 'name': 'automatic_washer'}, {'frequency': 'f', 'id': 1183, 'synset': 'watch.n.01', 'synonyms': ['watch', 'wristwatch'], 'def': 'a small, portable timepiece', 'name': 'watch'}, {'frequency': 'f', 'id': 1184, 'synset': 'water_bottle.n.01', 'synonyms': ['water_bottle'], 'def': 'a bottle for holding water', 'name': 'water_bottle'}, {'frequency': 'c', 'id': 1185, 'synset': 'water_cooler.n.01', 'synonyms': ['water_cooler'], 'def': 'a device for cooling and dispensing drinking water', 'name': 'water_cooler'}, {'frequency': 'c', 'id': 1186, 'synset': 'water_faucet.n.01', 'synonyms': ['water_faucet', 'water_tap', 'tap_(water_faucet)'], 'def': 'a faucet for drawing water from a pipe or cask', 'name': 'water_faucet'}, {'frequency': 'r', 'id': 1187, 'synset': 'water_filter.n.01', 'synonyms': ['water_filter'], 'def': 'a filter to remove impurities from the water supply', 'name': 'water_filter'}, {'frequency': 'r', 'id': 1188, 'synset': 'water_heater.n.01', 'synonyms': ['water_heater', 'hot-water_heater'], 'def': 'a heater and storage tank to supply heated water', 'name': 'water_heater'}, {'frequency': 'r', 'id': 1189, 'synset': 'water_jug.n.01', 'synonyms': ['water_jug'], 'def': 'a jug that holds water', 'name': 'water_jug'}, {'frequency': 'r', 'id': 1190, 'synset': 'water_pistol.n.01', 'synonyms': ['water_gun', 'squirt_gun'], 'def': 'plaything consisting of a toy pistol that squirts water', 'name': 'water_gun'}, {'frequency': 'c', 'id': 1191, 'synset': 'water_scooter.n.01', 'synonyms': ['water_scooter', 'sea_scooter', 'jet_ski'], 'def': 'a motorboat resembling a motor scooter (NOT A SURFBOARD OR WATER SKI)', 'name': 'water_scooter'}, {'frequency': 'c', 'id': 1192, 'synset': 'water_ski.n.01', 'synonyms': ['water_ski'], 'def': 'broad ski for skimming over water towed by a speedboat (DO NOT MARK WATER)', 'name': 'water_ski'}, {'frequency': 'c', 'id': 1193, 'synset': 'water_tower.n.01', 'synonyms': ['water_tower'], 'def': 'a large reservoir for water', 'name': 'water_tower'}, {'frequency': 'c', 'id': 1194, 'synset': 'watering_can.n.01', 'synonyms': ['watering_can'], 'def': 'a container with a handle and a spout with a perforated nozzle; used to sprinkle water over plants', 'name': 'watering_can'}, {'frequency': 'c', 'id': 1195, 'synset': 'watermelon.n.02', 'synonyms': ['watermelon'], 'def': 'large oblong or roundish melon with a hard green rind and sweet watery red or occasionally yellowish pulp', 'name': 'watermelon'}, {'frequency': 'f', 'id': 1196, 'synset': 'weathervane.n.01', 'synonyms': ['weathervane', 'vane_(weathervane)', 'wind_vane'], 'def': 'mechanical device attached to an elevated structure; rotates freely to show the direction of the wind', 'name': 'weathervane'}, {'frequency': 'c', 'id': 1197, 'synset': 'webcam.n.01', 'synonyms': ['webcam'], 'def': 'a digital camera designed to take digital photographs and transmit them over the internet', 'name': 'webcam'}, {'frequency': 'c', 'id': 1198, 'synset': 'wedding_cake.n.01', 'synonyms': ['wedding_cake', 'bridecake'], 'def': 'a rich cake with two or more 
tiers and covered with frosting and decorations; served at a wedding reception', 'name': 'wedding_cake'}, {'frequency': 'c', 'id': 1199, 'synset': 'wedding_ring.n.01', 'synonyms': ['wedding_ring', 'wedding_band'], 'def': 'a ring given to the bride and/or groom at the wedding', 'name': 'wedding_ring'}, {'frequency': 'f', 'id': 1200, 'synset': 'wet_suit.n.01', 'synonyms': ['wet_suit'], 'def': 'a close-fitting garment made of a permeable material; worn in cold water to retain body heat', 'name': 'wet_suit'}, {'frequency': 'f', 'id': 1201, 'synset': 'wheel.n.01', 'synonyms': ['wheel'], 'def': 'a circular frame with spokes (or a solid disc) that can rotate on a shaft or axle', 'name': 'wheel'}, {'frequency': 'c', 'id': 1202, 'synset': 'wheelchair.n.01', 'synonyms': ['wheelchair'], 'def': 'a movable chair mounted on large wheels', 'name': 'wheelchair'}, {'frequency': 'c', 'id': 1203, 'synset': 'whipped_cream.n.01', 'synonyms': ['whipped_cream'], 'def': 'cream that has been beaten until light and fluffy', 'name': 'whipped_cream'}, {'frequency': 'r', 'id': 1204, 'synset': 'whiskey.n.01', 'synonyms': ['whiskey'], 'def': 'a liquor made from fermented mash of grain', 'name': 'whiskey'}, {'frequency': 'r', 'id': 1205, 'synset': 'whistle.n.03', 'synonyms': ['whistle'], 'def': 'a small wind instrument that produces a whistling sound by blowing into it', 'name': 'whistle'}, {'frequency': 'r', 'id': 1206, 'synset': 'wick.n.02', 'synonyms': ['wick'], 'def': 'a loosely woven cord in a candle or oil lamp that is lit on fire', 'name': 'wick'}, {'frequency': 'c', 'id': 1207, 'synset': 'wig.n.01', 'synonyms': ['wig'], 'def': 'hairpiece covering the head and made of real or synthetic hair', 'name': 'wig'}, {'frequency': 'c', 'id': 1208, 'synset': 'wind_chime.n.01', 'synonyms': ['wind_chime'], 'def': 'a decorative arrangement of pieces of metal or glass or pottery that hang together loosely so the wind can cause them to tinkle', 'name': 'wind_chime'}, {'frequency': 'c', 'id': 1209, 'synset': 'windmill.n.01', 'synonyms': ['windmill'], 'def': 'a mill that is powered by the wind', 'name': 'windmill'}, {'frequency': 'c', 'id': 1210, 'synset': 'window_box.n.01', 'synonyms': ['window_box_(for_plants)'], 'def': 'a container for growing plants on a windowsill', 'name': 'window_box_(for_plants)'}, {'frequency': 'f', 'id': 1211, 'synset': 'windshield_wiper.n.01', 'synonyms': ['windshield_wiper', 'windscreen_wiper', 'wiper_(for_windshield/screen)'], 'def': 'a mechanical device that cleans the windshield', 'name': 'windshield_wiper'}, {'frequency': 'c', 'id': 1212, 'synset': 'windsock.n.01', 'synonyms': ['windsock', 'air_sock', 'air-sleeve', 'wind_sleeve', 'wind_cone'], 'def': 'a truncated cloth cone mounted on a mast/pole; shows wind direction', 'name': 'windsock'}, {'frequency': 'f', 'id': 1213, 'synset': 'wine_bottle.n.01', 'synonyms': ['wine_bottle'], 'def': 'a bottle for holding wine', 'name': 'wine_bottle'}, {'frequency': 'r', 'id': 1214, 'synset': 'wine_bucket.n.01', 'synonyms': ['wine_bucket', 'wine_cooler'], 'def': 'a bucket of ice used to chill a bottle of wine', 'name': 'wine_bucket'}, {'frequency': 'f', 'id': 1215, 'synset': 'wineglass.n.01', 'synonyms': ['wineglass'], 'def': 'a glass that has a stem and in which wine is served', 'name': 'wineglass'}, {'frequency': 'r', 'id': 1216, 'synset': 'wing_chair.n.01', 'synonyms': ['wing_chair'], 'def': 'easy chair having wings on each side of a high back', 'name': 'wing_chair'}, {'frequency': 'c', 'id': 1217, 'synset': 'winker.n.02', 'synonyms': ['blinder_(for_horses)'], 
'def': 'blinds that prevent a horse from seeing something on either side', 'name': 'blinder_(for_horses)'}, {'frequency': 'c', 'id': 1218, 'synset': 'wok.n.01', 'synonyms': ['wok'], 'def': 'pan with a convex bottom; used for frying in Chinese cooking', 'name': 'wok'}, {'frequency': 'r', 'id': 1219, 'synset': 'wolf.n.01', 'synonyms': ['wolf'], 'def': 'a wild carnivorous mammal of the dog family, living and hunting in packs', 'name': 'wolf'}, {'frequency': 'c', 'id': 1220, 'synset': 'wooden_spoon.n.02', 'synonyms': ['wooden_spoon'], 'def': 'a spoon made of wood', 'name': 'wooden_spoon'}, {'frequency': 'c', 'id': 1221, 'synset': 'wreath.n.01', 'synonyms': ['wreath'], 'def': 'an arrangement of flowers, leaves, or stems fastened in a ring', 'name': 'wreath'}, {'frequency': 'c', 'id': 1222, 'synset': 'wrench.n.03', 'synonyms': ['wrench', 'spanner'], 'def': 'a hand tool that is used to hold or twist a nut or bolt', 'name': 'wrench'}, {'frequency': 'c', 'id': 1223, 'synset': 'wristband.n.01', 'synonyms': ['wristband'], 'def': 'band consisting of a part of a sleeve that covers the wrist', 'name': 'wristband'}, {'frequency': 'f', 'id': 1224, 'synset': 'wristlet.n.01', 'synonyms': ['wristlet', 'wrist_band'], 'def': 'a band or bracelet worn around the wrist', 'name': 'wristlet'}, {'frequency': 'r', 'id': 1225, 'synset': 'yacht.n.01', 'synonyms': ['yacht'], 'def': 'an expensive vessel propelled by sail or power and used for cruising or racing', 'name': 'yacht'}, {'frequency': 'r', 'id': 1226, 'synset': 'yak.n.02', 'synonyms': ['yak'], 'def': 'large long-haired wild ox of Tibet often domesticated', 'name': 'yak'}, {'frequency': 'c', 'id': 1227, 'synset': 'yogurt.n.01', 'synonyms': ['yogurt', 'yoghurt', 'yoghourt'], 'def': 'a custard-like food made from curdled milk', 'name': 'yogurt'}, {'frequency': 'r', 'id': 1228, 'synset': 'yoke.n.07', 'synonyms': ['yoke_(animal_equipment)'], 'def': 'gear joining two animals at the neck; NOT egg yolk', 'name': 'yoke_(animal_equipment)'}, {'frequency': 'f', 'id': 1229, 'synset': 'zebra.n.01', 'synonyms': ['zebra'], 'def': 'any of several fleet black-and-white striped African equines', 'name': 'zebra'}, {'frequency': 'c', 'id': 1230, 'synset': 'zucchini.n.02', 'synonyms': ['zucchini', 'courgette'], 'def': 'small cucumber-shaped vegetable marrow; typically dark green', 'name': 'zucchini'}] # noqa +# fmt: on diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/datasets/lvis_v1_categories.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/datasets/lvis_v1_categories.py new file mode 100644 index 0000000000000000000000000000000000000000..7374e6968bb006f5d8c49e75d9d3b31ea3d77d05 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/datasets/lvis_v1_categories.py @@ -0,0 +1,16 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+# Autogen with +# with open("lvis_v1_val.json", "r") as f: +# a = json.load(f) +# c = a["categories"] +# for x in c: +# del x["image_count"] +# del x["instance_count"] +# LVIS_CATEGORIES = repr(c) + " # noqa" +# with open("/tmp/lvis_categories.py", "wt") as f: +# f.write(f"LVIS_CATEGORIES = {LVIS_CATEGORIES}") +# Then paste the contents of that file below + +# fmt: off +LVIS_CATEGORIES = [{'frequency': 'c', 'synset': 'aerosol.n.02', 'synonyms': ['aerosol_can', 'spray_can'], 'id': 1, 'def': 'a dispenser that holds a substance under pressure', 'name': 'aerosol_can'}, {'frequency': 'f', 'synset': 'air_conditioner.n.01', 'synonyms': ['air_conditioner'], 'id': 2, 'def': 'a machine that keeps air cool and dry', 'name': 'air_conditioner'}, {'frequency': 'f', 'synset': 'airplane.n.01', 'synonyms': ['airplane', 'aeroplane'], 'id': 3, 'def': 'an aircraft that has a fixed wing and is powered by propellers or jets', 'name': 'airplane'}, {'frequency': 'f', 'synset': 'alarm_clock.n.01', 'synonyms': ['alarm_clock'], 'id': 4, 'def': 'a clock that wakes a sleeper at some preset time', 'name': 'alarm_clock'}, {'frequency': 'c', 'synset': 'alcohol.n.01', 'synonyms': ['alcohol', 'alcoholic_beverage'], 'id': 5, 'def': 'a liquor or brew containing alcohol as the active agent', 'name': 'alcohol'}, {'frequency': 'c', 'synset': 'alligator.n.02', 'synonyms': ['alligator', 'gator'], 'id': 6, 'def': 'amphibious reptiles related to crocodiles but with shorter broader snouts', 'name': 'alligator'}, {'frequency': 'c', 'synset': 'almond.n.02', 'synonyms': ['almond'], 'id': 7, 'def': 'oval-shaped edible seed of the almond tree', 'name': 'almond'}, {'frequency': 'c', 'synset': 'ambulance.n.01', 'synonyms': ['ambulance'], 'id': 8, 'def': 'a vehicle that takes people to and from hospitals', 'name': 'ambulance'}, {'frequency': 'c', 'synset': 'amplifier.n.01', 'synonyms': ['amplifier'], 'id': 9, 'def': 'electronic equipment that increases strength of signals', 'name': 'amplifier'}, {'frequency': 'c', 'synset': 'anklet.n.03', 'synonyms': ['anklet', 'ankle_bracelet'], 'id': 10, 'def': 'an ornament worn around the ankle', 'name': 'anklet'}, {'frequency': 'f', 'synset': 'antenna.n.01', 'synonyms': ['antenna', 'aerial', 'transmitting_aerial'], 'id': 11, 'def': 'an electrical device that sends or receives radio or television signals', 'name': 'antenna'}, {'frequency': 'f', 'synset': 'apple.n.01', 'synonyms': ['apple'], 'id': 12, 'def': 'fruit with red or yellow or green skin and sweet to tart crisp whitish flesh', 'name': 'apple'}, {'frequency': 'r', 'synset': 'applesauce.n.01', 'synonyms': ['applesauce'], 'id': 13, 'def': 'puree of stewed apples usually sweetened and spiced', 'name': 'applesauce'}, {'frequency': 'r', 'synset': 'apricot.n.02', 'synonyms': ['apricot'], 'id': 14, 'def': 'downy yellow to rosy-colored fruit resembling a small peach', 'name': 'apricot'}, {'frequency': 'f', 'synset': 'apron.n.01', 'synonyms': ['apron'], 'id': 15, 'def': 'a garment of cloth that is tied about the waist and worn to protect clothing', 'name': 'apron'}, {'frequency': 'c', 'synset': 'aquarium.n.01', 'synonyms': ['aquarium', 'fish_tank'], 'id': 16, 'def': 'a tank/pool/bowl filled with water for keeping live fish and underwater animals', 'name': 'aquarium'}, {'frequency': 'r', 'synset': 'arctic.n.02', 'synonyms': ['arctic_(type_of_shoe)', 'galosh', 'golosh', 'rubber_(type_of_shoe)', 'gumshoe'], 'id': 17, 'def': 'a waterproof overshoe that protects shoes from water or snow', 'name': 'arctic_(type_of_shoe)'}, {'frequency': 'c', 'synset': 
'armband.n.02', 'synonyms': ['armband'], 'id': 18, 'def': 'a band worn around the upper arm', 'name': 'armband'}, {'frequency': 'f', 'synset': 'armchair.n.01', 'synonyms': ['armchair'], 'id': 19, 'def': 'chair with a support on each side for arms', 'name': 'armchair'}, {'frequency': 'r', 'synset': 'armoire.n.01', 'synonyms': ['armoire'], 'id': 20, 'def': 'a large wardrobe or cabinet', 'name': 'armoire'}, {'frequency': 'r', 'synset': 'armor.n.01', 'synonyms': ['armor', 'armour'], 'id': 21, 'def': 'protective covering made of metal and used in combat', 'name': 'armor'}, {'frequency': 'c', 'synset': 'artichoke.n.02', 'synonyms': ['artichoke'], 'id': 22, 'def': 'a thistlelike flower head with edible fleshy leaves and heart', 'name': 'artichoke'}, {'frequency': 'f', 'synset': 'ashcan.n.01', 'synonyms': ['trash_can', 'garbage_can', 'wastebin', 'dustbin', 'trash_barrel', 'trash_bin'], 'id': 23, 'def': 'a bin that holds rubbish until it is collected', 'name': 'trash_can'}, {'frequency': 'c', 'synset': 'ashtray.n.01', 'synonyms': ['ashtray'], 'id': 24, 'def': "a receptacle for the ash from smokers' cigars or cigarettes", 'name': 'ashtray'}, {'frequency': 'c', 'synset': 'asparagus.n.02', 'synonyms': ['asparagus'], 'id': 25, 'def': 'edible young shoots of the asparagus plant', 'name': 'asparagus'}, {'frequency': 'c', 'synset': 'atomizer.n.01', 'synonyms': ['atomizer', 'atomiser', 'spray', 'sprayer', 'nebulizer', 'nebuliser'], 'id': 26, 'def': 'a dispenser that turns a liquid (such as perfume) into a fine mist', 'name': 'atomizer'}, {'frequency': 'f', 'synset': 'avocado.n.01', 'synonyms': ['avocado'], 'id': 27, 'def': 'a pear-shaped fruit with green or blackish skin and rich yellowish pulp enclosing a single large seed', 'name': 'avocado'}, {'frequency': 'c', 'synset': 'award.n.02', 'synonyms': ['award', 'accolade'], 'id': 28, 'def': 'a tangible symbol signifying approval or distinction', 'name': 'award'}, {'frequency': 'f', 'synset': 'awning.n.01', 'synonyms': ['awning'], 'id': 29, 'def': 'a canopy made of canvas to shelter people or things from rain or sun', 'name': 'awning'}, {'frequency': 'r', 'synset': 'ax.n.01', 'synonyms': ['ax', 'axe'], 'id': 30, 'def': 'an edge tool with a heavy bladed head mounted across a handle', 'name': 'ax'}, {'frequency': 'r', 'synset': 'baboon.n.01', 'synonyms': ['baboon'], 'id': 31, 'def': 'large terrestrial monkeys having doglike muzzles', 'name': 'baboon'}, {'frequency': 'f', 'synset': 'baby_buggy.n.01', 'synonyms': ['baby_buggy', 'baby_carriage', 'perambulator', 'pram', 'stroller'], 'id': 32, 'def': 'a small vehicle with four wheels in which a baby or child is pushed around', 'name': 'baby_buggy'}, {'frequency': 'c', 'synset': 'backboard.n.01', 'synonyms': ['basketball_backboard'], 'id': 33, 'def': 'a raised vertical board with basket attached; used to play basketball', 'name': 'basketball_backboard'}, {'frequency': 'f', 'synset': 'backpack.n.01', 'synonyms': ['backpack', 'knapsack', 'packsack', 'rucksack', 'haversack'], 'id': 34, 'def': 'a bag carried by a strap on your back or shoulder', 'name': 'backpack'}, {'frequency': 'f', 'synset': 'bag.n.04', 'synonyms': ['handbag', 'purse', 'pocketbook'], 'id': 35, 'def': 'a container used for carrying money and small personal items or accessories', 'name': 'handbag'}, {'frequency': 'f', 'synset': 'bag.n.06', 'synonyms': ['suitcase', 'baggage', 'luggage'], 'id': 36, 'def': 'cases used to carry belongings when traveling', 'name': 'suitcase'}, {'frequency': 'c', 'synset': 'bagel.n.01', 'synonyms': ['bagel', 'beigel'], 'id': 
37, 'def': 'glazed yeast-raised doughnut-shaped roll with hard crust', 'name': 'bagel'}, {'frequency': 'r', 'synset': 'bagpipe.n.01', 'synonyms': ['bagpipe'], 'id': 38, 'def': 'a tubular wind instrument; the player blows air into a bag and squeezes it out', 'name': 'bagpipe'}, {'frequency': 'r', 'synset': 'baguet.n.01', 'synonyms': ['baguet', 'baguette'], 'id': 39, 'def': 'narrow French stick loaf', 'name': 'baguet'}, {'frequency': 'r', 'synset': 'bait.n.02', 'synonyms': ['bait', 'lure'], 'id': 40, 'def': 'something used to lure fish or other animals into danger so they can be trapped or killed', 'name': 'bait'}, {'frequency': 'f', 'synset': 'ball.n.06', 'synonyms': ['ball'], 'id': 41, 'def': 'a spherical object used as a plaything', 'name': 'ball'}, {'frequency': 'r', 'synset': 'ballet_skirt.n.01', 'synonyms': ['ballet_skirt', 'tutu'], 'id': 42, 'def': 'very short skirt worn by ballerinas', 'name': 'ballet_skirt'}, {'frequency': 'f', 'synset': 'balloon.n.01', 'synonyms': ['balloon'], 'id': 43, 'def': 'large tough nonrigid bag filled with gas or heated air', 'name': 'balloon'}, {'frequency': 'c', 'synset': 'bamboo.n.02', 'synonyms': ['bamboo'], 'id': 44, 'def': 'woody tropical grass having hollow woody stems', 'name': 'bamboo'}, {'frequency': 'f', 'synset': 'banana.n.02', 'synonyms': ['banana'], 'id': 45, 'def': 'elongated crescent-shaped yellow fruit with soft sweet flesh', 'name': 'banana'}, {'frequency': 'c', 'synset': 'band_aid.n.01', 'synonyms': ['Band_Aid'], 'id': 46, 'def': 'trade name for an adhesive bandage to cover small cuts or blisters', 'name': 'Band_Aid'}, {'frequency': 'c', 'synset': 'bandage.n.01', 'synonyms': ['bandage'], 'id': 47, 'def': 'a piece of soft material that covers and protects an injured part of the body', 'name': 'bandage'}, {'frequency': 'f', 'synset': 'bandanna.n.01', 'synonyms': ['bandanna', 'bandana'], 'id': 48, 'def': 'large and brightly colored handkerchief; often used as a neckerchief', 'name': 'bandanna'}, {'frequency': 'r', 'synset': 'banjo.n.01', 'synonyms': ['banjo'], 'id': 49, 'def': 'a stringed instrument of the guitar family with a long neck and circular body', 'name': 'banjo'}, {'frequency': 'f', 'synset': 'banner.n.01', 'synonyms': ['banner', 'streamer'], 'id': 50, 'def': 'long strip of cloth or paper used for decoration or advertising', 'name': 'banner'}, {'frequency': 'r', 'synset': 'barbell.n.01', 'synonyms': ['barbell'], 'id': 51, 'def': 'a bar to which heavy discs are attached at each end; used in weightlifting', 'name': 'barbell'}, {'frequency': 'r', 'synset': 'barge.n.01', 'synonyms': ['barge'], 'id': 52, 'def': 'a flatbottom boat for carrying heavy loads (especially on canals)', 'name': 'barge'}, {'frequency': 'f', 'synset': 'barrel.n.02', 'synonyms': ['barrel', 'cask'], 'id': 53, 'def': 'a cylindrical container that holds liquids', 'name': 'barrel'}, {'frequency': 'c', 'synset': 'barrette.n.01', 'synonyms': ['barrette'], 'id': 54, 'def': "a pin for holding women's hair in place", 'name': 'barrette'}, {'frequency': 'c', 'synset': 'barrow.n.03', 'synonyms': ['barrow', 'garden_cart', 'lawn_cart', 'wheelbarrow'], 'id': 55, 'def': 'a cart for carrying small loads; has handles and one or more wheels', 'name': 'barrow'}, {'frequency': 'f', 'synset': 'base.n.03', 'synonyms': ['baseball_base'], 'id': 56, 'def': 'a place that the runner must touch before scoring', 'name': 'baseball_base'}, {'frequency': 'f', 'synset': 'baseball.n.02', 'synonyms': ['baseball'], 'id': 57, 'def': 'a ball used in playing baseball', 'name': 'baseball'}, {'frequency': 
'f', 'synset': 'baseball_bat.n.01', 'synonyms': ['baseball_bat'], 'id': 58, 'def': 'an implement used in baseball by the batter', 'name': 'baseball_bat'}, {'frequency': 'f', 'synset': 'baseball_cap.n.01', 'synonyms': ['baseball_cap', 'jockey_cap', 'golf_cap'], 'id': 59, 'def': 'a cap with a bill', 'name': 'baseball_cap'}, {'frequency': 'f', 'synset': 'baseball_glove.n.01', 'synonyms': ['baseball_glove', 'baseball_mitt'], 'id': 60, 'def': 'the handwear used by fielders in playing baseball', 'name': 'baseball_glove'}, {'frequency': 'f', 'synset': 'basket.n.01', 'synonyms': ['basket', 'handbasket'], 'id': 61, 'def': 'a container that is usually woven and has handles', 'name': 'basket'}, {'frequency': 'c', 'synset': 'basketball.n.02', 'synonyms': ['basketball'], 'id': 62, 'def': 'an inflated ball used in playing basketball', 'name': 'basketball'}, {'frequency': 'r', 'synset': 'bass_horn.n.01', 'synonyms': ['bass_horn', 'sousaphone', 'tuba'], 'id': 63, 'def': 'the lowest brass wind instrument', 'name': 'bass_horn'}, {'frequency': 'c', 'synset': 'bat.n.01', 'synonyms': ['bat_(animal)'], 'id': 64, 'def': 'nocturnal mouselike mammal with forelimbs modified to form membranous wings', 'name': 'bat_(animal)'}, {'frequency': 'f', 'synset': 'bath_mat.n.01', 'synonyms': ['bath_mat'], 'id': 65, 'def': 'a heavy towel or mat to stand on while drying yourself after a bath', 'name': 'bath_mat'}, {'frequency': 'f', 'synset': 'bath_towel.n.01', 'synonyms': ['bath_towel'], 'id': 66, 'def': 'a large towel; to dry yourself after a bath', 'name': 'bath_towel'}, {'frequency': 'c', 'synset': 'bathrobe.n.01', 'synonyms': ['bathrobe'], 'id': 67, 'def': 'a loose-fitting robe of towelling; worn after a bath or swim', 'name': 'bathrobe'}, {'frequency': 'f', 'synset': 'bathtub.n.01', 'synonyms': ['bathtub', 'bathing_tub'], 'id': 68, 'def': 'a large open container that you fill with water and use to wash the body', 'name': 'bathtub'}, {'frequency': 'r', 'synset': 'batter.n.02', 'synonyms': ['batter_(food)'], 'id': 69, 'def': 'a liquid or semiliquid mixture, as of flour, eggs, and milk, used in cooking', 'name': 'batter_(food)'}, {'frequency': 'c', 'synset': 'battery.n.02', 'synonyms': ['battery'], 'id': 70, 'def': 'a portable device that produces electricity', 'name': 'battery'}, {'frequency': 'r', 'synset': 'beach_ball.n.01', 'synonyms': ['beachball'], 'id': 71, 'def': 'large and light ball; for play at the seaside', 'name': 'beachball'}, {'frequency': 'c', 'synset': 'bead.n.01', 'synonyms': ['bead'], 'id': 72, 'def': 'a small ball with a hole through the middle used for ornamentation, jewellery, etc.', 'name': 'bead'}, {'frequency': 'c', 'synset': 'bean_curd.n.01', 'synonyms': ['bean_curd', 'tofu'], 'id': 73, 'def': 'cheeselike food made of curdled soybean milk', 'name': 'bean_curd'}, {'frequency': 'c', 'synset': 'beanbag.n.01', 'synonyms': ['beanbag'], 'id': 74, 'def': 'a bag filled with dried beans or similar items; used in games or to sit on', 'name': 'beanbag'}, {'frequency': 'f', 'synset': 'beanie.n.01', 'synonyms': ['beanie', 'beany'], 'id': 75, 'def': 'a small skullcap; formerly worn by schoolboys and college freshmen', 'name': 'beanie'}, {'frequency': 'f', 'synset': 'bear.n.01', 'synonyms': ['bear'], 'id': 76, 'def': 'large carnivorous or omnivorous mammals with shaggy coats and claws', 'name': 'bear'}, {'frequency': 'f', 'synset': 'bed.n.01', 'synonyms': ['bed'], 'id': 77, 'def': 'a piece of furniture that provides a place to sleep', 'name': 'bed'}, {'frequency': 'r', 'synset': 'bedpan.n.01', 'synonyms': 
['bedpan'], 'id': 78, 'def': 'a shallow vessel used by a bedridden patient for defecation and urination', 'name': 'bedpan'}, {'frequency': 'f', 'synset': 'bedspread.n.01', 'synonyms': ['bedspread', 'bedcover', 'bed_covering', 'counterpane', 'spread'], 'id': 79, 'def': 'decorative cover for a bed', 'name': 'bedspread'}, {'frequency': 'f', 'synset': 'beef.n.01', 'synonyms': ['cow'], 'id': 80, 'def': 'cattle/cow', 'name': 'cow'}, {'frequency': 'f', 'synset': 'beef.n.02', 'synonyms': ['beef_(food)', 'boeuf_(food)'], 'id': 81, 'def': 'meat from an adult domestic bovine', 'name': 'beef_(food)'}, {'frequency': 'r', 'synset': 'beeper.n.01', 'synonyms': ['beeper', 'pager'], 'id': 82, 'def': 'an device that beeps when the person carrying it is being paged', 'name': 'beeper'}, {'frequency': 'f', 'synset': 'beer_bottle.n.01', 'synonyms': ['beer_bottle'], 'id': 83, 'def': 'a bottle that holds beer', 'name': 'beer_bottle'}, {'frequency': 'c', 'synset': 'beer_can.n.01', 'synonyms': ['beer_can'], 'id': 84, 'def': 'a can that holds beer', 'name': 'beer_can'}, {'frequency': 'r', 'synset': 'beetle.n.01', 'synonyms': ['beetle'], 'id': 85, 'def': 'insect with hard wing covers', 'name': 'beetle'}, {'frequency': 'f', 'synset': 'bell.n.01', 'synonyms': ['bell'], 'id': 86, 'def': 'a hollow device made of metal that makes a ringing sound when struck', 'name': 'bell'}, {'frequency': 'f', 'synset': 'bell_pepper.n.02', 'synonyms': ['bell_pepper', 'capsicum'], 'id': 87, 'def': 'large bell-shaped sweet pepper in green or red or yellow or orange or black varieties', 'name': 'bell_pepper'}, {'frequency': 'f', 'synset': 'belt.n.02', 'synonyms': ['belt'], 'id': 88, 'def': 'a band to tie or buckle around the body (usually at the waist)', 'name': 'belt'}, {'frequency': 'f', 'synset': 'belt_buckle.n.01', 'synonyms': ['belt_buckle'], 'id': 89, 'def': 'the buckle used to fasten a belt', 'name': 'belt_buckle'}, {'frequency': 'f', 'synset': 'bench.n.01', 'synonyms': ['bench'], 'id': 90, 'def': 'a long seat for more than one person', 'name': 'bench'}, {'frequency': 'c', 'synset': 'beret.n.01', 'synonyms': ['beret'], 'id': 91, 'def': 'a cap with no brim or bill; made of soft cloth', 'name': 'beret'}, {'frequency': 'c', 'synset': 'bib.n.02', 'synonyms': ['bib'], 'id': 92, 'def': 'a napkin tied under the chin of a child while eating', 'name': 'bib'}, {'frequency': 'r', 'synset': 'bible.n.01', 'synonyms': ['Bible'], 'id': 93, 'def': 'the sacred writings of the Christian religions', 'name': 'Bible'}, {'frequency': 'f', 'synset': 'bicycle.n.01', 'synonyms': ['bicycle', 'bike_(bicycle)'], 'id': 94, 'def': 'a wheeled vehicle that has two wheels and is moved by foot pedals', 'name': 'bicycle'}, {'frequency': 'f', 'synset': 'bill.n.09', 'synonyms': ['visor', 'vizor'], 'id': 95, 'def': 'a brim that projects to the front to shade the eyes', 'name': 'visor'}, {'frequency': 'f', 'synset': 'billboard.n.01', 'synonyms': ['billboard'], 'id': 96, 'def': 'large outdoor signboard', 'name': 'billboard'}, {'frequency': 'c', 'synset': 'binder.n.03', 'synonyms': ['binder', 'ring-binder'], 'id': 97, 'def': 'holds loose papers or magazines', 'name': 'binder'}, {'frequency': 'c', 'synset': 'binoculars.n.01', 'synonyms': ['binoculars', 'field_glasses', 'opera_glasses'], 'id': 98, 'def': 'an optical instrument designed for simultaneous use by both eyes', 'name': 'binoculars'}, {'frequency': 'f', 'synset': 'bird.n.01', 'synonyms': ['bird'], 'id': 99, 'def': 'animal characterized by feathers and wings', 'name': 'bird'}, {'frequency': 'c', 'synset': 
'bird_feeder.n.01', 'synonyms': ['birdfeeder'], 'id': 100, 'def': 'an outdoor device that supplies food for wild birds', 'name': 'birdfeeder'}, {'frequency': 'c', 'synset': 'birdbath.n.01', 'synonyms': ['birdbath'], 'id': 101, 'def': 'an ornamental basin (usually in a garden) for birds to bathe in', 'name': 'birdbath'}, {'frequency': 'c', 'synset': 'birdcage.n.01', 'synonyms': ['birdcage'], 'id': 102, 'def': 'a cage in which a bird can be kept', 'name': 'birdcage'}, {'frequency': 'c', 'synset': 'birdhouse.n.01', 'synonyms': ['birdhouse'], 'id': 103, 'def': 'a shelter for birds', 'name': 'birdhouse'}, {'frequency': 'f', 'synset': 'birthday_cake.n.01', 'synonyms': ['birthday_cake'], 'id': 104, 'def': 'decorated cake served at a birthday party', 'name': 'birthday_cake'}, {'frequency': 'r', 'synset': 'birthday_card.n.01', 'synonyms': ['birthday_card'], 'id': 105, 'def': 'a card expressing a birthday greeting', 'name': 'birthday_card'}, {'frequency': 'r', 'synset': 'black_flag.n.01', 'synonyms': ['pirate_flag'], 'id': 106, 'def': 'a flag usually bearing a white skull and crossbones on a black background', 'name': 'pirate_flag'}, {'frequency': 'c', 'synset': 'black_sheep.n.02', 'synonyms': ['black_sheep'], 'id': 107, 'def': 'sheep with a black coat', 'name': 'black_sheep'}, {'frequency': 'c', 'synset': 'blackberry.n.01', 'synonyms': ['blackberry'], 'id': 108, 'def': 'large sweet black or very dark purple edible aggregate fruit', 'name': 'blackberry'}, {'frequency': 'f', 'synset': 'blackboard.n.01', 'synonyms': ['blackboard', 'chalkboard'], 'id': 109, 'def': 'sheet of slate; for writing with chalk', 'name': 'blackboard'}, {'frequency': 'f', 'synset': 'blanket.n.01', 'synonyms': ['blanket'], 'id': 110, 'def': 'bedding that keeps a person warm in bed', 'name': 'blanket'}, {'frequency': 'c', 'synset': 'blazer.n.01', 'synonyms': ['blazer', 'sport_jacket', 'sport_coat', 'sports_jacket', 'sports_coat'], 'id': 111, 'def': 'lightweight jacket; often striped in the colors of a club or school', 'name': 'blazer'}, {'frequency': 'f', 'synset': 'blender.n.01', 'synonyms': ['blender', 'liquidizer', 'liquidiser'], 'id': 112, 'def': 'an electrically powered mixer that mix or chop or liquefy foods', 'name': 'blender'}, {'frequency': 'r', 'synset': 'blimp.n.02', 'synonyms': ['blimp'], 'id': 113, 'def': 'a small nonrigid airship used for observation or as a barrage balloon', 'name': 'blimp'}, {'frequency': 'f', 'synset': 'blinker.n.01', 'synonyms': ['blinker', 'flasher'], 'id': 114, 'def': 'a light that flashes on and off; used as a signal or to send messages', 'name': 'blinker'}, {'frequency': 'f', 'synset': 'blouse.n.01', 'synonyms': ['blouse'], 'id': 115, 'def': 'a top worn by women', 'name': 'blouse'}, {'frequency': 'f', 'synset': 'blueberry.n.02', 'synonyms': ['blueberry'], 'id': 116, 'def': 'sweet edible dark-blue berries of blueberry plants', 'name': 'blueberry'}, {'frequency': 'r', 'synset': 'board.n.09', 'synonyms': ['gameboard'], 'id': 117, 'def': 'a flat portable surface (usually rectangular) designed for board games', 'name': 'gameboard'}, {'frequency': 'f', 'synset': 'boat.n.01', 'synonyms': ['boat', 'ship_(boat)'], 'id': 118, 'def': 'a vessel for travel on water', 'name': 'boat'}, {'frequency': 'r', 'synset': 'bob.n.05', 'synonyms': ['bob', 'bobber', 'bobfloat'], 'id': 119, 'def': 'a small float usually made of cork; attached to a fishing line', 'name': 'bob'}, {'frequency': 'c', 'synset': 'bobbin.n.01', 'synonyms': ['bobbin', 'spool', 'reel'], 'id': 120, 'def': 'a thing around which thread/tape/film 
or other flexible materials can be wound', 'name': 'bobbin'}, {'frequency': 'c', 'synset': 'bobby_pin.n.01', 'synonyms': ['bobby_pin', 'hairgrip'], 'id': 121, 'def': 'a flat wire hairpin used to hold bobbed hair in place', 'name': 'bobby_pin'}, {'frequency': 'c', 'synset': 'boiled_egg.n.01', 'synonyms': ['boiled_egg', 'coddled_egg'], 'id': 122, 'def': 'egg cooked briefly in the shell in gently boiling water', 'name': 'boiled_egg'}, {'frequency': 'r', 'synset': 'bolo_tie.n.01', 'synonyms': ['bolo_tie', 'bolo', 'bola_tie', 'bola'], 'id': 123, 'def': 'a cord fastened around the neck with an ornamental clasp and worn as a necktie', 'name': 'bolo_tie'}, {'frequency': 'c', 'synset': 'bolt.n.03', 'synonyms': ['deadbolt'], 'id': 124, 'def': 'the part of a lock that is engaged or withdrawn with a key', 'name': 'deadbolt'}, {'frequency': 'f', 'synset': 'bolt.n.06', 'synonyms': ['bolt'], 'id': 125, 'def': 'a screw that screws into a nut to form a fastener', 'name': 'bolt'}, {'frequency': 'r', 'synset': 'bonnet.n.01', 'synonyms': ['bonnet'], 'id': 126, 'def': 'a hat tied under the chin', 'name': 'bonnet'}, {'frequency': 'f', 'synset': 'book.n.01', 'synonyms': ['book'], 'id': 127, 'def': 'a written work or composition that has been published', 'name': 'book'}, {'frequency': 'c', 'synset': 'bookcase.n.01', 'synonyms': ['bookcase'], 'id': 128, 'def': 'a piece of furniture with shelves for storing books', 'name': 'bookcase'}, {'frequency': 'c', 'synset': 'booklet.n.01', 'synonyms': ['booklet', 'brochure', 'leaflet', 'pamphlet'], 'id': 129, 'def': 'a small book usually having a paper cover', 'name': 'booklet'}, {'frequency': 'r', 'synset': 'bookmark.n.01', 'synonyms': ['bookmark', 'bookmarker'], 'id': 130, 'def': 'a marker (a piece of paper or ribbon) placed between the pages of a book', 'name': 'bookmark'}, {'frequency': 'r', 'synset': 'boom.n.04', 'synonyms': ['boom_microphone', 'microphone_boom'], 'id': 131, 'def': 'a pole carrying an overhead microphone projected over a film or tv set', 'name': 'boom_microphone'}, {'frequency': 'f', 'synset': 'boot.n.01', 'synonyms': ['boot'], 'id': 132, 'def': 'footwear that covers the whole foot and lower leg', 'name': 'boot'}, {'frequency': 'f', 'synset': 'bottle.n.01', 'synonyms': ['bottle'], 'id': 133, 'def': 'a glass or plastic vessel used for storing drinks or other liquids', 'name': 'bottle'}, {'frequency': 'c', 'synset': 'bottle_opener.n.01', 'synonyms': ['bottle_opener'], 'id': 134, 'def': 'an opener for removing caps or corks from bottles', 'name': 'bottle_opener'}, {'frequency': 'c', 'synset': 'bouquet.n.01', 'synonyms': ['bouquet'], 'id': 135, 'def': 'an arrangement of flowers that is usually given as a present', 'name': 'bouquet'}, {'frequency': 'r', 'synset': 'bow.n.04', 'synonyms': ['bow_(weapon)'], 'id': 136, 'def': 'a weapon for shooting arrows', 'name': 'bow_(weapon)'}, {'frequency': 'f', 'synset': 'bow.n.08', 'synonyms': ['bow_(decorative_ribbons)'], 'id': 137, 'def': 'a decorative interlacing of ribbons', 'name': 'bow_(decorative_ribbons)'}, {'frequency': 'f', 'synset': 'bow_tie.n.01', 'synonyms': ['bow-tie', 'bowtie'], 'id': 138, 'def': "a man's tie that ties in a bow", 'name': 'bow-tie'}, {'frequency': 'f', 'synset': 'bowl.n.03', 'synonyms': ['bowl'], 'id': 139, 'def': 'a dish that is round and open at the top for serving foods', 'name': 'bowl'}, {'frequency': 'r', 'synset': 'bowl.n.08', 'synonyms': ['pipe_bowl'], 'id': 140, 'def': 'a small round container that is open at the top for holding tobacco', 'name': 'pipe_bowl'}, {'frequency': 'c', 
'synset': 'bowler_hat.n.01', 'synonyms': ['bowler_hat', 'bowler', 'derby_hat', 'derby', 'plug_hat'], 'id': 141, 'def': 'a felt hat that is round and hard with a narrow brim', 'name': 'bowler_hat'}, {'frequency': 'r', 'synset': 'bowling_ball.n.01', 'synonyms': ['bowling_ball'], 'id': 142, 'def': 'a large ball with finger holes used in the sport of bowling', 'name': 'bowling_ball'}, {'frequency': 'f', 'synset': 'box.n.01', 'synonyms': ['box'], 'id': 143, 'def': 'a (usually rectangular) container; may have a lid', 'name': 'box'}, {'frequency': 'r', 'synset': 'boxing_glove.n.01', 'synonyms': ['boxing_glove'], 'id': 144, 'def': 'large glove coverings the fists of a fighter worn for the sport of boxing', 'name': 'boxing_glove'}, {'frequency': 'c', 'synset': 'brace.n.06', 'synonyms': ['suspenders'], 'id': 145, 'def': 'elastic straps that hold trousers up (usually used in the plural)', 'name': 'suspenders'}, {'frequency': 'f', 'synset': 'bracelet.n.02', 'synonyms': ['bracelet', 'bangle'], 'id': 146, 'def': 'jewelry worn around the wrist for decoration', 'name': 'bracelet'}, {'frequency': 'r', 'synset': 'brass.n.07', 'synonyms': ['brass_plaque'], 'id': 147, 'def': 'a memorial made of brass', 'name': 'brass_plaque'}, {'frequency': 'c', 'synset': 'brassiere.n.01', 'synonyms': ['brassiere', 'bra', 'bandeau'], 'id': 148, 'def': 'an undergarment worn by women to support their breasts', 'name': 'brassiere'}, {'frequency': 'c', 'synset': 'bread-bin.n.01', 'synonyms': ['bread-bin', 'breadbox'], 'id': 149, 'def': 'a container used to keep bread or cake in', 'name': 'bread-bin'}, {'frequency': 'f', 'synset': 'bread.n.01', 'synonyms': ['bread'], 'id': 150, 'def': 'food made from dough of flour or meal and usually raised with yeast or baking powder and then baked', 'name': 'bread'}, {'frequency': 'r', 'synset': 'breechcloth.n.01', 'synonyms': ['breechcloth', 'breechclout', 'loincloth'], 'id': 151, 'def': 'a garment that provides covering for the loins', 'name': 'breechcloth'}, {'frequency': 'f', 'synset': 'bridal_gown.n.01', 'synonyms': ['bridal_gown', 'wedding_gown', 'wedding_dress'], 'id': 152, 'def': 'a gown worn by the bride at a wedding', 'name': 'bridal_gown'}, {'frequency': 'c', 'synset': 'briefcase.n.01', 'synonyms': ['briefcase'], 'id': 153, 'def': 'a case with a handle; for carrying papers or files or books', 'name': 'briefcase'}, {'frequency': 'f', 'synset': 'broccoli.n.01', 'synonyms': ['broccoli'], 'id': 154, 'def': 'plant with dense clusters of tight green flower buds', 'name': 'broccoli'}, {'frequency': 'r', 'synset': 'brooch.n.01', 'synonyms': ['broach'], 'id': 155, 'def': 'a decorative pin worn by women', 'name': 'broach'}, {'frequency': 'c', 'synset': 'broom.n.01', 'synonyms': ['broom'], 'id': 156, 'def': 'bundle of straws or twigs attached to a long handle; used for cleaning', 'name': 'broom'}, {'frequency': 'c', 'synset': 'brownie.n.03', 'synonyms': ['brownie'], 'id': 157, 'def': 'square or bar of very rich chocolate cake usually with nuts', 'name': 'brownie'}, {'frequency': 'c', 'synset': 'brussels_sprouts.n.01', 'synonyms': ['brussels_sprouts'], 'id': 158, 'def': 'the small edible cabbage-like buds growing along a stalk', 'name': 'brussels_sprouts'}, {'frequency': 'r', 'synset': 'bubble_gum.n.01', 'synonyms': ['bubble_gum'], 'id': 159, 'def': 'a kind of chewing gum that can be blown into bubbles', 'name': 'bubble_gum'}, {'frequency': 'f', 'synset': 'bucket.n.01', 'synonyms': ['bucket', 'pail'], 'id': 160, 'def': 'a roughly cylindrical vessel that is open at the top', 'name': 'bucket'}, 
{'frequency': 'r', 'synset': 'buggy.n.01', 'synonyms': ['horse_buggy'], 'id': 161, 'def': 'a small lightweight carriage; drawn by a single horse', 'name': 'horse_buggy'}, {'frequency': 'c', 'synset': 'bull.n.11', 'synonyms': ['horned_cow'], 'id': 162, 'def': 'a cow with horns', 'name': 'bull'}, {'frequency': 'c', 'synset': 'bulldog.n.01', 'synonyms': ['bulldog'], 'id': 163, 'def': 'a thickset short-haired dog with a large head and strong undershot lower jaw', 'name': 'bulldog'}, {'frequency': 'r', 'synset': 'bulldozer.n.01', 'synonyms': ['bulldozer', 'dozer'], 'id': 164, 'def': 'large powerful tractor; a large blade in front flattens areas of ground', 'name': 'bulldozer'}, {'frequency': 'c', 'synset': 'bullet_train.n.01', 'synonyms': ['bullet_train'], 'id': 165, 'def': 'a high-speed passenger train', 'name': 'bullet_train'}, {'frequency': 'c', 'synset': 'bulletin_board.n.02', 'synonyms': ['bulletin_board', 'notice_board'], 'id': 166, 'def': 'a board that hangs on a wall; displays announcements', 'name': 'bulletin_board'}, {'frequency': 'r', 'synset': 'bulletproof_vest.n.01', 'synonyms': ['bulletproof_vest'], 'id': 167, 'def': 'a vest capable of resisting the impact of a bullet', 'name': 'bulletproof_vest'}, {'frequency': 'c', 'synset': 'bullhorn.n.01', 'synonyms': ['bullhorn', 'megaphone'], 'id': 168, 'def': 'a portable loudspeaker with built-in microphone and amplifier', 'name': 'bullhorn'}, {'frequency': 'f', 'synset': 'bun.n.01', 'synonyms': ['bun', 'roll'], 'id': 169, 'def': 'small rounded bread either plain or sweet', 'name': 'bun'}, {'frequency': 'c', 'synset': 'bunk_bed.n.01', 'synonyms': ['bunk_bed'], 'id': 170, 'def': 'beds built one above the other', 'name': 'bunk_bed'}, {'frequency': 'f', 'synset': 'buoy.n.01', 'synonyms': ['buoy'], 'id': 171, 'def': 'a float attached by rope to the seabed to mark channels in a harbor or underwater hazards', 'name': 'buoy'}, {'frequency': 'r', 'synset': 'burrito.n.01', 'synonyms': ['burrito'], 'id': 172, 'def': 'a flour tortilla folded around a filling', 'name': 'burrito'}, {'frequency': 'f', 'synset': 'bus.n.01', 'synonyms': ['bus_(vehicle)', 'autobus', 'charabanc', 'double-decker', 'motorbus', 'motorcoach'], 'id': 173, 'def': 'a vehicle carrying many passengers; used for public transport', 'name': 'bus_(vehicle)'}, {'frequency': 'c', 'synset': 'business_card.n.01', 'synonyms': ['business_card'], 'id': 174, 'def': "a card on which are printed the person's name and business affiliation", 'name': 'business_card'}, {'frequency': 'f', 'synset': 'butter.n.01', 'synonyms': ['butter'], 'id': 175, 'def': 'an edible emulsion of fat globules made by churning milk or cream; for cooking and table use', 'name': 'butter'}, {'frequency': 'c', 'synset': 'butterfly.n.01', 'synonyms': ['butterfly'], 'id': 176, 'def': 'insect typically having a slender body with knobbed antennae and broad colorful wings', 'name': 'butterfly'}, {'frequency': 'f', 'synset': 'button.n.01', 'synonyms': ['button'], 'id': 177, 'def': 'a round fastener sewn to shirts and coats etc to fit through buttonholes', 'name': 'button'}, {'frequency': 'f', 'synset': 'cab.n.03', 'synonyms': ['cab_(taxi)', 'taxi', 'taxicab'], 'id': 178, 'def': 'a car that takes passengers where they want to go in exchange for money', 'name': 'cab_(taxi)'}, {'frequency': 'r', 'synset': 'cabana.n.01', 'synonyms': ['cabana'], 'id': 179, 'def': 'a small tent used as a dressing room beside the sea or a swimming pool', 'name': 'cabana'}, {'frequency': 'c', 'synset': 'cabin_car.n.01', 'synonyms': ['cabin_car', 'caboose'], 
'id': 180, 'def': 'a car on a freight train for use of the train crew; usually the last car on the train', 'name': 'cabin_car'}, {'frequency': 'f', 'synset': 'cabinet.n.01', 'synonyms': ['cabinet'], 'id': 181, 'def': 'a piece of furniture resembling a cupboard with doors and shelves and drawers', 'name': 'cabinet'}, {'frequency': 'r', 'synset': 'cabinet.n.03', 'synonyms': ['locker', 'storage_locker'], 'id': 182, 'def': 'a storage compartment for clothes and valuables; usually it has a lock', 'name': 'locker'}, {'frequency': 'f', 'synset': 'cake.n.03', 'synonyms': ['cake'], 'id': 183, 'def': 'baked goods made from or based on a mixture of flour, sugar, eggs, and fat', 'name': 'cake'}, {'frequency': 'c', 'synset': 'calculator.n.02', 'synonyms': ['calculator'], 'id': 184, 'def': 'a small machine that is used for mathematical calculations', 'name': 'calculator'}, {'frequency': 'f', 'synset': 'calendar.n.02', 'synonyms': ['calendar'], 'id': 185, 'def': 'a list or register of events (appointments/social events/court cases, etc)', 'name': 'calendar'}, {'frequency': 'c', 'synset': 'calf.n.01', 'synonyms': ['calf'], 'id': 186, 'def': 'young of domestic cattle', 'name': 'calf'}, {'frequency': 'c', 'synset': 'camcorder.n.01', 'synonyms': ['camcorder'], 'id': 187, 'def': 'a portable television camera and videocassette recorder', 'name': 'camcorder'}, {'frequency': 'c', 'synset': 'camel.n.01', 'synonyms': ['camel'], 'id': 188, 'def': 'cud-chewing mammal used as a draft or saddle animal in desert regions', 'name': 'camel'}, {'frequency': 'f', 'synset': 'camera.n.01', 'synonyms': ['camera'], 'id': 189, 'def': 'equipment for taking photographs', 'name': 'camera'}, {'frequency': 'c', 'synset': 'camera_lens.n.01', 'synonyms': ['camera_lens'], 'id': 190, 'def': 'a lens that focuses the image in a camera', 'name': 'camera_lens'}, {'frequency': 'c', 'synset': 'camper.n.02', 'synonyms': ['camper_(vehicle)', 'camping_bus', 'motor_home'], 'id': 191, 'def': 'a recreational vehicle equipped for camping out while traveling', 'name': 'camper_(vehicle)'}, {'frequency': 'f', 'synset': 'can.n.01', 'synonyms': ['can', 'tin_can'], 'id': 192, 'def': 'airtight sealed metal container for food or drink or paint etc.', 'name': 'can'}, {'frequency': 'c', 'synset': 'can_opener.n.01', 'synonyms': ['can_opener', 'tin_opener'], 'id': 193, 'def': 'a device for cutting cans open', 'name': 'can_opener'}, {'frequency': 'f', 'synset': 'candle.n.01', 'synonyms': ['candle', 'candlestick'], 'id': 194, 'def': 'stick of wax with a wick in the middle', 'name': 'candle'}, {'frequency': 'f', 'synset': 'candlestick.n.01', 'synonyms': ['candle_holder'], 'id': 195, 'def': 'a holder with sockets for candles', 'name': 'candle_holder'}, {'frequency': 'r', 'synset': 'candy_bar.n.01', 'synonyms': ['candy_bar'], 'id': 196, 'def': 'a candy shaped as a bar', 'name': 'candy_bar'}, {'frequency': 'c', 'synset': 'candy_cane.n.01', 'synonyms': ['candy_cane'], 'id': 197, 'def': 'a hard candy in the shape of a rod (usually with stripes)', 'name': 'candy_cane'}, {'frequency': 'c', 'synset': 'cane.n.01', 'synonyms': ['walking_cane'], 'id': 198, 'def': 'a stick that people can lean on to help them walk', 'name': 'walking_cane'}, {'frequency': 'c', 'synset': 'canister.n.02', 'synonyms': ['canister', 'cannister'], 'id': 199, 'def': 'metal container for storing dry foods such as tea or flour', 'name': 'canister'}, {'frequency': 'c', 'synset': 'canoe.n.01', 'synonyms': ['canoe'], 'id': 200, 'def': 'small and light boat; pointed at both ends; propelled with a paddle', 
'name': 'canoe'}, {'frequency': 'c', 'synset': 'cantaloup.n.02', 'synonyms': ['cantaloup', 'cantaloupe'], 'id': 201, 'def': 'the fruit of a cantaloup vine; small to medium-sized melon with yellowish flesh', 'name': 'cantaloup'}, {'frequency': 'r', 'synset': 'canteen.n.01', 'synonyms': ['canteen'], 'id': 202, 'def': 'a flask for carrying water; used by soldiers or travelers', 'name': 'canteen'}, {'frequency': 'f', 'synset': 'cap.n.01', 'synonyms': ['cap_(headwear)'], 'id': 203, 'def': 'a tight-fitting headwear', 'name': 'cap_(headwear)'}, {'frequency': 'f', 'synset': 'cap.n.02', 'synonyms': ['bottle_cap', 'cap_(container_lid)'], 'id': 204, 'def': 'a top (as for a bottle)', 'name': 'bottle_cap'}, {'frequency': 'c', 'synset': 'cape.n.02', 'synonyms': ['cape'], 'id': 205, 'def': 'a sleeveless garment like a cloak but shorter', 'name': 'cape'}, {'frequency': 'c', 'synset': 'cappuccino.n.01', 'synonyms': ['cappuccino', 'coffee_cappuccino'], 'id': 206, 'def': 'equal parts of espresso and steamed milk', 'name': 'cappuccino'}, {'frequency': 'f', 'synset': 'car.n.01', 'synonyms': ['car_(automobile)', 'auto_(automobile)', 'automobile'], 'id': 207, 'def': 'a motor vehicle with four wheels', 'name': 'car_(automobile)'}, {'frequency': 'f', 'synset': 'car.n.02', 'synonyms': ['railcar_(part_of_a_train)', 'railway_car_(part_of_a_train)', 'railroad_car_(part_of_a_train)'], 'id': 208, 'def': 'a wheeled vehicle adapted to the rails of railroad (mark each individual railcar separately)', 'name': 'railcar_(part_of_a_train)'}, {'frequency': 'r', 'synset': 'car.n.04', 'synonyms': ['elevator_car'], 'id': 209, 'def': 'where passengers ride up and down', 'name': 'elevator_car'}, {'frequency': 'r', 'synset': 'car_battery.n.01', 'synonyms': ['car_battery', 'automobile_battery'], 'id': 210, 'def': 'a battery in a motor vehicle', 'name': 'car_battery'}, {'frequency': 'c', 'synset': 'card.n.02', 'synonyms': ['identity_card'], 'id': 211, 'def': 'a card certifying the identity of the bearer', 'name': 'identity_card'}, {'frequency': 'c', 'synset': 'card.n.03', 'synonyms': ['card'], 'id': 212, 'def': 'a rectangular piece of paper used to send messages (e.g. 
greetings or pictures)', 'name': 'card'}, {'frequency': 'c', 'synset': 'cardigan.n.01', 'synonyms': ['cardigan'], 'id': 213, 'def': 'knitted jacket that is fastened up the front with buttons or a zipper', 'name': 'cardigan'}, {'frequency': 'r', 'synset': 'cargo_ship.n.01', 'synonyms': ['cargo_ship', 'cargo_vessel'], 'id': 214, 'def': 'a ship designed to carry cargo', 'name': 'cargo_ship'}, {'frequency': 'r', 'synset': 'carnation.n.01', 'synonyms': ['carnation'], 'id': 215, 'def': 'plant with pink to purple-red spice-scented usually double flowers', 'name': 'carnation'}, {'frequency': 'c', 'synset': 'carriage.n.02', 'synonyms': ['horse_carriage'], 'id': 216, 'def': 'a vehicle with wheels drawn by one or more horses', 'name': 'horse_carriage'}, {'frequency': 'f', 'synset': 'carrot.n.01', 'synonyms': ['carrot'], 'id': 217, 'def': 'deep orange edible root of the cultivated carrot plant', 'name': 'carrot'}, {'frequency': 'f', 'synset': 'carryall.n.01', 'synonyms': ['tote_bag'], 'id': 218, 'def': 'a capacious bag or basket', 'name': 'tote_bag'}, {'frequency': 'c', 'synset': 'cart.n.01', 'synonyms': ['cart'], 'id': 219, 'def': 'a heavy open wagon usually having two wheels and drawn by an animal', 'name': 'cart'}, {'frequency': 'c', 'synset': 'carton.n.02', 'synonyms': ['carton'], 'id': 220, 'def': 'a container made of cardboard for holding food or drink', 'name': 'carton'}, {'frequency': 'c', 'synset': 'cash_register.n.01', 'synonyms': ['cash_register', 'register_(for_cash_transactions)'], 'id': 221, 'def': 'a cashbox with an adding machine to register transactions', 'name': 'cash_register'}, {'frequency': 'r', 'synset': 'casserole.n.01', 'synonyms': ['casserole'], 'id': 222, 'def': 'food cooked and served in a casserole', 'name': 'casserole'}, {'frequency': 'r', 'synset': 'cassette.n.01', 'synonyms': ['cassette'], 'id': 223, 'def': 'a container that holds a magnetic tape used for recording or playing sound or video', 'name': 'cassette'}, {'frequency': 'c', 'synset': 'cast.n.05', 'synonyms': ['cast', 'plaster_cast', 'plaster_bandage'], 'id': 224, 'def': 'bandage consisting of a firm covering that immobilizes broken bones while they heal', 'name': 'cast'}, {'frequency': 'f', 'synset': 'cat.n.01', 'synonyms': ['cat'], 'id': 225, 'def': 'a domestic house cat', 'name': 'cat'}, {'frequency': 'f', 'synset': 'cauliflower.n.02', 'synonyms': ['cauliflower'], 'id': 226, 'def': 'edible compact head of white undeveloped flowers', 'name': 'cauliflower'}, {'frequency': 'c', 'synset': 'cayenne.n.02', 'synonyms': ['cayenne_(spice)', 'cayenne_pepper_(spice)', 'red_pepper_(spice)'], 'id': 227, 'def': 'ground pods and seeds of pungent red peppers of the genus Capsicum', 'name': 'cayenne_(spice)'}, {'frequency': 'c', 'synset': 'cd_player.n.01', 'synonyms': ['CD_player'], 'id': 228, 'def': 'electronic equipment for playing compact discs (CDs)', 'name': 'CD_player'}, {'frequency': 'f', 'synset': 'celery.n.01', 'synonyms': ['celery'], 'id': 229, 'def': 'widely cultivated herb with aromatic leaf stalks that are eaten raw or cooked', 'name': 'celery'}, {'frequency': 'f', 'synset': 'cellular_telephone.n.01', 'synonyms': ['cellular_telephone', 'cellular_phone', 'cellphone', 'mobile_phone', 'smart_phone'], 'id': 230, 'def': 'a hand-held mobile telephone', 'name': 'cellular_telephone'}, {'frequency': 'r', 'synset': 'chain_mail.n.01', 'synonyms': ['chain_mail', 'ring_mail', 'chain_armor', 'chain_armour', 'ring_armor', 'ring_armour'], 'id': 231, 'def': '(Middle Ages) flexible armor made of interlinked metal rings', 'name': 
'chain_mail'}, {'frequency': 'f', 'synset': 'chair.n.01', 'synonyms': ['chair'], 'id': 232, 'def': 'a seat for one person, with a support for the back', 'name': 'chair'}, {'frequency': 'r', 'synset': 'chaise_longue.n.01', 'synonyms': ['chaise_longue', 'chaise', 'daybed'], 'id': 233, 'def': 'a long chair; for reclining', 'name': 'chaise_longue'}, {'frequency': 'r', 'synset': 'chalice.n.01', 'synonyms': ['chalice'], 'id': 234, 'def': 'a bowl-shaped drinking vessel; especially the Eucharistic cup', 'name': 'chalice'}, {'frequency': 'f', 'synset': 'chandelier.n.01', 'synonyms': ['chandelier'], 'id': 235, 'def': 'branched lighting fixture; often ornate; hangs from the ceiling', 'name': 'chandelier'}, {'frequency': 'r', 'synset': 'chap.n.04', 'synonyms': ['chap'], 'id': 236, 'def': 'leather leggings without a seat; worn over trousers by cowboys to protect their legs', 'name': 'chap'}, {'frequency': 'r', 'synset': 'checkbook.n.01', 'synonyms': ['checkbook', 'chequebook'], 'id': 237, 'def': 'a book issued to holders of checking accounts', 'name': 'checkbook'}, {'frequency': 'r', 'synset': 'checkerboard.n.01', 'synonyms': ['checkerboard'], 'id': 238, 'def': 'a board having 64 squares of two alternating colors', 'name': 'checkerboard'}, {'frequency': 'c', 'synset': 'cherry.n.03', 'synonyms': ['cherry'], 'id': 239, 'def': 'a red fruit with a single hard stone', 'name': 'cherry'}, {'frequency': 'r', 'synset': 'chessboard.n.01', 'synonyms': ['chessboard'], 'id': 240, 'def': 'a checkerboard used to play chess', 'name': 'chessboard'}, {'frequency': 'c', 'synset': 'chicken.n.02', 'synonyms': ['chicken_(animal)'], 'id': 241, 'def': 'a domestic fowl bred for flesh or eggs', 'name': 'chicken_(animal)'}, {'frequency': 'c', 'synset': 'chickpea.n.01', 'synonyms': ['chickpea', 'garbanzo'], 'id': 242, 'def': 'the seed of the chickpea plant; usually dried', 'name': 'chickpea'}, {'frequency': 'c', 'synset': 'chili.n.02', 'synonyms': ['chili_(vegetable)', 'chili_pepper_(vegetable)', 'chilli_(vegetable)', 'chilly_(vegetable)', 'chile_(vegetable)'], 'id': 243, 'def': 'very hot and finely tapering pepper of special pungency', 'name': 'chili_(vegetable)'}, {'frequency': 'r', 'synset': 'chime.n.01', 'synonyms': ['chime', 'gong'], 'id': 244, 'def': 'an instrument consisting of a set of bells that are struck with a hammer', 'name': 'chime'}, {'frequency': 'r', 'synset': 'chinaware.n.01', 'synonyms': ['chinaware'], 'id': 245, 'def': 'dishware made of high quality porcelain', 'name': 'chinaware'}, {'frequency': 'c', 'synset': 'chip.n.04', 'synonyms': ['crisp_(potato_chip)', 'potato_chip'], 'id': 246, 'def': 'a thin crisp slice of potato fried in deep fat', 'name': 'crisp_(potato_chip)'}, {'frequency': 'r', 'synset': 'chip.n.06', 'synonyms': ['poker_chip'], 'id': 247, 'def': 'a small disk-shaped counter used to represent money when gambling', 'name': 'poker_chip'}, {'frequency': 'c', 'synset': 'chocolate_bar.n.01', 'synonyms': ['chocolate_bar'], 'id': 248, 'def': 'a bar of chocolate candy', 'name': 'chocolate_bar'}, {'frequency': 'c', 'synset': 'chocolate_cake.n.01', 'synonyms': ['chocolate_cake'], 'id': 249, 'def': 'cake containing chocolate', 'name': 'chocolate_cake'}, {'frequency': 'r', 'synset': 'chocolate_milk.n.01', 'synonyms': ['chocolate_milk'], 'id': 250, 'def': 'milk flavored with chocolate syrup', 'name': 'chocolate_milk'}, {'frequency': 'r', 'synset': 'chocolate_mousse.n.01', 'synonyms': ['chocolate_mousse'], 'id': 251, 'def': 'dessert mousse made with chocolate', 'name': 'chocolate_mousse'}, {'frequency': 'f', 
'synset': 'choker.n.03', 'synonyms': ['choker', 'collar', 'neckband'], 'id': 252, 'def': 'shirt collar, animal collar, or tight-fitting necklace', 'name': 'choker'}, {'frequency': 'f', 'synset': 'chopping_board.n.01', 'synonyms': ['chopping_board', 'cutting_board', 'chopping_block'], 'id': 253, 'def': 'a wooden board where meats or vegetables can be cut', 'name': 'chopping_board'}, {'frequency': 'f', 'synset': 'chopstick.n.01', 'synonyms': ['chopstick'], 'id': 254, 'def': 'one of a pair of slender sticks used as oriental tableware to eat food with', 'name': 'chopstick'}, {'frequency': 'f', 'synset': 'christmas_tree.n.05', 'synonyms': ['Christmas_tree'], 'id': 255, 'def': 'an ornamented evergreen used as a Christmas decoration', 'name': 'Christmas_tree'}, {'frequency': 'c', 'synset': 'chute.n.02', 'synonyms': ['slide'], 'id': 256, 'def': 'sloping channel through which things can descend', 'name': 'slide'}, {'frequency': 'r', 'synset': 'cider.n.01', 'synonyms': ['cider', 'cyder'], 'id': 257, 'def': 'a beverage made from juice pressed from apples', 'name': 'cider'}, {'frequency': 'r', 'synset': 'cigar_box.n.01', 'synonyms': ['cigar_box'], 'id': 258, 'def': 'a box for holding cigars', 'name': 'cigar_box'}, {'frequency': 'f', 'synset': 'cigarette.n.01', 'synonyms': ['cigarette'], 'id': 259, 'def': 'finely ground tobacco wrapped in paper; for smoking', 'name': 'cigarette'}, {'frequency': 'c', 'synset': 'cigarette_case.n.01', 'synonyms': ['cigarette_case', 'cigarette_pack'], 'id': 260, 'def': 'a small flat case for holding cigarettes', 'name': 'cigarette_case'}, {'frequency': 'f', 'synset': 'cistern.n.02', 'synonyms': ['cistern', 'water_tank'], 'id': 261, 'def': 'a tank that holds the water used to flush a toilet', 'name': 'cistern'}, {'frequency': 'r', 'synset': 'clarinet.n.01', 'synonyms': ['clarinet'], 'id': 262, 'def': 'a single-reed instrument with a straight tube', 'name': 'clarinet'}, {'frequency': 'c', 'synset': 'clasp.n.01', 'synonyms': ['clasp'], 'id': 263, 'def': 'a fastener (as a buckle or hook) that is used to hold two things together', 'name': 'clasp'}, {'frequency': 'c', 'synset': 'cleansing_agent.n.01', 'synonyms': ['cleansing_agent', 'cleanser', 'cleaner'], 'id': 264, 'def': 'a preparation used in cleaning something', 'name': 'cleansing_agent'}, {'frequency': 'r', 'synset': 'cleat.n.02', 'synonyms': ['cleat_(for_securing_rope)'], 'id': 265, 'def': 'a fastener (usually with two projecting horns) around which a rope can be secured', 'name': 'cleat_(for_securing_rope)'}, {'frequency': 'r', 'synset': 'clementine.n.01', 'synonyms': ['clementine'], 'id': 266, 'def': 'a variety of mandarin orange', 'name': 'clementine'}, {'frequency': 'c', 'synset': 'clip.n.03', 'synonyms': ['clip'], 'id': 267, 'def': 'any of various small fasteners used to hold loose articles together', 'name': 'clip'}, {'frequency': 'c', 'synset': 'clipboard.n.01', 'synonyms': ['clipboard'], 'id': 268, 'def': 'a small writing board with a clip at the top for holding papers', 'name': 'clipboard'}, {'frequency': 'r', 'synset': 'clipper.n.03', 'synonyms': ['clippers_(for_plants)'], 'id': 269, 'def': 'shears for cutting grass or shrubbery (often used in the plural)', 'name': 'clippers_(for_plants)'}, {'frequency': 'r', 'synset': 'cloak.n.02', 'synonyms': ['cloak'], 'id': 270, 'def': 'a loose outer garment', 'name': 'cloak'}, {'frequency': 'f', 'synset': 'clock.n.01', 'synonyms': ['clock', 'timepiece', 'timekeeper'], 'id': 271, 'def': 'a timepiece that shows the time of day', 'name': 'clock'}, {'frequency': 'f', 'synset': 
'clock_tower.n.01', 'synonyms': ['clock_tower'], 'id': 272, 'def': 'a tower with a large clock visible high up on an outside face', 'name': 'clock_tower'}, {'frequency': 'c', 'synset': 'clothes_hamper.n.01', 'synonyms': ['clothes_hamper', 'laundry_basket', 'clothes_basket'], 'id': 273, 'def': 'a hamper that holds dirty clothes to be washed or wet clothes to be dried', 'name': 'clothes_hamper'}, {'frequency': 'c', 'synset': 'clothespin.n.01', 'synonyms': ['clothespin', 'clothes_peg'], 'id': 274, 'def': 'wood or plastic fastener; for holding clothes on a clothesline', 'name': 'clothespin'}, {'frequency': 'r', 'synset': 'clutch_bag.n.01', 'synonyms': ['clutch_bag'], 'id': 275, 'def': "a woman's strapless purse that is carried in the hand", 'name': 'clutch_bag'}, {'frequency': 'f', 'synset': 'coaster.n.03', 'synonyms': ['coaster'], 'id': 276, 'def': 'a covering (plate or mat) that protects the surface of a table', 'name': 'coaster'}, {'frequency': 'f', 'synset': 'coat.n.01', 'synonyms': ['coat'], 'id': 277, 'def': 'an outer garment that has sleeves and covers the body from shoulder down', 'name': 'coat'}, {'frequency': 'c', 'synset': 'coat_hanger.n.01', 'synonyms': ['coat_hanger', 'clothes_hanger', 'dress_hanger'], 'id': 278, 'def': "a hanger that is shaped like a person's shoulders", 'name': 'coat_hanger'}, {'frequency': 'c', 'synset': 'coatrack.n.01', 'synonyms': ['coatrack', 'hatrack'], 'id': 279, 'def': 'a rack with hooks for temporarily holding coats and hats', 'name': 'coatrack'}, {'frequency': 'c', 'synset': 'cock.n.04', 'synonyms': ['cock', 'rooster'], 'id': 280, 'def': 'adult male chicken', 'name': 'cock'}, {'frequency': 'r', 'synset': 'cockroach.n.01', 'synonyms': ['cockroach'], 'id': 281, 'def': 'any of numerous chiefly nocturnal insects; some are domestic pests', 'name': 'cockroach'}, {'frequency': 'r', 'synset': 'cocoa.n.01', 'synonyms': ['cocoa_(beverage)', 'hot_chocolate_(beverage)', 'drinking_chocolate'], 'id': 282, 'def': 'a beverage made from cocoa powder and milk and sugar; usually drunk hot', 'name': 'cocoa_(beverage)'}, {'frequency': 'c', 'synset': 'coconut.n.02', 'synonyms': ['coconut', 'cocoanut'], 'id': 283, 'def': 'large hard-shelled brown oval nut with a fibrous husk', 'name': 'coconut'}, {'frequency': 'f', 'synset': 'coffee_maker.n.01', 'synonyms': ['coffee_maker', 'coffee_machine'], 'id': 284, 'def': 'a kitchen appliance for brewing coffee automatically', 'name': 'coffee_maker'}, {'frequency': 'f', 'synset': 'coffee_table.n.01', 'synonyms': ['coffee_table', 'cocktail_table'], 'id': 285, 'def': 'low table where magazines can be placed and coffee or cocktails are served', 'name': 'coffee_table'}, {'frequency': 'c', 'synset': 'coffeepot.n.01', 'synonyms': ['coffeepot'], 'id': 286, 'def': 'tall pot in which coffee is brewed', 'name': 'coffeepot'}, {'frequency': 'r', 'synset': 'coil.n.05', 'synonyms': ['coil'], 'id': 287, 'def': 'tubing that is wound in a spiral', 'name': 'coil'}, {'frequency': 'c', 'synset': 'coin.n.01', 'synonyms': ['coin'], 'id': 288, 'def': 'a flat metal piece (usually a disc) used as money', 'name': 'coin'}, {'frequency': 'c', 'synset': 'colander.n.01', 'synonyms': ['colander', 'cullender'], 'id': 289, 'def': 'bowl-shaped strainer; used to wash or drain foods', 'name': 'colander'}, {'frequency': 'c', 'synset': 'coleslaw.n.01', 'synonyms': ['coleslaw', 'slaw'], 'id': 290, 'def': 'basically shredded cabbage', 'name': 'coleslaw'}, {'frequency': 'r', 'synset': 'coloring_material.n.01', 'synonyms': ['coloring_material', 'colouring_material'], 'id': 291, 
'def': 'any material used for its color', 'name': 'coloring_material'}, {'frequency': 'r', 'synset': 'combination_lock.n.01', 'synonyms': ['combination_lock'], 'id': 292, 'def': 'lock that can be opened only by turning dials in a special sequence', 'name': 'combination_lock'}, {'frequency': 'c', 'synset': 'comforter.n.04', 'synonyms': ['pacifier', 'teething_ring'], 'id': 293, 'def': 'device used for an infant to suck or bite on', 'name': 'pacifier'}, {'frequency': 'r', 'synset': 'comic_book.n.01', 'synonyms': ['comic_book'], 'id': 294, 'def': 'a magazine devoted to comic strips', 'name': 'comic_book'}, {'frequency': 'r', 'synset': 'compass.n.01', 'synonyms': ['compass'], 'id': 295, 'def': 'navigational instrument for finding directions', 'name': 'compass'}, {'frequency': 'f', 'synset': 'computer_keyboard.n.01', 'synonyms': ['computer_keyboard', 'keyboard_(computer)'], 'id': 296, 'def': 'a keyboard that is a data input device for computers', 'name': 'computer_keyboard'}, {'frequency': 'f', 'synset': 'condiment.n.01', 'synonyms': ['condiment'], 'id': 297, 'def': 'a preparation (a sauce or relish or spice) to enhance flavor or enjoyment', 'name': 'condiment'}, {'frequency': 'f', 'synset': 'cone.n.01', 'synonyms': ['cone', 'traffic_cone'], 'id': 298, 'def': 'a cone-shaped object used to direct traffic', 'name': 'cone'}, {'frequency': 'f', 'synset': 'control.n.09', 'synonyms': ['control', 'controller'], 'id': 299, 'def': 'a mechanism that controls the operation of a machine', 'name': 'control'}, {'frequency': 'r', 'synset': 'convertible.n.01', 'synonyms': ['convertible_(automobile)'], 'id': 300, 'def': 'a car that has top that can be folded or removed', 'name': 'convertible_(automobile)'}, {'frequency': 'r', 'synset': 'convertible.n.03', 'synonyms': ['sofa_bed'], 'id': 301, 'def': 'a sofa that can be converted into a bed', 'name': 'sofa_bed'}, {'frequency': 'r', 'synset': 'cooker.n.01', 'synonyms': ['cooker'], 'id': 302, 'def': 'a utensil for cooking', 'name': 'cooker'}, {'frequency': 'f', 'synset': 'cookie.n.01', 'synonyms': ['cookie', 'cooky', 'biscuit_(cookie)'], 'id': 303, 'def': "any of various small flat sweet cakes (`biscuit' is the British term)", 'name': 'cookie'}, {'frequency': 'r', 'synset': 'cooking_utensil.n.01', 'synonyms': ['cooking_utensil'], 'id': 304, 'def': 'a kitchen utensil made of material that does not melt easily; used for cooking', 'name': 'cooking_utensil'}, {'frequency': 'f', 'synset': 'cooler.n.01', 'synonyms': ['cooler_(for_food)', 'ice_chest'], 'id': 305, 'def': 'an insulated box for storing food often with ice', 'name': 'cooler_(for_food)'}, {'frequency': 'f', 'synset': 'cork.n.04', 'synonyms': ['cork_(bottle_plug)', 'bottle_cork'], 'id': 306, 'def': 'the plug in the mouth of a bottle (especially a wine bottle)', 'name': 'cork_(bottle_plug)'}, {'frequency': 'r', 'synset': 'corkboard.n.01', 'synonyms': ['corkboard'], 'id': 307, 'def': 'a sheet consisting of cork granules', 'name': 'corkboard'}, {'frequency': 'c', 'synset': 'corkscrew.n.01', 'synonyms': ['corkscrew', 'bottle_screw'], 'id': 308, 'def': 'a bottle opener that pulls corks', 'name': 'corkscrew'}, {'frequency': 'f', 'synset': 'corn.n.03', 'synonyms': ['edible_corn', 'corn', 'maize'], 'id': 309, 'def': 'ears or kernels of corn that can be prepared and served for human food (only mark individual ears or kernels)', 'name': 'edible_corn'}, {'frequency': 'r', 'synset': 'cornbread.n.01', 'synonyms': ['cornbread'], 'id': 310, 'def': 'bread made primarily of cornmeal', 'name': 'cornbread'}, {'frequency': 'c', 
'synset': 'cornet.n.01', 'synonyms': ['cornet', 'horn', 'trumpet'], 'id': 311, 'def': 'a brass musical instrument with a narrow tube and a flared bell and many valves', 'name': 'cornet'}, {'frequency': 'c', 'synset': 'cornice.n.01', 'synonyms': ['cornice', 'valance', 'valance_board', 'pelmet'], 'id': 312, 'def': 'a decorative framework to conceal curtain fixtures at the top of a window casing', 'name': 'cornice'}, {'frequency': 'r', 'synset': 'cornmeal.n.01', 'synonyms': ['cornmeal'], 'id': 313, 'def': 'coarsely ground corn', 'name': 'cornmeal'}, {'frequency': 'c', 'synset': 'corset.n.01', 'synonyms': ['corset', 'girdle'], 'id': 314, 'def': "a woman's close-fitting foundation garment", 'name': 'corset'}, {'frequency': 'c', 'synset': 'costume.n.04', 'synonyms': ['costume'], 'id': 315, 'def': 'the attire characteristic of a country or a time or a social class', 'name': 'costume'}, {'frequency': 'r', 'synset': 'cougar.n.01', 'synonyms': ['cougar', 'puma', 'catamount', 'mountain_lion', 'panther'], 'id': 316, 'def': 'large American feline resembling a lion', 'name': 'cougar'}, {'frequency': 'r', 'synset': 'coverall.n.01', 'synonyms': ['coverall'], 'id': 317, 'def': 'a loose-fitting protective garment that is worn over other clothing', 'name': 'coverall'}, {'frequency': 'c', 'synset': 'cowbell.n.01', 'synonyms': ['cowbell'], 'id': 318, 'def': 'a bell hung around the neck of cow so that the cow can be easily located', 'name': 'cowbell'}, {'frequency': 'f', 'synset': 'cowboy_hat.n.01', 'synonyms': ['cowboy_hat', 'ten-gallon_hat'], 'id': 319, 'def': 'a hat with a wide brim and a soft crown; worn by American ranch hands', 'name': 'cowboy_hat'}, {'frequency': 'c', 'synset': 'crab.n.01', 'synonyms': ['crab_(animal)'], 'id': 320, 'def': 'decapod having eyes on short stalks and a broad flattened shell and pincers', 'name': 'crab_(animal)'}, {'frequency': 'r', 'synset': 'crab.n.05', 'synonyms': ['crabmeat'], 'id': 321, 'def': 'the edible flesh of any of various crabs', 'name': 'crabmeat'}, {'frequency': 'c', 'synset': 'cracker.n.01', 'synonyms': ['cracker'], 'id': 322, 'def': 'a thin crisp wafer', 'name': 'cracker'}, {'frequency': 'r', 'synset': 'crape.n.01', 'synonyms': ['crape', 'crepe', 'French_pancake'], 'id': 323, 'def': 'small very thin pancake', 'name': 'crape'}, {'frequency': 'f', 'synset': 'crate.n.01', 'synonyms': ['crate'], 'id': 324, 'def': 'a rugged box (usually made of wood); used for shipping', 'name': 'crate'}, {'frequency': 'c', 'synset': 'crayon.n.01', 'synonyms': ['crayon', 'wax_crayon'], 'id': 325, 'def': 'writing or drawing implement made of a colored stick of composition wax', 'name': 'crayon'}, {'frequency': 'r', 'synset': 'cream_pitcher.n.01', 'synonyms': ['cream_pitcher'], 'id': 326, 'def': 'a small pitcher for serving cream', 'name': 'cream_pitcher'}, {'frequency': 'c', 'synset': 'crescent_roll.n.01', 'synonyms': ['crescent_roll', 'croissant'], 'id': 327, 'def': 'very rich flaky crescent-shaped roll', 'name': 'crescent_roll'}, {'frequency': 'c', 'synset': 'crib.n.01', 'synonyms': ['crib', 'cot'], 'id': 328, 'def': 'baby bed with high sides made of slats', 'name': 'crib'}, {'frequency': 'c', 'synset': 'crock.n.03', 'synonyms': ['crock_pot', 'earthenware_jar'], 'id': 329, 'def': 'an earthen jar (made of baked clay) or a modern electric crockpot', 'name': 'crock_pot'}, {'frequency': 'f', 'synset': 'crossbar.n.01', 'synonyms': ['crossbar'], 'id': 330, 'def': 'a horizontal bar that goes across something', 'name': 'crossbar'}, {'frequency': 'r', 'synset': 'crouton.n.01', 'synonyms': 
['crouton'], 'id': 331, 'def': 'a small piece of toasted or fried bread; served in soup or salads', 'name': 'crouton'}, {'frequency': 'c', 'synset': 'crow.n.01', 'synonyms': ['crow'], 'id': 332, 'def': 'black birds having a raucous call', 'name': 'crow'}, {'frequency': 'r', 'synset': 'crowbar.n.01', 'synonyms': ['crowbar', 'wrecking_bar', 'pry_bar'], 'id': 333, 'def': 'a heavy iron lever with one end forged into a wedge', 'name': 'crowbar'}, {'frequency': 'c', 'synset': 'crown.n.04', 'synonyms': ['crown'], 'id': 334, 'def': 'an ornamental jeweled headdress signifying sovereignty', 'name': 'crown'}, {'frequency': 'c', 'synset': 'crucifix.n.01', 'synonyms': ['crucifix'], 'id': 335, 'def': 'representation of the cross on which Jesus died', 'name': 'crucifix'}, {'frequency': 'c', 'synset': 'cruise_ship.n.01', 'synonyms': ['cruise_ship', 'cruise_liner'], 'id': 336, 'def': 'a passenger ship used commercially for pleasure cruises', 'name': 'cruise_ship'}, {'frequency': 'c', 'synset': 'cruiser.n.01', 'synonyms': ['police_cruiser', 'patrol_car', 'police_car', 'squad_car'], 'id': 337, 'def': 'a car in which policemen cruise the streets', 'name': 'police_cruiser'}, {'frequency': 'f', 'synset': 'crumb.n.03', 'synonyms': ['crumb'], 'id': 338, 'def': 'small piece of e.g. bread or cake', 'name': 'crumb'}, {'frequency': 'c', 'synset': 'crutch.n.01', 'synonyms': ['crutch'], 'id': 339, 'def': 'a wooden or metal staff that fits under the armpit and reaches to the ground', 'name': 'crutch'}, {'frequency': 'c', 'synset': 'cub.n.03', 'synonyms': ['cub_(animal)'], 'id': 340, 'def': 'the young of certain carnivorous mammals such as the bear or wolf or lion', 'name': 'cub_(animal)'}, {'frequency': 'c', 'synset': 'cube.n.05', 'synonyms': ['cube', 'square_block'], 'id': 341, 'def': 'a block in the (approximate) shape of a cube', 'name': 'cube'}, {'frequency': 'f', 'synset': 'cucumber.n.02', 'synonyms': ['cucumber', 'cuke'], 'id': 342, 'def': 'cylindrical green fruit with thin green rind and white flesh eaten as a vegetable', 'name': 'cucumber'}, {'frequency': 'c', 'synset': 'cufflink.n.01', 'synonyms': ['cufflink'], 'id': 343, 'def': 'jewelry consisting of linked buttons used to fasten the cuffs of a shirt', 'name': 'cufflink'}, {'frequency': 'f', 'synset': 'cup.n.01', 'synonyms': ['cup'], 'id': 344, 'def': 'a small open container usually used for drinking; usually has a handle', 'name': 'cup'}, {'frequency': 'c', 'synset': 'cup.n.08', 'synonyms': ['trophy_cup'], 'id': 345, 'def': 'a metal award or cup-shaped vessel with handles that is awarded as a trophy to a competition winner', 'name': 'trophy_cup'}, {'frequency': 'f', 'synset': 'cupboard.n.01', 'synonyms': ['cupboard', 'closet'], 'id': 346, 'def': 'a small room (or recess) or cabinet used for storage space', 'name': 'cupboard'}, {'frequency': 'f', 'synset': 'cupcake.n.01', 'synonyms': ['cupcake'], 'id': 347, 'def': 'small cake baked in a muffin tin', 'name': 'cupcake'}, {'frequency': 'r', 'synset': 'curler.n.01', 'synonyms': ['hair_curler', 'hair_roller', 'hair_crimper'], 'id': 348, 'def': 'a cylindrical tube around which the hair is wound to curl it', 'name': 'hair_curler'}, {'frequency': 'r', 'synset': 'curling_iron.n.01', 'synonyms': ['curling_iron'], 'id': 349, 'def': 'a cylindrical home appliance that heats hair that has been curled around it', 'name': 'curling_iron'}, {'frequency': 'f', 'synset': 'curtain.n.01', 'synonyms': ['curtain', 'drapery'], 'id': 350, 'def': 'hanging cloth used as a blind (especially for a window)', 'name': 'curtain'}, 
{'frequency': 'f', 'synset': 'cushion.n.03', 'synonyms': ['cushion'], 'id': 351, 'def': 'a soft bag filled with air or padding such as feathers or foam rubber', 'name': 'cushion'}, {'frequency': 'r', 'synset': 'cylinder.n.04', 'synonyms': ['cylinder'], 'id': 352, 'def': 'a cylindrical container', 'name': 'cylinder'}, {'frequency': 'r', 'synset': 'cymbal.n.01', 'synonyms': ['cymbal'], 'id': 353, 'def': 'a percussion instrument consisting of a concave brass disk', 'name': 'cymbal'}, {'frequency': 'r', 'synset': 'dagger.n.01', 'synonyms': ['dagger'], 'id': 354, 'def': 'a short knife with a pointed blade used for piercing or stabbing', 'name': 'dagger'}, {'frequency': 'r', 'synset': 'dalmatian.n.02', 'synonyms': ['dalmatian'], 'id': 355, 'def': 'a large breed having a smooth white coat with black or brown spots', 'name': 'dalmatian'}, {'frequency': 'c', 'synset': 'dartboard.n.01', 'synonyms': ['dartboard'], 'id': 356, 'def': 'a circular board of wood or cork used as the target in the game of darts', 'name': 'dartboard'}, {'frequency': 'r', 'synset': 'date.n.08', 'synonyms': ['date_(fruit)'], 'id': 357, 'def': 'sweet edible fruit of the date palm with a single long woody seed', 'name': 'date_(fruit)'}, {'frequency': 'f', 'synset': 'deck_chair.n.01', 'synonyms': ['deck_chair', 'beach_chair'], 'id': 358, 'def': 'a folding chair for use outdoors; a wooden frame supports a length of canvas', 'name': 'deck_chair'}, {'frequency': 'c', 'synset': 'deer.n.01', 'synonyms': ['deer', 'cervid'], 'id': 359, 'def': "distinguished from Bovidae by the male's having solid deciduous antlers", 'name': 'deer'}, {'frequency': 'c', 'synset': 'dental_floss.n.01', 'synonyms': ['dental_floss', 'floss'], 'id': 360, 'def': 'a soft thread for cleaning the spaces between the teeth', 'name': 'dental_floss'}, {'frequency': 'f', 'synset': 'desk.n.01', 'synonyms': ['desk'], 'id': 361, 'def': 'a piece of furniture with a writing surface and usually drawers or other compartments', 'name': 'desk'}, {'frequency': 'r', 'synset': 'detergent.n.01', 'synonyms': ['detergent'], 'id': 362, 'def': 'a surface-active chemical widely used in industry and laundering', 'name': 'detergent'}, {'frequency': 'c', 'synset': 'diaper.n.01', 'synonyms': ['diaper'], 'id': 363, 'def': 'garment consisting of a folded cloth drawn up between the legs and fastened at the waist', 'name': 'diaper'}, {'frequency': 'r', 'synset': 'diary.n.01', 'synonyms': ['diary', 'journal'], 'id': 364, 'def': 'yearly planner book', 'name': 'diary'}, {'frequency': 'r', 'synset': 'die.n.01', 'synonyms': ['die', 'dice'], 'id': 365, 'def': 'a small cube with 1 to 6 spots on the six faces; used in gambling', 'name': 'die'}, {'frequency': 'r', 'synset': 'dinghy.n.01', 'synonyms': ['dinghy', 'dory', 'rowboat'], 'id': 366, 'def': 'a small boat of shallow draft with seats and oars with which it is propelled', 'name': 'dinghy'}, {'frequency': 'f', 'synset': 'dining_table.n.01', 'synonyms': ['dining_table'], 'id': 367, 'def': 'a table at which meals are served', 'name': 'dining_table'}, {'frequency': 'r', 'synset': 'dinner_jacket.n.01', 'synonyms': ['tux', 'tuxedo'], 'id': 368, 'def': 'semiformal evening dress for men', 'name': 'tux'}, {'frequency': 'f', 'synset': 'dish.n.01', 'synonyms': ['dish'], 'id': 369, 'def': 'a piece of dishware normally used as a container for holding or serving food', 'name': 'dish'}, {'frequency': 'c', 'synset': 'dish.n.05', 'synonyms': ['dish_antenna'], 'id': 370, 'def': 'directional antenna consisting of a parabolic reflector', 'name': 'dish_antenna'}, 
{'frequency': 'c', 'synset': 'dishrag.n.01', 'synonyms': ['dishrag', 'dishcloth'], 'id': 371, 'def': 'a cloth for washing dishes or cleaning in general', 'name': 'dishrag'}, {'frequency': 'f', 'synset': 'dishtowel.n.01', 'synonyms': ['dishtowel', 'tea_towel'], 'id': 372, 'def': 'a towel for drying dishes', 'name': 'dishtowel'}, {'frequency': 'f', 'synset': 'dishwasher.n.01', 'synonyms': ['dishwasher', 'dishwashing_machine'], 'id': 373, 'def': 'a machine for washing dishes', 'name': 'dishwasher'}, {'frequency': 'r', 'synset': 'dishwasher_detergent.n.01', 'synonyms': ['dishwasher_detergent', 'dishwashing_detergent', 'dishwashing_liquid', 'dishsoap'], 'id': 374, 'def': 'dishsoap or dish detergent designed for use in dishwashers', 'name': 'dishwasher_detergent'}, {'frequency': 'f', 'synset': 'dispenser.n.01', 'synonyms': ['dispenser'], 'id': 375, 'def': 'a container so designed that the contents can be used in prescribed amounts', 'name': 'dispenser'}, {'frequency': 'r', 'synset': 'diving_board.n.01', 'synonyms': ['diving_board'], 'id': 376, 'def': 'a springboard from which swimmers can dive', 'name': 'diving_board'}, {'frequency': 'f', 'synset': 'dixie_cup.n.01', 'synonyms': ['Dixie_cup', 'paper_cup'], 'id': 377, 'def': 'a disposable cup made of paper; for holding drinks', 'name': 'Dixie_cup'}, {'frequency': 'f', 'synset': 'dog.n.01', 'synonyms': ['dog'], 'id': 378, 'def': 'a common domesticated dog', 'name': 'dog'}, {'frequency': 'f', 'synset': 'dog_collar.n.01', 'synonyms': ['dog_collar'], 'id': 379, 'def': 'a collar for a dog', 'name': 'dog_collar'}, {'frequency': 'f', 'synset': 'doll.n.01', 'synonyms': ['doll'], 'id': 380, 'def': 'a toy replica of a HUMAN (NOT AN ANIMAL)', 'name': 'doll'}, {'frequency': 'r', 'synset': 'dollar.n.02', 'synonyms': ['dollar', 'dollar_bill', 'one_dollar_bill'], 'id': 381, 'def': 'a piece of paper money worth one dollar', 'name': 'dollar'}, {'frequency': 'r', 'synset': 'dollhouse.n.01', 'synonyms': ['dollhouse', "doll's_house"], 'id': 382, 'def': "a house so small that it is likened to a child's plaything", 'name': 'dollhouse'}, {'frequency': 'c', 'synset': 'dolphin.n.02', 'synonyms': ['dolphin'], 'id': 383, 'def': 'any of various small toothed whales with a beaklike snout; larger than porpoises', 'name': 'dolphin'}, {'frequency': 'c', 'synset': 'domestic_ass.n.01', 'synonyms': ['domestic_ass', 'donkey'], 'id': 384, 'def': 'domestic beast of burden descended from the African wild ass; patient but stubborn', 'name': 'domestic_ass'}, {'frequency': 'f', 'synset': 'doorknob.n.01', 'synonyms': ['doorknob', 'doorhandle'], 'id': 385, 'def': "a knob used to open a door (often called `doorhandle' in Great Britain)", 'name': 'doorknob'}, {'frequency': 'c', 'synset': 'doormat.n.02', 'synonyms': ['doormat', 'welcome_mat'], 'id': 386, 'def': 'a mat placed outside an exterior door for wiping the shoes before entering', 'name': 'doormat'}, {'frequency': 'f', 'synset': 'doughnut.n.02', 'synonyms': ['doughnut', 'donut'], 'id': 387, 'def': 'a small ring-shaped friedcake', 'name': 'doughnut'}, {'frequency': 'r', 'synset': 'dove.n.01', 'synonyms': ['dove'], 'id': 388, 'def': 'any of numerous small pigeons', 'name': 'dove'}, {'frequency': 'r', 'synset': 'dragonfly.n.01', 'synonyms': ['dragonfly'], 'id': 389, 'def': 'slender-bodied non-stinging insect having iridescent wings that are outspread at rest', 'name': 'dragonfly'}, {'frequency': 'f', 'synset': 'drawer.n.01', 'synonyms': ['drawer'], 'id': 390, 'def': 'a boxlike container in a piece of furniture; made so as to slide in and 
out', 'name': 'drawer'}, {'frequency': 'c', 'synset': 'drawers.n.01', 'synonyms': ['underdrawers', 'boxers', 'boxershorts'], 'id': 391, 'def': 'underpants worn by men', 'name': 'underdrawers'}, {'frequency': 'f', 'synset': 'dress.n.01', 'synonyms': ['dress', 'frock'], 'id': 392, 'def': 'a one-piece garment for a woman; has skirt and bodice', 'name': 'dress'}, {'frequency': 'c', 'synset': 'dress_hat.n.01', 'synonyms': ['dress_hat', 'high_hat', 'opera_hat', 'silk_hat', 'top_hat'], 'id': 393, 'def': "a man's hat with a tall crown; usually covered with silk or with beaver fur", 'name': 'dress_hat'}, {'frequency': 'f', 'synset': 'dress_suit.n.01', 'synonyms': ['dress_suit'], 'id': 394, 'def': 'formalwear consisting of full evening dress for men', 'name': 'dress_suit'}, {'frequency': 'f', 'synset': 'dresser.n.05', 'synonyms': ['dresser'], 'id': 395, 'def': 'a cabinet with shelves', 'name': 'dresser'}, {'frequency': 'c', 'synset': 'drill.n.01', 'synonyms': ['drill'], 'id': 396, 'def': 'a tool with a sharp rotating point for making holes in hard materials', 'name': 'drill'}, {'frequency': 'r', 'synset': 'drone.n.04', 'synonyms': ['drone'], 'id': 397, 'def': 'an aircraft without a pilot that is operated by remote control', 'name': 'drone'}, {'frequency': 'r', 'synset': 'dropper.n.01', 'synonyms': ['dropper', 'eye_dropper'], 'id': 398, 'def': 'pipet consisting of a small tube with a vacuum bulb at one end for drawing liquid in and releasing it a drop at a time', 'name': 'dropper'}, {'frequency': 'c', 'synset': 'drum.n.01', 'synonyms': ['drum_(musical_instrument)'], 'id': 399, 'def': 'a musical percussion instrument; usually consists of a hollow cylinder with a membrane stretched across each end', 'name': 'drum_(musical_instrument)'}, {'frequency': 'r', 'synset': 'drumstick.n.02', 'synonyms': ['drumstick'], 'id': 400, 'def': 'a stick used for playing a drum', 'name': 'drumstick'}, {'frequency': 'f', 'synset': 'duck.n.01', 'synonyms': ['duck'], 'id': 401, 'def': 'small web-footed broad-billed swimming bird', 'name': 'duck'}, {'frequency': 'c', 'synset': 'duckling.n.02', 'synonyms': ['duckling'], 'id': 402, 'def': 'young duck', 'name': 'duckling'}, {'frequency': 'c', 'synset': 'duct_tape.n.01', 'synonyms': ['duct_tape'], 'id': 403, 'def': 'a wide silvery adhesive tape', 'name': 'duct_tape'}, {'frequency': 'f', 'synset': 'duffel_bag.n.01', 'synonyms': ['duffel_bag', 'duffle_bag', 'duffel', 'duffle'], 'id': 404, 'def': 'a large cylindrical bag of heavy cloth (does not include suitcases)', 'name': 'duffel_bag'}, {'frequency': 'r', 'synset': 'dumbbell.n.01', 'synonyms': ['dumbbell'], 'id': 405, 'def': 'an exercising weight with two ball-like ends connected by a short handle', 'name': 'dumbbell'}, {'frequency': 'c', 'synset': 'dumpster.n.01', 'synonyms': ['dumpster'], 'id': 406, 'def': 'a container designed to receive and transport and dump waste', 'name': 'dumpster'}, {'frequency': 'r', 'synset': 'dustpan.n.02', 'synonyms': ['dustpan'], 'id': 407, 'def': 'a short-handled receptacle into which dust can be swept', 'name': 'dustpan'}, {'frequency': 'c', 'synset': 'eagle.n.01', 'synonyms': ['eagle'], 'id': 408, 'def': 'large birds of prey noted for their broad wings and strong soaring flight', 'name': 'eagle'}, {'frequency': 'f', 'synset': 'earphone.n.01', 'synonyms': ['earphone', 'earpiece', 'headphone'], 'id': 409, 'def': 'device for listening to audio that is held over or inserted into the ear', 'name': 'earphone'}, {'frequency': 'r', 'synset': 'earplug.n.01', 'synonyms': ['earplug'], 'id': 410, 'def': 'a 
soft plug that is inserted into the ear canal to block sound', 'name': 'earplug'}, {'frequency': 'f', 'synset': 'earring.n.01', 'synonyms': ['earring'], 'id': 411, 'def': 'jewelry to ornament the ear', 'name': 'earring'}, {'frequency': 'c', 'synset': 'easel.n.01', 'synonyms': ['easel'], 'id': 412, 'def': "an upright tripod for displaying something (usually an artist's canvas)", 'name': 'easel'}, {'frequency': 'r', 'synset': 'eclair.n.01', 'synonyms': ['eclair'], 'id': 413, 'def': 'oblong cream puff', 'name': 'eclair'}, {'frequency': 'r', 'synset': 'eel.n.01', 'synonyms': ['eel'], 'id': 414, 'def': 'an elongate fish with fatty flesh', 'name': 'eel'}, {'frequency': 'f', 'synset': 'egg.n.02', 'synonyms': ['egg', 'eggs'], 'id': 415, 'def': 'oval reproductive body of a fowl (especially a hen) used as food', 'name': 'egg'}, {'frequency': 'r', 'synset': 'egg_roll.n.01', 'synonyms': ['egg_roll', 'spring_roll'], 'id': 416, 'def': 'minced vegetables and meat wrapped in a pancake and fried', 'name': 'egg_roll'}, {'frequency': 'c', 'synset': 'egg_yolk.n.01', 'synonyms': ['egg_yolk', 'yolk_(egg)'], 'id': 417, 'def': 'the yellow spherical part of an egg', 'name': 'egg_yolk'}, {'frequency': 'c', 'synset': 'eggbeater.n.02', 'synonyms': ['eggbeater', 'eggwhisk'], 'id': 418, 'def': 'a mixer for beating eggs or whipping cream', 'name': 'eggbeater'}, {'frequency': 'c', 'synset': 'eggplant.n.01', 'synonyms': ['eggplant', 'aubergine'], 'id': 419, 'def': 'egg-shaped vegetable having a shiny skin typically dark purple', 'name': 'eggplant'}, {'frequency': 'r', 'synset': 'electric_chair.n.01', 'synonyms': ['electric_chair'], 'id': 420, 'def': 'a chair-shaped instrument of execution by electrocution', 'name': 'electric_chair'}, {'frequency': 'f', 'synset': 'electric_refrigerator.n.01', 'synonyms': ['refrigerator'], 'id': 421, 'def': 'a refrigerator in which the coolant is pumped around by an electric motor', 'name': 'refrigerator'}, {'frequency': 'f', 'synset': 'elephant.n.01', 'synonyms': ['elephant'], 'id': 422, 'def': 'a common elephant', 'name': 'elephant'}, {'frequency': 'c', 'synset': 'elk.n.01', 'synonyms': ['elk', 'moose'], 'id': 423, 'def': 'large northern deer with enormous flattened antlers in the male', 'name': 'elk'}, {'frequency': 'c', 'synset': 'envelope.n.01', 'synonyms': ['envelope'], 'id': 424, 'def': 'a flat (usually rectangular) container for a letter, thin package, etc.', 'name': 'envelope'}, {'frequency': 'c', 'synset': 'eraser.n.01', 'synonyms': ['eraser'], 'id': 425, 'def': 'an implement used to erase something', 'name': 'eraser'}, {'frequency': 'r', 'synset': 'escargot.n.01', 'synonyms': ['escargot'], 'id': 426, 'def': 'edible snail usually served in the shell with a sauce of melted butter and garlic', 'name': 'escargot'}, {'frequency': 'r', 'synset': 'eyepatch.n.01', 'synonyms': ['eyepatch'], 'id': 427, 'def': 'a protective cloth covering for an injured eye', 'name': 'eyepatch'}, {'frequency': 'r', 'synset': 'falcon.n.01', 'synonyms': ['falcon'], 'id': 428, 'def': 'birds of prey having long pointed powerful wings adapted for swift flight', 'name': 'falcon'}, {'frequency': 'f', 'synset': 'fan.n.01', 'synonyms': ['fan'], 'id': 429, 'def': 'a device for creating a current of air by movement of a surface or surfaces', 'name': 'fan'}, {'frequency': 'f', 'synset': 'faucet.n.01', 'synonyms': ['faucet', 'spigot', 'tap'], 'id': 430, 'def': 'a regulator for controlling the flow of a liquid from a reservoir', 'name': 'faucet'}, {'frequency': 'r', 'synset': 'fedora.n.01', 'synonyms': ['fedora'], 'id': 
431, 'def': 'a hat made of felt with a creased crown', 'name': 'fedora'}, {'frequency': 'r', 'synset': 'ferret.n.02', 'synonyms': ['ferret'], 'id': 432, 'def': 'domesticated albino variety of the European polecat bred for hunting rats and rabbits', 'name': 'ferret'}, {'frequency': 'c', 'synset': 'ferris_wheel.n.01', 'synonyms': ['Ferris_wheel'], 'id': 433, 'def': 'a large wheel with suspended seats that remain upright as the wheel rotates', 'name': 'Ferris_wheel'}, {'frequency': 'c', 'synset': 'ferry.n.01', 'synonyms': ['ferry', 'ferryboat'], 'id': 434, 'def': 'a boat that transports people or vehicles across a body of water and operates on a regular schedule', 'name': 'ferry'}, {'frequency': 'r', 'synset': 'fig.n.04', 'synonyms': ['fig_(fruit)'], 'id': 435, 'def': 'fleshy sweet pear-shaped yellowish or purple fruit eaten fresh or preserved or dried', 'name': 'fig_(fruit)'}, {'frequency': 'c', 'synset': 'fighter.n.02', 'synonyms': ['fighter_jet', 'fighter_aircraft', 'attack_aircraft'], 'id': 436, 'def': 'a high-speed military or naval airplane designed to destroy enemy targets', 'name': 'fighter_jet'}, {'frequency': 'f', 'synset': 'figurine.n.01', 'synonyms': ['figurine'], 'id': 437, 'def': 'a small carved or molded figure', 'name': 'figurine'}, {'frequency': 'c', 'synset': 'file.n.03', 'synonyms': ['file_cabinet', 'filing_cabinet'], 'id': 438, 'def': 'office furniture consisting of a container for keeping papers in order', 'name': 'file_cabinet'}, {'frequency': 'r', 'synset': 'file.n.04', 'synonyms': ['file_(tool)'], 'id': 439, 'def': 'a steel hand tool with small sharp teeth on some or all of its surfaces; used for smoothing wood or metal', 'name': 'file_(tool)'}, {'frequency': 'f', 'synset': 'fire_alarm.n.02', 'synonyms': ['fire_alarm', 'smoke_alarm'], 'id': 440, 'def': 'an alarm that is tripped off by fire or smoke', 'name': 'fire_alarm'}, {'frequency': 'f', 'synset': 'fire_engine.n.01', 'synonyms': ['fire_engine', 'fire_truck'], 'id': 441, 'def': 'large trucks that carry firefighters and equipment to the site of a fire', 'name': 'fire_engine'}, {'frequency': 'f', 'synset': 'fire_extinguisher.n.01', 'synonyms': ['fire_extinguisher', 'extinguisher'], 'id': 442, 'def': 'a manually operated device for extinguishing small fires', 'name': 'fire_extinguisher'}, {'frequency': 'c', 'synset': 'fire_hose.n.01', 'synonyms': ['fire_hose'], 'id': 443, 'def': 'a large hose that carries water from a fire hydrant to the site of the fire', 'name': 'fire_hose'}, {'frequency': 'f', 'synset': 'fireplace.n.01', 'synonyms': ['fireplace'], 'id': 444, 'def': 'an open recess in a wall at the base of a chimney where a fire can be built', 'name': 'fireplace'}, {'frequency': 'f', 'synset': 'fireplug.n.01', 'synonyms': ['fireplug', 'fire_hydrant', 'hydrant'], 'id': 445, 'def': 'an upright hydrant for drawing water to use in fighting a fire', 'name': 'fireplug'}, {'frequency': 'r', 'synset': 'first-aid_kit.n.01', 'synonyms': ['first-aid_kit'], 'id': 446, 'def': 'kit consisting of a set of bandages and medicines for giving first aid', 'name': 'first-aid_kit'}, {'frequency': 'f', 'synset': 'fish.n.01', 'synonyms': ['fish'], 'id': 447, 'def': 'any of various mostly cold-blooded aquatic vertebrates usually having scales and breathing through gills', 'name': 'fish'}, {'frequency': 'c', 'synset': 'fish.n.02', 'synonyms': ['fish_(food)'], 'id': 448, 'def': 'the flesh of fish used as food', 'name': 'fish_(food)'}, {'frequency': 'r', 'synset': 'fishbowl.n.02', 'synonyms': ['fishbowl', 'goldfish_bowl'], 'id': 449, 'def': 'a 
transparent bowl in which small fish are kept', 'name': 'fishbowl'}, {'frequency': 'c', 'synset': 'fishing_rod.n.01', 'synonyms': ['fishing_rod', 'fishing_pole'], 'id': 450, 'def': 'a rod that is used in fishing to extend the fishing line', 'name': 'fishing_rod'}, {'frequency': 'f', 'synset': 'flag.n.01', 'synonyms': ['flag'], 'id': 451, 'def': 'emblem usually consisting of a rectangular piece of cloth of distinctive design (do not include pole)', 'name': 'flag'}, {'frequency': 'f', 'synset': 'flagpole.n.02', 'synonyms': ['flagpole', 'flagstaff'], 'id': 452, 'def': 'a tall staff or pole on which a flag is raised', 'name': 'flagpole'}, {'frequency': 'c', 'synset': 'flamingo.n.01', 'synonyms': ['flamingo'], 'id': 453, 'def': 'large pink web-footed bird with down-bent bill', 'name': 'flamingo'}, {'frequency': 'c', 'synset': 'flannel.n.01', 'synonyms': ['flannel'], 'id': 454, 'def': 'a soft light woolen fabric; used for clothing', 'name': 'flannel'}, {'frequency': 'c', 'synset': 'flap.n.01', 'synonyms': ['flap'], 'id': 455, 'def': 'any broad thin covering attached at one edge, such as a mud flap next to a wheel or a flap on an airplane wing', 'name': 'flap'}, {'frequency': 'r', 'synset': 'flash.n.10', 'synonyms': ['flash', 'flashbulb'], 'id': 456, 'def': 'a lamp for providing momentary light to take a photograph', 'name': 'flash'}, {'frequency': 'c', 'synset': 'flashlight.n.01', 'synonyms': ['flashlight', 'torch'], 'id': 457, 'def': 'a small portable battery-powered electric lamp', 'name': 'flashlight'}, {'frequency': 'r', 'synset': 'fleece.n.03', 'synonyms': ['fleece'], 'id': 458, 'def': 'a soft bulky fabric with deep pile; used chiefly for clothing', 'name': 'fleece'}, {'frequency': 'f', 'synset': 'flip-flop.n.02', 'synonyms': ['flip-flop_(sandal)'], 'id': 459, 'def': 'a backless sandal held to the foot by a thong between two toes', 'name': 'flip-flop_(sandal)'}, {'frequency': 'c', 'synset': 'flipper.n.01', 'synonyms': ['flipper_(footwear)', 'fin_(footwear)'], 'id': 460, 'def': 'a shoe to aid a person in swimming', 'name': 'flipper_(footwear)'}, {'frequency': 'f', 'synset': 'flower_arrangement.n.01', 'synonyms': ['flower_arrangement', 'floral_arrangement'], 'id': 461, 'def': 'a decorative arrangement of flowers', 'name': 'flower_arrangement'}, {'frequency': 'c', 'synset': 'flute.n.02', 'synonyms': ['flute_glass', 'champagne_flute'], 'id': 462, 'def': 'a tall narrow wineglass', 'name': 'flute_glass'}, {'frequency': 'c', 'synset': 'foal.n.01', 'synonyms': ['foal'], 'id': 463, 'def': 'a young horse', 'name': 'foal'}, {'frequency': 'c', 'synset': 'folding_chair.n.01', 'synonyms': ['folding_chair'], 'id': 464, 'def': 'a chair that can be folded flat for storage', 'name': 'folding_chair'}, {'frequency': 'c', 'synset': 'food_processor.n.01', 'synonyms': ['food_processor'], 'id': 465, 'def': 'a kitchen appliance for shredding, blending, chopping, or slicing food', 'name': 'food_processor'}, {'frequency': 'c', 'synset': 'football.n.02', 'synonyms': ['football_(American)'], 'id': 466, 'def': 'the inflated oblong ball used in playing American football', 'name': 'football_(American)'}, {'frequency': 'r', 'synset': 'football_helmet.n.01', 'synonyms': ['football_helmet'], 'id': 467, 'def': 'a padded helmet with a face mask to protect the head of football players', 'name': 'football_helmet'}, {'frequency': 'c', 'synset': 'footstool.n.01', 'synonyms': ['footstool', 'footrest'], 'id': 468, 'def': 'a low seat or a stool to rest the feet of a seated person', 'name': 'footstool'}, {'frequency': 'f', 'synset': 
'fork.n.01', 'synonyms': ['fork'], 'id': 469, 'def': 'cutlery used for serving and eating food', 'name': 'fork'}, {'frequency': 'c', 'synset': 'forklift.n.01', 'synonyms': ['forklift'], 'id': 470, 'def': 'an industrial vehicle with a power operated fork in front that can be inserted under loads to lift and move them', 'name': 'forklift'}, {'frequency': 'c', 'synset': 'freight_car.n.01', 'synonyms': ['freight_car'], 'id': 471, 'def': 'a railway car that carries freight', 'name': 'freight_car'}, {'frequency': 'c', 'synset': 'french_toast.n.01', 'synonyms': ['French_toast'], 'id': 472, 'def': 'bread slice dipped in egg and milk and fried', 'name': 'French_toast'}, {'frequency': 'c', 'synset': 'freshener.n.01', 'synonyms': ['freshener', 'air_freshener'], 'id': 473, 'def': 'anything that freshens air by removing or covering odor', 'name': 'freshener'}, {'frequency': 'f', 'synset': 'frisbee.n.01', 'synonyms': ['frisbee'], 'id': 474, 'def': 'a light, plastic disk propelled with a flip of the wrist for recreation or competition', 'name': 'frisbee'}, {'frequency': 'c', 'synset': 'frog.n.01', 'synonyms': ['frog', 'toad', 'toad_frog'], 'id': 475, 'def': 'a tailless stout-bodied amphibians with long hind limbs for leaping', 'name': 'frog'}, {'frequency': 'c', 'synset': 'fruit_juice.n.01', 'synonyms': ['fruit_juice'], 'id': 476, 'def': 'drink produced by squeezing or crushing fruit', 'name': 'fruit_juice'}, {'frequency': 'f', 'synset': 'frying_pan.n.01', 'synonyms': ['frying_pan', 'frypan', 'skillet'], 'id': 477, 'def': 'a pan used for frying foods', 'name': 'frying_pan'}, {'frequency': 'r', 'synset': 'fudge.n.01', 'synonyms': ['fudge'], 'id': 478, 'def': 'soft creamy candy', 'name': 'fudge'}, {'frequency': 'r', 'synset': 'funnel.n.02', 'synonyms': ['funnel'], 'id': 479, 'def': 'a cone-shaped utensil used to channel a substance into a container with a small mouth', 'name': 'funnel'}, {'frequency': 'r', 'synset': 'futon.n.01', 'synonyms': ['futon'], 'id': 480, 'def': 'a pad that is used for sleeping on the floor or on a raised frame', 'name': 'futon'}, {'frequency': 'r', 'synset': 'gag.n.02', 'synonyms': ['gag', 'muzzle'], 'id': 481, 'def': "restraint put into a person's mouth to prevent speaking or shouting", 'name': 'gag'}, {'frequency': 'r', 'synset': 'garbage.n.03', 'synonyms': ['garbage'], 'id': 482, 'def': 'a receptacle where waste can be discarded', 'name': 'garbage'}, {'frequency': 'c', 'synset': 'garbage_truck.n.01', 'synonyms': ['garbage_truck'], 'id': 483, 'def': 'a truck for collecting domestic refuse', 'name': 'garbage_truck'}, {'frequency': 'c', 'synset': 'garden_hose.n.01', 'synonyms': ['garden_hose'], 'id': 484, 'def': 'a hose used for watering a lawn or garden', 'name': 'garden_hose'}, {'frequency': 'c', 'synset': 'gargle.n.01', 'synonyms': ['gargle', 'mouthwash'], 'id': 485, 'def': 'a medicated solution used for gargling and rinsing the mouth', 'name': 'gargle'}, {'frequency': 'r', 'synset': 'gargoyle.n.02', 'synonyms': ['gargoyle'], 'id': 486, 'def': 'an ornament consisting of a grotesquely carved figure of a person or animal', 'name': 'gargoyle'}, {'frequency': 'c', 'synset': 'garlic.n.02', 'synonyms': ['garlic', 'ail'], 'id': 487, 'def': 'aromatic bulb used as seasoning', 'name': 'garlic'}, {'frequency': 'r', 'synset': 'gasmask.n.01', 'synonyms': ['gasmask', 'respirator', 'gas_helmet'], 'id': 488, 'def': 'a protective face mask with a filter', 'name': 'gasmask'}, {'frequency': 'c', 'synset': 'gazelle.n.01', 'synonyms': ['gazelle'], 'id': 489, 'def': 'small swift graceful antelope of 
Africa and Asia having lustrous eyes', 'name': 'gazelle'}, {'frequency': 'c', 'synset': 'gelatin.n.02', 'synonyms': ['gelatin', 'jelly'], 'id': 490, 'def': 'an edible jelly made with gelatin and used as a dessert or salad base or a coating for foods', 'name': 'gelatin'}, {'frequency': 'r', 'synset': 'gem.n.02', 'synonyms': ['gemstone'], 'id': 491, 'def': 'a crystalline rock that can be cut and polished for jewelry', 'name': 'gemstone'}, {'frequency': 'r', 'synset': 'generator.n.02', 'synonyms': ['generator'], 'id': 492, 'def': 'engine that converts mechanical energy into electrical energy by electromagnetic induction', 'name': 'generator'}, {'frequency': 'c', 'synset': 'giant_panda.n.01', 'synonyms': ['giant_panda', 'panda', 'panda_bear'], 'id': 493, 'def': 'large black-and-white herbivorous mammal of bamboo forests of China and Tibet', 'name': 'giant_panda'}, {'frequency': 'c', 'synset': 'gift_wrap.n.01', 'synonyms': ['gift_wrap'], 'id': 494, 'def': 'attractive wrapping paper suitable for wrapping gifts', 'name': 'gift_wrap'}, {'frequency': 'c', 'synset': 'ginger.n.03', 'synonyms': ['ginger', 'gingerroot'], 'id': 495, 'def': 'the root of the common ginger plant; used fresh as a seasoning', 'name': 'ginger'}, {'frequency': 'f', 'synset': 'giraffe.n.01', 'synonyms': ['giraffe'], 'id': 496, 'def': 'tall animal having a spotted coat and small horns and very long neck and legs', 'name': 'giraffe'}, {'frequency': 'c', 'synset': 'girdle.n.02', 'synonyms': ['cincture', 'sash', 'waistband', 'waistcloth'], 'id': 497, 'def': 'a band of material around the waist that strengthens a skirt or trousers', 'name': 'cincture'}, {'frequency': 'f', 'synset': 'glass.n.02', 'synonyms': ['glass_(drink_container)', 'drinking_glass'], 'id': 498, 'def': 'a container for holding liquids while drinking', 'name': 'glass_(drink_container)'}, {'frequency': 'c', 'synset': 'globe.n.03', 'synonyms': ['globe'], 'id': 499, 'def': 'a sphere on which a map (especially of the earth) is represented', 'name': 'globe'}, {'frequency': 'f', 'synset': 'glove.n.02', 'synonyms': ['glove'], 'id': 500, 'def': 'handwear covering the hand', 'name': 'glove'}, {'frequency': 'c', 'synset': 'goat.n.01', 'synonyms': ['goat'], 'id': 501, 'def': 'a common goat', 'name': 'goat'}, {'frequency': 'f', 'synset': 'goggles.n.01', 'synonyms': ['goggles'], 'id': 502, 'def': 'tight-fitting spectacles worn to protect the eyes', 'name': 'goggles'}, {'frequency': 'r', 'synset': 'goldfish.n.01', 'synonyms': ['goldfish'], 'id': 503, 'def': 'small golden or orange-red freshwater fishes used as pond or aquarium pets', 'name': 'goldfish'}, {'frequency': 'c', 'synset': 'golf_club.n.02', 'synonyms': ['golf_club', 'golf-club'], 'id': 504, 'def': 'golf equipment used by a golfer to hit a golf ball', 'name': 'golf_club'}, {'frequency': 'c', 'synset': 'golfcart.n.01', 'synonyms': ['golfcart'], 'id': 505, 'def': 'a small motor vehicle in which golfers can ride between shots', 'name': 'golfcart'}, {'frequency': 'r', 'synset': 'gondola.n.02', 'synonyms': ['gondola_(boat)'], 'id': 506, 'def': 'long narrow flat-bottomed boat propelled by sculling; traditionally used on canals of Venice', 'name': 'gondola_(boat)'}, {'frequency': 'c', 'synset': 'goose.n.01', 'synonyms': ['goose'], 'id': 507, 'def': 'loud, web-footed long-necked aquatic birds usually larger than ducks', 'name': 'goose'}, {'frequency': 'r', 'synset': 'gorilla.n.01', 'synonyms': ['gorilla'], 'id': 508, 'def': 'largest ape', 'name': 'gorilla'}, {'frequency': 'r', 'synset': 'gourd.n.02', 'synonyms': ['gourd'], 
'id': 509, 'def': 'any of numerous inedible fruits with hard rinds', 'name': 'gourd'}, {'frequency': 'f', 'synset': 'grape.n.01', 'synonyms': ['grape'], 'id': 510, 'def': 'any of various juicy fruit with green or purple skins; grow in clusters', 'name': 'grape'}, {'frequency': 'c', 'synset': 'grater.n.01', 'synonyms': ['grater'], 'id': 511, 'def': 'utensil with sharp perforations for shredding foods (as vegetables or cheese)', 'name': 'grater'}, {'frequency': 'c', 'synset': 'gravestone.n.01', 'synonyms': ['gravestone', 'headstone', 'tombstone'], 'id': 512, 'def': 'a stone that is used to mark a grave', 'name': 'gravestone'}, {'frequency': 'r', 'synset': 'gravy_boat.n.01', 'synonyms': ['gravy_boat', 'gravy_holder'], 'id': 513, 'def': 'a dish (often boat-shaped) for serving gravy or sauce', 'name': 'gravy_boat'}, {'frequency': 'f', 'synset': 'green_bean.n.02', 'synonyms': ['green_bean'], 'id': 514, 'def': 'a common bean plant cultivated for its slender green edible pods', 'name': 'green_bean'}, {'frequency': 'f', 'synset': 'green_onion.n.01', 'synonyms': ['green_onion', 'spring_onion', 'scallion'], 'id': 515, 'def': 'a young onion before the bulb has enlarged', 'name': 'green_onion'}, {'frequency': 'r', 'synset': 'griddle.n.01', 'synonyms': ['griddle'], 'id': 516, 'def': 'cooking utensil consisting of a flat heated surface on which food is cooked', 'name': 'griddle'}, {'frequency': 'f', 'synset': 'grill.n.02', 'synonyms': ['grill', 'grille', 'grillwork', 'radiator_grille'], 'id': 517, 'def': 'a framework of metal bars used as a partition or a grate', 'name': 'grill'}, {'frequency': 'r', 'synset': 'grits.n.01', 'synonyms': ['grits', 'hominy_grits'], 'id': 518, 'def': 'coarsely ground corn boiled as a breakfast dish', 'name': 'grits'}, {'frequency': 'c', 'synset': 'grizzly.n.01', 'synonyms': ['grizzly', 'grizzly_bear'], 'id': 519, 'def': 'powerful brownish-yellow bear of the uplands of western North America', 'name': 'grizzly'}, {'frequency': 'c', 'synset': 'grocery_bag.n.01', 'synonyms': ['grocery_bag'], 'id': 520, 'def': "a sack for holding customer's groceries", 'name': 'grocery_bag'}, {'frequency': 'f', 'synset': 'guitar.n.01', 'synonyms': ['guitar'], 'id': 521, 'def': 'a stringed instrument usually having six strings; played by strumming or plucking', 'name': 'guitar'}, {'frequency': 'c', 'synset': 'gull.n.02', 'synonyms': ['gull', 'seagull'], 'id': 522, 'def': 'mostly white aquatic bird having long pointed wings and short legs', 'name': 'gull'}, {'frequency': 'c', 'synset': 'gun.n.01', 'synonyms': ['gun'], 'id': 523, 'def': 'a weapon that discharges a bullet at high velocity from a metal tube', 'name': 'gun'}, {'frequency': 'f', 'synset': 'hairbrush.n.01', 'synonyms': ['hairbrush'], 'id': 524, 'def': "a brush used to groom a person's hair", 'name': 'hairbrush'}, {'frequency': 'c', 'synset': 'hairnet.n.01', 'synonyms': ['hairnet'], 'id': 525, 'def': 'a small net that someone wears over their hair to keep it in place', 'name': 'hairnet'}, {'frequency': 'c', 'synset': 'hairpin.n.01', 'synonyms': ['hairpin'], 'id': 526, 'def': "a double pronged pin used to hold women's hair in place", 'name': 'hairpin'}, {'frequency': 'r', 'synset': 'halter.n.03', 'synonyms': ['halter_top'], 'id': 527, 'def': "a woman's top that fastens behind the back and neck leaving the back and arms uncovered", 'name': 'halter_top'}, {'frequency': 'f', 'synset': 'ham.n.01', 'synonyms': ['ham', 'jambon', 'gammon'], 'id': 528, 'def': 'meat cut from the thigh of a hog (usually smoked)', 'name': 'ham'}, {'frequency': 'c', 
'synset': 'hamburger.n.01', 'synonyms': ['hamburger', 'beefburger', 'burger'], 'id': 529, 'def': 'a sandwich consisting of a patty of minced beef served on a bun', 'name': 'hamburger'}, {'frequency': 'c', 'synset': 'hammer.n.02', 'synonyms': ['hammer'], 'id': 530, 'def': 'a hand tool with a heavy head and a handle; used to deliver an impulsive force by striking', 'name': 'hammer'}, {'frequency': 'c', 'synset': 'hammock.n.02', 'synonyms': ['hammock'], 'id': 531, 'def': 'a hanging bed of canvas or rope netting (usually suspended between two trees)', 'name': 'hammock'}, {'frequency': 'r', 'synset': 'hamper.n.02', 'synonyms': ['hamper'], 'id': 532, 'def': 'a basket usually with a cover', 'name': 'hamper'}, {'frequency': 'c', 'synset': 'hamster.n.01', 'synonyms': ['hamster'], 'id': 533, 'def': 'short-tailed burrowing rodent with large cheek pouches', 'name': 'hamster'}, {'frequency': 'f', 'synset': 'hand_blower.n.01', 'synonyms': ['hair_dryer'], 'id': 534, 'def': 'a hand-held electric blower that can blow warm air onto the hair', 'name': 'hair_dryer'}, {'frequency': 'r', 'synset': 'hand_glass.n.01', 'synonyms': ['hand_glass', 'hand_mirror'], 'id': 535, 'def': 'a mirror intended to be held in the hand', 'name': 'hand_glass'}, {'frequency': 'f', 'synset': 'hand_towel.n.01', 'synonyms': ['hand_towel', 'face_towel'], 'id': 536, 'def': 'a small towel used to dry the hands or face', 'name': 'hand_towel'}, {'frequency': 'c', 'synset': 'handcart.n.01', 'synonyms': ['handcart', 'pushcart', 'hand_truck'], 'id': 537, 'def': 'wheeled vehicle that can be pushed by a person', 'name': 'handcart'}, {'frequency': 'r', 'synset': 'handcuff.n.01', 'synonyms': ['handcuff'], 'id': 538, 'def': 'shackle that consists of a metal loop that can be locked around the wrist', 'name': 'handcuff'}, {'frequency': 'c', 'synset': 'handkerchief.n.01', 'synonyms': ['handkerchief'], 'id': 539, 'def': 'a square piece of cloth used for wiping the eyes or nose or as a costume accessory', 'name': 'handkerchief'}, {'frequency': 'f', 'synset': 'handle.n.01', 'synonyms': ['handle', 'grip', 'handgrip'], 'id': 540, 'def': 'the appendage to an object that is designed to be held in order to use or move it', 'name': 'handle'}, {'frequency': 'r', 'synset': 'handsaw.n.01', 'synonyms': ['handsaw', "carpenter's_saw"], 'id': 541, 'def': 'a saw used with one hand for cutting wood', 'name': 'handsaw'}, {'frequency': 'r', 'synset': 'hardback.n.01', 'synonyms': ['hardback_book', 'hardcover_book'], 'id': 542, 'def': 'a book with cardboard or cloth or leather covers', 'name': 'hardback_book'}, {'frequency': 'r', 'synset': 'harmonium.n.01', 'synonyms': ['harmonium', 'organ_(musical_instrument)', 'reed_organ_(musical_instrument)'], 'id': 543, 'def': 'a free-reed instrument in which air is forced through the reeds by bellows', 'name': 'harmonium'}, {'frequency': 'f', 'synset': 'hat.n.01', 'synonyms': ['hat'], 'id': 544, 'def': 'headwear that protects the head from bad weather, sun, or worn for fashion', 'name': 'hat'}, {'frequency': 'r', 'synset': 'hatbox.n.01', 'synonyms': ['hatbox'], 'id': 545, 'def': 'a round piece of luggage for carrying hats', 'name': 'hatbox'}, {'frequency': 'c', 'synset': 'head_covering.n.01', 'synonyms': ['veil'], 'id': 546, 'def': 'a garment that covers the head OR face', 'name': 'veil'}, {'frequency': 'f', 'synset': 'headband.n.01', 'synonyms': ['headband'], 'id': 547, 'def': 'a band worn around or over the head', 'name': 'headband'}, {'frequency': 'f', 'synset': 'headboard.n.01', 'synonyms': ['headboard'], 'id': 548, 'def': 'a 
vertical board or panel forming the head of a bedstead', 'name': 'headboard'}, {'frequency': 'f', 'synset': 'headlight.n.01', 'synonyms': ['headlight', 'headlamp'], 'id': 549, 'def': 'a powerful light with reflector; attached to the front of an automobile or locomotive', 'name': 'headlight'}, {'frequency': 'c', 'synset': 'headscarf.n.01', 'synonyms': ['headscarf'], 'id': 550, 'def': 'a kerchief worn over the head and tied under the chin', 'name': 'headscarf'}, {'frequency': 'r', 'synset': 'headset.n.01', 'synonyms': ['headset'], 'id': 551, 'def': 'receiver consisting of a pair of headphones', 'name': 'headset'}, {'frequency': 'c', 'synset': 'headstall.n.01', 'synonyms': ['headstall_(for_horses)', 'headpiece_(for_horses)'], 'id': 552, 'def': "the band that is the part of a bridle that fits around a horse's head", 'name': 'headstall_(for_horses)'}, {'frequency': 'c', 'synset': 'heart.n.02', 'synonyms': ['heart'], 'id': 553, 'def': 'a muscular organ; its contractions move the blood through the body', 'name': 'heart'}, {'frequency': 'c', 'synset': 'heater.n.01', 'synonyms': ['heater', 'warmer'], 'id': 554, 'def': 'device that heats water or supplies warmth to a room', 'name': 'heater'}, {'frequency': 'c', 'synset': 'helicopter.n.01', 'synonyms': ['helicopter'], 'id': 555, 'def': 'an aircraft without wings that obtains its lift from the rotation of overhead blades', 'name': 'helicopter'}, {'frequency': 'f', 'synset': 'helmet.n.02', 'synonyms': ['helmet'], 'id': 556, 'def': 'a protective headgear made of hard material to resist blows', 'name': 'helmet'}, {'frequency': 'r', 'synset': 'heron.n.02', 'synonyms': ['heron'], 'id': 557, 'def': 'grey or white wading bird with long neck and long legs and (usually) long bill', 'name': 'heron'}, {'frequency': 'c', 'synset': 'highchair.n.01', 'synonyms': ['highchair', 'feeding_chair'], 'id': 558, 'def': 'a chair for feeding a very young child', 'name': 'highchair'}, {'frequency': 'f', 'synset': 'hinge.n.01', 'synonyms': ['hinge'], 'id': 559, 'def': 'a joint that holds two parts together so that one can swing relative to the other', 'name': 'hinge'}, {'frequency': 'r', 'synset': 'hippopotamus.n.01', 'synonyms': ['hippopotamus'], 'id': 560, 'def': 'massive thick-skinned animal living in or around rivers of tropical Africa', 'name': 'hippopotamus'}, {'frequency': 'r', 'synset': 'hockey_stick.n.01', 'synonyms': ['hockey_stick'], 'id': 561, 'def': 'sports implement consisting of a stick used by hockey players to move the puck', 'name': 'hockey_stick'}, {'frequency': 'c', 'synset': 'hog.n.03', 'synonyms': ['hog', 'pig'], 'id': 562, 'def': 'domestic swine', 'name': 'hog'}, {'frequency': 'f', 'synset': 'home_plate.n.01', 'synonyms': ['home_plate_(baseball)', 'home_base_(baseball)'], 'id': 563, 'def': '(baseball) a rubber slab where the batter stands; it must be touched by a base runner in order to score', 'name': 'home_plate_(baseball)'}, {'frequency': 'c', 'synset': 'honey.n.01', 'synonyms': ['honey'], 'id': 564, 'def': 'a sweet yellow liquid produced by bees', 'name': 'honey'}, {'frequency': 'f', 'synset': 'hood.n.06', 'synonyms': ['fume_hood', 'exhaust_hood'], 'id': 565, 'def': 'metal covering leading to a vent that exhausts smoke or fumes', 'name': 'fume_hood'}, {'frequency': 'f', 'synset': 'hook.n.05', 'synonyms': ['hook'], 'id': 566, 'def': 'a curved or bent implement for suspending or pulling something', 'name': 'hook'}, {'frequency': 'r', 'synset': 'hookah.n.01', 'synonyms': ['hookah', 'narghile', 'nargileh', 'sheesha', 'shisha', 'water_pipe'], 'id': 567, 
'def': 'a tobacco pipe with a long flexible tube connected to a container where the smoke is cooled by passing through water', 'name': 'hookah'}, {'frequency': 'r', 'synset': 'hornet.n.01', 'synonyms': ['hornet'], 'id': 568, 'def': 'large stinging wasp', 'name': 'hornet'}, {'frequency': 'f', 'synset': 'horse.n.01', 'synonyms': ['horse'], 'id': 569, 'def': 'a common horse', 'name': 'horse'}, {'frequency': 'f', 'synset': 'hose.n.03', 'synonyms': ['hose', 'hosepipe'], 'id': 570, 'def': 'a flexible pipe for conveying a liquid or gas', 'name': 'hose'}, {'frequency': 'r', 'synset': 'hot-air_balloon.n.01', 'synonyms': ['hot-air_balloon'], 'id': 571, 'def': 'balloon for travel through the air in a basket suspended below a large bag of heated air', 'name': 'hot-air_balloon'}, {'frequency': 'r', 'synset': 'hot_plate.n.01', 'synonyms': ['hotplate'], 'id': 572, 'def': 'a portable electric appliance for heating or cooking or keeping food warm', 'name': 'hotplate'}, {'frequency': 'c', 'synset': 'hot_sauce.n.01', 'synonyms': ['hot_sauce'], 'id': 573, 'def': 'a pungent peppery sauce', 'name': 'hot_sauce'}, {'frequency': 'r', 'synset': 'hourglass.n.01', 'synonyms': ['hourglass'], 'id': 574, 'def': 'a sandglass timer that runs for sixty minutes', 'name': 'hourglass'}, {'frequency': 'r', 'synset': 'houseboat.n.01', 'synonyms': ['houseboat'], 'id': 575, 'def': 'a barge that is designed and equipped for use as a dwelling', 'name': 'houseboat'}, {'frequency': 'c', 'synset': 'hummingbird.n.01', 'synonyms': ['hummingbird'], 'id': 576, 'def': 'tiny American bird having brilliant iridescent plumage and long slender bills', 'name': 'hummingbird'}, {'frequency': 'r', 'synset': 'hummus.n.01', 'synonyms': ['hummus', 'humus', 'hommos', 'hoummos', 'humous'], 'id': 577, 'def': 'a thick spread made from mashed chickpeas', 'name': 'hummus'}, {'frequency': 'f', 'synset': 'ice_bear.n.01', 'synonyms': ['polar_bear'], 'id': 578, 'def': 'white bear of Arctic regions', 'name': 'polar_bear'}, {'frequency': 'c', 'synset': 'ice_cream.n.01', 'synonyms': ['icecream'], 'id': 579, 'def': 'frozen dessert containing cream and sugar and flavoring', 'name': 'icecream'}, {'frequency': 'r', 'synset': 'ice_lolly.n.01', 'synonyms': ['popsicle'], 'id': 580, 'def': 'ice cream or water ice on a small wooden stick', 'name': 'popsicle'}, {'frequency': 'c', 'synset': 'ice_maker.n.01', 'synonyms': ['ice_maker'], 'id': 581, 'def': 'an appliance included in some electric refrigerators for making ice cubes', 'name': 'ice_maker'}, {'frequency': 'r', 'synset': 'ice_pack.n.01', 'synonyms': ['ice_pack', 'ice_bag'], 'id': 582, 'def': 'a waterproof bag filled with ice: applied to the body (especially the head) to cool or reduce swelling', 'name': 'ice_pack'}, {'frequency': 'r', 'synset': 'ice_skate.n.01', 'synonyms': ['ice_skate'], 'id': 583, 'def': 'skate consisting of a boot with a steel blade fitted to the sole', 'name': 'ice_skate'}, {'frequency': 'c', 'synset': 'igniter.n.01', 'synonyms': ['igniter', 'ignitor', 'lighter'], 'id': 584, 'def': 'a substance or device used to start a fire', 'name': 'igniter'}, {'frequency': 'r', 'synset': 'inhaler.n.01', 'synonyms': ['inhaler', 'inhalator'], 'id': 585, 'def': 'a dispenser that produces a chemical vapor to be inhaled through mouth or nose', 'name': 'inhaler'}, {'frequency': 'f', 'synset': 'ipod.n.01', 'synonyms': ['iPod'], 'id': 586, 'def': 'a pocket-sized device used to play music files', 'name': 'iPod'}, {'frequency': 'c', 'synset': 'iron.n.04', 'synonyms': ['iron_(for_clothing)', 
'smoothing_iron_(for_clothing)'], 'id': 587, 'def': 'home appliance consisting of a flat metal base that is heated and used to smooth cloth', 'name': 'iron_(for_clothing)'}, {'frequency': 'c', 'synset': 'ironing_board.n.01', 'synonyms': ['ironing_board'], 'id': 588, 'def': 'narrow padded board on collapsible supports; used for ironing clothes', 'name': 'ironing_board'}, {'frequency': 'f', 'synset': 'jacket.n.01', 'synonyms': ['jacket'], 'id': 589, 'def': 'a waist-length coat', 'name': 'jacket'}, {'frequency': 'c', 'synset': 'jam.n.01', 'synonyms': ['jam'], 'id': 590, 'def': 'preserve of crushed fruit', 'name': 'jam'}, {'frequency': 'f', 'synset': 'jar.n.01', 'synonyms': ['jar'], 'id': 591, 'def': 'a vessel (usually cylindrical) with a wide mouth and without handles', 'name': 'jar'}, {'frequency': 'f', 'synset': 'jean.n.01', 'synonyms': ['jean', 'blue_jean', 'denim'], 'id': 592, 'def': '(usually plural) close-fitting trousers of heavy denim for manual work or casual wear', 'name': 'jean'}, {'frequency': 'c', 'synset': 'jeep.n.01', 'synonyms': ['jeep', 'landrover'], 'id': 593, 'def': 'a car suitable for traveling over rough terrain', 'name': 'jeep'}, {'frequency': 'r', 'synset': 'jelly_bean.n.01', 'synonyms': ['jelly_bean', 'jelly_egg'], 'id': 594, 'def': 'sugar-glazed jellied candy', 'name': 'jelly_bean'}, {'frequency': 'f', 'synset': 'jersey.n.03', 'synonyms': ['jersey', 'T-shirt', 'tee_shirt'], 'id': 595, 'def': 'a close-fitting pullover shirt', 'name': 'jersey'}, {'frequency': 'c', 'synset': 'jet.n.01', 'synonyms': ['jet_plane', 'jet-propelled_plane'], 'id': 596, 'def': 'an airplane powered by one or more jet engines', 'name': 'jet_plane'}, {'frequency': 'r', 'synset': 'jewel.n.01', 'synonyms': ['jewel', 'gem', 'precious_stone'], 'id': 597, 'def': 'a precious or semiprecious stone incorporated into a piece of jewelry', 'name': 'jewel'}, {'frequency': 'c', 'synset': 'jewelry.n.01', 'synonyms': ['jewelry', 'jewellery'], 'id': 598, 'def': 'an adornment (as a bracelet or ring or necklace) made of precious metals and set with gems (or imitation gems)', 'name': 'jewelry'}, {'frequency': 'r', 'synset': 'joystick.n.02', 'synonyms': ['joystick'], 'id': 599, 'def': 'a control device for computers consisting of a vertical handle that can move freely in two directions', 'name': 'joystick'}, {'frequency': 'c', 'synset': 'jump_suit.n.01', 'synonyms': ['jumpsuit'], 'id': 600, 'def': "one-piece garment fashioned after a parachutist's uniform", 'name': 'jumpsuit'}, {'frequency': 'c', 'synset': 'kayak.n.01', 'synonyms': ['kayak'], 'id': 601, 'def': 'a small canoe consisting of a light frame made watertight with animal skins', 'name': 'kayak'}, {'frequency': 'r', 'synset': 'keg.n.02', 'synonyms': ['keg'], 'id': 602, 'def': 'small cask or barrel', 'name': 'keg'}, {'frequency': 'r', 'synset': 'kennel.n.01', 'synonyms': ['kennel', 'doghouse'], 'id': 603, 'def': 'outbuilding that serves as a shelter for a dog', 'name': 'kennel'}, {'frequency': 'c', 'synset': 'kettle.n.01', 'synonyms': ['kettle', 'boiler'], 'id': 604, 'def': 'a metal pot for stewing or boiling; usually has a lid', 'name': 'kettle'}, {'frequency': 'f', 'synset': 'key.n.01', 'synonyms': ['key'], 'id': 605, 'def': 'metal instrument used to unlock a lock', 'name': 'key'}, {'frequency': 'r', 'synset': 'keycard.n.01', 'synonyms': ['keycard'], 'id': 606, 'def': 'a plastic card used to gain access typically to a door', 'name': 'keycard'}, {'frequency': 'c', 'synset': 'kilt.n.01', 'synonyms': ['kilt'], 'id': 607, 'def': 'a knee-length pleated tartan 
skirt worn by men as part of the traditional dress in the Highlands of northern Scotland', 'name': 'kilt'}, {'frequency': 'c', 'synset': 'kimono.n.01', 'synonyms': ['kimono'], 'id': 608, 'def': 'a loose robe; imitated from robes originally worn by Japanese', 'name': 'kimono'}, {'frequency': 'f', 'synset': 'kitchen_sink.n.01', 'synonyms': ['kitchen_sink'], 'id': 609, 'def': 'a sink in a kitchen', 'name': 'kitchen_sink'}, {'frequency': 'r', 'synset': 'kitchen_table.n.01', 'synonyms': ['kitchen_table'], 'id': 610, 'def': 'a table in the kitchen', 'name': 'kitchen_table'}, {'frequency': 'f', 'synset': 'kite.n.03', 'synonyms': ['kite'], 'id': 611, 'def': 'plaything consisting of a light frame covered with tissue paper; flown in wind at end of a string', 'name': 'kite'}, {'frequency': 'c', 'synset': 'kitten.n.01', 'synonyms': ['kitten', 'kitty'], 'id': 612, 'def': 'young domestic cat', 'name': 'kitten'}, {'frequency': 'c', 'synset': 'kiwi.n.03', 'synonyms': ['kiwi_fruit'], 'id': 613, 'def': 'fuzzy brown egg-shaped fruit with slightly tart green flesh', 'name': 'kiwi_fruit'}, {'frequency': 'f', 'synset': 'knee_pad.n.01', 'synonyms': ['knee_pad'], 'id': 614, 'def': 'protective garment consisting of a pad worn by football or baseball or hockey players', 'name': 'knee_pad'}, {'frequency': 'f', 'synset': 'knife.n.01', 'synonyms': ['knife'], 'id': 615, 'def': 'tool with a blade and point used as a cutting instrument', 'name': 'knife'}, {'frequency': 'r', 'synset': 'knitting_needle.n.01', 'synonyms': ['knitting_needle'], 'id': 616, 'def': 'needle consisting of a slender rod with pointed ends; usually used in pairs', 'name': 'knitting_needle'}, {'frequency': 'f', 'synset': 'knob.n.02', 'synonyms': ['knob'], 'id': 617, 'def': 'a round handle often found on a door', 'name': 'knob'}, {'frequency': 'r', 'synset': 'knocker.n.05', 'synonyms': ['knocker_(on_a_door)', 'doorknocker'], 'id': 618, 'def': 'a device (usually metal and ornamental) attached by a hinge to a door', 'name': 'knocker_(on_a_door)'}, {'frequency': 'r', 'synset': 'koala.n.01', 'synonyms': ['koala', 'koala_bear'], 'id': 619, 'def': 'sluggish tailless Australian marsupial with grey furry ears and coat', 'name': 'koala'}, {'frequency': 'r', 'synset': 'lab_coat.n.01', 'synonyms': ['lab_coat', 'laboratory_coat'], 'id': 620, 'def': 'a light coat worn to protect clothing from substances used while working in a laboratory', 'name': 'lab_coat'}, {'frequency': 'f', 'synset': 'ladder.n.01', 'synonyms': ['ladder'], 'id': 621, 'def': 'steps consisting of two parallel members connected by rungs', 'name': 'ladder'}, {'frequency': 'c', 'synset': 'ladle.n.01', 'synonyms': ['ladle'], 'id': 622, 'def': 'a spoon-shaped vessel with a long handle frequently used to transfer liquids', 'name': 'ladle'}, {'frequency': 'c', 'synset': 'ladybug.n.01', 'synonyms': ['ladybug', 'ladybeetle', 'ladybird_beetle'], 'id': 623, 'def': 'small round bright-colored and spotted beetle, typically red and black', 'name': 'ladybug'}, {'frequency': 'f', 'synset': 'lamb.n.01', 'synonyms': ['lamb_(animal)'], 'id': 624, 'def': 'young sheep', 'name': 'lamb_(animal)'}, {'frequency': 'r', 'synset': 'lamb_chop.n.01', 'synonyms': ['lamb-chop', 'lambchop'], 'id': 625, 'def': 'chop cut from a lamb', 'name': 'lamb-chop'}, {'frequency': 'f', 'synset': 'lamp.n.02', 'synonyms': ['lamp'], 'id': 626, 'def': 'a piece of furniture holding one or more electric light bulbs', 'name': 'lamp'}, {'frequency': 'f', 'synset': 'lamppost.n.01', 'synonyms': ['lamppost'], 'id': 627, 'def': 'a metal post supporting 
an outdoor lamp (such as a streetlight)', 'name': 'lamppost'}, {'frequency': 'f', 'synset': 'lampshade.n.01', 'synonyms': ['lampshade'], 'id': 628, 'def': 'a protective ornamental shade used to screen a light bulb from direct view', 'name': 'lampshade'}, {'frequency': 'c', 'synset': 'lantern.n.01', 'synonyms': ['lantern'], 'id': 629, 'def': 'light in a transparent protective case', 'name': 'lantern'}, {'frequency': 'f', 'synset': 'lanyard.n.02', 'synonyms': ['lanyard', 'laniard'], 'id': 630, 'def': 'a cord worn around the neck to hold a knife or whistle, etc.', 'name': 'lanyard'}, {'frequency': 'f', 'synset': 'laptop.n.01', 'synonyms': ['laptop_computer', 'notebook_computer'], 'id': 631, 'def': 'a portable computer small enough to use in your lap', 'name': 'laptop_computer'}, {'frequency': 'r', 'synset': 'lasagna.n.01', 'synonyms': ['lasagna', 'lasagne'], 'id': 632, 'def': 'baked dish of layers of lasagna pasta with sauce and cheese and meat or vegetables', 'name': 'lasagna'}, {'frequency': 'f', 'synset': 'latch.n.02', 'synonyms': ['latch'], 'id': 633, 'def': 'a bar that can be lowered or slid into a groove to fasten a door or gate', 'name': 'latch'}, {'frequency': 'r', 'synset': 'lawn_mower.n.01', 'synonyms': ['lawn_mower'], 'id': 634, 'def': 'garden tool for mowing grass on lawns', 'name': 'lawn_mower'}, {'frequency': 'r', 'synset': 'leather.n.01', 'synonyms': ['leather'], 'id': 635, 'def': 'an animal skin made smooth and flexible by removing the hair and then tanning', 'name': 'leather'}, {'frequency': 'c', 'synset': 'legging.n.01', 'synonyms': ['legging_(clothing)', 'leging_(clothing)', 'leg_covering'], 'id': 636, 'def': 'a garment covering the leg (usually extending from the knee to the ankle)', 'name': 'legging_(clothing)'}, {'frequency': 'c', 'synset': 'lego.n.01', 'synonyms': ['Lego', 'Lego_set'], 'id': 637, 'def': "a child's plastic construction set for making models from blocks", 'name': 'Lego'}, {'frequency': 'r', 'synset': 'legume.n.02', 'synonyms': ['legume'], 'id': 638, 'def': 'the fruit or seed of bean or pea plants', 'name': 'legume'}, {'frequency': 'f', 'synset': 'lemon.n.01', 'synonyms': ['lemon'], 'id': 639, 'def': 'yellow oval fruit with juicy acidic flesh', 'name': 'lemon'}, {'frequency': 'r', 'synset': 'lemonade.n.01', 'synonyms': ['lemonade'], 'id': 640, 'def': 'sweetened beverage of diluted lemon juice', 'name': 'lemonade'}, {'frequency': 'f', 'synset': 'lettuce.n.02', 'synonyms': ['lettuce'], 'id': 641, 'def': 'leafy plant commonly eaten in salad or on sandwiches', 'name': 'lettuce'}, {'frequency': 'f', 'synset': 'license_plate.n.01', 'synonyms': ['license_plate', 'numberplate'], 'id': 642, 'def': "a plate mounted on the front and back of car and bearing the car's registration number", 'name': 'license_plate'}, {'frequency': 'f', 'synset': 'life_buoy.n.01', 'synonyms': ['life_buoy', 'lifesaver', 'life_belt', 'life_ring'], 'id': 643, 'def': 'a ring-shaped life preserver used to prevent drowning (NOT a life-jacket or vest)', 'name': 'life_buoy'}, {'frequency': 'f', 'synset': 'life_jacket.n.01', 'synonyms': ['life_jacket', 'life_vest'], 'id': 644, 'def': 'life preserver consisting of a sleeveless jacket of buoyant or inflatable design', 'name': 'life_jacket'}, {'frequency': 'f', 'synset': 'light_bulb.n.01', 'synonyms': ['lightbulb'], 'id': 645, 'def': 'lightblub/source of light', 'name': 'lightbulb'}, {'frequency': 'r', 'synset': 'lightning_rod.n.02', 'synonyms': ['lightning_rod', 'lightning_conductor'], 'id': 646, 'def': 'a metallic conductor that is attached to a 
high point and leads to the ground', 'name': 'lightning_rod'}, {'frequency': 'f', 'synset': 'lime.n.06', 'synonyms': ['lime'], 'id': 647, 'def': 'the green acidic fruit of any of various lime trees', 'name': 'lime'}, {'frequency': 'r', 'synset': 'limousine.n.01', 'synonyms': ['limousine'], 'id': 648, 'def': 'long luxurious car; usually driven by a chauffeur', 'name': 'limousine'}, {'frequency': 'c', 'synset': 'lion.n.01', 'synonyms': ['lion'], 'id': 649, 'def': 'large gregarious predatory cat of Africa and India', 'name': 'lion'}, {'frequency': 'c', 'synset': 'lip_balm.n.01', 'synonyms': ['lip_balm'], 'id': 650, 'def': 'a balm applied to the lips', 'name': 'lip_balm'}, {'frequency': 'r', 'synset': 'liquor.n.01', 'synonyms': ['liquor', 'spirits', 'hard_liquor', 'liqueur', 'cordial'], 'id': 651, 'def': 'liquor or beer', 'name': 'liquor'}, {'frequency': 'c', 'synset': 'lizard.n.01', 'synonyms': ['lizard'], 'id': 652, 'def': 'a reptile with usually two pairs of legs and a tapering tail', 'name': 'lizard'}, {'frequency': 'f', 'synset': 'log.n.01', 'synonyms': ['log'], 'id': 653, 'def': 'a segment of the trunk of a tree when stripped of branches', 'name': 'log'}, {'frequency': 'c', 'synset': 'lollipop.n.02', 'synonyms': ['lollipop'], 'id': 654, 'def': 'hard candy on a stick', 'name': 'lollipop'}, {'frequency': 'f', 'synset': 'loudspeaker.n.01', 'synonyms': ['speaker_(stero_equipment)'], 'id': 655, 'def': 'electronic device that produces sound often as part of a stereo system', 'name': 'speaker_(stero_equipment)'}, {'frequency': 'c', 'synset': 'love_seat.n.01', 'synonyms': ['loveseat'], 'id': 656, 'def': 'small sofa that seats two people', 'name': 'loveseat'}, {'frequency': 'r', 'synset': 'machine_gun.n.01', 'synonyms': ['machine_gun'], 'id': 657, 'def': 'a rapidly firing automatic gun', 'name': 'machine_gun'}, {'frequency': 'f', 'synset': 'magazine.n.02', 'synonyms': ['magazine'], 'id': 658, 'def': 'a paperback periodic publication', 'name': 'magazine'}, {'frequency': 'f', 'synset': 'magnet.n.01', 'synonyms': ['magnet'], 'id': 659, 'def': 'a device that attracts iron and produces a magnetic field', 'name': 'magnet'}, {'frequency': 'c', 'synset': 'mail_slot.n.01', 'synonyms': ['mail_slot'], 'id': 660, 'def': 'a slot (usually in a door) through which mail can be delivered', 'name': 'mail_slot'}, {'frequency': 'f', 'synset': 'mailbox.n.01', 'synonyms': ['mailbox_(at_home)', 'letter_box_(at_home)'], 'id': 661, 'def': 'a private box for delivery of mail', 'name': 'mailbox_(at_home)'}, {'frequency': 'r', 'synset': 'mallard.n.01', 'synonyms': ['mallard'], 'id': 662, 'def': 'wild dabbling duck from which domestic ducks are descended', 'name': 'mallard'}, {'frequency': 'r', 'synset': 'mallet.n.01', 'synonyms': ['mallet'], 'id': 663, 'def': 'a sports implement with a long handle and a hammer-like head used to hit a ball', 'name': 'mallet'}, {'frequency': 'r', 'synset': 'mammoth.n.01', 'synonyms': ['mammoth'], 'id': 664, 'def': 'any of numerous extinct elephants widely distributed in the Pleistocene', 'name': 'mammoth'}, {'frequency': 'r', 'synset': 'manatee.n.01', 'synonyms': ['manatee'], 'id': 665, 'def': 'sirenian mammal of tropical coastal waters of America', 'name': 'manatee'}, {'frequency': 'c', 'synset': 'mandarin.n.05', 'synonyms': ['mandarin_orange'], 'id': 666, 'def': 'a somewhat flat reddish-orange loose skinned citrus of China', 'name': 'mandarin_orange'}, {'frequency': 'c', 'synset': 'manger.n.01', 'synonyms': ['manger', 'trough'], 'id': 667, 'def': 'a container (usually in a barn or stable) 
from which cattle or horses feed', 'name': 'manger'}, {'frequency': 'f', 'synset': 'manhole.n.01', 'synonyms': ['manhole'], 'id': 668, 'def': 'a hole (usually with a flush cover) through which a person can gain access to an underground structure', 'name': 'manhole'}, {'frequency': 'f', 'synset': 'map.n.01', 'synonyms': ['map'], 'id': 669, 'def': "a diagrammatic representation of the earth's surface (or part of it)", 'name': 'map'}, {'frequency': 'f', 'synset': 'marker.n.03', 'synonyms': ['marker'], 'id': 670, 'def': 'a writing implement for making a mark', 'name': 'marker'}, {'frequency': 'r', 'synset': 'martini.n.01', 'synonyms': ['martini'], 'id': 671, 'def': 'a cocktail made of gin (or vodka) with dry vermouth', 'name': 'martini'}, {'frequency': 'r', 'synset': 'mascot.n.01', 'synonyms': ['mascot'], 'id': 672, 'def': 'a person or animal that is adopted by a team or other group as a symbolic figure', 'name': 'mascot'}, {'frequency': 'c', 'synset': 'mashed_potato.n.01', 'synonyms': ['mashed_potato'], 'id': 673, 'def': 'potato that has been peeled and boiled and then mashed', 'name': 'mashed_potato'}, {'frequency': 'r', 'synset': 'masher.n.02', 'synonyms': ['masher'], 'id': 674, 'def': 'a kitchen utensil used for mashing (e.g. potatoes)', 'name': 'masher'}, {'frequency': 'f', 'synset': 'mask.n.04', 'synonyms': ['mask', 'facemask'], 'id': 675, 'def': 'a protective covering worn over the face', 'name': 'mask'}, {'frequency': 'f', 'synset': 'mast.n.01', 'synonyms': ['mast'], 'id': 676, 'def': 'a vertical spar for supporting sails', 'name': 'mast'}, {'frequency': 'c', 'synset': 'mat.n.03', 'synonyms': ['mat_(gym_equipment)', 'gym_mat'], 'id': 677, 'def': 'sports equipment consisting of a piece of thick padding on the floor for gymnastics', 'name': 'mat_(gym_equipment)'}, {'frequency': 'r', 'synset': 'matchbox.n.01', 'synonyms': ['matchbox'], 'id': 678, 'def': 'a box for holding matches', 'name': 'matchbox'}, {'frequency': 'f', 'synset': 'mattress.n.01', 'synonyms': ['mattress'], 'id': 679, 'def': 'a thick pad filled with resilient material used as a bed or part of a bed', 'name': 'mattress'}, {'frequency': 'c', 'synset': 'measuring_cup.n.01', 'synonyms': ['measuring_cup'], 'id': 680, 'def': 'graduated cup used to measure liquid or granular ingredients', 'name': 'measuring_cup'}, {'frequency': 'c', 'synset': 'measuring_stick.n.01', 'synonyms': ['measuring_stick', 'ruler_(measuring_stick)', 'measuring_rod'], 'id': 681, 'def': 'measuring instrument having a sequence of marks at regular intervals', 'name': 'measuring_stick'}, {'frequency': 'c', 'synset': 'meatball.n.01', 'synonyms': ['meatball'], 'id': 682, 'def': 'ground meat formed into a ball and fried or simmered in broth', 'name': 'meatball'}, {'frequency': 'c', 'synset': 'medicine.n.02', 'synonyms': ['medicine'], 'id': 683, 'def': 'something that treats or prevents or alleviates the symptoms of disease', 'name': 'medicine'}, {'frequency': 'c', 'synset': 'melon.n.01', 'synonyms': ['melon'], 'id': 684, 'def': 'fruit of the gourd family having a hard rind and sweet juicy flesh', 'name': 'melon'}, {'frequency': 'f', 'synset': 'microphone.n.01', 'synonyms': ['microphone'], 'id': 685, 'def': 'device for converting sound waves into electrical energy', 'name': 'microphone'}, {'frequency': 'r', 'synset': 'microscope.n.01', 'synonyms': ['microscope'], 'id': 686, 'def': 'magnifier of the image of small objects', 'name': 'microscope'}, {'frequency': 'f', 'synset': 'microwave.n.02', 'synonyms': ['microwave_oven'], 'id': 687, 'def': 'kitchen appliance that 
cooks food by passing an electromagnetic wave through it', 'name': 'microwave_oven'}, {'frequency': 'r', 'synset': 'milestone.n.01', 'synonyms': ['milestone', 'milepost'], 'id': 688, 'def': 'stone post at side of a road to show distances', 'name': 'milestone'}, {'frequency': 'f', 'synset': 'milk.n.01', 'synonyms': ['milk'], 'id': 689, 'def': 'a white nutritious liquid secreted by mammals and used as food by human beings', 'name': 'milk'}, {'frequency': 'r', 'synset': 'milk_can.n.01', 'synonyms': ['milk_can'], 'id': 690, 'def': 'can for transporting milk', 'name': 'milk_can'}, {'frequency': 'r', 'synset': 'milkshake.n.01', 'synonyms': ['milkshake'], 'id': 691, 'def': 'frothy drink of milk and flavoring and sometimes fruit or ice cream', 'name': 'milkshake'}, {'frequency': 'f', 'synset': 'minivan.n.01', 'synonyms': ['minivan'], 'id': 692, 'def': 'a small box-shaped passenger van', 'name': 'minivan'}, {'frequency': 'r', 'synset': 'mint.n.05', 'synonyms': ['mint_candy'], 'id': 693, 'def': 'a candy that is flavored with a mint oil', 'name': 'mint_candy'}, {'frequency': 'f', 'synset': 'mirror.n.01', 'synonyms': ['mirror'], 'id': 694, 'def': 'polished surface that forms images by reflecting light', 'name': 'mirror'}, {'frequency': 'c', 'synset': 'mitten.n.01', 'synonyms': ['mitten'], 'id': 695, 'def': 'glove that encases the thumb separately and the other four fingers together', 'name': 'mitten'}, {'frequency': 'c', 'synset': 'mixer.n.04', 'synonyms': ['mixer_(kitchen_tool)', 'stand_mixer'], 'id': 696, 'def': 'a kitchen utensil that is used for mixing foods', 'name': 'mixer_(kitchen_tool)'}, {'frequency': 'c', 'synset': 'money.n.03', 'synonyms': ['money'], 'id': 697, 'def': 'the official currency issued by a government or national bank', 'name': 'money'}, {'frequency': 'f', 'synset': 'monitor.n.04', 'synonyms': ['monitor_(computer_equipment) computer_monitor'], 'id': 698, 'def': 'a computer monitor', 'name': 'monitor_(computer_equipment) computer_monitor'}, {'frequency': 'c', 'synset': 'monkey.n.01', 'synonyms': ['monkey'], 'id': 699, 'def': 'any of various long-tailed primates', 'name': 'monkey'}, {'frequency': 'f', 'synset': 'motor.n.01', 'synonyms': ['motor'], 'id': 700, 'def': 'machine that converts other forms of energy into mechanical energy and so imparts motion', 'name': 'motor'}, {'frequency': 'f', 'synset': 'motor_scooter.n.01', 'synonyms': ['motor_scooter', 'scooter'], 'id': 701, 'def': 'a wheeled vehicle with small wheels and a low-powered engine', 'name': 'motor_scooter'}, {'frequency': 'r', 'synset': 'motor_vehicle.n.01', 'synonyms': ['motor_vehicle', 'automotive_vehicle'], 'id': 702, 'def': 'a self-propelled wheeled vehicle that does not run on rails', 'name': 'motor_vehicle'}, {'frequency': 'f', 'synset': 'motorcycle.n.01', 'synonyms': ['motorcycle'], 'id': 703, 'def': 'a motor vehicle with two wheels and a strong frame', 'name': 'motorcycle'}, {'frequency': 'f', 'synset': 'mound.n.01', 'synonyms': ['mound_(baseball)', "pitcher's_mound"], 'id': 704, 'def': '(baseball) the slight elevation on which the pitcher stands', 'name': 'mound_(baseball)'}, {'frequency': 'f', 'synset': 'mouse.n.04', 'synonyms': ['mouse_(computer_equipment)', 'computer_mouse'], 'id': 705, 'def': 'a computer input device that controls an on-screen pointer (does not include trackpads / touchpads)', 'name': 'mouse_(computer_equipment)'}, {'frequency': 'f', 'synset': 'mousepad.n.01', 'synonyms': ['mousepad'], 'id': 706, 'def': 'a small portable pad that provides an operating surface for a computer mouse', 'name': 
'mousepad'}, {'frequency': 'c', 'synset': 'muffin.n.01', 'synonyms': ['muffin'], 'id': 707, 'def': 'a sweet quick bread baked in a cup-shaped pan', 'name': 'muffin'}, {'frequency': 'f', 'synset': 'mug.n.04', 'synonyms': ['mug'], 'id': 708, 'def': 'with handle and usually cylindrical', 'name': 'mug'}, {'frequency': 'f', 'synset': 'mushroom.n.02', 'synonyms': ['mushroom'], 'id': 709, 'def': 'a common mushroom', 'name': 'mushroom'}, {'frequency': 'r', 'synset': 'music_stool.n.01', 'synonyms': ['music_stool', 'piano_stool'], 'id': 710, 'def': 'a stool for piano players; usually adjustable in height', 'name': 'music_stool'}, {'frequency': 'c', 'synset': 'musical_instrument.n.01', 'synonyms': ['musical_instrument', 'instrument_(musical)'], 'id': 711, 'def': 'any of various devices or contrivances that can be used to produce musical tones or sounds', 'name': 'musical_instrument'}, {'frequency': 'r', 'synset': 'nailfile.n.01', 'synonyms': ['nailfile'], 'id': 712, 'def': 'a small flat file for shaping the nails', 'name': 'nailfile'}, {'frequency': 'f', 'synset': 'napkin.n.01', 'synonyms': ['napkin', 'table_napkin', 'serviette'], 'id': 713, 'def': 'a small piece of table linen or paper that is used to wipe the mouth and to cover the lap in order to protect clothing', 'name': 'napkin'}, {'frequency': 'r', 'synset': 'neckerchief.n.01', 'synonyms': ['neckerchief'], 'id': 714, 'def': 'a kerchief worn around the neck', 'name': 'neckerchief'}, {'frequency': 'f', 'synset': 'necklace.n.01', 'synonyms': ['necklace'], 'id': 715, 'def': 'jewelry consisting of a cord or chain (often bearing gems) worn about the neck as an ornament', 'name': 'necklace'}, {'frequency': 'f', 'synset': 'necktie.n.01', 'synonyms': ['necktie', 'tie_(necktie)'], 'id': 716, 'def': 'neckwear consisting of a long narrow piece of material worn under a collar and tied in knot at the front', 'name': 'necktie'}, {'frequency': 'c', 'synset': 'needle.n.03', 'synonyms': ['needle'], 'id': 717, 'def': 'a sharp pointed implement (usually metal)', 'name': 'needle'}, {'frequency': 'c', 'synset': 'nest.n.01', 'synonyms': ['nest'], 'id': 718, 'def': 'a structure in which animals lay eggs or give birth to their young', 'name': 'nest'}, {'frequency': 'f', 'synset': 'newspaper.n.01', 'synonyms': ['newspaper', 'paper_(newspaper)'], 'id': 719, 'def': 'a daily or weekly publication on folded sheets containing news, articles, and advertisements', 'name': 'newspaper'}, {'frequency': 'c', 'synset': 'newsstand.n.01', 'synonyms': ['newsstand'], 'id': 720, 'def': 'a stall where newspapers and other periodicals are sold', 'name': 'newsstand'}, {'frequency': 'c', 'synset': 'nightwear.n.01', 'synonyms': ['nightshirt', 'nightwear', 'sleepwear', 'nightclothes'], 'id': 721, 'def': 'garments designed to be worn in bed', 'name': 'nightshirt'}, {'frequency': 'r', 'synset': 'nosebag.n.01', 'synonyms': ['nosebag_(for_animals)', 'feedbag'], 'id': 722, 'def': 'a canvas bag that is used to feed an animal (such as a horse); covers the muzzle and fastens at the top of the head', 'name': 'nosebag_(for_animals)'}, {'frequency': 'c', 'synset': 'noseband.n.01', 'synonyms': ['noseband_(for_animals)', 'nosepiece_(for_animals)'], 'id': 723, 'def': "a strap that is the part of a bridle that goes over the animal's nose", 'name': 'noseband_(for_animals)'}, {'frequency': 'f', 'synset': 'notebook.n.01', 'synonyms': ['notebook'], 'id': 724, 'def': 'a book with blank pages for recording notes or memoranda', 'name': 'notebook'}, {'frequency': 'c', 'synset': 'notepad.n.01', 'synonyms': 
['notepad'], 'id': 725, 'def': 'a pad of paper for keeping notes', 'name': 'notepad'}, {'frequency': 'f', 'synset': 'nut.n.03', 'synonyms': ['nut'], 'id': 726, 'def': 'a small metal block (usually square or hexagonal) with internal screw thread to be fitted onto a bolt', 'name': 'nut'}, {'frequency': 'r', 'synset': 'nutcracker.n.01', 'synonyms': ['nutcracker'], 'id': 727, 'def': 'a hand tool used to crack nuts open', 'name': 'nutcracker'}, {'frequency': 'f', 'synset': 'oar.n.01', 'synonyms': ['oar'], 'id': 728, 'def': 'an implement used to propel or steer a boat', 'name': 'oar'}, {'frequency': 'r', 'synset': 'octopus.n.01', 'synonyms': ['octopus_(food)'], 'id': 729, 'def': 'tentacles of octopus prepared as food', 'name': 'octopus_(food)'}, {'frequency': 'r', 'synset': 'octopus.n.02', 'synonyms': ['octopus_(animal)'], 'id': 730, 'def': 'bottom-living cephalopod having a soft oval body with eight long tentacles', 'name': 'octopus_(animal)'}, {'frequency': 'c', 'synset': 'oil_lamp.n.01', 'synonyms': ['oil_lamp', 'kerosene_lamp', 'kerosine_lamp'], 'id': 731, 'def': 'a lamp that burns oil (as kerosine) for light', 'name': 'oil_lamp'}, {'frequency': 'c', 'synset': 'olive_oil.n.01', 'synonyms': ['olive_oil'], 'id': 732, 'def': 'oil from olives', 'name': 'olive_oil'}, {'frequency': 'r', 'synset': 'omelet.n.01', 'synonyms': ['omelet', 'omelette'], 'id': 733, 'def': 'beaten eggs cooked until just set; may be folded around e.g. ham or cheese or jelly', 'name': 'omelet'}, {'frequency': 'f', 'synset': 'onion.n.01', 'synonyms': ['onion'], 'id': 734, 'def': 'the bulb of an onion plant', 'name': 'onion'}, {'frequency': 'f', 'synset': 'orange.n.01', 'synonyms': ['orange_(fruit)'], 'id': 735, 'def': 'orange (FRUIT of an orange tree)', 'name': 'orange_(fruit)'}, {'frequency': 'c', 'synset': 'orange_juice.n.01', 'synonyms': ['orange_juice'], 'id': 736, 'def': 'bottled or freshly squeezed juice of oranges', 'name': 'orange_juice'}, {'frequency': 'c', 'synset': 'ostrich.n.02', 'synonyms': ['ostrich'], 'id': 737, 'def': 'fast-running African flightless bird with two-toed feet; largest living bird', 'name': 'ostrich'}, {'frequency': 'f', 'synset': 'ottoman.n.03', 'synonyms': ['ottoman', 'pouf', 'pouffe', 'hassock'], 'id': 738, 'def': 'a thick standalone cushion used as a seat or footrest, often next to a chair', 'name': 'ottoman'}, {'frequency': 'f', 'synset': 'oven.n.01', 'synonyms': ['oven'], 'id': 739, 'def': 'kitchen appliance used for baking or roasting', 'name': 'oven'}, {'frequency': 'c', 'synset': 'overall.n.01', 'synonyms': ['overalls_(clothing)'], 'id': 740, 'def': 'work clothing consisting of denim trousers usually with a bib and shoulder straps', 'name': 'overalls_(clothing)'}, {'frequency': 'c', 'synset': 'owl.n.01', 'synonyms': ['owl'], 'id': 741, 'def': 'nocturnal bird of prey with hawk-like beak and claws and large head with front-facing eyes', 'name': 'owl'}, {'frequency': 'c', 'synset': 'packet.n.03', 'synonyms': ['packet'], 'id': 742, 'def': 'a small package or bundle', 'name': 'packet'}, {'frequency': 'r', 'synset': 'pad.n.03', 'synonyms': ['inkpad', 'inking_pad', 'stamp_pad'], 'id': 743, 'def': 'absorbent material saturated with ink used to transfer ink evenly to a rubber stamp', 'name': 'inkpad'}, {'frequency': 'c', 'synset': 'pad.n.04', 'synonyms': ['pad'], 'id': 744, 'def': 'mostly arm/knee pads labeled', 'name': 'pad'}, {'frequency': 'f', 'synset': 'paddle.n.04', 'synonyms': ['paddle', 'boat_paddle'], 'id': 745, 'def': 'a short light oar used without an oarlock to propel a canoe or small 
boat', 'name': 'paddle'}, {'frequency': 'c', 'synset': 'padlock.n.01', 'synonyms': ['padlock'], 'id': 746, 'def': 'a detachable, portable lock', 'name': 'padlock'}, {'frequency': 'c', 'synset': 'paintbrush.n.01', 'synonyms': ['paintbrush'], 'id': 747, 'def': 'a brush used as an applicator to apply paint', 'name': 'paintbrush'}, {'frequency': 'f', 'synset': 'painting.n.01', 'synonyms': ['painting'], 'id': 748, 'def': 'graphic art consisting of an artistic composition made by applying paints to a surface', 'name': 'painting'}, {'frequency': 'f', 'synset': 'pajama.n.02', 'synonyms': ['pajamas', 'pyjamas'], 'id': 749, 'def': 'loose-fitting nightclothes worn for sleeping or lounging', 'name': 'pajamas'}, {'frequency': 'c', 'synset': 'palette.n.02', 'synonyms': ['palette', 'pallet'], 'id': 750, 'def': 'board that provides a flat surface on which artists mix paints and the range of colors used', 'name': 'palette'}, {'frequency': 'f', 'synset': 'pan.n.01', 'synonyms': ['pan_(for_cooking)', 'cooking_pan'], 'id': 751, 'def': 'cooking utensil consisting of a wide metal vessel', 'name': 'pan_(for_cooking)'}, {'frequency': 'r', 'synset': 'pan.n.03', 'synonyms': ['pan_(metal_container)'], 'id': 752, 'def': 'shallow container made of metal', 'name': 'pan_(metal_container)'}, {'frequency': 'c', 'synset': 'pancake.n.01', 'synonyms': ['pancake'], 'id': 753, 'def': 'a flat cake of thin batter fried on both sides on a griddle', 'name': 'pancake'}, {'frequency': 'r', 'synset': 'pantyhose.n.01', 'synonyms': ['pantyhose'], 'id': 754, 'def': "a woman's tights consisting of underpants and stockings", 'name': 'pantyhose'}, {'frequency': 'r', 'synset': 'papaya.n.02', 'synonyms': ['papaya'], 'id': 755, 'def': 'large oval melon-like tropical fruit with yellowish flesh', 'name': 'papaya'}, {'frequency': 'f', 'synset': 'paper_plate.n.01', 'synonyms': ['paper_plate'], 'id': 756, 'def': 'a disposable plate made of cardboard', 'name': 'paper_plate'}, {'frequency': 'f', 'synset': 'paper_towel.n.01', 'synonyms': ['paper_towel'], 'id': 757, 'def': 'a disposable towel made of absorbent paper', 'name': 'paper_towel'}, {'frequency': 'r', 'synset': 'paperback_book.n.01', 'synonyms': ['paperback_book', 'paper-back_book', 'softback_book', 'soft-cover_book'], 'id': 758, 'def': 'a book with paper covers', 'name': 'paperback_book'}, {'frequency': 'r', 'synset': 'paperweight.n.01', 'synonyms': ['paperweight'], 'id': 759, 'def': 'a weight used to hold down a stack of papers', 'name': 'paperweight'}, {'frequency': 'c', 'synset': 'parachute.n.01', 'synonyms': ['parachute'], 'id': 760, 'def': 'rescue equipment consisting of a device that fills with air and retards your fall', 'name': 'parachute'}, {'frequency': 'c', 'synset': 'parakeet.n.01', 'synonyms': ['parakeet', 'parrakeet', 'parroket', 'paraquet', 'paroquet', 'parroquet'], 'id': 761, 'def': 'any of numerous small slender long-tailed parrots', 'name': 'parakeet'}, {'frequency': 'c', 'synset': 'parasail.n.01', 'synonyms': ['parasail_(sports)'], 'id': 762, 'def': 'parachute that will lift a person up into the air when it is towed by a motorboat or a car', 'name': 'parasail_(sports)'}, {'frequency': 'c', 'synset': 'parasol.n.01', 'synonyms': ['parasol', 'sunshade'], 'id': 763, 'def': 'a handheld collapsible source of shade', 'name': 'parasol'}, {'frequency': 'r', 'synset': 'parchment.n.01', 'synonyms': ['parchment'], 'id': 764, 'def': 'a superior paper resembling sheepskin', 'name': 'parchment'}, {'frequency': 'c', 'synset': 'parka.n.01', 'synonyms': ['parka', 'anorak'], 'id': 765, 
'def': "a kind of heavy jacket (`windcheater' is a British term)", 'name': 'parka'}, {'frequency': 'f', 'synset': 'parking_meter.n.01', 'synonyms': ['parking_meter'], 'id': 766, 'def': 'a coin-operated timer located next to a parking space', 'name': 'parking_meter'}, {'frequency': 'c', 'synset': 'parrot.n.01', 'synonyms': ['parrot'], 'id': 767, 'def': 'usually brightly colored tropical birds with short hooked beaks and the ability to mimic sounds', 'name': 'parrot'}, {'frequency': 'c', 'synset': 'passenger_car.n.01', 'synonyms': ['passenger_car_(part_of_a_train)', 'coach_(part_of_a_train)'], 'id': 768, 'def': 'a railcar where passengers ride', 'name': 'passenger_car_(part_of_a_train)'}, {'frequency': 'r', 'synset': 'passenger_ship.n.01', 'synonyms': ['passenger_ship'], 'id': 769, 'def': 'a ship built to carry passengers', 'name': 'passenger_ship'}, {'frequency': 'c', 'synset': 'passport.n.02', 'synonyms': ['passport'], 'id': 770, 'def': 'a document issued by a country to a citizen allowing that person to travel abroad and re-enter the home country', 'name': 'passport'}, {'frequency': 'f', 'synset': 'pastry.n.02', 'synonyms': ['pastry'], 'id': 771, 'def': 'any of various baked foods made of dough or batter', 'name': 'pastry'}, {'frequency': 'r', 'synset': 'patty.n.01', 'synonyms': ['patty_(food)'], 'id': 772, 'def': 'small flat mass of chopped food', 'name': 'patty_(food)'}, {'frequency': 'c', 'synset': 'pea.n.01', 'synonyms': ['pea_(food)'], 'id': 773, 'def': 'seed of a pea plant used for food', 'name': 'pea_(food)'}, {'frequency': 'c', 'synset': 'peach.n.03', 'synonyms': ['peach'], 'id': 774, 'def': 'downy juicy fruit with sweet yellowish or whitish flesh', 'name': 'peach'}, {'frequency': 'c', 'synset': 'peanut_butter.n.01', 'synonyms': ['peanut_butter'], 'id': 775, 'def': 'a spread made from ground peanuts', 'name': 'peanut_butter'}, {'frequency': 'f', 'synset': 'pear.n.01', 'synonyms': ['pear'], 'id': 776, 'def': 'sweet juicy gritty-textured fruit available in many varieties', 'name': 'pear'}, {'frequency': 'c', 'synset': 'peeler.n.03', 'synonyms': ['peeler_(tool_for_fruit_and_vegetables)'], 'id': 777, 'def': 'a device for peeling vegetables or fruits', 'name': 'peeler_(tool_for_fruit_and_vegetables)'}, {'frequency': 'r', 'synset': 'peg.n.04', 'synonyms': ['wooden_leg', 'pegleg'], 'id': 778, 'def': 'a prosthesis that replaces a missing leg', 'name': 'wooden_leg'}, {'frequency': 'r', 'synset': 'pegboard.n.01', 'synonyms': ['pegboard'], 'id': 779, 'def': 'a board perforated with regularly spaced holes into which pegs can be fitted', 'name': 'pegboard'}, {'frequency': 'c', 'synset': 'pelican.n.01', 'synonyms': ['pelican'], 'id': 780, 'def': 'large long-winged warm-water seabird having a large bill with a distensible pouch for fish', 'name': 'pelican'}, {'frequency': 'f', 'synset': 'pen.n.01', 'synonyms': ['pen'], 'id': 781, 'def': 'a writing implement with a point from which ink flows', 'name': 'pen'}, {'frequency': 'f', 'synset': 'pencil.n.01', 'synonyms': ['pencil'], 'id': 782, 'def': 'a thin cylindrical pointed writing implement made of wood and graphite', 'name': 'pencil'}, {'frequency': 'r', 'synset': 'pencil_box.n.01', 'synonyms': ['pencil_box', 'pencil_case'], 'id': 783, 'def': 'a box for holding pencils', 'name': 'pencil_box'}, {'frequency': 'r', 'synset': 'pencil_sharpener.n.01', 'synonyms': ['pencil_sharpener'], 'id': 784, 'def': 'a rotary implement for sharpening the point on pencils', 'name': 'pencil_sharpener'}, {'frequency': 'r', 'synset': 'pendulum.n.01', 'synonyms': 
['pendulum'], 'id': 785, 'def': 'an apparatus consisting of an object mounted so that it swings freely under the influence of gravity', 'name': 'pendulum'}, {'frequency': 'c', 'synset': 'penguin.n.01', 'synonyms': ['penguin'], 'id': 786, 'def': 'short-legged flightless birds of cold southern regions having webbed feet and wings modified as flippers', 'name': 'penguin'}, {'frequency': 'r', 'synset': 'pennant.n.02', 'synonyms': ['pennant'], 'id': 787, 'def': 'a flag longer than it is wide (and often tapering)', 'name': 'pennant'}, {'frequency': 'r', 'synset': 'penny.n.02', 'synonyms': ['penny_(coin)'], 'id': 788, 'def': 'a coin worth one-hundredth of the value of the basic unit', 'name': 'penny_(coin)'}, {'frequency': 'f', 'synset': 'pepper.n.03', 'synonyms': ['pepper', 'peppercorn'], 'id': 789, 'def': 'pungent seasoning from the berry of the common pepper plant; whole or ground', 'name': 'pepper'}, {'frequency': 'c', 'synset': 'pepper_mill.n.01', 'synonyms': ['pepper_mill', 'pepper_grinder'], 'id': 790, 'def': 'a mill for grinding pepper', 'name': 'pepper_mill'}, {'frequency': 'c', 'synset': 'perfume.n.02', 'synonyms': ['perfume'], 'id': 791, 'def': 'a toiletry that emits and diffuses a fragrant odor', 'name': 'perfume'}, {'frequency': 'r', 'synset': 'persimmon.n.02', 'synonyms': ['persimmon'], 'id': 792, 'def': 'orange fruit resembling a plum; edible when fully ripe', 'name': 'persimmon'}, {'frequency': 'f', 'synset': 'person.n.01', 'synonyms': ['person', 'baby', 'child', 'boy', 'girl', 'man', 'woman', 'human'], 'id': 793, 'def': 'a human being', 'name': 'person'}, {'frequency': 'c', 'synset': 'pet.n.01', 'synonyms': ['pet'], 'id': 794, 'def': 'a domesticated animal kept for companionship or amusement', 'name': 'pet'}, {'frequency': 'c', 'synset': 'pew.n.01', 'synonyms': ['pew_(church_bench)', 'church_bench'], 'id': 795, 'def': 'long bench with backs; used in church by the congregation', 'name': 'pew_(church_bench)'}, {'frequency': 'r', 'synset': 'phonebook.n.01', 'synonyms': ['phonebook', 'telephone_book', 'telephone_directory'], 'id': 796, 'def': 'a directory containing an alphabetical list of telephone subscribers and their telephone numbers', 'name': 'phonebook'}, {'frequency': 'c', 'synset': 'phonograph_record.n.01', 'synonyms': ['phonograph_record', 'phonograph_recording', 'record_(phonograph_recording)'], 'id': 797, 'def': 'sound recording consisting of a typically black disk with a continuous groove', 'name': 'phonograph_record'}, {'frequency': 'f', 'synset': 'piano.n.01', 'synonyms': ['piano'], 'id': 798, 'def': 'a keyboard instrument that is played by depressing keys that cause hammers to strike tuned strings and produce sounds', 'name': 'piano'}, {'frequency': 'f', 'synset': 'pickle.n.01', 'synonyms': ['pickle'], 'id': 799, 'def': 'vegetables (especially cucumbers) preserved in brine or vinegar', 'name': 'pickle'}, {'frequency': 'f', 'synset': 'pickup.n.01', 'synonyms': ['pickup_truck'], 'id': 800, 'def': 'a light truck with an open body and low sides and a tailboard', 'name': 'pickup_truck'}, {'frequency': 'c', 'synset': 'pie.n.01', 'synonyms': ['pie'], 'id': 801, 'def': 'dish baked in pastry-lined pan often with a pastry top', 'name': 'pie'}, {'frequency': 'c', 'synset': 'pigeon.n.01', 'synonyms': ['pigeon'], 'id': 802, 'def': 'wild and domesticated birds having a heavy body and short legs', 'name': 'pigeon'}, {'frequency': 'r', 'synset': 'piggy_bank.n.01', 'synonyms': ['piggy_bank', 'penny_bank'], 'id': 803, 'def': "a child's coin bank (often shaped like a pig)", 'name': 
'piggy_bank'}, {'frequency': 'f', 'synset': 'pillow.n.01', 'synonyms': ['pillow'], 'id': 804, 'def': 'a cushion to support the head of a sleeping person', 'name': 'pillow'}, {'frequency': 'r', 'synset': 'pin.n.09', 'synonyms': ['pin_(non_jewelry)'], 'id': 805, 'def': 'a small slender (often pointed) piece of wood or metal used to support or fasten or attach things', 'name': 'pin_(non_jewelry)'}, {'frequency': 'f', 'synset': 'pineapple.n.02', 'synonyms': ['pineapple'], 'id': 806, 'def': 'large sweet fleshy tropical fruit with a tuft of stiff leaves', 'name': 'pineapple'}, {'frequency': 'c', 'synset': 'pinecone.n.01', 'synonyms': ['pinecone'], 'id': 807, 'def': 'the seed-producing cone of a pine tree', 'name': 'pinecone'}, {'frequency': 'r', 'synset': 'ping-pong_ball.n.01', 'synonyms': ['ping-pong_ball'], 'id': 808, 'def': 'light hollow ball used in playing table tennis', 'name': 'ping-pong_ball'}, {'frequency': 'r', 'synset': 'pinwheel.n.03', 'synonyms': ['pinwheel'], 'id': 809, 'def': 'a toy consisting of vanes of colored paper or plastic that is pinned to a stick and spins when it is pointed into the wind', 'name': 'pinwheel'}, {'frequency': 'r', 'synset': 'pipe.n.01', 'synonyms': ['tobacco_pipe'], 'id': 810, 'def': 'a tube with a small bowl at one end; used for smoking tobacco', 'name': 'tobacco_pipe'}, {'frequency': 'f', 'synset': 'pipe.n.02', 'synonyms': ['pipe', 'piping'], 'id': 811, 'def': 'a long tube made of metal or plastic that is used to carry water or oil or gas etc.', 'name': 'pipe'}, {'frequency': 'r', 'synset': 'pistol.n.01', 'synonyms': ['pistol', 'handgun'], 'id': 812, 'def': 'a firearm that is held and fired with one hand', 'name': 'pistol'}, {'frequency': 'c', 'synset': 'pita.n.01', 'synonyms': ['pita_(bread)', 'pocket_bread'], 'id': 813, 'def': 'usually small round bread that can open into a pocket for filling', 'name': 'pita_(bread)'}, {'frequency': 'f', 'synset': 'pitcher.n.02', 'synonyms': ['pitcher_(vessel_for_liquid)', 'ewer'], 'id': 814, 'def': 'an open vessel with a handle and a spout for pouring', 'name': 'pitcher_(vessel_for_liquid)'}, {'frequency': 'r', 'synset': 'pitchfork.n.01', 'synonyms': ['pitchfork'], 'id': 815, 'def': 'a long-handled hand tool with sharp widely spaced prongs for lifting and pitching hay', 'name': 'pitchfork'}, {'frequency': 'f', 'synset': 'pizza.n.01', 'synonyms': ['pizza'], 'id': 816, 'def': 'Italian open pie made of thin bread dough spread with a spiced mixture of e.g. 
tomato sauce and cheese', 'name': 'pizza'}, {'frequency': 'f', 'synset': 'place_mat.n.01', 'synonyms': ['place_mat'], 'id': 817, 'def': 'a mat placed on a table for an individual place setting', 'name': 'place_mat'}, {'frequency': 'f', 'synset': 'plate.n.04', 'synonyms': ['plate'], 'id': 818, 'def': 'dish on which food is served or from which food is eaten', 'name': 'plate'}, {'frequency': 'c', 'synset': 'platter.n.01', 'synonyms': ['platter'], 'id': 819, 'def': 'a large shallow dish used for serving food', 'name': 'platter'}, {'frequency': 'r', 'synset': 'playpen.n.01', 'synonyms': ['playpen'], 'id': 820, 'def': 'a portable enclosure in which babies may be left to play', 'name': 'playpen'}, {'frequency': 'c', 'synset': 'pliers.n.01', 'synonyms': ['pliers', 'plyers'], 'id': 821, 'def': 'a gripping hand tool with two hinged arms and (usually) serrated jaws', 'name': 'pliers'}, {'frequency': 'r', 'synset': 'plow.n.01', 'synonyms': ['plow_(farm_equipment)', 'plough_(farm_equipment)'], 'id': 822, 'def': 'a farm tool having one or more heavy blades to break the soil and cut a furrow prior to sowing', 'name': 'plow_(farm_equipment)'}, {'frequency': 'r', 'synset': 'plume.n.02', 'synonyms': ['plume'], 'id': 823, 'def': 'a feather or cluster of feathers worn as an ornament', 'name': 'plume'}, {'frequency': 'r', 'synset': 'pocket_watch.n.01', 'synonyms': ['pocket_watch'], 'id': 824, 'def': 'a watch that is carried in a small watch pocket', 'name': 'pocket_watch'}, {'frequency': 'c', 'synset': 'pocketknife.n.01', 'synonyms': ['pocketknife'], 'id': 825, 'def': 'a knife with a blade that folds into the handle; suitable for carrying in the pocket', 'name': 'pocketknife'}, {'frequency': 'c', 'synset': 'poker.n.01', 'synonyms': ['poker_(fire_stirring_tool)', 'stove_poker', 'fire_hook'], 'id': 826, 'def': 'fire iron consisting of a metal rod with a handle; used to stir a fire', 'name': 'poker_(fire_stirring_tool)'}, {'frequency': 'f', 'synset': 'pole.n.01', 'synonyms': ['pole', 'post'], 'id': 827, 'def': 'a long (usually round) rod of wood or metal or plastic', 'name': 'pole'}, {'frequency': 'f', 'synset': 'polo_shirt.n.01', 'synonyms': ['polo_shirt', 'sport_shirt'], 'id': 828, 'def': 'a shirt with short sleeves designed for comfort and casual wear', 'name': 'polo_shirt'}, {'frequency': 'r', 'synset': 'poncho.n.01', 'synonyms': ['poncho'], 'id': 829, 'def': 'a blanket-like cloak with a hole in the center for the head', 'name': 'poncho'}, {'frequency': 'c', 'synset': 'pony.n.05', 'synonyms': ['pony'], 'id': 830, 'def': 'any of various breeds of small gentle horses usually less than five feet high at the shoulder', 'name': 'pony'}, {'frequency': 'r', 'synset': 'pool_table.n.01', 'synonyms': ['pool_table', 'billiard_table', 'snooker_table'], 'id': 831, 'def': 'game equipment consisting of a heavy table on which pool is played', 'name': 'pool_table'}, {'frequency': 'f', 'synset': 'pop.n.02', 'synonyms': ['pop_(soda)', 'soda_(pop)', 'tonic', 'soft_drink'], 'id': 832, 'def': 'a sweet drink containing carbonated water and flavoring', 'name': 'pop_(soda)'}, {'frequency': 'c', 'synset': 'postbox.n.01', 'synonyms': ['postbox_(public)', 'mailbox_(public)'], 'id': 833, 'def': 'public box for deposit of mail', 'name': 'postbox_(public)'}, {'frequency': 'c', 'synset': 'postcard.n.01', 'synonyms': ['postcard', 'postal_card', 'mailing-card'], 'id': 834, 'def': 'a card for sending messages by post without an envelope', 'name': 'postcard'}, {'frequency': 'f', 'synset': 'poster.n.01', 'synonyms': ['poster', 'placard'], 'id': 
835, 'def': 'a sign posted in a public place as an advertisement', 'name': 'poster'}, {'frequency': 'f', 'synset': 'pot.n.01', 'synonyms': ['pot'], 'id': 836, 'def': 'metal or earthenware cooking vessel that is usually round and deep; often has a handle and lid', 'name': 'pot'}, {'frequency': 'f', 'synset': 'pot.n.04', 'synonyms': ['flowerpot'], 'id': 837, 'def': 'a container in which plants are cultivated', 'name': 'flowerpot'}, {'frequency': 'f', 'synset': 'potato.n.01', 'synonyms': ['potato'], 'id': 838, 'def': 'an edible tuber native to South America', 'name': 'potato'}, {'frequency': 'c', 'synset': 'potholder.n.01', 'synonyms': ['potholder'], 'id': 839, 'def': 'an insulated pad for holding hot pots', 'name': 'potholder'}, {'frequency': 'c', 'synset': 'pottery.n.01', 'synonyms': ['pottery', 'clayware'], 'id': 840, 'def': 'ceramic ware made from clay and baked in a kiln', 'name': 'pottery'}, {'frequency': 'c', 'synset': 'pouch.n.01', 'synonyms': ['pouch'], 'id': 841, 'def': 'a small or medium size container for holding or carrying things', 'name': 'pouch'}, {'frequency': 'c', 'synset': 'power_shovel.n.01', 'synonyms': ['power_shovel', 'excavator', 'digger'], 'id': 842, 'def': 'a machine for excavating', 'name': 'power_shovel'}, {'frequency': 'c', 'synset': 'prawn.n.01', 'synonyms': ['prawn', 'shrimp'], 'id': 843, 'def': 'any of various edible decapod crustaceans', 'name': 'prawn'}, {'frequency': 'c', 'synset': 'pretzel.n.01', 'synonyms': ['pretzel'], 'id': 844, 'def': 'glazed and salted cracker typically in the shape of a loose knot', 'name': 'pretzel'}, {'frequency': 'f', 'synset': 'printer.n.03', 'synonyms': ['printer', 'printing_machine'], 'id': 845, 'def': 'a machine that prints', 'name': 'printer'}, {'frequency': 'c', 'synset': 'projectile.n.01', 'synonyms': ['projectile_(weapon)', 'missile'], 'id': 846, 'def': 'a weapon that is forcibly thrown or projected at a target', 'name': 'projectile_(weapon)'}, {'frequency': 'c', 'synset': 'projector.n.02', 'synonyms': ['projector'], 'id': 847, 'def': 'an optical instrument that projects an enlarged image onto a screen', 'name': 'projector'}, {'frequency': 'f', 'synset': 'propeller.n.01', 'synonyms': ['propeller', 'propellor'], 'id': 848, 'def': 'a mechanical device that rotates to push against air or water', 'name': 'propeller'}, {'frequency': 'r', 'synset': 'prune.n.01', 'synonyms': ['prune'], 'id': 849, 'def': 'dried plum', 'name': 'prune'}, {'frequency': 'r', 'synset': 'pudding.n.01', 'synonyms': ['pudding'], 'id': 850, 'def': 'any of various soft thick unsweetened baked dishes', 'name': 'pudding'}, {'frequency': 'r', 'synset': 'puffer.n.02', 'synonyms': ['puffer_(fish)', 'pufferfish', 'blowfish', 'globefish'], 'id': 851, 'def': 'fishes whose elongated spiny body can inflate itself with water or air to form a globe', 'name': 'puffer_(fish)'}, {'frequency': 'r', 'synset': 'puffin.n.01', 'synonyms': ['puffin'], 'id': 852, 'def': 'seabirds having short necks and brightly colored compressed bills', 'name': 'puffin'}, {'frequency': 'r', 'synset': 'pug.n.01', 'synonyms': ['pug-dog'], 'id': 853, 'def': 'small compact smooth-coated breed of Asiatic origin having a tightly curled tail and broad flat wrinkled muzzle', 'name': 'pug-dog'}, {'frequency': 'c', 'synset': 'pumpkin.n.02', 'synonyms': ['pumpkin'], 'id': 854, 'def': 'usually large pulpy deep-yellow round fruit of the squash family maturing in late summer or early autumn', 'name': 'pumpkin'}, {'frequency': 'r', 'synset': 'punch.n.03', 'synonyms': ['puncher'], 'id': 855, 'def': 'a tool for 
making holes or indentations', 'name': 'puncher'}, {'frequency': 'r', 'synset': 'puppet.n.01', 'synonyms': ['puppet', 'marionette'], 'id': 856, 'def': 'a small figure of a person operated from above with strings by a puppeteer', 'name': 'puppet'}, {'frequency': 'c', 'synset': 'puppy.n.01', 'synonyms': ['puppy'], 'id': 857, 'def': 'a young dog', 'name': 'puppy'}, {'frequency': 'r', 'synset': 'quesadilla.n.01', 'synonyms': ['quesadilla'], 'id': 858, 'def': 'a tortilla that is filled with cheese and heated', 'name': 'quesadilla'}, {'frequency': 'r', 'synset': 'quiche.n.02', 'synonyms': ['quiche'], 'id': 859, 'def': 'a tart filled with rich unsweetened custard; often contains other ingredients (as cheese or ham or seafood or vegetables)', 'name': 'quiche'}, {'frequency': 'f', 'synset': 'quilt.n.01', 'synonyms': ['quilt', 'comforter'], 'id': 860, 'def': 'bedding made of two layers of cloth filled with stuffing and stitched together', 'name': 'quilt'}, {'frequency': 'c', 'synset': 'rabbit.n.01', 'synonyms': ['rabbit'], 'id': 861, 'def': 'any of various burrowing animals of the family Leporidae having long ears and short tails', 'name': 'rabbit'}, {'frequency': 'r', 'synset': 'racer.n.02', 'synonyms': ['race_car', 'racing_car'], 'id': 862, 'def': 'a fast car that competes in races', 'name': 'race_car'}, {'frequency': 'c', 'synset': 'racket.n.04', 'synonyms': ['racket', 'racquet'], 'id': 863, 'def': 'a sports implement used to strike a ball in various games', 'name': 'racket'}, {'frequency': 'r', 'synset': 'radar.n.01', 'synonyms': ['radar'], 'id': 864, 'def': 'measuring instrument in which the echo of a pulse of microwave radiation is used to detect and locate distant objects', 'name': 'radar'}, {'frequency': 'f', 'synset': 'radiator.n.03', 'synonyms': ['radiator'], 'id': 865, 'def': 'a mechanism consisting of a metal honeycomb through which hot fluids circulate', 'name': 'radiator'}, {'frequency': 'c', 'synset': 'radio_receiver.n.01', 'synonyms': ['radio_receiver', 'radio_set', 'radio', 'tuner_(radio)'], 'id': 866, 'def': 'an electronic receiver that detects and demodulates and amplifies transmitted radio signals', 'name': 'radio_receiver'}, {'frequency': 'c', 'synset': 'radish.n.03', 'synonyms': ['radish', 'daikon'], 'id': 867, 'def': 'pungent edible root of any of various cultivated radish plants', 'name': 'radish'}, {'frequency': 'c', 'synset': 'raft.n.01', 'synonyms': ['raft'], 'id': 868, 'def': 'a flat float (usually made of logs or planks) that can be used for transport or as a platform for swimmers', 'name': 'raft'}, {'frequency': 'r', 'synset': 'rag_doll.n.01', 'synonyms': ['rag_doll'], 'id': 869, 'def': 'a cloth doll that is stuffed and (usually) painted', 'name': 'rag_doll'}, {'frequency': 'c', 'synset': 'raincoat.n.01', 'synonyms': ['raincoat', 'waterproof_jacket'], 'id': 870, 'def': 'a water-resistant coat', 'name': 'raincoat'}, {'frequency': 'c', 'synset': 'ram.n.05', 'synonyms': ['ram_(animal)'], 'id': 871, 'def': 'uncastrated adult male sheep', 'name': 'ram_(animal)'}, {'frequency': 'c', 'synset': 'raspberry.n.02', 'synonyms': ['raspberry'], 'id': 872, 'def': 'red or black edible aggregate berries usually smaller than the related blackberries', 'name': 'raspberry'}, {'frequency': 'r', 'synset': 'rat.n.01', 'synonyms': ['rat'], 'id': 873, 'def': 'any of various long-tailed rodents similar to but larger than a mouse', 'name': 'rat'}, {'frequency': 'c', 'synset': 'razorblade.n.01', 'synonyms': ['razorblade'], 'id': 874, 'def': 'a blade that has a very sharp edge', 'name': 
'razorblade'}, {'frequency': 'c', 'synset': 'reamer.n.01', 'synonyms': ['reamer_(juicer)', 'juicer', 'juice_reamer'], 'id': 875, 'def': 'a squeezer with a conical ridged center that is used for squeezing juice from citrus fruit', 'name': 'reamer_(juicer)'}, {'frequency': 'f', 'synset': 'rearview_mirror.n.01', 'synonyms': ['rearview_mirror'], 'id': 876, 'def': 'vehicle mirror (side or rearview)', 'name': 'rearview_mirror'}, {'frequency': 'c', 'synset': 'receipt.n.02', 'synonyms': ['receipt'], 'id': 877, 'def': 'an acknowledgment (usually tangible) that payment has been made', 'name': 'receipt'}, {'frequency': 'c', 'synset': 'recliner.n.01', 'synonyms': ['recliner', 'reclining_chair', 'lounger_(chair)'], 'id': 878, 'def': 'an armchair whose back can be lowered and foot can be raised to allow the sitter to recline in it', 'name': 'recliner'}, {'frequency': 'c', 'synset': 'record_player.n.01', 'synonyms': ['record_player', 'phonograph_(record_player)', 'turntable'], 'id': 879, 'def': 'machine in which rotating records cause a stylus to vibrate and the vibrations are amplified acoustically or electronically', 'name': 'record_player'}, {'frequency': 'f', 'synset': 'reflector.n.01', 'synonyms': ['reflector'], 'id': 880, 'def': 'device that reflects light, radiation, etc.', 'name': 'reflector'}, {'frequency': 'f', 'synset': 'remote_control.n.01', 'synonyms': ['remote_control'], 'id': 881, 'def': 'a device that can be used to control a machine or apparatus from a distance', 'name': 'remote_control'}, {'frequency': 'c', 'synset': 'rhinoceros.n.01', 'synonyms': ['rhinoceros'], 'id': 882, 'def': 'massive powerful herbivorous odd-toed ungulate of southeast Asia and Africa having very thick skin and one or two horns on the snout', 'name': 'rhinoceros'}, {'frequency': 'r', 'synset': 'rib.n.03', 'synonyms': ['rib_(food)'], 'id': 883, 'def': 'cut of meat including one or more ribs', 'name': 'rib_(food)'}, {'frequency': 'c', 'synset': 'rifle.n.01', 'synonyms': ['rifle'], 'id': 884, 'def': 'a shoulder firearm with a long barrel', 'name': 'rifle'}, {'frequency': 'f', 'synset': 'ring.n.08', 'synonyms': ['ring'], 'id': 885, 'def': 'jewelry consisting of a circlet of precious metal (often set with jewels) worn on the finger', 'name': 'ring'}, {'frequency': 'r', 'synset': 'river_boat.n.01', 'synonyms': ['river_boat'], 'id': 886, 'def': 'a boat used on rivers or to ply a river', 'name': 'river_boat'}, {'frequency': 'r', 'synset': 'road_map.n.02', 'synonyms': ['road_map'], 'id': 887, 'def': '(NOT A ROAD) a MAP showing roads (for automobile travel)', 'name': 'road_map'}, {'frequency': 'c', 'synset': 'robe.n.01', 'synonyms': ['robe'], 'id': 888, 'def': 'any loose flowing garment', 'name': 'robe'}, {'frequency': 'c', 'synset': 'rocking_chair.n.01', 'synonyms': ['rocking_chair'], 'id': 889, 'def': 'a chair mounted on rockers', 'name': 'rocking_chair'}, {'frequency': 'r', 'synset': 'rodent.n.01', 'synonyms': ['rodent'], 'id': 890, 'def': 'relatively small placental mammals having a single pair of constantly growing incisor teeth specialized for gnawing', 'name': 'rodent'}, {'frequency': 'r', 'synset': 'roller_skate.n.01', 'synonyms': ['roller_skate'], 'id': 891, 'def': 'a shoe with pairs of rollers (small hard wheels) fixed to the sole', 'name': 'roller_skate'}, {'frequency': 'r', 'synset': 'rollerblade.n.01', 'synonyms': ['Rollerblade'], 'id': 892, 'def': 'an in-line variant of a roller skate', 'name': 'Rollerblade'}, {'frequency': 'c', 'synset': 'rolling_pin.n.01', 'synonyms': ['rolling_pin'], 'id': 893, 'def': 
'utensil consisting of a cylinder (usually of wood) with a handle at each end; used to roll out dough', 'name': 'rolling_pin'}, {'frequency': 'r', 'synset': 'root_beer.n.01', 'synonyms': ['root_beer'], 'id': 894, 'def': 'carbonated drink containing extracts of roots and herbs', 'name': 'root_beer'}, {'frequency': 'c', 'synset': 'router.n.02', 'synonyms': ['router_(computer_equipment)'], 'id': 895, 'def': 'a device that forwards data packets between computer networks', 'name': 'router_(computer_equipment)'}, {'frequency': 'f', 'synset': 'rubber_band.n.01', 'synonyms': ['rubber_band', 'elastic_band'], 'id': 896, 'def': 'a narrow band of elastic rubber used to hold things (such as papers) together', 'name': 'rubber_band'}, {'frequency': 'c', 'synset': 'runner.n.08', 'synonyms': ['runner_(carpet)'], 'id': 897, 'def': 'a long narrow carpet', 'name': 'runner_(carpet)'}, {'frequency': 'f', 'synset': 'sack.n.01', 'synonyms': ['plastic_bag', 'paper_bag'], 'id': 898, 'def': "a bag made of paper or plastic for holding customer's purchases", 'name': 'plastic_bag'}, {'frequency': 'f', 'synset': 'saddle.n.01', 'synonyms': ['saddle_(on_an_animal)'], 'id': 899, 'def': 'a seat for the rider of a horse or camel', 'name': 'saddle_(on_an_animal)'}, {'frequency': 'f', 'synset': 'saddle_blanket.n.01', 'synonyms': ['saddle_blanket', 'saddlecloth', 'horse_blanket'], 'id': 900, 'def': 'stable gear consisting of a blanket placed under the saddle', 'name': 'saddle_blanket'}, {'frequency': 'c', 'synset': 'saddlebag.n.01', 'synonyms': ['saddlebag'], 'id': 901, 'def': 'a large bag (or pair of bags) hung over a saddle', 'name': 'saddlebag'}, {'frequency': 'r', 'synset': 'safety_pin.n.01', 'synonyms': ['safety_pin'], 'id': 902, 'def': 'a pin in the form of a clasp; has a guard so the point of the pin will not stick the user', 'name': 'safety_pin'}, {'frequency': 'f', 'synset': 'sail.n.01', 'synonyms': ['sail'], 'id': 903, 'def': 'a large piece of fabric by means of which wind is used to propel a sailing vessel', 'name': 'sail'}, {'frequency': 'f', 'synset': 'salad.n.01', 'synonyms': ['salad'], 'id': 904, 'def': 'food mixtures either arranged on a plate or tossed and served with a moist dressing; usually consisting of or including greens', 'name': 'salad'}, {'frequency': 'r', 'synset': 'salad_plate.n.01', 'synonyms': ['salad_plate', 'salad_bowl'], 'id': 905, 'def': 'a plate or bowl for individual servings of salad', 'name': 'salad_plate'}, {'frequency': 'c', 'synset': 'salami.n.01', 'synonyms': ['salami'], 'id': 906, 'def': 'highly seasoned fatty sausage of pork and beef usually dried', 'name': 'salami'}, {'frequency': 'c', 'synset': 'salmon.n.01', 'synonyms': ['salmon_(fish)'], 'id': 907, 'def': 'any of various large food and game fishes of northern waters', 'name': 'salmon_(fish)'}, {'frequency': 'r', 'synset': 'salmon.n.03', 'synonyms': ['salmon_(food)'], 'id': 908, 'def': 'flesh of any of various marine or freshwater fish of the family Salmonidae', 'name': 'salmon_(food)'}, {'frequency': 'c', 'synset': 'salsa.n.01', 'synonyms': ['salsa'], 'id': 909, 'def': 'spicy sauce of tomatoes and onions and chili peppers to accompany Mexican foods', 'name': 'salsa'}, {'frequency': 'f', 'synset': 'saltshaker.n.01', 'synonyms': ['saltshaker'], 'id': 910, 'def': 'a shaker with a perforated top for sprinkling salt', 'name': 'saltshaker'}, {'frequency': 'f', 'synset': 'sandal.n.01', 'synonyms': ['sandal_(type_of_shoe)'], 'id': 911, 'def': 'a shoe consisting of a sole fastened by straps to the foot', 'name': 'sandal_(type_of_shoe)'}, 
{'frequency': 'f', 'synset': 'sandwich.n.01', 'synonyms': ['sandwich'], 'id': 912, 'def': 'two (or more) slices of bread with a filling between them', 'name': 'sandwich'}, {'frequency': 'r', 'synset': 'satchel.n.01', 'synonyms': ['satchel'], 'id': 913, 'def': 'luggage consisting of a small case with a flat bottom and (usually) a shoulder strap', 'name': 'satchel'}, {'frequency': 'r', 'synset': 'saucepan.n.01', 'synonyms': ['saucepan'], 'id': 914, 'def': 'a deep pan with a handle; used for stewing or boiling', 'name': 'saucepan'}, {'frequency': 'f', 'synset': 'saucer.n.02', 'synonyms': ['saucer'], 'id': 915, 'def': 'a small shallow dish for holding a cup at the table', 'name': 'saucer'}, {'frequency': 'f', 'synset': 'sausage.n.01', 'synonyms': ['sausage'], 'id': 916, 'def': 'highly seasoned minced meat stuffed in casings', 'name': 'sausage'}, {'frequency': 'r', 'synset': 'sawhorse.n.01', 'synonyms': ['sawhorse', 'sawbuck'], 'id': 917, 'def': 'a framework for holding wood that is being sawed', 'name': 'sawhorse'}, {'frequency': 'r', 'synset': 'sax.n.02', 'synonyms': ['saxophone'], 'id': 918, 'def': "a wind instrument with a `J'-shaped form typically made of brass", 'name': 'saxophone'}, {'frequency': 'f', 'synset': 'scale.n.07', 'synonyms': ['scale_(measuring_instrument)'], 'id': 919, 'def': 'a measuring instrument for weighing; shows amount of mass', 'name': 'scale_(measuring_instrument)'}, {'frequency': 'r', 'synset': 'scarecrow.n.01', 'synonyms': ['scarecrow', 'strawman'], 'id': 920, 'def': 'an effigy in the shape of a man to frighten birds away from seeds', 'name': 'scarecrow'}, {'frequency': 'f', 'synset': 'scarf.n.01', 'synonyms': ['scarf'], 'id': 921, 'def': 'a garment worn around the head or neck or shoulders for warmth or decoration', 'name': 'scarf'}, {'frequency': 'c', 'synset': 'school_bus.n.01', 'synonyms': ['school_bus'], 'id': 922, 'def': 'a bus used to transport children to or from school', 'name': 'school_bus'}, {'frequency': 'f', 'synset': 'scissors.n.01', 'synonyms': ['scissors'], 'id': 923, 'def': 'a tool having two crossed pivoting blades with looped handles', 'name': 'scissors'}, {'frequency': 'f', 'synset': 'scoreboard.n.01', 'synonyms': ['scoreboard'], 'id': 924, 'def': 'a large board for displaying the score of a contest (and some other information)', 'name': 'scoreboard'}, {'frequency': 'r', 'synset': 'scraper.n.01', 'synonyms': ['scraper'], 'id': 925, 'def': 'any of various hand tools for scraping', 'name': 'scraper'}, {'frequency': 'c', 'synset': 'screwdriver.n.01', 'synonyms': ['screwdriver'], 'id': 926, 'def': 'a hand tool for driving screws; has a tip that fits into the head of a screw', 'name': 'screwdriver'}, {'frequency': 'f', 'synset': 'scrub_brush.n.01', 'synonyms': ['scrubbing_brush'], 'id': 927, 'def': 'a brush with short stiff bristles for heavy cleaning', 'name': 'scrubbing_brush'}, {'frequency': 'c', 'synset': 'sculpture.n.01', 'synonyms': ['sculpture'], 'id': 928, 'def': 'a three-dimensional work of art', 'name': 'sculpture'}, {'frequency': 'c', 'synset': 'seabird.n.01', 'synonyms': ['seabird', 'seafowl'], 'id': 929, 'def': 'a bird that frequents coastal waters and the open ocean: gulls; pelicans; gannets; cormorants; albatrosses; petrels; etc.', 'name': 'seabird'}, {'frequency': 'c', 'synset': 'seahorse.n.02', 'synonyms': ['seahorse'], 'id': 930, 'def': 'small fish with horse-like heads bent sharply downward and curled tails', 'name': 'seahorse'}, {'frequency': 'r', 'synset': 'seaplane.n.01', 'synonyms': ['seaplane', 'hydroplane'], 'id': 931, 'def': 
'an airplane that can land on or take off from water', 'name': 'seaplane'}, {'frequency': 'c', 'synset': 'seashell.n.01', 'synonyms': ['seashell'], 'id': 932, 'def': 'the shell of a marine organism', 'name': 'seashell'}, {'frequency': 'c', 'synset': 'sewing_machine.n.01', 'synonyms': ['sewing_machine'], 'id': 933, 'def': 'a textile machine used as a home appliance for sewing', 'name': 'sewing_machine'}, {'frequency': 'c', 'synset': 'shaker.n.03', 'synonyms': ['shaker'], 'id': 934, 'def': 'a container in which something can be shaken', 'name': 'shaker'}, {'frequency': 'c', 'synset': 'shampoo.n.01', 'synonyms': ['shampoo'], 'id': 935, 'def': 'cleansing agent consisting of soaps or detergents used for washing the hair', 'name': 'shampoo'}, {'frequency': 'c', 'synset': 'shark.n.01', 'synonyms': ['shark'], 'id': 936, 'def': 'typically large carnivorous fishes with sharp teeth', 'name': 'shark'}, {'frequency': 'r', 'synset': 'sharpener.n.01', 'synonyms': ['sharpener'], 'id': 937, 'def': 'any implement that is used to make something (an edge or a point) sharper', 'name': 'sharpener'}, {'frequency': 'r', 'synset': 'sharpie.n.03', 'synonyms': ['Sharpie'], 'id': 938, 'def': 'a pen with indelible ink that will write on any surface', 'name': 'Sharpie'}, {'frequency': 'r', 'synset': 'shaver.n.03', 'synonyms': ['shaver_(electric)', 'electric_shaver', 'electric_razor'], 'id': 939, 'def': 'a razor powered by an electric motor', 'name': 'shaver_(electric)'}, {'frequency': 'c', 'synset': 'shaving_cream.n.01', 'synonyms': ['shaving_cream', 'shaving_soap'], 'id': 940, 'def': 'toiletry that forms a rich lather for softening the beard before shaving', 'name': 'shaving_cream'}, {'frequency': 'r', 'synset': 'shawl.n.01', 'synonyms': ['shawl'], 'id': 941, 'def': 'cloak consisting of an oblong piece of cloth used to cover the head and shoulders', 'name': 'shawl'}, {'frequency': 'r', 'synset': 'shears.n.01', 'synonyms': ['shears'], 'id': 942, 'def': 'large scissors with strong blades', 'name': 'shears'}, {'frequency': 'f', 'synset': 'sheep.n.01', 'synonyms': ['sheep'], 'id': 943, 'def': 'woolly usually horned ruminant mammal related to the goat', 'name': 'sheep'}, {'frequency': 'r', 'synset': 'shepherd_dog.n.01', 'synonyms': ['shepherd_dog', 'sheepdog'], 'id': 944, 'def': 'any of various usually long-haired breeds of dog reared to herd and guard sheep', 'name': 'shepherd_dog'}, {'frequency': 'r', 'synset': 'sherbert.n.01', 'synonyms': ['sherbert', 'sherbet'], 'id': 945, 'def': 'a frozen dessert made primarily of fruit juice and sugar', 'name': 'sherbert'}, {'frequency': 'c', 'synset': 'shield.n.02', 'synonyms': ['shield'], 'id': 946, 'def': 'armor carried on the arm to intercept blows', 'name': 'shield'}, {'frequency': 'f', 'synset': 'shirt.n.01', 'synonyms': ['shirt'], 'id': 947, 'def': 'a garment worn on the upper half of the body', 'name': 'shirt'}, {'frequency': 'f', 'synset': 'shoe.n.01', 'synonyms': ['shoe', 'sneaker_(type_of_shoe)', 'tennis_shoe'], 'id': 948, 'def': 'common footwear covering the foot', 'name': 'shoe'}, {'frequency': 'f', 'synset': 'shopping_bag.n.01', 'synonyms': ['shopping_bag'], 'id': 949, 'def': 'a bag made of plastic or strong paper (often with handles); used to transport goods after shopping', 'name': 'shopping_bag'}, {'frequency': 'c', 'synset': 'shopping_cart.n.01', 'synonyms': ['shopping_cart'], 'id': 950, 'def': 'a handcart that holds groceries or other goods while shopping', 'name': 'shopping_cart'}, {'frequency': 'f', 'synset': 'short_pants.n.01', 'synonyms': 
['short_pants', 'shorts_(clothing)', 'trunks_(clothing)'], 'id': 951, 'def': 'trousers that end at or above the knee', 'name': 'short_pants'}, {'frequency': 'r', 'synset': 'shot_glass.n.01', 'synonyms': ['shot_glass'], 'id': 952, 'def': 'a small glass adequate to hold a single swallow of whiskey', 'name': 'shot_glass'}, {'frequency': 'f', 'synset': 'shoulder_bag.n.01', 'synonyms': ['shoulder_bag'], 'id': 953, 'def': 'a large handbag that can be carried by a strap looped over the shoulder', 'name': 'shoulder_bag'}, {'frequency': 'c', 'synset': 'shovel.n.01', 'synonyms': ['shovel'], 'id': 954, 'def': 'a hand tool for lifting loose material such as snow, dirt, etc.', 'name': 'shovel'}, {'frequency': 'f', 'synset': 'shower.n.01', 'synonyms': ['shower_head'], 'id': 955, 'def': 'a plumbing fixture that sprays water over you', 'name': 'shower_head'}, {'frequency': 'r', 'synset': 'shower_cap.n.01', 'synonyms': ['shower_cap'], 'id': 956, 'def': 'a tight cap worn to keep hair dry while showering', 'name': 'shower_cap'}, {'frequency': 'f', 'synset': 'shower_curtain.n.01', 'synonyms': ['shower_curtain'], 'id': 957, 'def': 'a curtain that keeps water from splashing out of the shower area', 'name': 'shower_curtain'}, {'frequency': 'r', 'synset': 'shredder.n.01', 'synonyms': ['shredder_(for_paper)'], 'id': 958, 'def': 'a device that shreds documents', 'name': 'shredder_(for_paper)'}, {'frequency': 'f', 'synset': 'signboard.n.01', 'synonyms': ['signboard'], 'id': 959, 'def': 'structure displaying a board on which advertisements can be posted', 'name': 'signboard'}, {'frequency': 'c', 'synset': 'silo.n.01', 'synonyms': ['silo'], 'id': 960, 'def': 'a cylindrical tower used for storing goods', 'name': 'silo'}, {'frequency': 'f', 'synset': 'sink.n.01', 'synonyms': ['sink'], 'id': 961, 'def': 'plumbing fixture consisting of a water basin fixed to a wall or floor and having a drainpipe', 'name': 'sink'}, {'frequency': 'f', 'synset': 'skateboard.n.01', 'synonyms': ['skateboard'], 'id': 962, 'def': 'a board with wheels that is ridden in a standing or crouching position and propelled by foot', 'name': 'skateboard'}, {'frequency': 'c', 'synset': 'skewer.n.01', 'synonyms': ['skewer'], 'id': 963, 'def': 'a long pin for holding meat in position while it is being roasted', 'name': 'skewer'}, {'frequency': 'f', 'synset': 'ski.n.01', 'synonyms': ['ski'], 'id': 964, 'def': 'sports equipment for skiing on snow', 'name': 'ski'}, {'frequency': 'f', 'synset': 'ski_boot.n.01', 'synonyms': ['ski_boot'], 'id': 965, 'def': 'a stiff boot that is fastened to a ski with a ski binding', 'name': 'ski_boot'}, {'frequency': 'f', 'synset': 'ski_parka.n.01', 'synonyms': ['ski_parka', 'ski_jacket'], 'id': 966, 'def': 'a parka to be worn while skiing', 'name': 'ski_parka'}, {'frequency': 'f', 'synset': 'ski_pole.n.01', 'synonyms': ['ski_pole'], 'id': 967, 'def': 'a pole with metal points used as an aid in skiing', 'name': 'ski_pole'}, {'frequency': 'f', 'synset': 'skirt.n.02', 'synonyms': ['skirt'], 'id': 968, 'def': 'a garment hanging from the waist; worn mainly by girls and women', 'name': 'skirt'}, {'frequency': 'r', 'synset': 'skullcap.n.01', 'synonyms': ['skullcap'], 'id': 969, 'def': 'rounded brimless cap fitting the crown of the head', 'name': 'skullcap'}, {'frequency': 'c', 'synset': 'sled.n.01', 'synonyms': ['sled', 'sledge', 'sleigh'], 'id': 970, 'def': 'a vehicle or flat object for transportation over snow by sliding or pulled by dogs, etc.', 'name': 'sled'}, {'frequency': 'c', 'synset': 'sleeping_bag.n.01', 'synonyms': 
['sleeping_bag'], 'id': 971, 'def': 'large padded bag designed to be slept in outdoors', 'name': 'sleeping_bag'}, {'frequency': 'r', 'synset': 'sling.n.05', 'synonyms': ['sling_(bandage)', 'triangular_bandage'], 'id': 972, 'def': 'bandage to support an injured forearm; slung over the shoulder or neck', 'name': 'sling_(bandage)'}, {'frequency': 'c', 'synset': 'slipper.n.01', 'synonyms': ['slipper_(footwear)', 'carpet_slipper_(footwear)'], 'id': 973, 'def': 'low footwear that can be slipped on and off easily; usually worn indoors', 'name': 'slipper_(footwear)'}, {'frequency': 'r', 'synset': 'smoothie.n.02', 'synonyms': ['smoothie'], 'id': 974, 'def': 'a thick smooth drink consisting of fresh fruit pureed with ice cream or yoghurt or milk', 'name': 'smoothie'}, {'frequency': 'r', 'synset': 'snake.n.01', 'synonyms': ['snake', 'serpent'], 'id': 975, 'def': 'limbless scaly elongate reptile; some are venomous', 'name': 'snake'}, {'frequency': 'f', 'synset': 'snowboard.n.01', 'synonyms': ['snowboard'], 'id': 976, 'def': 'a board that resembles a broad ski or a small surfboard; used in a standing position to slide down snow-covered slopes', 'name': 'snowboard'}, {'frequency': 'c', 'synset': 'snowman.n.01', 'synonyms': ['snowman'], 'id': 977, 'def': 'a figure of a person made of packed snow', 'name': 'snowman'}, {'frequency': 'c', 'synset': 'snowmobile.n.01', 'synonyms': ['snowmobile'], 'id': 978, 'def': 'tracked vehicle for travel on snow having skis in front', 'name': 'snowmobile'}, {'frequency': 'f', 'synset': 'soap.n.01', 'synonyms': ['soap'], 'id': 979, 'def': 'a cleansing agent made from the salts of vegetable or animal fats', 'name': 'soap'}, {'frequency': 'f', 'synset': 'soccer_ball.n.01', 'synonyms': ['soccer_ball'], 'id': 980, 'def': "an inflated ball used in playing soccer (called `football' outside of the United States)", 'name': 'soccer_ball'}, {'frequency': 'f', 'synset': 'sock.n.01', 'synonyms': ['sock'], 'id': 981, 'def': 'cloth covering for the foot; worn inside the shoe; reaches to between the ankle and the knee', 'name': 'sock'}, {'frequency': 'f', 'synset': 'sofa.n.01', 'synonyms': ['sofa', 'couch', 'lounge'], 'id': 982, 'def': 'an upholstered seat for more than one person', 'name': 'sofa'}, {'frequency': 'r', 'synset': 'softball.n.01', 'synonyms': ['softball'], 'id': 983, 'def': 'ball used in playing softball', 'name': 'softball'}, {'frequency': 'c', 'synset': 'solar_array.n.01', 'synonyms': ['solar_array', 'solar_battery', 'solar_panel'], 'id': 984, 'def': 'electrical device consisting of a large array of connected solar cells', 'name': 'solar_array'}, {'frequency': 'r', 'synset': 'sombrero.n.02', 'synonyms': ['sombrero'], 'id': 985, 'def': 'a straw hat with a tall crown and broad brim; worn in American southwest and in Mexico', 'name': 'sombrero'}, {'frequency': 'f', 'synset': 'soup.n.01', 'synonyms': ['soup'], 'id': 986, 'def': 'liquid food especially of meat or fish or vegetable stock often containing pieces of solid food', 'name': 'soup'}, {'frequency': 'r', 'synset': 'soup_bowl.n.01', 'synonyms': ['soup_bowl'], 'id': 987, 'def': 'a bowl for serving soup', 'name': 'soup_bowl'}, {'frequency': 'c', 'synset': 'soupspoon.n.01', 'synonyms': ['soupspoon'], 'id': 988, 'def': 'a spoon with a rounded bowl for eating soup', 'name': 'soupspoon'}, {'frequency': 'c', 'synset': 'sour_cream.n.01', 'synonyms': ['sour_cream', 'soured_cream'], 'id': 989, 'def': 'soured light cream', 'name': 'sour_cream'}, {'frequency': 'r', 'synset': 'soya_milk.n.01', 'synonyms': ['soya_milk', 
'soybean_milk', 'soymilk'], 'id': 990, 'def': 'a milk substitute containing soybean flour and water; used in some infant formulas and in making tofu', 'name': 'soya_milk'}, {'frequency': 'r', 'synset': 'space_shuttle.n.01', 'synonyms': ['space_shuttle'], 'id': 991, 'def': "a reusable spacecraft with wings for a controlled descent through the Earth's atmosphere", 'name': 'space_shuttle'}, {'frequency': 'r', 'synset': 'sparkler.n.02', 'synonyms': ['sparkler_(fireworks)'], 'id': 992, 'def': 'a firework that burns slowly and throws out a shower of sparks', 'name': 'sparkler_(fireworks)'}, {'frequency': 'f', 'synset': 'spatula.n.02', 'synonyms': ['spatula'], 'id': 993, 'def': 'a hand tool with a thin flexible blade used to mix or spread soft substances', 'name': 'spatula'}, {'frequency': 'r', 'synset': 'spear.n.01', 'synonyms': ['spear', 'lance'], 'id': 994, 'def': 'a long pointed rod used as a tool or weapon', 'name': 'spear'}, {'frequency': 'f', 'synset': 'spectacles.n.01', 'synonyms': ['spectacles', 'specs', 'eyeglasses', 'glasses'], 'id': 995, 'def': 'optical instrument consisting of a frame that holds a pair of lenses for correcting defective vision', 'name': 'spectacles'}, {'frequency': 'c', 'synset': 'spice_rack.n.01', 'synonyms': ['spice_rack'], 'id': 996, 'def': 'a rack for displaying containers filled with spices', 'name': 'spice_rack'}, {'frequency': 'c', 'synset': 'spider.n.01', 'synonyms': ['spider'], 'id': 997, 'def': 'predatory arachnid with eight legs, two poison fangs, two feelers, and usually two silk-spinning organs at the back end of the body', 'name': 'spider'}, {'frequency': 'r', 'synset': 'spiny_lobster.n.02', 'synonyms': ['crawfish', 'crayfish'], 'id': 998, 'def': 'large edible marine crustacean having a spiny carapace but lacking the large pincers of true lobsters', 'name': 'crawfish'}, {'frequency': 'c', 'synset': 'sponge.n.01', 'synonyms': ['sponge'], 'id': 999, 'def': 'a porous mass usable to absorb water typically used for cleaning', 'name': 'sponge'}, {'frequency': 'f', 'synset': 'spoon.n.01', 'synonyms': ['spoon'], 'id': 1000, 'def': 'a piece of cutlery with a shallow bowl-shaped container and a handle', 'name': 'spoon'}, {'frequency': 'c', 'synset': 'sportswear.n.01', 'synonyms': ['sportswear', 'athletic_wear', 'activewear'], 'id': 1001, 'def': 'attire worn for sport or for casual wear', 'name': 'sportswear'}, {'frequency': 'c', 'synset': 'spotlight.n.02', 'synonyms': ['spotlight'], 'id': 1002, 'def': 'a lamp that produces a strong beam of light to illuminate a restricted area; used to focus attention of a stage performer', 'name': 'spotlight'}, {'frequency': 'r', 'synset': 'squid.n.01', 'synonyms': ['squid_(food)', 'calamari', 'calamary'], 'id': 1003, 'def': '(Italian cuisine) squid prepared as food', 'name': 'squid_(food)'}, {'frequency': 'c', 'synset': 'squirrel.n.01', 'synonyms': ['squirrel'], 'id': 1004, 'def': 'a kind of arboreal rodent having a long bushy tail', 'name': 'squirrel'}, {'frequency': 'r', 'synset': 'stagecoach.n.01', 'synonyms': ['stagecoach'], 'id': 1005, 'def': 'a large coach-and-four formerly used to carry passengers and mail on regular routes between towns', 'name': 'stagecoach'}, {'frequency': 'c', 'synset': 'stapler.n.01', 'synonyms': ['stapler_(stapling_machine)'], 'id': 1006, 'def': 'a machine that inserts staples into sheets of paper in order to fasten them together', 'name': 'stapler_(stapling_machine)'}, {'frequency': 'c', 'synset': 'starfish.n.01', 'synonyms': ['starfish', 'sea_star'], 'id': 1007, 'def': 'echinoderms characterized 
by five arms extending from a central disk', 'name': 'starfish'}, {'frequency': 'f', 'synset': 'statue.n.01', 'synonyms': ['statue_(sculpture)'], 'id': 1008, 'def': 'a sculpture representing a human or animal', 'name': 'statue_(sculpture)'}, {'frequency': 'c', 'synset': 'steak.n.01', 'synonyms': ['steak_(food)'], 'id': 1009, 'def': 'a slice of meat cut from the fleshy part of an animal or large fish', 'name': 'steak_(food)'}, {'frequency': 'r', 'synset': 'steak_knife.n.01', 'synonyms': ['steak_knife'], 'id': 1010, 'def': 'a sharp table knife used in eating steak', 'name': 'steak_knife'}, {'frequency': 'f', 'synset': 'steering_wheel.n.01', 'synonyms': ['steering_wheel'], 'id': 1011, 'def': 'a handwheel that is used for steering', 'name': 'steering_wheel'}, {'frequency': 'r', 'synset': 'step_ladder.n.01', 'synonyms': ['stepladder'], 'id': 1012, 'def': 'a folding portable ladder hinged at the top', 'name': 'stepladder'}, {'frequency': 'c', 'synset': 'step_stool.n.01', 'synonyms': ['step_stool'], 'id': 1013, 'def': 'a stool that has one or two steps that fold under the seat', 'name': 'step_stool'}, {'frequency': 'c', 'synset': 'stereo.n.01', 'synonyms': ['stereo_(sound_system)'], 'id': 1014, 'def': 'electronic device for playing audio', 'name': 'stereo_(sound_system)'}, {'frequency': 'r', 'synset': 'stew.n.02', 'synonyms': ['stew'], 'id': 1015, 'def': 'food prepared by stewing especially meat or fish with vegetables', 'name': 'stew'}, {'frequency': 'r', 'synset': 'stirrer.n.02', 'synonyms': ['stirrer'], 'id': 1016, 'def': 'an implement used for stirring', 'name': 'stirrer'}, {'frequency': 'f', 'synset': 'stirrup.n.01', 'synonyms': ['stirrup'], 'id': 1017, 'def': "support consisting of metal loops into which rider's feet go", 'name': 'stirrup'}, {'frequency': 'f', 'synset': 'stool.n.01', 'synonyms': ['stool'], 'id': 1018, 'def': 'a simple seat without a back or arms', 'name': 'stool'}, {'frequency': 'f', 'synset': 'stop_sign.n.01', 'synonyms': ['stop_sign'], 'id': 1019, 'def': 'a traffic sign to notify drivers that they must come to a complete stop', 'name': 'stop_sign'}, {'frequency': 'f', 'synset': 'stoplight.n.01', 'synonyms': ['brake_light'], 'id': 1020, 'def': 'a red light on the rear of a motor vehicle that signals when the brakes are applied', 'name': 'brake_light'}, {'frequency': 'f', 'synset': 'stove.n.01', 'synonyms': ['stove', 'kitchen_stove', 'range_(kitchen_appliance)', 'kitchen_range', 'cooking_stove'], 'id': 1021, 'def': 'a kitchen appliance used for cooking food', 'name': 'stove'}, {'frequency': 'c', 'synset': 'strainer.n.01', 'synonyms': ['strainer'], 'id': 1022, 'def': 'a filter to retain larger pieces while smaller pieces and liquids pass through', 'name': 'strainer'}, {'frequency': 'f', 'synset': 'strap.n.01', 'synonyms': ['strap'], 'id': 1023, 'def': 'an elongated strip of material for binding things together or holding', 'name': 'strap'}, {'frequency': 'f', 'synset': 'straw.n.04', 'synonyms': ['straw_(for_drinking)', 'drinking_straw'], 'id': 1024, 'def': 'a thin paper or plastic tube used to suck liquids into the mouth', 'name': 'straw_(for_drinking)'}, {'frequency': 'f', 'synset': 'strawberry.n.01', 'synonyms': ['strawberry'], 'id': 1025, 'def': 'sweet fleshy red fruit', 'name': 'strawberry'}, {'frequency': 'f', 'synset': 'street_sign.n.01', 'synonyms': ['street_sign'], 'id': 1026, 'def': 'a sign visible from the street', 'name': 'street_sign'}, {'frequency': 'f', 'synset': 'streetlight.n.01', 'synonyms': ['streetlight', 'street_lamp'], 'id': 1027, 'def': 'a lamp 
supported on a lamppost; for illuminating a street', 'name': 'streetlight'}, {'frequency': 'r', 'synset': 'string_cheese.n.01', 'synonyms': ['string_cheese'], 'id': 1028, 'def': 'cheese formed in long strings twisted together', 'name': 'string_cheese'}, {'frequency': 'r', 'synset': 'stylus.n.02', 'synonyms': ['stylus'], 'id': 1029, 'def': 'a pointed tool for writing or drawing or engraving, including pens', 'name': 'stylus'}, {'frequency': 'r', 'synset': 'subwoofer.n.01', 'synonyms': ['subwoofer'], 'id': 1030, 'def': 'a loudspeaker that is designed to reproduce very low bass frequencies', 'name': 'subwoofer'}, {'frequency': 'r', 'synset': 'sugar_bowl.n.01', 'synonyms': ['sugar_bowl'], 'id': 1031, 'def': 'a dish in which sugar is served', 'name': 'sugar_bowl'}, {'frequency': 'r', 'synset': 'sugarcane.n.01', 'synonyms': ['sugarcane_(plant)'], 'id': 1032, 'def': 'juicy canes whose sap is a source of molasses and commercial sugar; fresh canes are sometimes chewed for the juice', 'name': 'sugarcane_(plant)'}, {'frequency': 'f', 'synset': 'suit.n.01', 'synonyms': ['suit_(clothing)'], 'id': 1033, 'def': 'a set of garments (usually including a jacket and trousers or skirt) for outerwear all of the same fabric and color', 'name': 'suit_(clothing)'}, {'frequency': 'c', 'synset': 'sunflower.n.01', 'synonyms': ['sunflower'], 'id': 1034, 'def': 'any plant of the genus Helianthus having large flower heads with dark disk florets and showy yellow rays', 'name': 'sunflower'}, {'frequency': 'f', 'synset': 'sunglasses.n.01', 'synonyms': ['sunglasses'], 'id': 1035, 'def': 'spectacles that are darkened or polarized to protect the eyes from the glare of the sun', 'name': 'sunglasses'}, {'frequency': 'c', 'synset': 'sunhat.n.01', 'synonyms': ['sunhat'], 'id': 1036, 'def': 'a hat with a broad brim that protects the face from direct exposure to the sun', 'name': 'sunhat'}, {'frequency': 'f', 'synset': 'surfboard.n.01', 'synonyms': ['surfboard'], 'id': 1037, 'def': 'a narrow buoyant board for riding surf', 'name': 'surfboard'}, {'frequency': 'c', 'synset': 'sushi.n.01', 'synonyms': ['sushi'], 'id': 1038, 'def': 'rice (with raw fish) wrapped in seaweed', 'name': 'sushi'}, {'frequency': 'c', 'synset': 'swab.n.02', 'synonyms': ['mop'], 'id': 1039, 'def': 'cleaning implement consisting of absorbent material fastened to a handle; for cleaning floors', 'name': 'mop'}, {'frequency': 'c', 'synset': 'sweat_pants.n.01', 'synonyms': ['sweat_pants'], 'id': 1040, 'def': 'loose-fitting trousers with elastic cuffs; worn by athletes', 'name': 'sweat_pants'}, {'frequency': 'c', 'synset': 'sweatband.n.02', 'synonyms': ['sweatband'], 'id': 1041, 'def': 'a band of material tied around the forehead or wrist to absorb sweat', 'name': 'sweatband'}, {'frequency': 'f', 'synset': 'sweater.n.01', 'synonyms': ['sweater'], 'id': 1042, 'def': 'a crocheted or knitted garment covering the upper part of the body', 'name': 'sweater'}, {'frequency': 'f', 'synset': 'sweatshirt.n.01', 'synonyms': ['sweatshirt'], 'id': 1043, 'def': 'cotton knit pullover with long sleeves worn during athletic activity', 'name': 'sweatshirt'}, {'frequency': 'c', 'synset': 'sweet_potato.n.02', 'synonyms': ['sweet_potato'], 'id': 1044, 'def': 'the edible tuberous root of the sweet potato vine', 'name': 'sweet_potato'}, {'frequency': 'f', 'synset': 'swimsuit.n.01', 'synonyms': ['swimsuit', 'swimwear', 'bathing_suit', 'swimming_costume', 'bathing_costume', 'swimming_trunks', 'bathing_trunks'], 'id': 1045, 'def': 'garment worn for swimming', 'name': 'swimsuit'}, {'frequency': 
'c', 'synset': 'sword.n.01', 'synonyms': ['sword'], 'id': 1046, 'def': 'a cutting or thrusting weapon that has a long metal blade', 'name': 'sword'}, {'frequency': 'r', 'synset': 'syringe.n.01', 'synonyms': ['syringe'], 'id': 1047, 'def': 'a medical instrument used to inject or withdraw fluids', 'name': 'syringe'}, {'frequency': 'r', 'synset': 'tabasco.n.02', 'synonyms': ['Tabasco_sauce'], 'id': 1048, 'def': 'very spicy sauce (trade name Tabasco) made from fully-aged red peppers', 'name': 'Tabasco_sauce'}, {'frequency': 'r', 'synset': 'table-tennis_table.n.01', 'synonyms': ['table-tennis_table', 'ping-pong_table'], 'id': 1049, 'def': 'a table used for playing table tennis', 'name': 'table-tennis_table'}, {'frequency': 'f', 'synset': 'table.n.02', 'synonyms': ['table'], 'id': 1050, 'def': 'a piece of furniture having a smooth flat top that is usually supported by one or more vertical legs', 'name': 'table'}, {'frequency': 'c', 'synset': 'table_lamp.n.01', 'synonyms': ['table_lamp'], 'id': 1051, 'def': 'a lamp that sits on a table', 'name': 'table_lamp'}, {'frequency': 'f', 'synset': 'tablecloth.n.01', 'synonyms': ['tablecloth'], 'id': 1052, 'def': 'a covering spread over a dining table', 'name': 'tablecloth'}, {'frequency': 'r', 'synset': 'tachometer.n.01', 'synonyms': ['tachometer'], 'id': 1053, 'def': 'measuring instrument for indicating speed of rotation', 'name': 'tachometer'}, {'frequency': 'r', 'synset': 'taco.n.02', 'synonyms': ['taco'], 'id': 1054, 'def': 'a small tortilla cupped around a filling', 'name': 'taco'}, {'frequency': 'f', 'synset': 'tag.n.02', 'synonyms': ['tag'], 'id': 1055, 'def': 'a label associated with something for the purpose of identification or information', 'name': 'tag'}, {'frequency': 'f', 'synset': 'taillight.n.01', 'synonyms': ['taillight', 'rear_light'], 'id': 1056, 'def': 'lamp (usually red) mounted at the rear of a motor vehicle', 'name': 'taillight'}, {'frequency': 'r', 'synset': 'tambourine.n.01', 'synonyms': ['tambourine'], 'id': 1057, 'def': 'a shallow drum with a single drumhead and with metallic disks in the sides', 'name': 'tambourine'}, {'frequency': 'r', 'synset': 'tank.n.01', 'synonyms': ['army_tank', 'armored_combat_vehicle', 'armoured_combat_vehicle'], 'id': 1058, 'def': 'an enclosed armored military vehicle; has a cannon and moves on caterpillar treads', 'name': 'army_tank'}, {'frequency': 'f', 'synset': 'tank.n.02', 'synonyms': ['tank_(storage_vessel)', 'storage_tank'], 'id': 1059, 'def': 'a large (usually metallic) vessel for holding gases or liquids', 'name': 'tank_(storage_vessel)'}, {'frequency': 'f', 'synset': 'tank_top.n.01', 'synonyms': ['tank_top_(clothing)'], 'id': 1060, 'def': 'a tight-fitting sleeveless shirt with wide shoulder straps and low neck and no front opening', 'name': 'tank_top_(clothing)'}, {'frequency': 'f', 'synset': 'tape.n.01', 'synonyms': ['tape_(sticky_cloth_or_paper)'], 'id': 1061, 'def': 'a long thin piece of cloth or paper as used for binding or fastening', 'name': 'tape_(sticky_cloth_or_paper)'}, {'frequency': 'c', 'synset': 'tape.n.04', 'synonyms': ['tape_measure', 'measuring_tape'], 'id': 1062, 'def': 'measuring instrument consisting of a narrow strip (cloth or metal) marked in inches or centimeters and used for measuring lengths', 'name': 'tape_measure'}, {'frequency': 'c', 'synset': 'tapestry.n.02', 'synonyms': ['tapestry'], 'id': 1063, 'def': 'a heavy textile with a woven design; used for curtains and upholstery', 'name': 'tapestry'}, {'frequency': 'f', 'synset': 'tarpaulin.n.01', 'synonyms': ['tarp'], 
'id': 1064, 'def': 'waterproofed canvas', 'name': 'tarp'}, {'frequency': 'c', 'synset': 'tartan.n.01', 'synonyms': ['tartan', 'plaid'], 'id': 1065, 'def': 'a cloth having a crisscross design', 'name': 'tartan'}, {'frequency': 'c', 'synset': 'tassel.n.01', 'synonyms': ['tassel'], 'id': 1066, 'def': 'adornment consisting of a bunch of cords fastened at one end', 'name': 'tassel'}, {'frequency': 'c', 'synset': 'tea_bag.n.01', 'synonyms': ['tea_bag'], 'id': 1067, 'def': 'a measured amount of tea in a bag for an individual serving of tea', 'name': 'tea_bag'}, {'frequency': 'c', 'synset': 'teacup.n.02', 'synonyms': ['teacup'], 'id': 1068, 'def': 'a cup from which tea is drunk', 'name': 'teacup'}, {'frequency': 'c', 'synset': 'teakettle.n.01', 'synonyms': ['teakettle'], 'id': 1069, 'def': 'kettle for boiling water to make tea', 'name': 'teakettle'}, {'frequency': 'f', 'synset': 'teapot.n.01', 'synonyms': ['teapot'], 'id': 1070, 'def': 'pot for brewing tea; usually has a spout and handle', 'name': 'teapot'}, {'frequency': 'f', 'synset': 'teddy.n.01', 'synonyms': ['teddy_bear'], 'id': 1071, 'def': "plaything consisting of a child's toy bear (usually plush and stuffed with soft materials)", 'name': 'teddy_bear'}, {'frequency': 'f', 'synset': 'telephone.n.01', 'synonyms': ['telephone', 'phone', 'telephone_set'], 'id': 1072, 'def': 'electronic device for communicating by voice over long distances (includes wired and wireless/cell phones)', 'name': 'telephone'}, {'frequency': 'c', 'synset': 'telephone_booth.n.01', 'synonyms': ['telephone_booth', 'phone_booth', 'call_box', 'telephone_box', 'telephone_kiosk'], 'id': 1073, 'def': 'booth for using a telephone', 'name': 'telephone_booth'}, {'frequency': 'f', 'synset': 'telephone_pole.n.01', 'synonyms': ['telephone_pole', 'telegraph_pole', 'telegraph_post'], 'id': 1074, 'def': 'tall pole supporting telephone wires', 'name': 'telephone_pole'}, {'frequency': 'r', 'synset': 'telephoto_lens.n.01', 'synonyms': ['telephoto_lens', 'zoom_lens'], 'id': 1075, 'def': 'a camera lens that magnifies the image', 'name': 'telephoto_lens'}, {'frequency': 'c', 'synset': 'television_camera.n.01', 'synonyms': ['television_camera', 'tv_camera'], 'id': 1076, 'def': 'television equipment for capturing and recording video', 'name': 'television_camera'}, {'frequency': 'f', 'synset': 'television_receiver.n.01', 'synonyms': ['television_set', 'tv', 'tv_set'], 'id': 1077, 'def': 'an electronic device that receives television signals and displays them on a screen', 'name': 'television_set'}, {'frequency': 'f', 'synset': 'tennis_ball.n.01', 'synonyms': ['tennis_ball'], 'id': 1078, 'def': 'ball about the size of a fist used in playing tennis', 'name': 'tennis_ball'}, {'frequency': 'f', 'synset': 'tennis_racket.n.01', 'synonyms': ['tennis_racket'], 'id': 1079, 'def': 'a racket used to play tennis', 'name': 'tennis_racket'}, {'frequency': 'r', 'synset': 'tequila.n.01', 'synonyms': ['tequila'], 'id': 1080, 'def': 'Mexican liquor made from fermented juices of an agave plant', 'name': 'tequila'}, {'frequency': 'c', 'synset': 'thermometer.n.01', 'synonyms': ['thermometer'], 'id': 1081, 'def': 'measuring instrument for measuring temperature', 'name': 'thermometer'}, {'frequency': 'c', 'synset': 'thermos.n.01', 'synonyms': ['thermos_bottle'], 'id': 1082, 'def': 'vacuum flask that preserves temperature of hot or cold drinks', 'name': 'thermos_bottle'}, {'frequency': 'f', 'synset': 'thermostat.n.01', 'synonyms': ['thermostat'], 'id': 1083, 'def': 'a regulator for automatically regulating 
temperature by starting or stopping the supply of heat', 'name': 'thermostat'}, {'frequency': 'r', 'synset': 'thimble.n.02', 'synonyms': ['thimble'], 'id': 1084, 'def': 'a small metal cap to protect the finger while sewing; can be used as a small container', 'name': 'thimble'}, {'frequency': 'c', 'synset': 'thread.n.01', 'synonyms': ['thread', 'yarn'], 'id': 1085, 'def': 'a fine cord of twisted fibers (of cotton or silk or wool or nylon etc.) used in sewing and weaving', 'name': 'thread'}, {'frequency': 'c', 'synset': 'thumbtack.n.01', 'synonyms': ['thumbtack', 'drawing_pin', 'pushpin'], 'id': 1086, 'def': 'a tack for attaching papers to a bulletin board or drawing board', 'name': 'thumbtack'}, {'frequency': 'c', 'synset': 'tiara.n.01', 'synonyms': ['tiara'], 'id': 1087, 'def': 'a jeweled headdress worn by women on formal occasions', 'name': 'tiara'}, {'frequency': 'c', 'synset': 'tiger.n.02', 'synonyms': ['tiger'], 'id': 1088, 'def': 'large feline of forests in most of Asia having a tawny coat with black stripes', 'name': 'tiger'}, {'frequency': 'c', 'synset': 'tights.n.01', 'synonyms': ['tights_(clothing)', 'leotards'], 'id': 1089, 'def': 'skintight knit hose covering the body from the waist to the feet worn by acrobats and dancers and as stockings by women and girls', 'name': 'tights_(clothing)'}, {'frequency': 'c', 'synset': 'timer.n.01', 'synonyms': ['timer', 'stopwatch'], 'id': 1090, 'def': 'a timepiece that measures a time interval and signals its end', 'name': 'timer'}, {'frequency': 'f', 'synset': 'tinfoil.n.01', 'synonyms': ['tinfoil'], 'id': 1091, 'def': 'foil made of tin or an alloy of tin and lead', 'name': 'tinfoil'}, {'frequency': 'c', 'synset': 'tinsel.n.01', 'synonyms': ['tinsel'], 'id': 1092, 'def': 'a showy decoration that is basically valueless', 'name': 'tinsel'}, {'frequency': 'f', 'synset': 'tissue.n.02', 'synonyms': ['tissue_paper'], 'id': 1093, 'def': 'a soft thin (usually translucent) paper', 'name': 'tissue_paper'}, {'frequency': 'c', 'synset': 'toast.n.01', 'synonyms': ['toast_(food)'], 'id': 1094, 'def': 'slice of bread that has been toasted', 'name': 'toast_(food)'}, {'frequency': 'f', 'synset': 'toaster.n.02', 'synonyms': ['toaster'], 'id': 1095, 'def': 'a kitchen appliance (usually electric) for toasting bread', 'name': 'toaster'}, {'frequency': 'f', 'synset': 'toaster_oven.n.01', 'synonyms': ['toaster_oven'], 'id': 1096, 'def': 'kitchen appliance consisting of a small electric oven for toasting or warming food', 'name': 'toaster_oven'}, {'frequency': 'f', 'synset': 'toilet.n.02', 'synonyms': ['toilet'], 'id': 1097, 'def': 'a plumbing fixture for defecation and urination', 'name': 'toilet'}, {'frequency': 'f', 'synset': 'toilet_tissue.n.01', 'synonyms': ['toilet_tissue', 'toilet_paper', 'bathroom_tissue'], 'id': 1098, 'def': 'a soft thin absorbent paper for use in toilets', 'name': 'toilet_tissue'}, {'frequency': 'f', 'synset': 'tomato.n.01', 'synonyms': ['tomato'], 'id': 1099, 'def': 'mildly acid red or yellow pulpy fruit eaten as a vegetable', 'name': 'tomato'}, {'frequency': 'f', 'synset': 'tongs.n.01', 'synonyms': ['tongs'], 'id': 1100, 'def': 'any of various devices for taking hold of objects; usually have two hinged legs with handles above and pointed hooks below', 'name': 'tongs'}, {'frequency': 'c', 'synset': 'toolbox.n.01', 'synonyms': ['toolbox'], 'id': 1101, 'def': 'a box or chest or cabinet for holding hand tools', 'name': 'toolbox'}, {'frequency': 'f', 'synset': 'toothbrush.n.01', 'synonyms': ['toothbrush'], 'id': 1102, 'def': 'small brush; has 
long handle; used to clean teeth', 'name': 'toothbrush'}, {'frequency': 'f', 'synset': 'toothpaste.n.01', 'synonyms': ['toothpaste'], 'id': 1103, 'def': 'a dentifrice in the form of a paste', 'name': 'toothpaste'}, {'frequency': 'f', 'synset': 'toothpick.n.01', 'synonyms': ['toothpick'], 'id': 1104, 'def': 'pick consisting of a small strip of wood or plastic; used to pick food from between the teeth', 'name': 'toothpick'}, {'frequency': 'f', 'synset': 'top.n.09', 'synonyms': ['cover'], 'id': 1105, 'def': 'covering for a hole (especially a hole in the top of a container)', 'name': 'cover'}, {'frequency': 'c', 'synset': 'tortilla.n.01', 'synonyms': ['tortilla'], 'id': 1106, 'def': 'thin unleavened pancake made from cornmeal or wheat flour', 'name': 'tortilla'}, {'frequency': 'c', 'synset': 'tow_truck.n.01', 'synonyms': ['tow_truck'], 'id': 1107, 'def': 'a truck equipped to hoist and pull wrecked cars (or to remove cars from no-parking zones)', 'name': 'tow_truck'}, {'frequency': 'f', 'synset': 'towel.n.01', 'synonyms': ['towel'], 'id': 1108, 'def': 'a rectangular piece of absorbent cloth (or paper) for drying or wiping', 'name': 'towel'}, {'frequency': 'f', 'synset': 'towel_rack.n.01', 'synonyms': ['towel_rack', 'towel_rail', 'towel_bar'], 'id': 1109, 'def': 'a rack consisting of one or more bars on which towels can be hung', 'name': 'towel_rack'}, {'frequency': 'f', 'synset': 'toy.n.03', 'synonyms': ['toy'], 'id': 1110, 'def': 'a device regarded as providing amusement', 'name': 'toy'}, {'frequency': 'c', 'synset': 'tractor.n.01', 'synonyms': ['tractor_(farm_equipment)'], 'id': 1111, 'def': 'a wheeled vehicle with large wheels; used in farming and other applications', 'name': 'tractor_(farm_equipment)'}, {'frequency': 'f', 'synset': 'traffic_light.n.01', 'synonyms': ['traffic_light'], 'id': 1112, 'def': 'a device to control vehicle traffic often consisting of three or more lights', 'name': 'traffic_light'}, {'frequency': 'c', 'synset': 'trail_bike.n.01', 'synonyms': ['dirt_bike'], 'id': 1113, 'def': 'a lightweight motorcycle equipped with rugged tires and suspension for off-road use', 'name': 'dirt_bike'}, {'frequency': 'f', 'synset': 'trailer_truck.n.01', 'synonyms': ['trailer_truck', 'tractor_trailer', 'trucking_rig', 'articulated_lorry', 'semi_truck'], 'id': 1114, 'def': 'a truck consisting of a tractor and trailer together', 'name': 'trailer_truck'}, {'frequency': 'f', 'synset': 'train.n.01', 'synonyms': ['train_(railroad_vehicle)', 'railroad_train'], 'id': 1115, 'def': 'public or private transport provided by a line of railway cars coupled together and drawn by a locomotive', 'name': 'train_(railroad_vehicle)'}, {'frequency': 'r', 'synset': 'trampoline.n.01', 'synonyms': ['trampoline'], 'id': 1116, 'def': 'gymnastic apparatus consisting of a strong canvas sheet attached with springs to a metal frame', 'name': 'trampoline'}, {'frequency': 'f', 'synset': 'tray.n.01', 'synonyms': ['tray'], 'id': 1117, 'def': 'an open receptacle for holding or displaying or serving articles or food', 'name': 'tray'}, {'frequency': 'r', 'synset': 'trench_coat.n.01', 'synonyms': ['trench_coat'], 'id': 1118, 'def': 'a military style raincoat; belted with deep pockets', 'name': 'trench_coat'}, {'frequency': 'r', 'synset': 'triangle.n.05', 'synonyms': ['triangle_(musical_instrument)'], 'id': 1119, 'def': 'a percussion instrument consisting of a metal bar bent in the shape of an open triangle', 'name': 'triangle_(musical_instrument)'}, {'frequency': 'c', 'synset': 'tricycle.n.01', 'synonyms': ['tricycle'], 'id': 
1120, 'def': 'a vehicle with three wheels that is moved by foot pedals', 'name': 'tricycle'}, {'frequency': 'f', 'synset': 'tripod.n.01', 'synonyms': ['tripod'], 'id': 1121, 'def': 'a three-legged rack used for support', 'name': 'tripod'}, {'frequency': 'f', 'synset': 'trouser.n.01', 'synonyms': ['trousers', 'pants_(clothing)'], 'id': 1122, 'def': 'a garment extending from the waist to the knee or ankle, covering each leg separately', 'name': 'trousers'}, {'frequency': 'f', 'synset': 'truck.n.01', 'synonyms': ['truck'], 'id': 1123, 'def': 'an automotive vehicle suitable for hauling', 'name': 'truck'}, {'frequency': 'r', 'synset': 'truffle.n.03', 'synonyms': ['truffle_(chocolate)', 'chocolate_truffle'], 'id': 1124, 'def': 'creamy chocolate candy', 'name': 'truffle_(chocolate)'}, {'frequency': 'c', 'synset': 'trunk.n.02', 'synonyms': ['trunk'], 'id': 1125, 'def': 'luggage consisting of a large strong case used when traveling or for storage', 'name': 'trunk'}, {'frequency': 'r', 'synset': 'tub.n.02', 'synonyms': ['vat'], 'id': 1126, 'def': 'a large vessel for holding or storing liquids', 'name': 'vat'}, {'frequency': 'c', 'synset': 'turban.n.01', 'synonyms': ['turban'], 'id': 1127, 'def': 'a traditional headdress consisting of a long scarf wrapped around the head', 'name': 'turban'}, {'frequency': 'c', 'synset': 'turkey.n.04', 'synonyms': ['turkey_(food)'], 'id': 1128, 'def': 'flesh of large domesticated fowl usually roasted', 'name': 'turkey_(food)'}, {'frequency': 'r', 'synset': 'turnip.n.01', 'synonyms': ['turnip'], 'id': 1129, 'def': 'widely cultivated plant having a large fleshy edible white or yellow root', 'name': 'turnip'}, {'frequency': 'c', 'synset': 'turtle.n.02', 'synonyms': ['turtle'], 'id': 1130, 'def': 'any of various aquatic and land reptiles having a bony shell and flipper-like limbs for swimming', 'name': 'turtle'}, {'frequency': 'c', 'synset': 'turtleneck.n.01', 'synonyms': ['turtleneck_(clothing)', 'polo-neck'], 'id': 1131, 'def': 'a sweater or jersey with a high close-fitting collar', 'name': 'turtleneck_(clothing)'}, {'frequency': 'c', 'synset': 'typewriter.n.01', 'synonyms': ['typewriter'], 'id': 1132, 'def': 'hand-operated character printer for printing written messages one character at a time', 'name': 'typewriter'}, {'frequency': 'f', 'synset': 'umbrella.n.01', 'synonyms': ['umbrella'], 'id': 1133, 'def': 'a lightweight handheld collapsible canopy', 'name': 'umbrella'}, {'frequency': 'f', 'synset': 'underwear.n.01', 'synonyms': ['underwear', 'underclothes', 'underclothing', 'underpants'], 'id': 1134, 'def': 'undergarment worn next to the skin and under the outer garments', 'name': 'underwear'}, {'frequency': 'r', 'synset': 'unicycle.n.01', 'synonyms': ['unicycle'], 'id': 1135, 'def': 'a vehicle with a single wheel that is driven by pedals', 'name': 'unicycle'}, {'frequency': 'f', 'synset': 'urinal.n.01', 'synonyms': ['urinal'], 'id': 1136, 'def': 'a plumbing fixture (usually attached to the wall) used by men to urinate', 'name': 'urinal'}, {'frequency': 'c', 'synset': 'urn.n.01', 'synonyms': ['urn'], 'id': 1137, 'def': 'a large vase that usually has a pedestal or feet', 'name': 'urn'}, {'frequency': 'c', 'synset': 'vacuum.n.04', 'synonyms': ['vacuum_cleaner'], 'id': 1138, 'def': 'an electrical home appliance that cleans by suction', 'name': 'vacuum_cleaner'}, {'frequency': 'f', 'synset': 'vase.n.01', 'synonyms': ['vase'], 'id': 1139, 'def': 'an open jar of glass or porcelain used as an ornament or to hold flowers', 'name': 'vase'}, {'frequency': 'c', 'synset': 
'vending_machine.n.01', 'synonyms': ['vending_machine'], 'id': 1140, 'def': 'a slot machine for selling goods', 'name': 'vending_machine'}, {'frequency': 'f', 'synset': 'vent.n.01', 'synonyms': ['vent', 'blowhole', 'air_vent'], 'id': 1141, 'def': 'a hole for the escape of gas or air', 'name': 'vent'}, {'frequency': 'f', 'synset': 'vest.n.01', 'synonyms': ['vest', 'waistcoat'], 'id': 1142, 'def': "a man's sleeveless garment worn underneath a coat", 'name': 'vest'}, {'frequency': 'c', 'synset': 'videotape.n.01', 'synonyms': ['videotape'], 'id': 1143, 'def': 'a video recording made on magnetic tape', 'name': 'videotape'}, {'frequency': 'r', 'synset': 'vinegar.n.01', 'synonyms': ['vinegar'], 'id': 1144, 'def': 'sour-tasting liquid produced usually by oxidation of the alcohol in wine or cider and used as a condiment or food preservative', 'name': 'vinegar'}, {'frequency': 'r', 'synset': 'violin.n.01', 'synonyms': ['violin', 'fiddle'], 'id': 1145, 'def': 'bowed stringed instrument that is the highest member of the violin family', 'name': 'violin'}, {'frequency': 'r', 'synset': 'vodka.n.01', 'synonyms': ['vodka'], 'id': 1146, 'def': 'unaged colorless liquor originating in Russia', 'name': 'vodka'}, {'frequency': 'c', 'synset': 'volleyball.n.02', 'synonyms': ['volleyball'], 'id': 1147, 'def': 'an inflated ball used in playing volleyball', 'name': 'volleyball'}, {'frequency': 'r', 'synset': 'vulture.n.01', 'synonyms': ['vulture'], 'id': 1148, 'def': 'any of various large birds of prey having naked heads and weak claws and feeding chiefly on carrion', 'name': 'vulture'}, {'frequency': 'c', 'synset': 'waffle.n.01', 'synonyms': ['waffle'], 'id': 1149, 'def': 'pancake batter baked in a waffle iron', 'name': 'waffle'}, {'frequency': 'r', 'synset': 'waffle_iron.n.01', 'synonyms': ['waffle_iron'], 'id': 1150, 'def': 'a kitchen appliance for baking waffles', 'name': 'waffle_iron'}, {'frequency': 'c', 'synset': 'wagon.n.01', 'synonyms': ['wagon'], 'id': 1151, 'def': 'any of various kinds of wheeled vehicles drawn by an animal or a tractor', 'name': 'wagon'}, {'frequency': 'c', 'synset': 'wagon_wheel.n.01', 'synonyms': ['wagon_wheel'], 'id': 1152, 'def': 'a wheel of a wagon', 'name': 'wagon_wheel'}, {'frequency': 'c', 'synset': 'walking_stick.n.01', 'synonyms': ['walking_stick'], 'id': 1153, 'def': 'a stick carried in the hand for support in walking', 'name': 'walking_stick'}, {'frequency': 'c', 'synset': 'wall_clock.n.01', 'synonyms': ['wall_clock'], 'id': 1154, 'def': 'a clock mounted on a wall', 'name': 'wall_clock'}, {'frequency': 'f', 'synset': 'wall_socket.n.01', 'synonyms': ['wall_socket', 'wall_plug', 'electric_outlet', 'electrical_outlet', 'outlet', 'electric_receptacle'], 'id': 1155, 'def': 'receptacle providing a place in a wiring system where current can be taken to run electrical devices', 'name': 'wall_socket'}, {'frequency': 'f', 'synset': 'wallet.n.01', 'synonyms': ['wallet', 'billfold'], 'id': 1156, 'def': 'a pocket-size case for holding papers and paper money', 'name': 'wallet'}, {'frequency': 'r', 'synset': 'walrus.n.01', 'synonyms': ['walrus'], 'id': 1157, 'def': 'either of two large northern marine mammals having ivory tusks and tough hide over thick blubber', 'name': 'walrus'}, {'frequency': 'r', 'synset': 'wardrobe.n.01', 'synonyms': ['wardrobe'], 'id': 1158, 'def': 'a tall piece of furniture that provides storage space for clothes; has a door and rails or hooks for hanging clothes', 'name': 'wardrobe'}, {'frequency': 'r', 'synset': 'washbasin.n.01', 'synonyms': ['washbasin', 
'basin_(for_washing)', 'washbowl', 'washstand', 'handbasin'], 'id': 1159, 'def': 'a bathroom sink that is permanently installed and connected to a water supply and drainpipe; where you can wash your hands and face', 'name': 'washbasin'}, {'frequency': 'c', 'synset': 'washer.n.03', 'synonyms': ['automatic_washer', 'washing_machine'], 'id': 1160, 'def': 'a home appliance for washing clothes and linens automatically', 'name': 'automatic_washer'}, {'frequency': 'f', 'synset': 'watch.n.01', 'synonyms': ['watch', 'wristwatch'], 'id': 1161, 'def': 'a small, portable timepiece', 'name': 'watch'}, {'frequency': 'f', 'synset': 'water_bottle.n.01', 'synonyms': ['water_bottle'], 'id': 1162, 'def': 'a bottle for holding water', 'name': 'water_bottle'}, {'frequency': 'c', 'synset': 'water_cooler.n.01', 'synonyms': ['water_cooler'], 'id': 1163, 'def': 'a device for cooling and dispensing drinking water', 'name': 'water_cooler'}, {'frequency': 'c', 'synset': 'water_faucet.n.01', 'synonyms': ['water_faucet', 'water_tap', 'tap_(water_faucet)'], 'id': 1164, 'def': 'a faucet for drawing water from a pipe or cask', 'name': 'water_faucet'}, {'frequency': 'r', 'synset': 'water_heater.n.01', 'synonyms': ['water_heater', 'hot-water_heater'], 'id': 1165, 'def': 'a heater and storage tank to supply heated water', 'name': 'water_heater'}, {'frequency': 'c', 'synset': 'water_jug.n.01', 'synonyms': ['water_jug'], 'id': 1166, 'def': 'a jug that holds water', 'name': 'water_jug'}, {'frequency': 'r', 'synset': 'water_pistol.n.01', 'synonyms': ['water_gun', 'squirt_gun'], 'id': 1167, 'def': 'plaything consisting of a toy pistol that squirts water', 'name': 'water_gun'}, {'frequency': 'c', 'synset': 'water_scooter.n.01', 'synonyms': ['water_scooter', 'sea_scooter', 'jet_ski'], 'id': 1168, 'def': 'a motorboat resembling a motor scooter (NOT A SURFBOARD OR WATER SKI)', 'name': 'water_scooter'}, {'frequency': 'c', 'synset': 'water_ski.n.01', 'synonyms': ['water_ski'], 'id': 1169, 'def': 'broad ski for skimming over water towed by a speedboat (DO NOT MARK WATER)', 'name': 'water_ski'}, {'frequency': 'c', 'synset': 'water_tower.n.01', 'synonyms': ['water_tower'], 'id': 1170, 'def': 'a large reservoir for water', 'name': 'water_tower'}, {'frequency': 'c', 'synset': 'watering_can.n.01', 'synonyms': ['watering_can'], 'id': 1171, 'def': 'a container with a handle and a spout with a perforated nozzle; used to sprinkle water over plants', 'name': 'watering_can'}, {'frequency': 'f', 'synset': 'watermelon.n.02', 'synonyms': ['watermelon'], 'id': 1172, 'def': 'large oblong or roundish melon with a hard green rind and sweet watery red or occasionally yellowish pulp', 'name': 'watermelon'}, {'frequency': 'f', 'synset': 'weathervane.n.01', 'synonyms': ['weathervane', 'vane_(weathervane)', 'wind_vane'], 'id': 1173, 'def': 'mechanical device attached to an elevated structure; rotates freely to show the direction of the wind', 'name': 'weathervane'}, {'frequency': 'c', 'synset': 'webcam.n.01', 'synonyms': ['webcam'], 'id': 1174, 'def': 'a digital camera designed to take digital photographs and transmit them over the internet', 'name': 'webcam'}, {'frequency': 'c', 'synset': 'wedding_cake.n.01', 'synonyms': ['wedding_cake', 'bridecake'], 'id': 1175, 'def': 'a rich cake with two or more tiers and covered with frosting and decorations; served at a wedding reception', 'name': 'wedding_cake'}, {'frequency': 'c', 'synset': 'wedding_ring.n.01', 'synonyms': ['wedding_ring', 'wedding_band'], 'id': 1176, 'def': 'a ring given to the bride and/or groom at 
the wedding', 'name': 'wedding_ring'}, {'frequency': 'f', 'synset': 'wet_suit.n.01', 'synonyms': ['wet_suit'], 'id': 1177, 'def': 'a close-fitting garment made of a permeable material; worn in cold water to retain body heat', 'name': 'wet_suit'}, {'frequency': 'f', 'synset': 'wheel.n.01', 'synonyms': ['wheel'], 'id': 1178, 'def': 'a circular frame with spokes (or a solid disc) that can rotate on a shaft or axle', 'name': 'wheel'}, {'frequency': 'c', 'synset': 'wheelchair.n.01', 'synonyms': ['wheelchair'], 'id': 1179, 'def': 'a movable chair mounted on large wheels', 'name': 'wheelchair'}, {'frequency': 'c', 'synset': 'whipped_cream.n.01', 'synonyms': ['whipped_cream'], 'id': 1180, 'def': 'cream that has been beaten until light and fluffy', 'name': 'whipped_cream'}, {'frequency': 'c', 'synset': 'whistle.n.03', 'synonyms': ['whistle'], 'id': 1181, 'def': 'a small wind instrument that produces a whistling sound by blowing into it', 'name': 'whistle'}, {'frequency': 'c', 'synset': 'wig.n.01', 'synonyms': ['wig'], 'id': 1182, 'def': 'hairpiece covering the head and made of real or synthetic hair', 'name': 'wig'}, {'frequency': 'c', 'synset': 'wind_chime.n.01', 'synonyms': ['wind_chime'], 'id': 1183, 'def': 'a decorative arrangement of pieces of metal or glass or pottery that hang together loosely so the wind can cause them to tinkle', 'name': 'wind_chime'}, {'frequency': 'c', 'synset': 'windmill.n.01', 'synonyms': ['windmill'], 'id': 1184, 'def': 'A mill or turbine that is powered by wind', 'name': 'windmill'}, {'frequency': 'c', 'synset': 'window_box.n.01', 'synonyms': ['window_box_(for_plants)'], 'id': 1185, 'def': 'a container for growing plants on a windowsill', 'name': 'window_box_(for_plants)'}, {'frequency': 'f', 'synset': 'windshield_wiper.n.01', 'synonyms': ['windshield_wiper', 'windscreen_wiper', 'wiper_(for_windshield/screen)'], 'id': 1186, 'def': 'a mechanical device that cleans the windshield', 'name': 'windshield_wiper'}, {'frequency': 'c', 'synset': 'windsock.n.01', 'synonyms': ['windsock', 'air_sock', 'air-sleeve', 'wind_sleeve', 'wind_cone'], 'id': 1187, 'def': 'a truncated cloth cone mounted on a mast/pole; shows wind direction', 'name': 'windsock'}, {'frequency': 'f', 'synset': 'wine_bottle.n.01', 'synonyms': ['wine_bottle'], 'id': 1188, 'def': 'a bottle for holding wine', 'name': 'wine_bottle'}, {'frequency': 'c', 'synset': 'wine_bucket.n.01', 'synonyms': ['wine_bucket', 'wine_cooler'], 'id': 1189, 'def': 'a bucket of ice used to chill a bottle of wine', 'name': 'wine_bucket'}, {'frequency': 'f', 'synset': 'wineglass.n.01', 'synonyms': ['wineglass'], 'id': 1190, 'def': 'a glass that has a stem and in which wine is served', 'name': 'wineglass'}, {'frequency': 'f', 'synset': 'winker.n.02', 'synonyms': ['blinder_(for_horses)'], 'id': 1191, 'def': 'blinds that prevent a horse from seeing something on either side', 'name': 'blinder_(for_horses)'}, {'frequency': 'c', 'synset': 'wok.n.01', 'synonyms': ['wok'], 'id': 1192, 'def': 'pan with a convex bottom; used for frying in Chinese cooking', 'name': 'wok'}, {'frequency': 'r', 'synset': 'wolf.n.01', 'synonyms': ['wolf'], 'id': 1193, 'def': 'a wild carnivorous mammal of the dog family, living and hunting in packs', 'name': 'wolf'}, {'frequency': 'c', 'synset': 'wooden_spoon.n.02', 'synonyms': ['wooden_spoon'], 'id': 1194, 'def': 'a spoon made of wood', 'name': 'wooden_spoon'}, {'frequency': 'c', 'synset': 'wreath.n.01', 'synonyms': ['wreath'], 'id': 1195, 'def': 'an arrangement of flowers, leaves, or stems fastened in a ring', 
'name': 'wreath'}, {'frequency': 'c', 'synset': 'wrench.n.03', 'synonyms': ['wrench', 'spanner'], 'id': 1196, 'def': 'a hand tool that is used to hold or twist a nut or bolt', 'name': 'wrench'}, {'frequency': 'f', 'synset': 'wristband.n.01', 'synonyms': ['wristband'], 'id': 1197, 'def': 'band consisting of a part of a sleeve that covers the wrist', 'name': 'wristband'}, {'frequency': 'f', 'synset': 'wristlet.n.01', 'synonyms': ['wristlet', 'wrist_band'], 'id': 1198, 'def': 'a band or bracelet worn around the wrist', 'name': 'wristlet'}, {'frequency': 'c', 'synset': 'yacht.n.01', 'synonyms': ['yacht'], 'id': 1199, 'def': 'an expensive vessel propelled by sail or power and used for cruising or racing', 'name': 'yacht'}, {'frequency': 'c', 'synset': 'yogurt.n.01', 'synonyms': ['yogurt', 'yoghurt', 'yoghourt'], 'id': 1200, 'def': 'a custard-like food made from curdled milk', 'name': 'yogurt'}, {'frequency': 'c', 'synset': 'yoke.n.07', 'synonyms': ['yoke_(animal_equipment)'], 'id': 1201, 'def': 'gear joining two animals at the neck; NOT egg yolk', 'name': 'yoke_(animal_equipment)'}, {'frequency': 'f', 'synset': 'zebra.n.01', 'synonyms': ['zebra'], 'id': 1202, 'def': 'any of several fleet black-and-white striped African equines', 'name': 'zebra'}, {'frequency': 'c', 'synset': 'zucchini.n.02', 'synonyms': ['zucchini', 'courgette'], 'id': 1203, 'def': 'small cucumber-shaped vegetable marrow; typically dark green', 'name': 'zucchini'}] # noqa +# fmt: on diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/datasets/lvis_v1_category_image_count.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/datasets/lvis_v1_category_image_count.py new file mode 100644 index 0000000000000000000000000000000000000000..31bf0cfcd5096ab87835db86a28671d474514c40 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/datasets/lvis_v1_category_image_count.py @@ -0,0 +1,20 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+# Autogen with +# with open("lvis_v1_train.json", "r") as f: +# a = json.load(f) +# c = a["categories"] +# for x in c: +# del x["name"] +# del x["instance_count"] +# del x["def"] +# del x["synonyms"] +# del x["frequency"] +# del x["synset"] +# LVIS_CATEGORY_IMAGE_COUNT = repr(c) + " # noqa" +# with open("/tmp/lvis_category_image_count.py", "wt") as f: +# f.write(f"LVIS_CATEGORY_IMAGE_COUNT = {LVIS_CATEGORY_IMAGE_COUNT}") +# Then paste the contents of that file below + +# fmt: off +LVIS_CATEGORY_IMAGE_COUNT = [{'id': 1, 'image_count': 64}, {'id': 2, 'image_count': 364}, {'id': 3, 'image_count': 1911}, {'id': 4, 'image_count': 149}, {'id': 5, 'image_count': 29}, {'id': 6, 'image_count': 26}, {'id': 7, 'image_count': 59}, {'id': 8, 'image_count': 22}, {'id': 9, 'image_count': 12}, {'id': 10, 'image_count': 28}, {'id': 11, 'image_count': 505}, {'id': 12, 'image_count': 1207}, {'id': 13, 'image_count': 4}, {'id': 14, 'image_count': 10}, {'id': 15, 'image_count': 500}, {'id': 16, 'image_count': 33}, {'id': 17, 'image_count': 3}, {'id': 18, 'image_count': 44}, {'id': 19, 'image_count': 561}, {'id': 20, 'image_count': 8}, {'id': 21, 'image_count': 9}, {'id': 22, 'image_count': 33}, {'id': 23, 'image_count': 1883}, {'id': 24, 'image_count': 98}, {'id': 25, 'image_count': 70}, {'id': 26, 'image_count': 46}, {'id': 27, 'image_count': 117}, {'id': 28, 'image_count': 41}, {'id': 29, 'image_count': 1395}, {'id': 30, 'image_count': 7}, {'id': 31, 'image_count': 1}, {'id': 32, 'image_count': 314}, {'id': 33, 'image_count': 31}, {'id': 34, 'image_count': 1905}, {'id': 35, 'image_count': 1859}, {'id': 36, 'image_count': 1623}, {'id': 37, 'image_count': 47}, {'id': 38, 'image_count': 3}, {'id': 39, 'image_count': 3}, {'id': 40, 'image_count': 1}, {'id': 41, 'image_count': 305}, {'id': 42, 'image_count': 6}, {'id': 43, 'image_count': 210}, {'id': 44, 'image_count': 36}, {'id': 45, 'image_count': 1787}, {'id': 46, 'image_count': 17}, {'id': 47, 'image_count': 51}, {'id': 48, 'image_count': 138}, {'id': 49, 'image_count': 3}, {'id': 50, 'image_count': 1470}, {'id': 51, 'image_count': 3}, {'id': 52, 'image_count': 2}, {'id': 53, 'image_count': 186}, {'id': 54, 'image_count': 76}, {'id': 55, 'image_count': 26}, {'id': 56, 'image_count': 303}, {'id': 57, 'image_count': 738}, {'id': 58, 'image_count': 1799}, {'id': 59, 'image_count': 1934}, {'id': 60, 'image_count': 1609}, {'id': 61, 'image_count': 1622}, {'id': 62, 'image_count': 41}, {'id': 63, 'image_count': 4}, {'id': 64, 'image_count': 11}, {'id': 65, 'image_count': 270}, {'id': 66, 'image_count': 349}, {'id': 67, 'image_count': 42}, {'id': 68, 'image_count': 823}, {'id': 69, 'image_count': 6}, {'id': 70, 'image_count': 48}, {'id': 71, 'image_count': 3}, {'id': 72, 'image_count': 42}, {'id': 73, 'image_count': 24}, {'id': 74, 'image_count': 16}, {'id': 75, 'image_count': 605}, {'id': 76, 'image_count': 646}, {'id': 77, 'image_count': 1765}, {'id': 78, 'image_count': 2}, {'id': 79, 'image_count': 125}, {'id': 80, 'image_count': 1420}, {'id': 81, 'image_count': 140}, {'id': 82, 'image_count': 4}, {'id': 83, 'image_count': 322}, {'id': 84, 'image_count': 60}, {'id': 85, 'image_count': 2}, {'id': 86, 'image_count': 231}, {'id': 87, 'image_count': 333}, {'id': 88, 'image_count': 1941}, {'id': 89, 'image_count': 367}, {'id': 90, 'image_count': 1922}, {'id': 91, 'image_count': 18}, {'id': 92, 'image_count': 81}, {'id': 93, 'image_count': 1}, {'id': 94, 'image_count': 1852}, {'id': 95, 'image_count': 430}, {'id': 96, 'image_count': 247}, {'id': 97, 'image_count': 
94}, {'id': 98, 'image_count': 21}, {'id': 99, 'image_count': 1821}, {'id': 100, 'image_count': 16}, {'id': 101, 'image_count': 12}, {'id': 102, 'image_count': 25}, {'id': 103, 'image_count': 41}, {'id': 104, 'image_count': 244}, {'id': 105, 'image_count': 7}, {'id': 106, 'image_count': 1}, {'id': 107, 'image_count': 40}, {'id': 108, 'image_count': 40}, {'id': 109, 'image_count': 104}, {'id': 110, 'image_count': 1671}, {'id': 111, 'image_count': 49}, {'id': 112, 'image_count': 243}, {'id': 113, 'image_count': 2}, {'id': 114, 'image_count': 242}, {'id': 115, 'image_count': 271}, {'id': 116, 'image_count': 104}, {'id': 117, 'image_count': 8}, {'id': 118, 'image_count': 1758}, {'id': 119, 'image_count': 1}, {'id': 120, 'image_count': 48}, {'id': 121, 'image_count': 14}, {'id': 122, 'image_count': 40}, {'id': 123, 'image_count': 1}, {'id': 124, 'image_count': 37}, {'id': 125, 'image_count': 1510}, {'id': 126, 'image_count': 6}, {'id': 127, 'image_count': 1903}, {'id': 128, 'image_count': 70}, {'id': 129, 'image_count': 86}, {'id': 130, 'image_count': 7}, {'id': 131, 'image_count': 5}, {'id': 132, 'image_count': 1406}, {'id': 133, 'image_count': 1901}, {'id': 134, 'image_count': 15}, {'id': 135, 'image_count': 28}, {'id': 136, 'image_count': 6}, {'id': 137, 'image_count': 494}, {'id': 138, 'image_count': 234}, {'id': 139, 'image_count': 1922}, {'id': 140, 'image_count': 1}, {'id': 141, 'image_count': 35}, {'id': 142, 'image_count': 5}, {'id': 143, 'image_count': 1828}, {'id': 144, 'image_count': 8}, {'id': 145, 'image_count': 63}, {'id': 146, 'image_count': 1668}, {'id': 147, 'image_count': 4}, {'id': 148, 'image_count': 95}, {'id': 149, 'image_count': 17}, {'id': 150, 'image_count': 1567}, {'id': 151, 'image_count': 2}, {'id': 152, 'image_count': 103}, {'id': 153, 'image_count': 50}, {'id': 154, 'image_count': 1309}, {'id': 155, 'image_count': 6}, {'id': 156, 'image_count': 92}, {'id': 157, 'image_count': 19}, {'id': 158, 'image_count': 37}, {'id': 159, 'image_count': 4}, {'id': 160, 'image_count': 709}, {'id': 161, 'image_count': 9}, {'id': 162, 'image_count': 82}, {'id': 163, 'image_count': 15}, {'id': 164, 'image_count': 3}, {'id': 165, 'image_count': 61}, {'id': 166, 'image_count': 51}, {'id': 167, 'image_count': 5}, {'id': 168, 'image_count': 13}, {'id': 169, 'image_count': 642}, {'id': 170, 'image_count': 24}, {'id': 171, 'image_count': 255}, {'id': 172, 'image_count': 9}, {'id': 173, 'image_count': 1808}, {'id': 174, 'image_count': 31}, {'id': 175, 'image_count': 158}, {'id': 176, 'image_count': 80}, {'id': 177, 'image_count': 1884}, {'id': 178, 'image_count': 158}, {'id': 179, 'image_count': 2}, {'id': 180, 'image_count': 12}, {'id': 181, 'image_count': 1659}, {'id': 182, 'image_count': 7}, {'id': 183, 'image_count': 834}, {'id': 184, 'image_count': 57}, {'id': 185, 'image_count': 174}, {'id': 186, 'image_count': 95}, {'id': 187, 'image_count': 27}, {'id': 188, 'image_count': 22}, {'id': 189, 'image_count': 1391}, {'id': 190, 'image_count': 90}, {'id': 191, 'image_count': 40}, {'id': 192, 'image_count': 445}, {'id': 193, 'image_count': 21}, {'id': 194, 'image_count': 1132}, {'id': 195, 'image_count': 177}, {'id': 196, 'image_count': 4}, {'id': 197, 'image_count': 17}, {'id': 198, 'image_count': 84}, {'id': 199, 'image_count': 55}, {'id': 200, 'image_count': 30}, {'id': 201, 'image_count': 25}, {'id': 202, 'image_count': 2}, {'id': 203, 'image_count': 125}, {'id': 204, 'image_count': 1135}, {'id': 205, 'image_count': 19}, {'id': 206, 'image_count': 72}, {'id': 207, 'image_count': 1926}, 
{'id': 208, 'image_count': 159}, {'id': 209, 'image_count': 7}, {'id': 210, 'image_count': 1}, {'id': 211, 'image_count': 13}, {'id': 212, 'image_count': 35}, {'id': 213, 'image_count': 18}, {'id': 214, 'image_count': 8}, {'id': 215, 'image_count': 6}, {'id': 216, 'image_count': 35}, {'id': 217, 'image_count': 1222}, {'id': 218, 'image_count': 103}, {'id': 219, 'image_count': 28}, {'id': 220, 'image_count': 63}, {'id': 221, 'image_count': 28}, {'id': 222, 'image_count': 5}, {'id': 223, 'image_count': 7}, {'id': 224, 'image_count': 14}, {'id': 225, 'image_count': 1918}, {'id': 226, 'image_count': 133}, {'id': 227, 'image_count': 16}, {'id': 228, 'image_count': 27}, {'id': 229, 'image_count': 110}, {'id': 230, 'image_count': 1895}, {'id': 231, 'image_count': 4}, {'id': 232, 'image_count': 1927}, {'id': 233, 'image_count': 8}, {'id': 234, 'image_count': 1}, {'id': 235, 'image_count': 263}, {'id': 236, 'image_count': 10}, {'id': 237, 'image_count': 2}, {'id': 238, 'image_count': 3}, {'id': 239, 'image_count': 87}, {'id': 240, 'image_count': 9}, {'id': 241, 'image_count': 71}, {'id': 242, 'image_count': 13}, {'id': 243, 'image_count': 18}, {'id': 244, 'image_count': 2}, {'id': 245, 'image_count': 5}, {'id': 246, 'image_count': 45}, {'id': 247, 'image_count': 1}, {'id': 248, 'image_count': 23}, {'id': 249, 'image_count': 32}, {'id': 250, 'image_count': 4}, {'id': 251, 'image_count': 1}, {'id': 252, 'image_count': 858}, {'id': 253, 'image_count': 661}, {'id': 254, 'image_count': 168}, {'id': 255, 'image_count': 210}, {'id': 256, 'image_count': 65}, {'id': 257, 'image_count': 4}, {'id': 258, 'image_count': 2}, {'id': 259, 'image_count': 159}, {'id': 260, 'image_count': 31}, {'id': 261, 'image_count': 811}, {'id': 262, 'image_count': 1}, {'id': 263, 'image_count': 42}, {'id': 264, 'image_count': 27}, {'id': 265, 'image_count': 2}, {'id': 266, 'image_count': 5}, {'id': 267, 'image_count': 95}, {'id': 268, 'image_count': 32}, {'id': 269, 'image_count': 1}, {'id': 270, 'image_count': 1}, {'id': 271, 'image_count': 1844}, {'id': 272, 'image_count': 897}, {'id': 273, 'image_count': 31}, {'id': 274, 'image_count': 23}, {'id': 275, 'image_count': 1}, {'id': 276, 'image_count': 202}, {'id': 277, 'image_count': 746}, {'id': 278, 'image_count': 44}, {'id': 279, 'image_count': 14}, {'id': 280, 'image_count': 26}, {'id': 281, 'image_count': 1}, {'id': 282, 'image_count': 2}, {'id': 283, 'image_count': 25}, {'id': 284, 'image_count': 238}, {'id': 285, 'image_count': 592}, {'id': 286, 'image_count': 26}, {'id': 287, 'image_count': 5}, {'id': 288, 'image_count': 42}, {'id': 289, 'image_count': 13}, {'id': 290, 'image_count': 46}, {'id': 291, 'image_count': 1}, {'id': 292, 'image_count': 8}, {'id': 293, 'image_count': 34}, {'id': 294, 'image_count': 5}, {'id': 295, 'image_count': 1}, {'id': 296, 'image_count': 1871}, {'id': 297, 'image_count': 717}, {'id': 298, 'image_count': 1010}, {'id': 299, 'image_count': 679}, {'id': 300, 'image_count': 3}, {'id': 301, 'image_count': 4}, {'id': 302, 'image_count': 1}, {'id': 303, 'image_count': 166}, {'id': 304, 'image_count': 2}, {'id': 305, 'image_count': 266}, {'id': 306, 'image_count': 101}, {'id': 307, 'image_count': 6}, {'id': 308, 'image_count': 14}, {'id': 309, 'image_count': 133}, {'id': 310, 'image_count': 2}, {'id': 311, 'image_count': 38}, {'id': 312, 'image_count': 95}, {'id': 313, 'image_count': 1}, {'id': 314, 'image_count': 12}, {'id': 315, 'image_count': 49}, {'id': 316, 'image_count': 5}, {'id': 317, 'image_count': 5}, {'id': 318, 'image_count': 16}, {'id': 
319, 'image_count': 216}, {'id': 320, 'image_count': 12}, {'id': 321, 'image_count': 1}, {'id': 322, 'image_count': 54}, {'id': 323, 'image_count': 5}, {'id': 324, 'image_count': 245}, {'id': 325, 'image_count': 12}, {'id': 326, 'image_count': 7}, {'id': 327, 'image_count': 35}, {'id': 328, 'image_count': 36}, {'id': 329, 'image_count': 32}, {'id': 330, 'image_count': 1027}, {'id': 331, 'image_count': 10}, {'id': 332, 'image_count': 12}, {'id': 333, 'image_count': 1}, {'id': 334, 'image_count': 67}, {'id': 335, 'image_count': 71}, {'id': 336, 'image_count': 30}, {'id': 337, 'image_count': 48}, {'id': 338, 'image_count': 249}, {'id': 339, 'image_count': 13}, {'id': 340, 'image_count': 29}, {'id': 341, 'image_count': 14}, {'id': 342, 'image_count': 236}, {'id': 343, 'image_count': 15}, {'id': 344, 'image_count': 1521}, {'id': 345, 'image_count': 25}, {'id': 346, 'image_count': 249}, {'id': 347, 'image_count': 139}, {'id': 348, 'image_count': 2}, {'id': 349, 'image_count': 2}, {'id': 350, 'image_count': 1890}, {'id': 351, 'image_count': 1240}, {'id': 352, 'image_count': 1}, {'id': 353, 'image_count': 9}, {'id': 354, 'image_count': 1}, {'id': 355, 'image_count': 3}, {'id': 356, 'image_count': 11}, {'id': 357, 'image_count': 4}, {'id': 358, 'image_count': 236}, {'id': 359, 'image_count': 44}, {'id': 360, 'image_count': 19}, {'id': 361, 'image_count': 1100}, {'id': 362, 'image_count': 7}, {'id': 363, 'image_count': 69}, {'id': 364, 'image_count': 2}, {'id': 365, 'image_count': 8}, {'id': 366, 'image_count': 5}, {'id': 367, 'image_count': 227}, {'id': 368, 'image_count': 6}, {'id': 369, 'image_count': 106}, {'id': 370, 'image_count': 81}, {'id': 371, 'image_count': 17}, {'id': 372, 'image_count': 134}, {'id': 373, 'image_count': 312}, {'id': 374, 'image_count': 8}, {'id': 375, 'image_count': 271}, {'id': 376, 'image_count': 2}, {'id': 377, 'image_count': 103}, {'id': 378, 'image_count': 1938}, {'id': 379, 'image_count': 574}, {'id': 380, 'image_count': 120}, {'id': 381, 'image_count': 2}, {'id': 382, 'image_count': 2}, {'id': 383, 'image_count': 13}, {'id': 384, 'image_count': 29}, {'id': 385, 'image_count': 1710}, {'id': 386, 'image_count': 66}, {'id': 387, 'image_count': 1008}, {'id': 388, 'image_count': 1}, {'id': 389, 'image_count': 3}, {'id': 390, 'image_count': 1942}, {'id': 391, 'image_count': 19}, {'id': 392, 'image_count': 1488}, {'id': 393, 'image_count': 46}, {'id': 394, 'image_count': 106}, {'id': 395, 'image_count': 115}, {'id': 396, 'image_count': 19}, {'id': 397, 'image_count': 2}, {'id': 398, 'image_count': 1}, {'id': 399, 'image_count': 28}, {'id': 400, 'image_count': 9}, {'id': 401, 'image_count': 192}, {'id': 402, 'image_count': 12}, {'id': 403, 'image_count': 21}, {'id': 404, 'image_count': 247}, {'id': 405, 'image_count': 6}, {'id': 406, 'image_count': 64}, {'id': 407, 'image_count': 7}, {'id': 408, 'image_count': 40}, {'id': 409, 'image_count': 542}, {'id': 410, 'image_count': 2}, {'id': 411, 'image_count': 1898}, {'id': 412, 'image_count': 36}, {'id': 413, 'image_count': 4}, {'id': 414, 'image_count': 1}, {'id': 415, 'image_count': 191}, {'id': 416, 'image_count': 6}, {'id': 417, 'image_count': 41}, {'id': 418, 'image_count': 39}, {'id': 419, 'image_count': 46}, {'id': 420, 'image_count': 1}, {'id': 421, 'image_count': 1451}, {'id': 422, 'image_count': 1878}, {'id': 423, 'image_count': 11}, {'id': 424, 'image_count': 82}, {'id': 425, 'image_count': 18}, {'id': 426, 'image_count': 1}, {'id': 427, 'image_count': 7}, {'id': 428, 'image_count': 3}, {'id': 429, 'image_count': 
575}, {'id': 430, 'image_count': 1907}, {'id': 431, 'image_count': 8}, {'id': 432, 'image_count': 4}, {'id': 433, 'image_count': 32}, {'id': 434, 'image_count': 11}, {'id': 435, 'image_count': 4}, {'id': 436, 'image_count': 54}, {'id': 437, 'image_count': 202}, {'id': 438, 'image_count': 32}, {'id': 439, 'image_count': 3}, {'id': 440, 'image_count': 130}, {'id': 441, 'image_count': 119}, {'id': 442, 'image_count': 141}, {'id': 443, 'image_count': 29}, {'id': 444, 'image_count': 525}, {'id': 445, 'image_count': 1323}, {'id': 446, 'image_count': 2}, {'id': 447, 'image_count': 113}, {'id': 448, 'image_count': 16}, {'id': 449, 'image_count': 7}, {'id': 450, 'image_count': 35}, {'id': 451, 'image_count': 1908}, {'id': 452, 'image_count': 353}, {'id': 453, 'image_count': 18}, {'id': 454, 'image_count': 14}, {'id': 455, 'image_count': 77}, {'id': 456, 'image_count': 8}, {'id': 457, 'image_count': 37}, {'id': 458, 'image_count': 1}, {'id': 459, 'image_count': 346}, {'id': 460, 'image_count': 19}, {'id': 461, 'image_count': 1779}, {'id': 462, 'image_count': 23}, {'id': 463, 'image_count': 25}, {'id': 464, 'image_count': 67}, {'id': 465, 'image_count': 19}, {'id': 466, 'image_count': 28}, {'id': 467, 'image_count': 4}, {'id': 468, 'image_count': 27}, {'id': 469, 'image_count': 1861}, {'id': 470, 'image_count': 11}, {'id': 471, 'image_count': 13}, {'id': 472, 'image_count': 13}, {'id': 473, 'image_count': 32}, {'id': 474, 'image_count': 1767}, {'id': 475, 'image_count': 42}, {'id': 476, 'image_count': 17}, {'id': 477, 'image_count': 128}, {'id': 478, 'image_count': 1}, {'id': 479, 'image_count': 9}, {'id': 480, 'image_count': 10}, {'id': 481, 'image_count': 4}, {'id': 482, 'image_count': 9}, {'id': 483, 'image_count': 18}, {'id': 484, 'image_count': 41}, {'id': 485, 'image_count': 28}, {'id': 486, 'image_count': 3}, {'id': 487, 'image_count': 65}, {'id': 488, 'image_count': 9}, {'id': 489, 'image_count': 23}, {'id': 490, 'image_count': 24}, {'id': 491, 'image_count': 1}, {'id': 492, 'image_count': 2}, {'id': 493, 'image_count': 59}, {'id': 494, 'image_count': 48}, {'id': 495, 'image_count': 17}, {'id': 496, 'image_count': 1877}, {'id': 497, 'image_count': 18}, {'id': 498, 'image_count': 1920}, {'id': 499, 'image_count': 50}, {'id': 500, 'image_count': 1890}, {'id': 501, 'image_count': 99}, {'id': 502, 'image_count': 1530}, {'id': 503, 'image_count': 3}, {'id': 504, 'image_count': 11}, {'id': 505, 'image_count': 19}, {'id': 506, 'image_count': 3}, {'id': 507, 'image_count': 63}, {'id': 508, 'image_count': 5}, {'id': 509, 'image_count': 6}, {'id': 510, 'image_count': 233}, {'id': 511, 'image_count': 54}, {'id': 512, 'image_count': 36}, {'id': 513, 'image_count': 10}, {'id': 514, 'image_count': 124}, {'id': 515, 'image_count': 101}, {'id': 516, 'image_count': 3}, {'id': 517, 'image_count': 363}, {'id': 518, 'image_count': 3}, {'id': 519, 'image_count': 30}, {'id': 520, 'image_count': 18}, {'id': 521, 'image_count': 199}, {'id': 522, 'image_count': 97}, {'id': 523, 'image_count': 32}, {'id': 524, 'image_count': 121}, {'id': 525, 'image_count': 16}, {'id': 526, 'image_count': 12}, {'id': 527, 'image_count': 2}, {'id': 528, 'image_count': 214}, {'id': 529, 'image_count': 48}, {'id': 530, 'image_count': 26}, {'id': 531, 'image_count': 13}, {'id': 532, 'image_count': 4}, {'id': 533, 'image_count': 11}, {'id': 534, 'image_count': 123}, {'id': 535, 'image_count': 7}, {'id': 536, 'image_count': 200}, {'id': 537, 'image_count': 91}, {'id': 538, 'image_count': 9}, {'id': 539, 'image_count': 72}, {'id': 540, 
'image_count': 1886}, {'id': 541, 'image_count': 4}, {'id': 542, 'image_count': 1}, {'id': 543, 'image_count': 1}, {'id': 544, 'image_count': 1932}, {'id': 545, 'image_count': 4}, {'id': 546, 'image_count': 56}, {'id': 547, 'image_count': 854}, {'id': 548, 'image_count': 755}, {'id': 549, 'image_count': 1843}, {'id': 550, 'image_count': 96}, {'id': 551, 'image_count': 7}, {'id': 552, 'image_count': 74}, {'id': 553, 'image_count': 66}, {'id': 554, 'image_count': 57}, {'id': 555, 'image_count': 44}, {'id': 556, 'image_count': 1905}, {'id': 557, 'image_count': 4}, {'id': 558, 'image_count': 90}, {'id': 559, 'image_count': 1635}, {'id': 560, 'image_count': 8}, {'id': 561, 'image_count': 5}, {'id': 562, 'image_count': 50}, {'id': 563, 'image_count': 545}, {'id': 564, 'image_count': 20}, {'id': 565, 'image_count': 193}, {'id': 566, 'image_count': 285}, {'id': 567, 'image_count': 3}, {'id': 568, 'image_count': 1}, {'id': 569, 'image_count': 1904}, {'id': 570, 'image_count': 294}, {'id': 571, 'image_count': 3}, {'id': 572, 'image_count': 5}, {'id': 573, 'image_count': 24}, {'id': 574, 'image_count': 2}, {'id': 575, 'image_count': 2}, {'id': 576, 'image_count': 16}, {'id': 577, 'image_count': 8}, {'id': 578, 'image_count': 154}, {'id': 579, 'image_count': 66}, {'id': 580, 'image_count': 1}, {'id': 581, 'image_count': 24}, {'id': 582, 'image_count': 1}, {'id': 583, 'image_count': 4}, {'id': 584, 'image_count': 75}, {'id': 585, 'image_count': 6}, {'id': 586, 'image_count': 126}, {'id': 587, 'image_count': 24}, {'id': 588, 'image_count': 22}, {'id': 589, 'image_count': 1872}, {'id': 590, 'image_count': 16}, {'id': 591, 'image_count': 423}, {'id': 592, 'image_count': 1927}, {'id': 593, 'image_count': 38}, {'id': 594, 'image_count': 3}, {'id': 595, 'image_count': 1945}, {'id': 596, 'image_count': 35}, {'id': 597, 'image_count': 1}, {'id': 598, 'image_count': 13}, {'id': 599, 'image_count': 9}, {'id': 600, 'image_count': 14}, {'id': 601, 'image_count': 37}, {'id': 602, 'image_count': 3}, {'id': 603, 'image_count': 4}, {'id': 604, 'image_count': 100}, {'id': 605, 'image_count': 195}, {'id': 606, 'image_count': 1}, {'id': 607, 'image_count': 12}, {'id': 608, 'image_count': 24}, {'id': 609, 'image_count': 489}, {'id': 610, 'image_count': 10}, {'id': 611, 'image_count': 1689}, {'id': 612, 'image_count': 42}, {'id': 613, 'image_count': 81}, {'id': 614, 'image_count': 894}, {'id': 615, 'image_count': 1868}, {'id': 616, 'image_count': 7}, {'id': 617, 'image_count': 1567}, {'id': 618, 'image_count': 10}, {'id': 619, 'image_count': 8}, {'id': 620, 'image_count': 7}, {'id': 621, 'image_count': 629}, {'id': 622, 'image_count': 89}, {'id': 623, 'image_count': 15}, {'id': 624, 'image_count': 134}, {'id': 625, 'image_count': 4}, {'id': 626, 'image_count': 1802}, {'id': 627, 'image_count': 595}, {'id': 628, 'image_count': 1210}, {'id': 629, 'image_count': 48}, {'id': 630, 'image_count': 418}, {'id': 631, 'image_count': 1846}, {'id': 632, 'image_count': 5}, {'id': 633, 'image_count': 221}, {'id': 634, 'image_count': 10}, {'id': 635, 'image_count': 7}, {'id': 636, 'image_count': 76}, {'id': 637, 'image_count': 22}, {'id': 638, 'image_count': 10}, {'id': 639, 'image_count': 341}, {'id': 640, 'image_count': 1}, {'id': 641, 'image_count': 705}, {'id': 642, 'image_count': 1900}, {'id': 643, 'image_count': 188}, {'id': 644, 'image_count': 227}, {'id': 645, 'image_count': 861}, {'id': 646, 'image_count': 6}, {'id': 647, 'image_count': 115}, {'id': 648, 'image_count': 5}, {'id': 649, 'image_count': 43}, {'id': 650, 
'image_count': 14}, {'id': 651, 'image_count': 6}, {'id': 652, 'image_count': 15}, {'id': 653, 'image_count': 1167}, {'id': 654, 'image_count': 15}, {'id': 655, 'image_count': 994}, {'id': 656, 'image_count': 28}, {'id': 657, 'image_count': 2}, {'id': 658, 'image_count': 338}, {'id': 659, 'image_count': 334}, {'id': 660, 'image_count': 15}, {'id': 661, 'image_count': 102}, {'id': 662, 'image_count': 1}, {'id': 663, 'image_count': 8}, {'id': 664, 'image_count': 1}, {'id': 665, 'image_count': 1}, {'id': 666, 'image_count': 28}, {'id': 667, 'image_count': 91}, {'id': 668, 'image_count': 260}, {'id': 669, 'image_count': 131}, {'id': 670, 'image_count': 128}, {'id': 671, 'image_count': 3}, {'id': 672, 'image_count': 10}, {'id': 673, 'image_count': 39}, {'id': 674, 'image_count': 2}, {'id': 675, 'image_count': 925}, {'id': 676, 'image_count': 354}, {'id': 677, 'image_count': 31}, {'id': 678, 'image_count': 10}, {'id': 679, 'image_count': 215}, {'id': 680, 'image_count': 71}, {'id': 681, 'image_count': 43}, {'id': 682, 'image_count': 28}, {'id': 683, 'image_count': 34}, {'id': 684, 'image_count': 16}, {'id': 685, 'image_count': 273}, {'id': 686, 'image_count': 2}, {'id': 687, 'image_count': 999}, {'id': 688, 'image_count': 4}, {'id': 689, 'image_count': 107}, {'id': 690, 'image_count': 2}, {'id': 691, 'image_count': 1}, {'id': 692, 'image_count': 454}, {'id': 693, 'image_count': 9}, {'id': 694, 'image_count': 1901}, {'id': 695, 'image_count': 61}, {'id': 696, 'image_count': 91}, {'id': 697, 'image_count': 46}, {'id': 698, 'image_count': 1402}, {'id': 699, 'image_count': 74}, {'id': 700, 'image_count': 421}, {'id': 701, 'image_count': 226}, {'id': 702, 'image_count': 10}, {'id': 703, 'image_count': 1720}, {'id': 704, 'image_count': 261}, {'id': 705, 'image_count': 1337}, {'id': 706, 'image_count': 293}, {'id': 707, 'image_count': 62}, {'id': 708, 'image_count': 814}, {'id': 709, 'image_count': 407}, {'id': 710, 'image_count': 6}, {'id': 711, 'image_count': 16}, {'id': 712, 'image_count': 7}, {'id': 713, 'image_count': 1791}, {'id': 714, 'image_count': 2}, {'id': 715, 'image_count': 1915}, {'id': 716, 'image_count': 1940}, {'id': 717, 'image_count': 13}, {'id': 718, 'image_count': 16}, {'id': 719, 'image_count': 448}, {'id': 720, 'image_count': 12}, {'id': 721, 'image_count': 18}, {'id': 722, 'image_count': 4}, {'id': 723, 'image_count': 71}, {'id': 724, 'image_count': 189}, {'id': 725, 'image_count': 74}, {'id': 726, 'image_count': 103}, {'id': 727, 'image_count': 3}, {'id': 728, 'image_count': 110}, {'id': 729, 'image_count': 5}, {'id': 730, 'image_count': 9}, {'id': 731, 'image_count': 15}, {'id': 732, 'image_count': 25}, {'id': 733, 'image_count': 7}, {'id': 734, 'image_count': 647}, {'id': 735, 'image_count': 824}, {'id': 736, 'image_count': 100}, {'id': 737, 'image_count': 47}, {'id': 738, 'image_count': 121}, {'id': 739, 'image_count': 731}, {'id': 740, 'image_count': 73}, {'id': 741, 'image_count': 49}, {'id': 742, 'image_count': 23}, {'id': 743, 'image_count': 4}, {'id': 744, 'image_count': 62}, {'id': 745, 'image_count': 118}, {'id': 746, 'image_count': 99}, {'id': 747, 'image_count': 40}, {'id': 748, 'image_count': 1036}, {'id': 749, 'image_count': 105}, {'id': 750, 'image_count': 21}, {'id': 751, 'image_count': 229}, {'id': 752, 'image_count': 7}, {'id': 753, 'image_count': 72}, {'id': 754, 'image_count': 9}, {'id': 755, 'image_count': 10}, {'id': 756, 'image_count': 328}, {'id': 757, 'image_count': 468}, {'id': 758, 'image_count': 1}, {'id': 759, 'image_count': 2}, {'id': 760, 
'image_count': 24}, {'id': 761, 'image_count': 11}, {'id': 762, 'image_count': 72}, {'id': 763, 'image_count': 17}, {'id': 764, 'image_count': 10}, {'id': 765, 'image_count': 17}, {'id': 766, 'image_count': 489}, {'id': 767, 'image_count': 47}, {'id': 768, 'image_count': 93}, {'id': 769, 'image_count': 1}, {'id': 770, 'image_count': 12}, {'id': 771, 'image_count': 228}, {'id': 772, 'image_count': 5}, {'id': 773, 'image_count': 76}, {'id': 774, 'image_count': 71}, {'id': 775, 'image_count': 30}, {'id': 776, 'image_count': 109}, {'id': 777, 'image_count': 14}, {'id': 778, 'image_count': 1}, {'id': 779, 'image_count': 8}, {'id': 780, 'image_count': 26}, {'id': 781, 'image_count': 339}, {'id': 782, 'image_count': 153}, {'id': 783, 'image_count': 2}, {'id': 784, 'image_count': 3}, {'id': 785, 'image_count': 8}, {'id': 786, 'image_count': 47}, {'id': 787, 'image_count': 8}, {'id': 788, 'image_count': 6}, {'id': 789, 'image_count': 116}, {'id': 790, 'image_count': 69}, {'id': 791, 'image_count': 13}, {'id': 792, 'image_count': 6}, {'id': 793, 'image_count': 1928}, {'id': 794, 'image_count': 79}, {'id': 795, 'image_count': 14}, {'id': 796, 'image_count': 7}, {'id': 797, 'image_count': 20}, {'id': 798, 'image_count': 114}, {'id': 799, 'image_count': 221}, {'id': 800, 'image_count': 502}, {'id': 801, 'image_count': 62}, {'id': 802, 'image_count': 87}, {'id': 803, 'image_count': 4}, {'id': 804, 'image_count': 1912}, {'id': 805, 'image_count': 7}, {'id': 806, 'image_count': 186}, {'id': 807, 'image_count': 18}, {'id': 808, 'image_count': 4}, {'id': 809, 'image_count': 3}, {'id': 810, 'image_count': 7}, {'id': 811, 'image_count': 1413}, {'id': 812, 'image_count': 7}, {'id': 813, 'image_count': 12}, {'id': 814, 'image_count': 248}, {'id': 815, 'image_count': 4}, {'id': 816, 'image_count': 1881}, {'id': 817, 'image_count': 529}, {'id': 818, 'image_count': 1932}, {'id': 819, 'image_count': 50}, {'id': 820, 'image_count': 3}, {'id': 821, 'image_count': 28}, {'id': 822, 'image_count': 10}, {'id': 823, 'image_count': 5}, {'id': 824, 'image_count': 5}, {'id': 825, 'image_count': 18}, {'id': 826, 'image_count': 14}, {'id': 827, 'image_count': 1890}, {'id': 828, 'image_count': 660}, {'id': 829, 'image_count': 8}, {'id': 830, 'image_count': 25}, {'id': 831, 'image_count': 10}, {'id': 832, 'image_count': 218}, {'id': 833, 'image_count': 36}, {'id': 834, 'image_count': 16}, {'id': 835, 'image_count': 808}, {'id': 836, 'image_count': 479}, {'id': 837, 'image_count': 1404}, {'id': 838, 'image_count': 307}, {'id': 839, 'image_count': 57}, {'id': 840, 'image_count': 28}, {'id': 841, 'image_count': 80}, {'id': 842, 'image_count': 11}, {'id': 843, 'image_count': 92}, {'id': 844, 'image_count': 20}, {'id': 845, 'image_count': 194}, {'id': 846, 'image_count': 23}, {'id': 847, 'image_count': 52}, {'id': 848, 'image_count': 673}, {'id': 849, 'image_count': 2}, {'id': 850, 'image_count': 2}, {'id': 851, 'image_count': 1}, {'id': 852, 'image_count': 2}, {'id': 853, 'image_count': 8}, {'id': 854, 'image_count': 80}, {'id': 855, 'image_count': 3}, {'id': 856, 'image_count': 3}, {'id': 857, 'image_count': 15}, {'id': 858, 'image_count': 2}, {'id': 859, 'image_count': 10}, {'id': 860, 'image_count': 386}, {'id': 861, 'image_count': 65}, {'id': 862, 'image_count': 3}, {'id': 863, 'image_count': 35}, {'id': 864, 'image_count': 5}, {'id': 865, 'image_count': 180}, {'id': 866, 'image_count': 99}, {'id': 867, 'image_count': 49}, {'id': 868, 'image_count': 28}, {'id': 869, 'image_count': 1}, {'id': 870, 'image_count': 52}, {'id': 871, 
'image_count': 36}, {'id': 872, 'image_count': 70}, {'id': 873, 'image_count': 6}, {'id': 874, 'image_count': 29}, {'id': 875, 'image_count': 24}, {'id': 876, 'image_count': 1115}, {'id': 877, 'image_count': 61}, {'id': 878, 'image_count': 18}, {'id': 879, 'image_count': 18}, {'id': 880, 'image_count': 665}, {'id': 881, 'image_count': 1096}, {'id': 882, 'image_count': 29}, {'id': 883, 'image_count': 8}, {'id': 884, 'image_count': 14}, {'id': 885, 'image_count': 1622}, {'id': 886, 'image_count': 2}, {'id': 887, 'image_count': 3}, {'id': 888, 'image_count': 32}, {'id': 889, 'image_count': 55}, {'id': 890, 'image_count': 1}, {'id': 891, 'image_count': 10}, {'id': 892, 'image_count': 10}, {'id': 893, 'image_count': 47}, {'id': 894, 'image_count': 3}, {'id': 895, 'image_count': 29}, {'id': 896, 'image_count': 342}, {'id': 897, 'image_count': 25}, {'id': 898, 'image_count': 1469}, {'id': 899, 'image_count': 521}, {'id': 900, 'image_count': 347}, {'id': 901, 'image_count': 35}, {'id': 902, 'image_count': 7}, {'id': 903, 'image_count': 207}, {'id': 904, 'image_count': 108}, {'id': 905, 'image_count': 2}, {'id': 906, 'image_count': 34}, {'id': 907, 'image_count': 12}, {'id': 908, 'image_count': 10}, {'id': 909, 'image_count': 13}, {'id': 910, 'image_count': 361}, {'id': 911, 'image_count': 1023}, {'id': 912, 'image_count': 782}, {'id': 913, 'image_count': 2}, {'id': 914, 'image_count': 5}, {'id': 915, 'image_count': 247}, {'id': 916, 'image_count': 221}, {'id': 917, 'image_count': 4}, {'id': 918, 'image_count': 8}, {'id': 919, 'image_count': 158}, {'id': 920, 'image_count': 3}, {'id': 921, 'image_count': 752}, {'id': 922, 'image_count': 64}, {'id': 923, 'image_count': 707}, {'id': 924, 'image_count': 143}, {'id': 925, 'image_count': 1}, {'id': 926, 'image_count': 49}, {'id': 927, 'image_count': 126}, {'id': 928, 'image_count': 76}, {'id': 929, 'image_count': 11}, {'id': 930, 'image_count': 11}, {'id': 931, 'image_count': 4}, {'id': 932, 'image_count': 39}, {'id': 933, 'image_count': 11}, {'id': 934, 'image_count': 13}, {'id': 935, 'image_count': 91}, {'id': 936, 'image_count': 14}, {'id': 937, 'image_count': 5}, {'id': 938, 'image_count': 3}, {'id': 939, 'image_count': 10}, {'id': 940, 'image_count': 18}, {'id': 941, 'image_count': 9}, {'id': 942, 'image_count': 6}, {'id': 943, 'image_count': 951}, {'id': 944, 'image_count': 2}, {'id': 945, 'image_count': 1}, {'id': 946, 'image_count': 19}, {'id': 947, 'image_count': 1942}, {'id': 948, 'image_count': 1916}, {'id': 949, 'image_count': 139}, {'id': 950, 'image_count': 43}, {'id': 951, 'image_count': 1969}, {'id': 952, 'image_count': 5}, {'id': 953, 'image_count': 134}, {'id': 954, 'image_count': 74}, {'id': 955, 'image_count': 381}, {'id': 956, 'image_count': 1}, {'id': 957, 'image_count': 381}, {'id': 958, 'image_count': 6}, {'id': 959, 'image_count': 1826}, {'id': 960, 'image_count': 28}, {'id': 961, 'image_count': 1635}, {'id': 962, 'image_count': 1967}, {'id': 963, 'image_count': 16}, {'id': 964, 'image_count': 1926}, {'id': 965, 'image_count': 1789}, {'id': 966, 'image_count': 401}, {'id': 967, 'image_count': 1968}, {'id': 968, 'image_count': 1167}, {'id': 969, 'image_count': 1}, {'id': 970, 'image_count': 56}, {'id': 971, 'image_count': 17}, {'id': 972, 'image_count': 1}, {'id': 973, 'image_count': 58}, {'id': 974, 'image_count': 9}, {'id': 975, 'image_count': 8}, {'id': 976, 'image_count': 1124}, {'id': 977, 'image_count': 31}, {'id': 978, 'image_count': 16}, {'id': 979, 'image_count': 491}, {'id': 980, 'image_count': 432}, {'id': 981, 
'image_count': 1945}, {'id': 982, 'image_count': 1899}, {'id': 983, 'image_count': 5}, {'id': 984, 'image_count': 28}, {'id': 985, 'image_count': 7}, {'id': 986, 'image_count': 146}, {'id': 987, 'image_count': 1}, {'id': 988, 'image_count': 25}, {'id': 989, 'image_count': 22}, {'id': 990, 'image_count': 1}, {'id': 991, 'image_count': 10}, {'id': 992, 'image_count': 9}, {'id': 993, 'image_count': 308}, {'id': 994, 'image_count': 4}, {'id': 995, 'image_count': 1969}, {'id': 996, 'image_count': 45}, {'id': 997, 'image_count': 12}, {'id': 998, 'image_count': 1}, {'id': 999, 'image_count': 85}, {'id': 1000, 'image_count': 1127}, {'id': 1001, 'image_count': 11}, {'id': 1002, 'image_count': 60}, {'id': 1003, 'image_count': 1}, {'id': 1004, 'image_count': 16}, {'id': 1005, 'image_count': 1}, {'id': 1006, 'image_count': 65}, {'id': 1007, 'image_count': 13}, {'id': 1008, 'image_count': 655}, {'id': 1009, 'image_count': 51}, {'id': 1010, 'image_count': 1}, {'id': 1011, 'image_count': 673}, {'id': 1012, 'image_count': 5}, {'id': 1013, 'image_count': 36}, {'id': 1014, 'image_count': 54}, {'id': 1015, 'image_count': 5}, {'id': 1016, 'image_count': 8}, {'id': 1017, 'image_count': 305}, {'id': 1018, 'image_count': 297}, {'id': 1019, 'image_count': 1053}, {'id': 1020, 'image_count': 223}, {'id': 1021, 'image_count': 1037}, {'id': 1022, 'image_count': 63}, {'id': 1023, 'image_count': 1881}, {'id': 1024, 'image_count': 507}, {'id': 1025, 'image_count': 333}, {'id': 1026, 'image_count': 1911}, {'id': 1027, 'image_count': 1765}, {'id': 1028, 'image_count': 1}, {'id': 1029, 'image_count': 5}, {'id': 1030, 'image_count': 1}, {'id': 1031, 'image_count': 9}, {'id': 1032, 'image_count': 2}, {'id': 1033, 'image_count': 151}, {'id': 1034, 'image_count': 82}, {'id': 1035, 'image_count': 1931}, {'id': 1036, 'image_count': 41}, {'id': 1037, 'image_count': 1895}, {'id': 1038, 'image_count': 24}, {'id': 1039, 'image_count': 22}, {'id': 1040, 'image_count': 35}, {'id': 1041, 'image_count': 69}, {'id': 1042, 'image_count': 962}, {'id': 1043, 'image_count': 588}, {'id': 1044, 'image_count': 21}, {'id': 1045, 'image_count': 825}, {'id': 1046, 'image_count': 52}, {'id': 1047, 'image_count': 5}, {'id': 1048, 'image_count': 5}, {'id': 1049, 'image_count': 5}, {'id': 1050, 'image_count': 1860}, {'id': 1051, 'image_count': 56}, {'id': 1052, 'image_count': 1582}, {'id': 1053, 'image_count': 7}, {'id': 1054, 'image_count': 2}, {'id': 1055, 'image_count': 1562}, {'id': 1056, 'image_count': 1885}, {'id': 1057, 'image_count': 1}, {'id': 1058, 'image_count': 5}, {'id': 1059, 'image_count': 137}, {'id': 1060, 'image_count': 1094}, {'id': 1061, 'image_count': 134}, {'id': 1062, 'image_count': 29}, {'id': 1063, 'image_count': 22}, {'id': 1064, 'image_count': 522}, {'id': 1065, 'image_count': 50}, {'id': 1066, 'image_count': 68}, {'id': 1067, 'image_count': 16}, {'id': 1068, 'image_count': 40}, {'id': 1069, 'image_count': 35}, {'id': 1070, 'image_count': 135}, {'id': 1071, 'image_count': 1413}, {'id': 1072, 'image_count': 772}, {'id': 1073, 'image_count': 50}, {'id': 1074, 'image_count': 1015}, {'id': 1075, 'image_count': 1}, {'id': 1076, 'image_count': 65}, {'id': 1077, 'image_count': 1900}, {'id': 1078, 'image_count': 1302}, {'id': 1079, 'image_count': 1977}, {'id': 1080, 'image_count': 2}, {'id': 1081, 'image_count': 29}, {'id': 1082, 'image_count': 36}, {'id': 1083, 'image_count': 138}, {'id': 1084, 'image_count': 4}, {'id': 1085, 'image_count': 67}, {'id': 1086, 'image_count': 26}, {'id': 1087, 'image_count': 25}, {'id': 1088, 
'image_count': 33}, {'id': 1089, 'image_count': 37}, {'id': 1090, 'image_count': 50}, {'id': 1091, 'image_count': 270}, {'id': 1092, 'image_count': 12}, {'id': 1093, 'image_count': 316}, {'id': 1094, 'image_count': 41}, {'id': 1095, 'image_count': 224}, {'id': 1096, 'image_count': 105}, {'id': 1097, 'image_count': 1925}, {'id': 1098, 'image_count': 1021}, {'id': 1099, 'image_count': 1213}, {'id': 1100, 'image_count': 172}, {'id': 1101, 'image_count': 28}, {'id': 1102, 'image_count': 745}, {'id': 1103, 'image_count': 187}, {'id': 1104, 'image_count': 147}, {'id': 1105, 'image_count': 136}, {'id': 1106, 'image_count': 34}, {'id': 1107, 'image_count': 41}, {'id': 1108, 'image_count': 636}, {'id': 1109, 'image_count': 570}, {'id': 1110, 'image_count': 1149}, {'id': 1111, 'image_count': 61}, {'id': 1112, 'image_count': 1890}, {'id': 1113, 'image_count': 18}, {'id': 1114, 'image_count': 143}, {'id': 1115, 'image_count': 1517}, {'id': 1116, 'image_count': 7}, {'id': 1117, 'image_count': 943}, {'id': 1118, 'image_count': 6}, {'id': 1119, 'image_count': 1}, {'id': 1120, 'image_count': 11}, {'id': 1121, 'image_count': 101}, {'id': 1122, 'image_count': 1909}, {'id': 1123, 'image_count': 800}, {'id': 1124, 'image_count': 1}, {'id': 1125, 'image_count': 44}, {'id': 1126, 'image_count': 3}, {'id': 1127, 'image_count': 44}, {'id': 1128, 'image_count': 31}, {'id': 1129, 'image_count': 7}, {'id': 1130, 'image_count': 20}, {'id': 1131, 'image_count': 11}, {'id': 1132, 'image_count': 13}, {'id': 1133, 'image_count': 1924}, {'id': 1134, 'image_count': 113}, {'id': 1135, 'image_count': 2}, {'id': 1136, 'image_count': 139}, {'id': 1137, 'image_count': 12}, {'id': 1138, 'image_count': 37}, {'id': 1139, 'image_count': 1866}, {'id': 1140, 'image_count': 47}, {'id': 1141, 'image_count': 1468}, {'id': 1142, 'image_count': 729}, {'id': 1143, 'image_count': 24}, {'id': 1144, 'image_count': 1}, {'id': 1145, 'image_count': 10}, {'id': 1146, 'image_count': 3}, {'id': 1147, 'image_count': 14}, {'id': 1148, 'image_count': 4}, {'id': 1149, 'image_count': 29}, {'id': 1150, 'image_count': 4}, {'id': 1151, 'image_count': 70}, {'id': 1152, 'image_count': 46}, {'id': 1153, 'image_count': 14}, {'id': 1154, 'image_count': 48}, {'id': 1155, 'image_count': 1855}, {'id': 1156, 'image_count': 113}, {'id': 1157, 'image_count': 1}, {'id': 1158, 'image_count': 1}, {'id': 1159, 'image_count': 10}, {'id': 1160, 'image_count': 54}, {'id': 1161, 'image_count': 1923}, {'id': 1162, 'image_count': 630}, {'id': 1163, 'image_count': 31}, {'id': 1164, 'image_count': 69}, {'id': 1165, 'image_count': 7}, {'id': 1166, 'image_count': 11}, {'id': 1167, 'image_count': 1}, {'id': 1168, 'image_count': 30}, {'id': 1169, 'image_count': 50}, {'id': 1170, 'image_count': 45}, {'id': 1171, 'image_count': 28}, {'id': 1172, 'image_count': 114}, {'id': 1173, 'image_count': 193}, {'id': 1174, 'image_count': 21}, {'id': 1175, 'image_count': 91}, {'id': 1176, 'image_count': 31}, {'id': 1177, 'image_count': 1469}, {'id': 1178, 'image_count': 1924}, {'id': 1179, 'image_count': 87}, {'id': 1180, 'image_count': 77}, {'id': 1181, 'image_count': 11}, {'id': 1182, 'image_count': 47}, {'id': 1183, 'image_count': 21}, {'id': 1184, 'image_count': 47}, {'id': 1185, 'image_count': 70}, {'id': 1186, 'image_count': 1838}, {'id': 1187, 'image_count': 19}, {'id': 1188, 'image_count': 531}, {'id': 1189, 'image_count': 11}, {'id': 1190, 'image_count': 941}, {'id': 1191, 'image_count': 113}, {'id': 1192, 'image_count': 26}, {'id': 1193, 'image_count': 5}, {'id': 1194, 'image_count': 
56}, {'id': 1195, 'image_count': 73}, {'id': 1196, 'image_count': 32}, {'id': 1197, 'image_count': 128}, {'id': 1198, 'image_count': 623}, {'id': 1199, 'image_count': 12}, {'id': 1200, 'image_count': 52}, {'id': 1201, 'image_count': 11}, {'id': 1202, 'image_count': 1674}, {'id': 1203, 'image_count': 81}] # noqa +# fmt: on diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/datasets/pascal_voc.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/datasets/pascal_voc.py new file mode 100644 index 0000000000000000000000000000000000000000..919cc4920394d3cb87ad5232adcbedc250e4db26 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/datasets/pascal_voc.py @@ -0,0 +1,82 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + +import numpy as np +import os +import xml.etree.ElementTree as ET +from typing import List, Tuple, Union + +from annotator.oneformer.detectron2.data import DatasetCatalog, MetadataCatalog +from annotator.oneformer.detectron2.structures import BoxMode +from annotator.oneformer.detectron2.utils.file_io import PathManager + +__all__ = ["load_voc_instances", "register_pascal_voc"] + + +# fmt: off +CLASS_NAMES = ( + "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", + "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", + "pottedplant", "sheep", "sofa", "train", "tvmonitor" +) +# fmt: on + + +def load_voc_instances(dirname: str, split: str, class_names: Union[List[str], Tuple[str, ...]]): + """ + Load Pascal VOC detection annotations to Detectron2 format. + + Args: + dirname: Contain "Annotations", "ImageSets", "JPEGImages" + split (str): one of "train", "test", "val", "trainval" + class_names: list or tuple of class names + """ + with PathManager.open(os.path.join(dirname, "ImageSets", "Main", split + ".txt")) as f: + fileids = np.loadtxt(f, dtype=np.str) + + # Needs to read many small annotation files. Makes sense at local + annotation_dirname = PathManager.get_local_path(os.path.join(dirname, "Annotations/")) + dicts = [] + for fileid in fileids: + anno_file = os.path.join(annotation_dirname, fileid + ".xml") + jpeg_file = os.path.join(dirname, "JPEGImages", fileid + ".jpg") + + with PathManager.open(anno_file) as f: + tree = ET.parse(f) + + r = { + "file_name": jpeg_file, + "image_id": fileid, + "height": int(tree.findall("./size/height")[0].text), + "width": int(tree.findall("./size/width")[0].text), + } + instances = [] + + for obj in tree.findall("object"): + cls = obj.find("name").text + # We include "difficult" samples in training. + # Based on limited experiments, they don't hurt accuracy. + # difficult = int(obj.find("difficult").text) + # if difficult == 1: + # continue + bbox = obj.find("bndbox") + bbox = [float(bbox.find(x).text) for x in ["xmin", "ymin", "xmax", "ymax"]] + # Original annotations are integers in the range [1, W or H] + # Assuming they mean 1-based pixel indices (inclusive), + # a box with annotation (xmin=1, xmax=W) covers the whole image. 
+ # In coordinate space this is represented by (xmin=0, xmax=W) + bbox[0] -= 1.0 + bbox[1] -= 1.0 + instances.append( + {"category_id": class_names.index(cls), "bbox": bbox, "bbox_mode": BoxMode.XYXY_ABS} + ) + r["annotations"] = instances + dicts.append(r) + return dicts + + +def register_pascal_voc(name, dirname, split, year, class_names=CLASS_NAMES): + DatasetCatalog.register(name, lambda: load_voc_instances(dirname, split, class_names)) + MetadataCatalog.get(name).set( + thing_classes=list(class_names), dirname=dirname, year=year, split=split + ) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/datasets/register_coco.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/datasets/register_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..e564438d5bf016bcdbb65b4bbdc215d79f579f8a --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/datasets/register_coco.py @@ -0,0 +1,3 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from .coco import register_coco_instances # noqa +from .coco_panoptic import register_coco_panoptic_separated # noqa diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/detection_utils.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/detection_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..b00ca9126d22ecde050d0bb8501871b2cf8f13ff --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/detection_utils.py @@ -0,0 +1,659 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + +""" +Common data processing utilities that are used in a +typical object detection data pipeline. +""" +import logging +import numpy as np +from typing import List, Union +import annotator.oneformer.pycocotools.mask as mask_util +import torch +from PIL import Image + +from annotator.oneformer.detectron2.structures import ( + BitMasks, + Boxes, + BoxMode, + Instances, + Keypoints, + PolygonMasks, + RotatedBoxes, + polygons_to_bitmask, +) +from annotator.oneformer.detectron2.utils.file_io import PathManager + +from . import transforms as T +from .catalog import MetadataCatalog + +__all__ = [ + "SizeMismatchError", + "convert_image_to_rgb", + "check_image_size", + "transform_proposals", + "transform_instance_annotations", + "annotations_to_instances", + "annotations_to_instances_rotated", + "build_augmentation", + "build_transform_gen", + "create_keypoint_hflip_indices", + "filter_empty_instances", + "read_image", +] + + +class SizeMismatchError(ValueError): + """ + When loaded image has difference width/height compared with annotation. + """ + + +# https://en.wikipedia.org/wiki/YUV#SDTV_with_BT.601 +_M_RGB2YUV = [[0.299, 0.587, 0.114], [-0.14713, -0.28886, 0.436], [0.615, -0.51499, -0.10001]] +_M_YUV2RGB = [[1.0, 0.0, 1.13983], [1.0, -0.39465, -0.58060], [1.0, 2.03211, 0.0]] + +# https://www.exiv2.org/tags.html +_EXIF_ORIENT = 274 # exif 'Orientation' tag + + +def convert_PIL_to_numpy(image, format): + """ + Convert PIL image to numpy array of target format. 
+ + Args: + image (PIL.Image): a PIL image + format (str): the format of output image + + Returns: + (np.ndarray): also see `read_image` + """ + if format is not None: + # PIL only supports RGB, so convert to RGB and flip channels over below + conversion_format = format + if format in ["BGR", "YUV-BT.601"]: + conversion_format = "RGB" + image = image.convert(conversion_format) + image = np.asarray(image) + # PIL squeezes out the channel dimension for "L", so make it HWC + if format == "L": + image = np.expand_dims(image, -1) + + # handle formats not supported by PIL + elif format == "BGR": + # flip channels if needed + image = image[:, :, ::-1] + elif format == "YUV-BT.601": + image = image / 255.0 + image = np.dot(image, np.array(_M_RGB2YUV).T) + + return image + + +def convert_image_to_rgb(image, format): + """ + Convert an image from given format to RGB. + + Args: + image (np.ndarray or Tensor): an HWC image + format (str): the format of input image, also see `read_image` + + Returns: + (np.ndarray): (H,W,3) RGB image in 0-255 range, can be either float or uint8 + """ + if isinstance(image, torch.Tensor): + image = image.cpu().numpy() + if format == "BGR": + image = image[:, :, [2, 1, 0]] + elif format == "YUV-BT.601": + image = np.dot(image, np.array(_M_YUV2RGB).T) + image = image * 255.0 + else: + if format == "L": + image = image[:, :, 0] + image = image.astype(np.uint8) + image = np.asarray(Image.fromarray(image, mode=format).convert("RGB")) + return image + + +def _apply_exif_orientation(image): + """ + Applies the exif orientation correctly. + + This code exists per the bug: + https://github.com/python-pillow/Pillow/issues/3973 + with the function `ImageOps.exif_transpose`. The Pillow source raises errors with + various methods, especially `tobytes` + + Function based on: + https://github.com/wkentaro/labelme/blob/v4.5.4/labelme/utils/image.py#L59 + https://github.com/python-pillow/Pillow/blob/7.1.2/src/PIL/ImageOps.py#L527 + + Args: + image (PIL.Image): a PIL image + + Returns: + (PIL.Image): the PIL image with exif orientation applied, if applicable + """ + if not hasattr(image, "getexif"): + return image + + try: + exif = image.getexif() + except Exception: # https://github.com/facebookresearch/detectron2/issues/1885 + exif = None + + if exif is None: + return image + + orientation = exif.get(_EXIF_ORIENT) + + method = { + 2: Image.FLIP_LEFT_RIGHT, + 3: Image.ROTATE_180, + 4: Image.FLIP_TOP_BOTTOM, + 5: Image.TRANSPOSE, + 6: Image.ROTATE_270, + 7: Image.TRANSVERSE, + 8: Image.ROTATE_90, + }.get(orientation) + + if method is not None: + return image.transpose(method) + return image + + +def read_image(file_name, format=None): + """ + Read an image into the given format. + Will apply rotation and flipping if the image has such exif information. + + Args: + file_name (str): image file path + format (str): one of the supported image modes in PIL, or "BGR" or "YUV-BT.601". + + Returns: + image (np.ndarray): + an HWC image in the given format, which is 0-255, uint8 for + supported image modes in PIL or "BGR"; float (0-1 for Y) for YUV-BT.601. + """ + with PathManager.open(file_name, "rb") as f: + image = Image.open(f) + + # work around this bug: https://github.com/python-pillow/Pillow/issues/3973 + image = _apply_exif_orientation(image) + return convert_PIL_to_numpy(image, format) + + +def check_image_size(dataset_dict, image): + """ + Raise an error if the image does not match the size specified in the dict. 
+ """ + if "width" in dataset_dict or "height" in dataset_dict: + image_wh = (image.shape[1], image.shape[0]) + expected_wh = (dataset_dict["width"], dataset_dict["height"]) + if not image_wh == expected_wh: + raise SizeMismatchError( + "Mismatched image shape{}, got {}, expect {}.".format( + " for image " + dataset_dict["file_name"] + if "file_name" in dataset_dict + else "", + image_wh, + expected_wh, + ) + + " Please check the width/height in your annotation." + ) + + # To ensure bbox always remap to original image size + if "width" not in dataset_dict: + dataset_dict["width"] = image.shape[1] + if "height" not in dataset_dict: + dataset_dict["height"] = image.shape[0] + + +def transform_proposals(dataset_dict, image_shape, transforms, *, proposal_topk, min_box_size=0): + """ + Apply transformations to the proposals in dataset_dict, if any. + + Args: + dataset_dict (dict): a dict read from the dataset, possibly + contains fields "proposal_boxes", "proposal_objectness_logits", "proposal_bbox_mode" + image_shape (tuple): height, width + transforms (TransformList): + proposal_topk (int): only keep top-K scoring proposals + min_box_size (int): proposals with either side smaller than this + threshold are removed + + The input dict is modified in-place, with abovementioned keys removed. A new + key "proposals" will be added. Its value is an `Instances` + object which contains the transformed proposals in its field + "proposal_boxes" and "objectness_logits". + """ + if "proposal_boxes" in dataset_dict: + # Transform proposal boxes + boxes = transforms.apply_box( + BoxMode.convert( + dataset_dict.pop("proposal_boxes"), + dataset_dict.pop("proposal_bbox_mode"), + BoxMode.XYXY_ABS, + ) + ) + boxes = Boxes(boxes) + objectness_logits = torch.as_tensor( + dataset_dict.pop("proposal_objectness_logits").astype("float32") + ) + + boxes.clip(image_shape) + keep = boxes.nonempty(threshold=min_box_size) + boxes = boxes[keep] + objectness_logits = objectness_logits[keep] + + proposals = Instances(image_shape) + proposals.proposal_boxes = boxes[:proposal_topk] + proposals.objectness_logits = objectness_logits[:proposal_topk] + dataset_dict["proposals"] = proposals + + +def get_bbox(annotation): + """ + Get bbox from data + Args: + annotation (dict): dict of instance annotations for a single instance. + Returns: + bbox (ndarray): x1, y1, x2, y2 coordinates + """ + # bbox is 1d (per-instance bounding box) + bbox = BoxMode.convert(annotation["bbox"], annotation["bbox_mode"], BoxMode.XYXY_ABS) + return bbox + + +def transform_instance_annotations( + annotation, transforms, image_size, *, keypoint_hflip_indices=None +): + """ + Apply transforms to box, segmentation and keypoints annotations of a single instance. + + It will use `transforms.apply_box` for the box, and + `transforms.apply_coords` for segmentation polygons & keypoints. + If you need anything more specially designed for each data structure, + you'll need to implement your own version of this function or the transforms. + + Args: + annotation (dict): dict of instance annotations for a single instance. + It will be modified in-place. + transforms (TransformList or list[Transform]): + image_size (tuple): the height, width of the transformed image + keypoint_hflip_indices (ndarray[int]): see `create_keypoint_hflip_indices`. + + Returns: + dict: + the same input dict with fields "bbox", "segmentation", "keypoints" + transformed according to `transforms`. + The "bbox_mode" field will be set to XYXY_ABS. 
+ """ + if isinstance(transforms, (tuple, list)): + transforms = T.TransformList(transforms) + # bbox is 1d (per-instance bounding box) + bbox = BoxMode.convert(annotation["bbox"], annotation["bbox_mode"], BoxMode.XYXY_ABS) + # clip transformed bbox to image size + bbox = transforms.apply_box(np.array([bbox]))[0].clip(min=0) + annotation["bbox"] = np.minimum(bbox, list(image_size + image_size)[::-1]) + annotation["bbox_mode"] = BoxMode.XYXY_ABS + + if "segmentation" in annotation: + # each instance contains 1 or more polygons + segm = annotation["segmentation"] + if isinstance(segm, list): + # polygons + polygons = [np.asarray(p).reshape(-1, 2) for p in segm] + annotation["segmentation"] = [ + p.reshape(-1) for p in transforms.apply_polygons(polygons) + ] + elif isinstance(segm, dict): + # RLE + mask = mask_util.decode(segm) + mask = transforms.apply_segmentation(mask) + assert tuple(mask.shape[:2]) == image_size + annotation["segmentation"] = mask + else: + raise ValueError( + "Cannot transform segmentation of type '{}'!" + "Supported types are: polygons as list[list[float] or ndarray]," + " COCO-style RLE as a dict.".format(type(segm)) + ) + + if "keypoints" in annotation: + keypoints = transform_keypoint_annotations( + annotation["keypoints"], transforms, image_size, keypoint_hflip_indices + ) + annotation["keypoints"] = keypoints + + return annotation + + +def transform_keypoint_annotations(keypoints, transforms, image_size, keypoint_hflip_indices=None): + """ + Transform keypoint annotations of an image. + If a keypoint is transformed out of image boundary, it will be marked "unlabeled" (visibility=0) + + Args: + keypoints (list[float]): Nx3 float in Detectron2's Dataset format. + Each point is represented by (x, y, visibility). + transforms (TransformList): + image_size (tuple): the height, width of the transformed image + keypoint_hflip_indices (ndarray[int]): see `create_keypoint_hflip_indices`. + When `transforms` includes horizontal flip, will use the index + mapping to flip keypoints. + """ + # (N*3,) -> (N, 3) + keypoints = np.asarray(keypoints, dtype="float64").reshape(-1, 3) + keypoints_xy = transforms.apply_coords(keypoints[:, :2]) + + # Set all out-of-boundary points to "unlabeled" + inside = (keypoints_xy >= np.array([0, 0])) & (keypoints_xy <= np.array(image_size[::-1])) + inside = inside.all(axis=1) + keypoints[:, :2] = keypoints_xy + keypoints[:, 2][~inside] = 0 + + # This assumes that HorizFlipTransform is the only one that does flip + do_hflip = sum(isinstance(t, T.HFlipTransform) for t in transforms.transforms) % 2 == 1 + + # Alternative way: check if probe points was horizontally flipped. 
+ # probe = np.asarray([[0.0, 0.0], [image_width, 0.0]]) + # probe_aug = transforms.apply_coords(probe.copy()) + # do_hflip = np.sign(probe[1][0] - probe[0][0]) != np.sign(probe_aug[1][0] - probe_aug[0][0]) # noqa + + # If flipped, swap each keypoint with its opposite-handed equivalent + if do_hflip: + if keypoint_hflip_indices is None: + raise ValueError("Cannot flip keypoints without providing flip indices!") + if len(keypoints) != len(keypoint_hflip_indices): + raise ValueError( + "Keypoint data has {} points, but metadata " + "contains {} points!".format(len(keypoints), len(keypoint_hflip_indices)) + ) + keypoints = keypoints[np.asarray(keypoint_hflip_indices, dtype=np.int32), :] + + # Maintain COCO convention that if visibility == 0 (unlabeled), then x, y = 0 + keypoints[keypoints[:, 2] == 0] = 0 + return keypoints + + +def annotations_to_instances(annos, image_size, mask_format="polygon"): + """ + Create an :class:`Instances` object used by the models, + from instance annotations in the dataset dict. + + Args: + annos (list[dict]): a list of instance annotations in one image, each + element for one instance. + image_size (tuple): height, width + + Returns: + Instances: + It will contain fields "gt_boxes", "gt_classes", + "gt_masks", "gt_keypoints", if they can be obtained from `annos`. + This is the format that builtin models expect. + """ + boxes = ( + np.stack( + [BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS) for obj in annos] + ) + if len(annos) + else np.zeros((0, 4)) + ) + target = Instances(image_size) + target.gt_boxes = Boxes(boxes) + + classes = [int(obj["category_id"]) for obj in annos] + classes = torch.tensor(classes, dtype=torch.int64) + target.gt_classes = classes + + if len(annos) and "segmentation" in annos[0]: + segms = [obj["segmentation"] for obj in annos] + if mask_format == "polygon": + try: + masks = PolygonMasks(segms) + except ValueError as e: + raise ValueError( + "Failed to use mask_format=='polygon' from the given annotations!" + ) from e + else: + assert mask_format == "bitmask", mask_format + masks = [] + for segm in segms: + if isinstance(segm, list): + # polygon + masks.append(polygons_to_bitmask(segm, *image_size)) + elif isinstance(segm, dict): + # COCO RLE + masks.append(mask_util.decode(segm)) + elif isinstance(segm, np.ndarray): + assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format( + segm.ndim + ) + # mask array + masks.append(segm) + else: + raise ValueError( + "Cannot convert segmentation of type '{}' to BitMasks!" + "Supported types are: polygons as list[list[float] or ndarray]," + " COCO-style RLE as a dict, or a binary segmentation mask " + " in a 2D numpy array of shape HxW.".format(type(segm)) + ) + # torch.from_numpy does not support array with negative stride. + masks = BitMasks( + torch.stack([torch.from_numpy(np.ascontiguousarray(x)) for x in masks]) + ) + target.gt_masks = masks + + if len(annos) and "keypoints" in annos[0]: + kpts = [obj.get("keypoints", []) for obj in annos] + target.gt_keypoints = Keypoints(kpts) + + return target + + +def annotations_to_instances_rotated(annos, image_size): + """ + Create an :class:`Instances` object used by the models, + from instance annotations in the dataset dict. + Compared to `annotations_to_instances`, this function is for rotated boxes only + + Args: + annos (list[dict]): a list of instance annotations in one image, each + element for one instance. 
+ image_size (tuple): height, width + + Returns: + Instances: + Containing fields "gt_boxes", "gt_classes", + if they can be obtained from `annos`. + This is the format that builtin models expect. + """ + boxes = [obj["bbox"] for obj in annos] + target = Instances(image_size) + boxes = target.gt_boxes = RotatedBoxes(boxes) + boxes.clip(image_size) + + classes = [obj["category_id"] for obj in annos] + classes = torch.tensor(classes, dtype=torch.int64) + target.gt_classes = classes + + return target + + +def filter_empty_instances( + instances, by_box=True, by_mask=True, box_threshold=1e-5, return_mask=False +): + """ + Filter out empty instances in an `Instances` object. + + Args: + instances (Instances): + by_box (bool): whether to filter out instances with empty boxes + by_mask (bool): whether to filter out instances with empty masks + box_threshold (float): minimum width and height to be considered non-empty + return_mask (bool): whether to return boolean mask of filtered instances + + Returns: + Instances: the filtered instances. + tensor[bool], optional: boolean mask of filtered instances + """ + assert by_box or by_mask + r = [] + if by_box: + r.append(instances.gt_boxes.nonempty(threshold=box_threshold)) + if instances.has("gt_masks") and by_mask: + r.append(instances.gt_masks.nonempty()) + + # TODO: can also filter visible keypoints + + if not r: + return instances + m = r[0] + for x in r[1:]: + m = m & x + if return_mask: + return instances[m], m + return instances[m] + + +def create_keypoint_hflip_indices(dataset_names: Union[str, List[str]]) -> List[int]: + """ + Args: + dataset_names: list of dataset names + + Returns: + list[int]: a list of size=#keypoints, storing the + horizontally-flipped keypoint indices. + """ + if isinstance(dataset_names, str): + dataset_names = [dataset_names] + + check_metadata_consistency("keypoint_names", dataset_names) + check_metadata_consistency("keypoint_flip_map", dataset_names) + + meta = MetadataCatalog.get(dataset_names[0]) + names = meta.keypoint_names + # TODO flip -> hflip + flip_map = dict(meta.keypoint_flip_map) + flip_map.update({v: k for k, v in flip_map.items()}) + flipped_names = [i if i not in flip_map else flip_map[i] for i in names] + flip_indices = [names.index(i) for i in flipped_names] + return flip_indices + + +def get_fed_loss_cls_weights(dataset_names: Union[str, List[str]], freq_weight_power=1.0): + """ + Get frequency weight for each class sorted by class id. + We now calcualte freqency weight using image_count to the power freq_weight_power. + + Args: + dataset_names: list of dataset names + freq_weight_power: power value + """ + if isinstance(dataset_names, str): + dataset_names = [dataset_names] + + check_metadata_consistency("class_image_count", dataset_names) + + meta = MetadataCatalog.get(dataset_names[0]) + class_freq_meta = meta.class_image_count + class_freq = torch.tensor( + [c["image_count"] for c in sorted(class_freq_meta, key=lambda x: x["id"])] + ) + class_freq_weight = class_freq.float() ** freq_weight_power + return class_freq_weight + + +def gen_crop_transform_with_instance(crop_size, image_size, instance): + """ + Generate a CropTransform so that the cropping region contains + the center of the given instance. + + Args: + crop_size (tuple): h, w in pixels + image_size (tuple): h, w + instance (dict): an annotation dict of one instance, in Detectron2's + dataset format. 
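+
+    Returns:
+        CropTransform: a crop of size ``crop_size`` whose region contains the center
+        of ``instance``.
+
+    Examples:
+        An illustrative sketch with made-up sizes (a 300x300 crop out of a
+        480x640 image)::
+
+            instance = {"bbox": [100.0, 120.0, 300.0, 360.0], "bbox_mode": BoxMode.XYXY_ABS}
+            tfm = gen_crop_transform_with_instance((300, 300), (480, 640), instance)
+            # tfm.apply_image(img) would return the 300x300 cropped region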
+ """ + crop_size = np.asarray(crop_size, dtype=np.int32) + bbox = BoxMode.convert(instance["bbox"], instance["bbox_mode"], BoxMode.XYXY_ABS) + center_yx = (bbox[1] + bbox[3]) * 0.5, (bbox[0] + bbox[2]) * 0.5 + assert ( + image_size[0] >= center_yx[0] and image_size[1] >= center_yx[1] + ), "The annotation bounding box is outside of the image!" + assert ( + image_size[0] >= crop_size[0] and image_size[1] >= crop_size[1] + ), "Crop size is larger than image size!" + + min_yx = np.maximum(np.floor(center_yx).astype(np.int32) - crop_size, 0) + max_yx = np.maximum(np.asarray(image_size, dtype=np.int32) - crop_size, 0) + max_yx = np.minimum(max_yx, np.ceil(center_yx).astype(np.int32)) + + y0 = np.random.randint(min_yx[0], max_yx[0] + 1) + x0 = np.random.randint(min_yx[1], max_yx[1] + 1) + return T.CropTransform(x0, y0, crop_size[1], crop_size[0]) + + +def check_metadata_consistency(key, dataset_names): + """ + Check that the datasets have consistent metadata. + + Args: + key (str): a metadata key + dataset_names (list[str]): a list of dataset names + + Raises: + AttributeError: if the key does not exist in the metadata + ValueError: if the given datasets do not have the same metadata values defined by key + """ + if len(dataset_names) == 0: + return + logger = logging.getLogger(__name__) + entries_per_dataset = [getattr(MetadataCatalog.get(d), key) for d in dataset_names] + for idx, entry in enumerate(entries_per_dataset): + if entry != entries_per_dataset[0]: + logger.error( + "Metadata '{}' for dataset '{}' is '{}'".format(key, dataset_names[idx], str(entry)) + ) + logger.error( + "Metadata '{}' for dataset '{}' is '{}'".format( + key, dataset_names[0], str(entries_per_dataset[0]) + ) + ) + raise ValueError("Datasets have different metadata '{}'!".format(key)) + + +def build_augmentation(cfg, is_train): + """ + Create a list of default :class:`Augmentation` from config. + Now it includes resizing and flipping. + + Returns: + list[Augmentation] + """ + if is_train: + min_size = cfg.INPUT.MIN_SIZE_TRAIN + max_size = cfg.INPUT.MAX_SIZE_TRAIN + sample_style = cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING + else: + min_size = cfg.INPUT.MIN_SIZE_TEST + max_size = cfg.INPUT.MAX_SIZE_TEST + sample_style = "choice" + augmentation = [T.ResizeShortestEdge(min_size, max_size, sample_style)] + if is_train and cfg.INPUT.RANDOM_FLIP != "none": + augmentation.append( + T.RandomFlip( + horizontal=cfg.INPUT.RANDOM_FLIP == "horizontal", + vertical=cfg.INPUT.RANDOM_FLIP == "vertical", + ) + ) + return augmentation + + +build_transform_gen = build_augmentation +""" +Alias for backward-compatibility. +""" diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/samplers/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/samplers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..85c9f1a9df8a4038fbd4246239b699402e382309 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/samplers/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+from .distributed_sampler import ( + InferenceSampler, + RandomSubsetTrainingSampler, + RepeatFactorTrainingSampler, + TrainingSampler, +) + +from .grouped_batch_sampler import GroupedBatchSampler + +__all__ = [ + "GroupedBatchSampler", + "TrainingSampler", + "RandomSubsetTrainingSampler", + "InferenceSampler", + "RepeatFactorTrainingSampler", +] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/samplers/distributed_sampler.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/samplers/distributed_sampler.py new file mode 100644 index 0000000000000000000000000000000000000000..cd4724eac8fbff2456bd26f95e6fea5e914b73e2 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/samplers/distributed_sampler.py @@ -0,0 +1,278 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import itertools +import logging +import math +from collections import defaultdict +from typing import Optional +import torch +from torch.utils.data.sampler import Sampler + +from annotator.oneformer.detectron2.utils import comm + +logger = logging.getLogger(__name__) + + +class TrainingSampler(Sampler): + """ + In training, we only care about the "infinite stream" of training data. + So this sampler produces an infinite stream of indices and + all workers cooperate to correctly shuffle the indices and sample different indices. + + The samplers in each worker effectively produces `indices[worker_id::num_workers]` + where `indices` is an infinite stream of indices consisting of + `shuffle(range(size)) + shuffle(range(size)) + ...` (if shuffle is True) + or `range(size) + range(size) + ...` (if shuffle is False) + + Note that this sampler does not shard based on pytorch DataLoader worker id. + A sampler passed to pytorch DataLoader is used only with map-style dataset + and will not be executed inside workers. + But if this sampler is used in a way that it gets execute inside a dataloader + worker, then extra work needs to be done to shard its outputs based on worker id. + This is required so that workers don't produce identical data. + :class:`ToIterableDataset` implements this logic. + This note is true for all samplers in detectron2. + """ + + def __init__(self, size: int, shuffle: bool = True, seed: Optional[int] = None): + """ + Args: + size (int): the total number of data of the underlying dataset to sample from + shuffle (bool): whether to shuffle the indices or not + seed (int): the initial seed of the shuffle. Must be the same + across all workers. If None, will use a random seed shared + among workers (require synchronization among all workers). + """ + if not isinstance(size, int): + raise TypeError(f"TrainingSampler(size=) expects an int. Got type {type(size)}.") + if size <= 0: + raise ValueError(f"TrainingSampler(size=) expects a positive int. 
Got {size}.") + self._size = size + self._shuffle = shuffle + if seed is None: + seed = comm.shared_random_seed() + self._seed = int(seed) + + self._rank = comm.get_rank() + self._world_size = comm.get_world_size() + + def __iter__(self): + start = self._rank + yield from itertools.islice(self._infinite_indices(), start, None, self._world_size) + + def _infinite_indices(self): + g = torch.Generator() + g.manual_seed(self._seed) + while True: + if self._shuffle: + yield from torch.randperm(self._size, generator=g).tolist() + else: + yield from torch.arange(self._size).tolist() + + +class RandomSubsetTrainingSampler(TrainingSampler): + """ + Similar to TrainingSampler, but only sample a random subset of indices. + This is useful when you want to estimate the accuracy vs data-number curves by + training the model with different subset_ratio. + """ + + def __init__( + self, + size: int, + subset_ratio: float, + shuffle: bool = True, + seed_shuffle: Optional[int] = None, + seed_subset: Optional[int] = None, + ): + """ + Args: + size (int): the total number of data of the underlying dataset to sample from + subset_ratio (float): the ratio of subset data to sample from the underlying dataset + shuffle (bool): whether to shuffle the indices or not + seed_shuffle (int): the initial seed of the shuffle. Must be the same + across all workers. If None, will use a random seed shared + among workers (require synchronization among all workers). + seed_subset (int): the seed to randomize the subset to be sampled. + Must be the same across all workers. If None, will use a random seed shared + among workers (require synchronization among all workers). + """ + super().__init__(size=size, shuffle=shuffle, seed=seed_shuffle) + + assert 0.0 < subset_ratio <= 1.0 + self._size_subset = int(size * subset_ratio) + assert self._size_subset > 0 + if seed_subset is None: + seed_subset = comm.shared_random_seed() + self._seed_subset = int(seed_subset) + + # randomly generate the subset indexes to be sampled from + g = torch.Generator() + g.manual_seed(self._seed_subset) + indexes_randperm = torch.randperm(self._size, generator=g) + self._indexes_subset = indexes_randperm[: self._size_subset] + + logger.info("Using RandomSubsetTrainingSampler......") + logger.info(f"Randomly sample {self._size_subset} data from the original {self._size} data") + + def _infinite_indices(self): + g = torch.Generator() + g.manual_seed(self._seed) # self._seed equals seed_shuffle from __init__() + while True: + if self._shuffle: + # generate a random permutation to shuffle self._indexes_subset + randperm = torch.randperm(self._size_subset, generator=g) + yield from self._indexes_subset[randperm].tolist() + else: + yield from self._indexes_subset.tolist() + + +class RepeatFactorTrainingSampler(Sampler): + """ + Similar to TrainingSampler, but a sample may appear more times than others based + on its "repeat factor". This is suitable for training on class imbalanced datasets like LVIS. + """ + + def __init__(self, repeat_factors, *, shuffle=True, seed=None): + """ + Args: + repeat_factors (Tensor): a float vector, the repeat factor for each indice. When it's + full of ones, it is equivalent to ``TrainingSampler(len(repeat_factors), ...)``. + shuffle (bool): whether to shuffle the indices or not + seed (int): the initial seed of the shuffle. Must be the same + across all workers. If None, will use a random seed shared + among workers (require synchronization among all workers). 
+ """ + self._shuffle = shuffle + if seed is None: + seed = comm.shared_random_seed() + self._seed = int(seed) + + self._rank = comm.get_rank() + self._world_size = comm.get_world_size() + + # Split into whole number (_int_part) and fractional (_frac_part) parts. + self._int_part = torch.trunc(repeat_factors) + self._frac_part = repeat_factors - self._int_part + + @staticmethod + def repeat_factors_from_category_frequency(dataset_dicts, repeat_thresh): + """ + Compute (fractional) per-image repeat factors based on category frequency. + The repeat factor for an image is a function of the frequency of the rarest + category labeled in that image. The "frequency of category c" in [0, 1] is defined + as the fraction of images in the training set (without repeats) in which category c + appears. + See :paper:`lvis` (>= v2) Appendix B.2. + + Args: + dataset_dicts (list[dict]): annotations in Detectron2 dataset format. + repeat_thresh (float): frequency threshold below which data is repeated. + If the frequency is half of `repeat_thresh`, the image will be + repeated twice. + + Returns: + torch.Tensor: + the i-th element is the repeat factor for the dataset image at index i. + """ + # 1. For each category c, compute the fraction of images that contain it: f(c) + category_freq = defaultdict(int) + for dataset_dict in dataset_dicts: # For each image (without repeats) + cat_ids = {ann["category_id"] for ann in dataset_dict["annotations"]} + for cat_id in cat_ids: + category_freq[cat_id] += 1 + num_images = len(dataset_dicts) + for k, v in category_freq.items(): + category_freq[k] = v / num_images + + # 2. For each category c, compute the category-level repeat factor: + # r(c) = max(1, sqrt(t / f(c))) + category_rep = { + cat_id: max(1.0, math.sqrt(repeat_thresh / cat_freq)) + for cat_id, cat_freq in category_freq.items() + } + + # 3. For each image I, compute the image-level repeat factor: + # r(I) = max_{c in I} r(c) + rep_factors = [] + for dataset_dict in dataset_dicts: + cat_ids = {ann["category_id"] for ann in dataset_dict["annotations"]} + rep_factor = max({category_rep[cat_id] for cat_id in cat_ids}, default=1.0) + rep_factors.append(rep_factor) + + return torch.tensor(rep_factors, dtype=torch.float32) + + def _get_epoch_indices(self, generator): + """ + Create a list of dataset indices (with repeats) to use for one epoch. + + Args: + generator (torch.Generator): pseudo random number generator used for + stochastic rounding. + + Returns: + torch.Tensor: list of dataset indices to use in one epoch. Each index + is repeated based on its calculated repeat factor. + """ + # Since repeat factors are fractional, we use stochastic rounding so + # that the target repeat factor is achieved in expectation over the + # course of training + rands = torch.rand(len(self._frac_part), generator=generator) + rep_factors = self._int_part + (rands < self._frac_part).float() + # Construct a list of indices in which we repeat images as specified + indices = [] + for dataset_index, rep_factor in enumerate(rep_factors): + indices.extend([dataset_index] * int(rep_factor.item())) + return torch.tensor(indices, dtype=torch.int64) + + def __iter__(self): + start = self._rank + yield from itertools.islice(self._infinite_indices(), start, None, self._world_size) + + def _infinite_indices(self): + g = torch.Generator() + g.manual_seed(self._seed) + while True: + # Sample indices with repeats determined by stochastic rounding; each + # "epoch" may have a slightly different size due to the rounding. 
+ indices = self._get_epoch_indices(g) + if self._shuffle: + randperm = torch.randperm(len(indices), generator=g) + yield from indices[randperm].tolist() + else: + yield from indices.tolist() + + +class InferenceSampler(Sampler): + """ + Produce indices for inference across all workers. + Inference needs to run on the __exact__ set of samples, + therefore when the total number of samples is not divisible by the number of workers, + this sampler produces different number of samples on different workers. + """ + + def __init__(self, size: int): + """ + Args: + size (int): the total number of data of the underlying dataset to sample from + """ + self._size = size + assert size > 0 + self._rank = comm.get_rank() + self._world_size = comm.get_world_size() + self._local_indices = self._get_local_indices(size, self._world_size, self._rank) + + @staticmethod + def _get_local_indices(total_size, world_size, rank): + shard_size = total_size // world_size + left = total_size % world_size + shard_sizes = [shard_size + int(r < left) for r in range(world_size)] + + begin = sum(shard_sizes[:rank]) + end = min(sum(shard_sizes[: rank + 1]), total_size) + return range(begin, end) + + def __iter__(self): + yield from self._local_indices + + def __len__(self): + return len(self._local_indices) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/samplers/grouped_batch_sampler.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/samplers/grouped_batch_sampler.py new file mode 100644 index 0000000000000000000000000000000000000000..5b247730aacd04dd0c752664acde3257c4eddd71 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/samplers/grouped_batch_sampler.py @@ -0,0 +1,47 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import numpy as np +from torch.utils.data.sampler import BatchSampler, Sampler + + +class GroupedBatchSampler(BatchSampler): + """ + Wraps another sampler to yield a mini-batch of indices. + It enforces that the batch only contain elements from the same group. + It also tries to provide mini-batches which follows an ordering which is + as close as possible to the ordering from the original sampler. + """ + + def __init__(self, sampler, group_ids, batch_size): + """ + Args: + sampler (Sampler): Base sampler. + group_ids (list[int]): If the sampler produces indices in range [0, N), + `group_ids` must be a list of `N` ints which contains the group id of each sample. + The group ids must be a set of integers in the range [0, num_groups). + batch_size (int): Size of mini-batch. 
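+
+        Examples:
+            A small illustrative sketch; the group ids are hypothetical (e.g. 0/1 for
+            landscape/portrait aspect-ratio grouping) and ``TrainingSampler`` comes from
+            ``distributed_sampler``::
+
+                group_ids = [0, 1, 1, 0, 1, 0]
+                batch_sampler = GroupedBatchSampler(
+                    TrainingSampler(len(group_ids), shuffle=False), group_ids, batch_size=2
+                )
+                # every yielded batch contains indices drawn from a single group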
+ """ + if not isinstance(sampler, Sampler): + raise ValueError( + "sampler should be an instance of " + "torch.utils.data.Sampler, but got sampler={}".format(sampler) + ) + self.sampler = sampler + self.group_ids = np.asarray(group_ids) + assert self.group_ids.ndim == 1 + self.batch_size = batch_size + groups = np.unique(self.group_ids).tolist() + + # buffer the indices of each group until batch size is reached + self.buffer_per_group = {k: [] for k in groups} + + def __iter__(self): + for idx in self.sampler: + group_id = self.group_ids[idx] + group_buffer = self.buffer_per_group[group_id] + group_buffer.append(idx) + if len(group_buffer) == self.batch_size: + yield group_buffer[:] # yield a copy of the list + del group_buffer[:] + + def __len__(self): + raise NotImplementedError("len() of GroupedBatchSampler is not well-defined.") diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/transforms/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/transforms/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e91c6cdfacd6992a7a1e80c7d2e4b38b2cf7dcde --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/transforms/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from fvcore.transforms.transform import Transform, TransformList # order them first +from fvcore.transforms.transform import * +from .transform import * +from .augmentation import * +from .augmentation_impl import * + +__all__ = [k for k in globals().keys() if not k.startswith("_")] + + +from annotator.oneformer.detectron2.utils.env import fixup_module_metadata + +fixup_module_metadata(__name__, globals(), __all__) +del fixup_module_metadata diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/transforms/augmentation.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/transforms/augmentation.py new file mode 100644 index 0000000000000000000000000000000000000000..63dd41aef658c9b51c7246880399405a029c5580 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/transforms/augmentation.py @@ -0,0 +1,380 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + +import inspect +import numpy as np +import pprint +from typing import Any, List, Optional, Tuple, Union +from fvcore.transforms.transform import Transform, TransformList + +""" +See "Data Augmentation" tutorial for an overview of the system: +https://detectron2.readthedocs.io/tutorials/augmentation.html +""" + + +__all__ = [ + "Augmentation", + "AugmentationList", + "AugInput", + "TransformGen", + "apply_transform_gens", + "StandardAugInput", + "apply_augmentations", +] + + +def _check_img_dtype(img): + assert isinstance(img, np.ndarray), "[Augmentation] Needs an numpy array, but got a {}!".format( + type(img) + ) + assert not isinstance(img.dtype, np.integer) or ( + img.dtype == np.uint8 + ), "[Augmentation] Got image of type {}, use uint8 or floating points instead!".format( + img.dtype + ) + assert img.ndim in [2, 3], img.ndim + + +def _get_aug_input_args(aug, aug_input) -> List[Any]: + """ + Get the arguments to be passed to ``aug.get_transform`` from the input ``aug_input``. 
+ """ + if aug.input_args is None: + # Decide what attributes are needed automatically + prms = list(inspect.signature(aug.get_transform).parameters.items()) + # The default behavior is: if there is one parameter, then its "image" + # (work automatically for majority of use cases, and also avoid BC breaking), + # Otherwise, use the argument names. + if len(prms) == 1: + names = ("image",) + else: + names = [] + for name, prm in prms: + if prm.kind in ( + inspect.Parameter.VAR_POSITIONAL, + inspect.Parameter.VAR_KEYWORD, + ): + raise TypeError( + f""" \ +The default implementation of `{type(aug)}.__call__` does not allow \ +`{type(aug)}.get_transform` to use variable-length arguments (*args, **kwargs)! \ +If arguments are unknown, reimplement `__call__` instead. \ +""" + ) + names.append(name) + aug.input_args = tuple(names) + + args = [] + for f in aug.input_args: + try: + args.append(getattr(aug_input, f)) + except AttributeError as e: + raise AttributeError( + f"{type(aug)}.get_transform needs input attribute '{f}', " + f"but it is not an attribute of {type(aug_input)}!" + ) from e + return args + + +class Augmentation: + """ + Augmentation defines (often random) policies/strategies to generate :class:`Transform` + from data. It is often used for pre-processing of input data. + + A "policy" that generates a :class:`Transform` may, in the most general case, + need arbitrary information from input data in order to determine what transforms + to apply. Therefore, each :class:`Augmentation` instance defines the arguments + needed by its :meth:`get_transform` method. When called with the positional arguments, + the :meth:`get_transform` method executes the policy. + + Note that :class:`Augmentation` defines the policies to create a :class:`Transform`, + but not how to execute the actual transform operations to those data. + Its :meth:`__call__` method will use :meth:`AugInput.transform` to execute the transform. + + The returned `Transform` object is meant to describe deterministic transformation, which means + it can be re-applied on associated data, e.g. the geometry of an image and its segmentation + masks need to be transformed together. + (If such re-application is not needed, then determinism is not a crucial requirement.) + """ + + input_args: Optional[Tuple[str]] = None + """ + Stores the attribute names needed by :meth:`get_transform`, e.g. ``("image", "sem_seg")``. + By default, it is just a tuple of argument names in :meth:`self.get_transform`, which often only + contain "image". As long as the argument name convention is followed, there is no need for + users to touch this attribute. + """ + + def _init(self, params=None): + if params: + for k, v in params.items(): + if k != "self" and not k.startswith("_"): + setattr(self, k, v) + + def get_transform(self, *args) -> Transform: + """ + Execute the policy based on input data, and decide what transform to apply to inputs. + + Args: + args: Any fixed-length positional arguments. By default, the name of the arguments + should exist in the :class:`AugInput` to be used. + + Returns: + Transform: Returns the deterministic transform to apply to the input. 
+ + Examples: + :: + class MyAug: + # if a policy needs to know both image and semantic segmentation + def get_transform(image, sem_seg) -> T.Transform: + pass + tfm: Transform = MyAug().get_transform(image, sem_seg) + new_image = tfm.apply_image(image) + + Notes: + Users can freely use arbitrary new argument names in custom + :meth:`get_transform` method, as long as they are available in the + input data. In detectron2 we use the following convention: + + * image: (H,W) or (H,W,C) ndarray of type uint8 in range [0, 255], or + floating point in range [0, 1] or [0, 255]. + * boxes: (N,4) ndarray of float32. It represents the instance bounding boxes + of N instances. Each is in XYXY format in unit of absolute coordinates. + * sem_seg: (H,W) ndarray of type uint8. Each element is an integer label of pixel. + + We do not specify convention for other types and do not include builtin + :class:`Augmentation` that uses other types in detectron2. + """ + raise NotImplementedError + + def __call__(self, aug_input) -> Transform: + """ + Augment the given `aug_input` **in-place**, and return the transform that's used. + + This method will be called to apply the augmentation. In most augmentation, it + is enough to use the default implementation, which calls :meth:`get_transform` + using the inputs. But a subclass can overwrite it to have more complicated logic. + + Args: + aug_input (AugInput): an object that has attributes needed by this augmentation + (defined by ``self.get_transform``). Its ``transform`` method will be called + to in-place transform it. + + Returns: + Transform: the transform that is applied on the input. + """ + args = _get_aug_input_args(self, aug_input) + tfm = self.get_transform(*args) + assert isinstance(tfm, (Transform, TransformList)), ( + f"{type(self)}.get_transform must return an instance of Transform! " + f"Got {type(tfm)} instead." + ) + aug_input.transform(tfm) + return tfm + + def _rand_range(self, low=1.0, high=None, size=None): + """ + Uniform float random number between low and high. + """ + if high is None: + low, high = 0, low + if size is None: + size = [] + return np.random.uniform(low, high, size) + + def __repr__(self): + """ + Produce something like: + "MyAugmentation(field1={self.field1}, field2={self.field2})" + """ + try: + sig = inspect.signature(self.__init__) + classname = type(self).__name__ + argstr = [] + for name, param in sig.parameters.items(): + assert ( + param.kind != param.VAR_POSITIONAL and param.kind != param.VAR_KEYWORD + ), "The default __repr__ doesn't support *args or **kwargs" + assert hasattr(self, name), ( + "Attribute {} not found! " + "Default __repr__ only works if attributes match the constructor.".format(name) + ) + attr = getattr(self, name) + default = param.default + if default is attr: + continue + attr_str = pprint.pformat(attr) + if "\n" in attr_str: + # don't show it if pformat decides to use >1 lines + attr_str = "..." + argstr.append("{}={}".format(name, attr_str)) + return "{}({})".format(classname, ", ".join(argstr)) + except AssertionError: + return super().__repr__() + + __str__ = __repr__ + + +class _TransformToAug(Augmentation): + def __init__(self, tfm: Transform): + self.tfm = tfm + + def get_transform(self, *args): + return self.tfm + + def __repr__(self): + return repr(self.tfm) + + __str__ = __repr__ + + +def _transform_to_aug(tfm_or_aug): + """ + Wrap Transform into Augmentation. + Private, used internally to implement augmentations. 
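+
+    For example, ``_transform_to_aug(HFlipTransform(640))`` returns a ``_TransformToAug``
+    whose ``get_transform`` always yields that same flip.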
+ """ + assert isinstance(tfm_or_aug, (Transform, Augmentation)), tfm_or_aug + if isinstance(tfm_or_aug, Augmentation): + return tfm_or_aug + else: + return _TransformToAug(tfm_or_aug) + + +class AugmentationList(Augmentation): + """ + Apply a sequence of augmentations. + + It has ``__call__`` method to apply the augmentations. + + Note that :meth:`get_transform` method is impossible (will throw error if called) + for :class:`AugmentationList`, because in order to apply a sequence of augmentations, + the kth augmentation must be applied first, to provide inputs needed by the (k+1)th + augmentation. + """ + + def __init__(self, augs): + """ + Args: + augs (list[Augmentation or Transform]): + """ + super().__init__() + self.augs = [_transform_to_aug(x) for x in augs] + + def __call__(self, aug_input) -> TransformList: + tfms = [] + for x in self.augs: + tfm = x(aug_input) + tfms.append(tfm) + return TransformList(tfms) + + def __repr__(self): + msgs = [str(x) for x in self.augs] + return "AugmentationList[{}]".format(", ".join(msgs)) + + __str__ = __repr__ + + +class AugInput: + """ + Input that can be used with :meth:`Augmentation.__call__`. + This is a standard implementation for the majority of use cases. + This class provides the standard attributes **"image", "boxes", "sem_seg"** + defined in :meth:`__init__` and they may be needed by different augmentations. + Most augmentation policies do not need attributes beyond these three. + + After applying augmentations to these attributes (using :meth:`AugInput.transform`), + the returned transforms can then be used to transform other data structures that users have. + + Examples: + :: + input = AugInput(image, boxes=boxes) + tfms = augmentation(input) + transformed_image = input.image + transformed_boxes = input.boxes + transformed_other_data = tfms.apply_other(other_data) + + An extended project that works with new data types may implement augmentation policies + that need other inputs. An algorithm may need to transform inputs in a way different + from the standard approach defined in this class. In those rare situations, users can + implement a class similar to this class, that satify the following condition: + + * The input must provide access to these data in the form of attribute access + (``getattr``). For example, if an :class:`Augmentation` to be applied needs "image" + and "sem_seg" arguments, its input must have the attribute "image" and "sem_seg". + * The input must have a ``transform(tfm: Transform) -> None`` method which + in-place transforms all its attributes. + """ + + # TODO maybe should support more builtin data types here + def __init__( + self, + image: np.ndarray, + *, + boxes: Optional[np.ndarray] = None, + sem_seg: Optional[np.ndarray] = None, + ): + """ + Args: + image (ndarray): (H,W) or (H,W,C) ndarray of type uint8 in range [0, 255], or + floating point in range [0, 1] or [0, 255]. The meaning of C is up + to users. + boxes (ndarray or None): Nx4 float32 boxes in XYXY_ABS mode + sem_seg (ndarray or None): HxW uint8 semantic segmentation mask. Each element + is an integer label of pixel. + """ + _check_img_dtype(image) + self.image = image + self.boxes = boxes + self.sem_seg = sem_seg + + def transform(self, tfm: Transform) -> None: + """ + In-place transform all attributes of this class. + + By "in-place", it means after calling this method, accessing an attribute such + as ``self.image`` will return transformed data. 
+ """ + self.image = tfm.apply_image(self.image) + if self.boxes is not None: + self.boxes = tfm.apply_box(self.boxes) + if self.sem_seg is not None: + self.sem_seg = tfm.apply_segmentation(self.sem_seg) + + def apply_augmentations( + self, augmentations: List[Union[Augmentation, Transform]] + ) -> TransformList: + """ + Equivalent of ``AugmentationList(augmentations)(self)`` + """ + return AugmentationList(augmentations)(self) + + +def apply_augmentations(augmentations: List[Union[Transform, Augmentation]], inputs): + """ + Use ``T.AugmentationList(augmentations)(inputs)`` instead. + """ + if isinstance(inputs, np.ndarray): + # handle the common case of image-only Augmentation, also for backward compatibility + image_only = True + inputs = AugInput(inputs) + else: + image_only = False + tfms = inputs.apply_augmentations(augmentations) + return inputs.image if image_only else inputs, tfms + + +apply_transform_gens = apply_augmentations +""" +Alias for backward-compatibility. +""" + +TransformGen = Augmentation +""" +Alias for Augmentation, since it is something that generates :class:`Transform`s +""" + +StandardAugInput = AugInput +""" +Alias for compatibility. It's not worth the complexity to have two classes. +""" diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/transforms/augmentation_impl.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/transforms/augmentation_impl.py new file mode 100644 index 0000000000000000000000000000000000000000..965f0a947d7c3ff03b0990f1a645703d470227de --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/transforms/augmentation_impl.py @@ -0,0 +1,736 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. +""" +Implement many useful :class:`Augmentation`. +""" +import numpy as np +import sys +from numpy import random +from typing import Tuple +import torch +from fvcore.transforms.transform import ( + BlendTransform, + CropTransform, + HFlipTransform, + NoOpTransform, + PadTransform, + Transform, + TransformList, + VFlipTransform, +) +from PIL import Image + +from annotator.oneformer.detectron2.structures import Boxes, pairwise_iou + +from .augmentation import Augmentation, _transform_to_aug +from .transform import ExtentTransform, ResizeTransform, RotationTransform + +__all__ = [ + "FixedSizeCrop", + "RandomApply", + "RandomBrightness", + "RandomContrast", + "RandomCrop", + "RandomExtent", + "RandomFlip", + "RandomSaturation", + "RandomLighting", + "RandomRotation", + "Resize", + "ResizeScale", + "ResizeShortestEdge", + "RandomCrop_CategoryAreaConstraint", + "RandomResize", + "MinIoURandomCrop", +] + + +class RandomApply(Augmentation): + """ + Randomly apply an augmentation with a given probability. + """ + + def __init__(self, tfm_or_aug, prob=0.5): + """ + Args: + tfm_or_aug (Transform, Augmentation): the transform or augmentation + to be applied. It can either be a `Transform` or `Augmentation` + instance. 
+ prob (float): probability between 0.0 and 1.0 that + the wrapper transformation is applied + """ + super().__init__() + self.aug = _transform_to_aug(tfm_or_aug) + assert 0.0 <= prob <= 1.0, f"Probablity must be between 0.0 and 1.0 (given: {prob})" + self.prob = prob + + def get_transform(self, *args): + do = self._rand_range() < self.prob + if do: + return self.aug.get_transform(*args) + else: + return NoOpTransform() + + def __call__(self, aug_input): + do = self._rand_range() < self.prob + if do: + return self.aug(aug_input) + else: + return NoOpTransform() + + +class RandomFlip(Augmentation): + """ + Flip the image horizontally or vertically with the given probability. + """ + + def __init__(self, prob=0.5, *, horizontal=True, vertical=False): + """ + Args: + prob (float): probability of flip. + horizontal (boolean): whether to apply horizontal flipping + vertical (boolean): whether to apply vertical flipping + """ + super().__init__() + + if horizontal and vertical: + raise ValueError("Cannot do both horiz and vert. Please use two Flip instead.") + if not horizontal and not vertical: + raise ValueError("At least one of horiz or vert has to be True!") + self._init(locals()) + + def get_transform(self, image): + h, w = image.shape[:2] + do = self._rand_range() < self.prob + if do: + if self.horizontal: + return HFlipTransform(w) + elif self.vertical: + return VFlipTransform(h) + else: + return NoOpTransform() + + +class Resize(Augmentation): + """Resize image to a fixed target size""" + + def __init__(self, shape, interp=Image.BILINEAR): + """ + Args: + shape: (h, w) tuple or a int + interp: PIL interpolation method + """ + if isinstance(shape, int): + shape = (shape, shape) + shape = tuple(shape) + self._init(locals()) + + def get_transform(self, image): + return ResizeTransform( + image.shape[0], image.shape[1], self.shape[0], self.shape[1], self.interp + ) + + +class ResizeShortestEdge(Augmentation): + """ + Resize the image while keeping the aspect ratio unchanged. + It attempts to scale the shorter edge to the given `short_edge_length`, + as long as the longer edge does not exceed `max_size`. + If `max_size` is reached, then downscale so that the longer edge does not exceed max_size. + """ + + @torch.jit.unused + def __init__( + self, short_edge_length, max_size=sys.maxsize, sample_style="range", interp=Image.BILINEAR + ): + """ + Args: + short_edge_length (list[int]): If ``sample_style=="range"``, + a [min, max] interval from which to sample the shortest edge length. + If ``sample_style=="choice"``, a list of shortest edge lengths to sample from. + max_size (int): maximum allowed longest edge length. + sample_style (str): either "range" or "choice". + """ + super().__init__() + assert sample_style in ["range", "choice"], sample_style + + self.is_range = sample_style == "range" + if isinstance(short_edge_length, int): + short_edge_length = (short_edge_length, short_edge_length) + if self.is_range: + assert len(short_edge_length) == 2, ( + "short_edge_length must be two values using 'range' sample style." + f" Got {short_edge_length}!" 
+ ) + self._init(locals()) + + @torch.jit.unused + def get_transform(self, image): + h, w = image.shape[:2] + if self.is_range: + size = np.random.randint(self.short_edge_length[0], self.short_edge_length[1] + 1) + else: + size = np.random.choice(self.short_edge_length) + if size == 0: + return NoOpTransform() + + newh, neww = ResizeShortestEdge.get_output_shape(h, w, size, self.max_size) + return ResizeTransform(h, w, newh, neww, self.interp) + + @staticmethod + def get_output_shape( + oldh: int, oldw: int, short_edge_length: int, max_size: int + ) -> Tuple[int, int]: + """ + Compute the output size given input size and target short edge length. + """ + h, w = oldh, oldw + size = short_edge_length * 1.0 + scale = size / min(h, w) + if h < w: + newh, neww = size, scale * w + else: + newh, neww = scale * h, size + if max(newh, neww) > max_size: + scale = max_size * 1.0 / max(newh, neww) + newh = newh * scale + neww = neww * scale + neww = int(neww + 0.5) + newh = int(newh + 0.5) + return (newh, neww) + + +class ResizeScale(Augmentation): + """ + Takes target size as input and randomly scales the given target size between `min_scale` + and `max_scale`. It then scales the input image such that it fits inside the scaled target + box, keeping the aspect ratio constant. + This implements the resize part of the Google's 'resize_and_crop' data augmentation: + https://github.com/tensorflow/tpu/blob/master/models/official/detection/utils/input_utils.py#L127 + """ + + def __init__( + self, + min_scale: float, + max_scale: float, + target_height: int, + target_width: int, + interp: int = Image.BILINEAR, + ): + """ + Args: + min_scale: minimum image scale range. + max_scale: maximum image scale range. + target_height: target image height. + target_width: target image width. + interp: image interpolation method. + """ + super().__init__() + self._init(locals()) + + def _get_resize(self, image: np.ndarray, scale: float) -> Transform: + input_size = image.shape[:2] + + # Compute new target size given a scale. + target_size = (self.target_height, self.target_width) + target_scale_size = np.multiply(target_size, scale) + + # Compute actual rescaling applied to input image and output size. + output_scale = np.minimum( + target_scale_size[0] / input_size[0], target_scale_size[1] / input_size[1] + ) + output_size = np.round(np.multiply(input_size, output_scale)).astype(int) + + return ResizeTransform( + input_size[0], input_size[1], output_size[0], output_size[1], self.interp + ) + + def get_transform(self, image: np.ndarray) -> Transform: + random_scale = np.random.uniform(self.min_scale, self.max_scale) + return self._get_resize(image, random_scale) + + +class RandomRotation(Augmentation): + """ + This method returns a copy of this image, rotated the given + number of degrees counter clockwise around the given center. + """ + + def __init__(self, angle, expand=True, center=None, sample_style="range", interp=None): + """ + Args: + angle (list[float]): If ``sample_style=="range"``, + a [min, max] interval from which to sample the angle (in degrees). + If ``sample_style=="choice"``, a list of angles to sample from + expand (bool): choose if the image should be resized to fit the whole + rotated image (default), or simply cropped + center (list[[float, float]]): If ``sample_style=="range"``, + a [[minx, miny], [maxx, maxy]] relative interval from which to sample the center, + [0, 0] being the top left of the image and [1, 1] the bottom right. 
+ If ``sample_style=="choice"``, a list of centers to sample from + Default: None, which means that the center of rotation is the center of the image + center has no effect if expand=True because it only affects shifting + """ + super().__init__() + assert sample_style in ["range", "choice"], sample_style + self.is_range = sample_style == "range" + if isinstance(angle, (float, int)): + angle = (angle, angle) + if center is not None and isinstance(center[0], (float, int)): + center = (center, center) + self._init(locals()) + + def get_transform(self, image): + h, w = image.shape[:2] + center = None + if self.is_range: + angle = np.random.uniform(self.angle[0], self.angle[1]) + if self.center is not None: + center = ( + np.random.uniform(self.center[0][0], self.center[1][0]), + np.random.uniform(self.center[0][1], self.center[1][1]), + ) + else: + angle = np.random.choice(self.angle) + if self.center is not None: + center = np.random.choice(self.center) + + if center is not None: + center = (w * center[0], h * center[1]) # Convert to absolute coordinates + + if angle % 360 == 0: + return NoOpTransform() + + return RotationTransform(h, w, angle, expand=self.expand, center=center, interp=self.interp) + + +class FixedSizeCrop(Augmentation): + """ + If `crop_size` is smaller than the input image size, then it uses a random crop of + the crop size. If `crop_size` is larger than the input image size, then it pads + the right and the bottom of the image to the crop size if `pad` is True, otherwise + it returns the smaller image. + """ + + def __init__( + self, + crop_size: Tuple[int], + pad: bool = True, + pad_value: float = 128.0, + seg_pad_value: int = 255, + ): + """ + Args: + crop_size: target image (height, width). + pad: if True, will pad images smaller than `crop_size` up to `crop_size` + pad_value: the padding value to the image. + seg_pad_value: the padding value to the segmentation mask. + """ + super().__init__() + self._init(locals()) + + def _get_crop(self, image: np.ndarray) -> Transform: + # Compute the image scale and scaled size. + input_size = image.shape[:2] + output_size = self.crop_size + + # Add random crop if the image is scaled up. + max_offset = np.subtract(input_size, output_size) + max_offset = np.maximum(max_offset, 0) + offset = np.multiply(max_offset, np.random.uniform(0.0, 1.0)) + offset = np.round(offset).astype(int) + return CropTransform( + offset[1], offset[0], output_size[1], output_size[0], input_size[1], input_size[0] + ) + + def _get_pad(self, image: np.ndarray) -> Transform: + # Compute the image scale and scaled size. + input_size = image.shape[:2] + output_size = self.crop_size + + # Add padding if the image is scaled down. + pad_size = np.subtract(output_size, input_size) + pad_size = np.maximum(pad_size, 0) + original_size = np.minimum(input_size, output_size) + return PadTransform( + 0, + 0, + pad_size[1], + pad_size[0], + original_size[1], + original_size[0], + self.pad_value, + self.seg_pad_value, + ) + + def get_transform(self, image: np.ndarray) -> TransformList: + transforms = [self._get_crop(image)] + if self.pad: + transforms.append(self._get_pad(image)) + return TransformList(transforms) + + +class RandomCrop(Augmentation): + """ + Randomly crop a rectangle region out of an image. + """ + + def __init__(self, crop_type: str, crop_size): + """ + Args: + crop_type (str): one of "relative_range", "relative", "absolute", "absolute_range". + crop_size (tuple[float, float]): two floats, explained below. 
+ + - "relative": crop a (H * crop_size[0], W * crop_size[1]) region from an input image of + size (H, W). crop size should be in (0, 1] + - "relative_range": uniformly sample two values from [crop_size[0], 1] + and [crop_size[1]], 1], and use them as in "relative" crop type. + - "absolute" crop a (crop_size[0], crop_size[1]) region from input image. + crop_size must be smaller than the input image size. + - "absolute_range", for an input of size (H, W), uniformly sample H_crop in + [crop_size[0], min(H, crop_size[1])] and W_crop in [crop_size[0], min(W, crop_size[1])]. + Then crop a region (H_crop, W_crop). + """ + # TODO style of relative_range and absolute_range are not consistent: + # one takes (h, w) but another takes (min, max) + super().__init__() + assert crop_type in ["relative_range", "relative", "absolute", "absolute_range"] + self._init(locals()) + + def get_transform(self, image): + h, w = image.shape[:2] + croph, cropw = self.get_crop_size((h, w)) + assert h >= croph and w >= cropw, "Shape computation in {} has bugs.".format(self) + h0 = np.random.randint(h - croph + 1) + w0 = np.random.randint(w - cropw + 1) + return CropTransform(w0, h0, cropw, croph) + + def get_crop_size(self, image_size): + """ + Args: + image_size (tuple): height, width + + Returns: + crop_size (tuple): height, width in absolute pixels + """ + h, w = image_size + if self.crop_type == "relative": + ch, cw = self.crop_size + return int(h * ch + 0.5), int(w * cw + 0.5) + elif self.crop_type == "relative_range": + crop_size = np.asarray(self.crop_size, dtype=np.float32) + ch, cw = crop_size + np.random.rand(2) * (1 - crop_size) + return int(h * ch + 0.5), int(w * cw + 0.5) + elif self.crop_type == "absolute": + return (min(self.crop_size[0], h), min(self.crop_size[1], w)) + elif self.crop_type == "absolute_range": + assert self.crop_size[0] <= self.crop_size[1] + ch = np.random.randint(min(h, self.crop_size[0]), min(h, self.crop_size[1]) + 1) + cw = np.random.randint(min(w, self.crop_size[0]), min(w, self.crop_size[1]) + 1) + return ch, cw + else: + raise NotImplementedError("Unknown crop type {}".format(self.crop_type)) + + +class RandomCrop_CategoryAreaConstraint(Augmentation): + """ + Similar to :class:`RandomCrop`, but find a cropping window such that no single category + occupies a ratio of more than `single_category_max_area` in semantic segmentation ground + truth, which can cause unstability in training. The function attempts to find such a valid + cropping window for at most 10 times. + """ + + def __init__( + self, + crop_type: str, + crop_size, + single_category_max_area: float = 1.0, + ignored_category: int = None, + ): + """ + Args: + crop_type, crop_size: same as in :class:`RandomCrop` + single_category_max_area: the maximum allowed area ratio of a + category. Set to 1.0 to disable + ignored_category: allow this category in the semantic segmentation + ground truth to exceed the area ratio. Usually set to the category + that's ignored in training. 
+ """ + self.crop_aug = RandomCrop(crop_type, crop_size) + self._init(locals()) + + def get_transform(self, image, sem_seg): + if self.single_category_max_area >= 1.0: + return self.crop_aug.get_transform(image) + else: + h, w = sem_seg.shape + for _ in range(10): + crop_size = self.crop_aug.get_crop_size((h, w)) + y0 = np.random.randint(h - crop_size[0] + 1) + x0 = np.random.randint(w - crop_size[1] + 1) + sem_seg_temp = sem_seg[y0 : y0 + crop_size[0], x0 : x0 + crop_size[1]] + labels, cnt = np.unique(sem_seg_temp, return_counts=True) + if self.ignored_category is not None: + cnt = cnt[labels != self.ignored_category] + if len(cnt) > 1 and np.max(cnt) < np.sum(cnt) * self.single_category_max_area: + break + crop_tfm = CropTransform(x0, y0, crop_size[1], crop_size[0]) + return crop_tfm + + +class RandomExtent(Augmentation): + """ + Outputs an image by cropping a random "subrect" of the source image. + + The subrect can be parameterized to include pixels outside the source image, + in which case they will be set to zeros (i.e. black). The size of the output + image will vary with the size of the random subrect. + """ + + def __init__(self, scale_range, shift_range): + """ + Args: + output_size (h, w): Dimensions of output image + scale_range (l, h): Range of input-to-output size scaling factor + shift_range (x, y): Range of shifts of the cropped subrect. The rect + is shifted by [w / 2 * Uniform(-x, x), h / 2 * Uniform(-y, y)], + where (w, h) is the (width, height) of the input image. Set each + component to zero to crop at the image's center. + """ + super().__init__() + self._init(locals()) + + def get_transform(self, image): + img_h, img_w = image.shape[:2] + + # Initialize src_rect to fit the input image. + src_rect = np.array([-0.5 * img_w, -0.5 * img_h, 0.5 * img_w, 0.5 * img_h]) + + # Apply a random scaling to the src_rect. + src_rect *= np.random.uniform(self.scale_range[0], self.scale_range[1]) + + # Apply a random shift to the coordinates origin. + src_rect[0::2] += self.shift_range[0] * img_w * (np.random.rand() - 0.5) + src_rect[1::2] += self.shift_range[1] * img_h * (np.random.rand() - 0.5) + + # Map src_rect coordinates into image coordinates (center at corner). + src_rect[0::2] += 0.5 * img_w + src_rect[1::2] += 0.5 * img_h + + return ExtentTransform( + src_rect=(src_rect[0], src_rect[1], src_rect[2], src_rect[3]), + output_size=(int(src_rect[3] - src_rect[1]), int(src_rect[2] - src_rect[0])), + ) + + +class RandomContrast(Augmentation): + """ + Randomly transforms image contrast. + + Contrast intensity is uniformly sampled in (intensity_min, intensity_max). + - intensity < 1 will reduce contrast + - intensity = 1 will preserve the input image + - intensity > 1 will increase contrast + + See: https://pillow.readthedocs.io/en/3.0.x/reference/ImageEnhance.html + """ + + def __init__(self, intensity_min, intensity_max): + """ + Args: + intensity_min (float): Minimum augmentation + intensity_max (float): Maximum augmentation + """ + super().__init__() + self._init(locals()) + + def get_transform(self, image): + w = np.random.uniform(self.intensity_min, self.intensity_max) + return BlendTransform(src_image=image.mean(), src_weight=1 - w, dst_weight=w) + + +class RandomBrightness(Augmentation): + """ + Randomly transforms image brightness. + + Brightness intensity is uniformly sampled in (intensity_min, intensity_max). 
+ - intensity < 1 will reduce brightness + - intensity = 1 will preserve the input image + - intensity > 1 will increase brightness + + See: https://pillow.readthedocs.io/en/3.0.x/reference/ImageEnhance.html + """ + + def __init__(self, intensity_min, intensity_max): + """ + Args: + intensity_min (float): Minimum augmentation + intensity_max (float): Maximum augmentation + """ + super().__init__() + self._init(locals()) + + def get_transform(self, image): + w = np.random.uniform(self.intensity_min, self.intensity_max) + return BlendTransform(src_image=0, src_weight=1 - w, dst_weight=w) + + +class RandomSaturation(Augmentation): + """ + Randomly transforms saturation of an RGB image. + Input images are assumed to have 'RGB' channel order. + + Saturation intensity is uniformly sampled in (intensity_min, intensity_max). + - intensity < 1 will reduce saturation (make the image more grayscale) + - intensity = 1 will preserve the input image + - intensity > 1 will increase saturation + + See: https://pillow.readthedocs.io/en/3.0.x/reference/ImageEnhance.html + """ + + def __init__(self, intensity_min, intensity_max): + """ + Args: + intensity_min (float): Minimum augmentation (1 preserves input). + intensity_max (float): Maximum augmentation (1 preserves input). + """ + super().__init__() + self._init(locals()) + + def get_transform(self, image): + assert image.shape[-1] == 3, "RandomSaturation only works on RGB images" + w = np.random.uniform(self.intensity_min, self.intensity_max) + grayscale = image.dot([0.299, 0.587, 0.114])[:, :, np.newaxis] + return BlendTransform(src_image=grayscale, src_weight=1 - w, dst_weight=w) + + +class RandomLighting(Augmentation): + """ + The "lighting" augmentation described in AlexNet, using fixed PCA over ImageNet. + Input images are assumed to have 'RGB' channel order. + + The degree of color jittering is randomly sampled via a normal distribution, + with standard deviation given by the scale parameter. + """ + + def __init__(self, scale): + """ + Args: + scale (float): Standard deviation of principal component weighting. + """ + super().__init__() + self._init(locals()) + self.eigen_vecs = np.array( + [[-0.5675, 0.7192, 0.4009], [-0.5808, -0.0045, -0.8140], [-0.5836, -0.6948, 0.4203]] + ) + self.eigen_vals = np.array([0.2175, 0.0188, 0.0045]) + + def get_transform(self, image): + assert image.shape[-1] == 3, "RandomLighting only works on RGB images" + weights = np.random.normal(scale=self.scale, size=3) + return BlendTransform( + src_image=self.eigen_vecs.dot(weights * self.eigen_vals), src_weight=1.0, dst_weight=1.0 + ) + + +class RandomResize(Augmentation): + """Randomly resize image to a target size in shape_list""" + + def __init__(self, shape_list, interp=Image.BILINEAR): + """ + Args: + shape_list: a list of shapes in (h, w) + interp: PIL interpolation method + """ + self.shape_list = shape_list + self._init(locals()) + + def get_transform(self, image): + shape_idx = np.random.randint(low=0, high=len(self.shape_list)) + h, w = self.shape_list[shape_idx] + return ResizeTransform(image.shape[0], image.shape[1], h, w, self.interp) + + +class MinIoURandomCrop(Augmentation): + """Random crop the image & bboxes, the cropped patches have minimum IoU + requirement with original image & bboxes, the IoU threshold is randomly + selected from min_ious. + + Args: + min_ious (tuple): minimum IoU threshold for all intersections with + bounding boxes + min_crop_size (float): minimum crop's size (i.e. 
h,w := a*h, a*w, + where a >= min_crop_size) + mode_trials: number of trials for sampling min_ious threshold + crop_trials: number of trials for sampling crop_size after cropping + """ + + def __init__( + self, + min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), + min_crop_size=0.3, + mode_trials=1000, + crop_trials=50, + ): + self.min_ious = min_ious + self.sample_mode = (1, *min_ious, 0) + self.min_crop_size = min_crop_size + self.mode_trials = mode_trials + self.crop_trials = crop_trials + + def get_transform(self, image, boxes): + """Call function to crop images and bounding boxes with minimum IoU + constraint. + + Args: + boxes: ground truth boxes in (x1, y1, x2, y2) format + """ + if boxes is None: + return NoOpTransform() + h, w, c = image.shape + for _ in range(self.mode_trials): + mode = random.choice(self.sample_mode) + self.mode = mode + if mode == 1: + return NoOpTransform() + + min_iou = mode + for _ in range(self.crop_trials): + new_w = random.uniform(self.min_crop_size * w, w) + new_h = random.uniform(self.min_crop_size * h, h) + + # h / w in [0.5, 2] + if new_h / new_w < 0.5 or new_h / new_w > 2: + continue + + left = random.uniform(w - new_w) + top = random.uniform(h - new_h) + + patch = np.array((int(left), int(top), int(left + new_w), int(top + new_h))) + # Line or point crop is not allowed + if patch[2] == patch[0] or patch[3] == patch[1]: + continue + overlaps = pairwise_iou( + Boxes(patch.reshape(-1, 4)), Boxes(boxes.reshape(-1, 4)) + ).reshape(-1) + if len(overlaps) > 0 and overlaps.min() < min_iou: + continue + + # center of boxes should inside the crop img + # only adjust boxes and instance masks when the gt is not empty + if len(overlaps) > 0: + # adjust boxes + def is_center_of_bboxes_in_patch(boxes, patch): + center = (boxes[:, :2] + boxes[:, 2:]) / 2 + mask = ( + (center[:, 0] > patch[0]) + * (center[:, 1] > patch[1]) + * (center[:, 0] < patch[2]) + * (center[:, 1] < patch[3]) + ) + return mask + + mask = is_center_of_bboxes_in_patch(boxes, patch) + if not mask.any(): + continue + return CropTransform(int(left), int(top), int(new_w), int(new_h)) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/transforms/transform.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/transforms/transform.py new file mode 100644 index 0000000000000000000000000000000000000000..de44b991d7ab0d920ffb769e1402f08e358d37f7 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/data/transforms/transform.py @@ -0,0 +1,351 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + +""" +See "Data Augmentation" tutorial for an overview of the system: +https://detectron2.readthedocs.io/tutorials/augmentation.html +""" + +import numpy as np +import torch +import torch.nn.functional as F +from fvcore.transforms.transform import ( + CropTransform, + HFlipTransform, + NoOpTransform, + Transform, + TransformList, +) +from PIL import Image + +try: + import cv2 # noqa +except ImportError: + # OpenCV is an optional dependency at the moment + pass + +__all__ = [ + "ExtentTransform", + "ResizeTransform", + "RotationTransform", + "ColorTransform", + "PILColorTransform", +] + + +class ExtentTransform(Transform): + """ + Extracts a subregion from the source image and scales it to the output size. + + The fill color is used to map pixels from the source rect that fall outside + the source image. 
+ + See: https://pillow.readthedocs.io/en/latest/PIL.html#PIL.ImageTransform.ExtentTransform + """ + + def __init__(self, src_rect, output_size, interp=Image.LINEAR, fill=0): + """ + Args: + src_rect (x0, y0, x1, y1): src coordinates + output_size (h, w): dst image size + interp: PIL interpolation methods + fill: Fill color used when src_rect extends outside image + """ + super().__init__() + self._set_attributes(locals()) + + def apply_image(self, img, interp=None): + h, w = self.output_size + if len(img.shape) > 2 and img.shape[2] == 1: + pil_image = Image.fromarray(img[:, :, 0], mode="L") + else: + pil_image = Image.fromarray(img) + pil_image = pil_image.transform( + size=(w, h), + method=Image.EXTENT, + data=self.src_rect, + resample=interp if interp else self.interp, + fill=self.fill, + ) + ret = np.asarray(pil_image) + if len(img.shape) > 2 and img.shape[2] == 1: + ret = np.expand_dims(ret, -1) + return ret + + def apply_coords(self, coords): + # Transform image center from source coordinates into output coordinates + # and then map the new origin to the corner of the output image. + h, w = self.output_size + x0, y0, x1, y1 = self.src_rect + new_coords = coords.astype(np.float32) + new_coords[:, 0] -= 0.5 * (x0 + x1) + new_coords[:, 1] -= 0.5 * (y0 + y1) + new_coords[:, 0] *= w / (x1 - x0) + new_coords[:, 1] *= h / (y1 - y0) + new_coords[:, 0] += 0.5 * w + new_coords[:, 1] += 0.5 * h + return new_coords + + def apply_segmentation(self, segmentation): + segmentation = self.apply_image(segmentation, interp=Image.NEAREST) + return segmentation + + +class ResizeTransform(Transform): + """ + Resize the image to a target size. + """ + + def __init__(self, h, w, new_h, new_w, interp=None): + """ + Args: + h, w (int): original image size + new_h, new_w (int): new image size + interp: PIL interpolation methods, defaults to bilinear. 
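+
+        Example (a minimal sketch; ``img`` is assumed to be a 480x640 uint8 array):
+        ::
+            tfm = ResizeTransform(480, 640, 240, 320)   # (h, w) -> (new_h, new_w)
+            small = tfm.apply_image(img)
+            pts = tfm.apply_coords(pts)                 # pts: Nx2 float array of (x, y)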
+ """ + # TODO decide on PIL vs opencv + super().__init__() + if interp is None: + interp = Image.BILINEAR + self._set_attributes(locals()) + + def apply_image(self, img, interp=None): + assert img.shape[:2] == (self.h, self.w) + assert len(img.shape) <= 4 + interp_method = interp if interp is not None else self.interp + + if img.dtype == np.uint8: + if len(img.shape) > 2 and img.shape[2] == 1: + pil_image = Image.fromarray(img[:, :, 0], mode="L") + else: + pil_image = Image.fromarray(img) + pil_image = pil_image.resize((self.new_w, self.new_h), interp_method) + ret = np.asarray(pil_image) + if len(img.shape) > 2 and img.shape[2] == 1: + ret = np.expand_dims(ret, -1) + else: + # PIL only supports uint8 + if any(x < 0 for x in img.strides): + img = np.ascontiguousarray(img) + img = torch.from_numpy(img) + shape = list(img.shape) + shape_4d = shape[:2] + [1] * (4 - len(shape)) + shape[2:] + img = img.view(shape_4d).permute(2, 3, 0, 1) # hw(c) -> nchw + _PIL_RESIZE_TO_INTERPOLATE_MODE = { + Image.NEAREST: "nearest", + Image.BILINEAR: "bilinear", + Image.BICUBIC: "bicubic", + } + mode = _PIL_RESIZE_TO_INTERPOLATE_MODE[interp_method] + align_corners = None if mode == "nearest" else False + img = F.interpolate( + img, (self.new_h, self.new_w), mode=mode, align_corners=align_corners + ) + shape[:2] = (self.new_h, self.new_w) + ret = img.permute(2, 3, 0, 1).view(shape).numpy() # nchw -> hw(c) + + return ret + + def apply_coords(self, coords): + coords[:, 0] = coords[:, 0] * (self.new_w * 1.0 / self.w) + coords[:, 1] = coords[:, 1] * (self.new_h * 1.0 / self.h) + return coords + + def apply_segmentation(self, segmentation): + segmentation = self.apply_image(segmentation, interp=Image.NEAREST) + return segmentation + + def inverse(self): + return ResizeTransform(self.new_h, self.new_w, self.h, self.w, self.interp) + + +class RotationTransform(Transform): + """ + This method returns a copy of this image, rotated the given + number of degrees counter clockwise around its center. 
+ """ + + def __init__(self, h, w, angle, expand=True, center=None, interp=None): + """ + Args: + h, w (int): original image size + angle (float): degrees for rotation + expand (bool): choose if the image should be resized to fit the whole + rotated image (default), or simply cropped + center (tuple (width, height)): coordinates of the rotation center + if left to None, the center will be fit to the center of each image + center has no effect if expand=True because it only affects shifting + interp: cv2 interpolation method, default cv2.INTER_LINEAR + """ + super().__init__() + image_center = np.array((w / 2, h / 2)) + if center is None: + center = image_center + if interp is None: + interp = cv2.INTER_LINEAR + abs_cos, abs_sin = (abs(np.cos(np.deg2rad(angle))), abs(np.sin(np.deg2rad(angle)))) + if expand: + # find the new width and height bounds + bound_w, bound_h = np.rint( + [h * abs_sin + w * abs_cos, h * abs_cos + w * abs_sin] + ).astype(int) + else: + bound_w, bound_h = w, h + + self._set_attributes(locals()) + self.rm_coords = self.create_rotation_matrix() + # Needed because of this problem https://github.com/opencv/opencv/issues/11784 + self.rm_image = self.create_rotation_matrix(offset=-0.5) + + def apply_image(self, img, interp=None): + """ + img should be a numpy array, formatted as Height * Width * Nchannels + """ + if len(img) == 0 or self.angle % 360 == 0: + return img + assert img.shape[:2] == (self.h, self.w) + interp = interp if interp is not None else self.interp + return cv2.warpAffine(img, self.rm_image, (self.bound_w, self.bound_h), flags=interp) + + def apply_coords(self, coords): + """ + coords should be a N * 2 array-like, containing N couples of (x, y) points + """ + coords = np.asarray(coords, dtype=float) + if len(coords) == 0 or self.angle % 360 == 0: + return coords + return cv2.transform(coords[:, np.newaxis, :], self.rm_coords)[:, 0, :] + + def apply_segmentation(self, segmentation): + segmentation = self.apply_image(segmentation, interp=cv2.INTER_NEAREST) + return segmentation + + def create_rotation_matrix(self, offset=0): + center = (self.center[0] + offset, self.center[1] + offset) + rm = cv2.getRotationMatrix2D(tuple(center), self.angle, 1) + if self.expand: + # Find the coordinates of the center of rotation in the new image + # The only point for which we know the future coordinates is the center of the image + rot_im_center = cv2.transform(self.image_center[None, None, :] + offset, rm)[0, 0, :] + new_center = np.array([self.bound_w / 2, self.bound_h / 2]) + offset - rot_im_center + # shift the rotation center to the new coordinates + rm[:, 2] += new_center + return rm + + def inverse(self): + """ + The inverse is to rotate it back with expand, and crop to get the original shape. + """ + if not self.expand: # Not possible to inverse if a part of the image is lost + raise NotImplementedError() + rotation = RotationTransform( + self.bound_h, self.bound_w, -self.angle, True, None, self.interp + ) + crop = CropTransform( + (rotation.bound_w - self.w) // 2, (rotation.bound_h - self.h) // 2, self.w, self.h + ) + return TransformList([rotation, crop]) + + +class ColorTransform(Transform): + """ + Generic wrapper for any photometric transforms. + These transformations should only affect the color space and + not the coordinate space of the image (e.g. 
annotation + coordinates such as bounding boxes should not be changed) + """ + + def __init__(self, op): + """ + Args: + op (Callable): operation to be applied to the image, + which takes in an ndarray and returns an ndarray. + """ + if not callable(op): + raise ValueError("op parameter should be callable") + super().__init__() + self._set_attributes(locals()) + + def apply_image(self, img): + return self.op(img) + + def apply_coords(self, coords): + return coords + + def inverse(self): + return NoOpTransform() + + def apply_segmentation(self, segmentation): + return segmentation + + +class PILColorTransform(ColorTransform): + """ + Generic wrapper for PIL Photometric image transforms, + which affect the color space and not the coordinate + space of the image + """ + + def __init__(self, op): + """ + Args: + op (Callable): operation to be applied to the image, + which takes in a PIL Image and returns a transformed + PIL Image. + For reference on possible operations see: + - https://pillow.readthedocs.io/en/stable/ + """ + if not callable(op): + raise ValueError("op parameter should be callable") + super().__init__(op) + + def apply_image(self, img): + img = Image.fromarray(img) + return np.asarray(super().apply_image(img)) + + +def HFlip_rotated_box(transform, rotated_boxes): + """ + Apply the horizontal flip transform on rotated boxes. + + Args: + rotated_boxes (ndarray): Nx5 floating point array of + (x_center, y_center, width, height, angle_degrees) format + in absolute coordinates. + """ + # Transform x_center + rotated_boxes[:, 0] = transform.width - rotated_boxes[:, 0] + # Transform angle + rotated_boxes[:, 4] = -rotated_boxes[:, 4] + return rotated_boxes + + +def Resize_rotated_box(transform, rotated_boxes): + """ + Apply the resizing transform on rotated boxes. For details of how these (approximation) + formulas are derived, please refer to :meth:`RotatedBoxes.scale`. + + Args: + rotated_boxes (ndarray): Nx5 floating point array of + (x_center, y_center, width, height, angle_degrees) format + in absolute coordinates. + """ + scale_factor_x = transform.new_w * 1.0 / transform.w + scale_factor_y = transform.new_h * 1.0 / transform.h + rotated_boxes[:, 0] *= scale_factor_x + rotated_boxes[:, 1] *= scale_factor_y + theta = rotated_boxes[:, 4] * np.pi / 180.0 + c = np.cos(theta) + s = np.sin(theta) + rotated_boxes[:, 2] *= np.sqrt(np.square(scale_factor_x * c) + np.square(scale_factor_y * s)) + rotated_boxes[:, 3] *= np.sqrt(np.square(scale_factor_x * s) + np.square(scale_factor_y * c)) + rotated_boxes[:, 4] = np.arctan2(scale_factor_x * s, scale_factor_y * c) * 180 / np.pi + + return rotated_boxes + + +HFlipTransform.register_type("rotated_box", HFlip_rotated_box) +ResizeTransform.register_type("rotated_box", Resize_rotated_box) + +# not necessary any more with latest fvcore +NoOpTransform.register_type("rotated_box", lambda t, x: x) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/engine/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/engine/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..08a61572b4c7d09c8d400e903a96cbf5b2cc4763 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/engine/__init__.py @@ -0,0 +1,12 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+ +from .launch import * +from .train_loop import * + +__all__ = [k for k in globals().keys() if not k.startswith("_")] + + +# prefer to let hooks and defaults live in separate namespaces (therefore not in __all__) +# but still make them available here +from .hooks import * +from .defaults import * diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/engine/defaults.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/engine/defaults.py new file mode 100644 index 0000000000000000000000000000000000000000..51d49148ca7b048402a63490bf7df83a43c65d9f --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/engine/defaults.py @@ -0,0 +1,715 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + +""" +This file contains components with some default boilerplate logic user may need +in training / testing. They will not work for everyone, but many users may find them useful. + +The behavior of functions/classes in this file is subject to change, +since they are meant to represent the "common default behavior" people need in their projects. +""" + +import argparse +import logging +import os +import sys +import weakref +from collections import OrderedDict +from typing import Optional +import torch +from fvcore.nn.precise_bn import get_bn_modules +from omegaconf import OmegaConf +from torch.nn.parallel import DistributedDataParallel + +import annotator.oneformer.detectron2.data.transforms as T +from annotator.oneformer.detectron2.checkpoint import DetectionCheckpointer +from annotator.oneformer.detectron2.config import CfgNode, LazyConfig +from annotator.oneformer.detectron2.data import ( + MetadataCatalog, + build_detection_test_loader, + build_detection_train_loader, +) +from annotator.oneformer.detectron2.evaluation import ( + DatasetEvaluator, + inference_on_dataset, + print_csv_format, + verify_results, +) +from annotator.oneformer.detectron2.modeling import build_model +from annotator.oneformer.detectron2.solver import build_lr_scheduler, build_optimizer +from annotator.oneformer.detectron2.utils import comm +from annotator.oneformer.detectron2.utils.collect_env import collect_env_info +from annotator.oneformer.detectron2.utils.env import seed_all_rng +from annotator.oneformer.detectron2.utils.events import CommonMetricPrinter, JSONWriter, TensorboardXWriter +from annotator.oneformer.detectron2.utils.file_io import PathManager +from annotator.oneformer.detectron2.utils.logger import setup_logger + +from . import hooks +from .train_loop import AMPTrainer, SimpleTrainer, TrainerBase + +__all__ = [ + "create_ddp_model", + "default_argument_parser", + "default_setup", + "default_writers", + "DefaultPredictor", + "DefaultTrainer", +] + + +def create_ddp_model(model, *, fp16_compression=False, **kwargs): + """ + Create a DistributedDataParallel model if there are >1 processes. + + Args: + model: a torch.nn.Module + fp16_compression: add fp16 compression hooks to the ddp object. + See more at https://pytorch.org/docs/stable/ddp_comm_hooks.html#torch.distributed.algorithms.ddp_comm_hooks.default_hooks.fp16_compress_hook + kwargs: other arguments of :module:`torch.nn.parallel.DistributedDataParallel`. 
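+
+    Example (a minimal sketch; extra keyword arguments are forwarded to DistributedDataParallel):
+    ::
+        model = build_model(cfg)   # any torch.nn.Module
+        model = create_ddp_model(model, broadcast_buffers=False, fp16_compression=True)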
+ """ # noqa + if comm.get_world_size() == 1: + return model + if "device_ids" not in kwargs: + kwargs["device_ids"] = [comm.get_local_rank()] + ddp = DistributedDataParallel(model, **kwargs) + if fp16_compression: + from torch.distributed.algorithms.ddp_comm_hooks import default as comm_hooks + + ddp.register_comm_hook(state=None, hook=comm_hooks.fp16_compress_hook) + return ddp + + +def default_argument_parser(epilog=None): + """ + Create a parser with some common arguments used by detectron2 users. + + Args: + epilog (str): epilog passed to ArgumentParser describing the usage. + + Returns: + argparse.ArgumentParser: + """ + parser = argparse.ArgumentParser( + epilog=epilog + or f""" +Examples: + +Run on single machine: + $ {sys.argv[0]} --num-gpus 8 --config-file cfg.yaml + +Change some config options: + $ {sys.argv[0]} --config-file cfg.yaml MODEL.WEIGHTS /path/to/weight.pth SOLVER.BASE_LR 0.001 + +Run on multiple machines: + (machine0)$ {sys.argv[0]} --machine-rank 0 --num-machines 2 --dist-url [--other-flags] + (machine1)$ {sys.argv[0]} --machine-rank 1 --num-machines 2 --dist-url [--other-flags] +""", + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument("--config-file", default="", metavar="FILE", help="path to config file") + parser.add_argument( + "--resume", + action="store_true", + help="Whether to attempt to resume from the checkpoint directory. " + "See documentation of `DefaultTrainer.resume_or_load()` for what it means.", + ) + parser.add_argument("--eval-only", action="store_true", help="perform evaluation only") + parser.add_argument("--num-gpus", type=int, default=1, help="number of gpus *per machine*") + parser.add_argument("--num-machines", type=int, default=1, help="total number of machines") + parser.add_argument( + "--machine-rank", type=int, default=0, help="the rank of this machine (unique per machine)" + ) + + # PyTorch still may leave orphan processes in multi-gpu training. + # Therefore we use a deterministic way to obtain port, + # so that users are aware of orphan processes by seeing the port occupied. + port = 2**15 + 2**14 + hash(os.getuid() if sys.platform != "win32" else 1) % 2**14 + parser.add_argument( + "--dist-url", + default="tcp://127.0.0.1:{}".format(port), + help="initialization URL for pytorch distributed backend. See " + "https://pytorch.org/docs/stable/distributed.html for details.", + ) + parser.add_argument( + "opts", + help=""" +Modify config options at the end of the command. For Yacs configs, use +space-separated "PATH.KEY VALUE" pairs. +For python-based LazyConfig, use "path.key=value". + """.strip(), + default=None, + nargs=argparse.REMAINDER, + ) + return parser + + +def _try_get_key(cfg, *keys, default=None): + """ + Try select keys from cfg until the first key that exists. Otherwise return default. + """ + if isinstance(cfg, CfgNode): + cfg = OmegaConf.create(cfg.dump()) + for k in keys: + none = object() + p = OmegaConf.select(cfg, k, default=none) + if p is not none: + return p + return default + + +def _highlight(code, filename): + try: + import pygments + except ImportError: + return code + + from pygments.lexers import Python3Lexer, YamlLexer + from pygments.formatters import Terminal256Formatter + + lexer = Python3Lexer() if filename.endswith(".py") else YamlLexer() + code = pygments.highlight(code, lexer, Terminal256Formatter(style="monokai")) + return code + + +def default_setup(cfg, args): + """ + Perform some basic common setups at the beginning of a job, including: + + 1. 
Set up the detectron2 logger + 2. Log basic information about environment, cmdline arguments, and config + 3. Backup the config to the output directory + + Args: + cfg (CfgNode or omegaconf.DictConfig): the full config to be used + args (argparse.NameSpace): the command line arguments to be logged + """ + output_dir = _try_get_key(cfg, "OUTPUT_DIR", "output_dir", "train.output_dir") + if comm.is_main_process() and output_dir: + PathManager.mkdirs(output_dir) + + rank = comm.get_rank() + setup_logger(output_dir, distributed_rank=rank, name="fvcore") + logger = setup_logger(output_dir, distributed_rank=rank) + + logger.info("Rank of current process: {}. World size: {}".format(rank, comm.get_world_size())) + logger.info("Environment info:\n" + collect_env_info()) + + logger.info("Command line arguments: " + str(args)) + if hasattr(args, "config_file") and args.config_file != "": + logger.info( + "Contents of args.config_file={}:\n{}".format( + args.config_file, + _highlight(PathManager.open(args.config_file, "r").read(), args.config_file), + ) + ) + + if comm.is_main_process() and output_dir: + # Note: some of our scripts may expect the existence of + # config.yaml in output directory + path = os.path.join(output_dir, "config.yaml") + if isinstance(cfg, CfgNode): + logger.info("Running with full config:\n{}".format(_highlight(cfg.dump(), ".yaml"))) + with PathManager.open(path, "w") as f: + f.write(cfg.dump()) + else: + LazyConfig.save(cfg, path) + logger.info("Full config saved to {}".format(path)) + + # make sure each worker has a different, yet deterministic seed if specified + seed = _try_get_key(cfg, "SEED", "train.seed", default=-1) + seed_all_rng(None if seed < 0 else seed + rank) + + # cudnn benchmark has large overhead. It shouldn't be used considering the small size of + # typical validation set. + if not (hasattr(args, "eval_only") and args.eval_only): + torch.backends.cudnn.benchmark = _try_get_key( + cfg, "CUDNN_BENCHMARK", "train.cudnn_benchmark", default=False + ) + + +def default_writers(output_dir: str, max_iter: Optional[int] = None): + """ + Build a list of :class:`EventWriter` to be used. + It now consists of a :class:`CommonMetricPrinter`, + :class:`TensorboardXWriter` and :class:`JSONWriter`. + + Args: + output_dir: directory to store JSON metrics and tensorboard events + max_iter: the total number of iterations + + Returns: + list[EventWriter]: a list of :class:`EventWriter` objects. + """ + PathManager.mkdirs(output_dir) + return [ + # It may not always print what you want to see, since it prints "common" metrics only. + CommonMetricPrinter(max_iter), + JSONWriter(os.path.join(output_dir, "metrics.json")), + TensorboardXWriter(output_dir), + ] + + +class DefaultPredictor: + """ + Create a simple end-to-end predictor with the given config that runs on + single device for a single input image. + + Compared to using the model directly, this class does the following additions: + + 1. Load checkpoint from `cfg.MODEL.WEIGHTS`. + 2. Always take BGR image as the input and apply conversion defined by `cfg.INPUT.FORMAT`. + 3. Apply resizing defined by `cfg.INPUT.{MIN,MAX}_SIZE_TEST`. + 4. Take one input image and produce a single output, instead of a batch. + + This is meant for simple demo purposes, so it does the above steps automatically. + This is not meant for benchmarks or running complicated inference logic. + If you'd like to do anything more complicated, please refer to its source code as + examples to build and use the model manually. 
+ + Attributes: + metadata (Metadata): the metadata of the underlying dataset, obtained from + cfg.DATASETS.TEST. + + Examples: + :: + pred = DefaultPredictor(cfg) + inputs = cv2.imread("input.jpg") + outputs = pred(inputs) + """ + + def __init__(self, cfg): + self.cfg = cfg.clone() # cfg can be modified by model + self.model = build_model(self.cfg) + self.model.eval() + if len(cfg.DATASETS.TEST): + self.metadata = MetadataCatalog.get(cfg.DATASETS.TEST[0]) + + checkpointer = DetectionCheckpointer(self.model) + checkpointer.load(cfg.MODEL.WEIGHTS) + + self.aug = T.ResizeShortestEdge( + [cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MIN_SIZE_TEST], cfg.INPUT.MAX_SIZE_TEST + ) + + self.input_format = cfg.INPUT.FORMAT + assert self.input_format in ["RGB", "BGR"], self.input_format + + def __call__(self, original_image): + """ + Args: + original_image (np.ndarray): an image of shape (H, W, C) (in BGR order). + + Returns: + predictions (dict): + the output of the model for one image only. + See :doc:`/tutorials/models` for details about the format. + """ + with torch.no_grad(): # https://github.com/sphinx-doc/sphinx/issues/4258 + # Apply pre-processing to image. + if self.input_format == "RGB": + # whether the model expects BGR inputs or RGB + original_image = original_image[:, :, ::-1] + height, width = original_image.shape[:2] + image = self.aug.get_transform(original_image).apply_image(original_image) + image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1)) + + inputs = {"image": image, "height": height, "width": width} + predictions = self.model([inputs])[0] + return predictions + + +class DefaultTrainer(TrainerBase): + """ + A trainer with default training logic. It does the following: + + 1. Create a :class:`SimpleTrainer` using model, optimizer, dataloader + defined by the given config. Create a LR scheduler defined by the config. + 2. Load the last checkpoint or `cfg.MODEL.WEIGHTS`, if exists, when + `resume_or_load` is called. + 3. Register a few common hooks defined by the config. + + It is created to simplify the **standard model training workflow** and reduce code boilerplate + for users who only need the standard training workflow, with standard features. + It means this class makes *many assumptions* about your training logic that + may easily become invalid in a new research. In fact, any assumptions beyond those made in the + :class:`SimpleTrainer` are too much for research. + + The code of this class has been annotated about restrictive assumptions it makes. + When they do not work for you, you're encouraged to: + + 1. Overwrite methods of this class, OR: + 2. Use :class:`SimpleTrainer`, which only does minimal SGD training and + nothing else. You can then add your own hooks if needed. OR: + 3. Write your own training loop similar to `tools/plain_train_net.py`. + + See the :doc:`/tutorials/training` tutorials for more details. + + Note that the behavior of this class, like other functions/classes in + this file, is not stable, since it is meant to represent the "common default behavior". + It is only guaranteed to work well with the standard models and training workflow in detectron2. + To obtain more stable behavior, write your own training logic with other public APIs. 
+ + Examples: + :: + trainer = DefaultTrainer(cfg) + trainer.resume_or_load() # load last checkpoint or MODEL.WEIGHTS + trainer.train() + + Attributes: + scheduler: + checkpointer (DetectionCheckpointer): + cfg (CfgNode): + """ + + def __init__(self, cfg): + """ + Args: + cfg (CfgNode): + """ + super().__init__() + logger = logging.getLogger("detectron2") + if not logger.isEnabledFor(logging.INFO): # setup_logger is not called for d2 + setup_logger() + cfg = DefaultTrainer.auto_scale_workers(cfg, comm.get_world_size()) + + # Assume these objects must be constructed in this order. + model = self.build_model(cfg) + optimizer = self.build_optimizer(cfg, model) + data_loader = self.build_train_loader(cfg) + + model = create_ddp_model(model, broadcast_buffers=False) + self._trainer = (AMPTrainer if cfg.SOLVER.AMP.ENABLED else SimpleTrainer)( + model, data_loader, optimizer + ) + + self.scheduler = self.build_lr_scheduler(cfg, optimizer) + self.checkpointer = DetectionCheckpointer( + # Assume you want to save checkpoints together with logs/statistics + model, + cfg.OUTPUT_DIR, + trainer=weakref.proxy(self), + ) + self.start_iter = 0 + self.max_iter = cfg.SOLVER.MAX_ITER + self.cfg = cfg + + self.register_hooks(self.build_hooks()) + + def resume_or_load(self, resume=True): + """ + If `resume==True` and `cfg.OUTPUT_DIR` contains the last checkpoint (defined by + a `last_checkpoint` file), resume from the file. Resuming means loading all + available states (eg. optimizer and scheduler) and update iteration counter + from the checkpoint. ``cfg.MODEL.WEIGHTS`` will not be used. + + Otherwise, this is considered as an independent training. The method will load model + weights from the file `cfg.MODEL.WEIGHTS` (but will not load other states) and start + from iteration 0. + + Args: + resume (bool): whether to do resume or not + """ + self.checkpointer.resume_or_load(self.cfg.MODEL.WEIGHTS, resume=resume) + if resume and self.checkpointer.has_checkpoint(): + # The checkpoint stores the training iteration that just finished, thus we start + # at the next iteration + self.start_iter = self.iter + 1 + + def build_hooks(self): + """ + Build a list of default hooks, including timing, evaluation, + checkpointing, lr scheduling, precise BN, writing events. + + Returns: + list[HookBase]: + """ + cfg = self.cfg.clone() + cfg.defrost() + cfg.DATALOADER.NUM_WORKERS = 0 # save some memory and time for PreciseBN + + ret = [ + hooks.IterationTimer(), + hooks.LRScheduler(), + hooks.PreciseBN( + # Run at the same freq as (but before) evaluation. + cfg.TEST.EVAL_PERIOD, + self.model, + # Build a new data loader to not affect training + self.build_train_loader(cfg), + cfg.TEST.PRECISE_BN.NUM_ITER, + ) + if cfg.TEST.PRECISE_BN.ENABLED and get_bn_modules(self.model) + else None, + ] + + # Do PreciseBN before checkpointer, because it updates the model and need to + # be saved by checkpointer. + # This is not always the best: if checkpointing has a different frequency, + # some checkpoints may have more precise statistics than others. + if comm.is_main_process(): + ret.append(hooks.PeriodicCheckpointer(self.checkpointer, cfg.SOLVER.CHECKPOINT_PERIOD)) + + def test_and_save_results(): + self._last_eval_results = self.test(self.cfg, self.model) + return self._last_eval_results + + # Do evaluation after checkpointer, because then if it fails, + # we can use the saved checkpoint to debug. 
+ ret.append(hooks.EvalHook(cfg.TEST.EVAL_PERIOD, test_and_save_results)) + + if comm.is_main_process(): + # Here the default print/log frequency of each writer is used. + # run writers in the end, so that evaluation metrics are written + ret.append(hooks.PeriodicWriter(self.build_writers(), period=20)) + return ret + + def build_writers(self): + """ + Build a list of writers to be used using :func:`default_writers()`. + If you'd like a different list of writers, you can overwrite it in + your trainer. + + Returns: + list[EventWriter]: a list of :class:`EventWriter` objects. + """ + return default_writers(self.cfg.OUTPUT_DIR, self.max_iter) + + def train(self): + """ + Run training. + + Returns: + OrderedDict of results, if evaluation is enabled. Otherwise None. + """ + super().train(self.start_iter, self.max_iter) + if len(self.cfg.TEST.EXPECTED_RESULTS) and comm.is_main_process(): + assert hasattr( + self, "_last_eval_results" + ), "No evaluation results obtained during training!" + verify_results(self.cfg, self._last_eval_results) + return self._last_eval_results + + def run_step(self): + self._trainer.iter = self.iter + self._trainer.run_step() + + def state_dict(self): + ret = super().state_dict() + ret["_trainer"] = self._trainer.state_dict() + return ret + + def load_state_dict(self, state_dict): + super().load_state_dict(state_dict) + self._trainer.load_state_dict(state_dict["_trainer"]) + + @classmethod + def build_model(cls, cfg): + """ + Returns: + torch.nn.Module: + + It now calls :func:`detectron2.modeling.build_model`. + Overwrite it if you'd like a different model. + """ + model = build_model(cfg) + logger = logging.getLogger(__name__) + logger.info("Model:\n{}".format(model)) + return model + + @classmethod + def build_optimizer(cls, cfg, model): + """ + Returns: + torch.optim.Optimizer: + + It now calls :func:`detectron2.solver.build_optimizer`. + Overwrite it if you'd like a different optimizer. + """ + return build_optimizer(cfg, model) + + @classmethod + def build_lr_scheduler(cls, cfg, optimizer): + """ + It now calls :func:`detectron2.solver.build_lr_scheduler`. + Overwrite it if you'd like a different scheduler. + """ + return build_lr_scheduler(cfg, optimizer) + + @classmethod + def build_train_loader(cls, cfg): + """ + Returns: + iterable + + It now calls :func:`detectron2.data.build_detection_train_loader`. + Overwrite it if you'd like a different data loader. + """ + return build_detection_train_loader(cfg) + + @classmethod + def build_test_loader(cls, cfg, dataset_name): + """ + Returns: + iterable + + It now calls :func:`detectron2.data.build_detection_test_loader`. + Overwrite it if you'd like a different data loader. + """ + return build_detection_test_loader(cfg, dataset_name) + + @classmethod + def build_evaluator(cls, cfg, dataset_name): + """ + Returns: + DatasetEvaluator or None + + It is not implemented by default. + """ + raise NotImplementedError( + """ +If you want DefaultTrainer to automatically run evaluation, +please implement `build_evaluator()` in subclasses (see train_net.py for example). +Alternatively, you can call evaluation functions yourself (see Colab balloon tutorial for example). +""" + ) + + @classmethod + def test(cls, cfg, model, evaluators=None): + """ + Evaluate the given model. The given model is expected to already contain + weights to evaluate. + + Args: + cfg (CfgNode): + model (nn.Module): + evaluators (list[DatasetEvaluator] or None): if None, will call + :meth:`build_evaluator`. 
Otherwise, must have the same length as + ``cfg.DATASETS.TEST``. + + Returns: + dict: a dict of result metrics + """ + logger = logging.getLogger(__name__) + if isinstance(evaluators, DatasetEvaluator): + evaluators = [evaluators] + if evaluators is not None: + assert len(cfg.DATASETS.TEST) == len(evaluators), "{} != {}".format( + len(cfg.DATASETS.TEST), len(evaluators) + ) + + results = OrderedDict() + for idx, dataset_name in enumerate(cfg.DATASETS.TEST): + data_loader = cls.build_test_loader(cfg, dataset_name) + # When evaluators are passed in as arguments, + # implicitly assume that evaluators can be created before data_loader. + if evaluators is not None: + evaluator = evaluators[idx] + else: + try: + evaluator = cls.build_evaluator(cfg, dataset_name) + except NotImplementedError: + logger.warn( + "No evaluator found. Use `DefaultTrainer.test(evaluators=)`, " + "or implement its `build_evaluator` method." + ) + results[dataset_name] = {} + continue + results_i = inference_on_dataset(model, data_loader, evaluator) + results[dataset_name] = results_i + if comm.is_main_process(): + assert isinstance( + results_i, dict + ), "Evaluator must return a dict on the main process. Got {} instead.".format( + results_i + ) + logger.info("Evaluation results for {} in csv format:".format(dataset_name)) + print_csv_format(results_i) + + if len(results) == 1: + results = list(results.values())[0] + return results + + @staticmethod + def auto_scale_workers(cfg, num_workers: int): + """ + When the config is defined for certain number of workers (according to + ``cfg.SOLVER.REFERENCE_WORLD_SIZE``) that's different from the number of + workers currently in use, returns a new cfg where the total batch size + is scaled so that the per-GPU batch size stays the same as the + original ``IMS_PER_BATCH // REFERENCE_WORLD_SIZE``. + + Other config options are also scaled accordingly: + * training steps and warmup steps are scaled inverse proportionally. + * learning rate are scaled proportionally, following :paper:`ImageNet in 1h`. + + For example, with the original config like the following: + + .. code-block:: yaml + + IMS_PER_BATCH: 16 + BASE_LR: 0.1 + REFERENCE_WORLD_SIZE: 8 + MAX_ITER: 5000 + STEPS: (4000,) + CHECKPOINT_PERIOD: 1000 + + When this config is used on 16 GPUs instead of the reference number 8, + calling this method will return a new config with: + + .. code-block:: yaml + + IMS_PER_BATCH: 32 + BASE_LR: 0.2 + REFERENCE_WORLD_SIZE: 16 + MAX_ITER: 2500 + STEPS: (2000,) + CHECKPOINT_PERIOD: 500 + + Note that both the original config and this new config can be trained on 16 GPUs. + It's up to user whether to enable this feature (by setting ``REFERENCE_WORLD_SIZE``). + + Returns: + CfgNode: a new config. Same as original if ``cfg.SOLVER.REFERENCE_WORLD_SIZE==0``. + """ + old_world_size = cfg.SOLVER.REFERENCE_WORLD_SIZE + if old_world_size == 0 or old_world_size == num_workers: + return cfg + cfg = cfg.clone() + frozen = cfg.is_frozen() + cfg.defrost() + + assert ( + cfg.SOLVER.IMS_PER_BATCH % old_world_size == 0 + ), "Invalid REFERENCE_WORLD_SIZE in config!" 
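+        # Linear scaling rule: keep the per-GPU batch size fixed, so the total
+        # batch size and base LR grow with the worker count, while iteration-based
+        # quantities (max_iter, warmup, steps, eval/checkpoint periods) shrink by
+        # the same factor.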
+ scale = num_workers / old_world_size + bs = cfg.SOLVER.IMS_PER_BATCH = int(round(cfg.SOLVER.IMS_PER_BATCH * scale)) + lr = cfg.SOLVER.BASE_LR = cfg.SOLVER.BASE_LR * scale + max_iter = cfg.SOLVER.MAX_ITER = int(round(cfg.SOLVER.MAX_ITER / scale)) + warmup_iter = cfg.SOLVER.WARMUP_ITERS = int(round(cfg.SOLVER.WARMUP_ITERS / scale)) + cfg.SOLVER.STEPS = tuple(int(round(s / scale)) for s in cfg.SOLVER.STEPS) + cfg.TEST.EVAL_PERIOD = int(round(cfg.TEST.EVAL_PERIOD / scale)) + cfg.SOLVER.CHECKPOINT_PERIOD = int(round(cfg.SOLVER.CHECKPOINT_PERIOD / scale)) + cfg.SOLVER.REFERENCE_WORLD_SIZE = num_workers # maintain invariant + logger = logging.getLogger(__name__) + logger.info( + f"Auto-scaling the config to batch_size={bs}, learning_rate={lr}, " + f"max_iter={max_iter}, warmup={warmup_iter}." + ) + + if frozen: + cfg.freeze() + return cfg + + +# Access basic attributes from the underlying trainer +for _attr in ["model", "data_loader", "optimizer"]: + setattr( + DefaultTrainer, + _attr, + property( + # getter + lambda self, x=_attr: getattr(self._trainer, x), + # setter + lambda self, value, x=_attr: setattr(self._trainer, x, value), + ), + ) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/engine/hooks.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/engine/hooks.py new file mode 100644 index 0000000000000000000000000000000000000000..7dd43ac77068c908bc13263f1697fa2e3332d7c9 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/engine/hooks.py @@ -0,0 +1,690 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + +import datetime +import itertools +import logging +import math +import operator +import os +import tempfile +import time +import warnings +from collections import Counter +import torch +from fvcore.common.checkpoint import Checkpointer +from fvcore.common.checkpoint import PeriodicCheckpointer as _PeriodicCheckpointer +from fvcore.common.param_scheduler import ParamScheduler +from fvcore.common.timer import Timer +from fvcore.nn.precise_bn import get_bn_modules, update_bn_stats + +import annotator.oneformer.detectron2.utils.comm as comm +from annotator.oneformer.detectron2.evaluation.testing import flatten_results_dict +from annotator.oneformer.detectron2.solver import LRMultiplier +from annotator.oneformer.detectron2.solver import LRScheduler as _LRScheduler +from annotator.oneformer.detectron2.utils.events import EventStorage, EventWriter +from annotator.oneformer.detectron2.utils.file_io import PathManager + +from .train_loop import HookBase + +__all__ = [ + "CallbackHook", + "IterationTimer", + "PeriodicWriter", + "PeriodicCheckpointer", + "BestCheckpointer", + "LRScheduler", + "AutogradProfiler", + "EvalHook", + "PreciseBN", + "TorchProfiler", + "TorchMemoryStats", +] + + +""" +Implement some common hooks. +""" + + +class CallbackHook(HookBase): + """ + Create a hook using callback functions provided by the user. + """ + + def __init__(self, *, before_train=None, after_train=None, before_step=None, after_step=None): + """ + Each argument is a function that takes one argument: the trainer. 
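+
+        Example (an illustrative sketch; ``trainer`` is assumed to be an existing
+        :class:`TrainerBase` instance and the lambda is a placeholder)::
+
+            hook = CallbackHook(after_step=lambda t: print(t.iter))
+            trainer.register_hooks([hook])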
+ """ + self._before_train = before_train + self._before_step = before_step + self._after_step = after_step + self._after_train = after_train + + def before_train(self): + if self._before_train: + self._before_train(self.trainer) + + def after_train(self): + if self._after_train: + self._after_train(self.trainer) + # The functions may be closures that hold reference to the trainer + # Therefore, delete them to avoid circular reference. + del self._before_train, self._after_train + del self._before_step, self._after_step + + def before_step(self): + if self._before_step: + self._before_step(self.trainer) + + def after_step(self): + if self._after_step: + self._after_step(self.trainer) + + +class IterationTimer(HookBase): + """ + Track the time spent for each iteration (each run_step call in the trainer). + Print a summary in the end of training. + + This hook uses the time between the call to its :meth:`before_step` + and :meth:`after_step` methods. + Under the convention that :meth:`before_step` of all hooks should only + take negligible amount of time, the :class:`IterationTimer` hook should be + placed at the beginning of the list of hooks to obtain accurate timing. + """ + + def __init__(self, warmup_iter=3): + """ + Args: + warmup_iter (int): the number of iterations at the beginning to exclude + from timing. + """ + self._warmup_iter = warmup_iter + self._step_timer = Timer() + self._start_time = time.perf_counter() + self._total_timer = Timer() + + def before_train(self): + self._start_time = time.perf_counter() + self._total_timer.reset() + self._total_timer.pause() + + def after_train(self): + logger = logging.getLogger(__name__) + total_time = time.perf_counter() - self._start_time + total_time_minus_hooks = self._total_timer.seconds() + hook_time = total_time - total_time_minus_hooks + + num_iter = self.trainer.storage.iter + 1 - self.trainer.start_iter - self._warmup_iter + + if num_iter > 0 and total_time_minus_hooks > 0: + # Speed is meaningful only after warmup + # NOTE this format is parsed by grep in some scripts + logger.info( + "Overall training speed: {} iterations in {} ({:.4f} s / it)".format( + num_iter, + str(datetime.timedelta(seconds=int(total_time_minus_hooks))), + total_time_minus_hooks / num_iter, + ) + ) + + logger.info( + "Total training time: {} ({} on hooks)".format( + str(datetime.timedelta(seconds=int(total_time))), + str(datetime.timedelta(seconds=int(hook_time))), + ) + ) + + def before_step(self): + self._step_timer.reset() + self._total_timer.resume() + + def after_step(self): + # +1 because we're in after_step, the current step is done + # but not yet counted + iter_done = self.trainer.storage.iter - self.trainer.start_iter + 1 + if iter_done >= self._warmup_iter: + sec = self._step_timer.seconds() + self.trainer.storage.put_scalars(time=sec) + else: + self._start_time = time.perf_counter() + self._total_timer.reset() + + self._total_timer.pause() + + +class PeriodicWriter(HookBase): + """ + Write events to EventStorage (by calling ``writer.write()``) periodically. + + It is executed every ``period`` iterations and after the last iteration. + Note that ``period`` does not affect how data is smoothed by each writer. 
+ """ + + def __init__(self, writers, period=20): + """ + Args: + writers (list[EventWriter]): a list of EventWriter objects + period (int): + """ + self._writers = writers + for w in writers: + assert isinstance(w, EventWriter), w + self._period = period + + def after_step(self): + if (self.trainer.iter + 1) % self._period == 0 or ( + self.trainer.iter == self.trainer.max_iter - 1 + ): + for writer in self._writers: + writer.write() + + def after_train(self): + for writer in self._writers: + # If any new data is found (e.g. produced by other after_train), + # write them before closing + writer.write() + writer.close() + + +class PeriodicCheckpointer(_PeriodicCheckpointer, HookBase): + """ + Same as :class:`detectron2.checkpoint.PeriodicCheckpointer`, but as a hook. + + Note that when used as a hook, + it is unable to save additional data other than what's defined + by the given `checkpointer`. + + It is executed every ``period`` iterations and after the last iteration. + """ + + def before_train(self): + self.max_iter = self.trainer.max_iter + + def after_step(self): + # No way to use **kwargs + self.step(self.trainer.iter) + + +class BestCheckpointer(HookBase): + """ + Checkpoints best weights based off given metric. + + This hook should be used in conjunction to and executed after the hook + that produces the metric, e.g. `EvalHook`. + """ + + def __init__( + self, + eval_period: int, + checkpointer: Checkpointer, + val_metric: str, + mode: str = "max", + file_prefix: str = "model_best", + ) -> None: + """ + Args: + eval_period (int): the period `EvalHook` is set to run. + checkpointer: the checkpointer object used to save checkpoints. + val_metric (str): validation metric to track for best checkpoint, e.g. "bbox/AP50" + mode (str): one of {'max', 'min'}. controls whether the chosen val metric should be + maximized or minimized, e.g. for "bbox/AP50" it should be "max" + file_prefix (str): the prefix of checkpoint's filename, defaults to "model_best" + """ + self._logger = logging.getLogger(__name__) + self._period = eval_period + self._val_metric = val_metric + assert mode in [ + "max", + "min", + ], f'Mode "{mode}" to `BestCheckpointer` is unknown. It should be one of {"max", "min"}.' + if mode == "max": + self._compare = operator.gt + else: + self._compare = operator.lt + self._checkpointer = checkpointer + self._file_prefix = file_prefix + self.best_metric = None + self.best_iter = None + + def _update_best(self, val, iteration): + if math.isnan(val) or math.isinf(val): + return False + self.best_metric = val + self.best_iter = iteration + return True + + def _best_checking(self): + metric_tuple = self.trainer.storage.latest().get(self._val_metric) + if metric_tuple is None: + self._logger.warning( + f"Given val metric {self._val_metric} does not seem to be computed/stored." + "Will not be checkpointing based on it." 
+ ) + return + else: + latest_metric, metric_iter = metric_tuple + + if self.best_metric is None: + if self._update_best(latest_metric, metric_iter): + additional_state = {"iteration": metric_iter} + self._checkpointer.save(f"{self._file_prefix}", **additional_state) + self._logger.info( + f"Saved first model at {self.best_metric:0.5f} @ {self.best_iter} steps" + ) + elif self._compare(latest_metric, self.best_metric): + additional_state = {"iteration": metric_iter} + self._checkpointer.save(f"{self._file_prefix}", **additional_state) + self._logger.info( + f"Saved best model as latest eval score for {self._val_metric} is " + f"{latest_metric:0.5f}, better than last best score " + f"{self.best_metric:0.5f} @ iteration {self.best_iter}." + ) + self._update_best(latest_metric, metric_iter) + else: + self._logger.info( + f"Not saving as latest eval score for {self._val_metric} is {latest_metric:0.5f}, " + f"not better than best score {self.best_metric:0.5f} @ iteration {self.best_iter}." + ) + + def after_step(self): + # same conditions as `EvalHook` + next_iter = self.trainer.iter + 1 + if ( + self._period > 0 + and next_iter % self._period == 0 + and next_iter != self.trainer.max_iter + ): + self._best_checking() + + def after_train(self): + # same conditions as `EvalHook` + if self.trainer.iter + 1 >= self.trainer.max_iter: + self._best_checking() + + +class LRScheduler(HookBase): + """ + A hook which executes a torch builtin LR scheduler and summarizes the LR. + It is executed after every iteration. + """ + + def __init__(self, optimizer=None, scheduler=None): + """ + Args: + optimizer (torch.optim.Optimizer): + scheduler (torch.optim.LRScheduler or fvcore.common.param_scheduler.ParamScheduler): + if a :class:`ParamScheduler` object, it defines the multiplier over the base LR + in the optimizer. + + If any argument is not given, will try to obtain it from the trainer. 
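+
+        Example (illustrative; ``optimizer`` and ``scheduler`` are assumed to be
+        built elsewhere, e.g. via ``build_optimizer`` / ``build_lr_scheduler``)::
+
+            trainer.register_hooks([LRScheduler(optimizer, scheduler)])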
+ """ + self._optimizer = optimizer + self._scheduler = scheduler + + def before_train(self): + self._optimizer = self._optimizer or self.trainer.optimizer + if isinstance(self.scheduler, ParamScheduler): + self._scheduler = LRMultiplier( + self._optimizer, + self.scheduler, + self.trainer.max_iter, + last_iter=self.trainer.iter - 1, + ) + self._best_param_group_id = LRScheduler.get_best_param_group_id(self._optimizer) + + @staticmethod + def get_best_param_group_id(optimizer): + # NOTE: some heuristics on what LR to summarize + # summarize the param group with most parameters + largest_group = max(len(g["params"]) for g in optimizer.param_groups) + + if largest_group == 1: + # If all groups have one parameter, + # then find the most common initial LR, and use it for summary + lr_count = Counter([g["lr"] for g in optimizer.param_groups]) + lr = lr_count.most_common()[0][0] + for i, g in enumerate(optimizer.param_groups): + if g["lr"] == lr: + return i + else: + for i, g in enumerate(optimizer.param_groups): + if len(g["params"]) == largest_group: + return i + + def after_step(self): + lr = self._optimizer.param_groups[self._best_param_group_id]["lr"] + self.trainer.storage.put_scalar("lr", lr, smoothing_hint=False) + self.scheduler.step() + + @property + def scheduler(self): + return self._scheduler or self.trainer.scheduler + + def state_dict(self): + if isinstance(self.scheduler, _LRScheduler): + return self.scheduler.state_dict() + return {} + + def load_state_dict(self, state_dict): + if isinstance(self.scheduler, _LRScheduler): + logger = logging.getLogger(__name__) + logger.info("Loading scheduler from state_dict ...") + self.scheduler.load_state_dict(state_dict) + + +class TorchProfiler(HookBase): + """ + A hook which runs `torch.profiler.profile`. + + Examples: + :: + hooks.TorchProfiler( + lambda trainer: 10 < trainer.iter < 20, self.cfg.OUTPUT_DIR + ) + + The above example will run the profiler for iteration 10~20 and dump + results to ``OUTPUT_DIR``. We did not profile the first few iterations + because they are typically slower than the rest. + The result files can be loaded in the ``chrome://tracing`` page in chrome browser, + and the tensorboard visualizations can be visualized using + ``tensorboard --logdir OUTPUT_DIR/log`` + """ + + def __init__(self, enable_predicate, output_dir, *, activities=None, save_tensorboard=True): + """ + Args: + enable_predicate (callable[trainer -> bool]): a function which takes a trainer, + and returns whether to enable the profiler. + It will be called once every step, and can be used to select which steps to profile. + output_dir (str): the output directory to dump tracing files. + activities (iterable): same as in `torch.profiler.profile`. 
+ save_tensorboard (bool): whether to save tensorboard visualizations at (output_dir)/log/ + """ + self._enable_predicate = enable_predicate + self._activities = activities + self._output_dir = output_dir + self._save_tensorboard = save_tensorboard + + def before_step(self): + if self._enable_predicate(self.trainer): + if self._save_tensorboard: + on_trace_ready = torch.profiler.tensorboard_trace_handler( + os.path.join( + self._output_dir, + "log", + "profiler-tensorboard-iter{}".format(self.trainer.iter), + ), + f"worker{comm.get_rank()}", + ) + else: + on_trace_ready = None + self._profiler = torch.profiler.profile( + activities=self._activities, + on_trace_ready=on_trace_ready, + record_shapes=True, + profile_memory=True, + with_stack=True, + with_flops=True, + ) + self._profiler.__enter__() + else: + self._profiler = None + + def after_step(self): + if self._profiler is None: + return + self._profiler.__exit__(None, None, None) + if not self._save_tensorboard: + PathManager.mkdirs(self._output_dir) + out_file = os.path.join( + self._output_dir, "profiler-trace-iter{}.json".format(self.trainer.iter) + ) + if "://" not in out_file: + self._profiler.export_chrome_trace(out_file) + else: + # Support non-posix filesystems + with tempfile.TemporaryDirectory(prefix="detectron2_profiler") as d: + tmp_file = os.path.join(d, "tmp.json") + self._profiler.export_chrome_trace(tmp_file) + with open(tmp_file) as f: + content = f.read() + with PathManager.open(out_file, "w") as f: + f.write(content) + + +class AutogradProfiler(TorchProfiler): + """ + A hook which runs `torch.autograd.profiler.profile`. + + Examples: + :: + hooks.AutogradProfiler( + lambda trainer: 10 < trainer.iter < 20, self.cfg.OUTPUT_DIR + ) + + The above example will run the profiler for iteration 10~20 and dump + results to ``OUTPUT_DIR``. We did not profile the first few iterations + because they are typically slower than the rest. + The result files can be loaded in the ``chrome://tracing`` page in chrome browser. + + Note: + When used together with NCCL on older version of GPUs, + autograd profiler may cause deadlock because it unnecessarily allocates + memory on every device it sees. The memory management calls, if + interleaved with NCCL calls, lead to deadlock on GPUs that do not + support ``cudaLaunchCooperativeKernelMultiDevice``. + """ + + def __init__(self, enable_predicate, output_dir, *, use_cuda=True): + """ + Args: + enable_predicate (callable[trainer -> bool]): a function which takes a trainer, + and returns whether to enable the profiler. + It will be called once every step, and can be used to select which steps to profile. + output_dir (str): the output directory to dump tracing files. + use_cuda (bool): same as in `torch.autograd.profiler.profile`. + """ + warnings.warn("AutogradProfiler has been deprecated in favor of TorchProfiler.") + self._enable_predicate = enable_predicate + self._use_cuda = use_cuda + self._output_dir = output_dir + + def before_step(self): + if self._enable_predicate(self.trainer): + self._profiler = torch.autograd.profiler.profile(use_cuda=self._use_cuda) + self._profiler.__enter__() + else: + self._profiler = None + + +class EvalHook(HookBase): + """ + Run an evaluation function periodically, and at the end of training. + + It is executed every ``eval_period`` iterations and after the last iteration. + """ + + def __init__(self, eval_period, eval_function, eval_after_train=True): + """ + Args: + eval_period (int): the period to run `eval_function`. 
Set to 0 to + not evaluate periodically (but still evaluate after the last iteration + if `eval_after_train` is True). + eval_function (callable): a function which takes no arguments, and + returns a nested dict of evaluation metrics. + eval_after_train (bool): whether to evaluate after the last iteration + + Note: + This hook must be enabled in all or none workers. + If you would like only certain workers to perform evaluation, + give other workers a no-op function (`eval_function=lambda: None`). + """ + self._period = eval_period + self._func = eval_function + self._eval_after_train = eval_after_train + + def _do_eval(self): + results = self._func() + + if results: + assert isinstance( + results, dict + ), "Eval function must return a dict. Got {} instead.".format(results) + + flattened_results = flatten_results_dict(results) + for k, v in flattened_results.items(): + try: + v = float(v) + except Exception as e: + raise ValueError( + "[EvalHook] eval_function should return a nested dict of float. " + "Got '{}: {}' instead.".format(k, v) + ) from e + self.trainer.storage.put_scalars(**flattened_results, smoothing_hint=False) + + # Evaluation may take different time among workers. + # A barrier make them start the next iteration together. + comm.synchronize() + + def after_step(self): + next_iter = self.trainer.iter + 1 + if self._period > 0 and next_iter % self._period == 0: + # do the last eval in after_train + if next_iter != self.trainer.max_iter: + self._do_eval() + + def after_train(self): + # This condition is to prevent the eval from running after a failed training + if self._eval_after_train and self.trainer.iter + 1 >= self.trainer.max_iter: + self._do_eval() + # func is likely a closure that holds reference to the trainer + # therefore we clean it to avoid circular reference in the end + del self._func + + +class PreciseBN(HookBase): + """ + The standard implementation of BatchNorm uses EMA in inference, which is + sometimes suboptimal. + This class computes the true average of statistics rather than the moving average, + and put true averages to every BN layer in the given model. + + It is executed every ``period`` iterations and after the last iteration. + """ + + def __init__(self, period, model, data_loader, num_iter): + """ + Args: + period (int): the period this hook is run, or 0 to not run during training. + The hook will always run in the end of training. + model (nn.Module): a module whose all BN layers in training mode will be + updated by precise BN. + Note that user is responsible for ensuring the BN layers to be + updated are in training mode when this hook is triggered. + data_loader (iterable): it will produce data to be run by `model(data)`. + num_iter (int): number of iterations used to compute the precise + statistics. + """ + self._logger = logging.getLogger(__name__) + if len(get_bn_modules(model)) == 0: + self._logger.info( + "PreciseBN is disabled because model does not contain BN layers in training mode." + ) + self._disabled = True + return + + self._model = model + self._data_loader = data_loader + self._num_iter = num_iter + self._period = period + self._disabled = False + + self._data_iter = None + + def after_step(self): + next_iter = self.trainer.iter + 1 + is_final = next_iter == self.trainer.max_iter + if is_final or (self._period > 0 and next_iter % self._period == 0): + self.update_stats() + + def update_stats(self): + """ + Update the model with precise statistics. Users can manually call this method. 
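+
+        For example (illustrative; ``precise_bn_hook`` is an already-constructed
+        :class:`PreciseBN` instance)::
+
+            precise_bn_hook.update_stats()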
+ """ + if self._disabled: + return + + if self._data_iter is None: + self._data_iter = iter(self._data_loader) + + def data_loader(): + for num_iter in itertools.count(1): + if num_iter % 100 == 0: + self._logger.info( + "Running precise-BN ... {}/{} iterations.".format(num_iter, self._num_iter) + ) + # This way we can reuse the same iterator + yield next(self._data_iter) + + with EventStorage(): # capture events in a new storage to discard them + self._logger.info( + "Running precise-BN for {} iterations... ".format(self._num_iter) + + "Note that this could produce different statistics every time." + ) + update_bn_stats(self._model, data_loader(), self._num_iter) + + +class TorchMemoryStats(HookBase): + """ + Writes pytorch's cuda memory statistics periodically. + """ + + def __init__(self, period=20, max_runs=10): + """ + Args: + period (int): Output stats each 'period' iterations + max_runs (int): Stop the logging after 'max_runs' + """ + + self._logger = logging.getLogger(__name__) + self._period = period + self._max_runs = max_runs + self._runs = 0 + + def after_step(self): + if self._runs > self._max_runs: + return + + if (self.trainer.iter + 1) % self._period == 0 or ( + self.trainer.iter == self.trainer.max_iter - 1 + ): + if torch.cuda.is_available(): + max_reserved_mb = torch.cuda.max_memory_reserved() / 1024.0 / 1024.0 + reserved_mb = torch.cuda.memory_reserved() / 1024.0 / 1024.0 + max_allocated_mb = torch.cuda.max_memory_allocated() / 1024.0 / 1024.0 + allocated_mb = torch.cuda.memory_allocated() / 1024.0 / 1024.0 + + self._logger.info( + ( + " iter: {} " + " max_reserved_mem: {:.0f}MB " + " reserved_mem: {:.0f}MB " + " max_allocated_mem: {:.0f}MB " + " allocated_mem: {:.0f}MB " + ).format( + self.trainer.iter, + max_reserved_mb, + reserved_mb, + max_allocated_mb, + allocated_mb, + ) + ) + + self._runs += 1 + if self._runs == self._max_runs: + mem_summary = torch.cuda.memory_summary() + self._logger.info("\n" + mem_summary) + + torch.cuda.reset_peak_memory_stats() diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/engine/launch.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/engine/launch.py new file mode 100644 index 0000000000000000000000000000000000000000..0a2d6bcdb5f1906d3eedb04b5aa939f8269f0344 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/engine/launch.py @@ -0,0 +1,123 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import logging +from datetime import timedelta +import torch +import torch.distributed as dist +import torch.multiprocessing as mp + +from annotator.oneformer.detectron2.utils import comm + +__all__ = ["DEFAULT_TIMEOUT", "launch"] + +DEFAULT_TIMEOUT = timedelta(minutes=30) + + +def _find_free_port(): + import socket + + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + # Binding to port 0 will cause the OS to find an available port for us + sock.bind(("", 0)) + port = sock.getsockname()[1] + sock.close() + # NOTE: there is still a chance the port could be taken by other processes. + return port + + +def launch( + main_func, + # Should be num_processes_per_machine, but kept for compatibility. + num_gpus_per_machine, + num_machines=1, + machine_rank=0, + dist_url=None, + args=(), + timeout=DEFAULT_TIMEOUT, +): + """ + Launch multi-process or distributed training. + This function must be called on all machines involved in the training. + It will spawn child processes (defined by ``num_gpus_per_machine``) on each machine. 
+ + Args: + main_func: a function that will be called by `main_func(*args)` + num_gpus_per_machine (int): number of processes per machine. When + using GPUs, this should be the number of GPUs. + num_machines (int): the total number of machines + machine_rank (int): the rank of this machine + dist_url (str): url to connect to for distributed jobs, including protocol + e.g. "tcp://127.0.0.1:8686". + Can be set to "auto" to automatically select a free port on localhost + timeout (timedelta): timeout of the distributed workers + args (tuple): arguments passed to main_func + """ + world_size = num_machines * num_gpus_per_machine + if world_size > 1: + # https://github.com/pytorch/pytorch/pull/14391 + # TODO prctl in spawned processes + + if dist_url == "auto": + assert num_machines == 1, "dist_url=auto not supported in multi-machine jobs." + port = _find_free_port() + dist_url = f"tcp://127.0.0.1:{port}" + if num_machines > 1 and dist_url.startswith("file://"): + logger = logging.getLogger(__name__) + logger.warning( + "file:// is not a reliable init_method in multi-machine jobs. Prefer tcp://" + ) + + mp.start_processes( + _distributed_worker, + nprocs=num_gpus_per_machine, + args=( + main_func, + world_size, + num_gpus_per_machine, + machine_rank, + dist_url, + args, + timeout, + ), + daemon=False, + ) + else: + main_func(*args) + + +def _distributed_worker( + local_rank, + main_func, + world_size, + num_gpus_per_machine, + machine_rank, + dist_url, + args, + timeout=DEFAULT_TIMEOUT, +): + has_gpu = torch.cuda.is_available() + if has_gpu: + assert num_gpus_per_machine <= torch.cuda.device_count() + global_rank = machine_rank * num_gpus_per_machine + local_rank + try: + dist.init_process_group( + backend="NCCL" if has_gpu else "GLOO", + init_method=dist_url, + world_size=world_size, + rank=global_rank, + timeout=timeout, + ) + except Exception as e: + logger = logging.getLogger(__name__) + logger.error("Process group URL: {}".format(dist_url)) + raise e + + # Setup the local process group. + comm.create_local_process_group(num_gpus_per_machine) + if has_gpu: + torch.cuda.set_device(local_rank) + + # synchronize is needed here to prevent a possible timeout after calling init_process_group + # See: https://github.com/facebookresearch/maskrcnn-benchmark/issues/172 + comm.synchronize() + + main_func(*args) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/engine/train_loop.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/engine/train_loop.py new file mode 100644 index 0000000000000000000000000000000000000000..0c24c5af94e8f9367a5d577a617ec426292d3f89 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/engine/train_loop.py @@ -0,0 +1,469 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + +import logging +import numpy as np +import time +import weakref +from typing import List, Mapping, Optional +import torch +from torch.nn.parallel import DataParallel, DistributedDataParallel + +import annotator.oneformer.detectron2.utils.comm as comm +from annotator.oneformer.detectron2.utils.events import EventStorage, get_event_storage +from annotator.oneformer.detectron2.utils.logger import _log_api_usage + +__all__ = ["HookBase", "TrainerBase", "SimpleTrainer", "AMPTrainer"] + + +class HookBase: + """ + Base class for hooks that can be registered with :class:`TrainerBase`. + + Each hook can implement 4 methods. 
The way they are called is demonstrated + in the following snippet: + :: + hook.before_train() + for iter in range(start_iter, max_iter): + hook.before_step() + trainer.run_step() + hook.after_step() + iter += 1 + hook.after_train() + + Notes: + 1. In the hook method, users can access ``self.trainer`` to access more + properties about the context (e.g., model, current iteration, or config + if using :class:`DefaultTrainer`). + + 2. A hook that does something in :meth:`before_step` can often be + implemented equivalently in :meth:`after_step`. + If the hook takes non-trivial time, it is strongly recommended to + implement the hook in :meth:`after_step` instead of :meth:`before_step`. + The convention is that :meth:`before_step` should only take negligible time. + + Following this convention will allow hooks that do care about the difference + between :meth:`before_step` and :meth:`after_step` (e.g., timer) to + function properly. + + """ + + trainer: "TrainerBase" = None + """ + A weak reference to the trainer object. Set by the trainer when the hook is registered. + """ + + def before_train(self): + """ + Called before the first iteration. + """ + pass + + def after_train(self): + """ + Called after the last iteration. + """ + pass + + def before_step(self): + """ + Called before each iteration. + """ + pass + + def after_backward(self): + """ + Called after the backward pass of each iteration. + """ + pass + + def after_step(self): + """ + Called after each iteration. + """ + pass + + def state_dict(self): + """ + Hooks are stateless by default, but can be made checkpointable by + implementing `state_dict` and `load_state_dict`. + """ + return {} + + +class TrainerBase: + """ + Base class for iterative trainer with hooks. + + The only assumption we made here is: the training runs in a loop. + A subclass can implement what the loop is. + We made no assumptions about the existence of dataloader, optimizer, model, etc. + + Attributes: + iter(int): the current iteration. + + start_iter(int): The iteration to start with. + By convention the minimum possible value is 0. + + max_iter(int): The iteration to end training. + + storage(EventStorage): An EventStorage that's opened during the course of training. + """ + + def __init__(self) -> None: + self._hooks: List[HookBase] = [] + self.iter: int = 0 + self.start_iter: int = 0 + self.max_iter: int + self.storage: EventStorage + _log_api_usage("trainer." + self.__class__.__name__) + + def register_hooks(self, hooks: List[Optional[HookBase]]) -> None: + """ + Register hooks to the trainer. The hooks are executed in the order + they are registered. + + Args: + hooks (list[Optional[HookBase]]): list of hooks + """ + hooks = [h for h in hooks if h is not None] + for h in hooks: + assert isinstance(h, HookBase) + # To avoid circular reference, hooks and trainer cannot own each other. 
+ # This normally does not matter, but will cause memory leak if the + # involved objects contain __del__: + # See http://engineering.hearsaysocial.com/2013/06/16/circular-references-in-python/ + h.trainer = weakref.proxy(self) + self._hooks.extend(hooks) + + def train(self, start_iter: int, max_iter: int): + """ + Args: + start_iter, max_iter (int): See docs above + """ + logger = logging.getLogger(__name__) + logger.info("Starting training from iteration {}".format(start_iter)) + + self.iter = self.start_iter = start_iter + self.max_iter = max_iter + + with EventStorage(start_iter) as self.storage: + try: + self.before_train() + for self.iter in range(start_iter, max_iter): + self.before_step() + self.run_step() + self.after_step() + # self.iter == max_iter can be used by `after_train` to + # tell whether the training successfully finished or failed + # due to exceptions. + self.iter += 1 + except Exception: + logger.exception("Exception during training:") + raise + finally: + self.after_train() + + def before_train(self): + for h in self._hooks: + h.before_train() + + def after_train(self): + self.storage.iter = self.iter + for h in self._hooks: + h.after_train() + + def before_step(self): + # Maintain the invariant that storage.iter == trainer.iter + # for the entire execution of each step + self.storage.iter = self.iter + + for h in self._hooks: + h.before_step() + + def after_backward(self): + for h in self._hooks: + h.after_backward() + + def after_step(self): + for h in self._hooks: + h.after_step() + + def run_step(self): + raise NotImplementedError + + def state_dict(self): + ret = {"iteration": self.iter} + hooks_state = {} + for h in self._hooks: + sd = h.state_dict() + if sd: + name = type(h).__qualname__ + if name in hooks_state: + # TODO handle repetitive stateful hooks + continue + hooks_state[name] = sd + if hooks_state: + ret["hooks"] = hooks_state + return ret + + def load_state_dict(self, state_dict): + logger = logging.getLogger(__name__) + self.iter = state_dict["iteration"] + for key, value in state_dict.get("hooks", {}).items(): + for h in self._hooks: + try: + name = type(h).__qualname__ + except AttributeError: + continue + if name == key: + h.load_state_dict(value) + break + else: + logger.warning(f"Cannot find the hook '{key}', its state_dict is ignored.") + + +class SimpleTrainer(TrainerBase): + """ + A simple trainer for the most common type of task: + single-cost single-optimizer single-data-source iterative optimization, + optionally using data-parallelism. + It assumes that every step, you: + + 1. Compute the loss with a data from the data_loader. + 2. Compute the gradients with the above loss. + 3. Update the model with the optimizer. + + All other tasks during training (checkpointing, logging, evaluation, LR schedule) + are maintained by hooks, which can be registered by :meth:`TrainerBase.register_hooks`. + + If you want to do anything fancier than this, + either subclass TrainerBase and implement your own `run_step`, + or write your own training loop. + """ + + def __init__(self, model, data_loader, optimizer, gather_metric_period=1): + """ + Args: + model: a torch Module. Takes a data from data_loader and returns a + dict of losses. + data_loader: an iterable. Contains data to be used to call model. + optimizer: a torch optimizer. + gather_metric_period: an int. Every gather_metric_period iterations + the metrics are gathered from all the ranks to rank 0 and logged. + """ + super().__init__() + + """ + We set the model to training mode in the trainer. 
+ However it's valid to train a model that's in eval mode. + If you want your model (or a submodule of it) to behave + like evaluation during training, you can overwrite its train() method. + """ + model.train() + + self.model = model + self.data_loader = data_loader + # to access the data loader iterator, call `self._data_loader_iter` + self._data_loader_iter_obj = None + self.optimizer = optimizer + self.gather_metric_period = gather_metric_period + + def run_step(self): + """ + Implement the standard training logic described above. + """ + assert self.model.training, "[SimpleTrainer] model was changed to eval mode!" + start = time.perf_counter() + """ + If you want to do something with the data, you can wrap the dataloader. + """ + data = next(self._data_loader_iter) + data_time = time.perf_counter() - start + + """ + If you want to do something with the losses, you can wrap the model. + """ + loss_dict = self.model(data) + if isinstance(loss_dict, torch.Tensor): + losses = loss_dict + loss_dict = {"total_loss": loss_dict} + else: + losses = sum(loss_dict.values()) + + """ + If you need to accumulate gradients or do something similar, you can + wrap the optimizer with your custom `zero_grad()` method. + """ + self.optimizer.zero_grad() + losses.backward() + + self.after_backward() + + self._write_metrics(loss_dict, data_time) + + """ + If you need gradient clipping/scaling or other processing, you can + wrap the optimizer with your custom `step()` method. But it is + suboptimal as explained in https://arxiv.org/abs/2006.15704 Sec 3.2.4 + """ + self.optimizer.step() + + @property + def _data_loader_iter(self): + # only create the data loader iterator when it is used + if self._data_loader_iter_obj is None: + self._data_loader_iter_obj = iter(self.data_loader) + return self._data_loader_iter_obj + + def reset_data_loader(self, data_loader_builder): + """ + Delete and replace the current data loader with a new one, which will be created + by calling `data_loader_builder` (without argument). + """ + del self.data_loader + data_loader = data_loader_builder() + self.data_loader = data_loader + self._data_loader_iter_obj = None + + def _write_metrics( + self, + loss_dict: Mapping[str, torch.Tensor], + data_time: float, + prefix: str = "", + ) -> None: + if (self.iter + 1) % self.gather_metric_period == 0: + SimpleTrainer.write_metrics(loss_dict, data_time, prefix) + + @staticmethod + def write_metrics( + loss_dict: Mapping[str, torch.Tensor], + data_time: float, + prefix: str = "", + ) -> None: + """ + Args: + loss_dict (dict): dict of scalar losses + data_time (float): time taken by the dataloader iteration + prefix (str): prefix for logging keys + """ + metrics_dict = {k: v.detach().cpu().item() for k, v in loss_dict.items()} + metrics_dict["data_time"] = data_time + + # Gather metrics among all workers for logging + # This assumes we do DDP-style training, which is currently the only + # supported method in detectron2. + all_metrics_dict = comm.gather(metrics_dict) + + if comm.is_main_process(): + storage = get_event_storage() + + # data_time among workers can have high variance. The actual latency + # caused by data_time is the maximum among workers. 
+ data_time = np.max([x.pop("data_time") for x in all_metrics_dict]) + storage.put_scalar("data_time", data_time) + + # average the rest metrics + metrics_dict = { + k: np.mean([x[k] for x in all_metrics_dict]) for k in all_metrics_dict[0].keys() + } + total_losses_reduced = sum(metrics_dict.values()) + if not np.isfinite(total_losses_reduced): + raise FloatingPointError( + f"Loss became infinite or NaN at iteration={storage.iter}!\n" + f"loss_dict = {metrics_dict}" + ) + + storage.put_scalar("{}total_loss".format(prefix), total_losses_reduced) + if len(metrics_dict) > 1: + storage.put_scalars(**metrics_dict) + + def state_dict(self): + ret = super().state_dict() + ret["optimizer"] = self.optimizer.state_dict() + return ret + + def load_state_dict(self, state_dict): + super().load_state_dict(state_dict) + self.optimizer.load_state_dict(state_dict["optimizer"]) + + +class AMPTrainer(SimpleTrainer): + """ + Like :class:`SimpleTrainer`, but uses PyTorch's native automatic mixed precision + in the training loop. + """ + + def __init__( + self, + model, + data_loader, + optimizer, + gather_metric_period=1, + grad_scaler=None, + precision: torch.dtype = torch.float16, + log_grad_scaler: bool = False, + ): + """ + Args: + model, data_loader, optimizer, gather_metric_period: same as in :class:`SimpleTrainer`. + grad_scaler: torch GradScaler to automatically scale gradients. + precision: torch.dtype as the target precision to cast to in computations + """ + unsupported = "AMPTrainer does not support single-process multi-device training!" + if isinstance(model, DistributedDataParallel): + assert not (model.device_ids and len(model.device_ids) > 1), unsupported + assert not isinstance(model, DataParallel), unsupported + + super().__init__(model, data_loader, optimizer, gather_metric_period) + + if grad_scaler is None: + from torch.cuda.amp import GradScaler + + grad_scaler = GradScaler() + self.grad_scaler = grad_scaler + self.precision = precision + self.log_grad_scaler = log_grad_scaler + + def run_step(self): + """ + Implement the AMP training logic. + """ + assert self.model.training, "[AMPTrainer] model was changed to eval mode!" + assert torch.cuda.is_available(), "[AMPTrainer] CUDA is required for AMP training!" 
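+        # autocast runs the forward pass in the reduced precision given by
+        # ``self.precision`` (float16 by default); GradScaler rescales the loss so
+        # that small fp16 gradients do not underflow before the optimizer step.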
+ from torch.cuda.amp import autocast + + start = time.perf_counter() + data = next(self._data_loader_iter) + data_time = time.perf_counter() - start + + with autocast(dtype=self.precision): + loss_dict = self.model(data) + if isinstance(loss_dict, torch.Tensor): + losses = loss_dict + loss_dict = {"total_loss": loss_dict} + else: + losses = sum(loss_dict.values()) + + self.optimizer.zero_grad() + self.grad_scaler.scale(losses).backward() + + if self.log_grad_scaler: + storage = get_event_storage() + storage.put_scalar("[metric]grad_scaler", self.grad_scaler.get_scale()) + + self.after_backward() + + self._write_metrics(loss_dict, data_time) + + self.grad_scaler.step(self.optimizer) + self.grad_scaler.update() + + def state_dict(self): + ret = super().state_dict() + ret["grad_scaler"] = self.grad_scaler.state_dict() + return ret + + def load_state_dict(self, state_dict): + super().load_state_dict(state_dict) + self.grad_scaler.load_state_dict(state_dict["grad_scaler"]) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/evaluation/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/evaluation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d96609e8f2261a6800fe85fcf3e1eaeaa44455c6 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/evaluation/__init__.py @@ -0,0 +1,12 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from .cityscapes_evaluation import CityscapesInstanceEvaluator, CityscapesSemSegEvaluator +from .coco_evaluation import COCOEvaluator +from .rotated_coco_evaluation import RotatedCOCOEvaluator +from .evaluator import DatasetEvaluator, DatasetEvaluators, inference_context, inference_on_dataset +from .lvis_evaluation import LVISEvaluator +from .panoptic_evaluation import COCOPanopticEvaluator +from .pascal_voc_evaluation import PascalVOCDetectionEvaluator +from .sem_seg_evaluation import SemSegEvaluator +from .testing import print_csv_format, verify_results + +__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/evaluation/cityscapes_evaluation.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/evaluation/cityscapes_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..f5be637dc87b5ca8645563a4a921144f6c5fd877 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/evaluation/cityscapes_evaluation.py @@ -0,0 +1,197 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import glob +import logging +import numpy as np +import os +import tempfile +from collections import OrderedDict +import torch +from PIL import Image + +from annotator.oneformer.detectron2.data import MetadataCatalog +from annotator.oneformer.detectron2.utils import comm +from annotator.oneformer.detectron2.utils.file_io import PathManager + +from .evaluator import DatasetEvaluator + + +class CityscapesEvaluator(DatasetEvaluator): + """ + Base class for evaluation using cityscapes API. + """ + + def __init__(self, dataset_name): + """ + Args: + dataset_name (str): the name of the dataset. + It must have the following metadata associated with it: + "thing_classes", "gt_dir". 
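+
+        Example (illustrative; the dataset name is assumed to be registered in
+        :class:`MetadataCatalog`)::
+
+            evaluator = CityscapesInstanceEvaluator("cityscapes_fine_instance_seg_val")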
+ """ + self._metadata = MetadataCatalog.get(dataset_name) + self._cpu_device = torch.device("cpu") + self._logger = logging.getLogger(__name__) + + def reset(self): + self._working_dir = tempfile.TemporaryDirectory(prefix="cityscapes_eval_") + self._temp_dir = self._working_dir.name + # All workers will write to the same results directory + # TODO this does not work in distributed training + assert ( + comm.get_local_size() == comm.get_world_size() + ), "CityscapesEvaluator currently do not work with multiple machines." + self._temp_dir = comm.all_gather(self._temp_dir)[0] + if self._temp_dir != self._working_dir.name: + self._working_dir.cleanup() + self._logger.info( + "Writing cityscapes results to temporary directory {} ...".format(self._temp_dir) + ) + + +class CityscapesInstanceEvaluator(CityscapesEvaluator): + """ + Evaluate instance segmentation results on cityscapes dataset using cityscapes API. + + Note: + * It does not work in multi-machine distributed training. + * It contains a synchronization, therefore has to be used on all ranks. + * Only the main process runs evaluation. + """ + + def process(self, inputs, outputs): + from cityscapesscripts.helpers.labels import name2label + + for input, output in zip(inputs, outputs): + file_name = input["file_name"] + basename = os.path.splitext(os.path.basename(file_name))[0] + pred_txt = os.path.join(self._temp_dir, basename + "_pred.txt") + + if "instances" in output: + output = output["instances"].to(self._cpu_device) + num_instances = len(output) + with open(pred_txt, "w") as fout: + for i in range(num_instances): + pred_class = output.pred_classes[i] + classes = self._metadata.thing_classes[pred_class] + class_id = name2label[classes].id + score = output.scores[i] + mask = output.pred_masks[i].numpy().astype("uint8") + png_filename = os.path.join( + self._temp_dir, basename + "_{}_{}.png".format(i, classes) + ) + + Image.fromarray(mask * 255).save(png_filename) + fout.write( + "{} {} {}\n".format(os.path.basename(png_filename), class_id, score) + ) + else: + # Cityscapes requires a prediction file for every ground truth image. + with open(pred_txt, "w") as fout: + pass + + def evaluate(self): + """ + Returns: + dict: has a key "segm", whose value is a dict of "AP" and "AP50". + """ + comm.synchronize() + if comm.get_rank() > 0: + return + import cityscapesscripts.evaluation.evalInstanceLevelSemanticLabeling as cityscapes_eval + + self._logger.info("Evaluating results under {} ...".format(self._temp_dir)) + + # set some global states in cityscapes evaluation API, before evaluating + cityscapes_eval.args.predictionPath = os.path.abspath(self._temp_dir) + cityscapes_eval.args.predictionWalk = None + cityscapes_eval.args.JSONOutput = False + cityscapes_eval.args.colorized = False + cityscapes_eval.args.gtInstancesFile = os.path.join(self._temp_dir, "gtInstances.json") + + # These lines are adopted from + # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/evalInstanceLevelSemanticLabeling.py # noqa + gt_dir = PathManager.get_local_path(self._metadata.gt_dir) + groundTruthImgList = glob.glob(os.path.join(gt_dir, "*", "*_gtFine_instanceIds.png")) + assert len( + groundTruthImgList + ), "Cannot find any ground truth images to use for evaluation. 
Searched for: {}".format( + cityscapes_eval.args.groundTruthSearch + ) + predictionImgList = [] + for gt in groundTruthImgList: + predictionImgList.append(cityscapes_eval.getPrediction(gt, cityscapes_eval.args)) + results = cityscapes_eval.evaluateImgLists( + predictionImgList, groundTruthImgList, cityscapes_eval.args + )["averages"] + + ret = OrderedDict() + ret["segm"] = {"AP": results["allAp"] * 100, "AP50": results["allAp50%"] * 100} + self._working_dir.cleanup() + return ret + + +class CityscapesSemSegEvaluator(CityscapesEvaluator): + """ + Evaluate semantic segmentation results on cityscapes dataset using cityscapes API. + + Note: + * It does not work in multi-machine distributed training. + * It contains a synchronization, therefore has to be used on all ranks. + * Only the main process runs evaluation. + """ + + def process(self, inputs, outputs): + from cityscapesscripts.helpers.labels import trainId2label + + for input, output in zip(inputs, outputs): + file_name = input["file_name"] + basename = os.path.splitext(os.path.basename(file_name))[0] + pred_filename = os.path.join(self._temp_dir, basename + "_pred.png") + + output = output["sem_seg"].argmax(dim=0).to(self._cpu_device).numpy() + pred = 255 * np.ones(output.shape, dtype=np.uint8) + for train_id, label in trainId2label.items(): + if label.ignoreInEval: + continue + pred[output == train_id] = label.id + Image.fromarray(pred).save(pred_filename) + + def evaluate(self): + comm.synchronize() + if comm.get_rank() > 0: + return + # Load the Cityscapes eval script *after* setting the required env var, + # since the script reads CITYSCAPES_DATASET into global variables at load time. + import cityscapesscripts.evaluation.evalPixelLevelSemanticLabeling as cityscapes_eval + + self._logger.info("Evaluating results under {} ...".format(self._temp_dir)) + + # set some global states in cityscapes evaluation API, before evaluating + cityscapes_eval.args.predictionPath = os.path.abspath(self._temp_dir) + cityscapes_eval.args.predictionWalk = None + cityscapes_eval.args.JSONOutput = False + cityscapes_eval.args.colorized = False + + # These lines are adopted from + # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/evalPixelLevelSemanticLabeling.py # noqa + gt_dir = PathManager.get_local_path(self._metadata.gt_dir) + groundTruthImgList = glob.glob(os.path.join(gt_dir, "*", "*_gtFine_labelIds.png")) + assert len( + groundTruthImgList + ), "Cannot find any ground truth images to use for evaluation. 
Searched for: {}".format( + cityscapes_eval.args.groundTruthSearch + ) + predictionImgList = [] + for gt in groundTruthImgList: + predictionImgList.append(cityscapes_eval.getPrediction(cityscapes_eval.args, gt)) + results = cityscapes_eval.evaluateImgLists( + predictionImgList, groundTruthImgList, cityscapes_eval.args + ) + ret = OrderedDict() + ret["sem_seg"] = { + "IoU": 100.0 * results["averageScoreClasses"], + "iIoU": 100.0 * results["averageScoreInstClasses"], + "IoU_sup": 100.0 * results["averageScoreCategories"], + "iIoU_sup": 100.0 * results["averageScoreInstCategories"], + } + self._working_dir.cleanup() + return ret diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/evaluation/coco_evaluation.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/evaluation/coco_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..fdc41798537d3b2e6fc7096c9f4bebd724f1e395 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/evaluation/coco_evaluation.py @@ -0,0 +1,722 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import contextlib +import copy +import io +import itertools +import json +import logging +import numpy as np +import os +import pickle +from collections import OrderedDict +import annotator.oneformer.pycocotools.mask as mask_util +import torch +from annotator.oneformer.pycocotools.coco import COCO +from annotator.oneformer.pycocotools.cocoeval import COCOeval +from tabulate import tabulate + +import annotator.oneformer.detectron2.utils.comm as comm +from annotator.oneformer.detectron2.config import CfgNode +from annotator.oneformer.detectron2.data import MetadataCatalog +from annotator.oneformer.detectron2.data.datasets.coco import convert_to_coco_json +from annotator.oneformer.detectron2.structures import Boxes, BoxMode, pairwise_iou +from annotator.oneformer.detectron2.utils.file_io import PathManager +from annotator.oneformer.detectron2.utils.logger import create_small_table + +from .evaluator import DatasetEvaluator + +try: + from annotator.oneformer.detectron2.evaluation.fast_eval_api import COCOeval_opt +except ImportError: + COCOeval_opt = COCOeval + + +class COCOEvaluator(DatasetEvaluator): + """ + Evaluate AR for object proposals, AP for instance detection/segmentation, AP + for keypoint detection outputs using COCO's metrics. + See http://cocodataset.org/#detection-eval and + http://cocodataset.org/#keypoints-eval to understand its metrics. + The metrics range from 0 to 100 (instead of 0 to 1), where a -1 or NaN means + the metric cannot be computed (e.g. due to no predictions made). + + In addition to COCO, this evaluator is able to support any bounding box detection, + instance segmentation, or keypoint detection dataset. + """ + + def __init__( + self, + dataset_name, + tasks=None, + distributed=True, + output_dir=None, + *, + max_dets_per_image=None, + use_fast_impl=True, + kpt_oks_sigmas=(), + allow_cached_coco=True, + ): + """ + Args: + dataset_name (str): name of the dataset to be evaluated. + It must have either the following corresponding metadata: + + "json_file": the path to the COCO format annotation + + Or it must be in detectron2's standard dataset format + so it can be converted to COCO format automatically. + tasks (tuple[str]): tasks that can be evaluated under the given + configuration. A task is one of "bbox", "segm", "keypoints". + By default, will infer this automatically from predictions. 
+ distributed (True): if True, will collect results from all ranks and run evaluation + in the main process. + Otherwise, will only evaluate the results in the current process. + output_dir (str): optional, an output directory to dump all + results predicted on the dataset. The dump contains two files: + + 1. "instances_predictions.pth" a file that can be loaded with `torch.load` and + contains all the results in the format they are produced by the model. + 2. "coco_instances_results.json" a json file in COCO's result format. + max_dets_per_image (int): limit on the maximum number of detections per image. + By default in COCO, this limit is to 100, but this can be customized + to be greater, as is needed in evaluation metrics AP fixed and AP pool + (see https://arxiv.org/pdf/2102.01066.pdf) + This doesn't affect keypoint evaluation. + use_fast_impl (bool): use a fast but **unofficial** implementation to compute AP. + Although the results should be very close to the official implementation in COCO + API, it is still recommended to compute results with the official API for use in + papers. The faster implementation also uses more RAM. + kpt_oks_sigmas (list[float]): The sigmas used to calculate keypoint OKS. + See http://cocodataset.org/#keypoints-eval + When empty, it will use the defaults in COCO. + Otherwise it should be the same length as ROI_KEYPOINT_HEAD.NUM_KEYPOINTS. + allow_cached_coco (bool): Whether to use cached coco json from previous validation + runs. You should set this to False if you need to use different validation data. + Defaults to True. + """ + self._logger = logging.getLogger(__name__) + self._distributed = distributed + self._output_dir = output_dir + + if use_fast_impl and (COCOeval_opt is COCOeval): + self._logger.info("Fast COCO eval is not built. Falling back to official COCO eval.") + use_fast_impl = False + self._use_fast_impl = use_fast_impl + + # COCOeval requires the limit on the number of detections per image (maxDets) to be a list + # with at least 3 elements. The default maxDets in COCOeval is [1, 10, 100], in which the + # 3rd element (100) is used as the limit on the number of detections per image when + # evaluating AP. COCOEvaluator expects an integer for max_dets_per_image, so for COCOeval, + # we reformat max_dets_per_image into [1, 10, max_dets_per_image], based on the defaults. + if max_dets_per_image is None: + max_dets_per_image = [1, 10, 100] + else: + max_dets_per_image = [1, 10, max_dets_per_image] + self._max_dets_per_image = max_dets_per_image + + if tasks is not None and isinstance(tasks, CfgNode): + kpt_oks_sigmas = ( + tasks.TEST.KEYPOINT_OKS_SIGMAS if not kpt_oks_sigmas else kpt_oks_sigmas + ) + self._logger.warn( + "COCO Evaluator instantiated using config, this is deprecated behavior." + " Please pass in explicit arguments instead." + ) + self._tasks = None # Infering it from predictions should be better + else: + self._tasks = tasks + + self._cpu_device = torch.device("cpu") + + self._metadata = MetadataCatalog.get(dataset_name) + if not hasattr(self._metadata, "json_file"): + if output_dir is None: + raise ValueError( + "output_dir must be provided to COCOEvaluator " + "for datasets not in COCO format." 
+ ) + self._logger.info(f"Trying to convert '{dataset_name}' to COCO format ...") + + cache_path = os.path.join(output_dir, f"{dataset_name}_coco_format.json") + self._metadata.json_file = cache_path + convert_to_coco_json(dataset_name, cache_path, allow_cached=allow_cached_coco) + + json_file = PathManager.get_local_path(self._metadata.json_file) + with contextlib.redirect_stdout(io.StringIO()): + self._coco_api = COCO(json_file) + + # Test set json files do not contain annotations (evaluation must be + # performed using the COCO evaluation server). + self._do_evaluation = "annotations" in self._coco_api.dataset + if self._do_evaluation: + self._kpt_oks_sigmas = kpt_oks_sigmas + + def reset(self): + self._predictions = [] + + def process(self, inputs, outputs): + """ + Args: + inputs: the inputs to a COCO model (e.g., GeneralizedRCNN). + It is a list of dict. Each dict corresponds to an image and + contains keys like "height", "width", "file_name", "image_id". + outputs: the outputs of a COCO model. It is a list of dicts with key + "instances" that contains :class:`Instances`. + """ + for input, output in zip(inputs, outputs): + prediction = {"image_id": input["image_id"]} + + if "instances" in output: + instances = output["instances"].to(self._cpu_device) + prediction["instances"] = instances_to_coco_json(instances, input["image_id"]) + if "proposals" in output: + prediction["proposals"] = output["proposals"].to(self._cpu_device) + if len(prediction) > 1: + self._predictions.append(prediction) + + def evaluate(self, img_ids=None): + """ + Args: + img_ids: a list of image IDs to evaluate on. Default to None for the whole dataset + """ + if self._distributed: + comm.synchronize() + predictions = comm.gather(self._predictions, dst=0) + predictions = list(itertools.chain(*predictions)) + + if not comm.is_main_process(): + return {} + else: + predictions = self._predictions + + if len(predictions) == 0: + self._logger.warning("[COCOEvaluator] Did not receive valid predictions.") + return {} + + if self._output_dir: + PathManager.mkdirs(self._output_dir) + file_path = os.path.join(self._output_dir, "instances_predictions.pth") + with PathManager.open(file_path, "wb") as f: + torch.save(predictions, f) + + self._results = OrderedDict() + if "proposals" in predictions[0]: + self._eval_box_proposals(predictions) + if "instances" in predictions[0]: + self._eval_predictions(predictions, img_ids=img_ids) + # Copy so the caller can do whatever with results + return copy.deepcopy(self._results) + + def _tasks_from_predictions(self, predictions): + """ + Get COCO API "tasks" (i.e. iou_type) from COCO-format predictions. + """ + tasks = {"bbox"} + for pred in predictions: + if "segmentation" in pred: + tasks.add("segm") + if "keypoints" in pred: + tasks.add("keypoints") + return sorted(tasks) + + def _eval_predictions(self, predictions, img_ids=None): + """ + Evaluate predictions. Fill self._results with the metrics of the tasks. 
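Illustrative shape of one entry in the ``coco_results`` list this method consumes (a sketch only; the values are made up, the fields mirror what ``instances_to_coco_json`` below produces):

.. code-block:: python

    # one element of coco_results, as produced by instances_to_coco_json();
    # all values here are invented for illustration
    record = {
        "image_id": 139,
        "category_id": 2,                   # contiguous id; unmapped below when a dataset mapping exists
        "bbox": [10.0, 20.0, 50.0, 30.0],   # XYWH_ABS
        "score": 0.92,
        # "segmentation": <RLE dict>,       # present only when masks are predicted
    }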
+ """ + self._logger.info("Preparing results for COCO format ...") + coco_results = list(itertools.chain(*[x["instances"] for x in predictions])) + tasks = self._tasks or self._tasks_from_predictions(coco_results) + + # unmap the category ids for COCO + if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"): + dataset_id_to_contiguous_id = self._metadata.thing_dataset_id_to_contiguous_id + all_contiguous_ids = list(dataset_id_to_contiguous_id.values()) + num_classes = len(all_contiguous_ids) + assert min(all_contiguous_ids) == 0 and max(all_contiguous_ids) == num_classes - 1 + + reverse_id_mapping = {v: k for k, v in dataset_id_to_contiguous_id.items()} + for result in coco_results: + category_id = result["category_id"] + assert category_id < num_classes, ( + f"A prediction has class={category_id}, " + f"but the dataset only has {num_classes} classes and " + f"predicted class id should be in [0, {num_classes - 1}]." + ) + result["category_id"] = reverse_id_mapping[category_id] + + if self._output_dir: + file_path = os.path.join(self._output_dir, "coco_instances_results.json") + self._logger.info("Saving results to {}".format(file_path)) + with PathManager.open(file_path, "w") as f: + f.write(json.dumps(coco_results)) + f.flush() + + if not self._do_evaluation: + self._logger.info("Annotations are not available for evaluation.") + return + + self._logger.info( + "Evaluating predictions with {} COCO API...".format( + "unofficial" if self._use_fast_impl else "official" + ) + ) + for task in sorted(tasks): + assert task in {"bbox", "segm", "keypoints"}, f"Got unknown task: {task}!" + coco_eval = ( + _evaluate_predictions_on_coco( + self._coco_api, + coco_results, + task, + kpt_oks_sigmas=self._kpt_oks_sigmas, + cocoeval_fn=COCOeval_opt if self._use_fast_impl else COCOeval, + img_ids=img_ids, + max_dets_per_image=self._max_dets_per_image, + ) + if len(coco_results) > 0 + else None # cocoapi does not handle empty results very well + ) + + res = self._derive_coco_results( + coco_eval, task, class_names=self._metadata.get("thing_classes") + ) + self._results[task] = res + + def _eval_box_proposals(self, predictions): + """ + Evaluate the box proposals in predictions. + Fill self._results with the metrics for "box_proposals" task. + """ + if self._output_dir: + # Saving generated box proposals to file. + # Predicted box_proposals are in XYXY_ABS mode. 
+ bbox_mode = BoxMode.XYXY_ABS.value + ids, boxes, objectness_logits = [], [], [] + for prediction in predictions: + ids.append(prediction["image_id"]) + boxes.append(prediction["proposals"].proposal_boxes.tensor.numpy()) + objectness_logits.append(prediction["proposals"].objectness_logits.numpy()) + + proposal_data = { + "boxes": boxes, + "objectness_logits": objectness_logits, + "ids": ids, + "bbox_mode": bbox_mode, + } + with PathManager.open(os.path.join(self._output_dir, "box_proposals.pkl"), "wb") as f: + pickle.dump(proposal_data, f) + + if not self._do_evaluation: + self._logger.info("Annotations are not available for evaluation.") + return + + self._logger.info("Evaluating bbox proposals ...") + res = {} + areas = {"all": "", "small": "s", "medium": "m", "large": "l"} + for limit in [100, 1000]: + for area, suffix in areas.items(): + stats = _evaluate_box_proposals(predictions, self._coco_api, area=area, limit=limit) + key = "AR{}@{:d}".format(suffix, limit) + res[key] = float(stats["ar"].item() * 100) + self._logger.info("Proposal metrics: \n" + create_small_table(res)) + self._results["box_proposals"] = res + + def _derive_coco_results(self, coco_eval, iou_type, class_names=None): + """ + Derive the desired score numbers from summarized COCOeval. + + Args: + coco_eval (None or COCOEval): None represents no predictions from model. + iou_type (str): + class_names (None or list[str]): if provided, will use it to predict + per-category AP. + + Returns: + a dict of {metric name: score} + """ + + metrics = { + "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl"], + "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl"], + "keypoints": ["AP", "AP50", "AP75", "APm", "APl"], + }[iou_type] + + if coco_eval is None: + self._logger.warn("No predictions from the model!") + return {metric: float("nan") for metric in metrics} + + # the standard metrics + results = { + metric: float(coco_eval.stats[idx] * 100 if coco_eval.stats[idx] >= 0 else "nan") + for idx, metric in enumerate(metrics) + } + self._logger.info( + "Evaluation results for {}: \n".format(iou_type) + create_small_table(results) + ) + if not np.isfinite(sum(results.values())): + self._logger.info("Some metrics cannot be computed and is shown as NaN.") + + if class_names is None or len(class_names) <= 1: + return results + # Compute per-category AP + # from https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L222-L252 # noqa + precisions = coco_eval.eval["precision"] + # precision has dims (iou, recall, cls, area range, max dets) + assert len(class_names) == precisions.shape[2] + + results_per_category = [] + for idx, name in enumerate(class_names): + # area range index 0: all area ranges + # max dets index -1: typically 100 per image + precision = precisions[:, :, idx, 0, -1] + precision = precision[precision > -1] + ap = np.mean(precision) if precision.size else float("nan") + results_per_category.append(("{}".format(name), float(ap * 100))) + + # tabulate it + N_COLS = min(6, len(results_per_category) * 2) + results_flatten = list(itertools.chain(*results_per_category)) + results_2d = itertools.zip_longest(*[results_flatten[i::N_COLS] for i in range(N_COLS)]) + table = tabulate( + results_2d, + tablefmt="pipe", + floatfmt=".3f", + headers=["category", "AP"] * (N_COLS // 2), + numalign="left", + ) + self._logger.info("Per-category {} AP: \n".format(iou_type) + table) + + results.update({"AP-" + name: ap for name, ap in results_per_category}) + return 
results + + +def instances_to_coco_json(instances, img_id): + """ + Dump an "Instances" object to a COCO-format json that's used for evaluation. + + Args: + instances (Instances): + img_id (int): the image id + + Returns: + list[dict]: list of json annotations in COCO format. + """ + num_instance = len(instances) + if num_instance == 0: + return [] + + boxes = instances.pred_boxes.tensor.numpy() + boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS) + boxes = boxes.tolist() + scores = instances.scores.tolist() + classes = instances.pred_classes.tolist() + + has_mask = instances.has("pred_masks") + if has_mask: + # use RLE to encode the masks, because they are too large and takes memory + # since this evaluator stores outputs of the entire dataset + rles = [ + mask_util.encode(np.array(mask[:, :, None], order="F", dtype="uint8"))[0] + for mask in instances.pred_masks + ] + for rle in rles: + # "counts" is an array encoded by mask_util as a byte-stream. Python3's + # json writer which always produces strings cannot serialize a bytestream + # unless you decode it. Thankfully, utf-8 works out (which is also what + # the annotator.oneformer.pycocotools/_mask.pyx does). + rle["counts"] = rle["counts"].decode("utf-8") + + has_keypoints = instances.has("pred_keypoints") + if has_keypoints: + keypoints = instances.pred_keypoints + + results = [] + for k in range(num_instance): + result = { + "image_id": img_id, + "category_id": classes[k], + "bbox": boxes[k], + "score": scores[k], + } + if has_mask: + result["segmentation"] = rles[k] + if has_keypoints: + # In COCO annotations, + # keypoints coordinates are pixel indices. + # However our predictions are floating point coordinates. + # Therefore we subtract 0.5 to be consistent with the annotation format. + # This is the inverse of data loading logic in `datasets/coco.py`. + keypoints[k][:, :2] -= 0.5 + result["keypoints"] = keypoints[k].flatten().tolist() + results.append(result) + return results + + +# inspired from Detectron: +# https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L255 # noqa +def _evaluate_box_proposals(dataset_predictions, coco_api, thresholds=None, area="all", limit=None): + """ + Evaluate detection proposal recall metrics. This function is a much + faster alternative to the official COCO API recall evaluation code. However, + it produces slightly different results. 
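A minimal, self-contained sketch (illustrative only, not part of the original API) of the greedy matching performed in the loop below: each pass records the best-covered ground-truth box, then removes it and its matching proposal from further consideration:

.. code-block:: python

    import torch

    # hypothetical IoU matrix: rows = proposals, columns = gt boxes
    overlaps = torch.tensor([[0.9, 0.1],
                             [0.2, 0.6]])
    recorded = torch.zeros(overlaps.shape[1])
    for j in range(overlaps.shape[1]):
        max_overlaps, argmax_overlaps = overlaps.max(dim=0)  # best proposal per gt
        gt_ovr, gt_ind = max_overlaps.max(dim=0)             # best-covered gt box
        box_ind = argmax_overlaps[gt_ind]                    # proposal covering it
        recorded[j] = overlaps[box_ind, gt_ind]
        overlaps[box_ind, :] = -1                            # mark proposal as used
        overlaps[:, gt_ind] = -1                             # mark gt as used
    # recorded == tensor([0.9, 0.6])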
+ """ + # Record max overlap value for each gt box + # Return vector of overlap values + areas = { + "all": 0, + "small": 1, + "medium": 2, + "large": 3, + "96-128": 4, + "128-256": 5, + "256-512": 6, + "512-inf": 7, + } + area_ranges = [ + [0**2, 1e5**2], # all + [0**2, 32**2], # small + [32**2, 96**2], # medium + [96**2, 1e5**2], # large + [96**2, 128**2], # 96-128 + [128**2, 256**2], # 128-256 + [256**2, 512**2], # 256-512 + [512**2, 1e5**2], + ] # 512-inf + assert area in areas, "Unknown area range: {}".format(area) + area_range = area_ranges[areas[area]] + gt_overlaps = [] + num_pos = 0 + + for prediction_dict in dataset_predictions: + predictions = prediction_dict["proposals"] + + # sort predictions in descending order + # TODO maybe remove this and make it explicit in the documentation + inds = predictions.objectness_logits.sort(descending=True)[1] + predictions = predictions[inds] + + ann_ids = coco_api.getAnnIds(imgIds=prediction_dict["image_id"]) + anno = coco_api.loadAnns(ann_ids) + gt_boxes = [ + BoxMode.convert(obj["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) + for obj in anno + if obj["iscrowd"] == 0 + ] + gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4) # guard against no boxes + gt_boxes = Boxes(gt_boxes) + gt_areas = torch.as_tensor([obj["area"] for obj in anno if obj["iscrowd"] == 0]) + + if len(gt_boxes) == 0 or len(predictions) == 0: + continue + + valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <= area_range[1]) + gt_boxes = gt_boxes[valid_gt_inds] + + num_pos += len(gt_boxes) + + if len(gt_boxes) == 0: + continue + + if limit is not None and len(predictions) > limit: + predictions = predictions[:limit] + + overlaps = pairwise_iou(predictions.proposal_boxes, gt_boxes) + + _gt_overlaps = torch.zeros(len(gt_boxes)) + for j in range(min(len(predictions), len(gt_boxes))): + # find which proposal box maximally covers each gt box + # and get the iou amount of coverage for each gt box + max_overlaps, argmax_overlaps = overlaps.max(dim=0) + + # find which gt box is 'best' covered (i.e. 'best' = most iou) + gt_ovr, gt_ind = max_overlaps.max(dim=0) + assert gt_ovr >= 0 + # find the proposal box that covers the best covered gt box + box_ind = argmax_overlaps[gt_ind] + # record the iou coverage of this gt box + _gt_overlaps[j] = overlaps[box_ind, gt_ind] + assert _gt_overlaps[j] == gt_ovr + # mark the proposal box and the gt box as used + overlaps[box_ind, :] = -1 + overlaps[:, gt_ind] = -1 + + # append recorded iou coverage level + gt_overlaps.append(_gt_overlaps) + gt_overlaps = ( + torch.cat(gt_overlaps, dim=0) if len(gt_overlaps) else torch.zeros(0, dtype=torch.float32) + ) + gt_overlaps, _ = torch.sort(gt_overlaps) + + if thresholds is None: + step = 0.05 + thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32) + recalls = torch.zeros_like(thresholds) + # compute recall for each iou threshold + for i, t in enumerate(thresholds): + recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos) + # ar = 2 * np.trapz(recalls, thresholds) + ar = recalls.mean() + return { + "ar": ar, + "recalls": recalls, + "thresholds": thresholds, + "gt_overlaps": gt_overlaps, + "num_pos": num_pos, + } + + +def _evaluate_predictions_on_coco( + coco_gt, + coco_results, + iou_type, + kpt_oks_sigmas=None, + cocoeval_fn=COCOeval_opt, + img_ids=None, + max_dets_per_image=None, +): + """ + Evaluate the coco results using COCOEval API. 
+ """ + assert len(coco_results) > 0 + + if iou_type == "segm": + coco_results = copy.deepcopy(coco_results) + # When evaluating mask AP, if the results contain bbox, cocoapi will + # use the box area as the area of the instance, instead of the mask area. + # This leads to a different definition of small/medium/large. + # We remove the bbox field to let mask AP use mask area. + for c in coco_results: + c.pop("bbox", None) + + coco_dt = coco_gt.loadRes(coco_results) + coco_eval = cocoeval_fn(coco_gt, coco_dt, iou_type) + # For COCO, the default max_dets_per_image is [1, 10, 100]. + if max_dets_per_image is None: + max_dets_per_image = [1, 10, 100] # Default from COCOEval + else: + assert ( + len(max_dets_per_image) >= 3 + ), "COCOeval requires maxDets (and max_dets_per_image) to have length at least 3" + # In the case that user supplies a custom input for max_dets_per_image, + # apply COCOevalMaxDets to evaluate AP with the custom input. + if max_dets_per_image[2] != 100: + coco_eval = COCOevalMaxDets(coco_gt, coco_dt, iou_type) + if iou_type != "keypoints": + coco_eval.params.maxDets = max_dets_per_image + + if img_ids is not None: + coco_eval.params.imgIds = img_ids + + if iou_type == "keypoints": + # Use the COCO default keypoint OKS sigmas unless overrides are specified + if kpt_oks_sigmas: + assert hasattr(coco_eval.params, "kpt_oks_sigmas"), "annotator.oneformer.pycocotools is too old!" + coco_eval.params.kpt_oks_sigmas = np.array(kpt_oks_sigmas) + # COCOAPI requires every detection and every gt to have keypoints, so + # we just take the first entry from both + num_keypoints_dt = len(coco_results[0]["keypoints"]) // 3 + num_keypoints_gt = len(next(iter(coco_gt.anns.values()))["keypoints"]) // 3 + num_keypoints_oks = len(coco_eval.params.kpt_oks_sigmas) + assert num_keypoints_oks == num_keypoints_dt == num_keypoints_gt, ( + f"[COCOEvaluator] Prediction contain {num_keypoints_dt} keypoints. " + f"Ground truth contains {num_keypoints_gt} keypoints. " + f"The length of cfg.TEST.KEYPOINT_OKS_SIGMAS is {num_keypoints_oks}. " + "They have to agree with each other. For meaning of OKS, please refer to " + "http://cocodataset.org/#keypoints-eval." 
+ ) + + coco_eval.evaluate() + coco_eval.accumulate() + coco_eval.summarize() + + return coco_eval + + +class COCOevalMaxDets(COCOeval): + """ + Modified version of COCOeval for evaluating AP with a custom + maxDets (by default for COCO, maxDets is 100) + """ + + def summarize(self): + """ + Compute and display summary metrics for evaluation results given + a custom value for max_dets_per_image + """ + + def _summarize(ap=1, iouThr=None, areaRng="all", maxDets=100): + p = self.params + iStr = " {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}" + titleStr = "Average Precision" if ap == 1 else "Average Recall" + typeStr = "(AP)" if ap == 1 else "(AR)" + iouStr = ( + "{:0.2f}:{:0.2f}".format(p.iouThrs[0], p.iouThrs[-1]) + if iouThr is None + else "{:0.2f}".format(iouThr) + ) + + aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng] + mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets] + if ap == 1: + # dimension of precision: [TxRxKxAxM] + s = self.eval["precision"] + # IoU + if iouThr is not None: + t = np.where(iouThr == p.iouThrs)[0] + s = s[t] + s = s[:, :, :, aind, mind] + else: + # dimension of recall: [TxKxAxM] + s = self.eval["recall"] + if iouThr is not None: + t = np.where(iouThr == p.iouThrs)[0] + s = s[t] + s = s[:, :, aind, mind] + if len(s[s > -1]) == 0: + mean_s = -1 + else: + mean_s = np.mean(s[s > -1]) + print(iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s)) + return mean_s + + def _summarizeDets(): + stats = np.zeros((12,)) + # Evaluate AP using the custom limit on maximum detections per image + stats[0] = _summarize(1, maxDets=self.params.maxDets[2]) + stats[1] = _summarize(1, iouThr=0.5, maxDets=self.params.maxDets[2]) + stats[2] = _summarize(1, iouThr=0.75, maxDets=self.params.maxDets[2]) + stats[3] = _summarize(1, areaRng="small", maxDets=self.params.maxDets[2]) + stats[4] = _summarize(1, areaRng="medium", maxDets=self.params.maxDets[2]) + stats[5] = _summarize(1, areaRng="large", maxDets=self.params.maxDets[2]) + stats[6] = _summarize(0, maxDets=self.params.maxDets[0]) + stats[7] = _summarize(0, maxDets=self.params.maxDets[1]) + stats[8] = _summarize(0, maxDets=self.params.maxDets[2]) + stats[9] = _summarize(0, areaRng="small", maxDets=self.params.maxDets[2]) + stats[10] = _summarize(0, areaRng="medium", maxDets=self.params.maxDets[2]) + stats[11] = _summarize(0, areaRng="large", maxDets=self.params.maxDets[2]) + return stats + + def _summarizeKps(): + stats = np.zeros((10,)) + stats[0] = _summarize(1, maxDets=20) + stats[1] = _summarize(1, maxDets=20, iouThr=0.5) + stats[2] = _summarize(1, maxDets=20, iouThr=0.75) + stats[3] = _summarize(1, maxDets=20, areaRng="medium") + stats[4] = _summarize(1, maxDets=20, areaRng="large") + stats[5] = _summarize(0, maxDets=20) + stats[6] = _summarize(0, maxDets=20, iouThr=0.5) + stats[7] = _summarize(0, maxDets=20, iouThr=0.75) + stats[8] = _summarize(0, maxDets=20, areaRng="medium") + stats[9] = _summarize(0, maxDets=20, areaRng="large") + return stats + + if not self.eval: + raise Exception("Please run accumulate() first") + iouType = self.params.iouType + if iouType == "segm" or iouType == "bbox": + summarize = _summarizeDets + elif iouType == "keypoints": + summarize = _summarizeKps + self.stats = summarize() + + def __str__(self): + self.summarize() diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/evaluation/evaluator.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/evaluation/evaluator.py new 
file mode 100644 index 0000000000000000000000000000000000000000..9cddc296432cbb6f11caf3c3be98833a50778ffb --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/evaluation/evaluator.py @@ -0,0 +1,224 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import datetime +import logging +import time +from collections import OrderedDict, abc +from contextlib import ExitStack, contextmanager +from typing import List, Union +import torch +from torch import nn + +from annotator.oneformer.detectron2.utils.comm import get_world_size, is_main_process +from annotator.oneformer.detectron2.utils.logger import log_every_n_seconds + + +class DatasetEvaluator: + """ + Base class for a dataset evaluator. + + The function :func:`inference_on_dataset` runs the model over + all samples in the dataset, and have a DatasetEvaluator to process the inputs/outputs. + + This class will accumulate information of the inputs/outputs (by :meth:`process`), + and produce evaluation results in the end (by :meth:`evaluate`). + """ + + def reset(self): + """ + Preparation for a new round of evaluation. + Should be called before starting a round of evaluation. + """ + pass + + def process(self, inputs, outputs): + """ + Process the pair of inputs and outputs. + If they contain batches, the pairs can be consumed one-by-one using `zip`: + + .. code-block:: python + + for input_, output in zip(inputs, outputs): + # do evaluation on single input/output pair + ... + + Args: + inputs (list): the inputs that's used to call the model. + outputs (list): the return value of `model(inputs)` + """ + pass + + def evaluate(self): + """ + Evaluate/summarize the performance, after processing all input/output pairs. + + Returns: + dict: + A new evaluator class can return a dict of arbitrary format + as long as the user can process the results. + In our train_net.py, we expect the following format: + + * key: the name of the task (e.g., bbox) + * value: a dict of {metric name: score}, e.g.: {"AP50": 80} + """ + pass + + +class DatasetEvaluators(DatasetEvaluator): + """ + Wrapper class to combine multiple :class:`DatasetEvaluator` instances. + + This class dispatches every evaluation call to + all of its :class:`DatasetEvaluator`. + """ + + def __init__(self, evaluators): + """ + Args: + evaluators (list): the evaluators to combine. + """ + super().__init__() + self._evaluators = evaluators + + def reset(self): + for evaluator in self._evaluators: + evaluator.reset() + + def process(self, inputs, outputs): + for evaluator in self._evaluators: + evaluator.process(inputs, outputs) + + def evaluate(self): + results = OrderedDict() + for evaluator in self._evaluators: + result = evaluator.evaluate() + if is_main_process() and result is not None: + for k, v in result.items(): + assert ( + k not in results + ), "Different evaluators produce results with the same key {}".format(k) + results[k] = v + return results + + +def inference_on_dataset( + model, data_loader, evaluator: Union[DatasetEvaluator, List[DatasetEvaluator], None] +): + """ + Run model on the data_loader and evaluate the metrics with evaluator. + Also benchmark the inference speed of `model.__call__` accurately. + The model will be used in eval mode. + + Args: + model (callable): a callable which takes an object from + `data_loader` and returns some outputs. + + If it's an nn.Module, it will be temporarily set to `eval` mode. 
+ If you wish to evaluate a model in `training` mode instead, you can + wrap the given model and override its behavior of `.eval()` and `.train()`. + data_loader: an iterable object with a length. + The elements it generates will be the inputs to the model. + evaluator: the evaluator(s) to run. Use `None` if you only want to benchmark, + but don't want to do any evaluation. + + Returns: + The return value of `evaluator.evaluate()` + """ + num_devices = get_world_size() + logger = logging.getLogger(__name__) + logger.info("Start inference on {} batches".format(len(data_loader))) + + total = len(data_loader) # inference data loader must have a fixed length + if evaluator is None: + # create a no-op evaluator + evaluator = DatasetEvaluators([]) + if isinstance(evaluator, abc.MutableSequence): + evaluator = DatasetEvaluators(evaluator) + evaluator.reset() + + num_warmup = min(5, total - 1) + start_time = time.perf_counter() + total_data_time = 0 + total_compute_time = 0 + total_eval_time = 0 + with ExitStack() as stack: + if isinstance(model, nn.Module): + stack.enter_context(inference_context(model)) + stack.enter_context(torch.no_grad()) + + start_data_time = time.perf_counter() + for idx, inputs in enumerate(data_loader): + total_data_time += time.perf_counter() - start_data_time + if idx == num_warmup: + start_time = time.perf_counter() + total_data_time = 0 + total_compute_time = 0 + total_eval_time = 0 + + start_compute_time = time.perf_counter() + outputs = model(inputs) + if torch.cuda.is_available(): + torch.cuda.synchronize() + total_compute_time += time.perf_counter() - start_compute_time + + start_eval_time = time.perf_counter() + evaluator.process(inputs, outputs) + total_eval_time += time.perf_counter() - start_eval_time + + iters_after_start = idx + 1 - num_warmup * int(idx >= num_warmup) + data_seconds_per_iter = total_data_time / iters_after_start + compute_seconds_per_iter = total_compute_time / iters_after_start + eval_seconds_per_iter = total_eval_time / iters_after_start + total_seconds_per_iter = (time.perf_counter() - start_time) / iters_after_start + if idx >= num_warmup * 2 or compute_seconds_per_iter > 5: + eta = datetime.timedelta(seconds=int(total_seconds_per_iter * (total - idx - 1))) + log_every_n_seconds( + logging.INFO, + ( + f"Inference done {idx + 1}/{total}. " + f"Dataloading: {data_seconds_per_iter:.4f} s/iter. " + f"Inference: {compute_seconds_per_iter:.4f} s/iter. " + f"Eval: {eval_seconds_per_iter:.4f} s/iter. " + f"Total: {total_seconds_per_iter:.4f} s/iter. " + f"ETA={eta}" + ), + n=5, + ) + start_data_time = time.perf_counter() + + # Measure the time only for this worker (before the synchronization barrier) + total_time = time.perf_counter() - start_time + total_time_str = str(datetime.timedelta(seconds=total_time)) + # NOTE this format is parsed by grep + logger.info( + "Total inference time: {} ({:.6f} s / iter per device, on {} devices)".format( + total_time_str, total_time / (total - num_warmup), num_devices + ) + ) + total_compute_time_str = str(datetime.timedelta(seconds=int(total_compute_time))) + logger.info( + "Total inference pure compute time: {} ({:.6f} s / iter per device, on {} devices)".format( + total_compute_time_str, total_compute_time / (total - num_warmup), num_devices + ) + ) + + results = evaluator.evaluate() + # An evaluator may return None when not in main process. 
+ # Replace it by an empty dict instead to make it easier for downstream code to handle + if results is None: + results = {} + return results + + +@contextmanager +def inference_context(model): + """ + A context where the model is temporarily changed to eval mode, + and restored to previous mode afterwards. + + Args: + model: a torch Module + """ + training_mode = model.training + model.eval() + yield + model.train(training_mode) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/evaluation/fast_eval_api.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/evaluation/fast_eval_api.py new file mode 100644 index 0000000000000000000000000000000000000000..ad1a8f82350098bafe56f6d9481626e812717052 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/evaluation/fast_eval_api.py @@ -0,0 +1,121 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import copy +import logging +import numpy as np +import time +from annotator.oneformer.pycocotools.cocoeval import COCOeval + +from annotator.oneformer.detectron2 import _C + +logger = logging.getLogger(__name__) + + +class COCOeval_opt(COCOeval): + """ + This is a slightly modified version of the original COCO API, where the functions evaluateImg() + and accumulate() are implemented in C++ to speedup evaluation + """ + + def evaluate(self): + """ + Run per image evaluation on given images and store results in self.evalImgs_cpp, a + datastructure that isn't readable from Python but is used by a c++ implementation of + accumulate(). Unlike the original COCO PythonAPI, we don't populate the datastructure + self.evalImgs because this datastructure is a computational bottleneck. + :return: None + """ + tic = time.time() + + p = self.params + # add backward compatibility if useSegm is specified in params + if p.useSegm is not None: + p.iouType = "segm" if p.useSegm == 1 else "bbox" + logger.info("Evaluate annotation type *{}*".format(p.iouType)) + p.imgIds = list(np.unique(p.imgIds)) + if p.useCats: + p.catIds = list(np.unique(p.catIds)) + p.maxDets = sorted(p.maxDets) + self.params = p + + self._prepare() # bottleneck + + # loop through images, area range, max detection number + catIds = p.catIds if p.useCats else [-1] + + if p.iouType == "segm" or p.iouType == "bbox": + computeIoU = self.computeIoU + elif p.iouType == "keypoints": + computeIoU = self.computeOks + self.ious = { + (imgId, catId): computeIoU(imgId, catId) for imgId in p.imgIds for catId in catIds + } # bottleneck + + maxDet = p.maxDets[-1] + + # <<<< Beginning of code differences with original COCO API + def convert_instances_to_cpp(instances, is_det=False): + # Convert annotations for a list of instances in an image to a format that's fast + # to access in C++ + instances_cpp = [] + for instance in instances: + instance_cpp = _C.InstanceAnnotation( + int(instance["id"]), + instance["score"] if is_det else instance.get("score", 0.0), + instance["area"], + bool(instance.get("iscrowd", 0)), + bool(instance.get("ignore", 0)), + ) + instances_cpp.append(instance_cpp) + return instances_cpp + + # Convert GT annotations, detections, and IOUs to a format that's fast to access in C++ + ground_truth_instances = [ + [convert_instances_to_cpp(self._gts[imgId, catId]) for catId in p.catIds] + for imgId in p.imgIds + ] + detected_instances = [ + [convert_instances_to_cpp(self._dts[imgId, catId], is_det=True) for catId in p.catIds] + for imgId in p.imgIds + ] + ious = [[self.ious[imgId, catId] 
for catId in catIds] for imgId in p.imgIds] + + if not p.useCats: + # For each image, flatten per-category lists into a single list + ground_truth_instances = [[[o for c in i for o in c]] for i in ground_truth_instances] + detected_instances = [[[o for c in i for o in c]] for i in detected_instances] + + # Call C++ implementation of self.evaluateImgs() + self._evalImgs_cpp = _C.COCOevalEvaluateImages( + p.areaRng, maxDet, p.iouThrs, ious, ground_truth_instances, detected_instances + ) + self._evalImgs = None + + self._paramsEval = copy.deepcopy(self.params) + toc = time.time() + logger.info("COCOeval_opt.evaluate() finished in {:0.2f} seconds.".format(toc - tic)) + # >>>> End of code differences with original COCO API + + def accumulate(self): + """ + Accumulate per image evaluation results and store the result in self.eval. Does not + support changing parameter settings from those used by self.evaluate() + """ + logger.info("Accumulating evaluation results...") + tic = time.time() + assert hasattr( + self, "_evalImgs_cpp" + ), "evaluate() must be called before accmulate() is called." + + self.eval = _C.COCOevalAccumulate(self._paramsEval, self._evalImgs_cpp) + + # recall is num_iou_thresholds X num_categories X num_area_ranges X num_max_detections + self.eval["recall"] = np.array(self.eval["recall"]).reshape( + self.eval["counts"][:1] + self.eval["counts"][2:] + ) + + # precision and scores are num_iou_thresholds X num_recall_thresholds X num_categories X + # num_area_ranges X num_max_detections + self.eval["precision"] = np.array(self.eval["precision"]).reshape(self.eval["counts"]) + self.eval["scores"] = np.array(self.eval["scores"]).reshape(self.eval["counts"]) + toc = time.time() + logger.info("COCOeval_opt.accumulate() finished in {:0.2f} seconds.".format(toc - tic)) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/evaluation/lvis_evaluation.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/evaluation/lvis_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..7d712ef262789edb85392cb54577c3a6b15e223e --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/evaluation/lvis_evaluation.py @@ -0,0 +1,380 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import copy +import itertools +import json +import logging +import os +import pickle +from collections import OrderedDict +import torch + +import annotator.oneformer.detectron2.utils.comm as comm +from annotator.oneformer.detectron2.config import CfgNode +from annotator.oneformer.detectron2.data import MetadataCatalog +from annotator.oneformer.detectron2.structures import Boxes, BoxMode, pairwise_iou +from annotator.oneformer.detectron2.utils.file_io import PathManager +from annotator.oneformer.detectron2.utils.logger import create_small_table + +from .coco_evaluation import instances_to_coco_json +from .evaluator import DatasetEvaluator + + +class LVISEvaluator(DatasetEvaluator): + """ + Evaluate object proposal and instance detection/segmentation outputs using + LVIS's metrics and evaluation API. + """ + + def __init__( + self, + dataset_name, + tasks=None, + distributed=True, + output_dir=None, + *, + max_dets_per_image=None, + ): + """ + Args: + dataset_name (str): name of the dataset to be evaluated. + It must have the following corresponding metadata: + "json_file": the path to the LVIS format annotation + tasks (tuple[str]): tasks that can be evaluated under the given + configuration. 
A task is one of "bbox", "segm". + By default, will infer this automatically from predictions. + distributed (True): if True, will collect results from all ranks for evaluation. + Otherwise, will evaluate the results in the current process. + output_dir (str): optional, an output directory to dump results. + max_dets_per_image (None or int): limit on maximum detections per image in evaluating AP + This limit, by default of the LVIS dataset, is 300. + """ + from lvis import LVIS + + self._logger = logging.getLogger(__name__) + + if tasks is not None and isinstance(tasks, CfgNode): + self._logger.warn( + "COCO Evaluator instantiated using config, this is deprecated behavior." + " Please pass in explicit arguments instead." + ) + self._tasks = None # Infering it from predictions should be better + else: + self._tasks = tasks + + self._distributed = distributed + self._output_dir = output_dir + self._max_dets_per_image = max_dets_per_image + + self._cpu_device = torch.device("cpu") + + self._metadata = MetadataCatalog.get(dataset_name) + json_file = PathManager.get_local_path(self._metadata.json_file) + self._lvis_api = LVIS(json_file) + # Test set json files do not contain annotations (evaluation must be + # performed using the LVIS evaluation server). + self._do_evaluation = len(self._lvis_api.get_ann_ids()) > 0 + + def reset(self): + self._predictions = [] + + def process(self, inputs, outputs): + """ + Args: + inputs: the inputs to a LVIS model (e.g., GeneralizedRCNN). + It is a list of dict. Each dict corresponds to an image and + contains keys like "height", "width", "file_name", "image_id". + outputs: the outputs of a LVIS model. It is a list of dicts with key + "instances" that contains :class:`Instances`. + """ + for input, output in zip(inputs, outputs): + prediction = {"image_id": input["image_id"]} + + if "instances" in output: + instances = output["instances"].to(self._cpu_device) + prediction["instances"] = instances_to_coco_json(instances, input["image_id"]) + if "proposals" in output: + prediction["proposals"] = output["proposals"].to(self._cpu_device) + self._predictions.append(prediction) + + def evaluate(self): + if self._distributed: + comm.synchronize() + predictions = comm.gather(self._predictions, dst=0) + predictions = list(itertools.chain(*predictions)) + + if not comm.is_main_process(): + return + else: + predictions = self._predictions + + if len(predictions) == 0: + self._logger.warning("[LVISEvaluator] Did not receive valid predictions.") + return {} + + if self._output_dir: + PathManager.mkdirs(self._output_dir) + file_path = os.path.join(self._output_dir, "instances_predictions.pth") + with PathManager.open(file_path, "wb") as f: + torch.save(predictions, f) + + self._results = OrderedDict() + if "proposals" in predictions[0]: + self._eval_box_proposals(predictions) + if "instances" in predictions[0]: + self._eval_predictions(predictions) + # Copy so the caller can do whatever with results + return copy.deepcopy(self._results) + + def _tasks_from_predictions(self, predictions): + for pred in predictions: + if "segmentation" in pred: + return ("bbox", "segm") + return ("bbox",) + + def _eval_predictions(self, predictions): + """ + Evaluate predictions. Fill self._results with the metrics of the tasks. 
+ + Args: + predictions (list[dict]): list of outputs from the model + """ + self._logger.info("Preparing results in the LVIS format ...") + lvis_results = list(itertools.chain(*[x["instances"] for x in predictions])) + tasks = self._tasks or self._tasks_from_predictions(lvis_results) + + # LVIS evaluator can be used to evaluate results for COCO dataset categories. + # In this case `_metadata` variable will have a field with COCO-specific category mapping. + if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"): + reverse_id_mapping = { + v: k for k, v in self._metadata.thing_dataset_id_to_contiguous_id.items() + } + for result in lvis_results: + result["category_id"] = reverse_id_mapping[result["category_id"]] + else: + # unmap the category ids for LVIS (from 0-indexed to 1-indexed) + for result in lvis_results: + result["category_id"] += 1 + + if self._output_dir: + file_path = os.path.join(self._output_dir, "lvis_instances_results.json") + self._logger.info("Saving results to {}".format(file_path)) + with PathManager.open(file_path, "w") as f: + f.write(json.dumps(lvis_results)) + f.flush() + + if not self._do_evaluation: + self._logger.info("Annotations are not available for evaluation.") + return + + self._logger.info("Evaluating predictions ...") + for task in sorted(tasks): + res = _evaluate_predictions_on_lvis( + self._lvis_api, + lvis_results, + task, + max_dets_per_image=self._max_dets_per_image, + class_names=self._metadata.get("thing_classes"), + ) + self._results[task] = res + + def _eval_box_proposals(self, predictions): + """ + Evaluate the box proposals in predictions. + Fill self._results with the metrics for "box_proposals" task. + """ + if self._output_dir: + # Saving generated box proposals to file. + # Predicted box_proposals are in XYXY_ABS mode. + bbox_mode = BoxMode.XYXY_ABS.value + ids, boxes, objectness_logits = [], [], [] + for prediction in predictions: + ids.append(prediction["image_id"]) + boxes.append(prediction["proposals"].proposal_boxes.tensor.numpy()) + objectness_logits.append(prediction["proposals"].objectness_logits.numpy()) + + proposal_data = { + "boxes": boxes, + "objectness_logits": objectness_logits, + "ids": ids, + "bbox_mode": bbox_mode, + } + with PathManager.open(os.path.join(self._output_dir, "box_proposals.pkl"), "wb") as f: + pickle.dump(proposal_data, f) + + if not self._do_evaluation: + self._logger.info("Annotations are not available for evaluation.") + return + + self._logger.info("Evaluating bbox proposals ...") + res = {} + areas = {"all": "", "small": "s", "medium": "m", "large": "l"} + for limit in [100, 1000]: + for area, suffix in areas.items(): + stats = _evaluate_box_proposals(predictions, self._lvis_api, area=area, limit=limit) + key = "AR{}@{:d}".format(suffix, limit) + res[key] = float(stats["ar"].item() * 100) + self._logger.info("Proposal metrics: \n" + create_small_table(res)) + self._results["box_proposals"] = res + + +# inspired from Detectron: +# https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L255 # noqa +def _evaluate_box_proposals(dataset_predictions, lvis_api, thresholds=None, area="all", limit=None): + """ + Evaluate detection proposal recall metrics. This function is a much + faster alternative to the official LVIS API recall evaluation code. However, + it produces slightly different results. 
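A hedged, self-contained sketch (illustrative values only) of how the recall curve and average recall are derived from the per-gt best-IoU values collected below:

.. code-block:: python

    import torch

    gt_overlaps = torch.tensor([0.20, 0.55, 0.60, 0.80, 0.95])  # hypothetical best IoU per gt box
    num_pos = len(gt_overlaps)
    thresholds = torch.arange(0.5, 0.95 + 1e-5, 0.05)
    recalls = torch.stack([(gt_overlaps >= t).float().sum() / num_pos for t in thresholds])
    ar = recalls.mean()  # reported as "AR"; multiplied by 100 by the caller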
+ """ + # Record max overlap value for each gt box + # Return vector of overlap values + areas = { + "all": 0, + "small": 1, + "medium": 2, + "large": 3, + "96-128": 4, + "128-256": 5, + "256-512": 6, + "512-inf": 7, + } + area_ranges = [ + [0**2, 1e5**2], # all + [0**2, 32**2], # small + [32**2, 96**2], # medium + [96**2, 1e5**2], # large + [96**2, 128**2], # 96-128 + [128**2, 256**2], # 128-256 + [256**2, 512**2], # 256-512 + [512**2, 1e5**2], + ] # 512-inf + assert area in areas, "Unknown area range: {}".format(area) + area_range = area_ranges[areas[area]] + gt_overlaps = [] + num_pos = 0 + + for prediction_dict in dataset_predictions: + predictions = prediction_dict["proposals"] + + # sort predictions in descending order + # TODO maybe remove this and make it explicit in the documentation + inds = predictions.objectness_logits.sort(descending=True)[1] + predictions = predictions[inds] + + ann_ids = lvis_api.get_ann_ids(img_ids=[prediction_dict["image_id"]]) + anno = lvis_api.load_anns(ann_ids) + gt_boxes = [ + BoxMode.convert(obj["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) for obj in anno + ] + gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4) # guard against no boxes + gt_boxes = Boxes(gt_boxes) + gt_areas = torch.as_tensor([obj["area"] for obj in anno]) + + if len(gt_boxes) == 0 or len(predictions) == 0: + continue + + valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <= area_range[1]) + gt_boxes = gt_boxes[valid_gt_inds] + + num_pos += len(gt_boxes) + + if len(gt_boxes) == 0: + continue + + if limit is not None and len(predictions) > limit: + predictions = predictions[:limit] + + overlaps = pairwise_iou(predictions.proposal_boxes, gt_boxes) + + _gt_overlaps = torch.zeros(len(gt_boxes)) + for j in range(min(len(predictions), len(gt_boxes))): + # find which proposal box maximally covers each gt box + # and get the iou amount of coverage for each gt box + max_overlaps, argmax_overlaps = overlaps.max(dim=0) + + # find which gt box is 'best' covered (i.e. 'best' = most iou) + gt_ovr, gt_ind = max_overlaps.max(dim=0) + assert gt_ovr >= 0 + # find the proposal box that covers the best covered gt box + box_ind = argmax_overlaps[gt_ind] + # record the iou coverage of this gt box + _gt_overlaps[j] = overlaps[box_ind, gt_ind] + assert _gt_overlaps[j] == gt_ovr + # mark the proposal box and the gt box as used + overlaps[box_ind, :] = -1 + overlaps[:, gt_ind] = -1 + + # append recorded iou coverage level + gt_overlaps.append(_gt_overlaps) + gt_overlaps = ( + torch.cat(gt_overlaps, dim=0) if len(gt_overlaps) else torch.zeros(0, dtype=torch.float32) + ) + gt_overlaps, _ = torch.sort(gt_overlaps) + + if thresholds is None: + step = 0.05 + thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32) + recalls = torch.zeros_like(thresholds) + # compute recall for each iou threshold + for i, t in enumerate(thresholds): + recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos) + # ar = 2 * np.trapz(recalls, thresholds) + ar = recalls.mean() + return { + "ar": ar, + "recalls": recalls, + "thresholds": thresholds, + "gt_overlaps": gt_overlaps, + "num_pos": num_pos, + } + + +def _evaluate_predictions_on_lvis( + lvis_gt, lvis_results, iou_type, max_dets_per_image=None, class_names=None +): + """ + Args: + iou_type (str): + max_dets_per_image (None or int): limit on maximum detections per image in evaluating AP + This limit, by default of the LVIS dataset, is 300. + class_names (None or list[str]): if provided, will use it to predict + per-category AP. 
+ + Returns: + a dict of {metric name: score} + """ + metrics = { + "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"], + "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"], + }[iou_type] + + logger = logging.getLogger(__name__) + + if len(lvis_results) == 0: # TODO: check if needed + logger.warn("No predictions from the model!") + return {metric: float("nan") for metric in metrics} + + if iou_type == "segm": + lvis_results = copy.deepcopy(lvis_results) + # When evaluating mask AP, if the results contain bbox, LVIS API will + # use the box area as the area of the instance, instead of the mask area. + # This leads to a different definition of small/medium/large. + # We remove the bbox field to let mask AP use mask area. + for c in lvis_results: + c.pop("bbox", None) + + if max_dets_per_image is None: + max_dets_per_image = 300 # Default for LVIS dataset + + from lvis import LVISEval, LVISResults + + logger.info(f"Evaluating with max detections per image = {max_dets_per_image}") + lvis_results = LVISResults(lvis_gt, lvis_results, max_dets=max_dets_per_image) + lvis_eval = LVISEval(lvis_gt, lvis_results, iou_type) + lvis_eval.run() + lvis_eval.print_results() + + # Pull the standard metrics from the LVIS results + results = lvis_eval.get_results() + results = {metric: float(results[metric] * 100) for metric in metrics} + logger.info("Evaluation results for {}: \n".format(iou_type) + create_small_table(results)) + return results diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/evaluation/panoptic_evaluation.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/evaluation/panoptic_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..bf77fe061291f44381f8417e82e8b2bc7c5a60c6 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/evaluation/panoptic_evaluation.py @@ -0,0 +1,199 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import contextlib +import io +import itertools +import json +import logging +import numpy as np +import os +import tempfile +from collections import OrderedDict +from typing import Optional +from PIL import Image +from tabulate import tabulate + +from annotator.oneformer.detectron2.data import MetadataCatalog +from annotator.oneformer.detectron2.utils import comm +from annotator.oneformer.detectron2.utils.file_io import PathManager + +from .evaluator import DatasetEvaluator + +logger = logging.getLogger(__name__) + + +class COCOPanopticEvaluator(DatasetEvaluator): + """ + Evaluate Panoptic Quality metrics on COCO using PanopticAPI. + It saves panoptic segmentation prediction in `output_dir` + + It contains a synchronize call and has to be called from all workers. + """ + + def __init__(self, dataset_name: str, output_dir: Optional[str] = None): + """ + Args: + dataset_name: name of the dataset + output_dir: output directory to save results for evaluation. 
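Illustrative usage (a sketch; the dataset name and output directory are hypothetical, and ``inputs``/``outputs`` stand for one batch of model inputs and predictions from an inference loop):

.. code-block:: python

    evaluator = COCOPanopticEvaluator("coco_2017_val_panoptic", output_dir="./panoptic_eval")
    evaluator.reset()
    for inputs, outputs in batches:   # batches come from your own inference loop
        evaluator.process(inputs, outputs)
    results = evaluator.evaluate()    # {"panoptic_seg": {"PQ": ..., "SQ": ..., "RQ": ..., ...}}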
+ """ + self._metadata = MetadataCatalog.get(dataset_name) + self._thing_contiguous_id_to_dataset_id = { + v: k for k, v in self._metadata.thing_dataset_id_to_contiguous_id.items() + } + self._stuff_contiguous_id_to_dataset_id = { + v: k for k, v in self._metadata.stuff_dataset_id_to_contiguous_id.items() + } + + self._output_dir = output_dir + if self._output_dir is not None: + PathManager.mkdirs(self._output_dir) + + def reset(self): + self._predictions = [] + + def _convert_category_id(self, segment_info): + isthing = segment_info.pop("isthing", None) + if isthing is None: + # the model produces panoptic category id directly. No more conversion needed + return segment_info + if isthing is True: + segment_info["category_id"] = self._thing_contiguous_id_to_dataset_id[ + segment_info["category_id"] + ] + else: + segment_info["category_id"] = self._stuff_contiguous_id_to_dataset_id[ + segment_info["category_id"] + ] + return segment_info + + def process(self, inputs, outputs): + from panopticapi.utils import id2rgb + + for input, output in zip(inputs, outputs): + panoptic_img, segments_info = output["panoptic_seg"] + panoptic_img = panoptic_img.cpu().numpy() + if segments_info is None: + # If "segments_info" is None, we assume "panoptic_img" is a + # H*W int32 image storing the panoptic_id in the format of + # category_id * label_divisor + instance_id. We reserve -1 for + # VOID label, and add 1 to panoptic_img since the official + # evaluation script uses 0 for VOID label. + label_divisor = self._metadata.label_divisor + segments_info = [] + for panoptic_label in np.unique(panoptic_img): + if panoptic_label == -1: + # VOID region. + continue + pred_class = panoptic_label // label_divisor + isthing = ( + pred_class in self._metadata.thing_dataset_id_to_contiguous_id.values() + ) + segments_info.append( + { + "id": int(panoptic_label) + 1, + "category_id": int(pred_class), + "isthing": bool(isthing), + } + ) + # Official evaluation script uses 0 for VOID label. 
+ panoptic_img += 1 + + file_name = os.path.basename(input["file_name"]) + file_name_png = os.path.splitext(file_name)[0] + ".png" + with io.BytesIO() as out: + Image.fromarray(id2rgb(panoptic_img)).save(out, format="PNG") + segments_info = [self._convert_category_id(x) for x in segments_info] + self._predictions.append( + { + "image_id": input["image_id"], + "file_name": file_name_png, + "png_string": out.getvalue(), + "segments_info": segments_info, + } + ) + + def evaluate(self): + comm.synchronize() + + self._predictions = comm.gather(self._predictions) + self._predictions = list(itertools.chain(*self._predictions)) + if not comm.is_main_process(): + return + + # PanopticApi requires local files + gt_json = PathManager.get_local_path(self._metadata.panoptic_json) + gt_folder = PathManager.get_local_path(self._metadata.panoptic_root) + + with tempfile.TemporaryDirectory(prefix="panoptic_eval") as pred_dir: + logger.info("Writing all panoptic predictions to {} ...".format(pred_dir)) + for p in self._predictions: + with open(os.path.join(pred_dir, p["file_name"]), "wb") as f: + f.write(p.pop("png_string")) + + with open(gt_json, "r") as f: + json_data = json.load(f) + json_data["annotations"] = self._predictions + + output_dir = self._output_dir or pred_dir + predictions_json = os.path.join(output_dir, "predictions.json") + with PathManager.open(predictions_json, "w") as f: + f.write(json.dumps(json_data)) + + from panopticapi.evaluation import pq_compute + + with contextlib.redirect_stdout(io.StringIO()): + pq_res = pq_compute( + gt_json, + PathManager.get_local_path(predictions_json), + gt_folder=gt_folder, + pred_folder=pred_dir, + ) + + res = {} + res["PQ"] = 100 * pq_res["All"]["pq"] + res["SQ"] = 100 * pq_res["All"]["sq"] + res["RQ"] = 100 * pq_res["All"]["rq"] + res["PQ_th"] = 100 * pq_res["Things"]["pq"] + res["SQ_th"] = 100 * pq_res["Things"]["sq"] + res["RQ_th"] = 100 * pq_res["Things"]["rq"] + res["PQ_st"] = 100 * pq_res["Stuff"]["pq"] + res["SQ_st"] = 100 * pq_res["Stuff"]["sq"] + res["RQ_st"] = 100 * pq_res["Stuff"]["rq"] + + results = OrderedDict({"panoptic_seg": res}) + _print_panoptic_results(pq_res) + + return results + + +def _print_panoptic_results(pq_res): + headers = ["", "PQ", "SQ", "RQ", "#categories"] + data = [] + for name in ["All", "Things", "Stuff"]: + row = [name] + [pq_res[name][k] * 100 for k in ["pq", "sq", "rq"]] + [pq_res[name]["n"]] + data.append(row) + table = tabulate( + data, headers=headers, tablefmt="pipe", floatfmt=".3f", stralign="center", numalign="center" + ) + logger.info("Panoptic Evaluation Results:\n" + table) + + +if __name__ == "__main__": + from annotator.oneformer.detectron2.utils.logger import setup_logger + + logger = setup_logger() + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("--gt-json") + parser.add_argument("--gt-dir") + parser.add_argument("--pred-json") + parser.add_argument("--pred-dir") + args = parser.parse_args() + + from panopticapi.evaluation import pq_compute + + with contextlib.redirect_stdout(io.StringIO()): + pq_res = pq_compute( + args.gt_json, args.pred_json, gt_folder=args.gt_dir, pred_folder=args.pred_dir + ) + _print_panoptic_results(pq_res) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/evaluation/pascal_voc_evaluation.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/evaluation/pascal_voc_evaluation.py new file mode 100644 index 
0000000000000000000000000000000000000000..b2963e5dc5b6ed471f0c37056b35a350ea4cf020 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/evaluation/pascal_voc_evaluation.py @@ -0,0 +1,300 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + +import logging +import numpy as np +import os +import tempfile +import xml.etree.ElementTree as ET +from collections import OrderedDict, defaultdict +from functools import lru_cache +import torch + +from annotator.oneformer.detectron2.data import MetadataCatalog +from annotator.oneformer.detectron2.utils import comm +from annotator.oneformer.detectron2.utils.file_io import PathManager + +from .evaluator import DatasetEvaluator + + +class PascalVOCDetectionEvaluator(DatasetEvaluator): + """ + Evaluate Pascal VOC style AP for Pascal VOC dataset. + It contains a synchronization, therefore has to be called from all ranks. + + Note that the concept of AP can be implemented in different ways and may not + produce identical results. This class mimics the implementation of the official + Pascal VOC Matlab API, and should produce similar but not identical results to the + official API. + """ + + def __init__(self, dataset_name): + """ + Args: + dataset_name (str): name of the dataset, e.g., "voc_2007_test" + """ + self._dataset_name = dataset_name + meta = MetadataCatalog.get(dataset_name) + + # Too many tiny files, download all to local for speed. + annotation_dir_local = PathManager.get_local_path( + os.path.join(meta.dirname, "Annotations/") + ) + self._anno_file_template = os.path.join(annotation_dir_local, "{}.xml") + self._image_set_path = os.path.join(meta.dirname, "ImageSets", "Main", meta.split + ".txt") + self._class_names = meta.thing_classes + assert meta.year in [2007, 2012], meta.year + self._is_2007 = meta.year == 2007 + self._cpu_device = torch.device("cpu") + self._logger = logging.getLogger(__name__) + + def reset(self): + self._predictions = defaultdict(list) # class name -> list of prediction strings + + def process(self, inputs, outputs): + for input, output in zip(inputs, outputs): + image_id = input["image_id"] + instances = output["instances"].to(self._cpu_device) + boxes = instances.pred_boxes.tensor.numpy() + scores = instances.scores.tolist() + classes = instances.pred_classes.tolist() + for box, score, cls in zip(boxes, scores, classes): + xmin, ymin, xmax, ymax = box + # The inverse of data loading logic in `datasets/pascal_voc.py` + xmin += 1 + ymin += 1 + self._predictions[cls].append( + f"{image_id} {score:.3f} {xmin:.1f} {ymin:.1f} {xmax:.1f} {ymax:.1f}" + ) + + def evaluate(self): + """ + Returns: + dict: has a key "segm", whose value is a dict of "AP", "AP50", and "AP75". + """ + all_predictions = comm.gather(self._predictions, dst=0) + if not comm.is_main_process(): + return + predictions = defaultdict(list) + for predictions_per_rank in all_predictions: + for clsid, lines in predictions_per_rank.items(): + predictions[clsid].extend(lines) + del all_predictions + + self._logger.info( + "Evaluating {} using {} metric. 
" + "Note that results do not use the official Matlab API.".format( + self._dataset_name, 2007 if self._is_2007 else 2012 + ) + ) + + with tempfile.TemporaryDirectory(prefix="pascal_voc_eval_") as dirname: + res_file_template = os.path.join(dirname, "{}.txt") + + aps = defaultdict(list) # iou -> ap per class + for cls_id, cls_name in enumerate(self._class_names): + lines = predictions.get(cls_id, [""]) + + with open(res_file_template.format(cls_name), "w") as f: + f.write("\n".join(lines)) + + for thresh in range(50, 100, 5): + rec, prec, ap = voc_eval( + res_file_template, + self._anno_file_template, + self._image_set_path, + cls_name, + ovthresh=thresh / 100.0, + use_07_metric=self._is_2007, + ) + aps[thresh].append(ap * 100) + + ret = OrderedDict() + mAP = {iou: np.mean(x) for iou, x in aps.items()} + ret["bbox"] = {"AP": np.mean(list(mAP.values())), "AP50": mAP[50], "AP75": mAP[75]} + return ret + + +############################################################################## +# +# Below code is modified from +# https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/voc_eval.py +# -------------------------------------------------------- +# Fast/er R-CNN +# Licensed under The MIT License [see LICENSE for details] +# Written by Bharath Hariharan +# -------------------------------------------------------- + +"""Python implementation of the PASCAL VOC devkit's AP evaluation code.""" + + +@lru_cache(maxsize=None) +def parse_rec(filename): + """Parse a PASCAL VOC xml file.""" + with PathManager.open(filename) as f: + tree = ET.parse(f) + objects = [] + for obj in tree.findall("object"): + obj_struct = {} + obj_struct["name"] = obj.find("name").text + obj_struct["pose"] = obj.find("pose").text + obj_struct["truncated"] = int(obj.find("truncated").text) + obj_struct["difficult"] = int(obj.find("difficult").text) + bbox = obj.find("bndbox") + obj_struct["bbox"] = [ + int(bbox.find("xmin").text), + int(bbox.find("ymin").text), + int(bbox.find("xmax").text), + int(bbox.find("ymax").text), + ] + objects.append(obj_struct) + + return objects + + +def voc_ap(rec, prec, use_07_metric=False): + """Compute VOC AP given precision and recall. If use_07_metric is true, uses + the VOC 07 11-point method (default:False). + """ + if use_07_metric: + # 11 point metric + ap = 0.0 + for t in np.arange(0.0, 1.1, 0.1): + if np.sum(rec >= t) == 0: + p = 0 + else: + p = np.max(prec[rec >= t]) + ap = ap + p / 11.0 + else: + # correct AP calculation + # first append sentinel values at the end + mrec = np.concatenate(([0.0], rec, [1.0])) + mpre = np.concatenate(([0.0], prec, [0.0])) + + # compute the precision envelope + for i in range(mpre.size - 1, 0, -1): + mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + i = np.where(mrec[1:] != mrec[:-1])[0] + + # and sum (\Delta recall) * prec + ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) + return ap + + +def voc_eval(detpath, annopath, imagesetfile, classname, ovthresh=0.5, use_07_metric=False): + """rec, prec, ap = voc_eval(detpath, + annopath, + imagesetfile, + classname, + [ovthresh], + [use_07_metric]) + + Top level function that does the PASCAL VOC evaluation. + + detpath: Path to detections + detpath.format(classname) should produce the detection results file. + annopath: Path to annotations + annopath.format(imagename) should be the xml annotations file. + imagesetfile: Text file containing the list of images, one image per line. 
+ classname: Category name (duh) + [ovthresh]: Overlap threshold (default = 0.5) + [use_07_metric]: Whether to use VOC07's 11 point AP computation + (default False) + """ + # assumes detections are in detpath.format(classname) + # assumes annotations are in annopath.format(imagename) + # assumes imagesetfile is a text file with each line an image name + + # first load gt + # read list of images + with PathManager.open(imagesetfile, "r") as f: + lines = f.readlines() + imagenames = [x.strip() for x in lines] + + # load annots + recs = {} + for imagename in imagenames: + recs[imagename] = parse_rec(annopath.format(imagename)) + + # extract gt objects for this class + class_recs = {} + npos = 0 + for imagename in imagenames: + R = [obj for obj in recs[imagename] if obj["name"] == classname] + bbox = np.array([x["bbox"] for x in R]) + difficult = np.array([x["difficult"] for x in R]).astype(bool) + # difficult = np.array([False for x in R]).astype(bool) # treat all "difficult" as GT + det = [False] * len(R) + npos = npos + sum(~difficult) + class_recs[imagename] = {"bbox": bbox, "difficult": difficult, "det": det} + + # read dets + detfile = detpath.format(classname) + with open(detfile, "r") as f: + lines = f.readlines() + + splitlines = [x.strip().split(" ") for x in lines] + image_ids = [x[0] for x in splitlines] + confidence = np.array([float(x[1]) for x in splitlines]) + BB = np.array([[float(z) for z in x[2:]] for x in splitlines]).reshape(-1, 4) + + # sort by confidence + sorted_ind = np.argsort(-confidence) + BB = BB[sorted_ind, :] + image_ids = [image_ids[x] for x in sorted_ind] + + # go down dets and mark TPs and FPs + nd = len(image_ids) + tp = np.zeros(nd) + fp = np.zeros(nd) + for d in range(nd): + R = class_recs[image_ids[d]] + bb = BB[d, :].astype(float) + ovmax = -np.inf + BBGT = R["bbox"].astype(float) + + if BBGT.size > 0: + # compute overlaps + # intersection + ixmin = np.maximum(BBGT[:, 0], bb[0]) + iymin = np.maximum(BBGT[:, 1], bb[1]) + ixmax = np.minimum(BBGT[:, 2], bb[2]) + iymax = np.minimum(BBGT[:, 3], bb[3]) + iw = np.maximum(ixmax - ixmin + 1.0, 0.0) + ih = np.maximum(iymax - iymin + 1.0, 0.0) + inters = iw * ih + + # union + uni = ( + (bb[2] - bb[0] + 1.0) * (bb[3] - bb[1] + 1.0) + + (BBGT[:, 2] - BBGT[:, 0] + 1.0) * (BBGT[:, 3] - BBGT[:, 1] + 1.0) + - inters + ) + + overlaps = inters / uni + ovmax = np.max(overlaps) + jmax = np.argmax(overlaps) + + if ovmax > ovthresh: + if not R["difficult"][jmax]: + if not R["det"][jmax]: + tp[d] = 1.0 + R["det"][jmax] = 1 + else: + fp[d] = 1.0 + else: + fp[d] = 1.0 + + # compute precision recall + fp = np.cumsum(fp) + tp = np.cumsum(tp) + rec = tp / float(npos) + # avoid divide by zero in case the first detection matches a difficult + # ground truth + prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) + ap = voc_ap(rec, prec, use_07_metric) + + return rec, prec, ap diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/evaluation/rotated_coco_evaluation.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/evaluation/rotated_coco_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..0d5306c3a0601ed555c7bef20e0ac4ca64264442 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/evaluation/rotated_coco_evaluation.py @@ -0,0 +1,207 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
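The overlap computation in voc_eval above follows the VOC devkit convention that box coordinates are inclusive pixel indices, which is where the repeated "+ 1.0" width/height terms come from. A minimal standalone restatement of that IoU, using toy boxes only:

```python
def voc_iou(bb, gt):
    """IoU of two [xmin, ymin, xmax, ymax] boxes with inclusive pixel coordinates,
    mirroring the "+ 1" convention used by voc_eval above."""
    ixmin, iymin = max(bb[0], gt[0]), max(bb[1], gt[1])
    ixmax, iymax = min(bb[2], gt[2]), min(bb[3], gt[3])
    iw = max(ixmax - ixmin + 1.0, 0.0)
    ih = max(iymax - iymin + 1.0, 0.0)
    inter = iw * ih
    area_bb = (bb[2] - bb[0] + 1.0) * (bb[3] - bb[1] + 1.0)
    area_gt = (gt[2] - gt[0] + 1.0) * (gt[3] - gt[1] + 1.0)
    return inter / (area_bb + area_gt - inter)


print(voc_iou([10, 10, 10, 10], [10, 10, 10, 10]))  # 1.0: a single-pixel box still has area 1
print(voc_iou([0, 0, 9, 9], [5, 5, 14, 14]))        # ~0.143 = 25 / (100 + 100 - 25)
```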
+import itertools +import json +import numpy as np +import os +import torch +from annotator.oneformer.pycocotools.cocoeval import COCOeval, maskUtils + +from annotator.oneformer.detectron2.structures import BoxMode, RotatedBoxes, pairwise_iou_rotated +from annotator.oneformer.detectron2.utils.file_io import PathManager + +from .coco_evaluation import COCOEvaluator + + +class RotatedCOCOeval(COCOeval): + @staticmethod + def is_rotated(box_list): + if type(box_list) == np.ndarray: + return box_list.shape[1] == 5 + elif type(box_list) == list: + if box_list == []: # cannot decide the box_dim + return False + return np.all( + np.array( + [ + (len(obj) == 5) and ((type(obj) == list) or (type(obj) == np.ndarray)) + for obj in box_list + ] + ) + ) + return False + + @staticmethod + def boxlist_to_tensor(boxlist, output_box_dim): + if type(boxlist) == np.ndarray: + box_tensor = torch.from_numpy(boxlist) + elif type(boxlist) == list: + if boxlist == []: + return torch.zeros((0, output_box_dim), dtype=torch.float32) + else: + box_tensor = torch.FloatTensor(boxlist) + else: + raise Exception("Unrecognized boxlist type") + + input_box_dim = box_tensor.shape[1] + if input_box_dim != output_box_dim: + if input_box_dim == 4 and output_box_dim == 5: + box_tensor = BoxMode.convert(box_tensor, BoxMode.XYWH_ABS, BoxMode.XYWHA_ABS) + else: + raise Exception( + "Unable to convert from {}-dim box to {}-dim box".format( + input_box_dim, output_box_dim + ) + ) + return box_tensor + + def compute_iou_dt_gt(self, dt, gt, is_crowd): + if self.is_rotated(dt) or self.is_rotated(gt): + # TODO: take is_crowd into consideration + assert all(c == 0 for c in is_crowd) + dt = RotatedBoxes(self.boxlist_to_tensor(dt, output_box_dim=5)) + gt = RotatedBoxes(self.boxlist_to_tensor(gt, output_box_dim=5)) + return pairwise_iou_rotated(dt, gt) + else: + # This is the same as the classical COCO evaluation + return maskUtils.iou(dt, gt, is_crowd) + + def computeIoU(self, imgId, catId): + p = self.params + if p.useCats: + gt = self._gts[imgId, catId] + dt = self._dts[imgId, catId] + else: + gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]] + dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]] + if len(gt) == 0 and len(dt) == 0: + return [] + inds = np.argsort([-d["score"] for d in dt], kind="mergesort") + dt = [dt[i] for i in inds] + if len(dt) > p.maxDets[-1]: + dt = dt[0 : p.maxDets[-1]] + + assert p.iouType == "bbox", "unsupported iouType for iou computation" + + g = [g["bbox"] for g in gt] + d = [d["bbox"] for d in dt] + + # compute iou between each dt and gt region + iscrowd = [int(o["iscrowd"]) for o in gt] + + # Note: this function is copied from cocoeval.py in cocoapi + # and the major difference is here. + ious = self.compute_iou_dt_gt(d, g, iscrowd) + return ious + + +class RotatedCOCOEvaluator(COCOEvaluator): + """ + Evaluate object proposal/instance detection outputs using COCO-like metrics and APIs, + with rotated boxes support. + Note: this uses IOU only and does not consider angle differences. + """ + + def process(self, inputs, outputs): + """ + Args: + inputs: the inputs to a COCO model (e.g., GeneralizedRCNN). + It is a list of dict. Each dict corresponds to an image and + contains keys like "height", "width", "file_name", "image_id". + outputs: the outputs of a COCO model. It is a list of dicts with key + "instances" that contains :class:`Instances`. 
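boxlist_to_tensor above widens axis-aligned 4-value boxes to the 5-value rotated format via BoxMode.convert(..., BoxMode.XYWH_ABS, BoxMode.XYWHA_ABS). In detectron2's conventions the rotated format is (center_x, center_y, width, height, angle in degrees), so an axis-aligned box simply picks up an angle of 0. A rough sketch of that widening, written independently of the library call (treat it as an approximation, not a replacement for BoxMode.convert):

```python
import torch


def xywh_to_xywha(boxes_xywh: torch.Tensor) -> torch.Tensor:
    """Sketch of the XYWH_ABS -> XYWHA_ABS widening: (x0, y0, w, h) becomes
    (cx, cy, w, h, angle=0). The authoritative conversion is BoxMode.convert."""
    x0, y0, w, h = boxes_xywh.unbind(dim=1)
    cx, cy = x0 + w / 2.0, y0 + h / 2.0
    angle = torch.zeros_like(w)
    return torch.stack([cx, cy, w, h, angle], dim=1)


print(xywh_to_xywha(torch.tensor([[10.0, 20.0, 30.0, 40.0]])))
# tensor([[25., 40., 30., 40.,  0.]])
```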
+ """ + for input, output in zip(inputs, outputs): + prediction = {"image_id": input["image_id"]} + + if "instances" in output: + instances = output["instances"].to(self._cpu_device) + + prediction["instances"] = self.instances_to_json(instances, input["image_id"]) + if "proposals" in output: + prediction["proposals"] = output["proposals"].to(self._cpu_device) + self._predictions.append(prediction) + + def instances_to_json(self, instances, img_id): + num_instance = len(instances) + if num_instance == 0: + return [] + + boxes = instances.pred_boxes.tensor.numpy() + if boxes.shape[1] == 4: + boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS) + boxes = boxes.tolist() + scores = instances.scores.tolist() + classes = instances.pred_classes.tolist() + + results = [] + for k in range(num_instance): + result = { + "image_id": img_id, + "category_id": classes[k], + "bbox": boxes[k], + "score": scores[k], + } + + results.append(result) + return results + + def _eval_predictions(self, predictions, img_ids=None): # img_ids: unused + """ + Evaluate predictions on the given tasks. + Fill self._results with the metrics of the tasks. + """ + self._logger.info("Preparing results for COCO format ...") + coco_results = list(itertools.chain(*[x["instances"] for x in predictions])) + + # unmap the category ids for COCO + if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"): + reverse_id_mapping = { + v: k for k, v in self._metadata.thing_dataset_id_to_contiguous_id.items() + } + for result in coco_results: + result["category_id"] = reverse_id_mapping[result["category_id"]] + + if self._output_dir: + file_path = os.path.join(self._output_dir, "coco_instances_results.json") + self._logger.info("Saving results to {}".format(file_path)) + with PathManager.open(file_path, "w") as f: + f.write(json.dumps(coco_results)) + f.flush() + + if not self._do_evaluation: + self._logger.info("Annotations are not available for evaluation.") + return + + self._logger.info("Evaluating predictions ...") + + assert self._tasks is None or set(self._tasks) == { + "bbox" + }, "[RotatedCOCOEvaluator] Only bbox evaluation is supported" + coco_eval = ( + self._evaluate_predictions_on_coco(self._coco_api, coco_results) + if len(coco_results) > 0 + else None # cocoapi does not handle empty results very well + ) + + task = "bbox" + res = self._derive_coco_results( + coco_eval, task, class_names=self._metadata.get("thing_classes") + ) + self._results[task] = res + + def _evaluate_predictions_on_coco(self, coco_gt, coco_results): + """ + Evaluate the coco results using COCOEval API. + """ + assert len(coco_results) > 0 + + coco_dt = coco_gt.loadRes(coco_results) + + # Only bbox is supported for now + coco_eval = RotatedCOCOeval(coco_gt, coco_dt, iouType="bbox") + + coco_eval.evaluate() + coco_eval.accumulate() + coco_eval.summarize() + + return coco_eval diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/evaluation/sem_seg_evaluation.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/evaluation/sem_seg_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..1c2f3f5a659bc270d313efb053908d9b1e942f44 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/evaluation/sem_seg_evaluation.py @@ -0,0 +1,265 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+import itertools +import json +import logging +import numpy as np +import os +from collections import OrderedDict +from typing import Optional, Union +import annotator.oneformer.pycocotools.mask as mask_util +import torch +from PIL import Image + +from annotator.oneformer.detectron2.data import DatasetCatalog, MetadataCatalog +from annotator.oneformer.detectron2.utils.comm import all_gather, is_main_process, synchronize +from annotator.oneformer.detectron2.utils.file_io import PathManager + +from .evaluator import DatasetEvaluator + +_CV2_IMPORTED = True +try: + import cv2 # noqa +except ImportError: + # OpenCV is an optional dependency at the moment + _CV2_IMPORTED = False + + +def load_image_into_numpy_array( + filename: str, + copy: bool = False, + dtype: Optional[Union[np.dtype, str]] = None, +) -> np.ndarray: + with PathManager.open(filename, "rb") as f: + array = np.array(Image.open(f), copy=copy, dtype=dtype) + return array + + +class SemSegEvaluator(DatasetEvaluator): + """ + Evaluate semantic segmentation metrics. + """ + + def __init__( + self, + dataset_name, + distributed=True, + output_dir=None, + *, + sem_seg_loading_fn=load_image_into_numpy_array, + num_classes=None, + ignore_label=None, + ): + """ + Args: + dataset_name (str): name of the dataset to be evaluated. + distributed (bool): if True, will collect results from all ranks for evaluation. + Otherwise, will evaluate the results in the current process. + output_dir (str): an output directory to dump results. + sem_seg_loading_fn: function to read sem seg file and load into numpy array. + Default provided, but projects can customize. + num_classes, ignore_label: deprecated argument + """ + self._logger = logging.getLogger(__name__) + if num_classes is not None: + self._logger.warn( + "SemSegEvaluator(num_classes) is deprecated! It should be obtained from metadata." + ) + if ignore_label is not None: + self._logger.warn( + "SemSegEvaluator(ignore_label) is deprecated! It should be obtained from metadata." + ) + self._dataset_name = dataset_name + self._distributed = distributed + self._output_dir = output_dir + + self._cpu_device = torch.device("cpu") + + self.input_file_to_gt_file = { + dataset_record["file_name"]: dataset_record["sem_seg_file_name"] + for dataset_record in DatasetCatalog.get(dataset_name) + } + + meta = MetadataCatalog.get(dataset_name) + # Dict that maps contiguous training ids to COCO category ids + try: + c2d = meta.stuff_dataset_id_to_contiguous_id + self._contiguous_id_to_dataset_id = {v: k for k, v in c2d.items()} + except AttributeError: + self._contiguous_id_to_dataset_id = None + self._class_names = meta.stuff_classes + self.sem_seg_loading_fn = sem_seg_loading_fn + self._num_classes = len(meta.stuff_classes) + if num_classes is not None: + assert self._num_classes == num_classes, f"{self._num_classes} != {num_classes}" + self._ignore_label = ignore_label if ignore_label is not None else meta.ignore_label + + # This is because cv2.erode did not work for int datatype. Only works for uint8. + self._compute_boundary_iou = True + if not _CV2_IMPORTED: + self._compute_boundary_iou = False + self._logger.warn( + """Boundary IoU calculation requires OpenCV. B-IoU metrics are + not going to be computed because OpenCV is not available to import.""" + ) + if self._num_classes >= np.iinfo(np.uint8).max: + self._compute_boundary_iou = False + self._logger.warn( + f"""SemSegEvaluator(num_classes) is more than supported value for Boundary IoU calculation! + B-IoU metrics are not going to be computed. 
Max allowed value (exclusive) + for num_classes for calculating Boundary IoU is {np.iinfo(np.uint8).max}. + The number of classes of dataset {self._dataset_name} is {self._num_classes}""" + ) + + def reset(self): + self._conf_matrix = np.zeros((self._num_classes + 1, self._num_classes + 1), dtype=np.int64) + self._b_conf_matrix = np.zeros( + (self._num_classes + 1, self._num_classes + 1), dtype=np.int64 + ) + self._predictions = [] + + def process(self, inputs, outputs): + """ + Args: + inputs: the inputs to a model. + It is a list of dicts. Each dict corresponds to an image and + contains keys like "height", "width", "file_name". + outputs: the outputs of a model. It is either list of semantic segmentation predictions + (Tensor [H, W]) or list of dicts with key "sem_seg" that contains semantic + segmentation prediction in the same format. + """ + for input, output in zip(inputs, outputs): + output = output["sem_seg"].argmax(dim=0).to(self._cpu_device) + pred = np.array(output, dtype=np.int) + gt_filename = self.input_file_to_gt_file[input["file_name"]] + gt = self.sem_seg_loading_fn(gt_filename, dtype=np.int) + + gt[gt == self._ignore_label] = self._num_classes + + self._conf_matrix += np.bincount( + (self._num_classes + 1) * pred.reshape(-1) + gt.reshape(-1), + minlength=self._conf_matrix.size, + ).reshape(self._conf_matrix.shape) + + if self._compute_boundary_iou: + b_gt = self._mask_to_boundary(gt.astype(np.uint8)) + b_pred = self._mask_to_boundary(pred.astype(np.uint8)) + + self._b_conf_matrix += np.bincount( + (self._num_classes + 1) * b_pred.reshape(-1) + b_gt.reshape(-1), + minlength=self._conf_matrix.size, + ).reshape(self._conf_matrix.shape) + + self._predictions.extend(self.encode_json_sem_seg(pred, input["file_name"])) + + def evaluate(self): + """ + Evaluates standard semantic segmentation metrics (http://cocodataset.org/#stuff-eval): + + * Mean intersection-over-union averaged across classes (mIoU) + * Frequency Weighted IoU (fwIoU) + * Mean pixel accuracy averaged across classes (mACC) + * Pixel Accuracy (pACC) + """ + if self._distributed: + synchronize() + conf_matrix_list = all_gather(self._conf_matrix) + b_conf_matrix_list = all_gather(self._b_conf_matrix) + self._predictions = all_gather(self._predictions) + self._predictions = list(itertools.chain(*self._predictions)) + if not is_main_process(): + return + + self._conf_matrix = np.zeros_like(self._conf_matrix) + for conf_matrix in conf_matrix_list: + self._conf_matrix += conf_matrix + + self._b_conf_matrix = np.zeros_like(self._b_conf_matrix) + for b_conf_matrix in b_conf_matrix_list: + self._b_conf_matrix += b_conf_matrix + + if self._output_dir: + PathManager.mkdirs(self._output_dir) + file_path = os.path.join(self._output_dir, "sem_seg_predictions.json") + with PathManager.open(file_path, "w") as f: + f.write(json.dumps(self._predictions)) + + acc = np.full(self._num_classes, np.nan, dtype=np.float) + iou = np.full(self._num_classes, np.nan, dtype=np.float) + tp = self._conf_matrix.diagonal()[:-1].astype(np.float) + pos_gt = np.sum(self._conf_matrix[:-1, :-1], axis=0).astype(np.float) + class_weights = pos_gt / np.sum(pos_gt) + pos_pred = np.sum(self._conf_matrix[:-1, :-1], axis=1).astype(np.float) + acc_valid = pos_gt > 0 + acc[acc_valid] = tp[acc_valid] / pos_gt[acc_valid] + union = pos_gt + pos_pred - tp + iou_valid = np.logical_and(acc_valid, union > 0) + iou[iou_valid] = tp[iou_valid] / union[iou_valid] + macc = np.sum(acc[acc_valid]) / np.sum(acc_valid) + miou = np.sum(iou[iou_valid]) / np.sum(iou_valid) + 
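process() above accumulates the confusion matrix in one vectorized step: every (prediction, ground-truth) pixel pair is encoded as a single integer, histogrammed with np.bincount, and reshaped into an (num_classes + 1) x (num_classes + 1) matrix, with ignore-label pixels remapped into the extra last bin. The toy example below shows the same trick on a 3-class case. One caveat worth noting: the np.int / np.float aliases used in this file are deprecated and were removed in NumPy 1.24, so on recent NumPy these calls need plain int / float (or np.int64 / np.float64) instead.

```python
import numpy as np

num_classes = 3
pred = np.array([0, 1, 2, 2, 1, 0])   # predicted class per pixel (flattened)
gt   = np.array([0, 1, 1, 2, 1, 3])   # ground truth; 3 is the extra "ignore" bucket here

# Same trick as above: encode each (pred, gt) pair as one integer, histogram, reshape.
conf = np.bincount(
    (num_classes + 1) * pred + gt,
    minlength=(num_classes + 1) ** 2,
).reshape(num_classes + 1, num_classes + 1)

print(conf)  # conf[i, j] counts pixels predicted as class i whose ground truth is class j
```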
fiou = np.sum(iou[iou_valid] * class_weights[iou_valid]) + pacc = np.sum(tp) / np.sum(pos_gt) + + if self._compute_boundary_iou: + b_iou = np.full(self._num_classes, np.nan, dtype=np.float) + b_tp = self._b_conf_matrix.diagonal()[:-1].astype(np.float) + b_pos_gt = np.sum(self._b_conf_matrix[:-1, :-1], axis=0).astype(np.float) + b_pos_pred = np.sum(self._b_conf_matrix[:-1, :-1], axis=1).astype(np.float) + b_union = b_pos_gt + b_pos_pred - b_tp + b_iou_valid = b_union > 0 + b_iou[b_iou_valid] = b_tp[b_iou_valid] / b_union[b_iou_valid] + + res = {} + res["mIoU"] = 100 * miou + res["fwIoU"] = 100 * fiou + for i, name in enumerate(self._class_names): + res[f"IoU-{name}"] = 100 * iou[i] + if self._compute_boundary_iou: + res[f"BoundaryIoU-{name}"] = 100 * b_iou[i] + res[f"min(IoU, B-Iou)-{name}"] = 100 * min(iou[i], b_iou[i]) + res["mACC"] = 100 * macc + res["pACC"] = 100 * pacc + for i, name in enumerate(self._class_names): + res[f"ACC-{name}"] = 100 * acc[i] + + if self._output_dir: + file_path = os.path.join(self._output_dir, "sem_seg_evaluation.pth") + with PathManager.open(file_path, "wb") as f: + torch.save(res, f) + results = OrderedDict({"sem_seg": res}) + self._logger.info(results) + return results + + def encode_json_sem_seg(self, sem_seg, input_file_name): + """ + Convert semantic segmentation to COCO stuff format with segments encoded as RLEs. + See http://cocodataset.org/#format-results + """ + json_list = [] + for label in np.unique(sem_seg): + if self._contiguous_id_to_dataset_id is not None: + assert ( + label in self._contiguous_id_to_dataset_id + ), "Label {} is not in the metadata info for {}".format(label, self._dataset_name) + dataset_id = self._contiguous_id_to_dataset_id[label] + else: + dataset_id = int(label) + mask = (sem_seg == label).astype(np.uint8) + mask_rle = mask_util.encode(np.array(mask[:, :, None], order="F"))[0] + mask_rle["counts"] = mask_rle["counts"].decode("utf-8") + json_list.append( + {"file_name": input_file_name, "category_id": dataset_id, "segmentation": mask_rle} + ) + return json_list + + def _mask_to_boundary(self, mask: np.ndarray, dilation_ratio=0.02): + assert mask.ndim == 2, "mask_to_boundary expects a 2-dimensional image" + h, w = mask.shape + diag_len = np.sqrt(h**2 + w**2) + dilation = max(1, int(round(dilation_ratio * diag_len))) + kernel = np.ones((3, 3), dtype=np.uint8) + + padded_mask = cv2.copyMakeBorder(mask, 1, 1, 1, 1, cv2.BORDER_CONSTANT, value=0) + eroded_mask_with_padding = cv2.erode(padded_mask, kernel, iterations=dilation) + eroded_mask = eroded_mask_with_padding[1:-1, 1:-1] + boundary = mask - eroded_mask + return boundary diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/evaluation/testing.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/evaluation/testing.py new file mode 100644 index 0000000000000000000000000000000000000000..9e5ae625bb0593fc20739dd3ea549157e4df4f3d --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/evaluation/testing.py @@ -0,0 +1,85 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import logging +import numpy as np +import pprint +import sys +from collections.abc import Mapping + + +def print_csv_format(results): + """ + Print main metrics in a format similar to Detectron, + so that they are easy to copypaste into a spreadsheet. 
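evaluate() above derives every reported number from that confusion matrix: per-class accuracy and IoU from the diagonal, mACC/mIoU as class means, fwIoU as a ground-truth-frequency-weighted mean, and pACC as overall pixel accuracy. A compact restatement of those formulas on a toy matrix (illustrative data; the per-class validity masks are omitted because every class appears here):

```python
import numpy as np

# Toy 3-class confusion matrix, conf[pred, gt], after dropping the ignore row/column
# exactly as conf_matrix[:-1, :-1] does above.
conf = np.array([[50.0,  5.0,  0.0],
                 [ 3.0, 40.0,  2.0],
                 [ 1.0,  6.0, 30.0]])

tp = conf.diagonal()
pos_gt = conf.sum(axis=0)        # ground-truth pixels per class
pos_pred = conf.sum(axis=1)      # predicted pixels per class
union = pos_gt + pos_pred - tp

acc = tp / pos_gt
iou = tp / union
class_weights = pos_gt / pos_gt.sum()

print("mACC ", 100 * acc.mean())
print("mIoU ", 100 * iou.mean())
print("fwIoU", 100 * (iou * class_weights).sum())
print("pACC ", 100 * tp.sum() / pos_gt.sum())
```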
+ + Args: + results (OrderedDict[dict]): task_name -> {metric -> score} + unordered dict can also be printed, but in arbitrary order + """ + assert isinstance(results, Mapping) or not len(results), results + logger = logging.getLogger(__name__) + for task, res in results.items(): + if isinstance(res, Mapping): + # Don't print "AP-category" metrics since they are usually not tracked. + important_res = [(k, v) for k, v in res.items() if "-" not in k] + logger.info("copypaste: Task: {}".format(task)) + logger.info("copypaste: " + ",".join([k[0] for k in important_res])) + logger.info("copypaste: " + ",".join(["{0:.4f}".format(k[1]) for k in important_res])) + else: + logger.info(f"copypaste: {task}={res}") + + +def verify_results(cfg, results): + """ + Args: + results (OrderedDict[dict]): task_name -> {metric -> score} + + Returns: + bool: whether the verification succeeds or not + """ + expected_results = cfg.TEST.EXPECTED_RESULTS + if not len(expected_results): + return True + + ok = True + for task, metric, expected, tolerance in expected_results: + actual = results[task].get(metric, None) + if actual is None: + ok = False + continue + if not np.isfinite(actual): + ok = False + continue + diff = abs(actual - expected) + if diff > tolerance: + ok = False + + logger = logging.getLogger(__name__) + if not ok: + logger.error("Result verification failed!") + logger.error("Expected Results: " + str(expected_results)) + logger.error("Actual Results: " + pprint.pformat(results)) + + sys.exit(1) + else: + logger.info("Results verification passed.") + return ok + + +def flatten_results_dict(results): + """ + Expand a hierarchical dict of scalars into a flat dict of scalars. + If results[k1][k2][k3] = v, the returned dict will have the entry + {"k1/k2/k3": v}. + + Args: + results (dict): + """ + r = {} + for k, v in results.items(): + if isinstance(v, Mapping): + v = flatten_results_dict(v) + for kk, vv in v.items(): + r[k + "/" + kk] = vv + else: + r[k] = v + return r diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/export/README.md b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/export/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c86ff62516f4e8e4b1a6c1f33f11192933cf3861 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/export/README.md @@ -0,0 +1,15 @@ + +This directory contains code to prepare a detectron2 model for deployment. +Currently it supports exporting a detectron2 model to TorchScript, ONNX, or (deprecated) Caffe2 format. + +Please see [documentation](https://detectron2.readthedocs.io/tutorials/deployment.html) for its usage. + + +### Acknowledgements + +Thanks to Mobile Vision team at Facebook for developing the Caffe2 conversion tools. + +Thanks to Computing Platform Department - PAI team at Alibaba Group (@bddpqq, @chenbohua3) who +help export Detectron2 models to TorchScript. + +Thanks to ONNX Converter team at Microsoft who help export Detectron2 models to ONNX. 
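flatten_results_dict in testing.py above is easiest to understand from an example: nested metric dicts are joined with "/" into flat keys, which is convenient for loggers that only accept scalars. The snippet below mirrors its behaviour on made-up results (a simplified re-implementation, not an import from the module):

```python
results = {"bbox": {"AP": 40.2, "AP50": 61.0}, "segm": {"AP": 36.7}, "iter": 90000}


def flatten(results):
    flat = {}
    for k, v in results.items():
        if isinstance(v, dict):
            for kk, vv in flatten(v).items():
                flat[f"{k}/{kk}"] = vv
        else:
            flat[k] = v
    return flat


print(flatten(results))
# {'bbox/AP': 40.2, 'bbox/AP50': 61.0, 'segm/AP': 36.7, 'iter': 90000}
```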
diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/export/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/export/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5a58758f64aae6071fa688be4400622ce6036efa --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/export/__init__.py @@ -0,0 +1,30 @@ +# -*- coding: utf-8 -*- + +import warnings + +from .flatten import TracingAdapter +from .torchscript import dump_torchscript_IR, scripting_with_instances + +try: + from caffe2.proto import caffe2_pb2 as _tmp + from caffe2.python import core + + # caffe2 is optional +except ImportError: + pass +else: + from .api import * + + +# TODO: Update ONNX Opset version and run tests when a newer PyTorch is supported +STABLE_ONNX_OPSET_VERSION = 11 + + +def add_export_config(cfg): + warnings.warn( + "add_export_config has been deprecated and behaves as no-op function.", DeprecationWarning + ) + return cfg + + +__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/export/api.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/export/api.py new file mode 100644 index 0000000000000000000000000000000000000000..cf1a27a4806ca83d97f5cd8c27726ec29f4e7e50 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/export/api.py @@ -0,0 +1,230 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import copy +import logging +import os +import torch +from caffe2.proto import caffe2_pb2 +from torch import nn + +from annotator.oneformer.detectron2.config import CfgNode +from annotator.oneformer.detectron2.utils.file_io import PathManager + +from .caffe2_inference import ProtobufDetectionModel +from .caffe2_modeling import META_ARCH_CAFFE2_EXPORT_TYPE_MAP, convert_batched_inputs_to_c2_format +from .shared import get_pb_arg_vali, get_pb_arg_vals, save_graph + +__all__ = [ + "Caffe2Model", + "Caffe2Tracer", +] + + +class Caffe2Tracer: + """ + Make a detectron2 model traceable with Caffe2 operators. + This class creates a traceable version of a detectron2 model which: + + 1. Rewrite parts of the model using ops in Caffe2. Note that some ops do + not have GPU implementation in Caffe2. + 2. Remove post-processing and only produce raw layer outputs + + After making a traceable model, the class provide methods to export such a + model to different deployment formats. + Exported graph produced by this class take two input tensors: + + 1. (1, C, H, W) float "data" which is an image (usually in [0, 255]). + (H, W) often has to be padded to multiple of 32 (depend on the model + architecture). + 2. 1x3 float "im_info", each row of which is (height, width, 1.0). + Height and width are true image shapes before padding. + + The class currently only supports models using builtin meta architectures. + Batch inference is not supported, and contributions are welcome. + """ + + def __init__(self, cfg: CfgNode, model: nn.Module, inputs): + """ + Args: + cfg (CfgNode): a detectron2 config used to construct caffe2-compatible model. + model (nn.Module): An original pytorch model. Must be among a few official models + in detectron2 that can be converted to become caffe2-compatible automatically. + Weights have to be already loaded to this model. + inputs: sample inputs that the given model takes for inference. 
+ Will be used to trace the model. For most models, random inputs with + no detected objects will not work as they lead to wrong traces. + """ + assert isinstance(cfg, CfgNode), cfg + assert isinstance(model, torch.nn.Module), type(model) + + # TODO make it support custom models, by passing in c2 model directly + C2MetaArch = META_ARCH_CAFFE2_EXPORT_TYPE_MAP[cfg.MODEL.META_ARCHITECTURE] + self.traceable_model = C2MetaArch(cfg, copy.deepcopy(model)) + self.inputs = inputs + self.traceable_inputs = self.traceable_model.get_caffe2_inputs(inputs) + + def export_caffe2(self): + """ + Export the model to Caffe2's protobuf format. + The returned object can be saved with its :meth:`.save_protobuf()` method. + The result can be loaded and executed using Caffe2 runtime. + + Returns: + :class:`Caffe2Model` + """ + from .caffe2_export import export_caffe2_detection_model + + predict_net, init_net = export_caffe2_detection_model( + self.traceable_model, self.traceable_inputs + ) + return Caffe2Model(predict_net, init_net) + + def export_onnx(self): + """ + Export the model to ONNX format. + Note that the exported model contains custom ops only available in caffe2, therefore it + cannot be directly executed by other runtime (such as onnxruntime or TensorRT). + Post-processing or transformation passes may be applied on the model to accommodate + different runtimes, but we currently do not provide support for them. + + Returns: + onnx.ModelProto: an onnx model. + """ + from .caffe2_export import export_onnx_model as export_onnx_model_impl + + return export_onnx_model_impl(self.traceable_model, (self.traceable_inputs,)) + + def export_torchscript(self): + """ + Export the model to a ``torch.jit.TracedModule`` by tracing. + The returned object can be saved to a file by ``.save()``. + + Returns: + torch.jit.TracedModule: a torch TracedModule + """ + logger = logging.getLogger(__name__) + logger.info("Tracing the model with torch.jit.trace ...") + with torch.no_grad(): + return torch.jit.trace(self.traceable_model, (self.traceable_inputs,)) + + +class Caffe2Model(nn.Module): + """ + A wrapper around the traced model in Caffe2's protobuf format. + The exported graph has different inputs/outputs from the original Pytorch + model, as explained in :class:`Caffe2Tracer`. This class wraps around the + exported graph to simulate the same interface as the original Pytorch model. + It also provides functions to save/load models in Caffe2's format.' + + Examples: + :: + c2_model = Caffe2Tracer(cfg, torch_model, inputs).export_caffe2() + inputs = [{"image": img_tensor_CHW}] + outputs = c2_model(inputs) + orig_outputs = torch_model(inputs) + """ + + def __init__(self, predict_net, init_net): + super().__init__() + self.eval() # always in eval mode + self._predict_net = predict_net + self._init_net = init_net + self._predictor = None + + __init__.__HIDE_SPHINX_DOC__ = True + + @property + def predict_net(self): + """ + caffe2.core.Net: the underlying caffe2 predict net + """ + return self._predict_net + + @property + def init_net(self): + """ + caffe2.core.Net: the underlying caffe2 init net + """ + return self._init_net + + def save_protobuf(self, output_dir): + """ + Save the model as caffe2's protobuf format. + It saves the following files: + + * "model.pb": definition of the graph. Can be visualized with + tools like `netron `_. + * "model_init.pb": model parameters + * "model.pbtxt": human-readable definition of the graph. Not + needed for deployment. 
+ + Args: + output_dir (str): the output directory to save protobuf files. + """ + logger = logging.getLogger(__name__) + logger.info("Saving model to {} ...".format(output_dir)) + if not PathManager.exists(output_dir): + PathManager.mkdirs(output_dir) + + with PathManager.open(os.path.join(output_dir, "model.pb"), "wb") as f: + f.write(self._predict_net.SerializeToString()) + with PathManager.open(os.path.join(output_dir, "model.pbtxt"), "w") as f: + f.write(str(self._predict_net)) + with PathManager.open(os.path.join(output_dir, "model_init.pb"), "wb") as f: + f.write(self._init_net.SerializeToString()) + + def save_graph(self, output_file, inputs=None): + """ + Save the graph as SVG format. + + Args: + output_file (str): a SVG file + inputs: optional inputs given to the model. + If given, the inputs will be used to run the graph to record + shape of every tensor. The shape information will be + saved together with the graph. + """ + from .caffe2_export import run_and_save_graph + + if inputs is None: + save_graph(self._predict_net, output_file, op_only=False) + else: + size_divisibility = get_pb_arg_vali(self._predict_net, "size_divisibility", 0) + device = get_pb_arg_vals(self._predict_net, "device", b"cpu").decode("ascii") + inputs = convert_batched_inputs_to_c2_format(inputs, size_divisibility, device) + inputs = [x.cpu().numpy() for x in inputs] + run_and_save_graph(self._predict_net, self._init_net, inputs, output_file) + + @staticmethod + def load_protobuf(dir): + """ + Args: + dir (str): a directory used to save Caffe2Model with + :meth:`save_protobuf`. + The files "model.pb" and "model_init.pb" are needed. + + Returns: + Caffe2Model: the caffe2 model loaded from this directory. + """ + predict_net = caffe2_pb2.NetDef() + with PathManager.open(os.path.join(dir, "model.pb"), "rb") as f: + predict_net.ParseFromString(f.read()) + + init_net = caffe2_pb2.NetDef() + with PathManager.open(os.path.join(dir, "model_init.pb"), "rb") as f: + init_net.ParseFromString(f.read()) + + return Caffe2Model(predict_net, init_net) + + def __call__(self, inputs): + """ + An interface that wraps around a Caffe2 model and mimics detectron2's models' + input/output format. See details about the format at :doc:`/tutorials/models`. + This is used to compare the outputs of caffe2 model with its original torch model. + + Due to the extra conversion between Pytorch/Caffe2, this method is not meant for + benchmark. Because of the conversion, this method also has dependency + on detectron2 in order to convert to detectron2's output format. + """ + if self._predictor is None: + self._predictor = ProtobufDetectionModel(self._predict_net, self._init_net) + return self._predictor(inputs) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/export/c10.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/export/c10.py new file mode 100644 index 0000000000000000000000000000000000000000..fde3fb71189e6f1061e83b878bfdd16add7d8350 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/export/c10.py @@ -0,0 +1,557 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
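save_protobuf and load_protobuf in api.py above are symmetric, and Caffe2Model.__call__ mimics the original detectron2 inference interface, so a typical round trip looks like the hedged sketch below. cfg, torch_model, and the sample image are placeholders, and this only runs with a caffe2-enabled PyTorch build and one of the supported meta-architectures.

```python
# Hypothetical end-to-end flow; cfg and torch_model must be prepared by the caller.
import torch
from annotator.oneformer.detectron2.export.api import Caffe2Model, Caffe2Tracer

sample_inputs = [{"image": torch.zeros(3, 480, 640)}]       # detectron2-style inputs
c2_model = Caffe2Tracer(cfg, torch_model, sample_inputs).export_caffe2()

c2_model.save_protobuf("./caffe2_model")                    # writes model.pb / model_init.pb / model.pbtxt
reloaded = Caffe2Model.load_protobuf("./caffe2_model")      # needs model.pb and model_init.pb
outputs = reloaded(sample_inputs)                           # same input/output format as the torch model
```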
+ +import math +from typing import Dict +import torch +import torch.nn.functional as F + +from annotator.oneformer.detectron2.layers import ShapeSpec, cat +from annotator.oneformer.detectron2.layers.roi_align_rotated import ROIAlignRotated +from annotator.oneformer.detectron2.modeling import poolers +from annotator.oneformer.detectron2.modeling.proposal_generator import rpn +from annotator.oneformer.detectron2.modeling.roi_heads.mask_head import mask_rcnn_inference +from annotator.oneformer.detectron2.structures import Boxes, ImageList, Instances, Keypoints, RotatedBoxes + +from .shared import alias, to_device + + +""" +This file contains caffe2-compatible implementation of several detectron2 components. +""" + + +class Caffe2Boxes(Boxes): + """ + Representing a list of detectron2.structures.Boxes from minibatch, each box + is represented by a 5d vector (batch index + 4 coordinates), or a 6d vector + (batch index + 5 coordinates) for RotatedBoxes. + """ + + def __init__(self, tensor): + assert isinstance(tensor, torch.Tensor) + assert tensor.dim() == 2 and tensor.size(-1) in [4, 5, 6], tensor.size() + # TODO: make tensor immutable when dim is Nx5 for Boxes, + # and Nx6 for RotatedBoxes? + self.tensor = tensor + + +# TODO clean up this class, maybe just extend Instances +class InstancesList(object): + """ + Tensor representation of a list of Instances object for a batch of images. + + When dealing with a batch of images with Caffe2 ops, a list of bboxes + (instances) are usually represented by single Tensor with size + (sigma(Ni), 5) or (sigma(Ni), 4) plus a batch split Tensor. This class is + for providing common functions to convert between these two representations. + """ + + def __init__(self, im_info, indices, extra_fields=None): + # [N, 3] -> (H, W, Scale) + self.im_info = im_info + # [N,] -> indice of batch to which the instance belongs + self.indices = indices + # [N, ...] + self.batch_extra_fields = extra_fields or {} + + self.image_size = self.im_info + + def get_fields(self): + """like `get_fields` in the Instances object, + but return each field in tensor representations""" + ret = {} + for k, v in self.batch_extra_fields.items(): + # if isinstance(v, torch.Tensor): + # tensor_rep = v + # elif isinstance(v, (Boxes, Keypoints)): + # tensor_rep = v.tensor + # else: + # raise ValueError("Can't find tensor representation for: {}".format()) + ret[k] = v + return ret + + def has(self, name): + return name in self.batch_extra_fields + + def set(self, name, value): + # len(tensor) is a bad practice that generates ONNX constants during tracing. 
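Caffe2Boxes and InstancesList above flatten a whole minibatch into single tensors: each box row carries its batch index in column 0, and a parallel indices tensor records which image every row belongs to. A toy construction of that layout (values are illustrative only):

```python
import torch

# Three boxes from two images in the (batch_idx, x1, y1, x2, y2) layout that
# Caffe2Boxes stores; `indices` is that batch column viewed on its own.
rois = torch.tensor([
    [0.0, 10.0, 10.0, 50.0, 60.0],   # box 1 of image 0
    [0.0, 20.0, 30.0, 80.0, 90.0],   # box 2 of image 0
    [1.0,  5.0,  5.0, 40.0, 40.0],   # box 1 of image 1
])
indices = rois[:, 0]
boxes_per_image = [int((indices == i).sum()) for i in range(2)]
print(boxes_per_image)  # [2, 1]
```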
+ # Although not a problem for the `assert` statement below, torch ONNX exporter + # still raises a misleading warning as it does not this call comes from `assert` + if isinstance(value, Boxes): + data_len = value.tensor.shape[0] + elif isinstance(value, torch.Tensor): + data_len = value.shape[0] + else: + data_len = len(value) + if len(self.batch_extra_fields): + assert ( + len(self) == data_len + ), "Adding a field of length {} to a Instances of length {}".format(data_len, len(self)) + self.batch_extra_fields[name] = value + + def __getattr__(self, name): + if name not in self.batch_extra_fields: + raise AttributeError("Cannot find field '{}' in the given Instances!".format(name)) + return self.batch_extra_fields[name] + + def __len__(self): + return len(self.indices) + + def flatten(self): + ret = [] + for _, v in self.batch_extra_fields.items(): + if isinstance(v, (Boxes, Keypoints)): + ret.append(v.tensor) + else: + ret.append(v) + return ret + + @staticmethod + def to_d2_instances_list(instances_list): + """ + Convert InstancesList to List[Instances]. The input `instances_list` can + also be a List[Instances], in this case this method is a non-op. + """ + if not isinstance(instances_list, InstancesList): + assert all(isinstance(x, Instances) for x in instances_list) + return instances_list + + ret = [] + for i, info in enumerate(instances_list.im_info): + instances = Instances(torch.Size([int(info[0].item()), int(info[1].item())])) + + ids = instances_list.indices == i + for k, v in instances_list.batch_extra_fields.items(): + if isinstance(v, torch.Tensor): + instances.set(k, v[ids]) + continue + elif isinstance(v, Boxes): + instances.set(k, v[ids, -4:]) + continue + + target_type, tensor_source = v + assert isinstance(tensor_source, torch.Tensor) + assert tensor_source.shape[0] == instances_list.indices.shape[0] + tensor_source = tensor_source[ids] + + if issubclass(target_type, Boxes): + instances.set(k, Boxes(tensor_source[:, -4:])) + elif issubclass(target_type, Keypoints): + instances.set(k, Keypoints(tensor_source)) + elif issubclass(target_type, torch.Tensor): + instances.set(k, tensor_source) + else: + raise ValueError("Can't handle targe type: {}".format(target_type)) + + ret.append(instances) + return ret + + +class Caffe2Compatible(object): + """ + A model can inherit this class to indicate that it can be traced and deployed with caffe2. + """ + + def _get_tensor_mode(self): + return self._tensor_mode + + def _set_tensor_mode(self, v): + self._tensor_mode = v + + tensor_mode = property(_get_tensor_mode, _set_tensor_mode) + """ + If true, the model expects C2-style tensor only inputs/outputs format. 
+ """ + + +class Caffe2RPN(Caffe2Compatible, rpn.RPN): + @classmethod + def from_config(cls, cfg, input_shape: Dict[str, ShapeSpec]): + ret = super(Caffe2Compatible, cls).from_config(cfg, input_shape) + assert tuple(cfg.MODEL.RPN.BBOX_REG_WEIGHTS) == (1.0, 1.0, 1.0, 1.0) or tuple( + cfg.MODEL.RPN.BBOX_REG_WEIGHTS + ) == (1.0, 1.0, 1.0, 1.0, 1.0) + return ret + + def _generate_proposals( + self, images, objectness_logits_pred, anchor_deltas_pred, gt_instances=None + ): + assert isinstance(images, ImageList) + if self.tensor_mode: + im_info = images.image_sizes + else: + im_info = torch.tensor([[im_sz[0], im_sz[1], 1.0] for im_sz in images.image_sizes]).to( + images.tensor.device + ) + assert isinstance(im_info, torch.Tensor) + + rpn_rois_list = [] + rpn_roi_probs_list = [] + for scores, bbox_deltas, cell_anchors_tensor, feat_stride in zip( + objectness_logits_pred, + anchor_deltas_pred, + [b for (n, b) in self.anchor_generator.cell_anchors.named_buffers()], + self.anchor_generator.strides, + ): + scores = scores.detach() + bbox_deltas = bbox_deltas.detach() + + rpn_rois, rpn_roi_probs = torch.ops._caffe2.GenerateProposals( + scores, + bbox_deltas, + im_info, + cell_anchors_tensor, + spatial_scale=1.0 / feat_stride, + pre_nms_topN=self.pre_nms_topk[self.training], + post_nms_topN=self.post_nms_topk[self.training], + nms_thresh=self.nms_thresh, + min_size=self.min_box_size, + # correct_transform_coords=True, # deprecated argument + angle_bound_on=True, # Default + angle_bound_lo=-180, + angle_bound_hi=180, + clip_angle_thresh=1.0, # Default + legacy_plus_one=False, + ) + rpn_rois_list.append(rpn_rois) + rpn_roi_probs_list.append(rpn_roi_probs) + + # For FPN in D2, in RPN all proposals from different levels are concated + # together, ranked and picked by top post_nms_topk. Then in ROIPooler + # it calculates level_assignments and calls the RoIAlign from + # the corresponding level. + + if len(objectness_logits_pred) == 1: + rpn_rois = rpn_rois_list[0] + rpn_roi_probs = rpn_roi_probs_list[0] + else: + assert len(rpn_rois_list) == len(rpn_roi_probs_list) + rpn_post_nms_topN = self.post_nms_topk[self.training] + + device = rpn_rois_list[0].device + input_list = [to_device(x, "cpu") for x in (rpn_rois_list + rpn_roi_probs_list)] + + # TODO remove this after confirming rpn_max_level/rpn_min_level + # is not needed in CollectRpnProposals. + feature_strides = list(self.anchor_generator.strides) + rpn_min_level = int(math.log2(feature_strides[0])) + rpn_max_level = int(math.log2(feature_strides[-1])) + assert (rpn_max_level - rpn_min_level + 1) == len( + rpn_rois_list + ), "CollectRpnProposals requires continuous levels" + + rpn_rois = torch.ops._caffe2.CollectRpnProposals( + input_list, + # NOTE: in current implementation, rpn_max_level and rpn_min_level + # are not needed, only the subtraction of two matters and it + # can be infer from the number of inputs. Keep them now for + # consistency. 
+ rpn_max_level=2 + len(rpn_rois_list) - 1, + rpn_min_level=2, + rpn_post_nms_topN=rpn_post_nms_topN, + ) + rpn_rois = to_device(rpn_rois, device) + rpn_roi_probs = [] + + proposals = self.c2_postprocess(im_info, rpn_rois, rpn_roi_probs, self.tensor_mode) + return proposals, {} + + def forward(self, images, features, gt_instances=None): + assert not self.training + features = [features[f] for f in self.in_features] + objectness_logits_pred, anchor_deltas_pred = self.rpn_head(features) + return self._generate_proposals( + images, + objectness_logits_pred, + anchor_deltas_pred, + gt_instances, + ) + + @staticmethod + def c2_postprocess(im_info, rpn_rois, rpn_roi_probs, tensor_mode): + proposals = InstancesList( + im_info=im_info, + indices=rpn_rois[:, 0], + extra_fields={ + "proposal_boxes": Caffe2Boxes(rpn_rois), + "objectness_logits": (torch.Tensor, rpn_roi_probs), + }, + ) + if not tensor_mode: + proposals = InstancesList.to_d2_instances_list(proposals) + else: + proposals = [proposals] + return proposals + + +class Caffe2ROIPooler(Caffe2Compatible, poolers.ROIPooler): + @staticmethod + def c2_preprocess(box_lists): + assert all(isinstance(x, Boxes) for x in box_lists) + if all(isinstance(x, Caffe2Boxes) for x in box_lists): + # input is pure-tensor based + assert len(box_lists) == 1 + pooler_fmt_boxes = box_lists[0].tensor + else: + pooler_fmt_boxes = poolers.convert_boxes_to_pooler_format(box_lists) + return pooler_fmt_boxes + + def forward(self, x, box_lists): + assert not self.training + + pooler_fmt_boxes = self.c2_preprocess(box_lists) + num_level_assignments = len(self.level_poolers) + + if num_level_assignments == 1: + if isinstance(self.level_poolers[0], ROIAlignRotated): + c2_roi_align = torch.ops._caffe2.RoIAlignRotated + aligned = True + else: + c2_roi_align = torch.ops._caffe2.RoIAlign + aligned = self.level_poolers[0].aligned + + x0 = x[0] + if x0.is_quantized: + x0 = x0.dequantize() + + out = c2_roi_align( + x0, + pooler_fmt_boxes, + order="NCHW", + spatial_scale=float(self.level_poolers[0].spatial_scale), + pooled_h=int(self.output_size[0]), + pooled_w=int(self.output_size[1]), + sampling_ratio=int(self.level_poolers[0].sampling_ratio), + aligned=aligned, + ) + return out + + device = pooler_fmt_boxes.device + assert ( + self.max_level - self.min_level + 1 == 4 + ), "Currently DistributeFpnProposals only support 4 levels" + fpn_outputs = torch.ops._caffe2.DistributeFpnProposals( + to_device(pooler_fmt_boxes, "cpu"), + roi_canonical_scale=self.canonical_box_size, + roi_canonical_level=self.canonical_level, + roi_max_level=self.max_level, + roi_min_level=self.min_level, + legacy_plus_one=False, + ) + fpn_outputs = [to_device(x, device) for x in fpn_outputs] + + rois_fpn_list = fpn_outputs[:-1] + rois_idx_restore_int32 = fpn_outputs[-1] + + roi_feat_fpn_list = [] + for roi_fpn, x_level, pooler in zip(rois_fpn_list, x, self.level_poolers): + if isinstance(pooler, ROIAlignRotated): + c2_roi_align = torch.ops._caffe2.RoIAlignRotated + aligned = True + else: + c2_roi_align = torch.ops._caffe2.RoIAlign + aligned = bool(pooler.aligned) + + if x_level.is_quantized: + x_level = x_level.dequantize() + + roi_feat_fpn = c2_roi_align( + x_level, + roi_fpn, + order="NCHW", + spatial_scale=float(pooler.spatial_scale), + pooled_h=int(self.output_size[0]), + pooled_w=int(self.output_size[1]), + sampling_ratio=int(pooler.sampling_ratio), + aligned=aligned, + ) + roi_feat_fpn_list.append(roi_feat_fpn) + + roi_feat_shuffled = cat(roi_feat_fpn_list, dim=0) + assert roi_feat_shuffled.numel() > 0 
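The RPN and pooler code above maps FPN feature strides to pyramid levels with log2 of the stride, and the Collect/Distribute caffe2 ops only require those levels to be consecutive. A quick check of that arithmetic for the usual FPN strides (values assumed, consistent with the rpn_min_level=2 used above):

```python
import math

feature_strides = [4, 8, 16, 32, 64]                 # typical FPN P2..P6 strides
levels = [int(math.log2(s)) for s in feature_strides]
print(levels)                                        # [2, 3, 4, 5, 6]
print(levels[-1] - levels[0] + 1 == len(levels))     # True: the levels are consecutive
```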
and rois_idx_restore_int32.numel() > 0, ( + "Caffe2 export requires tracing with a model checkpoint + input that can produce valid" + " detections. But no detections were obtained with the given checkpoint and input!" + ) + roi_feat = torch.ops._caffe2.BatchPermutation(roi_feat_shuffled, rois_idx_restore_int32) + return roi_feat + + +class Caffe2FastRCNNOutputsInference: + def __init__(self, tensor_mode): + self.tensor_mode = tensor_mode # whether the output is caffe2 tensor mode + + def __call__(self, box_predictor, predictions, proposals): + """equivalent to FastRCNNOutputLayers.inference""" + num_classes = box_predictor.num_classes + score_thresh = box_predictor.test_score_thresh + nms_thresh = box_predictor.test_nms_thresh + topk_per_image = box_predictor.test_topk_per_image + is_rotated = len(box_predictor.box2box_transform.weights) == 5 + + if is_rotated: + box_dim = 5 + assert box_predictor.box2box_transform.weights[4] == 1, ( + "The weights for Rotated BBoxTransform in C2 have only 4 dimensions," + + " thus enforcing the angle weight to be 1 for now" + ) + box2box_transform_weights = box_predictor.box2box_transform.weights[:4] + else: + box_dim = 4 + box2box_transform_weights = box_predictor.box2box_transform.weights + + class_logits, box_regression = predictions + if num_classes + 1 == class_logits.shape[1]: + class_prob = F.softmax(class_logits, -1) + else: + assert num_classes == class_logits.shape[1] + class_prob = F.sigmoid(class_logits) + # BoxWithNMSLimit will infer num_classes from the shape of the class_prob + # So append a zero column as placeholder for the background class + class_prob = torch.cat((class_prob, torch.zeros(class_prob.shape[0], 1)), dim=1) + + assert box_regression.shape[1] % box_dim == 0 + cls_agnostic_bbox_reg = box_regression.shape[1] // box_dim == 1 + + input_tensor_mode = proposals[0].proposal_boxes.tensor.shape[1] == box_dim + 1 + + proposal_boxes = proposals[0].proposal_boxes + if isinstance(proposal_boxes, Caffe2Boxes): + rois = Caffe2Boxes.cat([p.proposal_boxes for p in proposals]) + elif isinstance(proposal_boxes, RotatedBoxes): + rois = RotatedBoxes.cat([p.proposal_boxes for p in proposals]) + elif isinstance(proposal_boxes, Boxes): + rois = Boxes.cat([p.proposal_boxes for p in proposals]) + else: + raise NotImplementedError( + 'Expected proposals[0].proposal_boxes to be type "Boxes", ' + f"instead got {type(proposal_boxes)}" + ) + + device, dtype = rois.tensor.device, rois.tensor.dtype + if input_tensor_mode: + im_info = proposals[0].image_size + rois = rois.tensor + else: + im_info = torch.tensor( + [[sz[0], sz[1], 1.0] for sz in [x.image_size for x in proposals]] + ) + batch_ids = cat( + [ + torch.full((b, 1), i, dtype=dtype, device=device) + for i, b in enumerate(len(p) for p in proposals) + ], + dim=0, + ) + rois = torch.cat([batch_ids, rois.tensor], dim=1) + + roi_pred_bbox, roi_batch_splits = torch.ops._caffe2.BBoxTransform( + to_device(rois, "cpu"), + to_device(box_regression, "cpu"), + to_device(im_info, "cpu"), + weights=box2box_transform_weights, + apply_scale=True, + rotated=is_rotated, + angle_bound_on=True, + angle_bound_lo=-180, + angle_bound_hi=180, + clip_angle_thresh=1.0, + legacy_plus_one=False, + ) + roi_pred_bbox = to_device(roi_pred_bbox, device) + roi_batch_splits = to_device(roi_batch_splits, device) + + nms_outputs = torch.ops._caffe2.BoxWithNMSLimit( + to_device(class_prob, "cpu"), + to_device(roi_pred_bbox, "cpu"), + to_device(roi_batch_splits, "cpu"), + score_thresh=float(score_thresh), + nms=float(nms_thresh), + 
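Caffe2FastRCNNOutputsInference above pads sigmoid class probabilities with a zero background column because BoxWithNMSLimit infers the number of classes from the width of that tensor. A tiny illustration of the padding (random logits, nothing model-specific):

```python
import torch

num_classes = 3
class_logits = torch.randn(5, num_classes)    # per-box logits without a background column

class_prob = torch.sigmoid(class_logits)      # the sigmoid branch from above
class_prob = torch.cat((class_prob, torch.zeros(class_prob.shape[0], 1)), dim=1)
print(class_prob.shape)                       # torch.Size([5, 4]): background column appended
```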
detections_per_im=int(topk_per_image), + soft_nms_enabled=False, + soft_nms_method="linear", + soft_nms_sigma=0.5, + soft_nms_min_score_thres=0.001, + rotated=is_rotated, + cls_agnostic_bbox_reg=cls_agnostic_bbox_reg, + input_boxes_include_bg_cls=False, + output_classes_include_bg_cls=False, + legacy_plus_one=False, + ) + roi_score_nms = to_device(nms_outputs[0], device) + roi_bbox_nms = to_device(nms_outputs[1], device) + roi_class_nms = to_device(nms_outputs[2], device) + roi_batch_splits_nms = to_device(nms_outputs[3], device) + roi_keeps_nms = to_device(nms_outputs[4], device) + roi_keeps_size_nms = to_device(nms_outputs[5], device) + if not self.tensor_mode: + roi_class_nms = roi_class_nms.to(torch.int64) + + roi_batch_ids = cat( + [ + torch.full((b, 1), i, dtype=dtype, device=device) + for i, b in enumerate(int(x.item()) for x in roi_batch_splits_nms) + ], + dim=0, + ) + + roi_class_nms = alias(roi_class_nms, "class_nms") + roi_score_nms = alias(roi_score_nms, "score_nms") + roi_bbox_nms = alias(roi_bbox_nms, "bbox_nms") + roi_batch_splits_nms = alias(roi_batch_splits_nms, "batch_splits_nms") + roi_keeps_nms = alias(roi_keeps_nms, "keeps_nms") + roi_keeps_size_nms = alias(roi_keeps_size_nms, "keeps_size_nms") + + results = InstancesList( + im_info=im_info, + indices=roi_batch_ids[:, 0], + extra_fields={ + "pred_boxes": Caffe2Boxes(roi_bbox_nms), + "scores": roi_score_nms, + "pred_classes": roi_class_nms, + }, + ) + + if not self.tensor_mode: + results = InstancesList.to_d2_instances_list(results) + batch_splits = roi_batch_splits_nms.int().tolist() + kept_indices = list(roi_keeps_nms.to(torch.int64).split(batch_splits)) + else: + results = [results] + kept_indices = [roi_keeps_nms] + + return results, kept_indices + + +class Caffe2MaskRCNNInference: + def __call__(self, pred_mask_logits, pred_instances): + """equivalent to mask_head.mask_rcnn_inference""" + if all(isinstance(x, InstancesList) for x in pred_instances): + assert len(pred_instances) == 1 + mask_probs_pred = pred_mask_logits.sigmoid() + mask_probs_pred = alias(mask_probs_pred, "mask_fcn_probs") + pred_instances[0].set("pred_masks", mask_probs_pred) + else: + mask_rcnn_inference(pred_mask_logits, pred_instances) + + +class Caffe2KeypointRCNNInference: + def __init__(self, use_heatmap_max_keypoint): + self.use_heatmap_max_keypoint = use_heatmap_max_keypoint + + def __call__(self, pred_keypoint_logits, pred_instances): + # just return the keypoint heatmap for now, + # there will be option to call HeatmapMaxKeypointOp + output = alias(pred_keypoint_logits, "kps_score") + if all(isinstance(x, InstancesList) for x in pred_instances): + assert len(pred_instances) == 1 + if self.use_heatmap_max_keypoint: + device = output.device + output = torch.ops._caffe2.HeatmapMaxKeypoint( + to_device(output, "cpu"), + pred_instances[0].pred_boxes.tensor, + should_output_softmax=True, # worth make it configerable? 
+ ) + output = to_device(output, device) + output = alias(output, "keypoints_out") + pred_instances[0].set("pred_keypoints", output) + return pred_keypoint_logits diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/export/caffe2_export.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/export/caffe2_export.py new file mode 100644 index 0000000000000000000000000000000000000000..d609c27c7deb396352967dbcbc79b1e00f2a2de1 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/export/caffe2_export.py @@ -0,0 +1,203 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +import copy +import io +import logging +import numpy as np +from typing import List +import onnx +import onnx.optimizer +import torch +from caffe2.proto import caffe2_pb2 +from caffe2.python import core +from caffe2.python.onnx.backend import Caffe2Backend +from tabulate import tabulate +from termcolor import colored +from torch.onnx import OperatorExportTypes + +from .shared import ( + ScopedWS, + construct_init_net_from_params, + fuse_alias_placeholder, + fuse_copy_between_cpu_and_gpu, + get_params_from_init_net, + group_norm_replace_aten_with_caffe2, + infer_device_type, + remove_dead_end_ops, + remove_reshape_for_fc, + save_graph, +) + +logger = logging.getLogger(__name__) + + +def export_onnx_model(model, inputs): + """ + Trace and export a model to onnx format. + + Args: + model (nn.Module): + inputs (tuple[args]): the model will be called by `model(*inputs)` + + Returns: + an onnx model + """ + assert isinstance(model, torch.nn.Module) + + # make sure all modules are in eval mode, onnx may change the training state + # of the module if the states are not consistent + def _check_eval(module): + assert not module.training + + model.apply(_check_eval) + + # Export the model to ONNX + with torch.no_grad(): + with io.BytesIO() as f: + torch.onnx.export( + model, + inputs, + f, + operator_export_type=OperatorExportTypes.ONNX_ATEN_FALLBACK, + # verbose=True, # NOTE: uncomment this for debugging + # export_params=True, + ) + onnx_model = onnx.load_from_string(f.getvalue()) + + return onnx_model + + +def _op_stats(net_def): + type_count = {} + for t in [op.type for op in net_def.op]: + type_count[t] = type_count.get(t, 0) + 1 + type_count_list = sorted(type_count.items(), key=lambda kv: kv[0]) # alphabet + type_count_list = sorted(type_count_list, key=lambda kv: -kv[1]) # count + return "\n".join("{:>4}x {}".format(count, name) for name, count in type_count_list) + + +def _assign_device_option( + predict_net: caffe2_pb2.NetDef, init_net: caffe2_pb2.NetDef, tensor_inputs: List[torch.Tensor] +): + """ + ONNX exported network doesn't have concept of device, assign necessary + device option for each op in order to make it runable on GPU runtime. 
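export_onnx_model above is a thin wrapper around torch.onnx.export: it checks that every submodule is in eval mode, serializes the graph into an in-memory buffer, and reloads it with onnx.load_from_string. Below is a minimal standalone version of that pattern on a throwaway module (the module and input shape are made up). Separately, note that this file imports onnx.optimizer, which newer onnx releases moved out into the separate onnxoptimizer package, so the import may need adjusting depending on the installed onnx version.

```python
import io

import onnx
import torch
from torch import nn

model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.ReLU()).eval()   # toy model, eval mode as required
dummy = torch.randn(1, 3, 64, 64)

with torch.no_grad(), io.BytesIO() as f:
    torch.onnx.export(model, (dummy,), f)                     # same buffer-based export as above
    onnx_model = onnx.load_from_string(f.getvalue())

print(onnx.helper.printable_graph(onnx_model.graph))
```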
+ """ + + def _get_device_type(torch_tensor): + assert torch_tensor.device.type in ["cpu", "cuda"] + assert torch_tensor.device.index == 0 + return torch_tensor.device.type + + def _assign_op_device_option(net_proto, net_ssa, blob_device_types): + for op, ssa_i in zip(net_proto.op, net_ssa): + if op.type in ["CopyCPUToGPU", "CopyGPUToCPU"]: + op.device_option.CopyFrom(core.DeviceOption(caffe2_pb2.CUDA, 0)) + else: + devices = [blob_device_types[b] for b in ssa_i[0] + ssa_i[1]] + assert all(d == devices[0] for d in devices) + if devices[0] == "cuda": + op.device_option.CopyFrom(core.DeviceOption(caffe2_pb2.CUDA, 0)) + + # update ops in predict_net + predict_net_input_device_types = { + (name, 0): _get_device_type(tensor) + for name, tensor in zip(predict_net.external_input, tensor_inputs) + } + predict_net_device_types = infer_device_type( + predict_net, known_status=predict_net_input_device_types, device_name_style="pytorch" + ) + predict_net_ssa, _ = core.get_ssa(predict_net) + _assign_op_device_option(predict_net, predict_net_ssa, predict_net_device_types) + + # update ops in init_net + init_net_ssa, versions = core.get_ssa(init_net) + init_net_output_device_types = { + (name, versions[name]): predict_net_device_types[(name, 0)] + for name in init_net.external_output + } + init_net_device_types = infer_device_type( + init_net, known_status=init_net_output_device_types, device_name_style="pytorch" + ) + _assign_op_device_option(init_net, init_net_ssa, init_net_device_types) + + +def export_caffe2_detection_model(model: torch.nn.Module, tensor_inputs: List[torch.Tensor]): + """ + Export a caffe2-compatible Detectron2 model to caffe2 format via ONNX. + + Arg: + model: a caffe2-compatible version of detectron2 model, defined in caffe2_modeling.py + tensor_inputs: a list of tensors that caffe2 model takes as input. + """ + model = copy.deepcopy(model) + assert isinstance(model, torch.nn.Module) + assert hasattr(model, "encode_additional_info") + + # Export via ONNX + logger.info( + "Exporting a {} model via ONNX ...".format(type(model).__name__) + + " Some warnings from ONNX are expected and are usually not to worry about." + ) + onnx_model = export_onnx_model(model, (tensor_inputs,)) + # Convert ONNX model to Caffe2 protobuf + init_net, predict_net = Caffe2Backend.onnx_graph_to_caffe2_net(onnx_model) + ops_table = [[op.type, op.input, op.output] for op in predict_net.op] + table = tabulate(ops_table, headers=["type", "input", "output"], tablefmt="pipe") + logger.info( + "ONNX export Done. Exported predict_net (before optimizations):\n" + colored(table, "cyan") + ) + + # Apply protobuf optimization + fuse_alias_placeholder(predict_net, init_net) + if any(t.device.type != "cpu" for t in tensor_inputs): + fuse_copy_between_cpu_and_gpu(predict_net) + remove_dead_end_ops(init_net) + _assign_device_option(predict_net, init_net, tensor_inputs) + params, device_options = get_params_from_init_net(init_net) + predict_net, params = remove_reshape_for_fc(predict_net, params) + init_net = construct_init_net_from_params(params, device_options) + group_norm_replace_aten_with_caffe2(predict_net) + + # Record necessary information for running the pb model in Detectron2 system. 
+ model.encode_additional_info(predict_net, init_net) + + logger.info("Operators used in predict_net: \n{}".format(_op_stats(predict_net))) + logger.info("Operators used in init_net: \n{}".format(_op_stats(init_net))) + + return predict_net, init_net + + +def run_and_save_graph(predict_net, init_net, tensor_inputs, graph_save_path): + """ + Run the caffe2 model on given inputs, recording the shape and draw the graph. + + predict_net/init_net: caffe2 model. + tensor_inputs: a list of tensors that caffe2 model takes as input. + graph_save_path: path for saving graph of exported model. + """ + + logger.info("Saving graph of ONNX exported model to {} ...".format(graph_save_path)) + save_graph(predict_net, graph_save_path, op_only=False) + + # Run the exported Caffe2 net + logger.info("Running ONNX exported model ...") + with ScopedWS("__ws_tmp__", True) as ws: + ws.RunNetOnce(init_net) + initialized_blobs = set(ws.Blobs()) + uninitialized = [inp for inp in predict_net.external_input if inp not in initialized_blobs] + for name, blob in zip(uninitialized, tensor_inputs): + ws.FeedBlob(name, blob) + + try: + ws.RunNetOnce(predict_net) + except RuntimeError as e: + logger.warning("Encountered RuntimeError: \n{}".format(str(e))) + + ws_blobs = {b: ws.FetchBlob(b) for b in ws.Blobs()} + blob_sizes = {b: ws_blobs[b].shape for b in ws_blobs if isinstance(ws_blobs[b], np.ndarray)} + + logger.info("Saving graph with blob shapes to {} ...".format(graph_save_path)) + save_graph(predict_net, graph_save_path, op_only=False, blob_sizes=blob_sizes) + + return ws_blobs diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/export/caffe2_inference.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/export/caffe2_inference.py new file mode 100644 index 0000000000000000000000000000000000000000..deb886c0417285ed1d5ad85eb941fa1ac757cdab --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/export/caffe2_inference.py @@ -0,0 +1,161 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +import logging +import numpy as np +from itertools import count +import torch +from caffe2.proto import caffe2_pb2 +from caffe2.python import core + +from .caffe2_modeling import META_ARCH_CAFFE2_EXPORT_TYPE_MAP, convert_batched_inputs_to_c2_format +from .shared import ScopedWS, get_pb_arg_vali, get_pb_arg_vals, infer_device_type + +logger = logging.getLogger(__name__) + + +# ===== ref: mobile-vision predictor's 'Caffe2Wrapper' class ====== +class ProtobufModel(torch.nn.Module): + """ + Wrapper of a caffe2's protobuf model. + It works just like nn.Module, but running caffe2 under the hood. + Input/Output are tuple[tensor] that match the caffe2 net's external_input/output. 
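+    Example (illustrative sketch; assumes `predict_net` / `init_net` are loaded
+    caffe2_pb2.NetDef protos and `inputs` is a tuple of torch.Tensor matching the
+    net's uninitialized external inputs)::
+
+        model = ProtobufModel(predict_net, init_net)
+        outputs = model(inputs)  # tuple[torch.Tensor], one per external_output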
+ """ + + _ids = count(0) + + def __init__(self, predict_net, init_net): + logger.info(f"Initializing ProtobufModel for: {predict_net.name} ...") + super().__init__() + assert isinstance(predict_net, caffe2_pb2.NetDef) + assert isinstance(init_net, caffe2_pb2.NetDef) + # create unique temporary workspace for each instance + self.ws_name = "__tmp_ProtobufModel_{}__".format(next(self._ids)) + self.net = core.Net(predict_net) + + logger.info("Running init_net once to fill the parameters ...") + with ScopedWS(self.ws_name, is_reset=True, is_cleanup=False) as ws: + ws.RunNetOnce(init_net) + uninitialized_external_input = [] + for blob in self.net.Proto().external_input: + if blob not in ws.Blobs(): + uninitialized_external_input.append(blob) + ws.CreateBlob(blob) + ws.CreateNet(self.net) + + self._error_msgs = set() + self._input_blobs = uninitialized_external_input + + def _infer_output_devices(self, inputs): + """ + Returns: + list[str]: list of device for each external output + """ + + def _get_device_type(torch_tensor): + assert torch_tensor.device.type in ["cpu", "cuda"] + assert torch_tensor.device.index == 0 + return torch_tensor.device.type + + predict_net = self.net.Proto() + input_device_types = { + (name, 0): _get_device_type(tensor) for name, tensor in zip(self._input_blobs, inputs) + } + device_type_map = infer_device_type( + predict_net, known_status=input_device_types, device_name_style="pytorch" + ) + ssa, versions = core.get_ssa(predict_net) + versioned_outputs = [(name, versions[name]) for name in predict_net.external_output] + output_devices = [device_type_map[outp] for outp in versioned_outputs] + return output_devices + + def forward(self, inputs): + """ + Args: + inputs (tuple[torch.Tensor]) + + Returns: + tuple[torch.Tensor] + """ + assert len(inputs) == len(self._input_blobs), ( + f"Length of inputs ({len(inputs)}) " + f"doesn't match the required input blobs: {self._input_blobs}" + ) + + with ScopedWS(self.ws_name, is_reset=False, is_cleanup=False) as ws: + for b, tensor in zip(self._input_blobs, inputs): + ws.FeedBlob(b, tensor) + + try: + ws.RunNet(self.net.Proto().name) + except RuntimeError as e: + if not str(e) in self._error_msgs: + self._error_msgs.add(str(e)) + logger.warning("Encountered new RuntimeError: \n{}".format(str(e))) + logger.warning("Catch the error and use partial results.") + + c2_outputs = [ws.FetchBlob(b) for b in self.net.Proto().external_output] + # Remove outputs of current run, this is necessary in order to + # prevent fetching the result from previous run if the model fails + # in the middle. + for b in self.net.Proto().external_output: + # Needs to create uninitialized blob to make the net runable. + # This is "equivalent" to: ws.RemoveBlob(b) then ws.CreateBlob(b), + # but there'no such API. 
+ ws.FeedBlob(b, f"{b}, a C++ native class of type nullptr (uninitialized).") + + # Cast output to torch.Tensor on the desired device + output_devices = ( + self._infer_output_devices(inputs) + if any(t.device.type != "cpu" for t in inputs) + else ["cpu" for _ in self.net.Proto().external_output] + ) + + outputs = [] + for name, c2_output, device in zip( + self.net.Proto().external_output, c2_outputs, output_devices + ): + if not isinstance(c2_output, np.ndarray): + raise RuntimeError( + "Invalid output for blob {}, received: {}".format(name, c2_output) + ) + outputs.append(torch.tensor(c2_output).to(device=device)) + return tuple(outputs) + + +class ProtobufDetectionModel(torch.nn.Module): + """ + A class works just like a pytorch meta arch in terms of inference, but running + caffe2 model under the hood. + """ + + def __init__(self, predict_net, init_net, *, convert_outputs=None): + """ + Args: + predict_net, init_net (core.Net): caffe2 nets + convert_outptus (callable): a function that converts caffe2 + outputs to the same format of the original pytorch model. + By default, use the one defined in the caffe2 meta_arch. + """ + super().__init__() + self.protobuf_model = ProtobufModel(predict_net, init_net) + self.size_divisibility = get_pb_arg_vali(predict_net, "size_divisibility", 0) + self.device = get_pb_arg_vals(predict_net, "device", b"cpu").decode("ascii") + + if convert_outputs is None: + meta_arch = get_pb_arg_vals(predict_net, "meta_architecture", b"GeneralizedRCNN") + meta_arch = META_ARCH_CAFFE2_EXPORT_TYPE_MAP[meta_arch.decode("ascii")] + self._convert_outputs = meta_arch.get_outputs_converter(predict_net, init_net) + else: + self._convert_outputs = convert_outputs + + def _convert_inputs(self, batched_inputs): + # currently all models convert inputs in the same way + return convert_batched_inputs_to_c2_format( + batched_inputs, self.size_divisibility, self.device + ) + + def forward(self, batched_inputs): + c2_inputs = self._convert_inputs(batched_inputs) + c2_results = self.protobuf_model(c2_inputs) + c2_results = dict(zip(self.protobuf_model.net.Proto().external_output, c2_results)) + return self._convert_outputs(batched_inputs, c2_inputs, c2_results) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/export/caffe2_modeling.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/export/caffe2_modeling.py new file mode 100644 index 0000000000000000000000000000000000000000..e0128e4672bc08eb2983d3d382614c6381baefd9 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/export/caffe2_modeling.py @@ -0,0 +1,419 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +import functools +import io +import struct +import types +import torch + +from annotator.oneformer.detectron2.modeling import meta_arch +from annotator.oneformer.detectron2.modeling.box_regression import Box2BoxTransform +from annotator.oneformer.detectron2.modeling.roi_heads import keypoint_head +from annotator.oneformer.detectron2.structures import Boxes, ImageList, Instances, RotatedBoxes + +from .c10 import Caffe2Compatible +from .caffe2_patch import ROIHeadsPatcher, patch_generalized_rcnn +from .shared import ( + alias, + check_set_pb_arg, + get_pb_arg_floats, + get_pb_arg_valf, + get_pb_arg_vali, + get_pb_arg_vals, + mock_torch_nn_functional_interpolate, +) + + +def assemble_rcnn_outputs_by_name(image_sizes, tensor_outputs, force_mask_on=False): + """ + A function to assemble caffe2 model's outputs (i.e. 
Dict[str, Tensor]) + to detectron2's format (i.e. list of Instances instance). + This only works when the model follows the Caffe2 detectron's naming convention. + + Args: + image_sizes (List[List[int, int]]): [H, W] of every image. + tensor_outputs (Dict[str, Tensor]): external_output to its tensor. + + force_mask_on (Bool): if true, the it make sure there'll be pred_masks even + if the mask is not found from tensor_outputs (usually due to model crash) + """ + + results = [Instances(image_size) for image_size in image_sizes] + + batch_splits = tensor_outputs.get("batch_splits", None) + if batch_splits: + raise NotImplementedError() + assert len(image_sizes) == 1 + result = results[0] + + bbox_nms = tensor_outputs["bbox_nms"] + score_nms = tensor_outputs["score_nms"] + class_nms = tensor_outputs["class_nms"] + # Detection will always success because Conv support 0-batch + assert bbox_nms is not None + assert score_nms is not None + assert class_nms is not None + if bbox_nms.shape[1] == 5: + result.pred_boxes = RotatedBoxes(bbox_nms) + else: + result.pred_boxes = Boxes(bbox_nms) + result.scores = score_nms + result.pred_classes = class_nms.to(torch.int64) + + mask_fcn_probs = tensor_outputs.get("mask_fcn_probs", None) + if mask_fcn_probs is not None: + # finish the mask pred + mask_probs_pred = mask_fcn_probs + num_masks = mask_probs_pred.shape[0] + class_pred = result.pred_classes + indices = torch.arange(num_masks, device=class_pred.device) + mask_probs_pred = mask_probs_pred[indices, class_pred][:, None] + result.pred_masks = mask_probs_pred + elif force_mask_on: + # NOTE: there's no way to know the height/width of mask here, it won't be + # used anyway when batch size is 0, so just set them to 0. + result.pred_masks = torch.zeros([0, 1, 0, 0], dtype=torch.uint8) + + keypoints_out = tensor_outputs.get("keypoints_out", None) + kps_score = tensor_outputs.get("kps_score", None) + if keypoints_out is not None: + # keypoints_out: [N, 4, #kypoints], where 4 is in order of (x, y, score, prob) + keypoints_tensor = keypoints_out + # NOTE: it's possible that prob is not calculated if "should_output_softmax" + # is set to False in HeatmapMaxKeypoint, so just using raw score, seems + # it doesn't affect mAP. TODO: check more carefully. + keypoint_xyp = keypoints_tensor.transpose(1, 2)[:, :, [0, 1, 2]] + result.pred_keypoints = keypoint_xyp + elif kps_score is not None: + # keypoint heatmap to sparse data structure + pred_keypoint_logits = kps_score + keypoint_head.keypoint_rcnn_inference(pred_keypoint_logits, [result]) + + return results + + +def _cast_to_f32(f64): + return struct.unpack("f", struct.pack("f", f64))[0] + + +def set_caffe2_compatible_tensor_mode(model, enable=True): + def _fn(m): + if isinstance(m, Caffe2Compatible): + m.tensor_mode = enable + + model.apply(_fn) + + +def convert_batched_inputs_to_c2_format(batched_inputs, size_divisibility, device): + """ + See get_caffe2_inputs() below. + """ + assert all(isinstance(x, dict) for x in batched_inputs) + assert all(x["image"].dim() == 3 for x in batched_inputs) + + images = [x["image"] for x in batched_inputs] + images = ImageList.from_tensors(images, size_divisibility) + + im_info = [] + for input_per_image, image_size in zip(batched_inputs, images.image_sizes): + target_height = input_per_image.get("height", image_size[0]) + target_width = input_per_image.get("width", image_size[1]) # noqa + # NOTE: The scale inside im_info is kept as convention and for providing + # post-processing information if further processing is needed. 
For + # current Caffe2 model definitions that don't include post-processing inside + # the model, this number is not used. + # NOTE: There can be a slight difference between width and height + # scales, using a single number can results in numerical difference + # compared with D2's post-processing. + scale = target_height / image_size[0] + im_info.append([image_size[0], image_size[1], scale]) + im_info = torch.Tensor(im_info) + + return images.tensor.to(device), im_info.to(device) + + +class Caffe2MetaArch(Caffe2Compatible, torch.nn.Module): + """ + Base class for caffe2-compatible implementation of a meta architecture. + The forward is traceable and its traced graph can be converted to caffe2 + graph through ONNX. + """ + + def __init__(self, cfg, torch_model): + """ + Args: + cfg (CfgNode): + torch_model (nn.Module): the detectron2 model (meta_arch) to be + converted. + """ + super().__init__() + self._wrapped_model = torch_model + self.eval() + set_caffe2_compatible_tensor_mode(self, True) + + def get_caffe2_inputs(self, batched_inputs): + """ + Convert pytorch-style structured inputs to caffe2-style inputs that + are tuples of tensors. + + Args: + batched_inputs (list[dict]): inputs to a detectron2 model + in its standard format. Each dict has "image" (CHW tensor), and optionally + "height" and "width". + + Returns: + tuple[Tensor]: + tuple of tensors that will be the inputs to the + :meth:`forward` method. For existing models, the first + is an NCHW tensor (padded and batched); the second is + a im_info Nx3 tensor, where the rows are + (height, width, unused legacy parameter) + """ + return convert_batched_inputs_to_c2_format( + batched_inputs, + self._wrapped_model.backbone.size_divisibility, + self._wrapped_model.device, + ) + + def encode_additional_info(self, predict_net, init_net): + """ + Save extra metadata that will be used by inference in the output protobuf. + """ + pass + + def forward(self, inputs): + """ + Run the forward in caffe2-style. It has to use caffe2-compatible ops + and the method will be used for tracing. + + Args: + inputs (tuple[Tensor]): inputs defined by :meth:`get_caffe2_input`. + They will be the inputs of the converted caffe2 graph. + + Returns: + tuple[Tensor]: output tensors. They will be the outputs of the + converted caffe2 graph. + """ + raise NotImplementedError + + def _caffe2_preprocess_image(self, inputs): + """ + Caffe2 implementation of preprocess_image, which is called inside each MetaArch's forward. + It normalizes the input images, and the final caffe2 graph assumes the + inputs have been batched already. + """ + data, im_info = inputs + data = alias(data, "data") + im_info = alias(im_info, "im_info") + mean, std = self._wrapped_model.pixel_mean, self._wrapped_model.pixel_std + normalized_data = (data - mean) / std + normalized_data = alias(normalized_data, "normalized_data") + + # Pack (data, im_info) into ImageList which is recognized by self.inference. + images = ImageList(tensor=normalized_data, image_sizes=im_info) + return images + + @staticmethod + def get_outputs_converter(predict_net, init_net): + """ + Creates a function that converts outputs of the caffe2 model to + detectron2's standard format. + The function uses information in `predict_net` and `init_net` that are + available at inferene time. Therefore the function logic can be used in inference. 
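+        (Such information is typically written by :meth:`encode_additional_info`,
+        e.g. the meta architecture name, inference thresholds, or a serialized
+        anchor generator in the RetinaNet case.)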
+ + The returned function has the following signature: + + def convert(batched_inputs, c2_inputs, c2_results) -> detectron2_outputs + + Where + + * batched_inputs (list[dict]): the original input format of the meta arch + * c2_inputs (tuple[Tensor]): the caffe2 inputs. + * c2_results (dict[str, Tensor]): the caffe2 output format, + corresponding to the outputs of the :meth:`forward` function. + * detectron2_outputs: the original output format of the meta arch. + + This function can be used to compare the outputs of the original meta arch and + the converted caffe2 graph. + + Returns: + callable: a callable of the above signature. + """ + raise NotImplementedError + + +class Caffe2GeneralizedRCNN(Caffe2MetaArch): + def __init__(self, cfg, torch_model): + assert isinstance(torch_model, meta_arch.GeneralizedRCNN) + torch_model = patch_generalized_rcnn(torch_model) + super().__init__(cfg, torch_model) + + try: + use_heatmap_max_keypoint = cfg.EXPORT_CAFFE2.USE_HEATMAP_MAX_KEYPOINT + except AttributeError: + use_heatmap_max_keypoint = False + self.roi_heads_patcher = ROIHeadsPatcher( + self._wrapped_model.roi_heads, use_heatmap_max_keypoint + ) + + def encode_additional_info(self, predict_net, init_net): + size_divisibility = self._wrapped_model.backbone.size_divisibility + check_set_pb_arg(predict_net, "size_divisibility", "i", size_divisibility) + check_set_pb_arg( + predict_net, "device", "s", str.encode(str(self._wrapped_model.device), "ascii") + ) + check_set_pb_arg(predict_net, "meta_architecture", "s", b"GeneralizedRCNN") + + @mock_torch_nn_functional_interpolate() + def forward(self, inputs): + if not self.tensor_mode: + return self._wrapped_model.inference(inputs) + images = self._caffe2_preprocess_image(inputs) + features = self._wrapped_model.backbone(images.tensor) + proposals, _ = self._wrapped_model.proposal_generator(images, features) + with self.roi_heads_patcher.mock_roi_heads(): + detector_results, _ = self._wrapped_model.roi_heads(images, features, proposals) + return tuple(detector_results[0].flatten()) + + @staticmethod + def get_outputs_converter(predict_net, init_net): + def f(batched_inputs, c2_inputs, c2_results): + _, im_info = c2_inputs + image_sizes = [[int(im[0]), int(im[1])] for im in im_info] + results = assemble_rcnn_outputs_by_name(image_sizes, c2_results) + return meta_arch.GeneralizedRCNN._postprocess(results, batched_inputs, image_sizes) + + return f + + +class Caffe2RetinaNet(Caffe2MetaArch): + def __init__(self, cfg, torch_model): + assert isinstance(torch_model, meta_arch.RetinaNet) + super().__init__(cfg, torch_model) + + @mock_torch_nn_functional_interpolate() + def forward(self, inputs): + assert self.tensor_mode + images = self._caffe2_preprocess_image(inputs) + + # explicitly return the images sizes to avoid removing "im_info" by ONNX + # since it's not used in the forward path + return_tensors = [images.image_sizes] + + features = self._wrapped_model.backbone(images.tensor) + features = [features[f] for f in self._wrapped_model.head_in_features] + for i, feature_i in enumerate(features): + features[i] = alias(feature_i, "feature_{}".format(i), is_backward=True) + return_tensors.append(features[i]) + + pred_logits, pred_anchor_deltas = self._wrapped_model.head(features) + for i, (box_cls_i, box_delta_i) in enumerate(zip(pred_logits, pred_anchor_deltas)): + return_tensors.append(alias(box_cls_i, "box_cls_{}".format(i))) + return_tensors.append(alias(box_delta_i, "box_delta_{}".format(i))) + + return tuple(return_tensors) + + def 
encode_additional_info(self, predict_net, init_net): + size_divisibility = self._wrapped_model.backbone.size_divisibility + check_set_pb_arg(predict_net, "size_divisibility", "i", size_divisibility) + check_set_pb_arg( + predict_net, "device", "s", str.encode(str(self._wrapped_model.device), "ascii") + ) + check_set_pb_arg(predict_net, "meta_architecture", "s", b"RetinaNet") + + # Inference parameters: + check_set_pb_arg( + predict_net, "score_threshold", "f", _cast_to_f32(self._wrapped_model.test_score_thresh) + ) + check_set_pb_arg( + predict_net, "topk_candidates", "i", self._wrapped_model.test_topk_candidates + ) + check_set_pb_arg( + predict_net, "nms_threshold", "f", _cast_to_f32(self._wrapped_model.test_nms_thresh) + ) + check_set_pb_arg( + predict_net, + "max_detections_per_image", + "i", + self._wrapped_model.max_detections_per_image, + ) + + check_set_pb_arg( + predict_net, + "bbox_reg_weights", + "floats", + [_cast_to_f32(w) for w in self._wrapped_model.box2box_transform.weights], + ) + self._encode_anchor_generator_cfg(predict_net) + + def _encode_anchor_generator_cfg(self, predict_net): + # serialize anchor_generator for future use + serialized_anchor_generator = io.BytesIO() + torch.save(self._wrapped_model.anchor_generator, serialized_anchor_generator) + # Ideally we can put anchor generating inside the model, then we don't + # need to store this information. + bytes = serialized_anchor_generator.getvalue() + check_set_pb_arg(predict_net, "serialized_anchor_generator", "s", bytes) + + @staticmethod + def get_outputs_converter(predict_net, init_net): + self = types.SimpleNamespace() + serialized_anchor_generator = io.BytesIO( + get_pb_arg_vals(predict_net, "serialized_anchor_generator", None) + ) + self.anchor_generator = torch.load(serialized_anchor_generator) + bbox_reg_weights = get_pb_arg_floats(predict_net, "bbox_reg_weights", None) + self.box2box_transform = Box2BoxTransform(weights=tuple(bbox_reg_weights)) + self.test_score_thresh = get_pb_arg_valf(predict_net, "score_threshold", None) + self.test_topk_candidates = get_pb_arg_vali(predict_net, "topk_candidates", None) + self.test_nms_thresh = get_pb_arg_valf(predict_net, "nms_threshold", None) + self.max_detections_per_image = get_pb_arg_vali( + predict_net, "max_detections_per_image", None + ) + + # hack to reuse inference code from RetinaNet + for meth in [ + "forward_inference", + "inference_single_image", + "_transpose_dense_predictions", + "_decode_multi_level_predictions", + "_decode_per_level_predictions", + ]: + setattr(self, meth, functools.partial(getattr(meta_arch.RetinaNet, meth), self)) + + def f(batched_inputs, c2_inputs, c2_results): + _, im_info = c2_inputs + image_sizes = [[int(im[0]), int(im[1])] for im in im_info] + dummy_images = ImageList( + torch.randn( + ( + len(im_info), + 3, + ) + + tuple(image_sizes[0]) + ), + image_sizes, + ) + + num_features = len([x for x in c2_results.keys() if x.startswith("box_cls_")]) + pred_logits = [c2_results["box_cls_{}".format(i)] for i in range(num_features)] + pred_anchor_deltas = [c2_results["box_delta_{}".format(i)] for i in range(num_features)] + + # For each feature level, feature should have the same batch size and + # spatial dimension as the box_cls and box_delta. 
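+            # (slicing the channel dim to 0 keeps N/H/W while dropping the channels;
+            # the inference path below only needs the spatial size of each feature
+            # level to generate anchors, so empty-channel tensors suffice here)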
+ dummy_features = [x.clone()[:, 0:0, :, :] for x in pred_logits] + # self.num_classess can be inferred + self.num_classes = pred_logits[0].shape[1] // (pred_anchor_deltas[0].shape[1] // 4) + + results = self.forward_inference( + dummy_images, dummy_features, [pred_logits, pred_anchor_deltas] + ) + return meta_arch.GeneralizedRCNN._postprocess(results, batched_inputs, image_sizes) + + return f + + +META_ARCH_CAFFE2_EXPORT_TYPE_MAP = { + "GeneralizedRCNN": Caffe2GeneralizedRCNN, + "RetinaNet": Caffe2RetinaNet, +} diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/export/caffe2_patch.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/export/caffe2_patch.py new file mode 100644 index 0000000000000000000000000000000000000000..9c197cac1e7d5f665b6cbda46268716b1222f217 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/export/caffe2_patch.py @@ -0,0 +1,152 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +import contextlib +from unittest import mock +import torch + +from annotator.oneformer.detectron2.modeling import poolers +from annotator.oneformer.detectron2.modeling.proposal_generator import rpn +from annotator.oneformer.detectron2.modeling.roi_heads import keypoint_head, mask_head +from annotator.oneformer.detectron2.modeling.roi_heads.fast_rcnn import FastRCNNOutputLayers + +from .c10 import ( + Caffe2Compatible, + Caffe2FastRCNNOutputsInference, + Caffe2KeypointRCNNInference, + Caffe2MaskRCNNInference, + Caffe2ROIPooler, + Caffe2RPN, +) + + +class GenericMixin(object): + pass + + +class Caffe2CompatibleConverter(object): + """ + A GenericUpdater which implements the `create_from` interface, by modifying + module object and assign it with another class replaceCls. + """ + + def __init__(self, replaceCls): + self.replaceCls = replaceCls + + def create_from(self, module): + # update module's class to the new class + assert isinstance(module, torch.nn.Module) + if issubclass(self.replaceCls, GenericMixin): + # replaceCls should act as mixin, create a new class on-the-fly + new_class = type( + "{}MixedWith{}".format(self.replaceCls.__name__, module.__class__.__name__), + (self.replaceCls, module.__class__), + {}, # {"new_method": lambda self: ...}, + ) + module.__class__ = new_class + else: + # replaceCls is complete class, this allow arbitrary class swap + module.__class__ = self.replaceCls + + # initialize Caffe2Compatible + if isinstance(module, Caffe2Compatible): + module.tensor_mode = False + + return module + + +def patch(model, target, updater, *args, **kwargs): + """ + recursively (post-order) update all modules with the target type and its + subclasses, make a initialization/composition/inheritance/... via the + updater.create_from. 
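+    Example (sketch, mirroring `patch_generalized_rcnn` below)::
+
+        ccc = Caffe2CompatibleConverter
+        model = patch(model, rpn.RPN, ccc(Caffe2RPN))
+        model = patch(model, poolers.ROIPooler, ccc(Caffe2ROIPooler))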
+ """ + for name, module in model.named_children(): + model._modules[name] = patch(module, target, updater, *args, **kwargs) + if isinstance(model, target): + return updater.create_from(model, *args, **kwargs) + return model + + +def patch_generalized_rcnn(model): + ccc = Caffe2CompatibleConverter + model = patch(model, rpn.RPN, ccc(Caffe2RPN)) + model = patch(model, poolers.ROIPooler, ccc(Caffe2ROIPooler)) + + return model + + +@contextlib.contextmanager +def mock_fastrcnn_outputs_inference( + tensor_mode, check=True, box_predictor_type=FastRCNNOutputLayers +): + with mock.patch.object( + box_predictor_type, + "inference", + autospec=True, + side_effect=Caffe2FastRCNNOutputsInference(tensor_mode), + ) as mocked_func: + yield + if check: + assert mocked_func.call_count > 0 + + +@contextlib.contextmanager +def mock_mask_rcnn_inference(tensor_mode, patched_module, check=True): + with mock.patch( + "{}.mask_rcnn_inference".format(patched_module), side_effect=Caffe2MaskRCNNInference() + ) as mocked_func: + yield + if check: + assert mocked_func.call_count > 0 + + +@contextlib.contextmanager +def mock_keypoint_rcnn_inference(tensor_mode, patched_module, use_heatmap_max_keypoint, check=True): + with mock.patch( + "{}.keypoint_rcnn_inference".format(patched_module), + side_effect=Caffe2KeypointRCNNInference(use_heatmap_max_keypoint), + ) as mocked_func: + yield + if check: + assert mocked_func.call_count > 0 + + +class ROIHeadsPatcher: + def __init__(self, heads, use_heatmap_max_keypoint): + self.heads = heads + self.use_heatmap_max_keypoint = use_heatmap_max_keypoint + + @contextlib.contextmanager + def mock_roi_heads(self, tensor_mode=True): + """ + Patching several inference functions inside ROIHeads and its subclasses + + Args: + tensor_mode (bool): whether the inputs/outputs are caffe2's tensor + format or not. Default to True. + """ + # NOTE: this requries the `keypoint_rcnn_inference` and `mask_rcnn_inference` + # are called inside the same file as BaseXxxHead due to using mock.patch. + kpt_heads_mod = keypoint_head.BaseKeypointRCNNHead.__module__ + mask_head_mod = mask_head.BaseMaskRCNNHead.__module__ + + mock_ctx_managers = [ + mock_fastrcnn_outputs_inference( + tensor_mode=tensor_mode, + check=True, + box_predictor_type=type(self.heads.box_predictor), + ) + ] + if getattr(self.heads, "keypoint_on", False): + mock_ctx_managers += [ + mock_keypoint_rcnn_inference( + tensor_mode, kpt_heads_mod, self.use_heatmap_max_keypoint + ) + ] + if getattr(self.heads, "mask_on", False): + mock_ctx_managers += [mock_mask_rcnn_inference(tensor_mode, mask_head_mod)] + + with contextlib.ExitStack() as stack: # python 3.3+ + for mgr in mock_ctx_managers: + stack.enter_context(mgr) + yield diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/export/flatten.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/export/flatten.py new file mode 100644 index 0000000000000000000000000000000000000000..3fcb2bf49a0adad2798a10781a42accd9571218f --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/export/flatten.py @@ -0,0 +1,330 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+import collections +from dataclasses import dataclass +from typing import Callable, List, Optional, Tuple +import torch +from torch import nn + +from annotator.oneformer.detectron2.structures import Boxes, Instances, ROIMasks +from annotator.oneformer.detectron2.utils.registry import _convert_target_to_string, locate + +from .torchscript_patch import patch_builtin_len + + +@dataclass +class Schema: + """ + A Schema defines how to flatten a possibly hierarchical object into tuple of + primitive objects, so it can be used as inputs/outputs of PyTorch's tracing. + + PyTorch does not support tracing a function that produces rich output + structures (e.g. dict, Instances, Boxes). To trace such a function, we + flatten the rich object into tuple of tensors, and return this tuple of tensors + instead. Meanwhile, we also need to know how to "rebuild" the original object + from the flattened results, so we can evaluate the flattened results. + A Schema defines how to flatten an object, and while flattening it, it records + necessary schemas so that the object can be rebuilt using the flattened outputs. + + The flattened object and the schema object is returned by ``.flatten`` classmethod. + Then the original object can be rebuilt with the ``__call__`` method of schema. + + A Schema is a dataclass that can be serialized easily. + """ + + # inspired by FetchMapper in tensorflow/python/client/session.py + + @classmethod + def flatten(cls, obj): + raise NotImplementedError + + def __call__(self, values): + raise NotImplementedError + + @staticmethod + def _concat(values): + ret = () + sizes = [] + for v in values: + assert isinstance(v, tuple), "Flattened results must be a tuple" + ret = ret + v + sizes.append(len(v)) + return ret, sizes + + @staticmethod + def _split(values, sizes): + if len(sizes): + expected_len = sum(sizes) + assert ( + len(values) == expected_len + ), f"Values has length {len(values)} but expect length {expected_len}." + ret = [] + for k in range(len(sizes)): + begin, end = sum(sizes[:k]), sum(sizes[: k + 1]) + ret.append(values[begin:end]) + return ret + + +@dataclass +class ListSchema(Schema): + schemas: List[Schema] # the schemas that define how to flatten each element in the list + sizes: List[int] # the flattened length of each element + + def __call__(self, values): + values = self._split(values, self.sizes) + if len(values) != len(self.schemas): + raise ValueError( + f"Values has length {len(values)} but schemas " f"has length {len(self.schemas)}!" 
+ ) + values = [m(v) for m, v in zip(self.schemas, values)] + return list(values) + + @classmethod + def flatten(cls, obj): + res = [flatten_to_tuple(k) for k in obj] + values, sizes = cls._concat([k[0] for k in res]) + return values, cls([k[1] for k in res], sizes) + + +@dataclass +class TupleSchema(ListSchema): + def __call__(self, values): + return tuple(super().__call__(values)) + + +@dataclass +class IdentitySchema(Schema): + def __call__(self, values): + return values[0] + + @classmethod + def flatten(cls, obj): + return (obj,), cls() + + +@dataclass +class DictSchema(ListSchema): + keys: List[str] + + def __call__(self, values): + values = super().__call__(values) + return dict(zip(self.keys, values)) + + @classmethod + def flatten(cls, obj): + for k in obj.keys(): + if not isinstance(k, str): + raise KeyError("Only support flattening dictionaries if keys are str.") + keys = sorted(obj.keys()) + values = [obj[k] for k in keys] + ret, schema = ListSchema.flatten(values) + return ret, cls(schema.schemas, schema.sizes, keys) + + +@dataclass +class InstancesSchema(DictSchema): + def __call__(self, values): + image_size, fields = values[-1], values[:-1] + fields = super().__call__(fields) + return Instances(image_size, **fields) + + @classmethod + def flatten(cls, obj): + ret, schema = super().flatten(obj.get_fields()) + size = obj.image_size + if not isinstance(size, torch.Tensor): + size = torch.tensor(size) + return ret + (size,), schema + + +@dataclass +class TensorWrapSchema(Schema): + """ + For classes that are simple wrapper of tensors, e.g. + Boxes, RotatedBoxes, BitMasks + """ + + class_name: str + + def __call__(self, values): + return locate(self.class_name)(values[0]) + + @classmethod + def flatten(cls, obj): + return (obj.tensor,), cls(_convert_target_to_string(type(obj))) + + +# if more custom structures needed in the future, can allow +# passing in extra schemas for custom types +def flatten_to_tuple(obj): + """ + Flatten an object so it can be used for PyTorch tracing. + Also returns how to rebuild the original object from the flattened outputs. + + Returns: + res (tuple): the flattened results that can be used as tracing outputs + schema: an object with a ``__call__`` method such that ``schema(res) == obj``. + It is a pure dataclass that can be serialized. + """ + schemas = [ + ((str, bytes), IdentitySchema), + (list, ListSchema), + (tuple, TupleSchema), + (collections.abc.Mapping, DictSchema), + (Instances, InstancesSchema), + ((Boxes, ROIMasks), TensorWrapSchema), + ] + for klass, schema in schemas: + if isinstance(obj, klass): + F = schema + break + else: + F = IdentitySchema + + return F.flatten(obj) + + +class TracingAdapter(nn.Module): + """ + A model may take rich input/output format (e.g. dict or custom classes), + but `torch.jit.trace` requires tuple of tensors as input/output. + This adapter flattens input/output format of a model so it becomes traceable. + + It also records the necessary schema to rebuild model's inputs/outputs from flattened + inputs/outputs. 
+ + Example: + :: + outputs = model(inputs) # inputs/outputs may be rich structure + adapter = TracingAdapter(model, inputs) + + # can now trace the model, with adapter.flattened_inputs, or another + # tuple of tensors with the same length and meaning + traced = torch.jit.trace(adapter, adapter.flattened_inputs) + + # traced model can only produce flattened outputs (tuple of tensors) + flattened_outputs = traced(*adapter.flattened_inputs) + # adapter knows the schema to convert it back (new_outputs == outputs) + new_outputs = adapter.outputs_schema(flattened_outputs) + """ + + flattened_inputs: Tuple[torch.Tensor] = None + """ + Flattened version of inputs given to this class's constructor. + """ + + inputs_schema: Schema = None + """ + Schema of the inputs given to this class's constructor. + """ + + outputs_schema: Schema = None + """ + Schema of the output produced by calling the given model with inputs. + """ + + def __init__( + self, + model: nn.Module, + inputs, + inference_func: Optional[Callable] = None, + allow_non_tensor: bool = False, + ): + """ + Args: + model: an nn.Module + inputs: An input argument or a tuple of input arguments used to call model. + After flattening, it has to only consist of tensors. + inference_func: a callable that takes (model, *inputs), calls the + model with inputs, and return outputs. By default it + is ``lambda model, *inputs: model(*inputs)``. Can be override + if you need to call the model differently. + allow_non_tensor: allow inputs/outputs to contain non-tensor objects. + This option will filter out non-tensor objects to make the + model traceable, but ``inputs_schema``/``outputs_schema`` cannot be + used anymore because inputs/outputs cannot be rebuilt from pure tensors. + This is useful when you're only interested in the single trace of + execution (e.g. for flop count), but not interested in + generalizing the traced graph to new inputs. + """ + super().__init__() + if isinstance(model, (nn.parallel.distributed.DistributedDataParallel, nn.DataParallel)): + model = model.module + self.model = model + if not isinstance(inputs, tuple): + inputs = (inputs,) + self.inputs = inputs + self.allow_non_tensor = allow_non_tensor + + if inference_func is None: + inference_func = lambda model, *inputs: model(*inputs) # noqa + self.inference_func = inference_func + + self.flattened_inputs, self.inputs_schema = flatten_to_tuple(inputs) + + if all(isinstance(x, torch.Tensor) for x in self.flattened_inputs): + return + if self.allow_non_tensor: + self.flattened_inputs = tuple( + [x for x in self.flattened_inputs if isinstance(x, torch.Tensor)] + ) + self.inputs_schema = None + else: + for input in self.flattened_inputs: + if not isinstance(input, torch.Tensor): + raise ValueError( + "Inputs for tracing must only contain tensors. " + f"Got a {type(input)} instead." + ) + + def forward(self, *args: torch.Tensor): + with torch.no_grad(), patch_builtin_len(): + if self.inputs_schema is not None: + inputs_orig_format = self.inputs_schema(args) + else: + if len(args) != len(self.flattened_inputs) or any( + x is not y for x, y in zip(args, self.flattened_inputs) + ): + raise ValueError( + "TracingAdapter does not contain valid inputs_schema." + " So it cannot generalize to other inputs and must be" + " traced with `.flattened_inputs`." 
+ ) + inputs_orig_format = self.inputs + + outputs = self.inference_func(self.model, *inputs_orig_format) + flattened_outputs, schema = flatten_to_tuple(outputs) + + flattened_output_tensors = tuple( + [x for x in flattened_outputs if isinstance(x, torch.Tensor)] + ) + if len(flattened_output_tensors) < len(flattened_outputs): + if self.allow_non_tensor: + flattened_outputs = flattened_output_tensors + self.outputs_schema = None + else: + raise ValueError( + "Model cannot be traced because some model outputs " + "cannot flatten to tensors." + ) + else: # schema is valid + if self.outputs_schema is None: + self.outputs_schema = schema + else: + assert self.outputs_schema == schema, ( + "Model should always return outputs with the same " + "structure so it can be traced!" + ) + return flattened_outputs + + def _create_wrapper(self, traced_model): + """ + Return a function that has an input/output interface the same as the + original model, but it calls the given traced model under the hood. + """ + + def forward(*args): + flattened_inputs, _ = flatten_to_tuple(args) + flattened_outputs = traced_model(*flattened_inputs) + return self.outputs_schema(flattened_outputs) + + return forward diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/export/shared.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/export/shared.py new file mode 100644 index 0000000000000000000000000000000000000000..53ba9335e26819f9381115eba17bbbe3816b469c --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/export/shared.py @@ -0,0 +1,1039 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +import collections +import copy +import functools +import logging +import numpy as np +import os +from typing import Any, Callable, Dict, List, Optional, Tuple, Union +from unittest import mock +import caffe2.python.utils as putils +import torch +import torch.nn.functional as F +from caffe2.proto import caffe2_pb2 +from caffe2.python import core, net_drawer, workspace +from torch.nn.functional import interpolate as interp + +logger = logging.getLogger(__name__) + + +# ==== torch/utils_toffee/cast.py ======================================= + + +def to_device(t, device_str): + """ + This function is a replacement of .to(another_device) such that it allows the + casting to be traced properly by explicitly calling the underlying copy ops. + It also avoids introducing unncessary op when casting to the same device. 
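+    Example (sketch; variable names are illustrative)::
+
+        t_gpu = to_device(t_cpu, "cuda")   # traced as _caffe2::CopyCPUToGPU
+        t_back = to_device(t_gpu, "cpu")   # traced as _caffe2::CopyGPUToCPU
+        same = to_device(t_cpu, "cpu")     # same device: returns t_cpu unchanged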
+ """ + src = t.device + dst = torch.device(device_str) + + if src == dst: + return t + elif src.type == "cuda" and dst.type == "cpu": + return torch.ops._caffe2.CopyGPUToCPU(t) + elif src.type == "cpu" and dst.type == "cuda": + return torch.ops._caffe2.CopyCPUToGPU(t) + else: + raise RuntimeError("Can't cast tensor from device {} to device {}".format(src, dst)) + + +# ==== torch/utils_toffee/interpolate.py ======================================= + + +# Note: borrowed from vision/detection/fair/detectron/detectron/modeling/detector.py +def BilinearInterpolation(tensor_in, up_scale): + assert up_scale % 2 == 0, "Scale should be even" + + def upsample_filt(size): + factor = (size + 1) // 2 + if size % 2 == 1: + center = factor - 1 + else: + center = factor - 0.5 + + og = np.ogrid[:size, :size] + return (1 - abs(og[0] - center) / factor) * (1 - abs(og[1] - center) / factor) + + kernel_size = int(up_scale) * 2 + bil_filt = upsample_filt(kernel_size) + + dim = int(tensor_in.shape[1]) + kernel = np.zeros((dim, dim, kernel_size, kernel_size), dtype=np.float32) + kernel[range(dim), range(dim), :, :] = bil_filt + + tensor_out = F.conv_transpose2d( + tensor_in, + weight=to_device(torch.Tensor(kernel), tensor_in.device), + bias=None, + stride=int(up_scale), + padding=int(up_scale / 2), + ) + + return tensor_out + + +# NOTE: ONNX is incompatible with traced torch.nn.functional.interpolate if +# using dynamic `scale_factor` rather than static `size`. (T43166860) +# NOTE: Caffe2 Int8 conversion might not be able to quantize `size` properly. +def onnx_compatibale_interpolate( + input, size=None, scale_factor=None, mode="nearest", align_corners=None +): + # NOTE: The input dimensions are interpreted in the form: + # `mini-batch x channels x [optional depth] x [optional height] x width`. + if size is None and scale_factor is not None: + if input.dim() == 4: + if isinstance(scale_factor, (int, float)): + height_scale, width_scale = (scale_factor, scale_factor) + else: + assert isinstance(scale_factor, (tuple, list)) + assert len(scale_factor) == 2 + height_scale, width_scale = scale_factor + + assert not align_corners, "No matching C2 op for align_corners == True" + if mode == "nearest": + return torch.ops._caffe2.ResizeNearest( + input, order="NCHW", width_scale=width_scale, height_scale=height_scale + ) + elif mode == "bilinear": + logger.warning( + "Use F.conv_transpose2d for bilinear interpolate" + " because there's no such C2 op, this may cause significant" + " slowdown and the boundary pixels won't be as same as" + " using F.interpolate due to padding." 
+ ) + assert height_scale == width_scale + return BilinearInterpolation(input, up_scale=height_scale) + logger.warning("Output size is not static, it might cause ONNX conversion issue") + + return interp(input, size, scale_factor, mode, align_corners) + + +def mock_torch_nn_functional_interpolate(): + def decorator(func): + @functools.wraps(func) + def _mock_torch_nn_functional_interpolate(*args, **kwargs): + if torch.onnx.is_in_onnx_export(): + with mock.patch( + "torch.nn.functional.interpolate", side_effect=onnx_compatibale_interpolate + ): + return func(*args, **kwargs) + else: + return func(*args, **kwargs) + + return _mock_torch_nn_functional_interpolate + + return decorator + + +# ==== torch/utils_caffe2/ws_utils.py ========================================== + + +class ScopedWS(object): + def __init__(self, ws_name, is_reset, is_cleanup=False): + self.ws_name = ws_name + self.is_reset = is_reset + self.is_cleanup = is_cleanup + self.org_ws = "" + + def __enter__(self): + self.org_ws = workspace.CurrentWorkspace() + if self.ws_name is not None: + workspace.SwitchWorkspace(self.ws_name, True) + if self.is_reset: + workspace.ResetWorkspace() + + return workspace + + def __exit__(self, *args): + if self.is_cleanup: + workspace.ResetWorkspace() + if self.ws_name is not None: + workspace.SwitchWorkspace(self.org_ws) + + +def fetch_any_blob(name): + bb = None + try: + bb = workspace.FetchBlob(name) + except TypeError: + bb = workspace.FetchInt8Blob(name) + except Exception as e: + logger.error("Get blob {} error: {}".format(name, e)) + + return bb + + +# ==== torch/utils_caffe2/protobuf.py ========================================== + + +def get_pb_arg(pb, arg_name): + for x in pb.arg: + if x.name == arg_name: + return x + return None + + +def get_pb_arg_valf(pb, arg_name, default_val): + arg = get_pb_arg(pb, arg_name) + return arg.f if arg is not None else default_val + + +def get_pb_arg_floats(pb, arg_name, default_val): + arg = get_pb_arg(pb, arg_name) + return list(map(float, arg.floats)) if arg is not None else default_val + + +def get_pb_arg_ints(pb, arg_name, default_val): + arg = get_pb_arg(pb, arg_name) + return list(map(int, arg.ints)) if arg is not None else default_val + + +def get_pb_arg_vali(pb, arg_name, default_val): + arg = get_pb_arg(pb, arg_name) + return arg.i if arg is not None else default_val + + +def get_pb_arg_vals(pb, arg_name, default_val): + arg = get_pb_arg(pb, arg_name) + return arg.s if arg is not None else default_val + + +def get_pb_arg_valstrings(pb, arg_name, default_val): + arg = get_pb_arg(pb, arg_name) + return list(arg.strings) if arg is not None else default_val + + +def check_set_pb_arg(pb, arg_name, arg_attr, arg_value, allow_override=False): + arg = get_pb_arg(pb, arg_name) + if arg is None: + arg = putils.MakeArgument(arg_name, arg_value) + assert hasattr(arg, arg_attr) + pb.arg.extend([arg]) + if allow_override and getattr(arg, arg_attr) != arg_value: + logger.warning( + "Override argument {}: {} -> {}".format(arg_name, getattr(arg, arg_attr), arg_value) + ) + setattr(arg, arg_attr, arg_value) + else: + assert arg is not None + assert getattr(arg, arg_attr) == arg_value, "Existing value {}, new value {}".format( + getattr(arg, arg_attr), arg_value + ) + + +def _create_const_fill_op_from_numpy(name, tensor, device_option=None): + assert type(tensor) == np.ndarray + kTypeNameMapper = { + np.dtype("float32"): "GivenTensorFill", + np.dtype("int32"): "GivenTensorIntFill", + np.dtype("int64"): "GivenTensorInt64Fill", + np.dtype("uint8"): 
"GivenTensorStringFill", + } + + args_dict = {} + if tensor.dtype == np.dtype("uint8"): + args_dict.update({"values": [str(tensor.data)], "shape": [1]}) + else: + args_dict.update({"values": tensor, "shape": tensor.shape}) + + if device_option is not None: + args_dict["device_option"] = device_option + + return core.CreateOperator(kTypeNameMapper[tensor.dtype], [], [name], **args_dict) + + +def _create_const_fill_op_from_c2_int8_tensor(name, int8_tensor): + assert type(int8_tensor) == workspace.Int8Tensor + kTypeNameMapper = { + np.dtype("int32"): "Int8GivenIntTensorFill", + np.dtype("uint8"): "Int8GivenTensorFill", + } + + tensor = int8_tensor.data + assert tensor.dtype in [np.dtype("uint8"), np.dtype("int32")] + values = tensor.tobytes() if tensor.dtype == np.dtype("uint8") else tensor + + return core.CreateOperator( + kTypeNameMapper[tensor.dtype], + [], + [name], + values=values, + shape=tensor.shape, + Y_scale=int8_tensor.scale, + Y_zero_point=int8_tensor.zero_point, + ) + + +def create_const_fill_op( + name: str, + blob: Union[np.ndarray, workspace.Int8Tensor], + device_option: Optional[caffe2_pb2.DeviceOption] = None, +) -> caffe2_pb2.OperatorDef: + """ + Given a blob object, return the Caffe2 operator that creates this blob + as constant. Currently support NumPy tensor and Caffe2 Int8Tensor. + """ + + tensor_type = type(blob) + assert tensor_type in [ + np.ndarray, + workspace.Int8Tensor, + ], 'Error when creating const fill op for "{}", unsupported blob type: {}'.format( + name, type(blob) + ) + + if tensor_type == np.ndarray: + return _create_const_fill_op_from_numpy(name, blob, device_option) + elif tensor_type == workspace.Int8Tensor: + assert device_option is None + return _create_const_fill_op_from_c2_int8_tensor(name, blob) + + +def construct_init_net_from_params( + params: Dict[str, Any], device_options: Optional[Dict[str, caffe2_pb2.DeviceOption]] = None +) -> caffe2_pb2.NetDef: + """ + Construct the init_net from params dictionary + """ + init_net = caffe2_pb2.NetDef() + device_options = device_options or {} + for name, blob in params.items(): + if isinstance(blob, str): + logger.warning( + ( + "Blob {} with type {} is not supported in generating init net," + " skipped.".format(name, type(blob)) + ) + ) + continue + init_net.op.extend( + [create_const_fill_op(name, blob, device_option=device_options.get(name, None))] + ) + init_net.external_output.append(name) + return init_net + + +def get_producer_map(ssa): + """ + Return dict from versioned blob to (i, j), + where i is index of producer op, j is the index of output of that op. + """ + producer_map = {} + for i in range(len(ssa)): + outputs = ssa[i][1] + for j, outp in enumerate(outputs): + producer_map[outp] = (i, j) + return producer_map + + +def get_consumer_map(ssa): + """ + Return dict from versioned blob to list of (i, j), + where i is index of consumer op, j is the index of input of that op. + """ + consumer_map = collections.defaultdict(list) + for i in range(len(ssa)): + inputs = ssa[i][0] + for j, inp in enumerate(inputs): + consumer_map[inp].append((i, j)) + return consumer_map + + +def get_params_from_init_net( + init_net: caffe2_pb2.NetDef, +) -> [Dict[str, Any], Dict[str, caffe2_pb2.DeviceOption]]: + """ + Take the output blobs from init_net by running it. 
+ Outputs: + params: dict from blob name to numpy array + device_options: dict from blob name to the device option of its creating op + """ + # NOTE: this assumes that the params is determined by producer op with the + # only exception be CopyGPUToCPU which is CUDA op but returns CPU tensor. + def _get_device_option(producer_op): + if producer_op.type == "CopyGPUToCPU": + return caffe2_pb2.DeviceOption() + else: + return producer_op.device_option + + with ScopedWS("__get_params_from_init_net__", is_reset=True, is_cleanup=True) as ws: + ws.RunNetOnce(init_net) + params = {b: fetch_any_blob(b) for b in init_net.external_output} + ssa, versions = core.get_ssa(init_net) + producer_map = get_producer_map(ssa) + device_options = { + b: _get_device_option(init_net.op[producer_map[(b, versions[b])][0]]) + for b in init_net.external_output + } + return params, device_options + + +def _updater_raise(op, input_types, output_types): + raise RuntimeError( + "Failed to apply updater for op {} given input_types {} and" + " output_types {}".format(op, input_types, output_types) + ) + + +def _generic_status_identifier( + predict_net: caffe2_pb2.NetDef, + status_updater: Callable, + known_status: Dict[Tuple[str, int], Any], +) -> Dict[Tuple[str, int], Any]: + """ + Statically infer the status of each blob, the status can be such as device type + (CPU/GPU), layout (NCHW/NHWC), data type (float32/int8), etc. "Blob" here + is versioned blob (Tuple[str, int]) in the format compatible with ssa. + Inputs: + predict_net: the caffe2 network + status_updater: a callable, given an op and the status of its input/output, + it returns the updated status of input/output. `None` is used for + representing unknown status. + known_status: a dict containing known status, used as initialization. + Outputs: + A dict mapping from versioned blob to its status + """ + ssa, versions = core.get_ssa(predict_net) + versioned_ext_input = [(b, 0) for b in predict_net.external_input] + versioned_ext_output = [(b, versions[b]) for b in predict_net.external_output] + all_versioned_blobs = set().union(*[set(x[0] + x[1]) for x in ssa]) + + allowed_vbs = all_versioned_blobs.union(versioned_ext_input).union(versioned_ext_output) + assert all(k in allowed_vbs for k in known_status) + assert all(v is not None for v in known_status.values()) + _known_status = copy.deepcopy(known_status) + + def _check_and_update(key, value): + assert value is not None + if key in _known_status: + if not _known_status[key] == value: + raise RuntimeError( + "Confilict status for {}, existing status {}, new status {}".format( + key, _known_status[key], value + ) + ) + _known_status[key] = value + + def _update_i(op, ssa_i): + versioned_inputs = ssa_i[0] + versioned_outputs = ssa_i[1] + + inputs_status = [_known_status.get(b, None) for b in versioned_inputs] + outputs_status = [_known_status.get(b, None) for b in versioned_outputs] + + new_inputs_status, new_outputs_status = status_updater(op, inputs_status, outputs_status) + + for versioned_blob, status in zip( + versioned_inputs + versioned_outputs, new_inputs_status + new_outputs_status + ): + if status is not None: + _check_and_update(versioned_blob, status) + + for op, ssa_i in zip(predict_net.op, ssa): + _update_i(op, ssa_i) + for op, ssa_i in zip(reversed(predict_net.op), reversed(ssa)): + _update_i(op, ssa_i) + + # NOTE: This strictly checks all the blob from predict_net must be assgined + # a known status. However sometimes it's impossible (eg. 
having deadend op), + # we may relax this constraint if + for k in all_versioned_blobs: + if k not in _known_status: + raise NotImplementedError( + "Can not infer the status for {}. Currently only support the case where" + " a single forward and backward pass can identify status for all blobs.".format(k) + ) + + return _known_status + + +def infer_device_type( + predict_net: caffe2_pb2.NetDef, + known_status: Dict[Tuple[str, int], Any], + device_name_style: str = "caffe2", +) -> Dict[Tuple[str, int], str]: + """Return the device type ("cpu" or "gpu"/"cuda") of each (versioned) blob""" + + assert device_name_style in ["caffe2", "pytorch"] + _CPU_STR = "cpu" + _GPU_STR = "gpu" if device_name_style == "caffe2" else "cuda" + + def _copy_cpu_to_gpu_updater(op, input_types, output_types): + if input_types[0] == _GPU_STR or output_types[0] == _CPU_STR: + _updater_raise(op, input_types, output_types) + return ([_CPU_STR], [_GPU_STR]) + + def _copy_gpu_to_cpu_updater(op, input_types, output_types): + if input_types[0] == _CPU_STR or output_types[0] == _GPU_STR: + _updater_raise(op, input_types, output_types) + return ([_GPU_STR], [_CPU_STR]) + + def _other_ops_updater(op, input_types, output_types): + non_none_types = [x for x in input_types + output_types if x is not None] + if len(non_none_types) > 0: + the_type = non_none_types[0] + if not all(x == the_type for x in non_none_types): + _updater_raise(op, input_types, output_types) + else: + the_type = None + return ([the_type for _ in op.input], [the_type for _ in op.output]) + + def _device_updater(op, *args, **kwargs): + return { + "CopyCPUToGPU": _copy_cpu_to_gpu_updater, + "CopyGPUToCPU": _copy_gpu_to_cpu_updater, + }.get(op.type, _other_ops_updater)(op, *args, **kwargs) + + return _generic_status_identifier(predict_net, _device_updater, known_status) + + +# ==== torch/utils_caffe2/vis.py =============================================== + + +def _modify_blob_names(ops, blob_rename_f): + ret = [] + + def _replace_list(blob_list, replaced_list): + del blob_list[:] + blob_list.extend(replaced_list) + + for x in ops: + cur = copy.deepcopy(x) + _replace_list(cur.input, list(map(blob_rename_f, cur.input))) + _replace_list(cur.output, list(map(blob_rename_f, cur.output))) + ret.append(cur) + + return ret + + +def _rename_blob(name, blob_sizes, blob_ranges): + def _list_to_str(bsize): + ret = ", ".join([str(x) for x in bsize]) + ret = "[" + ret + "]" + return ret + + ret = name + if blob_sizes is not None and name in blob_sizes: + ret += "\n" + _list_to_str(blob_sizes[name]) + if blob_ranges is not None and name in blob_ranges: + ret += "\n" + _list_to_str(blob_ranges[name]) + + return ret + + +# graph_name could not contain word 'graph' +def save_graph(net, file_name, graph_name="net", op_only=True, blob_sizes=None, blob_ranges=None): + blob_rename_f = functools.partial(_rename_blob, blob_sizes=blob_sizes, blob_ranges=blob_ranges) + return save_graph_base(net, file_name, graph_name, op_only, blob_rename_f) + + +def save_graph_base(net, file_name, graph_name="net", op_only=True, blob_rename_func=None): + graph = None + ops = net.op + if blob_rename_func is not None: + ops = _modify_blob_names(ops, blob_rename_func) + if not op_only: + graph = net_drawer.GetPydotGraph(ops, graph_name, rankdir="TB") + else: + graph = net_drawer.GetPydotGraphMinimal( + ops, graph_name, rankdir="TB", minimal_dependency=True + ) + + try: + par_dir = os.path.dirname(file_name) + if not os.path.exists(par_dir): + os.makedirs(par_dir) + + format = 
os.path.splitext(os.path.basename(file_name))[-1] + if format == ".png": + graph.write_png(file_name) + elif format == ".pdf": + graph.write_pdf(file_name) + elif format == ".svg": + graph.write_svg(file_name) + else: + print("Incorrect format {}".format(format)) + except Exception as e: + print("Error when writing graph to image {}".format(e)) + + return graph + + +# ==== torch/utils_toffee/aten_to_caffe2.py ==================================== + + +def group_norm_replace_aten_with_caffe2(predict_net: caffe2_pb2.NetDef): + """ + For ONNX exported model, GroupNorm will be represented as ATen op, + this can be a drop in replacement from ATen to GroupNorm + """ + count = 0 + for op in predict_net.op: + if op.type == "ATen": + op_name = get_pb_arg_vals(op, "operator", None) # return byte in py3 + if op_name and op_name.decode() == "group_norm": + op.arg.remove(get_pb_arg(op, "operator")) + + if get_pb_arg_vali(op, "cudnn_enabled", None): + op.arg.remove(get_pb_arg(op, "cudnn_enabled")) + + num_groups = get_pb_arg_vali(op, "num_groups", None) + if num_groups is not None: + op.arg.remove(get_pb_arg(op, "num_groups")) + check_set_pb_arg(op, "group", "i", num_groups) + + op.type = "GroupNorm" + count += 1 + if count > 1: + logger.info("Replaced {} ATen operator to GroupNormOp".format(count)) + + +# ==== torch/utils_toffee/alias.py ============================================= + + +def alias(x, name, is_backward=False): + if not torch.onnx.is_in_onnx_export(): + return x + assert isinstance(x, torch.Tensor) + return torch.ops._caffe2.AliasWithName(x, name, is_backward=is_backward) + + +def fuse_alias_placeholder(predict_net, init_net): + """Remove AliasWithName placeholder and rename the input/output of it""" + # First we finish all the re-naming + for i, op in enumerate(predict_net.op): + if op.type == "AliasWithName": + assert len(op.input) == 1 + assert len(op.output) == 1 + name = get_pb_arg_vals(op, "name", None).decode() + is_backward = bool(get_pb_arg_vali(op, "is_backward", 0)) + rename_op_input(predict_net, init_net, i, 0, name, from_producer=is_backward) + rename_op_output(predict_net, i, 0, name) + + # Remove AliasWithName, should be very safe since it's a non-op + new_ops = [] + for op in predict_net.op: + if op.type != "AliasWithName": + new_ops.append(op) + else: + # safety check + assert op.input == op.output + assert op.input[0] == op.arg[0].s.decode() + del predict_net.op[:] + predict_net.op.extend(new_ops) + + +# ==== torch/utils_caffe2/graph_transform.py =================================== + + +class IllegalGraphTransformError(ValueError): + """When a graph transform function call can't be executed.""" + + +def _rename_versioned_blob_in_proto( + proto: caffe2_pb2.NetDef, + old_name: str, + new_name: str, + version: int, + ssa: List[Tuple[List[Tuple[str, int]], List[Tuple[str, int]]]], + start_versions: Dict[str, int], + end_versions: Dict[str, int], +): + """In given proto, rename all blobs with matched version""" + # Operater list + for op, i_th_ssa in zip(proto.op, ssa): + versioned_inputs, versioned_outputs = i_th_ssa + for i in range(len(op.input)): + if versioned_inputs[i] == (old_name, version): + op.input[i] = new_name + for i in range(len(op.output)): + if versioned_outputs[i] == (old_name, version): + op.output[i] = new_name + # external_input + if start_versions.get(old_name, 0) == version: + for i in range(len(proto.external_input)): + if proto.external_input[i] == old_name: + proto.external_input[i] = new_name + # external_output + if end_versions.get(old_name, 0) == 
version: + for i in range(len(proto.external_output)): + if proto.external_output[i] == old_name: + proto.external_output[i] = new_name + + +def rename_op_input( + predict_net: caffe2_pb2.NetDef, + init_net: caffe2_pb2.NetDef, + op_id: int, + input_id: int, + new_name: str, + from_producer: bool = False, +): + """ + Rename the op_id-th operator in predict_net, change it's input_id-th input's + name to the new_name. It also does automatic re-route and change + external_input and init_net if necessary. + - It requires the input is only consumed by this op. + - This function modifies predict_net and init_net in-place. + - When from_producer is enable, this also updates other operators that consumes + the same input. Be cautious because may trigger unintended behavior. + """ + assert isinstance(predict_net, caffe2_pb2.NetDef) + assert isinstance(init_net, caffe2_pb2.NetDef) + + init_net_ssa, init_net_versions = core.get_ssa(init_net) + predict_net_ssa, predict_net_versions = core.get_ssa( + predict_net, copy.deepcopy(init_net_versions) + ) + + versioned_inputs, versioned_outputs = predict_net_ssa[op_id] + old_name, version = versioned_inputs[input_id] + + if from_producer: + producer_map = get_producer_map(predict_net_ssa) + if not (old_name, version) in producer_map: + raise NotImplementedError( + "Can't find producer, the input {} is probably from" + " init_net, this is not supported yet.".format(old_name) + ) + producer = producer_map[(old_name, version)] + rename_op_output(predict_net, producer[0], producer[1], new_name) + return + + def contain_targets(op_ssa): + return (old_name, version) in op_ssa[0] + + is_consumer = [contain_targets(op_ssa) for op_ssa in predict_net_ssa] + if sum(is_consumer) > 1: + raise IllegalGraphTransformError( + ( + "Input '{}' of operator(#{}) are consumed by other ops, please use" + + " rename_op_output on the producer instead. Offending op: \n{}" + ).format(old_name, op_id, predict_net.op[op_id]) + ) + + # update init_net + _rename_versioned_blob_in_proto( + init_net, old_name, new_name, version, init_net_ssa, {}, init_net_versions + ) + # update predict_net + _rename_versioned_blob_in_proto( + predict_net, + old_name, + new_name, + version, + predict_net_ssa, + init_net_versions, + predict_net_versions, + ) + + +def rename_op_output(predict_net: caffe2_pb2.NetDef, op_id: int, output_id: int, new_name: str): + """ + Rename the op_id-th operator in predict_net, change it's output_id-th input's + name to the new_name. It also does automatic re-route and change + external_output and if necessary. + - It allows multiple consumers of its output. + - This function modifies predict_net in-place, doesn't need init_net. + """ + assert isinstance(predict_net, caffe2_pb2.NetDef) + + ssa, blob_versions = core.get_ssa(predict_net) + + versioned_inputs, versioned_outputs = ssa[op_id] + old_name, version = versioned_outputs[output_id] + + # update predict_net + _rename_versioned_blob_in_proto( + predict_net, old_name, new_name, version, ssa, {}, blob_versions + ) + + +def get_sub_graph_external_input_output( + predict_net: caffe2_pb2.NetDef, sub_graph_op_indices: List[int] +) -> Tuple[List[Tuple[str, int]], List[Tuple[str, int]]]: + """ + Return the list of external input/output of sub-graph, + each element is tuple of the name and corresponding version in predict_net. + + external input/output is defined the same way as caffe2 NetDef. 
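For orientation, a minimal usage sketch (here `predict_net` is assumed to be an already-populated `caffe2_pb2.NetDef`, and the operator indices are illustrative):

    ext_inputs, ext_outputs = get_sub_graph_external_input_output(
        predict_net, sub_graph_op_indices=[3, 4, 5]
    )
    # each returned element is a versioned blob: a (blob_name, ssa_version) tuple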
+ """ + ssa, versions = core.get_ssa(predict_net) + + all_inputs = [] + all_outputs = [] + for op_id in sub_graph_op_indices: + all_inputs += [inp for inp in ssa[op_id][0] if inp not in all_inputs] + all_outputs += list(ssa[op_id][1]) # ssa output won't repeat + + # for versioned blobs, external inputs are just those blob in all_inputs + # but not in all_outputs + ext_inputs = [inp for inp in all_inputs if inp not in all_outputs] + + # external outputs are essentially outputs of this subgraph that are used + # outside of this sub-graph (including predict_net.external_output) + all_other_inputs = sum( + (ssa[i][0] for i in range(len(ssa)) if i not in sub_graph_op_indices), + [(outp, versions[outp]) for outp in predict_net.external_output], + ) + ext_outputs = [outp for outp in all_outputs if outp in set(all_other_inputs)] + + return ext_inputs, ext_outputs + + +class DiGraph: + """A DAG representation of caffe2 graph, each vertice is a versioned blob.""" + + def __init__(self): + self.vertices = set() + self.graph = collections.defaultdict(list) + + def add_edge(self, u, v): + self.graph[u].append(v) + self.vertices.add(u) + self.vertices.add(v) + + # grab from https://www.geeksforgeeks.org/find-paths-given-source-destination/ + def get_all_paths(self, s, d): + visited = {k: False for k in self.vertices} + path = [] + all_paths = [] + + def _get_all_paths_util(graph, u, d, visited, path): + visited[u] = True + path.append(u) + if u == d: + all_paths.append(copy.deepcopy(path)) + else: + for i in graph[u]: + if not visited[i]: + _get_all_paths_util(graph, i, d, visited, path) + path.pop() + visited[u] = False + + _get_all_paths_util(self.graph, s, d, visited, path) + return all_paths + + @staticmethod + def from_ssa(ssa): + graph = DiGraph() + for op_id in range(len(ssa)): + for inp in ssa[op_id][0]: + for outp in ssa[op_id][1]: + graph.add_edge(inp, outp) + return graph + + +def _get_dependency_chain(ssa, versioned_target, versioned_source): + """ + Return the index list of relevant operator to produce target blob from source blob, + if there's no dependency, return empty list. + """ + + # finding all paths between nodes can be O(N!), thus we can only search + # in the subgraph using the op starting from the first consumer of source blob + # to the producer of the target blob. + consumer_map = get_consumer_map(ssa) + producer_map = get_producer_map(ssa) + start_op = min(x[0] for x in consumer_map[versioned_source]) - 15 + end_op = ( + producer_map[versioned_target][0] + 15 if versioned_target in producer_map else start_op + ) + sub_graph_ssa = ssa[start_op : end_op + 1] + if len(sub_graph_ssa) > 30: + logger.warning( + "Subgraph bebetween {} and {} is large (from op#{} to op#{}), it" + " might take non-trival time to find all paths between them.".format( + versioned_source, versioned_target, start_op, end_op + ) + ) + + dag = DiGraph.from_ssa(sub_graph_ssa) + paths = dag.get_all_paths(versioned_source, versioned_target) # include two ends + ops_in_paths = [[producer_map[blob][0] for blob in path[1:]] for path in paths] + return sorted(set().union(*[set(ops) for ops in ops_in_paths])) + + +def identify_reshape_sub_graph(predict_net: caffe2_pb2.NetDef) -> List[List[int]]: + """ + Idenfity the reshape sub-graph in a protobuf. + The reshape sub-graph is defined as matching the following pattern: + + (input_blob) -> Op_1 -> ... 
-> Op_N -> (new_shape) -─┐ + └-------------------------------------------> Reshape -> (output_blob) + + Return: + List of sub-graphs, each sub-graph is represented as a list of indices + of the relavent ops, [Op_1, Op_2, ..., Op_N, Reshape] + """ + + ssa, _ = core.get_ssa(predict_net) + + ret = [] + for i, op in enumerate(predict_net.op): + if op.type == "Reshape": + assert len(op.input) == 2 + input_ssa = ssa[i][0] + data_source = input_ssa[0] + shape_source = input_ssa[1] + op_indices = _get_dependency_chain(ssa, shape_source, data_source) + ret.append(op_indices + [i]) + return ret + + +def remove_reshape_for_fc(predict_net, params): + """ + In PyTorch nn.Linear has to take 2D tensor, this often leads to reshape + a 4D tensor to 2D by calling .view(). However this (dynamic) reshaping + doesn't work well with ONNX and Int8 tools, and cause using extra + ops (eg. ExpandDims) that might not be available on mobile. + Luckily Caffe2 supports 4D tensor for FC, so we can remove those reshape + after exporting ONNX model. + """ + from caffe2.python import core + + # find all reshape sub-graph that can be removed, which is now all Reshape + # sub-graph whose output is only consumed by FC. + # TODO: to make it safer, we may need the actually value to better determine + # if a Reshape before FC is removable. + reshape_sub_graphs = identify_reshape_sub_graph(predict_net) + sub_graphs_to_remove = [] + for reshape_sub_graph in reshape_sub_graphs: + reshape_op_id = reshape_sub_graph[-1] + assert predict_net.op[reshape_op_id].type == "Reshape" + ssa, _ = core.get_ssa(predict_net) + reshape_output = ssa[reshape_op_id][1][0] + consumers = [i for i in range(len(ssa)) if reshape_output in ssa[i][0]] + if all(predict_net.op[consumer].type == "FC" for consumer in consumers): + # safety check if the sub-graph is isolated, for this reshape sub-graph, + # it means it has one non-param external input and one external output. + ext_inputs, ext_outputs = get_sub_graph_external_input_output( + predict_net, reshape_sub_graph + ) + non_params_ext_inputs = [inp for inp in ext_inputs if inp[1] != 0] + if len(non_params_ext_inputs) == 1 and len(ext_outputs) == 1: + sub_graphs_to_remove.append(reshape_sub_graph) + + # perform removing subgraph by: + # 1: rename the Reshape's output to its input, then the graph can be + # seen as in-place itentify, meaning whose external input/output are the same. + # 2: simply remove those ops. 
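+    # (Step 1 re-routes every consumer of the Reshape's output to the Reshape's
+    #  input blob, so the FC ops already read the original 4D tensor and step 2
+    #  can drop the sub-graph ops without any further re-wiring.)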
+ remove_op_ids = [] + params_to_remove = [] + for sub_graph in sub_graphs_to_remove: + logger.info( + "Remove Reshape sub-graph:\n{}".format( + "".join(["(#{:>4})\n{}".format(i, predict_net.op[i]) for i in sub_graph]) + ) + ) + reshape_op_id = sub_graph[-1] + new_reshap_output = predict_net.op[reshape_op_id].input[0] + rename_op_output(predict_net, reshape_op_id, 0, new_reshap_output) + ext_inputs, ext_outputs = get_sub_graph_external_input_output(predict_net, sub_graph) + non_params_ext_inputs = [inp for inp in ext_inputs if inp[1] != 0] + params_ext_inputs = [inp for inp in ext_inputs if inp[1] == 0] + assert len(non_params_ext_inputs) == 1 and len(ext_outputs) == 1 + assert ext_outputs[0][0] == non_params_ext_inputs[0][0] + assert ext_outputs[0][1] == non_params_ext_inputs[0][1] + 1 + remove_op_ids.extend(sub_graph) + params_to_remove.extend(params_ext_inputs) + + predict_net = copy.deepcopy(predict_net) + new_ops = [op for i, op in enumerate(predict_net.op) if i not in remove_op_ids] + del predict_net.op[:] + predict_net.op.extend(new_ops) + for versioned_params in params_to_remove: + name = versioned_params[0] + logger.info("Remove params: {} from init_net and predict_net.external_input".format(name)) + del params[name] + predict_net.external_input.remove(name) + + return predict_net, params + + +def fuse_copy_between_cpu_and_gpu(predict_net: caffe2_pb2.NetDef): + """ + In-place fuse extra copy ops between cpu/gpu for the following case: + a -CopyAToB-> b -CopyBToA> c1 -NextOp1-> d1 + -CopyBToA> c2 -NextOp2-> d2 + The fused network will look like: + a -NextOp1-> d1 + -NextOp2-> d2 + """ + + _COPY_OPS = ["CopyCPUToGPU", "CopyGPUToCPU"] + + def _fuse_once(predict_net): + ssa, blob_versions = core.get_ssa(predict_net) + consumer_map = get_consumer_map(ssa) + versioned_external_output = [ + (name, blob_versions[name]) for name in predict_net.external_output + ] + + for op_id, op in enumerate(predict_net.op): + if op.type in _COPY_OPS: + fw_copy_versioned_output = ssa[op_id][1][0] + consumer_ids = [x[0] for x in consumer_map[fw_copy_versioned_output]] + reverse_op_type = _COPY_OPS[1 - _COPY_OPS.index(op.type)] + + is_fusable = ( + len(consumer_ids) > 0 + and fw_copy_versioned_output not in versioned_external_output + and all( + predict_net.op[_op_id].type == reverse_op_type + and ssa[_op_id][1][0] not in versioned_external_output + for _op_id in consumer_ids + ) + ) + + if is_fusable: + for rv_copy_op_id in consumer_ids: + # making each NextOp uses "a" directly and removing Copy ops + rs_copy_versioned_output = ssa[rv_copy_op_id][1][0] + next_op_id, inp_id = consumer_map[rs_copy_versioned_output][0] + predict_net.op[next_op_id].input[inp_id] = op.input[0] + # remove CopyOps + new_ops = [ + op + for i, op in enumerate(predict_net.op) + if i != op_id and i not in consumer_ids + ] + del predict_net.op[:] + predict_net.op.extend(new_ops) + return True + + return False + + # _fuse_once returns False is nothing can be fused + while _fuse_once(predict_net): + pass + + +def remove_dead_end_ops(net_def: caffe2_pb2.NetDef): + """remove ops if its output is not used or not in external_output""" + ssa, versions = core.get_ssa(net_def) + versioned_external_output = [(name, versions[name]) for name in net_def.external_output] + consumer_map = get_consumer_map(ssa) + removed_op_ids = set() + + def _is_dead_end(versioned_blob): + return not ( + versioned_blob in versioned_external_output + or ( + len(consumer_map[versioned_blob]) > 0 + and all(x[0] not in removed_op_ids for x in 
consumer_map[versioned_blob]) + ) + ) + + for i, ssa_i in reversed(list(enumerate(ssa))): + versioned_outputs = ssa_i[1] + if all(_is_dead_end(outp) for outp in versioned_outputs): + removed_op_ids.add(i) + + # simply removing those deadend ops should have no effect to external_output + new_ops = [op for i, op in enumerate(net_def.op) if i not in removed_op_ids] + del net_def.op[:] + net_def.op.extend(new_ops) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/export/torchscript.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/export/torchscript.py new file mode 100644 index 0000000000000000000000000000000000000000..8ce1c81e1b7abb65415055ae0d1d4b83e1ae111d --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/export/torchscript.py @@ -0,0 +1,132 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +import os +import torch + +from annotator.oneformer.detectron2.utils.file_io import PathManager + +from .torchscript_patch import freeze_training_mode, patch_instances + +__all__ = ["scripting_with_instances", "dump_torchscript_IR"] + + +def scripting_with_instances(model, fields): + """ + Run :func:`torch.jit.script` on a model that uses the :class:`Instances` class. Since + attributes of :class:`Instances` are "dynamically" added in eager mode,it is difficult + for scripting to support it out of the box. This function is made to support scripting + a model that uses :class:`Instances`. It does the following: + + 1. Create a scriptable ``new_Instances`` class which behaves similarly to ``Instances``, + but with all attributes been "static". + The attributes need to be statically declared in the ``fields`` argument. + 2. Register ``new_Instances``, and force scripting compiler to + use it when trying to compile ``Instances``. + + After this function, the process will be reverted. User should be able to script another model + using different fields. + + Example: + Assume that ``Instances`` in the model consist of two attributes named + ``proposal_boxes`` and ``objectness_logits`` with type :class:`Boxes` and + :class:`Tensor` respectively during inference. You can call this function like: + :: + fields = {"proposal_boxes": Boxes, "objectness_logits": torch.Tensor} + torchscipt_model = scripting_with_instances(model, fields) + + Note: + It only support models in evaluation mode. + + Args: + model (nn.Module): The input model to be exported by scripting. + fields (Dict[str, type]): Attribute names and corresponding type that + ``Instances`` will use in the model. Note that all attributes used in ``Instances`` + need to be added, regardless of whether they are inputs/outputs of the model. + Data type not defined in detectron2 is not supported for now. + + Returns: + torch.jit.ScriptModule: the model in torchscript format + """ + assert ( + not model.training + ), "Currently we only support exporting models in evaluation mode to torchscript" + + with freeze_training_mode(model), patch_instances(fields): + scripted_model = torch.jit.script(model) + return scripted_model + + +# alias for old name +export_torchscript_with_instances = scripting_with_instances + + +def dump_torchscript_IR(model, dir): + """ + Dump IR of a TracedModule/ScriptModule/Function in various format (code, graph, + inlined graph). Useful for debugging. + + Args: + model (TracedModule/ScriptModule/ScriptFUnction): traced or scripted module + dir (str): output directory to dump files. 
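As a quick orientation, a call-sequence sketch (the model itself is elided; the `fields` dict reuses the example from the `scripting_with_instances` docstring above, and the output directory is arbitrary):

    import torch
    from annotator.oneformer.detectron2.structures import Boxes

    ts_model = scripting_with_instances(
        model, {"proposal_boxes": Boxes, "objectness_logits": torch.Tensor}
    )
    dump_torchscript_IR(ts_model, "./ts_dump")
    # writes model_ts_code.txt, model_ts_IR.txt, model_ts_IR_inlined.txt and model.txt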
+ """ + dir = os.path.expanduser(dir) + PathManager.mkdirs(dir) + + def _get_script_mod(mod): + if isinstance(mod, torch.jit.TracedModule): + return mod._actual_script_module + return mod + + # Dump pretty-printed code: https://pytorch.org/docs/stable/jit.html#inspecting-code + with PathManager.open(os.path.join(dir, "model_ts_code.txt"), "w") as f: + + def get_code(mod): + # Try a few ways to get code using private attributes. + try: + # This contains more information than just `mod.code` + return _get_script_mod(mod)._c.code + except AttributeError: + pass + try: + return mod.code + except AttributeError: + return None + + def dump_code(prefix, mod): + code = get_code(mod) + name = prefix or "root model" + if code is None: + f.write(f"Could not found code for {name} (type={mod.original_name})\n") + f.write("\n") + else: + f.write(f"\nCode for {name}, type={mod.original_name}:\n") + f.write(code) + f.write("\n") + f.write("-" * 80) + + for name, m in mod.named_children(): + dump_code(prefix + "." + name, m) + + if isinstance(model, torch.jit.ScriptFunction): + f.write(get_code(model)) + else: + dump_code("", model) + + def _get_graph(model): + try: + # Recursively dump IR of all modules + return _get_script_mod(model)._c.dump_to_str(True, False, False) + except AttributeError: + return model.graph.str() + + with PathManager.open(os.path.join(dir, "model_ts_IR.txt"), "w") as f: + f.write(_get_graph(model)) + + # Dump IR of the entire graph (all submodules inlined) + with PathManager.open(os.path.join(dir, "model_ts_IR_inlined.txt"), "w") as f: + f.write(str(model.inlined_graph)) + + if not isinstance(model, torch.jit.ScriptFunction): + # Dump the model structure in pytorch style + with PathManager.open(os.path.join(dir, "model.txt"), "w") as f: + f.write(str(model)) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/export/torchscript_patch.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/export/torchscript_patch.py new file mode 100644 index 0000000000000000000000000000000000000000..24c69b25dbec19221bcd8fc2e928a8393dd3aaf6 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/export/torchscript_patch.py @@ -0,0 +1,406 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +import os +import sys +import tempfile +from contextlib import ExitStack, contextmanager +from copy import deepcopy +from unittest import mock +import torch +from torch import nn + +# need some explicit imports due to https://github.com/pytorch/pytorch/issues/38964 +import annotator.oneformer.detectron2 # noqa F401 +from annotator.oneformer.detectron2.structures import Boxes, Instances +from annotator.oneformer.detectron2.utils.env import _import_file + +_counter = 0 + + +def _clear_jit_cache(): + from torch.jit._recursive import concrete_type_store + from torch.jit._state import _jit_caching_layer + + concrete_type_store.type_store.clear() # for modules + _jit_caching_layer.clear() # for free functions + + +def _add_instances_conversion_methods(newInstances): + """ + Add from_instances methods to the scripted Instances class. 
+ """ + cls_name = newInstances.__name__ + + @torch.jit.unused + def from_instances(instances: Instances): + """ + Create scripted Instances from original Instances + """ + fields = instances.get_fields() + image_size = instances.image_size + ret = newInstances(image_size) + for name, val in fields.items(): + assert hasattr(ret, f"_{name}"), f"No attribute named {name} in {cls_name}" + setattr(ret, name, deepcopy(val)) + return ret + + newInstances.from_instances = from_instances + + +@contextmanager +def patch_instances(fields): + """ + A contextmanager, under which the Instances class in detectron2 is replaced + by a statically-typed scriptable class, defined by `fields`. + See more in `scripting_with_instances`. + """ + + with tempfile.TemporaryDirectory(prefix="detectron2") as dir, tempfile.NamedTemporaryFile( + mode="w", encoding="utf-8", suffix=".py", dir=dir, delete=False + ) as f: + try: + # Objects that use Instances should not reuse previously-compiled + # results in cache, because `Instances` could be a new class each time. + _clear_jit_cache() + + cls_name, s = _gen_instance_module(fields) + f.write(s) + f.flush() + f.close() + + module = _import(f.name) + new_instances = getattr(module, cls_name) + _ = torch.jit.script(new_instances) + # let torchscript think Instances was scripted already + Instances.__torch_script_class__ = True + # let torchscript find new_instances when looking for the jit type of Instances + Instances._jit_override_qualname = torch._jit_internal._qualified_name(new_instances) + + _add_instances_conversion_methods(new_instances) + yield new_instances + finally: + try: + del Instances.__torch_script_class__ + del Instances._jit_override_qualname + except AttributeError: + pass + sys.modules.pop(module.__name__) + + +def _gen_instance_class(fields): + """ + Args: + fields (dict[name: type]) + """ + + class _FieldType: + def __init__(self, name, type_): + assert isinstance(name, str), f"Field name must be str, got {name}" + self.name = name + self.type_ = type_ + self.annotation = f"{type_.__module__}.{type_.__name__}" + + fields = [_FieldType(k, v) for k, v in fields.items()] + + def indent(level, s): + return " " * 4 * level + s + + lines = [] + + global _counter + _counter += 1 + + cls_name = "ScriptedInstances{}".format(_counter) + + field_names = tuple(x.name for x in fields) + extra_args = ", ".join([f"{f.name}: Optional[{f.annotation}] = None" for f in fields]) + lines.append( + f""" +class {cls_name}: + def __init__(self, image_size: Tuple[int, int], {extra_args}): + self.image_size = image_size + self._field_names = {field_names} +""" + ) + + for f in fields: + lines.append( + indent(2, f"self._{f.name} = torch.jit.annotate(Optional[{f.annotation}], {f.name})") + ) + + for f in fields: + lines.append( + f""" + @property + def {f.name}(self) -> {f.annotation}: + # has to use a local for type refinement + # https://pytorch.org/docs/stable/jit_language_reference.html#optional-type-refinement + t = self._{f.name} + assert t is not None, "{f.name} is None and cannot be accessed!" 
+ return t + + @{f.name}.setter + def {f.name}(self, value: {f.annotation}) -> None: + self._{f.name} = value +""" + ) + + # support method `__len__` + lines.append( + """ + def __len__(self) -> int: +""" + ) + for f in fields: + lines.append( + f""" + t = self._{f.name} + if t is not None: + return len(t) +""" + ) + lines.append( + """ + raise NotImplementedError("Empty Instances does not support __len__!") +""" + ) + + # support method `has` + lines.append( + """ + def has(self, name: str) -> bool: +""" + ) + for f in fields: + lines.append( + f""" + if name == "{f.name}": + return self._{f.name} is not None +""" + ) + lines.append( + """ + return False +""" + ) + + # support method `to` + none_args = ", None" * len(fields) + lines.append( + f""" + def to(self, device: torch.device) -> "{cls_name}": + ret = {cls_name}(self.image_size{none_args}) +""" + ) + for f in fields: + if hasattr(f.type_, "to"): + lines.append( + f""" + t = self._{f.name} + if t is not None: + ret._{f.name} = t.to(device) +""" + ) + else: + # For now, ignore fields that cannot be moved to devices. + # Maybe can support other tensor-like classes (e.g. __torch_function__) + pass + lines.append( + """ + return ret +""" + ) + + # support method `getitem` + none_args = ", None" * len(fields) + lines.append( + f""" + def __getitem__(self, item) -> "{cls_name}": + ret = {cls_name}(self.image_size{none_args}) +""" + ) + for f in fields: + lines.append( + f""" + t = self._{f.name} + if t is not None: + ret._{f.name} = t[item] +""" + ) + lines.append( + """ + return ret +""" + ) + + # support method `cat` + # this version does not contain checks that all instances have same size and fields + none_args = ", None" * len(fields) + lines.append( + f""" + def cat(self, instances: List["{cls_name}"]) -> "{cls_name}": + ret = {cls_name}(self.image_size{none_args}) +""" + ) + for f in fields: + lines.append( + f""" + t = self._{f.name} + if t is not None: + values: List[{f.annotation}] = [x.{f.name} for x in instances] + if torch.jit.isinstance(t, torch.Tensor): + ret._{f.name} = torch.cat(values, dim=0) + else: + ret._{f.name} = t.cat(values) +""" + ) + lines.append( + """ + return ret""" + ) + + # support method `get_fields()` + lines.append( + """ + def get_fields(self) -> Dict[str, Tensor]: + ret = {} + """ + ) + for f in fields: + if f.type_ == Boxes: + stmt = "t.tensor" + elif f.type_ == torch.Tensor: + stmt = "t" + else: + stmt = f'assert False, "unsupported type {str(f.type_)}"' + lines.append( + f""" + t = self._{f.name} + if t is not None: + ret["{f.name}"] = {stmt} + """ + ) + lines.append( + """ + return ret""" + ) + return cls_name, os.linesep.join(lines) + + +def _gen_instance_module(fields): + # TODO: find a more automatic way to enable import of other classes + s = """ +from copy import deepcopy +import torch +from torch import Tensor +import typing +from typing import * + +import annotator.oneformer.detectron2 +from annotator.oneformer.detectron2.structures import Boxes, Instances + +""" + + cls_name, cls_def = _gen_instance_class(fields) + s += cls_def + return cls_name, s + + +def _import(path): + return _import_file( + "{}{}".format(sys.modules[__name__].__name__, _counter), path, make_importable=True + ) + + +@contextmanager +def patch_builtin_len(modules=()): + """ + Patch the builtin len() function of a few detectron2 modules + to use __len__ instead, because __len__ does not convert values to + integers and therefore is friendly to tracing. 
+ + Args: + modules (list[stsr]): names of extra modules to patch len(), in + addition to those in detectron2. + """ + + def _new_len(obj): + return obj.__len__() + + with ExitStack() as stack: + MODULES = [ + "detectron2.modeling.roi_heads.fast_rcnn", + "detectron2.modeling.roi_heads.mask_head", + "detectron2.modeling.roi_heads.keypoint_head", + ] + list(modules) + ctxs = [stack.enter_context(mock.patch(mod + ".len")) for mod in MODULES] + for m in ctxs: + m.side_effect = _new_len + yield + + +def patch_nonscriptable_classes(): + """ + Apply patches on a few nonscriptable detectron2 classes. + Should not have side-effects on eager usage. + """ + # __prepare_scriptable__ can also be added to models for easier maintenance. + # But it complicates the clean model code. + + from annotator.oneformer.detectron2.modeling.backbone import ResNet, FPN + + # Due to https://github.com/pytorch/pytorch/issues/36061, + # we change backbone to use ModuleList for scripting. + # (note: this changes param names in state_dict) + + def prepare_resnet(self): + ret = deepcopy(self) + ret.stages = nn.ModuleList(ret.stages) + for k in self.stage_names: + delattr(ret, k) + return ret + + ResNet.__prepare_scriptable__ = prepare_resnet + + def prepare_fpn(self): + ret = deepcopy(self) + ret.lateral_convs = nn.ModuleList(ret.lateral_convs) + ret.output_convs = nn.ModuleList(ret.output_convs) + for name, _ in self.named_children(): + if name.startswith("fpn_"): + delattr(ret, name) + return ret + + FPN.__prepare_scriptable__ = prepare_fpn + + # Annotate some attributes to be constants for the purpose of scripting, + # even though they are not constants in eager mode. + from annotator.oneformer.detectron2.modeling.roi_heads import StandardROIHeads + + if hasattr(StandardROIHeads, "__annotations__"): + # copy first to avoid editing annotations of base class + StandardROIHeads.__annotations__ = deepcopy(StandardROIHeads.__annotations__) + StandardROIHeads.__annotations__["mask_on"] = torch.jit.Final[bool] + StandardROIHeads.__annotations__["keypoint_on"] = torch.jit.Final[bool] + + +# These patches are not supposed to have side-effects. +patch_nonscriptable_classes() + + +@contextmanager +def freeze_training_mode(model): + """ + A context manager that annotates the "training" attribute of every submodule + to constant, so that the training codepath in these modules can be + meta-compiled away. Upon exiting, the annotations are reverted. + """ + classes = {type(x) for x in model.modules()} + # __constants__ is the old way to annotate constants and not compatible + # with __annotations__ . + classes = {x for x in classes if not hasattr(x, "__constants__")} + for cls in classes: + cls.__annotations__["training"] = torch.jit.Final[bool] + yield + for cls in classes: + cls.__annotations__["training"] = bool diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..761a3d1c7afa049e9779ee9fc4d299e9aae38cad --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/__init__.py @@ -0,0 +1,26 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
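For context, a hedged sketch of how the training-mode freezing and Instances patching defined above are combined in practice (`model` is assumed to be an eval-mode detectron2 model and `fields` a dict of attribute names to types, as in `scripting_with_instances`):

    with freeze_training_mode(model), patch_instances(fields):
        scripted_model = torch.jit.script(model)  # "training" branches compile away as constants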
+from .batch_norm import FrozenBatchNorm2d, get_norm, NaiveSyncBatchNorm, CycleBatchNormList +from .deform_conv import DeformConv, ModulatedDeformConv +from .mask_ops import paste_masks_in_image +from .nms import batched_nms, batched_nms_rotated, nms, nms_rotated +from .roi_align import ROIAlign, roi_align +from .roi_align_rotated import ROIAlignRotated, roi_align_rotated +from .shape_spec import ShapeSpec +from .wrappers import ( + BatchNorm2d, + Conv2d, + ConvTranspose2d, + cat, + interpolate, + Linear, + nonzero_tuple, + cross_entropy, + empty_input_loss_func_wrapper, + shapes_to_tensor, + move_device_like, +) +from .blocks import CNNBlockBase, DepthwiseSeparableConv2d +from .aspp import ASPP +from .losses import ciou_loss, diou_loss + +__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/aspp.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/aspp.py new file mode 100644 index 0000000000000000000000000000000000000000..14861aa9ede4fea6a69a49f189bcab997b558148 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/aspp.py @@ -0,0 +1,144 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +from copy import deepcopy +import fvcore.nn.weight_init as weight_init +import torch +from torch import nn +from torch.nn import functional as F + +from .batch_norm import get_norm +from .blocks import DepthwiseSeparableConv2d +from .wrappers import Conv2d + + +class ASPP(nn.Module): + """ + Atrous Spatial Pyramid Pooling (ASPP). + """ + + def __init__( + self, + in_channels, + out_channels, + dilations, + *, + norm, + activation, + pool_kernel_size=None, + dropout: float = 0.0, + use_depthwise_separable_conv=False, + ): + """ + Args: + in_channels (int): number of input channels for ASPP. + out_channels (int): number of output channels. + dilations (list): a list of 3 dilations in ASPP. + norm (str or callable): normalization for all conv layers. + See :func:`layers.get_norm` for supported format. norm is + applied to all conv layers except the conv following + global average pooling. + activation (callable): activation function. + pool_kernel_size (tuple, list): the average pooling size (kh, kw) + for image pooling layer in ASPP. If set to None, it always + performs global average pooling. If not None, it must be + divisible by the shape of inputs in forward(). It is recommended + to use a fixed input feature size in training, and set this + option to match this size, so that it performs global average + pooling in training, and the size of the pooling window stays + consistent in inference. + dropout (float): apply dropout on the output of ASPP. It is used in + the official DeepLab implementation with a rate of 0.1: + https://github.com/tensorflow/models/blob/21b73d22f3ed05b650e85ac50849408dd36de32e/research/deeplab/model.py#L532 # noqa + use_depthwise_separable_conv (bool): use DepthwiseSeparableConv2d + for 3x3 convs in ASPP, proposed in :paper:`DeepLabV3+`. 
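To make the argument list concrete, a construction sketch (the channel counts, dilations and dropout rate are illustrative values, not defaults):

    import torch.nn.functional as F

    aspp = ASPP(
        in_channels=2048, out_channels=256, dilations=[6, 12, 18],
        norm="BN", activation=F.relu, dropout=0.1,
    )
    out = aspp(features)  # features: an NCHW tensor with 2048 channels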
+ """ + super(ASPP, self).__init__() + assert len(dilations) == 3, "ASPP expects 3 dilations, got {}".format(len(dilations)) + self.pool_kernel_size = pool_kernel_size + self.dropout = dropout + use_bias = norm == "" + self.convs = nn.ModuleList() + # conv 1x1 + self.convs.append( + Conv2d( + in_channels, + out_channels, + kernel_size=1, + bias=use_bias, + norm=get_norm(norm, out_channels), + activation=deepcopy(activation), + ) + ) + weight_init.c2_xavier_fill(self.convs[-1]) + # atrous convs + for dilation in dilations: + if use_depthwise_separable_conv: + self.convs.append( + DepthwiseSeparableConv2d( + in_channels, + out_channels, + kernel_size=3, + padding=dilation, + dilation=dilation, + norm1=norm, + activation1=deepcopy(activation), + norm2=norm, + activation2=deepcopy(activation), + ) + ) + else: + self.convs.append( + Conv2d( + in_channels, + out_channels, + kernel_size=3, + padding=dilation, + dilation=dilation, + bias=use_bias, + norm=get_norm(norm, out_channels), + activation=deepcopy(activation), + ) + ) + weight_init.c2_xavier_fill(self.convs[-1]) + # image pooling + # We do not add BatchNorm because the spatial resolution is 1x1, + # the original TF implementation has BatchNorm. + if pool_kernel_size is None: + image_pooling = nn.Sequential( + nn.AdaptiveAvgPool2d(1), + Conv2d(in_channels, out_channels, 1, bias=True, activation=deepcopy(activation)), + ) + else: + image_pooling = nn.Sequential( + nn.AvgPool2d(kernel_size=pool_kernel_size, stride=1), + Conv2d(in_channels, out_channels, 1, bias=True, activation=deepcopy(activation)), + ) + weight_init.c2_xavier_fill(image_pooling[1]) + self.convs.append(image_pooling) + + self.project = Conv2d( + 5 * out_channels, + out_channels, + kernel_size=1, + bias=use_bias, + norm=get_norm(norm, out_channels), + activation=deepcopy(activation), + ) + weight_init.c2_xavier_fill(self.project) + + def forward(self, x): + size = x.shape[-2:] + if self.pool_kernel_size is not None: + if size[0] % self.pool_kernel_size[0] or size[1] % self.pool_kernel_size[1]: + raise ValueError( + "`pool_kernel_size` must be divisible by the shape of inputs. " + "Input size: {} `pool_kernel_size`: {}".format(size, self.pool_kernel_size) + ) + res = [] + for conv in self.convs: + res.append(conv(x)) + res[-1] = F.interpolate(res[-1], size=size, mode="bilinear", align_corners=False) + res = torch.cat(res, dim=1) + res = self.project(res) + res = F.dropout(res, self.dropout, training=self.training) if self.dropout > 0 else res + return res diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/batch_norm.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/batch_norm.py new file mode 100644 index 0000000000000000000000000000000000000000..32a1e05470065e75b6caad18d36211d27af8eec0 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/batch_norm.py @@ -0,0 +1,300 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import torch +import torch.distributed as dist +from fvcore.nn.distributed import differentiable_all_reduce +from torch import nn +from torch.nn import functional as F + +from annotator.oneformer.detectron2.utils import comm, env + +from .wrappers import BatchNorm2d + + +class FrozenBatchNorm2d(nn.Module): + """ + BatchNorm2d where the batch statistics and the affine parameters are fixed. 
+ + It contains non-trainable buffers called + "weight" and "bias", "running_mean", "running_var", + initialized to perform identity transformation. + + The pre-trained backbone models from Caffe2 only contain "weight" and "bias", + which are computed from the original four parameters of BN. + The affine transform `x * weight + bias` will perform the equivalent + computation of `(x - running_mean) / sqrt(running_var) * weight + bias`. + When loading a backbone model from Caffe2, "running_mean" and "running_var" + will be left unchanged as identity transformation. + + Other pre-trained backbone models may contain all 4 parameters. + + The forward is implemented by `F.batch_norm(..., training=False)`. + """ + + _version = 3 + + def __init__(self, num_features, eps=1e-5): + super().__init__() + self.num_features = num_features + self.eps = eps + self.register_buffer("weight", torch.ones(num_features)) + self.register_buffer("bias", torch.zeros(num_features)) + self.register_buffer("running_mean", torch.zeros(num_features)) + self.register_buffer("running_var", torch.ones(num_features) - eps) + + def forward(self, x): + if x.requires_grad: + # When gradients are needed, F.batch_norm will use extra memory + # because its backward op computes gradients for weight/bias as well. + scale = self.weight * (self.running_var + self.eps).rsqrt() + bias = self.bias - self.running_mean * scale + scale = scale.reshape(1, -1, 1, 1) + bias = bias.reshape(1, -1, 1, 1) + out_dtype = x.dtype # may be half + return x * scale.to(out_dtype) + bias.to(out_dtype) + else: + # When gradients are not needed, F.batch_norm is a single fused op + # and provide more optimization opportunities. + return F.batch_norm( + x, + self.running_mean, + self.running_var, + self.weight, + self.bias, + training=False, + eps=self.eps, + ) + + def _load_from_state_dict( + self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs + ): + version = local_metadata.get("version", None) + + if version is None or version < 2: + # No running_mean/var in early versions + # This will silent the warnings + if prefix + "running_mean" not in state_dict: + state_dict[prefix + "running_mean"] = torch.zeros_like(self.running_mean) + if prefix + "running_var" not in state_dict: + state_dict[prefix + "running_var"] = torch.ones_like(self.running_var) + + super()._load_from_state_dict( + state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs + ) + + def __repr__(self): + return "FrozenBatchNorm2d(num_features={}, eps={})".format(self.num_features, self.eps) + + @classmethod + def convert_frozen_batchnorm(cls, module): + """ + Convert all BatchNorm/SyncBatchNorm in module into FrozenBatchNorm. + + Args: + module (torch.nn.Module): + + Returns: + If module is BatchNorm/SyncBatchNorm, returns a new module. + Otherwise, in-place convert module and return it. 
+ + Similar to convert_sync_batchnorm in + https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/batchnorm.py + """ + bn_module = nn.modules.batchnorm + bn_module = (bn_module.BatchNorm2d, bn_module.SyncBatchNorm) + res = module + if isinstance(module, bn_module): + res = cls(module.num_features) + if module.affine: + res.weight.data = module.weight.data.clone().detach() + res.bias.data = module.bias.data.clone().detach() + res.running_mean.data = module.running_mean.data + res.running_var.data = module.running_var.data + res.eps = module.eps + else: + for name, child in module.named_children(): + new_child = cls.convert_frozen_batchnorm(child) + if new_child is not child: + res.add_module(name, new_child) + return res + + +def get_norm(norm, out_channels): + """ + Args: + norm (str or callable): either one of BN, SyncBN, FrozenBN, GN; + or a callable that takes a channel number and returns + the normalization layer as a nn.Module. + + Returns: + nn.Module or None: the normalization layer + """ + if norm is None: + return None + if isinstance(norm, str): + if len(norm) == 0: + return None + norm = { + "BN": BatchNorm2d, + # Fixed in https://github.com/pytorch/pytorch/pull/36382 + "SyncBN": NaiveSyncBatchNorm if env.TORCH_VERSION <= (1, 5) else nn.SyncBatchNorm, + "FrozenBN": FrozenBatchNorm2d, + "GN": lambda channels: nn.GroupNorm(32, channels), + # for debugging: + "nnSyncBN": nn.SyncBatchNorm, + "naiveSyncBN": NaiveSyncBatchNorm, + # expose stats_mode N as an option to caller, required for zero-len inputs + "naiveSyncBN_N": lambda channels: NaiveSyncBatchNorm(channels, stats_mode="N"), + "LN": lambda channels: LayerNorm(channels), + }[norm] + return norm(out_channels) + + +class NaiveSyncBatchNorm(BatchNorm2d): + """ + In PyTorch<=1.5, ``nn.SyncBatchNorm`` has incorrect gradient + when the batch size on each worker is different. + (e.g., when scale augmentation is used, or when it is applied to mask head). + + This is a slower but correct alternative to `nn.SyncBatchNorm`. + + Note: + There isn't a single definition of Sync BatchNorm. + + When ``stats_mode==""``, this module computes overall statistics by using + statistics of each worker with equal weight. The result is true statistics + of all samples (as if they are all on one worker) only when all workers + have the same (N, H, W). This mode does not support inputs with zero batch size. + + When ``stats_mode=="N"``, this module computes overall statistics by weighting + the statistics of each worker by their ``N``. The result is true statistics + of all samples (as if they are all on one worker) only when all workers + have the same (H, W). It is slower than ``stats_mode==""``. + + Even though the result of this module may not be the true statistics of all samples, + it may still be reasonable because it might be preferrable to assign equal weights + to all workers, regardless of their (H, W) dimension, instead of putting larger weight + on larger images. From preliminary experiments, little difference is found between such + a simplified implementation and an accurate computation of overall mean & variance. 
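For reference, a short sketch of the two entry points defined above (the channel count and the `backbone` module are assumptions):

    norm_layer = get_norm("GN", 256)    # -> nn.GroupNorm(32, 256)
    sync_bn = get_norm("SyncBN", 256)   # nn.SyncBatchNorm, or NaiveSyncBatchNorm on PyTorch <= 1.5
    frozen = FrozenBatchNorm2d.convert_frozen_batchnorm(backbone)  # returns a converted copy/in-place module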
+ """ + + def __init__(self, *args, stats_mode="", **kwargs): + super().__init__(*args, **kwargs) + assert stats_mode in ["", "N"] + self._stats_mode = stats_mode + + def forward(self, input): + if comm.get_world_size() == 1 or not self.training: + return super().forward(input) + + B, C = input.shape[0], input.shape[1] + + half_input = input.dtype == torch.float16 + if half_input: + # fp16 does not have good enough numerics for the reduction here + input = input.float() + mean = torch.mean(input, dim=[0, 2, 3]) + meansqr = torch.mean(input * input, dim=[0, 2, 3]) + + if self._stats_mode == "": + assert B > 0, 'SyncBatchNorm(stats_mode="") does not support zero batch size.' + vec = torch.cat([mean, meansqr], dim=0) + vec = differentiable_all_reduce(vec) * (1.0 / dist.get_world_size()) + mean, meansqr = torch.split(vec, C) + momentum = self.momentum + else: + if B == 0: + vec = torch.zeros([2 * C + 1], device=mean.device, dtype=mean.dtype) + vec = vec + input.sum() # make sure there is gradient w.r.t input + else: + vec = torch.cat( + [mean, meansqr, torch.ones([1], device=mean.device, dtype=mean.dtype)], dim=0 + ) + vec = differentiable_all_reduce(vec * B) + + total_batch = vec[-1].detach() + momentum = total_batch.clamp(max=1) * self.momentum # no update if total_batch is 0 + mean, meansqr, _ = torch.split(vec / total_batch.clamp(min=1), C) # avoid div-by-zero + + var = meansqr - mean * mean + invstd = torch.rsqrt(var + self.eps) + scale = self.weight * invstd + bias = self.bias - mean * scale + scale = scale.reshape(1, -1, 1, 1) + bias = bias.reshape(1, -1, 1, 1) + + self.running_mean += momentum * (mean.detach() - self.running_mean) + self.running_var += momentum * (var.detach() - self.running_var) + ret = input * scale + bias + if half_input: + ret = ret.half() + return ret + + +class CycleBatchNormList(nn.ModuleList): + """ + Implement domain-specific BatchNorm by cycling. + + When a BatchNorm layer is used for multiple input domains or input + features, it might need to maintain a separate test-time statistics + for each domain. See Sec 5.2 in :paper:`rethinking-batchnorm`. + + This module implements it by using N separate BN layers + and it cycles through them every time a forward() is called. + + NOTE: The caller of this module MUST guarantee to always call + this module by multiple of N times. Otherwise its test-time statistics + will be incorrect. + """ + + def __init__(self, length: int, bn_class=nn.BatchNorm2d, **kwargs): + """ + Args: + length: number of BatchNorm layers to cycle. + bn_class: the BatchNorm class to use + kwargs: arguments of the BatchNorm class, such as num_features. + """ + self._affine = kwargs.pop("affine", True) + super().__init__([bn_class(**kwargs, affine=False) for k in range(length)]) + if self._affine: + # shared affine, domain-specific BN + channels = self[0].num_features + self.weight = nn.Parameter(torch.ones(channels)) + self.bias = nn.Parameter(torch.zeros(channels)) + self._pos = 0 + + def forward(self, x): + ret = self[self._pos](x) + self._pos = (self._pos + 1) % len(self) + + if self._affine: + w = self.weight.reshape(1, -1, 1, 1) + b = self.bias.reshape(1, -1, 1, 1) + return ret * w + b + else: + return ret + + def extra_repr(self): + return f"affine={self._affine}" + + +class LayerNorm(nn.Module): + """ + A LayerNorm variant, popularized by Transformers, that performs point-wise mean and + variance normalization over the channel dimension for inputs that have shape + (batch_size, channels, height, width). 
+ https://github.com/facebookresearch/ConvNeXt/blob/d1fa8f6fef0a165b27399986cc2bdacc92777e40/models/convnext.py#L119 # noqa B950 + """ + + def __init__(self, normalized_shape, eps=1e-6): + super().__init__() + self.weight = nn.Parameter(torch.ones(normalized_shape)) + self.bias = nn.Parameter(torch.zeros(normalized_shape)) + self.eps = eps + self.normalized_shape = (normalized_shape,) + + def forward(self, x): + u = x.mean(1, keepdim=True) + s = (x - u).pow(2).mean(1, keepdim=True) + x = (x - u) / torch.sqrt(s + self.eps) + x = self.weight[:, None, None] * x + self.bias[:, None, None] + return x diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/blocks.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/blocks.py new file mode 100644 index 0000000000000000000000000000000000000000..1995a4bf7339e8deb7eaaffda4f819dda55e7ac7 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/blocks.py @@ -0,0 +1,111 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + +import fvcore.nn.weight_init as weight_init +from torch import nn + +from .batch_norm import FrozenBatchNorm2d, get_norm +from .wrappers import Conv2d + + +""" +CNN building blocks. +""" + + +class CNNBlockBase(nn.Module): + """ + A CNN block is assumed to have input channels, output channels and a stride. + The input and output of `forward()` method must be NCHW tensors. + The method can perform arbitrary computation but must match the given + channels and stride specification. + + Attribute: + in_channels (int): + out_channels (int): + stride (int): + """ + + def __init__(self, in_channels, out_channels, stride): + """ + The `__init__` method of any subclass should also contain these arguments. + + Args: + in_channels (int): + out_channels (int): + stride (int): + """ + super().__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.stride = stride + + def freeze(self): + """ + Make this block not trainable. + This method sets all parameters to `requires_grad=False`, + and convert all BatchNorm layers to FrozenBatchNorm + + Returns: + the block itself + """ + for p in self.parameters(): + p.requires_grad = False + FrozenBatchNorm2d.convert_frozen_batchnorm(self) + return self + + +class DepthwiseSeparableConv2d(nn.Module): + """ + A kxk depthwise convolution + a 1x1 convolution. + + In :paper:`xception`, norm & activation are applied on the second conv. + :paper:`mobilenet` uses norm & activation on both convs. + """ + + def __init__( + self, + in_channels, + out_channels, + kernel_size=3, + padding=1, + dilation=1, + *, + norm1=None, + activation1=None, + norm2=None, + activation2=None, + ): + """ + Args: + norm1, norm2 (str or callable): normalization for the two conv layers. + activation1, activation2 (callable(Tensor) -> Tensor): activation + function for the two conv layers. 
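A construction sketch for the block described above (channel counts and the ReLU activation are illustrative choices):

    import torch.nn.functional as F

    conv = DepthwiseSeparableConv2d(
        64, 128, kernel_size=3, padding=1,
        norm1="BN", activation1=F.relu,
        norm2="BN", activation2=F.relu,
    )
    y = conv(x)  # x: NCHW tensor with 64 channels -> y has 128 channels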
+ """ + super().__init__() + self.depthwise = Conv2d( + in_channels, + in_channels, + kernel_size=kernel_size, + padding=padding, + dilation=dilation, + groups=in_channels, + bias=not norm1, + norm=get_norm(norm1, in_channels), + activation=activation1, + ) + self.pointwise = Conv2d( + in_channels, + out_channels, + kernel_size=1, + bias=not norm2, + norm=get_norm(norm2, out_channels), + activation=activation2, + ) + + # default initialization + weight_init.c2_msra_fill(self.depthwise) + weight_init.c2_msra_fill(self.pointwise) + + def forward(self, x): + return self.pointwise(self.depthwise(x)) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/README.md b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/README.md new file mode 100644 index 0000000000000000000000000000000000000000..778ed3da0bae89820831bcd8a72ff7b9cad8d4dd --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/README.md @@ -0,0 +1,7 @@ + + +To add a new Op: + +1. Create a new directory +2. Implement new ops there +3. Delcare its Python interface in `vision.cpp`. diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated.h b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated.h new file mode 100644 index 0000000000000000000000000000000000000000..03f4211003f42f601f0cfcf4a690f5da4a0a1f67 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated.h @@ -0,0 +1,115 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +#pragma once +#include + +namespace detectron2 { + +at::Tensor ROIAlignRotated_forward_cpu( + const at::Tensor& input, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int sampling_ratio); + +at::Tensor ROIAlignRotated_backward_cpu( + const at::Tensor& grad, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int batch_size, + const int channels, + const int height, + const int width, + const int sampling_ratio); + +#if defined(WITH_CUDA) || defined(WITH_HIP) +at::Tensor ROIAlignRotated_forward_cuda( + const at::Tensor& input, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int sampling_ratio); + +at::Tensor ROIAlignRotated_backward_cuda( + const at::Tensor& grad, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int batch_size, + const int channels, + const int height, + const int width, + const int sampling_ratio); +#endif + +// Interface for Python +inline at::Tensor ROIAlignRotated_forward( + const at::Tensor& input, + const at::Tensor& rois, + const double spatial_scale, + const int64_t pooled_height, + const int64_t pooled_width, + const int64_t sampling_ratio) { + if (input.is_cuda()) { +#if defined(WITH_CUDA) || defined(WITH_HIP) + return ROIAlignRotated_forward_cuda( + input, + rois, + spatial_scale, + pooled_height, + pooled_width, + sampling_ratio); +#else + AT_ERROR("Detectron2 is not compiled with GPU support!"); +#endif + } + return ROIAlignRotated_forward_cpu( + input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); +} + +inline at::Tensor 
ROIAlignRotated_backward( + const at::Tensor& grad, + const at::Tensor& rois, + const double spatial_scale, + const int64_t pooled_height, + const int64_t pooled_width, + const int64_t batch_size, + const int64_t channels, + const int64_t height, + const int64_t width, + const int64_t sampling_ratio) { + if (grad.is_cuda()) { +#if defined(WITH_CUDA) || defined(WITH_HIP) + return ROIAlignRotated_backward_cuda( + grad, + rois, + spatial_scale, + pooled_height, + pooled_width, + batch_size, + channels, + height, + width, + sampling_ratio); +#else + AT_ERROR("Detectron2 is not compiled with GPU support!"); +#endif + } + return ROIAlignRotated_backward_cpu( + grad, + rois, + spatial_scale, + pooled_height, + pooled_width, + batch_size, + channels, + height, + width, + sampling_ratio); +} + +} // namespace detectron2 diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.cpp b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2a3d3056cc71a4acaafb570739a9dd247a7eb1ed --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.cpp @@ -0,0 +1,522 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +#include +#include "ROIAlignRotated.h" + +// Note: this implementation originates from the Caffe2 ROIAlignRotated Op +// and PyTorch ROIAlign (non-rotated) Op implementations. +// The key difference between this implementation and those ones is +// we don't do "legacy offset" in this version, as there aren't many previous +// works, if any, using the "legacy" ROIAlignRotated Op. +// This would make the interface a bit cleaner. + +namespace detectron2 { + +namespace { +template +struct PreCalc { + int pos1; + int pos2; + int pos3; + int pos4; + T w1; + T w2; + T w3; + T w4; +}; + +template +void pre_calc_for_bilinear_interpolate( + const int height, + const int width, + const int pooled_height, + const int pooled_width, + const int iy_upper, + const int ix_upper, + T roi_start_h, + T roi_start_w, + T bin_size_h, + T bin_size_w, + int roi_bin_grid_h, + int roi_bin_grid_w, + T roi_center_h, + T roi_center_w, + T cos_theta, + T sin_theta, + std::vector>& pre_calc) { + int pre_calc_index = 0; + for (int ph = 0; ph < pooled_height; ph++) { + for (int pw = 0; pw < pooled_width; pw++) { + for (int iy = 0; iy < iy_upper; iy++) { + const T yy = roi_start_h + ph * bin_size_h + + static_cast(iy + .5f) * bin_size_h / + static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 + for (int ix = 0; ix < ix_upper; ix++) { + const T xx = roi_start_w + pw * bin_size_w + + static_cast(ix + .5f) * bin_size_w / + static_cast(roi_bin_grid_w); + + // Rotate by theta around the center and translate + // In image space, (y, x) is the order for Right Handed System, + // and this is essentially multiplying the point by a rotation matrix + // to rotate it counterclockwise through angle theta. 
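+          // Concretely:
+          //   y = yy * cos(theta) - xx * sin(theta) + roi_center_h
+          //   x = yy * sin(theta) + xx * cos(theta) + roi_center_w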
+ T y = yy * cos_theta - xx * sin_theta + roi_center_h; + T x = yy * sin_theta + xx * cos_theta + roi_center_w; + // deal with: inverse elements are out of feature map boundary + if (y < -1.0 || y > height || x < -1.0 || x > width) { + // empty + PreCalc pc; + pc.pos1 = 0; + pc.pos2 = 0; + pc.pos3 = 0; + pc.pos4 = 0; + pc.w1 = 0; + pc.w2 = 0; + pc.w3 = 0; + pc.w4 = 0; + pre_calc[pre_calc_index] = pc; + pre_calc_index += 1; + continue; + } + + if (y < 0) { + y = 0; + } + if (x < 0) { + x = 0; + } + + int y_low = (int)y; + int x_low = (int)x; + int y_high; + int x_high; + + if (y_low >= height - 1) { + y_high = y_low = height - 1; + y = (T)y_low; + } else { + y_high = y_low + 1; + } + + if (x_low >= width - 1) { + x_high = x_low = width - 1; + x = (T)x_low; + } else { + x_high = x_low + 1; + } + + T ly = y - y_low; + T lx = x - x_low; + T hy = 1. - ly, hx = 1. - lx; + T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; + + // save weights and indices + PreCalc pc; + pc.pos1 = y_low * width + x_low; + pc.pos2 = y_low * width + x_high; + pc.pos3 = y_high * width + x_low; + pc.pos4 = y_high * width + x_high; + pc.w1 = w1; + pc.w2 = w2; + pc.w3 = w3; + pc.w4 = w4; + pre_calc[pre_calc_index] = pc; + + pre_calc_index += 1; + } + } + } + } +} + +template +void bilinear_interpolate_gradient( + const int height, + const int width, + T y, + T x, + T& w1, + T& w2, + T& w3, + T& w4, + int& x_low, + int& x_high, + int& y_low, + int& y_high) { + // deal with cases that inverse elements are out of feature map boundary + if (y < -1.0 || y > height || x < -1.0 || x > width) { + // empty + w1 = w2 = w3 = w4 = 0.; + x_low = x_high = y_low = y_high = -1; + return; + } + + if (y < 0) { + y = 0; + } + + if (x < 0) { + x = 0; + } + + y_low = (int)y; + x_low = (int)x; + + if (y_low >= height - 1) { + y_high = y_low = height - 1; + y = (T)y_low; + } else { + y_high = y_low + 1; + } + + if (x_low >= width - 1) { + x_high = x_low = width - 1; + x = (T)x_low; + } else { + x_high = x_low + 1; + } + + T ly = y - y_low; + T lx = x - x_low; + T hy = 1. - ly, hx = 1. 
- lx; + + // reference in forward + // T v1 = input[y_low * width + x_low]; + // T v2 = input[y_low * width + x_high]; + // T v3 = input[y_high * width + x_low]; + // T v4 = input[y_high * width + x_high]; + // T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + + w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; + + return; +} + +template +inline void add(T* address, const T& val) { + *address += val; +} + +} // namespace + +template +void ROIAlignRotatedForward( + const int nthreads, + const T* input, + const T& spatial_scale, + const int channels, + const int height, + const int width, + const int pooled_height, + const int pooled_width, + const int sampling_ratio, + const T* rois, + T* output) { + int n_rois = nthreads / channels / pooled_width / pooled_height; + // (n, c, ph, pw) is an element in the pooled output + // can be parallelized using omp + // #pragma omp parallel for num_threads(32) + for (int n = 0; n < n_rois; n++) { + int index_n = n * channels * pooled_width * pooled_height; + + const T* current_roi = rois + n * 6; + int roi_batch_ind = current_roi[0]; + + // Do not use rounding; this implementation detail is critical + // ROIAlignRotated supports align == true, i.e., continuous coordinate + // by default, thus the 0.5 offset + T offset = (T)0.5; + T roi_center_w = current_roi[1] * spatial_scale - offset; + T roi_center_h = current_roi[2] * spatial_scale - offset; + T roi_width = current_roi[3] * spatial_scale; + T roi_height = current_roi[4] * spatial_scale; + T theta = current_roi[5] * M_PI / 180.0; + T cos_theta = cos(theta); + T sin_theta = sin(theta); + + AT_ASSERTM( + roi_width >= 0 && roi_height >= 0, + "ROIs in ROIAlignRotated do not have non-negative size!"); + + T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); + T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); + + // We use roi_bin_grid to sample the grid and mimic integral + int roi_bin_grid_h = (sampling_ratio > 0) + ? sampling_ratio + : ceil(roi_height / pooled_height); // e.g., = 2 + int roi_bin_grid_w = + (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); + + // We do average (integral) pooling inside a bin + const T count = std::max(roi_bin_grid_h * roi_bin_grid_w, 1); // e.g. = 4 + + // we want to precalculate indices and weights shared by all channels, + // this is the key point of optimization + std::vector> pre_calc( + roi_bin_grid_h * roi_bin_grid_w * pooled_width * pooled_height); + + // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y). + // Appropriate translation needs to be applied after. 
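// Editor's note (worked example, not part of the original patch): the
// sampling-grid setup in this function, assuming sampling_ratio <= 0,
// roi_height = 13.6 and pooled_height = 7:
//   bin_size_h     = 13.6 / 7  ~= 1.943
//   roi_bin_grid_h = ceil(1.943) = 2 sample points per bin along y
//   roi_start_h    = -13.6 / 2 = -6.8  (top edge, measured from the ROI center)
// Each grid point is later rotated by theta and shifted by
// (roi_center_h, roi_center_w) before bilinear interpolation.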
+ T roi_start_h = -roi_height / 2.0; + T roi_start_w = -roi_width / 2.0; + + pre_calc_for_bilinear_interpolate( + height, + width, + pooled_height, + pooled_width, + roi_bin_grid_h, + roi_bin_grid_w, + roi_start_h, + roi_start_w, + bin_size_h, + bin_size_w, + roi_bin_grid_h, + roi_bin_grid_w, + roi_center_h, + roi_center_w, + cos_theta, + sin_theta, + pre_calc); + + for (int c = 0; c < channels; c++) { + int index_n_c = index_n + c * pooled_width * pooled_height; + const T* offset_input = + input + (roi_batch_ind * channels + c) * height * width; + int pre_calc_index = 0; + + for (int ph = 0; ph < pooled_height; ph++) { + for (int pw = 0; pw < pooled_width; pw++) { + int index = index_n_c + ph * pooled_width + pw; + + T output_val = 0.; + for (int iy = 0; iy < roi_bin_grid_h; iy++) { + for (int ix = 0; ix < roi_bin_grid_w; ix++) { + PreCalc pc = pre_calc[pre_calc_index]; + output_val += pc.w1 * offset_input[pc.pos1] + + pc.w2 * offset_input[pc.pos2] + + pc.w3 * offset_input[pc.pos3] + pc.w4 * offset_input[pc.pos4]; + + pre_calc_index += 1; + } + } + output_val /= count; + + output[index] = output_val; + } // for pw + } // for ph + } // for c + } // for n +} + +template +void ROIAlignRotatedBackward( + const int nthreads, + // may not be contiguous. should index using n_stride, etc + const T* grad_output, + const T& spatial_scale, + const int channels, + const int height, + const int width, + const int pooled_height, + const int pooled_width, + const int sampling_ratio, + T* grad_input, + const T* rois, + const int n_stride, + const int c_stride, + const int h_stride, + const int w_stride) { + for (int index = 0; index < nthreads; index++) { + // (n, c, ph, pw) is an element in the pooled output + int pw = index % pooled_width; + int ph = (index / pooled_width) % pooled_height; + int c = (index / pooled_width / pooled_height) % channels; + int n = index / pooled_width / pooled_height / channels; + + const T* current_roi = rois + n * 6; + int roi_batch_ind = current_roi[0]; + + // Do not use rounding; this implementation detail is critical + // ROIAlignRotated supports align == true, i.e., continuous coordinate + // by default, thus the 0.5 offset + T offset = (T)0.5; + T roi_center_w = current_roi[1] * spatial_scale - offset; + T roi_center_h = current_roi[2] * spatial_scale - offset; + T roi_width = current_roi[3] * spatial_scale; + T roi_height = current_roi[4] * spatial_scale; + T theta = current_roi[5] * M_PI / 180.0; + T cos_theta = cos(theta); + T sin_theta = sin(theta); + + AT_ASSERTM( + roi_width >= 0 && roi_height >= 0, + "ROIs in ROIAlignRotated do not have non-negative size!"); + + T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); + T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); + + T* offset_grad_input = + grad_input + ((roi_batch_ind * channels + c) * height * width); + + int output_offset = n * n_stride + c * c_stride; + const T* offset_grad_output = grad_output + output_offset; + const T grad_output_this_bin = + offset_grad_output[ph * h_stride + pw * w_stride]; + + // We use roi_bin_grid to sample the grid and mimic integral + int roi_bin_grid_h = (sampling_ratio > 0) + ? sampling_ratio + : ceil(roi_height / pooled_height); // e.g., = 2 + int roi_bin_grid_w = + (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); + + // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y). + // Appropriate translation needs to be applied after. 
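// Editor's note (worked example, not part of the original patch): how the
// gradient splat further below distributes grad_output_this_bin. For a sample
// point at (y, x) = (2.25, 3.75), the four neighbours are (2,3), (2,4), (3,3),
// (3,4), with ly = 0.25, lx = 0.75, hy = 0.75, hx = 0.25, so
//   w1 = hy*hx = 0.1875   w2 = hy*lx = 0.5625
//   w3 = ly*hx = 0.0625   w4 = ly*lx = 0.1875   (the weights sum to 1)
// and each neighbour accumulates g_i = grad_output_this_bin * w_i / count.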
+ T roi_start_h = -roi_height / 2.0; + T roi_start_w = -roi_width / 2.0; + + // We do average (integral) pooling inside a bin + const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. = 4 + + for (int iy = 0; iy < roi_bin_grid_h; iy++) { + const T yy = roi_start_h + ph * bin_size_h + + static_cast(iy + .5f) * bin_size_h / + static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 + for (int ix = 0; ix < roi_bin_grid_w; ix++) { + const T xx = roi_start_w + pw * bin_size_w + + static_cast(ix + .5f) * bin_size_w / + static_cast(roi_bin_grid_w); + + // Rotate by theta around the center and translate + T y = yy * cos_theta - xx * sin_theta + roi_center_h; + T x = yy * sin_theta + xx * cos_theta + roi_center_w; + + T w1, w2, w3, w4; + int x_low, x_high, y_low, y_high; + + bilinear_interpolate_gradient( + height, width, y, x, w1, w2, w3, w4, x_low, x_high, y_low, y_high); + + T g1 = grad_output_this_bin * w1 / count; + T g2 = grad_output_this_bin * w2 / count; + T g3 = grad_output_this_bin * w3 / count; + T g4 = grad_output_this_bin * w4 / count; + + if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) { + // atomic add is not needed for now since it is single threaded + add(offset_grad_input + y_low * width + x_low, static_cast(g1)); + add(offset_grad_input + y_low * width + x_high, static_cast(g2)); + add(offset_grad_input + y_high * width + x_low, static_cast(g3)); + add(offset_grad_input + y_high * width + x_high, static_cast(g4)); + } // if + } // ix + } // iy + } // for +} // ROIAlignRotatedBackward + +at::Tensor ROIAlignRotated_forward_cpu( + const at::Tensor& input, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int sampling_ratio) { + AT_ASSERTM(input.device().is_cpu(), "input must be a CPU tensor"); + AT_ASSERTM(rois.device().is_cpu(), "rois must be a CPU tensor"); + + at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2}; + + at::CheckedFrom c = "ROIAlign_forward_cpu"; + at::checkAllSameType(c, {input_t, rois_t}); + + auto num_rois = rois.size(0); + auto channels = input.size(1); + auto height = input.size(2); + auto width = input.size(3); + + at::Tensor output = at::zeros( + {num_rois, channels, pooled_height, pooled_width}, input.options()); + + auto output_size = num_rois * pooled_height * pooled_width * channels; + + if (output.numel() == 0) { + return output; + } + + auto input_ = input.contiguous(), rois_ = rois.contiguous(); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + input.scalar_type(), "ROIAlignRotated_forward", [&] { + ROIAlignRotatedForward( + output_size, + input_.data_ptr(), + spatial_scale, + channels, + height, + width, + pooled_height, + pooled_width, + sampling_ratio, + rois_.data_ptr(), + output.data_ptr()); + }); + return output; +} + +at::Tensor ROIAlignRotated_backward_cpu( + const at::Tensor& grad, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int batch_size, + const int channels, + const int height, + const int width, + const int sampling_ratio) { + AT_ASSERTM(grad.device().is_cpu(), "grad must be a CPU tensor"); + AT_ASSERTM(rois.device().is_cpu(), "rois must be a CPU tensor"); + + at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2}; + + at::CheckedFrom c = "ROIAlignRotated_backward_cpu"; + at::checkAllSameType(c, {grad_t, rois_t}); + + at::Tensor grad_input = + at::zeros({batch_size, channels, height, width}, grad.options()); + + // handle possibly empty gradients + if (grad.numel() == 0) { + return 
grad_input; + } + + // get stride values to ensure indexing into gradients is correct. + int n_stride = grad.stride(0); + int c_stride = grad.stride(1); + int h_stride = grad.stride(2); + int w_stride = grad.stride(3); + + auto rois_ = rois.contiguous(); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + grad.scalar_type(), "ROIAlignRotated_forward", [&] { + ROIAlignRotatedBackward( + grad.numel(), + grad.data_ptr(), + spatial_scale, + channels, + height, + width, + pooled_height, + pooled_width, + sampling_ratio, + grad_input.data_ptr(), + rois_.data_ptr(), + n_stride, + c_stride, + h_stride, + w_stride); + }); + return grad_input; +} + +} // namespace detectron2 diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cuda.cu b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cuda.cu new file mode 100644 index 0000000000000000000000000000000000000000..fca186519143b168a912c880a4cf495a0a5a9322 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cuda.cu @@ -0,0 +1,443 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +#include +#include +#include +#include + +// TODO make it in a common file +#define CUDA_1D_KERNEL_LOOP(i, n) \ + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ + i += blockDim.x * gridDim.x) + +// Note: this implementation originates from the Caffe2 ROIAlignRotated Op +// and PyTorch ROIAlign (non-rotated) Op implementations. +// The key difference between this implementation and those ones is +// we don't do "legacy offset" in this version, as there aren't many previous +// works, if any, using the "legacy" ROIAlignRotated Op. +// This would make the interface a bit cleaner. + +namespace detectron2 { + +namespace { + +template +__device__ T bilinear_interpolate( + const T* input, + const int height, + const int width, + T y, + T x) { + // deal with cases that inverse elements are out of feature map boundary + if (y < -1.0 || y > height || x < -1.0 || x > width) { + // empty + return 0; + } + + if (y < 0) { + y = 0; + } + + if (x < 0) { + x = 0; + } + + int y_low = (int)y; + int x_low = (int)x; + int y_high; + int x_high; + + if (y_low >= height - 1) { + y_high = y_low = height - 1; + y = (T)y_low; + } else { + y_high = y_low + 1; + } + + if (x_low >= width - 1) { + x_high = x_low = width - 1; + x = (T)x_low; + } else { + x_high = x_low + 1; + } + + T ly = y - y_low; + T lx = x - x_low; + T hy = 1. - ly, hx = 1. 
- lx; + // do bilinear interpolation + T v1 = input[y_low * width + x_low]; + T v2 = input[y_low * width + x_high]; + T v3 = input[y_high * width + x_low]; + T v4 = input[y_high * width + x_high]; + T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; + + T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + + return val; +} + +template +__device__ void bilinear_interpolate_gradient( + const int height, + const int width, + T y, + T x, + T& w1, + T& w2, + T& w3, + T& w4, + int& x_low, + int& x_high, + int& y_low, + int& y_high) { + // deal with cases that inverse elements are out of feature map boundary + if (y < -1.0 || y > height || x < -1.0 || x > width) { + // empty + w1 = w2 = w3 = w4 = 0.; + x_low = x_high = y_low = y_high = -1; + return; + } + + if (y < 0) { + y = 0; + } + + if (x < 0) { + x = 0; + } + + y_low = (int)y; + x_low = (int)x; + + if (y_low >= height - 1) { + y_high = y_low = height - 1; + y = (T)y_low; + } else { + y_high = y_low + 1; + } + + if (x_low >= width - 1) { + x_high = x_low = width - 1; + x = (T)x_low; + } else { + x_high = x_low + 1; + } + + T ly = y - y_low; + T lx = x - x_low; + T hy = 1. - ly, hx = 1. - lx; + + // reference in forward + // T v1 = input[y_low * width + x_low]; + // T v2 = input[y_low * width + x_high]; + // T v3 = input[y_high * width + x_low]; + // T v4 = input[y_high * width + x_high]; + // T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + + w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; + + return; +} + +} // namespace + +template +__global__ void RoIAlignRotatedForward( + const int nthreads, + const T* input, + const T spatial_scale, + const int channels, + const int height, + const int width, + const int pooled_height, + const int pooled_width, + const int sampling_ratio, + const T* rois, + T* top_data) { + CUDA_1D_KERNEL_LOOP(index, nthreads) { + // (n, c, ph, pw) is an element in the pooled output + int pw = index % pooled_width; + int ph = (index / pooled_width) % pooled_height; + int c = (index / pooled_width / pooled_height) % channels; + int n = index / pooled_width / pooled_height / channels; + + const T* current_roi = rois + n * 6; + int roi_batch_ind = current_roi[0]; + + // Do not use rounding; this implementation detail is critical + // ROIAlignRotated supports align == true, i.e., continuous coordinate + // by default, thus the 0.5 offset + T offset = (T)0.5; + T roi_center_w = current_roi[1] * spatial_scale - offset; + T roi_center_h = current_roi[2] * spatial_scale - offset; + T roi_width = current_roi[3] * spatial_scale; + T roi_height = current_roi[4] * spatial_scale; + T theta = current_roi[5] * M_PI / 180.0; + T cos_theta = cos(theta); + T sin_theta = sin(theta); + + T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); + T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); + + const T* offset_input = + input + (roi_batch_ind * channels + c) * height * width; + + // We use roi_bin_grid to sample the grid and mimic integral + int roi_bin_grid_h = (sampling_ratio > 0) + ? sampling_ratio + : ceil(roi_height / pooled_height); // e.g., = 2 + int roi_bin_grid_w = + (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); + + // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y). + // Appropriate translation needs to be applied after. + T roi_start_h = -roi_height / 2.0; + T roi_start_w = -roi_width / 2.0; + + // We do average (inte gral) pooling inside a bin + const T count = max(roi_bin_grid_h * roi_bin_grid_w, 1); // e.g. 
= 4 + + T output_val = 0.; + for (int iy = 0; iy < roi_bin_grid_h; iy++) // e.g., iy = 0, 1 + { + const T yy = roi_start_h + ph * bin_size_h + + static_cast(iy + .5f) * bin_size_h / + static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 + for (int ix = 0; ix < roi_bin_grid_w; ix++) { + const T xx = roi_start_w + pw * bin_size_w + + static_cast(ix + .5f) * bin_size_w / + static_cast(roi_bin_grid_w); + + // Rotate by theta around the center and translate + T y = yy * cos_theta - xx * sin_theta + roi_center_h; + T x = yy * sin_theta + xx * cos_theta + roi_center_w; + + T val = bilinear_interpolate(offset_input, height, width, y, x); + output_val += val; + } + } + output_val /= count; + + top_data[index] = output_val; + } +} + +template +__global__ void RoIAlignRotatedBackwardFeature( + const int nthreads, + const T* top_diff, + const int num_rois, + const T spatial_scale, + const int channels, + const int height, + const int width, + const int pooled_height, + const int pooled_width, + const int sampling_ratio, + T* bottom_diff, + const T* rois) { + CUDA_1D_KERNEL_LOOP(index, nthreads) { + // (n, c, ph, pw) is an element in the pooled output + int pw = index % pooled_width; + int ph = (index / pooled_width) % pooled_height; + int c = (index / pooled_width / pooled_height) % channels; + int n = index / pooled_width / pooled_height / channels; + + const T* current_roi = rois + n * 6; + int roi_batch_ind = current_roi[0]; + + // Do not use rounding; this implementation detail is critical + // ROIAlignRotated supports align == true, i.e., continuous coordinate + // by default, thus the 0.5 offset + T offset = (T)0.5; + T roi_center_w = current_roi[1] * spatial_scale - offset; + T roi_center_h = current_roi[2] * spatial_scale - offset; + T roi_width = current_roi[3] * spatial_scale; + T roi_height = current_roi[4] * spatial_scale; + T theta = current_roi[5] * M_PI / 180.0; + T cos_theta = cos(theta); + T sin_theta = sin(theta); + + T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); + T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); + + T* offset_bottom_diff = + bottom_diff + (roi_batch_ind * channels + c) * height * width; + + int top_offset = (n * channels + c) * pooled_height * pooled_width; + const T* offset_top_diff = top_diff + top_offset; + const T top_diff_this_bin = offset_top_diff[ph * pooled_width + pw]; + + // We use roi_bin_grid to sample the grid and mimic integral + int roi_bin_grid_h = (sampling_ratio > 0) + ? sampling_ratio + : ceil(roi_height / pooled_height); // e.g., = 2 + int roi_bin_grid_w = + (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); + + // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y). + // Appropriate translation needs to be applied after. + T roi_start_h = -roi_height / 2.0; + T roi_start_w = -roi_width / 2.0; + + // We do average (integral) pooling inside a bin + const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. 
= 4 + + for (int iy = 0; iy < roi_bin_grid_h; iy++) // e.g., iy = 0, 1 + { + const T yy = roi_start_h + ph * bin_size_h + + static_cast(iy + .5f) * bin_size_h / + static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 + for (int ix = 0; ix < roi_bin_grid_w; ix++) { + const T xx = roi_start_w + pw * bin_size_w + + static_cast(ix + .5f) * bin_size_w / + static_cast(roi_bin_grid_w); + + // Rotate by theta around the center and translate + T y = yy * cos_theta - xx * sin_theta + roi_center_h; + T x = yy * sin_theta + xx * cos_theta + roi_center_w; + + T w1, w2, w3, w4; + int x_low, x_high, y_low, y_high; + + bilinear_interpolate_gradient( + height, width, y, x, w1, w2, w3, w4, x_low, x_high, y_low, y_high); + + T g1 = top_diff_this_bin * w1 / count; + T g2 = top_diff_this_bin * w2 / count; + T g3 = top_diff_this_bin * w3 / count; + T g4 = top_diff_this_bin * w4 / count; + + if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) { + atomicAdd( + offset_bottom_diff + y_low * width + x_low, static_cast(g1)); + atomicAdd( + offset_bottom_diff + y_low * width + x_high, static_cast(g2)); + atomicAdd( + offset_bottom_diff + y_high * width + x_low, static_cast(g3)); + atomicAdd( + offset_bottom_diff + y_high * width + x_high, static_cast(g4)); + } // if + } // ix + } // iy + } // CUDA_1D_KERNEL_LOOP +} // RoIAlignRotatedBackward + +at::Tensor ROIAlignRotated_forward_cuda( + const at::Tensor& input, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int sampling_ratio) { + AT_ASSERTM(input.device().is_cuda(), "input must be a CUDA tensor"); + AT_ASSERTM(rois.device().is_cuda(), "rois must be a CUDA tensor"); + at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2}; + + at::CheckedFrom c = "ROIAlignRotated_forward_cuda"; + at::checkAllSameGPU(c, {input_t, rois_t}); + at::checkAllSameType(c, {input_t, rois_t}); + at::cuda::CUDAGuard device_guard(input.device()); + + auto num_rois = rois.size(0); + auto channels = input.size(1); + auto height = input.size(2); + auto width = input.size(3); + + auto output = at::empty( + {num_rois, channels, pooled_height, pooled_width}, input.options()); + auto output_size = num_rois * pooled_height * pooled_width * channels; + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + dim3 grid(std::min( + at::cuda::ATenCeilDiv( + static_cast(output_size), static_cast(512)), + static_cast(4096))); + dim3 block(512); + + if (output.numel() == 0) { + AT_CUDA_CHECK(cudaGetLastError()); + return output; + } + + auto input_ = input.contiguous(), rois_ = rois.contiguous(); + AT_DISPATCH_FLOATING_TYPES( + input.scalar_type(), "ROIAlignRotated_forward", [&] { + RoIAlignRotatedForward<<>>( + output_size, + input_.data_ptr(), + spatial_scale, + channels, + height, + width, + pooled_height, + pooled_width, + sampling_ratio, + rois_.data_ptr(), + output.data_ptr()); + }); + cudaDeviceSynchronize(); + AT_CUDA_CHECK(cudaGetLastError()); + return output; +} + +// TODO remove the dependency on input and use instead its sizes -> save memory +at::Tensor ROIAlignRotated_backward_cuda( + const at::Tensor& grad, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int batch_size, + const int channels, + const int height, + const int width, + const int sampling_ratio) { + AT_ASSERTM(grad.device().is_cuda(), "grad must be a CUDA tensor"); + AT_ASSERTM(rois.device().is_cuda(), "rois must be a CUDA tensor"); + + at::TensorArg grad_t{grad, "grad", 1}, 
rois_t{rois, "rois", 2}; + at::CheckedFrom c = "ROIAlign_backward_cuda"; + at::checkAllSameGPU(c, {grad_t, rois_t}); + at::checkAllSameType(c, {grad_t, rois_t}); + at::cuda::CUDAGuard device_guard(grad.device()); + + auto num_rois = rois.size(0); + auto grad_input = + at::zeros({batch_size, channels, height, width}, grad.options()); + + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + dim3 grid(std::min( + at::cuda::ATenCeilDiv( + static_cast(grad.numel()), static_cast(512)), + static_cast(4096))); + dim3 block(512); + + // handle possibly empty gradients + if (grad.numel() == 0) { + AT_CUDA_CHECK(cudaGetLastError()); + return grad_input; + } + + auto grad_ = grad.contiguous(), rois_ = rois.contiguous(); + AT_DISPATCH_FLOATING_TYPES( + grad.scalar_type(), "ROIAlignRotated_backward", [&] { + RoIAlignRotatedBackwardFeature<<>>( + grad.numel(), + grad_.data_ptr(), + num_rois, + spatial_scale, + channels, + height, + width, + pooled_height, + pooled_width, + sampling_ratio, + grad_input.data_ptr(), + rois_.data_ptr()); + }); + AT_CUDA_CHECK(cudaGetLastError()); + return grad_input; +} + +} // namespace detectron2 diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated.h b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated.h new file mode 100644 index 0000000000000000000000000000000000000000..3bf383b8ed9b358b5313d433a9682c294dfb77e4 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated.h @@ -0,0 +1,35 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +#pragma once +#include + +namespace detectron2 { + +at::Tensor box_iou_rotated_cpu( + const at::Tensor& boxes1, + const at::Tensor& boxes2); + +#if defined(WITH_CUDA) || defined(WITH_HIP) +at::Tensor box_iou_rotated_cuda( + const at::Tensor& boxes1, + const at::Tensor& boxes2); +#endif + +// Interface for Python +// inline is needed to prevent multiple function definitions when this header is +// included by different cpps +inline at::Tensor box_iou_rotated( + const at::Tensor& boxes1, + const at::Tensor& boxes2) { + assert(boxes1.device().is_cuda() == boxes2.device().is_cuda()); + if (boxes1.device().is_cuda()) { +#if defined(WITH_CUDA) || defined(WITH_HIP) + return box_iou_rotated_cuda(boxes1.contiguous(), boxes2.contiguous()); +#else + AT_ERROR("Detectron2 is not compiled with GPU support!"); +#endif + } + + return box_iou_rotated_cpu(boxes1.contiguous(), boxes2.contiguous()); +} + +} // namespace detectron2 diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.cpp b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c843487b5fa4e8077dd27402ec99009266ddda8d --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.cpp @@ -0,0 +1,39 @@ +// Copyright (c) Facebook, Inc. and its affiliates. 
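A minimal usage sketch for the Python-facing wrapper declared in box_iou_rotated.h above (editorial illustration, not part of the patch; the tensor contents here are placeholders). Boxes are (N, 5) float tensors laid out as (x_ctr, y_ctr, width, height, angle in degrees), and the op returns an (N1, N2) IoU matrix:

// Illustrative sketch only; zero-sized boxes simply yield IoU 0.
#include <ATen/ATen.h>

at::Tensor boxes1 = at::zeros({3, 5}, at::kFloat);  // 3 rotated boxes: (cx, cy, w, h, angle_deg)
at::Tensor boxes2 = at::zeros({4, 5}, at::kFloat);  // 4 rotated boxes
at::Tensor ious = detectron2::box_iou_rotated(boxes1, boxes2);  // shape [3, 4]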
+#include "box_iou_rotated.h" +#include "box_iou_rotated_utils.h" + +namespace detectron2 { + +template +void box_iou_rotated_cpu_kernel( + const at::Tensor& boxes1, + const at::Tensor& boxes2, + at::Tensor& ious) { + auto num_boxes1 = boxes1.size(0); + auto num_boxes2 = boxes2.size(0); + + for (int i = 0; i < num_boxes1; i++) { + for (int j = 0; j < num_boxes2; j++) { + ious[i * num_boxes2 + j] = single_box_iou_rotated( + boxes1[i].data_ptr(), boxes2[j].data_ptr()); + } + } +} + +at::Tensor box_iou_rotated_cpu( + // input must be contiguous: + const at::Tensor& boxes1, + const at::Tensor& boxes2) { + auto num_boxes1 = boxes1.size(0); + auto num_boxes2 = boxes2.size(0); + at::Tensor ious = + at::empty({num_boxes1 * num_boxes2}, boxes1.options().dtype(at::kFloat)); + + box_iou_rotated_cpu_kernel(boxes1, boxes2, ious); + + // reshape from 1d array to 2d array + auto shape = std::vector{num_boxes1, num_boxes2}; + return ious.reshape(shape); +} + +} // namespace detectron2 diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.cu b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.cu new file mode 100644 index 0000000000000000000000000000000000000000..952710e53041187907fbd113f8d0d0fa24134a86 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.cu @@ -0,0 +1,130 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +#include +#include +#include +#include +#include "box_iou_rotated_utils.h" + +namespace detectron2 { + +// 2D block with 32 * 16 = 512 threads per block +const int BLOCK_DIM_X = 32; +const int BLOCK_DIM_Y = 16; + +template +__global__ void box_iou_rotated_cuda_kernel( + const int n_boxes1, + const int n_boxes2, + const T* dev_boxes1, + const T* dev_boxes2, + T* dev_ious) { + const int row_start = blockIdx.x * blockDim.x; + const int col_start = blockIdx.y * blockDim.y; + + const int row_size = min(n_boxes1 - row_start, blockDim.x); + const int col_size = min(n_boxes2 - col_start, blockDim.y); + + __shared__ float block_boxes1[BLOCK_DIM_X * 5]; + __shared__ float block_boxes2[BLOCK_DIM_Y * 5]; + + // It's safe to copy using threadIdx.x since BLOCK_DIM_X >= BLOCK_DIM_Y + if (threadIdx.x < row_size && threadIdx.y == 0) { + block_boxes1[threadIdx.x * 5 + 0] = + dev_boxes1[(row_start + threadIdx.x) * 5 + 0]; + block_boxes1[threadIdx.x * 5 + 1] = + dev_boxes1[(row_start + threadIdx.x) * 5 + 1]; + block_boxes1[threadIdx.x * 5 + 2] = + dev_boxes1[(row_start + threadIdx.x) * 5 + 2]; + block_boxes1[threadIdx.x * 5 + 3] = + dev_boxes1[(row_start + threadIdx.x) * 5 + 3]; + block_boxes1[threadIdx.x * 5 + 4] = + dev_boxes1[(row_start + threadIdx.x) * 5 + 4]; + } + + if (threadIdx.x < col_size && threadIdx.y == 0) { + block_boxes2[threadIdx.x * 5 + 0] = + dev_boxes2[(col_start + threadIdx.x) * 5 + 0]; + block_boxes2[threadIdx.x * 5 + 1] = + dev_boxes2[(col_start + threadIdx.x) * 5 + 1]; + block_boxes2[threadIdx.x * 5 + 2] = + dev_boxes2[(col_start + threadIdx.x) * 5 + 2]; + block_boxes2[threadIdx.x * 5 + 3] = + dev_boxes2[(col_start + threadIdx.x) * 5 + 3]; + block_boxes2[threadIdx.x * 5 + 4] = + dev_boxes2[(col_start + threadIdx.x) * 5 + 4]; + } + __syncthreads(); + + if (threadIdx.x < row_size && threadIdx.y < col_size) { + int offset = (row_start + threadIdx.x) * n_boxes2 + col_start + threadIdx.y; + dev_ious[offset] = single_box_iou_rotated( + 
block_boxes1 + threadIdx.x * 5, block_boxes2 + threadIdx.y * 5); + } +} + +at::Tensor box_iou_rotated_cuda( + // input must be contiguous + const at::Tensor& boxes1, + const at::Tensor& boxes2) { + using scalar_t = float; + AT_ASSERTM( + boxes1.scalar_type() == at::kFloat, "boxes1 must be a float tensor"); + AT_ASSERTM( + boxes2.scalar_type() == at::kFloat, "boxes2 must be a float tensor"); + AT_ASSERTM(boxes1.is_cuda(), "boxes1 must be a CUDA tensor"); + AT_ASSERTM(boxes2.is_cuda(), "boxes2 must be a CUDA tensor"); + at::cuda::CUDAGuard device_guard(boxes1.device()); + + auto num_boxes1 = boxes1.size(0); + auto num_boxes2 = boxes2.size(0); + + at::Tensor ious = + at::empty({num_boxes1 * num_boxes2}, boxes1.options().dtype(at::kFloat)); + + bool transpose = false; + if (num_boxes1 > 0 && num_boxes2 > 0) { + scalar_t *data1 = boxes1.data_ptr(), + *data2 = boxes2.data_ptr(); + + if (num_boxes2 > 65535 * BLOCK_DIM_Y) { + AT_ASSERTM( + num_boxes1 <= 65535 * BLOCK_DIM_Y, + "Too many boxes for box_iou_rotated_cuda!"); + // x dim is allowed to be large, but y dim cannot, + // so we transpose the two to avoid "invalid configuration argument" + // error. We assume one of them is small. Otherwise the result is hard to + // fit in memory anyway. + std::swap(num_boxes1, num_boxes2); + std::swap(data1, data2); + transpose = true; + } + + const int blocks_x = + at::cuda::ATenCeilDiv(static_cast(num_boxes1), BLOCK_DIM_X); + const int blocks_y = + at::cuda::ATenCeilDiv(static_cast(num_boxes2), BLOCK_DIM_Y); + + dim3 blocks(blocks_x, blocks_y); + dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + box_iou_rotated_cuda_kernel<<>>( + num_boxes1, + num_boxes2, + data1, + data2, + (scalar_t*)ious.data_ptr()); + + AT_CUDA_CHECK(cudaGetLastError()); + } + + // reshape from 1d array to 2d array + auto shape = std::vector{num_boxes1, num_boxes2}; + if (transpose) { + return ious.view(shape).t(); + } else { + return ious.view(shape); + } +} + +} // namespace detectron2 diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_utils.h b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..b54a5dde2ca11a74d29c4d8adb7fe1634f5baf9c --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_utils.h @@ -0,0 +1,370 @@ +// Copyright (c) Facebook, Inc. and its affiliates. 
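A short arithmetic illustration of the launch configuration used by box_iou_rotated_cuda above (editorial note, not part of the patch):

// With BLOCK_DIM_X = 32, BLOCK_DIM_Y = 16, num_boxes1 = 1000, num_boxes2 = 50:
//   blocks_x = ceil(1000 / 32) = 32,  blocks_y = ceil(50 / 16) = 4
// so a 32 x 4 grid of 32 x 16 thread blocks is launched and each thread
// computes one (box1, box2) IoU pair. Because the grid's y dimension is capped
// at 65535 blocks, at most 65535 * 16 = 1,048,560 boxes fit along that axis,
// which is why the kernel swaps the two inputs (and transposes the result)
// when num_boxes2 exceeds that limit.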
+#pragma once + +#include +#include + +#if defined(__CUDACC__) || __HCC__ == 1 || __HIP__ == 1 +// Designates functions callable from the host (CPU) and the device (GPU) +#define HOST_DEVICE __host__ __device__ +#define HOST_DEVICE_INLINE HOST_DEVICE __forceinline__ +#else +#include +#define HOST_DEVICE +#define HOST_DEVICE_INLINE HOST_DEVICE inline +#endif + +namespace detectron2 { + +namespace { + +template +struct RotatedBox { + T x_ctr, y_ctr, w, h, a; +}; + +template +struct Point { + T x, y; + HOST_DEVICE_INLINE Point(const T& px = 0, const T& py = 0) : x(px), y(py) {} + HOST_DEVICE_INLINE Point operator+(const Point& p) const { + return Point(x + p.x, y + p.y); + } + HOST_DEVICE_INLINE Point& operator+=(const Point& p) { + x += p.x; + y += p.y; + return *this; + } + HOST_DEVICE_INLINE Point operator-(const Point& p) const { + return Point(x - p.x, y - p.y); + } + HOST_DEVICE_INLINE Point operator*(const T coeff) const { + return Point(x * coeff, y * coeff); + } +}; + +template +HOST_DEVICE_INLINE T dot_2d(const Point& A, const Point& B) { + return A.x * B.x + A.y * B.y; +} + +// R: result type. can be different from input type +template +HOST_DEVICE_INLINE R cross_2d(const Point& A, const Point& B) { + return static_cast(A.x) * static_cast(B.y) - + static_cast(B.x) * static_cast(A.y); +} + +template +HOST_DEVICE_INLINE void get_rotated_vertices( + const RotatedBox& box, + Point (&pts)[4]) { + // M_PI / 180. == 0.01745329251 + double theta = box.a * 0.01745329251; + T cosTheta2 = (T)cos(theta) * 0.5f; + T sinTheta2 = (T)sin(theta) * 0.5f; + + // y: top --> down; x: left --> right + pts[0].x = box.x_ctr + sinTheta2 * box.h + cosTheta2 * box.w; + pts[0].y = box.y_ctr + cosTheta2 * box.h - sinTheta2 * box.w; + pts[1].x = box.x_ctr - sinTheta2 * box.h + cosTheta2 * box.w; + pts[1].y = box.y_ctr - cosTheta2 * box.h - sinTheta2 * box.w; + pts[2].x = 2 * box.x_ctr - pts[0].x; + pts[2].y = 2 * box.y_ctr - pts[0].y; + pts[3].x = 2 * box.x_ctr - pts[1].x; + pts[3].y = 2 * box.y_ctr - pts[1].y; +} + +template +HOST_DEVICE_INLINE int get_intersection_points( + const Point (&pts1)[4], + const Point (&pts2)[4], + Point (&intersections)[24]) { + // Line vector + // A line from p1 to p2 is: p1 + (p2-p1)*t, t=[0,1] + Point vec1[4], vec2[4]; + for (int i = 0; i < 4; i++) { + vec1[i] = pts1[(i + 1) % 4] - pts1[i]; + vec2[i] = pts2[(i + 1) % 4] - pts2[i]; + } + + // When computing the intersection area, it doesn't hurt if we have + // more (duplicated/approximate) intersections/vertices than needed, + // while it can cause drastic difference if we miss an intersection/vertex. + // Therefore, we add an epsilon to relax the comparisons between + // the float point numbers that decide the intersection points. 
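// Editor's note (illustrative derivation, not part of the original patch):
// each edge is the parametric segment P(t) = p + v * t with t in [0, 1].
// Setting pts1[i] + vec1[i] * t1 == pts2[j] + vec2[j] * t2 and taking the 2D
// cross product of both sides with vec2[j] and with vec1[i] gives
//   t1 = cross_2d(vec2[j], pts2[j] - pts1[i]) / cross_2d(vec2[j], vec1[i])
//   t2 = cross_2d(vec1[i], pts2[j] - pts1[i]) / cross_2d(vec2[j], vec1[i])
// which is exactly the 2x2 solve performed below; det == 0 (parallel edges)
// is skipped, and t1, t2 within [-EPS, 1 + EPS] marks a valid intersection.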
+ double EPS = 1e-5; + + // Line test - test all line combos for intersection + int num = 0; // number of intersections + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + // Solve for 2x2 Ax=b + T det = cross_2d(vec2[j], vec1[i]); + + // This takes care of parallel lines + if (fabs(det) <= 1e-14) { + continue; + } + + auto vec12 = pts2[j] - pts1[i]; + + T t1 = cross_2d(vec2[j], vec12) / det; + T t2 = cross_2d(vec1[i], vec12) / det; + + if (t1 > -EPS && t1 < 1.0f + EPS && t2 > -EPS && t2 < 1.0f + EPS) { + intersections[num++] = pts1[i] + vec1[i] * t1; + } + } + } + + // Check for vertices of rect1 inside rect2 + { + const auto& AB = vec2[0]; + const auto& DA = vec2[3]; + auto ABdotAB = dot_2d(AB, AB); + auto ADdotAD = dot_2d(DA, DA); + for (int i = 0; i < 4; i++) { + // assume ABCD is the rectangle, and P is the point to be judged + // P is inside ABCD iff. P's projection on AB lies within AB + // and P's projection on AD lies within AD + + auto AP = pts1[i] - pts2[0]; + + auto APdotAB = dot_2d(AP, AB); + auto APdotAD = -dot_2d(AP, DA); + + if ((APdotAB > -EPS) && (APdotAD > -EPS) && (APdotAB < ABdotAB + EPS) && + (APdotAD < ADdotAD + EPS)) { + intersections[num++] = pts1[i]; + } + } + } + + // Reverse the check - check for vertices of rect2 inside rect1 + { + const auto& AB = vec1[0]; + const auto& DA = vec1[3]; + auto ABdotAB = dot_2d(AB, AB); + auto ADdotAD = dot_2d(DA, DA); + for (int i = 0; i < 4; i++) { + auto AP = pts2[i] - pts1[0]; + + auto APdotAB = dot_2d(AP, AB); + auto APdotAD = -dot_2d(AP, DA); + + if ((APdotAB > -EPS) && (APdotAD > -EPS) && (APdotAB < ABdotAB + EPS) && + (APdotAD < ADdotAD + EPS)) { + intersections[num++] = pts2[i]; + } + } + } + + return num; +} + +template +HOST_DEVICE_INLINE int convex_hull_graham( + const Point (&p)[24], + const int& num_in, + Point (&q)[24], + bool shift_to_zero = false) { + assert(num_in >= 2); + + // Step 1: + // Find point with minimum y + // if more than 1 points have the same minimum y, + // pick the one with the minimum x. 
+ int t = 0; + for (int i = 1; i < num_in; i++) { + if (p[i].y < p[t].y || (p[i].y == p[t].y && p[i].x < p[t].x)) { + t = i; + } + } + auto& start = p[t]; // starting point + + // Step 2: + // Subtract starting point from every points (for sorting in the next step) + for (int i = 0; i < num_in; i++) { + q[i] = p[i] - start; + } + + // Swap the starting point to position 0 + auto tmp = q[0]; + q[0] = q[t]; + q[t] = tmp; + + // Step 3: + // Sort point 1 ~ num_in according to their relative cross-product values + // (essentially sorting according to angles) + // If the angles are the same, sort according to their distance to origin + T dist[24]; +#if defined(__CUDACC__) || __HCC__ == 1 || __HIP__ == 1 + // compute distance to origin before sort, and sort them together with the + // points + for (int i = 0; i < num_in; i++) { + dist[i] = dot_2d(q[i], q[i]); + } + + // CUDA version + // In the future, we can potentially use thrust + // for sorting here to improve speed (though not guaranteed) + for (int i = 1; i < num_in - 1; i++) { + for (int j = i + 1; j < num_in; j++) { + T crossProduct = cross_2d(q[i], q[j]); + if ((crossProduct < -1e-6) || + (fabs(crossProduct) < 1e-6 && dist[i] > dist[j])) { + auto q_tmp = q[i]; + q[i] = q[j]; + q[j] = q_tmp; + auto dist_tmp = dist[i]; + dist[i] = dist[j]; + dist[j] = dist_tmp; + } + } + } +#else + // CPU version + std::sort( + q + 1, q + num_in, [](const Point& A, const Point& B) -> bool { + T temp = cross_2d(A, B); + if (fabs(temp) < 1e-6) { + return dot_2d(A, A) < dot_2d(B, B); + } else { + return temp > 0; + } + }); + // compute distance to origin after sort, since the points are now different. + for (int i = 0; i < num_in; i++) { + dist[i] = dot_2d(q[i], q[i]); + } +#endif + + // Step 4: + // Make sure there are at least 2 points (that don't overlap with each other) + // in the stack + int k; // index of the non-overlapped second point + for (k = 1; k < num_in; k++) { + if (dist[k] > 1e-8) { + break; + } + } + if (k == num_in) { + // We reach the end, which means the convex hull is just one point + q[0] = p[t]; + return 1; + } + q[1] = q[k]; + int m = 2; // 2 points in the stack + // Step 5: + // Finally we can start the scanning process. + // When a non-convex relationship between the 3 points is found + // (either concave shape or duplicated points), + // we pop the previous point from the stack + // until the 3-point relationship is convex again, or + // until the stack only contains two points + for (int i = k + 1; i < num_in; i++) { + while (m > 1) { + auto q1 = q[i] - q[m - 2], q2 = q[m - 1] - q[m - 2]; + // cross_2d() uses FMA and therefore computes round(round(q1.x*q2.y) - + // q2.x*q1.y) So it may not return 0 even when q1==q2. Therefore we + // compare round(q1.x*q2.y) and round(q2.x*q1.y) directly. (round means + // round to nearest floating point). + if (q1.x * q2.y >= q2.x * q1.y) + m--; + else + break; + } + // Using double also helps, but float can solve the issue for now. + // while (m > 1 && cross_2d(q[i] - q[m - 2], q[m - 1] - q[m - 2]) + // >= 0) { + // m--; + // } + q[m++] = q[i]; + } + + // Step 6 (Optional): + // In general sense we need the original coordinates, so we + // need to shift the points back (reverting Step 2) + // But if we're only interested in getting the area/perimeter of the shape + // We can simply return. 
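// Editor's note (worked example, not part of the original patch): the
// polygon_area() helper defined a bit further below uses a fan triangulation
// from q[0]. For the unit square q = {(0,0), (1,0), (1,1), (0,1)} with m = 4:
//   |cross((1,0), (1,1))| / 2 + |cross((1,1), (0,1))| / 2 = 1/2 + 1/2 = 1
// i.e. the triangles (q0,q1,q2) and (q0,q2,q3) each contribute area 0.5.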
+ if (!shift_to_zero) { + for (int i = 0; i < m; i++) { + q[i] += start; + } + } + + return m; +} + +template +HOST_DEVICE_INLINE T polygon_area(const Point (&q)[24], const int& m) { + if (m <= 2) { + return 0; + } + + T area = 0; + for (int i = 1; i < m - 1; i++) { + area += fabs(cross_2d(q[i] - q[0], q[i + 1] - q[0])); + } + + return area / 2.0; +} + +template +HOST_DEVICE_INLINE T rotated_boxes_intersection( + const RotatedBox& box1, + const RotatedBox& box2) { + // There are up to 4 x 4 + 4 + 4 = 24 intersections (including dups) returned + // from rotated_rect_intersection_pts + Point intersectPts[24], orderedPts[24]; + + Point pts1[4]; + Point pts2[4]; + get_rotated_vertices(box1, pts1); + get_rotated_vertices(box2, pts2); + + int num = get_intersection_points(pts1, pts2, intersectPts); + + if (num <= 2) { + return 0.0; + } + + // Convex Hull to order the intersection points in clockwise order and find + // the contour area. + int num_convex = convex_hull_graham(intersectPts, num, orderedPts, true); + return polygon_area(orderedPts, num_convex); +} + +} // namespace + +template +HOST_DEVICE_INLINE T +single_box_iou_rotated(T const* const box1_raw, T const* const box2_raw) { + // shift center to the middle point to achieve higher precision in result + RotatedBox box1, box2; + auto center_shift_x = (box1_raw[0] + box2_raw[0]) / 2.0; + auto center_shift_y = (box1_raw[1] + box2_raw[1]) / 2.0; + box1.x_ctr = box1_raw[0] - center_shift_x; + box1.y_ctr = box1_raw[1] - center_shift_y; + box1.w = box1_raw[2]; + box1.h = box1_raw[3]; + box1.a = box1_raw[4]; + box2.x_ctr = box2_raw[0] - center_shift_x; + box2.y_ctr = box2_raw[1] - center_shift_y; + box2.w = box2_raw[2]; + box2.h = box2_raw[3]; + box2.a = box2_raw[4]; + + T area1 = box1.w * box1.h; + T area2 = box2.w * box2.h; + if (area1 < 1e-14 || area2 < 1e-14) { + return 0.f; + } + + T intersection = rotated_boxes_intersection(box1, box2); + T iou = intersection / (area1 + area2 - intersection); + return iou; +} + +} // namespace detectron2 diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/cocoeval/cocoeval.cpp b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/cocoeval/cocoeval.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0a5b7b907c06720fefc77b0dfd921b8ec3ecf2be --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/cocoeval/cocoeval.cpp @@ -0,0 +1,507 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +#include "cocoeval.h" +#include +#include +#include +#include + +using namespace pybind11::literals; + +namespace detectron2 { + +namespace COCOeval { + +// Sort detections from highest score to lowest, such that +// detection_instances[detection_sorted_indices[t]] >= +// detection_instances[detection_sorted_indices[t+1]]. 
Use stable_sort to match +// original COCO API +void SortInstancesByDetectionScore( + const std::vector& detection_instances, + std::vector* detection_sorted_indices) { + detection_sorted_indices->resize(detection_instances.size()); + std::iota( + detection_sorted_indices->begin(), detection_sorted_indices->end(), 0); + std::stable_sort( + detection_sorted_indices->begin(), + detection_sorted_indices->end(), + [&detection_instances](size_t j1, size_t j2) { + return detection_instances[j1].score > detection_instances[j2].score; + }); +} + +// Partition the ground truth objects based on whether or not to ignore them +// based on area +void SortInstancesByIgnore( + const std::array& area_range, + const std::vector& ground_truth_instances, + std::vector* ground_truth_sorted_indices, + std::vector* ignores) { + ignores->clear(); + ignores->reserve(ground_truth_instances.size()); + for (auto o : ground_truth_instances) { + ignores->push_back( + o.ignore || o.area < area_range[0] || o.area > area_range[1]); + } + + ground_truth_sorted_indices->resize(ground_truth_instances.size()); + std::iota( + ground_truth_sorted_indices->begin(), + ground_truth_sorted_indices->end(), + 0); + std::stable_sort( + ground_truth_sorted_indices->begin(), + ground_truth_sorted_indices->end(), + [&ignores](size_t j1, size_t j2) { + return (int)(*ignores)[j1] < (int)(*ignores)[j2]; + }); +} + +// For each IOU threshold, greedily match each detected instance to a ground +// truth instance (if possible) and store the results +void MatchDetectionsToGroundTruth( + const std::vector& detection_instances, + const std::vector& detection_sorted_indices, + const std::vector& ground_truth_instances, + const std::vector& ground_truth_sorted_indices, + const std::vector& ignores, + const std::vector>& ious, + const std::vector& iou_thresholds, + const std::array& area_range, + ImageEvaluation* results) { + // Initialize memory to store return data matches and ignore + const int num_iou_thresholds = iou_thresholds.size(); + const int num_ground_truth = ground_truth_sorted_indices.size(); + const int num_detections = detection_sorted_indices.size(); + std::vector ground_truth_matches( + num_iou_thresholds * num_ground_truth, 0); + std::vector& detection_matches = results->detection_matches; + std::vector& detection_ignores = results->detection_ignores; + std::vector& ground_truth_ignores = results->ground_truth_ignores; + detection_matches.resize(num_iou_thresholds * num_detections, 0); + detection_ignores.resize(num_iou_thresholds * num_detections, false); + ground_truth_ignores.resize(num_ground_truth); + for (auto g = 0; g < num_ground_truth; ++g) { + ground_truth_ignores[g] = ignores[ground_truth_sorted_indices[g]]; + } + + for (auto t = 0; t < num_iou_thresholds; ++t) { + for (auto d = 0; d < num_detections; ++d) { + // information about best match so far (match=-1 -> unmatched) + double best_iou = std::min(iou_thresholds[t], 1 - 1e-10); + int match = -1; + for (auto g = 0; g < num_ground_truth; ++g) { + // if this ground truth instance is already matched and not a + // crowd, it cannot be matched to another detection + if (ground_truth_matches[t * num_ground_truth + g] > 0 && + !ground_truth_instances[ground_truth_sorted_indices[g]].is_crowd) { + continue; + } + + // if detected instance matched to a regular ground truth + // instance, we can break on the first ground truth instance + // tagged as ignore (because they are sorted by the ignore tag) + if (match >= 0 && !ground_truth_ignores[match] && + ground_truth_ignores[g]) 
{ + break; + } + + // if IOU overlap is the best so far, store the match appropriately + if (ious[d][ground_truth_sorted_indices[g]] >= best_iou) { + best_iou = ious[d][ground_truth_sorted_indices[g]]; + match = g; + } + } + // if match was made, store id of match for both detection and + // ground truth + if (match >= 0) { + detection_ignores[t * num_detections + d] = ground_truth_ignores[match]; + detection_matches[t * num_detections + d] = + ground_truth_instances[ground_truth_sorted_indices[match]].id; + ground_truth_matches[t * num_ground_truth + match] = + detection_instances[detection_sorted_indices[d]].id; + } + + // set unmatched detections outside of area range to ignore + const InstanceAnnotation& detection = + detection_instances[detection_sorted_indices[d]]; + detection_ignores[t * num_detections + d] = + detection_ignores[t * num_detections + d] || + (detection_matches[t * num_detections + d] == 0 && + (detection.area < area_range[0] || detection.area > area_range[1])); + } + } + + // store detection score results + results->detection_scores.resize(detection_sorted_indices.size()); + for (size_t d = 0; d < detection_sorted_indices.size(); ++d) { + results->detection_scores[d] = + detection_instances[detection_sorted_indices[d]].score; + } +} + +std::vector EvaluateImages( + const std::vector>& area_ranges, + int max_detections, + const std::vector& iou_thresholds, + const ImageCategoryInstances>& image_category_ious, + const ImageCategoryInstances& + image_category_ground_truth_instances, + const ImageCategoryInstances& + image_category_detection_instances) { + const int num_area_ranges = area_ranges.size(); + const int num_images = image_category_ground_truth_instances.size(); + const int num_categories = + image_category_ious.size() > 0 ? image_category_ious[0].size() : 0; + std::vector detection_sorted_indices; + std::vector ground_truth_sorted_indices; + std::vector ignores; + std::vector results_all( + num_images * num_area_ranges * num_categories); + + // Store results for each image, category, and area range combination. 
Results + // for each IOU threshold are packed into the same ImageEvaluation object + for (auto i = 0; i < num_images; ++i) { + for (auto c = 0; c < num_categories; ++c) { + const std::vector& ground_truth_instances = + image_category_ground_truth_instances[i][c]; + const std::vector& detection_instances = + image_category_detection_instances[i][c]; + + SortInstancesByDetectionScore( + detection_instances, &detection_sorted_indices); + if ((int)detection_sorted_indices.size() > max_detections) { + detection_sorted_indices.resize(max_detections); + } + + for (size_t a = 0; a < area_ranges.size(); ++a) { + SortInstancesByIgnore( + area_ranges[a], + ground_truth_instances, + &ground_truth_sorted_indices, + &ignores); + + MatchDetectionsToGroundTruth( + detection_instances, + detection_sorted_indices, + ground_truth_instances, + ground_truth_sorted_indices, + ignores, + image_category_ious[i][c], + iou_thresholds, + area_ranges[a], + &results_all + [c * num_area_ranges * num_images + a * num_images + i]); + } + } + } + + return results_all; +} + +// Convert a python list to a vector +template +std::vector list_to_vec(const py::list& l) { + std::vector v(py::len(l)); + for (int i = 0; i < (int)py::len(l); ++i) { + v[i] = l[i].cast(); + } + return v; +} + +// Helper function to Accumulate() +// Considers the evaluation results applicable to a particular category, area +// range, and max_detections parameter setting, which begin at +// evaluations[evaluation_index]. Extracts a sorted list of length n of all +// applicable detection instances concatenated across all images in the dataset, +// which are represented by the outputs evaluation_indices, detection_scores, +// image_detection_indices, and detection_sorted_indices--all of which are +// length n. evaluation_indices[i] stores the applicable index into +// evaluations[] for instance i, which has detection score detection_score[i], +// and is the image_detection_indices[i]'th of the list of detections +// for the image containing i. 
detection_sorted_indices[] defines a sorted +// permutation of the 3 other outputs +int BuildSortedDetectionList( + const std::vector& evaluations, + const int64_t evaluation_index, + const int64_t num_images, + const int max_detections, + std::vector* evaluation_indices, + std::vector* detection_scores, + std::vector* detection_sorted_indices, + std::vector* image_detection_indices) { + assert(evaluations.size() >= evaluation_index + num_images); + + // Extract a list of object instances of the applicable category, area + // range, and max detections requirements such that they can be sorted + image_detection_indices->clear(); + evaluation_indices->clear(); + detection_scores->clear(); + image_detection_indices->reserve(num_images * max_detections); + evaluation_indices->reserve(num_images * max_detections); + detection_scores->reserve(num_images * max_detections); + int num_valid_ground_truth = 0; + for (auto i = 0; i < num_images; ++i) { + const ImageEvaluation& evaluation = evaluations[evaluation_index + i]; + + for (int d = 0; + d < (int)evaluation.detection_scores.size() && d < max_detections; + ++d) { // detected instances + evaluation_indices->push_back(evaluation_index + i); + image_detection_indices->push_back(d); + detection_scores->push_back(evaluation.detection_scores[d]); + } + for (auto ground_truth_ignore : evaluation.ground_truth_ignores) { + if (!ground_truth_ignore) { + ++num_valid_ground_truth; + } + } + } + + // Sort detections by decreasing score, using stable sort to match + // python implementation + detection_sorted_indices->resize(detection_scores->size()); + std::iota( + detection_sorted_indices->begin(), detection_sorted_indices->end(), 0); + std::stable_sort( + detection_sorted_indices->begin(), + detection_sorted_indices->end(), + [&detection_scores](size_t j1, size_t j2) { + return (*detection_scores)[j1] > (*detection_scores)[j2]; + }); + + return num_valid_ground_truth; +} + +// Helper function to Accumulate() +// Compute a precision recall curve given a sorted list of detected instances +// encoded in evaluations, evaluation_indices, detection_scores, +// detection_sorted_indices, image_detection_indices (see +// BuildSortedDetectionList()). Using vectors precisions and recalls +// and temporary storage, output the results into precisions_out, recalls_out, +// and scores_out, which are large buffers containing many precion/recall curves +// for all possible parameter settings, with precisions_out_index and +// recalls_out_index defining the applicable indices to store results. 
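// Editor's note (worked example, not part of the original patch): with 2
// valid ground-truth objects and sorted detections that evaluate to
// [TP, FP, TP], the running sums give
//   recalls    = [0.5, 0.5, 1.0]
//   precisions = [1.0, 0.5, 2/3]
// The backwards pass inside the function then makes precision monotonically
// non-increasing in recall ([1.0, 2/3, 2/3]) before it is sampled at the
// configured recall thresholds.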
+void ComputePrecisionRecallCurve( + const int64_t precisions_out_index, + const int64_t precisions_out_stride, + const int64_t recalls_out_index, + const std::vector& recall_thresholds, + const int iou_threshold_index, + const int num_iou_thresholds, + const int num_valid_ground_truth, + const std::vector& evaluations, + const std::vector& evaluation_indices, + const std::vector& detection_scores, + const std::vector& detection_sorted_indices, + const std::vector& image_detection_indices, + std::vector* precisions, + std::vector* recalls, + std::vector* precisions_out, + std::vector* scores_out, + std::vector* recalls_out) { + assert(recalls_out->size() > recalls_out_index); + + // Compute precision/recall for each instance in the sorted list of detections + int64_t true_positives_sum = 0, false_positives_sum = 0; + precisions->clear(); + recalls->clear(); + precisions->reserve(detection_sorted_indices.size()); + recalls->reserve(detection_sorted_indices.size()); + assert(!evaluations.empty() || detection_sorted_indices.empty()); + for (auto detection_sorted_index : detection_sorted_indices) { + const ImageEvaluation& evaluation = + evaluations[evaluation_indices[detection_sorted_index]]; + const auto num_detections = + evaluation.detection_matches.size() / num_iou_thresholds; + const auto detection_index = iou_threshold_index * num_detections + + image_detection_indices[detection_sorted_index]; + assert(evaluation.detection_matches.size() > detection_index); + assert(evaluation.detection_ignores.size() > detection_index); + const int64_t detection_match = + evaluation.detection_matches[detection_index]; + const bool detection_ignores = + evaluation.detection_ignores[detection_index]; + const auto true_positive = detection_match > 0 && !detection_ignores; + const auto false_positive = detection_match == 0 && !detection_ignores; + if (true_positive) { + ++true_positives_sum; + } + if (false_positive) { + ++false_positives_sum; + } + + const double recall = + static_cast(true_positives_sum) / num_valid_ground_truth; + recalls->push_back(recall); + const int64_t num_valid_detections = + true_positives_sum + false_positives_sum; + const double precision = num_valid_detections > 0 + ? static_cast(true_positives_sum) / num_valid_detections + : 0.0; + precisions->push_back(precision); + } + + (*recalls_out)[recalls_out_index] = !recalls->empty() ? 
recalls->back() : 0; + + for (int64_t i = static_cast(precisions->size()) - 1; i > 0; --i) { + if ((*precisions)[i] > (*precisions)[i - 1]) { + (*precisions)[i - 1] = (*precisions)[i]; + } + } + + // Sample the per instance precision/recall list at each recall threshold + for (size_t r = 0; r < recall_thresholds.size(); ++r) { + // first index in recalls >= recall_thresholds[r] + std::vector::iterator low = std::lower_bound( + recalls->begin(), recalls->end(), recall_thresholds[r]); + size_t precisions_index = low - recalls->begin(); + + const auto results_ind = precisions_out_index + r * precisions_out_stride; + assert(results_ind < precisions_out->size()); + assert(results_ind < scores_out->size()); + if (precisions_index < precisions->size()) { + (*precisions_out)[results_ind] = (*precisions)[precisions_index]; + (*scores_out)[results_ind] = + detection_scores[detection_sorted_indices[precisions_index]]; + } else { + (*precisions_out)[results_ind] = 0; + (*scores_out)[results_ind] = 0; + } + } +} +py::dict Accumulate( + const py::object& params, + const std::vector& evaluations) { + const std::vector recall_thresholds = + list_to_vec(params.attr("recThrs")); + const std::vector max_detections = + list_to_vec(params.attr("maxDets")); + const int num_iou_thresholds = py::len(params.attr("iouThrs")); + const int num_recall_thresholds = py::len(params.attr("recThrs")); + const int num_categories = params.attr("useCats").cast() == 1 + ? py::len(params.attr("catIds")) + : 1; + const int num_area_ranges = py::len(params.attr("areaRng")); + const int num_max_detections = py::len(params.attr("maxDets")); + const int num_images = py::len(params.attr("imgIds")); + + std::vector precisions_out( + num_iou_thresholds * num_recall_thresholds * num_categories * + num_area_ranges * num_max_detections, + -1); + std::vector recalls_out( + num_iou_thresholds * num_categories * num_area_ranges * + num_max_detections, + -1); + std::vector scores_out( + num_iou_thresholds * num_recall_thresholds * num_categories * + num_area_ranges * num_max_detections, + -1); + + // Consider the list of all detected instances in the entire dataset in one + // large list. evaluation_indices, detection_scores, + // image_detection_indices, and detection_sorted_indices all have the same + // length as this list, such that each entry corresponds to one detected + // instance + std::vector evaluation_indices; // indices into evaluations[] + std::vector detection_scores; // detection scores of each instance + std::vector detection_sorted_indices; // sorted indices of all + // instances in the dataset + std::vector + image_detection_indices; // indices into the list of detected instances in + // the same image as each instance + std::vector precisions, recalls; + + for (auto c = 0; c < num_categories; ++c) { + for (auto a = 0; a < num_area_ranges; ++a) { + for (auto m = 0; m < num_max_detections; ++m) { + // The COCO PythonAPI assumes evaluations[] (the return value of + // COCOeval::EvaluateImages() is one long list storing results for each + // combination of category, area range, and image id, with categories in + // the outermost loop and images in the innermost loop. 
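+        // As a small illustration (parameter counts assumed only for this
+        // comment): with num_categories = 2, num_area_ranges = 4 and
+        // num_images = 3, the block of evaluations for category c = 1 and
+        // area range a = 2 starts at 1 * 4 * 3 + 2 * 3 = 18, which is the
+        // evaluations_index computed below.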
+ const int64_t evaluations_index = + c * num_area_ranges * num_images + a * num_images; + int num_valid_ground_truth = BuildSortedDetectionList( + evaluations, + evaluations_index, + num_images, + max_detections[m], + &evaluation_indices, + &detection_scores, + &detection_sorted_indices, + &image_detection_indices); + + if (num_valid_ground_truth == 0) { + continue; + } + + for (auto t = 0; t < num_iou_thresholds; ++t) { + // recalls_out is a flattened vectors representing a + // num_iou_thresholds X num_categories X num_area_ranges X + // num_max_detections matrix + const int64_t recalls_out_index = + t * num_categories * num_area_ranges * num_max_detections + + c * num_area_ranges * num_max_detections + + a * num_max_detections + m; + + // precisions_out and scores_out are flattened vectors + // representing a num_iou_thresholds X num_recall_thresholds X + // num_categories X num_area_ranges X num_max_detections matrix + const int64_t precisions_out_stride = + num_categories * num_area_ranges * num_max_detections; + const int64_t precisions_out_index = t * num_recall_thresholds * + num_categories * num_area_ranges * num_max_detections + + c * num_area_ranges * num_max_detections + + a * num_max_detections + m; + + ComputePrecisionRecallCurve( + precisions_out_index, + precisions_out_stride, + recalls_out_index, + recall_thresholds, + t, + num_iou_thresholds, + num_valid_ground_truth, + evaluations, + evaluation_indices, + detection_scores, + detection_sorted_indices, + image_detection_indices, + &precisions, + &recalls, + &precisions_out, + &scores_out, + &recalls_out); + } + } + } + } + + time_t rawtime; + struct tm local_time; + std::array buffer; + time(&rawtime); +#ifdef _WIN32 + localtime_s(&local_time, &rawtime); +#else + localtime_r(&rawtime, &local_time); +#endif + strftime( + buffer.data(), 200, "%Y-%m-%d %H:%num_max_detections:%S", &local_time); + return py::dict( + "params"_a = params, + "counts"_a = std::vector( + {num_iou_thresholds, + num_recall_thresholds, + num_categories, + num_area_ranges, + num_max_detections}), + "date"_a = buffer, + "precision"_a = precisions_out, + "recall"_a = recalls_out, + "scores"_a = scores_out); +} + +} // namespace COCOeval + +} // namespace detectron2 diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/cocoeval/cocoeval.h b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/cocoeval/cocoeval.h new file mode 100644 index 0000000000000000000000000000000000000000..db246e49a026b7cd989b305f4d3d98100be3c912 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/cocoeval/cocoeval.h @@ -0,0 +1,88 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +#pragma once + +#include +#include +#include +#include +#include + +namespace py = pybind11; + +namespace detectron2 { + +namespace COCOeval { + +// Annotation data for a single object instance in an image +struct InstanceAnnotation { + InstanceAnnotation( + uint64_t id, + double score, + double area, + bool is_crowd, + bool ignore) + : id{id}, score{score}, area{area}, is_crowd{is_crowd}, ignore{ignore} {} + uint64_t id; + double score = 0.; + double area = 0.; + bool is_crowd = false; + bool ignore = false; +}; + +// Stores intermediate results for evaluating detection results for a single +// image that has D detected instances and G ground truth instances. 
This stores +// matches between detected and ground truth instances +struct ImageEvaluation { + // For each of the D detected instances, the id of the matched ground truth + // instance, or 0 if unmatched + std::vector detection_matches; + + // The detection score of each of the D detected instances + std::vector detection_scores; + + // Marks whether or not each of G instances was ignored from evaluation (e.g., + // because it's outside area_range) + std::vector ground_truth_ignores; + + // Marks whether or not each of D instances was ignored from evaluation (e.g., + // because it's outside aRng) + std::vector detection_ignores; +}; + +template +using ImageCategoryInstances = std::vector>>; + +// C++ implementation of COCO API cocoeval.py::COCOeval.evaluateImg(). For each +// combination of image, category, area range settings, and IOU thresholds to +// evaluate, it matches detected instances to ground truth instances and stores +// the results into a vector of ImageEvaluation results, which will be +// interpreted by the COCOeval::Accumulate() function to produce precion-recall +// curves. The parameters of nested vectors have the following semantics: +// image_category_ious[i][c][d][g] is the intersection over union of the d'th +// detected instance and g'th ground truth instance of +// category category_ids[c] in image image_ids[i] +// image_category_ground_truth_instances[i][c] is a vector of ground truth +// instances in image image_ids[i] of category category_ids[c] +// image_category_detection_instances[i][c] is a vector of detected +// instances in image image_ids[i] of category category_ids[c] +std::vector EvaluateImages( + const std::vector>& area_ranges, // vector of 2-tuples + int max_detections, + const std::vector& iou_thresholds, + const ImageCategoryInstances>& image_category_ious, + const ImageCategoryInstances& + image_category_ground_truth_instances, + const ImageCategoryInstances& + image_category_detection_instances); + +// C++ implementation of COCOeval.accumulate(), which generates precision +// recall curves for each set of category, IOU threshold, detection area range, +// and max number of detections parameters. It is assumed that the parameter +// evaluations is the return value of the functon COCOeval::EvaluateImages(), +// which was called with the same parameter settings params +py::dict Accumulate( + const py::object& params, + const std::vector& evalutations); + +} // namespace COCOeval +} // namespace detectron2 diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/cuda_version.cu b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/cuda_version.cu new file mode 100644 index 0000000000000000000000000000000000000000..6dfe1b90c1f65c443681813fd3e3386c9faa3360 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/cuda_version.cu @@ -0,0 +1,26 @@ +// Copyright (c) Facebook, Inc. and its affiliates. + +#include + +namespace detectron2 { +int get_cudart_version() { +// Not a ROCM platform: Either HIP is not used, or +// it is used, but platform is not ROCM (i.e. it is CUDA) +#if !defined(__HIP_PLATFORM_HCC__) + return CUDART_VERSION; +#else + int version = 0; + +#if HIP_VERSION_MAJOR != 0 + // Create a convention similar to that of CUDA, as assumed by other + // parts of the code. 
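+  // For instance, HIP 5.6 is reported as 5 * 100 + 6 = 506 under this
+  // convention, loosely mirroring how CUDART_VERSION encodes CUDA 11.8 as
+  // 11080.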
+ + version = HIP_VERSION_MINOR; + version += (HIP_VERSION_MAJOR * 100); +#else + hipRuntimeGetVersion(&version); +#endif + return version; +#endif +} +} // namespace detectron2 diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/deformable/deform_conv.h b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/deformable/deform_conv.h new file mode 100644 index 0000000000000000000000000000000000000000..965c1bfd47b58f9802d1c3fd69a5962517b2da61 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/deformable/deform_conv.h @@ -0,0 +1,377 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +#pragma once +#include + +namespace detectron2 { + +#if defined(WITH_CUDA) || defined(WITH_HIP) +int deform_conv_forward_cuda( + at::Tensor input, + at::Tensor weight, + at::Tensor offset, + at::Tensor output, + at::Tensor columns, + at::Tensor ones, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH, + int dilationW, + int dilationH, + int group, + int deformable_group, + int im2col_step); + +int deform_conv_backward_input_cuda( + at::Tensor input, + at::Tensor offset, + at::Tensor gradOutput, + at::Tensor gradInput, + at::Tensor gradOffset, + at::Tensor weight, + at::Tensor columns, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH, + int dilationW, + int dilationH, + int group, + int deformable_group, + int im2col_step); + +int deform_conv_backward_parameters_cuda( + at::Tensor input, + at::Tensor offset, + at::Tensor gradOutput, + at::Tensor gradWeight, // at::Tensor gradBias, + at::Tensor columns, + at::Tensor ones, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH, + int dilationW, + int dilationH, + int group, + int deformable_group, + float scale, + int im2col_step); + +void modulated_deform_conv_cuda_forward( + at::Tensor input, + at::Tensor weight, + at::Tensor bias, + at::Tensor ones, + at::Tensor offset, + at::Tensor mask, + at::Tensor output, + at::Tensor columns, + int kernel_h, + int kernel_w, + const int stride_h, + const int stride_w, + const int pad_h, + const int pad_w, + const int dilation_h, + const int dilation_w, + const int group, + const int deformable_group, + const bool with_bias); + +void modulated_deform_conv_cuda_backward( + at::Tensor input, + at::Tensor weight, + at::Tensor bias, + at::Tensor ones, + at::Tensor offset, + at::Tensor mask, + at::Tensor columns, + at::Tensor grad_input, + at::Tensor grad_weight, + at::Tensor grad_bias, + at::Tensor grad_offset, + at::Tensor grad_mask, + at::Tensor grad_output, + int kernel_h, + int kernel_w, + int stride_h, + int stride_w, + int pad_h, + int pad_w, + int dilation_h, + int dilation_w, + int group, + int deformable_group, + const bool with_bias); + +#endif + +inline int deform_conv_forward( + at::Tensor input, + at::Tensor weight, + at::Tensor offset, + at::Tensor output, + at::Tensor columns, + at::Tensor ones, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH, + int dilationW, + int dilationH, + int group, + int deformable_group, + int im2col_step) { + if (input.is_cuda()) { +#if defined(WITH_CUDA) || defined(WITH_HIP) + TORCH_CHECK(weight.is_cuda(), "weight tensor is not on GPU!"); + TORCH_CHECK(offset.is_cuda(), "offset tensor is not on GPU!"); + return deform_conv_forward_cuda( + input, + weight, + offset, + output, + columns, + ones, + kW, + kH, + dW, + dH, + padW, + padH, + dilationW, + dilationH, + group, + deformable_group, + 
im2col_step); +#else + AT_ERROR("Detectron2 is not compiled with GPU support!"); +#endif + } + AT_ERROR("This operator is not implemented on CPU"); +} + +inline int deform_conv_backward_input( + at::Tensor input, + at::Tensor offset, + at::Tensor gradOutput, + at::Tensor gradInput, + at::Tensor gradOffset, + at::Tensor weight, + at::Tensor columns, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH, + int dilationW, + int dilationH, + int group, + int deformable_group, + int im2col_step) { + if (gradOutput.is_cuda()) { +#if defined(WITH_CUDA) || defined(WITH_HIP) + TORCH_CHECK(input.is_cuda(), "input tensor is not on GPU!"); + TORCH_CHECK(weight.is_cuda(), "weight tensor is not on GPU!"); + TORCH_CHECK(offset.is_cuda(), "offset tensor is not on GPU!"); + return deform_conv_backward_input_cuda( + input, + offset, + gradOutput, + gradInput, + gradOffset, + weight, + columns, + kW, + kH, + dW, + dH, + padW, + padH, + dilationW, + dilationH, + group, + deformable_group, + im2col_step); +#else + AT_ERROR("Detectron2 is not compiled with GPU support!"); +#endif + } + AT_ERROR("This operator is not implemented on CPU"); +} + +inline int deform_conv_backward_filter( + at::Tensor input, + at::Tensor offset, + at::Tensor gradOutput, + at::Tensor gradWeight, // at::Tensor gradBias, + at::Tensor columns, + at::Tensor ones, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH, + int dilationW, + int dilationH, + int group, + int deformable_group, + float scale, + int im2col_step) { + if (gradOutput.is_cuda()) { +#if defined(WITH_CUDA) || defined(WITH_HIP) + TORCH_CHECK(input.is_cuda(), "input tensor is not on GPU!"); + TORCH_CHECK(offset.is_cuda(), "offset tensor is not on GPU!"); + return deform_conv_backward_parameters_cuda( + input, + offset, + gradOutput, + gradWeight, + columns, + ones, + kW, + kH, + dW, + dH, + padW, + padH, + dilationW, + dilationH, + group, + deformable_group, + scale, + im2col_step); +#else + AT_ERROR("Detectron2 is not compiled with GPU support!"); +#endif + } + AT_ERROR("This operator is not implemented on CPU"); +} + +inline void modulated_deform_conv_forward( + at::Tensor input, + at::Tensor weight, + at::Tensor bias, + at::Tensor ones, + at::Tensor offset, + at::Tensor mask, + at::Tensor output, + at::Tensor columns, + int kernel_h, + int kernel_w, + const int stride_h, + const int stride_w, + const int pad_h, + const int pad_w, + const int dilation_h, + const int dilation_w, + const int group, + const int deformable_group, + const bool with_bias) { + if (input.is_cuda()) { +#if defined(WITH_CUDA) || defined(WITH_HIP) + TORCH_CHECK(weight.is_cuda(), "weight tensor is not on GPU!"); + TORCH_CHECK(bias.is_cuda(), "bias tensor is not on GPU!"); + TORCH_CHECK(offset.is_cuda(), "offset tensor is not on GPU!"); + return modulated_deform_conv_cuda_forward( + input, + weight, + bias, + ones, + offset, + mask, + output, + columns, + kernel_h, + kernel_w, + stride_h, + stride_w, + pad_h, + pad_w, + dilation_h, + dilation_w, + group, + deformable_group, + with_bias); +#else + AT_ERROR("Detectron2 is not compiled with GPU support!"); +#endif + } + AT_ERROR("This operator is not implemented on CPU"); +} + +inline void modulated_deform_conv_backward( + at::Tensor input, + at::Tensor weight, + at::Tensor bias, + at::Tensor ones, + at::Tensor offset, + at::Tensor mask, + at::Tensor columns, + at::Tensor grad_input, + at::Tensor grad_weight, + at::Tensor grad_bias, + at::Tensor grad_offset, + at::Tensor grad_mask, + at::Tensor grad_output, + int kernel_h, + int 
kernel_w, + int stride_h, + int stride_w, + int pad_h, + int pad_w, + int dilation_h, + int dilation_w, + int group, + int deformable_group, + const bool with_bias) { + if (grad_output.is_cuda()) { +#if defined(WITH_CUDA) || defined(WITH_HIP) + TORCH_CHECK(input.is_cuda(), "input tensor is not on GPU!"); + TORCH_CHECK(weight.is_cuda(), "weight tensor is not on GPU!"); + TORCH_CHECK(bias.is_cuda(), "bias tensor is not on GPU!"); + TORCH_CHECK(offset.is_cuda(), "offset tensor is not on GPU!"); + return modulated_deform_conv_cuda_backward( + input, + weight, + bias, + ones, + offset, + mask, + columns, + grad_input, + grad_weight, + grad_bias, + grad_offset, + grad_mask, + grad_output, + kernel_h, + kernel_w, + stride_h, + stride_w, + pad_h, + pad_w, + dilation_h, + dilation_w, + group, + deformable_group, + with_bias); +#else + AT_ERROR("Detectron2 is not compiled with GPU support!"); +#endif + } + AT_ERROR("This operator is not implemented on CPU"); +} + +} // namespace detectron2 diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/deformable/deform_conv_cuda.cu b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/deformable/deform_conv_cuda.cu new file mode 100644 index 0000000000000000000000000000000000000000..2072bb856ec40b61c3826cead2fb7bb7c971a089 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/deformable/deform_conv_cuda.cu @@ -0,0 +1,1223 @@ +// Copyright (c) Facebook, Inc. and its affiliates. + +// modified from +// https://github.com/open-mmlab/mmdetection/blob/master/mmdet/ops/dcn/src/deform_conv_cuda.cpp +// Original license: Apache 2.0 + +// modify from +// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda.c +// Original license: Apache 2.0 + +#include + +#include "deform_conv.h" + +#include +#include + +namespace detectron2 { + +void deformable_im2col( + const at::Tensor data_im, + const at::Tensor data_offset, + const int channels, + const int height, + const int width, + const int ksize_h, + const int ksize_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int parallel_imgs, + const int deformable_group, + at::Tensor data_col); + +void deformable_col2im( + const at::Tensor data_col, + const at::Tensor data_offset, + const int channels, + const int height, + const int width, + const int ksize_h, + const int ksize_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int parallel_imgs, + const int deformable_group, + at::Tensor grad_im); + +void deformable_col2im_coord( + const at::Tensor data_col, + const at::Tensor data_im, + const at::Tensor data_offset, + const int channels, + const int height, + const int width, + const int ksize_h, + const int ksize_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int parallel_imgs, + const int deformable_group, + at::Tensor grad_offset); + +void modulated_deformable_im2col_cuda( + const at::Tensor data_im, + const at::Tensor data_offset, + const at::Tensor data_mask, + const int batch_size, + const int channels, + const int height_im, + const int width_im, + const int height_col, + const int width_col, + const int kernel_h, + const int kenerl_w, + const int 
pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int deformable_group, + at::Tensor data_col); + +void modulated_deformable_col2im_cuda( + const at::Tensor data_col, + const at::Tensor data_offset, + const at::Tensor data_mask, + const int batch_size, + const int channels, + const int height_im, + const int width_im, + const int height_col, + const int width_col, + const int kernel_h, + const int kenerl_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int deformable_group, + at::Tensor grad_im); + +void modulated_deformable_col2im_coord_cuda( + const at::Tensor data_col, + const at::Tensor data_im, + const at::Tensor data_offset, + const at::Tensor data_mask, + const int batch_size, + const int channels, + const int height_im, + const int width_im, + const int height_col, + const int width_col, + const int kernel_h, + const int kenerl_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int deformable_group, + at::Tensor grad_offset, + at::Tensor grad_mask); + +void shape_check( + at::Tensor input, + at::Tensor offset, + at::Tensor* gradOutput, + at::Tensor weight, + int kH, + int kW, + int dH, + int dW, + int padH, + int padW, + int dilationH, + int dilationW, + int group, + int deformable_group) { + TORCH_CHECK( + weight.ndimension() == 4, + "4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, " + "but got: %s", + weight.ndimension()); + + TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); + + TORCH_CHECK( + kW > 0 && kH > 0, + "kernel size should be greater than zero, but got kH: %d kW: %d", + kH, + kW); + + TORCH_CHECK( + (weight.size(2) == kH && weight.size(3) == kW), + "kernel size should be consistent with weight, ", + "but got kH: %d kW: %d weight.size(2): %d, weight.size(3): %d", + kH, + kW, + weight.size(2), + weight.size(3)); + + TORCH_CHECK( + dW > 0 && dH > 0, + "stride should be greater than zero, but got dH: %d dW: %d", + dH, + dW); + + TORCH_CHECK( + dilationW > 0 && dilationH > 0, + "dilation should be greater than 0, but got dilationH: %d dilationW: %d", + dilationH, + dilationW); + + int ndim = input.ndimension(); + int dimf = 0; + int dimh = 1; + int dimw = 2; + + if (ndim == 4) { + dimf++; + dimh++; + dimw++; + } + + TORCH_CHECK( + ndim == 3 || ndim == 4, + "3D or 4D input tensor expected but got: %s", + ndim); + + long nInputPlane = weight.size(1) * group; + long inputHeight = input.size(dimh); + long inputWidth = input.size(dimw); + long nOutputPlane = weight.size(0); + long outputHeight = + (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; + long outputWidth = + (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; + + TORCH_CHECK( + nInputPlane % deformable_group == 0, + "input channels must divide deformable group size"); + + if (outputWidth < 1 || outputHeight < 1) + AT_ERROR( + "Given input size: (%ld x %ld x %ld). " + "Calculated output size: (%ld x %ld x %ld). 
Output size is too small", + nInputPlane, + inputHeight, + inputWidth, + nOutputPlane, + outputHeight, + outputWidth); + + TORCH_CHECK( + input.size(1) == nInputPlane, + "invalid number of input planes, expected: %d, but got: %d", + nInputPlane, + input.size(1)); + + TORCH_CHECK( + (inputHeight + 2 * padH >= kH && inputWidth + 2 * padW >= kW), + "input image is smaller than kernel"); + + TORCH_CHECK( + (offset.size(2) == outputHeight && offset.size(3) == outputWidth), + "invalid spatial size of offset, expected height: %d width: %d, but " + "got height: %d width: %d", + outputHeight, + outputWidth, + offset.size(2), + offset.size(3)); + + TORCH_CHECK( + (offset.size(1) == deformable_group * 2 * kH * kW), + "invalid number of channels of offset"); + + if (gradOutput != NULL) { + TORCH_CHECK( + gradOutput->size(dimf) == nOutputPlane, + "invalid number of gradOutput planes, expected: %d, but got: %d", + nOutputPlane, + gradOutput->size(dimf)); + + TORCH_CHECK( + (gradOutput->size(dimh) == outputHeight && + gradOutput->size(dimw) == outputWidth), + "invalid size of gradOutput, expected height: %d width: %d , but " + "got height: %d width: %d", + outputHeight, + outputWidth, + gradOutput->size(dimh), + gradOutput->size(dimw)); + } +} + +int deform_conv_forward_cuda( + at::Tensor input, + at::Tensor weight, + at::Tensor offset, + at::Tensor output, + at::Tensor columns, + at::Tensor ones, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH, + int dilationW, + int dilationH, + int group, + int deformable_group, + int im2col_step) { + // todo: resize columns to include im2col: done + // todo: add im2col_step as input + // todo: add new output buffer and transpose it to output (or directly + // transpose output) todo: possibly change data indexing because of + // parallel_imgs + + shape_check( + input, + offset, + NULL, + weight, + kH, + kW, + dH, + dW, + padH, + padW, + dilationH, + dilationW, + group, + deformable_group); + + input = input.contiguous(); + offset = offset.contiguous(); + weight = weight.contiguous(); + + int batch = 1; + if (input.ndimension() == 3) { + // Force batch + batch = 0; + input.unsqueeze_(0); + offset.unsqueeze_(0); + } + + // todo: assert batchsize dividable by im2col_step + + long batchSize = input.size(0); + long nInputPlane = input.size(1); + long inputHeight = input.size(2); + long inputWidth = input.size(3); + + long nOutputPlane = weight.size(0); + + long outputWidth = + (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; + long outputHeight = + (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; + + TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset"); + + output = output.view( + {batchSize / im2col_step, + im2col_step, + nOutputPlane, + outputHeight, + outputWidth}); + columns = at::zeros( + {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, + input.options()); + + if (ones.ndimension() != 2 || + ones.size(0) * ones.size(1) < outputHeight * outputWidth) { + ones = at::ones({outputHeight, outputWidth}, input.options()); + } + + input = input.view( + {batchSize / im2col_step, + im2col_step, + nInputPlane, + inputHeight, + inputWidth}); + offset = offset.view( + {batchSize / im2col_step, + im2col_step, + deformable_group * 2 * kH * kW, + outputHeight, + outputWidth}); + + at::Tensor output_buffer = at::zeros( + {batchSize / im2col_step, + nOutputPlane, + im2col_step * outputHeight, + outputWidth}, + output.options()); + + output_buffer = output_buffer.view( + {output_buffer.size(0), + group, + 
output_buffer.size(1) / group, + output_buffer.size(2), + output_buffer.size(3)}); + + for (int elt = 0; elt < batchSize / im2col_step; elt++) { + deformable_im2col( + input[elt], + offset[elt], + nInputPlane, + inputHeight, + inputWidth, + kH, + kW, + padH, + padW, + dH, + dW, + dilationH, + dilationW, + im2col_step, + deformable_group, + columns); + + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + weight = weight.view( + {group, + weight.size(0) / group, + weight.size(1), + weight.size(2), + weight.size(3)}); + + for (int g = 0; g < group; g++) { + output_buffer[elt][g] = output_buffer[elt][g] + .flatten(1) + .addmm_(weight[g].flatten(1), columns[g]) + .view_as(output_buffer[elt][g]); + } + } + + output_buffer = output_buffer.view( + {output_buffer.size(0), + output_buffer.size(1) * output_buffer.size(2), + output_buffer.size(3), + output_buffer.size(4)}); + + output_buffer = output_buffer.view( + {batchSize / im2col_step, + nOutputPlane, + im2col_step, + outputHeight, + outputWidth}); + output_buffer.transpose_(1, 2); + output.copy_(output_buffer); + output = output.view({batchSize, nOutputPlane, outputHeight, outputWidth}); + + input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); + offset = offset.view( + {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + if (batch == 0) { + output = output.view({nOutputPlane, outputHeight, outputWidth}); + input = input.view({nInputPlane, inputHeight, inputWidth}); + offset = offset.view({offset.size(1), offset.size(2), offset.size(3)}); + } + + return 1; +} + +int deform_conv_backward_input_cuda( + at::Tensor input, + at::Tensor offset, + at::Tensor gradOutput, + at::Tensor gradInput, + at::Tensor gradOffset, + at::Tensor weight, + at::Tensor columns, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH, + int dilationW, + int dilationH, + int group, + int deformable_group, + int im2col_step) { + shape_check( + input, + offset, + &gradOutput, + weight, + kH, + kW, + dH, + dW, + padH, + padW, + dilationH, + dilationW, + group, + deformable_group); + + input = input.contiguous(); + offset = offset.contiguous(); + gradOutput = gradOutput.contiguous(); + weight = weight.contiguous(); + + int batch = 1; + + if (input.ndimension() == 3) { + // Force batch + batch = 0; + input = input.view({1, input.size(0), input.size(1), input.size(2)}); + offset = offset.view({1, offset.size(0), offset.size(1), offset.size(2)}); + gradOutput = gradOutput.view( + {1, gradOutput.size(0), gradOutput.size(1), gradOutput.size(2)}); + } + + long batchSize = input.size(0); + long nInputPlane = input.size(1); + long inputHeight = input.size(2); + long inputWidth = input.size(3); + + long nOutputPlane = weight.size(0); + + long outputWidth = + (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; + long outputHeight = + (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; + + TORCH_CHECK((offset.size(0) == batchSize), 3, "invalid batch size of offset"); + gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth}); + columns = at::zeros( + {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, + input.options()); + + // change order of grad output + gradOutput = gradOutput.view( + {batchSize / im2col_step, + im2col_step, + nOutputPlane, + outputHeight, + outputWidth}); + gradOutput.transpose_(1, 2); + + gradInput = gradInput.view( + {batchSize / im2col_step, + im2col_step, + nInputPlane, + inputHeight, + inputWidth}); + input = input.view( + 
{batchSize / im2col_step, + im2col_step, + nInputPlane, + inputHeight, + inputWidth}); + gradOffset = gradOffset.view( + {batchSize / im2col_step, + im2col_step, + deformable_group * 2 * kH * kW, + outputHeight, + outputWidth}); + offset = offset.view( + {batchSize / im2col_step, + im2col_step, + deformable_group * 2 * kH * kW, + outputHeight, + outputWidth}); + + for (int elt = 0; elt < batchSize / im2col_step; elt++) { + // divide into groups + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + weight = weight.view( + {group, + weight.size(0) / group, + weight.size(1), + weight.size(2), + weight.size(3)}); + gradOutput = gradOutput.view( + {gradOutput.size(0), + group, + gradOutput.size(1) / group, + gradOutput.size(2), + gradOutput.size(3), + gradOutput.size(4)}); + + for (int g = 0; g < group; g++) { + columns[g] = columns[g].addmm_( + weight[g].flatten(1).transpose(0, 1), + gradOutput[elt][g].flatten(1), + 0.0f, + 1.0f); + } + + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + gradOutput = gradOutput.view( + {gradOutput.size(0), + gradOutput.size(1) * gradOutput.size(2), + gradOutput.size(3), + gradOutput.size(4), + gradOutput.size(5)}); + + deformable_col2im_coord( + columns, + input[elt], + offset[elt], + nInputPlane, + inputHeight, + inputWidth, + kH, + kW, + padH, + padW, + dH, + dW, + dilationH, + dilationW, + im2col_step, + deformable_group, + gradOffset[elt]); + + deformable_col2im( + columns, + offset[elt], + nInputPlane, + inputHeight, + inputWidth, + kH, + kW, + padH, + padW, + dH, + dW, + dilationH, + dilationW, + im2col_step, + deformable_group, + gradInput[elt]); + } + + gradOutput.transpose_(1, 2); + gradOutput = + gradOutput.view({batchSize, nOutputPlane, outputHeight, outputWidth}); + + gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth}); + input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); + gradOffset = gradOffset.view( + {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + offset = offset.view( + {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + if (batch == 0) { + gradOutput = gradOutput.view({nOutputPlane, outputHeight, outputWidth}); + input = input.view({nInputPlane, inputHeight, inputWidth}); + gradInput = gradInput.view({nInputPlane, inputHeight, inputWidth}); + offset = offset.view({offset.size(1), offset.size(2), offset.size(3)}); + gradOffset = + gradOffset.view({offset.size(1), offset.size(2), offset.size(3)}); + } + + return 1; +} + +int deform_conv_backward_parameters_cuda( + at::Tensor input, + at::Tensor offset, + at::Tensor gradOutput, + at::Tensor gradWeight, // at::Tensor gradBias, + at::Tensor columns, + at::Tensor ones, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH, + int dilationW, + int dilationH, + int group, + int deformable_group, + float scale, + int im2col_step) { + // todo: transpose and reshape outGrad + // todo: reshape columns + // todo: add im2col_step as input + + shape_check( + input, + offset, + &gradOutput, + gradWeight, + kH, + kW, + dH, + dW, + padH, + padW, + dilationH, + dilationW, + group, + deformable_group); + + input = input.contiguous(); + offset = offset.contiguous(); + gradOutput = gradOutput.contiguous(); + + int batch = 1; + + if (input.ndimension() == 3) { + // Force batch + batch = 0; + input = input.view( + at::IntList({1, input.size(0), input.size(1), input.size(2)})); + gradOutput = gradOutput.view( + {1, gradOutput.size(0), gradOutput.size(1), 
gradOutput.size(2)}); + } + + long batchSize = input.size(0); + long nInputPlane = input.size(1); + long inputHeight = input.size(2); + long inputWidth = input.size(3); + + long nOutputPlane = gradWeight.size(0); + + long outputWidth = + (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; + long outputHeight = + (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; + + TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset"); + + columns = at::zeros( + {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, + input.options()); + + gradOutput = gradOutput.view( + {batchSize / im2col_step, + im2col_step, + nOutputPlane, + outputHeight, + outputWidth}); + gradOutput.transpose_(1, 2); + + at::Tensor gradOutputBuffer = at::zeros_like(gradOutput); + gradOutputBuffer = gradOutputBuffer.view( + {batchSize / im2col_step, + nOutputPlane, + im2col_step, + outputHeight, + outputWidth}); + gradOutputBuffer.copy_(gradOutput); + // gradOutput is not contiguous, so we do reshape (instead of view) next + gradOutputBuffer = gradOutputBuffer.reshape( + {batchSize / im2col_step, + nOutputPlane, + im2col_step * outputHeight, + outputWidth}); + + gradOutput.transpose_(1, 2); + gradOutput = + gradOutput.view({batchSize, nOutputPlane, outputHeight, outputWidth}); + + input = input.view( + {batchSize / im2col_step, + im2col_step, + nInputPlane, + inputHeight, + inputWidth}); + offset = offset.view( + {batchSize / im2col_step, + im2col_step, + deformable_group * 2 * kH * kW, + outputHeight, + outputWidth}); + + for (int elt = 0; elt < batchSize / im2col_step; elt++) { + deformable_im2col( + input[elt], + offset[elt], + nInputPlane, + inputHeight, + inputWidth, + kH, + kW, + padH, + padW, + dH, + dW, + dilationH, + dilationW, + im2col_step, + deformable_group, + columns); + + // divide into group + gradOutputBuffer = gradOutputBuffer.view( + {gradOutputBuffer.size(0), + group, + gradOutputBuffer.size(1) / group, + gradOutputBuffer.size(2), + gradOutputBuffer.size(3)}); + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + gradWeight = gradWeight.view( + {group, + gradWeight.size(0) / group, + gradWeight.size(1), + gradWeight.size(2), + gradWeight.size(3)}); + + for (int g = 0; g < group; g++) { + gradWeight[g] = gradWeight[g] + .flatten(1) + .addmm_( + gradOutputBuffer[elt][g].flatten(1), + columns[g].transpose(1, 0), + 1.0, + scale) + .view_as(gradWeight[g]); + } + gradOutputBuffer = gradOutputBuffer.view( + {gradOutputBuffer.size(0), + gradOutputBuffer.size(1) * gradOutputBuffer.size(2), + gradOutputBuffer.size(3), + gradOutputBuffer.size(4)}); + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + gradWeight = gradWeight.view( + {gradWeight.size(0) * gradWeight.size(1), + gradWeight.size(2), + gradWeight.size(3), + gradWeight.size(4)}); + } + + input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); + offset = offset.view( + {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + if (batch == 0) { + gradOutput = gradOutput.view({nOutputPlane, outputHeight, outputWidth}); + input = input.view({nInputPlane, inputHeight, inputWidth}); + } + + return 1; +} + +void modulated_deform_conv_cuda_forward( + at::Tensor input, + at::Tensor weight, + at::Tensor bias, + at::Tensor ones, + at::Tensor offset, + at::Tensor mask, + at::Tensor output, + at::Tensor columns, + int kernel_h, + int kernel_w, + const int stride_h, + const int stride_w, + const int pad_h, + const int pad_w, + const 
int dilation_h, + const int dilation_w, + const int group, + const int deformable_group, + const bool with_bias) { + shape_check( + input, + offset, + NULL, + weight, + kernel_h, + kernel_w, + stride_h, + stride_w, + pad_h, + pad_w, + dilation_h, + dilation_w, + group, + deformable_group); + + TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); + TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); + + const int batch = input.size(0); + const int channels = input.size(1); + const int height = input.size(2); + const int width = input.size(3); + + const int channels_out = weight.size(0); + const int channels_kernel = weight.size(1); + const int kernel_h_ = weight.size(2); + const int kernel_w_ = weight.size(3); + + if (kernel_h_ != kernel_h || kernel_w_ != kernel_w) + AT_ERROR( + "Input shape and kernel shape wont match: (%d x %d vs %d x %d).", + kernel_h_, + kernel_w, + kernel_h_, + kernel_w_); + if (channels != channels_kernel * group) + AT_ERROR( + "Input shape and kernel channels wont match: (%d vs %d).", + channels, + channels_kernel * group); + + const int height_out = + (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; + const int width_out = + (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; + + // mask shape check + TORCH_CHECK( + (mask.size(2) == height_out && mask.size(3) == width_out), + "invalid spatial size of mask, expected height: %d width: %d, but " + "got height: %d width: %d", + height_out, + width_out, + mask.size(2), + mask.size(3)); + + TORCH_CHECK( + (mask.size(1) == deformable_group * kernel_h * kernel_w), + "invalid number of channels of mask"); + + if (ones.ndimension() != 2 || + ones.size(0) * ones.size(1) < height_out * width_out) { + // Resize plane and fill with ones... 
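+    // Note: the ones buffer is not consumed in this forward pass (the bias is
+    // added by broadcasting at the end); it appears to be kept only for the
+    // backward pass, where a column of ones is used to reduce grad_output
+    // into grad_bias.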
+ ones = at::ones({height_out, width_out}, input.options()); + } + + // resize output + output = output.view({batch, channels_out, height_out, width_out}).zero_(); + // resize temporary columns + columns = at::zeros( + {channels * kernel_h * kernel_w, 1 * height_out * width_out}, + input.options()); + + output = output.view( + {output.size(0), + group, + output.size(1) / group, + output.size(2), + output.size(3)}); + + for (int b = 0; b < batch; b++) { + modulated_deformable_im2col_cuda( + input[b], + offset[b], + mask[b], + 1, + channels, + height, + width, + height_out, + width_out, + kernel_h, + kernel_w, + pad_h, + pad_w, + stride_h, + stride_w, + dilation_h, + dilation_w, + deformable_group, + columns); + + // divide into group + weight = weight.view( + {group, + weight.size(0) / group, + weight.size(1), + weight.size(2), + weight.size(3)}); + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + + for (int g = 0; g < group; g++) { + output[b][g] = output[b][g] + .flatten(1) + .addmm_(weight[g].flatten(1), columns[g]) + .view_as(output[b][g]); + } + + weight = weight.view( + {weight.size(0) * weight.size(1), + weight.size(2), + weight.size(3), + weight.size(4)}); + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + } + + output = output.view( + {output.size(0), + output.size(1) * output.size(2), + output.size(3), + output.size(4)}); + + if (with_bias) { + output += bias.view({1, bias.size(0), 1, 1}); + } +} + +void modulated_deform_conv_cuda_backward( + at::Tensor input, + at::Tensor weight, + at::Tensor bias, + at::Tensor ones, + at::Tensor offset, + at::Tensor mask, + at::Tensor columns, + at::Tensor grad_input, + at::Tensor grad_weight, + at::Tensor grad_bias, + at::Tensor grad_offset, + at::Tensor grad_mask, + at::Tensor grad_output, + int kernel_h, + int kernel_w, + int stride_h, + int stride_w, + int pad_h, + int pad_w, + int dilation_h, + int dilation_w, + int group, + int deformable_group, + const bool with_bias) { + shape_check( + input, + offset, + &grad_output, + weight, + kernel_h, + kernel_w, + stride_h, + stride_w, + pad_h, + pad_w, + dilation_h, + dilation_w, + group, + deformable_group); + + TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); + TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); + + const int batch = input.size(0); + const int channels = input.size(1); + const int height = input.size(2); + const int width = input.size(3); + + const int channels_kernel = weight.size(1); + const int kernel_h_ = weight.size(2); + const int kernel_w_ = weight.size(3); + if (kernel_h_ != kernel_h || kernel_w_ != kernel_w) + AT_ERROR( + "Input shape and kernel shape wont match: (%d x %d vs %d x %d).", + kernel_h_, + kernel_w, + kernel_h_, + kernel_w_); + if (channels != channels_kernel * group) + AT_ERROR( + "Input shape and kernel channels wont match: (%d vs %d).", + channels, + channels_kernel * group); + + const int height_out = + (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; + const int width_out = + (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; + + // mask shape check + TORCH_CHECK( + (mask.size(2) == height_out && mask.size(3) == width_out), + "invalid spatial size of mask, expected height: %d width: %d, but " + "got height: %d width: %d", + height_out, + width_out, + mask.size(2), + mask.size(3)); + + TORCH_CHECK( + (mask.size(1) == deformable_group * kernel_h * kernel_w), + "invalid number of channels of mask"); + + if 
(ones.ndimension() != 2 || + ones.size(0) * ones.size(1) < height_out * width_out) { + // Resize plane and fill with ones... + ones = at::ones({height_out, width_out}, input.options()); + } + + grad_input = grad_input.view({batch, channels, height, width}); + columns = at::zeros( + {channels * kernel_h * kernel_w, height_out * width_out}, + input.options()); + + grad_output = grad_output.view( + {grad_output.size(0), + group, + grad_output.size(1) / group, + grad_output.size(2), + grad_output.size(3)}); + + for (int b = 0; b < batch; b++) { + // divide int group + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + weight = weight.view( + {group, + weight.size(0) / group, + weight.size(1), + weight.size(2), + weight.size(3)}); + + for (int g = 0; g < group; g++) { + columns[g].addmm_( + weight[g].flatten(1).transpose(0, 1), + grad_output[b][g].flatten(1), + 0.0f, + 1.0f); + } + + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + weight = weight.view( + {weight.size(0) * weight.size(1), + weight.size(2), + weight.size(3), + weight.size(4)}); + + // gradient w.r.t. input coordinate data + modulated_deformable_col2im_coord_cuda( + columns, + input[b], + offset[b], + mask[b], + 1, + channels, + height, + width, + height_out, + width_out, + kernel_h, + kernel_w, + pad_h, + pad_w, + stride_h, + stride_w, + dilation_h, + dilation_w, + deformable_group, + grad_offset[b], + grad_mask[b]); + // gradient w.r.t. input data + modulated_deformable_col2im_cuda( + columns, + offset[b], + mask[b], + 1, + channels, + height, + width, + height_out, + width_out, + kernel_h, + kernel_w, + pad_h, + pad_w, + stride_h, + stride_w, + dilation_h, + dilation_w, + deformable_group, + grad_input[b]); + + // gradient w.r.t. 
weight, dWeight should accumulate across the batch and + // group + modulated_deformable_im2col_cuda( + input[b], + offset[b], + mask[b], + 1, + channels, + height, + width, + height_out, + width_out, + kernel_h, + kernel_w, + pad_h, + pad_w, + stride_h, + stride_w, + dilation_h, + dilation_w, + deformable_group, + columns); + + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + grad_weight = grad_weight.view( + {group, + grad_weight.size(0) / group, + grad_weight.size(1), + grad_weight.size(2), + grad_weight.size(3)}); + if (with_bias) + grad_bias = grad_bias.view({group, grad_bias.size(0) / group}); + + for (int g = 0; g < group; g++) { + grad_weight[g] = + grad_weight[g] + .flatten(1) + .addmm_(grad_output[b][g].flatten(1), columns[g].transpose(0, 1)) + .view_as(grad_weight[g]); + if (with_bias) { + grad_bias[g] = + grad_bias[g] + .view({-1, 1}) + .addmm_(grad_output[b][g].flatten(1), ones.view({-1, 1})) + .view(-1); + } + } + + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + grad_weight = grad_weight.view( + {grad_weight.size(0) * grad_weight.size(1), + grad_weight.size(2), + grad_weight.size(3), + grad_weight.size(4)}); + if (with_bias) + grad_bias = grad_bias.view({grad_bias.size(0) * grad_bias.size(1)}); + } + grad_output = grad_output.view( + {grad_output.size(0) * grad_output.size(1), + grad_output.size(2), + grad_output.size(3), + grad_output.size(4)}); +} + +} // namespace detectron2 diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/deformable/deform_conv_cuda_kernel.cu b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/deformable/deform_conv_cuda_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..f299c7add116685e9c87a187a85ea63f9f808867 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/deformable/deform_conv_cuda_kernel.cu @@ -0,0 +1,1288 @@ +// Copyright (c) Facebook, Inc. and its affiliates. + +// modified from +// https://github.com/open-mmlab/mmdetection/blob/master/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu +// Original license: Apache 2.0 +// clang-format off + +// modify from +// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu + +/*! + ******************* BEGIN Caffe Copyright Notice and Disclaimer ***************** + * + * COPYRIGHT + * + * All contributions by the University of California: + * Copyright (c) 2014-2017 The Regents of the University of California (Regents) + * All rights reserved. + * + * All other contributions: + * Copyright (c) 2014-2017, the respective contributors + * All rights reserved. + * + * Caffe uses a shared copyright model: each contributor holds copyright over + * their contributions to Caffe. The project versioning records all such + * contribution and copyright details. If a contributor wants to further mark + * their specific copyright on a particular contribution, they should indicate + * their copyright solely in the commit message of the change when it is + * committed. + * + * LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + *AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + *IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE + *FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + *DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + *SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + *CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + *OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + *OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * CONTRIBUTION AGREEMENT + * + * By contributing to the BVLC/caffe repository through pull-request, comment, + * or otherwise, the contributor releases their content to the + * license and copyright terms herein. + * + ***************** END Caffe Copyright Notice and Disclaimer ********************* + * + * Copyright (c) 2018 Microsoft + * Licensed under The MIT License [see LICENSE for details] + * \file modulated_deformable_im2col.cuh + * \brief Function definitions of converting an image to + * column matrix based on kernel, padding, dilation, and offset. + * These functions are mainly used in deformable convolution operators. + * \ref: https://arxiv.org/abs/1703.06211 + * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu, Dazhi Cheng + */ + +#include +#include +#include +#include +#include +#include + +using namespace at; + +#define CUDA_KERNEL_LOOP(i, n) \ + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \ + i += blockDim.x * gridDim.x) + + +namespace { + +const int CUDA_NUM_THREADS = 1024; +const int kMaxGridNum = 65535; + +inline int GET_BLOCKS(const int N) { + return std::min(kMaxGridNum, (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS); +} + +} + +template +__device__ scalar_t deformable_im2col_bilinear( + const scalar_t* bottom_data, + const int data_width, + const int height, + const int width, + scalar_t h, + scalar_t w) { + int h_low = floor(h); + int w_low = floor(w); + int h_high = h_low + 1; + int w_high = w_low + 1; + + scalar_t lh = h - h_low; + scalar_t lw = w - w_low; + scalar_t hh = 1 - lh, hw = 1 - lw; + + scalar_t v1 = 0; + if (h_low >= 0 && w_low >= 0) + v1 = bottom_data[h_low * data_width + w_low]; + scalar_t v2 = 0; + if (h_low >= 0 && w_high <= width - 1) + v2 = bottom_data[h_low * data_width + w_high]; + scalar_t v3 = 0; + if (h_high <= height - 1 && w_low >= 0) + v3 = bottom_data[h_high * data_width + w_low]; + scalar_t v4 = 0; + if (h_high <= height - 1 && w_high <= width - 1) + v4 = bottom_data[h_high * data_width + w_high]; + + scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw; + + scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + return val; +} + +template +__device__ scalar_t get_gradient_weight( + scalar_t argmax_h, + scalar_t argmax_w, + const int h, + const int w, + const int height, + const int width) { + if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || + argmax_w >= width) { + // empty + return 0; + } + + int argmax_h_low = floor(argmax_h); + int argmax_w_low = floor(argmax_w); + int argmax_h_high = 
argmax_h_low + 1; + int argmax_w_high = argmax_w_low + 1; + + scalar_t weight = 0; + if (h == argmax_h_low && w == argmax_w_low) + weight = (h + 1 - argmax_h) * (w + 1 - argmax_w); + if (h == argmax_h_low && w == argmax_w_high) + weight = (h + 1 - argmax_h) * (argmax_w + 1 - w); + if (h == argmax_h_high && w == argmax_w_low) + weight = (argmax_h + 1 - h) * (w + 1 - argmax_w); + if (h == argmax_h_high && w == argmax_w_high) + weight = (argmax_h + 1 - h) * (argmax_w + 1 - w); + return weight; +} + +template +__device__ scalar_t get_coordinate_weight( + scalar_t argmax_h, + scalar_t argmax_w, + const int height, + const int width, + const scalar_t* im_data, + const int data_width, + const int bp_dir) { + if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || + argmax_w >= width) { + // empty + return 0; + } + + int argmax_h_low = floor(argmax_h); + int argmax_w_low = floor(argmax_w); + int argmax_h_high = argmax_h_low + 1; + int argmax_w_high = argmax_w_low + 1; + + scalar_t weight = 0; + + if (bp_dir == 0) { + if (argmax_h_low >= 0 && argmax_w_low >= 0) + weight += -1 * (argmax_w_low + 1 - argmax_w) * + im_data[argmax_h_low * data_width + argmax_w_low]; + if (argmax_h_low >= 0 && argmax_w_high <= width - 1) + weight += -1 * (argmax_w - argmax_w_low) * + im_data[argmax_h_low * data_width + argmax_w_high]; + if (argmax_h_high <= height - 1 && argmax_w_low >= 0) + weight += (argmax_w_low + 1 - argmax_w) * + im_data[argmax_h_high * data_width + argmax_w_low]; + if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) + weight += (argmax_w - argmax_w_low) * + im_data[argmax_h_high * data_width + argmax_w_high]; + } else if (bp_dir == 1) { + if (argmax_h_low >= 0 && argmax_w_low >= 0) + weight += -1 * (argmax_h_low + 1 - argmax_h) * + im_data[argmax_h_low * data_width + argmax_w_low]; + if (argmax_h_low >= 0 && argmax_w_high <= width - 1) + weight += (argmax_h_low + 1 - argmax_h) * + im_data[argmax_h_low * data_width + argmax_w_high]; + if (argmax_h_high <= height - 1 && argmax_w_low >= 0) + weight += -1 * (argmax_h - argmax_h_low) * + im_data[argmax_h_high * data_width + argmax_w_low]; + if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) + weight += (argmax_h - argmax_h_low) * + im_data[argmax_h_high * data_width + argmax_w_high]; + } + + return weight; +} + +template +__global__ void deformable_im2col_gpu_kernel( + const int n, + const scalar_t* data_im, + const scalar_t* data_offset, + const int height, + const int width, + const int kernel_h, + const int kernel_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int channel_per_deformable_group, + const int batch_size, + const int num_channels, + const int deformable_group, + const int height_col, + const int width_col, + scalar_t* data_col) { + CUDA_KERNEL_LOOP(index, n) { + // index index of output matrix + const int w_col = index % width_col; + const int h_col = (index / width_col) % height_col; + const int b_col = (index / width_col / height_col) % batch_size; + const int c_im = (index / width_col / height_col) / batch_size; + const int c_col = c_im * kernel_h * kernel_w; + + // compute deformable group index + const int deformable_group_index = c_im / channel_per_deformable_group; + + const int h_in = h_col * stride_h - pad_h; + const int w_in = w_col * stride_w - pad_w; + scalar_t* data_col_ptr = data_col + + ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col; + // const scalar_t* data_im_ptr = data_im + 
((b_col * num_channels + c_im) * + // height + h_in) * width + w_in; + const scalar_t* data_im_ptr = + data_im + (b_col * num_channels + c_im) * height * width; + const scalar_t* data_offset_ptr = data_offset + + (b_col * deformable_group + deformable_group_index) * 2 * kernel_h * + kernel_w * height_col * width_col; + + for (int i = 0; i < kernel_h; ++i) { + for (int j = 0; j < kernel_w; ++j) { + const int data_offset_h_ptr = + ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col; + const int data_offset_w_ptr = + ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col + + w_col; + const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; + const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; + scalar_t val = static_cast(0); + const scalar_t h_im = h_in + i * dilation_h + offset_h; + const scalar_t w_im = w_in + j * dilation_w + offset_w; + if (h_im > -1 && w_im > -1 && h_im < height && w_im < width) { + // const scalar_t map_h = i * dilation_h + offset_h; + // const scalar_t map_w = j * dilation_w + offset_w; + // const int cur_height = height - h_in; + // const int cur_width = width - w_in; + // val = deformable_im2col_bilinear(data_im_ptr, width, cur_height, + // cur_width, map_h, map_w); + val = deformable_im2col_bilinear( + data_im_ptr, width, height, width, h_im, w_im); + } + *data_col_ptr = val; + data_col_ptr += batch_size * height_col * width_col; + } + } + } +} + + +template +__global__ void deformable_col2im_gpu_kernel( + const int n, + const scalar_t* data_col, + const scalar_t* data_offset, + const int channels, + const int height, + const int width, + const int kernel_h, + const int kernel_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int channel_per_deformable_group, + const int batch_size, + const int deformable_group, + const int height_col, + const int width_col, + scalar_t* grad_im) { + CUDA_KERNEL_LOOP(index, n) { + const int j = (index / width_col / height_col / batch_size) % kernel_w; + const int i = + (index / width_col / height_col / batch_size / kernel_w) % kernel_h; + const int c = + index / width_col / height_col / batch_size / kernel_w / kernel_h; + // compute the start and end of the output + + const int deformable_group_index = c / channel_per_deformable_group; + + int w_out = index % width_col; + int h_out = (index / width_col) % height_col; + int b = (index / width_col / height_col) % batch_size; + int w_in = w_out * stride_w - pad_w; + int h_in = h_out * stride_h - pad_h; + + const scalar_t* data_offset_ptr = data_offset + + (b * deformable_group + deformable_group_index) * 2 * kernel_h * + kernel_w * height_col * width_col; + const int data_offset_h_ptr = + ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out; + const int data_offset_w_ptr = + ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out; + const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; + const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; + const scalar_t cur_inv_h_data = h_in + i * dilation_h + offset_h; + const scalar_t cur_inv_w_data = w_in + j * dilation_w + offset_w; + + const scalar_t cur_top_grad = data_col[index]; + const int cur_h = (int)cur_inv_h_data; + const int cur_w = (int)cur_inv_w_data; + for (int dy = -2; dy <= 2; dy++) { + for (int dx = -2; dx <= 2; dx++) { + if (cur_h + dy >= 0 && cur_h + dy < height && cur_w + dx >= 0 && + cur_w + dx < width && abs(cur_inv_h_data - (cur_h + dy)) < 1 && + 
abs(cur_inv_w_data - (cur_w + dx)) < 1) { + int cur_bottom_grad_pos = + ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx; + scalar_t weight = get_gradient_weight( + cur_inv_h_data, + cur_inv_w_data, + cur_h + dy, + cur_w + dx, + height, + width); + atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad); + } + } + } + } +} + + +template +__global__ void deformable_col2im_coord_gpu_kernel( + const int n, + const scalar_t* data_col, + const scalar_t* data_im, + const scalar_t* data_offset, + const int channels, + const int height, + const int width, + const int kernel_h, + const int kernel_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int channel_per_deformable_group, + const int batch_size, + const int offset_channels, + const int deformable_group, + const int height_col, + const int width_col, + scalar_t* grad_offset) { + CUDA_KERNEL_LOOP(index, n) { + scalar_t val = 0; + int w = index % width_col; + int h = (index / width_col) % height_col; + int c = (index / width_col / height_col) % offset_channels; + int b = (index / width_col / height_col) / offset_channels; + // compute the start and end of the output + + const int deformable_group_index = c / (2 * kernel_h * kernel_w); + const int col_step = kernel_h * kernel_w; + int cnt = 0; + const scalar_t* data_col_ptr = data_col + + deformable_group_index * channel_per_deformable_group * batch_size * + width_col * height_col; + const scalar_t* data_im_ptr = data_im + + (b * deformable_group + deformable_group_index) * + channel_per_deformable_group / kernel_h / kernel_w * height * width; + const scalar_t* data_offset_ptr = data_offset + + (b * deformable_group + deformable_group_index) * 2 * kernel_h * + kernel_w * height_col * width_col; + + const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w; + + for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group; + col_c += col_step) { + const int col_pos = + (((col_c * batch_size + b) * height_col) + h) * width_col + w; + const int bp_dir = offset_c % 2; + + int j = (col_pos / width_col / height_col / batch_size) % kernel_w; + int i = + (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h; + int w_out = col_pos % width_col; + int h_out = (col_pos / width_col) % height_col; + int w_in = w_out * stride_w - pad_w; + int h_in = h_out * stride_h - pad_h; + const int data_offset_h_ptr = + (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out); + const int data_offset_w_ptr = + (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + + w_out); + const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; + const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; + scalar_t inv_h = h_in + i * dilation_h + offset_h; + scalar_t inv_w = w_in + j * dilation_w + offset_w; + if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width) { + inv_h = inv_w = -2; + } + const scalar_t weight = get_coordinate_weight( + inv_h, + inv_w, + height, + width, + data_im_ptr + cnt * height * width, + width, + bp_dir); + val += weight * data_col_ptr[col_pos]; + cnt += 1; + } + + grad_offset[index] = val; + } +} + + +namespace detectron2 { + +void deformable_im2col( + const at::Tensor data_im, + const at::Tensor data_offset, + const int channels, + const int height, + const int width, + const int ksize_h, + const int ksize_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int 
dilation_h, + const int dilation_w, + const int parallel_imgs, + const int deformable_group, + at::Tensor data_col) { + // num_axes should be smaller than block size + // todo: check parallel_imgs is correctly passed in + int height_col = + (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; + int width_col = + (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; + int num_kernels = channels * height_col * width_col * parallel_imgs; + int channel_per_deformable_group = channels / deformable_group; + + at::cuda::CUDAGuard device_guard(data_im.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + data_im.scalar_type(), "deformable_im2col_gpu", ([&] { + const scalar_t* data_im_ = data_im.data_ptr(); + const scalar_t* data_offset_ = data_offset.data_ptr(); + scalar_t* data_col_ = data_col.data_ptr(); + + deformable_im2col_gpu_kernel<<< + GET_BLOCKS(num_kernels), + CUDA_NUM_THREADS, + 0, + stream>>>( + num_kernels, + data_im_, + data_offset_, + height, + width, + ksize_h, + ksize_w, + pad_h, + pad_w, + stride_h, + stride_w, + dilation_h, + dilation_w, + channel_per_deformable_group, + parallel_imgs, + channels, + deformable_group, + height_col, + width_col, + data_col_); + })); + + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) { + printf("error in deformable_im2col: %s\n", cudaGetErrorString(err)); + } +} + + +void deformable_col2im( + const at::Tensor data_col, + const at::Tensor data_offset, + const int channels, + const int height, + const int width, + const int ksize_h, + const int ksize_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int parallel_imgs, + const int deformable_group, + at::Tensor grad_im) { + // todo: make sure parallel_imgs is passed in correctly + int height_col = + (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; + int width_col = + (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; + int num_kernels = + channels * ksize_h * ksize_w * height_col * width_col * parallel_imgs; + int channel_per_deformable_group = channels / deformable_group; + + at::cuda::CUDAGuard device_guard(data_col.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + data_col.scalar_type(), "deformable_col2im_gpu", ([&] { + const scalar_t* data_col_ = data_col.data_ptr(); + const scalar_t* data_offset_ = data_offset.data_ptr(); + scalar_t* grad_im_ = grad_im.data_ptr(); + + deformable_col2im_gpu_kernel<<< + GET_BLOCKS(num_kernels), + CUDA_NUM_THREADS, + 0, + stream>>>( + num_kernels, + data_col_, + data_offset_, + channels, + height, + width, + ksize_h, + ksize_w, + pad_h, + pad_w, + stride_h, + stride_w, + dilation_h, + dilation_w, + channel_per_deformable_group, + parallel_imgs, + deformable_group, + height_col, + width_col, + grad_im_); + })); + + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) { + printf("error in deformable_col2im: %s\n", cudaGetErrorString(err)); + } +} + + +void deformable_col2im_coord( + const at::Tensor data_col, + const at::Tensor data_im, + const at::Tensor data_offset, + const int channels, + const int height, + const int width, + const int ksize_h, + const int ksize_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int parallel_imgs, + const int deformable_group, + 
at::Tensor grad_offset) { + int height_col = + (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; + int width_col = + (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; + int num_kernels = height_col * width_col * 2 * ksize_h * ksize_w * + deformable_group * parallel_imgs; + int channel_per_deformable_group = + channels * ksize_h * ksize_w / deformable_group; + + at::cuda::CUDAGuard device_guard(data_col.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + data_col.scalar_type(), "deformable_col2im_coord_gpu", ([&] { + const scalar_t* data_col_ = data_col.data_ptr(); + const scalar_t* data_im_ = data_im.data_ptr(); + const scalar_t* data_offset_ = data_offset.data_ptr(); + scalar_t* grad_offset_ = grad_offset.data_ptr(); + + deformable_col2im_coord_gpu_kernel<<< + GET_BLOCKS(num_kernels), + CUDA_NUM_THREADS, + 0, + stream>>>( + num_kernels, + data_col_, + data_im_, + data_offset_, + channels, + height, + width, + ksize_h, + ksize_w, + pad_h, + pad_w, + stride_h, + stride_w, + dilation_h, + dilation_w, + channel_per_deformable_group, + parallel_imgs, + 2 * ksize_h * ksize_w * deformable_group, + deformable_group, + height_col, + width_col, + grad_offset_); + })); +} + +} // namespace detectron2 + + +template +__device__ scalar_t dmcn_im2col_bilinear( + const scalar_t* bottom_data, + const int data_width, + const int height, + const int width, + scalar_t h, + scalar_t w) { + int h_low = floor(h); + int w_low = floor(w); + int h_high = h_low + 1; + int w_high = w_low + 1; + + scalar_t lh = h - h_low; + scalar_t lw = w - w_low; + scalar_t hh = 1 - lh, hw = 1 - lw; + + scalar_t v1 = 0; + if (h_low >= 0 && w_low >= 0) + v1 = bottom_data[h_low * data_width + w_low]; + scalar_t v2 = 0; + if (h_low >= 0 && w_high <= width - 1) + v2 = bottom_data[h_low * data_width + w_high]; + scalar_t v3 = 0; + if (h_high <= height - 1 && w_low >= 0) + v3 = bottom_data[h_high * data_width + w_low]; + scalar_t v4 = 0; + if (h_high <= height - 1 && w_high <= width - 1) + v4 = bottom_data[h_high * data_width + w_high]; + + scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw; + + scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + return val; +} + +template +__device__ scalar_t dmcn_get_gradient_weight( + scalar_t argmax_h, + scalar_t argmax_w, + const int h, + const int w, + const int height, + const int width) { + if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || + argmax_w >= width) { + // empty + return 0; + } + + int argmax_h_low = floor(argmax_h); + int argmax_w_low = floor(argmax_w); + int argmax_h_high = argmax_h_low + 1; + int argmax_w_high = argmax_w_low + 1; + + scalar_t weight = 0; + if (h == argmax_h_low && w == argmax_w_low) + weight = (h + 1 - argmax_h) * (w + 1 - argmax_w); + if (h == argmax_h_low && w == argmax_w_high) + weight = (h + 1 - argmax_h) * (argmax_w + 1 - w); + if (h == argmax_h_high && w == argmax_w_low) + weight = (argmax_h + 1 - h) * (w + 1 - argmax_w); + if (h == argmax_h_high && w == argmax_w_high) + weight = (argmax_h + 1 - h) * (argmax_w + 1 - w); + return weight; +} + +template +__device__ scalar_t dmcn_get_coordinate_weight( + scalar_t argmax_h, + scalar_t argmax_w, + const int height, + const int width, + const scalar_t* im_data, + const int data_width, + const int bp_dir) { + if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || + argmax_w >= width) { + // empty + return 0; + } + + int argmax_h_low = floor(argmax_h); + int argmax_w_low = 
floor(argmax_w); + int argmax_h_high = argmax_h_low + 1; + int argmax_w_high = argmax_w_low + 1; + + scalar_t weight = 0; + + if (bp_dir == 0) { + if (argmax_h_low >= 0 && argmax_w_low >= 0) + weight += -1 * (argmax_w_low + 1 - argmax_w) * + im_data[argmax_h_low * data_width + argmax_w_low]; + if (argmax_h_low >= 0 && argmax_w_high <= width - 1) + weight += -1 * (argmax_w - argmax_w_low) * + im_data[argmax_h_low * data_width + argmax_w_high]; + if (argmax_h_high <= height - 1 && argmax_w_low >= 0) + weight += (argmax_w_low + 1 - argmax_w) * + im_data[argmax_h_high * data_width + argmax_w_low]; + if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) + weight += (argmax_w - argmax_w_low) * + im_data[argmax_h_high * data_width + argmax_w_high]; + } else if (bp_dir == 1) { + if (argmax_h_low >= 0 && argmax_w_low >= 0) + weight += -1 * (argmax_h_low + 1 - argmax_h) * + im_data[argmax_h_low * data_width + argmax_w_low]; + if (argmax_h_low >= 0 && argmax_w_high <= width - 1) + weight += (argmax_h_low + 1 - argmax_h) * + im_data[argmax_h_low * data_width + argmax_w_high]; + if (argmax_h_high <= height - 1 && argmax_w_low >= 0) + weight += -1 * (argmax_h - argmax_h_low) * + im_data[argmax_h_high * data_width + argmax_w_low]; + if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) + weight += (argmax_h - argmax_h_low) * + im_data[argmax_h_high * data_width + argmax_w_high]; + } + + return weight; +} + +template +__global__ void modulated_deformable_im2col_gpu_kernel( + const int n, + const scalar_t* data_im, + const scalar_t* data_offset, + const scalar_t* data_mask, + const int height, + const int width, + const int kernel_h, + const int kernel_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int channel_per_deformable_group, + const int batch_size, + const int num_channels, + const int deformable_group, + const int height_col, + const int width_col, + scalar_t* data_col) { + CUDA_KERNEL_LOOP(index, n) { + // index index of output matrix + const int w_col = index % width_col; + const int h_col = (index / width_col) % height_col; + const int b_col = (index / width_col / height_col) % batch_size; + const int c_im = (index / width_col / height_col) / batch_size; + const int c_col = c_im * kernel_h * kernel_w; + + // compute deformable group index + const int deformable_group_index = c_im / channel_per_deformable_group; + + const int h_in = h_col * stride_h - pad_h; + const int w_in = w_col * stride_w - pad_w; + + scalar_t* data_col_ptr = data_col + + ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col; + // const float* data_im_ptr = data_im + ((b_col * num_channels + c_im) * + // height + h_in) * width + w_in; + const scalar_t* data_im_ptr = + data_im + (b_col * num_channels + c_im) * height * width; + const scalar_t* data_offset_ptr = data_offset + + (b_col * deformable_group + deformable_group_index) * 2 * kernel_h * + kernel_w * height_col * width_col; + + const scalar_t* data_mask_ptr = data_mask + + (b_col * deformable_group + deformable_group_index) * kernel_h * + kernel_w * height_col * width_col; + + for (int i = 0; i < kernel_h; ++i) { + for (int j = 0; j < kernel_w; ++j) { + const int data_offset_h_ptr = + ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col; + const int data_offset_w_ptr = + ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col + + w_col; + const int data_mask_hw_ptr = + ((i * kernel_w + j) * height_col + h_col) * 
width_col + w_col; + const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; + const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; + const scalar_t mask = data_mask_ptr[data_mask_hw_ptr]; + scalar_t val = static_cast(0); + const scalar_t h_im = h_in + i * dilation_h + offset_h; + const scalar_t w_im = w_in + j * dilation_w + offset_w; + // if (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) { + if (h_im > -1 && w_im > -1 && h_im < height && w_im < width) { + // const float map_h = i * dilation_h + offset_h; + // const float map_w = j * dilation_w + offset_w; + // const int cur_height = height - h_in; + // const int cur_width = width - w_in; + // val = dmcn_im2col_bilinear(data_im_ptr, width, cur_height, + // cur_width, map_h, map_w); + val = dmcn_im2col_bilinear( + data_im_ptr, width, height, width, h_im, w_im); + } + *data_col_ptr = val * mask; + data_col_ptr += batch_size * height_col * width_col; + // data_col_ptr += height_col * width_col; + } + } + } +} + +template +__global__ void modulated_deformable_col2im_gpu_kernel( + const int n, + const scalar_t* data_col, + const scalar_t* data_offset, + const scalar_t* data_mask, + const int channels, + const int height, + const int width, + const int kernel_h, + const int kernel_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int channel_per_deformable_group, + const int batch_size, + const int deformable_group, + const int height_col, + const int width_col, + scalar_t* grad_im) { + CUDA_KERNEL_LOOP(index, n) { + const int j = (index / width_col / height_col / batch_size) % kernel_w; + const int i = + (index / width_col / height_col / batch_size / kernel_w) % kernel_h; + const int c = + index / width_col / height_col / batch_size / kernel_w / kernel_h; + // compute the start and end of the output + + const int deformable_group_index = c / channel_per_deformable_group; + + int w_out = index % width_col; + int h_out = (index / width_col) % height_col; + int b = (index / width_col / height_col) % batch_size; + int w_in = w_out * stride_w - pad_w; + int h_in = h_out * stride_h - pad_h; + + const scalar_t* data_offset_ptr = data_offset + + (b * deformable_group + deformable_group_index) * 2 * kernel_h * + kernel_w * height_col * width_col; + const scalar_t* data_mask_ptr = data_mask + + (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * + height_col * width_col; + const int data_offset_h_ptr = + ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out; + const int data_offset_w_ptr = + ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out; + const int data_mask_hw_ptr = + ((i * kernel_w + j) * height_col + h_out) * width_col + w_out; + const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; + const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; + const scalar_t mask = data_mask_ptr[data_mask_hw_ptr]; + const scalar_t cur_inv_h_data = h_in + i * dilation_h + offset_h; + const scalar_t cur_inv_w_data = w_in + j * dilation_w + offset_w; + + const scalar_t cur_top_grad = data_col[index] * mask; + const int cur_h = (int)cur_inv_h_data; + const int cur_w = (int)cur_inv_w_data; + for (int dy = -2; dy <= 2; dy++) { + for (int dx = -2; dx <= 2; dx++) { + if (cur_h + dy >= 0 && cur_h + dy < height && cur_w + dx >= 0 && + cur_w + dx < width && abs(cur_inv_h_data - (cur_h + dy)) < 1 && + abs(cur_inv_w_data - (cur_w + dx)) < 1) { + int cur_bottom_grad_pos = + ((b * channels + c) 
* height + cur_h + dy) * width + cur_w + dx; + scalar_t weight = dmcn_get_gradient_weight( + cur_inv_h_data, + cur_inv_w_data, + cur_h + dy, + cur_w + dx, + height, + width); + atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad); + } + } + } + } +} + +template +__global__ void modulated_deformable_col2im_coord_gpu_kernel( + const int n, + const scalar_t* data_col, + const scalar_t* data_im, + const scalar_t* data_offset, + const scalar_t* data_mask, + const int channels, + const int height, + const int width, + const int kernel_h, + const int kernel_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int channel_per_deformable_group, + const int batch_size, + const int offset_channels, + const int deformable_group, + const int height_col, + const int width_col, + scalar_t* grad_offset, + scalar_t* grad_mask) { + CUDA_KERNEL_LOOP(index, n) { + scalar_t val = 0, mval = 0; + int w = index % width_col; + int h = (index / width_col) % height_col; + int c = (index / width_col / height_col) % offset_channels; + int b = (index / width_col / height_col) / offset_channels; + // compute the start and end of the output + + const int deformable_group_index = c / (2 * kernel_h * kernel_w); + const int col_step = kernel_h * kernel_w; + int cnt = 0; + const scalar_t* data_col_ptr = data_col + + deformable_group_index * channel_per_deformable_group * batch_size * + width_col * height_col; + const scalar_t* data_im_ptr = data_im + + (b * deformable_group + deformable_group_index) * + channel_per_deformable_group / kernel_h / kernel_w * height * width; + const scalar_t* data_offset_ptr = data_offset + + (b * deformable_group + deformable_group_index) * 2 * kernel_h * + kernel_w * height_col * width_col; + const scalar_t* data_mask_ptr = data_mask + + (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * + height_col * width_col; + + const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w; + + for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group; + col_c += col_step) { + const int col_pos = + (((col_c * batch_size + b) * height_col) + h) * width_col + w; + const int bp_dir = offset_c % 2; + + int j = (col_pos / width_col / height_col / batch_size) % kernel_w; + int i = + (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h; + int w_out = col_pos % width_col; + int h_out = (col_pos / width_col) % height_col; + int w_in = w_out * stride_w - pad_w; + int h_in = h_out * stride_h - pad_h; + const int data_offset_h_ptr = + (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out); + const int data_offset_w_ptr = + (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + + w_out); + const int data_mask_hw_ptr = + (((i * kernel_w + j) * height_col + h_out) * width_col + w_out); + const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; + const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; + const scalar_t mask = data_mask_ptr[data_mask_hw_ptr]; + scalar_t inv_h = h_in + i * dilation_h + offset_h; + scalar_t inv_w = w_in + j * dilation_w + offset_w; + if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width) { + inv_h = inv_w = -2; + } else { + mval += data_col_ptr[col_pos] * + dmcn_im2col_bilinear( + data_im_ptr + cnt * height * width, + width, + height, + width, + inv_h, + inv_w); + } + const scalar_t weight = dmcn_get_coordinate_weight( + inv_h, + inv_w, + height, + width, + data_im_ptr + cnt * 
height * width, + width, + bp_dir); + val += weight * data_col_ptr[col_pos] * mask; + cnt += 1; + } + // KERNEL_ASSIGN(grad_offset[index], offset_req, val); + grad_offset[index] = val; + if (offset_c % 2 == 0) + // KERNEL_ASSIGN(grad_mask[(((b * deformable_group + + // deformable_group_index) * kernel_h * kernel_w + offset_c / 2) * + // height_col + h) * width_col + w], mask_req, mval); + grad_mask + [(((b * deformable_group + deformable_group_index) * kernel_h * + kernel_w + + offset_c / 2) * + height_col + + h) * + width_col + + w] = mval; + } +} + + +namespace detectron2 { + +void modulated_deformable_im2col_cuda( + const at::Tensor data_im, + const at::Tensor data_offset, + const at::Tensor data_mask, + const int batch_size, + const int channels, + const int height_im, + const int width_im, + const int height_col, + const int width_col, + const int kernel_h, + const int kenerl_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int deformable_group, + at::Tensor data_col) { + // num_axes should be smaller than block size + const int channel_per_deformable_group = channels / deformable_group; + const int num_kernels = channels * batch_size * height_col * width_col; + + at::cuda::CUDAGuard device_guard(data_im.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + data_im.scalar_type(), "modulated_deformable_im2col_gpu", ([&] { + const scalar_t* data_im_ = data_im.data_ptr(); + const scalar_t* data_offset_ = data_offset.data_ptr(); + const scalar_t* data_mask_ = data_mask.data_ptr(); + scalar_t* data_col_ = data_col.data_ptr(); + + modulated_deformable_im2col_gpu_kernel<<< + GET_BLOCKS(num_kernels), + CUDA_NUM_THREADS, + 0, + stream>>>( + num_kernels, + data_im_, + data_offset_, + data_mask_, + height_im, + width_im, + kernel_h, + kenerl_w, + pad_h, + pad_w, + stride_h, + stride_w, + dilation_h, + dilation_w, + channel_per_deformable_group, + batch_size, + channels, + deformable_group, + height_col, + width_col, + data_col_); + })); + + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) { + printf( + "error in modulated_deformable_im2col_cuda: %s\n", + cudaGetErrorString(err)); + } +} + +void modulated_deformable_col2im_cuda( + const at::Tensor data_col, + const at::Tensor data_offset, + const at::Tensor data_mask, + const int batch_size, + const int channels, + const int height_im, + const int width_im, + const int height_col, + const int width_col, + const int kernel_h, + const int kernel_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int deformable_group, + at::Tensor grad_im) { + const int channel_per_deformable_group = channels / deformable_group; + const int num_kernels = + channels * kernel_h * kernel_w * batch_size * height_col * width_col; + + at::cuda::CUDAGuard device_guard(data_col.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + data_col.scalar_type(), "modulated_deformable_col2im_gpu", ([&] { + const scalar_t* data_col_ = data_col.data_ptr(); + const scalar_t* data_offset_ = data_offset.data_ptr(); + const scalar_t* data_mask_ = data_mask.data_ptr(); + scalar_t* grad_im_ = grad_im.data_ptr(); + + modulated_deformable_col2im_gpu_kernel<<< + GET_BLOCKS(num_kernels), + CUDA_NUM_THREADS, + 0, + stream>>>( + num_kernels, + data_col_, + data_offset_, + data_mask_, + 
channels, + height_im, + width_im, + kernel_h, + kernel_w, + pad_h, + pad_w, + stride_h, + stride_w, + dilation_h, + dilation_w, + channel_per_deformable_group, + batch_size, + deformable_group, + height_col, + width_col, + grad_im_); + })); + + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) { + printf( + "error in modulated_deformable_col2im_cuda: %s\n", + cudaGetErrorString(err)); + } +} + +void modulated_deformable_col2im_coord_cuda( + const at::Tensor data_col, + const at::Tensor data_im, + const at::Tensor data_offset, + const at::Tensor data_mask, + const int batch_size, + const int channels, + const int height_im, + const int width_im, + const int height_col, + const int width_col, + const int kernel_h, + const int kernel_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int deformable_group, + at::Tensor grad_offset, + at::Tensor grad_mask) { + const int num_kernels = batch_size * height_col * width_col * 2 * kernel_h * + kernel_w * deformable_group; + const int channel_per_deformable_group = + channels * kernel_h * kernel_w / deformable_group; + + at::cuda::CUDAGuard device_guard(data_col.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + data_col.scalar_type(), "modulated_deformable_col2im_coord_gpu", ([&] { + const scalar_t* data_col_ = data_col.data_ptr(); + const scalar_t* data_im_ = data_im.data_ptr(); + const scalar_t* data_offset_ = data_offset.data_ptr(); + const scalar_t* data_mask_ = data_mask.data_ptr(); + scalar_t* grad_offset_ = grad_offset.data_ptr(); + scalar_t* grad_mask_ = grad_mask.data_ptr(); + + modulated_deformable_col2im_coord_gpu_kernel<<< + GET_BLOCKS(num_kernels), + CUDA_NUM_THREADS, + 0, + stream>>>( + num_kernels, + data_col_, + data_im_, + data_offset_, + data_mask_, + channels, + height_im, + width_im, + kernel_h, + kernel_w, + pad_h, + pad_w, + stride_h, + stride_w, + dilation_h, + dilation_w, + channel_per_deformable_group, + batch_size, + 2 * kernel_h * kernel_w * deformable_group, + deformable_group, + height_col, + width_col, + grad_offset_, + grad_mask_); + })); + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) { + printf( + "error in modulated_deformable_col2im_coord_cuda: %s\n", + cudaGetErrorString(err)); + } +} + +} // namespace detectron2 diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/nms_rotated/nms_rotated.h b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/nms_rotated/nms_rotated.h new file mode 100644 index 0000000000000000000000000000000000000000..12aca388e47b12dafd20999f2991a9d42f4b904b --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/nms_rotated/nms_rotated.h @@ -0,0 +1,39 @@ +// Copyright (c) Facebook, Inc. and its affiliates. 
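+// Declares rotated-box NMS: a CPU implementation, an optional CUDA/HIP
+// implementation, and an inline dispatcher (nms_rotated) that picks the
+// backend from the device of the input tensors.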
+#pragma once +#include + +namespace detectron2 { + +at::Tensor nms_rotated_cpu( + const at::Tensor& dets, + const at::Tensor& scores, + const double iou_threshold); + +#if defined(WITH_CUDA) || defined(WITH_HIP) +at::Tensor nms_rotated_cuda( + const at::Tensor& dets, + const at::Tensor& scores, + const double iou_threshold); +#endif + +// Interface for Python +// inline is needed to prevent multiple function definitions when this header is +// included by different cpps +inline at::Tensor nms_rotated( + const at::Tensor& dets, + const at::Tensor& scores, + const double iou_threshold) { + assert(dets.device().is_cuda() == scores.device().is_cuda()); + if (dets.device().is_cuda()) { +#if defined(WITH_CUDA) || defined(WITH_HIP) + return nms_rotated_cuda( + dets.contiguous(), scores.contiguous(), iou_threshold); +#else + AT_ERROR("Detectron2 is not compiled with GPU support!"); +#endif + } + + return nms_rotated_cpu(dets.contiguous(), scores.contiguous(), iou_threshold); +} + +} // namespace detectron2 diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.cpp b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d7556e645b604aa83d86cc702b783fd8ecedffcc --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.cpp @@ -0,0 +1,75 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +#include "../box_iou_rotated/box_iou_rotated_utils.h" +#include "nms_rotated.h" + +namespace detectron2 { + +template +at::Tensor nms_rotated_cpu_kernel( + const at::Tensor& dets, + const at::Tensor& scores, + const double iou_threshold) { + // nms_rotated_cpu_kernel is modified from torchvision's nms_cpu_kernel, + // however, the code in this function is much shorter because + // we delegate the IoU computation for rotated boxes to + // the single_box_iou_rotated function in box_iou_rotated_utils.h + AT_ASSERTM(dets.device().is_cpu(), "dets must be a CPU tensor"); + AT_ASSERTM(scores.device().is_cpu(), "scores must be a CPU tensor"); + AT_ASSERTM( + dets.scalar_type() == scores.scalar_type(), + "dets should have the same type as scores"); + + if (dets.numel() == 0) { + return at::empty({0}, dets.options().dtype(at::kLong)); + } + + auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); + + auto ndets = dets.size(0); + at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte)); + at::Tensor keep_t = at::zeros({ndets}, dets.options().dtype(at::kLong)); + + auto suppressed = suppressed_t.data_ptr(); + auto keep = keep_t.data_ptr(); + auto order = order_t.data_ptr(); + + int64_t num_to_keep = 0; + + for (int64_t _i = 0; _i < ndets; _i++) { + auto i = order[_i]; + if (suppressed[i] == 1) { + continue; + } + + keep[num_to_keep++] = i; + + for (int64_t _j = _i + 1; _j < ndets; _j++) { + auto j = order[_j]; + if (suppressed[j] == 1) { + continue; + } + + auto ovr = single_box_iou_rotated( + dets[i].data_ptr(), dets[j].data_ptr()); + if (ovr >= iou_threshold) { + suppressed[j] = 1; + } + } + } + return keep_t.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep); +} + +at::Tensor nms_rotated_cpu( + // input must be contiguous + const at::Tensor& dets, + const at::Tensor& scores, + const double iou_threshold) { + auto result = at::empty({0}, dets.options()); + + 
AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms_rotated", [&] { + result = nms_rotated_cpu_kernel(dets, scores, iou_threshold); + }); + return result; +} + +} // namespace detectron2 diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.cu b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.cu new file mode 100644 index 0000000000000000000000000000000000000000..2a3db5c62e7a2da52ccf5bac980653c943d630fd --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.cu @@ -0,0 +1,145 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +#include +#include +#include +#include +#ifdef WITH_CUDA +#include "../box_iou_rotated/box_iou_rotated_utils.h" +#endif +// TODO avoid this when pytorch supports "same directory" hipification +#ifdef WITH_HIP +#include "box_iou_rotated/box_iou_rotated_utils.h" +#endif + +using namespace detectron2; + +namespace { +int const threadsPerBlock = sizeof(unsigned long long) * 8; +} + +template +__global__ void nms_rotated_cuda_kernel( + const int n_boxes, + const double iou_threshold, + const T* dev_boxes, + unsigned long long* dev_mask) { + // nms_rotated_cuda_kernel is modified from torchvision's nms_cuda_kernel + + const int row_start = blockIdx.y; + const int col_start = blockIdx.x; + + // if (row_start > col_start) return; + + const int row_size = + min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); + const int col_size = + min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); + + // Compared to nms_cuda_kernel, where each box is represented with 4 values + // (x1, y1, x2, y2), each rotated box is represented with 5 values + // (x_center, y_center, width, height, angle_degrees) here. 
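+  // Each block compares one 64-box row tile against one 64-box column tile:
+  // the column tile is staged in shared memory, every thread then tests a
+  // single row box against all column boxes, and IoU > iou_threshold hits
+  // are recorded as bits of an unsigned long long (threadsPerBlock == 64)
+  // written to dev_mask[cur_box_idx * col_blocks + col_start].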
+ __shared__ T block_boxes[threadsPerBlock * 5]; + if (threadIdx.x < col_size) { + block_boxes[threadIdx.x * 5 + 0] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; + block_boxes[threadIdx.x * 5 + 1] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; + block_boxes[threadIdx.x * 5 + 2] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; + block_boxes[threadIdx.x * 5 + 3] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; + block_boxes[threadIdx.x * 5 + 4] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; + } + __syncthreads(); + + if (threadIdx.x < row_size) { + const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; + const T* cur_box = dev_boxes + cur_box_idx * 5; + int i = 0; + unsigned long long t = 0; + int start = 0; + if (row_start == col_start) { + start = threadIdx.x + 1; + } + for (i = start; i < col_size; i++) { + // Instead of devIoU used by original horizontal nms, here + // we use the single_box_iou_rotated function from box_iou_rotated_utils.h + if (single_box_iou_rotated(cur_box, block_boxes + i * 5) > + iou_threshold) { + t |= 1ULL << i; + } + } + const int col_blocks = at::cuda::ATenCeilDiv(n_boxes, threadsPerBlock); + dev_mask[cur_box_idx * col_blocks + col_start] = t; + } +} + +namespace detectron2 { + +at::Tensor nms_rotated_cuda( + // input must be contiguous + const at::Tensor& dets, + const at::Tensor& scores, + double iou_threshold) { + // using scalar_t = float; + AT_ASSERTM(dets.is_cuda(), "dets must be a CUDA tensor"); + AT_ASSERTM(scores.is_cuda(), "scores must be a CUDA tensor"); + at::cuda::CUDAGuard device_guard(dets.device()); + + auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); + auto dets_sorted = dets.index_select(0, order_t); + + auto dets_num = dets.size(0); + + const int col_blocks = + at::cuda::ATenCeilDiv(static_cast(dets_num), threadsPerBlock); + + at::Tensor mask = + at::empty({dets_num * col_blocks}, dets.options().dtype(at::kLong)); + + dim3 blocks(col_blocks, col_blocks); + dim3 threads(threadsPerBlock); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + AT_DISPATCH_FLOATING_TYPES( + dets_sorted.scalar_type(), "nms_rotated_kernel_cuda", [&] { + nms_rotated_cuda_kernel<<>>( + dets_num, + iou_threshold, + dets_sorted.data_ptr(), + (unsigned long long*)mask.data_ptr()); + }); + + at::Tensor mask_cpu = mask.to(at::kCPU); + unsigned long long* mask_host = + (unsigned long long*)mask_cpu.data_ptr(); + + std::vector remv(col_blocks); + memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); + + at::Tensor keep = + at::empty({dets_num}, dets.options().dtype(at::kLong).device(at::kCPU)); + int64_t* keep_out = keep.data_ptr(); + + int num_to_keep = 0; + for (int i = 0; i < dets_num; i++) { + int nblock = i / threadsPerBlock; + int inblock = i % threadsPerBlock; + + if (!(remv[nblock] & (1ULL << inblock))) { + keep_out[num_to_keep++] = i; + unsigned long long* p = mask_host + i * col_blocks; + for (int j = nblock; j < col_blocks; j++) { + remv[j] |= p[j]; + } + } + } + + AT_CUDA_CHECK(cudaGetLastError()); + return order_t.index( + {keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep) + .to(order_t.device(), keep.scalar_type())}); +} + +} // namespace detectron2 diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/vision.cpp b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/vision.cpp new file mode 100644 index 
0000000000000000000000000000000000000000..c9a2cd4f20e6f58be1c5783d67c64232dd59b560 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/csrc/vision.cpp @@ -0,0 +1,117 @@ +// Copyright (c) Facebook, Inc. and its affiliates. + +#include +#include "ROIAlignRotated/ROIAlignRotated.h" +#include "box_iou_rotated/box_iou_rotated.h" +#include "cocoeval/cocoeval.h" +#include "deformable/deform_conv.h" +#include "nms_rotated/nms_rotated.h" + +namespace detectron2 { + +#if defined(WITH_CUDA) || defined(WITH_HIP) +extern int get_cudart_version(); +#endif + +std::string get_cuda_version() { +#if defined(WITH_CUDA) || defined(WITH_HIP) + std::ostringstream oss; + +#if defined(WITH_CUDA) + oss << "CUDA "; +#else + oss << "HIP "; +#endif + + // copied from + // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231 + auto printCudaStyleVersion = [&](int v) { + oss << (v / 1000) << "." << (v / 10 % 100); + if (v % 10 != 0) { + oss << "." << (v % 10); + } + }; + printCudaStyleVersion(get_cudart_version()); + return oss.str(); +#else // neither CUDA nor HIP + return std::string("not available"); +#endif +} + +bool has_cuda() { +#if defined(WITH_CUDA) + return true; +#else + return false; +#endif +} + +// similar to +// https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp +std::string get_compiler_version() { + std::ostringstream ss; +#if defined(__GNUC__) +#ifndef __clang__ + +#if ((__GNUC__ <= 4) && (__GNUC_MINOR__ <= 8)) +#error "GCC >= 4.9 is required!" +#endif + + { ss << "GCC " << __GNUC__ << "." << __GNUC_MINOR__; } +#endif +#endif + +#if defined(__clang_major__) + { + ss << "clang " << __clang_major__ << "." << __clang_minor__ << "." + << __clang_patchlevel__; + } +#endif + +#if defined(_MSC_VER) + { ss << "MSVC " << _MSC_FULL_VER; } +#endif + return ss.str(); +} + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("get_compiler_version", &get_compiler_version, "get_compiler_version"); + m.def("get_cuda_version", &get_cuda_version, "get_cuda_version"); + m.def("has_cuda", &has_cuda, "has_cuda"); + + m.def("deform_conv_forward", &deform_conv_forward, "deform_conv_forward"); + m.def( + "deform_conv_backward_input", + &deform_conv_backward_input, + "deform_conv_backward_input"); + m.def( + "deform_conv_backward_filter", + &deform_conv_backward_filter, + "deform_conv_backward_filter"); + m.def( + "modulated_deform_conv_forward", + &modulated_deform_conv_forward, + "modulated_deform_conv_forward"); + m.def( + "modulated_deform_conv_backward", + &modulated_deform_conv_backward, + "modulated_deform_conv_backward"); + + m.def("COCOevalAccumulate", &COCOeval::Accumulate, "COCOeval::Accumulate"); + m.def( + "COCOevalEvaluateImages", + &COCOeval::EvaluateImages, + "COCOeval::EvaluateImages"); + pybind11::class_(m, "InstanceAnnotation") + .def(pybind11::init()); + pybind11::class_(m, "ImageEvaluation") + .def(pybind11::init<>()); +} + +TORCH_LIBRARY(detectron2, m) { + m.def("nms_rotated", &nms_rotated); + m.def("box_iou_rotated", &box_iou_rotated); + m.def("roi_align_rotated_forward", &ROIAlignRotated_forward); + m.def("roi_align_rotated_backward", &ROIAlignRotated_backward); +} +} // namespace detectron2 diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/deform_conv.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/deform_conv.py new file mode 100644 index 
0000000000000000000000000000000000000000..49547238bcc67775bbeb97184467c3c8eee8df60 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/deform_conv.py @@ -0,0 +1,514 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import math +from functools import lru_cache +import torch +from torch import nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair +from torchvision.ops import deform_conv2d + +from annotator.oneformer.detectron2.utils.develop import create_dummy_class, create_dummy_func + +from .wrappers import _NewEmptyTensorOp + + +class _DeformConv(Function): + @staticmethod + def forward( + ctx, + input, + offset, + weight, + stride=1, + padding=0, + dilation=1, + groups=1, + deformable_groups=1, + im2col_step=64, + ): + if input is not None and input.dim() != 4: + raise ValueError( + "Expected 4D tensor as input, got {}D tensor instead.".format(input.dim()) + ) + ctx.stride = _pair(stride) + ctx.padding = _pair(padding) + ctx.dilation = _pair(dilation) + ctx.groups = groups + ctx.deformable_groups = deformable_groups + ctx.im2col_step = im2col_step + + ctx.save_for_backward(input, offset, weight) + + output = input.new_empty( + _DeformConv._output_size(input, weight, ctx.padding, ctx.dilation, ctx.stride) + ) + + ctx.bufs_ = [input.new_empty(0), input.new_empty(0)] # columns, ones + + if not input.is_cuda: + # TODO: let torchvision support full features of our deformconv. + if deformable_groups != 1: + raise NotImplementedError( + "Deformable Conv with deformable_groups != 1 is not supported on CPUs!" + ) + return deform_conv2d( + input, offset, weight, stride=stride, padding=padding, dilation=dilation + ) + else: + cur_im2col_step = _DeformConv._cal_im2col_step(input.shape[0], ctx.im2col_step) + assert (input.shape[0] % cur_im2col_step) == 0, "im2col step must divide batchsize" + + _C.deform_conv_forward( + input, + weight, + offset, + output, + ctx.bufs_[0], + ctx.bufs_[1], + weight.size(3), + weight.size(2), + ctx.stride[1], + ctx.stride[0], + ctx.padding[1], + ctx.padding[0], + ctx.dilation[1], + ctx.dilation[0], + ctx.groups, + ctx.deformable_groups, + cur_im2col_step, + ) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + input, offset, weight = ctx.saved_tensors + + grad_input = grad_offset = grad_weight = None + + if not grad_output.is_cuda: + raise NotImplementedError("Deformable Conv is not supported on CPUs!") + else: + cur_im2col_step = _DeformConv._cal_im2col_step(input.shape[0], ctx.im2col_step) + assert (input.shape[0] % cur_im2col_step) == 0, "im2col step must divide batchsize" + + if ctx.needs_input_grad[0] or ctx.needs_input_grad[1]: + grad_input = torch.zeros_like(input) + grad_offset = torch.zeros_like(offset) + _C.deform_conv_backward_input( + input, + offset, + grad_output, + grad_input, + grad_offset, + weight, + ctx.bufs_[0], + weight.size(3), + weight.size(2), + ctx.stride[1], + ctx.stride[0], + ctx.padding[1], + ctx.padding[0], + ctx.dilation[1], + ctx.dilation[0], + ctx.groups, + ctx.deformable_groups, + cur_im2col_step, + ) + + if ctx.needs_input_grad[2]: + grad_weight = torch.zeros_like(weight) + _C.deform_conv_backward_filter( + input, + offset, + grad_output, + grad_weight, + ctx.bufs_[0], + ctx.bufs_[1], + weight.size(3), + weight.size(2), + ctx.stride[1], + ctx.stride[0], + ctx.padding[1], + ctx.padding[0], + ctx.dilation[1], + ctx.dilation[0], + ctx.groups, + 
ctx.deformable_groups, + 1, + cur_im2col_step, + ) + + return grad_input, grad_offset, grad_weight, None, None, None, None, None, None + + @staticmethod + def _output_size(input, weight, padding, dilation, stride): + channels = weight.size(0) + output_size = (input.size(0), channels) + for d in range(input.dim() - 2): + in_size = input.size(d + 2) + pad = padding[d] + kernel = dilation[d] * (weight.size(d + 2) - 1) + 1 + stride_ = stride[d] + output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1,) + if not all(map(lambda s: s > 0, output_size)): + raise ValueError( + "convolution input is too small (output would be {})".format( + "x".join(map(str, output_size)) + ) + ) + return output_size + + @staticmethod + @lru_cache(maxsize=128) + def _cal_im2col_step(input_size, default_size): + """ + Calculate proper im2col step size, which should be divisible by input_size and not larger + than prefer_size. Meanwhile the step size should be as large as possible to be more + efficient. So we choose the largest one among all divisors of input_size which are smaller + than prefer_size. + :param input_size: input batch size . + :param default_size: default preferred im2col step size. + :return: the largest proper step size. + """ + if input_size <= default_size: + return input_size + best_step = 1 + for step in range(2, min(int(math.sqrt(input_size)) + 1, default_size)): + if input_size % step == 0: + if input_size // step <= default_size: + return input_size // step + best_step = step + + return best_step + + +class _ModulatedDeformConv(Function): + @staticmethod + def forward( + ctx, + input, + offset, + mask, + weight, + bias=None, + stride=1, + padding=0, + dilation=1, + groups=1, + deformable_groups=1, + ): + ctx.stride = stride + ctx.padding = padding + ctx.dilation = dilation + ctx.groups = groups + ctx.deformable_groups = deformable_groups + ctx.with_bias = bias is not None + if not ctx.with_bias: + bias = input.new_empty(1) # fake tensor + if not input.is_cuda: + raise NotImplementedError("Deformable Conv is not supported on CPUs!") + if ( + weight.requires_grad + or mask.requires_grad + or offset.requires_grad + or input.requires_grad + ): + ctx.save_for_backward(input, offset, mask, weight, bias) + output = input.new_empty(_ModulatedDeformConv._infer_shape(ctx, input, weight)) + ctx._bufs = [input.new_empty(0), input.new_empty(0)] + _C.modulated_deform_conv_forward( + input, + weight, + bias, + ctx._bufs[0], + offset, + mask, + output, + ctx._bufs[1], + weight.shape[2], + weight.shape[3], + ctx.stride, + ctx.stride, + ctx.padding, + ctx.padding, + ctx.dilation, + ctx.dilation, + ctx.groups, + ctx.deformable_groups, + ctx.with_bias, + ) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + if not grad_output.is_cuda: + raise NotImplementedError("Deformable Conv is not supported on CPUs!") + input, offset, mask, weight, bias = ctx.saved_tensors + grad_input = torch.zeros_like(input) + grad_offset = torch.zeros_like(offset) + grad_mask = torch.zeros_like(mask) + grad_weight = torch.zeros_like(weight) + grad_bias = torch.zeros_like(bias) + _C.modulated_deform_conv_backward( + input, + weight, + bias, + ctx._bufs[0], + offset, + mask, + ctx._bufs[1], + grad_input, + grad_weight, + grad_bias, + grad_offset, + grad_mask, + grad_output, + weight.shape[2], + weight.shape[3], + ctx.stride, + ctx.stride, + ctx.padding, + ctx.padding, + ctx.dilation, + ctx.dilation, + ctx.groups, + ctx.deformable_groups, + ctx.with_bias, + ) + if not ctx.with_bias: + 
grad_bias = None + + return ( + grad_input, + grad_offset, + grad_mask, + grad_weight, + grad_bias, + None, + None, + None, + None, + None, + ) + + @staticmethod + def _infer_shape(ctx, input, weight): + n = input.size(0) + channels_out = weight.size(0) + height, width = input.shape[2:4] + kernel_h, kernel_w = weight.shape[2:4] + height_out = ( + height + 2 * ctx.padding - (ctx.dilation * (kernel_h - 1) + 1) + ) // ctx.stride + 1 + width_out = ( + width + 2 * ctx.padding - (ctx.dilation * (kernel_w - 1) + 1) + ) // ctx.stride + 1 + return n, channels_out, height_out, width_out + + +deform_conv = _DeformConv.apply +modulated_deform_conv = _ModulatedDeformConv.apply + + +class DeformConv(nn.Module): + def __init__( + self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + deformable_groups=1, + bias=False, + norm=None, + activation=None, + ): + """ + Deformable convolution from :paper:`deformconv`. + + Arguments are similar to :class:`Conv2D`. Extra arguments: + + Args: + deformable_groups (int): number of groups used in deformable convolution. + norm (nn.Module, optional): a normalization layer + activation (callable(Tensor) -> Tensor): a callable activation function + """ + super(DeformConv, self).__init__() + + assert not bias + assert in_channels % groups == 0, "in_channels {} cannot be divisible by groups {}".format( + in_channels, groups + ) + assert ( + out_channels % groups == 0 + ), "out_channels {} cannot be divisible by groups {}".format(out_channels, groups) + + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = _pair(kernel_size) + self.stride = _pair(stride) + self.padding = _pair(padding) + self.dilation = _pair(dilation) + self.groups = groups + self.deformable_groups = deformable_groups + self.norm = norm + self.activation = activation + + self.weight = nn.Parameter( + torch.Tensor(out_channels, in_channels // self.groups, *self.kernel_size) + ) + self.bias = None + + nn.init.kaiming_uniform_(self.weight, nonlinearity="relu") + + def forward(self, x, offset): + if x.numel() == 0: + # When input is empty, we want to return a empty tensor with "correct" shape, + # So that the following operations will not panic + # if they check for the shape of the tensor. 
+ # This computes the height and width of the output tensor + output_shape = [ + (i + 2 * p - (di * (k - 1) + 1)) // s + 1 + for i, p, di, k, s in zip( + x.shape[-2:], self.padding, self.dilation, self.kernel_size, self.stride + ) + ] + output_shape = [x.shape[0], self.weight.shape[0]] + output_shape + return _NewEmptyTensorOp.apply(x, output_shape) + + x = deform_conv( + x, + offset, + self.weight, + self.stride, + self.padding, + self.dilation, + self.groups, + self.deformable_groups, + ) + if self.norm is not None: + x = self.norm(x) + if self.activation is not None: + x = self.activation(x) + return x + + def extra_repr(self): + tmpstr = "in_channels=" + str(self.in_channels) + tmpstr += ", out_channels=" + str(self.out_channels) + tmpstr += ", kernel_size=" + str(self.kernel_size) + tmpstr += ", stride=" + str(self.stride) + tmpstr += ", padding=" + str(self.padding) + tmpstr += ", dilation=" + str(self.dilation) + tmpstr += ", groups=" + str(self.groups) + tmpstr += ", deformable_groups=" + str(self.deformable_groups) + tmpstr += ", bias=False" + return tmpstr + + +class ModulatedDeformConv(nn.Module): + def __init__( + self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + deformable_groups=1, + bias=True, + norm=None, + activation=None, + ): + """ + Modulated deformable convolution from :paper:`deformconv2`. + + Arguments are similar to :class:`Conv2D`. Extra arguments: + + Args: + deformable_groups (int): number of groups used in deformable convolution. + norm (nn.Module, optional): a normalization layer + activation (callable(Tensor) -> Tensor): a callable activation function + """ + super(ModulatedDeformConv, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = _pair(kernel_size) + self.stride = stride + self.padding = padding + self.dilation = dilation + self.groups = groups + self.deformable_groups = deformable_groups + self.with_bias = bias + self.norm = norm + self.activation = activation + + self.weight = nn.Parameter( + torch.Tensor(out_channels, in_channels // groups, *self.kernel_size) + ) + if bias: + self.bias = nn.Parameter(torch.Tensor(out_channels)) + else: + self.bias = None + + nn.init.kaiming_uniform_(self.weight, nonlinearity="relu") + if self.bias is not None: + nn.init.constant_(self.bias, 0) + + def forward(self, x, offset, mask): + if x.numel() == 0: + output_shape = [ + (i + 2 * p - (di * (k - 1) + 1)) // s + 1 + for i, p, di, k, s in zip( + x.shape[-2:], self.padding, self.dilation, self.kernel_size, self.stride + ) + ] + output_shape = [x.shape[0], self.weight.shape[0]] + output_shape + return _NewEmptyTensorOp.apply(x, output_shape) + + x = modulated_deform_conv( + x, + offset, + mask, + self.weight, + self.bias, + self.stride, + self.padding, + self.dilation, + self.groups, + self.deformable_groups, + ) + if self.norm is not None: + x = self.norm(x) + if self.activation is not None: + x = self.activation(x) + return x + + def extra_repr(self): + tmpstr = "in_channels=" + str(self.in_channels) + tmpstr += ", out_channels=" + str(self.out_channels) + tmpstr += ", kernel_size=" + str(self.kernel_size) + tmpstr += ", stride=" + str(self.stride) + tmpstr += ", padding=" + str(self.padding) + tmpstr += ", dilation=" + str(self.dilation) + tmpstr += ", groups=" + str(self.groups) + tmpstr += ", deformable_groups=" + str(self.deformable_groups) + tmpstr += ", bias=" + str(self.with_bias) + return tmpstr + + +try: + from annotator.oneformer.detectron2 import _C 
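+    # _C is the compiled detectron2 extension module; it exposes the
+    # deform_conv_* and modulated_deform_conv_* ops bound in vision.cpp.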
+except ImportError: + # TODO: register ops natively so there is no need to import _C. + _msg = "detectron2 is not compiled successfully, please build following the instructions!" + _args = ("detectron2._C", _msg) + DeformConv = create_dummy_class("DeformConv", *_args) + ModulatedDeformConv = create_dummy_class("ModulatedDeformConv", *_args) + deform_conv = create_dummy_func("deform_conv", *_args) + modulated_deform_conv = create_dummy_func("modulated_deform_conv", *_args) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/losses.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/losses.py new file mode 100644 index 0000000000000000000000000000000000000000..850a852a2f0986d4d1ce89a526d96db42c76e44f --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/losses.py @@ -0,0 +1,133 @@ +import math +import torch + + +def diou_loss( + boxes1: torch.Tensor, + boxes2: torch.Tensor, + reduction: str = "none", + eps: float = 1e-7, +) -> torch.Tensor: + """ + Distance Intersection over Union Loss (Zhaohui Zheng et. al) + https://arxiv.org/abs/1911.08287 + Args: + boxes1, boxes2 (Tensor): box locations in XYXY format, shape (N, 4) or (4,). + reduction: 'none' | 'mean' | 'sum' + 'none': No reduction will be applied to the output. + 'mean': The output will be averaged. + 'sum': The output will be summed. + eps (float): small number to prevent division by zero + """ + + x1, y1, x2, y2 = boxes1.unbind(dim=-1) + x1g, y1g, x2g, y2g = boxes2.unbind(dim=-1) + + # TODO: use torch._assert_async() when pytorch 1.8 support is dropped + assert (x2 >= x1).all(), "bad box: x1 larger than x2" + assert (y2 >= y1).all(), "bad box: y1 larger than y2" + + # Intersection keypoints + xkis1 = torch.max(x1, x1g) + ykis1 = torch.max(y1, y1g) + xkis2 = torch.min(x2, x2g) + ykis2 = torch.min(y2, y2g) + + intsct = torch.zeros_like(x1) + mask = (ykis2 > ykis1) & (xkis2 > xkis1) + intsct[mask] = (xkis2[mask] - xkis1[mask]) * (ykis2[mask] - ykis1[mask]) + union = (x2 - x1) * (y2 - y1) + (x2g - x1g) * (y2g - y1g) - intsct + eps + iou = intsct / union + + # smallest enclosing box + xc1 = torch.min(x1, x1g) + yc1 = torch.min(y1, y1g) + xc2 = torch.max(x2, x2g) + yc2 = torch.max(y2, y2g) + diag_len = ((xc2 - xc1) ** 2) + ((yc2 - yc1) ** 2) + eps + + # centers of boxes + x_p = (x2 + x1) / 2 + y_p = (y2 + y1) / 2 + x_g = (x1g + x2g) / 2 + y_g = (y1g + y2g) / 2 + distance = ((x_p - x_g) ** 2) + ((y_p - y_g) ** 2) + + # Eqn. (7) + loss = 1 - iou + (distance / diag_len) + if reduction == "mean": + loss = loss.mean() if loss.numel() > 0 else 0.0 * loss.sum() + elif reduction == "sum": + loss = loss.sum() + + return loss + + +def ciou_loss( + boxes1: torch.Tensor, + boxes2: torch.Tensor, + reduction: str = "none", + eps: float = 1e-7, +) -> torch.Tensor: + """ + Complete Intersection over Union Loss (Zhaohui Zheng et. al) + https://arxiv.org/abs/1911.08287 + Args: + boxes1, boxes2 (Tensor): box locations in XYXY format, shape (N, 4) or (4,). + reduction: 'none' | 'mean' | 'sum' + 'none': No reduction will be applied to the output. + 'mean': The output will be averaged. + 'sum': The output will be summed. 
+ eps (float): small number to prevent division by zero + """ + + x1, y1, x2, y2 = boxes1.unbind(dim=-1) + x1g, y1g, x2g, y2g = boxes2.unbind(dim=-1) + + # TODO: use torch._assert_async() when pytorch 1.8 support is dropped + assert (x2 >= x1).all(), "bad box: x1 larger than x2" + assert (y2 >= y1).all(), "bad box: y1 larger than y2" + + # Intersection keypoints + xkis1 = torch.max(x1, x1g) + ykis1 = torch.max(y1, y1g) + xkis2 = torch.min(x2, x2g) + ykis2 = torch.min(y2, y2g) + + intsct = torch.zeros_like(x1) + mask = (ykis2 > ykis1) & (xkis2 > xkis1) + intsct[mask] = (xkis2[mask] - xkis1[mask]) * (ykis2[mask] - ykis1[mask]) + union = (x2 - x1) * (y2 - y1) + (x2g - x1g) * (y2g - y1g) - intsct + eps + iou = intsct / union + + # smallest enclosing box + xc1 = torch.min(x1, x1g) + yc1 = torch.min(y1, y1g) + xc2 = torch.max(x2, x2g) + yc2 = torch.max(y2, y2g) + diag_len = ((xc2 - xc1) ** 2) + ((yc2 - yc1) ** 2) + eps + + # centers of boxes + x_p = (x2 + x1) / 2 + y_p = (y2 + y1) / 2 + x_g = (x1g + x2g) / 2 + y_g = (y1g + y2g) / 2 + distance = ((x_p - x_g) ** 2) + ((y_p - y_g) ** 2) + + # width and height of boxes + w_pred = x2 - x1 + h_pred = y2 - y1 + w_gt = x2g - x1g + h_gt = y2g - y1g + v = (4 / (math.pi**2)) * torch.pow((torch.atan(w_gt / h_gt) - torch.atan(w_pred / h_pred)), 2) + with torch.no_grad(): + alpha = v / (1 - iou + v + eps) + + # Eqn. (10) + loss = 1 - iou + (distance / diag_len) + alpha * v + if reduction == "mean": + loss = loss.mean() if loss.numel() > 0 else 0.0 * loss.sum() + elif reduction == "sum": + loss = loss.sum() + + return loss diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/mask_ops.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/mask_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..990d04abbb120e40fe07a21d024dfead471bc998 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/mask_ops.py @@ -0,0 +1,275 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import numpy as np +from typing import Tuple +import torch +from PIL import Image +from torch.nn import functional as F + +__all__ = ["paste_masks_in_image"] + + +BYTES_PER_FLOAT = 4 +# TODO: This memory limit may be too much or too little. It would be better to +# determine it based on available resources. +GPU_MEM_LIMIT = 1024**3 # 1 GB memory limit + + +def _do_paste_mask(masks, boxes, img_h: int, img_w: int, skip_empty: bool = True): + """ + Args: + masks: N, 1, H, W + boxes: N, 4 + img_h, img_w (int): + skip_empty (bool): only paste masks within the region that + tightly bound all boxes, and returns the results this region only. + An important optimization for CPU. + + Returns: + if skip_empty == False, a mask of shape (N, img_h, img_w) + if skip_empty == True, a mask of shape (N, h', w'), and the slice + object for the corresponding region. + """ + # On GPU, paste all masks together (up to chunk size) + # by using the entire image to sample the masks + # Compared to pasting them one by one, + # this has more operations but is faster on COCO-scale dataset. 
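+    # Steps: (1) when skip_empty, shrink to the region tightly bounding all
+    # boxes; (2) build per-box sampling coordinates normalized to [-1, 1]
+    # relative to each box; (3) resample the masks onto the image grid with
+    # F.grid_sample.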
+ device = masks.device + + if skip_empty and not torch.jit.is_scripting(): + x0_int, y0_int = torch.clamp(boxes.min(dim=0).values.floor()[:2] - 1, min=0).to( + dtype=torch.int32 + ) + x1_int = torch.clamp(boxes[:, 2].max().ceil() + 1, max=img_w).to(dtype=torch.int32) + y1_int = torch.clamp(boxes[:, 3].max().ceil() + 1, max=img_h).to(dtype=torch.int32) + else: + x0_int, y0_int = 0, 0 + x1_int, y1_int = img_w, img_h + x0, y0, x1, y1 = torch.split(boxes, 1, dim=1) # each is Nx1 + + N = masks.shape[0] + + img_y = torch.arange(y0_int, y1_int, device=device, dtype=torch.float32) + 0.5 + img_x = torch.arange(x0_int, x1_int, device=device, dtype=torch.float32) + 0.5 + img_y = (img_y - y0) / (y1 - y0) * 2 - 1 + img_x = (img_x - x0) / (x1 - x0) * 2 - 1 + # img_x, img_y have shapes (N, w), (N, h) + + gx = img_x[:, None, :].expand(N, img_y.size(1), img_x.size(1)) + gy = img_y[:, :, None].expand(N, img_y.size(1), img_x.size(1)) + grid = torch.stack([gx, gy], dim=3) + + if not torch.jit.is_scripting(): + if not masks.dtype.is_floating_point: + masks = masks.float() + img_masks = F.grid_sample(masks, grid.to(masks.dtype), align_corners=False) + + if skip_empty and not torch.jit.is_scripting(): + return img_masks[:, 0], (slice(y0_int, y1_int), slice(x0_int, x1_int)) + else: + return img_masks[:, 0], () + + +# Annotate boxes as Tensor (but not Boxes) in order to use scripting +@torch.jit.script_if_tracing +def paste_masks_in_image( + masks: torch.Tensor, boxes: torch.Tensor, image_shape: Tuple[int, int], threshold: float = 0.5 +): + """ + Paste a set of masks that are of a fixed resolution (e.g., 28 x 28) into an image. + The location, height, and width for pasting each mask is determined by their + corresponding bounding boxes in boxes. + + Note: + This is a complicated but more accurate implementation. In actual deployment, it is + often enough to use a faster but less accurate implementation. + See :func:`paste_mask_in_image_old` in this file for an alternative implementation. + + Args: + masks (tensor): Tensor of shape (Bimg, Hmask, Wmask), where Bimg is the number of + detected object instances in the image and Hmask, Wmask are the mask width and mask + height of the predicted mask (e.g., Hmask = Wmask = 28). Values are in [0, 1]. + boxes (Boxes or Tensor): A Boxes of length Bimg or Tensor of shape (Bimg, 4). + boxes[i] and masks[i] correspond to the same object instance. + image_shape (tuple): height, width + threshold (float): A threshold in [0, 1] for converting the (soft) masks to + binary masks. + + Returns: + img_masks (Tensor): A tensor of shape (Bimg, Himage, Wimage), where Bimg is the + number of detected object instances and Himage, Wimage are the image width + and height. img_masks[i] is a binary mask for object instance i. + """ + + assert masks.shape[-1] == masks.shape[-2], "Only square mask predictions are supported" + N = len(masks) + if N == 0: + return masks.new_empty((0,) + image_shape, dtype=torch.uint8) + if not isinstance(boxes, torch.Tensor): + boxes = boxes.tensor + device = boxes.device + assert len(boxes) == N, boxes.shape + + img_h, img_w = image_shape + + # The actual implementation split the input into chunks, + # and paste them chunk by chunk. + if device.type == "cpu" or torch.jit.is_scripting(): + # CPU is most efficient when they are pasted one by one with skip_empty=True + # so that it performs minimal number of operations. 
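+        # Editor's note (illustrative arithmetic, not from the original): on CPU each
+        # mask gets its own chunk (num_chunks = N). The GPU branch below instead sizes
+        # chunks against GPU_MEM_LIMIT, e.g. N=100 masks pasted onto a 1333x800 image
+        # need about 100 * 1333 * 800 * 4 B ~= 0.4 GB, so a single chunk suffices
+        # under the default 1 GB limit.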
+ num_chunks = N + else: + # GPU benefits from parallelism for larger chunks, but may have memory issue + # int(img_h) because shape may be tensors in tracing + num_chunks = int(np.ceil(N * int(img_h) * int(img_w) * BYTES_PER_FLOAT / GPU_MEM_LIMIT)) + assert ( + num_chunks <= N + ), "Default GPU_MEM_LIMIT in mask_ops.py is too small; try increasing it" + chunks = torch.chunk(torch.arange(N, device=device), num_chunks) + + img_masks = torch.zeros( + N, img_h, img_w, device=device, dtype=torch.bool if threshold >= 0 else torch.uint8 + ) + for inds in chunks: + masks_chunk, spatial_inds = _do_paste_mask( + masks[inds, None, :, :], boxes[inds], img_h, img_w, skip_empty=device.type == "cpu" + ) + + if threshold >= 0: + masks_chunk = (masks_chunk >= threshold).to(dtype=torch.bool) + else: + # for visualization and debugging + masks_chunk = (masks_chunk * 255).to(dtype=torch.uint8) + + if torch.jit.is_scripting(): # Scripting does not use the optimized codepath + img_masks[inds] = masks_chunk + else: + img_masks[(inds,) + spatial_inds] = masks_chunk + return img_masks + + +# The below are the original paste function (from Detectron1) which has +# larger quantization error. +# It is faster on CPU, while the aligned one is faster on GPU thanks to grid_sample. + + +def paste_mask_in_image_old(mask, box, img_h, img_w, threshold): + """ + Paste a single mask in an image. + This is a per-box implementation of :func:`paste_masks_in_image`. + This function has larger quantization error due to incorrect pixel + modeling and is not used any more. + + Args: + mask (Tensor): A tensor of shape (Hmask, Wmask) storing the mask of a single + object instance. Values are in [0, 1]. + box (Tensor): A tensor of shape (4, ) storing the x0, y0, x1, y1 box corners + of the object instance. + img_h, img_w (int): Image height and width. + threshold (float): Mask binarization threshold in [0, 1]. + + Returns: + im_mask (Tensor): + The resized and binarized object mask pasted into the original + image plane (a tensor of shape (img_h, img_w)). + """ + # Conversion from continuous box coordinates to discrete pixel coordinates + # via truncation (cast to int32). This determines which pixels to paste the + # mask onto. + box = box.to(dtype=torch.int32) # Continuous to discrete coordinate conversion + # An example (1D) box with continuous coordinates (x0=0.7, x1=4.3) will map to + # a discrete coordinates (x0=0, x1=4). Note that box is mapped to 5 = x1 - x0 + 1 + # pixels (not x1 - x0 pixels). 
+ samples_w = box[2] - box[0] + 1 # Number of pixel samples, *not* geometric width + samples_h = box[3] - box[1] + 1 # Number of pixel samples, *not* geometric height + + # Resample the mask from it's original grid to the new samples_w x samples_h grid + mask = Image.fromarray(mask.cpu().numpy()) + mask = mask.resize((samples_w, samples_h), resample=Image.BILINEAR) + mask = np.array(mask, copy=False) + + if threshold >= 0: + mask = np.array(mask > threshold, dtype=np.uint8) + mask = torch.from_numpy(mask) + else: + # for visualization and debugging, we also + # allow it to return an unmodified mask + mask = torch.from_numpy(mask * 255).to(torch.uint8) + + im_mask = torch.zeros((img_h, img_w), dtype=torch.uint8) + x_0 = max(box[0], 0) + x_1 = min(box[2] + 1, img_w) + y_0 = max(box[1], 0) + y_1 = min(box[3] + 1, img_h) + + im_mask[y_0:y_1, x_0:x_1] = mask[ + (y_0 - box[1]) : (y_1 - box[1]), (x_0 - box[0]) : (x_1 - box[0]) + ] + return im_mask + + +# Our pixel modeling requires extrapolation for any continuous +# coordinate < 0.5 or > length - 0.5. When sampling pixels on the masks, +# we would like this extrapolation to be an interpolation between boundary values and zero, +# instead of using absolute zero or boundary values. +# Therefore `paste_mask_in_image_old` is often used with zero padding around the masks like this: +# masks, scale = pad_masks(masks[:, 0, :, :], 1) +# boxes = scale_boxes(boxes.tensor, scale) + + +def pad_masks(masks, padding): + """ + Args: + masks (tensor): A tensor of shape (B, M, M) representing B masks. + padding (int): Number of cells to pad on all sides. + + Returns: + The padded masks and the scale factor of the padding size / original size. + """ + B = masks.shape[0] + M = masks.shape[-1] + pad2 = 2 * padding + scale = float(M + pad2) / M + padded_masks = masks.new_zeros((B, M + pad2, M + pad2)) + padded_masks[:, padding:-padding, padding:-padding] = masks + return padded_masks, scale + + +def scale_boxes(boxes, scale): + """ + Args: + boxes (tensor): A tensor of shape (B, 4) representing B boxes with 4 + coords representing the corners x0, y0, x1, y1, + scale (float): The box scaling factor. + + Returns: + Scaled boxes. + """ + w_half = (boxes[:, 2] - boxes[:, 0]) * 0.5 + h_half = (boxes[:, 3] - boxes[:, 1]) * 0.5 + x_c = (boxes[:, 2] + boxes[:, 0]) * 0.5 + y_c = (boxes[:, 3] + boxes[:, 1]) * 0.5 + + w_half *= scale + h_half *= scale + + scaled_boxes = torch.zeros_like(boxes) + scaled_boxes[:, 0] = x_c - w_half + scaled_boxes[:, 2] = x_c + w_half + scaled_boxes[:, 1] = y_c - h_half + scaled_boxes[:, 3] = y_c + h_half + return scaled_boxes + + +@torch.jit.script_if_tracing +def _paste_masks_tensor_shape( + masks: torch.Tensor, + boxes: torch.Tensor, + image_shape: Tuple[torch.Tensor, torch.Tensor], + threshold: float = 0.5, +): + """ + A wrapper of paste_masks_in_image where image_shape is Tensor. + During tracing, shapes might be tensors instead of ints. The Tensor->int + conversion should be scripted rather than traced. 
+ """ + return paste_masks_in_image(masks, boxes, (int(image_shape[0]), int(image_shape[1])), threshold) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/nms.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/nms.py new file mode 100644 index 0000000000000000000000000000000000000000..1019e7f4c8c58f2def34a019e4c3a0573c5f69bb --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/nms.py @@ -0,0 +1,144 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + +import torch +from torchvision.ops import boxes as box_ops +from torchvision.ops import nms # noqa . for compatibility + + +def batched_nms( + boxes: torch.Tensor, scores: torch.Tensor, idxs: torch.Tensor, iou_threshold: float +): + """ + Same as torchvision.ops.boxes.batched_nms, but with float(). + """ + assert boxes.shape[-1] == 4 + # Note: Torchvision already has a strategy (https://github.com/pytorch/vision/issues/1311) + # to decide whether to use coordinate trick or for loop to implement batched_nms. So we + # just call it directly. + # Fp16 does not have enough range for batched NMS, so adding float(). + return box_ops.batched_nms(boxes.float(), scores, idxs, iou_threshold) + + +# Note: this function (nms_rotated) might be moved into +# torchvision/ops/boxes.py in the future +def nms_rotated(boxes: torch.Tensor, scores: torch.Tensor, iou_threshold: float): + """ + Performs non-maximum suppression (NMS) on the rotated boxes according + to their intersection-over-union (IoU). + + Rotated NMS iteratively removes lower scoring rotated boxes which have an + IoU greater than iou_threshold with another (higher scoring) rotated box. + + Note that RotatedBox (5, 3, 4, 2, -90) covers exactly the same region as + RotatedBox (5, 3, 4, 2, 90) does, and their IoU will be 1. However, they + can be representing completely different objects in certain tasks, e.g., OCR. + + As for the question of whether rotated-NMS should treat them as faraway boxes + even though their IOU is 1, it depends on the application and/or ground truth annotation. + + As an extreme example, consider a single character v and the square box around it. + + If the angle is 0 degree, the object (text) would be read as 'v'; + + If the angle is 90 degrees, the object (text) would become '>'; + + If the angle is 180 degrees, the object (text) would become '^'; + + If the angle is 270/-90 degrees, the object (text) would become '<' + + All of these cases have IoU of 1 to each other, and rotated NMS that only + uses IoU as criterion would only keep one of them with the highest score - + which, practically, still makes sense in most cases because typically + only one of theses orientations is the correct one. Also, it does not matter + as much if the box is only used to classify the object (instead of transcribing + them with a sequential OCR recognition model) later. + + On the other hand, when we use IoU to filter proposals that are close to the + ground truth during training, we should definitely take the angle into account if + we know the ground truth is labeled with the strictly correct orientation (as in, + upside-down words are annotated with -180 degrees even though they can be covered + with a 0/90/-90 degree box, etc.) + + The way the original dataset is annotated also matters. 
For example, if the dataset + is a 4-point polygon dataset that does not enforce ordering of vertices/orientation, + we can estimate a minimum rotated bounding box to this polygon, but there's no way + we can tell the correct angle with 100% confidence (as shown above, there could be 4 different + rotated boxes, with angles differed by 90 degrees to each other, covering the exactly + same region). In that case we have to just use IoU to determine the box + proximity (as many detection benchmarks (even for text) do) unless there're other + assumptions we can make (like width is always larger than height, or the object is not + rotated by more than 90 degrees CCW/CW, etc.) + + In summary, not considering angles in rotated NMS seems to be a good option for now, + but we should be aware of its implications. + + Args: + boxes (Tensor[N, 5]): Rotated boxes to perform NMS on. They are expected to be in + (x_center, y_center, width, height, angle_degrees) format. + scores (Tensor[N]): Scores for each one of the rotated boxes + iou_threshold (float): Discards all overlapping rotated boxes with IoU < iou_threshold + + Returns: + keep (Tensor): int64 tensor with the indices of the elements that have been kept + by Rotated NMS, sorted in decreasing order of scores + """ + return torch.ops.detectron2.nms_rotated(boxes, scores, iou_threshold) + + +# Note: this function (batched_nms_rotated) might be moved into +# torchvision/ops/boxes.py in the future + + +@torch.jit.script_if_tracing +def batched_nms_rotated( + boxes: torch.Tensor, scores: torch.Tensor, idxs: torch.Tensor, iou_threshold: float +): + """ + Performs non-maximum suppression in a batched fashion. + + Each index value correspond to a category, and NMS + will not be applied between elements of different categories. + + Args: + boxes (Tensor[N, 5]): + boxes where NMS will be performed. They + are expected to be in (x_ctr, y_ctr, width, height, angle_degrees) format + scores (Tensor[N]): + scores for each one of the boxes + idxs (Tensor[N]): + indices of the categories for each one of the boxes. + iou_threshold (float): + discards all overlapping boxes + with IoU < iou_threshold + + Returns: + Tensor: + int64 tensor with the indices of the elements that have been kept + by NMS, sorted in decreasing order of scores + """ + assert boxes.shape[-1] == 5 + + if boxes.numel() == 0: + return torch.empty((0,), dtype=torch.int64, device=boxes.device) + boxes = boxes.float() # fp16 does not have enough range for batched NMS + # Strategy: in order to perform NMS independently per class, + # we add an offset to all the boxes. The offset is dependent + # only on the class idx, and is large enough so that boxes + # from different classes do not overlap + + # Note that batched_nms in torchvision/ops/boxes.py only uses max_coordinate, + # which won't handle negative coordinates correctly. + # Here by using min_coordinate we can make sure the negative coordinates are + # correctly handled. 
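+    # Editor's note (illustrative, not in the original): (max_coordinate -
+    # min_coordinate + 1) is the per-class offset, so the centers of class-k boxes
+    # are shifted by k * that span and can never overlap boxes of another class;
+    # a single NMS call then behaves as if it were run independently per class.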
+ max_coordinate = ( + torch.max(boxes[:, 0], boxes[:, 1]) + torch.max(boxes[:, 2], boxes[:, 3]) / 2 + ).max() + min_coordinate = ( + torch.min(boxes[:, 0], boxes[:, 1]) - torch.max(boxes[:, 2], boxes[:, 3]) / 2 + ).min() + offsets = idxs.to(boxes) * (max_coordinate - min_coordinate + 1) + boxes_for_nms = boxes.clone() # avoid modifying the original values in boxes + boxes_for_nms[:, :2] += offsets[:, None] + keep = nms_rotated(boxes_for_nms, scores, iou_threshold) + return keep diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/roi_align.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/roi_align.py new file mode 100644 index 0000000000000000000000000000000000000000..163462e1f194e1e4100da92d76d9516f7cc22e35 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/roi_align.py @@ -0,0 +1,74 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from torch import nn +from torchvision.ops import roi_align + + +# NOTE: torchvision's RoIAlign has a different default aligned=False +class ROIAlign(nn.Module): + def __init__(self, output_size, spatial_scale, sampling_ratio, aligned=True): + """ + Args: + output_size (tuple): h, w + spatial_scale (float): scale the input boxes by this number + sampling_ratio (int): number of inputs samples to take for each output + sample. 0 to take samples densely. + aligned (bool): if False, use the legacy implementation in + Detectron. If True, align the results more perfectly. + + Note: + The meaning of aligned=True: + + Given a continuous coordinate c, its two neighboring pixel indices (in our + pixel model) are computed by floor(c - 0.5) and ceil(c - 0.5). For example, + c=1.3 has pixel neighbors with discrete indices [0] and [1] (which are sampled + from the underlying signal at continuous coordinates 0.5 and 1.5). But the original + roi_align (aligned=False) does not subtract the 0.5 when computing neighboring + pixel indices and therefore it uses pixels with a slightly incorrect alignment + (relative to our pixel model) when performing bilinear interpolation. + + With `aligned=True`, + we first appropriately scale the ROI and then shift it by -0.5 + prior to calling roi_align. This produces the correct neighbors; see + detectron2/tests/test_roi_align.py for verification. + + The difference does not make a difference to the model's performance if + ROIAlign is used together with conv layers. + """ + super().__init__() + self.output_size = output_size + self.spatial_scale = spatial_scale + self.sampling_ratio = sampling_ratio + self.aligned = aligned + + from torchvision import __version__ + + version = tuple(int(x) for x in __version__.split(".")[:2]) + # https://github.com/pytorch/vision/pull/2438 + assert version >= (0, 7), "Require torchvision >= 0.7" + + def forward(self, input, rois): + """ + Args: + input: NCHW images + rois: Bx5 boxes. First column is the index into N. The other 4 columns are xyxy. 
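+
+        Editor's example (a hedged sketch; the shapes and values are assumptions)::
+
+            pooler = ROIAlign(output_size=(7, 7), spatial_scale=1 / 4, sampling_ratio=0)
+            feats = torch.zeros(1, 256, 50, 50)                 # stride-4 feature map
+            rois = torch.tensor([[0.0, 8.0, 8.0, 96.0, 96.0]])  # (batch_idx, x0, y0, x1, y1)
+            out = pooler(feats, rois)                           # -> (1, 256, 7, 7)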
+ """ + assert rois.dim() == 2 and rois.size(1) == 5 + if input.is_quantized: + input = input.dequantize() + return roi_align( + input, + rois.to(dtype=input.dtype), + self.output_size, + self.spatial_scale, + self.sampling_ratio, + self.aligned, + ) + + def __repr__(self): + tmpstr = self.__class__.__name__ + "(" + tmpstr += "output_size=" + str(self.output_size) + tmpstr += ", spatial_scale=" + str(self.spatial_scale) + tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) + tmpstr += ", aligned=" + str(self.aligned) + tmpstr += ")" + return tmpstr diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/roi_align_rotated.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/roi_align_rotated.py new file mode 100644 index 0000000000000000000000000000000000000000..2a523992e7c736262ad5a158f209aae7875f6f0b --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/roi_align_rotated.py @@ -0,0 +1,100 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import torch +from torch import nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair + + +class _ROIAlignRotated(Function): + @staticmethod + def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio): + ctx.save_for_backward(roi) + ctx.output_size = _pair(output_size) + ctx.spatial_scale = spatial_scale + ctx.sampling_ratio = sampling_ratio + ctx.input_shape = input.size() + output = torch.ops.detectron2.roi_align_rotated_forward( + input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio + ) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + (rois,) = ctx.saved_tensors + output_size = ctx.output_size + spatial_scale = ctx.spatial_scale + sampling_ratio = ctx.sampling_ratio + bs, ch, h, w = ctx.input_shape + grad_input = torch.ops.detectron2.roi_align_rotated_backward( + grad_output, + rois, + spatial_scale, + output_size[0], + output_size[1], + bs, + ch, + h, + w, + sampling_ratio, + ) + return grad_input, None, None, None, None, None + + +roi_align_rotated = _ROIAlignRotated.apply + + +class ROIAlignRotated(nn.Module): + def __init__(self, output_size, spatial_scale, sampling_ratio): + """ + Args: + output_size (tuple): h, w + spatial_scale (float): scale the input boxes by this number + sampling_ratio (int): number of inputs samples to take for each output + sample. 0 to take samples densely. + + Note: + ROIAlignRotated supports continuous coordinate by default: + Given a continuous coordinate c, its two neighboring pixel indices (in our + pixel model) are computed by floor(c - 0.5) and ceil(c - 0.5). For example, + c=1.3 has pixel neighbors with discrete indices [0] and [1] (which are sampled + from the underlying signal at continuous coordinates 0.5 and 1.5). + """ + super(ROIAlignRotated, self).__init__() + self.output_size = output_size + self.spatial_scale = spatial_scale + self.sampling_ratio = sampling_ratio + + def forward(self, input, rois): + """ + Args: + input: NCHW images + rois: Bx6 boxes. First column is the index into N. + The other 5 columns are (x_ctr, y_ctr, width, height, angle_degrees). + """ + assert rois.dim() == 2 and rois.size(1) == 6 + orig_dtype = input.dtype + if orig_dtype == torch.float16: + input = input.float() + rois = rois.float() + output_size = _pair(self.output_size) + + # Scripting for Autograd is currently unsupported. 
+ # This is a quick fix without having to rewrite code on the C++ side + if torch.jit.is_scripting() or torch.jit.is_tracing(): + return torch.ops.detectron2.roi_align_rotated_forward( + input, rois, self.spatial_scale, output_size[0], output_size[1], self.sampling_ratio + ).to(dtype=orig_dtype) + + return roi_align_rotated( + input, rois, self.output_size, self.spatial_scale, self.sampling_ratio + ).to(dtype=orig_dtype) + + def __repr__(self): + tmpstr = self.__class__.__name__ + "(" + tmpstr += "output_size=" + str(self.output_size) + tmpstr += ", spatial_scale=" + str(self.spatial_scale) + tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) + tmpstr += ")" + return tmpstr diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/rotated_boxes.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/rotated_boxes.py new file mode 100644 index 0000000000000000000000000000000000000000..03f73b3bb99275931a887ad9b2d8c0ac9f412bf3 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/rotated_boxes.py @@ -0,0 +1,21 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from __future__ import absolute_import, division, print_function, unicode_literals +import torch + + +def pairwise_iou_rotated(boxes1, boxes2): + """ + Return intersection-over-union (Jaccard index) of boxes. + + Both sets of boxes are expected to be in + (x_center, y_center, width, height, angle) format. + + Arguments: + boxes1 (Tensor[N, 5]) + boxes2 (Tensor[M, 5]) + + Returns: + iou (Tensor[N, M]): the NxM matrix containing the pairwise + IoU values for every element in boxes1 and boxes2 + """ + return torch.ops.detectron2.box_iou_rotated(boxes1, boxes2) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/shape_spec.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/shape_spec.py new file mode 100644 index 0000000000000000000000000000000000000000..8dac3c59b96576710656abebe9b5eac25868abbb --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/shape_spec.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. +from dataclasses import dataclass +from typing import Optional + + +@dataclass +class ShapeSpec: + """ + A simple structure that contains basic shape specification about a tensor. + It is often used as the auxiliary inputs/outputs of models, + to complement the lack of shape inference ability among pytorch modules. + """ + + channels: Optional[int] = None + height: Optional[int] = None + width: Optional[int] = None + stride: Optional[int] = None diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/wrappers.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/wrappers.py new file mode 100644 index 0000000000000000000000000000000000000000..4367f9ab50ce3ea47616e5c4c43ac4b78164b128 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/layers/wrappers.py @@ -0,0 +1,162 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +""" +Wrappers around on some nn functions, mainly to support empty tensors. + +Ideally, add support directly in PyTorch to empty tensors in those functions. 
+ +These can be removed once https://github.com/pytorch/pytorch/issues/12013 +is implemented +""" + +import warnings +from typing import List, Optional +import torch +from torch.nn import functional as F + +from annotator.oneformer.detectron2.utils.env import TORCH_VERSION + + +def shapes_to_tensor(x: List[int], device: Optional[torch.device] = None) -> torch.Tensor: + """ + Turn a list of integer scalars or integer Tensor scalars into a vector, + in a way that's both traceable and scriptable. + + In tracing, `x` should be a list of scalar Tensor, so the output can trace to the inputs. + In scripting or eager, `x` should be a list of int. + """ + if torch.jit.is_scripting(): + return torch.as_tensor(x, device=device) + if torch.jit.is_tracing(): + assert all( + [isinstance(t, torch.Tensor) for t in x] + ), "Shape should be tensor during tracing!" + # as_tensor should not be used in tracing because it records a constant + ret = torch.stack(x) + if ret.device != device: # avoid recording a hard-coded device if not necessary + ret = ret.to(device=device) + return ret + return torch.as_tensor(x, device=device) + + +def check_if_dynamo_compiling(): + if TORCH_VERSION >= (1, 14): + from torch._dynamo import is_compiling + + return is_compiling() + else: + return False + + +def cat(tensors: List[torch.Tensor], dim: int = 0): + """ + Efficient version of torch.cat that avoids a copy if there is only a single element in a list + """ + assert isinstance(tensors, (list, tuple)) + if len(tensors) == 1: + return tensors[0] + return torch.cat(tensors, dim) + + +def empty_input_loss_func_wrapper(loss_func): + def wrapped_loss_func(input, target, *, reduction="mean", **kwargs): + """ + Same as `loss_func`, but returns 0 (instead of nan) for empty inputs. + """ + if target.numel() == 0 and reduction == "mean": + return input.sum() * 0.0 # connect the gradient + return loss_func(input, target, reduction=reduction, **kwargs) + + return wrapped_loss_func + + +cross_entropy = empty_input_loss_func_wrapper(F.cross_entropy) + + +class _NewEmptyTensorOp(torch.autograd.Function): + @staticmethod + def forward(ctx, x, new_shape): + ctx.shape = x.shape + return x.new_empty(new_shape) + + @staticmethod + def backward(ctx, grad): + shape = ctx.shape + return _NewEmptyTensorOp.apply(grad, shape), None + + +class Conv2d(torch.nn.Conv2d): + """ + A wrapper around :class:`torch.nn.Conv2d` to support empty inputs and more features. + """ + + def __init__(self, *args, **kwargs): + """ + Extra keyword arguments supported in addition to those in `torch.nn.Conv2d`: + + Args: + norm (nn.Module, optional): a normalization layer + activation (callable(Tensor) -> Tensor): a callable activation function + + It assumes that norm layer is used before activation. + """ + norm = kwargs.pop("norm", None) + activation = kwargs.pop("activation", None) + super().__init__(*args, **kwargs) + + self.norm = norm + self.activation = activation + + def forward(self, x): + # torchscript does not support SyncBatchNorm yet + # https://github.com/pytorch/pytorch/issues/40507 + # and we skip these codes in torchscript since: + # 1. currently we only support torchscript in evaluation mode + # 2. features needed by exporting module to torchscript are added in PyTorch 1.6 or + # later version, `Conv2d` in these PyTorch versions has already supported empty inputs. 
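+        # Editor's note: the guarded block below only asserts that SyncBatchNorm is not
+        # fed an empty training batch; the convolution, norm and activation themselves
+        # are applied unconditionally afterwards. Illustrative construction (an
+        # assumption, not from the original source):
+        #   conv = Conv2d(3, 64, kernel_size=3, padding=1,
+        #                 norm=torch.nn.BatchNorm2d(64), activation=F.relu)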
+ if not torch.jit.is_scripting(): + # Dynamo doesn't support context managers yet + is_dynamo_compiling = check_if_dynamo_compiling() + if not is_dynamo_compiling: + with warnings.catch_warnings(record=True): + if x.numel() == 0 and self.training: + # https://github.com/pytorch/pytorch/issues/12013 + assert not isinstance( + self.norm, torch.nn.SyncBatchNorm + ), "SyncBatchNorm does not support empty inputs!" + + x = F.conv2d( + x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups + ) + if self.norm is not None: + x = self.norm(x) + if self.activation is not None: + x = self.activation(x) + return x + + +ConvTranspose2d = torch.nn.ConvTranspose2d +BatchNorm2d = torch.nn.BatchNorm2d +interpolate = F.interpolate +Linear = torch.nn.Linear + + +def nonzero_tuple(x): + """ + A 'as_tuple=True' version of torch.nonzero to support torchscript. + because of https://github.com/pytorch/pytorch/issues/38718 + """ + if torch.jit.is_scripting(): + if x.dim() == 0: + return x.unsqueeze(0).nonzero().unbind(1) + return x.nonzero().unbind(1) + else: + return x.nonzero(as_tuple=True) + + +@torch.jit.script_if_tracing +def move_device_like(src: torch.Tensor, dst: torch.Tensor) -> torch.Tensor: + """ + Tracing friendly way to cast tensor to another tensor's device. Device will be treated + as constant during tracing, scripting the casting process as whole can workaround this issue. + """ + return src.to(dst.device) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/model_zoo/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/model_zoo/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6204208198d813728cf6419e8eef4a733f20c18f --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/model_zoo/__init__.py @@ -0,0 +1,10 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +""" +Model Zoo API for Detectron2: a collection of functions to create common model architectures +listed in `MODEL_ZOO.md `_, +and optionally load their pre-trained weights. +""" + +from .model_zoo import get, get_config_file, get_checkpoint_url, get_config + +__all__ = ["get_checkpoint_url", "get", "get_config_file", "get_config"] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/model_zoo/model_zoo.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/model_zoo/model_zoo.py new file mode 100644 index 0000000000000000000000000000000000000000..74e11b292a725cb22a7d5b001ed30b589b74598e --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/model_zoo/model_zoo.py @@ -0,0 +1,213 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import os +from typing import Optional +import pkg_resources +import torch + +from annotator.oneformer.detectron2.checkpoint import DetectionCheckpointer +from annotator.oneformer.detectron2.config import CfgNode, LazyConfig, get_cfg, instantiate +from annotator.oneformer.detectron2.modeling import build_model + + +class _ModelZooUrls(object): + """ + Mapping from names to officially released Detectron2 pre-trained models. 
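+
+    Editor's example (derived from the table below)::
+
+        _ModelZooUrls.query("COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml")
+        # -> "https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_FPN_1x/137257794/model_final_b275ba.pkl"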
+ """ + + S3_PREFIX = "https://dl.fbaipublicfiles.com/detectron2/" + + # format: {config_path.yaml} -> model_id/model_final_{commit}.pkl + CONFIG_PATH_TO_URL_SUFFIX = { + # COCO Detection with Faster R-CNN + "COCO-Detection/faster_rcnn_R_50_C4_1x": "137257644/model_final_721ade.pkl", + "COCO-Detection/faster_rcnn_R_50_DC5_1x": "137847829/model_final_51d356.pkl", + "COCO-Detection/faster_rcnn_R_50_FPN_1x": "137257794/model_final_b275ba.pkl", + "COCO-Detection/faster_rcnn_R_50_C4_3x": "137849393/model_final_f97cb7.pkl", + "COCO-Detection/faster_rcnn_R_50_DC5_3x": "137849425/model_final_68d202.pkl", + "COCO-Detection/faster_rcnn_R_50_FPN_3x": "137849458/model_final_280758.pkl", + "COCO-Detection/faster_rcnn_R_101_C4_3x": "138204752/model_final_298dad.pkl", + "COCO-Detection/faster_rcnn_R_101_DC5_3x": "138204841/model_final_3e0943.pkl", + "COCO-Detection/faster_rcnn_R_101_FPN_3x": "137851257/model_final_f6e8b1.pkl", + "COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x": "139173657/model_final_68b088.pkl", + # COCO Detection with RetinaNet + "COCO-Detection/retinanet_R_50_FPN_1x": "190397773/model_final_bfca0b.pkl", + "COCO-Detection/retinanet_R_50_FPN_3x": "190397829/model_final_5bd44e.pkl", + "COCO-Detection/retinanet_R_101_FPN_3x": "190397697/model_final_971ab9.pkl", + # COCO Detection with RPN and Fast R-CNN + "COCO-Detection/rpn_R_50_C4_1x": "137258005/model_final_450694.pkl", + "COCO-Detection/rpn_R_50_FPN_1x": "137258492/model_final_02ce48.pkl", + "COCO-Detection/fast_rcnn_R_50_FPN_1x": "137635226/model_final_e5f7ce.pkl", + # COCO Instance Segmentation Baselines with Mask R-CNN + "COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x": "137259246/model_final_9243eb.pkl", + "COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x": "137260150/model_final_4f86c3.pkl", + "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x": "137260431/model_final_a54504.pkl", + "COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x": "137849525/model_final_4ce675.pkl", + "COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x": "137849551/model_final_84107b.pkl", + "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x": "137849600/model_final_f10217.pkl", + "COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x": "138363239/model_final_a2914c.pkl", + "COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x": "138363294/model_final_0464b7.pkl", + "COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x": "138205316/model_final_a3ec72.pkl", + "COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x": "139653917/model_final_2d9806.pkl", # noqa + # New baselines using Large-Scale Jitter and Longer Training Schedule + "new_baselines/mask_rcnn_R_50_FPN_100ep_LSJ": "42047764/model_final_bb69de.pkl", + "new_baselines/mask_rcnn_R_50_FPN_200ep_LSJ": "42047638/model_final_89a8d3.pkl", + "new_baselines/mask_rcnn_R_50_FPN_400ep_LSJ": "42019571/model_final_14d201.pkl", + "new_baselines/mask_rcnn_R_101_FPN_100ep_LSJ": "42025812/model_final_4f7b58.pkl", + "new_baselines/mask_rcnn_R_101_FPN_200ep_LSJ": "42131867/model_final_0bb7ae.pkl", + "new_baselines/mask_rcnn_R_101_FPN_400ep_LSJ": "42073830/model_final_f96b26.pkl", + "new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ": "42047771/model_final_b7fbab.pkl", # noqa + "new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_200ep_LSJ": "42132721/model_final_5d87c1.pkl", # noqa + "new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_400ep_LSJ": "42025447/model_final_f1362d.pkl", # noqa + "new_baselines/mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ": "42047784/model_final_6ba57e.pkl", # noqa + "new_baselines/mask_rcnn_regnety_4gf_dds_FPN_200ep_LSJ": 
"42047642/model_final_27b9c1.pkl", # noqa + "new_baselines/mask_rcnn_regnety_4gf_dds_FPN_400ep_LSJ": "42045954/model_final_ef3a80.pkl", # noqa + # COCO Person Keypoint Detection Baselines with Keypoint R-CNN + "COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x": "137261548/model_final_04e291.pkl", + "COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x": "137849621/model_final_a6e10b.pkl", + "COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x": "138363331/model_final_997cc7.pkl", + "COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x": "139686956/model_final_5ad38f.pkl", + # COCO Panoptic Segmentation Baselines with Panoptic FPN + "COCO-PanopticSegmentation/panoptic_fpn_R_50_1x": "139514544/model_final_dbfeb4.pkl", + "COCO-PanopticSegmentation/panoptic_fpn_R_50_3x": "139514569/model_final_c10459.pkl", + "COCO-PanopticSegmentation/panoptic_fpn_R_101_3x": "139514519/model_final_cafdb1.pkl", + # LVIS Instance Segmentation Baselines with Mask R-CNN + "LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x": "144219072/model_final_571f7c.pkl", # noqa + "LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x": "144219035/model_final_824ab5.pkl", # noqa + "LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x": "144219108/model_final_5e3439.pkl", # noqa + # Cityscapes & Pascal VOC Baselines + "Cityscapes/mask_rcnn_R_50_FPN": "142423278/model_final_af9cf5.pkl", + "PascalVOC-Detection/faster_rcnn_R_50_C4": "142202221/model_final_b1acc2.pkl", + # Other Settings + "Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5": "138602867/model_final_65c703.pkl", + "Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5": "144998336/model_final_821d0b.pkl", + "Misc/cascade_mask_rcnn_R_50_FPN_1x": "138602847/model_final_e9d89b.pkl", + "Misc/cascade_mask_rcnn_R_50_FPN_3x": "144998488/model_final_480dd8.pkl", + "Misc/mask_rcnn_R_50_FPN_3x_syncbn": "169527823/model_final_3b3c51.pkl", + "Misc/mask_rcnn_R_50_FPN_3x_gn": "138602888/model_final_dc5d9e.pkl", + "Misc/scratch_mask_rcnn_R_50_FPN_3x_gn": "138602908/model_final_01ca85.pkl", + "Misc/scratch_mask_rcnn_R_50_FPN_9x_gn": "183808979/model_final_da7b4c.pkl", + "Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn": "184226666/model_final_5ce33e.pkl", + "Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x": "139797668/model_final_be35db.pkl", + "Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv": "18131413/model_0039999_e76410.pkl", # noqa + # D1 Comparisons + "Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x": "137781054/model_final_7ab50c.pkl", # noqa + "Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x": "137781281/model_final_62ca52.pkl", # noqa + "Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x": "137781195/model_final_cce136.pkl", + } + + @staticmethod + def query(config_path: str) -> Optional[str]: + """ + Args: + config_path: relative config filename + """ + name = config_path.replace(".yaml", "").replace(".py", "") + if name in _ModelZooUrls.CONFIG_PATH_TO_URL_SUFFIX: + suffix = _ModelZooUrls.CONFIG_PATH_TO_URL_SUFFIX[name] + return _ModelZooUrls.S3_PREFIX + name + "/" + suffix + return None + + +def get_checkpoint_url(config_path): + """ + Returns the URL to the model trained using the given config + + Args: + config_path (str): config file name relative to detectron2's "configs/" + directory, e.g., "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml" + + Returns: + str: a URL to the model + """ + url = _ModelZooUrls.query(config_path) + if url is None: + raise RuntimeError("Pretrained model for {} is not available!".format(config_path)) + return url + + +def get_config_file(config_path): + """ + Returns path to a builtin 
config file. + + Args: + config_path (str): config file name relative to detectron2's "configs/" + directory, e.g., "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml" + + Returns: + str: the real path to the config file. + """ + cfg_file = pkg_resources.resource_filename( + "detectron2.model_zoo", os.path.join("configs", config_path) + ) + if not os.path.exists(cfg_file): + raise RuntimeError("{} not available in Model Zoo!".format(config_path)) + return cfg_file + + +def get_config(config_path, trained: bool = False): + """ + Returns a config object for a model in model zoo. + + Args: + config_path (str): config file name relative to detectron2's "configs/" + directory, e.g., "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml" + trained (bool): If True, will set ``MODEL.WEIGHTS`` to trained model zoo weights. + If False, the checkpoint specified in the config file's ``MODEL.WEIGHTS`` is used + instead; this will typically (though not always) initialize a subset of weights using + an ImageNet pre-trained model, while randomly initializing the other weights. + + Returns: + CfgNode or omegaconf.DictConfig: a config object + """ + cfg_file = get_config_file(config_path) + if cfg_file.endswith(".yaml"): + cfg = get_cfg() + cfg.merge_from_file(cfg_file) + if trained: + cfg.MODEL.WEIGHTS = get_checkpoint_url(config_path) + return cfg + elif cfg_file.endswith(".py"): + cfg = LazyConfig.load(cfg_file) + if trained: + url = get_checkpoint_url(config_path) + if "train" in cfg and "init_checkpoint" in cfg.train: + cfg.train.init_checkpoint = url + else: + raise NotImplementedError + return cfg + + +def get(config_path, trained: bool = False, device: Optional[str] = None): + """ + Get a model specified by relative path under Detectron2's official ``configs/`` directory. + + Args: + config_path (str): config file name relative to detectron2's "configs/" + directory, e.g., "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml" + trained (bool): see :func:`get_config`. + device (str or None): overwrite the device in config, if given. + + Returns: + nn.Module: a detectron2 model. Will be in training mode. + + Example: + :: + from annotator.oneformer.detectron2 import model_zoo + model = model_zoo.get("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml", trained=True) + """ + cfg = get_config(config_path, trained) + if device is None and not torch.cuda.is_available(): + device = "cpu" + if device is not None and isinstance(cfg, CfgNode): + cfg.MODEL.DEVICE = device + + if isinstance(cfg, CfgNode): + model = build_model(cfg) + DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS) + else: + model = instantiate(cfg.model) + if device is not None: + model = model.to(device) + if "train" in cfg and "init_checkpoint" in cfg.train: + DetectionCheckpointer(model).load(cfg.train.init_checkpoint) + return model diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ce9ddac2f3006c7ee422aab7239060190a9d95d1 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/__init__.py @@ -0,0 +1,64 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+from annotator.oneformer.detectron2.layers import ShapeSpec + +from .anchor_generator import build_anchor_generator, ANCHOR_GENERATOR_REGISTRY +from .backbone import ( + BACKBONE_REGISTRY, + FPN, + Backbone, + ResNet, + ResNetBlockBase, + build_backbone, + build_resnet_backbone, + make_stage, + ViT, + SimpleFeaturePyramid, + get_vit_lr_decay_rate, + MViT, + SwinTransformer, +) +from .meta_arch import ( + META_ARCH_REGISTRY, + SEM_SEG_HEADS_REGISTRY, + GeneralizedRCNN, + PanopticFPN, + ProposalNetwork, + RetinaNet, + SemanticSegmentor, + build_model, + build_sem_seg_head, + FCOS, +) +from .postprocessing import detector_postprocess +from .proposal_generator import ( + PROPOSAL_GENERATOR_REGISTRY, + build_proposal_generator, + RPN_HEAD_REGISTRY, + build_rpn_head, +) +from .roi_heads import ( + ROI_BOX_HEAD_REGISTRY, + ROI_HEADS_REGISTRY, + ROI_KEYPOINT_HEAD_REGISTRY, + ROI_MASK_HEAD_REGISTRY, + ROIHeads, + StandardROIHeads, + BaseMaskRCNNHead, + BaseKeypointRCNNHead, + FastRCNNOutputLayers, + build_box_head, + build_keypoint_head, + build_mask_head, + build_roi_heads, +) +from .test_time_augmentation import DatasetMapperTTA, GeneralizedRCNNWithTTA +from .mmdet_wrapper import MMDetBackbone, MMDetDetector + +_EXCLUDE = {"ShapeSpec"} +__all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")] + + +from annotator.oneformer.detectron2.utils.env import fixup_module_metadata + +fixup_module_metadata(__name__, globals(), __all__) +del fixup_module_metadata diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/anchor_generator.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/anchor_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..04127c4af440b4623427b4c0911ee299166d1d7d --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/anchor_generator.py @@ -0,0 +1,386 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import collections +import math +from typing import List +import torch +from torch import nn + +from annotator.oneformer.detectron2.config import configurable +from annotator.oneformer.detectron2.layers import ShapeSpec, move_device_like +from annotator.oneformer.detectron2.structures import Boxes, RotatedBoxes +from annotator.oneformer.detectron2.utils.registry import Registry + +ANCHOR_GENERATOR_REGISTRY = Registry("ANCHOR_GENERATOR") +ANCHOR_GENERATOR_REGISTRY.__doc__ = """ +Registry for modules that creates object detection anchors for feature maps. + +The registered object will be called with `obj(cfg, input_shape)`. 
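+
+Editor's sketch of registering a custom generator (the class name is illustrative)::
+
+    @ANCHOR_GENERATOR_REGISTRY.register()
+    class MyAnchorGenerator(nn.Module):
+        def __init__(self, cfg, input_shape):
+            super().__init__()
+            ...
+
+    # selected at build time via cfg.MODEL.ANCHOR_GENERATOR.NAME = "MyAnchorGenerator"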
+""" + + +class BufferList(nn.Module): + """ + Similar to nn.ParameterList, but for buffers + """ + + def __init__(self, buffers): + super().__init__() + for i, buffer in enumerate(buffers): + # Use non-persistent buffer so the values are not saved in checkpoint + self.register_buffer(str(i), buffer, persistent=False) + + def __len__(self): + return len(self._buffers) + + def __iter__(self): + return iter(self._buffers.values()) + + +def _create_grid_offsets( + size: List[int], stride: int, offset: float, target_device_tensor: torch.Tensor +): + grid_height, grid_width = size + shifts_x = move_device_like( + torch.arange(offset * stride, grid_width * stride, step=stride, dtype=torch.float32), + target_device_tensor, + ) + shifts_y = move_device_like( + torch.arange(offset * stride, grid_height * stride, step=stride, dtype=torch.float32), + target_device_tensor, + ) + + shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x) + shift_x = shift_x.reshape(-1) + shift_y = shift_y.reshape(-1) + return shift_x, shift_y + + +def _broadcast_params(params, num_features, name): + """ + If one size (or aspect ratio) is specified and there are multiple feature + maps, we "broadcast" anchors of that single size (or aspect ratio) + over all feature maps. + + If params is list[float], or list[list[float]] with len(params) == 1, repeat + it num_features time. + + Returns: + list[list[float]]: param for each feature + """ + assert isinstance( + params, collections.abc.Sequence + ), f"{name} in anchor generator has to be a list! Got {params}." + assert len(params), f"{name} in anchor generator cannot be empty!" + if not isinstance(params[0], collections.abc.Sequence): # params is list[float] + return [params] * num_features + if len(params) == 1: + return list(params) * num_features + assert len(params) == num_features, ( + f"Got {name} of length {len(params)} in anchor generator, " + f"but the number of input features is {num_features}!" + ) + return params + + +@ANCHOR_GENERATOR_REGISTRY.register() +class DefaultAnchorGenerator(nn.Module): + """ + Compute anchors in the standard ways described in + "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks". + """ + + box_dim: torch.jit.Final[int] = 4 + """ + the dimension of each anchor box. + """ + + @configurable + def __init__(self, *, sizes, aspect_ratios, strides, offset=0.5): + """ + This interface is experimental. + + Args: + sizes (list[list[float]] or list[float]): + If ``sizes`` is list[list[float]], ``sizes[i]`` is the list of anchor sizes + (i.e. sqrt of anchor area) to use for the i-th feature map. + If ``sizes`` is list[float], ``sizes`` is used for all feature maps. + Anchor sizes are given in absolute lengths in units of + the input image; they do not dynamically scale if the input image size changes. + aspect_ratios (list[list[float]] or list[float]): list of aspect ratios + (i.e. height / width) to use for anchors. Same "broadcast" rule for `sizes` applies. + strides (list[int]): stride of each input feature. + offset (float): Relative offset between the center of the first anchor and the top-left + corner of the image. Value has to be in [0, 1). + Recommend to use 0.5, which means half stride. 
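+
+        Editor's example (illustrative values)::
+
+            # a single size / aspect-ratio set broadcast over 3 feature maps:
+            # 3 sizes * 3 ratios = 9 anchors per location on every map
+            DefaultAnchorGenerator(
+                sizes=[[32, 64, 128]],
+                aspect_ratios=[[0.5, 1.0, 2.0]],
+                strides=[8, 16, 32],
+            )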
+ """ + super().__init__() + + self.strides = strides + self.num_features = len(self.strides) + sizes = _broadcast_params(sizes, self.num_features, "sizes") + aspect_ratios = _broadcast_params(aspect_ratios, self.num_features, "aspect_ratios") + self.cell_anchors = self._calculate_anchors(sizes, aspect_ratios) + + self.offset = offset + assert 0.0 <= self.offset < 1.0, self.offset + + @classmethod + def from_config(cls, cfg, input_shape: List[ShapeSpec]): + return { + "sizes": cfg.MODEL.ANCHOR_GENERATOR.SIZES, + "aspect_ratios": cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS, + "strides": [x.stride for x in input_shape], + "offset": cfg.MODEL.ANCHOR_GENERATOR.OFFSET, + } + + def _calculate_anchors(self, sizes, aspect_ratios): + cell_anchors = [ + self.generate_cell_anchors(s, a).float() for s, a in zip(sizes, aspect_ratios) + ] + return BufferList(cell_anchors) + + @property + @torch.jit.unused + def num_cell_anchors(self): + """ + Alias of `num_anchors`. + """ + return self.num_anchors + + @property + @torch.jit.unused + def num_anchors(self): + """ + Returns: + list[int]: Each int is the number of anchors at every pixel + location, on that feature map. + For example, if at every pixel we use anchors of 3 aspect + ratios and 5 sizes, the number of anchors is 15. + (See also ANCHOR_GENERATOR.SIZES and ANCHOR_GENERATOR.ASPECT_RATIOS in config) + + In standard RPN models, `num_anchors` on every feature map is the same. + """ + return [len(cell_anchors) for cell_anchors in self.cell_anchors] + + def _grid_anchors(self, grid_sizes: List[List[int]]): + """ + Returns: + list[Tensor]: #featuremap tensors, each is (#locations x #cell_anchors) x 4 + """ + anchors = [] + # buffers() not supported by torchscript. use named_buffers() instead + buffers: List[torch.Tensor] = [x[1] for x in self.cell_anchors.named_buffers()] + for size, stride, base_anchors in zip(grid_sizes, self.strides, buffers): + shift_x, shift_y = _create_grid_offsets(size, stride, self.offset, base_anchors) + shifts = torch.stack((shift_x, shift_y, shift_x, shift_y), dim=1) + + anchors.append((shifts.view(-1, 1, 4) + base_anchors.view(1, -1, 4)).reshape(-1, 4)) + + return anchors + + def generate_cell_anchors(self, sizes=(32, 64, 128, 256, 512), aspect_ratios=(0.5, 1, 2)): + """ + Generate a tensor storing canonical anchor boxes, which are all anchor + boxes of different sizes and aspect_ratios centered at (0, 0). + We can later build the set of anchors for a full feature map by + shifting and tiling these tensors (see `meth:_grid_anchors`). + + Args: + sizes (tuple[float]): + aspect_ratios (tuple[float]]): + + Returns: + Tensor of shape (len(sizes) * len(aspect_ratios), 4) storing anchor boxes + in XYXY format. + """ + + # This is different from the anchor generator defined in the original Faster R-CNN + # code or Detectron. They yield the same AP, however the old version defines cell + # anchors in a less natural way with a shift relative to the feature grid and + # quantization that results in slightly different sizes for different aspect ratios. + # See also https://github.com/facebookresearch/Detectron/issues/227 + + anchors = [] + for size in sizes: + area = size**2.0 + for aspect_ratio in aspect_ratios: + # s * s = w * h + # a = h / w + # ... some algebra ... 
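+                # (editor's note, spelling out the algebra:)
+                #   substituting h = a * w gives s * s = a * w * w, hence w * w = s * s / a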
+ # w = sqrt(s * s / a) + # h = a * w + w = math.sqrt(area / aspect_ratio) + h = aspect_ratio * w + x0, y0, x1, y1 = -w / 2.0, -h / 2.0, w / 2.0, h / 2.0 + anchors.append([x0, y0, x1, y1]) + return torch.tensor(anchors) + + def forward(self, features: List[torch.Tensor]): + """ + Args: + features (list[Tensor]): list of backbone feature maps on which to generate anchors. + + Returns: + list[Boxes]: a list of Boxes containing all the anchors for each feature map + (i.e. the cell anchors repeated over all locations in the feature map). + The number of anchors of each feature map is Hi x Wi x num_cell_anchors, + where Hi, Wi are resolution of the feature map divided by anchor stride. + """ + grid_sizes = [feature_map.shape[-2:] for feature_map in features] + anchors_over_all_feature_maps = self._grid_anchors(grid_sizes) + return [Boxes(x) for x in anchors_over_all_feature_maps] + + +@ANCHOR_GENERATOR_REGISTRY.register() +class RotatedAnchorGenerator(nn.Module): + """ + Compute rotated anchors used by Rotated RPN (RRPN), described in + "Arbitrary-Oriented Scene Text Detection via Rotation Proposals". + """ + + box_dim: int = 5 + """ + the dimension of each anchor box. + """ + + @configurable + def __init__(self, *, sizes, aspect_ratios, strides, angles, offset=0.5): + """ + This interface is experimental. + + Args: + sizes (list[list[float]] or list[float]): + If sizes is list[list[float]], sizes[i] is the list of anchor sizes + (i.e. sqrt of anchor area) to use for the i-th feature map. + If sizes is list[float], the sizes are used for all feature maps. + Anchor sizes are given in absolute lengths in units of + the input image; they do not dynamically scale if the input image size changes. + aspect_ratios (list[list[float]] or list[float]): list of aspect ratios + (i.e. height / width) to use for anchors. Same "broadcast" rule for `sizes` applies. + strides (list[int]): stride of each input feature. + angles (list[list[float]] or list[float]): list of angles (in degrees CCW) + to use for anchors. Same "broadcast" rule for `sizes` applies. + offset (float): Relative offset between the center of the first anchor and the top-left + corner of the image. Value has to be in [0, 1). + Recommend to use 0.5, which means half stride. + """ + super().__init__() + + self.strides = strides + self.num_features = len(self.strides) + sizes = _broadcast_params(sizes, self.num_features, "sizes") + aspect_ratios = _broadcast_params(aspect_ratios, self.num_features, "aspect_ratios") + angles = _broadcast_params(angles, self.num_features, "angles") + self.cell_anchors = self._calculate_anchors(sizes, aspect_ratios, angles) + + self.offset = offset + assert 0.0 <= self.offset < 1.0, self.offset + + @classmethod + def from_config(cls, cfg, input_shape: List[ShapeSpec]): + return { + "sizes": cfg.MODEL.ANCHOR_GENERATOR.SIZES, + "aspect_ratios": cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS, + "strides": [x.stride for x in input_shape], + "offset": cfg.MODEL.ANCHOR_GENERATOR.OFFSET, + "angles": cfg.MODEL.ANCHOR_GENERATOR.ANGLES, + } + + def _calculate_anchors(self, sizes, aspect_ratios, angles): + cell_anchors = [ + self.generate_cell_anchors(size, aspect_ratio, angle).float() + for size, aspect_ratio, angle in zip(sizes, aspect_ratios, angles) + ] + return BufferList(cell_anchors) + + @property + def num_cell_anchors(self): + """ + Alias of `num_anchors`. 
+ """ + return self.num_anchors + + @property + def num_anchors(self): + """ + Returns: + list[int]: Each int is the number of anchors at every pixel + location, on that feature map. + For example, if at every pixel we use anchors of 3 aspect + ratios, 2 sizes and 5 angles, the number of anchors is 30. + (See also ANCHOR_GENERATOR.SIZES, ANCHOR_GENERATOR.ASPECT_RATIOS + and ANCHOR_GENERATOR.ANGLES in config) + + In standard RRPN models, `num_anchors` on every feature map is the same. + """ + return [len(cell_anchors) for cell_anchors in self.cell_anchors] + + def _grid_anchors(self, grid_sizes): + anchors = [] + for size, stride, base_anchors in zip(grid_sizes, self.strides, self.cell_anchors): + shift_x, shift_y = _create_grid_offsets(size, stride, self.offset, base_anchors) + zeros = torch.zeros_like(shift_x) + shifts = torch.stack((shift_x, shift_y, zeros, zeros, zeros), dim=1) + + anchors.append((shifts.view(-1, 1, 5) + base_anchors.view(1, -1, 5)).reshape(-1, 5)) + + return anchors + + def generate_cell_anchors( + self, + sizes=(32, 64, 128, 256, 512), + aspect_ratios=(0.5, 1, 2), + angles=(-90, -60, -30, 0, 30, 60, 90), + ): + """ + Generate a tensor storing canonical anchor boxes, which are all anchor + boxes of different sizes, aspect_ratios, angles centered at (0, 0). + We can later build the set of anchors for a full feature map by + shifting and tiling these tensors (see `meth:_grid_anchors`). + + Args: + sizes (tuple[float]): + aspect_ratios (tuple[float]]): + angles (tuple[float]]): + + Returns: + Tensor of shape (len(sizes) * len(aspect_ratios) * len(angles), 5) + storing anchor boxes in (x_ctr, y_ctr, w, h, angle) format. + """ + anchors = [] + for size in sizes: + area = size**2.0 + for aspect_ratio in aspect_ratios: + # s * s = w * h + # a = h / w + # ... some algebra ... + # w = sqrt(s * s / a) + # h = a * w + w = math.sqrt(area / aspect_ratio) + h = aspect_ratio * w + anchors.extend([0, 0, w, h, a] for a in angles) + + return torch.tensor(anchors) + + def forward(self, features): + """ + Args: + features (list[Tensor]): list of backbone feature maps on which to generate anchors. + + Returns: + list[RotatedBoxes]: a list of Boxes containing all the anchors for each feature map + (i.e. the cell anchors repeated over all locations in the feature map). + The number of anchors of each feature map is Hi x Wi x num_cell_anchors, + where Hi, Wi are resolution of the feature map divided by anchor stride. + """ + grid_sizes = [feature_map.shape[-2:] for feature_map in features] + anchors_over_all_feature_maps = self._grid_anchors(grid_sizes) + return [RotatedBoxes(x) for x in anchors_over_all_feature_maps] + + +def build_anchor_generator(cfg, input_shape): + """ + Built an anchor generator from `cfg.MODEL.ANCHOR_GENERATOR.NAME`. + """ + anchor_generator = cfg.MODEL.ANCHOR_GENERATOR.NAME + return ANCHOR_GENERATOR_REGISTRY.get(anchor_generator)(cfg, input_shape) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/backbone/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/backbone/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5b3358a4061b143c78eba8e7bf81fe9f7ffac1aa --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/backbone/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+from .build import build_backbone, BACKBONE_REGISTRY # noqa F401 isort:skip + +from .backbone import Backbone +from .fpn import FPN +from .regnet import RegNet +from .resnet import ( + BasicStem, + ResNet, + ResNetBlockBase, + build_resnet_backbone, + make_stage, + BottleneckBlock, +) +from .vit import ViT, SimpleFeaturePyramid, get_vit_lr_decay_rate +from .mvit import MViT +from .swin import SwinTransformer + +__all__ = [k for k in globals().keys() if not k.startswith("_")] +# TODO can expose more resnet blocks after careful consideration diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/backbone/backbone.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/backbone/backbone.py new file mode 100644 index 0000000000000000000000000000000000000000..04f3c3c009d972bcab46eaeab33a8bfcc05b726c --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/backbone/backbone.py @@ -0,0 +1,74 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from abc import ABCMeta, abstractmethod +from typing import Dict +import torch.nn as nn + +from annotator.oneformer.detectron2.layers import ShapeSpec + +__all__ = ["Backbone"] + + +class Backbone(nn.Module, metaclass=ABCMeta): + """ + Abstract base class for network backbones. + """ + + def __init__(self): + """ + The `__init__` method of any subclass can specify its own set of arguments. + """ + super().__init__() + + @abstractmethod + def forward(self): + """ + Subclasses must override this method, but adhere to the same return type. + + Returns: + dict[str->Tensor]: mapping from feature name (e.g., "res2") to tensor + """ + pass + + @property + def size_divisibility(self) -> int: + """ + Some backbones require the input height and width to be divisible by a + specific integer. This is typically true for encoder / decoder type networks + with lateral connection (e.g., FPN) for which feature maps need to match + dimension in the "bottom up" and "top down" paths. Set to 0 if no specific + input size divisibility is required. + """ + return 0 + + @property + def padding_constraints(self) -> Dict[str, int]: + """ + This property is a generalization of size_divisibility. Some backbones and training + recipes require specific padding constraints, such as enforcing divisibility by a specific + integer (e.g., FPN) or padding to a square (e.g., ViTDet with large-scale jitter + in :paper:vitdet). `padding_constraints` contains these optional items like: + { + "size_divisibility": int, + "square_size": int, + # Future options are possible + } + `size_divisibility` will read from here if presented and `square_size` indicates the + square padding size if `square_size` > 0. + + TODO: use type of Dict[str, int] to avoid torchscipt issues. The type of padding_constraints + could be generalized as TypedDict (Python 3.8+) to support more types in the future. 
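A hypothetical minimal Backbone subclass (not part of the patch) illustrating the contract defined above: forward returns a dict of named feature maps and output_shape describes their channels and strides. The ToyBackbone name is made up; imports assume the vendored package layout added by this diff.

import torch
import torch.nn as nn
from annotator.oneformer.detectron2.layers import ShapeSpec
from annotator.oneformer.detectron2.modeling.backbone import Backbone

class ToyBackbone(Backbone):
    def __init__(self):
        super().__init__()
        # one stride-4 conv standing in for a real feature extractor
        self.conv = nn.Conv2d(3, 16, kernel_size=3, stride=4, padding=1)

    def forward(self, x):
        return {"toy1": self.conv(x)}

    def output_shape(self):
        return {"toy1": ShapeSpec(channels=16, stride=4)}

feats = ToyBackbone()(torch.zeros(1, 3, 64, 64))
assert feats["toy1"].shape == (1, 16, 16, 16)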
+ """ + return {} + + def output_shape(self): + """ + Returns: + dict[str->ShapeSpec] + """ + # this is a backward-compatible default + return { + name: ShapeSpec( + channels=self._out_feature_channels[name], stride=self._out_feature_strides[name] + ) + for name in self._out_features + } diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/backbone/build.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/backbone/build.py new file mode 100644 index 0000000000000000000000000000000000000000..63a4aaced2c2869294d2b16f4b95cdfdd01259b7 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/backbone/build.py @@ -0,0 +1,33 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from annotator.oneformer.detectron2.layers import ShapeSpec +from annotator.oneformer.detectron2.utils.registry import Registry + +from .backbone import Backbone + +BACKBONE_REGISTRY = Registry("BACKBONE") +BACKBONE_REGISTRY.__doc__ = """ +Registry for backbones, which extract feature maps from images + +The registered object must be a callable that accepts two arguments: + +1. A :class:`detectron2.config.CfgNode` +2. A :class:`detectron2.layers.ShapeSpec`, which contains the input shape specification. + +Registered object must return instance of :class:`Backbone`. +""" + + +def build_backbone(cfg, input_shape=None): + """ + Build a backbone from `cfg.MODEL.BACKBONE.NAME`. + + Returns: + an instance of :class:`Backbone` + """ + if input_shape is None: + input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) + + backbone_name = cfg.MODEL.BACKBONE.NAME + backbone = BACKBONE_REGISTRY.get(backbone_name)(cfg, input_shape) + assert isinstance(backbone, Backbone) + return backbone diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/backbone/fpn.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/backbone/fpn.py new file mode 100644 index 0000000000000000000000000000000000000000..a5a9e8ce1a5ad2e3e07111731185a60855e59b22 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/backbone/fpn.py @@ -0,0 +1,268 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import math +import fvcore.nn.weight_init as weight_init +import torch +import torch.nn.functional as F +from torch import nn + +from annotator.oneformer.detectron2.layers import Conv2d, ShapeSpec, get_norm + +from .backbone import Backbone +from .build import BACKBONE_REGISTRY +from .resnet import build_resnet_backbone + +__all__ = ["build_resnet_fpn_backbone", "build_retinanet_resnet_fpn_backbone", "FPN"] + + +class FPN(Backbone): + """ + This module implements :paper:`FPN`. + It creates pyramid features built on top of some input feature maps. + """ + + _fuse_type: torch.jit.Final[str] + + def __init__( + self, + bottom_up, + in_features, + out_channels, + norm="", + top_block=None, + fuse_type="sum", + square_pad=0, + ): + """ + Args: + bottom_up (Backbone): module representing the bottom up subnetwork. + Must be a subclass of :class:`Backbone`. The multi-scale feature + maps generated by the bottom up network, and listed in `in_features`, + are used to generate FPN levels. + in_features (list[str]): names of the input feature maps coming + from the backbone to which FPN is attached. 
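A sketch (not part of the patch) of the registry round trip that build_backbone above relies on, reusing the hypothetical ToyBackbone from the previous sketch: a builder registered under its function name can then be selected through cfg.MODEL.BACKBONE.NAME.

from annotator.oneformer.detectron2.modeling.backbone.build import BACKBONE_REGISTRY

@BACKBONE_REGISTRY.register()
def build_toy_backbone(cfg, input_shape):
    # must return a Backbone instance; build_backbone asserts this
    return ToyBackbone()

assert BACKBONE_REGISTRY.get("build_toy_backbone") is build_toy_backbone
# With cfg.MODEL.BACKBONE.NAME = "build_toy_backbone", build_backbone(cfg)
# would dispatch to this builder.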
For example, if the + backbone produces ["res2", "res3", "res4"], any *contiguous* sublist + of these may be used; order must be from high to low resolution. + out_channels (int): number of channels in the output feature maps. + norm (str): the normalization to use. + top_block (nn.Module or None): if provided, an extra operation will + be performed on the output of the last (smallest resolution) + FPN output, and the result will extend the result list. The top_block + further downsamples the feature map. It must have an attribute + "num_levels", meaning the number of extra FPN levels added by + this block, and "in_feature", which is a string representing + its input feature (e.g., p5). + fuse_type (str): types for fusing the top down features and the lateral + ones. It can be "sum" (default), which sums up element-wise; or "avg", + which takes the element-wise mean of the two. + square_pad (int): If > 0, require input images to be padded to specific square size. + """ + super(FPN, self).__init__() + assert isinstance(bottom_up, Backbone) + assert in_features, in_features + + # Feature map strides and channels from the bottom up network (e.g. ResNet) + input_shapes = bottom_up.output_shape() + strides = [input_shapes[f].stride for f in in_features] + in_channels_per_feature = [input_shapes[f].channels for f in in_features] + + _assert_strides_are_log2_contiguous(strides) + lateral_convs = [] + output_convs = [] + + use_bias = norm == "" + for idx, in_channels in enumerate(in_channels_per_feature): + lateral_norm = get_norm(norm, out_channels) + output_norm = get_norm(norm, out_channels) + + lateral_conv = Conv2d( + in_channels, out_channels, kernel_size=1, bias=use_bias, norm=lateral_norm + ) + output_conv = Conv2d( + out_channels, + out_channels, + kernel_size=3, + stride=1, + padding=1, + bias=use_bias, + norm=output_norm, + ) + weight_init.c2_xavier_fill(lateral_conv) + weight_init.c2_xavier_fill(output_conv) + stage = int(math.log2(strides[idx])) + self.add_module("fpn_lateral{}".format(stage), lateral_conv) + self.add_module("fpn_output{}".format(stage), output_conv) + + lateral_convs.append(lateral_conv) + output_convs.append(output_conv) + # Place convs into top-down order (from low to high resolution) + # to make the top-down computation in forward clearer. + self.lateral_convs = lateral_convs[::-1] + self.output_convs = output_convs[::-1] + self.top_block = top_block + self.in_features = tuple(in_features) + self.bottom_up = bottom_up + # Return feature names are "p", like ["p2", "p3", ..., "p6"] + self._out_feature_strides = {"p{}".format(int(math.log2(s))): s for s in strides} + # top block output feature maps. + if self.top_block is not None: + for s in range(stage, stage + self.top_block.num_levels): + self._out_feature_strides["p{}".format(s + 1)] = 2 ** (s + 1) + + self._out_features = list(self._out_feature_strides.keys()) + self._out_feature_channels = {k: out_channels for k in self._out_features} + self._size_divisibility = strides[-1] + self._square_pad = square_pad + assert fuse_type in {"avg", "sum"} + self._fuse_type = fuse_type + + @property + def size_divisibility(self): + return self._size_divisibility + + @property + def padding_constraints(self): + return {"square_size": self._square_pad} + + def forward(self, x): + """ + Args: + input (dict[str->Tensor]): mapping feature map name (e.g., "res5") to + feature map tensor for each feature level in high to low resolution order. 
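A standalone sketch (not part of the patch) of how the FPN above names its outputs: each output stage is log2 of the corresponding bottom-up stride, and a top block such as LastLevelMaxPool appends one more level.

import math

strides = [4, 8, 16, 32]  # e.g. res2..res5 from a ResNet
names = ["p{}".format(int(math.log2(s))) for s in strides]
top_block_levels = 1      # LastLevelMaxPool adds a single extra level
last_stage = int(math.log2(strides[-1]))
names += ["p{}".format(last_stage + i + 1) for i in range(top_block_levels)]
assert names == ["p2", "p3", "p4", "p5", "p6"]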
+ + Returns: + dict[str->Tensor]: + mapping from feature map name to FPN feature map tensor + in high to low resolution order. Returned feature names follow the FPN + paper convention: "p", where stage has stride = 2 ** stage e.g., + ["p2", "p3", ..., "p6"]. + """ + bottom_up_features = self.bottom_up(x) + results = [] + prev_features = self.lateral_convs[0](bottom_up_features[self.in_features[-1]]) + results.append(self.output_convs[0](prev_features)) + + # Reverse feature maps into top-down order (from low to high resolution) + for idx, (lateral_conv, output_conv) in enumerate( + zip(self.lateral_convs, self.output_convs) + ): + # Slicing of ModuleList is not supported https://github.com/pytorch/pytorch/issues/47336 + # Therefore we loop over all modules but skip the first one + if idx > 0: + features = self.in_features[-idx - 1] + features = bottom_up_features[features] + top_down_features = F.interpolate(prev_features, scale_factor=2.0, mode="nearest") + lateral_features = lateral_conv(features) + prev_features = lateral_features + top_down_features + if self._fuse_type == "avg": + prev_features /= 2 + results.insert(0, output_conv(prev_features)) + + if self.top_block is not None: + if self.top_block.in_feature in bottom_up_features: + top_block_in_feature = bottom_up_features[self.top_block.in_feature] + else: + top_block_in_feature = results[self._out_features.index(self.top_block.in_feature)] + results.extend(self.top_block(top_block_in_feature)) + assert len(self._out_features) == len(results) + return {f: res for f, res in zip(self._out_features, results)} + + def output_shape(self): + return { + name: ShapeSpec( + channels=self._out_feature_channels[name], stride=self._out_feature_strides[name] + ) + for name in self._out_features + } + + +def _assert_strides_are_log2_contiguous(strides): + """ + Assert that each stride is 2x times its preceding stride, i.e. "contiguous in log2". + """ + for i, stride in enumerate(strides[1:], 1): + assert stride == 2 * strides[i - 1], "Strides {} {} are not log2 contiguous".format( + stride, strides[i - 1] + ) + + +class LastLevelMaxPool(nn.Module): + """ + This module is used in the original FPN to generate a downsampled + P6 feature from P5. + """ + + def __init__(self): + super().__init__() + self.num_levels = 1 + self.in_feature = "p5" + + def forward(self, x): + return [F.max_pool2d(x, kernel_size=1, stride=2, padding=0)] + + +class LastLevelP6P7(nn.Module): + """ + This module is used in RetinaNet to generate extra layers, P6 and P7 from + C5 feature. + """ + + def __init__(self, in_channels, out_channels, in_feature="res5"): + super().__init__() + self.num_levels = 2 + self.in_feature = in_feature + self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1) + self.p7 = nn.Conv2d(out_channels, out_channels, 3, 2, 1) + for module in [self.p6, self.p7]: + weight_init.c2_xavier_fill(module) + + def forward(self, c5): + p6 = self.p6(c5) + p7 = self.p7(F.relu(p6)) + return [p6, p7] + + +@BACKBONE_REGISTRY.register() +def build_resnet_fpn_backbone(cfg, input_shape: ShapeSpec): + """ + Args: + cfg: a detectron2 CfgNode + + Returns: + backbone (Backbone): backbone module, must be a subclass of :class:`Backbone`. 
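A standalone sketch (not part of the patch) of one top-down fusion step from FPN.forward above: the coarser map is upsampled by 2x with nearest interpolation and added to the lateral projection of the finer bottom-up feature; the "avg" fuse type additionally halves the sum.

import torch
import torch.nn.functional as F

prev_features = torch.randn(1, 256, 8, 8)       # coarser FPN level
lateral_features = torch.randn(1, 256, 16, 16)  # lateral conv output of the finer level
top_down = F.interpolate(prev_features, scale_factor=2.0, mode="nearest")
fused_sum = lateral_features + top_down          # fuse_type == "sum"
fused_avg = (lateral_features + top_down) / 2    # fuse_type == "avg"
assert fused_sum.shape == fused_avg.shape == (1, 256, 16, 16)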
+ """ + bottom_up = build_resnet_backbone(cfg, input_shape) + in_features = cfg.MODEL.FPN.IN_FEATURES + out_channels = cfg.MODEL.FPN.OUT_CHANNELS + backbone = FPN( + bottom_up=bottom_up, + in_features=in_features, + out_channels=out_channels, + norm=cfg.MODEL.FPN.NORM, + top_block=LastLevelMaxPool(), + fuse_type=cfg.MODEL.FPN.FUSE_TYPE, + ) + return backbone + + +@BACKBONE_REGISTRY.register() +def build_retinanet_resnet_fpn_backbone(cfg, input_shape: ShapeSpec): + """ + Args: + cfg: a detectron2 CfgNode + + Returns: + backbone (Backbone): backbone module, must be a subclass of :class:`Backbone`. + """ + bottom_up = build_resnet_backbone(cfg, input_shape) + in_features = cfg.MODEL.FPN.IN_FEATURES + out_channels = cfg.MODEL.FPN.OUT_CHANNELS + in_channels_p6p7 = bottom_up.output_shape()["res5"].channels + backbone = FPN( + bottom_up=bottom_up, + in_features=in_features, + out_channels=out_channels, + norm=cfg.MODEL.FPN.NORM, + top_block=LastLevelP6P7(in_channels_p6p7, out_channels), + fuse_type=cfg.MODEL.FPN.FUSE_TYPE, + ) + return backbone diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/backbone/mvit.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/backbone/mvit.py new file mode 100644 index 0000000000000000000000000000000000000000..50667a8a836b933666761cc09d4175e64098c8aa --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/backbone/mvit.py @@ -0,0 +1,448 @@ +import logging +import numpy as np +import torch +import torch.nn as nn + +from .backbone import Backbone +from .utils import ( + PatchEmbed, + add_decomposed_rel_pos, + get_abs_pos, + window_partition, + window_unpartition, +) + +logger = logging.getLogger(__name__) + + +__all__ = ["MViT"] + + +def attention_pool(x, pool, norm=None): + # (B, H, W, C) -> (B, C, H, W) + x = x.permute(0, 3, 1, 2) + x = pool(x) + # (B, C, H1, W1) -> (B, H1, W1, C) + x = x.permute(0, 2, 3, 1) + if norm: + x = norm(x) + + return x + + +class MultiScaleAttention(nn.Module): + """Multiscale Multi-head Attention block.""" + + def __init__( + self, + dim, + dim_out, + num_heads, + qkv_bias=True, + norm_layer=nn.LayerNorm, + pool_kernel=(3, 3), + stride_q=1, + stride_kv=1, + residual_pooling=True, + window_size=0, + use_rel_pos=False, + rel_pos_zero_init=True, + input_size=None, + ): + """ + Args: + dim (int): Number of input channels. + dim_out (int): Number of output channels. + num_heads (int): Number of attention heads. + qkv_bias (bool: If True, add a learnable bias to query, key, value. + norm_layer (nn.Module): Normalization layer. + pool_kernel (tuple): kernel size for qkv pooling layers. + stride_q (int): stride size for q pooling layer. + stride_kv (int): stride size for kv pooling layer. + residual_pooling (bool): If true, enable residual pooling. + use_rel_pos (bool): If True, add relative postional embeddings to the attention map. + rel_pos_zero_init (bool): If True, zero initialize relative positional parameters. + input_size (int or None): Input resolution. 
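A standalone sketch (not part of the patch) of the layout handling in attention_pool above: the pooling convolution operates channel-first, so the (B, H, W, C) attention tensor is permuted, pooled, and permuted back; a stride-2 pool halves the spatial size.

import torch
import torch.nn as nn

x = torch.randn(2, 14, 14, 32)  # (B, H, W, C), as used inside MultiScaleAttention
pool = nn.Conv2d(32, 32, 3, stride=2, padding=1, groups=32, bias=False)
out = pool(x.permute(0, 3, 1, 2)).permute(0, 2, 3, 1)  # back to (B, H, W, C)
assert out.shape == (2, 7, 7, 32)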
+ """ + super().__init__() + self.num_heads = num_heads + head_dim = dim_out // num_heads + self.scale = head_dim**-0.5 + + self.qkv = nn.Linear(dim, dim_out * 3, bias=qkv_bias) + self.proj = nn.Linear(dim_out, dim_out) + + # qkv pooling + pool_padding = [k // 2 for k in pool_kernel] + dim_conv = dim_out // num_heads + self.pool_q = nn.Conv2d( + dim_conv, + dim_conv, + pool_kernel, + stride=stride_q, + padding=pool_padding, + groups=dim_conv, + bias=False, + ) + self.norm_q = norm_layer(dim_conv) + self.pool_k = nn.Conv2d( + dim_conv, + dim_conv, + pool_kernel, + stride=stride_kv, + padding=pool_padding, + groups=dim_conv, + bias=False, + ) + self.norm_k = norm_layer(dim_conv) + self.pool_v = nn.Conv2d( + dim_conv, + dim_conv, + pool_kernel, + stride=stride_kv, + padding=pool_padding, + groups=dim_conv, + bias=False, + ) + self.norm_v = norm_layer(dim_conv) + + self.window_size = window_size + if window_size: + self.q_win_size = window_size // stride_q + self.kv_win_size = window_size // stride_kv + self.residual_pooling = residual_pooling + + self.use_rel_pos = use_rel_pos + if self.use_rel_pos: + # initialize relative positional embeddings + assert input_size[0] == input_size[1] + size = input_size[0] + rel_dim = 2 * max(size // stride_q, size // stride_kv) - 1 + self.rel_pos_h = nn.Parameter(torch.zeros(rel_dim, head_dim)) + self.rel_pos_w = nn.Parameter(torch.zeros(rel_dim, head_dim)) + + if not rel_pos_zero_init: + nn.init.trunc_normal_(self.rel_pos_h, std=0.02) + nn.init.trunc_normal_(self.rel_pos_w, std=0.02) + + def forward(self, x): + B, H, W, _ = x.shape + # qkv with shape (3, B, nHead, H, W, C) + qkv = self.qkv(x).reshape(B, H, W, 3, self.num_heads, -1).permute(3, 0, 4, 1, 2, 5) + # q, k, v with shape (B * nHead, H, W, C) + q, k, v = qkv.reshape(3, B * self.num_heads, H, W, -1).unbind(0) + + q = attention_pool(q, self.pool_q, self.norm_q) + k = attention_pool(k, self.pool_k, self.norm_k) + v = attention_pool(v, self.pool_v, self.norm_v) + + ori_q = q + if self.window_size: + q, q_hw_pad = window_partition(q, self.q_win_size) + k, kv_hw_pad = window_partition(k, self.kv_win_size) + v, _ = window_partition(v, self.kv_win_size) + q_hw = (self.q_win_size, self.q_win_size) + kv_hw = (self.kv_win_size, self.kv_win_size) + else: + q_hw = q.shape[1:3] + kv_hw = k.shape[1:3] + + q = q.view(q.shape[0], np.prod(q_hw), -1) + k = k.view(k.shape[0], np.prod(kv_hw), -1) + v = v.view(v.shape[0], np.prod(kv_hw), -1) + + attn = (q * self.scale) @ k.transpose(-2, -1) + + if self.use_rel_pos: + attn = add_decomposed_rel_pos(attn, q, self.rel_pos_h, self.rel_pos_w, q_hw, kv_hw) + + attn = attn.softmax(dim=-1) + x = attn @ v + + x = x.view(x.shape[0], q_hw[0], q_hw[1], -1) + + if self.window_size: + x = window_unpartition(x, self.q_win_size, q_hw_pad, ori_q.shape[1:3]) + + if self.residual_pooling: + x += ori_q + + H, W = x.shape[1], x.shape[2] + x = x.view(B, self.num_heads, H, W, -1).permute(0, 2, 3, 1, 4).reshape(B, H, W, -1) + x = self.proj(x) + + return x + + +class MultiScaleBlock(nn.Module): + """Multiscale Transformer blocks""" + + def __init__( + self, + dim, + dim_out, + num_heads, + mlp_ratio=4.0, + qkv_bias=True, + drop_path=0.0, + norm_layer=nn.LayerNorm, + act_layer=nn.GELU, + qkv_pool_kernel=(3, 3), + stride_q=1, + stride_kv=1, + residual_pooling=True, + window_size=0, + use_rel_pos=False, + rel_pos_zero_init=True, + input_size=None, + ): + """ + Args: + dim (int): Number of input channels. + dim_out (int): Number of output channels. 
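A standalone sketch (not part of the patch) of the qkv reshaping performed in MultiScaleAttention.forward above: one linear layer emits 3 * dim_out channels, which are split into per-head q, k and v of shape (B * num_heads, H, W, head_dim).

import torch
import torch.nn as nn

B, H, W, dim, dim_out, num_heads = 2, 7, 7, 96, 96, 2
qkv_layer = nn.Linear(dim, dim_out * 3)
x = torch.randn(B, H, W, dim)
qkv = qkv_layer(x).reshape(B, H, W, 3, num_heads, -1).permute(3, 0, 4, 1, 2, 5)
q, k, v = qkv.reshape(3, B * num_heads, H, W, -1).unbind(0)
assert q.shape == (B * num_heads, H, W, dim_out // num_heads)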
+ num_heads (int): Number of attention heads in the MViT block. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool): If True, add a learnable bias to query, key, value. + drop_path (float): Stochastic depth rate. + norm_layer (nn.Module): Normalization layer. + act_layer (nn.Module): Activation layer. + qkv_pool_kernel (tuple): kernel size for qkv pooling layers. + stride_q (int): stride size for q pooling layer. + stride_kv (int): stride size for kv pooling layer. + residual_pooling (bool): If true, enable residual pooling. + window_size (int): Window size for window attention blocks. If it equals 0, then not + use window attention. + use_rel_pos (bool): If True, add relative postional embeddings to the attention map. + rel_pos_zero_init (bool): If True, zero initialize relative positional parameters. + input_size (int or None): Input resolution. + """ + super().__init__() + self.norm1 = norm_layer(dim) + self.attn = MultiScaleAttention( + dim, + dim_out, + num_heads=num_heads, + qkv_bias=qkv_bias, + norm_layer=norm_layer, + pool_kernel=qkv_pool_kernel, + stride_q=stride_q, + stride_kv=stride_kv, + residual_pooling=residual_pooling, + window_size=window_size, + use_rel_pos=use_rel_pos, + rel_pos_zero_init=rel_pos_zero_init, + input_size=input_size, + ) + + from timm.models.layers import DropPath, Mlp + + self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity() + self.norm2 = norm_layer(dim_out) + self.mlp = Mlp( + in_features=dim_out, + hidden_features=int(dim_out * mlp_ratio), + out_features=dim_out, + act_layer=act_layer, + ) + + if dim != dim_out: + self.proj = nn.Linear(dim, dim_out) + + if stride_q > 1: + kernel_skip = stride_q + 1 + padding_skip = int(kernel_skip // 2) + self.pool_skip = nn.MaxPool2d(kernel_skip, stride_q, padding_skip, ceil_mode=False) + + def forward(self, x): + x_norm = self.norm1(x) + x_block = self.attn(x_norm) + + if hasattr(self, "proj"): + x = self.proj(x_norm) + if hasattr(self, "pool_skip"): + x = attention_pool(x, self.pool_skip) + + x = x + self.drop_path(x_block) + x = x + self.drop_path(self.mlp(self.norm2(x))) + + return x + + +class MViT(Backbone): + """ + This module implements Multiscale Vision Transformer (MViT) backbone in :paper:'mvitv2'. + """ + + def __init__( + self, + img_size=224, + patch_kernel=(7, 7), + patch_stride=(4, 4), + patch_padding=(3, 3), + in_chans=3, + embed_dim=96, + depth=16, + num_heads=1, + last_block_indexes=(0, 2, 11, 15), + qkv_pool_kernel=(3, 3), + adaptive_kv_stride=4, + adaptive_window_size=56, + residual_pooling=True, + mlp_ratio=4.0, + qkv_bias=True, + drop_path_rate=0.0, + norm_layer=nn.LayerNorm, + act_layer=nn.GELU, + use_abs_pos=False, + use_rel_pos=True, + rel_pos_zero_init=True, + use_act_checkpoint=False, + pretrain_img_size=224, + pretrain_use_cls_token=True, + out_features=("scale2", "scale3", "scale4", "scale5"), + ): + """ + Args: + img_size (int): Input image size. + patch_kernel (tuple): kernel size for patch embedding. + patch_stride (tuple): stride size for patch embedding. + patch_padding (tuple): padding size for patch embedding. + in_chans (int): Number of input image channels. + embed_dim (int): Patch embedding dimension. + depth (int): Depth of MViT. + num_heads (int): Number of base attention heads in each MViT block. + last_block_indexes (tuple): Block indexes for last blocks in each stage. + qkv_pool_kernel (tuple): kernel size for qkv pooling layers. + adaptive_kv_stride (int): adaptive stride size for kv pooling. 
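A standalone sketch (not part of the patch) of the residual-path pooling in MultiScaleBlock above: when stride_q > 1, the skip connection is max-pooled with kernel stride_q + 1 (applied through attention_pool, which handles the channel-last layout) so its spatial size matches the pooled attention output.

import torch
import torch.nn as nn

stride_q = 2
kernel_skip = stride_q + 1
pool_skip = nn.MaxPool2d(kernel_skip, stride_q, kernel_skip // 2, ceil_mode=False)

x = torch.randn(1, 96, 56, 56)  # residual path shown channel-first for brevity
assert pool_skip(x).shape == (1, 96, 28, 28)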
+ adaptive_window_size (int): adaptive window size for window attention blocks. + residual_pooling (bool): If true, enable residual pooling. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool): If True, add a learnable bias to query, key, value. + drop_path_rate (float): Stochastic depth rate. + norm_layer (nn.Module): Normalization layer. + act_layer (nn.Module): Activation layer. + use_abs_pos (bool): If True, use absolute positional embeddings. + use_rel_pos (bool): If True, add relative postional embeddings to the attention map. + rel_pos_zero_init (bool): If True, zero initialize relative positional parameters. + window_size (int): Window size for window attention blocks. + use_act_checkpoint (bool): If True, use activation checkpointing. + pretrain_img_size (int): input image size for pretraining models. + pretrain_use_cls_token (bool): If True, pretrainig models use class token. + out_features (tuple): name of the feature maps from each stage. + """ + super().__init__() + self.pretrain_use_cls_token = pretrain_use_cls_token + + self.patch_embed = PatchEmbed( + kernel_size=patch_kernel, + stride=patch_stride, + padding=patch_padding, + in_chans=in_chans, + embed_dim=embed_dim, + ) + + if use_abs_pos: + # Initialize absoluate positional embedding with pretrain image size. + num_patches = (pretrain_img_size // patch_stride[0]) * ( + pretrain_img_size // patch_stride[1] + ) + num_positions = (num_patches + 1) if pretrain_use_cls_token else num_patches + self.pos_embed = nn.Parameter(torch.zeros(1, num_positions, embed_dim)) + else: + self.pos_embed = None + + # stochastic depth decay rule + dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)] + dim_out = embed_dim + stride_kv = adaptive_kv_stride + window_size = adaptive_window_size + input_size = (img_size // patch_stride[0], img_size // patch_stride[1]) + stage = 2 + stride = patch_stride[0] + self._out_feature_strides = {} + self._out_feature_channels = {} + self.blocks = nn.ModuleList() + for i in range(depth): + # Multiply stride_kv by 2 if it's the last block of stage2 and stage3. + if i == last_block_indexes[1] or i == last_block_indexes[2]: + stride_kv_ = stride_kv * 2 + else: + stride_kv_ = stride_kv + # hybrid window attention: global attention in last three stages. 
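A small standalone check (not part of the patch) of the stochastic depth decay rule used in the block loop above: drop-path rates increase linearly from 0 to drop_path_rate across the blocks.

import torch

depth, drop_path_rate = 4, 0.3
dpr = [round(x.item(), 3) for x in torch.linspace(0, drop_path_rate, depth)]
assert dpr == [0.0, 0.1, 0.2, 0.3]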
+ window_size_ = 0 if i in last_block_indexes[1:] else window_size + block = MultiScaleBlock( + dim=embed_dim, + dim_out=dim_out, + num_heads=num_heads, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + drop_path=dpr[i], + norm_layer=norm_layer, + qkv_pool_kernel=qkv_pool_kernel, + stride_q=2 if i - 1 in last_block_indexes else 1, + stride_kv=stride_kv_, + residual_pooling=residual_pooling, + window_size=window_size_, + use_rel_pos=use_rel_pos, + rel_pos_zero_init=rel_pos_zero_init, + input_size=input_size, + ) + if use_act_checkpoint: + # TODO: use torch.utils.checkpoint + from fairscale.nn.checkpoint import checkpoint_wrapper + + block = checkpoint_wrapper(block) + self.blocks.append(block) + + embed_dim = dim_out + if i in last_block_indexes: + name = f"scale{stage}" + if name in out_features: + self._out_feature_channels[name] = dim_out + self._out_feature_strides[name] = stride + self.add_module(f"{name}_norm", norm_layer(dim_out)) + + dim_out *= 2 + num_heads *= 2 + stride_kv = max(stride_kv // 2, 1) + stride *= 2 + stage += 1 + if i - 1 in last_block_indexes: + window_size = window_size // 2 + input_size = [s // 2 for s in input_size] + + self._out_features = out_features + self._last_block_indexes = last_block_indexes + + if self.pos_embed is not None: + nn.init.trunc_normal_(self.pos_embed, std=0.02) + + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + nn.init.trunc_normal_(m.weight, std=0.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + + def forward(self, x): + x = self.patch_embed(x) + + if self.pos_embed is not None: + x = x + get_abs_pos(self.pos_embed, self.pretrain_use_cls_token, x.shape[1:3]) + + outputs = {} + stage = 2 + for i, blk in enumerate(self.blocks): + x = blk(x) + if i in self._last_block_indexes: + name = f"scale{stage}" + if name in self._out_features: + x_out = getattr(self, f"{name}_norm")(x) + outputs[name] = x_out.permute(0, 3, 1, 2) + stage += 1 + + return outputs diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/backbone/regnet.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/backbone/regnet.py new file mode 100644 index 0000000000000000000000000000000000000000..a9d5b1c8c2d71abccedca7c2cca1117588407e9f --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/backbone/regnet.py @@ -0,0 +1,452 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +""" +Implementation of RegNet models from :paper:`dds` and :paper:`scaling`. + +This code is adapted from https://github.com/facebookresearch/pycls with minimal modifications. +Some code duplication exists between RegNet and ResNets (e.g., ResStem) in order to simplify +model loading. +""" + +import numpy as np +from torch import nn + +from annotator.oneformer.detectron2.layers import CNNBlockBase, ShapeSpec, get_norm + +from .backbone import Backbone + +__all__ = [ + "AnyNet", + "RegNet", + "ResStem", + "SimpleStem", + "VanillaBlock", + "ResBasicBlock", + "ResBottleneckBlock", +] + + +def conv2d(w_in, w_out, k, *, stride=1, groups=1, bias=False): + """Helper for building a conv2d layer.""" + assert k % 2 == 1, "Only odd size kernels supported to avoid padding issues." 
+ s, p, g, b = stride, (k - 1) // 2, groups, bias + return nn.Conv2d(w_in, w_out, k, stride=s, padding=p, groups=g, bias=b) + + +def gap2d(): + """Helper for building a global average pooling layer.""" + return nn.AdaptiveAvgPool2d((1, 1)) + + +def pool2d(k, *, stride=1): + """Helper for building a pool2d layer.""" + assert k % 2 == 1, "Only odd size kernels supported to avoid padding issues." + return nn.MaxPool2d(k, stride=stride, padding=(k - 1) // 2) + + +def init_weights(m): + """Performs ResNet-style weight initialization.""" + if isinstance(m, nn.Conv2d): + # Note that there is no bias due to BN + fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(mean=0.0, std=np.sqrt(2.0 / fan_out)) + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1.0) + m.bias.data.zero_() + elif isinstance(m, nn.Linear): + m.weight.data.normal_(mean=0.0, std=0.01) + m.bias.data.zero_() + + +class ResStem(CNNBlockBase): + """ResNet stem for ImageNet: 7x7, BN, AF, MaxPool.""" + + def __init__(self, w_in, w_out, norm, activation_class): + super().__init__(w_in, w_out, 4) + self.conv = conv2d(w_in, w_out, 7, stride=2) + self.bn = get_norm(norm, w_out) + self.af = activation_class() + self.pool = pool2d(3, stride=2) + + def forward(self, x): + for layer in self.children(): + x = layer(x) + return x + + +class SimpleStem(CNNBlockBase): + """Simple stem for ImageNet: 3x3, BN, AF.""" + + def __init__(self, w_in, w_out, norm, activation_class): + super().__init__(w_in, w_out, 2) + self.conv = conv2d(w_in, w_out, 3, stride=2) + self.bn = get_norm(norm, w_out) + self.af = activation_class() + + def forward(self, x): + for layer in self.children(): + x = layer(x) + return x + + +class SE(nn.Module): + """Squeeze-and-Excitation (SE) block: AvgPool, FC, Act, FC, Sigmoid.""" + + def __init__(self, w_in, w_se, activation_class): + super().__init__() + self.avg_pool = gap2d() + self.f_ex = nn.Sequential( + conv2d(w_in, w_se, 1, bias=True), + activation_class(), + conv2d(w_se, w_in, 1, bias=True), + nn.Sigmoid(), + ) + + def forward(self, x): + return x * self.f_ex(self.avg_pool(x)) + + +class VanillaBlock(CNNBlockBase): + """Vanilla block: [3x3 conv, BN, Relu] x2.""" + + def __init__(self, w_in, w_out, stride, norm, activation_class, _params): + super().__init__(w_in, w_out, stride) + self.a = conv2d(w_in, w_out, 3, stride=stride) + self.a_bn = get_norm(norm, w_out) + self.a_af = activation_class() + self.b = conv2d(w_out, w_out, 3) + self.b_bn = get_norm(norm, w_out) + self.b_af = activation_class() + + def forward(self, x): + for layer in self.children(): + x = layer(x) + return x + + +class BasicTransform(nn.Module): + """Basic transformation: [3x3 conv, BN, Relu] x2.""" + + def __init__(self, w_in, w_out, stride, norm, activation_class, _params): + super().__init__() + self.a = conv2d(w_in, w_out, 3, stride=stride) + self.a_bn = get_norm(norm, w_out) + self.a_af = activation_class() + self.b = conv2d(w_out, w_out, 3) + self.b_bn = get_norm(norm, w_out) + self.b_bn.final_bn = True + + def forward(self, x): + for layer in self.children(): + x = layer(x) + return x + + +class ResBasicBlock(CNNBlockBase): + """Residual basic block: x + f(x), f = basic transform.""" + + def __init__(self, w_in, w_out, stride, norm, activation_class, params): + super().__init__(w_in, w_out, stride) + self.proj, self.bn = None, None + if (w_in != w_out) or (stride != 1): + self.proj = conv2d(w_in, w_out, 1, stride=stride) + self.bn = get_norm(norm, w_out) + self.f = BasicTransform(w_in, w_out, stride, 
norm, activation_class, params) + self.af = activation_class() + + def forward(self, x): + x_p = self.bn(self.proj(x)) if self.proj else x + return self.af(x_p + self.f(x)) + + +class BottleneckTransform(nn.Module): + """Bottleneck transformation: 1x1, 3x3 [+SE], 1x1.""" + + def __init__(self, w_in, w_out, stride, norm, activation_class, params): + super().__init__() + w_b = int(round(w_out * params["bot_mul"])) + w_se = int(round(w_in * params["se_r"])) + groups = w_b // params["group_w"] + self.a = conv2d(w_in, w_b, 1) + self.a_bn = get_norm(norm, w_b) + self.a_af = activation_class() + self.b = conv2d(w_b, w_b, 3, stride=stride, groups=groups) + self.b_bn = get_norm(norm, w_b) + self.b_af = activation_class() + self.se = SE(w_b, w_se, activation_class) if w_se else None + self.c = conv2d(w_b, w_out, 1) + self.c_bn = get_norm(norm, w_out) + self.c_bn.final_bn = True + + def forward(self, x): + for layer in self.children(): + x = layer(x) + return x + + +class ResBottleneckBlock(CNNBlockBase): + """Residual bottleneck block: x + f(x), f = bottleneck transform.""" + + def __init__(self, w_in, w_out, stride, norm, activation_class, params): + super().__init__(w_in, w_out, stride) + self.proj, self.bn = None, None + if (w_in != w_out) or (stride != 1): + self.proj = conv2d(w_in, w_out, 1, stride=stride) + self.bn = get_norm(norm, w_out) + self.f = BottleneckTransform(w_in, w_out, stride, norm, activation_class, params) + self.af = activation_class() + + def forward(self, x): + x_p = self.bn(self.proj(x)) if self.proj else x + return self.af(x_p + self.f(x)) + + +class AnyStage(nn.Module): + """AnyNet stage (sequence of blocks w/ the same output shape).""" + + def __init__(self, w_in, w_out, stride, d, block_class, norm, activation_class, params): + super().__init__() + for i in range(d): + block = block_class(w_in, w_out, stride, norm, activation_class, params) + self.add_module("b{}".format(i + 1), block) + stride, w_in = 1, w_out + + def forward(self, x): + for block in self.children(): + x = block(x) + return x + + +class AnyNet(Backbone): + """AnyNet model. See :paper:`dds`.""" + + def __init__( + self, + *, + stem_class, + stem_width, + block_class, + depths, + widths, + group_widths, + strides, + bottleneck_ratios, + se_ratio, + activation_class, + freeze_at=0, + norm="BN", + out_features=None, + ): + """ + Args: + stem_class (callable): A callable taking 4 arguments (channels in, channels out, + normalization, callable returning an activation function) that returns another + callable implementing the stem module. + stem_width (int): The number of output channels that the stem produces. + block_class (callable): A callable taking 6 arguments (channels in, channels out, + stride, normalization, callable returning an activation function, a dict of + block-specific parameters) that returns another callable implementing the repeated + block module. + depths (list[int]): Number of blocks in each stage. + widths (list[int]): For each stage, the number of output channels of each block. + group_widths (list[int]): For each stage, the number of channels per group in group + convolution, if the block uses group convolution. + strides (list[int]): The stride that each network stage applies to its input. + bottleneck_ratios (list[float]): For each stage, the ratio of the number of bottleneck + channels to the number of block input channels (or, equivalently, output channels), + if the block uses a bottleneck. 
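A standalone sketch (not part of the patch) of the width bookkeeping in BottleneckTransform above: the bottleneck width is the block output width scaled by bot_mul, the group count comes from the per-group width, and the squeeze-excitation width is derived from the block input width.

w_in, w_out = 64, 256
params = {"bot_mul": 0.5, "group_w": 16, "se_r": 0.25}
w_b = int(round(w_out * params["bot_mul"]))  # bottleneck channels
groups = w_b // params["group_w"]            # groups for the 3x3 conv
w_se = int(round(w_in * params["se_r"]))     # SE channels, from w_in
assert (w_b, groups, w_se) == (128, 8, 16)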
+ se_ratio (float): The ratio of the number of channels used inside the squeeze-excitation + (SE) module to it number of input channels, if SE the block uses SE. + activation_class (callable): A callable taking no arguments that returns another + callable implementing an activation function. + freeze_at (int): The number of stages at the beginning to freeze. + see :meth:`freeze` for detailed explanation. + norm (str or callable): normalization for all conv layers. + See :func:`layers.get_norm` for supported format. + out_features (list[str]): name of the layers whose outputs should + be returned in forward. RegNet's use "stem" and "s1", "s2", etc for the stages after + the stem. If None, will return the output of the last layer. + """ + super().__init__() + self.stem = stem_class(3, stem_width, norm, activation_class) + + current_stride = self.stem.stride + self._out_feature_strides = {"stem": current_stride} + self._out_feature_channels = {"stem": self.stem.out_channels} + self.stages_and_names = [] + prev_w = stem_width + + for i, (d, w, s, b, g) in enumerate( + zip(depths, widths, strides, bottleneck_ratios, group_widths) + ): + params = {"bot_mul": b, "group_w": g, "se_r": se_ratio} + stage = AnyStage(prev_w, w, s, d, block_class, norm, activation_class, params) + name = "s{}".format(i + 1) + self.add_module(name, stage) + self.stages_and_names.append((stage, name)) + self._out_feature_strides[name] = current_stride = int( + current_stride * np.prod([k.stride for k in stage.children()]) + ) + self._out_feature_channels[name] = list(stage.children())[-1].out_channels + prev_w = w + + self.apply(init_weights) + + if out_features is None: + out_features = [name] + self._out_features = out_features + assert len(self._out_features) + children = [x[0] for x in self.named_children()] + for out_feature in self._out_features: + assert out_feature in children, "Available children: {} does not include {}".format( + ", ".join(children), out_feature + ) + self.freeze(freeze_at) + + def forward(self, x): + """ + Args: + x: Tensor of shape (N,C,H,W). H, W must be a multiple of ``self.size_divisibility``. + + Returns: + dict[str->Tensor]: names and the corresponding features + """ + assert x.dim() == 4, f"Model takes an input of shape (N, C, H, W). Got {x.shape} instead!" + outputs = {} + x = self.stem(x) + if "stem" in self._out_features: + outputs["stem"] = x + for stage, name in self.stages_and_names: + x = stage(x) + if name in self._out_features: + outputs[name] = x + return outputs + + def output_shape(self): + return { + name: ShapeSpec( + channels=self._out_feature_channels[name], stride=self._out_feature_strides[name] + ) + for name in self._out_features + } + + def freeze(self, freeze_at=0): + """ + Freeze the first several stages of the model. Commonly used in fine-tuning. + + Layers that produce the same feature map spatial size are defined as one + "stage" by :paper:`FPN`. + + Args: + freeze_at (int): number of stages to freeze. + `1` means freezing the stem. `2` means freezing the stem and + one residual stage, etc. 
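A standalone sketch (not part of the patch) of how AnyNet above accumulates per-stage output strides: the running stride is multiplied by the product of the block strides in each stage, and only the first block of a stage strides.

import numpy as np

current_stride = 2  # SimpleStem stride
stage_block_strides = [[2, 1, 1], [2, 1, 1, 1]]
strides = {}
for i, block_strides in enumerate(stage_block_strides):
    current_stride = int(current_stride * np.prod(block_strides))
    strides["s{}".format(i + 1)] = current_stride
assert strides == {"s1": 4, "s2": 8}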
+ + Returns: + nn.Module: this model itself + """ + if freeze_at >= 1: + self.stem.freeze() + for idx, (stage, _) in enumerate(self.stages_and_names, start=2): + if freeze_at >= idx: + for block in stage.children(): + block.freeze() + return self + + +def adjust_block_compatibility(ws, bs, gs): + """Adjusts the compatibility of widths, bottlenecks, and groups.""" + assert len(ws) == len(bs) == len(gs) + assert all(w > 0 and b > 0 and g > 0 for w, b, g in zip(ws, bs, gs)) + vs = [int(max(1, w * b)) for w, b in zip(ws, bs)] + gs = [int(min(g, v)) for g, v in zip(gs, vs)] + ms = [np.lcm(g, b) if b > 1 else g for g, b in zip(gs, bs)] + vs = [max(m, int(round(v / m) * m)) for v, m in zip(vs, ms)] + ws = [int(v / b) for v, b in zip(vs, bs)] + assert all(w * b % g == 0 for w, b, g in zip(ws, bs, gs)) + return ws, bs, gs + + +def generate_regnet_parameters(w_a, w_0, w_m, d, q=8): + """Generates per stage widths and depths from RegNet parameters.""" + assert w_a >= 0 and w_0 > 0 and w_m > 1 and w_0 % q == 0 + # Generate continuous per-block ws + ws_cont = np.arange(d) * w_a + w_0 + # Generate quantized per-block ws + ks = np.round(np.log(ws_cont / w_0) / np.log(w_m)) + ws_all = w_0 * np.power(w_m, ks) + ws_all = np.round(np.divide(ws_all, q)).astype(int) * q + # Generate per stage ws and ds (assumes ws_all are sorted) + ws, ds = np.unique(ws_all, return_counts=True) + # Compute number of actual stages and total possible stages + num_stages, total_stages = len(ws), ks.max() + 1 + # Convert numpy arrays to lists and return + ws, ds, ws_all, ws_cont = (x.tolist() for x in (ws, ds, ws_all, ws_cont)) + return ws, ds, num_stages, total_stages, ws_all, ws_cont + + +class RegNet(AnyNet): + """RegNet model. See :paper:`dds`.""" + + def __init__( + self, + *, + stem_class, + stem_width, + block_class, + depth, + w_a, + w_0, + w_m, + group_width, + stride=2, + bottleneck_ratio=1.0, + se_ratio=0.0, + activation_class=None, + freeze_at=0, + norm="BN", + out_features=None, + ): + """ + Build a RegNet from the parameterization described in :paper:`dds` Section 3.3. + + Args: + See :class:`AnyNet` for arguments that are not listed here. + depth (int): Total number of blocks in the RegNet. + w_a (float): Factor by which block width would increase prior to quantizing block widths + by stage. See :paper:`dds` Section 3.3. + w_0 (int): Initial block width. See :paper:`dds` Section 3.3. + w_m (float): Parameter controlling block width quantization. + See :paper:`dds` Section 3.3. + group_width (int): Number of channels per group in group convolution, if the block uses + group convolution. + bottleneck_ratio (float): The ratio of the number of bottleneck channels to the number + of block input channels (or, equivalently, output channels), if the block uses a + bottleneck. + stride (int): The stride that each network stage applies to its input. 
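A hand-checkable run (not part of the patch) of generate_regnet_parameters above, assuming the vendored module is importable: with w_0=32, w_a=8, w_m=2 and depth 8, the continuous widths 32, 40, ..., 88 quantize into two stages of widths 32 and 64 with depths 2 and 6.

from annotator.oneformer.detectron2.modeling.backbone.regnet import generate_regnet_parameters

ws, ds, num_stages, _, _, _ = generate_regnet_parameters(w_a=8, w_0=32, w_m=2, d=8)
assert (ws, ds, num_stages) == ([32, 64], [2, 6], 2)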
+ """ + ws, ds = generate_regnet_parameters(w_a, w_0, w_m, depth)[0:2] + ss = [stride for _ in ws] + bs = [bottleneck_ratio for _ in ws] + gs = [group_width for _ in ws] + ws, bs, gs = adjust_block_compatibility(ws, bs, gs) + + def default_activation_class(): + return nn.ReLU(inplace=True) + + super().__init__( + stem_class=stem_class, + stem_width=stem_width, + block_class=block_class, + depths=ds, + widths=ws, + strides=ss, + group_widths=gs, + bottleneck_ratios=bs, + se_ratio=se_ratio, + activation_class=default_activation_class + if activation_class is None + else activation_class, + freeze_at=freeze_at, + norm=norm, + out_features=out_features, + ) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/backbone/resnet.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/backbone/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..34d6edf2e2ec3515ed1a395658ded85c280000b0 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/backbone/resnet.py @@ -0,0 +1,694 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import numpy as np +import fvcore.nn.weight_init as weight_init +import torch +import torch.nn.functional as F +from torch import nn + +from annotator.oneformer.detectron2.layers import ( + CNNBlockBase, + Conv2d, + DeformConv, + ModulatedDeformConv, + ShapeSpec, + get_norm, +) + +from .backbone import Backbone +from .build import BACKBONE_REGISTRY + +__all__ = [ + "ResNetBlockBase", + "BasicBlock", + "BottleneckBlock", + "DeformBottleneckBlock", + "BasicStem", + "ResNet", + "make_stage", + "build_resnet_backbone", +] + + +class BasicBlock(CNNBlockBase): + """ + The basic residual block for ResNet-18 and ResNet-34 defined in :paper:`ResNet`, + with two 3x3 conv layers and a projection shortcut if needed. + """ + + def __init__(self, in_channels, out_channels, *, stride=1, norm="BN"): + """ + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + stride (int): Stride for the first conv. + norm (str or callable): normalization for all conv layers. + See :func:`layers.get_norm` for supported format. + """ + super().__init__(in_channels, out_channels, stride) + + if in_channels != out_channels: + self.shortcut = Conv2d( + in_channels, + out_channels, + kernel_size=1, + stride=stride, + bias=False, + norm=get_norm(norm, out_channels), + ) + else: + self.shortcut = None + + self.conv1 = Conv2d( + in_channels, + out_channels, + kernel_size=3, + stride=stride, + padding=1, + bias=False, + norm=get_norm(norm, out_channels), + ) + + self.conv2 = Conv2d( + out_channels, + out_channels, + kernel_size=3, + stride=1, + padding=1, + bias=False, + norm=get_norm(norm, out_channels), + ) + + for layer in [self.conv1, self.conv2, self.shortcut]: + if layer is not None: # shortcut can be None + weight_init.c2_msra_fill(layer) + + def forward(self, x): + out = self.conv1(x) + out = F.relu_(out) + out = self.conv2(out) + + if self.shortcut is not None: + shortcut = self.shortcut(x) + else: + shortcut = x + + out += shortcut + out = F.relu_(out) + return out + + +class BottleneckBlock(CNNBlockBase): + """ + The standard bottleneck residual block used by ResNet-50, 101 and 152 + defined in :paper:`ResNet`. It contains 3 conv layers with kernels + 1x1, 3x3, 1x1, and a projection shortcut if needed. 
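A standalone sketch (not part of the patch) exercising the BasicBlock defined above: when the channel count or stride changes, a 1x1 projection shortcut is created so the residual addition still lines up. Imports assume the vendored package layout.

import torch
from annotator.oneformer.detectron2.modeling.backbone.resnet import BasicBlock

block = BasicBlock(64, 128, stride=2, norm="BN")
assert block.shortcut is not None  # projection needed because 64 != 128
assert block(torch.randn(1, 64, 32, 32)).shape == (1, 128, 16, 16)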
+ """ + + def __init__( + self, + in_channels, + out_channels, + *, + bottleneck_channels, + stride=1, + num_groups=1, + norm="BN", + stride_in_1x1=False, + dilation=1, + ): + """ + Args: + bottleneck_channels (int): number of output channels for the 3x3 + "bottleneck" conv layers. + num_groups (int): number of groups for the 3x3 conv layer. + norm (str or callable): normalization for all conv layers. + See :func:`layers.get_norm` for supported format. + stride_in_1x1 (bool): when stride>1, whether to put stride in the + first 1x1 convolution or the bottleneck 3x3 convolution. + dilation (int): the dilation rate of the 3x3 conv layer. + """ + super().__init__(in_channels, out_channels, stride) + + if in_channels != out_channels: + self.shortcut = Conv2d( + in_channels, + out_channels, + kernel_size=1, + stride=stride, + bias=False, + norm=get_norm(norm, out_channels), + ) + else: + self.shortcut = None + + # The original MSRA ResNet models have stride in the first 1x1 conv + # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations have + # stride in the 3x3 conv + stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride) + + self.conv1 = Conv2d( + in_channels, + bottleneck_channels, + kernel_size=1, + stride=stride_1x1, + bias=False, + norm=get_norm(norm, bottleneck_channels), + ) + + self.conv2 = Conv2d( + bottleneck_channels, + bottleneck_channels, + kernel_size=3, + stride=stride_3x3, + padding=1 * dilation, + bias=False, + groups=num_groups, + dilation=dilation, + norm=get_norm(norm, bottleneck_channels), + ) + + self.conv3 = Conv2d( + bottleneck_channels, + out_channels, + kernel_size=1, + bias=False, + norm=get_norm(norm, out_channels), + ) + + for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]: + if layer is not None: # shortcut can be None + weight_init.c2_msra_fill(layer) + + # Zero-initialize the last normalization in each residual branch, + # so that at the beginning, the residual branch starts with zeros, + # and each residual block behaves like an identity. + # See Sec 5.1 in "Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour": + # "For BN layers, the learnable scaling coefficient γ is initialized + # to be 1, except for each residual block's last BN + # where γ is initialized to be 0." + + # nn.init.constant_(self.conv3.norm.weight, 0) + # TODO this somehow hurts performance when training GN models from scratch. + # Add it as an option when we need to use this code to train a backbone. + + def forward(self, x): + out = self.conv1(x) + out = F.relu_(out) + + out = self.conv2(out) + out = F.relu_(out) + + out = self.conv3(out) + + if self.shortcut is not None: + shortcut = self.shortcut(x) + else: + shortcut = x + + out += shortcut + out = F.relu_(out) + return out + + +class DeformBottleneckBlock(CNNBlockBase): + """ + Similar to :class:`BottleneckBlock`, but with :paper:`deformable conv ` + in the 3x3 convolution. 
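A standalone sketch (not part of the patch) exercising the BottleneckBlock above: channels are compressed to bottleneck_channels inside the block and expanded back by the final 1x1 conv, and with stride_in_1x1=False the stride sits on the 3x3 conv. Imports assume the vendored package layout.

import torch
from annotator.oneformer.detectron2.modeling.backbone.resnet import BottleneckBlock

block = BottleneckBlock(
    256, 512, bottleneck_channels=128, stride=2, norm="BN", stride_in_1x1=False
)
assert block.conv2.stride == (2, 2)  # stride placed on the 3x3 conv
assert block(torch.randn(1, 256, 28, 28)).shape == (1, 512, 14, 14)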
+ """ + + def __init__( + self, + in_channels, + out_channels, + *, + bottleneck_channels, + stride=1, + num_groups=1, + norm="BN", + stride_in_1x1=False, + dilation=1, + deform_modulated=False, + deform_num_groups=1, + ): + super().__init__(in_channels, out_channels, stride) + self.deform_modulated = deform_modulated + + if in_channels != out_channels: + self.shortcut = Conv2d( + in_channels, + out_channels, + kernel_size=1, + stride=stride, + bias=False, + norm=get_norm(norm, out_channels), + ) + else: + self.shortcut = None + + stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride) + + self.conv1 = Conv2d( + in_channels, + bottleneck_channels, + kernel_size=1, + stride=stride_1x1, + bias=False, + norm=get_norm(norm, bottleneck_channels), + ) + + if deform_modulated: + deform_conv_op = ModulatedDeformConv + # offset channels are 2 or 3 (if with modulated) * kernel_size * kernel_size + offset_channels = 27 + else: + deform_conv_op = DeformConv + offset_channels = 18 + + self.conv2_offset = Conv2d( + bottleneck_channels, + offset_channels * deform_num_groups, + kernel_size=3, + stride=stride_3x3, + padding=1 * dilation, + dilation=dilation, + ) + self.conv2 = deform_conv_op( + bottleneck_channels, + bottleneck_channels, + kernel_size=3, + stride=stride_3x3, + padding=1 * dilation, + bias=False, + groups=num_groups, + dilation=dilation, + deformable_groups=deform_num_groups, + norm=get_norm(norm, bottleneck_channels), + ) + + self.conv3 = Conv2d( + bottleneck_channels, + out_channels, + kernel_size=1, + bias=False, + norm=get_norm(norm, out_channels), + ) + + for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]: + if layer is not None: # shortcut can be None + weight_init.c2_msra_fill(layer) + + nn.init.constant_(self.conv2_offset.weight, 0) + nn.init.constant_(self.conv2_offset.bias, 0) + + def forward(self, x): + out = self.conv1(x) + out = F.relu_(out) + + if self.deform_modulated: + offset_mask = self.conv2_offset(out) + offset_x, offset_y, mask = torch.chunk(offset_mask, 3, dim=1) + offset = torch.cat((offset_x, offset_y), dim=1) + mask = mask.sigmoid() + out = self.conv2(out, offset, mask) + else: + offset = self.conv2_offset(out) + out = self.conv2(out, offset) + out = F.relu_(out) + + out = self.conv3(out) + + if self.shortcut is not None: + shortcut = self.shortcut(x) + else: + shortcut = x + + out += shortcut + out = F.relu_(out) + return out + + +class BasicStem(CNNBlockBase): + """ + The standard ResNet stem (layers before the first residual block), + with a conv, relu and max_pool. + """ + + def __init__(self, in_channels=3, out_channels=64, norm="BN"): + """ + Args: + norm (str or callable): norm after the first conv layer. + See :func:`layers.get_norm` for supported format. + """ + super().__init__(in_channels, out_channels, 4) + self.in_channels = in_channels + self.conv1 = Conv2d( + in_channels, + out_channels, + kernel_size=7, + stride=2, + padding=3, + bias=False, + norm=get_norm(norm, out_channels), + ) + weight_init.c2_msra_fill(self.conv1) + + def forward(self, x): + x = self.conv1(x) + x = F.relu_(x) + x = F.max_pool2d(x, kernel_size=3, stride=2, padding=1) + return x + + +class ResNet(Backbone): + """ + Implement :paper:`ResNet`. + """ + + def __init__(self, stem, stages, num_classes=None, out_features=None, freeze_at=0): + """ + Args: + stem (nn.Module): a stem module + stages (list[list[CNNBlockBase]]): several (typically 4) stages, + each contains multiple :class:`CNNBlockBase`. 
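A small standalone check (not part of the patch) of the offset-channel arithmetic in DeformBottleneckBlock above: a 3x3 deformable conv needs two offsets per kernel position (18 channels per offset group), and the modulated variant adds one mask value per position (27).

kernel_size, deform_num_groups = 3, 1
plain = 2 * kernel_size * kernel_size * deform_num_groups      # DeformConv offsets
modulated = 3 * kernel_size * kernel_size * deform_num_groups  # offsets + mask
assert (plain, modulated) == (18, 27)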
+ num_classes (None or int): if None, will not perform classification. + Otherwise, will create a linear layer. + out_features (list[str]): name of the layers whose outputs should + be returned in forward. Can be anything in "stem", "linear", or "res2" ... + If None, will return the output of the last layer. + freeze_at (int): The number of stages at the beginning to freeze. + see :meth:`freeze` for detailed explanation. + """ + super().__init__() + self.stem = stem + self.num_classes = num_classes + + current_stride = self.stem.stride + self._out_feature_strides = {"stem": current_stride} + self._out_feature_channels = {"stem": self.stem.out_channels} + + self.stage_names, self.stages = [], [] + + if out_features is not None: + # Avoid keeping unused layers in this module. They consume extra memory + # and may cause allreduce to fail + num_stages = max( + [{"res2": 1, "res3": 2, "res4": 3, "res5": 4}.get(f, 0) for f in out_features] + ) + stages = stages[:num_stages] + for i, blocks in enumerate(stages): + assert len(blocks) > 0, len(blocks) + for block in blocks: + assert isinstance(block, CNNBlockBase), block + + name = "res" + str(i + 2) + stage = nn.Sequential(*blocks) + + self.add_module(name, stage) + self.stage_names.append(name) + self.stages.append(stage) + + self._out_feature_strides[name] = current_stride = int( + current_stride * np.prod([k.stride for k in blocks]) + ) + self._out_feature_channels[name] = curr_channels = blocks[-1].out_channels + self.stage_names = tuple(self.stage_names) # Make it static for scripting + + if num_classes is not None: + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + self.linear = nn.Linear(curr_channels, num_classes) + + # Sec 5.1 in "Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour": + # "The 1000-way fully-connected layer is initialized by + # drawing weights from a zero-mean Gaussian with standard deviation of 0.01." + nn.init.normal_(self.linear.weight, std=0.01) + name = "linear" + + if out_features is None: + out_features = [name] + self._out_features = out_features + assert len(self._out_features) + children = [x[0] for x in self.named_children()] + for out_feature in self._out_features: + assert out_feature in children, "Available children: {}".format(", ".join(children)) + self.freeze(freeze_at) + + def forward(self, x): + """ + Args: + x: Tensor of shape (N,C,H,W). H, W must be a multiple of ``self.size_divisibility``. + + Returns: + dict[str->Tensor]: names and the corresponding features + """ + assert x.dim() == 4, f"ResNet takes an input of shape (N, C, H, W). Got {x.shape} instead!" + outputs = {} + x = self.stem(x) + if "stem" in self._out_features: + outputs["stem"] = x + for name, stage in zip(self.stage_names, self.stages): + x = stage(x) + if name in self._out_features: + outputs[name] = x + if self.num_classes is not None: + x = self.avgpool(x) + x = torch.flatten(x, 1) + x = self.linear(x) + if "linear" in self._out_features: + outputs["linear"] = x + return outputs + + def output_shape(self): + return { + name: ShapeSpec( + channels=self._out_feature_channels[name], stride=self._out_feature_strides[name] + ) + for name in self._out_features + } + + def freeze(self, freeze_at=0): + """ + Freeze the first several stages of the ResNet. Commonly used in + fine-tuning. + + Layers that produce the same feature map spatial size are defined as one + "stage" by :paper:`FPN`. + + Args: + freeze_at (int): number of stages to freeze. + `1` means freezing the stem. `2` means freezing the stem and + one residual stage, etc. 
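A hypothetical end-to-end sketch (not part of the patch) assembling a small ResNet from the pieces above and reading its feature metadata back through output_shape. Imports assume the vendored package layout.

import torch
from annotator.oneformer.detectron2.modeling.backbone.resnet import BasicStem, ResNet

stem = BasicStem(in_channels=3, out_channels=64, norm="BN")
stages = ResNet.make_default_stages(depth=18, norm="BN")
model = ResNet(stem, stages, out_features=["res2", "res5"])

shapes = model.output_shape()
assert shapes["res2"].stride == 4 and shapes["res5"].stride == 32
feats = model(torch.randn(1, 3, 64, 64))
assert feats["res5"].shape == (1, 512, 2, 2)  # stride 32, 512 channels for R18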
+ + Returns: + nn.Module: this ResNet itself + """ + if freeze_at >= 1: + self.stem.freeze() + for idx, stage in enumerate(self.stages, start=2): + if freeze_at >= idx: + for block in stage.children(): + block.freeze() + return self + + @staticmethod + def make_stage(block_class, num_blocks, *, in_channels, out_channels, **kwargs): + """ + Create a list of blocks of the same type that forms one ResNet stage. + + Args: + block_class (type): a subclass of CNNBlockBase that's used to create all blocks in this + stage. A module of this type must not change spatial resolution of inputs unless its + stride != 1. + num_blocks (int): number of blocks in this stage + in_channels (int): input channels of the entire stage. + out_channels (int): output channels of **every block** in the stage. + kwargs: other arguments passed to the constructor of + `block_class`. If the argument name is "xx_per_block", the + argument is a list of values to be passed to each block in the + stage. Otherwise, the same argument is passed to every block + in the stage. + + Returns: + list[CNNBlockBase]: a list of block module. + + Examples: + :: + stage = ResNet.make_stage( + BottleneckBlock, 3, in_channels=16, out_channels=64, + bottleneck_channels=16, num_groups=1, + stride_per_block=[2, 1, 1], + dilations_per_block=[1, 1, 2] + ) + + Usually, layers that produce the same feature map spatial size are defined as one + "stage" (in :paper:`FPN`). Under such definition, ``stride_per_block[1:]`` should + all be 1. + """ + blocks = [] + for i in range(num_blocks): + curr_kwargs = {} + for k, v in kwargs.items(): + if k.endswith("_per_block"): + assert len(v) == num_blocks, ( + f"Argument '{k}' of make_stage should have the " + f"same length as num_blocks={num_blocks}." + ) + newk = k[: -len("_per_block")] + assert newk not in kwargs, f"Cannot call make_stage with both {k} and {newk}!" + curr_kwargs[newk] = v[i] + else: + curr_kwargs[k] = v + + blocks.append( + block_class(in_channels=in_channels, out_channels=out_channels, **curr_kwargs) + ) + in_channels = out_channels + return blocks + + @staticmethod + def make_default_stages(depth, block_class=None, **kwargs): + """ + Created list of ResNet stages from pre-defined depth (one of 18, 34, 50, 101, 152). + If it doesn't create the ResNet variant you need, please use :meth:`make_stage` + instead for fine-grained customization. + + Args: + depth (int): depth of ResNet + block_class (type): the CNN block class. Has to accept + `bottleneck_channels` argument for depth > 50. + By default it is BasicBlock or BottleneckBlock, based on the + depth. + kwargs: + other arguments to pass to `make_stage`. Should not contain + stride and channels, as they are predefined for each depth. + + Returns: + list[list[CNNBlockBase]]: modules in all stages; see arguments of + :class:`ResNet.__init__`. 
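A standalone sketch (not part of the patch) of the "_per_block" convention handled by make_stage above: a list-valued argument is fanned out across the blocks, so only the first block of the stage strides. Imports assume the vendored package layout.

from annotator.oneformer.detectron2.modeling.backbone.resnet import BasicBlock, ResNet

blocks = ResNet.make_stage(
    BasicBlock, 3, in_channels=64, out_channels=128,
    stride_per_block=[2, 1, 1], norm="BN",
)
assert [b.stride for b in blocks] == [2, 1, 1]
assert (blocks[0].in_channels, blocks[-1].out_channels) == (64, 128)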
+ """ + num_blocks_per_stage = { + 18: [2, 2, 2, 2], + 34: [3, 4, 6, 3], + 50: [3, 4, 6, 3], + 101: [3, 4, 23, 3], + 152: [3, 8, 36, 3], + }[depth] + if block_class is None: + block_class = BasicBlock if depth < 50 else BottleneckBlock + if depth < 50: + in_channels = [64, 64, 128, 256] + out_channels = [64, 128, 256, 512] + else: + in_channels = [64, 256, 512, 1024] + out_channels = [256, 512, 1024, 2048] + ret = [] + for (n, s, i, o) in zip(num_blocks_per_stage, [1, 2, 2, 2], in_channels, out_channels): + if depth >= 50: + kwargs["bottleneck_channels"] = o // 4 + ret.append( + ResNet.make_stage( + block_class=block_class, + num_blocks=n, + stride_per_block=[s] + [1] * (n - 1), + in_channels=i, + out_channels=o, + **kwargs, + ) + ) + return ret + + +ResNetBlockBase = CNNBlockBase +""" +Alias for backward compatibiltiy. +""" + + +def make_stage(*args, **kwargs): + """ + Deprecated alias for backward compatibiltiy. + """ + return ResNet.make_stage(*args, **kwargs) + + +@BACKBONE_REGISTRY.register() +def build_resnet_backbone(cfg, input_shape): + """ + Create a ResNet instance from config. + + Returns: + ResNet: a :class:`ResNet` instance. + """ + # need registration of new blocks/stems? + norm = cfg.MODEL.RESNETS.NORM + stem = BasicStem( + in_channels=input_shape.channels, + out_channels=cfg.MODEL.RESNETS.STEM_OUT_CHANNELS, + norm=norm, + ) + + # fmt: off + freeze_at = cfg.MODEL.BACKBONE.FREEZE_AT + out_features = cfg.MODEL.RESNETS.OUT_FEATURES + depth = cfg.MODEL.RESNETS.DEPTH + num_groups = cfg.MODEL.RESNETS.NUM_GROUPS + width_per_group = cfg.MODEL.RESNETS.WIDTH_PER_GROUP + bottleneck_channels = num_groups * width_per_group + in_channels = cfg.MODEL.RESNETS.STEM_OUT_CHANNELS + out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS + stride_in_1x1 = cfg.MODEL.RESNETS.STRIDE_IN_1X1 + res5_dilation = cfg.MODEL.RESNETS.RES5_DILATION + deform_on_per_stage = cfg.MODEL.RESNETS.DEFORM_ON_PER_STAGE + deform_modulated = cfg.MODEL.RESNETS.DEFORM_MODULATED + deform_num_groups = cfg.MODEL.RESNETS.DEFORM_NUM_GROUPS + # fmt: on + assert res5_dilation in {1, 2}, "res5_dilation cannot be {}.".format(res5_dilation) + + num_blocks_per_stage = { + 18: [2, 2, 2, 2], + 34: [3, 4, 6, 3], + 50: [3, 4, 6, 3], + 101: [3, 4, 23, 3], + 152: [3, 8, 36, 3], + }[depth] + + if depth in [18, 34]: + assert out_channels == 64, "Must set MODEL.RESNETS.RES2_OUT_CHANNELS = 64 for R18/R34" + assert not any( + deform_on_per_stage + ), "MODEL.RESNETS.DEFORM_ON_PER_STAGE unsupported for R18/R34" + assert res5_dilation == 1, "Must set MODEL.RESNETS.RES5_DILATION = 1 for R18/R34" + assert num_groups == 1, "Must set MODEL.RESNETS.NUM_GROUPS = 1 for R18/R34" + + stages = [] + + for idx, stage_idx in enumerate(range(2, 6)): + # res5_dilation is used this way as a convention in R-FCN & Deformable Conv paper + dilation = res5_dilation if stage_idx == 5 else 1 + first_stride = 1 if idx == 0 or (stage_idx == 5 and dilation == 2) else 2 + stage_kargs = { + "num_blocks": num_blocks_per_stage[idx], + "stride_per_block": [first_stride] + [1] * (num_blocks_per_stage[idx] - 1), + "in_channels": in_channels, + "out_channels": out_channels, + "norm": norm, + } + # Use BasicBlock for R18 and R34. 
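# Sketch of assembling a full R50 from the helpers above (BasicStem and the block
# classes are defined earlier in this file). make_default_stages(50) yields block
# counts [3, 4, 6, 3] and output channels 256/512/1024/2048.
stem = BasicStem(in_channels=3, out_channels=64, norm="FrozenBN")
stages = ResNet.make_default_stages(50, norm="FrozenBN")
assert [len(blocks) for blocks in stages] == [3, 4, 6, 3]
model = ResNet(stem, stages, out_features=["res2", "res5"])
shapes = model.output_shape()
assert shapes["res2"].stride == 4 and shapes["res5"].stride == 32
assert shapes["res5"].channels == 2048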
+ if depth in [18, 34]: + stage_kargs["block_class"] = BasicBlock + else: + stage_kargs["bottleneck_channels"] = bottleneck_channels + stage_kargs["stride_in_1x1"] = stride_in_1x1 + stage_kargs["dilation"] = dilation + stage_kargs["num_groups"] = num_groups + if deform_on_per_stage[idx]: + stage_kargs["block_class"] = DeformBottleneckBlock + stage_kargs["deform_modulated"] = deform_modulated + stage_kargs["deform_num_groups"] = deform_num_groups + else: + stage_kargs["block_class"] = BottleneckBlock + blocks = ResNet.make_stage(**stage_kargs) + in_channels = out_channels + out_channels *= 2 + bottleneck_channels *= 2 + stages.append(blocks) + return ResNet(stem, stages, out_features=out_features, freeze_at=freeze_at) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/backbone/swin.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/backbone/swin.py new file mode 100644 index 0000000000000000000000000000000000000000..d5a651d6f4d2933e8f329bd13c04286488f25753 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/backbone/swin.py @@ -0,0 +1,695 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +""" +Implementation of Swin models from :paper:`swin`. + +This code is adapted from https://github.com/SwinTransformer/Swin-Transformer-Object-Detection/blob/master/mmdet/models/backbones/swin_transformer.py with minimal modifications. # noqa +-------------------------------------------------------- +Swin Transformer +Copyright (c) 2021 Microsoft +Licensed under The MIT License [see LICENSE for details] +Written by Ze Liu, Yutong Lin, Yixuan Wei +-------------------------------------------------------- +LICENSE: https://github.com/SwinTransformer/Swin-Transformer-Object-Detection/blob/461e003166a8083d0b620beacd4662a2df306bd6/LICENSE +""" + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.checkpoint as checkpoint + +from annotator.oneformer.detectron2.modeling.backbone.backbone import Backbone + +_to_2tuple = nn.modules.utils._ntuple(2) + + +class Mlp(nn.Module): + """Multilayer perceptron.""" + + def __init__( + self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.0 + ): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +def window_partition(x, window_size): + """ + Args: + x: (B, H, W, C) + window_size (int): window size + Returns: + windows: (num_windows*B, window_size, window_size, C) + """ + B, H, W, C = x.shape + x = x.view(B, H // window_size, window_size, W // window_size, window_size, C) + windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C) + return windows + + +def window_reverse(windows, window_size, H, W): + """ + Args: + windows: (num_windows*B, window_size, window_size, C) + window_size (int): Window size + H (int): Height of image + W (int): Width of image + Returns: + x: (B, H, W, C) + """ + B = int(windows.shape[0] / (H * W / window_size / window_size)) + x = windows.view(B, H // window_size, W // window_size, window_size, window_size, -1) + 
x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) + return x + + +class WindowAttention(nn.Module): + """Window based multi-head self attention (W-MSA) module with relative position bias. + It supports both of shifted and non-shifted window. + Args: + dim (int): Number of input channels. + window_size (tuple[int]): The height and width of the window. + num_heads (int): Number of attention heads. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. + Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set + attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0 + proj_drop (float, optional): Dropout ratio of output. Default: 0.0 + """ + + def __init__( + self, + dim, + window_size, + num_heads, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + ): + + super().__init__() + self.dim = dim + self.window_size = window_size # Wh, Ww + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = qk_scale or head_dim**-0.5 + + # define a parameter table of relative position bias + self.relative_position_bias_table = nn.Parameter( + torch.zeros((2 * window_size[0] - 1) * (2 * window_size[1] - 1), num_heads) + ) # 2*Wh-1 * 2*Ww-1, nH + + # get pair-wise relative position index for each token inside the window + coords_h = torch.arange(self.window_size[0]) + coords_w = torch.arange(self.window_size[1]) + coords = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, Wh, Ww + coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww + relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] # 2, Wh*Ww, Wh*Ww + relative_coords = relative_coords.permute(1, 2, 0).contiguous() # Wh*Ww, Wh*Ww, 2 + relative_coords[:, :, 0] += self.window_size[0] - 1 # shift to start from 0 + relative_coords[:, :, 1] += self.window_size[1] - 1 + relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1 + relative_position_index = relative_coords.sum(-1) # Wh*Ww, Wh*Ww + self.register_buffer("relative_position_index", relative_position_index) + + self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + + nn.init.trunc_normal_(self.relative_position_bias_table, std=0.02) + self.softmax = nn.Softmax(dim=-1) + + def forward(self, x, mask=None): + """Forward function. 
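# Round-trip sketch for the Swin window helpers above. These versions assume H and
# W are already multiples of window_size; SwinTransformerBlock pads beforehand.
# Tensor sizes here are illustrative.
import torch

x = torch.randn(2, 14, 14, 32)                    # (B, H, W, C)
windows = window_partition(x, window_size=7)      # (B * num_windows, 7, 7, C)
assert windows.shape == (8, 7, 7, 32)
x_back = window_reverse(windows, window_size=7, H=14, W=14)
assert torch.equal(x_back, x)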
+ Args: + x: input features with shape of (num_windows*B, N, C) + mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None + """ + B_, N, C = x.shape + qkv = ( + self.qkv(x) + .reshape(B_, N, 3, self.num_heads, C // self.num_heads) + .permute(2, 0, 3, 1, 4) + ) + q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple) + + q = q * self.scale + attn = q @ k.transpose(-2, -1) + + relative_position_bias = self.relative_position_bias_table[ + self.relative_position_index.view(-1) + ].view( + self.window_size[0] * self.window_size[1], self.window_size[0] * self.window_size[1], -1 + ) # Wh*Ww,Wh*Ww,nH + relative_position_bias = relative_position_bias.permute( + 2, 0, 1 + ).contiguous() # nH, Wh*Ww, Wh*Ww + attn = attn + relative_position_bias.unsqueeze(0) + + if mask is not None: + nW = mask.shape[0] + attn = attn.view(B_ // nW, nW, self.num_heads, N, N) + mask.unsqueeze(1).unsqueeze(0) + attn = attn.view(-1, self.num_heads, N, N) + attn = self.softmax(attn) + else: + attn = self.softmax(attn) + + attn = self.attn_drop(attn) + + x = (attn @ v).transpose(1, 2).reshape(B_, N, C) + x = self.proj(x) + x = self.proj_drop(x) + return x + + +class SwinTransformerBlock(nn.Module): + """Swin Transformer Block. + Args: + dim (int): Number of input channels. + num_heads (int): Number of attention heads. + window_size (int): Window size. + shift_size (int): Shift size for SW-MSA. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float, optional): Stochastic depth rate. Default: 0.0 + act_layer (nn.Module, optional): Activation layer. Default: nn.GELU + norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm + """ + + def __init__( + self, + dim, + num_heads, + window_size=7, + shift_size=0, + mlp_ratio=4.0, + qkv_bias=True, + qk_scale=None, + drop=0.0, + attn_drop=0.0, + drop_path=0.0, + act_layer=nn.GELU, + norm_layer=nn.LayerNorm, + ): + super().__init__() + self.dim = dim + self.num_heads = num_heads + self.window_size = window_size + self.shift_size = shift_size + self.mlp_ratio = mlp_ratio + assert 0 <= self.shift_size < self.window_size, "shift_size must in 0-window_size" + + self.norm1 = norm_layer(dim) + self.attn = WindowAttention( + dim, + window_size=_to_2tuple(self.window_size), + num_heads=num_heads, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop=attn_drop, + proj_drop=drop, + ) + + if drop_path > 0.0: + from timm.models.layers import DropPath + + self.drop_path = DropPath(drop_path) + else: + self.drop_path = nn.Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp( + in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop + ) + + self.H = None + self.W = None + + def forward(self, x, mask_matrix): + """Forward function. + Args: + x: Input feature, tensor size (B, H*W, C). + H, W: Spatial resolution of the input feature. + mask_matrix: Attention mask for cyclic shift. 
+ """ + B, L, C = x.shape + H, W = self.H, self.W + assert L == H * W, "input feature has wrong size" + + shortcut = x + x = self.norm1(x) + x = x.view(B, H, W, C) + + # pad feature maps to multiples of window size + pad_l = pad_t = 0 + pad_r = (self.window_size - W % self.window_size) % self.window_size + pad_b = (self.window_size - H % self.window_size) % self.window_size + x = F.pad(x, (0, 0, pad_l, pad_r, pad_t, pad_b)) + _, Hp, Wp, _ = x.shape + + # cyclic shift + if self.shift_size > 0: + shifted_x = torch.roll(x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2)) + attn_mask = mask_matrix + else: + shifted_x = x + attn_mask = None + + # partition windows + x_windows = window_partition( + shifted_x, self.window_size + ) # nW*B, window_size, window_size, C + x_windows = x_windows.view( + -1, self.window_size * self.window_size, C + ) # nW*B, window_size*window_size, C + + # W-MSA/SW-MSA + attn_windows = self.attn(x_windows, mask=attn_mask) # nW*B, window_size*window_size, C + + # merge windows + attn_windows = attn_windows.view(-1, self.window_size, self.window_size, C) + shifted_x = window_reverse(attn_windows, self.window_size, Hp, Wp) # B H' W' C + + # reverse cyclic shift + if self.shift_size > 0: + x = torch.roll(shifted_x, shifts=(self.shift_size, self.shift_size), dims=(1, 2)) + else: + x = shifted_x + + if pad_r > 0 or pad_b > 0: + x = x[:, :H, :W, :].contiguous() + + x = x.view(B, H * W, C) + + # FFN + x = shortcut + self.drop_path(x) + x = x + self.drop_path(self.mlp(self.norm2(x))) + + return x + + +class PatchMerging(nn.Module): + """Patch Merging Layer + Args: + dim (int): Number of input channels. + norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm + """ + + def __init__(self, dim, norm_layer=nn.LayerNorm): + super().__init__() + self.dim = dim + self.reduction = nn.Linear(4 * dim, 2 * dim, bias=False) + self.norm = norm_layer(4 * dim) + + def forward(self, x, H, W): + """Forward function. + Args: + x: Input feature, tensor size (B, H*W, C). + H, W: Spatial resolution of the input feature. + """ + B, L, C = x.shape + assert L == H * W, "input feature has wrong size" + + x = x.view(B, H, W, C) + + # padding + pad_input = (H % 2 == 1) or (W % 2 == 1) + if pad_input: + x = F.pad(x, (0, 0, 0, W % 2, 0, H % 2)) + + x0 = x[:, 0::2, 0::2, :] # B H/2 W/2 C + x1 = x[:, 1::2, 0::2, :] # B H/2 W/2 C + x2 = x[:, 0::2, 1::2, :] # B H/2 W/2 C + x3 = x[:, 1::2, 1::2, :] # B H/2 W/2 C + x = torch.cat([x0, x1, x2, x3], -1) # B H/2 W/2 4*C + x = x.view(B, -1, 4 * C) # B H/2*W/2 4*C + + x = self.norm(x) + x = self.reduction(x) + + return x + + +class BasicLayer(nn.Module): + """A basic Swin Transformer layer for one stage. + Args: + dim (int): Number of feature channels + depth (int): Depths of this stage. + num_heads (int): Number of attention head. + window_size (int): Local window size. Default: 7. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0 + norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm + downsample (nn.Module | None, optional): Downsample layer at the end of the layer. 
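# Shape sketch for PatchMerging above: each 2x2 neighbourhood is concatenated
# (C -> 4C) and linearly projected to 2C, halving the spatial resolution. The
# sizes below are illustrative.
import torch

merge = PatchMerging(dim=96)
x = torch.randn(2, 56 * 56, 96)        # (B, H*W, C)
y = merge(x, H=56, W=56)
assert y.shape == (2, 28 * 28, 192)    # (B, (H/2)*(W/2), 2C)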
+ Default: None + use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False. + """ + + def __init__( + self, + dim, + depth, + num_heads, + window_size=7, + mlp_ratio=4.0, + qkv_bias=True, + qk_scale=None, + drop=0.0, + attn_drop=0.0, + drop_path=0.0, + norm_layer=nn.LayerNorm, + downsample=None, + use_checkpoint=False, + ): + super().__init__() + self.window_size = window_size + self.shift_size = window_size // 2 + self.depth = depth + self.use_checkpoint = use_checkpoint + + # build blocks + self.blocks = nn.ModuleList( + [ + SwinTransformerBlock( + dim=dim, + num_heads=num_heads, + window_size=window_size, + shift_size=0 if (i % 2 == 0) else window_size // 2, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop, + attn_drop=attn_drop, + drop_path=drop_path[i] if isinstance(drop_path, list) else drop_path, + norm_layer=norm_layer, + ) + for i in range(depth) + ] + ) + + # patch merging layer + if downsample is not None: + self.downsample = downsample(dim=dim, norm_layer=norm_layer) + else: + self.downsample = None + + def forward(self, x, H, W): + """Forward function. + Args: + x: Input feature, tensor size (B, H*W, C). + H, W: Spatial resolution of the input feature. + """ + + # calculate attention mask for SW-MSA + Hp = int(np.ceil(H / self.window_size)) * self.window_size + Wp = int(np.ceil(W / self.window_size)) * self.window_size + img_mask = torch.zeros((1, Hp, Wp, 1), device=x.device) # 1 Hp Wp 1 + h_slices = ( + slice(0, -self.window_size), + slice(-self.window_size, -self.shift_size), + slice(-self.shift_size, None), + ) + w_slices = ( + slice(0, -self.window_size), + slice(-self.window_size, -self.shift_size), + slice(-self.shift_size, None), + ) + cnt = 0 + for h in h_slices: + for w in w_slices: + img_mask[:, h, w, :] = cnt + cnt += 1 + + mask_windows = window_partition( + img_mask, self.window_size + ) # nW, window_size, window_size, 1 + mask_windows = mask_windows.view(-1, self.window_size * self.window_size) + attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) + attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill( + attn_mask == 0, float(0.0) + ) + + for blk in self.blocks: + blk.H, blk.W = H, W + if self.use_checkpoint: + x = checkpoint.checkpoint(blk, x, attn_mask) + else: + x = blk(x, attn_mask) + if self.downsample is not None: + x_down = self.downsample(x, H, W) + Wh, Ww = (H + 1) // 2, (W + 1) // 2 + return x, H, W, x_down, Wh, Ww + else: + return x, H, W, x, H, W + + +class PatchEmbed(nn.Module): + """Image to Patch Embedding + Args: + patch_size (int): Patch token size. Default: 4. + in_chans (int): Number of input image channels. Default: 3. + embed_dim (int): Number of linear projection output channels. Default: 96. + norm_layer (nn.Module, optional): Normalization layer. 
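# Sketch of the SW-MSA attention mask built in BasicLayer.forward above: pixels
# that land in the same window but came from different shifted regions get
# different ids, and those cross-region pairs are masked with -100 before softmax.
# The window_size / shift_size / Hp / Wp values below are illustrative.
import torch

window_size, shift_size = 4, 2
Hp = Wp = 8
img_mask = torch.zeros((1, Hp, Wp, 1))
region_slices = (
    slice(0, -window_size),
    slice(-window_size, -shift_size),
    slice(-shift_size, None),
)
cnt = 0
for h in region_slices:
    for w in region_slices:
        img_mask[:, h, w, :] = cnt
        cnt += 1
mask_windows = window_partition(img_mask, window_size).view(-1, window_size * window_size)
attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)
attn_mask = attn_mask.masked_fill(attn_mask != 0, -100.0).masked_fill(attn_mask == 0, 0.0)
assert attn_mask.shape == (4, 16, 16)  # one (N, N) additive mask per window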
Default: None + """ + + def __init__(self, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None): + super().__init__() + patch_size = _to_2tuple(patch_size) + self.patch_size = patch_size + + self.in_chans = in_chans + self.embed_dim = embed_dim + + self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) + if norm_layer is not None: + self.norm = norm_layer(embed_dim) + else: + self.norm = None + + def forward(self, x): + """Forward function.""" + # padding + _, _, H, W = x.size() + if W % self.patch_size[1] != 0: + x = F.pad(x, (0, self.patch_size[1] - W % self.patch_size[1])) + if H % self.patch_size[0] != 0: + x = F.pad(x, (0, 0, 0, self.patch_size[0] - H % self.patch_size[0])) + + x = self.proj(x) # B C Wh Ww + if self.norm is not None: + Wh, Ww = x.size(2), x.size(3) + x = x.flatten(2).transpose(1, 2) + x = self.norm(x) + x = x.transpose(1, 2).view(-1, self.embed_dim, Wh, Ww) + + return x + + +class SwinTransformer(Backbone): + """Swin Transformer backbone. + A PyTorch impl of : `Swin Transformer: Hierarchical Vision Transformer using Shifted + Windows` - https://arxiv.org/pdf/2103.14030 + Args: + pretrain_img_size (int): Input image size for training the pretrained model, + used in absolute postion embedding. Default 224. + patch_size (int | tuple(int)): Patch size. Default: 4. + in_chans (int): Number of input image channels. Default: 3. + embed_dim (int): Number of linear projection output channels. Default: 96. + depths (tuple[int]): Depths of each Swin Transformer stage. + num_heads (tuple[int]): Number of attention head of each stage. + window_size (int): Window size. Default: 7. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4. + qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float): Override default qk scale of head_dim ** -0.5 if set. + drop_rate (float): Dropout rate. + attn_drop_rate (float): Attention dropout rate. Default: 0. + drop_path_rate (float): Stochastic depth rate. Default: 0.2. + norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm. + ape (bool): If True, add absolute position embedding to the patch embedding. Default: False. + patch_norm (bool): If True, add normalization after patch embedding. Default: True. + out_indices (Sequence[int]): Output from which stages. + frozen_stages (int): Stages to be frozen (stop grad and set eval mode). + -1 means not freezing any parameters. + use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False. 
+ """ + + def __init__( + self, + pretrain_img_size=224, + patch_size=4, + in_chans=3, + embed_dim=96, + depths=(2, 2, 6, 2), + num_heads=(3, 6, 12, 24), + window_size=7, + mlp_ratio=4.0, + qkv_bias=True, + qk_scale=None, + drop_rate=0.0, + attn_drop_rate=0.0, + drop_path_rate=0.2, + norm_layer=nn.LayerNorm, + ape=False, + patch_norm=True, + out_indices=(0, 1, 2, 3), + frozen_stages=-1, + use_checkpoint=False, + ): + super().__init__() + + self.pretrain_img_size = pretrain_img_size + self.num_layers = len(depths) + self.embed_dim = embed_dim + self.ape = ape + self.patch_norm = patch_norm + self.out_indices = out_indices + self.frozen_stages = frozen_stages + + # split image into non-overlapping patches + self.patch_embed = PatchEmbed( + patch_size=patch_size, + in_chans=in_chans, + embed_dim=embed_dim, + norm_layer=norm_layer if self.patch_norm else None, + ) + + # absolute position embedding + if self.ape: + pretrain_img_size = _to_2tuple(pretrain_img_size) + patch_size = _to_2tuple(patch_size) + patches_resolution = [ + pretrain_img_size[0] // patch_size[0], + pretrain_img_size[1] // patch_size[1], + ] + + self.absolute_pos_embed = nn.Parameter( + torch.zeros(1, embed_dim, patches_resolution[0], patches_resolution[1]) + ) + nn.init.trunc_normal_(self.absolute_pos_embed, std=0.02) + + self.pos_drop = nn.Dropout(p=drop_rate) + + # stochastic depth + dpr = [ + x.item() for x in torch.linspace(0, drop_path_rate, sum(depths)) + ] # stochastic depth decay rule + + # build layers + self.layers = nn.ModuleList() + for i_layer in range(self.num_layers): + layer = BasicLayer( + dim=int(embed_dim * 2**i_layer), + depth=depths[i_layer], + num_heads=num_heads[i_layer], + window_size=window_size, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[sum(depths[:i_layer]) : sum(depths[: i_layer + 1])], + norm_layer=norm_layer, + downsample=PatchMerging if (i_layer < self.num_layers - 1) else None, + use_checkpoint=use_checkpoint, + ) + self.layers.append(layer) + + num_features = [int(embed_dim * 2**i) for i in range(self.num_layers)] + self.num_features = num_features + + # add a norm layer for each output + for i_layer in out_indices: + layer = norm_layer(num_features[i_layer]) + layer_name = f"norm{i_layer}" + self.add_module(layer_name, layer) + + self._freeze_stages() + self._out_features = ["p{}".format(i) for i in self.out_indices] + self._out_feature_channels = { + "p{}".format(i): self.embed_dim * 2**i for i in self.out_indices + } + self._out_feature_strides = {"p{}".format(i): 2 ** (i + 2) for i in self.out_indices} + self._size_devisibility = 32 + + self.apply(self._init_weights) + + def _freeze_stages(self): + if self.frozen_stages >= 0: + self.patch_embed.eval() + for param in self.patch_embed.parameters(): + param.requires_grad = False + + if self.frozen_stages >= 1 and self.ape: + self.absolute_pos_embed.requires_grad = False + + if self.frozen_stages >= 2: + self.pos_drop.eval() + for i in range(0, self.frozen_stages - 1): + m = self.layers[i] + m.eval() + for param in m.parameters(): + param.requires_grad = False + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + nn.init.trunc_normal_(m.weight, std=0.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + + @property + def size_divisibility(self): + return self._size_divisibility + + def forward(self, x): + 
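# Usage sketch for the SwinTransformer backbone, with Swin-T style settings.
# drop_path_rate=0.0 keeps the sketch free of the optional timm DropPath import;
# feature names and strides follow the p0..p3 convention set up in __init__.
import torch

model = SwinTransformer(
    embed_dim=96, depths=(2, 2, 6, 2), num_heads=(3, 6, 12, 24), drop_path_rate=0.0
)
feats = model(torch.randn(1, 3, 224, 224))
assert sorted(feats.keys()) == ["p0", "p1", "p2", "p3"]   # strides 4, 8, 16, 32
assert feats["p0"].shape == (1, 96, 56, 56)
assert feats["p3"].shape == (1, 768, 7, 7)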
"""Forward function.""" + x = self.patch_embed(x) + + Wh, Ww = x.size(2), x.size(3) + if self.ape: + # interpolate the position embedding to the corresponding size + absolute_pos_embed = F.interpolate( + self.absolute_pos_embed, size=(Wh, Ww), mode="bicubic" + ) + x = (x + absolute_pos_embed).flatten(2).transpose(1, 2) # B Wh*Ww C + else: + x = x.flatten(2).transpose(1, 2) + x = self.pos_drop(x) + + outs = {} + for i in range(self.num_layers): + layer = self.layers[i] + x_out, H, W, x, Wh, Ww = layer(x, Wh, Ww) + + if i in self.out_indices: + norm_layer = getattr(self, f"norm{i}") + x_out = norm_layer(x_out) + + out = x_out.view(-1, H, W, self.num_features[i]).permute(0, 3, 1, 2).contiguous() + outs["p{}".format(i)] = out + + return outs diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/backbone/utils.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/backbone/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..2b89a4c3fbe079a77fd0cef947cf9ada787fc55d --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/backbone/utils.py @@ -0,0 +1,186 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +import math +import torch +import torch.nn as nn +import torch.nn.functional as F + +__all__ = [ + "window_partition", + "window_unpartition", + "add_decomposed_rel_pos", + "get_abs_pos", + "PatchEmbed", +] + + +def window_partition(x, window_size): + """ + Partition into non-overlapping windows with padding if needed. + Args: + x (tensor): input tokens with [B, H, W, C]. + window_size (int): window size. + + Returns: + windows: windows after partition with [B * num_windows, window_size, window_size, C]. + (Hp, Wp): padded height and width before partition + """ + B, H, W, C = x.shape + + pad_h = (window_size - H % window_size) % window_size + pad_w = (window_size - W % window_size) % window_size + if pad_h > 0 or pad_w > 0: + x = F.pad(x, (0, 0, 0, pad_w, 0, pad_h)) + Hp, Wp = H + pad_h, W + pad_w + + x = x.view(B, Hp // window_size, window_size, Wp // window_size, window_size, C) + windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C) + return windows, (Hp, Wp) + + +def window_unpartition(windows, window_size, pad_hw, hw): + """ + Window unpartition into original sequences and removing padding. + Args: + x (tensor): input tokens with [B * num_windows, window_size, window_size, C]. + window_size (int): window size. + pad_hw (Tuple): padded height and width (Hp, Wp). + hw (Tuple): original height and width (H, W) before padding. + + Returns: + x: unpartitioned sequences with [B, H, W, C]. + """ + Hp, Wp = pad_hw + H, W = hw + B = windows.shape[0] // (Hp * Wp // window_size // window_size) + x = windows.view(B, Hp // window_size, Wp // window_size, window_size, window_size, -1) + x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, Hp, Wp, -1) + + if Hp > H or Wp > W: + x = x[:, :H, :W, :].contiguous() + return x + + +def get_rel_pos(q_size, k_size, rel_pos): + """ + Get relative positional embeddings according to the relative positions of + query and key sizes. + Args: + q_size (int): size of query q. + k_size (int): size of key k. + rel_pos (Tensor): relative position embeddings (L, C). + + Returns: + Extracted positional embeddings according to relative positions. + """ + max_rel_dist = int(2 * max(q_size, k_size) - 1) + # Interpolate rel pos if needed. 
+ if rel_pos.shape[0] != max_rel_dist: + # Interpolate rel pos. + rel_pos_resized = F.interpolate( + rel_pos.reshape(1, rel_pos.shape[0], -1).permute(0, 2, 1), + size=max_rel_dist, + mode="linear", + ) + rel_pos_resized = rel_pos_resized.reshape(-1, max_rel_dist).permute(1, 0) + else: + rel_pos_resized = rel_pos + + # Scale the coords with short length if shapes for q and k are different. + q_coords = torch.arange(q_size)[:, None] * max(k_size / q_size, 1.0) + k_coords = torch.arange(k_size)[None, :] * max(q_size / k_size, 1.0) + relative_coords = (q_coords - k_coords) + (k_size - 1) * max(q_size / k_size, 1.0) + + return rel_pos_resized[relative_coords.long()] + + +def add_decomposed_rel_pos(attn, q, rel_pos_h, rel_pos_w, q_size, k_size): + """ + Calculate decomposed Relative Positional Embeddings from :paper:`mvitv2`. + https://github.com/facebookresearch/mvit/blob/19786631e330df9f3622e5402b4a419a263a2c80/mvit/models/attention.py # noqa B950 + Args: + attn (Tensor): attention map. + q (Tensor): query q in the attention layer with shape (B, q_h * q_w, C). + rel_pos_h (Tensor): relative position embeddings (Lh, C) for height axis. + rel_pos_w (Tensor): relative position embeddings (Lw, C) for width axis. + q_size (Tuple): spatial sequence size of query q with (q_h, q_w). + k_size (Tuple): spatial sequence size of key k with (k_h, k_w). + + Returns: + attn (Tensor): attention map with added relative positional embeddings. + """ + q_h, q_w = q_size + k_h, k_w = k_size + Rh = get_rel_pos(q_h, k_h, rel_pos_h) + Rw = get_rel_pos(q_w, k_w, rel_pos_w) + + B, _, dim = q.shape + r_q = q.reshape(B, q_h, q_w, dim) + rel_h = torch.einsum("bhwc,hkc->bhwk", r_q, Rh) + rel_w = torch.einsum("bhwc,wkc->bhwk", r_q, Rw) + + attn = ( + attn.view(B, q_h, q_w, k_h, k_w) + rel_h[:, :, :, :, None] + rel_w[:, :, :, None, :] + ).view(B, q_h * q_w, k_h * k_w) + + return attn + + +def get_abs_pos(abs_pos, has_cls_token, hw): + """ + Calculate absolute positional embeddings. If needed, resize embeddings and remove cls_token + dimension for the original embeddings. + Args: + abs_pos (Tensor): absolute positional embeddings with (1, num_position, C). + has_cls_token (bool): If true, has 1 embedding in abs_pos for cls token. + hw (Tuple): size of input image tokens. + + Returns: + Absolute positional embeddings after processing with shape (1, H, W, C) + """ + h, w = hw + if has_cls_token: + abs_pos = abs_pos[:, 1:] + xy_num = abs_pos.shape[1] + size = int(math.sqrt(xy_num)) + assert size * size == xy_num + + if size != h or size != w: + new_abs_pos = F.interpolate( + abs_pos.reshape(1, size, size, -1).permute(0, 3, 1, 2), + size=(h, w), + mode="bicubic", + align_corners=False, + ) + + return new_abs_pos.permute(0, 2, 3, 1) + else: + return abs_pos.reshape(1, h, w, -1) + + +class PatchEmbed(nn.Module): + """ + Image to Patch Embedding. + """ + + def __init__( + self, kernel_size=(16, 16), stride=(16, 16), padding=(0, 0), in_chans=3, embed_dim=768 + ): + """ + Args: + kernel_size (Tuple): kernel size of the projection layer. + stride (Tuple): stride of the projection layer. + padding (Tuple): padding size of the projection layer. + in_chans (int): Number of input image channels. + embed_dim (int): embed_dim (int): Patch embedding dimension. 
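# Shape sketch for the decomposed relative-position bias above: the attention map
# is viewed as (B, q_h, q_w, k_h, k_w) so the per-axis biases from get_rel_pos can
# be added separately, then flattened back. Sizes below are illustrative.
import torch

B, dim = 2, 8
q_h = q_w = k_h = k_w = 4
attn = torch.zeros(B, q_h * q_w, k_h * k_w)
q = torch.randn(B, q_h * q_w, dim)
rel_pos_h = torch.randn(2 * q_h - 1, dim)   # (2*H - 1, C) table, as in the ViT blocks below
rel_pos_w = torch.randn(2 * q_w - 1, dim)
out = add_decomposed_rel_pos(attn, q, rel_pos_h, rel_pos_w, (q_h, q_w), (k_h, k_w))
assert out.shape == (B, q_h * q_w, k_h * k_w)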
+ """ + super().__init__() + + self.proj = nn.Conv2d( + in_chans, embed_dim, kernel_size=kernel_size, stride=stride, padding=padding + ) + + def forward(self, x): + x = self.proj(x) + # B C H W -> B H W C + x = x.permute(0, 2, 3, 1) + return x diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/backbone/vit.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/backbone/vit.py new file mode 100644 index 0000000000000000000000000000000000000000..07b5e2073ae80859be59d1142394929b504cf427 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/backbone/vit.py @@ -0,0 +1,524 @@ +import logging +import math +import fvcore.nn.weight_init as weight_init +import torch +import torch.nn as nn + +from annotator.oneformer.detectron2.layers import CNNBlockBase, Conv2d, get_norm +from annotator.oneformer.detectron2.modeling.backbone.fpn import _assert_strides_are_log2_contiguous + +from .backbone import Backbone +from .utils import ( + PatchEmbed, + add_decomposed_rel_pos, + get_abs_pos, + window_partition, + window_unpartition, +) + +logger = logging.getLogger(__name__) + + +__all__ = ["ViT", "SimpleFeaturePyramid", "get_vit_lr_decay_rate"] + + +class Attention(nn.Module): + """Multi-head Attention block with relative position embeddings.""" + + def __init__( + self, + dim, + num_heads=8, + qkv_bias=True, + use_rel_pos=False, + rel_pos_zero_init=True, + input_size=None, + ): + """ + Args: + dim (int): Number of input channels. + num_heads (int): Number of attention heads. + qkv_bias (bool: If True, add a learnable bias to query, key, value. + rel_pos (bool): If True, add relative positional embeddings to the attention map. + rel_pos_zero_init (bool): If True, zero initialize relative positional parameters. + input_size (int or None): Input resolution for calculating the relative positional + parameter size. + """ + super().__init__() + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = head_dim**-0.5 + + self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) + self.proj = nn.Linear(dim, dim) + + self.use_rel_pos = use_rel_pos + if self.use_rel_pos: + # initialize relative positional embeddings + self.rel_pos_h = nn.Parameter(torch.zeros(2 * input_size[0] - 1, head_dim)) + self.rel_pos_w = nn.Parameter(torch.zeros(2 * input_size[1] - 1, head_dim)) + + if not rel_pos_zero_init: + nn.init.trunc_normal_(self.rel_pos_h, std=0.02) + nn.init.trunc_normal_(self.rel_pos_w, std=0.02) + + def forward(self, x): + B, H, W, _ = x.shape + # qkv with shape (3, B, nHead, H * W, C) + qkv = self.qkv(x).reshape(B, H * W, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4) + # q, k, v with shape (B * nHead, H * W, C) + q, k, v = qkv.reshape(3, B * self.num_heads, H * W, -1).unbind(0) + + attn = (q * self.scale) @ k.transpose(-2, -1) + + if self.use_rel_pos: + attn = add_decomposed_rel_pos(attn, q, self.rel_pos_h, self.rel_pos_w, (H, W), (H, W)) + + attn = attn.softmax(dim=-1) + x = (attn @ v).view(B, self.num_heads, H, W, -1).permute(0, 2, 3, 1, 4).reshape(B, H, W, -1) + x = self.proj(x) + + return x + + +class ResBottleneckBlock(CNNBlockBase): + """ + The standard bottleneck residual block without the last activation layer. + It contains 3 conv layers with kernels 1x1, 3x3, 1x1. + """ + + def __init__( + self, + in_channels, + out_channels, + bottleneck_channels, + norm="LN", + act_layer=nn.GELU, + ): + """ + Args: + in_channels (int): Number of input channels. 
+ out_channels (int): Number of output channels. + bottleneck_channels (int): number of output channels for the 3x3 + "bottleneck" conv layers. + norm (str or callable): normalization for all conv layers. + See :func:`layers.get_norm` for supported format. + act_layer (callable): activation for all conv layers. + """ + super().__init__(in_channels, out_channels, 1) + + self.conv1 = Conv2d(in_channels, bottleneck_channels, 1, bias=False) + self.norm1 = get_norm(norm, bottleneck_channels) + self.act1 = act_layer() + + self.conv2 = Conv2d( + bottleneck_channels, + bottleneck_channels, + 3, + padding=1, + bias=False, + ) + self.norm2 = get_norm(norm, bottleneck_channels) + self.act2 = act_layer() + + self.conv3 = Conv2d(bottleneck_channels, out_channels, 1, bias=False) + self.norm3 = get_norm(norm, out_channels) + + for layer in [self.conv1, self.conv2, self.conv3]: + weight_init.c2_msra_fill(layer) + for layer in [self.norm1, self.norm2]: + layer.weight.data.fill_(1.0) + layer.bias.data.zero_() + # zero init last norm layer. + self.norm3.weight.data.zero_() + self.norm3.bias.data.zero_() + + def forward(self, x): + out = x + for layer in self.children(): + out = layer(out) + + out = x + out + return out + + +class Block(nn.Module): + """Transformer blocks with support of window attention and residual propagation blocks""" + + def __init__( + self, + dim, + num_heads, + mlp_ratio=4.0, + qkv_bias=True, + drop_path=0.0, + norm_layer=nn.LayerNorm, + act_layer=nn.GELU, + use_rel_pos=False, + rel_pos_zero_init=True, + window_size=0, + use_residual_block=False, + input_size=None, + ): + """ + Args: + dim (int): Number of input channels. + num_heads (int): Number of attention heads in each ViT block. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool): If True, add a learnable bias to query, key, value. + drop_path (float): Stochastic depth rate. + norm_layer (nn.Module): Normalization layer. + act_layer (nn.Module): Activation layer. + use_rel_pos (bool): If True, add relative positional embeddings to the attention map. + rel_pos_zero_init (bool): If True, zero initialize relative positional parameters. + window_size (int): Window size for window attention blocks. If it equals 0, then not + use window attention. + use_residual_block (bool): If True, use a residual block after the MLP block. + input_size (int or None): Input resolution for calculating the relative positional + parameter size. 
+ """ + super().__init__() + self.norm1 = norm_layer(dim) + self.attn = Attention( + dim, + num_heads=num_heads, + qkv_bias=qkv_bias, + use_rel_pos=use_rel_pos, + rel_pos_zero_init=rel_pos_zero_init, + input_size=input_size if window_size == 0 else (window_size, window_size), + ) + + from timm.models.layers import DropPath, Mlp + + self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity() + self.norm2 = norm_layer(dim) + self.mlp = Mlp(in_features=dim, hidden_features=int(dim * mlp_ratio), act_layer=act_layer) + + self.window_size = window_size + + self.use_residual_block = use_residual_block + if use_residual_block: + # Use a residual block with bottleneck channel as dim // 2 + self.residual = ResBottleneckBlock( + in_channels=dim, + out_channels=dim, + bottleneck_channels=dim // 2, + norm="LN", + act_layer=act_layer, + ) + + def forward(self, x): + shortcut = x + x = self.norm1(x) + # Window partition + if self.window_size > 0: + H, W = x.shape[1], x.shape[2] + x, pad_hw = window_partition(x, self.window_size) + + x = self.attn(x) + # Reverse window partition + if self.window_size > 0: + x = window_unpartition(x, self.window_size, pad_hw, (H, W)) + + x = shortcut + self.drop_path(x) + x = x + self.drop_path(self.mlp(self.norm2(x))) + + if self.use_residual_block: + x = self.residual(x.permute(0, 3, 1, 2)).permute(0, 2, 3, 1) + + return x + + +class ViT(Backbone): + """ + This module implements Vision Transformer (ViT) backbone in :paper:`vitdet`. + "Exploring Plain Vision Transformer Backbones for Object Detection", + https://arxiv.org/abs/2203.16527 + """ + + def __init__( + self, + img_size=1024, + patch_size=16, + in_chans=3, + embed_dim=768, + depth=12, + num_heads=12, + mlp_ratio=4.0, + qkv_bias=True, + drop_path_rate=0.0, + norm_layer=nn.LayerNorm, + act_layer=nn.GELU, + use_abs_pos=True, + use_rel_pos=False, + rel_pos_zero_init=True, + window_size=0, + window_block_indexes=(), + residual_block_indexes=(), + use_act_checkpoint=False, + pretrain_img_size=224, + pretrain_use_cls_token=True, + out_feature="last_feat", + ): + """ + Args: + img_size (int): Input image size. + patch_size (int): Patch size. + in_chans (int): Number of input image channels. + embed_dim (int): Patch embedding dimension. + depth (int): Depth of ViT. + num_heads (int): Number of attention heads in each ViT block. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool): If True, add a learnable bias to query, key, value. + drop_path_rate (float): Stochastic depth rate. + norm_layer (nn.Module): Normalization layer. + act_layer (nn.Module): Activation layer. + use_abs_pos (bool): If True, use absolute positional embeddings. + use_rel_pos (bool): If True, add relative positional embeddings to the attention map. + rel_pos_zero_init (bool): If True, zero initialize relative positional parameters. + window_size (int): Window size for window attention blocks. + window_block_indexes (list): Indexes for blocks using window attention. + residual_block_indexes (list): Indexes for blocks using conv propagation. + use_act_checkpoint (bool): If True, use activation checkpointing. + pretrain_img_size (int): input image size for pretraining models. + pretrain_use_cls_token (bool): If True, pretrainig models use class token. + out_feature (str): name of the feature from the last block. 
+ """ + super().__init__() + self.pretrain_use_cls_token = pretrain_use_cls_token + + self.patch_embed = PatchEmbed( + kernel_size=(patch_size, patch_size), + stride=(patch_size, patch_size), + in_chans=in_chans, + embed_dim=embed_dim, + ) + + if use_abs_pos: + # Initialize absolute positional embedding with pretrain image size. + num_patches = (pretrain_img_size // patch_size) * (pretrain_img_size // patch_size) + num_positions = (num_patches + 1) if pretrain_use_cls_token else num_patches + self.pos_embed = nn.Parameter(torch.zeros(1, num_positions, embed_dim)) + else: + self.pos_embed = None + + # stochastic depth decay rule + dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)] + + self.blocks = nn.ModuleList() + for i in range(depth): + block = Block( + dim=embed_dim, + num_heads=num_heads, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + drop_path=dpr[i], + norm_layer=norm_layer, + act_layer=act_layer, + use_rel_pos=use_rel_pos, + rel_pos_zero_init=rel_pos_zero_init, + window_size=window_size if i in window_block_indexes else 0, + use_residual_block=i in residual_block_indexes, + input_size=(img_size // patch_size, img_size // patch_size), + ) + if use_act_checkpoint: + # TODO: use torch.utils.checkpoint + from fairscale.nn.checkpoint import checkpoint_wrapper + + block = checkpoint_wrapper(block) + self.blocks.append(block) + + self._out_feature_channels = {out_feature: embed_dim} + self._out_feature_strides = {out_feature: patch_size} + self._out_features = [out_feature] + + if self.pos_embed is not None: + nn.init.trunc_normal_(self.pos_embed, std=0.02) + + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + nn.init.trunc_normal_(m.weight, std=0.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + + def forward(self, x): + x = self.patch_embed(x) + if self.pos_embed is not None: + x = x + get_abs_pos( + self.pos_embed, self.pretrain_use_cls_token, (x.shape[1], x.shape[2]) + ) + + for blk in self.blocks: + x = blk(x) + + outputs = {self._out_features[0]: x.permute(0, 3, 1, 2)} + return outputs + + +class SimpleFeaturePyramid(Backbone): + """ + This module implements SimpleFeaturePyramid in :paper:`vitdet`. + It creates pyramid features built on top of the input feature map. + """ + + def __init__( + self, + net, + in_feature, + out_channels, + scale_factors, + top_block=None, + norm="LN", + square_pad=0, + ): + """ + Args: + net (Backbone): module representing the subnetwork backbone. + Must be a subclass of :class:`Backbone`. + in_feature (str): names of the input feature maps coming + from the net. + out_channels (int): number of channels in the output feature maps. + scale_factors (list[float]): list of scaling factors to upsample or downsample + the input features for creating pyramid features. + top_block (nn.Module or None): if provided, an extra operation will + be performed on the output of the last (smallest resolution) + pyramid output, and the result will extend the result list. The top_block + further downsamples the feature map. It must have an attribute + "num_levels", meaning the number of extra pyramid levels added by + this block, and "in_feature", which is a string representing + its input feature (e.g., p5). + norm (str): the normalization to use. + square_pad (int): If > 0, require input images to be padded to specific square size. 
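# Usage sketch for the plain ViT backbone above: it returns a single feature map,
# named "last_feat" by default, at stride patch_size. depth=2 only keeps the sketch
# cheap; note the Block above imports Mlp/DropPath from timm, so timm must be installed.
import torch

vit = ViT(img_size=224, patch_size=16, embed_dim=768, depth=2, num_heads=12)
out = vit(torch.randn(1, 3, 224, 224))
assert list(out.keys()) == ["last_feat"]
assert out["last_feat"].shape == (1, 768, 14, 14)   # 224 / 16 = 14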
+ """ + super(SimpleFeaturePyramid, self).__init__() + assert isinstance(net, Backbone) + + self.scale_factors = scale_factors + + input_shapes = net.output_shape() + strides = [int(input_shapes[in_feature].stride / scale) for scale in scale_factors] + _assert_strides_are_log2_contiguous(strides) + + dim = input_shapes[in_feature].channels + self.stages = [] + use_bias = norm == "" + for idx, scale in enumerate(scale_factors): + out_dim = dim + if scale == 4.0: + layers = [ + nn.ConvTranspose2d(dim, dim // 2, kernel_size=2, stride=2), + get_norm(norm, dim // 2), + nn.GELU(), + nn.ConvTranspose2d(dim // 2, dim // 4, kernel_size=2, stride=2), + ] + out_dim = dim // 4 + elif scale == 2.0: + layers = [nn.ConvTranspose2d(dim, dim // 2, kernel_size=2, stride=2)] + out_dim = dim // 2 + elif scale == 1.0: + layers = [] + elif scale == 0.5: + layers = [nn.MaxPool2d(kernel_size=2, stride=2)] + else: + raise NotImplementedError(f"scale_factor={scale} is not supported yet.") + + layers.extend( + [ + Conv2d( + out_dim, + out_channels, + kernel_size=1, + bias=use_bias, + norm=get_norm(norm, out_channels), + ), + Conv2d( + out_channels, + out_channels, + kernel_size=3, + padding=1, + bias=use_bias, + norm=get_norm(norm, out_channels), + ), + ] + ) + layers = nn.Sequential(*layers) + + stage = int(math.log2(strides[idx])) + self.add_module(f"simfp_{stage}", layers) + self.stages.append(layers) + + self.net = net + self.in_feature = in_feature + self.top_block = top_block + # Return feature names are "p", like ["p2", "p3", ..., "p6"] + self._out_feature_strides = {"p{}".format(int(math.log2(s))): s for s in strides} + # top block output feature maps. + if self.top_block is not None: + for s in range(stage, stage + self.top_block.num_levels): + self._out_feature_strides["p{}".format(s + 1)] = 2 ** (s + 1) + + self._out_features = list(self._out_feature_strides.keys()) + self._out_feature_channels = {k: out_channels for k in self._out_features} + self._size_divisibility = strides[-1] + self._square_pad = square_pad + + @property + def padding_constraints(self): + return { + "size_divisiblity": self._size_divisibility, + "square_size": self._square_pad, + } + + def forward(self, x): + """ + Args: + x: Tensor of shape (N,C,H,W). H, W must be a multiple of ``self.size_divisibility``. + + Returns: + dict[str->Tensor]: + mapping from feature map name to pyramid feature map tensor + in high to low resolution order. Returned feature names follow the FPN + convention: "p", where stage has stride = 2 ** stage e.g., + ["p2", "p3", ..., "p6"]. + """ + bottom_up_features = self.net(x) + features = bottom_up_features[self.in_feature] + results = [] + + for stage in self.stages: + results.append(stage(features)) + + if self.top_block is not None: + if self.top_block.in_feature in bottom_up_features: + top_block_in_feature = bottom_up_features[self.top_block.in_feature] + else: + top_block_in_feature = results[self._out_features.index(self.top_block.in_feature)] + results.extend(self.top_block(top_block_in_feature)) + assert len(self._out_features) == len(results) + return {f: res for f, res in zip(self._out_features, results)} + + +def get_vit_lr_decay_rate(name, lr_decay_rate=1.0, num_layers=12): + """ + Calculate lr decay rate for different ViT blocks. + Args: + name (string): parameter name. + lr_decay_rate (float): base lr decay rate. + num_layers (int): number of ViT blocks. + + Returns: + lr decay rate for the given parameter. 
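# Sketch of how SimpleFeaturePyramid turns one stride-16 ViT feature into a pyramid:
# scale_factors (4.0, 2.0, 1.0, 0.5) give strides 4/8/16/32, i.e. outputs "p2".."p5".
# The small ViT here mirrors the earlier sketch and is only illustrative.
import torch

vit = ViT(img_size=224, patch_size=16, embed_dim=768, depth=2, num_heads=12)
fpn = SimpleFeaturePyramid(
    net=vit,
    in_feature="last_feat",
    out_channels=256,
    scale_factors=(4.0, 2.0, 1.0, 0.5),
    norm="LN",
)
feats = fpn(torch.randn(1, 3, 224, 224))
assert sorted(feats.keys()) == ["p2", "p3", "p4", "p5"]
assert feats["p2"].shape == (1, 256, 56, 56)
assert feats["p5"].shape == (1, 256, 7, 7)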
+ """ + layer_id = num_layers + 1 + if name.startswith("backbone"): + if ".pos_embed" in name or ".patch_embed" in name: + layer_id = 0 + elif ".blocks." in name and ".residual." not in name: + layer_id = int(name[name.find(".blocks.") :].split(".")[2]) + 1 + + return lr_decay_rate ** (num_layers + 1 - layer_id) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/box_regression.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/box_regression.py new file mode 100644 index 0000000000000000000000000000000000000000..3cd5668d9a72edd34df4f458f90ac72553abb955 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/box_regression.py @@ -0,0 +1,369 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import math +from typing import List, Tuple, Union +import torch +from fvcore.nn import giou_loss, smooth_l1_loss +from torch.nn import functional as F + +from annotator.oneformer.detectron2.layers import cat, ciou_loss, diou_loss +from annotator.oneformer.detectron2.structures import Boxes + +# Value for clamping large dw and dh predictions. The heuristic is that we clamp +# such that dw and dh are no larger than what would transform a 16px box into a +# 1000px box (based on a small anchor, 16px, and a typical image size, 1000px). +_DEFAULT_SCALE_CLAMP = math.log(1000.0 / 16) + + +__all__ = ["Box2BoxTransform", "Box2BoxTransformRotated", "Box2BoxTransformLinear"] + + +@torch.jit.script +class Box2BoxTransform(object): + """ + The box-to-box transform defined in R-CNN. The transformation is parameterized + by 4 deltas: (dx, dy, dw, dh). The transformation scales the box's width and height + by exp(dw), exp(dh) and shifts a box's center by the offset (dx * width, dy * height). + """ + + def __init__( + self, weights: Tuple[float, float, float, float], scale_clamp: float = _DEFAULT_SCALE_CLAMP + ): + """ + Args: + weights (4-element tuple): Scaling factors that are applied to the + (dx, dy, dw, dh) deltas. In Fast R-CNN, these were originally set + such that the deltas have unit variance; now they are treated as + hyperparameters of the system. + scale_clamp (float): When predicting deltas, the predicted box scaling + factors (dw and dh) are clamped such that they are <= scale_clamp. + """ + self.weights = weights + self.scale_clamp = scale_clamp + + def get_deltas(self, src_boxes, target_boxes): + """ + Get box regression transformation deltas (dx, dy, dw, dh) that can be used + to transform the `src_boxes` into the `target_boxes`. That is, the relation + ``target_boxes == self.apply_deltas(deltas, src_boxes)`` is true (unless + any delta is too large and is clamped). + + Args: + src_boxes (Tensor): source boxes, e.g., object proposals + target_boxes (Tensor): target of the transformation, e.g., ground-truth + boxes. 
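# Sketch of the layer-wise lr decay above for a 12-block ViT with decay rate 0.7:
# the patch embedding is treated as layer 0, and parameters outside the backbone
# keep a multiplier of 1.0. The parameter names are illustrative.
assert get_vit_lr_decay_rate("backbone.patch_embed.proj.weight", 0.7, 12) == 0.7 ** 13
assert get_vit_lr_decay_rate("backbone.blocks.11.attn.qkv.weight", 0.7, 12) == 0.7 ** 1
assert get_vit_lr_decay_rate("roi_heads.box_predictor.cls_score.weight", 0.7, 12) == 1.0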
+ """ + assert isinstance(src_boxes, torch.Tensor), type(src_boxes) + assert isinstance(target_boxes, torch.Tensor), type(target_boxes) + + src_widths = src_boxes[:, 2] - src_boxes[:, 0] + src_heights = src_boxes[:, 3] - src_boxes[:, 1] + src_ctr_x = src_boxes[:, 0] + 0.5 * src_widths + src_ctr_y = src_boxes[:, 1] + 0.5 * src_heights + + target_widths = target_boxes[:, 2] - target_boxes[:, 0] + target_heights = target_boxes[:, 3] - target_boxes[:, 1] + target_ctr_x = target_boxes[:, 0] + 0.5 * target_widths + target_ctr_y = target_boxes[:, 1] + 0.5 * target_heights + + wx, wy, ww, wh = self.weights + dx = wx * (target_ctr_x - src_ctr_x) / src_widths + dy = wy * (target_ctr_y - src_ctr_y) / src_heights + dw = ww * torch.log(target_widths / src_widths) + dh = wh * torch.log(target_heights / src_heights) + + deltas = torch.stack((dx, dy, dw, dh), dim=1) + assert (src_widths > 0).all().item(), "Input boxes to Box2BoxTransform are not valid!" + return deltas + + def apply_deltas(self, deltas, boxes): + """ + Apply transformation `deltas` (dx, dy, dw, dh) to `boxes`. + + Args: + deltas (Tensor): transformation deltas of shape (N, k*4), where k >= 1. + deltas[i] represents k potentially different class-specific + box transformations for the single box boxes[i]. + boxes (Tensor): boxes to transform, of shape (N, 4) + """ + deltas = deltas.float() # ensure fp32 for decoding precision + boxes = boxes.to(deltas.dtype) + + widths = boxes[:, 2] - boxes[:, 0] + heights = boxes[:, 3] - boxes[:, 1] + ctr_x = boxes[:, 0] + 0.5 * widths + ctr_y = boxes[:, 1] + 0.5 * heights + + wx, wy, ww, wh = self.weights + dx = deltas[:, 0::4] / wx + dy = deltas[:, 1::4] / wy + dw = deltas[:, 2::4] / ww + dh = deltas[:, 3::4] / wh + + # Prevent sending too large values into torch.exp() + dw = torch.clamp(dw, max=self.scale_clamp) + dh = torch.clamp(dh, max=self.scale_clamp) + + pred_ctr_x = dx * widths[:, None] + ctr_x[:, None] + pred_ctr_y = dy * heights[:, None] + ctr_y[:, None] + pred_w = torch.exp(dw) * widths[:, None] + pred_h = torch.exp(dh) * heights[:, None] + + x1 = pred_ctr_x - 0.5 * pred_w + y1 = pred_ctr_y - 0.5 * pred_h + x2 = pred_ctr_x + 0.5 * pred_w + y2 = pred_ctr_y + 0.5 * pred_h + pred_boxes = torch.stack((x1, y1, x2, y2), dim=-1) + return pred_boxes.reshape(deltas.shape) + + +@torch.jit.script +class Box2BoxTransformRotated(object): + """ + The box-to-box transform defined in Rotated R-CNN. The transformation is parameterized + by 5 deltas: (dx, dy, dw, dh, da). The transformation scales the box's width and height + by exp(dw), exp(dh), shifts a box's center by the offset (dx * width, dy * height), + and rotate a box's angle by da (radians). + Note: angles of deltas are in radians while angles of boxes are in degrees. + """ + + def __init__( + self, + weights: Tuple[float, float, float, float, float], + scale_clamp: float = _DEFAULT_SCALE_CLAMP, + ): + """ + Args: + weights (5-element tuple): Scaling factors that are applied to the + (dx, dy, dw, dh, da) deltas. These are treated as + hyperparameters of the system. + scale_clamp (float): When predicting deltas, the predicted box scaling + factors (dw and dh) are clamped such that they are <= scale_clamp. + """ + self.weights = weights + self.scale_clamp = scale_clamp + + def get_deltas(self, src_boxes, target_boxes): + """ + Get box regression transformation deltas (dx, dy, dw, dh, da) that can be used + to transform the `src_boxes` into the `target_boxes`. 
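# Round-trip sketch for Box2BoxTransform above: encoding (src -> target) and then
# decoding the deltas back onto src recovers the target boxes. The unit weights and
# box coordinates below are illustrative.
import torch

tfm = Box2BoxTransform(weights=(1.0, 1.0, 1.0, 1.0))
src = torch.tensor([[0.0, 0.0, 10.0, 10.0], [5.0, 5.0, 15.0, 25.0]])
tgt = torch.tensor([[1.0, 2.0, 11.0, 14.0], [4.0, 0.0, 20.0, 30.0]])
deltas = tfm.get_deltas(src, tgt)           # (N, 4): dx, dy, dw, dh
recovered = tfm.apply_deltas(deltas, src)   # (N, 4), since k = 1 class here
assert torch.allclose(recovered, tgt, atol=1e-5)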
That is, the relation + ``target_boxes == self.apply_deltas(deltas, src_boxes)`` is true (unless + any delta is too large and is clamped). + + Args: + src_boxes (Tensor): Nx5 source boxes, e.g., object proposals + target_boxes (Tensor): Nx5 target of the transformation, e.g., ground-truth + boxes. + """ + assert isinstance(src_boxes, torch.Tensor), type(src_boxes) + assert isinstance(target_boxes, torch.Tensor), type(target_boxes) + + src_ctr_x, src_ctr_y, src_widths, src_heights, src_angles = torch.unbind(src_boxes, dim=1) + + target_ctr_x, target_ctr_y, target_widths, target_heights, target_angles = torch.unbind( + target_boxes, dim=1 + ) + + wx, wy, ww, wh, wa = self.weights + dx = wx * (target_ctr_x - src_ctr_x) / src_widths + dy = wy * (target_ctr_y - src_ctr_y) / src_heights + dw = ww * torch.log(target_widths / src_widths) + dh = wh * torch.log(target_heights / src_heights) + # Angles of deltas are in radians while angles of boxes are in degrees. + # the conversion to radians serve as a way to normalize the values + da = target_angles - src_angles + da = (da + 180.0) % 360.0 - 180.0 # make it in [-180, 180) + da *= wa * math.pi / 180.0 + + deltas = torch.stack((dx, dy, dw, dh, da), dim=1) + assert ( + (src_widths > 0).all().item() + ), "Input boxes to Box2BoxTransformRotated are not valid!" + return deltas + + def apply_deltas(self, deltas, boxes): + """ + Apply transformation `deltas` (dx, dy, dw, dh, da) to `boxes`. + + Args: + deltas (Tensor): transformation deltas of shape (N, k*5). + deltas[i] represents box transformation for the single box boxes[i]. + boxes (Tensor): boxes to transform, of shape (N, 5) + """ + assert deltas.shape[1] % 5 == 0 and boxes.shape[1] == 5 + + boxes = boxes.to(deltas.dtype).unsqueeze(2) + + ctr_x = boxes[:, 0] + ctr_y = boxes[:, 1] + widths = boxes[:, 2] + heights = boxes[:, 3] + angles = boxes[:, 4] + + wx, wy, ww, wh, wa = self.weights + + dx = deltas[:, 0::5] / wx + dy = deltas[:, 1::5] / wy + dw = deltas[:, 2::5] / ww + dh = deltas[:, 3::5] / wh + da = deltas[:, 4::5] / wa + + # Prevent sending too large values into torch.exp() + dw = torch.clamp(dw, max=self.scale_clamp) + dh = torch.clamp(dh, max=self.scale_clamp) + + pred_boxes = torch.zeros_like(deltas) + pred_boxes[:, 0::5] = dx * widths + ctr_x # x_ctr + pred_boxes[:, 1::5] = dy * heights + ctr_y # y_ctr + pred_boxes[:, 2::5] = torch.exp(dw) * widths # width + pred_boxes[:, 3::5] = torch.exp(dh) * heights # height + + # Following original RRPN implementation, + # angles of deltas are in radians while angles of boxes are in degrees. + pred_angle = da * 180.0 / math.pi + angles + pred_angle = (pred_angle + 180.0) % 360.0 - 180.0 # make it in [-180, 180) + + pred_boxes[:, 4::5] = pred_angle + + return pred_boxes + + +class Box2BoxTransformLinear(object): + """ + The linear box-to-box transform defined in FCOS. The transformation is parameterized + by the distance from the center of (square) src box to 4 edges of the target box. + """ + + def __init__(self, normalize_by_size=True): + """ + Args: + normalize_by_size: normalize deltas by the size of src (anchor) boxes. + """ + self.normalize_by_size = normalize_by_size + + def get_deltas(self, src_boxes, target_boxes): + """ + Get box regression transformation deltas (dx1, dy1, dx2, dy2) that can be used + to transform the `src_boxes` into the `target_boxes`. That is, the relation + ``target_boxes == self.apply_deltas(deltas, src_boxes)`` is true. + The center of src must be inside target boxes. 
+ + Args: + src_boxes (Tensor): square source boxes, e.g., anchors + target_boxes (Tensor): target of the transformation, e.g., ground-truth + boxes. + """ + assert isinstance(src_boxes, torch.Tensor), type(src_boxes) + assert isinstance(target_boxes, torch.Tensor), type(target_boxes) + + src_ctr_x = 0.5 * (src_boxes[:, 0] + src_boxes[:, 2]) + src_ctr_y = 0.5 * (src_boxes[:, 1] + src_boxes[:, 3]) + + target_l = src_ctr_x - target_boxes[:, 0] + target_t = src_ctr_y - target_boxes[:, 1] + target_r = target_boxes[:, 2] - src_ctr_x + target_b = target_boxes[:, 3] - src_ctr_y + + deltas = torch.stack((target_l, target_t, target_r, target_b), dim=1) + if self.normalize_by_size: + stride_w = src_boxes[:, 2] - src_boxes[:, 0] + stride_h = src_boxes[:, 3] - src_boxes[:, 1] + strides = torch.stack([stride_w, stride_h, stride_w, stride_h], axis=1) + deltas = deltas / strides + + return deltas + + def apply_deltas(self, deltas, boxes): + """ + Apply transformation `deltas` (dx1, dy1, dx2, dy2) to `boxes`. + + Args: + deltas (Tensor): transformation deltas of shape (N, k*4), where k >= 1. + deltas[i] represents k potentially different class-specific + box transformations for the single box boxes[i]. + boxes (Tensor): boxes to transform, of shape (N, 4) + """ + # Ensure the output is a valid box. See Sec 2.1 of https://arxiv.org/abs/2006.09214 + deltas = F.relu(deltas) + boxes = boxes.to(deltas.dtype) + + ctr_x = 0.5 * (boxes[:, 0] + boxes[:, 2]) + ctr_y = 0.5 * (boxes[:, 1] + boxes[:, 3]) + if self.normalize_by_size: + stride_w = boxes[:, 2] - boxes[:, 0] + stride_h = boxes[:, 3] - boxes[:, 1] + strides = torch.stack([stride_w, stride_h, stride_w, stride_h], axis=1) + deltas = deltas * strides + + l = deltas[:, 0::4] + t = deltas[:, 1::4] + r = deltas[:, 2::4] + b = deltas[:, 3::4] + + pred_boxes = torch.zeros_like(deltas) + pred_boxes[:, 0::4] = ctr_x[:, None] - l # x1 + pred_boxes[:, 1::4] = ctr_y[:, None] - t # y1 + pred_boxes[:, 2::4] = ctr_x[:, None] + r # x2 + pred_boxes[:, 3::4] = ctr_y[:, None] + b # y2 + return pred_boxes + + +def _dense_box_regression_loss( + anchors: List[Union[Boxes, torch.Tensor]], + box2box_transform: Box2BoxTransform, + pred_anchor_deltas: List[torch.Tensor], + gt_boxes: List[torch.Tensor], + fg_mask: torch.Tensor, + box_reg_loss_type="smooth_l1", + smooth_l1_beta=0.0, +): + """ + Compute loss for dense multi-level box regression. + Loss is accumulated over ``fg_mask``. + + Args: + anchors: #lvl anchor boxes, each is (HixWixA, 4) + pred_anchor_deltas: #lvl predictions, each is (N, HixWixA, 4) + gt_boxes: N ground truth boxes, each has shape (R, 4) (R = sum(Hi * Wi * A)) + fg_mask: the foreground boolean mask of shape (N, R) to compute loss on + box_reg_loss_type (str): Loss type to use. Supported losses: "smooth_l1", "giou", + "diou", "ciou". + smooth_l1_beta (float): beta parameter for the smooth L1 regression loss. Default to + use L1 loss. 
Only used when `box_reg_loss_type` is "smooth_l1" + """ + if isinstance(anchors[0], Boxes): + anchors = type(anchors[0]).cat(anchors).tensor # (R, 4) + else: + anchors = cat(anchors) + if box_reg_loss_type == "smooth_l1": + gt_anchor_deltas = [box2box_transform.get_deltas(anchors, k) for k in gt_boxes] + gt_anchor_deltas = torch.stack(gt_anchor_deltas) # (N, R, 4) + loss_box_reg = smooth_l1_loss( + cat(pred_anchor_deltas, dim=1)[fg_mask], + gt_anchor_deltas[fg_mask], + beta=smooth_l1_beta, + reduction="sum", + ) + elif box_reg_loss_type == "giou": + pred_boxes = [ + box2box_transform.apply_deltas(k, anchors) for k in cat(pred_anchor_deltas, dim=1) + ] + loss_box_reg = giou_loss( + torch.stack(pred_boxes)[fg_mask], torch.stack(gt_boxes)[fg_mask], reduction="sum" + ) + elif box_reg_loss_type == "diou": + pred_boxes = [ + box2box_transform.apply_deltas(k, anchors) for k in cat(pred_anchor_deltas, dim=1) + ] + loss_box_reg = diou_loss( + torch.stack(pred_boxes)[fg_mask], torch.stack(gt_boxes)[fg_mask], reduction="sum" + ) + elif box_reg_loss_type == "ciou": + pred_boxes = [ + box2box_transform.apply_deltas(k, anchors) for k in cat(pred_anchor_deltas, dim=1) + ] + loss_box_reg = ciou_loss( + torch.stack(pred_boxes)[fg_mask], torch.stack(gt_boxes)[fg_mask], reduction="sum" + ) + else: + raise ValueError(f"Invalid dense box regression loss type '{box_reg_loss_type}'") + return loss_box_reg diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/matcher.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/matcher.py new file mode 100644 index 0000000000000000000000000000000000000000..2504d17a4f9707d7cdd8d47a6cb5a2faf3c397fd --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/matcher.py @@ -0,0 +1,127 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from typing import List +import torch + +from annotator.oneformer.detectron2.layers import nonzero_tuple + + +# TODO: the name is too general +class Matcher(object): + """ + This class assigns to each predicted "element" (e.g., a box) a ground-truth + element. Each predicted element will have exactly zero or one matches; each + ground-truth element may be matched to zero or more predicted elements. + + The matching is determined by the MxN match_quality_matrix, that characterizes + how well each (ground-truth, prediction)-pair match each other. For example, + if the elements are boxes, this matrix may contain box intersection-over-union + overlap values. + + The matcher returns (a) a vector of length N containing the index of the + ground-truth element m in [0, M) that matches to prediction n in [0, N). + (b) a vector of length N containing the labels for each prediction. + """ + + def __init__( + self, thresholds: List[float], labels: List[int], allow_low_quality_matches: bool = False + ): + """ + Args: + thresholds (list): a list of thresholds used to stratify predictions + into levels. + labels (list): a list of values to label predictions belonging at + each level. A label can be one of {-1, 0, 1} signifying + {ignore, negative class, positive class}, respectively. + allow_low_quality_matches (bool): if True, produce additional matches + for predictions with maximum match quality lower than high_threshold. + See set_low_quality_matches_ for more details. 
+ + For example, + thresholds = [0.3, 0.5] + labels = [0, -1, 1] + All predictions with iou < 0.3 will be marked with 0 and + thus will be considered as false positives while training. + All predictions with 0.3 <= iou < 0.5 will be marked with -1 and + thus will be ignored. + All predictions with 0.5 <= iou will be marked with 1 and + thus will be considered as true positives. + """ + # Add -inf and +inf to first and last position in thresholds + thresholds = thresholds[:] + assert thresholds[0] > 0 + thresholds.insert(0, -float("inf")) + thresholds.append(float("inf")) + # Currently torchscript does not support all + generator + assert all([low <= high for (low, high) in zip(thresholds[:-1], thresholds[1:])]) + assert all([l in [-1, 0, 1] for l in labels]) + assert len(labels) == len(thresholds) - 1 + self.thresholds = thresholds + self.labels = labels + self.allow_low_quality_matches = allow_low_quality_matches + + def __call__(self, match_quality_matrix): + """ + Args: + match_quality_matrix (Tensor[float]): an MxN tensor, containing the + pairwise quality between M ground-truth elements and N predicted + elements. All elements must be >= 0 (due to the us of `torch.nonzero` + for selecting indices in :meth:`set_low_quality_matches_`). + + Returns: + matches (Tensor[int64]): a vector of length N, where matches[i] is a matched + ground-truth index in [0, M) + match_labels (Tensor[int8]): a vector of length N, where pred_labels[i] indicates + whether a prediction is a true or false positive or ignored + """ + assert match_quality_matrix.dim() == 2 + if match_quality_matrix.numel() == 0: + default_matches = match_quality_matrix.new_full( + (match_quality_matrix.size(1),), 0, dtype=torch.int64 + ) + # When no gt boxes exist, we define IOU = 0 and therefore set labels + # to `self.labels[0]`, which usually defaults to background class 0 + # To choose to ignore instead, can make labels=[-1,0,-1,1] + set appropriate thresholds + default_match_labels = match_quality_matrix.new_full( + (match_quality_matrix.size(1),), self.labels[0], dtype=torch.int8 + ) + return default_matches, default_match_labels + + assert torch.all(match_quality_matrix >= 0) + + # match_quality_matrix is M (gt) x N (predicted) + # Max over gt elements (dim 0) to find best gt candidate for each prediction + matched_vals, matches = match_quality_matrix.max(dim=0) + + match_labels = matches.new_full(matches.size(), 1, dtype=torch.int8) + + for (l, low, high) in zip(self.labels, self.thresholds[:-1], self.thresholds[1:]): + low_high = (matched_vals >= low) & (matched_vals < high) + match_labels[low_high] = l + + if self.allow_low_quality_matches: + self.set_low_quality_matches_(match_labels, match_quality_matrix) + + return matches, match_labels + + def set_low_quality_matches_(self, match_labels, match_quality_matrix): + """ + Produce additional matches for predictions that have only low-quality matches. + Specifically, for each ground-truth G find the set of predictions that have + maximum overlap with it (including ties); for each prediction in that set, if + it is unmatched, then match it to the ground-truth G. + + This function implements the RPN assignment case (i) in Sec. 3.1.2 of + :paper:`Faster R-CNN`. + """ + # For each gt, find the prediction with which it has highest quality + highest_quality_foreach_gt, _ = match_quality_matrix.max(dim=1) + # Find the highest quality match available, even if it is low, including ties. + # Note that the matches qualities must be positive due to the use of + # `torch.nonzero`. 
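+        # Illustration with made-up values: if
+        #     match_quality_matrix = [[0.1, 0.4],
+        #                             [0.2, 0.2]]
+        # the per-gt maxima are [0.4, 0.2], so prediction 1 is re-labeled positive
+        # for gt 0, and predictions 0 and 1 (a tie) for gt 1, even though 0.2 may
+        # fall below the usual foreground threshold.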
+ _, pred_inds_with_highest_quality = nonzero_tuple( + match_quality_matrix == highest_quality_foreach_gt[:, None] + ) + # If an anchor was labeled positive only due to a low-quality match + # with gt_A, but it has larger overlap with gt_B, it's matched index will still be gt_B. + # This follows the implementation in Detectron, and is found to have no significant impact. + match_labels[pred_inds_with_highest_quality] = 1 diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/meta_arch/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/meta_arch/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6b0668157052ce7b796ef50bc7ee85361e7605b9 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/meta_arch/__init__.py @@ -0,0 +1,16 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + +from .build import META_ARCH_REGISTRY, build_model # isort:skip + +from .panoptic_fpn import PanopticFPN + +# import all the meta_arch, so they will be registered +from .rcnn import GeneralizedRCNN, ProposalNetwork +from .dense_detector import DenseDetector +from .retinanet import RetinaNet +from .fcos import FCOS +from .semantic_seg import SEM_SEG_HEADS_REGISTRY, SemanticSegmentor, build_sem_seg_head + + +__all__ = list(globals().keys()) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/meta_arch/build.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/meta_arch/build.py new file mode 100644 index 0000000000000000000000000000000000000000..52229b11acf4a8f07c173feb51c45c30e9567903 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/meta_arch/build.py @@ -0,0 +1,24 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import torch + +from annotator.oneformer.detectron2.utils.logger import _log_api_usage +from annotator.oneformer.detectron2.utils.registry import Registry + +META_ARCH_REGISTRY = Registry("META_ARCH") # noqa F401 isort:skip +META_ARCH_REGISTRY.__doc__ = """ +Registry for meta-architectures, i.e. the whole model. + +The registered object will be called with `obj(cfg)` +and expected to return a `nn.Module` object. +""" + + +def build_model(cfg): + """ + Build the whole model architecture, defined by ``cfg.MODEL.META_ARCHITECTURE``. + Note that it does not load any weights from ``cfg``. + """ + meta_arch = cfg.MODEL.META_ARCHITECTURE + model = META_ARCH_REGISTRY.get(meta_arch)(cfg) + _log_api_usage("modeling.meta_arch." 
+ meta_arch) + return model diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/meta_arch/dense_detector.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/meta_arch/dense_detector.py new file mode 100644 index 0000000000000000000000000000000000000000..461c370fe9e5fab5c634b029d5176cf4dc68de2f --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/meta_arch/dense_detector.py @@ -0,0 +1,294 @@ +import numpy as np +from typing import Dict, List, Optional, Tuple +import torch +from torch import Tensor, nn + +from annotator.oneformer.detectron2.data.detection_utils import convert_image_to_rgb +from annotator.oneformer.detectron2.layers import move_device_like +from annotator.oneformer.detectron2.modeling import Backbone +from annotator.oneformer.detectron2.structures import Boxes, ImageList, Instances +from annotator.oneformer.detectron2.utils.events import get_event_storage + +from ..postprocessing import detector_postprocess + + +def permute_to_N_HWA_K(tensor, K: int): + """ + Transpose/reshape a tensor from (N, (Ai x K), H, W) to (N, (HxWxAi), K) + """ + assert tensor.dim() == 4, tensor.shape + N, _, H, W = tensor.shape + tensor = tensor.view(N, -1, K, H, W) + tensor = tensor.permute(0, 3, 4, 1, 2) + tensor = tensor.reshape(N, -1, K) # Size=(N,HWA,K) + return tensor + + +class DenseDetector(nn.Module): + """ + Base class for dense detector. We define a dense detector as a fully-convolutional model that + makes per-pixel (i.e. dense) predictions. + """ + + def __init__( + self, + backbone: Backbone, + head: nn.Module, + head_in_features: Optional[List[str]] = None, + *, + pixel_mean, + pixel_std, + ): + """ + Args: + backbone: backbone module + head: head module + head_in_features: backbone features to use in head. Default to all backbone features. + pixel_mean (Tuple[float]): + Values to be used for image normalization (BGR order). + To train on images of different number of channels, set different mean & std. + Default values are the mean pixel value from ImageNet: [103.53, 116.28, 123.675] + pixel_std (Tuple[float]): + When using pre-trained models in Detectron1 or any MSRA models, + std has been absorbed into its conv1 weights, so the std needs to be set 1. + Otherwise, you can use [57.375, 57.120, 58.395] (ImageNet std) + """ + super().__init__() + + self.backbone = backbone + self.head = head + if head_in_features is None: + shapes = self.backbone.output_shape() + self.head_in_features = sorted(shapes.keys(), key=lambda x: shapes[x].stride) + else: + self.head_in_features = head_in_features + self.register_buffer("pixel_mean", torch.tensor(pixel_mean).view(-1, 1, 1), False) + self.register_buffer("pixel_std", torch.tensor(pixel_std).view(-1, 1, 1), False) + + @property + def device(self): + return self.pixel_mean.device + + def _move_to_current_device(self, x): + return move_device_like(x, self.pixel_mean) + + def forward(self, batched_inputs: List[Dict[str, Tensor]]): + """ + Args: + batched_inputs: a list, batched outputs of :class:`DatasetMapper` . + Each item in the list contains the inputs for one image. + For now, each item in the list is a dict that contains: + + * image: Tensor, image in (C, H, W) format. + * instances: Instances + + Other information that's included in the original dicts, such as: + + * "height", "width" (int): the output resolution of the model, used in inference. + See :meth:`postprocess` for details. 
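+                A minimal illustrative item (sizes invented for this example) would
+                be ``{"image": torch.zeros(3, 480, 640), "height": 480, "width": 640}``.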
+ + Returns: + In training, dict[str, Tensor]: mapping from a named loss to a tensor storing the + loss. Used during training only. In inference, the standard output format, described + in :doc:`/tutorials/models`. + """ + images = self.preprocess_image(batched_inputs) + features = self.backbone(images.tensor) + features = [features[f] for f in self.head_in_features] + predictions = self.head(features) + + if self.training: + assert not torch.jit.is_scripting(), "Not supported" + assert "instances" in batched_inputs[0], "Instance annotations are missing in training!" + gt_instances = [x["instances"].to(self.device) for x in batched_inputs] + return self.forward_training(images, features, predictions, gt_instances) + else: + results = self.forward_inference(images, features, predictions) + if torch.jit.is_scripting(): + return results + + processed_results = [] + for results_per_image, input_per_image, image_size in zip( + results, batched_inputs, images.image_sizes + ): + height = input_per_image.get("height", image_size[0]) + width = input_per_image.get("width", image_size[1]) + r = detector_postprocess(results_per_image, height, width) + processed_results.append({"instances": r}) + return processed_results + + def forward_training(self, images, features, predictions, gt_instances): + raise NotImplementedError() + + def preprocess_image(self, batched_inputs: List[Dict[str, Tensor]]): + """ + Normalize, pad and batch the input images. + """ + images = [self._move_to_current_device(x["image"]) for x in batched_inputs] + images = [(x - self.pixel_mean) / self.pixel_std for x in images] + images = ImageList.from_tensors( + images, + self.backbone.size_divisibility, + padding_constraints=self.backbone.padding_constraints, + ) + return images + + def _transpose_dense_predictions( + self, predictions: List[List[Tensor]], dims_per_anchor: List[int] + ) -> List[List[Tensor]]: + """ + Transpose the dense per-level predictions. + + Args: + predictions: a list of outputs, each is a list of per-level + predictions with shape (N, Ai x K, Hi, Wi), where N is the + number of images, Ai is the number of anchors per location on + level i, K is the dimension of predictions per anchor. + dims_per_anchor: the value of K for each predictions. e.g. 4 for + box prediction, #classes for classification prediction. + + Returns: + List[List[Tensor]]: each prediction is transposed to (N, Hi x Wi x Ai, K). + """ + assert len(predictions) == len(dims_per_anchor) + res: List[List[Tensor]] = [] + for pred, dim_per_anchor in zip(predictions, dims_per_anchor): + pred = [permute_to_N_HWA_K(x, dim_per_anchor) for x in pred] + res.append(pred) + return res + + def _ema_update(self, name: str, value: float, initial_value: float, momentum: float = 0.9): + """ + Apply EMA update to `self.name` using `value`. + + This is mainly used for loss normalizer. In Detectron1, loss is normalized by number + of foreground samples in the batch. When batch size is 1 per GPU, #foreground has a + large variance and using it lead to lower performance. Therefore we maintain an EMA of + #foreground to stabilize the normalizer. 
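+        As a hand-worked illustration: with ``momentum=0.9``, an existing value of
+        100 and a new observation of 50 update to ``100 * 0.9 + 50 * 0.1 = 95``.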
+ + Args: + name: name of the normalizer + value: the new value to update + initial_value: the initial value to start with + momentum: momentum of EMA + + Returns: + float: the updated EMA value + """ + if hasattr(self, name): + old = getattr(self, name) + else: + old = initial_value + new = old * momentum + value * (1 - momentum) + setattr(self, name, new) + return new + + def _decode_per_level_predictions( + self, + anchors: Boxes, + pred_scores: Tensor, + pred_deltas: Tensor, + score_thresh: float, + topk_candidates: int, + image_size: Tuple[int, int], + ) -> Instances: + """ + Decode boxes and classification predictions of one featuer level, by + the following steps: + 1. filter the predictions based on score threshold and top K scores. + 2. transform the box regression outputs + 3. return the predicted scores, classes and boxes + + Args: + anchors: Boxes, anchor for this feature level + pred_scores: HxWxA,K + pred_deltas: HxWxA,4 + + Returns: + Instances: with field "scores", "pred_boxes", "pred_classes". + """ + # Apply two filtering to make NMS faster. + # 1. Keep boxes with confidence score higher than threshold + keep_idxs = pred_scores > score_thresh + pred_scores = pred_scores[keep_idxs] + topk_idxs = torch.nonzero(keep_idxs) # Kx2 + + # 2. Keep top k top scoring boxes only + topk_idxs_size = topk_idxs.shape[0] + if isinstance(topk_idxs_size, Tensor): + # It's a tensor in tracing + num_topk = torch.clamp(topk_idxs_size, max=topk_candidates) + else: + num_topk = min(topk_idxs_size, topk_candidates) + pred_scores, idxs = pred_scores.topk(num_topk) + topk_idxs = topk_idxs[idxs] + + anchor_idxs, classes_idxs = topk_idxs.unbind(dim=1) + + pred_boxes = self.box2box_transform.apply_deltas( + pred_deltas[anchor_idxs], anchors.tensor[anchor_idxs] + ) + return Instances( + image_size, pred_boxes=Boxes(pred_boxes), scores=pred_scores, pred_classes=classes_idxs + ) + + def _decode_multi_level_predictions( + self, + anchors: List[Boxes], + pred_scores: List[Tensor], + pred_deltas: List[Tensor], + score_thresh: float, + topk_candidates: int, + image_size: Tuple[int, int], + ) -> Instances: + """ + Run `_decode_per_level_predictions` for all feature levels and concat the results. + """ + predictions = [ + self._decode_per_level_predictions( + anchors_i, + box_cls_i, + box_reg_i, + self.test_score_thresh, + self.test_topk_candidates, + image_size, + ) + # Iterate over every feature level + for box_cls_i, box_reg_i, anchors_i in zip(pred_scores, pred_deltas, anchors) + ] + return predictions[0].cat(predictions) # 'Instances.cat' is not scriptale but this is + + def visualize_training(self, batched_inputs, results): + """ + A function used to visualize ground truth images and final network predictions. + It shows ground truth bounding boxes on the original image and up to 20 + predicted object bounding boxes on the original image. + + Args: + batched_inputs (list): a list that contains input to the model. + results (List[Instances]): a list of #images elements returned by forward_inference(). 
+ """ + from annotator.oneformer.detectron2.utils.visualizer import Visualizer + + assert len(batched_inputs) == len( + results + ), "Cannot visualize inputs and results of different sizes" + storage = get_event_storage() + max_boxes = 20 + + image_index = 0 # only visualize a single image + img = batched_inputs[image_index]["image"] + img = convert_image_to_rgb(img.permute(1, 2, 0), self.input_format) + v_gt = Visualizer(img, None) + v_gt = v_gt.overlay_instances(boxes=batched_inputs[image_index]["instances"].gt_boxes) + anno_img = v_gt.get_image() + processed_results = detector_postprocess(results[image_index], img.shape[0], img.shape[1]) + predicted_boxes = processed_results.pred_boxes.tensor.detach().cpu().numpy() + + v_pred = Visualizer(img, None) + v_pred = v_pred.overlay_instances(boxes=predicted_boxes[0:max_boxes]) + prop_img = v_pred.get_image() + vis_img = np.vstack((anno_img, prop_img)) + vis_img = vis_img.transpose(2, 0, 1) + vis_name = f"Top: GT bounding boxes; Bottom: {max_boxes} Highest Scoring Results" + storage.put_image(vis_name, vis_img) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/meta_arch/fcos.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/meta_arch/fcos.py new file mode 100644 index 0000000000000000000000000000000000000000..150726a459b99c1aa26213043b8e609213218201 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/meta_arch/fcos.py @@ -0,0 +1,328 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +import logging +from typing import List, Optional, Tuple +import torch +from fvcore.nn import sigmoid_focal_loss_jit +from torch import nn +from torch.nn import functional as F + +from annotator.oneformer.detectron2.layers import ShapeSpec, batched_nms +from annotator.oneformer.detectron2.structures import Boxes, ImageList, Instances, pairwise_point_box_distance +from annotator.oneformer.detectron2.utils.events import get_event_storage + +from ..anchor_generator import DefaultAnchorGenerator +from ..backbone import Backbone +from ..box_regression import Box2BoxTransformLinear, _dense_box_regression_loss +from .dense_detector import DenseDetector +from .retinanet import RetinaNetHead + +__all__ = ["FCOS"] + +logger = logging.getLogger(__name__) + + +class FCOS(DenseDetector): + """ + Implement FCOS in :paper:`fcos`. + """ + + def __init__( + self, + *, + backbone: Backbone, + head: nn.Module, + head_in_features: Optional[List[str]] = None, + box2box_transform=None, + num_classes, + center_sampling_radius: float = 1.5, + focal_loss_alpha=0.25, + focal_loss_gamma=2.0, + test_score_thresh=0.2, + test_topk_candidates=1000, + test_nms_thresh=0.6, + max_detections_per_image=100, + pixel_mean, + pixel_std, + ): + """ + Args: + center_sampling_radius: radius of the "center" of a groundtruth box, + within which all anchor points are labeled positive. + Other arguments mean the same as in :class:`RetinaNet`. + """ + super().__init__( + backbone, head, head_in_features, pixel_mean=pixel_mean, pixel_std=pixel_std + ) + + self.num_classes = num_classes + + # FCOS uses one anchor point per location. + # We represent the anchor point by a box whose size equals the anchor stride. 
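+        # Illustration (example strides, not read from any particular config): with
+        # FPN strides [8, 16, 32] the generator below gets sizes [[8], [16], [32]]
+        # and a single 1.0 aspect ratio, i.e. one stride-sized square anchor box per
+        # feature-map location.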
+ feature_shapes = backbone.output_shape() + fpn_strides = [feature_shapes[k].stride for k in self.head_in_features] + self.anchor_generator = DefaultAnchorGenerator( + sizes=[[k] for k in fpn_strides], aspect_ratios=[1.0], strides=fpn_strides + ) + + # FCOS parameterizes box regression by a linear transform, + # where predictions are normalized by anchor stride (equal to anchor size). + if box2box_transform is None: + box2box_transform = Box2BoxTransformLinear(normalize_by_size=True) + self.box2box_transform = box2box_transform + + self.center_sampling_radius = float(center_sampling_radius) + + # Loss parameters: + self.focal_loss_alpha = focal_loss_alpha + self.focal_loss_gamma = focal_loss_gamma + + # Inference parameters: + self.test_score_thresh = test_score_thresh + self.test_topk_candidates = test_topk_candidates + self.test_nms_thresh = test_nms_thresh + self.max_detections_per_image = max_detections_per_image + + def forward_training(self, images, features, predictions, gt_instances): + # Transpose the Hi*Wi*A dimension to the middle: + pred_logits, pred_anchor_deltas, pred_centerness = self._transpose_dense_predictions( + predictions, [self.num_classes, 4, 1] + ) + anchors = self.anchor_generator(features) + gt_labels, gt_boxes = self.label_anchors(anchors, gt_instances) + return self.losses( + anchors, pred_logits, gt_labels, pred_anchor_deltas, gt_boxes, pred_centerness + ) + + @torch.no_grad() + def _match_anchors(self, gt_boxes: Boxes, anchors: List[Boxes]): + """ + Match ground-truth boxes to a set of multi-level anchors. + + Args: + gt_boxes: Ground-truth boxes from instances of an image. + anchors: List of anchors for each feature map (of different scales). + + Returns: + torch.Tensor + A tensor of shape `(M, R)`, given `M` ground-truth boxes and total + `R` anchor points from all feature levels, indicating the quality + of match between m-th box and r-th anchor. Higher value indicates + better match. + """ + # Naming convention: (M = ground-truth boxes, R = anchor points) + # Anchor points are represented as square boxes of size = stride. + num_anchors_per_level = [len(x) for x in anchors] + anchors = Boxes.cat(anchors) # (R, 4) + anchor_centers = anchors.get_centers() # (R, 2) + anchor_sizes = anchors.tensor[:, 2] - anchors.tensor[:, 0] # (R, ) + + lower_bound = anchor_sizes * 4 + lower_bound[: num_anchors_per_level[0]] = 0 + upper_bound = anchor_sizes * 8 + upper_bound[-num_anchors_per_level[-1] :] = float("inf") + + gt_centers = gt_boxes.get_centers() + + # FCOS with center sampling: anchor point must be close enough to + # ground-truth box center. + center_dists = (anchor_centers[None, :, :] - gt_centers[:, None, :]).abs_() + sampling_regions = self.center_sampling_radius * anchor_sizes[None, :] + + match_quality_matrix = center_dists.max(dim=2).values < sampling_regions + + pairwise_dist = pairwise_point_box_distance(anchor_centers, gt_boxes) + pairwise_dist = pairwise_dist.permute(1, 0, 2) # (M, R, 4) + + # The original FCOS anchor matching rule: anchor point must be inside GT. + match_quality_matrix &= pairwise_dist.min(dim=2).values > 0 + + # Multilevel anchor matching in FCOS: each anchor is only responsible + # for certain scale range. + pairwise_dist = pairwise_dist.max(dim=2).values + match_quality_matrix &= (pairwise_dist > lower_bound[None, :]) & ( + pairwise_dist < upper_bound[None, :] + ) + # Match the GT box with minimum area, if there are multiple GT matches. 
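+        # In other words: the boolean match matrix is scaled by (1e8 - area), so an
+        # anchor matching several GT boxes scores highest for the smallest one
+        # (assuming areas stay well below 1e8), while non-matching entries remain 0;
+        # label_anchors() later takes the per-anchor argmax over GT boxes.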
+ gt_areas = gt_boxes.area() # (M, ) + + match_quality_matrix = match_quality_matrix.to(torch.float32) + match_quality_matrix *= 1e8 - gt_areas[:, None] + return match_quality_matrix # (M, R) + + @torch.no_grad() + def label_anchors(self, anchors: List[Boxes], gt_instances: List[Instances]): + """ + Same interface as :meth:`RetinaNet.label_anchors`, but implemented with FCOS + anchor matching rule. + + Unlike RetinaNet, there are no ignored anchors. + """ + + gt_labels, matched_gt_boxes = [], [] + + for inst in gt_instances: + if len(inst) > 0: + match_quality_matrix = self._match_anchors(inst.gt_boxes, anchors) + + # Find matched ground-truth box per anchor. Un-matched anchors are + # assigned -1. This is equivalent to using an anchor matcher as used + # in R-CNN/RetinaNet: `Matcher(thresholds=[1e-5], labels=[0, 1])` + match_quality, matched_idxs = match_quality_matrix.max(dim=0) + matched_idxs[match_quality < 1e-5] = -1 + + matched_gt_boxes_i = inst.gt_boxes.tensor[matched_idxs.clip(min=0)] + gt_labels_i = inst.gt_classes[matched_idxs.clip(min=0)] + + # Anchors with matched_idxs = -1 are labeled background. + gt_labels_i[matched_idxs < 0] = self.num_classes + else: + matched_gt_boxes_i = torch.zeros_like(Boxes.cat(anchors).tensor) + gt_labels_i = torch.full( + (len(matched_gt_boxes_i),), + fill_value=self.num_classes, + dtype=torch.long, + device=matched_gt_boxes_i.device, + ) + + gt_labels.append(gt_labels_i) + matched_gt_boxes.append(matched_gt_boxes_i) + + return gt_labels, matched_gt_boxes + + def losses( + self, anchors, pred_logits, gt_labels, pred_anchor_deltas, gt_boxes, pred_centerness + ): + """ + This method is almost identical to :meth:`RetinaNet.losses`, with an extra + "loss_centerness" in the returned dict. + """ + num_images = len(gt_labels) + gt_labels = torch.stack(gt_labels) # (M, R) + + pos_mask = (gt_labels >= 0) & (gt_labels != self.num_classes) + num_pos_anchors = pos_mask.sum().item() + get_event_storage().put_scalar("num_pos_anchors", num_pos_anchors / num_images) + normalizer = self._ema_update("loss_normalizer", max(num_pos_anchors, 1), 300) + + # classification and regression loss + gt_labels_target = F.one_hot(gt_labels, num_classes=self.num_classes + 1)[ + :, :, :-1 + ] # no loss for the last (background) class + loss_cls = sigmoid_focal_loss_jit( + torch.cat(pred_logits, dim=1), + gt_labels_target.to(pred_logits[0].dtype), + alpha=self.focal_loss_alpha, + gamma=self.focal_loss_gamma, + reduction="sum", + ) + + loss_box_reg = _dense_box_regression_loss( + anchors, + self.box2box_transform, + pred_anchor_deltas, + gt_boxes, + pos_mask, + box_reg_loss_type="giou", + ) + + ctrness_targets = self.compute_ctrness_targets(anchors, gt_boxes) # (M, R) + pred_centerness = torch.cat(pred_centerness, dim=1).squeeze(dim=2) # (M, R) + ctrness_loss = F.binary_cross_entropy_with_logits( + pred_centerness[pos_mask], ctrness_targets[pos_mask], reduction="sum" + ) + return { + "loss_fcos_cls": loss_cls / normalizer, + "loss_fcos_loc": loss_box_reg / normalizer, + "loss_fcos_ctr": ctrness_loss / normalizer, + } + + def compute_ctrness_targets(self, anchors: List[Boxes], gt_boxes: List[torch.Tensor]): + anchors = Boxes.cat(anchors).tensor # Rx4 + reg_targets = [self.box2box_transform.get_deltas(anchors, m) for m in gt_boxes] + reg_targets = torch.stack(reg_targets, dim=0) # NxRx4 + if len(reg_targets) == 0: + return reg_targets.new_zeros(len(reg_targets)) + left_right = reg_targets[:, :, [0, 2]] + top_bottom = reg_targets[:, :, [1, 3]] + ctrness = (left_right.min(dim=-1)[0] / 
left_right.max(dim=-1)[0]) * ( + top_bottom.min(dim=-1)[0] / top_bottom.max(dim=-1)[0] + ) + return torch.sqrt(ctrness) + + def forward_inference( + self, + images: ImageList, + features: List[torch.Tensor], + predictions: List[List[torch.Tensor]], + ): + pred_logits, pred_anchor_deltas, pred_centerness = self._transpose_dense_predictions( + predictions, [self.num_classes, 4, 1] + ) + anchors = self.anchor_generator(features) + + results: List[Instances] = [] + for img_idx, image_size in enumerate(images.image_sizes): + scores_per_image = [ + # Multiply and sqrt centerness & classification scores + # (See eqn. 4 in https://arxiv.org/abs/2006.09214) + torch.sqrt(x[img_idx].sigmoid_() * y[img_idx].sigmoid_()) + for x, y in zip(pred_logits, pred_centerness) + ] + deltas_per_image = [x[img_idx] for x in pred_anchor_deltas] + results_per_image = self.inference_single_image( + anchors, scores_per_image, deltas_per_image, image_size + ) + results.append(results_per_image) + return results + + def inference_single_image( + self, + anchors: List[Boxes], + box_cls: List[torch.Tensor], + box_delta: List[torch.Tensor], + image_size: Tuple[int, int], + ): + """ + Identical to :meth:`RetinaNet.inference_single_image. + """ + pred = self._decode_multi_level_predictions( + anchors, + box_cls, + box_delta, + self.test_score_thresh, + self.test_topk_candidates, + image_size, + ) + keep = batched_nms( + pred.pred_boxes.tensor, pred.scores, pred.pred_classes, self.test_nms_thresh + ) + return pred[keep[: self.max_detections_per_image]] + + +class FCOSHead(RetinaNetHead): + """ + The head used in :paper:`fcos`. It adds an additional centerness + prediction branch on top of :class:`RetinaNetHead`. + """ + + def __init__(self, *, input_shape: List[ShapeSpec], conv_dims: List[int], **kwargs): + super().__init__(input_shape=input_shape, conv_dims=conv_dims, num_anchors=1, **kwargs) + # Unlike original FCOS, we do not add an additional learnable scale layer + # because it's found to have no benefits after normalizing regression targets by stride. + self._num_features = len(input_shape) + self.ctrness = nn.Conv2d(conv_dims[-1], 1, kernel_size=3, stride=1, padding=1) + torch.nn.init.normal_(self.ctrness.weight, std=0.01) + torch.nn.init.constant_(self.ctrness.bias, 0) + + def forward(self, features): + assert len(features) == self._num_features + logits = [] + bbox_reg = [] + ctrness = [] + for feature in features: + logits.append(self.cls_score(self.cls_subnet(feature))) + bbox_feature = self.bbox_subnet(feature) + bbox_reg.append(self.bbox_pred(bbox_feature)) + ctrness.append(self.ctrness(bbox_feature)) + return logits, bbox_reg, ctrness diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/meta_arch/panoptic_fpn.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/meta_arch/panoptic_fpn.py new file mode 100644 index 0000000000000000000000000000000000000000..1ca5f19a0ce0099a49aad8bb6b659355c4f6e200 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/meta_arch/panoptic_fpn.py @@ -0,0 +1,269 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. 
+ +import logging +from typing import Dict, List +import torch +from torch import nn + +from annotator.oneformer.detectron2.config import configurable +from annotator.oneformer.detectron2.structures import ImageList + +from ..postprocessing import detector_postprocess, sem_seg_postprocess +from .build import META_ARCH_REGISTRY +from .rcnn import GeneralizedRCNN +from .semantic_seg import build_sem_seg_head + +__all__ = ["PanopticFPN"] + + +@META_ARCH_REGISTRY.register() +class PanopticFPN(GeneralizedRCNN): + """ + Implement the paper :paper:`PanopticFPN`. + """ + + @configurable + def __init__( + self, + *, + sem_seg_head: nn.Module, + combine_overlap_thresh: float = 0.5, + combine_stuff_area_thresh: float = 4096, + combine_instances_score_thresh: float = 0.5, + **kwargs, + ): + """ + NOTE: this interface is experimental. + + Args: + sem_seg_head: a module for the semantic segmentation head. + combine_overlap_thresh: combine masks into one instances if + they have enough overlap + combine_stuff_area_thresh: ignore stuff areas smaller than this threshold + combine_instances_score_thresh: ignore instances whose score is + smaller than this threshold + + Other arguments are the same as :class:`GeneralizedRCNN`. + """ + super().__init__(**kwargs) + self.sem_seg_head = sem_seg_head + # options when combining instance & semantic outputs + self.combine_overlap_thresh = combine_overlap_thresh + self.combine_stuff_area_thresh = combine_stuff_area_thresh + self.combine_instances_score_thresh = combine_instances_score_thresh + + @classmethod + def from_config(cls, cfg): + ret = super().from_config(cfg) + ret.update( + { + "combine_overlap_thresh": cfg.MODEL.PANOPTIC_FPN.COMBINE.OVERLAP_THRESH, + "combine_stuff_area_thresh": cfg.MODEL.PANOPTIC_FPN.COMBINE.STUFF_AREA_LIMIT, + "combine_instances_score_thresh": cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH, # noqa + } + ) + ret["sem_seg_head"] = build_sem_seg_head(cfg, ret["backbone"].output_shape()) + logger = logging.getLogger(__name__) + if not cfg.MODEL.PANOPTIC_FPN.COMBINE.ENABLED: + logger.warning( + "PANOPTIC_FPN.COMBINED.ENABLED is no longer used. " + " model.inference(do_postprocess=) should be used to toggle postprocessing." + ) + if cfg.MODEL.PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT != 1.0: + w = cfg.MODEL.PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT + logger.warning( + "PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT should be replaced by weights on each ROI head." + ) + + def update_weight(x): + if isinstance(x, dict): + return {k: v * w for k, v in x.items()} + else: + return x * w + + roi_heads = ret["roi_heads"] + roi_heads.box_predictor.loss_weight = update_weight(roi_heads.box_predictor.loss_weight) + roi_heads.mask_head.loss_weight = update_weight(roi_heads.mask_head.loss_weight) + return ret + + def forward(self, batched_inputs): + """ + Args: + batched_inputs: a list, batched outputs of :class:`DatasetMapper`. + Each item in the list contains the inputs for one image. + + For now, each item in the list is a dict that contains: + + * "image": Tensor, image in (C, H, W) format. + * "instances": Instances + * "sem_seg": semantic segmentation ground truth. + * Other information that's included in the original dicts, such as: + "height", "width" (int): the output resolution of the model, used in inference. + See :meth:`postprocess` for details. + + Returns: + list[dict]: + each dict has the results for one image. The dict contains the following keys: + + * "instances": see :meth:`GeneralizedRCNN.forward` for its format. 
+ * "sem_seg": see :meth:`SemanticSegmentor.forward` for its format. + * "panoptic_seg": See the return value of + :func:`combine_semantic_and_instance_outputs` for its format. + """ + if not self.training: + return self.inference(batched_inputs) + images = self.preprocess_image(batched_inputs) + features = self.backbone(images.tensor) + + assert "sem_seg" in batched_inputs[0] + gt_sem_seg = [x["sem_seg"].to(self.device) for x in batched_inputs] + gt_sem_seg = ImageList.from_tensors( + gt_sem_seg, + self.backbone.size_divisibility, + self.sem_seg_head.ignore_value, + self.backbone.padding_constraints, + ).tensor + sem_seg_results, sem_seg_losses = self.sem_seg_head(features, gt_sem_seg) + + gt_instances = [x["instances"].to(self.device) for x in batched_inputs] + proposals, proposal_losses = self.proposal_generator(images, features, gt_instances) + detector_results, detector_losses = self.roi_heads( + images, features, proposals, gt_instances + ) + + losses = sem_seg_losses + losses.update(proposal_losses) + losses.update(detector_losses) + return losses + + def inference(self, batched_inputs: List[Dict[str, torch.Tensor]], do_postprocess: bool = True): + """ + Run inference on the given inputs. + + Args: + batched_inputs (list[dict]): same as in :meth:`forward` + do_postprocess (bool): whether to apply post-processing on the outputs. + + Returns: + When do_postprocess=True, see docs in :meth:`forward`. + Otherwise, returns a (list[Instances], list[Tensor]) that contains + the raw detector outputs, and raw semantic segmentation outputs. + """ + images = self.preprocess_image(batched_inputs) + features = self.backbone(images.tensor) + sem_seg_results, sem_seg_losses = self.sem_seg_head(features, None) + proposals, _ = self.proposal_generator(images, features, None) + detector_results, _ = self.roi_heads(images, features, proposals, None) + + if do_postprocess: + processed_results = [] + for sem_seg_result, detector_result, input_per_image, image_size in zip( + sem_seg_results, detector_results, batched_inputs, images.image_sizes + ): + height = input_per_image.get("height", image_size[0]) + width = input_per_image.get("width", image_size[1]) + sem_seg_r = sem_seg_postprocess(sem_seg_result, image_size, height, width) + detector_r = detector_postprocess(detector_result, height, width) + + processed_results.append({"sem_seg": sem_seg_r, "instances": detector_r}) + + panoptic_r = combine_semantic_and_instance_outputs( + detector_r, + sem_seg_r.argmax(dim=0), + self.combine_overlap_thresh, + self.combine_stuff_area_thresh, + self.combine_instances_score_thresh, + ) + processed_results[-1]["panoptic_seg"] = panoptic_r + return processed_results + else: + return detector_results, sem_seg_results + + +def combine_semantic_and_instance_outputs( + instance_results, + semantic_results, + overlap_threshold, + stuff_area_thresh, + instances_score_thresh, +): + """ + Implement a simple combining logic following + "combine_semantic_and_instance_predictions.py" in panopticapi + to produce panoptic segmentation outputs. + + Args: + instance_results: output of :func:`detector_postprocess`. + semantic_results: an (H, W) tensor, each element is the contiguous semantic + category id + + Returns: + panoptic_seg (Tensor): of shape (height, width) where the values are ids for each segment. + segments_info (list[dict]): Describe each segment in `panoptic_seg`. + Each dict contains keys "id", "category_id", "isthing". 
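+            For instance segments the dict also carries "score" and "instance_id";
+            an illustrative entry is {"id": 1, "isthing": True, "score": 0.9,
+            "category_id": 17, "instance_id": 3}, while stuff segments carry "area"
+            instead.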
+ """ + panoptic_seg = torch.zeros_like(semantic_results, dtype=torch.int32) + + # sort instance outputs by scores + sorted_inds = torch.argsort(-instance_results.scores) + + current_segment_id = 0 + segments_info = [] + + instance_masks = instance_results.pred_masks.to(dtype=torch.bool, device=panoptic_seg.device) + + # Add instances one-by-one, check for overlaps with existing ones + for inst_id in sorted_inds: + score = instance_results.scores[inst_id].item() + if score < instances_score_thresh: + break + mask = instance_masks[inst_id] # H,W + mask_area = mask.sum().item() + + if mask_area == 0: + continue + + intersect = (mask > 0) & (panoptic_seg > 0) + intersect_area = intersect.sum().item() + + if intersect_area * 1.0 / mask_area > overlap_threshold: + continue + + if intersect_area > 0: + mask = mask & (panoptic_seg == 0) + + current_segment_id += 1 + panoptic_seg[mask] = current_segment_id + segments_info.append( + { + "id": current_segment_id, + "isthing": True, + "score": score, + "category_id": instance_results.pred_classes[inst_id].item(), + "instance_id": inst_id.item(), + } + ) + + # Add semantic results to remaining empty areas + semantic_labels = torch.unique(semantic_results).cpu().tolist() + for semantic_label in semantic_labels: + if semantic_label == 0: # 0 is a special "thing" class + continue + mask = (semantic_results == semantic_label) & (panoptic_seg == 0) + mask_area = mask.sum().item() + if mask_area < stuff_area_thresh: + continue + + current_segment_id += 1 + panoptic_seg[mask] = current_segment_id + segments_info.append( + { + "id": current_segment_id, + "isthing": False, + "category_id": semantic_label, + "area": mask_area, + } + ) + + return panoptic_seg, segments_info diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/meta_arch/rcnn.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/meta_arch/rcnn.py new file mode 100644 index 0000000000000000000000000000000000000000..7cacf065ed2803686f80c8e6f562ebfeb5d584d5 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/meta_arch/rcnn.py @@ -0,0 +1,341 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import logging +import numpy as np +from typing import Dict, List, Optional, Tuple +import torch +from torch import nn + +from annotator.oneformer.detectron2.config import configurable +from annotator.oneformer.detectron2.data.detection_utils import convert_image_to_rgb +from annotator.oneformer.detectron2.layers import move_device_like +from annotator.oneformer.detectron2.structures import ImageList, Instances +from annotator.oneformer.detectron2.utils.events import get_event_storage +from annotator.oneformer.detectron2.utils.logger import log_first_n + +from ..backbone import Backbone, build_backbone +from ..postprocessing import detector_postprocess +from ..proposal_generator import build_proposal_generator +from ..roi_heads import build_roi_heads +from .build import META_ARCH_REGISTRY + +__all__ = ["GeneralizedRCNN", "ProposalNetwork"] + + +@META_ARCH_REGISTRY.register() +class GeneralizedRCNN(nn.Module): + """ + Generalized R-CNN. Any models that contains the following three components: + 1. Per-image feature extraction (aka backbone) + 2. Region proposal generation + 3. 
Per-region feature extraction and prediction + """ + + @configurable + def __init__( + self, + *, + backbone: Backbone, + proposal_generator: nn.Module, + roi_heads: nn.Module, + pixel_mean: Tuple[float], + pixel_std: Tuple[float], + input_format: Optional[str] = None, + vis_period: int = 0, + ): + """ + Args: + backbone: a backbone module, must follow detectron2's backbone interface + proposal_generator: a module that generates proposals using backbone features + roi_heads: a ROI head that performs per-region computation + pixel_mean, pixel_std: list or tuple with #channels element, representing + the per-channel mean and std to be used to normalize the input image + input_format: describe the meaning of channels of input. Needed by visualization + vis_period: the period to run visualization. Set to 0 to disable. + """ + super().__init__() + self.backbone = backbone + self.proposal_generator = proposal_generator + self.roi_heads = roi_heads + + self.input_format = input_format + self.vis_period = vis_period + if vis_period > 0: + assert input_format is not None, "input_format is required for visualization!" + + self.register_buffer("pixel_mean", torch.tensor(pixel_mean).view(-1, 1, 1), False) + self.register_buffer("pixel_std", torch.tensor(pixel_std).view(-1, 1, 1), False) + assert ( + self.pixel_mean.shape == self.pixel_std.shape + ), f"{self.pixel_mean} and {self.pixel_std} have different shapes!" + + @classmethod + def from_config(cls, cfg): + backbone = build_backbone(cfg) + return { + "backbone": backbone, + "proposal_generator": build_proposal_generator(cfg, backbone.output_shape()), + "roi_heads": build_roi_heads(cfg, backbone.output_shape()), + "input_format": cfg.INPUT.FORMAT, + "vis_period": cfg.VIS_PERIOD, + "pixel_mean": cfg.MODEL.PIXEL_MEAN, + "pixel_std": cfg.MODEL.PIXEL_STD, + } + + @property + def device(self): + return self.pixel_mean.device + + def _move_to_current_device(self, x): + return move_device_like(x, self.pixel_mean) + + def visualize_training(self, batched_inputs, proposals): + """ + A function used to visualize images and proposals. It shows ground truth + bounding boxes on the original image and up to 20 top-scoring predicted + object proposals on the original image. Users can implement different + visualization functions for different models. + + Args: + batched_inputs (list): a list that contains input to the model. + proposals (list): a list that contains predicted proposals. Both + batched_inputs and proposals should have the same length. + """ + from annotator.oneformer.detectron2.utils.visualizer import Visualizer + + storage = get_event_storage() + max_vis_prop = 20 + + for input, prop in zip(batched_inputs, proposals): + img = input["image"] + img = convert_image_to_rgb(img.permute(1, 2, 0), self.input_format) + v_gt = Visualizer(img, None) + v_gt = v_gt.overlay_instances(boxes=input["instances"].gt_boxes) + anno_img = v_gt.get_image() + box_size = min(len(prop.proposal_boxes), max_vis_prop) + v_pred = Visualizer(img, None) + v_pred = v_pred.overlay_instances( + boxes=prop.proposal_boxes[0:box_size].tensor.cpu().numpy() + ) + prop_img = v_pred.get_image() + vis_img = np.concatenate((anno_img, prop_img), axis=1) + vis_img = vis_img.transpose(2, 0, 1) + vis_name = "Left: GT bounding boxes; Right: Predicted proposals" + storage.put_image(vis_name, vis_img) + break # only visualize one image in a batch + + def forward(self, batched_inputs: List[Dict[str, torch.Tensor]]): + """ + Args: + batched_inputs: a list, batched outputs of :class:`DatasetMapper` . 
+ Each item in the list contains the inputs for one image. + For now, each item in the list is a dict that contains: + + * image: Tensor, image in (C, H, W) format. + * instances (optional): groundtruth :class:`Instances` + * proposals (optional): :class:`Instances`, precomputed proposals. + + Other information that's included in the original dicts, such as: + + * "height", "width" (int): the output resolution of the model, used in inference. + See :meth:`postprocess` for details. + + Returns: + list[dict]: + Each dict is the output for one input image. + The dict contains one key "instances" whose value is a :class:`Instances`. + The :class:`Instances` object has the following keys: + "pred_boxes", "pred_classes", "scores", "pred_masks", "pred_keypoints" + """ + if not self.training: + return self.inference(batched_inputs) + + images = self.preprocess_image(batched_inputs) + if "instances" in batched_inputs[0]: + gt_instances = [x["instances"].to(self.device) for x in batched_inputs] + else: + gt_instances = None + + features = self.backbone(images.tensor) + + if self.proposal_generator is not None: + proposals, proposal_losses = self.proposal_generator(images, features, gt_instances) + else: + assert "proposals" in batched_inputs[0] + proposals = [x["proposals"].to(self.device) for x in batched_inputs] + proposal_losses = {} + + _, detector_losses = self.roi_heads(images, features, proposals, gt_instances) + if self.vis_period > 0: + storage = get_event_storage() + if storage.iter % self.vis_period == 0: + self.visualize_training(batched_inputs, proposals) + + losses = {} + losses.update(detector_losses) + losses.update(proposal_losses) + return losses + + def inference( + self, + batched_inputs: List[Dict[str, torch.Tensor]], + detected_instances: Optional[List[Instances]] = None, + do_postprocess: bool = True, + ): + """ + Run inference on the given inputs. + + Args: + batched_inputs (list[dict]): same as in :meth:`forward` + detected_instances (None or list[Instances]): if not None, it + contains an `Instances` object per image. The `Instances` + object contains "pred_boxes" and "pred_classes" which are + known boxes in the image. + The inference will then skip the detection of bounding boxes, + and only predict other per-ROI outputs. + do_postprocess (bool): whether to apply post-processing on the outputs. + + Returns: + When do_postprocess=True, same as in :meth:`forward`. + Otherwise, a list[Instances] containing raw network outputs. + """ + assert not self.training + + images = self.preprocess_image(batched_inputs) + features = self.backbone(images.tensor) + + if detected_instances is None: + if self.proposal_generator is not None: + proposals, _ = self.proposal_generator(images, features, None) + else: + assert "proposals" in batched_inputs[0] + proposals = [x["proposals"].to(self.device) for x in batched_inputs] + + results, _ = self.roi_heads(images, features, proposals, None) + else: + detected_instances = [x.to(self.device) for x in detected_instances] + results = self.roi_heads.forward_with_given_boxes(features, detected_instances) + + if do_postprocess: + assert not torch.jit.is_scripting(), "Scripting is not supported for postprocess." + return GeneralizedRCNN._postprocess(results, batched_inputs, images.image_sizes) + return results + + def preprocess_image(self, batched_inputs: List[Dict[str, torch.Tensor]]): + """ + Normalize, pad and batch the input images. 
+ """ + images = [self._move_to_current_device(x["image"]) for x in batched_inputs] + images = [(x - self.pixel_mean) / self.pixel_std for x in images] + images = ImageList.from_tensors( + images, + self.backbone.size_divisibility, + padding_constraints=self.backbone.padding_constraints, + ) + return images + + @staticmethod + def _postprocess(instances, batched_inputs: List[Dict[str, torch.Tensor]], image_sizes): + """ + Rescale the output instances to the target size. + """ + # note: private function; subject to changes + processed_results = [] + for results_per_image, input_per_image, image_size in zip( + instances, batched_inputs, image_sizes + ): + height = input_per_image.get("height", image_size[0]) + width = input_per_image.get("width", image_size[1]) + r = detector_postprocess(results_per_image, height, width) + processed_results.append({"instances": r}) + return processed_results + + +@META_ARCH_REGISTRY.register() +class ProposalNetwork(nn.Module): + """ + A meta architecture that only predicts object proposals. + """ + + @configurable + def __init__( + self, + *, + backbone: Backbone, + proposal_generator: nn.Module, + pixel_mean: Tuple[float], + pixel_std: Tuple[float], + ): + """ + Args: + backbone: a backbone module, must follow detectron2's backbone interface + proposal_generator: a module that generates proposals using backbone features + pixel_mean, pixel_std: list or tuple with #channels element, representing + the per-channel mean and std to be used to normalize the input image + """ + super().__init__() + self.backbone = backbone + self.proposal_generator = proposal_generator + self.register_buffer("pixel_mean", torch.tensor(pixel_mean).view(-1, 1, 1), False) + self.register_buffer("pixel_std", torch.tensor(pixel_std).view(-1, 1, 1), False) + + @classmethod + def from_config(cls, cfg): + backbone = build_backbone(cfg) + return { + "backbone": backbone, + "proposal_generator": build_proposal_generator(cfg, backbone.output_shape()), + "pixel_mean": cfg.MODEL.PIXEL_MEAN, + "pixel_std": cfg.MODEL.PIXEL_STD, + } + + @property + def device(self): + return self.pixel_mean.device + + def _move_to_current_device(self, x): + return move_device_like(x, self.pixel_mean) + + def forward(self, batched_inputs): + """ + Args: + Same as in :class:`GeneralizedRCNN.forward` + + Returns: + list[dict]: + Each dict is the output for one input image. + The dict contains one key "proposals" whose value is a + :class:`Instances` with keys "proposal_boxes" and "objectness_logits". + """ + images = [self._move_to_current_device(x["image"]) for x in batched_inputs] + images = [(x - self.pixel_mean) / self.pixel_std for x in images] + images = ImageList.from_tensors( + images, + self.backbone.size_divisibility, + padding_constraints=self.backbone.padding_constraints, + ) + features = self.backbone(images.tensor) + + if "instances" in batched_inputs[0]: + gt_instances = [x["instances"].to(self.device) for x in batched_inputs] + elif "targets" in batched_inputs[0]: + log_first_n( + logging.WARN, "'targets' in the model inputs is now renamed to 'instances'!", n=10 + ) + gt_instances = [x["targets"].to(self.device) for x in batched_inputs] + else: + gt_instances = None + proposals, proposal_losses = self.proposal_generator(images, features, gt_instances) + # In training, the proposals are not useful at all but we generate them anyway. + # This makes RPN-only models about 5% slower. 
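+        # Sketch of typical usage (an assumption, not taken from this file): when
+        # pre-training an RPN only the loss dict returned below is consumed, while
+        # at inference each returned dict carries an Instances with
+        # "proposal_boxes" and "objectness_logits" rescaled to the requested
+        # output resolution.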
+ if self.training: + return proposal_losses + + processed_results = [] + for results_per_image, input_per_image, image_size in zip( + proposals, batched_inputs, images.image_sizes + ): + height = input_per_image.get("height", image_size[0]) + width = input_per_image.get("width", image_size[1]) + r = detector_postprocess(results_per_image, height, width) + processed_results.append({"proposals": r}) + return processed_results diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/meta_arch/retinanet.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/meta_arch/retinanet.py new file mode 100644 index 0000000000000000000000000000000000000000..46e0fda48254f2d1e6b8c796e00467df669e4216 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/meta_arch/retinanet.py @@ -0,0 +1,439 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import logging +import math +from typing import List, Tuple +import torch +from fvcore.nn import sigmoid_focal_loss_jit +from torch import Tensor, nn +from torch.nn import functional as F + +from annotator.oneformer.detectron2.config import configurable +from annotator.oneformer.detectron2.layers import CycleBatchNormList, ShapeSpec, batched_nms, cat, get_norm +from annotator.oneformer.detectron2.structures import Boxes, ImageList, Instances, pairwise_iou +from annotator.oneformer.detectron2.utils.events import get_event_storage + +from ..anchor_generator import build_anchor_generator +from ..backbone import Backbone, build_backbone +from ..box_regression import Box2BoxTransform, _dense_box_regression_loss +from ..matcher import Matcher +from .build import META_ARCH_REGISTRY +from .dense_detector import DenseDetector, permute_to_N_HWA_K # noqa + +__all__ = ["RetinaNet"] + + +logger = logging.getLogger(__name__) + + +@META_ARCH_REGISTRY.register() +class RetinaNet(DenseDetector): + """ + Implement RetinaNet in :paper:`RetinaNet`. + """ + + @configurable + def __init__( + self, + *, + backbone: Backbone, + head: nn.Module, + head_in_features, + anchor_generator, + box2box_transform, + anchor_matcher, + num_classes, + focal_loss_alpha=0.25, + focal_loss_gamma=2.0, + smooth_l1_beta=0.0, + box_reg_loss_type="smooth_l1", + test_score_thresh=0.05, + test_topk_candidates=1000, + test_nms_thresh=0.5, + max_detections_per_image=100, + pixel_mean, + pixel_std, + vis_period=0, + input_format="BGR", + ): + """ + NOTE: this interface is experimental. + + Args: + backbone: a backbone module, must follow detectron2's backbone interface + head (nn.Module): a module that predicts logits and regression deltas + for each level from a list of per-level features + head_in_features (Tuple[str]): Names of the input feature maps to be used in head + anchor_generator (nn.Module): a module that creates anchors from a + list of features. Usually an instance of :class:`AnchorGenerator` + box2box_transform (Box2BoxTransform): defines the transform from anchors boxes to + instance boxes + anchor_matcher (Matcher): label the anchors by matching them with ground truth. + num_classes (int): number of classes. Used to label background proposals. 
+ + # Loss parameters: + focal_loss_alpha (float): focal_loss_alpha + focal_loss_gamma (float): focal_loss_gamma + smooth_l1_beta (float): smooth_l1_beta + box_reg_loss_type (str): Options are "smooth_l1", "giou", "diou", "ciou" + + # Inference parameters: + test_score_thresh (float): Inference cls score threshold, only anchors with + score > INFERENCE_TH are considered for inference (to improve speed) + test_topk_candidates (int): Select topk candidates before NMS + test_nms_thresh (float): Overlap threshold used for non-maximum suppression + (suppress boxes with IoU >= this threshold) + max_detections_per_image (int): + Maximum number of detections to return per image during inference + (100 is based on the limit established for the COCO dataset). + + pixel_mean, pixel_std: see :class:`DenseDetector`. + """ + super().__init__( + backbone, head, head_in_features, pixel_mean=pixel_mean, pixel_std=pixel_std + ) + self.num_classes = num_classes + + # Anchors + self.anchor_generator = anchor_generator + self.box2box_transform = box2box_transform + self.anchor_matcher = anchor_matcher + + # Loss parameters: + self.focal_loss_alpha = focal_loss_alpha + self.focal_loss_gamma = focal_loss_gamma + self.smooth_l1_beta = smooth_l1_beta + self.box_reg_loss_type = box_reg_loss_type + # Inference parameters: + self.test_score_thresh = test_score_thresh + self.test_topk_candidates = test_topk_candidates + self.test_nms_thresh = test_nms_thresh + self.max_detections_per_image = max_detections_per_image + # Vis parameters + self.vis_period = vis_period + self.input_format = input_format + + @classmethod + def from_config(cls, cfg): + backbone = build_backbone(cfg) + backbone_shape = backbone.output_shape() + feature_shapes = [backbone_shape[f] for f in cfg.MODEL.RETINANET.IN_FEATURES] + head = RetinaNetHead(cfg, feature_shapes) + anchor_generator = build_anchor_generator(cfg, feature_shapes) + return { + "backbone": backbone, + "head": head, + "anchor_generator": anchor_generator, + "box2box_transform": Box2BoxTransform(weights=cfg.MODEL.RETINANET.BBOX_REG_WEIGHTS), + "anchor_matcher": Matcher( + cfg.MODEL.RETINANET.IOU_THRESHOLDS, + cfg.MODEL.RETINANET.IOU_LABELS, + allow_low_quality_matches=True, + ), + "pixel_mean": cfg.MODEL.PIXEL_MEAN, + "pixel_std": cfg.MODEL.PIXEL_STD, + "num_classes": cfg.MODEL.RETINANET.NUM_CLASSES, + "head_in_features": cfg.MODEL.RETINANET.IN_FEATURES, + # Loss parameters: + "focal_loss_alpha": cfg.MODEL.RETINANET.FOCAL_LOSS_ALPHA, + "focal_loss_gamma": cfg.MODEL.RETINANET.FOCAL_LOSS_GAMMA, + "smooth_l1_beta": cfg.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA, + "box_reg_loss_type": cfg.MODEL.RETINANET.BBOX_REG_LOSS_TYPE, + # Inference parameters: + "test_score_thresh": cfg.MODEL.RETINANET.SCORE_THRESH_TEST, + "test_topk_candidates": cfg.MODEL.RETINANET.TOPK_CANDIDATES_TEST, + "test_nms_thresh": cfg.MODEL.RETINANET.NMS_THRESH_TEST, + "max_detections_per_image": cfg.TEST.DETECTIONS_PER_IMAGE, + # Vis parameters + "vis_period": cfg.VIS_PERIOD, + "input_format": cfg.INPUT.FORMAT, + } + + def forward_training(self, images, features, predictions, gt_instances): + # Transpose the Hi*Wi*A dimension to the middle: + pred_logits, pred_anchor_deltas = self._transpose_dense_predictions( + predictions, [self.num_classes, 4] + ) + anchors = self.anchor_generator(features) + gt_labels, gt_boxes = self.label_anchors(anchors, gt_instances) + return self.losses(anchors, pred_logits, gt_labels, pred_anchor_deltas, gt_boxes) + + def losses(self, anchors, pred_logits, gt_labels, pred_anchor_deltas, 
gt_boxes): + """ + Args: + anchors (list[Boxes]): a list of #feature level Boxes + gt_labels, gt_boxes: see output of :meth:`RetinaNet.label_anchors`. + Their shapes are (N, R) and (N, R, 4), respectively, where R is + the total number of anchors across levels, i.e. sum(Hi x Wi x Ai) + pred_logits, pred_anchor_deltas: both are list[Tensor]. Each element in the + list corresponds to one level and has shape (N, Hi * Wi * Ai, K or 4). + Where K is the number of classes used in `pred_logits`. + + Returns: + dict[str, Tensor]: + mapping from a named loss to a scalar tensor storing the loss. + Used during training only. The dict keys are: "loss_cls" and "loss_box_reg" + """ + num_images = len(gt_labels) + gt_labels = torch.stack(gt_labels) # (N, R) + + valid_mask = gt_labels >= 0 + pos_mask = (gt_labels >= 0) & (gt_labels != self.num_classes) + num_pos_anchors = pos_mask.sum().item() + get_event_storage().put_scalar("num_pos_anchors", num_pos_anchors / num_images) + normalizer = self._ema_update("loss_normalizer", max(num_pos_anchors, 1), 100) + + # classification and regression loss + gt_labels_target = F.one_hot(gt_labels[valid_mask], num_classes=self.num_classes + 1)[ + :, :-1 + ] # no loss for the last (background) class + loss_cls = sigmoid_focal_loss_jit( + cat(pred_logits, dim=1)[valid_mask], + gt_labels_target.to(pred_logits[0].dtype), + alpha=self.focal_loss_alpha, + gamma=self.focal_loss_gamma, + reduction="sum", + ) + + loss_box_reg = _dense_box_regression_loss( + anchors, + self.box2box_transform, + pred_anchor_deltas, + gt_boxes, + pos_mask, + box_reg_loss_type=self.box_reg_loss_type, + smooth_l1_beta=self.smooth_l1_beta, + ) + + return { + "loss_cls": loss_cls / normalizer, + "loss_box_reg": loss_box_reg / normalizer, + } + + @torch.no_grad() + def label_anchors(self, anchors, gt_instances): + """ + Args: + anchors (list[Boxes]): A list of #feature level Boxes. + The Boxes contains anchors of this image on the specific feature level. + gt_instances (list[Instances]): a list of N `Instances`s. The i-th + `Instances` contains the ground-truth per-instance annotations + for the i-th input image. + + Returns: + list[Tensor]: List of #img tensors. i-th element is a vector of labels whose length is + the total number of anchors across all feature maps (sum(Hi * Wi * A)). + Label values are in {-1, 0, ..., K}, with -1 means ignore, and K means background. + + list[Tensor]: i-th element is a Rx4 tensor, where R is the total number of anchors + across feature maps. The values are the matched gt boxes for each anchor. + Values are undefined for those anchors not labeled as foreground. + """ + anchors = Boxes.cat(anchors) # Rx4 + + gt_labels = [] + matched_gt_boxes = [] + for gt_per_image in gt_instances: + match_quality_matrix = pairwise_iou(gt_per_image.gt_boxes, anchors) + matched_idxs, anchor_labels = self.anchor_matcher(match_quality_matrix) + del match_quality_matrix + + if len(gt_per_image) > 0: + matched_gt_boxes_i = gt_per_image.gt_boxes.tensor[matched_idxs] + + gt_labels_i = gt_per_image.gt_classes[matched_idxs] + # Anchors with label 0 are treated as background. + gt_labels_i[anchor_labels == 0] = self.num_classes + # Anchors with label -1 are ignored. 
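A small illustration, not part of the diff, of the target construction used by `losses` above and of the background convention applied here: anchors labeled `num_classes` are background, and dropping the last one-hot column leaves them an all-zero target, so they contribute only negative terms to the focal loss.

```python
import torch
import torch.nn.functional as F

num_classes = 3
gt_labels = torch.tensor([0, 2, 3])  # the last anchor is background (label == num_classes)
targets = F.one_hot(gt_labels, num_classes + 1)[:, :-1]
print(targets)
# tensor([[1, 0, 0],
#         [0, 0, 1],
#         [0, 0, 0]])
```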
+ gt_labels_i[anchor_labels == -1] = -1 + else: + matched_gt_boxes_i = torch.zeros_like(anchors.tensor) + gt_labels_i = torch.zeros_like(matched_idxs) + self.num_classes + + gt_labels.append(gt_labels_i) + matched_gt_boxes.append(matched_gt_boxes_i) + + return gt_labels, matched_gt_boxes + + def forward_inference( + self, images: ImageList, features: List[Tensor], predictions: List[List[Tensor]] + ): + pred_logits, pred_anchor_deltas = self._transpose_dense_predictions( + predictions, [self.num_classes, 4] + ) + anchors = self.anchor_generator(features) + + results: List[Instances] = [] + for img_idx, image_size in enumerate(images.image_sizes): + scores_per_image = [x[img_idx].sigmoid_() for x in pred_logits] + deltas_per_image = [x[img_idx] for x in pred_anchor_deltas] + results_per_image = self.inference_single_image( + anchors, scores_per_image, deltas_per_image, image_size + ) + results.append(results_per_image) + return results + + def inference_single_image( + self, + anchors: List[Boxes], + box_cls: List[Tensor], + box_delta: List[Tensor], + image_size: Tuple[int, int], + ): + """ + Single-image inference. Return bounding-box detection results by thresholding + on scores and applying non-maximum suppression (NMS). + + Arguments: + anchors (list[Boxes]): list of #feature levels. Each entry contains + a Boxes object, which contains all the anchors in that feature level. + box_cls (list[Tensor]): list of #feature levels. Each entry contains + tensor of size (H x W x A, K) + box_delta (list[Tensor]): Same shape as 'box_cls' except that K becomes 4. + image_size (tuple(H, W)): a tuple of the image height and width. + + Returns: + Same as `inference`, but for only one image. + """ + pred = self._decode_multi_level_predictions( + anchors, + box_cls, + box_delta, + self.test_score_thresh, + self.test_topk_candidates, + image_size, + ) + keep = batched_nms( # per-class NMS + pred.pred_boxes.tensor, pred.scores, pred.pred_classes, self.test_nms_thresh + ) + return pred[keep[: self.max_detections_per_image]] + + +class RetinaNetHead(nn.Module): + """ + The head used in RetinaNet for object classification and box regression. + It has two subnets for the two tasks, with a common structure but separate parameters. + """ + + @configurable + def __init__( + self, + *, + input_shape: List[ShapeSpec], + num_classes, + num_anchors, + conv_dims: List[int], + norm="", + prior_prob=0.01, + ): + """ + NOTE: this interface is experimental. + + Args: + input_shape (List[ShapeSpec]): input shape + num_classes (int): number of classes. Used to label background proposals. + num_anchors (int): number of generated anchors + conv_dims (List[int]): dimensions for each convolution layer + norm (str or callable): + Normalization for conv layers except for the two output layers. + See :func:`detectron2.layers.get_norm` for supported types. + prior_prob (float): Prior weight for computing bias + """ + super().__init__() + + self._num_features = len(input_shape) + if norm == "BN" or norm == "SyncBN": + logger.info( + f"Using domain-specific {norm} in RetinaNetHead with len={self._num_features}." + ) + bn_class = nn.BatchNorm2d if norm == "BN" else nn.SyncBatchNorm + + def norm(c): + return CycleBatchNormList( + length=self._num_features, bn_class=bn_class, num_features=c + ) + + else: + norm_name = str(type(get_norm(norm, 32))) + if "BN" in norm_name: + logger.warning( + f"Shared BatchNorm (type={norm_name}) may not work well in RetinaNetHead." 
+ ) + + cls_subnet = [] + bbox_subnet = [] + for in_channels, out_channels in zip( + [input_shape[0].channels] + list(conv_dims), conv_dims + ): + cls_subnet.append( + nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1) + ) + if norm: + cls_subnet.append(get_norm(norm, out_channels)) + cls_subnet.append(nn.ReLU()) + bbox_subnet.append( + nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1) + ) + if norm: + bbox_subnet.append(get_norm(norm, out_channels)) + bbox_subnet.append(nn.ReLU()) + + self.cls_subnet = nn.Sequential(*cls_subnet) + self.bbox_subnet = nn.Sequential(*bbox_subnet) + self.cls_score = nn.Conv2d( + conv_dims[-1], num_anchors * num_classes, kernel_size=3, stride=1, padding=1 + ) + self.bbox_pred = nn.Conv2d( + conv_dims[-1], num_anchors * 4, kernel_size=3, stride=1, padding=1 + ) + + # Initialization + for modules in [self.cls_subnet, self.bbox_subnet, self.cls_score, self.bbox_pred]: + for layer in modules.modules(): + if isinstance(layer, nn.Conv2d): + torch.nn.init.normal_(layer.weight, mean=0, std=0.01) + torch.nn.init.constant_(layer.bias, 0) + + # Use prior in model initialization to improve stability + bias_value = -(math.log((1 - prior_prob) / prior_prob)) + torch.nn.init.constant_(self.cls_score.bias, bias_value) + + @classmethod + def from_config(cls, cfg, input_shape: List[ShapeSpec]): + num_anchors = build_anchor_generator(cfg, input_shape).num_cell_anchors + assert ( + len(set(num_anchors)) == 1 + ), "Using different number of anchors between levels is not currently supported!" + num_anchors = num_anchors[0] + + return { + "input_shape": input_shape, + "num_classes": cfg.MODEL.RETINANET.NUM_CLASSES, + "conv_dims": [input_shape[0].channels] * cfg.MODEL.RETINANET.NUM_CONVS, + "prior_prob": cfg.MODEL.RETINANET.PRIOR_PROB, + "norm": cfg.MODEL.RETINANET.NORM, + "num_anchors": num_anchors, + } + + def forward(self, features: List[Tensor]): + """ + Arguments: + features (list[Tensor]): FPN feature map tensors in high to low resolution. + Each tensor in the list correspond to different feature levels. + + Returns: + logits (list[Tensor]): #lvl tensors, each has shape (N, AxK, Hi, Wi). + The tensor predicts the classification probability + at each spatial position for each of the A anchors and K object + classes. + bbox_reg (list[Tensor]): #lvl tensors, each has shape (N, Ax4, Hi, Wi). + The tensor predicts 4-vector (dx,dy,dw,dh) box + regression values for every anchor. These values are the + relative offset between the anchor and the ground truth box. + """ + assert len(features) == self._num_features + logits = [] + bbox_reg = [] + for feature in features: + logits.append(self.cls_score(self.cls_subnet(feature))) + bbox_reg.append(self.bbox_pred(self.bbox_subnet(feature))) + return logits, bbox_reg diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/meta_arch/semantic_seg.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/meta_arch/semantic_seg.py new file mode 100644 index 0000000000000000000000000000000000000000..b4be86864c11c2b73a56a879746fce18a88260af --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/meta_arch/semantic_seg.py @@ -0,0 +1,267 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
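A numeric check, purely illustrative, of the prior-probability bias initialization in `RetinaNetHead` above: setting the classification bias to `-log((1 - p) / p)` makes every anchor's initial sigmoid score roughly `prior_prob`, which keeps the focal loss stable at the start of training.

```python
import math

prior_prob = 0.01
bias_value = -math.log((1 - prior_prob) / prior_prob)
initial_score = 1.0 / (1.0 + math.exp(-bias_value))
print(round(bias_value, 4), round(initial_score, 4))  # -4.5951 0.01
```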
+import numpy as np +from typing import Callable, Dict, Optional, Tuple, Union +import fvcore.nn.weight_init as weight_init +import torch +from torch import nn +from torch.nn import functional as F + +from annotator.oneformer.detectron2.config import configurable +from annotator.oneformer.detectron2.layers import Conv2d, ShapeSpec, get_norm +from annotator.oneformer.detectron2.structures import ImageList +from annotator.oneformer.detectron2.utils.registry import Registry + +from ..backbone import Backbone, build_backbone +from ..postprocessing import sem_seg_postprocess +from .build import META_ARCH_REGISTRY + +__all__ = [ + "SemanticSegmentor", + "SEM_SEG_HEADS_REGISTRY", + "SemSegFPNHead", + "build_sem_seg_head", +] + + +SEM_SEG_HEADS_REGISTRY = Registry("SEM_SEG_HEADS") +SEM_SEG_HEADS_REGISTRY.__doc__ = """ +Registry for semantic segmentation heads, which make semantic segmentation predictions +from feature maps. +""" + + +@META_ARCH_REGISTRY.register() +class SemanticSegmentor(nn.Module): + """ + Main class for semantic segmentation architectures. + """ + + @configurable + def __init__( + self, + *, + backbone: Backbone, + sem_seg_head: nn.Module, + pixel_mean: Tuple[float], + pixel_std: Tuple[float], + ): + """ + Args: + backbone: a backbone module, must follow detectron2's backbone interface + sem_seg_head: a module that predicts semantic segmentation from backbone features + pixel_mean, pixel_std: list or tuple with #channels element, representing + the per-channel mean and std to be used to normalize the input image + """ + super().__init__() + self.backbone = backbone + self.sem_seg_head = sem_seg_head + self.register_buffer("pixel_mean", torch.tensor(pixel_mean).view(-1, 1, 1), False) + self.register_buffer("pixel_std", torch.tensor(pixel_std).view(-1, 1, 1), False) + + @classmethod + def from_config(cls, cfg): + backbone = build_backbone(cfg) + sem_seg_head = build_sem_seg_head(cfg, backbone.output_shape()) + return { + "backbone": backbone, + "sem_seg_head": sem_seg_head, + "pixel_mean": cfg.MODEL.PIXEL_MEAN, + "pixel_std": cfg.MODEL.PIXEL_STD, + } + + @property + def device(self): + return self.pixel_mean.device + + def forward(self, batched_inputs): + """ + Args: + batched_inputs: a list, batched outputs of :class:`DatasetMapper`. + Each item in the list contains the inputs for one image. + + For now, each item in the list is a dict that contains: + + * "image": Tensor, image in (C, H, W) format. + * "sem_seg": semantic segmentation ground truth + * Other information that's included in the original dicts, such as: + "height", "width" (int): the output resolution of the model (may be different + from input resolution), used in inference. + + + Returns: + list[dict]: + Each dict is the output for one input image. + The dict contains one key "sem_seg" whose value is a + Tensor that represents the + per-pixel segmentation prediced by the head. + The prediction has shape KxHxW that represents the logits of + each class for each pixel. 
+ """ + images = [x["image"].to(self.device) for x in batched_inputs] + images = [(x - self.pixel_mean) / self.pixel_std for x in images] + images = ImageList.from_tensors( + images, + self.backbone.size_divisibility, + padding_constraints=self.backbone.padding_constraints, + ) + + features = self.backbone(images.tensor) + + if "sem_seg" in batched_inputs[0]: + targets = [x["sem_seg"].to(self.device) for x in batched_inputs] + targets = ImageList.from_tensors( + targets, + self.backbone.size_divisibility, + self.sem_seg_head.ignore_value, + self.backbone.padding_constraints, + ).tensor + else: + targets = None + results, losses = self.sem_seg_head(features, targets) + + if self.training: + return losses + + processed_results = [] + for result, input_per_image, image_size in zip(results, batched_inputs, images.image_sizes): + height = input_per_image.get("height", image_size[0]) + width = input_per_image.get("width", image_size[1]) + r = sem_seg_postprocess(result, image_size, height, width) + processed_results.append({"sem_seg": r}) + return processed_results + + +def build_sem_seg_head(cfg, input_shape): + """ + Build a semantic segmentation head from `cfg.MODEL.SEM_SEG_HEAD.NAME`. + """ + name = cfg.MODEL.SEM_SEG_HEAD.NAME + return SEM_SEG_HEADS_REGISTRY.get(name)(cfg, input_shape) + + +@SEM_SEG_HEADS_REGISTRY.register() +class SemSegFPNHead(nn.Module): + """ + A semantic segmentation head described in :paper:`PanopticFPN`. + It takes a list of FPN features as input, and applies a sequence of + 3x3 convs and upsampling to scale all of them to the stride defined by + ``common_stride``. Then these features are added and used to make final + predictions by another 1x1 conv layer. + """ + + @configurable + def __init__( + self, + input_shape: Dict[str, ShapeSpec], + *, + num_classes: int, + conv_dims: int, + common_stride: int, + loss_weight: float = 1.0, + norm: Optional[Union[str, Callable]] = None, + ignore_value: int = -1, + ): + """ + NOTE: this interface is experimental. + + Args: + input_shape: shapes (channels and stride) of the input features + num_classes: number of classes to predict + conv_dims: number of output channels for the intermediate conv layers. + common_stride: the common stride that all features will be upscaled to + loss_weight: loss weight + norm (str or callable): normalization for all conv layers + ignore_value: category id to be ignored during training. 
+ """ + super().__init__() + input_shape = sorted(input_shape.items(), key=lambda x: x[1].stride) + if not len(input_shape): + raise ValueError("SemSegFPNHead(input_shape=) cannot be empty!") + self.in_features = [k for k, v in input_shape] + feature_strides = [v.stride for k, v in input_shape] + feature_channels = [v.channels for k, v in input_shape] + + self.ignore_value = ignore_value + self.common_stride = common_stride + self.loss_weight = loss_weight + + self.scale_heads = [] + for in_feature, stride, channels in zip( + self.in_features, feature_strides, feature_channels + ): + head_ops = [] + head_length = max(1, int(np.log2(stride) - np.log2(self.common_stride))) + for k in range(head_length): + norm_module = get_norm(norm, conv_dims) + conv = Conv2d( + channels if k == 0 else conv_dims, + conv_dims, + kernel_size=3, + stride=1, + padding=1, + bias=not norm, + norm=norm_module, + activation=F.relu, + ) + weight_init.c2_msra_fill(conv) + head_ops.append(conv) + if stride != self.common_stride: + head_ops.append( + nn.Upsample(scale_factor=2, mode="bilinear", align_corners=False) + ) + self.scale_heads.append(nn.Sequential(*head_ops)) + self.add_module(in_feature, self.scale_heads[-1]) + self.predictor = Conv2d(conv_dims, num_classes, kernel_size=1, stride=1, padding=0) + weight_init.c2_msra_fill(self.predictor) + + @classmethod + def from_config(cls, cfg, input_shape: Dict[str, ShapeSpec]): + return { + "input_shape": { + k: v for k, v in input_shape.items() if k in cfg.MODEL.SEM_SEG_HEAD.IN_FEATURES + }, + "ignore_value": cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE, + "num_classes": cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES, + "conv_dims": cfg.MODEL.SEM_SEG_HEAD.CONVS_DIM, + "common_stride": cfg.MODEL.SEM_SEG_HEAD.COMMON_STRIDE, + "norm": cfg.MODEL.SEM_SEG_HEAD.NORM, + "loss_weight": cfg.MODEL.SEM_SEG_HEAD.LOSS_WEIGHT, + } + + def forward(self, features, targets=None): + """ + Returns: + In training, returns (None, dict of losses) + In inference, returns (CxHxW logits, {}) + """ + x = self.layers(features) + if self.training: + return None, self.losses(x, targets) + else: + x = F.interpolate( + x, scale_factor=self.common_stride, mode="bilinear", align_corners=False + ) + return x, {} + + def layers(self, features): + for i, f in enumerate(self.in_features): + if i == 0: + x = self.scale_heads[i](features[f]) + else: + x = x + self.scale_heads[i](features[f]) + x = self.predictor(x) + return x + + def losses(self, predictions, targets): + predictions = predictions.float() # https://github.com/pytorch/pytorch/issues/48163 + predictions = F.interpolate( + predictions, + scale_factor=self.common_stride, + mode="bilinear", + align_corners=False, + ) + loss = F.cross_entropy( + predictions, targets, reduction="mean", ignore_index=self.ignore_value + ) + losses = {"loss_sem_seg": loss * self.loss_weight} + return losses diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/mmdet_wrapper.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/mmdet_wrapper.py new file mode 100644 index 0000000000000000000000000000000000000000..5a60958cdc07e0170e4dfe02684bce259d42bdbc --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/mmdet_wrapper.py @@ -0,0 +1,273 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+import itertools +import logging +import numpy as np +from collections import OrderedDict +from collections.abc import Mapping +from typing import Dict, List, Optional, Tuple, Union +import torch +from omegaconf import DictConfig, OmegaConf +from torch import Tensor, nn + +from annotator.oneformer.detectron2.layers import ShapeSpec +from annotator.oneformer.detectron2.structures import BitMasks, Boxes, ImageList, Instances +from annotator.oneformer.detectron2.utils.events import get_event_storage + +from .backbone import Backbone + +logger = logging.getLogger(__name__) + + +def _to_container(cfg): + """ + mmdet will assert the type of dict/list. + So convert omegaconf objects to dict/list. + """ + if isinstance(cfg, DictConfig): + cfg = OmegaConf.to_container(cfg, resolve=True) + from mmcv.utils import ConfigDict + + return ConfigDict(cfg) + + +class MMDetBackbone(Backbone): + """ + Wrapper of mmdetection backbones to use in detectron2. + + mmdet backbones produce list/tuple of tensors, while detectron2 backbones + produce a dict of tensors. This class wraps the given backbone to produce + output in detectron2's convention, so it can be used in place of detectron2 + backbones. + """ + + def __init__( + self, + backbone: Union[nn.Module, Mapping], + neck: Union[nn.Module, Mapping, None] = None, + *, + output_shapes: List[ShapeSpec], + output_names: Optional[List[str]] = None, + ): + """ + Args: + backbone: either a backbone module or a mmdet config dict that defines a + backbone. The backbone takes a 4D image tensor and returns a + sequence of tensors. + neck: either a backbone module or a mmdet config dict that defines a + neck. The neck takes outputs of backbone and returns a + sequence of tensors. If None, no neck is used. + output_shapes: shape for every output of the backbone (or neck, if given). + stride and channels are often needed. + output_names: names for every output of the backbone (or neck, if given). + By default, will use "out0", "out1", ... + """ + super().__init__() + if isinstance(backbone, Mapping): + from mmdet.models import build_backbone + + backbone = build_backbone(_to_container(backbone)) + self.backbone = backbone + + if isinstance(neck, Mapping): + from mmdet.models import build_neck + + neck = build_neck(_to_container(neck)) + self.neck = neck + + # "Neck" weights, if any, are part of neck itself. This is the interface + # of mmdet so we follow it. Reference: + # https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/detectors/two_stage.py + logger.info("Initializing mmdet backbone weights...") + self.backbone.init_weights() + # train() in mmdet modules is non-trivial, and has to be explicitly + # called. Reference: + # https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/backbones/resnet.py + self.backbone.train() + if self.neck is not None: + logger.info("Initializing mmdet neck weights ...") + if isinstance(self.neck, nn.Sequential): + for m in self.neck: + m.init_weights() + else: + self.neck.init_weights() + self.neck.train() + + self._output_shapes = output_shapes + if not output_names: + output_names = [f"out{i}" for i in range(len(output_shapes))] + self._output_names = output_names + + def forward(self, x) -> Dict[str, Tensor]: + outs = self.backbone(x) + if self.neck is not None: + outs = self.neck(outs) + assert isinstance( + outs, (list, tuple) + ), "mmdet backbone should return a list/tuple of tensors!" 
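A minimal sketch, not part of the diff, of the convention `MMDetBackbone.forward` enforces: the list or tuple returned by the mmdet backbone (or neck) is re-exposed as detectron2's dict of named feature maps using the configured `output_names`.

```python
import torch

output_names = ["out0", "out1", "out2"]  # default naming used when none is given
outs = [torch.zeros(1, 256, s, s) for s in (64, 32, 16)]
assert len(outs) == len(output_names)
features = {name: tensor for name, tensor in zip(output_names, outs)}
print({k: tuple(v.shape) for k, v in features.items()})
# {'out0': (1, 256, 64, 64), 'out1': (1, 256, 32, 32), 'out2': (1, 256, 16, 16)}
```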
+ if len(outs) != len(self._output_shapes): + raise ValueError( + "Length of output_shapes does not match outputs from the mmdet backbone: " + f"{len(outs)} != {len(self._output_shapes)}" + ) + return {k: v for k, v in zip(self._output_names, outs)} + + def output_shape(self) -> Dict[str, ShapeSpec]: + return {k: v for k, v in zip(self._output_names, self._output_shapes)} + + +class MMDetDetector(nn.Module): + """ + Wrapper of a mmdetection detector model, for detection and instance segmentation. + Input/output formats of this class follow detectron2's convention, so a + mmdetection model can be trained and evaluated in detectron2. + """ + + def __init__( + self, + detector: Union[nn.Module, Mapping], + *, + # Default is 32 regardless of model: + # https://github.com/open-mmlab/mmdetection/tree/master/configs/_base_/datasets + size_divisibility=32, + pixel_mean: Tuple[float], + pixel_std: Tuple[float], + ): + """ + Args: + detector: a mmdet detector, or a mmdet config dict that defines a detector. + size_divisibility: pad input images to multiple of this number + pixel_mean: per-channel mean to normalize input image + pixel_std: per-channel stddev to normalize input image + """ + super().__init__() + if isinstance(detector, Mapping): + from mmdet.models import build_detector + + detector = build_detector(_to_container(detector)) + self.detector = detector + self.detector.init_weights() + self.size_divisibility = size_divisibility + + self.register_buffer("pixel_mean", torch.tensor(pixel_mean).view(-1, 1, 1), False) + self.register_buffer("pixel_std", torch.tensor(pixel_std).view(-1, 1, 1), False) + assert ( + self.pixel_mean.shape == self.pixel_std.shape + ), f"{self.pixel_mean} and {self.pixel_std} have different shapes!" + + def forward(self, batched_inputs: List[Dict[str, torch.Tensor]]): + images = [x["image"].to(self.device) for x in batched_inputs] + images = [(x - self.pixel_mean) / self.pixel_std for x in images] + images = ImageList.from_tensors(images, size_divisibility=self.size_divisibility).tensor + metas = [] + rescale = {"height" in x for x in batched_inputs} + if len(rescale) != 1: + raise ValueError("Some inputs have original height/width, but some don't!") + rescale = list(rescale)[0] + output_shapes = [] + for input in batched_inputs: + meta = {} + c, h, w = input["image"].shape + meta["img_shape"] = meta["ori_shape"] = (h, w, c) + if rescale: + scale_factor = np.array( + [w / input["width"], h / input["height"]] * 2, dtype="float32" + ) + ori_shape = (input["height"], input["width"]) + output_shapes.append(ori_shape) + meta["ori_shape"] = ori_shape + (c,) + else: + scale_factor = 1.0 + output_shapes.append((h, w)) + meta["scale_factor"] = scale_factor + meta["flip"] = False + padh, padw = images.shape[-2:] + meta["pad_shape"] = (padh, padw, c) + metas.append(meta) + + if self.training: + gt_instances = [x["instances"].to(self.device) for x in batched_inputs] + if gt_instances[0].has("gt_masks"): + from mmdet.core import PolygonMasks as mm_PolygonMasks, BitmapMasks as mm_BitMasks + + def convert_mask(m, shape): + # mmdet mask format + if isinstance(m, BitMasks): + return mm_BitMasks(m.tensor.cpu().numpy(), shape[0], shape[1]) + else: + return mm_PolygonMasks(m.polygons, shape[0], shape[1]) + + gt_masks = [convert_mask(x.gt_masks, x.image_size) for x in gt_instances] + losses_and_metrics = self.detector.forward_train( + images, + metas, + [x.gt_boxes.tensor for x in gt_instances], + [x.gt_classes for x in gt_instances], + gt_masks=gt_masks, + ) + else: + 
losses_and_metrics = self.detector.forward_train( + images, + metas, + [x.gt_boxes.tensor for x in gt_instances], + [x.gt_classes for x in gt_instances], + ) + return _parse_losses(losses_and_metrics) + else: + results = self.detector.simple_test(images, metas, rescale=rescale) + results = [ + {"instances": _convert_mmdet_result(r, shape)} + for r, shape in zip(results, output_shapes) + ] + return results + + @property + def device(self): + return self.pixel_mean.device + + +# Reference: show_result() in +# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/detectors/base.py +def _convert_mmdet_result(result, shape: Tuple[int, int]) -> Instances: + if isinstance(result, tuple): + bbox_result, segm_result = result + if isinstance(segm_result, tuple): + segm_result = segm_result[0] + else: + bbox_result, segm_result = result, None + + bboxes = torch.from_numpy(np.vstack(bbox_result)) # Nx5 + bboxes, scores = bboxes[:, :4], bboxes[:, -1] + labels = [ + torch.full((bbox.shape[0],), i, dtype=torch.int32) for i, bbox in enumerate(bbox_result) + ] + labels = torch.cat(labels) + inst = Instances(shape) + inst.pred_boxes = Boxes(bboxes) + inst.scores = scores + inst.pred_classes = labels + + if segm_result is not None and len(labels) > 0: + segm_result = list(itertools.chain(*segm_result)) + segm_result = [torch.from_numpy(x) if isinstance(x, np.ndarray) else x for x in segm_result] + segm_result = torch.stack(segm_result, dim=0) + inst.pred_masks = segm_result + return inst + + +# reference: https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/detectors/base.py +def _parse_losses(losses: Dict[str, Tensor]) -> Dict[str, Tensor]: + log_vars = OrderedDict() + for loss_name, loss_value in losses.items(): + if isinstance(loss_value, torch.Tensor): + log_vars[loss_name] = loss_value.mean() + elif isinstance(loss_value, list): + log_vars[loss_name] = sum(_loss.mean() for _loss in loss_value) + else: + raise TypeError(f"{loss_name} is not a tensor or list of tensors") + + if "loss" not in loss_name: + # put metrics to storage; don't return them + storage = get_event_storage() + value = log_vars.pop(loss_name).cpu().item() + storage.put_scalar(loss_name, value) + return log_vars diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/poolers.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/poolers.py new file mode 100644 index 0000000000000000000000000000000000000000..109ab47eb975b2302966eeb698ac6b4aff5e0a4d --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/poolers.py @@ -0,0 +1,263 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import math +from typing import List, Optional +import torch +from torch import nn +from torchvision.ops import RoIPool + +from annotator.oneformer.detectron2.layers import ROIAlign, ROIAlignRotated, cat, nonzero_tuple, shapes_to_tensor +from annotator.oneformer.detectron2.structures import Boxes +from annotator.oneformer.detectron2.utils.tracing import assert_fx_safe, is_fx_tracing + +""" +To export ROIPooler to torchscript, in this file, variables that should be annotated with +`Union[List[Boxes], List[RotatedBoxes]]` are only annotated with `List[Boxes]`. + +TODO: Correct these annotations when torchscript support `Union`. 
+https://github.com/pytorch/pytorch/issues/41412 +""" + +__all__ = ["ROIPooler"] + + +def assign_boxes_to_levels( + box_lists: List[Boxes], + min_level: int, + max_level: int, + canonical_box_size: int, + canonical_level: int, +): + """ + Map each box in `box_lists` to a feature map level index and return the assignment + vector. + + Args: + box_lists (list[Boxes] | list[RotatedBoxes]): A list of N Boxes or N RotatedBoxes, + where N is the number of images in the batch. + min_level (int): Smallest feature map level index. The input is considered index 0, + the output of stage 1 is index 1, and so. + max_level (int): Largest feature map level index. + canonical_box_size (int): A canonical box size in pixels (sqrt(box area)). + canonical_level (int): The feature map level index on which a canonically-sized box + should be placed. + + Returns: + A tensor of length M, where M is the total number of boxes aggregated over all + N batch images. The memory layout corresponds to the concatenation of boxes + from all images. Each element is the feature map index, as an offset from + `self.min_level`, for the corresponding box (so value i means the box is at + `self.min_level + i`). + """ + box_sizes = torch.sqrt(cat([boxes.area() for boxes in box_lists])) + # Eqn.(1) in FPN paper + level_assignments = torch.floor( + canonical_level + torch.log2(box_sizes / canonical_box_size + 1e-8) + ) + # clamp level to (min, max), in case the box size is too large or too small + # for the available feature maps + level_assignments = torch.clamp(level_assignments, min=min_level, max=max_level) + return level_assignments.to(torch.int64) - min_level + + +# script the module to avoid hardcoded device type +@torch.jit.script_if_tracing +def _convert_boxes_to_pooler_format(boxes: torch.Tensor, sizes: torch.Tensor) -> torch.Tensor: + sizes = sizes.to(device=boxes.device) + indices = torch.repeat_interleave( + torch.arange(len(sizes), dtype=boxes.dtype, device=boxes.device), sizes + ) + return cat([indices[:, None], boxes], dim=1) + + +def convert_boxes_to_pooler_format(box_lists: List[Boxes]): + """ + Convert all boxes in `box_lists` to the low-level format used by ROI pooling ops + (see description under Returns). + + Args: + box_lists (list[Boxes] | list[RotatedBoxes]): + A list of N Boxes or N RotatedBoxes, where N is the number of images in the batch. + + Returns: + When input is list[Boxes]: + A tensor of shape (M, 5), where M is the total number of boxes aggregated over all + N batch images. + The 5 columns are (batch index, x0, y0, x1, y1), where batch index + is the index in [0, N) identifying which batch image the box with corners at + (x0, y0, x1, y1) comes from. + When input is list[RotatedBoxes]: + A tensor of shape (M, 6), where M is the total number of boxes aggregated over all + N batch images. + The 6 columns are (batch index, x_ctr, y_ctr, width, height, angle_degrees), + where batch index is the index in [0, N) identifying which batch image the + rotated box (x_ctr, y_ctr, width, height, angle_degrees) comes from. + """ + boxes = torch.cat([x.tensor for x in box_lists], dim=0) + # __len__ returns Tensor in tracing. 
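An illustrative use, not part of the diff, of the rule implemented by `assign_boxes_to_levels` above (Eqn. (1) of the FPN paper): a box whose size `sqrt(area)` equals `canonical_box_size` lands on `canonical_level`, each doubling or halving of the size moves it one level up or down, and the result is clamped to the available range.

```python
import torch

canonical_level, canonical_box_size = 4, 224
min_level, max_level = 2, 5
box_sizes = torch.tensor([56.0, 112.0, 224.0, 448.0, 896.0])  # sqrt(box area) in pixels
levels = torch.floor(canonical_level + torch.log2(box_sizes / canonical_box_size + 1e-8))
levels = torch.clamp(levels, min=min_level, max=max_level).to(torch.int64)
print(levels)  # tensor([2, 3, 4, 5, 5])
```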
+ sizes = shapes_to_tensor([x.__len__() for x in box_lists]) + return _convert_boxes_to_pooler_format(boxes, sizes) + + +@torch.jit.script_if_tracing +def _create_zeros( + batch_target: Optional[torch.Tensor], + channels: int, + height: int, + width: int, + like_tensor: torch.Tensor, +) -> torch.Tensor: + batches = batch_target.shape[0] if batch_target is not None else 0 + sizes = (batches, channels, height, width) + return torch.zeros(sizes, dtype=like_tensor.dtype, device=like_tensor.device) + + +class ROIPooler(nn.Module): + """ + Region of interest feature map pooler that supports pooling from one or more + feature maps. + """ + + def __init__( + self, + output_size, + scales, + sampling_ratio, + pooler_type, + canonical_box_size=224, + canonical_level=4, + ): + """ + Args: + output_size (int, tuple[int] or list[int]): output size of the pooled region, + e.g., 14 x 14. If tuple or list is given, the length must be 2. + scales (list[float]): The scale for each low-level pooling op relative to + the input image. For a feature map with stride s relative to the input + image, scale is defined as 1/s. The stride must be power of 2. + When there are multiple scales, they must form a pyramid, i.e. they must be + a monotically decreasing geometric sequence with a factor of 1/2. + sampling_ratio (int): The `sampling_ratio` parameter for the ROIAlign op. + pooler_type (string): Name of the type of pooling operation that should be applied. + For instance, "ROIPool" or "ROIAlignV2". + canonical_box_size (int): A canonical box size in pixels (sqrt(box area)). The default + is heuristically defined as 224 pixels in the FPN paper (based on ImageNet + pre-training). + canonical_level (int): The feature map level index from which a canonically-sized box + should be placed. The default is defined as level 4 (stride=16) in the FPN paper, + i.e., a box of size 224x224 will be placed on the feature with stride=16. + The box placement for all boxes will be determined from their sizes w.r.t + canonical_box_size. For example, a box whose area is 4x that of a canonical box + should be used to pool features from feature level ``canonical_level+1``. + + Note that the actual input feature maps given to this module may not have + sufficiently many levels for the input boxes. If the boxes are too large or too + small for the input feature maps, the closest level will be used. + """ + super().__init__() + + if isinstance(output_size, int): + output_size = (output_size, output_size) + assert len(output_size) == 2 + assert isinstance(output_size[0], int) and isinstance(output_size[1], int) + self.output_size = output_size + + if pooler_type == "ROIAlign": + self.level_poolers = nn.ModuleList( + ROIAlign( + output_size, spatial_scale=scale, sampling_ratio=sampling_ratio, aligned=False + ) + for scale in scales + ) + elif pooler_type == "ROIAlignV2": + self.level_poolers = nn.ModuleList( + ROIAlign( + output_size, spatial_scale=scale, sampling_ratio=sampling_ratio, aligned=True + ) + for scale in scales + ) + elif pooler_type == "ROIPool": + self.level_poolers = nn.ModuleList( + RoIPool(output_size, spatial_scale=scale) for scale in scales + ) + elif pooler_type == "ROIAlignRotated": + self.level_poolers = nn.ModuleList( + ROIAlignRotated(output_size, spatial_scale=scale, sampling_ratio=sampling_ratio) + for scale in scales + ) + else: + raise ValueError("Unknown pooler type: {}".format(pooler_type)) + + # Map scale (defined as 1 / stride) to its feature map level under the + # assumption that stride is a power of 2. 
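For concreteness, and only as an illustration with assumed FPN scales, the mapping described in this comment gives levels 2 through 5, and the pyramid check just below requires exactly one scale per level:

```python
import math

scales = (1 / 4, 1 / 8, 1 / 16, 1 / 32)  # p2-p5, where scale = 1 / stride
min_level = int(-math.log2(scales[0]))   # 2
max_level = int(-math.log2(scales[-1]))  # 5
assert len(scales) == max_level - min_level + 1
print(min_level, max_level)  # 2 5
```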
+ min_level = -(math.log2(scales[0])) + max_level = -(math.log2(scales[-1])) + assert math.isclose(min_level, int(min_level)) and math.isclose( + max_level, int(max_level) + ), "Featuremap stride is not power of 2!" + self.min_level = int(min_level) + self.max_level = int(max_level) + assert ( + len(scales) == self.max_level - self.min_level + 1 + ), "[ROIPooler] Sizes of input featuremaps do not form a pyramid!" + assert 0 <= self.min_level and self.min_level <= self.max_level + self.canonical_level = canonical_level + assert canonical_box_size > 0 + self.canonical_box_size = canonical_box_size + + def forward(self, x: List[torch.Tensor], box_lists: List[Boxes]): + """ + Args: + x (list[Tensor]): A list of feature maps of NCHW shape, with scales matching those + used to construct this module. + box_lists (list[Boxes] | list[RotatedBoxes]): + A list of N Boxes or N RotatedBoxes, where N is the number of images in the batch. + The box coordinates are defined on the original image and + will be scaled by the `scales` argument of :class:`ROIPooler`. + + Returns: + Tensor: + A tensor of shape (M, C, output_size, output_size) where M is the total number of + boxes aggregated over all N batch images and C is the number of channels in `x`. + """ + num_level_assignments = len(self.level_poolers) + + if not is_fx_tracing(): + torch._assert( + isinstance(x, list) and isinstance(box_lists, list), + "Arguments to pooler must be lists", + ) + assert_fx_safe( + len(x) == num_level_assignments, + "unequal value, num_level_assignments={}, but x is list of {} Tensors".format( + num_level_assignments, len(x) + ), + ) + assert_fx_safe( + len(box_lists) == x[0].size(0), + "unequal value, x[0] batch dim 0 is {}, but box_list has length {}".format( + x[0].size(0), len(box_lists) + ), + ) + if len(box_lists) == 0: + return _create_zeros(None, x[0].shape[1], *self.output_size, x[0]) + + pooler_fmt_boxes = convert_boxes_to_pooler_format(box_lists) + + if num_level_assignments == 1: + return self.level_poolers[0](x[0], pooler_fmt_boxes) + + level_assignments = assign_boxes_to_levels( + box_lists, self.min_level, self.max_level, self.canonical_box_size, self.canonical_level + ) + + num_channels = x[0].shape[1] + output_size = self.output_size[0] + + output = _create_zeros(pooler_fmt_boxes, num_channels, output_size, output_size, x[0]) + + for level, pooler in enumerate(self.level_poolers): + inds = nonzero_tuple(level_assignments == level)[0] + pooler_fmt_boxes_level = pooler_fmt_boxes[inds] + # Use index_put_ instead of advance indexing, to avoid pytorch/issues/49852 + output.index_put_((inds,), pooler(x[level], pooler_fmt_boxes_level)) + + return output diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/postprocessing.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/postprocessing.py new file mode 100644 index 0000000000000000000000000000000000000000..82bbad25cdc5afbde9a3af47174c97ed473cd5f0 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/postprocessing.py @@ -0,0 +1,100 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import torch +from torch.nn import functional as F + +from annotator.oneformer.detectron2.structures import Instances, ROIMasks + + +# perhaps should rename to "resize_instance" +def detector_postprocess( + results: Instances, output_height: int, output_width: int, mask_threshold: float = 0.5 +): + """ + Resize the output instances. 
+ The input images are often resized when entering an object detector. + As a result, we often need the outputs of the detector in a different + resolution from its inputs. + + This function will resize the raw outputs of an R-CNN detector + to produce outputs according to the desired output resolution. + + Args: + results (Instances): the raw outputs from the detector. + `results.image_size` contains the input image resolution the detector sees. + This object might be modified in-place. + output_height, output_width: the desired output resolution. + Returns: + Instances: the resized output from the model, based on the output resolution + """ + if isinstance(output_width, torch.Tensor): + # This shape might (but not necessarily) be tensors during tracing. + # Converts integer tensors to float temporaries to ensure true + # division is performed when computing scale_x and scale_y. + output_width_tmp = output_width.float() + output_height_tmp = output_height.float() + new_size = torch.stack([output_height, output_width]) + else: + new_size = (output_height, output_width) + output_width_tmp = output_width + output_height_tmp = output_height + + scale_x, scale_y = ( + output_width_tmp / results.image_size[1], + output_height_tmp / results.image_size[0], + ) + results = Instances(new_size, **results.get_fields()) + + if results.has("pred_boxes"): + output_boxes = results.pred_boxes + elif results.has("proposal_boxes"): + output_boxes = results.proposal_boxes + else: + output_boxes = None + assert output_boxes is not None, "Predictions must contain boxes!" + + output_boxes.scale(scale_x, scale_y) + output_boxes.clip(results.image_size) + + results = results[output_boxes.nonempty()] + + if results.has("pred_masks"): + if isinstance(results.pred_masks, ROIMasks): + roi_masks = results.pred_masks + else: + # pred_masks is a tensor of shape (N, 1, M, M) + roi_masks = ROIMasks(results.pred_masks[:, 0, :, :]) + results.pred_masks = roi_masks.to_bitmasks( + results.pred_boxes, output_height, output_width, mask_threshold + ).tensor # TODO return ROIMasks/BitMask object in the future + + if results.has("pred_keypoints"): + results.pred_keypoints[:, :, 0] *= scale_x + results.pred_keypoints[:, :, 1] *= scale_y + + return results + + +def sem_seg_postprocess(result, img_size, output_height, output_width): + """ + Return semantic segmentation predictions in the original resolution. + + The input images are often resized when entering semantic segmentor. Moreover, in same + cases, they also padded inside segmentor to be divisible by maximum network stride. + As a result, we often need the predictions of the segmentor in a different + resolution from its inputs. + + Args: + result (Tensor): semantic segmentation prediction logits. A tensor of shape (C, H, W), + where C is the number of classes, and H, W are the height and width of the prediction. + img_size (tuple): image size that segmentor is taking as input. + output_height, output_width: the desired output resolution. + + Returns: + semantic segmentation prediction (Tensor): A tensor of the shape + (C, output_height, output_width) that contains per-pixel soft predictions. 
+ """ + result = result[:, : img_size[0], : img_size[1]].expand(1, -1, -1, -1) + result = F.interpolate( + result, size=(output_height, output_width), mode="bilinear", align_corners=False + )[0] + return result diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/proposal_generator/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/proposal_generator/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3f4e4df7645c67b7a013295207b98fe70b2e574c --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/proposal_generator/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from .build import PROPOSAL_GENERATOR_REGISTRY, build_proposal_generator +from .rpn import RPN_HEAD_REGISTRY, build_rpn_head, RPN, StandardRPNHead + +__all__ = list(globals().keys()) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/proposal_generator/build.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/proposal_generator/build.py new file mode 100644 index 0000000000000000000000000000000000000000..255cd4d0a852f70eeba79e6630f1703ed901963c --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/proposal_generator/build.py @@ -0,0 +1,24 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from annotator.oneformer.detectron2.utils.registry import Registry + +PROPOSAL_GENERATOR_REGISTRY = Registry("PROPOSAL_GENERATOR") +PROPOSAL_GENERATOR_REGISTRY.__doc__ = """ +Registry for proposal generator, which produces object proposals from feature maps. + +The registered object will be called with `obj(cfg, input_shape)`. +The call should return a `nn.Module` object. +""" + +from . import rpn, rrpn # noqa F401 isort:skip + + +def build_proposal_generator(cfg, input_shape): + """ + Build a proposal generator from `cfg.MODEL.PROPOSAL_GENERATOR.NAME`. + The name can be "PrecomputedProposals" to use no proposal generator. + """ + name = cfg.MODEL.PROPOSAL_GENERATOR.NAME + if name == "PrecomputedProposals": + return None + + return PROPOSAL_GENERATOR_REGISTRY.get(name)(cfg, input_shape) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/proposal_generator/proposal_utils.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/proposal_generator/proposal_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..b5579f43f04e4442f897e20672e4ad5b784c029b --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/proposal_generator/proposal_utils.py @@ -0,0 +1,205 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+import logging +import math +from typing import List, Tuple, Union +import torch + +from annotator.oneformer.detectron2.layers import batched_nms, cat, move_device_like +from annotator.oneformer.detectron2.structures import Boxes, Instances + +logger = logging.getLogger(__name__) + + +def _is_tracing(): + # (fixed in TORCH_VERSION >= 1.9) + if torch.jit.is_scripting(): + # https://github.com/pytorch/pytorch/issues/47379 + return False + else: + return torch.jit.is_tracing() + + +def find_top_rpn_proposals( + proposals: List[torch.Tensor], + pred_objectness_logits: List[torch.Tensor], + image_sizes: List[Tuple[int, int]], + nms_thresh: float, + pre_nms_topk: int, + post_nms_topk: int, + min_box_size: float, + training: bool, +): + """ + For each feature map, select the `pre_nms_topk` highest scoring proposals, + apply NMS, clip proposals, and remove small boxes. Return the `post_nms_topk` + highest scoring proposals among all the feature maps for each image. + + Args: + proposals (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A, 4). + All proposal predictions on the feature maps. + pred_objectness_logits (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A). + image_sizes (list[tuple]): sizes (h, w) for each image + nms_thresh (float): IoU threshold to use for NMS + pre_nms_topk (int): number of top k scoring proposals to keep before applying NMS. + When RPN is run on multiple feature maps (as in FPN) this number is per + feature map. + post_nms_topk (int): number of top k scoring proposals to keep after applying NMS. + When RPN is run on multiple feature maps (as in FPN) this number is total, + over all feature maps. + min_box_size (float): minimum proposal box side length in pixels (absolute units + wrt input images). + training (bool): True if proposals are to be used in training, otherwise False. + This arg exists only to support a legacy bug; look for the "NB: Legacy bug ..." + comment. + + Returns: + list[Instances]: list of N Instances. The i-th Instances + stores post_nms_topk object proposals for image i, sorted by their + objectness score in descending order. + """ + num_images = len(image_sizes) + device = ( + proposals[0].device + if torch.jit.is_scripting() + else ("cpu" if torch.jit.is_tracing() else proposals[0].device) + ) + + # 1. Select top-k anchor for every level and every image + topk_scores = [] # #lvl Tensor, each of shape N x topk + topk_proposals = [] + level_ids = [] # #lvl Tensor, each of shape (topk,) + batch_idx = move_device_like(torch.arange(num_images, device=device), proposals[0]) + for level_id, (proposals_i, logits_i) in enumerate(zip(proposals, pred_objectness_logits)): + Hi_Wi_A = logits_i.shape[1] + if isinstance(Hi_Wi_A, torch.Tensor): # it's a tensor in tracing + num_proposals_i = torch.clamp(Hi_Wi_A, max=pre_nms_topk) + else: + num_proposals_i = min(Hi_Wi_A, pre_nms_topk) + + topk_scores_i, topk_idx = logits_i.topk(num_proposals_i, dim=1) + + # each is N x topk + topk_proposals_i = proposals_i[batch_idx[:, None], topk_idx] # N x topk x 4 + + topk_proposals.append(topk_proposals_i) + topk_scores.append(topk_scores_i) + level_ids.append( + move_device_like( + torch.full((num_proposals_i,), level_id, dtype=torch.int64, device=device), + proposals[0], + ) + ) + + # 2. Concat all levels together + topk_scores = cat(topk_scores, dim=1) + topk_proposals = cat(topk_proposals, dim=1) + level_ids = cat(level_ids, dim=0) + + # 3. For each image, run a per-level NMS, and choose topk results. 
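Step 3 relies on `batched_nms`, which prevents boxes from different levels (passed in as `level_ids`) from suppressing one another. A common way to get that behavior, shown only as a sketch of the idea rather than detectron2's actual implementation, is to offset each box by a large per-index amount so one plain NMS call never mixes levels:

```python
import torch
from torchvision.ops import nms

def batched_nms_sketch(boxes, scores, idxs, iou_threshold):
    """boxes: (N, 4); scores: (N,); idxs: (N,) level (or class) indices."""
    if boxes.numel() == 0:
        return torch.empty((0,), dtype=torch.int64, device=boxes.device)
    max_coordinate = boxes.max()
    offsets = idxs.to(boxes) * (max_coordinate + 1)
    # boxes with different idxs can no longer overlap, so a single NMS call suffices
    return nms(boxes + offsets[:, None], scores, iou_threshold)
```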
+ results: List[Instances] = [] + for n, image_size in enumerate(image_sizes): + boxes = Boxes(topk_proposals[n]) + scores_per_img = topk_scores[n] + lvl = level_ids + + valid_mask = torch.isfinite(boxes.tensor).all(dim=1) & torch.isfinite(scores_per_img) + if not valid_mask.all(): + if training: + raise FloatingPointError( + "Predicted boxes or scores contain Inf/NaN. Training has diverged." + ) + boxes = boxes[valid_mask] + scores_per_img = scores_per_img[valid_mask] + lvl = lvl[valid_mask] + boxes.clip(image_size) + + # filter empty boxes + keep = boxes.nonempty(threshold=min_box_size) + if _is_tracing() or keep.sum().item() != len(boxes): + boxes, scores_per_img, lvl = boxes[keep], scores_per_img[keep], lvl[keep] + + keep = batched_nms(boxes.tensor, scores_per_img, lvl, nms_thresh) + # In Detectron1, there was different behavior during training vs. testing. + # (https://github.com/facebookresearch/Detectron/issues/459) + # During training, topk is over the proposals from *all* images in the training batch. + # During testing, it is over the proposals for each image separately. + # As a result, the training behavior becomes batch-dependent, + # and the configuration "POST_NMS_TOPK_TRAIN" end up relying on the batch size. + # This bug is addressed in Detectron2 to make the behavior independent of batch size. + keep = keep[:post_nms_topk] # keep is already sorted + + res = Instances(image_size) + res.proposal_boxes = boxes[keep] + res.objectness_logits = scores_per_img[keep] + results.append(res) + return results + + +def add_ground_truth_to_proposals( + gt: Union[List[Instances], List[Boxes]], proposals: List[Instances] +) -> List[Instances]: + """ + Call `add_ground_truth_to_proposals_single_image` for all images. + + Args: + gt(Union[List[Instances], List[Boxes]): list of N elements. Element i is a Instances + representing the ground-truth for image i. + proposals (list[Instances]): list of N elements. Element i is a Instances + representing the proposals for image i. + + Returns: + list[Instances]: list of N Instances. Each is the proposals for the image, + with field "proposal_boxes" and "objectness_logits". + """ + assert gt is not None + + if len(proposals) != len(gt): + raise ValueError("proposals and gt should have the same length as the number of images!") + if len(proposals) == 0: + return proposals + + return [ + add_ground_truth_to_proposals_single_image(gt_i, proposals_i) + for gt_i, proposals_i in zip(gt, proposals) + ] + + +def add_ground_truth_to_proposals_single_image( + gt: Union[Instances, Boxes], proposals: Instances +) -> Instances: + """ + Augment `proposals` with `gt`. + + Args: + Same as `add_ground_truth_to_proposals`, but with gt and proposals + per image. + + Returns: + Same as `add_ground_truth_to_proposals`, but for only one image. + """ + if isinstance(gt, Boxes): + # convert Boxes to Instances + gt = Instances(proposals.image_size, gt_boxes=gt) + + gt_boxes = gt.gt_boxes + device = proposals.objectness_logits.device + # Assign all ground-truth boxes an objectness logit corresponding to + # P(object) = sigmoid(logit) =~ 1. 
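A numeric check, purely illustrative, of the logit value computed just below: `log((1 - 1e-10) / 1e-10)` is about 23, and its sigmoid differs from 1 by roughly 1e-10, so the ground-truth boxes appended to the proposals are treated as near-certain objects.

```python
import math

eps = 1e-10
gt_logit_value = math.log((1.0 - eps) / (1 - (1.0 - eps)))
print(round(gt_logit_value, 2))                    # 23.03
print(1.0 / (1.0 + math.exp(-gt_logit_value)))     # ~1.0 (short of 1 by ~1e-10)
```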
+ gt_logit_value = math.log((1.0 - 1e-10) / (1 - (1.0 - 1e-10))) + gt_logits = gt_logit_value * torch.ones(len(gt_boxes), device=device) + + # Concatenating gt_boxes with proposals requires them to have the same fields + gt_proposal = Instances(proposals.image_size, **gt.get_fields()) + gt_proposal.proposal_boxes = gt_boxes + gt_proposal.objectness_logits = gt_logits + + for key in proposals.get_fields().keys(): + assert gt_proposal.has( + key + ), "The attribute '{}' in `proposals` does not exist in `gt`".format(key) + + # NOTE: Instances.cat only use fields from the first item. Extra fields in latter items + # will be thrown away. + new_proposals = Instances.cat([proposals, gt_proposal]) + + return new_proposals diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/proposal_generator/rpn.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/proposal_generator/rpn.py new file mode 100644 index 0000000000000000000000000000000000000000..e37860dd6edb7a3cf493def2ae60a424b4dfc357 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/proposal_generator/rpn.py @@ -0,0 +1,533 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from typing import Dict, List, Optional, Tuple, Union +import torch +import torch.nn.functional as F +from torch import nn + +from annotator.oneformer.detectron2.config import configurable +from annotator.oneformer.detectron2.layers import Conv2d, ShapeSpec, cat +from annotator.oneformer.detectron2.structures import Boxes, ImageList, Instances, pairwise_iou +from annotator.oneformer.detectron2.utils.events import get_event_storage +from annotator.oneformer.detectron2.utils.memory import retry_if_cuda_oom +from annotator.oneformer.detectron2.utils.registry import Registry + +from ..anchor_generator import build_anchor_generator +from ..box_regression import Box2BoxTransform, _dense_box_regression_loss +from ..matcher import Matcher +from ..sampling import subsample_labels +from .build import PROPOSAL_GENERATOR_REGISTRY +from .proposal_utils import find_top_rpn_proposals + +RPN_HEAD_REGISTRY = Registry("RPN_HEAD") +RPN_HEAD_REGISTRY.__doc__ = """ +Registry for RPN heads, which take feature maps and perform +objectness classification and bounding box regression for anchors. + +The registered object will be called with `obj(cfg, input_shape)`. +The call should return a `nn.Module` object. +""" + + +""" +Shape shorthand in this module: + + N: number of images in the minibatch + L: number of feature maps per image on which RPN is run + A: number of cell anchors (must be the same for all feature maps) + Hi, Wi: height and width of the i-th feature map + B: size of the box parameterization + +Naming convention: + + objectness: refers to the binary classification of an anchor as object vs. not object. + + deltas: refers to the 4-d (dx, dy, dw, dh) deltas that parameterize the box2box + transform (see :class:`box_regression.Box2BoxTransform`), or 5d for rotated boxes. + + pred_objectness_logits: predicted objectness scores in [-inf, +inf]; use + sigmoid(pred_objectness_logits) to estimate P(object). + + gt_labels: ground-truth binary classification labels for objectness + + pred_anchor_deltas: predicted box2box transform deltas + + gt_anchor_deltas: ground-truth box2box transform deltas +""" + + +def build_rpn_head(cfg, input_shape): + """ + Build an RPN head defined by `cfg.MODEL.RPN.HEAD_NAME`. 
+ """ + name = cfg.MODEL.RPN.HEAD_NAME + return RPN_HEAD_REGISTRY.get(name)(cfg, input_shape) + + +@RPN_HEAD_REGISTRY.register() +class StandardRPNHead(nn.Module): + """ + Standard RPN classification and regression heads described in :paper:`Faster R-CNN`. + Uses a 3x3 conv to produce a shared hidden state from which one 1x1 conv predicts + objectness logits for each anchor and a second 1x1 conv predicts bounding-box deltas + specifying how to deform each anchor into an object proposal. + """ + + @configurable + def __init__( + self, *, in_channels: int, num_anchors: int, box_dim: int = 4, conv_dims: List[int] = (-1,) + ): + """ + NOTE: this interface is experimental. + + Args: + in_channels (int): number of input feature channels. When using multiple + input features, they must have the same number of channels. + num_anchors (int): number of anchors to predict for *each spatial position* + on the feature map. The total number of anchors for each + feature map will be `num_anchors * H * W`. + box_dim (int): dimension of a box, which is also the number of box regression + predictions to make for each anchor. An axis aligned box has + box_dim=4, while a rotated box has box_dim=5. + conv_dims (list[int]): a list of integers representing the output channels + of N conv layers. Set it to -1 to use the same number of output channels + as input channels. + """ + super().__init__() + cur_channels = in_channels + # Keeping the old variable names and structure for backwards compatiblity. + # Otherwise the old checkpoints will fail to load. + if len(conv_dims) == 1: + out_channels = cur_channels if conv_dims[0] == -1 else conv_dims[0] + # 3x3 conv for the hidden representation + self.conv = self._get_rpn_conv(cur_channels, out_channels) + cur_channels = out_channels + else: + self.conv = nn.Sequential() + for k, conv_dim in enumerate(conv_dims): + out_channels = cur_channels if conv_dim == -1 else conv_dim + if out_channels <= 0: + raise ValueError( + f"Conv output channels should be greater than 0. Got {out_channels}" + ) + conv = self._get_rpn_conv(cur_channels, out_channels) + self.conv.add_module(f"conv{k}", conv) + cur_channels = out_channels + # 1x1 conv for predicting objectness logits + self.objectness_logits = nn.Conv2d(cur_channels, num_anchors, kernel_size=1, stride=1) + # 1x1 conv for predicting box2box transform deltas + self.anchor_deltas = nn.Conv2d(cur_channels, num_anchors * box_dim, kernel_size=1, stride=1) + + # Keeping the order of weights initialization same for backwards compatiblility. + for layer in self.modules(): + if isinstance(layer, nn.Conv2d): + nn.init.normal_(layer.weight, std=0.01) + nn.init.constant_(layer.bias, 0) + + def _get_rpn_conv(self, in_channels, out_channels): + return Conv2d( + in_channels, + out_channels, + kernel_size=3, + stride=1, + padding=1, + activation=nn.ReLU(), + ) + + @classmethod + def from_config(cls, cfg, input_shape): + # Standard RPN is shared across levels: + in_channels = [s.channels for s in input_shape] + assert len(set(in_channels)) == 1, "Each level must have the same channel!" + in_channels = in_channels[0] + + # RPNHead should take the same input as anchor generator + # NOTE: it assumes that creating an anchor generator does not have unwanted side effect. 
+ anchor_generator = build_anchor_generator(cfg, input_shape) + num_anchors = anchor_generator.num_anchors + box_dim = anchor_generator.box_dim + assert ( + len(set(num_anchors)) == 1 + ), "Each level must have the same number of anchors per spatial position" + return { + "in_channels": in_channels, + "num_anchors": num_anchors[0], + "box_dim": box_dim, + "conv_dims": cfg.MODEL.RPN.CONV_DIMS, + } + + def forward(self, features: List[torch.Tensor]): + """ + Args: + features (list[Tensor]): list of feature maps + + Returns: + list[Tensor]: A list of L elements. + Element i is a tensor of shape (N, A, Hi, Wi) representing + the predicted objectness logits for all anchors. A is the number of cell anchors. + list[Tensor]: A list of L elements. Element i is a tensor of shape + (N, A*box_dim, Hi, Wi) representing the predicted "deltas" used to transform anchors + to proposals. + """ + pred_objectness_logits = [] + pred_anchor_deltas = [] + for x in features: + t = self.conv(x) + pred_objectness_logits.append(self.objectness_logits(t)) + pred_anchor_deltas.append(self.anchor_deltas(t)) + return pred_objectness_logits, pred_anchor_deltas + + +@PROPOSAL_GENERATOR_REGISTRY.register() +class RPN(nn.Module): + """ + Region Proposal Network, introduced by :paper:`Faster R-CNN`. + """ + + @configurable + def __init__( + self, + *, + in_features: List[str], + head: nn.Module, + anchor_generator: nn.Module, + anchor_matcher: Matcher, + box2box_transform: Box2BoxTransform, + batch_size_per_image: int, + positive_fraction: float, + pre_nms_topk: Tuple[float, float], + post_nms_topk: Tuple[float, float], + nms_thresh: float = 0.7, + min_box_size: float = 0.0, + anchor_boundary_thresh: float = -1.0, + loss_weight: Union[float, Dict[str, float]] = 1.0, + box_reg_loss_type: str = "smooth_l1", + smooth_l1_beta: float = 0.0, + ): + """ + NOTE: this interface is experimental. + + Args: + in_features (list[str]): list of names of input features to use + head (nn.Module): a module that predicts logits and regression deltas + for each level from a list of per-level features + anchor_generator (nn.Module): a module that creates anchors from a + list of features. Usually an instance of :class:`AnchorGenerator` + anchor_matcher (Matcher): label the anchors by matching them with ground truth. + box2box_transform (Box2BoxTransform): defines the transform from anchors boxes to + instance boxes + batch_size_per_image (int): number of anchors per image to sample for training + positive_fraction (float): fraction of foreground anchors to sample for training + pre_nms_topk (tuple[float]): (train, test) that represents the + number of top k proposals to select before NMS, in + training and testing. + post_nms_topk (tuple[float]): (train, test) that represents the + number of top k proposals to select after NMS, in + training and testing. + nms_thresh (float): NMS threshold used to de-duplicate the predicted proposals + min_box_size (float): remove proposal boxes with any side smaller than this threshold, + in the unit of input image pixels + anchor_boundary_thresh (float): legacy option + loss_weight (float|dict): weights to use for losses. Can be single float for weighting + all rpn losses together, or a dict of individual weightings. Valid dict keys are: + "loss_rpn_cls" - applied to classification loss + "loss_rpn_loc" - applied to box regression loss + box_reg_loss_type (str): Loss type to use. Supported losses: "smooth_l1", "giou". + smooth_l1_beta (float): beta parameter for the smooth L1 regression loss. 
Default to + use L1 loss. Only used when `box_reg_loss_type` is "smooth_l1" + """ + super().__init__() + self.in_features = in_features + self.rpn_head = head + self.anchor_generator = anchor_generator + self.anchor_matcher = anchor_matcher + self.box2box_transform = box2box_transform + self.batch_size_per_image = batch_size_per_image + self.positive_fraction = positive_fraction + # Map from self.training state to train/test settings + self.pre_nms_topk = {True: pre_nms_topk[0], False: pre_nms_topk[1]} + self.post_nms_topk = {True: post_nms_topk[0], False: post_nms_topk[1]} + self.nms_thresh = nms_thresh + self.min_box_size = float(min_box_size) + self.anchor_boundary_thresh = anchor_boundary_thresh + if isinstance(loss_weight, float): + loss_weight = {"loss_rpn_cls": loss_weight, "loss_rpn_loc": loss_weight} + self.loss_weight = loss_weight + self.box_reg_loss_type = box_reg_loss_type + self.smooth_l1_beta = smooth_l1_beta + + @classmethod + def from_config(cls, cfg, input_shape: Dict[str, ShapeSpec]): + in_features = cfg.MODEL.RPN.IN_FEATURES + ret = { + "in_features": in_features, + "min_box_size": cfg.MODEL.PROPOSAL_GENERATOR.MIN_SIZE, + "nms_thresh": cfg.MODEL.RPN.NMS_THRESH, + "batch_size_per_image": cfg.MODEL.RPN.BATCH_SIZE_PER_IMAGE, + "positive_fraction": cfg.MODEL.RPN.POSITIVE_FRACTION, + "loss_weight": { + "loss_rpn_cls": cfg.MODEL.RPN.LOSS_WEIGHT, + "loss_rpn_loc": cfg.MODEL.RPN.BBOX_REG_LOSS_WEIGHT * cfg.MODEL.RPN.LOSS_WEIGHT, + }, + "anchor_boundary_thresh": cfg.MODEL.RPN.BOUNDARY_THRESH, + "box2box_transform": Box2BoxTransform(weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS), + "box_reg_loss_type": cfg.MODEL.RPN.BBOX_REG_LOSS_TYPE, + "smooth_l1_beta": cfg.MODEL.RPN.SMOOTH_L1_BETA, + } + + ret["pre_nms_topk"] = (cfg.MODEL.RPN.PRE_NMS_TOPK_TRAIN, cfg.MODEL.RPN.PRE_NMS_TOPK_TEST) + ret["post_nms_topk"] = (cfg.MODEL.RPN.POST_NMS_TOPK_TRAIN, cfg.MODEL.RPN.POST_NMS_TOPK_TEST) + + ret["anchor_generator"] = build_anchor_generator(cfg, [input_shape[f] for f in in_features]) + ret["anchor_matcher"] = Matcher( + cfg.MODEL.RPN.IOU_THRESHOLDS, cfg.MODEL.RPN.IOU_LABELS, allow_low_quality_matches=True + ) + ret["head"] = build_rpn_head(cfg, [input_shape[f] for f in in_features]) + return ret + + def _subsample_labels(self, label): + """ + Randomly sample a subset of positive and negative examples, and overwrite + the label vector to the ignore value (-1) for all elements that are not + included in the sample. + + Args: + labels (Tensor): a vector of -1, 0, 1. Will be modified in-place and returned. + """ + pos_idx, neg_idx = subsample_labels( + label, self.batch_size_per_image, self.positive_fraction, 0 + ) + # Fill with the ignore label (-1), then set positive and negative labels + label.fill_(-1) + label.scatter_(0, pos_idx, 1) + label.scatter_(0, neg_idx, 0) + return label + + @torch.jit.unused + @torch.no_grad() + def label_and_sample_anchors( + self, anchors: List[Boxes], gt_instances: List[Instances] + ) -> Tuple[List[torch.Tensor], List[torch.Tensor]]: + """ + Args: + anchors (list[Boxes]): anchors for each feature map. + gt_instances: the ground-truth instances for each image. + + Returns: + list[Tensor]: + List of #img tensors. i-th element is a vector of labels whose length is + the total number of anchors across all feature maps R = sum(Hi * Wi * A). + Label values are in {-1, 0, 1}, with meanings: -1 = ignore; 0 = negative + class; 1 = positive class. + list[Tensor]: + i-th element is a Rx4 tensor. The values are the matched gt boxes for each + anchor. 
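# Editor's sketch (standalone and simplified; the vendored code delegates to
# detectron2's subsample_labels helper): matcher labels arrive as
# {-1: ignore, 0: negative, 1: positive}; _subsample_labels keeps a random
# subset of positives/negatives and resets everything else to -1.
import torch

def subsample(labels: torch.Tensor, num_samples: int, positive_fraction: float) -> torch.Tensor:
    pos = torch.nonzero(labels == 1).squeeze(1)
    neg = torch.nonzero(labels == 0).squeeze(1)
    num_pos = min(pos.numel(), int(num_samples * positive_fraction))
    num_neg = min(neg.numel(), num_samples - num_pos)
    keep_pos = pos[torch.randperm(pos.numel())[:num_pos]]
    keep_neg = neg[torch.randperm(neg.numel())[:num_neg]]
    out = torch.full_like(labels, -1)   # everything not sampled is ignored
    out[keep_pos] = 1
    out[keep_neg] = 0
    return out

labels = torch.tensor([1, 1, 0, 0, 0, 0, -1, 1])
print(subsample(labels, num_samples=4, positive_fraction=0.5))  # e.g. tensor([ 1, -1,  0, -1,  0, -1, -1,  1])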
Values are undefined for those anchors not labeled as 1. + """ + anchors = Boxes.cat(anchors) + + gt_boxes = [x.gt_boxes for x in gt_instances] + image_sizes = [x.image_size for x in gt_instances] + del gt_instances + + gt_labels = [] + matched_gt_boxes = [] + for image_size_i, gt_boxes_i in zip(image_sizes, gt_boxes): + """ + image_size_i: (h, w) for the i-th image + gt_boxes_i: ground-truth boxes for i-th image + """ + + match_quality_matrix = retry_if_cuda_oom(pairwise_iou)(gt_boxes_i, anchors) + matched_idxs, gt_labels_i = retry_if_cuda_oom(self.anchor_matcher)(match_quality_matrix) + # Matching is memory-expensive and may result in CPU tensors. But the result is small + gt_labels_i = gt_labels_i.to(device=gt_boxes_i.device) + del match_quality_matrix + + if self.anchor_boundary_thresh >= 0: + # Discard anchors that go out of the boundaries of the image + # NOTE: This is legacy functionality that is turned off by default in Detectron2 + anchors_inside_image = anchors.inside_box(image_size_i, self.anchor_boundary_thresh) + gt_labels_i[~anchors_inside_image] = -1 + + # A vector of labels (-1, 0, 1) for each anchor + gt_labels_i = self._subsample_labels(gt_labels_i) + + if len(gt_boxes_i) == 0: + # These values won't be used anyway since the anchor is labeled as background + matched_gt_boxes_i = torch.zeros_like(anchors.tensor) + else: + # TODO wasted indexing computation for ignored boxes + matched_gt_boxes_i = gt_boxes_i[matched_idxs].tensor + + gt_labels.append(gt_labels_i) # N,AHW + matched_gt_boxes.append(matched_gt_boxes_i) + return gt_labels, matched_gt_boxes + + @torch.jit.unused + def losses( + self, + anchors: List[Boxes], + pred_objectness_logits: List[torch.Tensor], + gt_labels: List[torch.Tensor], + pred_anchor_deltas: List[torch.Tensor], + gt_boxes: List[torch.Tensor], + ) -> Dict[str, torch.Tensor]: + """ + Return the losses from a set of RPN predictions and their associated ground-truth. + + Args: + anchors (list[Boxes or RotatedBoxes]): anchors for each feature map, each + has shape (Hi*Wi*A, B), where B is box dimension (4 or 5). + pred_objectness_logits (list[Tensor]): A list of L elements. + Element i is a tensor of shape (N, Hi*Wi*A) representing + the predicted objectness logits for all anchors. + gt_labels (list[Tensor]): Output of :meth:`label_and_sample_anchors`. + pred_anchor_deltas (list[Tensor]): A list of L elements. Element i is a tensor of shape + (N, Hi*Wi*A, 4 or 5) representing the predicted "deltas" used to transform anchors + to proposals. + gt_boxes (list[Tensor]): Output of :meth:`label_and_sample_anchors`. + + Returns: + dict[loss name -> loss value]: A dict mapping from loss name to loss value. + Loss names are: `loss_rpn_cls` for objectness classification and + `loss_rpn_loc` for proposal localization. 
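# Editor's sketch (standalone) of how the objectness term computed in losses()
# below is normalized: BCE-with-logits summed over the sampled (label >= 0)
# anchors, then divided by batch_size_per_image * num_images rather than by the
# actual number of valid anchors.
import torch
import torch.nn.functional as F

batch_size_per_image, num_images, R = 256, 2, 1000
pred_logits = torch.randn(num_images, R)              # (N, sum(Hi*Wi*A)) objectness logits
gt_labels = torch.randint(-1, 2, (num_images, R))     # {-1: ignore, 0: negative, 1: positive}
valid = gt_labels >= 0
loss_rpn_cls = F.binary_cross_entropy_with_logits(
    pred_logits[valid], gt_labels[valid].float(), reduction="sum"
) / (batch_size_per_image * num_images)
print(loss_rpn_cls.item())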
+ """ + num_images = len(gt_labels) + gt_labels = torch.stack(gt_labels) # (N, sum(Hi*Wi*Ai)) + + # Log the number of positive/negative anchors per-image that's used in training + pos_mask = gt_labels == 1 + num_pos_anchors = pos_mask.sum().item() + num_neg_anchors = (gt_labels == 0).sum().item() + storage = get_event_storage() + storage.put_scalar("rpn/num_pos_anchors", num_pos_anchors / num_images) + storage.put_scalar("rpn/num_neg_anchors", num_neg_anchors / num_images) + + localization_loss = _dense_box_regression_loss( + anchors, + self.box2box_transform, + pred_anchor_deltas, + gt_boxes, + pos_mask, + box_reg_loss_type=self.box_reg_loss_type, + smooth_l1_beta=self.smooth_l1_beta, + ) + + valid_mask = gt_labels >= 0 + objectness_loss = F.binary_cross_entropy_with_logits( + cat(pred_objectness_logits, dim=1)[valid_mask], + gt_labels[valid_mask].to(torch.float32), + reduction="sum", + ) + normalizer = self.batch_size_per_image * num_images + losses = { + "loss_rpn_cls": objectness_loss / normalizer, + # The original Faster R-CNN paper uses a slightly different normalizer + # for loc loss. But it doesn't matter in practice + "loss_rpn_loc": localization_loss / normalizer, + } + losses = {k: v * self.loss_weight.get(k, 1.0) for k, v in losses.items()} + return losses + + def forward( + self, + images: ImageList, + features: Dict[str, torch.Tensor], + gt_instances: Optional[List[Instances]] = None, + ): + """ + Args: + images (ImageList): input images of length `N` + features (dict[str, Tensor]): input data as a mapping from feature + map name to tensor. Axis 0 represents the number of images `N` in + the input data; axes 1-3 are channels, height, and width, which may + vary between feature maps (e.g., if a feature pyramid is used). + gt_instances (list[Instances], optional): a length `N` list of `Instances`s. + Each `Instances` stores ground-truth instances for the corresponding image. + + Returns: + proposals: list[Instances]: contains fields "proposal_boxes", "objectness_logits" + loss: dict[Tensor] or None + """ + features = [features[f] for f in self.in_features] + anchors = self.anchor_generator(features) + + pred_objectness_logits, pred_anchor_deltas = self.rpn_head(features) + # Transpose the Hi*Wi*A dimension to the middle: + pred_objectness_logits = [ + # (N, A, Hi, Wi) -> (N, Hi, Wi, A) -> (N, Hi*Wi*A) + score.permute(0, 2, 3, 1).flatten(1) + for score in pred_objectness_logits + ] + pred_anchor_deltas = [ + # (N, A*B, Hi, Wi) -> (N, A, B, Hi, Wi) -> (N, Hi, Wi, A, B) -> (N, Hi*Wi*A, B) + x.view(x.shape[0], -1, self.anchor_generator.box_dim, x.shape[-2], x.shape[-1]) + .permute(0, 3, 4, 1, 2) + .flatten(1, -2) + for x in pred_anchor_deltas + ] + + if self.training: + assert gt_instances is not None, "RPN requires gt_instances in training!" + gt_labels, gt_boxes = self.label_and_sample_anchors(anchors, gt_instances) + losses = self.losses( + anchors, pred_objectness_logits, gt_labels, pred_anchor_deltas, gt_boxes + ) + else: + losses = {} + proposals = self.predict_proposals( + anchors, pred_objectness_logits, pred_anchor_deltas, images.image_sizes + ) + return proposals, losses + + def predict_proposals( + self, + anchors: List[Boxes], + pred_objectness_logits: List[torch.Tensor], + pred_anchor_deltas: List[torch.Tensor], + image_sizes: List[Tuple[int, int]], + ): + """ + Decode all the predicted box regression deltas to proposals. Find the top proposals + by applying NMS and removing boxes that are too small. + + Returns: + proposals (list[Instances]): list of N Instances. 
The i-th Instances + stores post_nms_topk object proposals for image i, sorted by their + objectness score in descending order. + """ + # The proposals are treated as fixed for joint training with roi heads. + # This approach ignores the derivative w.r.t. the proposal boxes’ coordinates that + # are also network responses. + with torch.no_grad(): + pred_proposals = self._decode_proposals(anchors, pred_anchor_deltas) + return find_top_rpn_proposals( + pred_proposals, + pred_objectness_logits, + image_sizes, + self.nms_thresh, + self.pre_nms_topk[self.training], + self.post_nms_topk[self.training], + self.min_box_size, + self.training, + ) + + def _decode_proposals(self, anchors: List[Boxes], pred_anchor_deltas: List[torch.Tensor]): + """ + Transform anchors into proposals by applying the predicted anchor deltas. + + Returns: + proposals (list[Tensor]): A list of L tensors. Tensor i has shape + (N, Hi*Wi*A, B) + """ + N = pred_anchor_deltas[0].shape[0] + proposals = [] + # For each feature map + for anchors_i, pred_anchor_deltas_i in zip(anchors, pred_anchor_deltas): + B = anchors_i.tensor.size(1) + pred_anchor_deltas_i = pred_anchor_deltas_i.reshape(-1, B) + # Expand anchors to shape (N*Hi*Wi*A, B) + anchors_i = anchors_i.tensor.unsqueeze(0).expand(N, -1, -1).reshape(-1, B) + proposals_i = self.box2box_transform.apply_deltas(pred_anchor_deltas_i, anchors_i) + # Append feature map proposals with shape (N, Hi*Wi*A, B) + proposals.append(proposals_i.view(N, -1, B)) + return proposals diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/proposal_generator/rrpn.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/proposal_generator/rrpn.py new file mode 100644 index 0000000000000000000000000000000000000000..8535dcd992bc4a83ea05d285f0ec5fae1271f41d --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/proposal_generator/rrpn.py @@ -0,0 +1,209 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import itertools +import logging +from typing import Dict, List +import torch + +from annotator.oneformer.detectron2.config import configurable +from annotator.oneformer.detectron2.layers import ShapeSpec, batched_nms_rotated, cat +from annotator.oneformer.detectron2.structures import Instances, RotatedBoxes, pairwise_iou_rotated +from annotator.oneformer.detectron2.utils.memory import retry_if_cuda_oom + +from ..box_regression import Box2BoxTransformRotated +from .build import PROPOSAL_GENERATOR_REGISTRY +from .proposal_utils import _is_tracing +from .rpn import RPN + +logger = logging.getLogger(__name__) + + +def find_top_rrpn_proposals( + proposals, + pred_objectness_logits, + image_sizes, + nms_thresh, + pre_nms_topk, + post_nms_topk, + min_box_size, + training, +): + """ + For each feature map, select the `pre_nms_topk` highest scoring proposals, + apply NMS, clip proposals, and remove small boxes. Return the `post_nms_topk` + highest scoring proposals among all the feature maps if `training` is True, + otherwise, returns the highest `post_nms_topk` scoring proposals for each + feature map. + + Args: + proposals (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A, 5). + All proposal predictions on the feature maps. + pred_objectness_logits (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A). 
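# Editor's sketch (standalone; weights fixed to 1 and without the dw/dh clamping
# of the full Box2BoxTransform) of the delta decoding that _decode_proposals above
# delegates to apply_deltas: shift the anchor center by (dx*w, dy*h) and scale its
# size by (exp(dw), exp(dh)).
import math
import torch

def apply_deltas(deltas: torch.Tensor, boxes: torch.Tensor) -> torch.Tensor:
    w = boxes[:, 2] - boxes[:, 0]
    h = boxes[:, 3] - boxes[:, 1]
    cx = boxes[:, 0] + 0.5 * w
    cy = boxes[:, 1] + 0.5 * h
    dx, dy, dw, dh = deltas.unbind(dim=1)
    pcx, pcy = dx * w + cx, dy * h + cy
    pw, ph = torch.exp(dw) * w, torch.exp(dh) * h
    return torch.stack([pcx - 0.5 * pw, pcy - 0.5 * ph, pcx + 0.5 * pw, pcy + 0.5 * ph], dim=1)

anchor = torch.tensor([[0.0, 0.0, 10.0, 10.0]])
print(apply_deltas(torch.zeros(1, 4), anchor))                                # zero deltas -> the anchor itself
print(apply_deltas(torch.tensor([[0.0, 0.0, 0.0, math.log(2.0)]]), anchor))   # doubled height -> [0., -5., 10., 15.]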
+ image_sizes (list[tuple]): sizes (h, w) for each image + nms_thresh (float): IoU threshold to use for NMS + pre_nms_topk (int): number of top k scoring proposals to keep before applying NMS. + When RRPN is run on multiple feature maps (as in FPN) this number is per + feature map. + post_nms_topk (int): number of top k scoring proposals to keep after applying NMS. + When RRPN is run on multiple feature maps (as in FPN) this number is total, + over all feature maps. + min_box_size(float): minimum proposal box side length in pixels (absolute units wrt + input images). + training (bool): True if proposals are to be used in training, otherwise False. + This arg exists only to support a legacy bug; look for the "NB: Legacy bug ..." + comment. + + Returns: + proposals (list[Instances]): list of N Instances. The i-th Instances + stores post_nms_topk object proposals for image i. + """ + num_images = len(image_sizes) + device = proposals[0].device + + # 1. Select top-k anchor for every level and every image + topk_scores = [] # #lvl Tensor, each of shape N x topk + topk_proposals = [] + level_ids = [] # #lvl Tensor, each of shape (topk,) + batch_idx = torch.arange(num_images, device=device) + for level_id, proposals_i, logits_i in zip( + itertools.count(), proposals, pred_objectness_logits + ): + Hi_Wi_A = logits_i.shape[1] + if isinstance(Hi_Wi_A, torch.Tensor): # it's a tensor in tracing + num_proposals_i = torch.clamp(Hi_Wi_A, max=pre_nms_topk) + else: + num_proposals_i = min(Hi_Wi_A, pre_nms_topk) + + topk_scores_i, topk_idx = logits_i.topk(num_proposals_i, dim=1) + + # each is N x topk + topk_proposals_i = proposals_i[batch_idx[:, None], topk_idx] # N x topk x 5 + + topk_proposals.append(topk_proposals_i) + topk_scores.append(topk_scores_i) + level_ids.append(torch.full((num_proposals_i,), level_id, dtype=torch.int64, device=device)) + + # 2. Concat all levels together + topk_scores = cat(topk_scores, dim=1) + topk_proposals = cat(topk_proposals, dim=1) + level_ids = cat(level_ids, dim=0) + + # 3. For each image, run a per-level NMS, and choose topk results. + results = [] + for n, image_size in enumerate(image_sizes): + boxes = RotatedBoxes(topk_proposals[n]) + scores_per_img = topk_scores[n] + lvl = level_ids + + valid_mask = torch.isfinite(boxes.tensor).all(dim=1) & torch.isfinite(scores_per_img) + if not valid_mask.all(): + if training: + raise FloatingPointError( + "Predicted boxes or scores contain Inf/NaN. Training has diverged." + ) + boxes = boxes[valid_mask] + scores_per_img = scores_per_img[valid_mask] + lvl = lvl[valid_mask] + boxes.clip(image_size) + + # filter empty boxes + keep = boxes.nonempty(threshold=min_box_size) + if _is_tracing() or keep.sum().item() != len(boxes): + boxes, scores_per_img, lvl = (boxes[keep], scores_per_img[keep], lvl[keep]) + + keep = batched_nms_rotated(boxes.tensor, scores_per_img, lvl, nms_thresh) + # In Detectron1, there was different behavior during training vs. testing. + # (https://github.com/facebookresearch/Detectron/issues/459) + # During training, topk is over the proposals from *all* images in the training batch. + # During testing, it is over the proposals for each image separately. + # As a result, the training behavior becomes batch-dependent, + # and the configuration "POST_NMS_TOPK_TRAIN" end up relying on the batch size. + # This bug is addressed in Detectron2 to make the behavior independent of batch size. 
+ keep = keep[:post_nms_topk] + + res = Instances(image_size) + res.proposal_boxes = boxes[keep] + res.objectness_logits = scores_per_img[keep] + results.append(res) + return results + + +@PROPOSAL_GENERATOR_REGISTRY.register() +class RRPN(RPN): + """ + Rotated Region Proposal Network described in :paper:`RRPN`. + """ + + @configurable + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + if self.anchor_boundary_thresh >= 0: + raise NotImplementedError( + "anchor_boundary_thresh is a legacy option not implemented for RRPN." + ) + + @classmethod + def from_config(cls, cfg, input_shape: Dict[str, ShapeSpec]): + ret = super().from_config(cfg, input_shape) + ret["box2box_transform"] = Box2BoxTransformRotated(weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS) + return ret + + @torch.no_grad() + def label_and_sample_anchors(self, anchors: List[RotatedBoxes], gt_instances: List[Instances]): + """ + Args: + anchors (list[RotatedBoxes]): anchors for each feature map. + gt_instances: the ground-truth instances for each image. + + Returns: + list[Tensor]: + List of #img tensors. i-th element is a vector of labels whose length is + the total number of anchors across feature maps. Label values are in {-1, 0, 1}, + with meanings: -1 = ignore; 0 = negative class; 1 = positive class. + list[Tensor]: + i-th element is a Nx5 tensor, where N is the total number of anchors across + feature maps. The values are the matched gt boxes for each anchor. + Values are undefined for those anchors not labeled as 1. + """ + anchors = RotatedBoxes.cat(anchors) + + gt_boxes = [x.gt_boxes for x in gt_instances] + del gt_instances + + gt_labels = [] + matched_gt_boxes = [] + for gt_boxes_i in gt_boxes: + """ + gt_boxes_i: ground-truth boxes for i-th image + """ + match_quality_matrix = retry_if_cuda_oom(pairwise_iou_rotated)(gt_boxes_i, anchors) + matched_idxs, gt_labels_i = retry_if_cuda_oom(self.anchor_matcher)(match_quality_matrix) + # Matching is memory-expensive and may result in CPU tensors. But the result is small + gt_labels_i = gt_labels_i.to(device=gt_boxes_i.device) + + # A vector of labels (-1, 0, 1) for each anchor + gt_labels_i = self._subsample_labels(gt_labels_i) + + if len(gt_boxes_i) == 0: + # These values won't be used anyway since the anchor is labeled as background + matched_gt_boxes_i = torch.zeros_like(anchors.tensor) + else: + # TODO wasted indexing computation for ignored boxes + matched_gt_boxes_i = gt_boxes_i[matched_idxs].tensor + + gt_labels.append(gt_labels_i) # N,AHW + matched_gt_boxes.append(matched_gt_boxes_i) + return gt_labels, matched_gt_boxes + + @torch.no_grad() + def predict_proposals(self, anchors, pred_objectness_logits, pred_anchor_deltas, image_sizes): + pred_proposals = self._decode_proposals(anchors, pred_anchor_deltas) + return find_top_rrpn_proposals( + pred_proposals, + pred_objectness_logits, + image_sizes, + self.nms_thresh, + self.pre_nms_topk[self.training], + self.post_nms_topk[self.training], + self.min_box_size, + self.training, + ) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/roi_heads/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/roi_heads/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d13e9c57235b982f3e0645bc316de2b75755dfda --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/roi_heads/__init__.py @@ -0,0 +1,29 @@ +# Copyright (c) Facebook, Inc. 
and its affiliates. +from .box_head import ROI_BOX_HEAD_REGISTRY, build_box_head, FastRCNNConvFCHead +from .keypoint_head import ( + ROI_KEYPOINT_HEAD_REGISTRY, + build_keypoint_head, + BaseKeypointRCNNHead, + KRCNNConvDeconvUpsampleHead, +) +from .mask_head import ( + ROI_MASK_HEAD_REGISTRY, + build_mask_head, + BaseMaskRCNNHead, + MaskRCNNConvUpsampleHead, +) +from .roi_heads import ( + ROI_HEADS_REGISTRY, + ROIHeads, + Res5ROIHeads, + StandardROIHeads, + build_roi_heads, + select_foreground_proposals, +) +from .cascade_rcnn import CascadeROIHeads +from .rotated_fast_rcnn import RROIHeads +from .fast_rcnn import FastRCNNOutputLayers + +from . import cascade_rcnn # isort:skip + +__all__ = list(globals().keys()) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/roi_heads/box_head.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/roi_heads/box_head.py new file mode 100644 index 0000000000000000000000000000000000000000..1e598af4f08af6618997607e1633f2b842eb6da0 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/roi_heads/box_head.py @@ -0,0 +1,118 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import numpy as np +from typing import List +import fvcore.nn.weight_init as weight_init +import torch +from torch import nn + +from annotator.oneformer.detectron2.config import configurable +from annotator.oneformer.detectron2.layers import Conv2d, ShapeSpec, get_norm +from annotator.oneformer.detectron2.utils.registry import Registry + +__all__ = ["FastRCNNConvFCHead", "build_box_head", "ROI_BOX_HEAD_REGISTRY"] + +ROI_BOX_HEAD_REGISTRY = Registry("ROI_BOX_HEAD") +ROI_BOX_HEAD_REGISTRY.__doc__ = """ +Registry for box heads, which make box predictions from per-region features. + +The registered object will be called with `obj(cfg, input_shape)`. +""" + + +# To get torchscript support, we make the head a subclass of `nn.Sequential`. +# Therefore, to add new layers in this head class, please make sure they are +# added in the order they will be used in forward(). +@ROI_BOX_HEAD_REGISTRY.register() +class FastRCNNConvFCHead(nn.Sequential): + """ + A head with several 3x3 conv layers (each followed by norm & relu) and then + several fc layers (each followed by relu). + """ + + @configurable + def __init__( + self, input_shape: ShapeSpec, *, conv_dims: List[int], fc_dims: List[int], conv_norm="" + ): + """ + NOTE: this interface is experimental. + + Args: + input_shape (ShapeSpec): shape of the input feature. + conv_dims (list[int]): the output dimensions of the conv layers + fc_dims (list[int]): the output dimensions of the fc layers + conv_norm (str or callable): normalization for the conv layers. + See :func:`detectron2.layers.get_norm` for supported types. 
+ """ + super().__init__() + assert len(conv_dims) + len(fc_dims) > 0 + + self._output_size = (input_shape.channels, input_shape.height, input_shape.width) + + self.conv_norm_relus = [] + for k, conv_dim in enumerate(conv_dims): + conv = Conv2d( + self._output_size[0], + conv_dim, + kernel_size=3, + padding=1, + bias=not conv_norm, + norm=get_norm(conv_norm, conv_dim), + activation=nn.ReLU(), + ) + self.add_module("conv{}".format(k + 1), conv) + self.conv_norm_relus.append(conv) + self._output_size = (conv_dim, self._output_size[1], self._output_size[2]) + + self.fcs = [] + for k, fc_dim in enumerate(fc_dims): + if k == 0: + self.add_module("flatten", nn.Flatten()) + fc = nn.Linear(int(np.prod(self._output_size)), fc_dim) + self.add_module("fc{}".format(k + 1), fc) + self.add_module("fc_relu{}".format(k + 1), nn.ReLU()) + self.fcs.append(fc) + self._output_size = fc_dim + + for layer in self.conv_norm_relus: + weight_init.c2_msra_fill(layer) + for layer in self.fcs: + weight_init.c2_xavier_fill(layer) + + @classmethod + def from_config(cls, cfg, input_shape): + num_conv = cfg.MODEL.ROI_BOX_HEAD.NUM_CONV + conv_dim = cfg.MODEL.ROI_BOX_HEAD.CONV_DIM + num_fc = cfg.MODEL.ROI_BOX_HEAD.NUM_FC + fc_dim = cfg.MODEL.ROI_BOX_HEAD.FC_DIM + return { + "input_shape": input_shape, + "conv_dims": [conv_dim] * num_conv, + "fc_dims": [fc_dim] * num_fc, + "conv_norm": cfg.MODEL.ROI_BOX_HEAD.NORM, + } + + def forward(self, x): + for layer in self: + x = layer(x) + return x + + @property + @torch.jit.unused + def output_shape(self): + """ + Returns: + ShapeSpec: the output feature shape + """ + o = self._output_size + if isinstance(o, int): + return ShapeSpec(channels=o) + else: + return ShapeSpec(channels=o[0], height=o[1], width=o[2]) + + +def build_box_head(cfg, input_shape): + """ + Build a box head defined by `cfg.MODEL.ROI_BOX_HEAD.NAME`. + """ + name = cfg.MODEL.ROI_BOX_HEAD.NAME + return ROI_BOX_HEAD_REGISTRY.get(name)(cfg, input_shape) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/roi_heads/cascade_rcnn.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/roi_heads/cascade_rcnn.py new file mode 100644 index 0000000000000000000000000000000000000000..69b837be7a7c2202fe4f94b7212b49678fe06c1e --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/roi_heads/cascade_rcnn.py @@ -0,0 +1,299 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from typing import List +import torch +from torch import nn +from torch.autograd.function import Function + +from annotator.oneformer.detectron2.config import configurable +from annotator.oneformer.detectron2.layers import ShapeSpec +from annotator.oneformer.detectron2.structures import Boxes, Instances, pairwise_iou +from annotator.oneformer.detectron2.utils.events import get_event_storage + +from ..box_regression import Box2BoxTransform +from ..matcher import Matcher +from ..poolers import ROIPooler +from .box_head import build_box_head +from .fast_rcnn import FastRCNNOutputLayers, fast_rcnn_inference +from .roi_heads import ROI_HEADS_REGISTRY, StandardROIHeads + + +class _ScaleGradient(Function): + @staticmethod + def forward(ctx, input, scale): + ctx.scale = scale + return input + + @staticmethod + def backward(ctx, grad_output): + return grad_output * ctx.scale, None + + +@ROI_HEADS_REGISTRY.register() +class CascadeROIHeads(StandardROIHeads): + """ + The ROI heads that implement :paper:`Cascade R-CNN`. 
+ """ + + @configurable + def __init__( + self, + *, + box_in_features: List[str], + box_pooler: ROIPooler, + box_heads: List[nn.Module], + box_predictors: List[nn.Module], + proposal_matchers: List[Matcher], + **kwargs, + ): + """ + NOTE: this interface is experimental. + + Args: + box_pooler (ROIPooler): pooler that extracts region features from given boxes + box_heads (list[nn.Module]): box head for each cascade stage + box_predictors (list[nn.Module]): box predictor for each cascade stage + proposal_matchers (list[Matcher]): matcher with different IoU thresholds to + match boxes with ground truth for each stage. The first matcher matches + RPN proposals with ground truth, the other matchers use boxes predicted + by the previous stage as proposals and match them with ground truth. + """ + assert "proposal_matcher" not in kwargs, ( + "CascadeROIHeads takes 'proposal_matchers=' for each stage instead " + "of one 'proposal_matcher='." + ) + # The first matcher matches RPN proposals with ground truth, done in the base class + kwargs["proposal_matcher"] = proposal_matchers[0] + num_stages = self.num_cascade_stages = len(box_heads) + box_heads = nn.ModuleList(box_heads) + box_predictors = nn.ModuleList(box_predictors) + assert len(box_predictors) == num_stages, f"{len(box_predictors)} != {num_stages}!" + assert len(proposal_matchers) == num_stages, f"{len(proposal_matchers)} != {num_stages}!" + super().__init__( + box_in_features=box_in_features, + box_pooler=box_pooler, + box_head=box_heads, + box_predictor=box_predictors, + **kwargs, + ) + self.proposal_matchers = proposal_matchers + + @classmethod + def from_config(cls, cfg, input_shape): + ret = super().from_config(cfg, input_shape) + ret.pop("proposal_matcher") + return ret + + @classmethod + def _init_box_head(cls, cfg, input_shape): + # fmt: off + in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES + pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION + pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features) + sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO + pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE + cascade_bbox_reg_weights = cfg.MODEL.ROI_BOX_CASCADE_HEAD.BBOX_REG_WEIGHTS + cascade_ious = cfg.MODEL.ROI_BOX_CASCADE_HEAD.IOUS + assert len(cascade_bbox_reg_weights) == len(cascade_ious) + assert cfg.MODEL.ROI_BOX_HEAD.CLS_AGNOSTIC_BBOX_REG, \ + "CascadeROIHeads only support class-agnostic regression now!" 
+ assert cascade_ious[0] == cfg.MODEL.ROI_HEADS.IOU_THRESHOLDS[0] + # fmt: on + + in_channels = [input_shape[f].channels for f in in_features] + # Check all channel counts are equal + assert len(set(in_channels)) == 1, in_channels + in_channels = in_channels[0] + + box_pooler = ROIPooler( + output_size=pooler_resolution, + scales=pooler_scales, + sampling_ratio=sampling_ratio, + pooler_type=pooler_type, + ) + pooled_shape = ShapeSpec( + channels=in_channels, width=pooler_resolution, height=pooler_resolution + ) + + box_heads, box_predictors, proposal_matchers = [], [], [] + for match_iou, bbox_reg_weights in zip(cascade_ious, cascade_bbox_reg_weights): + box_head = build_box_head(cfg, pooled_shape) + box_heads.append(box_head) + box_predictors.append( + FastRCNNOutputLayers( + cfg, + box_head.output_shape, + box2box_transform=Box2BoxTransform(weights=bbox_reg_weights), + ) + ) + proposal_matchers.append(Matcher([match_iou], [0, 1], allow_low_quality_matches=False)) + return { + "box_in_features": in_features, + "box_pooler": box_pooler, + "box_heads": box_heads, + "box_predictors": box_predictors, + "proposal_matchers": proposal_matchers, + } + + def forward(self, images, features, proposals, targets=None): + del images + if self.training: + proposals = self.label_and_sample_proposals(proposals, targets) + + if self.training: + # Need targets to box head + losses = self._forward_box(features, proposals, targets) + losses.update(self._forward_mask(features, proposals)) + losses.update(self._forward_keypoint(features, proposals)) + return proposals, losses + else: + pred_instances = self._forward_box(features, proposals) + pred_instances = self.forward_with_given_boxes(features, pred_instances) + return pred_instances, {} + + def _forward_box(self, features, proposals, targets=None): + """ + Args: + features, targets: the same as in + Same as in :meth:`ROIHeads.forward`. + proposals (list[Instances]): the per-image object proposals with + their matching ground truth. + Each has fields "proposal_boxes", and "objectness_logits", + "gt_classes", "gt_boxes". + """ + features = [features[f] for f in self.box_in_features] + head_outputs = [] # (predictor, predictions, proposals) + prev_pred_boxes = None + image_sizes = [x.image_size for x in proposals] + for k in range(self.num_cascade_stages): + if k > 0: + # The output boxes of the previous stage are used to create the input + # proposals of the next stage. + proposals = self._create_proposals_from_boxes(prev_pred_boxes, image_sizes) + if self.training: + proposals = self._match_and_label_boxes(proposals, k, targets) + predictions = self._run_stage(features, proposals, k) + prev_pred_boxes = self.box_predictor[k].predict_boxes(predictions, proposals) + head_outputs.append((self.box_predictor[k], predictions, proposals)) + + if self.training: + losses = {} + storage = get_event_storage() + for stage, (predictor, predictions, proposals) in enumerate(head_outputs): + with storage.name_scope("stage{}".format(stage)): + stage_losses = predictor.losses(predictions, proposals) + losses.update({k + "_stage{}".format(stage): v for k, v in stage_losses.items()}) + return losses + else: + # Each is a list[Tensor] of length #image. 
Each tensor is Ri x (K+1) + scores_per_stage = [h[0].predict_probs(h[1], h[2]) for h in head_outputs] + + # Average the scores across heads + scores = [ + sum(list(scores_per_image)) * (1.0 / self.num_cascade_stages) + for scores_per_image in zip(*scores_per_stage) + ] + # Use the boxes of the last head + predictor, predictions, proposals = head_outputs[-1] + boxes = predictor.predict_boxes(predictions, proposals) + pred_instances, _ = fast_rcnn_inference( + boxes, + scores, + image_sizes, + predictor.test_score_thresh, + predictor.test_nms_thresh, + predictor.test_topk_per_image, + ) + return pred_instances + + @torch.no_grad() + def _match_and_label_boxes(self, proposals, stage, targets): + """ + Match proposals with groundtruth using the matcher at the given stage. + Label the proposals as foreground or background based on the match. + + Args: + proposals (list[Instances]): One Instances for each image, with + the field "proposal_boxes". + stage (int): the current stage + targets (list[Instances]): the ground truth instances + + Returns: + list[Instances]: the same proposals, but with fields "gt_classes" and "gt_boxes" + """ + num_fg_samples, num_bg_samples = [], [] + for proposals_per_image, targets_per_image in zip(proposals, targets): + match_quality_matrix = pairwise_iou( + targets_per_image.gt_boxes, proposals_per_image.proposal_boxes + ) + # proposal_labels are 0 or 1 + matched_idxs, proposal_labels = self.proposal_matchers[stage](match_quality_matrix) + if len(targets_per_image) > 0: + gt_classes = targets_per_image.gt_classes[matched_idxs] + # Label unmatched proposals (0 label from matcher) as background (label=num_classes) + gt_classes[proposal_labels == 0] = self.num_classes + gt_boxes = targets_per_image.gt_boxes[matched_idxs] + else: + gt_classes = torch.zeros_like(matched_idxs) + self.num_classes + gt_boxes = Boxes( + targets_per_image.gt_boxes.tensor.new_zeros((len(proposals_per_image), 4)) + ) + proposals_per_image.gt_classes = gt_classes + proposals_per_image.gt_boxes = gt_boxes + + num_fg_samples.append((proposal_labels == 1).sum().item()) + num_bg_samples.append(proposal_labels.numel() - num_fg_samples[-1]) + + # Log the number of fg/bg samples in each stage + storage = get_event_storage() + storage.put_scalar( + "stage{}/roi_head/num_fg_samples".format(stage), + sum(num_fg_samples) / len(num_fg_samples), + ) + storage.put_scalar( + "stage{}/roi_head/num_bg_samples".format(stage), + sum(num_bg_samples) / len(num_bg_samples), + ) + return proposals + + def _run_stage(self, features, proposals, stage): + """ + Args: + features (list[Tensor]): #lvl input features to ROIHeads + proposals (list[Instances]): #image Instances, with the field "proposal_boxes" + stage (int): the current stage + + Returns: + Same output as `FastRCNNOutputLayers.forward()`. + """ + box_features = self.box_pooler(features, [x.proposal_boxes for x in proposals]) + # The original implementation averages the losses among heads, + # but scale up the parameter gradients of the heads. + # This is equivalent to adding the losses among heads, + # but scale down the gradients on features. 
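# Editor's sketch (standalone) of the equivalence stated in the comment above:
# summing the per-stage losses while scaling the gradient that reaches the shared
# box features by 1/num_cascade_stages (what _ScaleGradient does below) matches
# averaging the losses.
import torch

num_stages = 3

def stage_losses(x):
    # toy per-stage losses that all read the same shared features x
    return [(x * (k + 1)).sum() for k in range(num_stages)]

x = torch.ones(3, requires_grad=True)
sum(stage_losses(x)).backward()
print(x.grad)  # tensor([6., 6., 6.]) -- summed losses: per-stage gradients add up

x = torch.ones(3, requires_grad=True)
(sum(stage_losses(x)) / num_stages).backward()
print(x.grad)  # tensor([2., 2., 2.]) -- averaged losses == scaling the shared gradient by 1/3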
+ if self.training: + box_features = _ScaleGradient.apply(box_features, 1.0 / self.num_cascade_stages) + box_features = self.box_head[stage](box_features) + return self.box_predictor[stage](box_features) + + def _create_proposals_from_boxes(self, boxes, image_sizes): + """ + Args: + boxes (list[Tensor]): per-image predicted boxes, each of shape Ri x 4 + image_sizes (list[tuple]): list of image shapes in (h, w) + + Returns: + list[Instances]: per-image proposals with the given boxes. + """ + # Just like RPN, the proposals should not have gradients + boxes = [Boxes(b.detach()) for b in boxes] + proposals = [] + for boxes_per_image, image_size in zip(boxes, image_sizes): + boxes_per_image.clip(image_size) + if self.training: + # do not filter empty boxes at inference time, + # because the scores from each stage need to be aligned and added later + boxes_per_image = boxes_per_image[boxes_per_image.nonempty()] + prop = Instances(image_size) + prop.proposal_boxes = boxes_per_image + proposals.append(prop) + return proposals diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/roi_heads/fast_rcnn.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/roi_heads/fast_rcnn.py new file mode 100644 index 0000000000000000000000000000000000000000..a81c58ea863f32a24ed7d5ad3b2e4e4416c6a0ab --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/roi_heads/fast_rcnn.py @@ -0,0 +1,569 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import logging +from typing import Callable, Dict, List, Optional, Tuple, Union +import torch +from torch import nn +from torch.nn import functional as F + +from annotator.oneformer.detectron2.config import configurable +from annotator.oneformer.detectron2.data.detection_utils import get_fed_loss_cls_weights +from annotator.oneformer.detectron2.layers import ShapeSpec, batched_nms, cat, cross_entropy, nonzero_tuple +from annotator.oneformer.detectron2.modeling.box_regression import Box2BoxTransform, _dense_box_regression_loss +from annotator.oneformer.detectron2.structures import Boxes, Instances +from annotator.oneformer.detectron2.utils.events import get_event_storage + +__all__ = ["fast_rcnn_inference", "FastRCNNOutputLayers"] + + +logger = logging.getLogger(__name__) + +""" +Shape shorthand in this module: + + N: number of images in the minibatch + R: number of ROIs, combined over all images, in the minibatch + Ri: number of ROIs in image i + K: number of foreground classes. E.g.,there are 80 foreground classes in COCO. + +Naming convention: + + deltas: refers to the 4-d (dx, dy, dw, dh) deltas that parameterize the box2box + transform (see :class:`box_regression.Box2BoxTransform`). + + pred_class_logits: predicted class scores in [-inf, +inf]; use + softmax(pred_class_logits) to estimate P(class). + + gt_classes: ground-truth classification labels in [0, K], where [0, K) represent + foreground object classes and K represents the background class. + + pred_proposal_deltas: predicted box2box transform deltas for transforming proposals + to detection box predictions. + + gt_proposal_deltas: ground-truth box2box transform deltas +""" + + +def fast_rcnn_inference( + boxes: List[torch.Tensor], + scores: List[torch.Tensor], + image_shapes: List[Tuple[int, int]], + score_thresh: float, + nms_thresh: float, + topk_per_image: int, +): + """ + Call `fast_rcnn_inference_single_image` for all images. 
+ + Args: + boxes (list[Tensor]): A list of Tensors of predicted class-specific or class-agnostic + boxes for each image. Element i has shape (Ri, K * 4) if doing + class-specific regression, or (Ri, 4) if doing class-agnostic + regression, where Ri is the number of predicted objects for image i. + This is compatible with the output of :meth:`FastRCNNOutputLayers.predict_boxes`. + scores (list[Tensor]): A list of Tensors of predicted class scores for each image. + Element i has shape (Ri, K + 1), where Ri is the number of predicted objects + for image i. Compatible with the output of :meth:`FastRCNNOutputLayers.predict_probs`. + image_shapes (list[tuple]): A list of (width, height) tuples for each image in the batch. + score_thresh (float): Only return detections with a confidence score exceeding this + threshold. + nms_thresh (float): The threshold to use for box non-maximum suppression. Value in [0, 1]. + topk_per_image (int): The number of top scoring detections to return. Set < 0 to return + all detections. + + Returns: + instances: (list[Instances]): A list of N instances, one for each image in the batch, + that stores the topk most confidence detections. + kept_indices: (list[Tensor]): A list of 1D tensor of length of N, each element indicates + the corresponding boxes/scores index in [0, Ri) from the input, for image i. + """ + result_per_image = [ + fast_rcnn_inference_single_image( + boxes_per_image, scores_per_image, image_shape, score_thresh, nms_thresh, topk_per_image + ) + for scores_per_image, boxes_per_image, image_shape in zip(scores, boxes, image_shapes) + ] + return [x[0] for x in result_per_image], [x[1] for x in result_per_image] + + +def _log_classification_stats(pred_logits, gt_classes, prefix="fast_rcnn"): + """ + Log the classification metrics to EventStorage. + + Args: + pred_logits: Rx(K+1) logits. The last column is for background class. + gt_classes: R labels + """ + num_instances = gt_classes.numel() + if num_instances == 0: + return + pred_classes = pred_logits.argmax(dim=1) + bg_class_ind = pred_logits.shape[1] - 1 + + fg_inds = (gt_classes >= 0) & (gt_classes < bg_class_ind) + num_fg = fg_inds.nonzero().numel() + fg_gt_classes = gt_classes[fg_inds] + fg_pred_classes = pred_classes[fg_inds] + + num_false_negative = (fg_pred_classes == bg_class_ind).nonzero().numel() + num_accurate = (pred_classes == gt_classes).nonzero().numel() + fg_num_accurate = (fg_pred_classes == fg_gt_classes).nonzero().numel() + + storage = get_event_storage() + storage.put_scalar(f"{prefix}/cls_accuracy", num_accurate / num_instances) + if num_fg > 0: + storage.put_scalar(f"{prefix}/fg_cls_accuracy", fg_num_accurate / num_fg) + storage.put_scalar(f"{prefix}/false_negative", num_false_negative / num_fg) + + +def fast_rcnn_inference_single_image( + boxes, + scores, + image_shape: Tuple[int, int], + score_thresh: float, + nms_thresh: float, + topk_per_image: int, +): + """ + Single-image inference. Return bounding-box detection results by thresholding + on scores and applying non-maximum suppression (NMS). + + Args: + Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes + per image. + + Returns: + Same as `fast_rcnn_inference`, but for only one image. + """ + valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1) + if not valid_mask.all(): + boxes = boxes[valid_mask] + scores = scores[valid_mask] + + scores = scores[:, :-1] + num_bbox_reg_classes = boxes.shape[1] // 4 + # Convert to Boxes to use the `clip` function ... 
+ boxes = Boxes(boxes.reshape(-1, 4)) + boxes.clip(image_shape) + boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4) # R x C x 4 + + # 1. Filter results based on detection scores. It can make NMS more efficient + # by filtering out low-confidence detections. + filter_mask = scores > score_thresh # R x K + # R' x 2. First column contains indices of the R predictions; + # Second column contains indices of classes. + filter_inds = filter_mask.nonzero() + if num_bbox_reg_classes == 1: + boxes = boxes[filter_inds[:, 0], 0] + else: + boxes = boxes[filter_mask] + scores = scores[filter_mask] + + # 2. Apply NMS for each class independently. + keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh) + if topk_per_image >= 0: + keep = keep[:topk_per_image] + boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep] + + result = Instances(image_shape) + result.pred_boxes = Boxes(boxes) + result.scores = scores + result.pred_classes = filter_inds[:, 1] + return result, filter_inds[:, 0] + + +class FastRCNNOutputLayers(nn.Module): + """ + Two linear layers for predicting Fast R-CNN outputs: + + 1. proposal-to-detection box regression deltas + 2. classification scores + """ + + @configurable + def __init__( + self, + input_shape: ShapeSpec, + *, + box2box_transform, + num_classes: int, + test_score_thresh: float = 0.0, + test_nms_thresh: float = 0.5, + test_topk_per_image: int = 100, + cls_agnostic_bbox_reg: bool = False, + smooth_l1_beta: float = 0.0, + box_reg_loss_type: str = "smooth_l1", + loss_weight: Union[float, Dict[str, float]] = 1.0, + use_fed_loss: bool = False, + use_sigmoid_ce: bool = False, + get_fed_loss_cls_weights: Optional[Callable] = None, + fed_loss_num_classes: int = 50, + ): + """ + NOTE: this interface is experimental. + + Args: + input_shape (ShapeSpec): shape of the input feature to this module + box2box_transform (Box2BoxTransform or Box2BoxTransformRotated): + num_classes (int): number of foreground classes + test_score_thresh (float): threshold to filter predictions results. + test_nms_thresh (float): NMS threshold for prediction results. + test_topk_per_image (int): number of top predictions to produce per image. + cls_agnostic_bbox_reg (bool): whether to use class agnostic for bbox regression + smooth_l1_beta (float): transition point from L1 to L2 loss. Only used if + `box_reg_loss_type` is "smooth_l1" + box_reg_loss_type (str): Box regression loss type. One of: "smooth_l1", "giou", + "diou", "ciou" + loss_weight (float|dict): weights to use for losses. Can be single float for weighting + all losses, or a dict of individual weightings. Valid dict keys are: + * "loss_cls": applied to classification loss + * "loss_box_reg": applied to box regression loss + use_fed_loss (bool): whether to use federated loss which samples additional negative + classes to calculate the loss + use_sigmoid_ce (bool): whether to calculate the loss using weighted average of binary + cross entropy with logits. This could be used together with federated loss + get_fed_loss_cls_weights (Callable): a callable which takes dataset name and frequency + weight power, and returns the probabilities to sample negative classes for + federated loss. 
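# Editor's sketch (standalone) of the score filtering used in
# fast_rcnn_inference_single_image above: filter_mask is R x K, and nonzero()
# yields (proposal index, class index) pairs, so a single proposal can survive
# for several classes before per-class NMS.
import torch

scores = torch.tensor([
    [0.90, 0.10, 0.20],
    [0.05, 0.60, 0.70],
    [0.10, 0.20, 0.30],
    [0.80, 0.05, 0.95],
])                                   # R x K (background column already dropped)
filter_mask = scores > 0.5
filter_inds = filter_mask.nonzero()
print(filter_inds.tolist())          # [[0, 0], [1, 1], [1, 2], [3, 0], [3, 2]]
print(scores[filter_mask])           # tensor([0.9000, 0.6000, 0.7000, 0.8000, 0.9500])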
The implementation can be found in + detectron2/data/detection_utils.py + fed_loss_num_classes (int): number of federated classes to keep in total + """ + super().__init__() + if isinstance(input_shape, int): # some backward compatibility + input_shape = ShapeSpec(channels=input_shape) + self.num_classes = num_classes + input_size = input_shape.channels * (input_shape.width or 1) * (input_shape.height or 1) + # prediction layer for num_classes foreground classes and one background class (hence + 1) + self.cls_score = nn.Linear(input_size, num_classes + 1) + num_bbox_reg_classes = 1 if cls_agnostic_bbox_reg else num_classes + box_dim = len(box2box_transform.weights) + self.bbox_pred = nn.Linear(input_size, num_bbox_reg_classes * box_dim) + + nn.init.normal_(self.cls_score.weight, std=0.01) + nn.init.normal_(self.bbox_pred.weight, std=0.001) + for l in [self.cls_score, self.bbox_pred]: + nn.init.constant_(l.bias, 0) + + self.box2box_transform = box2box_transform + self.smooth_l1_beta = smooth_l1_beta + self.test_score_thresh = test_score_thresh + self.test_nms_thresh = test_nms_thresh + self.test_topk_per_image = test_topk_per_image + self.box_reg_loss_type = box_reg_loss_type + if isinstance(loss_weight, float): + loss_weight = {"loss_cls": loss_weight, "loss_box_reg": loss_weight} + self.loss_weight = loss_weight + self.use_fed_loss = use_fed_loss + self.use_sigmoid_ce = use_sigmoid_ce + self.fed_loss_num_classes = fed_loss_num_classes + + if self.use_fed_loss: + assert self.use_sigmoid_ce, "Please use sigmoid cross entropy loss with federated loss" + fed_loss_cls_weights = get_fed_loss_cls_weights() + assert ( + len(fed_loss_cls_weights) == self.num_classes + ), "Please check the provided fed_loss_cls_weights. Their size should match num_classes" + self.register_buffer("fed_loss_cls_weights", fed_loss_cls_weights) + + @classmethod + def from_config(cls, cfg, input_shape): + return { + "input_shape": input_shape, + "box2box_transform": Box2BoxTransform(weights=cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS), + # fmt: off + "num_classes" : cfg.MODEL.ROI_HEADS.NUM_CLASSES, + "cls_agnostic_bbox_reg" : cfg.MODEL.ROI_BOX_HEAD.CLS_AGNOSTIC_BBOX_REG, + "smooth_l1_beta" : cfg.MODEL.ROI_BOX_HEAD.SMOOTH_L1_BETA, + "test_score_thresh" : cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST, + "test_nms_thresh" : cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST, + "test_topk_per_image" : cfg.TEST.DETECTIONS_PER_IMAGE, + "box_reg_loss_type" : cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_LOSS_TYPE, + "loss_weight" : {"loss_box_reg": cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_LOSS_WEIGHT}, # noqa + "use_fed_loss" : cfg.MODEL.ROI_BOX_HEAD.USE_FED_LOSS, + "use_sigmoid_ce" : cfg.MODEL.ROI_BOX_HEAD.USE_SIGMOID_CE, + "get_fed_loss_cls_weights" : lambda: get_fed_loss_cls_weights(dataset_names=cfg.DATASETS.TRAIN, freq_weight_power=cfg.MODEL.ROI_BOX_HEAD.FED_LOSS_FREQ_WEIGHT_POWER), # noqa + "fed_loss_num_classes" : cfg.MODEL.ROI_BOX_HEAD.FED_LOSS_NUM_CLASSES, + # fmt: on + } + + def forward(self, x): + """ + Args: + x: per-region features of shape (N, ...) for N bounding boxes to predict. + + Returns: + (Tensor, Tensor): + First tensor: shape (N,K+1), scores for each of the N box. Each row contains the + scores for K object categories and 1 background class. + + Second tensor: bounding box regression deltas for each box. Shape is shape (N,Kx4), + or (N,4) for class-agnostic regression. 
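# Editor's sketch (standalone, plain PyTorch) of the two prediction layers this
# module builds: K+1 classification scores (including background) and K*box_dim
# class-specific regression deltas per region.
import torch
import torch.nn as nn

num_classes, box_dim, feat_dim, R = 80, 4, 1024, 512
cls_score = nn.Linear(feat_dim, num_classes + 1)          # +1 for the background class
bbox_pred = nn.Linear(feat_dim, num_classes * box_dim)    # class-specific regression

x = torch.randn(R, feat_dim)                               # pooled per-region features
print(cls_score(x).shape)   # torch.Size([512, 81])
print(bbox_pred(x).shape)   # torch.Size([512, 320])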
+ """ + if x.dim() > 2: + x = torch.flatten(x, start_dim=1) + scores = self.cls_score(x) + proposal_deltas = self.bbox_pred(x) + return scores, proposal_deltas + + def losses(self, predictions, proposals): + """ + Args: + predictions: return values of :meth:`forward()`. + proposals (list[Instances]): proposals that match the features that were used + to compute predictions. The fields ``proposal_boxes``, ``gt_boxes``, + ``gt_classes`` are expected. + + Returns: + Dict[str, Tensor]: dict of losses + """ + scores, proposal_deltas = predictions + + # parse classification outputs + gt_classes = ( + cat([p.gt_classes for p in proposals], dim=0) if len(proposals) else torch.empty(0) + ) + _log_classification_stats(scores, gt_classes) + + # parse box regression outputs + if len(proposals): + proposal_boxes = cat([p.proposal_boxes.tensor for p in proposals], dim=0) # Nx4 + assert not proposal_boxes.requires_grad, "Proposals should not require gradients!" + # If "gt_boxes" does not exist, the proposals must be all negative and + # should not be included in regression loss computation. + # Here we just use proposal_boxes as an arbitrary placeholder because its + # value won't be used in self.box_reg_loss(). + gt_boxes = cat( + [(p.gt_boxes if p.has("gt_boxes") else p.proposal_boxes).tensor for p in proposals], + dim=0, + ) + else: + proposal_boxes = gt_boxes = torch.empty((0, 4), device=proposal_deltas.device) + + if self.use_sigmoid_ce: + loss_cls = self.sigmoid_cross_entropy_loss(scores, gt_classes) + else: + loss_cls = cross_entropy(scores, gt_classes, reduction="mean") + + losses = { + "loss_cls": loss_cls, + "loss_box_reg": self.box_reg_loss( + proposal_boxes, gt_boxes, proposal_deltas, gt_classes + ), + } + return {k: v * self.loss_weight.get(k, 1.0) for k, v in losses.items()} + + # Implementation from https://github.com/xingyizhou/CenterNet2/blob/master/projects/CenterNet2/centernet/modeling/roi_heads/fed_loss.py # noqa + # with slight modifications + def get_fed_loss_classes(self, gt_classes, num_fed_loss_classes, num_classes, weight): + """ + Args: + gt_classes: a long tensor of shape R that contains the gt class label of each proposal. + num_fed_loss_classes: minimum number of classes to keep when calculating federated loss. + Will sample negative classes if number of unique gt_classes is smaller than this value. + num_classes: number of foreground classes + weight: probabilities used to sample negative classes + + Returns: + Tensor: + classes to keep when calculating the federated loss, including both unique gt + classes and sampled negative classes. + """ + unique_gt_classes = torch.unique(gt_classes) + prob = unique_gt_classes.new_ones(num_classes + 1).float() + prob[-1] = 0 + if len(unique_gt_classes) < num_fed_loss_classes: + prob[:num_classes] = weight.float().clone() + prob[unique_gt_classes] = 0 + sampled_negative_classes = torch.multinomial( + prob, num_fed_loss_classes - len(unique_gt_classes), replacement=False + ) + fed_loss_classes = torch.cat([unique_gt_classes, sampled_negative_classes]) + else: + fed_loss_classes = unique_gt_classes + return fed_loss_classes + + # Implementation from https://github.com/xingyizhou/CenterNet2/blob/master/projects/CenterNet2/centernet/modeling/roi_heads/custom_fast_rcnn.py#L113 # noqa + # with slight modifications + def sigmoid_cross_entropy_loss(self, pred_class_logits, gt_classes): + """ + Args: + pred_class_logits: shape (N, K+1), scores for each of the N box. 
Each row contains the + scores for K object categories and 1 background class + gt_classes: a long tensor of shape R that contains the gt class label of each proposal. + """ + if pred_class_logits.numel() == 0: + return pred_class_logits.new_zeros([1])[0] + + N = pred_class_logits.shape[0] + K = pred_class_logits.shape[1] - 1 + + target = pred_class_logits.new_zeros(N, K + 1) + target[range(len(gt_classes)), gt_classes] = 1 + target = target[:, :K] + + cls_loss = F.binary_cross_entropy_with_logits( + pred_class_logits[:, :-1], target, reduction="none" + ) + + if self.use_fed_loss: + fed_loss_classes = self.get_fed_loss_classes( + gt_classes, + num_fed_loss_classes=self.fed_loss_num_classes, + num_classes=K, + weight=self.fed_loss_cls_weights, + ) + fed_loss_classes_mask = fed_loss_classes.new_zeros(K + 1) + fed_loss_classes_mask[fed_loss_classes] = 1 + fed_loss_classes_mask = fed_loss_classes_mask[:K] + weight = fed_loss_classes_mask.view(1, K).expand(N, K).float() + else: + weight = 1 + + loss = torch.sum(cls_loss * weight) / N + return loss + + def box_reg_loss(self, proposal_boxes, gt_boxes, pred_deltas, gt_classes): + """ + Args: + proposal_boxes/gt_boxes are tensors with the same shape (R, 4 or 5). + pred_deltas has shape (R, 4 or 5), or (R, num_classes * (4 or 5)). + gt_classes is a long tensor of shape R, the gt class label of each proposal. + R shall be the number of proposals. + """ + box_dim = proposal_boxes.shape[1] # 4 or 5 + # Regression loss is only computed for foreground proposals (those matched to a GT) + fg_inds = nonzero_tuple((gt_classes >= 0) & (gt_classes < self.num_classes))[0] + if pred_deltas.shape[1] == box_dim: # cls-agnostic regression + fg_pred_deltas = pred_deltas[fg_inds] + else: + fg_pred_deltas = pred_deltas.view(-1, self.num_classes, box_dim)[ + fg_inds, gt_classes[fg_inds] + ] + + loss_box_reg = _dense_box_regression_loss( + [proposal_boxes[fg_inds]], + self.box2box_transform, + [fg_pred_deltas.unsqueeze(0)], + [gt_boxes[fg_inds]], + ..., + self.box_reg_loss_type, + self.smooth_l1_beta, + ) + + # The reg loss is normalized using the total number of regions (R), not the number + # of foreground regions even though the box regression loss is only defined on + # foreground regions. Why? Because doing so gives equal training influence to + # each foreground example. To see how, consider two different minibatches: + # (1) Contains a single foreground region + # (2) Contains 100 foreground regions + # If we normalize by the number of foreground regions, the single example in + # minibatch (1) will be given 100 times as much influence as each foreground + # example in minibatch (2). Normalizing by the total number of regions, R, + # means that the single example in minibatch (1) and each of the 100 examples + # in minibatch (2) are given equal influence. + return loss_box_reg / max(gt_classes.numel(), 1.0) # return 0 if empty + + def inference(self, predictions: Tuple[torch.Tensor, torch.Tensor], proposals: List[Instances]): + """ + Args: + predictions: return values of :meth:`forward()`. + proposals (list[Instances]): proposals that match the features that were + used to compute predictions. The ``proposal_boxes`` field is expected. + + Returns: + list[Instances]: same as `fast_rcnn_inference`. + list[Tensor]: same as `fast_rcnn_inference`. 
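The sigmoid_cross_entropy_loss above builds its target by one-hot encoding over K+1 columns and then dropping the background column, so background proposals get an all-zero target row. A small stand-alone sketch of just that step (K and the labels below are made up):

    import torch
    import torch.nn.functional as F

    K = 5
    gt_classes = torch.tensor([0, 2, K])           # last proposal is background (label == K)
    logits = torch.randn(3, K + 1)

    target = logits.new_zeros(3, K + 1)
    target[range(3), gt_classes] = 1
    target = target[:, :K]                         # background row becomes all zeros
    cls_loss = F.binary_cross_entropy_with_logits(logits[:, :-1], target, reduction="none")
    loss = cls_loss.sum() / 3                      # normalized by N, as in the code above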
+ """ + boxes = self.predict_boxes(predictions, proposals) + scores = self.predict_probs(predictions, proposals) + image_shapes = [x.image_size for x in proposals] + return fast_rcnn_inference( + boxes, + scores, + image_shapes, + self.test_score_thresh, + self.test_nms_thresh, + self.test_topk_per_image, + ) + + def predict_boxes_for_gt_classes(self, predictions, proposals): + """ + Args: + predictions: return values of :meth:`forward()`. + proposals (list[Instances]): proposals that match the features that were used + to compute predictions. The fields ``proposal_boxes``, ``gt_classes`` are expected. + + Returns: + list[Tensor]: + A list of Tensors of predicted boxes for GT classes in case of + class-specific box head. Element i of the list has shape (Ri, B), where Ri is + the number of proposals for image i and B is the box dimension (4 or 5) + """ + if not len(proposals): + return [] + scores, proposal_deltas = predictions + proposal_boxes = cat([p.proposal_boxes.tensor for p in proposals], dim=0) + N, B = proposal_boxes.shape + predict_boxes = self.box2box_transform.apply_deltas( + proposal_deltas, proposal_boxes + ) # Nx(KxB) + + K = predict_boxes.shape[1] // B + if K > 1: + gt_classes = torch.cat([p.gt_classes for p in proposals], dim=0) + # Some proposals are ignored or have a background class. Their gt_classes + # cannot be used as index. + gt_classes = gt_classes.clamp_(0, K - 1) + + predict_boxes = predict_boxes.view(N, K, B)[ + torch.arange(N, dtype=torch.long, device=predict_boxes.device), gt_classes + ] + num_prop_per_image = [len(p) for p in proposals] + return predict_boxes.split(num_prop_per_image) + + def predict_boxes( + self, predictions: Tuple[torch.Tensor, torch.Tensor], proposals: List[Instances] + ): + """ + Args: + predictions: return values of :meth:`forward()`. + proposals (list[Instances]): proposals that match the features that were + used to compute predictions. The ``proposal_boxes`` field is expected. + + Returns: + list[Tensor]: + A list of Tensors of predicted class-specific or class-agnostic boxes + for each image. Element i has shape (Ri, K * B) or (Ri, B), where Ri is + the number of proposals for image i and B is the box dimension (4 or 5) + """ + if not len(proposals): + return [] + _, proposal_deltas = predictions + num_prop_per_image = [len(p) for p in proposals] + proposal_boxes = cat([p.proposal_boxes.tensor for p in proposals], dim=0) + predict_boxes = self.box2box_transform.apply_deltas( + proposal_deltas, + proposal_boxes, + ) # Nx(KxB) + return predict_boxes.split(num_prop_per_image) + + def predict_probs( + self, predictions: Tuple[torch.Tensor, torch.Tensor], proposals: List[Instances] + ): + """ + Args: + predictions: return values of :meth:`forward()`. + proposals (list[Instances]): proposals that match the features that were + used to compute predictions. + + Returns: + list[Tensor]: + A list of Tensors of predicted class probabilities for each image. + Element i has shape (Ri, K + 1), where Ri is the number of proposals for image i. 
+ """ + scores, _ = predictions + num_inst_per_image = [len(p) for p in proposals] + if self.use_sigmoid_ce: + probs = scores.sigmoid() + else: + probs = F.softmax(scores, dim=-1) + return probs.split(num_inst_per_image, dim=0) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/roi_heads/keypoint_head.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/roi_heads/keypoint_head.py new file mode 100644 index 0000000000000000000000000000000000000000..bcf92dc8ab553beef98f4b8ddde639ed9d4ff0cc --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/roi_heads/keypoint_head.py @@ -0,0 +1,272 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from typing import List +import torch +from torch import nn +from torch.nn import functional as F + +from annotator.oneformer.detectron2.config import configurable +from annotator.oneformer.detectron2.layers import Conv2d, ConvTranspose2d, cat, interpolate +from annotator.oneformer.detectron2.structures import Instances, heatmaps_to_keypoints +from annotator.oneformer.detectron2.utils.events import get_event_storage +from annotator.oneformer.detectron2.utils.registry import Registry + +_TOTAL_SKIPPED = 0 + + +__all__ = [ + "ROI_KEYPOINT_HEAD_REGISTRY", + "build_keypoint_head", + "BaseKeypointRCNNHead", + "KRCNNConvDeconvUpsampleHead", +] + + +ROI_KEYPOINT_HEAD_REGISTRY = Registry("ROI_KEYPOINT_HEAD") +ROI_KEYPOINT_HEAD_REGISTRY.__doc__ = """ +Registry for keypoint heads, which make keypoint predictions from per-region features. + +The registered object will be called with `obj(cfg, input_shape)`. +""" + + +def build_keypoint_head(cfg, input_shape): + """ + Build a keypoint head from `cfg.MODEL.ROI_KEYPOINT_HEAD.NAME`. + """ + name = cfg.MODEL.ROI_KEYPOINT_HEAD.NAME + return ROI_KEYPOINT_HEAD_REGISTRY.get(name)(cfg, input_shape) + + +def keypoint_rcnn_loss(pred_keypoint_logits, instances, normalizer): + """ + Arguments: + pred_keypoint_logits (Tensor): A tensor of shape (N, K, S, S) where N is the total number + of instances in the batch, K is the number of keypoints, and S is the side length + of the keypoint heatmap. The values are spatial logits. + instances (list[Instances]): A list of M Instances, where M is the batch size. + These instances are predictions from the model + that are in 1:1 correspondence with pred_keypoint_logits. + Each Instances should contain a `gt_keypoints` field containing a `structures.Keypoint` + instance. + normalizer (float): Normalize the loss by this amount. + If not specified, we normalize by the number of visible keypoints in the minibatch. + + Returns a scalar tensor containing the loss. 
+ """ + heatmaps = [] + valid = [] + + keypoint_side_len = pred_keypoint_logits.shape[2] + for instances_per_image in instances: + if len(instances_per_image) == 0: + continue + keypoints = instances_per_image.gt_keypoints + heatmaps_per_image, valid_per_image = keypoints.to_heatmap( + instances_per_image.proposal_boxes.tensor, keypoint_side_len + ) + heatmaps.append(heatmaps_per_image.view(-1)) + valid.append(valid_per_image.view(-1)) + + if len(heatmaps): + keypoint_targets = cat(heatmaps, dim=0) + valid = cat(valid, dim=0).to(dtype=torch.uint8) + valid = torch.nonzero(valid).squeeze(1) + + # torch.mean (in binary_cross_entropy_with_logits) doesn't + # accept empty tensors, so handle it separately + if len(heatmaps) == 0 or valid.numel() == 0: + global _TOTAL_SKIPPED + _TOTAL_SKIPPED += 1 + storage = get_event_storage() + storage.put_scalar("kpts_num_skipped_batches", _TOTAL_SKIPPED, smoothing_hint=False) + return pred_keypoint_logits.sum() * 0 + + N, K, H, W = pred_keypoint_logits.shape + pred_keypoint_logits = pred_keypoint_logits.view(N * K, H * W) + + keypoint_loss = F.cross_entropy( + pred_keypoint_logits[valid], keypoint_targets[valid], reduction="sum" + ) + + # If a normalizer isn't specified, normalize by the number of visible keypoints in the minibatch + if normalizer is None: + normalizer = valid.numel() + keypoint_loss /= normalizer + + return keypoint_loss + + +def keypoint_rcnn_inference(pred_keypoint_logits: torch.Tensor, pred_instances: List[Instances]): + """ + Post process each predicted keypoint heatmap in `pred_keypoint_logits` into (x, y, score) + and add it to the `pred_instances` as a `pred_keypoints` field. + + Args: + pred_keypoint_logits (Tensor): A tensor of shape (R, K, S, S) where R is the total number + of instances in the batch, K is the number of keypoints, and S is the side length of + the keypoint heatmap. The values are spatial logits. + pred_instances (list[Instances]): A list of N Instances, where N is the number of images. + + Returns: + None. Each element in pred_instances will contain extra "pred_keypoints" and + "pred_keypoint_heatmaps" fields. "pred_keypoints" is a tensor of shape + (#instance, K, 3) where the last dimension corresponds to (x, y, score). + The scores are larger than 0. "pred_keypoint_heatmaps" contains the raw + keypoint logits as passed to this function. + """ + # flatten all bboxes from all images together (list[Boxes] -> Rx4 tensor) + bboxes_flat = cat([b.pred_boxes.tensor for b in pred_instances], dim=0) + + pred_keypoint_logits = pred_keypoint_logits.detach() + keypoint_results = heatmaps_to_keypoints(pred_keypoint_logits, bboxes_flat.detach()) + num_instances_per_image = [len(i) for i in pred_instances] + keypoint_results = keypoint_results[:, :, [0, 1, 3]].split(num_instances_per_image, dim=0) + heatmap_results = pred_keypoint_logits.split(num_instances_per_image, dim=0) + + for keypoint_results_per_image, heatmap_results_per_image, instances_per_image in zip( + keypoint_results, heatmap_results, pred_instances + ): + # keypoint_results_per_image is (num instances)x(num keypoints)x(x, y, score) + # heatmap_results_per_image is (num instances)x(num keypoints)x(side)x(side) + instances_per_image.pred_keypoints = keypoint_results_per_image + instances_per_image.pred_keypoint_heatmaps = heatmap_results_per_image + + +class BaseKeypointRCNNHead(nn.Module): + """ + Implement the basic Keypoint R-CNN losses and inference logic described in + Sec. 5 of :paper:`Mask R-CNN`. 
+ """ + + @configurable + def __init__(self, *, num_keypoints, loss_weight=1.0, loss_normalizer=1.0): + """ + NOTE: this interface is experimental. + + Args: + num_keypoints (int): number of keypoints to predict + loss_weight (float): weight to multiple on the keypoint loss + loss_normalizer (float or str): + If float, divide the loss by `loss_normalizer * #images`. + If 'visible', the loss is normalized by the total number of + visible keypoints across images. + """ + super().__init__() + self.num_keypoints = num_keypoints + self.loss_weight = loss_weight + assert loss_normalizer == "visible" or isinstance(loss_normalizer, float), loss_normalizer + self.loss_normalizer = loss_normalizer + + @classmethod + def from_config(cls, cfg, input_shape): + ret = { + "loss_weight": cfg.MODEL.ROI_KEYPOINT_HEAD.LOSS_WEIGHT, + "num_keypoints": cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS, + } + normalize_by_visible = ( + cfg.MODEL.ROI_KEYPOINT_HEAD.NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS + ) # noqa + if not normalize_by_visible: + batch_size_per_image = cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE + positive_sample_fraction = cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION + ret["loss_normalizer"] = ( + ret["num_keypoints"] * batch_size_per_image * positive_sample_fraction + ) + else: + ret["loss_normalizer"] = "visible" + return ret + + def forward(self, x, instances: List[Instances]): + """ + Args: + x: input 4D region feature(s) provided by :class:`ROIHeads`. + instances (list[Instances]): contains the boxes & labels corresponding + to the input features. + Exact format is up to its caller to decide. + Typically, this is the foreground instances in training, with + "proposal_boxes" field and other gt annotations. + In inference, it contains boxes that are already predicted. + + Returns: + A dict of losses if in training. The predicted "instances" if in inference. + """ + x = self.layers(x) + if self.training: + num_images = len(instances) + normalizer = ( + None if self.loss_normalizer == "visible" else num_images * self.loss_normalizer + ) + return { + "loss_keypoint": keypoint_rcnn_loss(x, instances, normalizer=normalizer) + * self.loss_weight + } + else: + keypoint_rcnn_inference(x, instances) + return instances + + def layers(self, x): + """ + Neural network layers that makes predictions from regional input features. + """ + raise NotImplementedError + + +# To get torchscript support, we make the head a subclass of `nn.Sequential`. +# Therefore, to add new layers in this head class, please make sure they are +# added in the order they will be used in forward(). +@ROI_KEYPOINT_HEAD_REGISTRY.register() +class KRCNNConvDeconvUpsampleHead(BaseKeypointRCNNHead, nn.Sequential): + """ + A standard keypoint head containing a series of 3x3 convs, followed by + a transpose convolution and bilinear interpolation for upsampling. + It is described in Sec. 5 of :paper:`Mask R-CNN`. + """ + + @configurable + def __init__(self, input_shape, *, num_keypoints, conv_dims, **kwargs): + """ + NOTE: this interface is experimental. + + Args: + input_shape (ShapeSpec): shape of the input feature + conv_dims: an iterable of output channel counts for each conv in the head + e.g. (512, 512, 512) for three convs outputting 512 channels. 
+ """ + super().__init__(num_keypoints=num_keypoints, **kwargs) + + # default up_scale to 2.0 (this can be made an option) + up_scale = 2.0 + in_channels = input_shape.channels + + for idx, layer_channels in enumerate(conv_dims, 1): + module = Conv2d(in_channels, layer_channels, 3, stride=1, padding=1) + self.add_module("conv_fcn{}".format(idx), module) + self.add_module("conv_fcn_relu{}".format(idx), nn.ReLU()) + in_channels = layer_channels + + deconv_kernel = 4 + self.score_lowres = ConvTranspose2d( + in_channels, num_keypoints, deconv_kernel, stride=2, padding=deconv_kernel // 2 - 1 + ) + self.up_scale = up_scale + + for name, param in self.named_parameters(): + if "bias" in name: + nn.init.constant_(param, 0) + elif "weight" in name: + # Caffe2 implementation uses MSRAFill, which in fact + # corresponds to kaiming_normal_ in PyTorch + nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu") + + @classmethod + def from_config(cls, cfg, input_shape): + ret = super().from_config(cfg, input_shape) + ret["input_shape"] = input_shape + ret["conv_dims"] = cfg.MODEL.ROI_KEYPOINT_HEAD.CONV_DIMS + return ret + + def layers(self, x): + for layer in self: + x = layer(x) + x = interpolate(x, scale_factor=self.up_scale, mode="bilinear", align_corners=False) + return x diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/roi_heads/mask_head.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/roi_heads/mask_head.py new file mode 100644 index 0000000000000000000000000000000000000000..1b5465e413195aa21733157af4e1ae3a2b897e7c --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/roi_heads/mask_head.py @@ -0,0 +1,298 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from typing import List +import fvcore.nn.weight_init as weight_init +import torch +from torch import nn +from torch.nn import functional as F + +from annotator.oneformer.detectron2.config import configurable +from annotator.oneformer.detectron2.layers import Conv2d, ConvTranspose2d, ShapeSpec, cat, get_norm +from annotator.oneformer.detectron2.layers.wrappers import move_device_like +from annotator.oneformer.detectron2.structures import Instances +from annotator.oneformer.detectron2.utils.events import get_event_storage +from annotator.oneformer.detectron2.utils.registry import Registry + +__all__ = [ + "BaseMaskRCNNHead", + "MaskRCNNConvUpsampleHead", + "build_mask_head", + "ROI_MASK_HEAD_REGISTRY", +] + + +ROI_MASK_HEAD_REGISTRY = Registry("ROI_MASK_HEAD") +ROI_MASK_HEAD_REGISTRY.__doc__ = """ +Registry for mask heads, which predicts instance masks given +per-region features. + +The registered object will be called with `obj(cfg, input_shape)`. +""" + + +@torch.jit.unused +def mask_rcnn_loss(pred_mask_logits: torch.Tensor, instances: List[Instances], vis_period: int = 0): + """ + Compute the mask prediction loss defined in the Mask R-CNN paper. + + Args: + pred_mask_logits (Tensor): A tensor of shape (B, C, Hmask, Wmask) or (B, 1, Hmask, Wmask) + for class-specific or class-agnostic, where B is the total number of predicted masks + in all images, C is the number of foreground classes, and Hmask, Wmask are the height + and width of the mask predictions. The values are logits. + instances (list[Instances]): A list of N Instances, where N is the number of images + in the batch. These instances are in 1:1 + correspondence with the pred_mask_logits. 
The ground-truth labels (class, box, mask, + ...) associated with each instance are stored in fields. + vis_period (int): the period (in steps) to dump visualization. + + Returns: + mask_loss (Tensor): A scalar tensor containing the loss. + """ + cls_agnostic_mask = pred_mask_logits.size(1) == 1 + total_num_masks = pred_mask_logits.size(0) + mask_side_len = pred_mask_logits.size(2) + assert pred_mask_logits.size(2) == pred_mask_logits.size(3), "Mask prediction must be square!" + + gt_classes = [] + gt_masks = [] + for instances_per_image in instances: + if len(instances_per_image) == 0: + continue + if not cls_agnostic_mask: + gt_classes_per_image = instances_per_image.gt_classes.to(dtype=torch.int64) + gt_classes.append(gt_classes_per_image) + + gt_masks_per_image = instances_per_image.gt_masks.crop_and_resize( + instances_per_image.proposal_boxes.tensor, mask_side_len + ).to(device=pred_mask_logits.device) + # A tensor of shape (N, M, M), N=#instances in the image; M=mask_side_len + gt_masks.append(gt_masks_per_image) + + if len(gt_masks) == 0: + return pred_mask_logits.sum() * 0 + + gt_masks = cat(gt_masks, dim=0) + + if cls_agnostic_mask: + pred_mask_logits = pred_mask_logits[:, 0] + else: + indices = torch.arange(total_num_masks) + gt_classes = cat(gt_classes, dim=0) + pred_mask_logits = pred_mask_logits[indices, gt_classes] + + if gt_masks.dtype == torch.bool: + gt_masks_bool = gt_masks + else: + # Here we allow gt_masks to be float as well (depend on the implementation of rasterize()) + gt_masks_bool = gt_masks > 0.5 + gt_masks = gt_masks.to(dtype=torch.float32) + + # Log the training accuracy (using gt classes and 0.5 threshold) + mask_incorrect = (pred_mask_logits > 0.0) != gt_masks_bool + mask_accuracy = 1 - (mask_incorrect.sum().item() / max(mask_incorrect.numel(), 1.0)) + num_positive = gt_masks_bool.sum().item() + false_positive = (mask_incorrect & ~gt_masks_bool).sum().item() / max( + gt_masks_bool.numel() - num_positive, 1.0 + ) + false_negative = (mask_incorrect & gt_masks_bool).sum().item() / max(num_positive, 1.0) + + storage = get_event_storage() + storage.put_scalar("mask_rcnn/accuracy", mask_accuracy) + storage.put_scalar("mask_rcnn/false_positive", false_positive) + storage.put_scalar("mask_rcnn/false_negative", false_negative) + if vis_period > 0 and storage.iter % vis_period == 0: + pred_masks = pred_mask_logits.sigmoid() + vis_masks = torch.cat([pred_masks, gt_masks], axis=2) + name = "Left: mask prediction; Right: mask GT" + for idx, vis_mask in enumerate(vis_masks): + vis_mask = torch.stack([vis_mask] * 3, axis=0) + storage.put_image(name + f" ({idx})", vis_mask) + + mask_loss = F.binary_cross_entropy_with_logits(pred_mask_logits, gt_masks, reduction="mean") + return mask_loss + + +def mask_rcnn_inference(pred_mask_logits: torch.Tensor, pred_instances: List[Instances]): + """ + Convert pred_mask_logits to estimated foreground probability masks while also + extracting only the masks for the predicted classes in pred_instances. For each + predicted box, the mask of the same class is attached to the instance by adding a + new "pred_masks" field to pred_instances. + + Args: + pred_mask_logits (Tensor): A tensor of shape (B, C, Hmask, Wmask) or (B, 1, Hmask, Wmask) + for class-specific or class-agnostic, where B is the total number of predicted masks + in all images, C is the number of foreground classes, and Hmask, Wmask are the height + and width of the mask predictions. The values are logits. 
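The class-specific branch of mask_rcnn_loss above keeps, for every predicted mask, only the channel of its ground-truth class before applying binary cross-entropy. A minimal sketch of that selection with made-up sizes and labels:

    import torch
    import torch.nn.functional as F

    B, C, M = 4, 80, 28                                          # masks, classes, side length
    pred_mask_logits = torch.randn(B, C, M, M)
    gt_classes = torch.tensor([3, 17, 0, 42])
    gt_masks = (torch.rand(B, M, M) > 0.5).float()

    selected = pred_mask_logits[torch.arange(B), gt_classes]     # (B, M, M)
    mask_loss = F.binary_cross_entropy_with_logits(selected, gt_masks, reduction="mean")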
+ pred_instances (list[Instances]): A list of N Instances, where N is the number of images + in the batch. Each Instances must have field "pred_classes". + + Returns: + None. pred_instances will contain an extra "pred_masks" field storing a mask of size (Hmask, + Wmask) for predicted class. Note that the masks are returned as a soft (non-quantized) + masks the resolution predicted by the network; post-processing steps, such as resizing + the predicted masks to the original image resolution and/or binarizing them, is left + to the caller. + """ + cls_agnostic_mask = pred_mask_logits.size(1) == 1 + + if cls_agnostic_mask: + mask_probs_pred = pred_mask_logits.sigmoid() + else: + # Select masks corresponding to the predicted classes + num_masks = pred_mask_logits.shape[0] + class_pred = cat([i.pred_classes for i in pred_instances]) + device = ( + class_pred.device + if torch.jit.is_scripting() + else ("cpu" if torch.jit.is_tracing() else class_pred.device) + ) + indices = move_device_like(torch.arange(num_masks, device=device), class_pred) + mask_probs_pred = pred_mask_logits[indices, class_pred][:, None].sigmoid() + # mask_probs_pred.shape: (B, 1, Hmask, Wmask) + + num_boxes_per_image = [len(i) for i in pred_instances] + mask_probs_pred = mask_probs_pred.split(num_boxes_per_image, dim=0) + + for prob, instances in zip(mask_probs_pred, pred_instances): + instances.pred_masks = prob # (1, Hmask, Wmask) + + +class BaseMaskRCNNHead(nn.Module): + """ + Implement the basic Mask R-CNN losses and inference logic described in :paper:`Mask R-CNN` + """ + + @configurable + def __init__(self, *, loss_weight: float = 1.0, vis_period: int = 0): + """ + NOTE: this interface is experimental. + + Args: + loss_weight (float): multiplier of the loss + vis_period (int): visualization period + """ + super().__init__() + self.vis_period = vis_period + self.loss_weight = loss_weight + + @classmethod + def from_config(cls, cfg, input_shape): + return {"vis_period": cfg.VIS_PERIOD} + + def forward(self, x, instances: List[Instances]): + """ + Args: + x: input region feature(s) provided by :class:`ROIHeads`. + instances (list[Instances]): contains the boxes & labels corresponding + to the input features. + Exact format is up to its caller to decide. + Typically, this is the foreground instances in training, with + "proposal_boxes" field and other gt annotations. + In inference, it contains boxes that are already predicted. + + Returns: + A dict of losses in training. The predicted "instances" in inference. + """ + x = self.layers(x) + if self.training: + return {"loss_mask": mask_rcnn_loss(x, instances, self.vis_period) * self.loss_weight} + else: + mask_rcnn_inference(x, instances) + return instances + + def layers(self, x): + """ + Neural network layers that makes predictions from input features. + """ + raise NotImplementedError + + +# To get torchscript support, we make the head a subclass of `nn.Sequential`. +# Therefore, to add new layers in this head class, please make sure they are +# added in the order they will be used in forward(). +@ROI_MASK_HEAD_REGISTRY.register() +class MaskRCNNConvUpsampleHead(BaseMaskRCNNHead, nn.Sequential): + """ + A mask head with several conv layers, plus an upsample layer (with `ConvTranspose2d`). + Predictions are made with a final 1x1 conv layer. + """ + + @configurable + def __init__(self, input_shape: ShapeSpec, *, num_classes, conv_dims, conv_norm="", **kwargs): + """ + NOTE: this interface is experimental. 
+ + Args: + input_shape (ShapeSpec): shape of the input feature + num_classes (int): the number of foreground classes (i.e. background is not + included). 1 if using class agnostic prediction. + conv_dims (list[int]): a list of N>0 integers representing the output dimensions + of N-1 conv layers and the last upsample layer. + conv_norm (str or callable): normalization for the conv layers. + See :func:`detectron2.layers.get_norm` for supported types. + """ + super().__init__(**kwargs) + assert len(conv_dims) >= 1, "conv_dims have to be non-empty!" + + self.conv_norm_relus = [] + + cur_channels = input_shape.channels + for k, conv_dim in enumerate(conv_dims[:-1]): + conv = Conv2d( + cur_channels, + conv_dim, + kernel_size=3, + stride=1, + padding=1, + bias=not conv_norm, + norm=get_norm(conv_norm, conv_dim), + activation=nn.ReLU(), + ) + self.add_module("mask_fcn{}".format(k + 1), conv) + self.conv_norm_relus.append(conv) + cur_channels = conv_dim + + self.deconv = ConvTranspose2d( + cur_channels, conv_dims[-1], kernel_size=2, stride=2, padding=0 + ) + self.add_module("deconv_relu", nn.ReLU()) + cur_channels = conv_dims[-1] + + self.predictor = Conv2d(cur_channels, num_classes, kernel_size=1, stride=1, padding=0) + + for layer in self.conv_norm_relus + [self.deconv]: + weight_init.c2_msra_fill(layer) + # use normal distribution initialization for mask prediction layer + nn.init.normal_(self.predictor.weight, std=0.001) + if self.predictor.bias is not None: + nn.init.constant_(self.predictor.bias, 0) + + @classmethod + def from_config(cls, cfg, input_shape): + ret = super().from_config(cfg, input_shape) + conv_dim = cfg.MODEL.ROI_MASK_HEAD.CONV_DIM + num_conv = cfg.MODEL.ROI_MASK_HEAD.NUM_CONV + ret.update( + conv_dims=[conv_dim] * (num_conv + 1), # +1 for ConvTranspose + conv_norm=cfg.MODEL.ROI_MASK_HEAD.NORM, + input_shape=input_shape, + ) + if cfg.MODEL.ROI_MASK_HEAD.CLS_AGNOSTIC_MASK: + ret["num_classes"] = 1 + else: + ret["num_classes"] = cfg.MODEL.ROI_HEADS.NUM_CLASSES + return ret + + def layers(self, x): + for layer in self: + x = layer(x) + return x + + +def build_mask_head(cfg, input_shape): + """ + Build a mask head defined by `cfg.MODEL.ROI_MASK_HEAD.NAME`. + """ + name = cfg.MODEL.ROI_MASK_HEAD.NAME + return ROI_MASK_HEAD_REGISTRY.get(name)(cfg, input_shape) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/roi_heads/roi_heads.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/roi_heads/roi_heads.py new file mode 100644 index 0000000000000000000000000000000000000000..d554a3878e7d9fa49971128ad260c3e831b70c65 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/roi_heads/roi_heads.py @@ -0,0 +1,877 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
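MaskRCNNConvUpsampleHead.from_config above expands the config into NUM_CONV conv layers plus one extra entry for the transposed convolution. With commonly used values (assumed):

    num_conv, conv_dim = 4, 256
    conv_dims = [conv_dim] * (num_conv + 1)    # [256, 256, 256, 256, 256]
    # -> four 3x3 convs, one 2x ConvTranspose2d, then the 1x1 predictor conv on top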
+import inspect +import logging +import numpy as np +from typing import Dict, List, Optional, Tuple +import torch +from torch import nn + +from annotator.oneformer.detectron2.config import configurable +from annotator.oneformer.detectron2.layers import ShapeSpec, nonzero_tuple +from annotator.oneformer.detectron2.structures import Boxes, ImageList, Instances, pairwise_iou +from annotator.oneformer.detectron2.utils.events import get_event_storage +from annotator.oneformer.detectron2.utils.registry import Registry + +from ..backbone.resnet import BottleneckBlock, ResNet +from ..matcher import Matcher +from ..poolers import ROIPooler +from ..proposal_generator.proposal_utils import add_ground_truth_to_proposals +from ..sampling import subsample_labels +from .box_head import build_box_head +from .fast_rcnn import FastRCNNOutputLayers +from .keypoint_head import build_keypoint_head +from .mask_head import build_mask_head + +ROI_HEADS_REGISTRY = Registry("ROI_HEADS") +ROI_HEADS_REGISTRY.__doc__ = """ +Registry for ROI heads in a generalized R-CNN model. +ROIHeads take feature maps and region proposals, and +perform per-region computation. + +The registered object will be called with `obj(cfg, input_shape)`. +The call is expected to return an :class:`ROIHeads`. +""" + +logger = logging.getLogger(__name__) + + +def build_roi_heads(cfg, input_shape): + """ + Build ROIHeads defined by `cfg.MODEL.ROI_HEADS.NAME`. + """ + name = cfg.MODEL.ROI_HEADS.NAME + return ROI_HEADS_REGISTRY.get(name)(cfg, input_shape) + + +def select_foreground_proposals( + proposals: List[Instances], bg_label: int +) -> Tuple[List[Instances], List[torch.Tensor]]: + """ + Given a list of N Instances (for N images), each containing a `gt_classes` field, + return a list of Instances that contain only instances with `gt_classes != -1 && + gt_classes != bg_label`. + + Args: + proposals (list[Instances]): A list of N Instances, where N is the number of + images in the batch. + bg_label: label index of background class. + + Returns: + list[Instances]: N Instances, each contains only the selected foreground instances. + list[Tensor]: N boolean vector, correspond to the selection mask of + each Instances object. True for selected instances. + """ + assert isinstance(proposals, (list, tuple)) + assert isinstance(proposals[0], Instances) + assert proposals[0].has("gt_classes") + fg_proposals = [] + fg_selection_masks = [] + for proposals_per_image in proposals: + gt_classes = proposals_per_image.gt_classes + fg_selection_mask = (gt_classes != -1) & (gt_classes != bg_label) + fg_idxs = fg_selection_mask.nonzero().squeeze(1) + fg_proposals.append(proposals_per_image[fg_idxs]) + fg_selection_masks.append(fg_selection_mask) + return fg_proposals, fg_selection_masks + + +def select_proposals_with_visible_keypoints(proposals: List[Instances]) -> List[Instances]: + """ + Args: + proposals (list[Instances]): a list of N Instances, where N is the + number of images. + + Returns: + proposals: only contains proposals with at least one visible keypoint. + + Note that this is still slightly different from Detectron. + In Detectron, proposals for training keypoint head are re-sampled from + all the proposals with IOU>threshold & >=1 visible keypoint. + + Here, the proposals are first sampled from all proposals with + IOU>threshold, then proposals with no visible keypoint are filtered out. + This strategy seems to make no difference on Detectron and is easier to implement. 
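select_foreground_proposals above keeps proposals whose label is neither the ignore value (-1) nor the background label. The rule in isolation, on toy labels:

    import torch

    bg_label = 80                                   # assumed number of foreground classes
    gt_classes = torch.tensor([3, -1, 80, 12, 80])
    fg_mask = (gt_classes != -1) & (gt_classes != bg_label)
    fg_idxs = fg_mask.nonzero().squeeze(1)          # tensor([0, 3])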
+ """ + ret = [] + all_num_fg = [] + for proposals_per_image in proposals: + # If empty/unannotated image (hard negatives), skip filtering for train + if len(proposals_per_image) == 0: + ret.append(proposals_per_image) + continue + gt_keypoints = proposals_per_image.gt_keypoints.tensor + # #fg x K x 3 + vis_mask = gt_keypoints[:, :, 2] >= 1 + xs, ys = gt_keypoints[:, :, 0], gt_keypoints[:, :, 1] + proposal_boxes = proposals_per_image.proposal_boxes.tensor.unsqueeze(dim=1) # #fg x 1 x 4 + kp_in_box = ( + (xs >= proposal_boxes[:, :, 0]) + & (xs <= proposal_boxes[:, :, 2]) + & (ys >= proposal_boxes[:, :, 1]) + & (ys <= proposal_boxes[:, :, 3]) + ) + selection = (kp_in_box & vis_mask).any(dim=1) + selection_idxs = nonzero_tuple(selection)[0] + all_num_fg.append(selection_idxs.numel()) + ret.append(proposals_per_image[selection_idxs]) + + storage = get_event_storage() + storage.put_scalar("keypoint_head/num_fg_samples", np.mean(all_num_fg)) + return ret + + +class ROIHeads(torch.nn.Module): + """ + ROIHeads perform all per-region computation in an R-CNN. + + It typically contains logic to + + 1. (in training only) match proposals with ground truth and sample them + 2. crop the regions and extract per-region features using proposals + 3. make per-region predictions with different heads + + It can have many variants, implemented as subclasses of this class. + This base class contains the logic to match/sample proposals. + But it is not necessary to inherit this class if the sampling logic is not needed. + """ + + @configurable + def __init__( + self, + *, + num_classes, + batch_size_per_image, + positive_fraction, + proposal_matcher, + proposal_append_gt=True, + ): + """ + NOTE: this interface is experimental. + + Args: + num_classes (int): number of foreground classes (i.e. background is not included) + batch_size_per_image (int): number of proposals to sample for training + positive_fraction (float): fraction of positive (foreground) proposals + to sample for training. + proposal_matcher (Matcher): matcher that matches proposals and ground truth + proposal_append_gt (bool): whether to include ground truth as proposals as well + """ + super().__init__() + self.batch_size_per_image = batch_size_per_image + self.positive_fraction = positive_fraction + self.num_classes = num_classes + self.proposal_matcher = proposal_matcher + self.proposal_append_gt = proposal_append_gt + + @classmethod + def from_config(cls, cfg): + return { + "batch_size_per_image": cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE, + "positive_fraction": cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION, + "num_classes": cfg.MODEL.ROI_HEADS.NUM_CLASSES, + "proposal_append_gt": cfg.MODEL.ROI_HEADS.PROPOSAL_APPEND_GT, + # Matcher to assign box proposals to gt boxes + "proposal_matcher": Matcher( + cfg.MODEL.ROI_HEADS.IOU_THRESHOLDS, + cfg.MODEL.ROI_HEADS.IOU_LABELS, + allow_low_quality_matches=False, + ), + } + + def _sample_proposals( + self, matched_idxs: torch.Tensor, matched_labels: torch.Tensor, gt_classes: torch.Tensor + ) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Based on the matching between N proposals and M groundtruth, + sample the proposals and set their classification labels. + + Args: + matched_idxs (Tensor): a vector of length N, each is the best-matched + gt index in [0, M) for each proposal. + matched_labels (Tensor): a vector of length N, the matcher's label + (one of cfg.MODEL.ROI_HEADS.IOU_LABELS) for each proposal. + gt_classes (Tensor): a vector of length M. + + Returns: + Tensor: a vector of indices of sampled proposals. 
Each is in [0, N). + Tensor: a vector of the same length, the classification label for + each sampled proposal. Each sample is labeled as either a category in + [0, num_classes) or the background (num_classes). + """ + has_gt = gt_classes.numel() > 0 + # Get the corresponding GT for each proposal + if has_gt: + gt_classes = gt_classes[matched_idxs] + # Label unmatched proposals (0 label from matcher) as background (label=num_classes) + gt_classes[matched_labels == 0] = self.num_classes + # Label ignore proposals (-1 label) + gt_classes[matched_labels == -1] = -1 + else: + gt_classes = torch.zeros_like(matched_idxs) + self.num_classes + + sampled_fg_idxs, sampled_bg_idxs = subsample_labels( + gt_classes, self.batch_size_per_image, self.positive_fraction, self.num_classes + ) + + sampled_idxs = torch.cat([sampled_fg_idxs, sampled_bg_idxs], dim=0) + return sampled_idxs, gt_classes[sampled_idxs] + + @torch.no_grad() + def label_and_sample_proposals( + self, proposals: List[Instances], targets: List[Instances] + ) -> List[Instances]: + """ + Prepare some proposals to be used to train the ROI heads. + It performs box matching between `proposals` and `targets`, and assigns + training labels to the proposals. + It returns ``self.batch_size_per_image`` random samples from proposals and groundtruth + boxes, with a fraction of positives that is no larger than + ``self.positive_fraction``. + + Args: + See :meth:`ROIHeads.forward` + + Returns: + list[Instances]: + length `N` list of `Instances`s containing the proposals + sampled for training. Each `Instances` has the following fields: + + - proposal_boxes: the proposal boxes + - gt_boxes: the ground-truth box that the proposal is assigned to + (this is only meaningful if the proposal has a label > 0; if label = 0 + then the ground-truth box is random) + + Other fields such as "gt_classes", "gt_masks", that's included in `targets`. + """ + # Augment proposals with ground-truth boxes. + # In the case of learned proposals (e.g., RPN), when training starts + # the proposals will be low quality due to random initialization. + # It's possible that none of these initial + # proposals have high enough overlap with the gt objects to be used + # as positive examples for the second stage components (box head, + # cls head, mask head). Adding the gt boxes to the set of proposals + # ensures that the second stage components will have some positive + # examples from the start of training. For RPN, this augmentation improves + # convergence and empirically improves box AP on COCO by about 0.5 + # points (under one tested configuration). + if self.proposal_append_gt: + proposals = add_ground_truth_to_proposals(targets, proposals) + + proposals_with_gt = [] + + num_fg_samples = [] + num_bg_samples = [] + for proposals_per_image, targets_per_image in zip(proposals, targets): + has_gt = len(targets_per_image) > 0 + match_quality_matrix = pairwise_iou( + targets_per_image.gt_boxes, proposals_per_image.proposal_boxes + ) + matched_idxs, matched_labels = self.proposal_matcher(match_quality_matrix) + sampled_idxs, gt_classes = self._sample_proposals( + matched_idxs, matched_labels, targets_per_image.gt_classes + ) + + # Set target attributes of the sampled proposals: + proposals_per_image = proposals_per_image[sampled_idxs] + proposals_per_image.gt_classes = gt_classes + + if has_gt: + sampled_targets = matched_idxs[sampled_idxs] + # We index all the attributes of targets that start with "gt_" + # and have not been added to proposals yet (="gt_classes"). 
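_sample_proposals above first relabels proposals using the matcher output: unmatched proposals (matcher label 0) become background (num_classes) and ignored ones (label -1) stay -1, before the fg/bg subsampling. The relabeling step on toy values:

    import torch

    num_classes = 80
    matched_idxs = torch.tensor([2, 0, 1, 0])         # best-matched gt index per proposal
    matched_labels = torch.tensor([1, 0, -1, 1])      # matcher labels per proposal
    gt_classes_per_gt = torch.tensor([5, 9, 33])      # class of each gt box

    gt_classes = gt_classes_per_gt[matched_idxs]      # tensor([33,  5,  9,  5])
    gt_classes[matched_labels == 0] = num_classes     # background
    gt_classes[matched_labels == -1] = -1             # ignore
    # -> tensor([33, 80, -1,  5])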
+ # NOTE: here the indexing waste some compute, because heads + # like masks, keypoints, etc, will filter the proposals again, + # (by foreground/background, or number of keypoints in the image, etc) + # so we essentially index the data twice. + for (trg_name, trg_value) in targets_per_image.get_fields().items(): + if trg_name.startswith("gt_") and not proposals_per_image.has(trg_name): + proposals_per_image.set(trg_name, trg_value[sampled_targets]) + # If no GT is given in the image, we don't know what a dummy gt value can be. + # Therefore the returned proposals won't have any gt_* fields, except for a + # gt_classes full of background label. + + num_bg_samples.append((gt_classes == self.num_classes).sum().item()) + num_fg_samples.append(gt_classes.numel() - num_bg_samples[-1]) + proposals_with_gt.append(proposals_per_image) + + # Log the number of fg/bg samples that are selected for training ROI heads + storage = get_event_storage() + storage.put_scalar("roi_head/num_fg_samples", np.mean(num_fg_samples)) + storage.put_scalar("roi_head/num_bg_samples", np.mean(num_bg_samples)) + + return proposals_with_gt + + def forward( + self, + images: ImageList, + features: Dict[str, torch.Tensor], + proposals: List[Instances], + targets: Optional[List[Instances]] = None, + ) -> Tuple[List[Instances], Dict[str, torch.Tensor]]: + """ + Args: + images (ImageList): + features (dict[str,Tensor]): input data as a mapping from feature + map name to tensor. Axis 0 represents the number of images `N` in + the input data; axes 1-3 are channels, height, and width, which may + vary between feature maps (e.g., if a feature pyramid is used). + proposals (list[Instances]): length `N` list of `Instances`. The i-th + `Instances` contains object proposals for the i-th input image, + with fields "proposal_boxes" and "objectness_logits". + targets (list[Instances], optional): length `N` list of `Instances`. The i-th + `Instances` contains the ground-truth per-instance annotations + for the i-th input image. Specify `targets` during training only. + It may have the following fields: + + - gt_boxes: the bounding box of each instance. + - gt_classes: the label for each instance with a category ranging in [0, #class]. + - gt_masks: PolygonMasks or BitMasks, the ground-truth masks of each instance. + - gt_keypoints: NxKx3, the groud-truth keypoints for each instance. + + Returns: + list[Instances]: length `N` list of `Instances` containing the + detected instances. Returned during inference only; may be [] during training. + + dict[str->Tensor]: + mapping from a named loss to a tensor storing the loss. Used during training only. + """ + raise NotImplementedError() + + +@ROI_HEADS_REGISTRY.register() +class Res5ROIHeads(ROIHeads): + """ + The ROIHeads in a typical "C4" R-CNN model, where + the box and mask head share the cropping and + the per-region feature computation by a Res5 block. + See :paper:`ResNet` Appendix A. + """ + + @configurable + def __init__( + self, + *, + in_features: List[str], + pooler: ROIPooler, + res5: nn.Module, + box_predictor: nn.Module, + mask_head: Optional[nn.Module] = None, + **kwargs, + ): + """ + NOTE: this interface is experimental. + + Args: + in_features (list[str]): list of backbone feature map names to use for + feature extraction + pooler (ROIPooler): pooler to extra region features from backbone + res5 (nn.Sequential): a CNN to compute per-region features, to be used by + ``box_predictor`` and ``mask_head``. Typically this is a "res5" + block from a ResNet. 
+ box_predictor (nn.Module): make box predictions from the feature. + Should have the same interface as :class:`FastRCNNOutputLayers`. + mask_head (nn.Module): transform features to make mask predictions + """ + super().__init__(**kwargs) + self.in_features = in_features + self.pooler = pooler + if isinstance(res5, (list, tuple)): + res5 = nn.Sequential(*res5) + self.res5 = res5 + self.box_predictor = box_predictor + self.mask_on = mask_head is not None + if self.mask_on: + self.mask_head = mask_head + + @classmethod + def from_config(cls, cfg, input_shape): + # fmt: off + ret = super().from_config(cfg) + in_features = ret["in_features"] = cfg.MODEL.ROI_HEADS.IN_FEATURES + pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION + pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE + pooler_scales = (1.0 / input_shape[in_features[0]].stride, ) + sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO + mask_on = cfg.MODEL.MASK_ON + # fmt: on + assert not cfg.MODEL.KEYPOINT_ON + assert len(in_features) == 1 + + ret["pooler"] = ROIPooler( + output_size=pooler_resolution, + scales=pooler_scales, + sampling_ratio=sampling_ratio, + pooler_type=pooler_type, + ) + + # Compatbility with old moco code. Might be useful. + # See notes in StandardROIHeads.from_config + if not inspect.ismethod(cls._build_res5_block): + logger.warning( + "The behavior of _build_res5_block may change. " + "Please do not depend on private methods." + ) + cls._build_res5_block = classmethod(cls._build_res5_block) + + ret["res5"], out_channels = cls._build_res5_block(cfg) + ret["box_predictor"] = FastRCNNOutputLayers( + cfg, ShapeSpec(channels=out_channels, height=1, width=1) + ) + + if mask_on: + ret["mask_head"] = build_mask_head( + cfg, + ShapeSpec(channels=out_channels, width=pooler_resolution, height=pooler_resolution), + ) + return ret + + @classmethod + def _build_res5_block(cls, cfg): + # fmt: off + stage_channel_factor = 2 ** 3 # res5 is 8x res2 + num_groups = cfg.MODEL.RESNETS.NUM_GROUPS + width_per_group = cfg.MODEL.RESNETS.WIDTH_PER_GROUP + bottleneck_channels = num_groups * width_per_group * stage_channel_factor + out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS * stage_channel_factor + stride_in_1x1 = cfg.MODEL.RESNETS.STRIDE_IN_1X1 + norm = cfg.MODEL.RESNETS.NORM + assert not cfg.MODEL.RESNETS.DEFORM_ON_PER_STAGE[-1], \ + "Deformable conv is not yet supported in res5 head." + # fmt: on + + blocks = ResNet.make_stage( + BottleneckBlock, + 3, + stride_per_block=[2, 1, 1], + in_channels=out_channels // 2, + bottleneck_channels=bottleneck_channels, + out_channels=out_channels, + num_groups=num_groups, + norm=norm, + stride_in_1x1=stride_in_1x1, + ) + return nn.Sequential(*blocks), out_channels + + def _shared_roi_transform(self, features: List[torch.Tensor], boxes: List[Boxes]): + x = self.pooler(features, boxes) + return self.res5(x) + + def forward( + self, + images: ImageList, + features: Dict[str, torch.Tensor], + proposals: List[Instances], + targets: Optional[List[Instances]] = None, + ): + """ + See :meth:`ROIHeads.forward`. 
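_build_res5_block above derives its channel counts from the res2 configuration, since res5 sits three stages (8x) further down the ResNet. With the default ResNet-50 values (assumed: NUM_GROUPS=1, WIDTH_PER_GROUP=64, RES2_OUT_CHANNELS=256):

    num_groups, width_per_group, res2_out_channels = 1, 64, 256
    stage_channel_factor = 2 ** 3                                                 # res5 is 8x res2
    bottleneck_channels = num_groups * width_per_group * stage_channel_factor     # 512
    out_channels = res2_out_channels * stage_channel_factor                       # 2048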
+ """ + del images + + if self.training: + assert targets + proposals = self.label_and_sample_proposals(proposals, targets) + del targets + + proposal_boxes = [x.proposal_boxes for x in proposals] + box_features = self._shared_roi_transform( + [features[f] for f in self.in_features], proposal_boxes + ) + predictions = self.box_predictor(box_features.mean(dim=[2, 3])) + + if self.training: + del features + losses = self.box_predictor.losses(predictions, proposals) + if self.mask_on: + proposals, fg_selection_masks = select_foreground_proposals( + proposals, self.num_classes + ) + # Since the ROI feature transform is shared between boxes and masks, + # we don't need to recompute features. The mask loss is only defined + # on foreground proposals, so we need to select out the foreground + # features. + mask_features = box_features[torch.cat(fg_selection_masks, dim=0)] + del box_features + losses.update(self.mask_head(mask_features, proposals)) + return [], losses + else: + pred_instances, _ = self.box_predictor.inference(predictions, proposals) + pred_instances = self.forward_with_given_boxes(features, pred_instances) + return pred_instances, {} + + def forward_with_given_boxes( + self, features: Dict[str, torch.Tensor], instances: List[Instances] + ) -> List[Instances]: + """ + Use the given boxes in `instances` to produce other (non-box) per-ROI outputs. + + Args: + features: same as in `forward()` + instances (list[Instances]): instances to predict other outputs. Expect the keys + "pred_boxes" and "pred_classes" to exist. + + Returns: + instances (Instances): + the same `Instances` object, with extra + fields such as `pred_masks` or `pred_keypoints`. + """ + assert not self.training + assert instances[0].has("pred_boxes") and instances[0].has("pred_classes") + + if self.mask_on: + feature_list = [features[f] for f in self.in_features] + x = self._shared_roi_transform(feature_list, [x.pred_boxes for x in instances]) + return self.mask_head(x, instances) + else: + return instances + + +@ROI_HEADS_REGISTRY.register() +class StandardROIHeads(ROIHeads): + """ + It's "standard" in a sense that there is no ROI transform sharing + or feature sharing between tasks. + Each head independently processes the input features by each head's + own pooler and head. + + This class is used by most models, such as FPN and C5. + To implement more models, you can subclass it and implement a different + :meth:`forward()` or a head. + """ + + @configurable + def __init__( + self, + *, + box_in_features: List[str], + box_pooler: ROIPooler, + box_head: nn.Module, + box_predictor: nn.Module, + mask_in_features: Optional[List[str]] = None, + mask_pooler: Optional[ROIPooler] = None, + mask_head: Optional[nn.Module] = None, + keypoint_in_features: Optional[List[str]] = None, + keypoint_pooler: Optional[ROIPooler] = None, + keypoint_head: Optional[nn.Module] = None, + train_on_pred_boxes: bool = False, + **kwargs, + ): + """ + NOTE: this interface is experimental. + + Args: + box_in_features (list[str]): list of feature names to use for the box head. + box_pooler (ROIPooler): pooler to extra region features for box head + box_head (nn.Module): transform features to make box predictions + box_predictor (nn.Module): make box predictions from the feature. + Should have the same interface as :class:`FastRCNNOutputLayers`. + mask_in_features (list[str]): list of feature names to use for the mask + pooler or mask head. None if not using mask head. 
+ mask_pooler (ROIPooler): pooler to extract region features from image features. + The mask head will then take region features to make predictions. + If None, the mask head will directly take the dict of image features + defined by `mask_in_features` + mask_head (nn.Module): transform features to make mask predictions + keypoint_in_features, keypoint_pooler, keypoint_head: similar to ``mask_*``. + train_on_pred_boxes (bool): whether to use proposal boxes or + predicted boxes from the box head to train other heads. + """ + super().__init__(**kwargs) + # keep self.in_features for backward compatibility + self.in_features = self.box_in_features = box_in_features + self.box_pooler = box_pooler + self.box_head = box_head + self.box_predictor = box_predictor + + self.mask_on = mask_in_features is not None + if self.mask_on: + self.mask_in_features = mask_in_features + self.mask_pooler = mask_pooler + self.mask_head = mask_head + + self.keypoint_on = keypoint_in_features is not None + if self.keypoint_on: + self.keypoint_in_features = keypoint_in_features + self.keypoint_pooler = keypoint_pooler + self.keypoint_head = keypoint_head + + self.train_on_pred_boxes = train_on_pred_boxes + + @classmethod + def from_config(cls, cfg, input_shape): + ret = super().from_config(cfg) + ret["train_on_pred_boxes"] = cfg.MODEL.ROI_BOX_HEAD.TRAIN_ON_PRED_BOXES + # Subclasses that have not been updated to use from_config style construction + # may have overridden _init_*_head methods. In this case, those overridden methods + # will not be classmethods and we need to avoid trying to call them here. + # We test for this with ismethod which only returns True for bound methods of cls. + # Such subclasses will need to handle calling their overridden _init_*_head methods. + if inspect.ismethod(cls._init_box_head): + ret.update(cls._init_box_head(cfg, input_shape)) + if inspect.ismethod(cls._init_mask_head): + ret.update(cls._init_mask_head(cfg, input_shape)) + if inspect.ismethod(cls._init_keypoint_head): + ret.update(cls._init_keypoint_head(cfg, input_shape)) + return ret + + @classmethod + def _init_box_head(cls, cfg, input_shape): + # fmt: off + in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES + pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION + pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features) + sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO + pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE + # fmt: on + + # If StandardROIHeads is applied on multiple feature maps (as in FPN), + # then we share the same predictors and therefore the channel counts must be the same + in_channels = [input_shape[f].channels for f in in_features] + # Check all channel counts are equal + assert len(set(in_channels)) == 1, in_channels + in_channels = in_channels[0] + + box_pooler = ROIPooler( + output_size=pooler_resolution, + scales=pooler_scales, + sampling_ratio=sampling_ratio, + pooler_type=pooler_type, + ) + # Here we split "box head" and "box predictor", which is mainly due to historical reasons. + # They are used together so the "box predictor" layers should be part of the "box head". + # New subclasses of ROIHeads do not need "box predictor"s. 
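_init_box_head above turns the stride of each selected feature map into a pooler scale of 1/stride. With the usual FPN levels (strides assumed):

    strides = {"p2": 4, "p3": 8, "p4": 16, "p5": 32}
    in_features = ["p2", "p3", "p4", "p5"]
    pooler_scales = tuple(1.0 / strides[k] for k in in_features)   # (0.25, 0.125, 0.0625, 0.03125)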
+ box_head = build_box_head( + cfg, ShapeSpec(channels=in_channels, height=pooler_resolution, width=pooler_resolution) + ) + box_predictor = FastRCNNOutputLayers(cfg, box_head.output_shape) + return { + "box_in_features": in_features, + "box_pooler": box_pooler, + "box_head": box_head, + "box_predictor": box_predictor, + } + + @classmethod + def _init_mask_head(cls, cfg, input_shape): + if not cfg.MODEL.MASK_ON: + return {} + # fmt: off + in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES + pooler_resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION + pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features) + sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO + pooler_type = cfg.MODEL.ROI_MASK_HEAD.POOLER_TYPE + # fmt: on + + in_channels = [input_shape[f].channels for f in in_features][0] + + ret = {"mask_in_features": in_features} + ret["mask_pooler"] = ( + ROIPooler( + output_size=pooler_resolution, + scales=pooler_scales, + sampling_ratio=sampling_ratio, + pooler_type=pooler_type, + ) + if pooler_type + else None + ) + if pooler_type: + shape = ShapeSpec( + channels=in_channels, width=pooler_resolution, height=pooler_resolution + ) + else: + shape = {f: input_shape[f] for f in in_features} + ret["mask_head"] = build_mask_head(cfg, shape) + return ret + + @classmethod + def _init_keypoint_head(cls, cfg, input_shape): + if not cfg.MODEL.KEYPOINT_ON: + return {} + # fmt: off + in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES + pooler_resolution = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION + pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features) # noqa + sampling_ratio = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO + pooler_type = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_TYPE + # fmt: on + + in_channels = [input_shape[f].channels for f in in_features][0] + + ret = {"keypoint_in_features": in_features} + ret["keypoint_pooler"] = ( + ROIPooler( + output_size=pooler_resolution, + scales=pooler_scales, + sampling_ratio=sampling_ratio, + pooler_type=pooler_type, + ) + if pooler_type + else None + ) + if pooler_type: + shape = ShapeSpec( + channels=in_channels, width=pooler_resolution, height=pooler_resolution + ) + else: + shape = {f: input_shape[f] for f in in_features} + ret["keypoint_head"] = build_keypoint_head(cfg, shape) + return ret + + def forward( + self, + images: ImageList, + features: Dict[str, torch.Tensor], + proposals: List[Instances], + targets: Optional[List[Instances]] = None, + ) -> Tuple[List[Instances], Dict[str, torch.Tensor]]: + """ + See :class:`ROIHeads.forward`. + """ + del images + if self.training: + assert targets, "'targets' argument is required during training" + proposals = self.label_and_sample_proposals(proposals, targets) + del targets + + if self.training: + losses = self._forward_box(features, proposals) + # Usually the original proposals used by the box head are used by the mask, keypoint + # heads. But when `self.train_on_pred_boxes is True`, proposals will contain boxes + # predicted by the box head. + losses.update(self._forward_mask(features, proposals)) + losses.update(self._forward_keypoint(features, proposals)) + return proposals, losses + else: + pred_instances = self._forward_box(features, proposals) + # During inference cascaded prediction is used: the mask and keypoints heads are only + # applied to the top scoring box detections. 
+ pred_instances = self.forward_with_given_boxes(features, pred_instances) + return pred_instances, {} + + def forward_with_given_boxes( + self, features: Dict[str, torch.Tensor], instances: List[Instances] + ) -> List[Instances]: + """ + Use the given boxes in `instances` to produce other (non-box) per-ROI outputs. + + This is useful for downstream tasks where a box is known, but need to obtain + other attributes (outputs of other heads). + Test-time augmentation also uses this. + + Args: + features: same as in `forward()` + instances (list[Instances]): instances to predict other outputs. Expect the keys + "pred_boxes" and "pred_classes" to exist. + + Returns: + list[Instances]: + the same `Instances` objects, with extra + fields such as `pred_masks` or `pred_keypoints`. + """ + assert not self.training + assert instances[0].has("pred_boxes") and instances[0].has("pred_classes") + + instances = self._forward_mask(features, instances) + instances = self._forward_keypoint(features, instances) + return instances + + def _forward_box(self, features: Dict[str, torch.Tensor], proposals: List[Instances]): + """ + Forward logic of the box prediction branch. If `self.train_on_pred_boxes is True`, + the function puts predicted boxes in the `proposal_boxes` field of `proposals` argument. + + Args: + features (dict[str, Tensor]): mapping from feature map names to tensor. + Same as in :meth:`ROIHeads.forward`. + proposals (list[Instances]): the per-image object proposals with + their matching ground truth. + Each has fields "proposal_boxes", and "objectness_logits", + "gt_classes", "gt_boxes". + + Returns: + In training, a dict of losses. + In inference, a list of `Instances`, the predicted instances. + """ + features = [features[f] for f in self.box_in_features] + box_features = self.box_pooler(features, [x.proposal_boxes for x in proposals]) + box_features = self.box_head(box_features) + predictions = self.box_predictor(box_features) + del box_features + + if self.training: + losses = self.box_predictor.losses(predictions, proposals) + # proposals is modified in-place below, so losses must be computed first. + if self.train_on_pred_boxes: + with torch.no_grad(): + pred_boxes = self.box_predictor.predict_boxes_for_gt_classes( + predictions, proposals + ) + for proposals_per_image, pred_boxes_per_image in zip(proposals, pred_boxes): + proposals_per_image.proposal_boxes = Boxes(pred_boxes_per_image) + return losses + else: + pred_instances, _ = self.box_predictor.inference(predictions, proposals) + return pred_instances + + def _forward_mask(self, features: Dict[str, torch.Tensor], instances: List[Instances]): + """ + Forward logic of the mask prediction branch. + + Args: + features (dict[str, Tensor]): mapping from feature map names to tensor. + Same as in :meth:`ROIHeads.forward`. + instances (list[Instances]): the per-image instances to train/predict masks. + In training, they can be the proposals. + In inference, they can be the boxes predicted by R-CNN box head. + + Returns: + In training, a dict of losses. + In inference, update `instances` with new fields "pred_masks" and return it. + """ + if not self.mask_on: + return {} if self.training else instances + + if self.training: + # head is only trained on positive proposals. 
+ instances, _ = select_foreground_proposals(instances, self.num_classes) + + if self.mask_pooler is not None: + features = [features[f] for f in self.mask_in_features] + boxes = [x.proposal_boxes if self.training else x.pred_boxes for x in instances] + features = self.mask_pooler(features, boxes) + else: + features = {f: features[f] for f in self.mask_in_features} + return self.mask_head(features, instances) + + def _forward_keypoint(self, features: Dict[str, torch.Tensor], instances: List[Instances]): + """ + Forward logic of the keypoint prediction branch. + + Args: + features (dict[str, Tensor]): mapping from feature map names to tensor. + Same as in :meth:`ROIHeads.forward`. + instances (list[Instances]): the per-image instances to train/predict keypoints. + In training, they can be the proposals. + In inference, they can be the boxes predicted by R-CNN box head. + + Returns: + In training, a dict of losses. + In inference, update `instances` with new fields "pred_keypoints" and return it. + """ + if not self.keypoint_on: + return {} if self.training else instances + + if self.training: + # head is only trained on positive proposals with >=1 visible keypoints. + instances, _ = select_foreground_proposals(instances, self.num_classes) + instances = select_proposals_with_visible_keypoints(instances) + + if self.keypoint_pooler is not None: + features = [features[f] for f in self.keypoint_in_features] + boxes = [x.proposal_boxes if self.training else x.pred_boxes for x in instances] + features = self.keypoint_pooler(features, boxes) + else: + features = {f: features[f] for f in self.keypoint_in_features} + return self.keypoint_head(features, instances) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/roi_heads/rotated_fast_rcnn.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/roi_heads/rotated_fast_rcnn.py new file mode 100644 index 0000000000000000000000000000000000000000..0d4cb8d50a8eeecb13bb6d9c9b8f021bed605cbc --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/roi_heads/rotated_fast_rcnn.py @@ -0,0 +1,271 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import logging +import numpy as np +import torch + +from annotator.oneformer.detectron2.config import configurable +from annotator.oneformer.detectron2.layers import ShapeSpec, batched_nms_rotated +from annotator.oneformer.detectron2.structures import Instances, RotatedBoxes, pairwise_iou_rotated +from annotator.oneformer.detectron2.utils.events import get_event_storage + +from ..box_regression import Box2BoxTransformRotated +from ..poolers import ROIPooler +from ..proposal_generator.proposal_utils import add_ground_truth_to_proposals +from .box_head import build_box_head +from .fast_rcnn import FastRCNNOutputLayers +from .roi_heads import ROI_HEADS_REGISTRY, StandardROIHeads + +logger = logging.getLogger(__name__) + +""" +Shape shorthand in this module: + + N: number of images in the minibatch + R: number of ROIs, combined over all images, in the minibatch + Ri: number of ROIs in image i + K: number of foreground classes. E.g.,there are 80 foreground classes in COCO. + +Naming convention: + + deltas: refers to the 5-d (dx, dy, dw, dh, da) deltas that parameterize the box2box + transform (see :class:`box_regression.Box2BoxTransformRotated`). + + pred_class_logits: predicted class scores in [-inf, +inf]; use + softmax(pred_class_logits) to estimate P(class). 
+ + gt_classes: ground-truth classification labels in [0, K], where [0, K) represent + foreground object classes and K represents the background class. + + pred_proposal_deltas: predicted rotated box2box transform deltas for transforming proposals + to detection box predictions. + + gt_proposal_deltas: ground-truth rotated box2box transform deltas +""" + + +def fast_rcnn_inference_rotated( + boxes, scores, image_shapes, score_thresh, nms_thresh, topk_per_image +): + """ + Call `fast_rcnn_inference_single_image_rotated` for all images. + + Args: + boxes (list[Tensor]): A list of Tensors of predicted class-specific or class-agnostic + boxes for each image. Element i has shape (Ri, K * 5) if doing + class-specific regression, or (Ri, 5) if doing class-agnostic + regression, where Ri is the number of predicted objects for image i. + This is compatible with the output of :meth:`FastRCNNOutputLayers.predict_boxes`. + scores (list[Tensor]): A list of Tensors of predicted class scores for each image. + Element i has shape (Ri, K + 1), where Ri is the number of predicted objects + for image i. Compatible with the output of :meth:`FastRCNNOutputLayers.predict_probs`. + image_shapes (list[tuple]): A list of (width, height) tuples for each image in the batch. + score_thresh (float): Only return detections with a confidence score exceeding this + threshold. + nms_thresh (float): The threshold to use for box non-maximum suppression. Value in [0, 1]. + topk_per_image (int): The number of top scoring detections to return. Set < 0 to return + all detections. + + Returns: + instances: (list[Instances]): A list of N instances, one for each image in the batch, + that stores the topk most confidence detections. + kept_indices: (list[Tensor]): A list of 1D tensor of length of N, each element indicates + the corresponding boxes/scores index in [0, Ri) from the input, for image i. + """ + result_per_image = [ + fast_rcnn_inference_single_image_rotated( + boxes_per_image, scores_per_image, image_shape, score_thresh, nms_thresh, topk_per_image + ) + for scores_per_image, boxes_per_image, image_shape in zip(scores, boxes, image_shapes) + ] + return [x[0] for x in result_per_image], [x[1] for x in result_per_image] + + +@torch.no_grad() +def fast_rcnn_inference_single_image_rotated( + boxes, scores, image_shape, score_thresh, nms_thresh, topk_per_image +): + """ + Single-image inference. Return rotated bounding-box detection results by thresholding + on scores and applying rotated non-maximum suppression (Rotated NMS). + + Args: + Same as `fast_rcnn_inference_rotated`, but with rotated boxes, scores, and image shapes + per image. + + Returns: + Same as `fast_rcnn_inference_rotated`, but for only one image. + """ + valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1) + if not valid_mask.all(): + boxes = boxes[valid_mask] + scores = scores[valid_mask] + + B = 5 # box dimension + scores = scores[:, :-1] + num_bbox_reg_classes = boxes.shape[1] // B + # Convert to Boxes to use the `clip` function ... + boxes = RotatedBoxes(boxes.reshape(-1, B)) + boxes.clip(image_shape) + boxes = boxes.tensor.view(-1, num_bbox_reg_classes, B) # R x C x B + # Filter results based on detection scores + filter_mask = scores > score_thresh # R x K + # R' x 2. First column contains indices of the R predictions; + # Second column contains indices of classes. 
+ filter_inds = filter_mask.nonzero() + if num_bbox_reg_classes == 1: + boxes = boxes[filter_inds[:, 0], 0] + else: + boxes = boxes[filter_mask] + scores = scores[filter_mask] + + # Apply per-class Rotated NMS + keep = batched_nms_rotated(boxes, scores, filter_inds[:, 1], nms_thresh) + if topk_per_image >= 0: + keep = keep[:topk_per_image] + boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep] + + result = Instances(image_shape) + result.pred_boxes = RotatedBoxes(boxes) + result.scores = scores + result.pred_classes = filter_inds[:, 1] + + return result, filter_inds[:, 0] + + +class RotatedFastRCNNOutputLayers(FastRCNNOutputLayers): + """ + Two linear layers for predicting Rotated Fast R-CNN outputs. + """ + + @classmethod + def from_config(cls, cfg, input_shape): + args = super().from_config(cfg, input_shape) + args["box2box_transform"] = Box2BoxTransformRotated( + weights=cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS + ) + return args + + def inference(self, predictions, proposals): + """ + Returns: + list[Instances]: same as `fast_rcnn_inference_rotated`. + list[Tensor]: same as `fast_rcnn_inference_rotated`. + """ + boxes = self.predict_boxes(predictions, proposals) + scores = self.predict_probs(predictions, proposals) + image_shapes = [x.image_size for x in proposals] + + return fast_rcnn_inference_rotated( + boxes, + scores, + image_shapes, + self.test_score_thresh, + self.test_nms_thresh, + self.test_topk_per_image, + ) + + +@ROI_HEADS_REGISTRY.register() +class RROIHeads(StandardROIHeads): + """ + This class is used by Rotated Fast R-CNN to detect rotated boxes. + For now, it only supports box predictions but not mask or keypoints. + """ + + @configurable + def __init__(self, **kwargs): + """ + NOTE: this interface is experimental. + """ + super().__init__(**kwargs) + assert ( + not self.mask_on and not self.keypoint_on + ), "Mask/Keypoints not supported in Rotated ROIHeads." + assert not self.train_on_pred_boxes, "train_on_pred_boxes not implemented for RROIHeads!" + + @classmethod + def _init_box_head(cls, cfg, input_shape): + # fmt: off + in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES + pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION + pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features) + sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO + pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE + # fmt: on + assert pooler_type in ["ROIAlignRotated"], pooler_type + # assume all channel counts are equal + in_channels = [input_shape[f].channels for f in in_features][0] + + box_pooler = ROIPooler( + output_size=pooler_resolution, + scales=pooler_scales, + sampling_ratio=sampling_ratio, + pooler_type=pooler_type, + ) + box_head = build_box_head( + cfg, ShapeSpec(channels=in_channels, height=pooler_resolution, width=pooler_resolution) + ) + # This line is the only difference v.s. StandardROIHeads + box_predictor = RotatedFastRCNNOutputLayers(cfg, box_head.output_shape) + return { + "box_in_features": in_features, + "box_pooler": box_pooler, + "box_head": box_head, + "box_predictor": box_predictor, + } + + @torch.no_grad() + def label_and_sample_proposals(self, proposals, targets): + """ + Prepare some proposals to be used to train the RROI heads. + It performs box matching between `proposals` and `targets`, and assigns + training labels to the proposals. + It returns `self.batch_size_per_image` random samples from proposals and groundtruth boxes, + with a fraction of positives that is no larger than `self.positive_sample_fraction. 
+ + Args: + See :meth:`StandardROIHeads.forward` + + Returns: + list[Instances]: length `N` list of `Instances`s containing the proposals + sampled for training. Each `Instances` has the following fields: + - proposal_boxes: the rotated proposal boxes + - gt_boxes: the ground-truth rotated boxes that the proposal is assigned to + (this is only meaningful if the proposal has a label > 0; if label = 0 + then the ground-truth box is random) + - gt_classes: the ground-truth classification lable for each proposal + """ + if self.proposal_append_gt: + proposals = add_ground_truth_to_proposals(targets, proposals) + + proposals_with_gt = [] + + num_fg_samples = [] + num_bg_samples = [] + for proposals_per_image, targets_per_image in zip(proposals, targets): + has_gt = len(targets_per_image) > 0 + match_quality_matrix = pairwise_iou_rotated( + targets_per_image.gt_boxes, proposals_per_image.proposal_boxes + ) + matched_idxs, matched_labels = self.proposal_matcher(match_quality_matrix) + sampled_idxs, gt_classes = self._sample_proposals( + matched_idxs, matched_labels, targets_per_image.gt_classes + ) + + proposals_per_image = proposals_per_image[sampled_idxs] + proposals_per_image.gt_classes = gt_classes + + if has_gt: + sampled_targets = matched_idxs[sampled_idxs] + proposals_per_image.gt_boxes = targets_per_image.gt_boxes[sampled_targets] + + num_bg_samples.append((gt_classes == self.num_classes).sum().item()) + num_fg_samples.append(gt_classes.numel() - num_bg_samples[-1]) + proposals_with_gt.append(proposals_per_image) + + # Log the number of fg/bg samples that are selected for training ROI heads + storage = get_event_storage() + storage.put_scalar("roi_head/num_fg_samples", np.mean(num_fg_samples)) + storage.put_scalar("roi_head/num_bg_samples", np.mean(num_bg_samples)) + + return proposals_with_gt diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/sampling.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/sampling.py new file mode 100644 index 0000000000000000000000000000000000000000..5c55fbf9f3cd985a179aeb8ad6ced524a31c3f6c --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/sampling.py @@ -0,0 +1,54 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import torch + +from annotator.oneformer.detectron2.layers import nonzero_tuple + +__all__ = ["subsample_labels"] + + +def subsample_labels( + labels: torch.Tensor, num_samples: int, positive_fraction: float, bg_label: int +): + """ + Return `num_samples` (or fewer, if not enough found) + random samples from `labels` which is a mixture of positives & negatives. + It will try to return as many positives as possible without + exceeding `positive_fraction * num_samples`, and then try to + fill the remaining slots with negatives. + + Args: + labels (Tensor): (N, ) label vector with values: + * -1: ignore + * bg_label: background ("negative") class + * otherwise: one or more foreground ("positive") classes + num_samples (int): The total number of labels with value >= 0 to return. + Values that are not sampled will be filled with -1 (ignore). + positive_fraction (float): The number of subsampled labels with values > 0 + is `min(num_positives, int(positive_fraction * num_samples))`. The number + of negatives sampled is `min(num_negatives, num_samples - num_positives_sampled)`. + In order words, if there are not enough positives, the sample is filled with + negatives. 
If there are also not enough negatives, then as many elements are + sampled as is possible. + bg_label (int): label index of background ("negative") class. + + Returns: + pos_idx, neg_idx (Tensor): + 1D vector of indices. The total length of both is `num_samples` or fewer. + """ + positive = nonzero_tuple((labels != -1) & (labels != bg_label))[0] + negative = nonzero_tuple(labels == bg_label)[0] + + num_pos = int(num_samples * positive_fraction) + # protect against not enough positive examples + num_pos = min(positive.numel(), num_pos) + num_neg = num_samples - num_pos + # protect against not enough negative examples + num_neg = min(negative.numel(), num_neg) + + # randomly select positive and negative examples + perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos] + perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg] + + pos_idx = positive[perm1] + neg_idx = negative[perm2] + return pos_idx, neg_idx diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/test_time_augmentation.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/test_time_augmentation.py new file mode 100644 index 0000000000000000000000000000000000000000..625f8ba9a01275df64967c097912538337ec91dc --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/modeling/test_time_augmentation.py @@ -0,0 +1,307 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import copy +import numpy as np +from contextlib import contextmanager +from itertools import count +from typing import List +import torch +from fvcore.transforms import HFlipTransform, NoOpTransform +from torch import nn +from torch.nn.parallel import DistributedDataParallel + +from annotator.oneformer.detectron2.config import configurable +from annotator.oneformer.detectron2.data.detection_utils import read_image +from annotator.oneformer.detectron2.data.transforms import ( + RandomFlip, + ResizeShortestEdge, + ResizeTransform, + apply_augmentations, +) +from annotator.oneformer.detectron2.structures import Boxes, Instances + +from .meta_arch import GeneralizedRCNN +from .postprocessing import detector_postprocess +from .roi_heads.fast_rcnn import fast_rcnn_inference_single_image + +__all__ = ["DatasetMapperTTA", "GeneralizedRCNNWithTTA"] + + +class DatasetMapperTTA: + """ + Implement test-time augmentation for detection data. + It is a callable which takes a dataset dict from a detection dataset, + and returns a list of dataset dicts where the images + are augmented from the input image by the transformations defined in the config. + This is used for test-time augmentation. + """ + + @configurable + def __init__(self, min_sizes: List[int], max_size: int, flip: bool): + """ + Args: + min_sizes: list of short-edge size to resize the image to + max_size: maximum height or width of resized images + flip: whether to apply flipping augmentation + """ + self.min_sizes = min_sizes + self.max_size = max_size + self.flip = flip + + @classmethod + def from_config(cls, cfg): + return { + "min_sizes": cfg.TEST.AUG.MIN_SIZES, + "max_size": cfg.TEST.AUG.MAX_SIZE, + "flip": cfg.TEST.AUG.FLIP, + } + + def __call__(self, dataset_dict): + """ + Args: + dict: a dict in standard model input format. See tutorials for details. + + Returns: + list[dict]: + a list of dicts, which contain augmented version of the input image. + The total number of dicts is ``len(min_sizes) * (2 if flip else 1)``. 
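Aside on `subsample_labels` above (illustration, not part of the diff): a minimal standalone sketch of the same sampling policy, without the library's `nonzero_tuple` helper or device handling; the toy labels and the helper name are assumptions for illustration:

    import torch

    def subsample_labels_sketch(labels, num_samples, positive_fraction, bg_label):
        # positives: labeled, non-background; negatives: background
        positive = torch.nonzero((labels != -1) & (labels != bg_label), as_tuple=True)[0]
        negative = torch.nonzero(labels == bg_label, as_tuple=True)[0]
        num_pos = min(positive.numel(), int(num_samples * positive_fraction))
        num_neg = min(negative.numel(), num_samples - num_pos)  # fill the rest with negatives
        pos_idx = positive[torch.randperm(positive.numel())[:num_pos]]
        neg_idx = negative[torch.randperm(negative.numel())[:num_neg]]
        return pos_idx, neg_idx

    # toy example: bg_label=80, two foreground labels, two ignored entries
    labels = torch.tensor([80, -1, 3, 80, 80, 7, 80, -1])
    pos, neg = subsample_labels_sketch(labels, num_samples=4, positive_fraction=0.5, bg_label=80)
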
+ Each dict has field "transforms" which is a TransformList, + containing the transforms that are used to generate this image. + """ + numpy_image = dataset_dict["image"].permute(1, 2, 0).numpy() + shape = numpy_image.shape + orig_shape = (dataset_dict["height"], dataset_dict["width"]) + if shape[:2] != orig_shape: + # It transforms the "original" image in the dataset to the input image + pre_tfm = ResizeTransform(orig_shape[0], orig_shape[1], shape[0], shape[1]) + else: + pre_tfm = NoOpTransform() + + # Create all combinations of augmentations to use + aug_candidates = [] # each element is a list[Augmentation] + for min_size in self.min_sizes: + resize = ResizeShortestEdge(min_size, self.max_size) + aug_candidates.append([resize]) # resize only + if self.flip: + flip = RandomFlip(prob=1.0) + aug_candidates.append([resize, flip]) # resize + flip + + # Apply all the augmentations + ret = [] + for aug in aug_candidates: + new_image, tfms = apply_augmentations(aug, np.copy(numpy_image)) + torch_image = torch.from_numpy(np.ascontiguousarray(new_image.transpose(2, 0, 1))) + + dic = copy.deepcopy(dataset_dict) + dic["transforms"] = pre_tfm + tfms + dic["image"] = torch_image + ret.append(dic) + return ret + + +class GeneralizedRCNNWithTTA(nn.Module): + """ + A GeneralizedRCNN with test-time augmentation enabled. + Its :meth:`__call__` method has the same interface as :meth:`GeneralizedRCNN.forward`. + """ + + def __init__(self, cfg, model, tta_mapper=None, batch_size=3): + """ + Args: + cfg (CfgNode): + model (GeneralizedRCNN): a GeneralizedRCNN to apply TTA on. + tta_mapper (callable): takes a dataset dict and returns a list of + augmented versions of the dataset dict. Defaults to + `DatasetMapperTTA(cfg)`. + batch_size (int): batch the augmented images into this batch size for inference. + """ + super().__init__() + if isinstance(model, DistributedDataParallel): + model = model.module + assert isinstance( + model, GeneralizedRCNN + ), "TTA is only supported on GeneralizedRCNN. Got a model of type {}".format(type(model)) + self.cfg = cfg.clone() + assert not self.cfg.MODEL.KEYPOINT_ON, "TTA for keypoint is not supported yet" + assert ( + not self.cfg.MODEL.LOAD_PROPOSALS + ), "TTA for pre-computed proposals is not supported yet" + + self.model = model + + if tta_mapper is None: + tta_mapper = DatasetMapperTTA(cfg) + self.tta_mapper = tta_mapper + self.batch_size = batch_size + + @contextmanager + def _turn_off_roi_heads(self, attrs): + """ + Open a context where some heads in `model.roi_heads` are temporarily turned off. + Args: + attr (list[str]): the attribute in `model.roi_heads` which can be used + to turn off a specific head, e.g., "mask_on", "keypoint_on". + """ + roi_heads = self.model.roi_heads + old = {} + for attr in attrs: + try: + old[attr] = getattr(roi_heads, attr) + except AttributeError: + # The head may not be implemented in certain ROIHeads + pass + + if len(old.keys()) == 0: + yield + else: + for attr in old.keys(): + setattr(roi_heads, attr, False) + yield + for attr in old.keys(): + setattr(roi_heads, attr, old[attr]) + + def _batch_inference(self, batched_inputs, detected_instances=None): + """ + Execute inference on a list of inputs, + using batch size = self.batch_size, instead of the length of the list. 
+ + Inputs & outputs have the same format as :meth:`GeneralizedRCNN.inference` + """ + if detected_instances is None: + detected_instances = [None] * len(batched_inputs) + + outputs = [] + inputs, instances = [], [] + for idx, input, instance in zip(count(), batched_inputs, detected_instances): + inputs.append(input) + instances.append(instance) + if len(inputs) == self.batch_size or idx == len(batched_inputs) - 1: + outputs.extend( + self.model.inference( + inputs, + instances if instances[0] is not None else None, + do_postprocess=False, + ) + ) + inputs, instances = [], [] + return outputs + + def __call__(self, batched_inputs): + """ + Same input/output format as :meth:`GeneralizedRCNN.forward` + """ + + def _maybe_read_image(dataset_dict): + ret = copy.copy(dataset_dict) + if "image" not in ret: + image = read_image(ret.pop("file_name"), self.model.input_format) + image = torch.from_numpy(np.ascontiguousarray(image.transpose(2, 0, 1))) # CHW + ret["image"] = image + if "height" not in ret and "width" not in ret: + ret["height"] = image.shape[1] + ret["width"] = image.shape[2] + return ret + + return [self._inference_one_image(_maybe_read_image(x)) for x in batched_inputs] + + def _inference_one_image(self, input): + """ + Args: + input (dict): one dataset dict with "image" field being a CHW tensor + + Returns: + dict: one output dict + """ + orig_shape = (input["height"], input["width"]) + augmented_inputs, tfms = self._get_augmented_inputs(input) + # Detect boxes from all augmented versions + with self._turn_off_roi_heads(["mask_on", "keypoint_on"]): + # temporarily disable roi heads + all_boxes, all_scores, all_classes = self._get_augmented_boxes(augmented_inputs, tfms) + # merge all detected boxes to obtain final predictions for boxes + merged_instances = self._merge_detections(all_boxes, all_scores, all_classes, orig_shape) + + if self.cfg.MODEL.MASK_ON: + # Use the detected boxes to obtain masks + augmented_instances = self._rescale_detected_boxes( + augmented_inputs, merged_instances, tfms + ) + # run forward on the detected boxes + outputs = self._batch_inference(augmented_inputs, augmented_instances) + # Delete now useless variables to avoid being out of memory + del augmented_inputs, augmented_instances + # average the predictions + merged_instances.pred_masks = self._reduce_pred_masks(outputs, tfms) + merged_instances = detector_postprocess(merged_instances, *orig_shape) + return {"instances": merged_instances} + else: + return {"instances": merged_instances} + + def _get_augmented_inputs(self, input): + augmented_inputs = self.tta_mapper(input) + tfms = [x.pop("transforms") for x in augmented_inputs] + return augmented_inputs, tfms + + def _get_augmented_boxes(self, augmented_inputs, tfms): + # 1: forward with all augmented images + outputs = self._batch_inference(augmented_inputs) + # 2: union the results + all_boxes = [] + all_scores = [] + all_classes = [] + for output, tfm in zip(outputs, tfms): + # Need to inverse the transforms on boxes, to obtain results on original image + pred_boxes = output.pred_boxes.tensor + original_pred_boxes = tfm.inverse().apply_box(pred_boxes.cpu().numpy()) + all_boxes.append(torch.from_numpy(original_pred_boxes).to(pred_boxes.device)) + + all_scores.extend(output.scores) + all_classes.extend(output.pred_classes) + all_boxes = torch.cat(all_boxes, dim=0) + return all_boxes, all_scores, all_classes + + def _merge_detections(self, all_boxes, all_scores, all_classes, shape_hw): + # select from the union of all results + num_boxes = 
len(all_boxes) + num_classes = self.cfg.MODEL.ROI_HEADS.NUM_CLASSES + # +1 because fast_rcnn_inference expects background scores as well + all_scores_2d = torch.zeros(num_boxes, num_classes + 1, device=all_boxes.device) + for idx, cls, score in zip(count(), all_classes, all_scores): + all_scores_2d[idx, cls] = score + + merged_instances, _ = fast_rcnn_inference_single_image( + all_boxes, + all_scores_2d, + shape_hw, + 1e-8, + self.cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST, + self.cfg.TEST.DETECTIONS_PER_IMAGE, + ) + + return merged_instances + + def _rescale_detected_boxes(self, augmented_inputs, merged_instances, tfms): + augmented_instances = [] + for input, tfm in zip(augmented_inputs, tfms): + # Transform the target box to the augmented image's coordinate space + pred_boxes = merged_instances.pred_boxes.tensor.cpu().numpy() + pred_boxes = torch.from_numpy(tfm.apply_box(pred_boxes)) + + aug_instances = Instances( + image_size=input["image"].shape[1:3], + pred_boxes=Boxes(pred_boxes), + pred_classes=merged_instances.pred_classes, + scores=merged_instances.scores, + ) + augmented_instances.append(aug_instances) + return augmented_instances + + def _reduce_pred_masks(self, outputs, tfms): + # Should apply inverse transforms on masks. + # We assume only resize & flip are used. pred_masks is a scale-invariant + # representation, so we handle flip specially + for output, tfm in zip(outputs, tfms): + if any(isinstance(t, HFlipTransform) for t in tfm.transforms): + output.pred_masks = output.pred_masks.flip(dims=[3]) + all_pred_masks = torch.stack([o.pred_masks for o in outputs], dim=0) + avg_pred_masks = torch.mean(all_pred_masks, dim=0) + return avg_pred_masks diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/projects/README.md b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/projects/README.md new file mode 100644 index 0000000000000000000000000000000000000000..95afe7ff8c8a9bd2f56621fcc3c1bdac11c256a9 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/projects/README.md @@ -0,0 +1,2 @@ + +Projects live in the [`projects` directory](../../projects) under the root of this repository, but not here. diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/projects/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/projects/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b2d0540b93ebbad78d6ff2cc0adc0fe8375816c2 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/projects/__init__.py @@ -0,0 +1,34 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
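For reference on `_reduce_pred_masks` above (illustration, not part of the diff): with only resize and horizontal flip in the TTA set, the horizontally flipped prediction just needs to be flipped back along the width axis before averaging. A minimal sketch with toy tensors; the shapes and flip flags are illustrative assumptions:

    import torch

    # Two augmented outputs for the same detections: (num_instances, 1, H, W) mask logits.
    masks_identity = torch.rand(3, 1, 28, 28)
    masks_hflip = torch.rand(3, 1, 28, 28)
    outputs = [masks_identity, masks_hflip]
    was_hflipped = [False, True]

    # Undo the horizontal flip on the flipped output, then average the aligned predictions.
    aligned = [m.flip(dims=[3]) if flipped else m for m, flipped in zip(outputs, was_hflipped)]
    avg_pred_masks = torch.stack(aligned, dim=0).mean(dim=0)  # same shape as one output
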
+import importlib.abc +import importlib.util +from pathlib import Path + +__all__ = [] + +_PROJECTS = { + "point_rend": "PointRend", + "deeplab": "DeepLab", + "panoptic_deeplab": "Panoptic-DeepLab", +} +_PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent / "projects" + +if _PROJECT_ROOT.is_dir(): + # This is true only for in-place installation (pip install -e, setup.py develop), + # where setup(package_dir=) does not work: https://github.com/pypa/setuptools/issues/230 + + class _D2ProjectsFinder(importlib.abc.MetaPathFinder): + def find_spec(self, name, path, target=None): + if not name.startswith("detectron2.projects."): + return + project_name = name.split(".")[-1] + project_dir = _PROJECTS.get(project_name) + if not project_dir: + return + target_file = _PROJECT_ROOT / f"{project_dir}/{project_name}/__init__.py" + if not target_file.is_file(): + return + return importlib.util.spec_from_file_location(name, target_file) + + import sys + + sys.meta_path.append(_D2ProjectsFinder()) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/projects/deeplab/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/projects/deeplab/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..dcd88ff0c09d630577e3ac9f8afb5324a80a7be4 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/projects/deeplab/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from .build_solver import build_lr_scheduler +from .config import add_deeplab_config +from .resnet import build_resnet_deeplab_backbone +from .semantic_seg import DeepLabV3Head, DeepLabV3PlusHead diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/projects/deeplab/build_solver.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/projects/deeplab/build_solver.py new file mode 100644 index 0000000000000000000000000000000000000000..19ab244380e8bcbb15c37f467bb58bc4f8dc17ec --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/projects/deeplab/build_solver.py @@ -0,0 +1,27 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import torch + +from annotator.oneformer.detectron2.config import CfgNode +from annotator.oneformer.detectron2.solver import LRScheduler +from annotator.oneformer.detectron2.solver import build_lr_scheduler as build_d2_lr_scheduler + +from .lr_scheduler import WarmupPolyLR + + +def build_lr_scheduler(cfg: CfgNode, optimizer: torch.optim.Optimizer) -> LRScheduler: + """ + Build a LR scheduler from config. 
+ """ + name = cfg.SOLVER.LR_SCHEDULER_NAME + if name == "WarmupPolyLR": + return WarmupPolyLR( + optimizer, + cfg.SOLVER.MAX_ITER, + warmup_factor=cfg.SOLVER.WARMUP_FACTOR, + warmup_iters=cfg.SOLVER.WARMUP_ITERS, + warmup_method=cfg.SOLVER.WARMUP_METHOD, + power=cfg.SOLVER.POLY_LR_POWER, + constant_ending=cfg.SOLVER.POLY_LR_CONSTANT_ENDING, + ) + else: + return build_d2_lr_scheduler(cfg, optimizer) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/projects/deeplab/config.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/projects/deeplab/config.py new file mode 100644 index 0000000000000000000000000000000000000000..5f5e45a9124e61c12d90cfc5032b268496891a4a --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/projects/deeplab/config.py @@ -0,0 +1,28 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + + +def add_deeplab_config(cfg): + """ + Add config for DeepLab. + """ + # We retry random cropping until no single category in semantic segmentation GT occupies more + # than `SINGLE_CATEGORY_MAX_AREA` part of the crop. + cfg.INPUT.CROP.SINGLE_CATEGORY_MAX_AREA = 1.0 + # Used for `poly` learning rate schedule. + cfg.SOLVER.POLY_LR_POWER = 0.9 + cfg.SOLVER.POLY_LR_CONSTANT_ENDING = 0.0 + # Loss type, choose from `cross_entropy`, `hard_pixel_mining`. + cfg.MODEL.SEM_SEG_HEAD.LOSS_TYPE = "hard_pixel_mining" + # DeepLab settings + cfg.MODEL.SEM_SEG_HEAD.PROJECT_FEATURES = ["res2"] + cfg.MODEL.SEM_SEG_HEAD.PROJECT_CHANNELS = [48] + cfg.MODEL.SEM_SEG_HEAD.ASPP_CHANNELS = 256 + cfg.MODEL.SEM_SEG_HEAD.ASPP_DILATIONS = [6, 12, 18] + cfg.MODEL.SEM_SEG_HEAD.ASPP_DROPOUT = 0.1 + cfg.MODEL.SEM_SEG_HEAD.USE_DEPTHWISE_SEPARABLE_CONV = False + # Backbone new configs + cfg.MODEL.RESNETS.RES4_DILATION = 1 + cfg.MODEL.RESNETS.RES5_MULTI_GRID = [1, 2, 4] + # ResNet stem type from: `basic`, `deeplab` + cfg.MODEL.RESNETS.STEM_TYPE = "deeplab" diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/projects/deeplab/loss.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/projects/deeplab/loss.py new file mode 100644 index 0000000000000000000000000000000000000000..3a43087b7c1a2b4d2b249fad117724dbd0f14fdd --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/projects/deeplab/loss.py @@ -0,0 +1,40 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import torch +import torch.nn as nn + + +class DeepLabCE(nn.Module): + """ + Hard pixel mining with cross entropy loss, for semantic segmentation. + This is used in TensorFlow DeepLab frameworks. + Paper: DeeperLab: Single-Shot Image Parser + Reference: https://github.com/tensorflow/models/blob/bd488858d610e44df69da6f89277e9de8a03722c/research/deeplab/utils/train_utils.py#L33 # noqa + Arguments: + ignore_label: Integer, label to ignore. + top_k_percent_pixels: Float, the value lies in [0.0, 1.0]. When its + value < 1.0, only compute the loss for the top k percent pixels + (e.g., the top 20% pixels). This is useful for hard pixel mining. + weight: Tensor, a manual rescaling weight given to each class. 
+ """ + + def __init__(self, ignore_label=-1, top_k_percent_pixels=1.0, weight=None): + super(DeepLabCE, self).__init__() + self.top_k_percent_pixels = top_k_percent_pixels + self.ignore_label = ignore_label + self.criterion = nn.CrossEntropyLoss( + weight=weight, ignore_index=ignore_label, reduction="none" + ) + + def forward(self, logits, labels, weights=None): + if weights is None: + pixel_losses = self.criterion(logits, labels).contiguous().view(-1) + else: + # Apply per-pixel loss weights. + pixel_losses = self.criterion(logits, labels) * weights + pixel_losses = pixel_losses.contiguous().view(-1) + if self.top_k_percent_pixels == 1.0: + return pixel_losses.mean() + + top_k_pixels = int(self.top_k_percent_pixels * pixel_losses.numel()) + pixel_losses, _ = torch.topk(pixel_losses, top_k_pixels) + return pixel_losses.mean() diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/projects/deeplab/lr_scheduler.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/projects/deeplab/lr_scheduler.py new file mode 100644 index 0000000000000000000000000000000000000000..9e15b0e19d03e955406fa1555d0d4f9d31d505c4 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/projects/deeplab/lr_scheduler.py @@ -0,0 +1,62 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import math +from typing import List +import torch + +from annotator.oneformer.detectron2.solver.lr_scheduler import LRScheduler, _get_warmup_factor_at_iter + +# NOTE: PyTorch's LR scheduler interface uses names that assume the LR changes +# only on epoch boundaries. We typically use iteration based schedules instead. +# As a result, "epoch" (e.g., as in self.last_epoch) should be understood to mean +# "iteration" instead. + +# FIXME: ideally this would be achieved with a CombinedLRScheduler, separating +# MultiStepLR with WarmupLR but the current LRScheduler design doesn't allow it. + + +class WarmupPolyLR(LRScheduler): + """ + Poly learning rate schedule used to train DeepLab. + Paper: DeepLab: Semantic Image Segmentation with Deep Convolutional Nets, + Atrous Convolution, and Fully Connected CRFs. + Reference: https://github.com/tensorflow/models/blob/21b73d22f3ed05b650e85ac50849408dd36de32e/research/deeplab/utils/train_utils.py#L337 # noqa + """ + + def __init__( + self, + optimizer: torch.optim.Optimizer, + max_iters: int, + warmup_factor: float = 0.001, + warmup_iters: int = 1000, + warmup_method: str = "linear", + last_epoch: int = -1, + power: float = 0.9, + constant_ending: float = 0.0, + ): + self.max_iters = max_iters + self.warmup_factor = warmup_factor + self.warmup_iters = warmup_iters + self.warmup_method = warmup_method + self.power = power + self.constant_ending = constant_ending + super().__init__(optimizer, last_epoch) + + def get_lr(self) -> List[float]: + warmup_factor = _get_warmup_factor_at_iter( + self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor + ) + if self.constant_ending > 0 and warmup_factor == 1.0: + # Constant ending lr. 
+ if ( + math.pow((1.0 - self.last_epoch / self.max_iters), self.power) + < self.constant_ending + ): + return [base_lr * self.constant_ending for base_lr in self.base_lrs] + return [ + base_lr * warmup_factor * math.pow((1.0 - self.last_epoch / self.max_iters), self.power) + for base_lr in self.base_lrs + ] + + def _compute_values(self) -> List[float]: + # The new interface + return self.get_lr() diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/projects/deeplab/resnet.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/projects/deeplab/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..28455d123a12f887400c19c263d08cc2ed08522e --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/projects/deeplab/resnet.py @@ -0,0 +1,158 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import fvcore.nn.weight_init as weight_init +import torch.nn.functional as F + +from annotator.oneformer.detectron2.layers import CNNBlockBase, Conv2d, get_norm +from annotator.oneformer.detectron2.modeling import BACKBONE_REGISTRY +from annotator.oneformer.detectron2.modeling.backbone.resnet import ( + BasicStem, + BottleneckBlock, + DeformBottleneckBlock, + ResNet, +) + + +class DeepLabStem(CNNBlockBase): + """ + The DeepLab ResNet stem (layers before the first residual block). + """ + + def __init__(self, in_channels=3, out_channels=128, norm="BN"): + """ + Args: + norm (str or callable): norm after the first conv layer. + See :func:`layers.get_norm` for supported format. + """ + super().__init__(in_channels, out_channels, 4) + self.in_channels = in_channels + self.conv1 = Conv2d( + in_channels, + out_channels // 2, + kernel_size=3, + stride=2, + padding=1, + bias=False, + norm=get_norm(norm, out_channels // 2), + ) + self.conv2 = Conv2d( + out_channels // 2, + out_channels // 2, + kernel_size=3, + stride=1, + padding=1, + bias=False, + norm=get_norm(norm, out_channels // 2), + ) + self.conv3 = Conv2d( + out_channels // 2, + out_channels, + kernel_size=3, + stride=1, + padding=1, + bias=False, + norm=get_norm(norm, out_channels), + ) + weight_init.c2_msra_fill(self.conv1) + weight_init.c2_msra_fill(self.conv2) + weight_init.c2_msra_fill(self.conv3) + + def forward(self, x): + x = self.conv1(x) + x = F.relu_(x) + x = self.conv2(x) + x = F.relu_(x) + x = self.conv3(x) + x = F.relu_(x) + x = F.max_pool2d(x, kernel_size=3, stride=2, padding=1) + return x + + +@BACKBONE_REGISTRY.register() +def build_resnet_deeplab_backbone(cfg, input_shape): + """ + Create a ResNet instance from config. + Returns: + ResNet: a :class:`ResNet` instance. + """ + # need registration of new blocks/stems? 
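Aside on `WarmupPolyLR` above (illustration, not part of the diff): a standalone sketch of the schedule it produces, assuming the usual linear warmup and omitting the `constant_ending` branch; the base LR and iteration counts are illustrative:

    import math

    def warmup_poly_lr(base_lr, it, max_iters, power=0.9,
                       warmup_iters=1000, warmup_factor=0.001):
        # Linear warmup followed by polynomial ("poly") decay.
        if it < warmup_iters:
            alpha = it / warmup_iters
            warmup = warmup_factor * (1 - alpha) + alpha
        else:
            warmup = 1.0
        return base_lr * warmup * math.pow(1.0 - it / max_iters, power)

    # e.g. base_lr=0.01 over 90k iterations: rises during warmup, then decays to ~0
    for it in [0, 500, 1000, 45000, 89999]:
        print(it, warmup_poly_lr(0.01, it, 90000))
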
+ norm = cfg.MODEL.RESNETS.NORM + if cfg.MODEL.RESNETS.STEM_TYPE == "basic": + stem = BasicStem( + in_channels=input_shape.channels, + out_channels=cfg.MODEL.RESNETS.STEM_OUT_CHANNELS, + norm=norm, + ) + elif cfg.MODEL.RESNETS.STEM_TYPE == "deeplab": + stem = DeepLabStem( + in_channels=input_shape.channels, + out_channels=cfg.MODEL.RESNETS.STEM_OUT_CHANNELS, + norm=norm, + ) + else: + raise ValueError("Unknown stem type: {}".format(cfg.MODEL.RESNETS.STEM_TYPE)) + + # fmt: off + freeze_at = cfg.MODEL.BACKBONE.FREEZE_AT + out_features = cfg.MODEL.RESNETS.OUT_FEATURES + depth = cfg.MODEL.RESNETS.DEPTH + num_groups = cfg.MODEL.RESNETS.NUM_GROUPS + width_per_group = cfg.MODEL.RESNETS.WIDTH_PER_GROUP + bottleneck_channels = num_groups * width_per_group + in_channels = cfg.MODEL.RESNETS.STEM_OUT_CHANNELS + out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS + stride_in_1x1 = cfg.MODEL.RESNETS.STRIDE_IN_1X1 + res4_dilation = cfg.MODEL.RESNETS.RES4_DILATION + res5_dilation = cfg.MODEL.RESNETS.RES5_DILATION + deform_on_per_stage = cfg.MODEL.RESNETS.DEFORM_ON_PER_STAGE + deform_modulated = cfg.MODEL.RESNETS.DEFORM_MODULATED + deform_num_groups = cfg.MODEL.RESNETS.DEFORM_NUM_GROUPS + res5_multi_grid = cfg.MODEL.RESNETS.RES5_MULTI_GRID + # fmt: on + assert res4_dilation in {1, 2}, "res4_dilation cannot be {}.".format(res4_dilation) + assert res5_dilation in {1, 2, 4}, "res5_dilation cannot be {}.".format(res5_dilation) + if res4_dilation == 2: + # Always dilate res5 if res4 is dilated. + assert res5_dilation == 4 + + num_blocks_per_stage = {50: [3, 4, 6, 3], 101: [3, 4, 23, 3], 152: [3, 8, 36, 3]}[depth] + + stages = [] + + # Avoid creating variables without gradients + # It consumes extra memory and may cause allreduce to fail + out_stage_idx = [{"res2": 2, "res3": 3, "res4": 4, "res5": 5}[f] for f in out_features] + max_stage_idx = max(out_stage_idx) + for idx, stage_idx in enumerate(range(2, max_stage_idx + 1)): + if stage_idx == 4: + dilation = res4_dilation + elif stage_idx == 5: + dilation = res5_dilation + else: + dilation = 1 + first_stride = 1 if idx == 0 or dilation > 1 else 2 + stage_kargs = { + "num_blocks": num_blocks_per_stage[idx], + "stride_per_block": [first_stride] + [1] * (num_blocks_per_stage[idx] - 1), + "in_channels": in_channels, + "out_channels": out_channels, + "norm": norm, + } + stage_kargs["bottleneck_channels"] = bottleneck_channels + stage_kargs["stride_in_1x1"] = stride_in_1x1 + stage_kargs["dilation"] = dilation + stage_kargs["num_groups"] = num_groups + if deform_on_per_stage[idx]: + stage_kargs["block_class"] = DeformBottleneckBlock + stage_kargs["deform_modulated"] = deform_modulated + stage_kargs["deform_num_groups"] = deform_num_groups + else: + stage_kargs["block_class"] = BottleneckBlock + if stage_idx == 5: + stage_kargs.pop("dilation") + stage_kargs["dilation_per_block"] = [dilation * mg for mg in res5_multi_grid] + blocks = ResNet.make_stage(**stage_kargs) + in_channels = out_channels + out_channels *= 2 + bottleneck_channels *= 2 + stages.append(blocks) + return ResNet(stem, stages, out_features=out_features).freeze(freeze_at) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/projects/deeplab/semantic_seg.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/projects/deeplab/semantic_seg.py new file mode 100644 index 0000000000000000000000000000000000000000..36c2643397f6eeb5412ed333c7de79ded926a6d1 --- /dev/null +++ 
b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/projects/deeplab/semantic_seg.py @@ -0,0 +1,348 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from typing import Callable, Dict, List, Optional, Tuple, Union +import fvcore.nn.weight_init as weight_init +import torch +from torch import nn +from torch.nn import functional as F + +from annotator.oneformer.detectron2.config import configurable +from annotator.oneformer.detectron2.layers import ASPP, Conv2d, DepthwiseSeparableConv2d, ShapeSpec, get_norm +from annotator.oneformer.detectron2.modeling import SEM_SEG_HEADS_REGISTRY + +from .loss import DeepLabCE + + +@SEM_SEG_HEADS_REGISTRY.register() +class DeepLabV3PlusHead(nn.Module): + """ + A semantic segmentation head described in :paper:`DeepLabV3+`. + """ + + @configurable + def __init__( + self, + input_shape: Dict[str, ShapeSpec], + *, + project_channels: List[int], + aspp_dilations: List[int], + aspp_dropout: float, + decoder_channels: List[int], + common_stride: int, + norm: Union[str, Callable], + train_size: Optional[Tuple], + loss_weight: float = 1.0, + loss_type: str = "cross_entropy", + ignore_value: int = -1, + num_classes: Optional[int] = None, + use_depthwise_separable_conv: bool = False, + ): + """ + NOTE: this interface is experimental. + + Args: + input_shape: shape of the input features. They will be ordered by stride + and the last one (with largest stride) is used as the input to the + decoder (i.e. the ASPP module); the rest are low-level feature for + the intermediate levels of decoder. + project_channels (list[int]): a list of low-level feature channels. + The length should be len(in_features) - 1. + aspp_dilations (list(int)): a list of 3 dilations in ASPP. + aspp_dropout (float): apply dropout on the output of ASPP. + decoder_channels (list[int]): a list of output channels of each + decoder stage. It should have the same length as "in_features" + (each element in "in_features" corresponds to one decoder stage). + common_stride (int): output stride of decoder. + norm (str or callable): normalization for all conv layers. + train_size (tuple): (height, width) of training images. + loss_weight (float): loss weight. + loss_type (str): type of loss function, 2 opptions: + (1) "cross_entropy" is the standard cross entropy loss. + (2) "hard_pixel_mining" is the loss in DeepLab that samples + top k% hardest pixels. + ignore_value (int): category to be ignored during training. + num_classes (int): number of classes, if set to None, the decoder + will not construct a predictor. + use_depthwise_separable_conv (bool): use DepthwiseSeparableConv2d + in ASPP and decoder. 
+ """ + super().__init__() + input_shape = sorted(input_shape.items(), key=lambda x: x[1].stride) + + # fmt: off + self.in_features = [k for k, v in input_shape] # starting from "res2" to "res5" + in_channels = [x[1].channels for x in input_shape] + in_strides = [x[1].stride for x in input_shape] + aspp_channels = decoder_channels[-1] + self.ignore_value = ignore_value + self.common_stride = common_stride # output stride + self.loss_weight = loss_weight + self.loss_type = loss_type + self.decoder_only = num_classes is None + self.use_depthwise_separable_conv = use_depthwise_separable_conv + # fmt: on + + assert ( + len(project_channels) == len(self.in_features) - 1 + ), "Expected {} project_channels, got {}".format( + len(self.in_features) - 1, len(project_channels) + ) + assert len(decoder_channels) == len( + self.in_features + ), "Expected {} decoder_channels, got {}".format( + len(self.in_features), len(decoder_channels) + ) + self.decoder = nn.ModuleDict() + + use_bias = norm == "" + for idx, in_channel in enumerate(in_channels): + decoder_stage = nn.ModuleDict() + + if idx == len(self.in_features) - 1: + # ASPP module + if train_size is not None: + train_h, train_w = train_size + encoder_stride = in_strides[-1] + if train_h % encoder_stride or train_w % encoder_stride: + raise ValueError("Crop size need to be divisible by encoder stride.") + pool_h = train_h // encoder_stride + pool_w = train_w // encoder_stride + pool_kernel_size = (pool_h, pool_w) + else: + pool_kernel_size = None + project_conv = ASPP( + in_channel, + aspp_channels, + aspp_dilations, + norm=norm, + activation=F.relu, + pool_kernel_size=pool_kernel_size, + dropout=aspp_dropout, + use_depthwise_separable_conv=use_depthwise_separable_conv, + ) + fuse_conv = None + else: + project_conv = Conv2d( + in_channel, + project_channels[idx], + kernel_size=1, + bias=use_bias, + norm=get_norm(norm, project_channels[idx]), + activation=F.relu, + ) + weight_init.c2_xavier_fill(project_conv) + if use_depthwise_separable_conv: + # We use a single 5x5 DepthwiseSeparableConv2d to replace + # 2 3x3 Conv2d since they have the same receptive field, + # proposed in :paper:`Panoptic-DeepLab`. 
+ fuse_conv = DepthwiseSeparableConv2d( + project_channels[idx] + decoder_channels[idx + 1], + decoder_channels[idx], + kernel_size=5, + padding=2, + norm1=norm, + activation1=F.relu, + norm2=norm, + activation2=F.relu, + ) + else: + fuse_conv = nn.Sequential( + Conv2d( + project_channels[idx] + decoder_channels[idx + 1], + decoder_channels[idx], + kernel_size=3, + padding=1, + bias=use_bias, + norm=get_norm(norm, decoder_channels[idx]), + activation=F.relu, + ), + Conv2d( + decoder_channels[idx], + decoder_channels[idx], + kernel_size=3, + padding=1, + bias=use_bias, + norm=get_norm(norm, decoder_channels[idx]), + activation=F.relu, + ), + ) + weight_init.c2_xavier_fill(fuse_conv[0]) + weight_init.c2_xavier_fill(fuse_conv[1]) + + decoder_stage["project_conv"] = project_conv + decoder_stage["fuse_conv"] = fuse_conv + + self.decoder[self.in_features[idx]] = decoder_stage + + if not self.decoder_only: + self.predictor = Conv2d( + decoder_channels[0], num_classes, kernel_size=1, stride=1, padding=0 + ) + nn.init.normal_(self.predictor.weight, 0, 0.001) + nn.init.constant_(self.predictor.bias, 0) + + if self.loss_type == "cross_entropy": + self.loss = nn.CrossEntropyLoss(reduction="mean", ignore_index=self.ignore_value) + elif self.loss_type == "hard_pixel_mining": + self.loss = DeepLabCE(ignore_label=self.ignore_value, top_k_percent_pixels=0.2) + else: + raise ValueError("Unexpected loss type: %s" % self.loss_type) + + @classmethod + def from_config(cls, cfg, input_shape): + if cfg.INPUT.CROP.ENABLED: + assert cfg.INPUT.CROP.TYPE == "absolute" + train_size = cfg.INPUT.CROP.SIZE + else: + train_size = None + decoder_channels = [cfg.MODEL.SEM_SEG_HEAD.CONVS_DIM] * ( + len(cfg.MODEL.SEM_SEG_HEAD.IN_FEATURES) - 1 + ) + [cfg.MODEL.SEM_SEG_HEAD.ASPP_CHANNELS] + ret = dict( + input_shape={ + k: v for k, v in input_shape.items() if k in cfg.MODEL.SEM_SEG_HEAD.IN_FEATURES + }, + project_channels=cfg.MODEL.SEM_SEG_HEAD.PROJECT_CHANNELS, + aspp_dilations=cfg.MODEL.SEM_SEG_HEAD.ASPP_DILATIONS, + aspp_dropout=cfg.MODEL.SEM_SEG_HEAD.ASPP_DROPOUT, + decoder_channels=decoder_channels, + common_stride=cfg.MODEL.SEM_SEG_HEAD.COMMON_STRIDE, + norm=cfg.MODEL.SEM_SEG_HEAD.NORM, + train_size=train_size, + loss_weight=cfg.MODEL.SEM_SEG_HEAD.LOSS_WEIGHT, + loss_type=cfg.MODEL.SEM_SEG_HEAD.LOSS_TYPE, + ignore_value=cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE, + num_classes=cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES, + use_depthwise_separable_conv=cfg.MODEL.SEM_SEG_HEAD.USE_DEPTHWISE_SEPARABLE_CONV, + ) + return ret + + def forward(self, features, targets=None): + """ + Returns: + In training, returns (None, dict of losses) + In inference, returns (CxHxW logits, {}) + """ + y = self.layers(features) + if self.decoder_only: + # Output from self.layers() only contains decoder feature. 
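Aside on the decoder built above (illustration, not part of the diff): a compressed two-level sketch of the top-down fusion done in `layers()`, using plain convs as stand-ins for the ASPP module and the `fuse_conv` stack; all channel counts and feature shapes are illustrative assumptions:

    import torch
    import torch.nn.functional as F
    from torch import nn

    # Toy features at strides 4 and 32, as for "res2" and "res5".
    feats = {"res2": torch.rand(1, 256, 128, 128), "res5": torch.rand(1, 2048, 16, 16)}
    aspp_like = nn.Conv2d(2048, 256, 1)      # stand-in for the ASPP project_conv
    project = nn.Conv2d(256, 48, 1)          # low-level 1x1 projection
    fuse = nn.Conv2d(48 + 256, 256, 3, padding=1)

    y = aspp_like(feats["res5"])             # deepest level: no fusion
    low = project(feats["res2"])             # project the low-level feature
    y = F.interpolate(y, size=low.shape[2:], mode="bilinear", align_corners=False)
    y = fuse(torch.cat([low, y], dim=1))     # concat + fuse, as in layers()
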
+ return y + if self.training: + return None, self.losses(y, targets) + else: + y = F.interpolate( + y, scale_factor=self.common_stride, mode="bilinear", align_corners=False + ) + return y, {} + + def layers(self, features): + # Reverse feature maps into top-down order (from low to high resolution) + for f in self.in_features[::-1]: + x = features[f] + proj_x = self.decoder[f]["project_conv"](x) + if self.decoder[f]["fuse_conv"] is None: + # This is aspp module + y = proj_x + else: + # Upsample y + y = F.interpolate(y, size=proj_x.size()[2:], mode="bilinear", align_corners=False) + y = torch.cat([proj_x, y], dim=1) + y = self.decoder[f]["fuse_conv"](y) + if not self.decoder_only: + y = self.predictor(y) + return y + + def losses(self, predictions, targets): + predictions = F.interpolate( + predictions, scale_factor=self.common_stride, mode="bilinear", align_corners=False + ) + loss = self.loss(predictions, targets) + losses = {"loss_sem_seg": loss * self.loss_weight} + return losses + + +@SEM_SEG_HEADS_REGISTRY.register() +class DeepLabV3Head(nn.Module): + """ + A semantic segmentation head described in :paper:`DeepLabV3`. + """ + + def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]): + super().__init__() + + # fmt: off + self.in_features = cfg.MODEL.SEM_SEG_HEAD.IN_FEATURES + in_channels = [input_shape[f].channels for f in self.in_features] + aspp_channels = cfg.MODEL.SEM_SEG_HEAD.ASPP_CHANNELS + aspp_dilations = cfg.MODEL.SEM_SEG_HEAD.ASPP_DILATIONS + self.ignore_value = cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE + num_classes = cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES + conv_dims = cfg.MODEL.SEM_SEG_HEAD.CONVS_DIM + self.common_stride = cfg.MODEL.SEM_SEG_HEAD.COMMON_STRIDE # output stride + norm = cfg.MODEL.SEM_SEG_HEAD.NORM + self.loss_weight = cfg.MODEL.SEM_SEG_HEAD.LOSS_WEIGHT + self.loss_type = cfg.MODEL.SEM_SEG_HEAD.LOSS_TYPE + train_crop_size = cfg.INPUT.CROP.SIZE + aspp_dropout = cfg.MODEL.SEM_SEG_HEAD.ASPP_DROPOUT + use_depthwise_separable_conv = cfg.MODEL.SEM_SEG_HEAD.USE_DEPTHWISE_SEPARABLE_CONV + # fmt: on + + assert len(self.in_features) == 1 + assert len(in_channels) == 1 + + # ASPP module + if cfg.INPUT.CROP.ENABLED: + assert cfg.INPUT.CROP.TYPE == "absolute" + train_crop_h, train_crop_w = train_crop_size + if train_crop_h % self.common_stride or train_crop_w % self.common_stride: + raise ValueError("Crop size need to be divisible by output stride.") + pool_h = train_crop_h // self.common_stride + pool_w = train_crop_w // self.common_stride + pool_kernel_size = (pool_h, pool_w) + else: + pool_kernel_size = None + self.aspp = ASPP( + in_channels[0], + aspp_channels, + aspp_dilations, + norm=norm, + activation=F.relu, + pool_kernel_size=pool_kernel_size, + dropout=aspp_dropout, + use_depthwise_separable_conv=use_depthwise_separable_conv, + ) + + self.predictor = Conv2d(conv_dims, num_classes, kernel_size=1, stride=1, padding=0) + nn.init.normal_(self.predictor.weight, 0, 0.001) + nn.init.constant_(self.predictor.bias, 0) + + if self.loss_type == "cross_entropy": + self.loss = nn.CrossEntropyLoss(reduction="mean", ignore_index=self.ignore_value) + elif self.loss_type == "hard_pixel_mining": + self.loss = DeepLabCE(ignore_label=self.ignore_value, top_k_percent_pixels=0.2) + else: + raise ValueError("Unexpected loss type: %s" % self.loss_type) + + def forward(self, features, targets=None): + """ + Returns: + In training, returns (None, dict of losses) + In inference, returns (CxHxW logits, {}) + """ + x = features[self.in_features[0]] + x = self.aspp(x) + x = self.predictor(x) + if 
self.training: + return None, self.losses(x, targets) + else: + x = F.interpolate( + x, scale_factor=self.common_stride, mode="bilinear", align_corners=False + ) + return x, {} + + def losses(self, predictions, targets): + predictions = F.interpolate( + predictions, scale_factor=self.common_stride, mode="bilinear", align_corners=False + ) + loss = self.loss(predictions, targets) + losses = {"loss_sem_seg": loss * self.loss_weight} + return losses diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/solver/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/solver/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7e36c64f60f38f41d01dd2c9fb30364489a03841 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/solver/__init__.py @@ -0,0 +1,11 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from .build import build_lr_scheduler, build_optimizer, get_default_optimizer_params +from .lr_scheduler import ( + LRMultiplier, + LRScheduler, + WarmupCosineLR, + WarmupMultiStepLR, + WarmupParamScheduler, +) + +__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/solver/build.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/solver/build.py new file mode 100644 index 0000000000000000000000000000000000000000..5e526df1e05b1ad8943c18cc7a1e5e43c58d57c8 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/solver/build.py @@ -0,0 +1,310 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import copy +import itertools +import logging +from collections import defaultdict +from enum import Enum +from typing import Any, Callable, Dict, Iterable, List, Optional, Set, Type, Union +import torch +from fvcore.common.param_scheduler import ( + CosineParamScheduler, + MultiStepParamScheduler, + StepWithFixedGammaParamScheduler, +) + +from annotator.oneformer.detectron2.config import CfgNode +from annotator.oneformer.detectron2.utils.env import TORCH_VERSION + +from .lr_scheduler import LRMultiplier, LRScheduler, WarmupParamScheduler + +_GradientClipperInput = Union[torch.Tensor, Iterable[torch.Tensor]] +_GradientClipper = Callable[[_GradientClipperInput], None] + + +class GradientClipType(Enum): + VALUE = "value" + NORM = "norm" + + +def _create_gradient_clipper(cfg: CfgNode) -> _GradientClipper: + """ + Creates gradient clipping closure to clip by value or by norm, + according to the provided config. 
+ """ + cfg = copy.deepcopy(cfg) + + def clip_grad_norm(p: _GradientClipperInput): + torch.nn.utils.clip_grad_norm_(p, cfg.CLIP_VALUE, cfg.NORM_TYPE) + + def clip_grad_value(p: _GradientClipperInput): + torch.nn.utils.clip_grad_value_(p, cfg.CLIP_VALUE) + + _GRADIENT_CLIP_TYPE_TO_CLIPPER = { + GradientClipType.VALUE: clip_grad_value, + GradientClipType.NORM: clip_grad_norm, + } + return _GRADIENT_CLIP_TYPE_TO_CLIPPER[GradientClipType(cfg.CLIP_TYPE)] + + +def _generate_optimizer_class_with_gradient_clipping( + optimizer: Type[torch.optim.Optimizer], + *, + per_param_clipper: Optional[_GradientClipper] = None, + global_clipper: Optional[_GradientClipper] = None, +) -> Type[torch.optim.Optimizer]: + """ + Dynamically creates a new type that inherits the type of a given instance + and overrides the `step` method to add gradient clipping + """ + assert ( + per_param_clipper is None or global_clipper is None + ), "Not allowed to use both per-parameter clipping and global clipping" + + def optimizer_wgc_step(self, closure=None): + if per_param_clipper is not None: + for group in self.param_groups: + for p in group["params"]: + per_param_clipper(p) + else: + # global clipper for future use with detr + # (https://github.com/facebookresearch/detr/pull/287) + all_params = itertools.chain(*[g["params"] for g in self.param_groups]) + global_clipper(all_params) + super(type(self), self).step(closure) + + OptimizerWithGradientClip = type( + optimizer.__name__ + "WithGradientClip", + (optimizer,), + {"step": optimizer_wgc_step}, + ) + return OptimizerWithGradientClip + + +def maybe_add_gradient_clipping( + cfg: CfgNode, optimizer: Type[torch.optim.Optimizer] +) -> Type[torch.optim.Optimizer]: + """ + If gradient clipping is enabled through config options, wraps the existing + optimizer type to become a new dynamically created class OptimizerWithGradientClip + that inherits the given optimizer and overrides the `step` method to + include gradient clipping. + + Args: + cfg: CfgNode, configuration options + optimizer: type. A subclass of torch.optim.Optimizer + + Return: + type: either the input `optimizer` (if gradient clipping is disabled), or + a subclass of it with gradient clipping included in the `step` method. + """ + if not cfg.SOLVER.CLIP_GRADIENTS.ENABLED: + return optimizer + if isinstance(optimizer, torch.optim.Optimizer): + optimizer_type = type(optimizer) + else: + assert issubclass(optimizer, torch.optim.Optimizer), optimizer + optimizer_type = optimizer + + grad_clipper = _create_gradient_clipper(cfg.SOLVER.CLIP_GRADIENTS) + OptimizerWithGradientClip = _generate_optimizer_class_with_gradient_clipping( + optimizer_type, per_param_clipper=grad_clipper + ) + if isinstance(optimizer, torch.optim.Optimizer): + optimizer.__class__ = OptimizerWithGradientClip # a bit hacky, not recommended + return optimizer + else: + return OptimizerWithGradientClip + + +def build_optimizer(cfg: CfgNode, model: torch.nn.Module) -> torch.optim.Optimizer: + """ + Build an optimizer from config. 
+ """ + params = get_default_optimizer_params( + model, + base_lr=cfg.SOLVER.BASE_LR, + weight_decay_norm=cfg.SOLVER.WEIGHT_DECAY_NORM, + bias_lr_factor=cfg.SOLVER.BIAS_LR_FACTOR, + weight_decay_bias=cfg.SOLVER.WEIGHT_DECAY_BIAS, + ) + sgd_args = { + "params": params, + "lr": cfg.SOLVER.BASE_LR, + "momentum": cfg.SOLVER.MOMENTUM, + "nesterov": cfg.SOLVER.NESTEROV, + "weight_decay": cfg.SOLVER.WEIGHT_DECAY, + } + if TORCH_VERSION >= (1, 12): + sgd_args["foreach"] = True + return maybe_add_gradient_clipping(cfg, torch.optim.SGD(**sgd_args)) + + +def get_default_optimizer_params( + model: torch.nn.Module, + base_lr: Optional[float] = None, + weight_decay: Optional[float] = None, + weight_decay_norm: Optional[float] = None, + bias_lr_factor: Optional[float] = 1.0, + weight_decay_bias: Optional[float] = None, + lr_factor_func: Optional[Callable] = None, + overrides: Optional[Dict[str, Dict[str, float]]] = None, +) -> List[Dict[str, Any]]: + """ + Get default param list for optimizer, with support for a few types of + overrides. If no overrides needed, this is equivalent to `model.parameters()`. + + Args: + base_lr: lr for every group by default. Can be omitted to use the one in optimizer. + weight_decay: weight decay for every group by default. Can be omitted to use the one + in optimizer. + weight_decay_norm: override weight decay for params in normalization layers + bias_lr_factor: multiplier of lr for bias parameters. + weight_decay_bias: override weight decay for bias parameters. + lr_factor_func: function to calculate lr decay rate by mapping the parameter names to + corresponding lr decay rate. Note that setting this option requires + also setting ``base_lr``. + overrides: if not `None`, provides values for optimizer hyperparameters + (LR, weight decay) for module parameters with a given name; e.g. + ``{"embedding": {"lr": 0.01, "weight_decay": 0.1}}`` will set the LR and + weight decay values for all module parameters named `embedding`. + + For common detection models, ``weight_decay_norm`` is the only option + needed to be set. ``bias_lr_factor,weight_decay_bias`` are legacy settings + from Detectron1 that are not found useful. + + Example: + :: + torch.optim.SGD(get_default_optimizer_params(model, weight_decay_norm=0), + lr=0.01, weight_decay=1e-4, momentum=0.9) + """ + if overrides is None: + overrides = {} + defaults = {} + if base_lr is not None: + defaults["lr"] = base_lr + if weight_decay is not None: + defaults["weight_decay"] = weight_decay + bias_overrides = {} + if bias_lr_factor is not None and bias_lr_factor != 1.0: + # NOTE: unlike Detectron v1, we now by default make bias hyperparameters + # exactly the same as regular weights. 
+ if base_lr is None: + raise ValueError("bias_lr_factor requires base_lr") + bias_overrides["lr"] = base_lr * bias_lr_factor + if weight_decay_bias is not None: + bias_overrides["weight_decay"] = weight_decay_bias + if len(bias_overrides): + if "bias" in overrides: + raise ValueError("Conflicting overrides for 'bias'") + overrides["bias"] = bias_overrides + if lr_factor_func is not None: + if base_lr is None: + raise ValueError("lr_factor_func requires base_lr") + norm_module_types = ( + torch.nn.BatchNorm1d, + torch.nn.BatchNorm2d, + torch.nn.BatchNorm3d, + torch.nn.SyncBatchNorm, + # NaiveSyncBatchNorm inherits from BatchNorm2d + torch.nn.GroupNorm, + torch.nn.InstanceNorm1d, + torch.nn.InstanceNorm2d, + torch.nn.InstanceNorm3d, + torch.nn.LayerNorm, + torch.nn.LocalResponseNorm, + ) + params: List[Dict[str, Any]] = [] + memo: Set[torch.nn.parameter.Parameter] = set() + for module_name, module in model.named_modules(): + for module_param_name, value in module.named_parameters(recurse=False): + if not value.requires_grad: + continue + # Avoid duplicating parameters + if value in memo: + continue + memo.add(value) + + hyperparams = copy.copy(defaults) + if isinstance(module, norm_module_types) and weight_decay_norm is not None: + hyperparams["weight_decay"] = weight_decay_norm + if lr_factor_func is not None: + hyperparams["lr"] *= lr_factor_func(f"{module_name}.{module_param_name}") + + hyperparams.update(overrides.get(module_param_name, {})) + params.append({"params": [value], **hyperparams}) + return reduce_param_groups(params) + + +def _expand_param_groups(params: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + # Transform parameter groups into per-parameter structure. + # Later items in `params` can overwrite parameters set in previous items. + ret = defaultdict(dict) + for item in params: + assert "params" in item + cur_params = {x: y for x, y in item.items() if x != "params"} + for param in item["params"]: + ret[param].update({"params": [param], **cur_params}) + return list(ret.values()) + + +def reduce_param_groups(params: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + # Reorganize the parameter groups and merge duplicated groups. + # The number of parameter groups needs to be as small as possible in order + # to efficiently use the PyTorch multi-tensor optimizer. Therefore instead + # of using a parameter_group per single parameter, we reorganize the + # parameter groups and merge duplicated groups. This approach speeds + # up multi-tensor optimizer significantly. + params = _expand_param_groups(params) + groups = defaultdict(list) # re-group all parameter groups by their hyperparams + for item in params: + cur_params = tuple((x, y) for x, y in item.items() if x != "params") + groups[cur_params].extend(item["params"]) + ret = [] + for param_keys, param_values in groups.items(): + cur = {kv[0]: kv[1] for kv in param_keys} + cur["params"] = param_values + ret.append(cur) + return ret + + +def build_lr_scheduler(cfg: CfgNode, optimizer: torch.optim.Optimizer) -> LRScheduler: + """ + Build a LR scheduler from config. + """ + name = cfg.SOLVER.LR_SCHEDULER_NAME + + if name == "WarmupMultiStepLR": + steps = [x for x in cfg.SOLVER.STEPS if x <= cfg.SOLVER.MAX_ITER] + if len(steps) != len(cfg.SOLVER.STEPS): + logger = logging.getLogger(__name__) + logger.warning( + "SOLVER.STEPS contains values larger than SOLVER.MAX_ITER. " + "These values will be ignored." 
+ ) + sched = MultiStepParamScheduler( + values=[cfg.SOLVER.GAMMA**k for k in range(len(steps) + 1)], + milestones=steps, + num_updates=cfg.SOLVER.MAX_ITER, + ) + elif name == "WarmupCosineLR": + end_value = cfg.SOLVER.BASE_LR_END / cfg.SOLVER.BASE_LR + assert end_value >= 0.0 and end_value <= 1.0, end_value + sched = CosineParamScheduler(1, end_value) + elif name == "WarmupStepWithFixedGammaLR": + sched = StepWithFixedGammaParamScheduler( + base_value=1.0, + gamma=cfg.SOLVER.GAMMA, + num_decays=cfg.SOLVER.NUM_DECAYS, + num_updates=cfg.SOLVER.MAX_ITER, + ) + else: + raise ValueError("Unknown LR scheduler: {}".format(name)) + + sched = WarmupParamScheduler( + sched, + cfg.SOLVER.WARMUP_FACTOR, + min(cfg.SOLVER.WARMUP_ITERS / cfg.SOLVER.MAX_ITER, 1.0), + cfg.SOLVER.WARMUP_METHOD, + cfg.SOLVER.RESCALE_INTERVAL, + ) + return LRMultiplier(optimizer, multiplier=sched, max_iter=cfg.SOLVER.MAX_ITER) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/solver/lr_scheduler.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/solver/lr_scheduler.py new file mode 100644 index 0000000000000000000000000000000000000000..d6aed2bb20c418bf6cc5594c1244b241796d7086 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/solver/lr_scheduler.py @@ -0,0 +1,246 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import logging +import math +from bisect import bisect_right +from typing import List +import torch +from fvcore.common.param_scheduler import ( + CompositeParamScheduler, + ConstantParamScheduler, + LinearParamScheduler, + ParamScheduler, +) + +try: + from torch.optim.lr_scheduler import LRScheduler +except ImportError: + from torch.optim.lr_scheduler import _LRScheduler as LRScheduler + +logger = logging.getLogger(__name__) + + +class WarmupParamScheduler(CompositeParamScheduler): + """ + Add an initial warmup stage to another scheduler. + """ + + def __init__( + self, + scheduler: ParamScheduler, + warmup_factor: float, + warmup_length: float, + warmup_method: str = "linear", + rescale_interval: bool = False, + ): + """ + Args: + scheduler: warmup will be added at the beginning of this scheduler + warmup_factor: the factor w.r.t the initial value of ``scheduler``, e.g. 0.001 + warmup_length: the relative length (in [0, 1]) of warmup steps w.r.t the entire + training, e.g. 0.01 + warmup_method: one of "linear" or "constant" + rescale_interval: whether we will rescale the interval of the scheduler after + warmup + """ + end_value = scheduler(warmup_length) # the value to reach when warmup ends + start_value = warmup_factor * scheduler(0.0) + if warmup_method == "constant": + warmup = ConstantParamScheduler(start_value) + elif warmup_method == "linear": + warmup = LinearParamScheduler(start_value, end_value) + else: + raise ValueError("Unknown warmup method: {}".format(warmup_method)) + super().__init__( + [warmup, scheduler], + interval_scaling=["rescaled", "rescaled" if rescale_interval else "fixed"], + lengths=[warmup_length, 1 - warmup_length], + ) + + +class LRMultiplier(LRScheduler): + """ + A LRScheduler which uses fvcore :class:`ParamScheduler` to multiply the + learning rate of each param in the optimizer. + Every step, the learning rate of each parameter becomes its initial value + multiplied by the output of the given :class:`ParamScheduler`. + + The absolute learning rate value of each parameter can be different. 
+ This scheduler can be used as long as the relative scale among them do + not change during training. + + Examples: + :: + LRMultiplier( + opt, + WarmupParamScheduler( + MultiStepParamScheduler( + [1, 0.1, 0.01], + milestones=[60000, 80000], + num_updates=90000, + ), 0.001, 100 / 90000 + ), + max_iter=90000 + ) + """ + + # NOTES: in the most general case, every LR can use its own scheduler. + # Supporting this requires interaction with the optimizer when its parameter + # group is initialized. For example, classyvision implements its own optimizer + # that allows different schedulers for every parameter group. + # To avoid this complexity, we use this class to support the most common cases + # where the relative scale among all LRs stay unchanged during training. In this + # case we only need a total of one scheduler that defines the relative LR multiplier. + + def __init__( + self, + optimizer: torch.optim.Optimizer, + multiplier: ParamScheduler, + max_iter: int, + last_iter: int = -1, + ): + """ + Args: + optimizer, last_iter: See ``torch.optim.lr_scheduler.LRScheduler``. + ``last_iter`` is the same as ``last_epoch``. + multiplier: a fvcore ParamScheduler that defines the multiplier on + every LR of the optimizer + max_iter: the total number of training iterations + """ + if not isinstance(multiplier, ParamScheduler): + raise ValueError( + "_LRMultiplier(multiplier=) must be an instance of fvcore " + f"ParamScheduler. Got {multiplier} instead." + ) + self._multiplier = multiplier + self._max_iter = max_iter + super().__init__(optimizer, last_epoch=last_iter) + + def state_dict(self): + # fvcore schedulers are stateless. Only keep pytorch scheduler states + return {"base_lrs": self.base_lrs, "last_epoch": self.last_epoch} + + def get_lr(self) -> List[float]: + multiplier = self._multiplier(self.last_epoch / self._max_iter) + return [base_lr * multiplier for base_lr in self.base_lrs] + + +""" +Content below is no longer needed! +""" + +# NOTE: PyTorch's LR scheduler interface uses names that assume the LR changes +# only on epoch boundaries. We typically use iteration based schedules instead. +# As a result, "epoch" (e.g., as in self.last_epoch) should be understood to mean +# "iteration" instead. + +# FIXME: ideally this would be achieved with a CombinedLRScheduler, separating +# MultiStepLR with WarmupLR but the current LRScheduler design doesn't allow it. + + +class WarmupMultiStepLR(LRScheduler): + def __init__( + self, + optimizer: torch.optim.Optimizer, + milestones: List[int], + gamma: float = 0.1, + warmup_factor: float = 0.001, + warmup_iters: int = 1000, + warmup_method: str = "linear", + last_epoch: int = -1, + ): + logger.warning( + "WarmupMultiStepLR is deprecated! Use LRMultipilier with fvcore ParamScheduler instead!" + ) + if not list(milestones) == sorted(milestones): + raise ValueError( + "Milestones should be a list of" " increasing integers. 
Got {}", milestones + ) + self.milestones = milestones + self.gamma = gamma + self.warmup_factor = warmup_factor + self.warmup_iters = warmup_iters + self.warmup_method = warmup_method + super().__init__(optimizer, last_epoch) + + def get_lr(self) -> List[float]: + warmup_factor = _get_warmup_factor_at_iter( + self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor + ) + return [ + base_lr * warmup_factor * self.gamma ** bisect_right(self.milestones, self.last_epoch) + for base_lr in self.base_lrs + ] + + def _compute_values(self) -> List[float]: + # The new interface + return self.get_lr() + + +class WarmupCosineLR(LRScheduler): + def __init__( + self, + optimizer: torch.optim.Optimizer, + max_iters: int, + warmup_factor: float = 0.001, + warmup_iters: int = 1000, + warmup_method: str = "linear", + last_epoch: int = -1, + ): + logger.warning( + "WarmupCosineLR is deprecated! Use LRMultipilier with fvcore ParamScheduler instead!" + ) + self.max_iters = max_iters + self.warmup_factor = warmup_factor + self.warmup_iters = warmup_iters + self.warmup_method = warmup_method + super().__init__(optimizer, last_epoch) + + def get_lr(self) -> List[float]: + warmup_factor = _get_warmup_factor_at_iter( + self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor + ) + # Different definitions of half-cosine with warmup are possible. For + # simplicity we multiply the standard half-cosine schedule by the warmup + # factor. An alternative is to start the period of the cosine at warmup_iters + # instead of at 0. In the case that warmup_iters << max_iters the two are + # very close to each other. + return [ + base_lr + * warmup_factor + * 0.5 + * (1.0 + math.cos(math.pi * self.last_epoch / self.max_iters)) + for base_lr in self.base_lrs + ] + + def _compute_values(self) -> List[float]: + # The new interface + return self.get_lr() + + +def _get_warmup_factor_at_iter( + method: str, iter: int, warmup_iters: int, warmup_factor: float +) -> float: + """ + Return the learning rate warmup factor at a specific iteration. + See :paper:`ImageNet in 1h` for more details. + + Args: + method (str): warmup method; either "constant" or "linear". + iter (int): iteration at which to calculate the warmup factor. + warmup_iters (int): the number of warmup iterations. + warmup_factor (float): the base warmup factor (the meaning changes according + to the method used). + + Returns: + float: the effective warmup factor at the given iteration. + """ + if iter >= warmup_iters: + return 1.0 + + if method == "constant": + return warmup_factor + elif method == "linear": + alpha = iter / warmup_iters + return warmup_factor * (1 - alpha) + alpha + else: + raise ValueError("Unknown warmup method: {}".format(method)) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/structures/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/structures/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c2942fc58e3fce82e690eafc2de0204816e94cc2 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/structures/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+from .boxes import Boxes, BoxMode, pairwise_iou, pairwise_ioa, pairwise_point_box_distance +from .image_list import ImageList + +from .instances import Instances +from .keypoints import Keypoints, heatmaps_to_keypoints +from .masks import BitMasks, PolygonMasks, polygons_to_bitmask, ROIMasks +from .rotated_boxes import RotatedBoxes +from .rotated_boxes import pairwise_iou as pairwise_iou_rotated + +__all__ = [k for k in globals().keys() if not k.startswith("_")] + + +from annotator.oneformer.detectron2.utils.env import fixup_module_metadata + +fixup_module_metadata(__name__, globals(), __all__) +del fixup_module_metadata diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/structures/boxes.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/structures/boxes.py new file mode 100644 index 0000000000000000000000000000000000000000..fd396f68645db1d6946056eed868ffcc02cd7a22 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/structures/boxes.py @@ -0,0 +1,425 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import math +import numpy as np +from enum import IntEnum, unique +from typing import List, Tuple, Union +import torch +from torch import device + +_RawBoxType = Union[List[float], Tuple[float, ...], torch.Tensor, np.ndarray] + + +@unique +class BoxMode(IntEnum): + """ + Enum of different ways to represent a box. + """ + + XYXY_ABS = 0 + """ + (x0, y0, x1, y1) in absolute floating points coordinates. + The coordinates in range [0, width or height]. + """ + XYWH_ABS = 1 + """ + (x0, y0, w, h) in absolute floating points coordinates. + """ + XYXY_REL = 2 + """ + Not yet supported! + (x0, y0, x1, y1) in range [0, 1]. They are relative to the size of the image. + """ + XYWH_REL = 3 + """ + Not yet supported! + (x0, y0, w, h) in range [0, 1]. They are relative to the size of the image. + """ + XYWHA_ABS = 4 + """ + (xc, yc, w, h, a) in absolute floating points coordinates. + (xc, yc) is the center of the rotated box, and the angle a is in degrees ccw. + """ + + @staticmethod + def convert(box: _RawBoxType, from_mode: "BoxMode", to_mode: "BoxMode") -> _RawBoxType: + """ + Args: + box: can be a k-tuple, k-list or an Nxk array/tensor, where k = 4 or 5 + from_mode, to_mode (BoxMode) + + Returns: + The converted box of the same type. + """ + if from_mode == to_mode: + return box + + original_type = type(box) + is_numpy = isinstance(box, np.ndarray) + single_box = isinstance(box, (list, tuple)) + if single_box: + assert len(box) == 4 or len(box) == 5, ( + "BoxMode.convert takes either a k-tuple/list or an Nxk array/tensor," + " where k == 4 or 5" + ) + arr = torch.tensor(box)[None, :] + else: + # avoid modifying the input box + if is_numpy: + arr = torch.from_numpy(np.asarray(box)).clone() + else: + arr = box.clone() + + assert to_mode not in [BoxMode.XYXY_REL, BoxMode.XYWH_REL] and from_mode not in [ + BoxMode.XYXY_REL, + BoxMode.XYWH_REL, + ], "Relative mode not yet supported!" 
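        # For example, a single XYWH_ABS box [10, 10, 20, 30] (x0, y0, w, h) converted to
        # XYXY_ABS comes out as [10.0, 10.0, 30.0, 40.0]; the same call accepts Nx4 tensors
        # or ndarrays and returns the same container type it was given.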
+ + if from_mode == BoxMode.XYWHA_ABS and to_mode == BoxMode.XYXY_ABS: + assert ( + arr.shape[-1] == 5 + ), "The last dimension of input shape must be 5 for XYWHA format" + original_dtype = arr.dtype + arr = arr.double() + + w = arr[:, 2] + h = arr[:, 3] + a = arr[:, 4] + c = torch.abs(torch.cos(a * math.pi / 180.0)) + s = torch.abs(torch.sin(a * math.pi / 180.0)) + # This basically computes the horizontal bounding rectangle of the rotated box + new_w = c * w + s * h + new_h = c * h + s * w + + # convert center to top-left corner + arr[:, 0] -= new_w / 2.0 + arr[:, 1] -= new_h / 2.0 + # bottom-right corner + arr[:, 2] = arr[:, 0] + new_w + arr[:, 3] = arr[:, 1] + new_h + + arr = arr[:, :4].to(dtype=original_dtype) + elif from_mode == BoxMode.XYWH_ABS and to_mode == BoxMode.XYWHA_ABS: + original_dtype = arr.dtype + arr = arr.double() + arr[:, 0] += arr[:, 2] / 2.0 + arr[:, 1] += arr[:, 3] / 2.0 + angles = torch.zeros((arr.shape[0], 1), dtype=arr.dtype) + arr = torch.cat((arr, angles), axis=1).to(dtype=original_dtype) + else: + if to_mode == BoxMode.XYXY_ABS and from_mode == BoxMode.XYWH_ABS: + arr[:, 2] += arr[:, 0] + arr[:, 3] += arr[:, 1] + elif from_mode == BoxMode.XYXY_ABS and to_mode == BoxMode.XYWH_ABS: + arr[:, 2] -= arr[:, 0] + arr[:, 3] -= arr[:, 1] + else: + raise NotImplementedError( + "Conversion from BoxMode {} to {} is not supported yet".format( + from_mode, to_mode + ) + ) + + if single_box: + return original_type(arr.flatten().tolist()) + if is_numpy: + return arr.numpy() + else: + return arr + + +class Boxes: + """ + This structure stores a list of boxes as a Nx4 torch.Tensor. + It supports some common methods about boxes + (`area`, `clip`, `nonempty`, etc), + and also behaves like a Tensor + (support indexing, `to(device)`, `.device`, and iteration over all boxes) + + Attributes: + tensor (torch.Tensor): float matrix of Nx4. Each row is (x1, y1, x2, y2). + """ + + def __init__(self, tensor: torch.Tensor): + """ + Args: + tensor (Tensor[float]): a Nx4 matrix. Each row is (x1, y1, x2, y2). + """ + if not isinstance(tensor, torch.Tensor): + tensor = torch.as_tensor(tensor, dtype=torch.float32, device=torch.device("cpu")) + else: + tensor = tensor.to(torch.float32) + if tensor.numel() == 0: + # Use reshape, so we don't end up creating a new tensor that does not depend on + # the inputs (and consequently confuses jit) + tensor = tensor.reshape((-1, 4)).to(dtype=torch.float32) + assert tensor.dim() == 2 and tensor.size(-1) == 4, tensor.size() + + self.tensor = tensor + + def clone(self) -> "Boxes": + """ + Clone the Boxes. + + Returns: + Boxes + """ + return Boxes(self.tensor.clone()) + + def to(self, device: torch.device): + # Boxes are assumed float32 and does not support to(dtype) + return Boxes(self.tensor.to(device=device)) + + def area(self) -> torch.Tensor: + """ + Computes the area of all the boxes. + + Returns: + torch.Tensor: a vector with areas of each box. + """ + box = self.tensor + area = (box[:, 2] - box[:, 0]) * (box[:, 3] - box[:, 1]) + return area + + def clip(self, box_size: Tuple[int, int]) -> None: + """ + Clip (in place) the boxes by limiting x coordinates to the range [0, width] + and y coordinates to the range [0, height]. + + Args: + box_size (height, width): The clipping box's size. + """ + assert torch.isfinite(self.tensor).all(), "Box tensor contains infinite or NaN!" 
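        # For example, with box_size=(480, 640) the x coordinates are clamped into [0, 640]
        # and the y coordinates into [0, 480], so a box (-5, 10, 700, 300) becomes
        # (0, 10, 640, 300) in place.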
+ h, w = box_size + x1 = self.tensor[:, 0].clamp(min=0, max=w) + y1 = self.tensor[:, 1].clamp(min=0, max=h) + x2 = self.tensor[:, 2].clamp(min=0, max=w) + y2 = self.tensor[:, 3].clamp(min=0, max=h) + self.tensor = torch.stack((x1, y1, x2, y2), dim=-1) + + def nonempty(self, threshold: float = 0.0) -> torch.Tensor: + """ + Find boxes that are non-empty. + A box is considered empty, if either of its side is no larger than threshold. + + Returns: + Tensor: + a binary vector which represents whether each box is empty + (False) or non-empty (True). + """ + box = self.tensor + widths = box[:, 2] - box[:, 0] + heights = box[:, 3] - box[:, 1] + keep = (widths > threshold) & (heights > threshold) + return keep + + def __getitem__(self, item) -> "Boxes": + """ + Args: + item: int, slice, or a BoolTensor + + Returns: + Boxes: Create a new :class:`Boxes` by indexing. + + The following usage are allowed: + + 1. `new_boxes = boxes[3]`: return a `Boxes` which contains only one box. + 2. `new_boxes = boxes[2:10]`: return a slice of boxes. + 3. `new_boxes = boxes[vector]`, where vector is a torch.BoolTensor + with `length = len(boxes)`. Nonzero elements in the vector will be selected. + + Note that the returned Boxes might share storage with this Boxes, + subject to Pytorch's indexing semantics. + """ + if isinstance(item, int): + return Boxes(self.tensor[item].view(1, -1)) + b = self.tensor[item] + assert b.dim() == 2, "Indexing on Boxes with {} failed to return a matrix!".format(item) + return Boxes(b) + + def __len__(self) -> int: + return self.tensor.shape[0] + + def __repr__(self) -> str: + return "Boxes(" + str(self.tensor) + ")" + + def inside_box(self, box_size: Tuple[int, int], boundary_threshold: int = 0) -> torch.Tensor: + """ + Args: + box_size (height, width): Size of the reference box. + boundary_threshold (int): Boxes that extend beyond the reference box + boundary by more than boundary_threshold are considered "outside". + + Returns: + a binary vector, indicating whether each box is inside the reference box. + """ + height, width = box_size + inds_inside = ( + (self.tensor[..., 0] >= -boundary_threshold) + & (self.tensor[..., 1] >= -boundary_threshold) + & (self.tensor[..., 2] < width + boundary_threshold) + & (self.tensor[..., 3] < height + boundary_threshold) + ) + return inds_inside + + def get_centers(self) -> torch.Tensor: + """ + Returns: + The box centers in a Nx2 array of (x, y). + """ + return (self.tensor[:, :2] + self.tensor[:, 2:]) / 2 + + def scale(self, scale_x: float, scale_y: float) -> None: + """ + Scale the box with horizontal and vertical scaling factors + """ + self.tensor[:, 0::2] *= scale_x + self.tensor[:, 1::2] *= scale_y + + @classmethod + def cat(cls, boxes_list: List["Boxes"]) -> "Boxes": + """ + Concatenates a list of Boxes into a single Boxes + + Arguments: + boxes_list (list[Boxes]) + + Returns: + Boxes: the concatenated Boxes + """ + assert isinstance(boxes_list, (list, tuple)) + if len(boxes_list) == 0: + return cls(torch.empty(0)) + assert all([isinstance(box, Boxes) for box in boxes_list]) + + # use torch.cat (v.s. 
layers.cat) so the returned boxes never share storage with input + cat_boxes = cls(torch.cat([b.tensor for b in boxes_list], dim=0)) + return cat_boxes + + @property + def device(self) -> device: + return self.tensor.device + + # type "Iterator[torch.Tensor]", yield, and iter() not supported by torchscript + # https://github.com/pytorch/pytorch/issues/18627 + @torch.jit.unused + def __iter__(self): + """ + Yield a box as a Tensor of shape (4,) at a time. + """ + yield from self.tensor + + +def pairwise_intersection(boxes1: Boxes, boxes2: Boxes) -> torch.Tensor: + """ + Given two lists of boxes of size N and M, + compute the intersection area between __all__ N x M pairs of boxes. + The box order must be (xmin, ymin, xmax, ymax) + + Args: + boxes1,boxes2 (Boxes): two `Boxes`. Contains N & M boxes, respectively. + + Returns: + Tensor: intersection, sized [N,M]. + """ + boxes1, boxes2 = boxes1.tensor, boxes2.tensor + width_height = torch.min(boxes1[:, None, 2:], boxes2[:, 2:]) - torch.max( + boxes1[:, None, :2], boxes2[:, :2] + ) # [N,M,2] + + width_height.clamp_(min=0) # [N,M,2] + intersection = width_height.prod(dim=2) # [N,M] + return intersection + + +# implementation from https://github.com/kuangliu/torchcv/blob/master/torchcv/utils/box.py +# with slight modifications +def pairwise_iou(boxes1: Boxes, boxes2: Boxes) -> torch.Tensor: + """ + Given two lists of boxes of size N and M, compute the IoU + (intersection over union) between **all** N x M pairs of boxes. + The box order must be (xmin, ymin, xmax, ymax). + + Args: + boxes1,boxes2 (Boxes): two `Boxes`. Contains N & M boxes, respectively. + + Returns: + Tensor: IoU, sized [N,M]. + """ + area1 = boxes1.area() # [N] + area2 = boxes2.area() # [M] + inter = pairwise_intersection(boxes1, boxes2) + + # handle empty boxes + iou = torch.where( + inter > 0, + inter / (area1[:, None] + area2 - inter), + torch.zeros(1, dtype=inter.dtype, device=inter.device), + ) + return iou + + +def pairwise_ioa(boxes1: Boxes, boxes2: Boxes) -> torch.Tensor: + """ + Similar to :func:`pariwise_iou` but compute the IoA (intersection over boxes2 area). + + Args: + boxes1,boxes2 (Boxes): two `Boxes`. Contains N & M boxes, respectively. + + Returns: + Tensor: IoA, sized [N,M]. + """ + area2 = boxes2.area() # [M] + inter = pairwise_intersection(boxes1, boxes2) + + # handle empty boxes + ioa = torch.where( + inter > 0, inter / area2, torch.zeros(1, dtype=inter.dtype, device=inter.device) + ) + return ioa + + +def pairwise_point_box_distance(points: torch.Tensor, boxes: Boxes): + """ + Pairwise distance between N points and M boxes. The distance between a + point and a box is represented by the distance from the point to 4 edges + of the box. Distances are all positive when the point is inside the box. + + Args: + points: Nx2 coordinates. Each row is (x, y) + boxes: M boxes + + Returns: + Tensor: distances of size (N, M, 4). The 4 values are distances from + the point to the left, top, right, bottom of the box. + """ + x, y = points.unsqueeze(dim=2).unbind(dim=1) # (N, 1) + x0, y0, x1, y1 = boxes.tensor.unsqueeze(dim=0).unbind(dim=2) # (1, M) + return torch.stack([x - x0, y - y0, x1 - x, y1 - y], dim=2) + + +def matched_pairwise_iou(boxes1: Boxes, boxes2: Boxes) -> torch.Tensor: + """ + Compute pairwise intersection over union (IOU) of two sets of matched + boxes that have the same number of boxes. + Similar to :func:`pairwise_iou`, but computes only diagonal elements of the matrix. + + Args: + boxes1 (Boxes): bounding boxes, sized [N,4]. 
+ boxes2 (Boxes): same length as boxes1 + Returns: + Tensor: iou, sized [N]. + """ + assert len(boxes1) == len( + boxes2 + ), "boxlists should have the same" "number of entries, got {}, {}".format( + len(boxes1), len(boxes2) + ) + area1 = boxes1.area() # [N] + area2 = boxes2.area() # [N] + box1, box2 = boxes1.tensor, boxes2.tensor + lt = torch.max(box1[:, :2], box2[:, :2]) # [N,2] + rb = torch.min(box1[:, 2:], box2[:, 2:]) # [N,2] + wh = (rb - lt).clamp(min=0) # [N,2] + inter = wh[:, 0] * wh[:, 1] # [N] + iou = inter / (area1 + area2 - inter) # [N] + return iou diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/structures/image_list.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/structures/image_list.py new file mode 100644 index 0000000000000000000000000000000000000000..86c8b9512a5fd8abda7fdf058a63b19f809e46f6 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/structures/image_list.py @@ -0,0 +1,129 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from __future__ import division +from typing import Any, Dict, List, Optional, Tuple +import torch +from torch import device +from torch.nn import functional as F + +from annotator.oneformer.detectron2.layers.wrappers import move_device_like, shapes_to_tensor + + +class ImageList(object): + """ + Structure that holds a list of images (of possibly + varying sizes) as a single tensor. + This works by padding the images to the same size. + The original sizes of each image is stored in `image_sizes`. + + Attributes: + image_sizes (list[tuple[int, int]]): each tuple is (h, w). + During tracing, it becomes list[Tensor] instead. + """ + + def __init__(self, tensor: torch.Tensor, image_sizes: List[Tuple[int, int]]): + """ + Arguments: + tensor (Tensor): of shape (N, H, W) or (N, C_1, ..., C_K, H, W) where K >= 1 + image_sizes (list[tuple[int, int]]): Each tuple is (h, w). It can + be smaller than (H, W) due to padding. + """ + self.tensor = tensor + self.image_sizes = image_sizes + + def __len__(self) -> int: + return len(self.image_sizes) + + def __getitem__(self, idx) -> torch.Tensor: + """ + Access the individual image in its original size. + + Args: + idx: int or slice + + Returns: + Tensor: an image of shape (H, W) or (C_1, ..., C_K, H, W) where K >= 1 + """ + size = self.image_sizes[idx] + return self.tensor[idx, ..., : size[0], : size[1]] + + @torch.jit.unused + def to(self, *args: Any, **kwargs: Any) -> "ImageList": + cast_tensor = self.tensor.to(*args, **kwargs) + return ImageList(cast_tensor, self.image_sizes) + + @property + def device(self) -> device: + return self.tensor.device + + @staticmethod + def from_tensors( + tensors: List[torch.Tensor], + size_divisibility: int = 0, + pad_value: float = 0.0, + padding_constraints: Optional[Dict[str, int]] = None, + ) -> "ImageList": + """ + Args: + tensors: a tuple or list of `torch.Tensor`, each of shape (Hi, Wi) or + (C_1, ..., C_K, Hi, Wi) where K >= 1. The Tensors will be padded + to the same shape with `pad_value`. + size_divisibility (int): If `size_divisibility > 0`, add padding to ensure + the common height and width is divisible by `size_divisibility`. + This depends on the model and many models need a divisibility of 32. + pad_value (float): value to pad. 
+ padding_constraints (optional[Dict]): If given, it would follow the format as + {"size_divisibility": int, "square_size": int}, where `size_divisibility` will + overwrite the above one if presented and `square_size` indicates the + square padding size if `square_size` > 0. + Returns: + an `ImageList`. + """ + assert len(tensors) > 0 + assert isinstance(tensors, (tuple, list)) + for t in tensors: + assert isinstance(t, torch.Tensor), type(t) + assert t.shape[:-2] == tensors[0].shape[:-2], t.shape + + image_sizes = [(im.shape[-2], im.shape[-1]) for im in tensors] + image_sizes_tensor = [shapes_to_tensor(x) for x in image_sizes] + max_size = torch.stack(image_sizes_tensor).max(0).values + + if padding_constraints is not None: + square_size = padding_constraints.get("square_size", 0) + if square_size > 0: + # pad to square. + max_size[0] = max_size[1] = square_size + if "size_divisibility" in padding_constraints: + size_divisibility = padding_constraints["size_divisibility"] + if size_divisibility > 1: + stride = size_divisibility + # the last two dims are H,W, both subject to divisibility requirement + max_size = (max_size + (stride - 1)).div(stride, rounding_mode="floor") * stride + + # handle weirdness of scripting and tracing ... + if torch.jit.is_scripting(): + max_size: List[int] = max_size.to(dtype=torch.long).tolist() + else: + if torch.jit.is_tracing(): + image_sizes = image_sizes_tensor + + if len(tensors) == 1: + # This seems slightly (2%) faster. + # TODO: check whether it's faster for multiple images as well + image_size = image_sizes[0] + padding_size = [0, max_size[-1] - image_size[1], 0, max_size[-2] - image_size[0]] + batched_imgs = F.pad(tensors[0], padding_size, value=pad_value).unsqueeze_(0) + else: + # max_size can be a tensor in tracing mode, therefore convert to list + batch_shape = [len(tensors)] + list(tensors[0].shape[:-2]) + list(max_size) + device = ( + None if torch.jit.is_scripting() else ("cpu" if torch.jit.is_tracing() else None) + ) + batched_imgs = tensors[0].new_full(batch_shape, pad_value, device=device) + batched_imgs = move_device_like(batched_imgs, tensors[0]) + for i, img in enumerate(tensors): + # Use `batched_imgs` directly instead of `img, pad_img = zip(tensors, batched_imgs)` + # Tracing mode cannot capture `copy_()` of temporary locals + batched_imgs[i, ..., : img.shape[-2], : img.shape[-1]].copy_(img) + + return ImageList(batched_imgs.contiguous(), image_sizes) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/structures/instances.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/structures/instances.py new file mode 100644 index 0000000000000000000000000000000000000000..c9579bce2730f42e256c6eed99d9014d09304c99 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/structures/instances.py @@ -0,0 +1,194 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import itertools +import warnings +from typing import Any, Dict, List, Tuple, Union +import torch + + +class Instances: + """ + This class represents a list of instances in an image. + It stores the attributes of instances (e.g., boxes, masks, labels, scores) as "fields". + All fields must have the same ``__len__`` which is the number of instances. + + All other (non-field) attributes of this class are considered private: + they must start with '_' and are not modifiable by a user. + + Some basic usage: + + 1. Set/get/check a field: + + .. 
code-block:: python + + instances.gt_boxes = Boxes(...) + print(instances.pred_masks) # a tensor of shape (N, H, W) + print('gt_masks' in instances) + + 2. ``len(instances)`` returns the number of instances + 3. Indexing: ``instances[indices]`` will apply the indexing on all the fields + and returns a new :class:`Instances`. + Typically, ``indices`` is a integer vector of indices, + or a binary mask of length ``num_instances`` + + .. code-block:: python + + category_3_detections = instances[instances.pred_classes == 3] + confident_detections = instances[instances.scores > 0.9] + """ + + def __init__(self, image_size: Tuple[int, int], **kwargs: Any): + """ + Args: + image_size (height, width): the spatial size of the image. + kwargs: fields to add to this `Instances`. + """ + self._image_size = image_size + self._fields: Dict[str, Any] = {} + for k, v in kwargs.items(): + self.set(k, v) + + @property + def image_size(self) -> Tuple[int, int]: + """ + Returns: + tuple: height, width + """ + return self._image_size + + def __setattr__(self, name: str, val: Any) -> None: + if name.startswith("_"): + super().__setattr__(name, val) + else: + self.set(name, val) + + def __getattr__(self, name: str) -> Any: + if name == "_fields" or name not in self._fields: + raise AttributeError("Cannot find field '{}' in the given Instances!".format(name)) + return self._fields[name] + + def set(self, name: str, value: Any) -> None: + """ + Set the field named `name` to `value`. + The length of `value` must be the number of instances, + and must agree with other existing fields in this object. + """ + with warnings.catch_warnings(record=True): + data_len = len(value) + if len(self._fields): + assert ( + len(self) == data_len + ), "Adding a field of length {} to a Instances of length {}".format(data_len, len(self)) + self._fields[name] = value + + def has(self, name: str) -> bool: + """ + Returns: + bool: whether the field called `name` exists. + """ + return name in self._fields + + def remove(self, name: str) -> None: + """ + Remove the field called `name`. + """ + del self._fields[name] + + def get(self, name: str) -> Any: + """ + Returns the field called `name`. + """ + return self._fields[name] + + def get_fields(self) -> Dict[str, Any]: + """ + Returns: + dict: a dict which maps names (str) to data of the fields + + Modifying the returned dict will modify this instance. + """ + return self._fields + + # Tensor-like methods + def to(self, *args: Any, **kwargs: Any) -> "Instances": + """ + Returns: + Instances: all fields are called with a `to(device)`, if the field has this method. + """ + ret = Instances(self._image_size) + for k, v in self._fields.items(): + if hasattr(v, "to"): + v = v.to(*args, **kwargs) + ret.set(k, v) + return ret + + def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "Instances": + """ + Args: + item: an index-like object and will be used to index all the fields. + + Returns: + If `item` is a string, return the data in the corresponding field. + Otherwise, returns an `Instances` where all fields are indexed by `item`. 
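        Example (an illustrative sketch; assumes a ``scores`` field has been set on this
        object)::

            keep = instances.scores > 0.5    # torch.BoolTensor of length len(instances)
            confident = instances[keep]      # every field is filtered consistently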
+ """ + if type(item) == int: + if item >= len(self) or item < -len(self): + raise IndexError("Instances index out of range!") + else: + item = slice(item, None, len(self)) + + ret = Instances(self._image_size) + for k, v in self._fields.items(): + ret.set(k, v[item]) + return ret + + def __len__(self) -> int: + for v in self._fields.values(): + # use __len__ because len() has to be int and is not friendly to tracing + return v.__len__() + raise NotImplementedError("Empty Instances does not support __len__!") + + def __iter__(self): + raise NotImplementedError("`Instances` object is not iterable!") + + @staticmethod + def cat(instance_lists: List["Instances"]) -> "Instances": + """ + Args: + instance_lists (list[Instances]) + + Returns: + Instances + """ + assert all(isinstance(i, Instances) for i in instance_lists) + assert len(instance_lists) > 0 + if len(instance_lists) == 1: + return instance_lists[0] + + image_size = instance_lists[0].image_size + if not isinstance(image_size, torch.Tensor): # could be a tensor in tracing + for i in instance_lists[1:]: + assert i.image_size == image_size + ret = Instances(image_size) + for k in instance_lists[0]._fields.keys(): + values = [i.get(k) for i in instance_lists] + v0 = values[0] + if isinstance(v0, torch.Tensor): + values = torch.cat(values, dim=0) + elif isinstance(v0, list): + values = list(itertools.chain(*values)) + elif hasattr(type(v0), "cat"): + values = type(v0).cat(values) + else: + raise ValueError("Unsupported type {} for concatenation".format(type(v0))) + ret.set(k, values) + return ret + + def __str__(self) -> str: + s = self.__class__.__name__ + "(" + s += "num_instances={}, ".format(len(self)) + s += "image_height={}, ".format(self._image_size[0]) + s += "image_width={}, ".format(self._image_size[1]) + s += "fields=[{}])".format(", ".join((f"{k}: {v}" for k, v in self._fields.items()))) + return s + + __repr__ = __str__ diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/structures/keypoints.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/structures/keypoints.py new file mode 100644 index 0000000000000000000000000000000000000000..b93ebed4f6554e67ba9bde8d3af90e8dbb3246b6 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/structures/keypoints.py @@ -0,0 +1,235 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import numpy as np +from typing import Any, List, Tuple, Union +import torch +from torch.nn import functional as F + + +class Keypoints: + """ + Stores keypoint **annotation** data. GT Instances have a `gt_keypoints` property + containing the x,y location and visibility flag of each keypoint. This tensor has shape + (N, K, 3) where N is the number of instances and K is the number of keypoints per instance. + + The visibility flag follows the COCO format and must be one of three integers: + + * v=0: not labeled (in which case x=y=0) + * v=1: labeled but not visible + * v=2: labeled and visible + """ + + def __init__(self, keypoints: Union[torch.Tensor, np.ndarray, List[List[float]]]): + """ + Arguments: + keypoints: A Tensor, numpy array, or list of the x, y, and visibility of each keypoint. + The shape should be (N, K, 3) where N is the number of + instances, and K is the number of keypoints per instance. 
+ """ + device = keypoints.device if isinstance(keypoints, torch.Tensor) else torch.device("cpu") + keypoints = torch.as_tensor(keypoints, dtype=torch.float32, device=device) + assert keypoints.dim() == 3 and keypoints.shape[2] == 3, keypoints.shape + self.tensor = keypoints + + def __len__(self) -> int: + return self.tensor.size(0) + + def to(self, *args: Any, **kwargs: Any) -> "Keypoints": + return type(self)(self.tensor.to(*args, **kwargs)) + + @property + def device(self) -> torch.device: + return self.tensor.device + + def to_heatmap(self, boxes: torch.Tensor, heatmap_size: int) -> torch.Tensor: + """ + Convert keypoint annotations to a heatmap of one-hot labels for training, + as described in :paper:`Mask R-CNN`. + + Arguments: + boxes: Nx4 tensor, the boxes to draw the keypoints to + + Returns: + heatmaps: + A tensor of shape (N, K), each element is integer spatial label + in the range [0, heatmap_size**2 - 1] for each keypoint in the input. + valid: + A tensor of shape (N, K) containing whether each keypoint is in the roi or not. + """ + return _keypoints_to_heatmap(self.tensor, boxes, heatmap_size) + + def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "Keypoints": + """ + Create a new `Keypoints` by indexing on this `Keypoints`. + + The following usage are allowed: + + 1. `new_kpts = kpts[3]`: return a `Keypoints` which contains only one instance. + 2. `new_kpts = kpts[2:10]`: return a slice of key points. + 3. `new_kpts = kpts[vector]`, where vector is a torch.ByteTensor + with `length = len(kpts)`. Nonzero elements in the vector will be selected. + + Note that the returned Keypoints might share storage with this Keypoints, + subject to Pytorch's indexing semantics. + """ + if isinstance(item, int): + return Keypoints([self.tensor[item]]) + return Keypoints(self.tensor[item]) + + def __repr__(self) -> str: + s = self.__class__.__name__ + "(" + s += "num_instances={})".format(len(self.tensor)) + return s + + @staticmethod + def cat(keypoints_list: List["Keypoints"]) -> "Keypoints": + """ + Concatenates a list of Keypoints into a single Keypoints + + Arguments: + keypoints_list (list[Keypoints]) + + Returns: + Keypoints: the concatenated Keypoints + """ + assert isinstance(keypoints_list, (list, tuple)) + assert len(keypoints_list) > 0 + assert all(isinstance(keypoints, Keypoints) for keypoints in keypoints_list) + + cat_kpts = type(keypoints_list[0])( + torch.cat([kpts.tensor for kpts in keypoints_list], dim=0) + ) + return cat_kpts + + +# TODO make this nicer, this is a direct translation from C2 (but removing the inner loop) +def _keypoints_to_heatmap( + keypoints: torch.Tensor, rois: torch.Tensor, heatmap_size: int +) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Encode keypoint locations into a target heatmap for use in SoftmaxWithLoss across space. + + Maps keypoints from the half-open interval [x1, x2) on continuous image coordinates to the + closed interval [0, heatmap_size - 1] on discrete image coordinates. We use the + continuous-discrete conversion from Heckbert 1990 ("What is the coordinate of a pixel?"): + d = floor(c) and c = d + 0.5, where d is a discrete coordinate and c is a continuous coordinate. + + Arguments: + keypoints: tensor of keypoint locations in of shape (N, K, 3). + rois: Nx4 tensor of rois in xyxy format + heatmap_size: integer side length of square heatmap. + + Returns: + heatmaps: A tensor of shape (N, K) containing an integer spatial label + in the range [0, heatmap_size**2 - 1] for each keypoint in the input. 
+ valid: A tensor of shape (N, K) containing whether each keypoint is in + the roi or not. + """ + + if rois.numel() == 0: + return rois.new().long(), rois.new().long() + offset_x = rois[:, 0] + offset_y = rois[:, 1] + scale_x = heatmap_size / (rois[:, 2] - rois[:, 0]) + scale_y = heatmap_size / (rois[:, 3] - rois[:, 1]) + + offset_x = offset_x[:, None] + offset_y = offset_y[:, None] + scale_x = scale_x[:, None] + scale_y = scale_y[:, None] + + x = keypoints[..., 0] + y = keypoints[..., 1] + + x_boundary_inds = x == rois[:, 2][:, None] + y_boundary_inds = y == rois[:, 3][:, None] + + x = (x - offset_x) * scale_x + x = x.floor().long() + y = (y - offset_y) * scale_y + y = y.floor().long() + + x[x_boundary_inds] = heatmap_size - 1 + y[y_boundary_inds] = heatmap_size - 1 + + valid_loc = (x >= 0) & (y >= 0) & (x < heatmap_size) & (y < heatmap_size) + vis = keypoints[..., 2] > 0 + valid = (valid_loc & vis).long() + + lin_ind = y * heatmap_size + x + heatmaps = lin_ind * valid + + return heatmaps, valid + + +@torch.jit.script_if_tracing +def heatmaps_to_keypoints(maps: torch.Tensor, rois: torch.Tensor) -> torch.Tensor: + """ + Extract predicted keypoint locations from heatmaps. + + Args: + maps (Tensor): (#ROIs, #keypoints, POOL_H, POOL_W). The predicted heatmap of logits for + each ROI and each keypoint. + rois (Tensor): (#ROIs, 4). The box of each ROI. + + Returns: + Tensor of shape (#ROIs, #keypoints, 4) with the last dimension corresponding to + (x, y, logit, score) for each keypoint. + + When converting discrete pixel indices in an NxN image to a continuous keypoint coordinate, + we maintain consistency with :meth:`Keypoints.to_heatmap` by using the conversion from + Heckbert 1990: c = d + 0.5, where d is a discrete coordinate and c is a continuous coordinate. 
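    Example (an illustrative sketch with made-up shapes)::

        maps = torch.randn(8, 17, 56, 56)                         # 8 ROIs, 17 keypoints
        rois = torch.tensor([[0.0, 0.0, 96.0, 96.0]]).repeat(8, 1)
        preds = heatmaps_to_keypoints(maps, rois)                  # (8, 17, 4): x, y, logit, score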
+ """ + + offset_x = rois[:, 0] + offset_y = rois[:, 1] + + widths = (rois[:, 2] - rois[:, 0]).clamp(min=1) + heights = (rois[:, 3] - rois[:, 1]).clamp(min=1) + widths_ceil = widths.ceil() + heights_ceil = heights.ceil() + + num_rois, num_keypoints = maps.shape[:2] + xy_preds = maps.new_zeros(rois.shape[0], num_keypoints, 4) + + width_corrections = widths / widths_ceil + height_corrections = heights / heights_ceil + + keypoints_idx = torch.arange(num_keypoints, device=maps.device) + + for i in range(num_rois): + outsize = (int(heights_ceil[i]), int(widths_ceil[i])) + roi_map = F.interpolate(maps[[i]], size=outsize, mode="bicubic", align_corners=False) + + # Although semantically equivalent, `reshape` is used instead of `squeeze` due + # to limitation during ONNX export of `squeeze` in scripting mode + roi_map = roi_map.reshape(roi_map.shape[1:]) # keypoints x H x W + + # softmax over the spatial region + max_score, _ = roi_map.view(num_keypoints, -1).max(1) + max_score = max_score.view(num_keypoints, 1, 1) + tmp_full_resolution = (roi_map - max_score).exp_() + tmp_pool_resolution = (maps[i] - max_score).exp_() + # Produce scores over the region H x W, but normalize with POOL_H x POOL_W, + # so that the scores of objects of different absolute sizes will be more comparable + roi_map_scores = tmp_full_resolution / tmp_pool_resolution.sum((1, 2), keepdim=True) + + w = roi_map.shape[2] + pos = roi_map.view(num_keypoints, -1).argmax(1) + + x_int = pos % w + y_int = (pos - x_int) // w + + assert ( + roi_map_scores[keypoints_idx, y_int, x_int] + == roi_map_scores.view(num_keypoints, -1).max(1)[0] + ).all() + + x = (x_int.float() + 0.5) * width_corrections[i] + y = (y_int.float() + 0.5) * height_corrections[i] + + xy_preds[i, :, 0] = x + offset_x[i] + xy_preds[i, :, 1] = y + offset_y[i] + xy_preds[i, :, 2] = roi_map[keypoints_idx, y_int, x_int] + xy_preds[i, :, 3] = roi_map_scores[keypoints_idx, y_int, x_int] + + return xy_preds diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/structures/masks.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/structures/masks.py new file mode 100644 index 0000000000000000000000000000000000000000..995fee72a6d6190c9596a4bf62dc335766b954ee --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/structures/masks.py @@ -0,0 +1,534 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+import copy +import itertools +import numpy as np +from typing import Any, Iterator, List, Union +import annotator.oneformer.pycocotools.mask as mask_util +import torch +from torch import device + +from annotator.oneformer.detectron2.layers.roi_align import ROIAlign +from annotator.oneformer.detectron2.utils.memory import retry_if_cuda_oom + +from .boxes import Boxes + + +def polygon_area(x, y): + # Using the shoelace formula + # https://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates + return 0.5 * np.abs(np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1))) + + +def polygons_to_bitmask(polygons: List[np.ndarray], height: int, width: int) -> np.ndarray: + """ + Args: + polygons (list[ndarray]): each array has shape (Nx2,) + height, width (int) + + Returns: + ndarray: a bool mask of shape (height, width) + """ + if len(polygons) == 0: + # COCOAPI does not support empty polygons + return np.zeros((height, width)).astype(bool) + rles = mask_util.frPyObjects(polygons, height, width) + rle = mask_util.merge(rles) + return mask_util.decode(rle).astype(bool) + + +def rasterize_polygons_within_box( + polygons: List[np.ndarray], box: np.ndarray, mask_size: int +) -> torch.Tensor: + """ + Rasterize the polygons into a mask image and + crop the mask content in the given box. + The cropped mask is resized to (mask_size, mask_size). + + This function is used when generating training targets for mask head in Mask R-CNN. + Given original ground-truth masks for an image, new ground-truth mask + training targets in the size of `mask_size x mask_size` + must be provided for each predicted box. This function will be called to + produce such targets. + + Args: + polygons (list[ndarray[float]]): a list of polygons, which represents an instance. + box: 4-element numpy array + mask_size (int): + + Returns: + Tensor: BoolTensor of shape (mask_size, mask_size) + """ + # 1. Shift the polygons w.r.t the boxes + w, h = box[2] - box[0], box[3] - box[1] + + polygons = copy.deepcopy(polygons) + for p in polygons: + p[0::2] = p[0::2] - box[0] + p[1::2] = p[1::2] - box[1] + + # 2. Rescale the polygons to the new box size + # max() to avoid division by small number + ratio_h = mask_size / max(h, 0.1) + ratio_w = mask_size / max(w, 0.1) + + if ratio_h == ratio_w: + for p in polygons: + p *= ratio_h + else: + for p in polygons: + p[0::2] *= ratio_w + p[1::2] *= ratio_h + + # 3. Rasterize the polygons with coco api + mask = polygons_to_bitmask(polygons, mask_size, mask_size) + mask = torch.from_numpy(mask) + return mask + + +class BitMasks: + """ + This class stores the segmentation masks for all objects in one image, in + the form of bitmaps. + + Attributes: + tensor: bool Tensor of N,H,W, representing N instances in the image. + """ + + def __init__(self, tensor: Union[torch.Tensor, np.ndarray]): + """ + Args: + tensor: bool Tensor of N,H,W, representing N instances in the image. 
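        Example (an illustrative sketch)::

            masks = BitMasks(torch.zeros(3, 480, 640, dtype=torch.bool))
            assert len(masks) == 3
            masks.nonempty()    # tensor([False, False, False]) -- every mask is empty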
+ """ + if isinstance(tensor, torch.Tensor): + tensor = tensor.to(torch.bool) + else: + tensor = torch.as_tensor(tensor, dtype=torch.bool, device=torch.device("cpu")) + assert tensor.dim() == 3, tensor.size() + self.image_size = tensor.shape[1:] + self.tensor = tensor + + @torch.jit.unused + def to(self, *args: Any, **kwargs: Any) -> "BitMasks": + return BitMasks(self.tensor.to(*args, **kwargs)) + + @property + def device(self) -> torch.device: + return self.tensor.device + + @torch.jit.unused + def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "BitMasks": + """ + Returns: + BitMasks: Create a new :class:`BitMasks` by indexing. + + The following usage are allowed: + + 1. `new_masks = masks[3]`: return a `BitMasks` which contains only one mask. + 2. `new_masks = masks[2:10]`: return a slice of masks. + 3. `new_masks = masks[vector]`, where vector is a torch.BoolTensor + with `length = len(masks)`. Nonzero elements in the vector will be selected. + + Note that the returned object might share storage with this object, + subject to Pytorch's indexing semantics. + """ + if isinstance(item, int): + return BitMasks(self.tensor[item].unsqueeze(0)) + m = self.tensor[item] + assert m.dim() == 3, "Indexing on BitMasks with {} returns a tensor with shape {}!".format( + item, m.shape + ) + return BitMasks(m) + + @torch.jit.unused + def __iter__(self) -> torch.Tensor: + yield from self.tensor + + @torch.jit.unused + def __repr__(self) -> str: + s = self.__class__.__name__ + "(" + s += "num_instances={})".format(len(self.tensor)) + return s + + def __len__(self) -> int: + return self.tensor.shape[0] + + def nonempty(self) -> torch.Tensor: + """ + Find masks that are non-empty. + + Returns: + Tensor: a BoolTensor which represents + whether each mask is empty (False) or non-empty (True). + """ + return self.tensor.flatten(1).any(dim=1) + + @staticmethod + def from_polygon_masks( + polygon_masks: Union["PolygonMasks", List[List[np.ndarray]]], height: int, width: int + ) -> "BitMasks": + """ + Args: + polygon_masks (list[list[ndarray]] or PolygonMasks) + height, width (int) + """ + if isinstance(polygon_masks, PolygonMasks): + polygon_masks = polygon_masks.polygons + masks = [polygons_to_bitmask(p, height, width) for p in polygon_masks] + if len(masks): + return BitMasks(torch.stack([torch.from_numpy(x) for x in masks])) + else: + return BitMasks(torch.empty(0, height, width, dtype=torch.bool)) + + @staticmethod + def from_roi_masks(roi_masks: "ROIMasks", height: int, width: int) -> "BitMasks": + """ + Args: + roi_masks: + height, width (int): + """ + return roi_masks.to_bitmasks(height, width) + + def crop_and_resize(self, boxes: torch.Tensor, mask_size: int) -> torch.Tensor: + """ + Crop each bitmask by the given box, and resize results to (mask_size, mask_size). + This can be used to prepare training targets for Mask R-CNN. + It has less reconstruction error compared to rasterization with polygons. + However we observe no difference in accuracy, + but BitMasks requires more memory to store all the masks. + + Args: + boxes (Tensor): Nx4 tensor storing the boxes for each mask + mask_size (int): the size of the rasterized mask. + + Returns: + Tensor: + A bool tensor of shape (N, mask_size, mask_size), where + N is the number of predicted boxes for this image. 
+ """ + assert len(boxes) == len(self), "{} != {}".format(len(boxes), len(self)) + device = self.tensor.device + + batch_inds = torch.arange(len(boxes), device=device).to(dtype=boxes.dtype)[:, None] + rois = torch.cat([batch_inds, boxes], dim=1) # Nx5 + + bit_masks = self.tensor.to(dtype=torch.float32) + rois = rois.to(device=device) + output = ( + ROIAlign((mask_size, mask_size), 1.0, 0, aligned=True) + .forward(bit_masks[:, None, :, :], rois) + .squeeze(1) + ) + output = output >= 0.5 + return output + + def get_bounding_boxes(self) -> Boxes: + """ + Returns: + Boxes: tight bounding boxes around bitmasks. + If a mask is empty, it's bounding box will be all zero. + """ + boxes = torch.zeros(self.tensor.shape[0], 4, dtype=torch.float32) + x_any = torch.any(self.tensor, dim=1) + y_any = torch.any(self.tensor, dim=2) + for idx in range(self.tensor.shape[0]): + x = torch.where(x_any[idx, :])[0] + y = torch.where(y_any[idx, :])[0] + if len(x) > 0 and len(y) > 0: + boxes[idx, :] = torch.as_tensor( + [x[0], y[0], x[-1] + 1, y[-1] + 1], dtype=torch.float32 + ) + return Boxes(boxes) + + @staticmethod + def cat(bitmasks_list: List["BitMasks"]) -> "BitMasks": + """ + Concatenates a list of BitMasks into a single BitMasks + + Arguments: + bitmasks_list (list[BitMasks]) + + Returns: + BitMasks: the concatenated BitMasks + """ + assert isinstance(bitmasks_list, (list, tuple)) + assert len(bitmasks_list) > 0 + assert all(isinstance(bitmask, BitMasks) for bitmask in bitmasks_list) + + cat_bitmasks = type(bitmasks_list[0])(torch.cat([bm.tensor for bm in bitmasks_list], dim=0)) + return cat_bitmasks + + +class PolygonMasks: + """ + This class stores the segmentation masks for all objects in one image, in the form of polygons. + + Attributes: + polygons: list[list[ndarray]]. Each ndarray is a float64 vector representing a polygon. + """ + + def __init__(self, polygons: List[List[Union[torch.Tensor, np.ndarray]]]): + """ + Arguments: + polygons (list[list[np.ndarray]]): The first + level of the list correspond to individual instances, + the second level to all the polygons that compose the + instance, and the third level to the polygon coordinates. + The third level array should have the format of + [x0, y0, x1, y1, ..., xn, yn] (n >= 3). + """ + if not isinstance(polygons, list): + raise ValueError( + "Cannot create PolygonMasks: Expect a list of list of polygons per image. " + "Got '{}' instead.".format(type(polygons)) + ) + + def _make_array(t: Union[torch.Tensor, np.ndarray]) -> np.ndarray: + # Use float64 for higher precision, because why not? + # Always put polygons on CPU (self.to is a no-op) since they + # are supposed to be small tensors. + # May need to change this assumption if GPU placement becomes useful + if isinstance(t, torch.Tensor): + t = t.cpu().numpy() + return np.asarray(t).astype("float64") + + def process_polygons( + polygons_per_instance: List[Union[torch.Tensor, np.ndarray]] + ) -> List[np.ndarray]: + if not isinstance(polygons_per_instance, list): + raise ValueError( + "Cannot create polygons: Expect a list of polygons per instance. 
" + "Got '{}' instead.".format(type(polygons_per_instance)) + ) + # transform each polygon to a numpy array + polygons_per_instance = [_make_array(p) for p in polygons_per_instance] + for polygon in polygons_per_instance: + if len(polygon) % 2 != 0 or len(polygon) < 6: + raise ValueError(f"Cannot create a polygon from {len(polygon)} coordinates.") + return polygons_per_instance + + self.polygons: List[List[np.ndarray]] = [ + process_polygons(polygons_per_instance) for polygons_per_instance in polygons + ] + + def to(self, *args: Any, **kwargs: Any) -> "PolygonMasks": + return self + + @property + def device(self) -> torch.device: + return torch.device("cpu") + + def get_bounding_boxes(self) -> Boxes: + """ + Returns: + Boxes: tight bounding boxes around polygon masks. + """ + boxes = torch.zeros(len(self.polygons), 4, dtype=torch.float32) + for idx, polygons_per_instance in enumerate(self.polygons): + minxy = torch.as_tensor([float("inf"), float("inf")], dtype=torch.float32) + maxxy = torch.zeros(2, dtype=torch.float32) + for polygon in polygons_per_instance: + coords = torch.from_numpy(polygon).view(-1, 2).to(dtype=torch.float32) + minxy = torch.min(minxy, torch.min(coords, dim=0).values) + maxxy = torch.max(maxxy, torch.max(coords, dim=0).values) + boxes[idx, :2] = minxy + boxes[idx, 2:] = maxxy + return Boxes(boxes) + + def nonempty(self) -> torch.Tensor: + """ + Find masks that are non-empty. + + Returns: + Tensor: + a BoolTensor which represents whether each mask is empty (False) or not (True). + """ + keep = [1 if len(polygon) > 0 else 0 for polygon in self.polygons] + return torch.from_numpy(np.asarray(keep, dtype=bool)) + + def __getitem__(self, item: Union[int, slice, List[int], torch.BoolTensor]) -> "PolygonMasks": + """ + Support indexing over the instances and return a `PolygonMasks` object. + `item` can be: + + 1. An integer. It will return an object with only one instance. + 2. A slice. It will return an object with the selected instances. + 3. A list[int]. It will return an object with the selected instances, + correpsonding to the indices in the list. + 4. A vector mask of type BoolTensor, whose length is num_instances. + It will return an object with the instances whose mask is nonzero. + """ + if isinstance(item, int): + selected_polygons = [self.polygons[item]] + elif isinstance(item, slice): + selected_polygons = self.polygons[item] + elif isinstance(item, list): + selected_polygons = [self.polygons[i] for i in item] + elif isinstance(item, torch.Tensor): + # Polygons is a list, so we have to move the indices back to CPU. + if item.dtype == torch.bool: + assert item.dim() == 1, item.shape + item = item.nonzero().squeeze(1).cpu().numpy().tolist() + elif item.dtype in [torch.int32, torch.int64]: + item = item.cpu().numpy().tolist() + else: + raise ValueError("Unsupported tensor dtype={} for indexing!".format(item.dtype)) + selected_polygons = [self.polygons[i] for i in item] + return PolygonMasks(selected_polygons) + + def __iter__(self) -> Iterator[List[np.ndarray]]: + """ + Yields: + list[ndarray]: the polygons for one instance. + Each Tensor is a float64 vector representing a polygon. 
+ """ + return iter(self.polygons) + + def __repr__(self) -> str: + s = self.__class__.__name__ + "(" + s += "num_instances={})".format(len(self.polygons)) + return s + + def __len__(self) -> int: + return len(self.polygons) + + def crop_and_resize(self, boxes: torch.Tensor, mask_size: int) -> torch.Tensor: + """ + Crop each mask by the given box, and resize results to (mask_size, mask_size). + This can be used to prepare training targets for Mask R-CNN. + + Args: + boxes (Tensor): Nx4 tensor storing the boxes for each mask + mask_size (int): the size of the rasterized mask. + + Returns: + Tensor: A bool tensor of shape (N, mask_size, mask_size), where + N is the number of predicted boxes for this image. + """ + assert len(boxes) == len(self), "{} != {}".format(len(boxes), len(self)) + + device = boxes.device + # Put boxes on the CPU, as the polygon representation is not efficient GPU-wise + # (several small tensors for representing a single instance mask) + boxes = boxes.to(torch.device("cpu")) + + results = [ + rasterize_polygons_within_box(poly, box.numpy(), mask_size) + for poly, box in zip(self.polygons, boxes) + ] + """ + poly: list[list[float]], the polygons for one instance + box: a tensor of shape (4,) + """ + if len(results) == 0: + return torch.empty(0, mask_size, mask_size, dtype=torch.bool, device=device) + return torch.stack(results, dim=0).to(device=device) + + def area(self): + """ + Computes area of the mask. + Only works with Polygons, using the shoelace formula: + https://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates + + Returns: + Tensor: a vector, area for each instance + """ + + area = [] + for polygons_per_instance in self.polygons: + area_per_instance = 0 + for p in polygons_per_instance: + area_per_instance += polygon_area(p[0::2], p[1::2]) + area.append(area_per_instance) + + return torch.tensor(area) + + @staticmethod + def cat(polymasks_list: List["PolygonMasks"]) -> "PolygonMasks": + """ + Concatenates a list of PolygonMasks into a single PolygonMasks + + Arguments: + polymasks_list (list[PolygonMasks]) + + Returns: + PolygonMasks: the concatenated PolygonMasks + """ + assert isinstance(polymasks_list, (list, tuple)) + assert len(polymasks_list) > 0 + assert all(isinstance(polymask, PolygonMasks) for polymask in polymasks_list) + + cat_polymasks = type(polymasks_list[0])( + list(itertools.chain.from_iterable(pm.polygons for pm in polymasks_list)) + ) + return cat_polymasks + + +class ROIMasks: + """ + Represent masks by N smaller masks defined in some ROIs. Once ROI boxes are given, + full-image bitmask can be obtained by "pasting" the mask on the region defined + by the corresponding ROI box. + """ + + def __init__(self, tensor: torch.Tensor): + """ + Args: + tensor: (N, M, M) mask tensor that defines the mask within each ROI. + """ + if tensor.dim() != 3: + raise ValueError("ROIMasks must take a masks of 3 dimension.") + self.tensor = tensor + + def to(self, device: torch.device) -> "ROIMasks": + return ROIMasks(self.tensor.to(device)) + + @property + def device(self) -> device: + return self.tensor.device + + def __len__(self): + return self.tensor.shape[0] + + def __getitem__(self, item) -> "ROIMasks": + """ + Returns: + ROIMasks: Create a new :class:`ROIMasks` by indexing. + + The following usage are allowed: + + 1. `new_masks = masks[2:10]`: return a slice of masks. + 2. `new_masks = masks[vector]`, where vector is a torch.BoolTensor + with `length = len(masks)`. Nonzero elements in the vector will be selected. 
+ + Note that the returned object might share storage with this object, + subject to Pytorch's indexing semantics. + """ + t = self.tensor[item] + if t.dim() != 3: + raise ValueError( + f"Indexing on ROIMasks with {item} returns a tensor with shape {t.shape}!" + ) + return ROIMasks(t) + + @torch.jit.unused + def __repr__(self) -> str: + s = self.__class__.__name__ + "(" + s += "num_instances={})".format(len(self.tensor)) + return s + + @torch.jit.unused + def to_bitmasks(self, boxes: torch.Tensor, height, width, threshold=0.5): + """ + Args: see documentation of :func:`paste_masks_in_image`. + """ + from annotator.oneformer.detectron2.layers.mask_ops import paste_masks_in_image, _paste_masks_tensor_shape + + if torch.jit.is_tracing(): + if isinstance(height, torch.Tensor): + paste_func = _paste_masks_tensor_shape + else: + paste_func = paste_masks_in_image + else: + paste_func = retry_if_cuda_oom(paste_masks_in_image) + bitmasks = paste_func(self.tensor, boxes.tensor, (height, width), threshold=threshold) + return BitMasks(bitmasks) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/structures/rotated_boxes.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/structures/rotated_boxes.py new file mode 100644 index 0000000000000000000000000000000000000000..aacfc730dfdf4b6bed5f8c861b720db7656f1cab --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/structures/rotated_boxes.py @@ -0,0 +1,505 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import math +from typing import List, Tuple +import torch + +from annotator.oneformer.detectron2.layers.rotated_boxes import pairwise_iou_rotated + +from .boxes import Boxes + + +class RotatedBoxes(Boxes): + """ + This structure stores a list of rotated boxes as a Nx5 torch.Tensor. + It supports some common methods about boxes + (`area`, `clip`, `nonempty`, etc), + and also behaves like a Tensor + (support indexing, `to(device)`, `.device`, and iteration over all boxes) + """ + + def __init__(self, tensor: torch.Tensor): + """ + Args: + tensor (Tensor[float]): a Nx5 matrix. Each row is + (x_center, y_center, width, height, angle), + in which angle is represented in degrees. + While there's no strict range restriction for it, + the recommended principal range is between [-180, 180) degrees. + + Assume we have a horizontal box B = (x_center, y_center, width, height), + where width is along the x-axis and height is along the y-axis. + The rotated box B_rot (x_center, y_center, width, height, angle) + can be seen as: + + 1. When angle == 0: + B_rot == B + 2. When angle > 0: + B_rot is obtained by rotating B w.r.t its center by :math:`|angle|` degrees CCW; + 3. When angle < 0: + B_rot is obtained by rotating B w.r.t its center by :math:`|angle|` degrees CW. + + Mathematically, since the right-handed coordinate system for image space + is (y, x), where y is top->down and x is left->right, the 4 vertices of the + rotated rectangle :math:`(yr_i, xr_i)` (i = 1, 2, 3, 4) can be obtained from + the vertices of the horizontal rectangle :math:`(y_i, x_i)` (i = 1, 2, 3, 4) + in the following way (:math:`\\theta = angle*\\pi/180` is the angle in radians, + :math:`(y_c, x_c)` is the center of the rectangle): + + .. math:: + + yr_i = \\cos(\\theta) (y_i - y_c) - \\sin(\\theta) (x_i - x_c) + y_c, + + xr_i = \\sin(\\theta) (y_i - y_c) + \\cos(\\theta) (x_i - x_c) + x_c, + + which is the standard rigid-body rotation transformation. 
+ + Intuitively, the angle is + (1) the rotation angle from y-axis in image space + to the height vector (top->down in the box's local coordinate system) + of the box in CCW, and + (2) the rotation angle from x-axis in image space + to the width vector (left->right in the box's local coordinate system) + of the box in CCW. + + More intuitively, consider the following horizontal box ABCD represented + in (x1, y1, x2, y2): (3, 2, 7, 4), + covering the [3, 7] x [2, 4] region of the continuous coordinate system + which looks like this: + + .. code:: none + + O--------> x + | + | A---B + | | | + | D---C + | + v y + + Note that each capital letter represents one 0-dimensional geometric point + instead of a 'square pixel' here. + + In the example above, using (x, y) to represent a point we have: + + .. math:: + + O = (0, 0), A = (3, 2), B = (7, 2), C = (7, 4), D = (3, 4) + + We name vector AB = vector DC as the width vector in box's local coordinate system, and + vector AD = vector BC as the height vector in box's local coordinate system. Initially, + when angle = 0 degree, they're aligned with the positive directions of x-axis and y-axis + in the image space, respectively. + + For better illustration, we denote the center of the box as E, + + .. code:: none + + O--------> x + | + | A---B + | | E | + | D---C + | + v y + + where the center E = ((3+7)/2, (2+4)/2) = (5, 3). + + Also, + + .. math:: + + width = |AB| = |CD| = 7 - 3 = 4, + height = |AD| = |BC| = 4 - 2 = 2. + + Therefore, the corresponding representation for the same shape in rotated box in + (x_center, y_center, width, height, angle) format is: + + (5, 3, 4, 2, 0), + + Now, let's consider (5, 3, 4, 2, 90), which is rotated by 90 degrees + CCW (counter-clockwise) by definition. It looks like this: + + .. code:: none + + O--------> x + | B-C + | | | + | |E| + | | | + | A-D + v y + + The center E is still located at the same point (5, 3), while the vertices + ABCD are rotated by 90 degrees CCW with regard to E: + A = (4, 5), B = (4, 1), C = (6, 1), D = (6, 5) + + Here, 90 degrees can be seen as the CCW angle to rotate from y-axis to + vector AD or vector BC (the top->down height vector in box's local coordinate system), + or the CCW angle to rotate from x-axis to vector AB or vector DC (the left->right + width vector in box's local coordinate system). + + .. math:: + + width = |AB| = |CD| = 5 - 1 = 4, + height = |AD| = |BC| = 6 - 4 = 2. + + Next, how about (5, 3, 4, 2, -90), which is rotated by 90 degrees CW (clockwise) + by definition? It looks like this: + + .. code:: none + + O--------> x + | D-A + | | | + | |E| + | | | + | C-B + v y + + The center E is still located at the same point (5, 3), while the vertices + ABCD are rotated by 90 degrees CW with regard to E: + A = (6, 1), B = (6, 5), C = (4, 5), D = (4, 1) + + .. math:: + + width = |AB| = |CD| = 5 - 1 = 4, + height = |AD| = |BC| = 6 - 4 = 2. + + This covers exactly the same region as (5, 3, 4, 2, 90) does, and their IoU + will be 1. However, these two will generate different RoI Pooling results and + should not be treated as an identical box. + + On the other hand, it's easy to see that (X, Y, W, H, A) is identical to + (X, Y, W, H, A+360N), for any integer N. For example (5, 3, 4, 2, 270) would be + identical to (5, 3, 4, 2, -90), because rotating the shape 270 degrees CCW is + equivalent to rotating the same shape 90 degrees CW. + + We could rotate further to get (5, 3, 4, 2, 180), or (5, 3, 4, 2, -180): + + .. 
code:: none + + O--------> x + | + | C---D + | | E | + | B---A + | + v y + + .. math:: + + A = (7, 4), B = (3, 4), C = (3, 2), D = (7, 2), + + width = |AB| = |CD| = 7 - 3 = 4, + height = |AD| = |BC| = 4 - 2 = 2. + + Finally, this is a very inaccurate (heavily quantized) illustration of + how (5, 3, 4, 2, 60) looks like in case anyone wonders: + + .. code:: none + + O--------> x + | B\ + | / C + | /E / + | A / + | `D + v y + + It's still a rectangle with center of (5, 3), width of 4 and height of 2, + but its angle (and thus orientation) is somewhere between + (5, 3, 4, 2, 0) and (5, 3, 4, 2, 90). + """ + device = tensor.device if isinstance(tensor, torch.Tensor) else torch.device("cpu") + tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device) + if tensor.numel() == 0: + # Use reshape, so we don't end up creating a new tensor that does not depend on + # the inputs (and consequently confuses jit) + tensor = tensor.reshape((0, 5)).to(dtype=torch.float32, device=device) + assert tensor.dim() == 2 and tensor.size(-1) == 5, tensor.size() + + self.tensor = tensor + + def clone(self) -> "RotatedBoxes": + """ + Clone the RotatedBoxes. + + Returns: + RotatedBoxes + """ + return RotatedBoxes(self.tensor.clone()) + + def to(self, device: torch.device): + # Boxes are assumed float32 and does not support to(dtype) + return RotatedBoxes(self.tensor.to(device=device)) + + def area(self) -> torch.Tensor: + """ + Computes the area of all the boxes. + + Returns: + torch.Tensor: a vector with areas of each box. + """ + box = self.tensor + area = box[:, 2] * box[:, 3] + return area + + # Avoid in-place operations so that we can torchscript; NOTE: this creates a new tensor + def normalize_angles(self) -> None: + """ + Restrict angles to the range of [-180, 180) degrees + """ + angle_tensor = (self.tensor[:, 4] + 180.0) % 360.0 - 180.0 + self.tensor = torch.cat((self.tensor[:, :4], angle_tensor[:, None]), dim=1) + + def clip(self, box_size: Tuple[int, int], clip_angle_threshold: float = 1.0) -> None: + """ + Clip (in place) the boxes by limiting x coordinates to the range [0, width] + and y coordinates to the range [0, height]. + + For RRPN: + Only clip boxes that are almost horizontal with a tolerance of + clip_angle_threshold to maintain backward compatibility. + + Rotated boxes beyond this threshold are not clipped for two reasons: + + 1. There are potentially multiple ways to clip a rotated box to make it + fit within the image. + 2. It's tricky to make the entire rectangular box fit within the image + and still be able to not leave out pixels of interest. + + Therefore we rely on ops like RoIAlignRotated to safely handle this. + + Args: + box_size (height, width): The clipping box's size. + clip_angle_threshold: + Iff. abs(normalized(angle)) <= clip_angle_threshold (in degrees), + we do the clipping as horizontal boxes. 
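+
+        Illustrative sketch (editorial addition; the box values are made up):
+
+        .. code:: python
+
+            boxes = RotatedBoxes(torch.tensor([[50., 50., 200., 20., 0.5]]))
+            boxes.clip((100, 100))  # |angle| <= 1 degree, so the box is clipped in place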
+ """ + h, w = box_size + + # normalize angles to be within (-180, 180] degrees + self.normalize_angles() + + idx = torch.where(torch.abs(self.tensor[:, 4]) <= clip_angle_threshold)[0] + + # convert to (x1, y1, x2, y2) + x1 = self.tensor[idx, 0] - self.tensor[idx, 2] / 2.0 + y1 = self.tensor[idx, 1] - self.tensor[idx, 3] / 2.0 + x2 = self.tensor[idx, 0] + self.tensor[idx, 2] / 2.0 + y2 = self.tensor[idx, 1] + self.tensor[idx, 3] / 2.0 + + # clip + x1.clamp_(min=0, max=w) + y1.clamp_(min=0, max=h) + x2.clamp_(min=0, max=w) + y2.clamp_(min=0, max=h) + + # convert back to (xc, yc, w, h) + self.tensor[idx, 0] = (x1 + x2) / 2.0 + self.tensor[idx, 1] = (y1 + y2) / 2.0 + # make sure widths and heights do not increase due to numerical errors + self.tensor[idx, 2] = torch.min(self.tensor[idx, 2], x2 - x1) + self.tensor[idx, 3] = torch.min(self.tensor[idx, 3], y2 - y1) + + def nonempty(self, threshold: float = 0.0) -> torch.Tensor: + """ + Find boxes that are non-empty. + A box is considered empty, if either of its side is no larger than threshold. + + Returns: + Tensor: a binary vector which represents + whether each box is empty (False) or non-empty (True). + """ + box = self.tensor + widths = box[:, 2] + heights = box[:, 3] + keep = (widths > threshold) & (heights > threshold) + return keep + + def __getitem__(self, item) -> "RotatedBoxes": + """ + Returns: + RotatedBoxes: Create a new :class:`RotatedBoxes` by indexing. + + The following usage are allowed: + + 1. `new_boxes = boxes[3]`: return a `RotatedBoxes` which contains only one box. + 2. `new_boxes = boxes[2:10]`: return a slice of boxes. + 3. `new_boxes = boxes[vector]`, where vector is a torch.ByteTensor + with `length = len(boxes)`. Nonzero elements in the vector will be selected. + + Note that the returned RotatedBoxes might share storage with this RotatedBoxes, + subject to Pytorch's indexing semantics. + """ + if isinstance(item, int): + return RotatedBoxes(self.tensor[item].view(1, -1)) + b = self.tensor[item] + assert b.dim() == 2, "Indexing on RotatedBoxes with {} failed to return a matrix!".format( + item + ) + return RotatedBoxes(b) + + def __len__(self) -> int: + return self.tensor.shape[0] + + def __repr__(self) -> str: + return "RotatedBoxes(" + str(self.tensor) + ")" + + def inside_box(self, box_size: Tuple[int, int], boundary_threshold: int = 0) -> torch.Tensor: + """ + Args: + box_size (height, width): Size of the reference box covering + [0, width] x [0, height] + boundary_threshold (int): Boxes that extend beyond the reference box + boundary by more than boundary_threshold are considered "outside". + + For RRPN, it might not be necessary to call this function since it's common + for rotated box to extend to outside of the image boundaries + (the clip function only clips the near-horizontal boxes) + + Returns: + a binary vector, indicating whether each box is inside the reference box. 
+ """ + height, width = box_size + + cnt_x = self.tensor[..., 0] + cnt_y = self.tensor[..., 1] + half_w = self.tensor[..., 2] / 2.0 + half_h = self.tensor[..., 3] / 2.0 + a = self.tensor[..., 4] + c = torch.abs(torch.cos(a * math.pi / 180.0)) + s = torch.abs(torch.sin(a * math.pi / 180.0)) + # This basically computes the horizontal bounding rectangle of the rotated box + max_rect_dx = c * half_w + s * half_h + max_rect_dy = c * half_h + s * half_w + + inds_inside = ( + (cnt_x - max_rect_dx >= -boundary_threshold) + & (cnt_y - max_rect_dy >= -boundary_threshold) + & (cnt_x + max_rect_dx < width + boundary_threshold) + & (cnt_y + max_rect_dy < height + boundary_threshold) + ) + + return inds_inside + + def get_centers(self) -> torch.Tensor: + """ + Returns: + The box centers in a Nx2 array of (x, y). + """ + return self.tensor[:, :2] + + def scale(self, scale_x: float, scale_y: float) -> None: + """ + Scale the rotated box with horizontal and vertical scaling factors + Note: when scale_factor_x != scale_factor_y, + the rotated box does not preserve the rectangular shape when the angle + is not a multiple of 90 degrees under resize transformation. + Instead, the shape is a parallelogram (that has skew) + Here we make an approximation by fitting a rotated rectangle to the parallelogram. + """ + self.tensor[:, 0] *= scale_x + self.tensor[:, 1] *= scale_y + theta = self.tensor[:, 4] * math.pi / 180.0 + c = torch.cos(theta) + s = torch.sin(theta) + + # In image space, y is top->down and x is left->right + # Consider the local coordintate system for the rotated box, + # where the box center is located at (0, 0), and the four vertices ABCD are + # A(-w / 2, -h / 2), B(w / 2, -h / 2), C(w / 2, h / 2), D(-w / 2, h / 2) + # the midpoint of the left edge AD of the rotated box E is: + # E = (A+D)/2 = (-w / 2, 0) + # the midpoint of the top edge AB of the rotated box F is: + # F(0, -h / 2) + # To get the old coordinates in the global system, apply the rotation transformation + # (Note: the right-handed coordinate system for image space is yOx): + # (old_x, old_y) = (s * y + c * x, c * y - s * x) + # E(old) = (s * 0 + c * (-w/2), c * 0 - s * (-w/2)) = (-c * w / 2, s * w / 2) + # F(old) = (s * (-h / 2) + c * 0, c * (-h / 2) - s * 0) = (-s * h / 2, -c * h / 2) + # After applying the scaling factor (sfx, sfy): + # E(new) = (-sfx * c * w / 2, sfy * s * w / 2) + # F(new) = (-sfx * s * h / 2, -sfy * c * h / 2) + # The new width after scaling tranformation becomes: + + # w(new) = |E(new) - O| * 2 + # = sqrt[(sfx * c * w / 2)^2 + (sfy * s * w / 2)^2] * 2 + # = sqrt[(sfx * c)^2 + (sfy * s)^2] * w + # i.e., scale_factor_w = sqrt[(sfx * c)^2 + (sfy * s)^2] + # + # For example, + # when angle = 0 or 180, |c| = 1, s = 0, scale_factor_w == scale_factor_x; + # when |angle| = 90, c = 0, |s| = 1, scale_factor_w == scale_factor_y + self.tensor[:, 2] *= torch.sqrt((scale_x * c) ** 2 + (scale_y * s) ** 2) + + # h(new) = |F(new) - O| * 2 + # = sqrt[(sfx * s * h / 2)^2 + (sfy * c * h / 2)^2] * 2 + # = sqrt[(sfx * s)^2 + (sfy * c)^2] * h + # i.e., scale_factor_h = sqrt[(sfx * s)^2 + (sfy * c)^2] + # + # For example, + # when angle = 0 or 180, |c| = 1, s = 0, scale_factor_h == scale_factor_y; + # when |angle| = 90, c = 0, |s| = 1, scale_factor_h == scale_factor_x + self.tensor[:, 3] *= torch.sqrt((scale_x * s) ** 2 + (scale_y * c) ** 2) + + # The angle is the rotation angle from y-axis in image space to the height + # vector (top->down in the box's local coordinate system) of the box in CCW. 
+ # + # angle(new) = angle_yOx(O - F(new)) + # = angle_yOx( (sfx * s * h / 2, sfy * c * h / 2) ) + # = atan2(sfx * s * h / 2, sfy * c * h / 2) + # = atan2(sfx * s, sfy * c) + # + # For example, + # when sfx == sfy, angle(new) == atan2(s, c) == angle(old) + self.tensor[:, 4] = torch.atan2(scale_x * s, scale_y * c) * 180 / math.pi + + @classmethod + def cat(cls, boxes_list: List["RotatedBoxes"]) -> "RotatedBoxes": + """ + Concatenates a list of RotatedBoxes into a single RotatedBoxes + + Arguments: + boxes_list (list[RotatedBoxes]) + + Returns: + RotatedBoxes: the concatenated RotatedBoxes + """ + assert isinstance(boxes_list, (list, tuple)) + if len(boxes_list) == 0: + return cls(torch.empty(0)) + assert all([isinstance(box, RotatedBoxes) for box in boxes_list]) + + # use torch.cat (v.s. layers.cat) so the returned boxes never share storage with input + cat_boxes = cls(torch.cat([b.tensor for b in boxes_list], dim=0)) + return cat_boxes + + @property + def device(self) -> torch.device: + return self.tensor.device + + @torch.jit.unused + def __iter__(self): + """ + Yield a box as a Tensor of shape (5,) at a time. + """ + yield from self.tensor + + +def pairwise_iou(boxes1: RotatedBoxes, boxes2: RotatedBoxes) -> None: + """ + Given two lists of rotated boxes of size N and M, + compute the IoU (intersection over union) + between **all** N x M pairs of boxes. + The box order must be (x_center, y_center, width, height, angle). + + Args: + boxes1, boxes2 (RotatedBoxes): + two `RotatedBoxes`. Contains N & M rotated boxes, respectively. + + Returns: + Tensor: IoU, sized [N,M]. + """ + + return pairwise_iou_rotated(boxes1.tensor, boxes2.tensor) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/tracking/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/tracking/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..21078ae822b04b71dbd8b056b5993d173eaf6bff --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/tracking/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from .base_tracker import ( # noqa + BaseTracker, + build_tracker_head, + TRACKER_HEADS_REGISTRY, +) +from .bbox_iou_tracker import BBoxIOUTracker # noqa +from .hungarian_tracker import BaseHungarianTracker # noqa +from .iou_weighted_hungarian_bbox_iou_tracker import ( # noqa + IOUWeightedHungarianBBoxIOUTracker, +) +from .utils import create_prediction_pairs # noqa +from .vanilla_hungarian_bbox_iou_tracker import VanillaHungarianBBoxIOUTracker # noqa + +__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/tracking/base_tracker.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/tracking/base_tracker.py new file mode 100644 index 0000000000000000000000000000000000000000..bec640746d4fa40ae4a4020e88300e601b95ea3d --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/tracking/base_tracker.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python3 +# Copyright 2004-present Facebook. All Rights Reserved. 
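+"""
+Base class and registry for tracker heads.
+
+Minimal usage sketch (editorial addition): the config values below are illustrative,
+and the concrete tracker class must already be imported (e.g. via the
+`detectron2.tracking` package) so that it is registered before lookup.
+
+.. code:: python
+
+    from annotator.oneformer.detectron2.tracking import build_tracker_head
+    from annotator.oneformer.detectron2.config import CfgNode
+
+    cfg = CfgNode({"TRACKER_HEADS": {"TRACKER_NAME": "BBoxIOUTracker",
+                                     "VIDEO_HEIGHT": 480, "VIDEO_WIDTH": 640}})
+    tracker = build_tracker_head(cfg)
+    tracked = tracker.update(frame_predictions)  # `frame_predictions`: a D2 Instances
+"""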
+from annotator.oneformer.detectron2.config import configurable +from annotator.oneformer.detectron2.utils.registry import Registry + +from ..config.config import CfgNode as CfgNode_ +from ..structures import Instances + +TRACKER_HEADS_REGISTRY = Registry("TRACKER_HEADS") +TRACKER_HEADS_REGISTRY.__doc__ = """ +Registry for tracking classes. +""" + + +class BaseTracker(object): + """ + A parent class for all trackers + """ + + @configurable + def __init__(self, **kwargs): + self._prev_instances = None # (D2)instances for previous frame + self._matched_idx = set() # indices in prev_instances found matching + self._matched_ID = set() # idendities in prev_instances found matching + self._untracked_prev_idx = set() # indices in prev_instances not found matching + self._id_count = 0 # used to assign new id + + @classmethod + def from_config(cls, cfg: CfgNode_): + raise NotImplementedError("Calling BaseTracker::from_config") + + def update(self, predictions: Instances) -> Instances: + """ + Args: + predictions: D2 Instances for predictions of the current frame + Return: + D2 Instances for predictions of the current frame with ID assigned + + _prev_instances and instances will have the following fields: + .pred_boxes (shape=[N, 4]) + .scores (shape=[N,]) + .pred_classes (shape=[N,]) + .pred_keypoints (shape=[N, M, 3], Optional) + .pred_masks (shape=List[2D_MASK], Optional) 2D_MASK: shape=[H, W] + .ID (shape=[N,]) + + N: # of detected bboxes + H and W: height and width of 2D mask + """ + raise NotImplementedError("Calling BaseTracker::update") + + +def build_tracker_head(cfg: CfgNode_) -> BaseTracker: + """ + Build a tracker head from `cfg.TRACKER_HEADS.TRACKER_NAME`. + + Args: + cfg: D2 CfgNode, config file with tracker information + Return: + tracker object + """ + name = cfg.TRACKER_HEADS.TRACKER_NAME + tracker_class = TRACKER_HEADS_REGISTRY.get(name) + return tracker_class(cfg) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/tracking/bbox_iou_tracker.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/tracking/bbox_iou_tracker.py new file mode 100644 index 0000000000000000000000000000000000000000..2b7e2579364b20969db884a5785cb5c650d760ac --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/tracking/bbox_iou_tracker.py @@ -0,0 +1,276 @@ +#!/usr/bin/env python3 +# Copyright 2004-present Facebook. All Rights Reserved. 
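+"""
+Greedy IoU-based bounding-box tracker.
+
+Usage sketch (editorial addition): the constructor values are illustrative and
+`per_frame_instances` is a hypothetical iterable of D2 Instances objects.
+
+.. code:: python
+
+    tracker = BBoxIOUTracker(video_height=480, video_width=640,
+                             track_iou_threshold=0.5)
+    for predictions in per_frame_instances:
+        predictions = tracker.update(predictions)
+        print(predictions.ID)  # per-box IDs carried across frames by IoU matching
+"""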
+import copy +import numpy as np +from typing import List +import torch + +from annotator.oneformer.detectron2.config import configurable +from annotator.oneformer.detectron2.structures import Boxes, Instances +from annotator.oneformer.detectron2.structures.boxes import pairwise_iou + +from ..config.config import CfgNode as CfgNode_ +from .base_tracker import TRACKER_HEADS_REGISTRY, BaseTracker + + +@TRACKER_HEADS_REGISTRY.register() +class BBoxIOUTracker(BaseTracker): + """ + A bounding box tracker to assign ID based on IoU between current and previous instances + """ + + @configurable + def __init__( + self, + *, + video_height: int, + video_width: int, + max_num_instances: int = 200, + max_lost_frame_count: int = 0, + min_box_rel_dim: float = 0.02, + min_instance_period: int = 1, + track_iou_threshold: float = 0.5, + **kwargs, + ): + """ + Args: + video_height: height the video frame + video_width: width of the video frame + max_num_instances: maximum number of id allowed to be tracked + max_lost_frame_count: maximum number of frame an id can lost tracking + exceed this number, an id is considered as lost + forever + min_box_rel_dim: a percentage, smaller than this dimension, a bbox is + removed from tracking + min_instance_period: an instance will be shown after this number of period + since its first showing up in the video + track_iou_threshold: iou threshold, below this number a bbox pair is removed + from tracking + """ + super().__init__(**kwargs) + self._video_height = video_height + self._video_width = video_width + self._max_num_instances = max_num_instances + self._max_lost_frame_count = max_lost_frame_count + self._min_box_rel_dim = min_box_rel_dim + self._min_instance_period = min_instance_period + self._track_iou_threshold = track_iou_threshold + + @classmethod + def from_config(cls, cfg: CfgNode_): + """ + Old style initialization using CfgNode + + Args: + cfg: D2 CfgNode, config file + Return: + dictionary storing arguments for __init__ method + """ + assert "VIDEO_HEIGHT" in cfg.TRACKER_HEADS + assert "VIDEO_WIDTH" in cfg.TRACKER_HEADS + video_height = cfg.TRACKER_HEADS.get("VIDEO_HEIGHT") + video_width = cfg.TRACKER_HEADS.get("VIDEO_WIDTH") + max_num_instances = cfg.TRACKER_HEADS.get("MAX_NUM_INSTANCES", 200) + max_lost_frame_count = cfg.TRACKER_HEADS.get("MAX_LOST_FRAME_COUNT", 0) + min_box_rel_dim = cfg.TRACKER_HEADS.get("MIN_BOX_REL_DIM", 0.02) + min_instance_period = cfg.TRACKER_HEADS.get("MIN_INSTANCE_PERIOD", 1) + track_iou_threshold = cfg.TRACKER_HEADS.get("TRACK_IOU_THRESHOLD", 0.5) + return { + "_target_": "detectron2.tracking.bbox_iou_tracker.BBoxIOUTracker", + "video_height": video_height, + "video_width": video_width, + "max_num_instances": max_num_instances, + "max_lost_frame_count": max_lost_frame_count, + "min_box_rel_dim": min_box_rel_dim, + "min_instance_period": min_instance_period, + "track_iou_threshold": track_iou_threshold, + } + + def update(self, instances: Instances) -> Instances: + """ + See BaseTracker description + """ + instances = self._initialize_extra_fields(instances) + if self._prev_instances is not None: + # calculate IoU of all bbox pairs + iou_all = pairwise_iou( + boxes1=instances.pred_boxes, + boxes2=self._prev_instances.pred_boxes, + ) + # sort IoU in descending order + bbox_pairs = self._create_prediction_pairs(instances, iou_all) + # assign previous ID to current bbox if IoU > track_iou_threshold + self._reset_fields() + for bbox_pair in bbox_pairs: + idx = bbox_pair["idx"] + prev_id = bbox_pair["prev_id"] + if ( + idx in 
self._matched_idx + or prev_id in self._matched_ID + or bbox_pair["IoU"] < self._track_iou_threshold + ): + continue + instances.ID[idx] = prev_id + instances.ID_period[idx] = bbox_pair["prev_period"] + 1 + instances.lost_frame_count[idx] = 0 + self._matched_idx.add(idx) + self._matched_ID.add(prev_id) + self._untracked_prev_idx.remove(bbox_pair["prev_idx"]) + instances = self._assign_new_id(instances) + instances = self._merge_untracked_instances(instances) + self._prev_instances = copy.deepcopy(instances) + return instances + + def _create_prediction_pairs(self, instances: Instances, iou_all: np.ndarray) -> List: + """ + For all instances in previous and current frames, create pairs. For each + pair, store index of the instance in current frame predcitions, index in + previous predictions, ID in previous predictions, IoU of the bboxes in this + pair, period in previous predictions. + + Args: + instances: D2 Instances, for predictions of the current frame + iou_all: IoU for all bboxes pairs + Return: + A list of IoU for all pairs + """ + bbox_pairs = [] + for i in range(len(instances)): + for j in range(len(self._prev_instances)): + bbox_pairs.append( + { + "idx": i, + "prev_idx": j, + "prev_id": self._prev_instances.ID[j], + "IoU": iou_all[i, j], + "prev_period": self._prev_instances.ID_period[j], + } + ) + return bbox_pairs + + def _initialize_extra_fields(self, instances: Instances) -> Instances: + """ + If input instances don't have ID, ID_period, lost_frame_count fields, + this method is used to initialize these fields. + + Args: + instances: D2 Instances, for predictions of the current frame + Return: + D2 Instances with extra fields added + """ + if not instances.has("ID"): + instances.set("ID", [None] * len(instances)) + if not instances.has("ID_period"): + instances.set("ID_period", [None] * len(instances)) + if not instances.has("lost_frame_count"): + instances.set("lost_frame_count", [None] * len(instances)) + if self._prev_instances is None: + instances.ID = list(range(len(instances))) + self._id_count += len(instances) + instances.ID_period = [1] * len(instances) + instances.lost_frame_count = [0] * len(instances) + return instances + + def _reset_fields(self): + """ + Before each uodate call, reset fields first + """ + self._matched_idx = set() + self._matched_ID = set() + self._untracked_prev_idx = set(range(len(self._prev_instances))) + + def _assign_new_id(self, instances: Instances) -> Instances: + """ + For each untracked instance, assign a new id + + Args: + instances: D2 Instances, for predictions of the current frame + Return: + D2 Instances with new ID assigned + """ + untracked_idx = set(range(len(instances))).difference(self._matched_idx) + for idx in untracked_idx: + instances.ID[idx] = self._id_count + self._id_count += 1 + instances.ID_period[idx] = 1 + instances.lost_frame_count[idx] = 0 + return instances + + def _merge_untracked_instances(self, instances: Instances) -> Instances: + """ + For untracked previous instances, under certain condition, still keep them + in tracking and merge with the current instances. 
+ + Args: + instances: D2 Instances, for predictions of the current frame + Return: + D2 Instances merging current instances and instances from previous + frame decided to keep tracking + """ + untracked_instances = Instances( + image_size=instances.image_size, + pred_boxes=[], + pred_classes=[], + scores=[], + ID=[], + ID_period=[], + lost_frame_count=[], + ) + prev_bboxes = list(self._prev_instances.pred_boxes) + prev_classes = list(self._prev_instances.pred_classes) + prev_scores = list(self._prev_instances.scores) + prev_ID_period = self._prev_instances.ID_period + if instances.has("pred_masks"): + untracked_instances.set("pred_masks", []) + prev_masks = list(self._prev_instances.pred_masks) + if instances.has("pred_keypoints"): + untracked_instances.set("pred_keypoints", []) + prev_keypoints = list(self._prev_instances.pred_keypoints) + if instances.has("pred_keypoint_heatmaps"): + untracked_instances.set("pred_keypoint_heatmaps", []) + prev_keypoint_heatmaps = list(self._prev_instances.pred_keypoint_heatmaps) + for idx in self._untracked_prev_idx: + x_left, y_top, x_right, y_bot = prev_bboxes[idx] + if ( + (1.0 * (x_right - x_left) / self._video_width < self._min_box_rel_dim) + or (1.0 * (y_bot - y_top) / self._video_height < self._min_box_rel_dim) + or self._prev_instances.lost_frame_count[idx] >= self._max_lost_frame_count + or prev_ID_period[idx] <= self._min_instance_period + ): + continue + untracked_instances.pred_boxes.append(list(prev_bboxes[idx].numpy())) + untracked_instances.pred_classes.append(int(prev_classes[idx])) + untracked_instances.scores.append(float(prev_scores[idx])) + untracked_instances.ID.append(self._prev_instances.ID[idx]) + untracked_instances.ID_period.append(self._prev_instances.ID_period[idx]) + untracked_instances.lost_frame_count.append( + self._prev_instances.lost_frame_count[idx] + 1 + ) + if instances.has("pred_masks"): + untracked_instances.pred_masks.append(prev_masks[idx].numpy().astype(np.uint8)) + if instances.has("pred_keypoints"): + untracked_instances.pred_keypoints.append( + prev_keypoints[idx].numpy().astype(np.uint8) + ) + if instances.has("pred_keypoint_heatmaps"): + untracked_instances.pred_keypoint_heatmaps.append( + prev_keypoint_heatmaps[idx].numpy().astype(np.float32) + ) + untracked_instances.pred_boxes = Boxes(torch.FloatTensor(untracked_instances.pred_boxes)) + untracked_instances.pred_classes = torch.IntTensor(untracked_instances.pred_classes) + untracked_instances.scores = torch.FloatTensor(untracked_instances.scores) + if instances.has("pred_masks"): + untracked_instances.pred_masks = torch.IntTensor(untracked_instances.pred_masks) + if instances.has("pred_keypoints"): + untracked_instances.pred_keypoints = torch.IntTensor(untracked_instances.pred_keypoints) + if instances.has("pred_keypoint_heatmaps"): + untracked_instances.pred_keypoint_heatmaps = torch.FloatTensor( + untracked_instances.pred_keypoint_heatmaps + ) + + return Instances.cat( + [ + instances, + untracked_instances, + ] + ) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/tracking/hungarian_tracker.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/tracking/hungarian_tracker.py new file mode 100644 index 0000000000000000000000000000000000000000..bb2b368ca0483319616dfbe5919554e5d360dd49 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/tracking/hungarian_tracker.py @@ -0,0 +1,171 @@ +#!/usr/bin/env python3 +# Copyright 2004-present Facebook. 
All Rights Reserved. +import copy +import numpy as np +from typing import Dict +import torch +from scipy.optimize import linear_sum_assignment + +from annotator.oneformer.detectron2.config import configurable +from annotator.oneformer.detectron2.structures import Boxes, Instances + +from ..config.config import CfgNode as CfgNode_ +from .base_tracker import BaseTracker + + +class BaseHungarianTracker(BaseTracker): + """ + A base class for all Hungarian trackers + """ + + @configurable + def __init__( + self, + video_height: int, + video_width: int, + max_num_instances: int = 200, + max_lost_frame_count: int = 0, + min_box_rel_dim: float = 0.02, + min_instance_period: int = 1, + **kwargs + ): + """ + Args: + video_height: height the video frame + video_width: width of the video frame + max_num_instances: maximum number of id allowed to be tracked + max_lost_frame_count: maximum number of frame an id can lost tracking + exceed this number, an id is considered as lost + forever + min_box_rel_dim: a percentage, smaller than this dimension, a bbox is + removed from tracking + min_instance_period: an instance will be shown after this number of period + since its first showing up in the video + """ + super().__init__(**kwargs) + self._video_height = video_height + self._video_width = video_width + self._max_num_instances = max_num_instances + self._max_lost_frame_count = max_lost_frame_count + self._min_box_rel_dim = min_box_rel_dim + self._min_instance_period = min_instance_period + + @classmethod + def from_config(cls, cfg: CfgNode_) -> Dict: + raise NotImplementedError("Calling HungarianTracker::from_config") + + def build_cost_matrix(self, instances: Instances, prev_instances: Instances) -> np.ndarray: + raise NotImplementedError("Calling HungarianTracker::build_matrix") + + def update(self, instances: Instances) -> Instances: + if instances.has("pred_keypoints"): + raise NotImplementedError("Need to add support for keypoints") + instances = self._initialize_extra_fields(instances) + if self._prev_instances is not None: + self._untracked_prev_idx = set(range(len(self._prev_instances))) + cost_matrix = self.build_cost_matrix(instances, self._prev_instances) + matched_idx, matched_prev_idx = linear_sum_assignment(cost_matrix) + instances = self._process_matched_idx(instances, matched_idx, matched_prev_idx) + instances = self._process_unmatched_idx(instances, matched_idx) + instances = self._process_unmatched_prev_idx(instances, matched_prev_idx) + self._prev_instances = copy.deepcopy(instances) + return instances + + def _initialize_extra_fields(self, instances: Instances) -> Instances: + """ + If input instances don't have ID, ID_period, lost_frame_count fields, + this method is used to initialize these fields. 
+ + Args: + instances: D2 Instances, for predictions of the current frame + Return: + D2 Instances with extra fields added + """ + if not instances.has("ID"): + instances.set("ID", [None] * len(instances)) + if not instances.has("ID_period"): + instances.set("ID_period", [None] * len(instances)) + if not instances.has("lost_frame_count"): + instances.set("lost_frame_count", [None] * len(instances)) + if self._prev_instances is None: + instances.ID = list(range(len(instances))) + self._id_count += len(instances) + instances.ID_period = [1] * len(instances) + instances.lost_frame_count = [0] * len(instances) + return instances + + def _process_matched_idx( + self, instances: Instances, matched_idx: np.ndarray, matched_prev_idx: np.ndarray + ) -> Instances: + assert matched_idx.size == matched_prev_idx.size + for i in range(matched_idx.size): + instances.ID[matched_idx[i]] = self._prev_instances.ID[matched_prev_idx[i]] + instances.ID_period[matched_idx[i]] = ( + self._prev_instances.ID_period[matched_prev_idx[i]] + 1 + ) + instances.lost_frame_count[matched_idx[i]] = 0 + return instances + + def _process_unmatched_idx(self, instances: Instances, matched_idx: np.ndarray) -> Instances: + untracked_idx = set(range(len(instances))).difference(set(matched_idx)) + for idx in untracked_idx: + instances.ID[idx] = self._id_count + self._id_count += 1 + instances.ID_period[idx] = 1 + instances.lost_frame_count[idx] = 0 + return instances + + def _process_unmatched_prev_idx( + self, instances: Instances, matched_prev_idx: np.ndarray + ) -> Instances: + untracked_instances = Instances( + image_size=instances.image_size, + pred_boxes=[], + pred_masks=[], + pred_classes=[], + scores=[], + ID=[], + ID_period=[], + lost_frame_count=[], + ) + prev_bboxes = list(self._prev_instances.pred_boxes) + prev_classes = list(self._prev_instances.pred_classes) + prev_scores = list(self._prev_instances.scores) + prev_ID_period = self._prev_instances.ID_period + if instances.has("pred_masks"): + prev_masks = list(self._prev_instances.pred_masks) + untracked_prev_idx = set(range(len(self._prev_instances))).difference(set(matched_prev_idx)) + for idx in untracked_prev_idx: + x_left, y_top, x_right, y_bot = prev_bboxes[idx] + if ( + (1.0 * (x_right - x_left) / self._video_width < self._min_box_rel_dim) + or (1.0 * (y_bot - y_top) / self._video_height < self._min_box_rel_dim) + or self._prev_instances.lost_frame_count[idx] >= self._max_lost_frame_count + or prev_ID_period[idx] <= self._min_instance_period + ): + continue + untracked_instances.pred_boxes.append(list(prev_bboxes[idx].numpy())) + untracked_instances.pred_classes.append(int(prev_classes[idx])) + untracked_instances.scores.append(float(prev_scores[idx])) + untracked_instances.ID.append(self._prev_instances.ID[idx]) + untracked_instances.ID_period.append(self._prev_instances.ID_period[idx]) + untracked_instances.lost_frame_count.append( + self._prev_instances.lost_frame_count[idx] + 1 + ) + if instances.has("pred_masks"): + untracked_instances.pred_masks.append(prev_masks[idx].numpy().astype(np.uint8)) + + untracked_instances.pred_boxes = Boxes(torch.FloatTensor(untracked_instances.pred_boxes)) + untracked_instances.pred_classes = torch.IntTensor(untracked_instances.pred_classes) + untracked_instances.scores = torch.FloatTensor(untracked_instances.scores) + if instances.has("pred_masks"): + untracked_instances.pred_masks = torch.IntTensor(untracked_instances.pred_masks) + else: + untracked_instances.remove("pred_masks") + + return Instances.cat( + [ + instances, + 
untracked_instances, + ] + ) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/tracking/iou_weighted_hungarian_bbox_iou_tracker.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/tracking/iou_weighted_hungarian_bbox_iou_tracker.py new file mode 100644 index 0000000000000000000000000000000000000000..e9b40f8a9c269029e220d5dfa8df1e8372d05007 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/tracking/iou_weighted_hungarian_bbox_iou_tracker.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python3 +# Copyright 2004-present Facebook. All Rights Reserved. + +import numpy as np +from typing import List + +from annotator.oneformer.detectron2.config import CfgNode as CfgNode_ +from annotator.oneformer.detectron2.config import configurable + +from .base_tracker import TRACKER_HEADS_REGISTRY +from .vanilla_hungarian_bbox_iou_tracker import VanillaHungarianBBoxIOUTracker + + +@TRACKER_HEADS_REGISTRY.register() +class IOUWeightedHungarianBBoxIOUTracker(VanillaHungarianBBoxIOUTracker): + """ + A tracker using IoU as weight in Hungarian algorithm, also known + as Munkres or Kuhn-Munkres algorithm + """ + + @configurable + def __init__( + self, + *, + video_height: int, + video_width: int, + max_num_instances: int = 200, + max_lost_frame_count: int = 0, + min_box_rel_dim: float = 0.02, + min_instance_period: int = 1, + track_iou_threshold: float = 0.5, + **kwargs, + ): + """ + Args: + video_height: height the video frame + video_width: width of the video frame + max_num_instances: maximum number of id allowed to be tracked + max_lost_frame_count: maximum number of frame an id can lost tracking + exceed this number, an id is considered as lost + forever + min_box_rel_dim: a percentage, smaller than this dimension, a bbox is + removed from tracking + min_instance_period: an instance will be shown after this number of period + since its first showing up in the video + track_iou_threshold: iou threshold, below this number a bbox pair is removed + from tracking + """ + super().__init__( + video_height=video_height, + video_width=video_width, + max_num_instances=max_num_instances, + max_lost_frame_count=max_lost_frame_count, + min_box_rel_dim=min_box_rel_dim, + min_instance_period=min_instance_period, + track_iou_threshold=track_iou_threshold, + ) + + @classmethod + def from_config(cls, cfg: CfgNode_): + """ + Old style initialization using CfgNode + + Args: + cfg: D2 CfgNode, config file + Return: + dictionary storing arguments for __init__ method + """ + assert "VIDEO_HEIGHT" in cfg.TRACKER_HEADS + assert "VIDEO_WIDTH" in cfg.TRACKER_HEADS + video_height = cfg.TRACKER_HEADS.get("VIDEO_HEIGHT") + video_width = cfg.TRACKER_HEADS.get("VIDEO_WIDTH") + max_num_instances = cfg.TRACKER_HEADS.get("MAX_NUM_INSTANCES", 200) + max_lost_frame_count = cfg.TRACKER_HEADS.get("MAX_LOST_FRAME_COUNT", 0) + min_box_rel_dim = cfg.TRACKER_HEADS.get("MIN_BOX_REL_DIM", 0.02) + min_instance_period = cfg.TRACKER_HEADS.get("MIN_INSTANCE_PERIOD", 1) + track_iou_threshold = cfg.TRACKER_HEADS.get("TRACK_IOU_THRESHOLD", 0.5) + return { + "_target_": "detectron2.tracking.iou_weighted_hungarian_bbox_iou_tracker.IOUWeightedHungarianBBoxIOUTracker", # noqa + "video_height": video_height, + "video_width": video_width, + "max_num_instances": max_num_instances, + "max_lost_frame_count": max_lost_frame_count, + "min_box_rel_dim": min_box_rel_dim, + "min_instance_period": min_instance_period, + "track_iou_threshold": track_iou_threshold, + } + + 
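+    # Editorial note (illustrative): pairs whose IoU passed track_iou_threshold get
+    # cost -IoU below (e.g. IoU 0.8 -> cost -0.8), while sub-threshold pairs keep
+    # LARGE_COST_VALUE from build_cost_matrix, so linear_sum_assignment strongly
+    # prefers high-IoU matches.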
def assign_cost_matrix_values(self, cost_matrix: np.ndarray, bbox_pairs: List) -> np.ndarray: + """ + Based on IoU for each pair of bbox, assign the associated value in cost matrix + + Args: + cost_matrix: np.ndarray, initialized 2D array with target dimensions + bbox_pairs: list of bbox pair, in each pair, iou value is stored + Return: + np.ndarray, cost_matrix with assigned values + """ + for pair in bbox_pairs: + # assign (-1 * IoU) for above threshold pairs, algorithms will minimize cost + cost_matrix[pair["idx"]][pair["prev_idx"]] = -1 * pair["IoU"] + return cost_matrix diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/tracking/utils.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/tracking/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..78d19984f772c030982402d52307f303b84f98b4 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/tracking/utils.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python3 +import numpy as np +from typing import List + +from annotator.oneformer.detectron2.structures import Instances + + +def create_prediction_pairs( + instances: Instances, + prev_instances: Instances, + iou_all: np.ndarray, + threshold: float = 0.5, +) -> List: + """ + Args: + instances: predictions from current frame + prev_instances: predictions from previous frame + iou_all: 2D numpy array containing iou for each bbox pair + threshold: below the threshold, doesn't consider the pair of bbox is valid + Return: + List of bbox pairs + """ + bbox_pairs = [] + for i in range(len(instances)): + for j in range(len(prev_instances)): + if iou_all[i, j] < threshold: + continue + bbox_pairs.append( + { + "idx": i, + "prev_idx": j, + "prev_id": prev_instances.ID[j], + "IoU": iou_all[i, j], + "prev_period": prev_instances.ID_period[j], + } + ) + return bbox_pairs + + +LARGE_COST_VALUE = 100000 diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/tracking/vanilla_hungarian_bbox_iou_tracker.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/tracking/vanilla_hungarian_bbox_iou_tracker.py new file mode 100644 index 0000000000000000000000000000000000000000..eecfe2f31e65147aec47704b9e775e82d9f5fa9a --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/tracking/vanilla_hungarian_bbox_iou_tracker.py @@ -0,0 +1,129 @@ +#!/usr/bin/env python3 +# Copyright 2004-present Facebook. All Rights Reserved. 
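+"""
+Hungarian-matching tracker that fills the assignment cost matrix from bbox IoU.
+
+Usage sketch (editorial addition): the argument values are illustrative and
+`current_frame_instances` is a hypothetical D2 Instances object.
+
+.. code:: python
+
+    tracker = VanillaHungarianBBoxIOUTracker(video_height=480, video_width=640,
+                                             track_iou_threshold=0.5)
+    tracked = tracker.update(current_frame_instances)  # gains ID / ID_period fields
+"""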
+ +import numpy as np +from typing import List + +from annotator.oneformer.detectron2.config import CfgNode as CfgNode_ +from annotator.oneformer.detectron2.config import configurable +from annotator.oneformer.detectron2.structures import Instances +from annotator.oneformer.detectron2.structures.boxes import pairwise_iou +from annotator.oneformer.detectron2.tracking.utils import LARGE_COST_VALUE, create_prediction_pairs + +from .base_tracker import TRACKER_HEADS_REGISTRY +from .hungarian_tracker import BaseHungarianTracker + + +@TRACKER_HEADS_REGISTRY.register() +class VanillaHungarianBBoxIOUTracker(BaseHungarianTracker): + """ + Hungarian algo based tracker using bbox iou as metric + """ + + @configurable + def __init__( + self, + *, + video_height: int, + video_width: int, + max_num_instances: int = 200, + max_lost_frame_count: int = 0, + min_box_rel_dim: float = 0.02, + min_instance_period: int = 1, + track_iou_threshold: float = 0.5, + **kwargs, + ): + """ + Args: + video_height: height the video frame + video_width: width of the video frame + max_num_instances: maximum number of id allowed to be tracked + max_lost_frame_count: maximum number of frame an id can lost tracking + exceed this number, an id is considered as lost + forever + min_box_rel_dim: a percentage, smaller than this dimension, a bbox is + removed from tracking + min_instance_period: an instance will be shown after this number of period + since its first showing up in the video + track_iou_threshold: iou threshold, below this number a bbox pair is removed + from tracking + """ + super().__init__( + video_height=video_height, + video_width=video_width, + max_num_instances=max_num_instances, + max_lost_frame_count=max_lost_frame_count, + min_box_rel_dim=min_box_rel_dim, + min_instance_period=min_instance_period, + ) + self._track_iou_threshold = track_iou_threshold + + @classmethod + def from_config(cls, cfg: CfgNode_): + """ + Old style initialization using CfgNode + + Args: + cfg: D2 CfgNode, config file + Return: + dictionary storing arguments for __init__ method + """ + assert "VIDEO_HEIGHT" in cfg.TRACKER_HEADS + assert "VIDEO_WIDTH" in cfg.TRACKER_HEADS + video_height = cfg.TRACKER_HEADS.get("VIDEO_HEIGHT") + video_width = cfg.TRACKER_HEADS.get("VIDEO_WIDTH") + max_num_instances = cfg.TRACKER_HEADS.get("MAX_NUM_INSTANCES", 200) + max_lost_frame_count = cfg.TRACKER_HEADS.get("MAX_LOST_FRAME_COUNT", 0) + min_box_rel_dim = cfg.TRACKER_HEADS.get("MIN_BOX_REL_DIM", 0.02) + min_instance_period = cfg.TRACKER_HEADS.get("MIN_INSTANCE_PERIOD", 1) + track_iou_threshold = cfg.TRACKER_HEADS.get("TRACK_IOU_THRESHOLD", 0.5) + return { + "_target_": "detectron2.tracking.vanilla_hungarian_bbox_iou_tracker.VanillaHungarianBBoxIOUTracker", # noqa + "video_height": video_height, + "video_width": video_width, + "max_num_instances": max_num_instances, + "max_lost_frame_count": max_lost_frame_count, + "min_box_rel_dim": min_box_rel_dim, + "min_instance_period": min_instance_period, + "track_iou_threshold": track_iou_threshold, + } + + def build_cost_matrix(self, instances: Instances, prev_instances: Instances) -> np.ndarray: + """ + Build the cost matrix for assignment problem + (https://en.wikipedia.org/wiki/Assignment_problem) + + Args: + instances: D2 Instances, for current frame predictions + prev_instances: D2 Instances, for previous frame predictions + + Return: + the cost matrix in numpy array + """ + assert instances is not None and prev_instances is not None + # calculate IoU of all bbox pairs + iou_all = pairwise_iou( + 
boxes1=instances.pred_boxes, + boxes2=self._prev_instances.pred_boxes, + ) + bbox_pairs = create_prediction_pairs( + instances, self._prev_instances, iou_all, threshold=self._track_iou_threshold + ) + # assign large cost value to make sure pair below IoU threshold won't be matched + cost_matrix = np.full((len(instances), len(prev_instances)), LARGE_COST_VALUE) + return self.assign_cost_matrix_values(cost_matrix, bbox_pairs) + + def assign_cost_matrix_values(self, cost_matrix: np.ndarray, bbox_pairs: List) -> np.ndarray: + """ + Based on IoU for each pair of bbox, assign the associated value in cost matrix + + Args: + cost_matrix: np.ndarray, initialized 2D array with target dimensions + bbox_pairs: list of bbox pair, in each pair, iou value is stored + Return: + np.ndarray, cost_matrix with assigned values + """ + for pair in bbox_pairs: + # assign -1 for IoU above threshold pairs, algorithms will minimize cost + cost_matrix[pair["idx"]][pair["prev_idx"]] = -1 + return cost_matrix diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/README.md b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/README.md new file mode 100644 index 0000000000000000000000000000000000000000..9765b24a730b77556104187ac3ef5439ab0859fd --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/README.md @@ -0,0 +1,5 @@ +# Utility functions + +This folder contain utility functions that are not used in the +core library, but are useful for building models or training +code using the config system. diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9020c2df23e2af280b7bb168b996ae9eaf312eb8 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/__init__.py @@ -0,0 +1 @@ +# Copyright (c) Facebook, Inc. and its affiliates. diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/analysis.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/analysis.py new file mode 100644 index 0000000000000000000000000000000000000000..d63e14bcb6d9582df8a647c9a2ca46f2f7e4cd1d --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/analysis.py @@ -0,0 +1,188 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+# -*- coding: utf-8 -*- + +import typing +from typing import Any, List +import fvcore +from fvcore.nn import activation_count, flop_count, parameter_count, parameter_count_table +from torch import nn + +from annotator.oneformer.detectron2.export import TracingAdapter + +__all__ = [ + "activation_count_operators", + "flop_count_operators", + "parameter_count_table", + "parameter_count", + "FlopCountAnalysis", +] + +FLOPS_MODE = "flops" +ACTIVATIONS_MODE = "activations" + + +# Some extra ops to ignore from counting, including elementwise and reduction ops +_IGNORED_OPS = { + "aten::add", + "aten::add_", + "aten::argmax", + "aten::argsort", + "aten::batch_norm", + "aten::constant_pad_nd", + "aten::div", + "aten::div_", + "aten::exp", + "aten::log2", + "aten::max_pool2d", + "aten::meshgrid", + "aten::mul", + "aten::mul_", + "aten::neg", + "aten::nonzero_numpy", + "aten::reciprocal", + "aten::repeat_interleave", + "aten::rsub", + "aten::sigmoid", + "aten::sigmoid_", + "aten::softmax", + "aten::sort", + "aten::sqrt", + "aten::sub", + "torchvision::nms", # TODO estimate flop for nms +} + + +class FlopCountAnalysis(fvcore.nn.FlopCountAnalysis): + """ + Same as :class:`fvcore.nn.FlopCountAnalysis`, but supports detectron2 models. + """ + + def __init__(self, model, inputs): + """ + Args: + model (nn.Module): + inputs (Any): inputs of the given model. Does not have to be tuple of tensors. + """ + wrapper = TracingAdapter(model, inputs, allow_non_tensor=True) + super().__init__(wrapper, wrapper.flattened_inputs) + self.set_op_handle(**{k: None for k in _IGNORED_OPS}) + + +def flop_count_operators(model: nn.Module, inputs: list) -> typing.DefaultDict[str, float]: + """ + Implement operator-level flops counting using jit. + This is a wrapper of :func:`fvcore.nn.flop_count` and adds supports for standard + detection models in detectron2. + Please use :class:`FlopCountAnalysis` for more advanced functionalities. + + Note: + The function runs the input through the model to compute flops. + The flops of a detection model is often input-dependent, for example, + the flops of box & mask head depends on the number of proposals & + the number of detected objects. + Therefore, the flops counting using a single input may not accurately + reflect the computation cost of a model. It's recommended to average + across a number of inputs. + + Args: + model: a detectron2 model that takes `list[dict]` as input. + inputs (list[dict]): inputs to model, in detectron2's standard format. + Only "image" key will be used. + supported_ops (dict[str, Handle]): see documentation of :func:`fvcore.nn.flop_count` + + Returns: + Counter: Gflop count per operator + """ + old_train = model.training + model.eval() + ret = FlopCountAnalysis(model, inputs).by_operator() + model.train(old_train) + return {k: v / 1e9 for k, v in ret.items()} + + +def activation_count_operators( + model: nn.Module, inputs: list, **kwargs +) -> typing.DefaultDict[str, float]: + """ + Implement operator-level activations counting using jit. + This is a wrapper of fvcore.nn.activation_count, that supports standard detection models + in detectron2. + + Note: + The function runs the input through the model to compute activations. + The activations of a detection model is often input-dependent, for example, + the activations of box & mask head depends on the number of proposals & + the number of detected objects. + + Args: + model: a detectron2 model that takes `list[dict]` as input. + inputs (list[dict]): inputs to model, in detectron2's standard format. 
+ Only "image" key will be used. + + Returns: + Counter: activation count per operator + """ + return _wrapper_count_operators(model=model, inputs=inputs, mode=ACTIVATIONS_MODE, **kwargs) + + +def _wrapper_count_operators( + model: nn.Module, inputs: list, mode: str, **kwargs +) -> typing.DefaultDict[str, float]: + # ignore some ops + supported_ops = {k: lambda *args, **kwargs: {} for k in _IGNORED_OPS} + supported_ops.update(kwargs.pop("supported_ops", {})) + kwargs["supported_ops"] = supported_ops + + assert len(inputs) == 1, "Please use batch size=1" + tensor_input = inputs[0]["image"] + inputs = [{"image": tensor_input}] # remove other keys, in case there are any + + old_train = model.training + if isinstance(model, (nn.parallel.distributed.DistributedDataParallel, nn.DataParallel)): + model = model.module + wrapper = TracingAdapter(model, inputs) + wrapper.eval() + if mode == FLOPS_MODE: + ret = flop_count(wrapper, (tensor_input,), **kwargs) + elif mode == ACTIVATIONS_MODE: + ret = activation_count(wrapper, (tensor_input,), **kwargs) + else: + raise NotImplementedError("Count for mode {} is not supported yet.".format(mode)) + # compatible with change in fvcore + if isinstance(ret, tuple): + ret = ret[0] + model.train(old_train) + return ret + + +def find_unused_parameters(model: nn.Module, inputs: Any) -> List[str]: + """ + Given a model, find parameters that do not contribute + to the loss. + + Args: + model: a model in training mode that returns losses + inputs: argument or a tuple of arguments. Inputs of the model + + Returns: + list[str]: the name of unused parameters + """ + assert model.training + for _, prm in model.named_parameters(): + prm.grad = None + + if isinstance(inputs, tuple): + losses = model(*inputs) + else: + losses = model(inputs) + + if isinstance(losses, dict): + losses = sum(losses.values()) + losses.backward() + + unused: List[str] = [] + for name, prm in model.named_parameters(): + if prm.grad is None: + unused.append(name) + prm.grad = None + return unused diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/collect_env.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/collect_env.py new file mode 100644 index 0000000000000000000000000000000000000000..bb25d297ee83c70fd244762e1a7fd554c1fa4b69 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/collect_env.py @@ -0,0 +1,246 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
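A usage sketch for the flop-counting helpers just added, assuming only `torch` and `fvcore` are installed. A plain `nn.Sequential` stands in for a detectron2 model so the snippet is self-contained; a real detection model would instead be passed the standard `list[{"image": tensor}]` inputs through the `FlopCountAnalysis` subclass above.

```python
import torch
from torch import nn
from fvcore.nn import FlopCountAnalysis, parameter_count_table

model = nn.Sequential(
    nn.Conv2d(3, 16, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.AdaptiveAvgPool2d(1),
    nn.Flatten(),
    nn.Linear(16, 10),
)
inputs = (torch.randn(1, 3, 224, 224),)

flops = FlopCountAnalysis(model, inputs)
print(flops.total() / 1e9, "GFlops")   # total flop count for this single input
print(flops.by_operator())             # per-operator breakdown, e.g. {'conv': ..., 'linear': ...}
print(parameter_count_table(model))    # per-module parameter table
```

As the docstring above notes, flop counts are input-dependent, so averaging over several representative inputs gives a more reliable estimate.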
+import importlib +import numpy as np +import os +import re +import subprocess +import sys +from collections import defaultdict +import PIL +import torch +import torchvision +from tabulate import tabulate + +__all__ = ["collect_env_info"] + + +def collect_torch_env(): + try: + import torch.__config__ + + return torch.__config__.show() + except ImportError: + # compatible with older versions of pytorch + from torch.utils.collect_env import get_pretty_env_info + + return get_pretty_env_info() + + +def get_env_module(): + var_name = "DETECTRON2_ENV_MODULE" + return var_name, os.environ.get(var_name, "") + + +def detect_compute_compatibility(CUDA_HOME, so_file): + try: + cuobjdump = os.path.join(CUDA_HOME, "bin", "cuobjdump") + if os.path.isfile(cuobjdump): + output = subprocess.check_output( + "'{}' --list-elf '{}'".format(cuobjdump, so_file), shell=True + ) + output = output.decode("utf-8").strip().split("\n") + arch = [] + for line in output: + line = re.findall(r"\.sm_([0-9]*)\.", line)[0] + arch.append(".".join(line)) + arch = sorted(set(arch)) + return ", ".join(arch) + else: + return so_file + "; cannot find cuobjdump" + except Exception: + # unhandled failure + return so_file + + +def collect_env_info(): + has_gpu = torch.cuda.is_available() # true for both CUDA & ROCM + torch_version = torch.__version__ + + # NOTE that CUDA_HOME/ROCM_HOME could be None even when CUDA runtime libs are functional + from torch.utils.cpp_extension import CUDA_HOME, ROCM_HOME + + has_rocm = False + if (getattr(torch.version, "hip", None) is not None) and (ROCM_HOME is not None): + has_rocm = True + has_cuda = has_gpu and (not has_rocm) + + data = [] + data.append(("sys.platform", sys.platform)) # check-template.yml depends on it + data.append(("Python", sys.version.replace("\n", ""))) + data.append(("numpy", np.__version__)) + + try: + import annotator.oneformer.detectron2 # noqa + + data.append( + ("detectron2", detectron2.__version__ + " @" + os.path.dirname(detectron2.__file__)) + ) + except ImportError: + data.append(("detectron2", "failed to import")) + except AttributeError: + data.append(("detectron2", "imported a wrong installation")) + + try: + import annotator.oneformer.detectron2._C as _C + except ImportError as e: + data.append(("detectron2._C", f"not built correctly: {e}")) + + # print system compilers when extension fails to build + if sys.platform != "win32": # don't know what to do for windows + try: + # this is how torch/utils/cpp_extensions.py choose compiler + cxx = os.environ.get("CXX", "c++") + cxx = subprocess.check_output("'{}' --version".format(cxx), shell=True) + cxx = cxx.decode("utf-8").strip().split("\n")[0] + except subprocess.SubprocessError: + cxx = "Not found" + data.append(("Compiler ($CXX)", cxx)) + + if has_cuda and CUDA_HOME is not None: + try: + nvcc = os.path.join(CUDA_HOME, "bin", "nvcc") + nvcc = subprocess.check_output("'{}' -V".format(nvcc), shell=True) + nvcc = nvcc.decode("utf-8").strip().split("\n")[-1] + except subprocess.SubprocessError: + nvcc = "Not found" + data.append(("CUDA compiler", nvcc)) + if has_cuda and sys.platform != "win32": + try: + so_file = importlib.util.find_spec("detectron2._C").origin + except (ImportError, AttributeError): + pass + else: + data.append( + ("detectron2 arch flags", detect_compute_compatibility(CUDA_HOME, so_file)) + ) + else: + # print compilers that are used to build extension + data.append(("Compiler", _C.get_compiler_version())) + data.append(("CUDA compiler", _C.get_cuda_version())) # cuda or hip + if has_cuda and 
getattr(_C, "has_cuda", lambda: True)(): + data.append( + ("detectron2 arch flags", detect_compute_compatibility(CUDA_HOME, _C.__file__)) + ) + + data.append(get_env_module()) + data.append(("PyTorch", torch_version + " @" + os.path.dirname(torch.__file__))) + data.append(("PyTorch debug build", torch.version.debug)) + try: + data.append(("torch._C._GLIBCXX_USE_CXX11_ABI", torch._C._GLIBCXX_USE_CXX11_ABI)) + except Exception: + pass + + if not has_gpu: + has_gpu_text = "No: torch.cuda.is_available() == False" + else: + has_gpu_text = "Yes" + data.append(("GPU available", has_gpu_text)) + if has_gpu: + devices = defaultdict(list) + for k in range(torch.cuda.device_count()): + cap = ".".join((str(x) for x in torch.cuda.get_device_capability(k))) + name = torch.cuda.get_device_name(k) + f" (arch={cap})" + devices[name].append(str(k)) + for name, devids in devices.items(): + data.append(("GPU " + ",".join(devids), name)) + + if has_rocm: + msg = " - invalid!" if not (ROCM_HOME and os.path.isdir(ROCM_HOME)) else "" + data.append(("ROCM_HOME", str(ROCM_HOME) + msg)) + else: + try: + from torch.utils.collect_env import get_nvidia_driver_version, run as _run + + data.append(("Driver version", get_nvidia_driver_version(_run))) + except Exception: + pass + msg = " - invalid!" if not (CUDA_HOME and os.path.isdir(CUDA_HOME)) else "" + data.append(("CUDA_HOME", str(CUDA_HOME) + msg)) + + cuda_arch_list = os.environ.get("TORCH_CUDA_ARCH_LIST", None) + if cuda_arch_list: + data.append(("TORCH_CUDA_ARCH_LIST", cuda_arch_list)) + data.append(("Pillow", PIL.__version__)) + + try: + data.append( + ( + "torchvision", + str(torchvision.__version__) + " @" + os.path.dirname(torchvision.__file__), + ) + ) + if has_cuda: + try: + torchvision_C = importlib.util.find_spec("torchvision._C").origin + msg = detect_compute_compatibility(CUDA_HOME, torchvision_C) + data.append(("torchvision arch flags", msg)) + except (ImportError, AttributeError): + data.append(("torchvision._C", "Not found")) + except AttributeError: + data.append(("torchvision", "unknown")) + + try: + import fvcore + + data.append(("fvcore", fvcore.__version__)) + except (ImportError, AttributeError): + pass + + try: + import iopath + + data.append(("iopath", iopath.__version__)) + except (ImportError, AttributeError): + pass + + try: + import cv2 + + data.append(("cv2", cv2.__version__)) + except (ImportError, AttributeError): + data.append(("cv2", "Not found")) + env_str = tabulate(data) + "\n" + env_str += collect_torch_env() + return env_str + + +def test_nccl_ops(): + num_gpu = torch.cuda.device_count() + if os.access("/tmp", os.W_OK): + import torch.multiprocessing as mp + + dist_url = "file:///tmp/nccl_tmp_file" + print("Testing NCCL connectivity ... 
this should not hang.") + mp.spawn(_test_nccl_worker, nprocs=num_gpu, args=(num_gpu, dist_url), daemon=False) + print("NCCL succeeded.") + + +def _test_nccl_worker(rank, num_gpu, dist_url): + import torch.distributed as dist + + dist.init_process_group(backend="NCCL", init_method=dist_url, rank=rank, world_size=num_gpu) + dist.barrier(device_ids=[rank]) + + +if __name__ == "__main__": + try: + from annotator.oneformer.detectron2.utils.collect_env import collect_env_info as f + + print(f()) + except ImportError: + print(collect_env_info()) + + if torch.cuda.is_available(): + num_gpu = torch.cuda.device_count() + for k in range(num_gpu): + device = f"cuda:{k}" + try: + x = torch.tensor([1, 2.0], dtype=torch.float32) + x = x.to(device) + except Exception as e: + print( + f"Unable to copy tensor to device={device}: {e}. " + "Your CUDA environment is broken." + ) + if num_gpu > 1: + test_nccl_ops() diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/colormap.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/colormap.py new file mode 100644 index 0000000000000000000000000000000000000000..14ded1659b40b161358c4aaf9cc84ffe0ffafe64 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/colormap.py @@ -0,0 +1,158 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +""" +An awesome colormap for really neat visualizations. +Copied from Detectron, and removed gray colors. +""" + +import numpy as np +import random + +__all__ = ["colormap", "random_color", "random_colors"] + +# fmt: off +# RGB: +_COLORS = np.array( + [ + 0.000, 0.447, 0.741, + 0.850, 0.325, 0.098, + 0.929, 0.694, 0.125, + 0.494, 0.184, 0.556, + 0.466, 0.674, 0.188, + 0.301, 0.745, 0.933, + 0.635, 0.078, 0.184, + 0.300, 0.300, 0.300, + 0.600, 0.600, 0.600, + 1.000, 0.000, 0.000, + 1.000, 0.500, 0.000, + 0.749, 0.749, 0.000, + 0.000, 1.000, 0.000, + 0.000, 0.000, 1.000, + 0.667, 0.000, 1.000, + 0.333, 0.333, 0.000, + 0.333, 0.667, 0.000, + 0.333, 1.000, 0.000, + 0.667, 0.333, 0.000, + 0.667, 0.667, 0.000, + 0.667, 1.000, 0.000, + 1.000, 0.333, 0.000, + 1.000, 0.667, 0.000, + 1.000, 1.000, 0.000, + 0.000, 0.333, 0.500, + 0.000, 0.667, 0.500, + 0.000, 1.000, 0.500, + 0.333, 0.000, 0.500, + 0.333, 0.333, 0.500, + 0.333, 0.667, 0.500, + 0.333, 1.000, 0.500, + 0.667, 0.000, 0.500, + 0.667, 0.333, 0.500, + 0.667, 0.667, 0.500, + 0.667, 1.000, 0.500, + 1.000, 0.000, 0.500, + 1.000, 0.333, 0.500, + 1.000, 0.667, 0.500, + 1.000, 1.000, 0.500, + 0.000, 0.333, 1.000, + 0.000, 0.667, 1.000, + 0.000, 1.000, 1.000, + 0.333, 0.000, 1.000, + 0.333, 0.333, 1.000, + 0.333, 0.667, 1.000, + 0.333, 1.000, 1.000, + 0.667, 0.000, 1.000, + 0.667, 0.333, 1.000, + 0.667, 0.667, 1.000, + 0.667, 1.000, 1.000, + 1.000, 0.000, 1.000, + 1.000, 0.333, 1.000, + 1.000, 0.667, 1.000, + 0.333, 0.000, 0.000, + 0.500, 0.000, 0.000, + 0.667, 0.000, 0.000, + 0.833, 0.000, 0.000, + 1.000, 0.000, 0.000, + 0.000, 0.167, 0.000, + 0.000, 0.333, 0.000, + 0.000, 0.500, 0.000, + 0.000, 0.667, 0.000, + 0.000, 0.833, 0.000, + 0.000, 1.000, 0.000, + 0.000, 0.000, 0.167, + 0.000, 0.000, 0.333, + 0.000, 0.000, 0.500, + 0.000, 0.000, 0.667, + 0.000, 0.000, 0.833, + 0.000, 0.000, 1.000, + 0.000, 0.000, 0.000, + 0.143, 0.143, 0.143, + 0.857, 0.857, 0.857, + 1.000, 1.000, 1.000 + ] +).astype(np.float32).reshape(-1, 3) +# fmt: on + + +def colormap(rgb=False, maximum=255): + """ + Args: + rgb (bool): whether to return RGB colors or BGR colors. 
+ maximum (int): either 255 or 1 + + Returns: + ndarray: a float32 array of Nx3 colors, in range [0, 255] or [0, 1] + """ + assert maximum in [255, 1], maximum + c = _COLORS * maximum + if not rgb: + c = c[:, ::-1] + return c + + +def random_color(rgb=False, maximum=255): + """ + Args: + rgb (bool): whether to return RGB colors or BGR colors. + maximum (int): either 255 or 1 + + Returns: + ndarray: a vector of 3 numbers + """ + idx = np.random.randint(0, len(_COLORS)) + ret = _COLORS[idx] * maximum + if not rgb: + ret = ret[::-1] + return ret + + +def random_colors(N, rgb=False, maximum=255): + """ + Args: + N (int): number of unique colors needed + rgb (bool): whether to return RGB colors or BGR colors. + maximum (int): either 255 or 1 + + Returns: + ndarray: a list of random_color + """ + indices = random.sample(range(len(_COLORS)), N) + ret = [_COLORS[i] * maximum for i in indices] + if not rgb: + ret = [x[::-1] for x in ret] + return ret + + +if __name__ == "__main__": + import cv2 + + size = 100 + H, W = 10, 10 + canvas = np.random.rand(H * size, W * size, 3).astype("float32") + for h in range(H): + for w in range(W): + idx = h * W + w + if idx >= len(_COLORS): + break + canvas[h * size : (h + 1) * size, w * size : (w + 1) * size] = _COLORS[idx] + cv2.imshow("a", canvas) + cv2.waitKey(0) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/comm.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/comm.py new file mode 100644 index 0000000000000000000000000000000000000000..a9ea9a9f578c5704d1e7ff563ef156e9133ab465 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/comm.py @@ -0,0 +1,238 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +""" +This file contains primitives for multi-gpu communication. +This is useful when doing distributed training. +""" + +import functools +import numpy as np +import torch +import torch.distributed as dist + +_LOCAL_PROCESS_GROUP = None +_MISSING_LOCAL_PG_ERROR = ( + "Local process group is not yet created! Please use detectron2's `launch()` " + "to start processes and initialize pytorch process group. If you need to start " + "processes in other ways, please call comm.create_local_process_group(" + "num_workers_per_machine) after calling torch.distributed.init_process_group()." +) + + +def get_world_size() -> int: + if not dist.is_available(): + return 1 + if not dist.is_initialized(): + return 1 + return dist.get_world_size() + + +def get_rank() -> int: + if not dist.is_available(): + return 0 + if not dist.is_initialized(): + return 0 + return dist.get_rank() + + +@functools.lru_cache() +def create_local_process_group(num_workers_per_machine: int) -> None: + """ + Create a process group that contains ranks within the same machine. + + Detectron2's launch() in engine/launch.py will call this function. If you start + workers without launch(), you'll have to also call this. Otherwise utilities + like `get_local_rank()` will not work. + + This function contains a barrier. All processes must call it together. + + Args: + num_workers_per_machine: the number of worker processes per machine. Typically + the number of GPUs. 
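A short usage sketch for the colormap helpers above: pull distinct per-instance colors (BGR order so they can be handed straight to OpenCV drawing calls) and inspect the full palette.

```python
import numpy as np
from annotator.oneformer.detectron2.utils.colormap import colormap, random_color, random_colors

num_instances = 5
colors = random_colors(num_instances, rgb=False, maximum=255)  # list of 5 BGR triples in [0, 255]
first = random_color(rgb=True, maximum=1)                      # single RGB color in [0, 1]
table = colormap(rgb=True, maximum=255)                        # full palette as an Nx3 array

for i, c in enumerate(colors):
    print(f"instance {i}: BGR={np.round(c).astype(int)}")
print("palette size:", table.shape)  # (74, 3)
```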
+ """ + global _LOCAL_PROCESS_GROUP + assert _LOCAL_PROCESS_GROUP is None + assert get_world_size() % num_workers_per_machine == 0 + num_machines = get_world_size() // num_workers_per_machine + machine_rank = get_rank() // num_workers_per_machine + for i in range(num_machines): + ranks_on_i = list(range(i * num_workers_per_machine, (i + 1) * num_workers_per_machine)) + pg = dist.new_group(ranks_on_i) + if i == machine_rank: + _LOCAL_PROCESS_GROUP = pg + + +def get_local_process_group(): + """ + Returns: + A torch process group which only includes processes that are on the same + machine as the current process. This group can be useful for communication + within a machine, e.g. a per-machine SyncBN. + """ + assert _LOCAL_PROCESS_GROUP is not None, _MISSING_LOCAL_PG_ERROR + return _LOCAL_PROCESS_GROUP + + +def get_local_rank() -> int: + """ + Returns: + The rank of the current process within the local (per-machine) process group. + """ + if not dist.is_available(): + return 0 + if not dist.is_initialized(): + return 0 + assert _LOCAL_PROCESS_GROUP is not None, _MISSING_LOCAL_PG_ERROR + return dist.get_rank(group=_LOCAL_PROCESS_GROUP) + + +def get_local_size() -> int: + """ + Returns: + The size of the per-machine process group, + i.e. the number of processes per machine. + """ + if not dist.is_available(): + return 1 + if not dist.is_initialized(): + return 1 + assert _LOCAL_PROCESS_GROUP is not None, _MISSING_LOCAL_PG_ERROR + return dist.get_world_size(group=_LOCAL_PROCESS_GROUP) + + +def is_main_process() -> bool: + return get_rank() == 0 + + +def synchronize(): + """ + Helper function to synchronize (barrier) among all processes when + using distributed training + """ + if not dist.is_available(): + return + if not dist.is_initialized(): + return + world_size = dist.get_world_size() + if world_size == 1: + return + if dist.get_backend() == dist.Backend.NCCL: + # This argument is needed to avoid warnings. + # It's valid only for NCCL backend. + dist.barrier(device_ids=[torch.cuda.current_device()]) + else: + dist.barrier() + + +@functools.lru_cache() +def _get_global_gloo_group(): + """ + Return a process group based on gloo backend, containing all the ranks + The result is cached. + """ + if dist.get_backend() == "nccl": + return dist.new_group(backend="gloo") + else: + return dist.group.WORLD + + +def all_gather(data, group=None): + """ + Run all_gather on arbitrary picklable data (not necessarily tensors). + + Args: + data: any picklable object + group: a torch process group. By default, will use a group which + contains all ranks on gloo backend. + + Returns: + list[data]: list of data gathered from each rank + """ + if get_world_size() == 1: + return [data] + if group is None: + group = _get_global_gloo_group() # use CPU group by default, to reduce GPU RAM usage. + world_size = dist.get_world_size(group) + if world_size == 1: + return [data] + + output = [None for _ in range(world_size)] + dist.all_gather_object(output, data, group=group) + return output + + +def gather(data, dst=0, group=None): + """ + Run gather on arbitrary picklable data (not necessarily tensors). + + Args: + data: any picklable object + dst (int): destination rank + group: a torch process group. By default, will use a group which + contains all ranks on gloo backend. + + Returns: + list[data]: on dst, a list of data gathered from each rank. Otherwise, + an empty list. 
+ """ + if get_world_size() == 1: + return [data] + if group is None: + group = _get_global_gloo_group() + world_size = dist.get_world_size(group=group) + if world_size == 1: + return [data] + rank = dist.get_rank(group=group) + + if rank == dst: + output = [None for _ in range(world_size)] + dist.gather_object(data, output, dst=dst, group=group) + return output + else: + dist.gather_object(data, None, dst=dst, group=group) + return [] + + +def shared_random_seed(): + """ + Returns: + int: a random number that is the same across all workers. + If workers need a shared RNG, they can use this shared seed to + create one. + + All workers must call this function, otherwise it will deadlock. + """ + ints = np.random.randint(2**31) + all_ints = all_gather(ints) + return all_ints[0] + + +def reduce_dict(input_dict, average=True): + """ + Reduce the values in the dictionary from all processes so that process with rank + 0 has the reduced results. + + Args: + input_dict (dict): inputs to be reduced. All the values must be scalar CUDA Tensor. + average (bool): whether to do average or sum + + Returns: + a dict with the same keys as input_dict, after reduction. + """ + world_size = get_world_size() + if world_size < 2: + return input_dict + with torch.no_grad(): + names = [] + values = [] + # sort the keys so that they are consistent across processes + for k in sorted(input_dict.keys()): + names.append(k) + values.append(input_dict[k]) + values = torch.stack(values, dim=0) + dist.reduce(values, dst=0) + if dist.get_rank() == 0 and average: + # only main process gets accumulated, so only divide by + # world_size in this case + values /= world_size + reduced_dict = {k: v for k, v in zip(names, values)} + return reduced_dict diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/develop.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/develop.py new file mode 100644 index 0000000000000000000000000000000000000000..e8416984954f7b32fc269100620e3c0d0d0f9585 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/develop.py @@ -0,0 +1,59 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +""" Utilities for developers only. +These are not visible to users (not automatically imported). And should not +appeared in docs.""" +# adapted from https://github.com/tensorpack/tensorpack/blob/master/tensorpack/utils/develop.py + + +def create_dummy_class(klass, dependency, message=""): + """ + When a dependency of a class is not available, create a dummy class which throws ImportError + when used. + + Args: + klass (str): name of the class. + dependency (str): name of the dependency. + message: extra message to print + Returns: + class: a class object + """ + err = "Cannot import '{}', therefore '{}' is not available.".format(dependency, klass) + if message: + err = err + " " + message + + class _DummyMetaClass(type): + # throw error on class attribute access + def __getattr__(_, __): # noqa: B902 + raise ImportError(err) + + class _Dummy(object, metaclass=_DummyMetaClass): + # throw error on constructor + def __init__(self, *args, **kwargs): + raise ImportError(err) + + return _Dummy + + +def create_dummy_func(func, dependency, message=""): + """ + When a dependency of a function is not available, create a dummy function which throws + ImportError when used. + + Args: + func (str): name of the function. + dependency (str or list[str]): name(s) of the dependency. 
+ message: extra message to print + Returns: + function: a function object + """ + err = "Cannot import '{}', therefore '{}' is not available.".format(dependency, func) + if message: + err = err + " " + message + + if isinstance(dependency, (list, tuple)): + dependency = ",".join(dependency) + + def _dummy(*args, **kwargs): + raise ImportError(err) + + return _dummy diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/env.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/env.py new file mode 100644 index 0000000000000000000000000000000000000000..40634c17c73273ac8927632be164f466cfe7d1fa --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/env.py @@ -0,0 +1,170 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import importlib +import importlib.util +import logging +import numpy as np +import os +import random +import sys +from datetime import datetime +import torch + +__all__ = ["seed_all_rng"] + + +TORCH_VERSION = tuple(int(x) for x in torch.__version__.split(".")[:2]) +""" +PyTorch version as a tuple of 2 ints. Useful for comparison. +""" + + +DOC_BUILDING = os.getenv("_DOC_BUILDING", False) # set in docs/conf.py +""" +Whether we're building documentation. +""" + + +def seed_all_rng(seed=None): + """ + Set the random seed for the RNG in torch, numpy and python. + + Args: + seed (int): if None, will use a strong random seed. + """ + if seed is None: + seed = ( + os.getpid() + + int(datetime.now().strftime("%S%f")) + + int.from_bytes(os.urandom(2), "big") + ) + logger = logging.getLogger(__name__) + logger.info("Using a generated random seed {}".format(seed)) + np.random.seed(seed) + torch.manual_seed(seed) + random.seed(seed) + os.environ["PYTHONHASHSEED"] = str(seed) + + +# from https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path +def _import_file(module_name, file_path, make_importable=False): + spec = importlib.util.spec_from_file_location(module_name, file_path) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + if make_importable: + sys.modules[module_name] = module + return module + + +def _configure_libraries(): + """ + Configurations for some libraries. + """ + # An environment option to disable `import cv2` globally, + # in case it leads to negative performance impact + disable_cv2 = int(os.environ.get("DETECTRON2_DISABLE_CV2", False)) + if disable_cv2: + sys.modules["cv2"] = None + else: + # Disable opencl in opencv since its interaction with cuda often has negative effects + # This envvar is supported after OpenCV 3.4.0 + os.environ["OPENCV_OPENCL_RUNTIME"] = "disabled" + try: + import cv2 + + if int(cv2.__version__.split(".")[0]) >= 3: + cv2.ocl.setUseOpenCL(False) + except ModuleNotFoundError: + # Other types of ImportError, if happened, should not be ignored. + # Because a failed opencv import could mess up address space + # https://github.com/skvark/opencv-python/issues/381 + pass + + def get_version(module, digit=2): + return tuple(map(int, module.__version__.split(".")[:digit])) + + # fmt: off + assert get_version(torch) >= (1, 4), "Requires torch>=1.4" + import fvcore + assert get_version(fvcore, 3) >= (0, 1, 2), "Requires fvcore>=0.1.2" + import yaml + assert get_version(yaml) >= (5, 1), "Requires pyyaml>=5.1" + # fmt: on + + +_ENV_SETUP_DONE = False + + +def setup_environment(): + """Perform environment setup work. 
The default setup is a no-op, but this + function allows the user to specify a Python source file or a module in + the $DETECTRON2_ENV_MODULE environment variable, that performs + custom setup work that may be necessary to their computing environment. + """ + global _ENV_SETUP_DONE + if _ENV_SETUP_DONE: + return + _ENV_SETUP_DONE = True + + _configure_libraries() + + custom_module_path = os.environ.get("DETECTRON2_ENV_MODULE") + + if custom_module_path: + setup_custom_environment(custom_module_path) + else: + # The default setup is a no-op + pass + + +def setup_custom_environment(custom_module): + """ + Load custom environment setup by importing a Python source file or a + module, and run the setup function. + """ + if custom_module.endswith(".py"): + module = _import_file("detectron2.utils.env.custom_module", custom_module) + else: + module = importlib.import_module(custom_module) + assert hasattr(module, "setup_environment") and callable(module.setup_environment), ( + "Custom environment module defined in {} does not have the " + "required callable attribute 'setup_environment'." + ).format(custom_module) + module.setup_environment() + + +def fixup_module_metadata(module_name, namespace, keys=None): + """ + Fix the __qualname__ of module members to be their exported api name, so + when they are referenced in docs, sphinx can find them. Reference: + https://github.com/python-trio/trio/blob/6754c74eacfad9cc5c92d5c24727a2f3b620624e/trio/_util.py#L216-L241 + """ + if not DOC_BUILDING: + return + seen_ids = set() + + def fix_one(qualname, name, obj): + # avoid infinite recursion (relevant when using + # typing.Generic, for example) + if id(obj) in seen_ids: + return + seen_ids.add(id(obj)) + + mod = getattr(obj, "__module__", None) + if mod is not None and (mod.startswith(module_name) or mod.startswith("fvcore.")): + obj.__module__ = module_name + # Modules, unlike everything else in Python, put fully-qualitied + # names into their __name__ attribute. We check for "." to avoid + # rewriting these. + if hasattr(obj, "__name__") and "." not in obj.__name__: + obj.__name__ = name + obj.__qualname__ = qualname + if isinstance(obj, type): + for attr_name, attr_value in obj.__dict__.items(): + fix_one(objname + "." + attr_name, attr_name, attr_value) + + if keys is None: + keys = namespace.keys() + for objname in keys: + if not objname.startswith("_"): + obj = namespace[objname] + fix_one(objname, objname, obj) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/events.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/events.py new file mode 100644 index 0000000000000000000000000000000000000000..d9a68b6b5b90cdef1ccdaffa4eb2225f3ab21e29 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/events.py @@ -0,0 +1,534 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import datetime +import json +import logging +import os +import time +from collections import defaultdict +from contextlib import contextmanager +from typing import Optional +import torch +from fvcore.common.history_buffer import HistoryBuffer + +from annotator.oneformer.detectron2.utils.file_io import PathManager + +__all__ = [ + "get_event_storage", + "JSONWriter", + "TensorboardXWriter", + "CommonMetricPrinter", + "EventStorage", +] + +_CURRENT_STORAGE_STACK = [] + + +def get_event_storage(): + """ + Returns: + The :class:`EventStorage` object that's currently being used. 
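A small sketch of the two user-facing hooks in `env.py`: `seed_all_rng` for reproducible runs, and a custom `DETECTRON2_ENV_MODULE` file providing `setup_environment()` for site-specific setup. The module path and its contents below are hypothetical.

```python
from annotator.oneformer.detectron2.utils.env import seed_all_rng

seed_all_rng(42)    # seeds torch, numpy and random, and sets PYTHONHASHSEED
seed_all_rng(None)  # or derive a fresh "strong" seed from pid / time / urandom

# Contents of a hypothetical my_env_setup.py, picked up when
# DETECTRON2_ENV_MODULE=/path/to/my_env_setup.py is set before setup_environment():
#
#   def setup_environment():
#       import cv2
#       cv2.setNumThreads(0)  # example of site-specific setup work
```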
+ Throws an error if no :class:`EventStorage` is currently enabled. + """ + assert len( + _CURRENT_STORAGE_STACK + ), "get_event_storage() has to be called inside a 'with EventStorage(...)' context!" + return _CURRENT_STORAGE_STACK[-1] + + +class EventWriter: + """ + Base class for writers that obtain events from :class:`EventStorage` and process them. + """ + + def write(self): + raise NotImplementedError + + def close(self): + pass + + +class JSONWriter(EventWriter): + """ + Write scalars to a json file. + + It saves scalars as one json per line (instead of a big json) for easy parsing. + + Examples parsing such a json file: + :: + $ cat metrics.json | jq -s '.[0:2]' + [ + { + "data_time": 0.008433341979980469, + "iteration": 19, + "loss": 1.9228371381759644, + "loss_box_reg": 0.050025828182697296, + "loss_classifier": 0.5316952466964722, + "loss_mask": 0.7236229181289673, + "loss_rpn_box": 0.0856662318110466, + "loss_rpn_cls": 0.48198649287223816, + "lr": 0.007173333333333333, + "time": 0.25401854515075684 + }, + { + "data_time": 0.007216215133666992, + "iteration": 39, + "loss": 1.282649278640747, + "loss_box_reg": 0.06222952902317047, + "loss_classifier": 0.30682939291000366, + "loss_mask": 0.6970193982124329, + "loss_rpn_box": 0.038663312792778015, + "loss_rpn_cls": 0.1471673548221588, + "lr": 0.007706666666666667, + "time": 0.2490077018737793 + } + ] + + $ cat metrics.json | jq '.loss_mask' + 0.7126231789588928 + 0.689423680305481 + 0.6776131987571716 + ... + + """ + + def __init__(self, json_file, window_size=20): + """ + Args: + json_file (str): path to the json file. New data will be appended if the file exists. + window_size (int): the window size of median smoothing for the scalars whose + `smoothing_hint` are True. + """ + self._file_handle = PathManager.open(json_file, "a") + self._window_size = window_size + self._last_write = -1 + + def write(self): + storage = get_event_storage() + to_save = defaultdict(dict) + + for k, (v, iter) in storage.latest_with_smoothing_hint(self._window_size).items(): + # keep scalars that have not been written + if iter <= self._last_write: + continue + to_save[iter][k] = v + if len(to_save): + all_iters = sorted(to_save.keys()) + self._last_write = max(all_iters) + + for itr, scalars_per_iter in to_save.items(): + scalars_per_iter["iteration"] = itr + self._file_handle.write(json.dumps(scalars_per_iter, sort_keys=True) + "\n") + self._file_handle.flush() + try: + os.fsync(self._file_handle.fileno()) + except AttributeError: + pass + + def close(self): + self._file_handle.close() + + +class TensorboardXWriter(EventWriter): + """ + Write all scalars to a tensorboard file. + """ + + def __init__(self, log_dir: str, window_size: int = 20, **kwargs): + """ + Args: + log_dir (str): the directory to save the output events + window_size (int): the scalars will be median-smoothed by this window size + + kwargs: other arguments passed to `torch.utils.tensorboard.SummaryWriter(...)` + """ + self._window_size = window_size + from torch.utils.tensorboard import SummaryWriter + + self._writer = SummaryWriter(log_dir, **kwargs) + self._last_write = -1 + + def write(self): + storage = get_event_storage() + new_last_write = self._last_write + for k, (v, iter) in storage.latest_with_smoothing_hint(self._window_size).items(): + if iter > self._last_write: + self._writer.add_scalar(k, v, iter) + new_last_write = max(new_last_write, iter) + self._last_write = new_last_write + + # storage.put_{image,histogram} is only meant to be used by + # tensorboard writer. 
So we access its internal fields directly from here. + if len(storage._vis_data) >= 1: + for img_name, img, step_num in storage._vis_data: + self._writer.add_image(img_name, img, step_num) + # Storage stores all image data and rely on this writer to clear them. + # As a result it assumes only one writer will use its image data. + # An alternative design is to let storage store limited recent + # data (e.g. only the most recent image) that all writers can access. + # In that case a writer may not see all image data if its period is long. + storage.clear_images() + + if len(storage._histograms) >= 1: + for params in storage._histograms: + self._writer.add_histogram_raw(**params) + storage.clear_histograms() + + def close(self): + if hasattr(self, "_writer"): # doesn't exist when the code fails at import + self._writer.close() + + +class CommonMetricPrinter(EventWriter): + """ + Print **common** metrics to the terminal, including + iteration time, ETA, memory, all losses, and the learning rate. + It also applies smoothing using a window of 20 elements. + + It's meant to print common metrics in common ways. + To print something in more customized ways, please implement a similar printer by yourself. + """ + + def __init__(self, max_iter: Optional[int] = None, window_size: int = 20): + """ + Args: + max_iter: the maximum number of iterations to train. + Used to compute ETA. If not given, ETA will not be printed. + window_size (int): the losses will be median-smoothed by this window size + """ + self.logger = logging.getLogger(__name__) + self._max_iter = max_iter + self._window_size = window_size + self._last_write = None # (step, time) of last call to write(). Used to compute ETA + + def _get_eta(self, storage) -> Optional[str]: + if self._max_iter is None: + return "" + iteration = storage.iter + try: + eta_seconds = storage.history("time").median(1000) * (self._max_iter - iteration - 1) + storage.put_scalar("eta_seconds", eta_seconds, smoothing_hint=False) + return str(datetime.timedelta(seconds=int(eta_seconds))) + except KeyError: + # estimate eta on our own - more noisy + eta_string = None + if self._last_write is not None: + estimate_iter_time = (time.perf_counter() - self._last_write[1]) / ( + iteration - self._last_write[0] + ) + eta_seconds = estimate_iter_time * (self._max_iter - iteration - 1) + eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) + self._last_write = (iteration, time.perf_counter()) + return eta_string + + def write(self): + storage = get_event_storage() + iteration = storage.iter + if iteration == self._max_iter: + # This hook only reports training progress (loss, ETA, etc) but not other data, + # therefore do not write anything after training succeeds, even if this method + # is called. 
+ return + + try: + avg_data_time = storage.history("data_time").avg( + storage.count_samples("data_time", self._window_size) + ) + last_data_time = storage.history("data_time").latest() + except KeyError: + # they may not exist in the first few iterations (due to warmup) + # or when SimpleTrainer is not used + avg_data_time = None + last_data_time = None + try: + avg_iter_time = storage.history("time").global_avg() + last_iter_time = storage.history("time").latest() + except KeyError: + avg_iter_time = None + last_iter_time = None + try: + lr = "{:.5g}".format(storage.history("lr").latest()) + except KeyError: + lr = "N/A" + + eta_string = self._get_eta(storage) + + if torch.cuda.is_available(): + max_mem_mb = torch.cuda.max_memory_allocated() / 1024.0 / 1024.0 + else: + max_mem_mb = None + + # NOTE: max_mem is parsed by grep in "dev/parse_results.sh" + self.logger.info( + str.format( + " {eta}iter: {iter} {losses} {non_losses} {avg_time}{last_time}" + + "{avg_data_time}{last_data_time} lr: {lr} {memory}", + eta=f"eta: {eta_string} " if eta_string else "", + iter=iteration, + losses=" ".join( + [ + "{}: {:.4g}".format( + k, v.median(storage.count_samples(k, self._window_size)) + ) + for k, v in storage.histories().items() + if "loss" in k + ] + ), + non_losses=" ".join( + [ + "{}: {:.4g}".format( + k, v.median(storage.count_samples(k, self._window_size)) + ) + for k, v in storage.histories().items() + if "[metric]" in k + ] + ), + avg_time="time: {:.4f} ".format(avg_iter_time) + if avg_iter_time is not None + else "", + last_time="last_time: {:.4f} ".format(last_iter_time) + if last_iter_time is not None + else "", + avg_data_time="data_time: {:.4f} ".format(avg_data_time) + if avg_data_time is not None + else "", + last_data_time="last_data_time: {:.4f} ".format(last_data_time) + if last_data_time is not None + else "", + lr=lr, + memory="max_mem: {:.0f}M".format(max_mem_mb) if max_mem_mb is not None else "", + ) + ) + + +class EventStorage: + """ + The user-facing class that provides metric storage functionalities. + + In the future we may add support for storing / logging other types of data if needed. + """ + + def __init__(self, start_iter=0): + """ + Args: + start_iter (int): the iteration number to start with + """ + self._history = defaultdict(HistoryBuffer) + self._smoothing_hints = {} + self._latest_scalars = {} + self._iter = start_iter + self._current_prefix = "" + self._vis_data = [] + self._histograms = [] + + def put_image(self, img_name, img_tensor): + """ + Add an `img_tensor` associated with `img_name`, to be shown on + tensorboard. + + Args: + img_name (str): The name of the image to put into tensorboard. + img_tensor (torch.Tensor or numpy.array): An `uint8` or `float` + Tensor of shape `[channel, height, width]` where `channel` is + 3. The image format should be RGB. The elements in img_tensor + can either have values in [0, 1] (float32) or [0, 255] (uint8). + The `img_tensor` will be visualized in tensorboard. + """ + self._vis_data.append((img_name, img_tensor, self._iter)) + + def put_scalar(self, name, value, smoothing_hint=True): + """ + Add a scalar `value` to the `HistoryBuffer` associated with `name`. + + Args: + smoothing_hint (bool): a 'hint' on whether this scalar is noisy and should be + smoothed when logged. The hint will be accessible through + :meth:`EventStorage.smoothing_hints`. A writer may ignore the hint + and apply custom smoothing rule. + + It defaults to True because most scalars we save need to be smoothed to + provide any useful signal. 
+ """ + name = self._current_prefix + name + history = self._history[name] + value = float(value) + history.update(value, self._iter) + self._latest_scalars[name] = (value, self._iter) + + existing_hint = self._smoothing_hints.get(name) + if existing_hint is not None: + assert ( + existing_hint == smoothing_hint + ), "Scalar {} was put with a different smoothing_hint!".format(name) + else: + self._smoothing_hints[name] = smoothing_hint + + def put_scalars(self, *, smoothing_hint=True, **kwargs): + """ + Put multiple scalars from keyword arguments. + + Examples: + + storage.put_scalars(loss=my_loss, accuracy=my_accuracy, smoothing_hint=True) + """ + for k, v in kwargs.items(): + self.put_scalar(k, v, smoothing_hint=smoothing_hint) + + def put_histogram(self, hist_name, hist_tensor, bins=1000): + """ + Create a histogram from a tensor. + + Args: + hist_name (str): The name of the histogram to put into tensorboard. + hist_tensor (torch.Tensor): A Tensor of arbitrary shape to be converted + into a histogram. + bins (int): Number of histogram bins. + """ + ht_min, ht_max = hist_tensor.min().item(), hist_tensor.max().item() + + # Create a histogram with PyTorch + hist_counts = torch.histc(hist_tensor, bins=bins) + hist_edges = torch.linspace(start=ht_min, end=ht_max, steps=bins + 1, dtype=torch.float32) + + # Parameter for the add_histogram_raw function of SummaryWriter + hist_params = dict( + tag=hist_name, + min=ht_min, + max=ht_max, + num=len(hist_tensor), + sum=float(hist_tensor.sum()), + sum_squares=float(torch.sum(hist_tensor**2)), + bucket_limits=hist_edges[1:].tolist(), + bucket_counts=hist_counts.tolist(), + global_step=self._iter, + ) + self._histograms.append(hist_params) + + def history(self, name): + """ + Returns: + HistoryBuffer: the scalar history for name + """ + ret = self._history.get(name, None) + if ret is None: + raise KeyError("No history metric available for {}!".format(name)) + return ret + + def histories(self): + """ + Returns: + dict[name -> HistoryBuffer]: the HistoryBuffer for all scalars + """ + return self._history + + def latest(self): + """ + Returns: + dict[str -> (float, int)]: mapping from the name of each scalar to the most + recent value and the iteration number its added. + """ + return self._latest_scalars + + def latest_with_smoothing_hint(self, window_size=20): + """ + Similar to :meth:`latest`, but the returned values + are either the un-smoothed original latest value, + or a median of the given window_size, + depend on whether the smoothing_hint is True. + + This provides a default behavior that other writers can use. + + Note: All scalars saved in the past `window_size` iterations are used for smoothing. + This is different from the `window_size` definition in HistoryBuffer. + Use :meth:`get_history_window_size` to get the `window_size` used in HistoryBuffer. + """ + result = {} + for k, (v, itr) in self._latest_scalars.items(): + result[k] = ( + self._history[k].median(self.count_samples(k, window_size)) + if self._smoothing_hints[k] + else v, + itr, + ) + return result + + def count_samples(self, name, window_size=20): + """ + Return the number of samples logged in the past `window_size` iterations. + """ + samples = 0 + data = self._history[name].values() + for _, iter_ in reversed(data): + if iter_ > data[-1][1] - window_size: + samples += 1 + else: + break + return samples + + def smoothing_hints(self): + """ + Returns: + dict[name -> bool]: the user-provided hint on whether the scalar + is noisy and needs smoothing. 
+ """ + return self._smoothing_hints + + def step(self): + """ + User should either: (1) Call this function to increment storage.iter when needed. Or + (2) Set `storage.iter` to the correct iteration number before each iteration. + + The storage will then be able to associate the new data with an iteration number. + """ + self._iter += 1 + + @property + def iter(self): + """ + Returns: + int: The current iteration number. When used together with a trainer, + this is ensured to be the same as trainer.iter. + """ + return self._iter + + @iter.setter + def iter(self, val): + self._iter = int(val) + + @property + def iteration(self): + # for backward compatibility + return self._iter + + def __enter__(self): + _CURRENT_STORAGE_STACK.append(self) + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + assert _CURRENT_STORAGE_STACK[-1] == self + _CURRENT_STORAGE_STACK.pop() + + @contextmanager + def name_scope(self, name): + """ + Yields: + A context within which all the events added to this storage + will be prefixed by the name scope. + """ + old_prefix = self._current_prefix + self._current_prefix = name.rstrip("/") + "/" + yield + self._current_prefix = old_prefix + + def clear_images(self): + """ + Delete all the stored images for visualization. This should be called + after images are written to tensorboard. + """ + self._vis_data = [] + + def clear_histograms(self): + """ + Delete all the stored histograms for visualization. + This should be called after histograms are written to tensorboard. + """ + self._histograms = [] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/file_io.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/file_io.py new file mode 100644 index 0000000000000000000000000000000000000000..09f7dffdb36199350bba57bd3b4e9e8babb40594 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/file_io.py @@ -0,0 +1,39 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from iopath.common.file_io import HTTPURLHandler, OneDrivePathHandler, PathHandler +from iopath.common.file_io import PathManager as PathManagerBase + +__all__ = ["PathManager", "PathHandler"] + + +PathManager = PathManagerBase() +""" +This is a detectron2 project-specific PathManager. +We try to stay away from global PathManager in fvcore as it +introduces potential conflicts among other libraries. +""" + + +class Detectron2Handler(PathHandler): + """ + Resolve anything that's hosted under detectron2's namespace. 
+ """ + + PREFIX = "detectron2://" + S3_DETECTRON2_PREFIX = "https://dl.fbaipublicfiles.com/detectron2/" + + def _get_supported_prefixes(self): + return [self.PREFIX] + + def _get_local_path(self, path, **kwargs): + name = path[len(self.PREFIX) :] + return PathManager.get_local_path(self.S3_DETECTRON2_PREFIX + name, **kwargs) + + def _open(self, path, mode="r", **kwargs): + return PathManager.open( + self.S3_DETECTRON2_PREFIX + path[len(self.PREFIX) :], mode, **kwargs + ) + + +PathManager.register_handler(HTTPURLHandler()) +PathManager.register_handler(OneDrivePathHandler()) +PathManager.register_handler(Detectron2Handler()) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/logger.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/logger.py new file mode 100644 index 0000000000000000000000000000000000000000..d77d42cbe86366e5d91e93311f92bb166c304184 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/logger.py @@ -0,0 +1,237 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import atexit +import functools +import logging +import os +import sys +import time +from collections import Counter +import torch +from tabulate import tabulate +from termcolor import colored + +from annotator.oneformer.detectron2.utils.file_io import PathManager + +__all__ = ["setup_logger", "log_first_n", "log_every_n", "log_every_n_seconds"] + + +class _ColorfulFormatter(logging.Formatter): + def __init__(self, *args, **kwargs): + self._root_name = kwargs.pop("root_name") + "." + self._abbrev_name = kwargs.pop("abbrev_name", "") + if len(self._abbrev_name): + self._abbrev_name = self._abbrev_name + "." + super(_ColorfulFormatter, self).__init__(*args, **kwargs) + + def formatMessage(self, record): + record.name = record.name.replace(self._root_name, self._abbrev_name) + log = super(_ColorfulFormatter, self).formatMessage(record) + if record.levelno == logging.WARNING: + prefix = colored("WARNING", "red", attrs=["blink"]) + elif record.levelno == logging.ERROR or record.levelno == logging.CRITICAL: + prefix = colored("ERROR", "red", attrs=["blink", "underline"]) + else: + return log + return prefix + " " + log + + +@functools.lru_cache() # so that calling setup_logger multiple times won't add many handlers +def setup_logger( + output=None, distributed_rank=0, *, color=True, name="detectron2", abbrev_name=None +): + """ + Initialize the detectron2 logger and set its verbosity level to "DEBUG". + + Args: + output (str): a file name or a directory to save log. If None, will not save log file. + If ends with ".txt" or ".log", assumed to be a file name. + Otherwise, logs will be saved to `output/log.txt`. + name (str): the root module name of this logger + abbrev_name (str): an abbreviation of the module, to avoid long names in logs. + Set to "" to not log the root module in logs. + By default, will abbreviate "detectron2" to "d2" and leave other + modules unchanged. 
+ + Returns: + logging.Logger: a logger + """ + logger = logging.getLogger(name) + logger.setLevel(logging.DEBUG) + logger.propagate = False + + if abbrev_name is None: + abbrev_name = "d2" if name == "detectron2" else name + + plain_formatter = logging.Formatter( + "[%(asctime)s] %(name)s %(levelname)s: %(message)s", datefmt="%m/%d %H:%M:%S" + ) + # stdout logging: master only + if distributed_rank == 0: + ch = logging.StreamHandler(stream=sys.stdout) + ch.setLevel(logging.DEBUG) + if color: + formatter = _ColorfulFormatter( + colored("[%(asctime)s %(name)s]: ", "green") + "%(message)s", + datefmt="%m/%d %H:%M:%S", + root_name=name, + abbrev_name=str(abbrev_name), + ) + else: + formatter = plain_formatter + ch.setFormatter(formatter) + logger.addHandler(ch) + + # file logging: all workers + if output is not None: + if output.endswith(".txt") or output.endswith(".log"): + filename = output + else: + filename = os.path.join(output, "log.txt") + if distributed_rank > 0: + filename = filename + ".rank{}".format(distributed_rank) + PathManager.mkdirs(os.path.dirname(filename)) + + fh = logging.StreamHandler(_cached_log_stream(filename)) + fh.setLevel(logging.DEBUG) + fh.setFormatter(plain_formatter) + logger.addHandler(fh) + + return logger + + +# cache the opened file object, so that different calls to `setup_logger` +# with the same file name can safely write to the same file. +@functools.lru_cache(maxsize=None) +def _cached_log_stream(filename): + # use 1K buffer if writing to cloud storage + io = PathManager.open(filename, "a", buffering=1024 if "://" in filename else -1) + atexit.register(io.close) + return io + + +""" +Below are some other convenient logging methods. +They are mainly adopted from +https://github.com/abseil/abseil-py/blob/master/absl/logging/__init__.py +""" + + +def _find_caller(): + """ + Returns: + str: module name of the caller + tuple: a hashable key to be used to identify different callers + """ + frame = sys._getframe(2) + while frame: + code = frame.f_code + if os.path.join("utils", "logger.") not in code.co_filename: + mod_name = frame.f_globals["__name__"] + if mod_name == "__main__": + mod_name = "detectron2" + return mod_name, (code.co_filename, frame.f_lineno, code.co_name) + frame = frame.f_back + + +_LOG_COUNTER = Counter() +_LOG_TIMER = {} + + +def log_first_n(lvl, msg, n=1, *, name=None, key="caller"): + """ + Log only for the first n times. + + Args: + lvl (int): the logging level + msg (str): + n (int): + name (str): name of the logger to use. Will use the caller's module by default. + key (str or tuple[str]): the string(s) can be one of "caller" or + "message", which defines how to identify duplicated logs. + For example, if called with `n=1, key="caller"`, this function + will only log the first call from the same caller, regardless of + the message content. + If called with `n=1, key="message"`, this function will log the + same content only once, even if they are called from different places. + If called with `n=1, key=("caller", "message")`, this function + will not log only if the same caller has logged the same message before. 
+ """ + if isinstance(key, str): + key = (key,) + assert len(key) > 0 + + caller_module, caller_key = _find_caller() + hash_key = () + if "caller" in key: + hash_key = hash_key + caller_key + if "message" in key: + hash_key = hash_key + (msg,) + + _LOG_COUNTER[hash_key] += 1 + if _LOG_COUNTER[hash_key] <= n: + logging.getLogger(name or caller_module).log(lvl, msg) + + +def log_every_n(lvl, msg, n=1, *, name=None): + """ + Log once per n times. + + Args: + lvl (int): the logging level + msg (str): + n (int): + name (str): name of the logger to use. Will use the caller's module by default. + """ + caller_module, key = _find_caller() + _LOG_COUNTER[key] += 1 + if n == 1 or _LOG_COUNTER[key] % n == 1: + logging.getLogger(name or caller_module).log(lvl, msg) + + +def log_every_n_seconds(lvl, msg, n=1, *, name=None): + """ + Log no more than once per n seconds. + + Args: + lvl (int): the logging level + msg (str): + n (int): + name (str): name of the logger to use. Will use the caller's module by default. + """ + caller_module, key = _find_caller() + last_logged = _LOG_TIMER.get(key, None) + current_time = time.time() + if last_logged is None or current_time - last_logged >= n: + logging.getLogger(name or caller_module).log(lvl, msg) + _LOG_TIMER[key] = current_time + + +def create_small_table(small_dict): + """ + Create a small table using the keys of small_dict as headers. This is only + suitable for small dictionaries. + + Args: + small_dict (dict): a result dictionary of only a few items. + + Returns: + str: the table as a string. + """ + keys, values = tuple(zip(*small_dict.items())) + table = tabulate( + [values], + headers=keys, + tablefmt="pipe", + floatfmt=".3f", + stralign="center", + numalign="center", + ) + return table + + +def _log_api_usage(identifier: str): + """ + Internal function used to log the usage of different detectron2 components + inside facebook's infra. + """ + torch._C._log_api_usage_once("detectron2." + identifier) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/memory.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/memory.py new file mode 100644 index 0000000000000000000000000000000000000000..bd494780b9dbbd1571688cd270bb9b53d113c13e --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/memory.py @@ -0,0 +1,84 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +import logging +from contextlib import contextmanager +from functools import wraps +import torch + +__all__ = ["retry_if_cuda_oom"] + + +@contextmanager +def _ignore_torch_cuda_oom(): + """ + A context which ignores CUDA OOM exception from pytorch. + """ + try: + yield + except RuntimeError as e: + # NOTE: the string may change? + if "CUDA out of memory. " in str(e): + pass + else: + raise + + +def retry_if_cuda_oom(func): + """ + Makes a function retry itself after encountering + pytorch's CUDA OOM error. + It will first retry after calling `torch.cuda.empty_cache()`. + + If that still fails, it will then retry by trying to convert inputs to CPUs. + In this case, it expects the function to dispatch to CPU implementation. + The return values may become CPU tensors as well and it's user's + responsibility to convert it back to CUDA tensor if needed. + + Args: + func: a stateless callable that takes tensor-like objects as arguments + + Returns: + a callable which retries `func` if OOM is encountered. 
+ + Examples: + :: + output = retry_if_cuda_oom(some_torch_function)(input1, input2) + # output may be on CPU even if inputs are on GPU + + Note: + 1. When converting inputs to CPU, it will only look at each argument and check + if it has `.device` and `.to` for conversion. Nested structures of tensors + are not supported. + + 2. Since the function might be called more than once, it has to be + stateless. + """ + + def maybe_to_cpu(x): + try: + like_gpu_tensor = x.device.type == "cuda" and hasattr(x, "to") + except AttributeError: + like_gpu_tensor = False + if like_gpu_tensor: + return x.to(device="cpu") + else: + return x + + @wraps(func) + def wrapped(*args, **kwargs): + with _ignore_torch_cuda_oom(): + return func(*args, **kwargs) + + # Clear cache and retry + torch.cuda.empty_cache() + with _ignore_torch_cuda_oom(): + return func(*args, **kwargs) + + # Try on CPU. This slows down the code significantly, therefore print a notice. + logger = logging.getLogger(__name__) + logger.info("Attempting to copy inputs of {} to CPU due to CUDA OOM".format(str(func))) + new_args = (maybe_to_cpu(x) for x in args) + new_kwargs = {k: maybe_to_cpu(v) for k, v in kwargs.items()} + return func(*new_args, **new_kwargs) + + return wrapped diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/registry.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/registry.py new file mode 100644 index 0000000000000000000000000000000000000000..4b01e9007c2578a7b5ae555c926cc06c8a3010f9 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/registry.py @@ -0,0 +1,60 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +from typing import Any +import pydoc +from fvcore.common.registry import Registry # for backward compatibility. + +""" +``Registry`` and `locate` provide ways to map a string (typically found +in config files) to callable objects. +""" + +__all__ = ["Registry", "locate"] + + +def _convert_target_to_string(t: Any) -> str: + """ + Inverse of ``locate()``. + + Args: + t: any object with ``__module__`` and ``__qualname__`` + """ + module, qualname = t.__module__, t.__qualname__ + + # Compress the path to this object, e.g. ``module.submodule._impl.class`` + # may become ``module.submodule.class``, if the later also resolves to the same + # object. This simplifies the string, and also is less affected by moving the + # class implementation. + module_parts = module.split(".") + for k in range(1, len(module_parts)): + prefix = ".".join(module_parts[:k]) + candidate = f"{prefix}.{qualname}" + try: + if locate(candidate) is t: + return candidate + except ImportError: + pass + return f"{module}.{qualname}" + + +def locate(name: str) -> Any: + """ + Locate and return an object ``x`` using an input string ``{x.__module__}.{x.__qualname__}``, + such as "module.submodule.class_name". + + Raise Exception if it cannot be found. + """ + obj = pydoc.locate(name) + + # Some cases (e.g. torch.optim.sgd.SGD) not handled correctly + # by pydoc.locate. Try a private function from hydra. 
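+    # pydoc.locate returns None (rather than raising) when the dotted path
+    # cannot be imported, so fall back to hydra's stricter ``_locate`` below.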
+ if obj is None: + try: + # from hydra.utils import get_method - will print many errors + from hydra.utils import _locate + except ImportError as e: + raise ImportError(f"Cannot dynamically locate object {name}!") from e + else: + obj = _locate(name) # it raises if fails + + return obj diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/serialize.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/serialize.py new file mode 100644 index 0000000000000000000000000000000000000000..ed45065184f0512ef65c8f38d398de553ce576ca --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/serialize.py @@ -0,0 +1,32 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# import cloudpickle + + +class PicklableWrapper(object): + """ + Wrap an object to make it more picklable, note that it uses + heavy weight serialization libraries that are slower than pickle. + It's best to use it only on closures (which are usually not picklable). + + This is a simplified version of + https://github.com/joblib/joblib/blob/master/joblib/externals/loky/cloudpickle_wrapper.py + """ + + def __init__(self, obj): + while isinstance(obj, PicklableWrapper): + # Wrapping an object twice is no-op + obj = obj._obj + self._obj = obj + + # def __reduce__(self): + # s = cloudpickle.dumps(self._obj) + # return cloudpickle.loads, (s,) + + def __call__(self, *args, **kwargs): + return self._obj(*args, **kwargs) + + def __getattr__(self, attr): + # Ensure that the wrapped object can be used seamlessly as the previous object. + if attr not in ["_obj"]: + return getattr(self._obj, attr) + return getattr(self, attr) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/testing.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/testing.py new file mode 100644 index 0000000000000000000000000000000000000000..3c3f001a260c3df20f610f0336678d505fdce5aa --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/testing.py @@ -0,0 +1,478 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import io +import numpy as np +import os +import re +import tempfile +import unittest +from typing import Callable +import torch +import torch.onnx.symbolic_helper as sym_help +from packaging import version +from torch._C import ListType +from torch.onnx import register_custom_op_symbolic + +from annotator.oneformer.detectron2 import model_zoo +from annotator.oneformer.detectron2.config import CfgNode, LazyConfig, instantiate +from annotator.oneformer.detectron2.data import DatasetCatalog +from annotator.oneformer.detectron2.data.detection_utils import read_image +from annotator.oneformer.detectron2.modeling import build_model +from annotator.oneformer.detectron2.structures import Boxes, Instances, ROIMasks +from annotator.oneformer.detectron2.utils.file_io import PathManager + + +""" +Internal utilities for tests. Don't use except for writing tests. +""" + + +def get_model_no_weights(config_path): + """ + Like model_zoo.get, but do not load any weights (even pretrained) + """ + cfg = model_zoo.get_config(config_path) + if isinstance(cfg, CfgNode): + if not torch.cuda.is_available(): + cfg.MODEL.DEVICE = "cpu" + return build_model(cfg) + else: + return instantiate(cfg.model) + + +def random_boxes(num_boxes, max_coord=100, device="cpu"): + """ + Create a random Nx4 boxes tensor, with coordinates < max_coord. 
+ """ + boxes = torch.rand(num_boxes, 4, device=device) * (max_coord * 0.5) + boxes.clamp_(min=1.0) # tiny boxes cause numerical instability in box regression + # Note: the implementation of this function in torchvision is: + # boxes[:, 2:] += torch.rand(N, 2) * 100 + # but it does not guarantee non-negative widths/heights constraints: + # boxes[:, 2] >= boxes[:, 0] and boxes[:, 3] >= boxes[:, 1]: + boxes[:, 2:] += boxes[:, :2] + return boxes + + +def get_sample_coco_image(tensor=True): + """ + Args: + tensor (bool): if True, returns 3xHxW tensor. + else, returns a HxWx3 numpy array. + + Returns: + an image, in BGR color. + """ + try: + file_name = DatasetCatalog.get("coco_2017_val_100")[0]["file_name"] + if not PathManager.exists(file_name): + raise FileNotFoundError() + except IOError: + # for public CI to run + file_name = PathManager.get_local_path( + "http://images.cocodataset.org/train2017/000000000009.jpg" + ) + ret = read_image(file_name, format="BGR") + if tensor: + ret = torch.from_numpy(np.ascontiguousarray(ret.transpose(2, 0, 1))) + return ret + + +def convert_scripted_instances(instances): + """ + Convert a scripted Instances object to a regular :class:`Instances` object + """ + assert hasattr( + instances, "image_size" + ), f"Expect an Instances object, but got {type(instances)}!" + ret = Instances(instances.image_size) + for name in instances._field_names: + val = getattr(instances, "_" + name, None) + if val is not None: + ret.set(name, val) + return ret + + +def assert_instances_allclose(input, other, *, rtol=1e-5, msg="", size_as_tensor=False): + """ + Args: + input, other (Instances): + size_as_tensor: compare image_size of the Instances as tensors (instead of tuples). + Useful for comparing outputs of tracing. + """ + if not isinstance(input, Instances): + input = convert_scripted_instances(input) + if not isinstance(other, Instances): + other = convert_scripted_instances(other) + + if not msg: + msg = "Two Instances are different! " + else: + msg = msg.rstrip() + " " + + size_error_msg = msg + f"image_size is {input.image_size} vs. {other.image_size}!" + if size_as_tensor: + assert torch.equal( + torch.tensor(input.image_size), torch.tensor(other.image_size) + ), size_error_msg + else: + assert input.image_size == other.image_size, size_error_msg + fields = sorted(input.get_fields().keys()) + fields_other = sorted(other.get_fields().keys()) + assert fields == fields_other, msg + f"Fields are {fields} vs {fields_other}!" + + for f in fields: + val1, val2 = input.get(f), other.get(f) + if isinstance(val1, (Boxes, ROIMasks)): + # boxes in the range of O(100) and can have a larger tolerance + assert torch.allclose(val1.tensor, val2.tensor, atol=100 * rtol), ( + msg + f"Field {f} differs too much!" + ) + elif isinstance(val1, torch.Tensor): + if val1.dtype.is_floating_point: + mag = torch.abs(val1).max().cpu().item() + assert torch.allclose(val1, val2, atol=mag * rtol), ( + msg + f"Field {f} differs too much!" + ) + else: + assert torch.equal(val1, val2), msg + f"Field {f} is different!" + else: + raise ValueError(f"Don't know how to compare type {type(val1)}") + + +def reload_script_model(module): + """ + Save a jit module and load it back. + Similar to the `getExportImportCopy` function in torch/testing/ + """ + buffer = io.BytesIO() + torch.jit.save(module, buffer) + buffer.seek(0) + return torch.jit.load(buffer) + + +def reload_lazy_config(cfg): + """ + Save an object by LazyConfig.save and load it back. 
+ This is used to test that a config still works the same after + serialization/deserialization. + """ + with tempfile.TemporaryDirectory(prefix="detectron2") as d: + fname = os.path.join(d, "d2_cfg_test.yaml") + LazyConfig.save(cfg, fname) + return LazyConfig.load(fname) + + +def min_torch_version(min_version: str) -> bool: + """ + Returns True when torch's version is at least `min_version`. + """ + try: + import torch + except ImportError: + return False + + installed_version = version.parse(torch.__version__.split("+")[0]) + min_version = version.parse(min_version) + return installed_version >= min_version + + +def has_dynamic_axes(onnx_model): + """ + Return True when all ONNX input/output have only dynamic axes for all ranks + """ + return all( + not dim.dim_param.isnumeric() + for inp in onnx_model.graph.input + for dim in inp.type.tensor_type.shape.dim + ) and all( + not dim.dim_param.isnumeric() + for out in onnx_model.graph.output + for dim in out.type.tensor_type.shape.dim + ) + + +def register_custom_op_onnx_export( + opname: str, symbolic_fn: Callable, opset_version: int, min_version: str +) -> None: + """ + Register `symbolic_fn` as PyTorch's symbolic `opname`-`opset_version` for ONNX export. + The registration is performed only when current PyTorch's version is < `min_version.` + IMPORTANT: symbolic must be manually unregistered after the caller function returns + """ + if min_torch_version(min_version): + return + register_custom_op_symbolic(opname, symbolic_fn, opset_version) + print(f"_register_custom_op_onnx_export({opname}, {opset_version}) succeeded.") + + +def unregister_custom_op_onnx_export(opname: str, opset_version: int, min_version: str) -> None: + """ + Unregister PyTorch's symbolic `opname`-`opset_version` for ONNX export. + The un-registration is performed only when PyTorch's version is < `min_version` + IMPORTANT: The symbolic must have been manually registered by the caller, otherwise + the incorrect symbolic may be unregistered instead. + """ + + # TODO: _unregister_custom_op_symbolic is introduced PyTorch>=1.10 + # Remove after PyTorch 1.10+ is used by ALL detectron2's CI + try: + from torch.onnx import unregister_custom_op_symbolic as _unregister_custom_op_symbolic + except ImportError: + + def _unregister_custom_op_symbolic(symbolic_name, opset_version): + import torch.onnx.symbolic_registry as sym_registry + from torch.onnx.symbolic_helper import _onnx_main_opset, _onnx_stable_opsets + + def _get_ns_op_name_from_custom_op(symbolic_name): + try: + from torch.onnx.utils import get_ns_op_name_from_custom_op + + ns, op_name = get_ns_op_name_from_custom_op(symbolic_name) + except ImportError as import_error: + if not bool( + re.match(r"^[a-zA-Z0-9-_]*::[a-zA-Z-_]+[a-zA-Z0-9-_]*$", symbolic_name) + ): + raise ValueError( + f"Invalid symbolic name {symbolic_name}. Must be `domain::name`" + ) from import_error + + ns, op_name = symbolic_name.split("::") + if ns == "onnx": + raise ValueError(f"{ns} domain cannot be modified.") from import_error + + if ns == "aten": + ns = "" + + return ns, op_name + + def _unregister_op(opname: str, domain: str, version: int): + try: + sym_registry.unregister_op(op_name, ns, ver) + except AttributeError as attribute_error: + if sym_registry.is_registered_op(opname, domain, version): + del sym_registry._registry[(domain, version)][opname] + if not sym_registry._registry[(domain, version)]: + del sym_registry._registry[(domain, version)] + else: + raise RuntimeError( + f"The opname {opname} is not registered." 
+ ) from attribute_error + + ns, op_name = _get_ns_op_name_from_custom_op(symbolic_name) + for ver in _onnx_stable_opsets + [_onnx_main_opset]: + if ver >= opset_version: + _unregister_op(op_name, ns, ver) + + if min_torch_version(min_version): + return + _unregister_custom_op_symbolic(opname, opset_version) + print(f"_unregister_custom_op_onnx_export({opname}, {opset_version}) succeeded.") + + +skipIfOnCPUCI = unittest.skipIf( + os.environ.get("CI") and not torch.cuda.is_available(), + "The test is too slow on CPUs and will be executed on CircleCI's GPU jobs.", +) + + +def skipIfUnsupportedMinOpsetVersion(min_opset_version, current_opset_version=None): + """ + Skips tests for ONNX Opset versions older than min_opset_version. + """ + + def skip_dec(func): + def wrapper(self): + try: + opset_version = self.opset_version + except AttributeError: + opset_version = current_opset_version + if opset_version < min_opset_version: + raise unittest.SkipTest( + f"Unsupported opset_version {opset_version}" + f", required is {min_opset_version}" + ) + return func(self) + + return wrapper + + return skip_dec + + +def skipIfUnsupportedMinTorchVersion(min_version): + """ + Skips tests for PyTorch versions older than min_version. + """ + reason = f"module 'torch' has __version__ {torch.__version__}" f", required is: {min_version}" + return unittest.skipIf(not min_torch_version(min_version), reason) + + +# TODO: Remove after PyTorch 1.11.1+ is used by detectron2's CI +def _pytorch1111_symbolic_opset9_to(g, self, *args): + """aten::to() symbolic that must be used for testing with PyTorch < 1.11.1.""" + + def is_aten_to_device_only(args): + if len(args) == 4: + # aten::to(Tensor, Device, bool, bool, memory_format) + return ( + args[0].node().kind() == "prim::device" + or args[0].type().isSubtypeOf(ListType.ofInts()) + or ( + sym_help._is_value(args[0]) + and args[0].node().kind() == "onnx::Constant" + and isinstance(args[0].node()["value"], str) + ) + ) + elif len(args) == 5: + # aten::to(Tensor, Device, ScalarType, bool, bool, memory_format) + # When dtype is None, this is a aten::to(device) call + dtype = sym_help._get_const(args[1], "i", "dtype") + return dtype is None + elif len(args) in (6, 7): + # aten::to(Tensor, ScalarType, Layout, Device, bool, bool, memory_format) + # aten::to(Tensor, ScalarType, Layout, Device, bool, bool, bool, memory_format) + # When dtype is None, this is a aten::to(device) call + dtype = sym_help._get_const(args[0], "i", "dtype") + return dtype is None + return False + + # ONNX doesn't have a concept of a device, so we ignore device-only casts + if is_aten_to_device_only(args): + return self + + if len(args) == 4: + # TestONNXRuntime::test_ones_bool shows args[0] of aten::to can be onnx::Constant[Tensor] + # In this case, the constant value is a tensor not int, + # so sym_help._maybe_get_const(args[0], 'i') would not work. 
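+        # Unwrap the constant by hand: read the tensor stored on the
+        # onnx::Constant node and reduce a 0-dim tensor to a plain Python int.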
+ dtype = args[0] + if sym_help._is_value(args[0]) and args[0].node().kind() == "onnx::Constant": + tval = args[0].node()["value"] + if isinstance(tval, torch.Tensor): + if len(tval.shape) == 0: + tval = tval.item() + dtype = int(tval) + else: + dtype = tval + + if sym_help._is_value(dtype) or isinstance(dtype, torch.Tensor): + # aten::to(Tensor, Tensor, bool, bool, memory_format) + dtype = args[0].type().scalarType() + return g.op("Cast", self, to_i=sym_help.cast_pytorch_to_onnx[dtype]) + else: + # aten::to(Tensor, ScalarType, bool, bool, memory_format) + # memory_format is ignored + return g.op("Cast", self, to_i=sym_help.scalar_type_to_onnx[dtype]) + elif len(args) == 5: + # aten::to(Tensor, Device, ScalarType, bool, bool, memory_format) + dtype = sym_help._get_const(args[1], "i", "dtype") + # memory_format is ignored + return g.op("Cast", self, to_i=sym_help.scalar_type_to_onnx[dtype]) + elif len(args) == 6: + # aten::to(Tensor, ScalarType, Layout, Device, bool, bool, memory_format) + dtype = sym_help._get_const(args[0], "i", "dtype") + # Layout, device and memory_format are ignored + return g.op("Cast", self, to_i=sym_help.scalar_type_to_onnx[dtype]) + elif len(args) == 7: + # aten::to(Tensor, ScalarType, Layout, Device, bool, bool, bool, memory_format) + dtype = sym_help._get_const(args[0], "i", "dtype") + # Layout, device and memory_format are ignored + return g.op("Cast", self, to_i=sym_help.scalar_type_to_onnx[dtype]) + else: + return sym_help._onnx_unsupported("Unknown aten::to signature") + + +# TODO: Remove after PyTorch 1.11.1+ is used by detectron2's CI +def _pytorch1111_symbolic_opset9_repeat_interleave(g, self, repeats, dim=None, output_size=None): + + # from torch.onnx.symbolic_helper import ScalarType + from torch.onnx.symbolic_opset9 import expand, unsqueeze + + input = self + # if dim is None flatten + # By default, use the flattened input array, and return a flat output array + if sym_help._is_none(dim): + input = sym_help._reshape_helper(g, self, g.op("Constant", value_t=torch.tensor([-1]))) + dim = 0 + else: + dim = sym_help._maybe_get_scalar(dim) + + repeats_dim = sym_help._get_tensor_rank(repeats) + repeats_sizes = sym_help._get_tensor_sizes(repeats) + input_sizes = sym_help._get_tensor_sizes(input) + if repeats_dim is None: + raise RuntimeError( + "Unsupported: ONNX export of repeat_interleave for unknown " "repeats rank." + ) + if repeats_sizes is None: + raise RuntimeError( + "Unsupported: ONNX export of repeat_interleave for unknown " "repeats size." + ) + if input_sizes is None: + raise RuntimeError( + "Unsupported: ONNX export of repeat_interleave for unknown " "input size." 
+ ) + + input_sizes_temp = input_sizes.copy() + for idx, input_size in enumerate(input_sizes): + if input_size is None: + input_sizes[idx], input_sizes_temp[idx] = 0, -1 + + # Cases where repeats is an int or single value tensor + if repeats_dim == 0 or (repeats_dim == 1 and repeats_sizes[0] == 1): + if not sym_help._is_tensor(repeats): + repeats = g.op("Constant", value_t=torch.LongTensor(repeats)) + if input_sizes[dim] == 0: + return sym_help._onnx_opset_unsupported_detailed( + "repeat_interleave", + 9, + 13, + "Unsupported along dimension with unknown input size", + ) + else: + reps = input_sizes[dim] + repeats = expand(g, repeats, g.op("Constant", value_t=torch.tensor([reps])), None) + + # Cases where repeats is a 1 dim Tensor + elif repeats_dim == 1: + if input_sizes[dim] == 0: + return sym_help._onnx_opset_unsupported_detailed( + "repeat_interleave", + 9, + 13, + "Unsupported along dimension with unknown input size", + ) + if repeats_sizes[0] is None: + return sym_help._onnx_opset_unsupported_detailed( + "repeat_interleave", 9, 13, "Unsupported for cases with dynamic repeats" + ) + assert ( + repeats_sizes[0] == input_sizes[dim] + ), "repeats must have the same size as input along dim" + reps = repeats_sizes[0] + else: + raise RuntimeError("repeats must be 0-dim or 1-dim tensor") + + final_splits = list() + r_splits = sym_help._repeat_interleave_split_helper(g, repeats, reps, 0) + if isinstance(r_splits, torch._C.Value): + r_splits = [r_splits] + i_splits = sym_help._repeat_interleave_split_helper(g, input, reps, dim) + if isinstance(i_splits, torch._C.Value): + i_splits = [i_splits] + input_sizes[dim], input_sizes_temp[dim] = -1, 1 + for idx, r_split in enumerate(r_splits): + i_split = unsqueeze(g, i_splits[idx], dim + 1) + r_concat = [ + g.op("Constant", value_t=torch.LongTensor(input_sizes_temp[: dim + 1])), + r_split, + g.op("Constant", value_t=torch.LongTensor(input_sizes_temp[dim + 1 :])), + ] + r_concat = g.op("Concat", *r_concat, axis_i=0) + i_split = expand(g, i_split, r_concat, None) + i_split = sym_help._reshape_helper( + g, + i_split, + g.op("Constant", value_t=torch.LongTensor(input_sizes)), + allowzero=0, + ) + final_splits.append(i_split) + return g.op("Concat", *final_splits, axis_i=dim) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/tracing.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/tracing.py new file mode 100644 index 0000000000000000000000000000000000000000..75661131505cee2eecd0b1c9dabcd4d7bd5453b2 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/tracing.py @@ -0,0 +1,71 @@ +import inspect +import torch + +from annotator.oneformer.detectron2.utils.env import TORCH_VERSION + +try: + from torch.fx._symbolic_trace import is_fx_tracing as is_fx_tracing_current + + tracing_current_exists = True +except ImportError: + tracing_current_exists = False + +try: + from torch.fx._symbolic_trace import _orig_module_call + + tracing_legacy_exists = True +except ImportError: + tracing_legacy_exists = False + + +@torch.jit.ignore +def is_fx_tracing_legacy() -> bool: + """ + Returns a bool indicating whether torch.fx is currently symbolically tracing a module. + Can be useful for gating module logic that is incompatible with symbolic tracing. 
+ """ + return torch.nn.Module.__call__ is not _orig_module_call + + +@torch.jit.ignore +def is_fx_tracing() -> bool: + """Returns whether execution is currently in + Torch FX tracing mode""" + if TORCH_VERSION >= (1, 10) and tracing_current_exists: + return is_fx_tracing_current() + elif tracing_legacy_exists: + return is_fx_tracing_legacy() + else: + # Can't find either current or legacy tracing indication code. + # Enabling this assert_fx_safe() call regardless of tracing status. + return False + + +@torch.jit.ignore +def assert_fx_safe(condition: bool, message: str) -> torch.Tensor: + """An FX-tracing safe version of assert. + Avoids erroneous type assertion triggering when types are masked inside + an fx.proxy.Proxy object during tracing. + Args: condition - either a boolean expression or a string representing + the condition to test. If this assert triggers an exception when tracing + due to dynamic control flow, try encasing the expression in quotation + marks and supplying it as a string.""" + # Must return a concrete tensor for compatibility with PyTorch <=1.8. + # If <=1.8 compatibility is not needed, return type can be converted to None + if not is_fx_tracing(): + try: + if isinstance(condition, str): + caller_frame = inspect.currentframe().f_back + torch._assert( + eval(condition, caller_frame.f_globals, caller_frame.f_locals), message + ) + return torch.ones(1) + else: + torch._assert(condition, message) + return torch.ones(1) + except torch.fx.proxy.TraceError as e: + print( + "Found a non-FX compatible assertion. Skipping the check. Failure is shown below" + + str(e) + ) + return torch.zeros(1) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/video_visualizer.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/video_visualizer.py new file mode 100644 index 0000000000000000000000000000000000000000..eaedfc4892ef6e34e9233e24fa06f0f24d4d0096 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/video_visualizer.py @@ -0,0 +1,287 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import numpy as np +from typing import List +import annotator.oneformer.pycocotools.mask as mask_util + +from annotator.oneformer.detectron2.structures import Instances +from annotator.oneformer.detectron2.utils.visualizer import ( + ColorMode, + Visualizer, + _create_text_labels, + _PanopticPrediction, +) + +from .colormap import random_color, random_colors + + +class _DetectedInstance: + """ + Used to store data about detected objects in video frame, + in order to transfer color to objects in the future frames. + + Attributes: + label (int): + bbox (tuple[float]): + mask_rle (dict): + color (tuple[float]): RGB colors in range (0, 1) + ttl (int): time-to-live for the instance. For example, if ttl=2, + the instance color can be transferred to objects in the next two frames. + """ + + __slots__ = ["label", "bbox", "mask_rle", "color", "ttl"] + + def __init__(self, label, bbox, mask_rle, color, ttl): + self.label = label + self.bbox = bbox + self.mask_rle = mask_rle + self.color = color + self.ttl = ttl + + +class VideoVisualizer: + def __init__(self, metadata, instance_mode=ColorMode.IMAGE): + """ + Args: + metadata (MetadataCatalog): image metadata. + """ + self.metadata = metadata + self._old_instances = [] + assert instance_mode in [ + ColorMode.IMAGE, + ColorMode.IMAGE_BW, + ], "Other mode not supported yet." 
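+        # The attributes below keep a fixed color pool and a set of unused
+        # indices, so tracked instance ids can map to stable colors across
+        # frames (see _assign_colors_by_id).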
+ self._instance_mode = instance_mode + self._max_num_instances = self.metadata.get("max_num_instances", 74) + self._assigned_colors = {} + self._color_pool = random_colors(self._max_num_instances, rgb=True, maximum=1) + self._color_idx_set = set(range(len(self._color_pool))) + + def draw_instance_predictions(self, frame, predictions): + """ + Draw instance-level prediction results on an image. + + Args: + frame (ndarray): an RGB image of shape (H, W, C), in the range [0, 255]. + predictions (Instances): the output of an instance detection/segmentation + model. Following fields will be used to draw: + "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle"). + + Returns: + output (VisImage): image object with visualizations. + """ + frame_visualizer = Visualizer(frame, self.metadata) + num_instances = len(predictions) + if num_instances == 0: + return frame_visualizer.output + + boxes = predictions.pred_boxes.tensor.numpy() if predictions.has("pred_boxes") else None + scores = predictions.scores if predictions.has("scores") else None + classes = predictions.pred_classes.numpy() if predictions.has("pred_classes") else None + keypoints = predictions.pred_keypoints if predictions.has("pred_keypoints") else None + colors = predictions.COLOR if predictions.has("COLOR") else [None] * len(predictions) + periods = predictions.ID_period if predictions.has("ID_period") else None + period_threshold = self.metadata.get("period_threshold", 0) + visibilities = ( + [True] * len(predictions) + if periods is None + else [x > period_threshold for x in periods] + ) + + if predictions.has("pred_masks"): + masks = predictions.pred_masks + # mask IOU is not yet enabled + # masks_rles = mask_util.encode(np.asarray(masks.permute(1, 2, 0), order="F")) + # assert len(masks_rles) == num_instances + else: + masks = None + + if not predictions.has("COLOR"): + if predictions.has("ID"): + colors = self._assign_colors_by_id(predictions) + else: + # ToDo: clean old assign color method and use a default tracker to assign id + detected = [ + _DetectedInstance(classes[i], boxes[i], mask_rle=None, color=colors[i], ttl=8) + for i in range(num_instances) + ] + colors = self._assign_colors(detected) + + labels = _create_text_labels(classes, scores, self.metadata.get("thing_classes", None)) + + if self._instance_mode == ColorMode.IMAGE_BW: + # any() returns uint8 tensor + frame_visualizer.output.reset_image( + frame_visualizer._create_grayscale_image( + (masks.any(dim=0) > 0).numpy() if masks is not None else None + ) + ) + alpha = 0.3 + else: + alpha = 0.5 + + labels = ( + None + if labels is None + else [y[0] for y in filter(lambda x: x[1], zip(labels, visibilities))] + ) # noqa + assigned_colors = ( + None + if colors is None + else [y[0] for y in filter(lambda x: x[1], zip(colors, visibilities))] + ) # noqa + frame_visualizer.overlay_instances( + boxes=None if masks is not None else boxes[visibilities], # boxes are a bit distracting + masks=None if masks is None else masks[visibilities], + labels=labels, + keypoints=None if keypoints is None else keypoints[visibilities], + assigned_colors=assigned_colors, + alpha=alpha, + ) + + return frame_visualizer.output + + def draw_sem_seg(self, frame, sem_seg, area_threshold=None): + """ + Args: + sem_seg (ndarray or Tensor): semantic segmentation of shape (H, W), + each value is the integer label. 
+ area_threshold (Optional[int]): only draw segmentations larger than the threshold + """ + # don't need to do anything special + frame_visualizer = Visualizer(frame, self.metadata) + frame_visualizer.draw_sem_seg(sem_seg, area_threshold=None) + return frame_visualizer.output + + def draw_panoptic_seg_predictions( + self, frame, panoptic_seg, segments_info, area_threshold=None, alpha=0.5 + ): + frame_visualizer = Visualizer(frame, self.metadata) + pred = _PanopticPrediction(panoptic_seg, segments_info, self.metadata) + + if self._instance_mode == ColorMode.IMAGE_BW: + frame_visualizer.output.reset_image( + frame_visualizer._create_grayscale_image(pred.non_empty_mask()) + ) + + # draw mask for all semantic segments first i.e. "stuff" + for mask, sinfo in pred.semantic_masks(): + category_idx = sinfo["category_id"] + try: + mask_color = [x / 255 for x in self.metadata.stuff_colors[category_idx]] + except AttributeError: + mask_color = None + + frame_visualizer.draw_binary_mask( + mask, + color=mask_color, + text=self.metadata.stuff_classes[category_idx], + alpha=alpha, + area_threshold=area_threshold, + ) + + all_instances = list(pred.instance_masks()) + if len(all_instances) == 0: + return frame_visualizer.output + # draw mask for all instances second + masks, sinfo = list(zip(*all_instances)) + num_instances = len(masks) + masks_rles = mask_util.encode( + np.asarray(np.asarray(masks).transpose(1, 2, 0), dtype=np.uint8, order="F") + ) + assert len(masks_rles) == num_instances + + category_ids = [x["category_id"] for x in sinfo] + detected = [ + _DetectedInstance(category_ids[i], bbox=None, mask_rle=masks_rles[i], color=None, ttl=8) + for i in range(num_instances) + ] + colors = self._assign_colors(detected) + labels = [self.metadata.thing_classes[k] for k in category_ids] + + frame_visualizer.overlay_instances( + boxes=None, + masks=masks, + labels=labels, + keypoints=None, + assigned_colors=colors, + alpha=alpha, + ) + return frame_visualizer.output + + def _assign_colors(self, instances): + """ + Naive tracking heuristics to assign same color to the same instance, + will update the internal state of tracked instances. + + Returns: + list[tuple[float]]: list of colors. 
+ """ + + # Compute iou with either boxes or masks: + is_crowd = np.zeros((len(instances),), dtype=bool) + if instances[0].bbox is None: + assert instances[0].mask_rle is not None + # use mask iou only when box iou is None + # because box seems good enough + rles_old = [x.mask_rle for x in self._old_instances] + rles_new = [x.mask_rle for x in instances] + ious = mask_util.iou(rles_old, rles_new, is_crowd) + threshold = 0.5 + else: + boxes_old = [x.bbox for x in self._old_instances] + boxes_new = [x.bbox for x in instances] + ious = mask_util.iou(boxes_old, boxes_new, is_crowd) + threshold = 0.6 + if len(ious) == 0: + ious = np.zeros((len(self._old_instances), len(instances)), dtype="float32") + + # Only allow matching instances of the same label: + for old_idx, old in enumerate(self._old_instances): + for new_idx, new in enumerate(instances): + if old.label != new.label: + ious[old_idx, new_idx] = 0 + + matched_new_per_old = np.asarray(ious).argmax(axis=1) + max_iou_per_old = np.asarray(ious).max(axis=1) + + # Try to find match for each old instance: + extra_instances = [] + for idx, inst in enumerate(self._old_instances): + if max_iou_per_old[idx] > threshold: + newidx = matched_new_per_old[idx] + if instances[newidx].color is None: + instances[newidx].color = inst.color + continue + # If an old instance does not match any new instances, + # keep it for the next frame in case it is just missed by the detector + inst.ttl -= 1 + if inst.ttl > 0: + extra_instances.append(inst) + + # Assign random color to newly-detected instances: + for inst in instances: + if inst.color is None: + inst.color = random_color(rgb=True, maximum=1) + self._old_instances = instances[:] + extra_instances + return [d.color for d in instances] + + def _assign_colors_by_id(self, instances: Instances) -> List: + colors = [] + untracked_ids = set(self._assigned_colors.keys()) + for id in instances.ID: + if id in self._assigned_colors: + colors.append(self._color_pool[self._assigned_colors[id]]) + untracked_ids.remove(id) + else: + assert ( + len(self._color_idx_set) >= 1 + ), f"Number of id exceeded maximum, \ + max = {self._max_num_instances}" + idx = self._color_idx_set.pop() + color = self._color_pool[idx] + self._assigned_colors[id] = idx + colors.append(color) + for id in untracked_ids: + self._color_idx_set.add(self._assigned_colors[id]) + del self._assigned_colors[id] + return colors diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/visualizer.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/visualizer.py new file mode 100644 index 0000000000000000000000000000000000000000..48e915433efd4083849229713611b949e88565c5 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/detectron2/utils/visualizer.py @@ -0,0 +1,1267 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
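As a point of reference, a minimal sketch of how the Visualizer defined in this file is typically driven; img_rgb and predictions are placeholders for an RGB frame and a CPU Instances result produced elsewhere, and the dataset name is only an example, not something this patch registers.

    from annotator.oneformer.detectron2.data import MetadataCatalog
    from annotator.oneformer.detectron2.utils.visualizer import ColorMode, Visualizer

    # img_rgb: (H, W, 3) uint8 RGB array; predictions: an Instances object on CPU.
    vis = Visualizer(img_rgb, MetadataCatalog.get("coco_2017_val"), scale=1.0,
                     instance_mode=ColorMode.IMAGE)
    out = vis.draw_instance_predictions(predictions)
    rendered = out.get_image()  # (H, W, 3) uint8 RGB image with the overlays drawn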
+import colorsys +import logging +import math +import numpy as np +from enum import Enum, unique +import cv2 +import matplotlib as mpl +import matplotlib.colors as mplc +import matplotlib.figure as mplfigure +import annotator.oneformer.pycocotools.mask as mask_util +import torch +from matplotlib.backends.backend_agg import FigureCanvasAgg +from PIL import Image + +from annotator.oneformer.detectron2.data import MetadataCatalog +from annotator.oneformer.detectron2.structures import BitMasks, Boxes, BoxMode, Keypoints, PolygonMasks, RotatedBoxes +from annotator.oneformer.detectron2.utils.file_io import PathManager + +from .colormap import random_color + +logger = logging.getLogger(__name__) + +__all__ = ["ColorMode", "VisImage", "Visualizer"] + + +_SMALL_OBJECT_AREA_THRESH = 1000 +_LARGE_MASK_AREA_THRESH = 120000 +_OFF_WHITE = (1.0, 1.0, 240.0 / 255) +_BLACK = (0, 0, 0) +_RED = (1.0, 0, 0) + +_KEYPOINT_THRESHOLD = 0.05 + + +@unique +class ColorMode(Enum): + """ + Enum of different color modes to use for instance visualizations. + """ + + IMAGE = 0 + """ + Picks a random color for every instance and overlay segmentations with low opacity. + """ + SEGMENTATION = 1 + """ + Let instances of the same category have similar colors + (from metadata.thing_colors), and overlay them with + high opacity. This provides more attention on the quality of segmentation. + """ + IMAGE_BW = 2 + """ + Same as IMAGE, but convert all areas without masks to gray-scale. + Only available for drawing per-instance mask predictions. + """ + + +class GenericMask: + """ + Attribute: + polygons (list[ndarray]): list[ndarray]: polygons for this mask. + Each ndarray has format [x, y, x, y, ...] + mask (ndarray): a binary mask + """ + + def __init__(self, mask_or_polygons, height, width): + self._mask = self._polygons = self._has_holes = None + self.height = height + self.width = width + + m = mask_or_polygons + if isinstance(m, dict): + # RLEs + assert "counts" in m and "size" in m + if isinstance(m["counts"], list): # uncompressed RLEs + h, w = m["size"] + assert h == height and w == width + m = mask_util.frPyObjects(m, h, w) + self._mask = mask_util.decode(m)[:, :] + return + + if isinstance(m, list): # list[ndarray] + self._polygons = [np.asarray(x).reshape(-1) for x in m] + return + + if isinstance(m, np.ndarray): # assumed to be a binary mask + assert m.shape[1] != 2, m.shape + assert m.shape == ( + height, + width, + ), f"mask shape: {m.shape}, target dims: {height}, {width}" + self._mask = m.astype("uint8") + return + + raise ValueError("GenericMask cannot handle object {} of type '{}'".format(m, type(m))) + + @property + def mask(self): + if self._mask is None: + self._mask = self.polygons_to_mask(self._polygons) + return self._mask + + @property + def polygons(self): + if self._polygons is None: + self._polygons, self._has_holes = self.mask_to_polygons(self._mask) + return self._polygons + + @property + def has_holes(self): + if self._has_holes is None: + if self._mask is not None: + self._polygons, self._has_holes = self.mask_to_polygons(self._mask) + else: + self._has_holes = False # if original format is polygon, does not have holes + return self._has_holes + + def mask_to_polygons(self, mask): + # cv2.RETR_CCOMP flag retrieves all the contours and arranges them to a 2-level + # hierarchy. External contours (boundary) of the object are placed in hierarchy-1. + # Internal contours (holes) are placed in hierarchy-2. + # cv2.CHAIN_APPROX_NONE flag gets vertices of polygons from contours. 
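+        # cv2.findContours returns (contours, hierarchy) on OpenCV 4.x and
+        # (image, contours, hierarchy) on OpenCV 3.x, so the results are read
+        # from the end (res[-1], res[-2]) to work with both versions.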
+ mask = np.ascontiguousarray(mask) # some versions of cv2 does not support incontiguous arr + res = cv2.findContours(mask.astype("uint8"), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE) + hierarchy = res[-1] + if hierarchy is None: # empty mask + return [], False + has_holes = (hierarchy.reshape(-1, 4)[:, 3] >= 0).sum() > 0 + res = res[-2] + res = [x.flatten() for x in res] + # These coordinates from OpenCV are integers in range [0, W-1 or H-1]. + # We add 0.5 to turn them into real-value coordinate space. A better solution + # would be to first +0.5 and then dilate the returned polygon by 0.5. + res = [x + 0.5 for x in res if len(x) >= 6] + return res, has_holes + + def polygons_to_mask(self, polygons): + rle = mask_util.frPyObjects(polygons, self.height, self.width) + rle = mask_util.merge(rle) + return mask_util.decode(rle)[:, :] + + def area(self): + return self.mask.sum() + + def bbox(self): + p = mask_util.frPyObjects(self.polygons, self.height, self.width) + p = mask_util.merge(p) + bbox = mask_util.toBbox(p) + bbox[2] += bbox[0] + bbox[3] += bbox[1] + return bbox + + +class _PanopticPrediction: + """ + Unify different panoptic annotation/prediction formats + """ + + def __init__(self, panoptic_seg, segments_info, metadata=None): + if segments_info is None: + assert metadata is not None + # If "segments_info" is None, we assume "panoptic_img" is a + # H*W int32 image storing the panoptic_id in the format of + # category_id * label_divisor + instance_id. We reserve -1 for + # VOID label. + label_divisor = metadata.label_divisor + segments_info = [] + for panoptic_label in np.unique(panoptic_seg.numpy()): + if panoptic_label == -1: + # VOID region. + continue + pred_class = panoptic_label // label_divisor + isthing = pred_class in metadata.thing_dataset_id_to_contiguous_id.values() + segments_info.append( + { + "id": int(panoptic_label), + "category_id": int(pred_class), + "isthing": bool(isthing), + } + ) + del metadata + + self._seg = panoptic_seg + + self._sinfo = {s["id"]: s for s in segments_info} # seg id -> seg info + segment_ids, areas = torch.unique(panoptic_seg, sorted=True, return_counts=True) + areas = areas.numpy() + sorted_idxs = np.argsort(-areas) + self._seg_ids, self._seg_areas = segment_ids[sorted_idxs], areas[sorted_idxs] + self._seg_ids = self._seg_ids.tolist() + for sid, area in zip(self._seg_ids, self._seg_areas): + if sid in self._sinfo: + self._sinfo[sid]["area"] = float(area) + + def non_empty_mask(self): + """ + Returns: + (H, W) array, a mask for all pixels that have a prediction + """ + empty_ids = [] + for id in self._seg_ids: + if id not in self._sinfo: + empty_ids.append(id) + if len(empty_ids) == 0: + return np.zeros(self._seg.shape, dtype=np.uint8) + assert ( + len(empty_ids) == 1 + ), ">1 ids corresponds to no labels. This is currently not supported" + return (self._seg != empty_ids[0]).numpy().astype(bool) + + def semantic_masks(self): + for sid in self._seg_ids: + sinfo = self._sinfo.get(sid) + if sinfo is None or sinfo["isthing"]: + # Some pixels (e.g. id 0 in PanopticFPN) have no instance or semantic predictions. 
+ continue + yield (self._seg == sid).numpy().astype(bool), sinfo + + def instance_masks(self): + for sid in self._seg_ids: + sinfo = self._sinfo.get(sid) + if sinfo is None or not sinfo["isthing"]: + continue + mask = (self._seg == sid).numpy().astype(bool) + if mask.sum() > 0: + yield mask, sinfo + + +def _create_text_labels(classes, scores, class_names, is_crowd=None): + """ + Args: + classes (list[int] or None): + scores (list[float] or None): + class_names (list[str] or None): + is_crowd (list[bool] or None): + + Returns: + list[str] or None + """ + labels = None + if classes is not None: + if class_names is not None and len(class_names) > 0: + labels = [class_names[i] for i in classes] + else: + labels = [str(i) for i in classes] + if scores is not None: + if labels is None: + labels = ["{:.0f}%".format(s * 100) for s in scores] + else: + labels = ["{} {:.0f}%".format(l, s * 100) for l, s in zip(labels, scores)] + if labels is not None and is_crowd is not None: + labels = [l + ("|crowd" if crowd else "") for l, crowd in zip(labels, is_crowd)] + return labels + + +class VisImage: + def __init__(self, img, scale=1.0): + """ + Args: + img (ndarray): an RGB image of shape (H, W, 3) in range [0, 255]. + scale (float): scale the input image + """ + self.img = img + self.scale = scale + self.width, self.height = img.shape[1], img.shape[0] + self._setup_figure(img) + + def _setup_figure(self, img): + """ + Args: + Same as in :meth:`__init__()`. + + Returns: + fig (matplotlib.pyplot.figure): top level container for all the image plot elements. + ax (matplotlib.pyplot.Axes): contains figure elements and sets the coordinate system. + """ + fig = mplfigure.Figure(frameon=False) + self.dpi = fig.get_dpi() + # add a small 1e-2 to avoid precision lost due to matplotlib's truncation + # (https://github.com/matplotlib/matplotlib/issues/15363) + fig.set_size_inches( + (self.width * self.scale + 1e-2) / self.dpi, + (self.height * self.scale + 1e-2) / self.dpi, + ) + self.canvas = FigureCanvasAgg(fig) + # self.canvas = mpl.backends.backend_cairo.FigureCanvasCairo(fig) + ax = fig.add_axes([0.0, 0.0, 1.0, 1.0]) + ax.axis("off") + self.fig = fig + self.ax = ax + self.reset_image(img) + + def reset_image(self, img): + """ + Args: + img: same as in __init__ + """ + img = img.astype("uint8") + self.ax.imshow(img, extent=(0, self.width, self.height, 0), interpolation="nearest") + + def save(self, filepath): + """ + Args: + filepath (str): a string that contains the absolute path, including the file name, where + the visualized image will be saved. + """ + self.fig.savefig(filepath) + + def get_image(self): + """ + Returns: + ndarray: + the visualized image of shape (H, W, 3) (RGB) in uint8 type. + The shape is scaled w.r.t the input image using the given `scale` argument. + """ + canvas = self.canvas + s, (width, height) = canvas.print_to_buffer() + # buf = io.BytesIO() # works for cairo backend + # canvas.print_rgba(buf) + # width, height = self.width, self.height + # s = buf.getvalue() + + buffer = np.frombuffer(s, dtype="uint8") + + img_rgba = buffer.reshape(height, width, 4) + rgb, alpha = np.split(img_rgba, [3], axis=2) + return rgb.astype("uint8") + + +class Visualizer: + """ + Visualizer that draws data about detection/segmentation on images. 
+ + It contains methods like `draw_{text,box,circle,line,binary_mask,polygon}` + that draw primitive objects to images, as well as high-level wrappers like + `draw_{instance_predictions,sem_seg,panoptic_seg_predictions,dataset_dict}` + that draw composite data in some pre-defined style. + + Note that the exact visualization style for the high-level wrappers are subject to change. + Style such as color, opacity, label contents, visibility of labels, or even the visibility + of objects themselves (e.g. when the object is too small) may change according + to different heuristics, as long as the results still look visually reasonable. + + To obtain a consistent style, you can implement custom drawing functions with the + abovementioned primitive methods instead. If you need more customized visualization + styles, you can process the data yourself following their format documented in + tutorials (:doc:`/tutorials/models`, :doc:`/tutorials/datasets`). This class does not + intend to satisfy everyone's preference on drawing styles. + + This visualizer focuses on high rendering quality rather than performance. It is not + designed to be used for real-time applications. + """ + + # TODO implement a fast, rasterized version using OpenCV + + def __init__(self, img_rgb, metadata=None, scale=1.0, instance_mode=ColorMode.IMAGE): + """ + Args: + img_rgb: a numpy array of shape (H, W, C), where H and W correspond to + the height and width of the image respectively. C is the number of + color channels. The image is required to be in RGB format since that + is a requirement of the Matplotlib library. The image is also expected + to be in the range [0, 255]. + metadata (Metadata): dataset metadata (e.g. class names and colors) + instance_mode (ColorMode): defines one of the pre-defined style for drawing + instances on an image. + """ + self.img = np.asarray(img_rgb).clip(0, 255).astype(np.uint8) + if metadata is None: + metadata = MetadataCatalog.get("__nonexist__") + self.metadata = metadata + self.output = VisImage(self.img, scale=scale) + self.cpu_device = torch.device("cpu") + + # too small texts are useless, therefore clamp to 9 + self._default_font_size = max( + np.sqrt(self.output.height * self.output.width) // 90, 10 // scale + ) + self._instance_mode = instance_mode + self.keypoint_threshold = _KEYPOINT_THRESHOLD + + def draw_instance_predictions(self, predictions): + """ + Draw instance-level prediction results on an image. + + Args: + predictions (Instances): the output of an instance detection/segmentation + model. Following fields will be used to draw: + "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle"). + + Returns: + output (VisImage): image object with visualizations. 
+ """ + boxes = predictions.pred_boxes if predictions.has("pred_boxes") else None + scores = predictions.scores if predictions.has("scores") else None + classes = predictions.pred_classes.tolist() if predictions.has("pred_classes") else None + labels = _create_text_labels(classes, scores, self.metadata.get("thing_classes", None)) + keypoints = predictions.pred_keypoints if predictions.has("pred_keypoints") else None + + if predictions.has("pred_masks"): + masks = np.asarray(predictions.pred_masks) + masks = [GenericMask(x, self.output.height, self.output.width) for x in masks] + else: + masks = None + + if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get("thing_colors"): + colors = [ + self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) for c in classes + ] + alpha = 0.8 + else: + colors = None + alpha = 0.5 + + if self._instance_mode == ColorMode.IMAGE_BW: + self.output.reset_image( + self._create_grayscale_image( + (predictions.pred_masks.any(dim=0) > 0).numpy() + if predictions.has("pred_masks") + else None + ) + ) + alpha = 0.3 + + self.overlay_instances( + masks=masks, + boxes=boxes, + labels=labels, + keypoints=keypoints, + assigned_colors=colors, + alpha=alpha, + ) + return self.output + + def draw_sem_seg(self, sem_seg, area_threshold=None, alpha=0.8): + """ + Draw semantic segmentation predictions/labels. + + Args: + sem_seg (Tensor or ndarray): the segmentation of shape (H, W). + Each value is the integer label of the pixel. + area_threshold (int): segments with less than `area_threshold` are not drawn. + alpha (float): the larger it is, the more opaque the segmentations are. + + Returns: + output (VisImage): image object with visualizations. + """ + if isinstance(sem_seg, torch.Tensor): + sem_seg = sem_seg.numpy() + labels, areas = np.unique(sem_seg, return_counts=True) + sorted_idxs = np.argsort(-areas).tolist() + labels = labels[sorted_idxs] + for label in filter(lambda l: l < len(self.metadata.stuff_classes), labels): + try: + mask_color = [x / 255 for x in self.metadata.stuff_colors[label]] + except (AttributeError, IndexError): + mask_color = None + + binary_mask = (sem_seg == label).astype(np.uint8) + text = self.metadata.stuff_classes[label] + self.draw_binary_mask( + binary_mask, + color=mask_color, + edge_color=_OFF_WHITE, + text=text, + alpha=alpha, + area_threshold=area_threshold, + ) + return self.output + + def draw_panoptic_seg(self, panoptic_seg, segments_info, area_threshold=None, alpha=0.7): + """ + Draw panoptic prediction annotations or results. + + Args: + panoptic_seg (Tensor): of shape (height, width) where the values are ids for each + segment. + segments_info (list[dict] or None): Describe each segment in `panoptic_seg`. + If it is a ``list[dict]``, each dict contains keys "id", "category_id". + If None, category id of each pixel is computed by + ``pixel // metadata.label_divisor``. + area_threshold (int): stuff segments with less than `area_threshold` are not drawn. + + Returns: + output (VisImage): image object with visualizations. + """ + pred = _PanopticPrediction(panoptic_seg, segments_info, self.metadata) + + if self._instance_mode == ColorMode.IMAGE_BW: + self.output.reset_image(self._create_grayscale_image(pred.non_empty_mask())) + + # draw mask for all semantic segments first i.e. 
"stuff" + for mask, sinfo in pred.semantic_masks(): + category_idx = sinfo["category_id"] + try: + mask_color = [x / 255 for x in self.metadata.stuff_colors[category_idx]] + except AttributeError: + mask_color = None + + text = self.metadata.stuff_classes[category_idx] + self.draw_binary_mask( + mask, + color=mask_color, + edge_color=_OFF_WHITE, + text=text, + alpha=alpha, + area_threshold=area_threshold, + ) + + # draw mask for all instances second + all_instances = list(pred.instance_masks()) + if len(all_instances) == 0: + return self.output + masks, sinfo = list(zip(*all_instances)) + category_ids = [x["category_id"] for x in sinfo] + + try: + scores = [x["score"] for x in sinfo] + except KeyError: + scores = None + labels = _create_text_labels( + category_ids, scores, self.metadata.thing_classes, [x.get("iscrowd", 0) for x in sinfo] + ) + + try: + colors = [ + self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) for c in category_ids + ] + except AttributeError: + colors = None + self.overlay_instances(masks=masks, labels=labels, assigned_colors=colors, alpha=alpha) + + return self.output + + draw_panoptic_seg_predictions = draw_panoptic_seg # backward compatibility + + def draw_dataset_dict(self, dic): + """ + Draw annotations/segmentations in Detectron2 Dataset format. + + Args: + dic (dict): annotation/segmentation data of one image, in Detectron2 Dataset format. + + Returns: + output (VisImage): image object with visualizations. + """ + annos = dic.get("annotations", None) + if annos: + if "segmentation" in annos[0]: + masks = [x["segmentation"] for x in annos] + else: + masks = None + if "keypoints" in annos[0]: + keypts = [x["keypoints"] for x in annos] + keypts = np.array(keypts).reshape(len(annos), -1, 3) + else: + keypts = None + + boxes = [ + BoxMode.convert(x["bbox"], x["bbox_mode"], BoxMode.XYXY_ABS) + if len(x["bbox"]) == 4 + else x["bbox"] + for x in annos + ] + + colors = None + category_ids = [x["category_id"] for x in annos] + if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get("thing_colors"): + colors = [ + self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) + for c in category_ids + ] + names = self.metadata.get("thing_classes", None) + labels = _create_text_labels( + category_ids, + scores=None, + class_names=names, + is_crowd=[x.get("iscrowd", 0) for x in annos], + ) + self.overlay_instances( + labels=labels, boxes=boxes, masks=masks, keypoints=keypts, assigned_colors=colors + ) + + sem_seg = dic.get("sem_seg", None) + if sem_seg is None and "sem_seg_file_name" in dic: + with PathManager.open(dic["sem_seg_file_name"], "rb") as f: + sem_seg = Image.open(f) + sem_seg = np.asarray(sem_seg, dtype="uint8") + if sem_seg is not None: + self.draw_sem_seg(sem_seg, area_threshold=0, alpha=0.5) + + pan_seg = dic.get("pan_seg", None) + if pan_seg is None and "pan_seg_file_name" in dic: + with PathManager.open(dic["pan_seg_file_name"], "rb") as f: + pan_seg = Image.open(f) + pan_seg = np.asarray(pan_seg) + from panopticapi.utils import rgb2id + + pan_seg = rgb2id(pan_seg) + if pan_seg is not None: + segments_info = dic["segments_info"] + pan_seg = torch.tensor(pan_seg) + self.draw_panoptic_seg(pan_seg, segments_info, area_threshold=0, alpha=0.5) + return self.output + + def overlay_instances( + self, + *, + boxes=None, + labels=None, + masks=None, + keypoints=None, + assigned_colors=None, + alpha=0.5, + ): + """ + Args: + boxes (Boxes, RotatedBoxes or ndarray): either a :class:`Boxes`, + or an Nx4 numpy array of XYXY_ABS format for the 
N objects in a single image, + or a :class:`RotatedBoxes`, + or an Nx5 numpy array of (x_center, y_center, width, height, angle_degrees) format + for the N objects in a single image, + labels (list[str]): the text to be displayed for each instance. + masks (masks-like object): Supported types are: + + * :class:`detectron2.structures.PolygonMasks`, + :class:`detectron2.structures.BitMasks`. + * list[list[ndarray]]: contains the segmentation masks for all objects in one image. + The first level of the list corresponds to individual instances. The second + level to all the polygon that compose the instance, and the third level + to the polygon coordinates. The third level should have the format of + [x0, y0, x1, y1, ..., xn, yn] (n >= 3). + * list[ndarray]: each ndarray is a binary mask of shape (H, W). + * list[dict]: each dict is a COCO-style RLE. + keypoints (Keypoint or array like): an array-like object of shape (N, K, 3), + where the N is the number of instances and K is the number of keypoints. + The last dimension corresponds to (x, y, visibility or score). + assigned_colors (list[matplotlib.colors]): a list of colors, where each color + corresponds to each mask or box in the image. Refer to 'matplotlib.colors' + for full list of formats that the colors are accepted in. + Returns: + output (VisImage): image object with visualizations. + """ + num_instances = 0 + if boxes is not None: + boxes = self._convert_boxes(boxes) + num_instances = len(boxes) + if masks is not None: + masks = self._convert_masks(masks) + if num_instances: + assert len(masks) == num_instances + else: + num_instances = len(masks) + if keypoints is not None: + if num_instances: + assert len(keypoints) == num_instances + else: + num_instances = len(keypoints) + keypoints = self._convert_keypoints(keypoints) + if labels is not None: + assert len(labels) == num_instances + if assigned_colors is None: + assigned_colors = [random_color(rgb=True, maximum=1) for _ in range(num_instances)] + if num_instances == 0: + return self.output + if boxes is not None and boxes.shape[1] == 5: + return self.overlay_rotated_instances( + boxes=boxes, labels=labels, assigned_colors=assigned_colors + ) + + # Display in largest to smallest order to reduce occlusion. + areas = None + if boxes is not None: + areas = np.prod(boxes[:, 2:] - boxes[:, :2], axis=1) + elif masks is not None: + areas = np.asarray([x.area() for x in masks]) + + if areas is not None: + sorted_idxs = np.argsort(-areas).tolist() + # Re-order overlapped instances in descending order. + boxes = boxes[sorted_idxs] if boxes is not None else None + labels = [labels[k] for k in sorted_idxs] if labels is not None else None + masks = [masks[idx] for idx in sorted_idxs] if masks is not None else None + assigned_colors = [assigned_colors[idx] for idx in sorted_idxs] + keypoints = keypoints[sorted_idxs] if keypoints is not None else None + + for i in range(num_instances): + color = assigned_colors[i] + if boxes is not None: + self.draw_box(boxes[i], edge_color=color) + + if masks is not None: + for segment in masks[i].polygons: + self.draw_polygon(segment.reshape(-1, 2), color, alpha=alpha) + + if labels is not None: + # first get a box + if boxes is not None: + x0, y0, x1, y1 = boxes[i] + text_pos = (x0, y0) # if drawing boxes, put text on the box corner. 
+ horiz_align = "left" + elif masks is not None: + # skip small mask without polygon + if len(masks[i].polygons) == 0: + continue + + x0, y0, x1, y1 = masks[i].bbox() + + # draw text in the center (defined by median) when box is not drawn + # median is less sensitive to outliers. + text_pos = np.median(masks[i].mask.nonzero(), axis=1)[::-1] + horiz_align = "center" + else: + continue # drawing the box confidence for keypoints isn't very useful. + # for small objects, draw text at the side to avoid occlusion + instance_area = (y1 - y0) * (x1 - x0) + if ( + instance_area < _SMALL_OBJECT_AREA_THRESH * self.output.scale + or y1 - y0 < 40 * self.output.scale + ): + if y1 >= self.output.height - 5: + text_pos = (x1, y0) + else: + text_pos = (x0, y1) + + height_ratio = (y1 - y0) / np.sqrt(self.output.height * self.output.width) + lighter_color = self._change_color_brightness(color, brightness_factor=0.7) + font_size = ( + np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2) + * 0.5 + * self._default_font_size + ) + self.draw_text( + labels[i], + text_pos, + color=lighter_color, + horizontal_alignment=horiz_align, + font_size=font_size, + ) + + # draw keypoints + if keypoints is not None: + for keypoints_per_instance in keypoints: + self.draw_and_connect_keypoints(keypoints_per_instance) + + return self.output + + def overlay_rotated_instances(self, boxes=None, labels=None, assigned_colors=None): + """ + Args: + boxes (ndarray): an Nx5 numpy array of + (x_center, y_center, width, height, angle_degrees) format + for the N objects in a single image. + labels (list[str]): the text to be displayed for each instance. + assigned_colors (list[matplotlib.colors]): a list of colors, where each color + corresponds to each mask or box in the image. Refer to 'matplotlib.colors' + for full list of formats that the colors are accepted in. + + Returns: + output (VisImage): image object with visualizations. + """ + num_instances = len(boxes) + + if assigned_colors is None: + assigned_colors = [random_color(rgb=True, maximum=1) for _ in range(num_instances)] + if num_instances == 0: + return self.output + + # Display in largest to smallest order to reduce occlusion. + if boxes is not None: + areas = boxes[:, 2] * boxes[:, 3] + + sorted_idxs = np.argsort(-areas).tolist() + # Re-order overlapped instances in descending order. + boxes = boxes[sorted_idxs] + labels = [labels[k] for k in sorted_idxs] if labels is not None else None + colors = [assigned_colors[idx] for idx in sorted_idxs] + + for i in range(num_instances): + self.draw_rotated_box_with_label( + boxes[i], edge_color=colors[i], label=labels[i] if labels is not None else None + ) + + return self.output + + def draw_and_connect_keypoints(self, keypoints): + """ + Draws keypoints of an instance and follows the rules for keypoint connections + to draw lines between appropriate keypoints. This follows color heuristics for + line color. + + Args: + keypoints (Tensor): a tensor of shape (K, 3), where K is the number of keypoints + and the last dimension corresponds to (x, y, probability). + + Returns: + output (VisImage): image object with visualizations. 
+ """ + visible = {} + keypoint_names = self.metadata.get("keypoint_names") + for idx, keypoint in enumerate(keypoints): + + # draw keypoint + x, y, prob = keypoint + if prob > self.keypoint_threshold: + self.draw_circle((x, y), color=_RED) + if keypoint_names: + keypoint_name = keypoint_names[idx] + visible[keypoint_name] = (x, y) + + if self.metadata.get("keypoint_connection_rules"): + for kp0, kp1, color in self.metadata.keypoint_connection_rules: + if kp0 in visible and kp1 in visible: + x0, y0 = visible[kp0] + x1, y1 = visible[kp1] + color = tuple(x / 255.0 for x in color) + self.draw_line([x0, x1], [y0, y1], color=color) + + # draw lines from nose to mid-shoulder and mid-shoulder to mid-hip + # Note that this strategy is specific to person keypoints. + # For other keypoints, it should just do nothing + try: + ls_x, ls_y = visible["left_shoulder"] + rs_x, rs_y = visible["right_shoulder"] + mid_shoulder_x, mid_shoulder_y = (ls_x + rs_x) / 2, (ls_y + rs_y) / 2 + except KeyError: + pass + else: + # draw line from nose to mid-shoulder + nose_x, nose_y = visible.get("nose", (None, None)) + if nose_x is not None: + self.draw_line([nose_x, mid_shoulder_x], [nose_y, mid_shoulder_y], color=_RED) + + try: + # draw line from mid-shoulder to mid-hip + lh_x, lh_y = visible["left_hip"] + rh_x, rh_y = visible["right_hip"] + except KeyError: + pass + else: + mid_hip_x, mid_hip_y = (lh_x + rh_x) / 2, (lh_y + rh_y) / 2 + self.draw_line([mid_hip_x, mid_shoulder_x], [mid_hip_y, mid_shoulder_y], color=_RED) + return self.output + + """ + Primitive drawing functions: + """ + + def draw_text( + self, + text, + position, + *, + font_size=None, + color="g", + horizontal_alignment="center", + rotation=0, + ): + """ + Args: + text (str): class label + position (tuple): a tuple of the x and y coordinates to place text on image. + font_size (int, optional): font of the text. If not provided, a font size + proportional to the image width is calculated and used. + color: color of the text. Refer to `matplotlib.colors` for full list + of formats that are accepted. + horizontal_alignment (str): see `matplotlib.text.Text` + rotation: rotation angle in degrees CCW + + Returns: + output (VisImage): image object with text drawn. + """ + if not font_size: + font_size = self._default_font_size + + # since the text background is dark, we don't want the text to be dark + color = np.maximum(list(mplc.to_rgb(color)), 0.2) + color[np.argmax(color)] = max(0.8, np.max(color)) + + x, y = position + self.output.ax.text( + x, + y, + text, + size=font_size * self.output.scale, + family="sans-serif", + bbox={"facecolor": "black", "alpha": 0.8, "pad": 0.7, "edgecolor": "none"}, + verticalalignment="top", + horizontalalignment=horizontal_alignment, + color=color, + zorder=10, + rotation=rotation, + ) + return self.output + + def draw_box(self, box_coord, alpha=0.5, edge_color="g", line_style="-"): + """ + Args: + box_coord (tuple): a tuple containing x0, y0, x1, y1 coordinates, where x0 and y0 + are the coordinates of the image's top left corner. x1 and y1 are the + coordinates of the image's bottom right corner. + alpha (float): blending efficient. Smaller values lead to more transparent masks. + edge_color: color of the outline of the box. Refer to `matplotlib.colors` + for full list of formats that are accepted. + line_style (string): the string to use to create the outline of the boxes. + + Returns: + output (VisImage): image object with box drawn. 
+ """ + x0, y0, x1, y1 = box_coord + width = x1 - x0 + height = y1 - y0 + + linewidth = max(self._default_font_size / 4, 1) + + self.output.ax.add_patch( + mpl.patches.Rectangle( + (x0, y0), + width, + height, + fill=False, + edgecolor=edge_color, + linewidth=linewidth * self.output.scale, + alpha=alpha, + linestyle=line_style, + ) + ) + return self.output + + def draw_rotated_box_with_label( + self, rotated_box, alpha=0.5, edge_color="g", line_style="-", label=None + ): + """ + Draw a rotated box with label on its top-left corner. + + Args: + rotated_box (tuple): a tuple containing (cnt_x, cnt_y, w, h, angle), + where cnt_x and cnt_y are the center coordinates of the box. + w and h are the width and height of the box. angle represents how + many degrees the box is rotated CCW with regard to the 0-degree box. + alpha (float): blending efficient. Smaller values lead to more transparent masks. + edge_color: color of the outline of the box. Refer to `matplotlib.colors` + for full list of formats that are accepted. + line_style (string): the string to use to create the outline of the boxes. + label (string): label for rotated box. It will not be rendered when set to None. + + Returns: + output (VisImage): image object with box drawn. + """ + cnt_x, cnt_y, w, h, angle = rotated_box + area = w * h + # use thinner lines when the box is small + linewidth = self._default_font_size / ( + 6 if area < _SMALL_OBJECT_AREA_THRESH * self.output.scale else 3 + ) + + theta = angle * math.pi / 180.0 + c = math.cos(theta) + s = math.sin(theta) + rect = [(-w / 2, h / 2), (-w / 2, -h / 2), (w / 2, -h / 2), (w / 2, h / 2)] + # x: left->right ; y: top->down + rotated_rect = [(s * yy + c * xx + cnt_x, c * yy - s * xx + cnt_y) for (xx, yy) in rect] + for k in range(4): + j = (k + 1) % 4 + self.draw_line( + [rotated_rect[k][0], rotated_rect[j][0]], + [rotated_rect[k][1], rotated_rect[j][1]], + color=edge_color, + linestyle="--" if k == 1 else line_style, + linewidth=linewidth, + ) + + if label is not None: + text_pos = rotated_rect[1] # topleft corner + + height_ratio = h / np.sqrt(self.output.height * self.output.width) + label_color = self._change_color_brightness(edge_color, brightness_factor=0.7) + font_size = ( + np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2) * 0.5 * self._default_font_size + ) + self.draw_text(label, text_pos, color=label_color, font_size=font_size, rotation=angle) + + return self.output + + def draw_circle(self, circle_coord, color, radius=3): + """ + Args: + circle_coord (list(int) or tuple(int)): contains the x and y coordinates + of the center of the circle. + color: color of the polygon. Refer to `matplotlib.colors` for a full list of + formats that are accepted. + radius (int): radius of the circle. + + Returns: + output (VisImage): image object with box drawn. + """ + x, y = circle_coord + self.output.ax.add_patch( + mpl.patches.Circle(circle_coord, radius=radius, fill=True, color=color) + ) + return self.output + + def draw_line(self, x_data, y_data, color, linestyle="-", linewidth=None): + """ + Args: + x_data (list[int]): a list containing x values of all the points being drawn. + Length of list should match the length of y_data. + y_data (list[int]): a list containing y values of all the points being drawn. + Length of list should match the length of x_data. + color: color of the line. Refer to `matplotlib.colors` for a full list of + formats that are accepted. + linestyle: style of the line. Refer to `matplotlib.lines.Line2D` + for a full list of formats that are accepted. 
+ linewidth (float or None): width of the line. When it's None, + a default value will be computed and used. + + Returns: + output (VisImage): image object with line drawn. + """ + if linewidth is None: + linewidth = self._default_font_size / 3 + linewidth = max(linewidth, 1) + self.output.ax.add_line( + mpl.lines.Line2D( + x_data, + y_data, + linewidth=linewidth * self.output.scale, + color=color, + linestyle=linestyle, + ) + ) + return self.output + + def draw_binary_mask( + self, binary_mask, color=None, *, edge_color=None, text=None, alpha=0.5, area_threshold=10 + ): + """ + Args: + binary_mask (ndarray): numpy array of shape (H, W), where H is the image height and + W is the image width. Each value in the array is either a 0 or 1 value of uint8 + type. + color: color of the mask. Refer to `matplotlib.colors` for a full list of + formats that are accepted. If None, will pick a random color. + edge_color: color of the polygon edges. Refer to `matplotlib.colors` for a + full list of formats that are accepted. + text (str): if None, will be drawn on the object + alpha (float): blending efficient. Smaller values lead to more transparent masks. + area_threshold (float): a connected component smaller than this area will not be shown. + + Returns: + output (VisImage): image object with mask drawn. + """ + if color is None: + color = random_color(rgb=True, maximum=1) + color = mplc.to_rgb(color) + + has_valid_segment = False + binary_mask = binary_mask.astype("uint8") # opencv needs uint8 + mask = GenericMask(binary_mask, self.output.height, self.output.width) + shape2d = (binary_mask.shape[0], binary_mask.shape[1]) + + if not mask.has_holes: + # draw polygons for regular masks + for segment in mask.polygons: + area = mask_util.area(mask_util.frPyObjects([segment], shape2d[0], shape2d[1])) + if area < (area_threshold or 0): + continue + has_valid_segment = True + segment = segment.reshape(-1, 2) + self.draw_polygon(segment, color=color, edge_color=edge_color, alpha=alpha) + else: + # TODO: Use Path/PathPatch to draw vector graphics: + # https://stackoverflow.com/questions/8919719/how-to-plot-a-complex-polygon + rgba = np.zeros(shape2d + (4,), dtype="float32") + rgba[:, :, :3] = color + rgba[:, :, 3] = (mask.mask == 1).astype("float32") * alpha + has_valid_segment = True + self.output.ax.imshow(rgba, extent=(0, self.output.width, self.output.height, 0)) + + if text is not None and has_valid_segment: + lighter_color = self._change_color_brightness(color, brightness_factor=0.7) + self._draw_text_in_mask(binary_mask, text, lighter_color) + return self.output + + def draw_soft_mask(self, soft_mask, color=None, *, text=None, alpha=0.5): + """ + Args: + soft_mask (ndarray): float array of shape (H, W), each value in [0, 1]. + color: color of the mask. Refer to `matplotlib.colors` for a full list of + formats that are accepted. If None, will pick a random color. + text (str): if None, will be drawn on the object + alpha (float): blending efficient. Smaller values lead to more transparent masks. + + Returns: + output (VisImage): image object with mask drawn. 
+ """ + if color is None: + color = random_color(rgb=True, maximum=1) + color = mplc.to_rgb(color) + + shape2d = (soft_mask.shape[0], soft_mask.shape[1]) + rgba = np.zeros(shape2d + (4,), dtype="float32") + rgba[:, :, :3] = color + rgba[:, :, 3] = soft_mask * alpha + self.output.ax.imshow(rgba, extent=(0, self.output.width, self.output.height, 0)) + + if text is not None: + lighter_color = self._change_color_brightness(color, brightness_factor=0.7) + binary_mask = (soft_mask > 0.5).astype("uint8") + self._draw_text_in_mask(binary_mask, text, lighter_color) + return self.output + + def draw_polygon(self, segment, color, edge_color=None, alpha=0.5): + """ + Args: + segment: numpy array of shape Nx2, containing all the points in the polygon. + color: color of the polygon. Refer to `matplotlib.colors` for a full list of + formats that are accepted. + edge_color: color of the polygon edges. Refer to `matplotlib.colors` for a + full list of formats that are accepted. If not provided, a darker shade + of the polygon color will be used instead. + alpha (float): blending efficient. Smaller values lead to more transparent masks. + + Returns: + output (VisImage): image object with polygon drawn. + """ + if edge_color is None: + # make edge color darker than the polygon color + if alpha > 0.8: + edge_color = self._change_color_brightness(color, brightness_factor=-0.7) + else: + edge_color = color + edge_color = mplc.to_rgb(edge_color) + (1,) + + polygon = mpl.patches.Polygon( + segment, + fill=True, + facecolor=mplc.to_rgb(color) + (alpha,), + edgecolor=edge_color, + linewidth=max(self._default_font_size // 15 * self.output.scale, 1), + ) + self.output.ax.add_patch(polygon) + return self.output + + """ + Internal methods: + """ + + def _jitter(self, color): + """ + Randomly modifies given color to produce a slightly different color than the color given. + + Args: + color (tuple[double]): a tuple of 3 elements, containing the RGB values of the color + picked. The values in the list are in the [0.0, 1.0] range. + + Returns: + jittered_color (tuple[double]): a tuple of 3 elements, containing the RGB values of the + color after being jittered. The values in the list are in the [0.0, 1.0] range. + """ + color = mplc.to_rgb(color) + vec = np.random.rand(3) + # better to do it in another color space + vec = vec / np.linalg.norm(vec) * 0.5 + res = np.clip(vec + color, 0, 1) + return tuple(res) + + def _create_grayscale_image(self, mask=None): + """ + Create a grayscale version of the original image. + The colors in masked area, if given, will be kept. + """ + img_bw = self.img.astype("f4").mean(axis=2) + img_bw = np.stack([img_bw] * 3, axis=2) + if mask is not None: + img_bw[mask] = self.img[mask] + return img_bw + + def _change_color_brightness(self, color, brightness_factor): + """ + Depending on the brightness_factor, gives a lighter or darker color i.e. a color with + less or more saturation than the original color. + + Args: + color: color of the polygon. Refer to `matplotlib.colors` for a full list of + formats that are accepted. + brightness_factor (float): a value in [-1.0, 1.0] range. A lightness factor of + 0 will correspond to no change, a factor in [-1.0, 0) range will result in + a darker color and a factor in (0, 1.0] range will result in a lighter color. + + Returns: + modified_color (tuple[double]): a tuple containing the RGB values of the + modified color. Each value in the tuple is in the [0.0, 1.0] range. 
+ """ + assert brightness_factor >= -1.0 and brightness_factor <= 1.0 + color = mplc.to_rgb(color) + polygon_color = colorsys.rgb_to_hls(*mplc.to_rgb(color)) + modified_lightness = polygon_color[1] + (brightness_factor * polygon_color[1]) + modified_lightness = 0.0 if modified_lightness < 0.0 else modified_lightness + modified_lightness = 1.0 if modified_lightness > 1.0 else modified_lightness + modified_color = colorsys.hls_to_rgb(polygon_color[0], modified_lightness, polygon_color[2]) + return tuple(np.clip(modified_color, 0.0, 1.0)) + + def _convert_boxes(self, boxes): + """ + Convert different format of boxes to an NxB array, where B = 4 or 5 is the box dimension. + """ + if isinstance(boxes, Boxes) or isinstance(boxes, RotatedBoxes): + return boxes.tensor.detach().numpy() + else: + return np.asarray(boxes) + + def _convert_masks(self, masks_or_polygons): + """ + Convert different format of masks or polygons to a tuple of masks and polygons. + + Returns: + list[GenericMask]: + """ + + m = masks_or_polygons + if isinstance(m, PolygonMasks): + m = m.polygons + if isinstance(m, BitMasks): + m = m.tensor.numpy() + if isinstance(m, torch.Tensor): + m = m.numpy() + ret = [] + for x in m: + if isinstance(x, GenericMask): + ret.append(x) + else: + ret.append(GenericMask(x, self.output.height, self.output.width)) + return ret + + def _draw_text_in_mask(self, binary_mask, text, color): + """ + Find proper places to draw text given a binary mask. + """ + # TODO sometimes drawn on wrong objects. the heuristics here can improve. + _num_cc, cc_labels, stats, centroids = cv2.connectedComponentsWithStats(binary_mask, 8) + if stats[1:, -1].size == 0: + return + largest_component_id = np.argmax(stats[1:, -1]) + 1 + + # draw text on the largest component, as well as other very large components. + for cid in range(1, _num_cc): + if cid == largest_component_id or stats[cid, -1] > _LARGE_MASK_AREA_THRESH: + # median is more stable than centroid + # center = centroids[largest_component_id] + center = np.median((cc_labels == cid).nonzero(), axis=1)[::-1] + self.draw_text(text, center, color=color) + + def _convert_keypoints(self, keypoints): + if isinstance(keypoints, Keypoints): + keypoints = keypoints.tensor + keypoints = np.asarray(keypoints) + return keypoints + + def get_output(self): + """ + Returns: + output (VisImage): the image output containing the visualizations added + to the image. + """ + return self.output diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..39ebcd384f616ae2ba170407cee3267d461a5914 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from . import data # register all new datasets +from . 
import modeling + +# config +from .config import * + +# models +from .oneformer_model import OneFormer \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/config.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/config.py new file mode 100644 index 0000000000000000000000000000000000000000..78879b1edd2a9edec5cdaf8e3cc1fd471c3a57be --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/config.py @@ -0,0 +1,239 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. +from annotator.oneformer.detectron2.config import CfgNode as CN + +__all__ = ["add_common_config", "add_oneformer_config", "add_swin_config", + "add_dinat_config", "add_beit_adapter_config", "add_convnext_config"] + +def add_common_config(cfg): + """ + Add config for common configuration + """ + # data config + # select the dataset mapper + cfg.INPUT.DATASET_MAPPER_NAME = "oneformer_unified" + # Color augmentation + cfg.INPUT.COLOR_AUG_SSD = False + # We retry random cropping until no single category in semantic segmentation GT occupies more + # than `SINGLE_CATEGORY_MAX_AREA` part of the crop. + cfg.INPUT.CROP.SINGLE_CATEGORY_MAX_AREA = 1.0 + # Pad image and segmentation GT in dataset mapper. + cfg.INPUT.SIZE_DIVISIBILITY = -1 + + cfg.INPUT.TASK_SEQ_LEN = 77 + cfg.INPUT.MAX_SEQ_LEN = 77 + + cfg.INPUT.TASK_PROB = CN() + cfg.INPUT.TASK_PROB.SEMANTIC = 0.33 + cfg.INPUT.TASK_PROB.INSTANCE = 0.66 + + # test dataset + cfg.DATASETS.TEST_PANOPTIC = ("",) + cfg.DATASETS.TEST_INSTANCE = ("",) + cfg.DATASETS.TEST_SEMANTIC = ("",) + + # solver config + # weight decay on embedding + cfg.SOLVER.WEIGHT_DECAY_EMBED = 0.0 + # optimizer + cfg.SOLVER.OPTIMIZER = "ADAMW" + cfg.SOLVER.BACKBONE_MULTIPLIER = 0.1 + + # wandb + cfg.WANDB = CN() + cfg.WANDB.PROJECT = "unified_dense_recognition" + cfg.WANDB.NAME = None + + cfg.MODEL.IS_TRAIN = False + cfg.MODEL.IS_DEMO = True + + # text encoder config + cfg.MODEL.TEXT_ENCODER = CN() + + cfg.MODEL.TEXT_ENCODER.WIDTH = 256 + cfg.MODEL.TEXT_ENCODER.CONTEXT_LENGTH = 77 + cfg.MODEL.TEXT_ENCODER.NUM_LAYERS = 12 + cfg.MODEL.TEXT_ENCODER.VOCAB_SIZE = 49408 + cfg.MODEL.TEXT_ENCODER.PROJ_NUM_LAYERS = 2 + cfg.MODEL.TEXT_ENCODER.N_CTX = 16 + + # mask_former inference config + cfg.MODEL.TEST = CN() + cfg.MODEL.TEST.SEMANTIC_ON = True + cfg.MODEL.TEST.INSTANCE_ON = False + cfg.MODEL.TEST.PANOPTIC_ON = False + cfg.MODEL.TEST.DETECTION_ON = False + cfg.MODEL.TEST.OBJECT_MASK_THRESHOLD = 0.0 + cfg.MODEL.TEST.OVERLAP_THRESHOLD = 0.0 + cfg.MODEL.TEST.SEM_SEG_POSTPROCESSING_BEFORE_INFERENCE = False + cfg.MODEL.TEST.TASK = "panoptic" + + # TEST AUG Slide + cfg.TEST.AUG.IS_SLIDE = False + cfg.TEST.AUG.CROP_SIZE = (640, 640) + cfg.TEST.AUG.STRIDE = (426, 426) + cfg.TEST.AUG.SCALE = (2048, 640) + cfg.TEST.AUG.SETR_MULTI_SCALE = True + cfg.TEST.AUG.KEEP_RATIO = True + cfg.TEST.AUG.SIZE_DIVISOR = 32 + + # pixel decoder config + cfg.MODEL.SEM_SEG_HEAD.MASK_DIM = 256 + # adding transformer in pixel decoder + cfg.MODEL.SEM_SEG_HEAD.TRANSFORMER_ENC_LAYERS = 0 + # pixel decoder + cfg.MODEL.SEM_SEG_HEAD.PIXEL_DECODER_NAME = "BasePixelDecoder" + cfg.MODEL.SEM_SEG_HEAD.SEM_EMBED_DIM = 256 + cfg.MODEL.SEM_SEG_HEAD.INST_EMBED_DIM = 256 + + # LSJ aug + cfg.INPUT.IMAGE_SIZE = 1024 + cfg.INPUT.MIN_SCALE = 0.1 + cfg.INPUT.MAX_SCALE = 2.0 + + # MSDeformAttn encoder configs + cfg.MODEL.SEM_SEG_HEAD.DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES = ["res3", "res4", "res5"] + 
cfg.MODEL.SEM_SEG_HEAD.DEFORMABLE_TRANSFORMER_ENCODER_N_POINTS = 4 + cfg.MODEL.SEM_SEG_HEAD.DEFORMABLE_TRANSFORMER_ENCODER_N_HEADS = 8 + +def add_oneformer_config(cfg): + """ + Add config for ONE_FORMER. + """ + + # mask_former model config + cfg.MODEL.ONE_FORMER = CN() + + # loss + cfg.MODEL.ONE_FORMER.DEEP_SUPERVISION = True + cfg.MODEL.ONE_FORMER.NO_OBJECT_WEIGHT = 0.1 + cfg.MODEL.ONE_FORMER.CLASS_WEIGHT = 1.0 + cfg.MODEL.ONE_FORMER.DICE_WEIGHT = 1.0 + cfg.MODEL.ONE_FORMER.MASK_WEIGHT = 20.0 + cfg.MODEL.ONE_FORMER.CONTRASTIVE_WEIGHT = 0.5 + cfg.MODEL.ONE_FORMER.CONTRASTIVE_TEMPERATURE = 0.07 + + # transformer config + cfg.MODEL.ONE_FORMER.NHEADS = 8 + cfg.MODEL.ONE_FORMER.DROPOUT = 0.1 + cfg.MODEL.ONE_FORMER.DIM_FEEDFORWARD = 2048 + cfg.MODEL.ONE_FORMER.ENC_LAYERS = 0 + cfg.MODEL.ONE_FORMER.CLASS_DEC_LAYERS = 2 + cfg.MODEL.ONE_FORMER.DEC_LAYERS = 6 + cfg.MODEL.ONE_FORMER.PRE_NORM = False + + cfg.MODEL.ONE_FORMER.HIDDEN_DIM = 256 + cfg.MODEL.ONE_FORMER.NUM_OBJECT_QUERIES = 120 + cfg.MODEL.ONE_FORMER.NUM_OBJECT_CTX = 16 + cfg.MODEL.ONE_FORMER.USE_TASK_NORM = True + + cfg.MODEL.ONE_FORMER.TRANSFORMER_IN_FEATURE = "res5" + cfg.MODEL.ONE_FORMER.ENFORCE_INPUT_PROJ = False + + # Sometimes `backbone.size_divisibility` is set to 0 for some backbone (e.g. ResNet) + # you can use this config to override + cfg.MODEL.ONE_FORMER.SIZE_DIVISIBILITY = 32 + + # transformer module + cfg.MODEL.ONE_FORMER.TRANSFORMER_DECODER_NAME = "ContrastiveMultiScaleMaskedTransformerDecoder" + + # point loss configs + # Number of points sampled during training for a mask point head. + cfg.MODEL.ONE_FORMER.TRAIN_NUM_POINTS = 112 * 112 + # Oversampling parameter for PointRend point sampling during training. Parameter `k` in the + # original paper. + cfg.MODEL.ONE_FORMER.OVERSAMPLE_RATIO = 3.0 + # Importance sampling parameter for PointRend point sampling during training. Parametr `beta` in + # the original paper. + cfg.MODEL.ONE_FORMER.IMPORTANCE_SAMPLE_RATIO = 0.75 + +def add_swin_config(cfg): + """ + Add config forSWIN Backbone. + """ + + # swin transformer backbone + cfg.MODEL.SWIN = CN() + cfg.MODEL.SWIN.PRETRAIN_IMG_SIZE = 224 + cfg.MODEL.SWIN.PATCH_SIZE = 4 + cfg.MODEL.SWIN.EMBED_DIM = 96 + cfg.MODEL.SWIN.DEPTHS = [2, 2, 6, 2] + cfg.MODEL.SWIN.NUM_HEADS = [3, 6, 12, 24] + cfg.MODEL.SWIN.WINDOW_SIZE = 7 + cfg.MODEL.SWIN.MLP_RATIO = 4.0 + cfg.MODEL.SWIN.QKV_BIAS = True + cfg.MODEL.SWIN.QK_SCALE = None + cfg.MODEL.SWIN.DROP_RATE = 0.0 + cfg.MODEL.SWIN.ATTN_DROP_RATE = 0.0 + cfg.MODEL.SWIN.DROP_PATH_RATE = 0.3 + cfg.MODEL.SWIN.APE = False + cfg.MODEL.SWIN.PATCH_NORM = True + cfg.MODEL.SWIN.OUT_FEATURES = ["res2", "res3", "res4", "res5"] + cfg.MODEL.SWIN.USE_CHECKPOINT = False + ## Semask additions + cfg.MODEL.SWIN.SEM_WINDOW_SIZE = 7 + cfg.MODEL.SWIN.NUM_SEM_BLOCKS = 1 + +def add_dinat_config(cfg): + """ + Add config for NAT Backbone. + """ + + # DINAT transformer backbone + cfg.MODEL.DiNAT = CN() + cfg.MODEL.DiNAT.DEPTHS = [3, 4, 18, 5] + cfg.MODEL.DiNAT.OUT_FEATURES = ["res2", "res3", "res4", "res5"] + cfg.MODEL.DiNAT.EMBED_DIM = 64 + cfg.MODEL.DiNAT.MLP_RATIO = 3.0 + cfg.MODEL.DiNAT.NUM_HEADS = [2, 4, 8, 16] + cfg.MODEL.DiNAT.DROP_PATH_RATE = 0.2 + cfg.MODEL.DiNAT.KERNEL_SIZE = 7 + cfg.MODEL.DiNAT.DILATIONS = [[1, 16, 1], [1, 4, 1, 8], [1, 2, 1, 3, 1, 4], [1, 2, 1, 2, 1]] + cfg.MODEL.DiNAT.OUT_INDICES = (0, 1, 2, 3) + cfg.MODEL.DiNAT.QKV_BIAS = True + cfg.MODEL.DiNAT.QK_SCALE = None + cfg.MODEL.DiNAT.DROP_RATE = 0 + cfg.MODEL.DiNAT.ATTN_DROP_RATE = 0. 
+ cfg.MODEL.DiNAT.IN_PATCH_SIZE = 4 + +def add_convnext_config(cfg): + """ + Add config for ConvNeXt Backbone. + """ + + # swin transformer backbone + cfg.MODEL.CONVNEXT = CN() + cfg.MODEL.CONVNEXT.IN_CHANNELS = 3 + cfg.MODEL.CONVNEXT.DEPTHS = [3, 3, 27, 3] + cfg.MODEL.CONVNEXT.DIMS = [192, 384, 768, 1536] + cfg.MODEL.CONVNEXT.DROP_PATH_RATE = 0.4 + cfg.MODEL.CONVNEXT.LSIT = 1.0 + cfg.MODEL.CONVNEXT.OUT_INDICES = [0, 1, 2, 3] + cfg.MODEL.CONVNEXT.OUT_FEATURES = ["res2", "res3", "res4", "res5"] + +def add_beit_adapter_config(cfg): + """ + Add config for BEiT Adapter Backbone. + """ + + # beit adapter backbone + cfg.MODEL.BEiTAdapter = CN() + cfg.MODEL.BEiTAdapter.IMG_SIZE = 640 + cfg.MODEL.BEiTAdapter.PATCH_SIZE = 16 + cfg.MODEL.BEiTAdapter.EMBED_DIM = 1024 + cfg.MODEL.BEiTAdapter.DEPTH = 24 + cfg.MODEL.BEiTAdapter.NUM_HEADS = 16 + cfg.MODEL.BEiTAdapter.MLP_RATIO = 4 + cfg.MODEL.BEiTAdapter.QKV_BIAS = True + cfg.MODEL.BEiTAdapter.USE_ABS_POS_EMB = False + cfg.MODEL.BEiTAdapter.USE_REL_POS_BIAS = True + cfg.MODEL.BEiTAdapter.INIT_VALUES = 1e-6 + cfg.MODEL.BEiTAdapter.DROP_PATH_RATE = 0.3 + cfg.MODEL.BEiTAdapter.CONV_INPLANE = 64 + cfg.MODEL.BEiTAdapter.N_POINTS = 4 + cfg.MODEL.BEiTAdapter.DEFORM_NUM_HEADS = 16 + cfg.MODEL.BEiTAdapter.CFFN_RATIO = 0.25 + cfg.MODEL.BEiTAdapter.DEFORM_RATIO = 0.5 + cfg.MODEL.BEiTAdapter.WITH_CP = True + cfg.MODEL.BEiTAdapter.INTERACTION_INDEXES=[[0, 5], [6, 11], [12, 17], [18, 23]] + cfg.MODEL.BEiTAdapter.OUT_FEATURES = ["res2", "res3", "res4", "res5"] \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/data/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..63ba265b1effc69f1eef16e57a04db8902ee347e --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/data/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from . import datasets diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/data/bpe_simple_vocab_16e6.txt.gz b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/data/bpe_simple_vocab_16e6.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..36a15856e00a06a9fbed8cdd34d2393fea4a3113 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/data/bpe_simple_vocab_16e6.txt.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:924691ac288e54409236115652ad4aa250f48203de50a9e4722a6ecd48d6804a +size 1356917 diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/data/build.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/data/build.py new file mode 100644 index 0000000000000000000000000000000000000000..2611644589d6a5978c257a4e349a1b466f366c0c --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/data/build.py @@ -0,0 +1,117 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
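+# Note: this module only defines the inference-time loader (see __all__ below), built on top of
+# the detectron2 data utilities imported below.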
+from typing import Any, Callable, Dict, List, Optional, Union +import torch.utils.data as torchdata + +from annotator.oneformer.detectron2.config import configurable + + +from annotator.oneformer.detectron2.data.common import DatasetFromList, MapDataset +from annotator.oneformer.detectron2.data.dataset_mapper import DatasetMapper +from annotator.oneformer.detectron2.data.samplers import ( + InferenceSampler, +) +from annotator.oneformer.detectron2.data.build import ( + get_detection_dataset_dicts, + trivial_batch_collator +) +""" +This file contains the default logic to build a dataloader for training or testing. +""" + +__all__ = [ + "build_detection_test_loader", +] + + +def _test_loader_from_config(cfg, dataset_name, mapper=None): + """ + Uses the given `dataset_name` argument (instead of the names in cfg), because the + standard practice is to evaluate each test set individually (not combining them). + """ + if isinstance(dataset_name, str): + dataset_name = [dataset_name] + + dataset = get_detection_dataset_dicts( + dataset_name, + filter_empty=False, + proposal_files=[ + cfg.DATASETS.PROPOSAL_FILES_TEST[list(cfg.DATASETS.TEST).index(x)] for x in dataset_name + ] + if cfg.MODEL.LOAD_PROPOSALS + else None, + ) + if mapper is None: + mapper = DatasetMapper(cfg, False) + return { + "dataset": dataset, + "mapper": mapper, + "num_workers": cfg.DATALOADER.NUM_WORKERS, + "sampler": InferenceSampler(len(dataset)) + if not isinstance(dataset, torchdata.IterableDataset) + else None, + } + + +@configurable(from_config=_test_loader_from_config) +def build_detection_test_loader( + dataset: Union[List[Any], torchdata.Dataset], + *, + mapper: Callable[[Dict[str, Any]], Any], + sampler: Optional[torchdata.Sampler] = None, + batch_size: int = 1, + num_workers: int = 0, + collate_fn: Optional[Callable[[List[Any]], Any]] = None, +) -> torchdata.DataLoader: + """ + Similar to `build_detection_train_loader`, with default batch size = 1, + and sampler = :class:`InferenceSampler`. This sampler coordinates all workers + to produce the exact set of all samples. + + Args: + dataset: a list of dataset dicts, + or a pytorch dataset (either map-style or iterable). They can be obtained + by using :func:`DatasetCatalog.get` or :func:`get_detection_dataset_dicts`. + mapper: a callable which takes a sample (dict) from dataset + and returns the format to be consumed by the model. + When using cfg, the default choice is ``DatasetMapper(cfg, is_train=False)``. + sampler: a sampler that produces + indices to be applied on ``dataset``. Default to :class:`InferenceSampler`, + which splits the dataset across all workers. Sampler must be None + if `dataset` is iterable. + batch_size: the batch size of the data loader to be created. + Default to 1 image per worker since this is the standard when reporting + inference time in papers. + num_workers: number of parallel data loading workers + collate_fn: same as the argument of `torch.utils.data.DataLoader`. + Defaults to do no collation and return a list of data. + + Returns: + DataLoader: a torch DataLoader, that loads the given detection + dataset, with test-time transformation and batching. 
+ + Examples: + :: + data_loader = build_detection_test_loader( + DatasetRegistry.get("my_test"), + mapper=DatasetMapper(...)) + + # or, instantiate with a CfgNode: + data_loader = build_detection_test_loader(cfg, "my_test") + """ + if isinstance(dataset, list): + dataset = DatasetFromList(dataset, copy=False) + if mapper is not None: + dataset = MapDataset(dataset, mapper) + if isinstance(dataset, torchdata.IterableDataset): + assert sampler is None, "sampler must be None if dataset is IterableDataset" + else: + if sampler is None: + sampler = InferenceSampler(len(dataset)) + return torchdata.DataLoader( + dataset, + batch_size=batch_size, + sampler=sampler, + drop_last=False, + num_workers=num_workers, + collate_fn=trivial_batch_collator if collate_fn is None else collate_fn, + ) \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/data/dataset_mappers/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/data/dataset_mappers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/data/dataset_mappers/__init__.py @@ -0,0 +1 @@ + diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/data/dataset_mappers/coco_unified_new_baseline_dataset_mapper.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/data/dataset_mappers/coco_unified_new_baseline_dataset_mapper.py new file mode 100644 index 0000000000000000000000000000000000000000..25a460bf73e0417916d2e09e2edc1f975155024c --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/data/dataset_mappers/coco_unified_new_baseline_dataset_mapper.py @@ -0,0 +1,341 @@ +# ------------------------------------------------------------------------------ +# Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/data/dataset_mappers/coco_panoptic_new_baseline_dataset_mapper.py +# Modified by Jitesh Jain (https://github.com/praeclarumjj3) +# ------------------------------------------------------------------------------ + +import copy +import logging + +import numpy as np +import torch + +from annotator.oneformer.detectron2.data import MetadataCatalog +from annotator.oneformer.detectron2.config import configurable +from annotator.oneformer.detectron2.data import detection_utils as utils +from annotator.oneformer.detectron2.data import transforms as T +from annotator.oneformer.detectron2.structures import BitMasks, Instances +from annotator.oneformer.oneformer.utils.box_ops import masks_to_boxes +from annotator.oneformer.oneformer.data.tokenizer import SimpleTokenizer, Tokenize + +__all__ = ["COCOUnifiedNewBaselineDatasetMapper"] + + +def build_transform_gen(cfg, is_train): + """ + Create a list of default :class:`Augmentation` from config. + Now it includes resizing and flipping. 
+ Returns: + list[Augmentation] + """ + assert is_train, "Only support training augmentation" + image_size = cfg.INPUT.IMAGE_SIZE + min_scale = cfg.INPUT.MIN_SCALE + max_scale = cfg.INPUT.MAX_SCALE + + augmentation = [] + + if cfg.INPUT.RANDOM_FLIP != "none": + augmentation.append( + T.RandomFlip( + horizontal=cfg.INPUT.RANDOM_FLIP == "horizontal", + vertical=cfg.INPUT.RANDOM_FLIP == "vertical", + ) + ) + + augmentation.extend([ + T.ResizeScale( + min_scale=min_scale, max_scale=max_scale, target_height=image_size, target_width=image_size + ), + T.FixedSizeCrop(crop_size=(image_size, image_size)), + ]) + + return augmentation + + +# This is specifically designed for the COCO dataset. +class COCOUnifiedNewBaselineDatasetMapper: + """ + A callable which takes a dataset dict in Detectron2 Dataset format, + and map it into a format used by OneFormer. + + This dataset mapper applies the same transformation as DETR for COCO panoptic segmentation. + + The callable currently does the following: + + 1. Read the image from "file_name" + 2. Applies geometric transforms to the image and annotation + 3. Find and applies suitable cropping to the image and annotation + 4. Prepare image and annotation to Tensors + """ + + @configurable + def __init__( + self, + is_train=True, + *, + num_queries, + tfm_gens, + meta, + image_format, + max_seq_len, + task_seq_len, + semantic_prob, + instance_prob, + ): + """ + NOTE: this interface is experimental. + Args: + is_train: for training or inference + augmentations: a list of augmentations or deterministic transforms to apply + crop_gen: crop augmentation + tfm_gens: data augmentation + image_format: an image format supported by :func:`detection_utils.read_image`. + """ + self.tfm_gens = tfm_gens + logging.getLogger(__name__).info( + "[COCOUnifiedNewBaselineDatasetMapper] Full TransformGens used in training: {}".format( + str(self.tfm_gens) + ) + ) + + self.img_format = image_format + self.is_train = is_train + self.meta = meta + self.ignore_label = self.meta.ignore_label + self.num_queries = num_queries + + self.things = [] + for k,v in self.meta.thing_dataset_id_to_contiguous_id.items(): + self.things.append(v) + self.class_names = self.meta.stuff_classes + self.text_tokenizer = Tokenize(SimpleTokenizer(), max_seq_len=max_seq_len) + self.task_tokenizer = Tokenize(SimpleTokenizer(), max_seq_len=task_seq_len) + self.semantic_prob = semantic_prob + self.instance_prob = instance_prob + + @classmethod + def from_config(cls, cfg, is_train=True): + # Build augmentation + tfm_gens = build_transform_gen(cfg, is_train) + dataset_names = cfg.DATASETS.TRAIN + meta = MetadataCatalog.get(dataset_names[0]) + + ret = { + "is_train": is_train, + "meta": meta, + "tfm_gens": tfm_gens, + "image_format": cfg.INPUT.FORMAT, + "num_queries": cfg.MODEL.ONE_FORMER.NUM_OBJECT_QUERIES - cfg.MODEL.TEXT_ENCODER.N_CTX, + "task_seq_len": cfg.INPUT.TASK_SEQ_LEN, + "max_seq_len": cfg.INPUT.MAX_SEQ_LEN, + "semantic_prob": cfg.INPUT.TASK_PROB.SEMANTIC, + "instance_prob": cfg.INPUT.TASK_PROB.INSTANCE, + } + return ret + + def _get_semantic_dict(self, pan_seg_gt, image_shape, segments_info, num_class_obj): + instances = Instances(image_shape) + + classes = [] + texts = ["a semantic photo"] * self.num_queries + masks = [] + label = np.ones_like(pan_seg_gt) * self.ignore_label + + for segment_info in segments_info: + class_id = segment_info["category_id"] + if not segment_info["iscrowd"]: + mask = pan_seg_gt == segment_info["id"] + if not np.all(mask == False): + if class_id not in classes: + cls_name = 
self.class_names[class_id] + classes.append(class_id) + masks.append(mask) + num_class_obj[cls_name] += 1 + else: + idx = classes.index(class_id) + masks[idx] += mask + masks[idx] = np.clip(masks[idx], 0, 1).astype(np.bool) + label[mask] = class_id + + num = 0 + for i, cls_name in enumerate(self.class_names): + if num_class_obj[cls_name] > 0: + for _ in range(num_class_obj[cls_name]): + if num >= len(texts): + break + texts[num] = f"a photo with a {cls_name}" + num += 1 + + classes = np.array(classes) + instances.gt_classes = torch.tensor(classes, dtype=torch.int64) + if len(masks) == 0: + # Some image does not have annotation (all ignored) + instances.gt_masks = torch.zeros((0, pan_seg_gt.shape[-2], pan_seg_gt.shape[-1])) + instances.gt_bboxes = torch.zeros((0, 4)) + else: + masks = BitMasks( + torch.stack([torch.from_numpy(np.ascontiguousarray(x.copy())) for x in masks]) + ) + instances.gt_masks = masks.tensor + # Placeholder bounding boxes for stuff regions. Note that these are not used during training. + instances.gt_bboxes = torch.stack([torch.tensor([0., 0., 1., 1.])] * instances.gt_masks.shape[0]) + return instances, texts, label + + def _get_instance_dict(self, pan_seg_gt, image_shape, segments_info, num_class_obj): + instances = Instances(image_shape) + + classes = [] + texts = ["an instance photo"] * self.num_queries + masks = [] + label = np.ones_like(pan_seg_gt) * self.ignore_label + + for segment_info in segments_info: + class_id = segment_info["category_id"] + if class_id in self.things: + if not segment_info["iscrowd"]: + mask = pan_seg_gt == segment_info["id"] + if not np.all(mask == False): + cls_name = self.class_names[class_id] + classes.append(class_id) + masks.append(mask) + num_class_obj[cls_name] += 1 + label[mask] = class_id + + num = 0 + for i, cls_name in enumerate(self.class_names): + if num_class_obj[cls_name] > 0: + for _ in range(num_class_obj[cls_name]): + if num >= len(texts): + break + texts[num] = f"a photo with a {cls_name}" + num += 1 + + classes = np.array(classes) + instances.gt_classes = torch.tensor(classes, dtype=torch.int64) + if len(masks) == 0: + # Some image does not have annotation (all ignored) + instances.gt_masks = torch.zeros((0, pan_seg_gt.shape[-2], pan_seg_gt.shape[-1])) + instances.gt_bboxes = torch.zeros((0, 4)) + else: + masks = BitMasks( + torch.stack([torch.from_numpy(np.ascontiguousarray(x.copy())) for x in masks]) + ) + instances.gt_masks = masks.tensor + instances.gt_bboxes = masks_to_boxes(instances.gt_masks) + return instances, texts, label + + def _get_panoptic_dict(self, pan_seg_gt, image_shape, segments_info, num_class_obj): + instances = Instances(image_shape) + + classes = [] + texts = ["a panoptic photo"] * self.num_queries + masks = [] + label = np.ones_like(pan_seg_gt) * self.ignore_label + + for segment_info in segments_info: + class_id = segment_info["category_id"] + if not segment_info["iscrowd"]: + mask = pan_seg_gt == segment_info["id"] + if not np.all(mask == False): + cls_name = self.class_names[class_id] + classes.append(class_id) + masks.append(mask) + num_class_obj[cls_name] += 1 + label[mask] = class_id + + num = 0 + for i, cls_name in enumerate(self.class_names): + if num_class_obj[cls_name] > 0: + for _ in range(num_class_obj[cls_name]): + if num >= len(texts): + break + texts[num] = f"a photo with a {cls_name}" + num += 1 + + classes = np.array(classes) + instances.gt_classes = torch.tensor(classes, dtype=torch.int64) + if len(masks) == 0: + # Some image does not have annotation (all ignored) + 
instances.gt_masks = torch.zeros((0, pan_seg_gt.shape[-2], pan_seg_gt.shape[-1])) + instances.gt_bboxes = torch.zeros((0, 4)) + else: + masks = BitMasks( + torch.stack([torch.from_numpy(np.ascontiguousarray(x.copy())) for x in masks]) + ) + instances.gt_masks = masks.tensor + instances.gt_bboxes = masks_to_boxes(instances.gt_masks) + for i in range(instances.gt_classes.shape[0]): + # Placeholder bounding boxes for stuff regions. Note that these are not used during training. + if instances.gt_classes[i].item() not in self.things: + instances.gt_bboxes[i] = torch.tensor([0., 0., 1., 1.]) + return instances, texts, label + + def __call__(self, dataset_dict): + """ + Args: + dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. + + Returns: + dict: a format that builtin models in detectron2 accept + """ + dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below + image = utils.read_image(dataset_dict["file_name"], format=self.img_format) + utils.check_image_size(dataset_dict, image) + + image, transforms = T.apply_transform_gens(self.tfm_gens, image) + image_shape = image.shape[:2] # h, w + + # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, + # but not efficient on large generic data structures due to the use of pickle & mp.Queue. + # Therefore it's important to use torch.Tensor. + dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) + + if not self.is_train: + # USER: Modify this if you want to keep them for some reason. + dataset_dict.pop("annotations", None) + return dataset_dict + + # semantic segmentation + if "sem_seg_file_name" in dataset_dict: + # PyTorch transformation not implemented for uint16, so converting it to double first + sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name")).astype("double") + sem_seg_gt = transforms.apply_segmentation(sem_seg_gt) + else: + sem_seg_gt = None + + if "pan_seg_file_name" in dataset_dict: + pan_seg_gt = utils.read_image(dataset_dict.pop("pan_seg_file_name"), "RGB") + segments_info = dataset_dict["segments_info"] + + # apply the same transformation to panoptic segmentation + pan_seg_gt = transforms.apply_segmentation(pan_seg_gt) + + from panopticapi.utils import rgb2id + pan_seg_gt = rgb2id(pan_seg_gt) + + prob_task = np.random.uniform(0,1.) 
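+            # Task sampling: with the defaults set in add_common_config (TASK_PROB.SEMANTIC = 0.33,
+            # TASK_PROB.INSTANCE = 0.66), prob_task < 0.33 selects the semantic branch below,
+            # 0.33 <= prob_task < 0.66 selects the instance branch, and the remaining draws
+            # fall through to the panoptic branch.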
+ + num_class_obj = {} + + for name in self.class_names: + num_class_obj[name] = 0 + + if prob_task < self.semantic_prob: + task = "The task is semantic" + instances, text, sem_seg = self._get_semantic_dict(pan_seg_gt, image_shape, segments_info, num_class_obj) + elif prob_task < self.instance_prob: + task = "The task is instance" + instances, text, sem_seg = self._get_instance_dict(pan_seg_gt, image_shape, segments_info, num_class_obj) + else: + task = "The task is panoptic" + instances, text, sem_seg = self._get_panoptic_dict(pan_seg_gt, image_shape, segments_info, num_class_obj) + + + dataset_dict["sem_seg"] = torch.from_numpy(sem_seg).long() + dataset_dict["instances"] = instances + dataset_dict["orig_shape"] = image_shape + dataset_dict["task"] = task + dataset_dict["text"] = text + dataset_dict["thing_ids"] = self.things + + return dataset_dict diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/data/dataset_mappers/dataset_mapper.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/data/dataset_mappers/dataset_mapper.py new file mode 100644 index 0000000000000000000000000000000000000000..710c81bee298e9e6b21a93742d09e720024ceeff --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/data/dataset_mappers/dataset_mapper.py @@ -0,0 +1,203 @@ +# ------------------------------------------------------------------------------ +# Reference: https://github.com/facebookresearch/detectron2/blob/main/detectron2/data/dataset_mapper.py +# Modified by Jitesh Jain (https://github.com/praeclarumjj3) +# ------------------------------------------------------------------------------ + +import copy +import logging +import numpy as np +from typing import List, Optional, Union +import torch + +from annotator.oneformer.detectron2.config import configurable + +from annotator.oneformer.detectron2.data import detection_utils as utils +from annotator.oneformer.detectron2.data import transforms as T +from annotator.oneformer.oneformer.data.tokenizer import SimpleTokenizer, Tokenize + +__all__ = ["DatasetMapper"] + + +class DatasetMapper: + """ + A callable which takes a dataset dict in Detectron2 Dataset format, + and map it into a format used by the model. + + This is the default callable to be used to map your dataset dict into training data. + You may need to follow it to implement your own one for customized logic, + such as a different way to read or transform images. + See :doc:`/tutorials/data_loading` for details. + + The callable currently does the following: + + 1. Read the image from "file_name" + 2. Applies cropping/geometric transforms to the image and annotations + 3. Prepare data and annotations to Tensor and :class:`Instances` + """ + + @configurable + def __init__( + self, + is_train: bool, + *, + augmentations: List[Union[T.Augmentation, T.Transform]], + image_format: str, + task_seq_len: int, + task: str = "panoptic", + use_instance_mask: bool = False, + use_keypoint: bool = False, + instance_mask_format: str = "polygon", + keypoint_hflip_indices: Optional[np.ndarray] = None, + precomputed_proposal_topk: Optional[int] = None, + recompute_boxes: bool = False, + ): + """ + NOTE: this interface is experimental. + + Args: + is_train: whether it's used in training or inference + augmentations: a list of augmentations or deterministic transforms to apply + image_format: an image format supported by :func:`detection_utils.read_image`. 
+ use_instance_mask: whether to process instance segmentation annotations, if available + use_keypoint: whether to process keypoint annotations if available + instance_mask_format: one of "polygon" or "bitmask". Process instance segmentation + masks into this format. + keypoint_hflip_indices: see :func:`detection_utils.create_keypoint_hflip_indices` + precomputed_proposal_topk: if given, will load pre-computed + proposals from dataset_dict and keep the top k proposals for each image. + recompute_boxes: whether to overwrite bounding box annotations + by computing tight bounding boxes from instance mask annotations. + """ + if recompute_boxes: + assert use_instance_mask, "recompute_boxes requires instance masks" + # fmt: off + self.is_train = is_train + self.augmentations = T.AugmentationList(augmentations) + self.image_format = image_format + self.use_instance_mask = use_instance_mask + self.instance_mask_format = instance_mask_format + self.use_keypoint = use_keypoint + self.keypoint_hflip_indices = keypoint_hflip_indices + self.proposal_topk = precomputed_proposal_topk + self.recompute_boxes = recompute_boxes + self.task_tokenizer = Tokenize(SimpleTokenizer(), max_seq_len=task_seq_len) + self.task = task + assert self.task in ["panoptic", "semantic", "instance"] + + # fmt: on + logger = logging.getLogger(__name__) + mode = "training" if is_train else "inference" + logger.info(f"[DatasetMapper] Augmentations used in {mode}: {augmentations}") + + @classmethod + def from_config(cls, cfg, is_train: bool = True): + augs = utils.build_augmentation(cfg, is_train) + if cfg.INPUT.CROP.ENABLED and is_train: + augs.insert(0, T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE)) + recompute_boxes = cfg.MODEL.MASK_ON + else: + recompute_boxes = False + + ret = { + "is_train": is_train, + "augmentations": augs, + "image_format": cfg.INPUT.FORMAT, + "use_instance_mask": cfg.MODEL.MASK_ON, + "instance_mask_format": cfg.INPUT.MASK_FORMAT, + "use_keypoint": cfg.MODEL.KEYPOINT_ON, + "task_seq_len": cfg.INPUT.TASK_SEQ_LEN, + "recompute_boxes": recompute_boxes, + "task": cfg.MODEL.TEST.TASK, + } + + if cfg.MODEL.KEYPOINT_ON: + ret["keypoint_hflip_indices"] = utils.create_keypoint_hflip_indices(cfg.DATASETS.TRAIN) + + if cfg.MODEL.LOAD_PROPOSALS: + ret["precomputed_proposal_topk"] = ( + cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TRAIN + if is_train + else cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TEST + ) + return ret + + def _transform_annotations(self, dataset_dict, transforms, image_shape): + # USER: Modify this if you want to keep them for some reason. + for anno in dataset_dict["annotations"]: + if not self.use_instance_mask: + anno.pop("segmentation", None) + if not self.use_keypoint: + anno.pop("keypoints", None) + + # USER: Implement additional transformations if you have other types of data + annos = [ + utils.transform_instance_annotations( + obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices + ) + for obj in dataset_dict.pop("annotations") + if obj.get("iscrowd", 0) == 0 + ] + instances = utils.annotations_to_instances( + annos, image_shape, mask_format=self.instance_mask_format + ) + + # After transforms such as cropping are applied, the bounding box may no longer + # tightly bound the object. As an example, imagine a triangle object + # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight + # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to + # the intersection of original bounding box and the cropping box. 
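+        # Re-deriving gt_boxes from the transformed masks (below) keeps the boxes tight after such
+        # crops; this is why recompute_boxes requires instance masks (asserted in __init__ above).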
+ if self.recompute_boxes: + instances.gt_boxes = instances.gt_masks.get_bounding_boxes() + dataset_dict["instances"] = utils.filter_empty_instances(instances) + + def __call__(self, dataset_dict): + """ + Args: + dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. + + Returns: + dict: a format that builtin models in detectron2 accept + """ + dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below + # USER: Write your own image loading if it's not from a file + image = utils.read_image(dataset_dict["file_name"], format=self.image_format) + utils.check_image_size(dataset_dict, image) + + task = f"The task is {self.task}" + dataset_dict["task"] = task + + # USER: Remove if you don't do semantic/panoptic segmentation. + if "sem_seg_file_name" in dataset_dict: + sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2) + else: + sem_seg_gt = None + + aug_input = T.AugInput(image, sem_seg=sem_seg_gt) + transforms = self.augmentations(aug_input) + image, sem_seg_gt = aug_input.image, aug_input.sem_seg + + image_shape = image.shape[:2] # h, w + # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, + # but not efficient on large generic data structures due to the use of pickle & mp.Queue. + # Therefore it's important to use torch.Tensor. + dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) + if sem_seg_gt is not None: + dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long")) + + # USER: Remove if you don't use pre-computed proposals. + # Most users would not need this feature. + if self.proposal_topk is not None: + utils.transform_proposals( + dataset_dict, image_shape, transforms, proposal_topk=self.proposal_topk + ) + + if not self.is_train: + # USER: Modify this if you want to keep them for some reason. 
+ dataset_dict.pop("annotations", None) + dataset_dict.pop("sem_seg_file_name", None) + return dataset_dict + + if "annotations" in dataset_dict: + self._transform_annotations(dataset_dict, transforms, image_shape) + + return dataset_dict \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/data/dataset_mappers/oneformer_unified_dataset_mapper.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/data/dataset_mappers/oneformer_unified_dataset_mapper.py new file mode 100644 index 0000000000000000000000000000000000000000..e5dadbc2e4eb1e5f06e2294bccb23057dcfdf09d --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/data/dataset_mappers/oneformer_unified_dataset_mapper.py @@ -0,0 +1,375 @@ +# ------------------------------------------------------------------------------ +# Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/data/dataset_mappers/mask_former_panoptic_dataset_mapper.py +# Modified by Jitesh Jain (https://github.com/praeclarumjj3) +# ------------------------------------------------------------------------------ + +import copy +import logging +import os + +import numpy as np +import torch +from torch.nn import functional as F + +from annotator.oneformer.detectron2.config import configurable +from annotator.oneformer.detectron2.data import detection_utils as utils +from annotator.oneformer.detectron2.data import transforms as T +from annotator.oneformer.detectron2.structures import BitMasks, Instances +from annotator.oneformer.detectron2.data import MetadataCatalog +from annotator.oneformer.detectron2.projects.point_rend import ColorAugSSDTransform +from annotator.oneformer.oneformer.utils.box_ops import masks_to_boxes +from annotator.oneformer.oneformer.data.tokenizer import SimpleTokenizer, Tokenize + +__all__ = ["OneFormerUnifiedDatasetMapper"] + + +class OneFormerUnifiedDatasetMapper: + """ + A callable which takes a dataset dict in Detectron2 Dataset format, + and map it into a format used by OneFormer for universal segmentation. + + The callable currently does the following: + + 1. Read the image from "file_name" + 2. Applies geometric transforms to the image and annotation + 3. Find and applies suitable cropping to the image and annotation + 4. Prepare image and annotation to Tensors + """ + + @configurable + def __init__( + self, + is_train=True, + *, + name, + num_queries, + meta, + augmentations, + image_format, + ignore_label, + size_divisibility, + task_seq_len, + max_seq_len, + semantic_prob, + instance_prob, + ): + """ + NOTE: this interface is experimental. + Args: + is_train: for training or inference + augmentations: a list of augmentations or deterministic transforms to apply + image_format: an image format supported by :func:`detection_utils.read_image`. 
+ ignore_label: the label that is ignored to evaluation + size_divisibility: pad image size to be divisible by this value + """ + self.is_train = is_train + self.meta = meta + self.name = name + self.tfm_gens = augmentations + self.img_format = image_format + self.ignore_label = ignore_label + self.size_divisibility = size_divisibility + self.num_queries = num_queries + + logger = logging.getLogger(__name__) + mode = "training" if is_train else "inference" + logger.info(f"[{self.__class__.__name__}] Augmentations used in {mode}: {augmentations}") + + self.things = [] + for k,v in self.meta.thing_dataset_id_to_contiguous_id.items(): + self.things.append(v) + self.class_names = self.meta.stuff_classes + self.text_tokenizer = Tokenize(SimpleTokenizer(), max_seq_len=max_seq_len) + self.task_tokenizer = Tokenize(SimpleTokenizer(), max_seq_len=task_seq_len) + self.semantic_prob = semantic_prob + self.instance_prob = instance_prob + + @classmethod + def from_config(cls, cfg, is_train=True): + # Build augmentation + augs = [ + T.ResizeShortestEdge( + cfg.INPUT.MIN_SIZE_TRAIN, + cfg.INPUT.MAX_SIZE_TRAIN, + cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING, + ) + ] + if cfg.INPUT.CROP.ENABLED: + augs.append( + T.RandomCrop_CategoryAreaConstraint( + cfg.INPUT.CROP.TYPE, + cfg.INPUT.CROP.SIZE, + cfg.INPUT.CROP.SINGLE_CATEGORY_MAX_AREA, + cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE, + ) + ) + if cfg.INPUT.COLOR_AUG_SSD: + augs.append(ColorAugSSDTransform(img_format=cfg.INPUT.FORMAT)) + augs.append(T.RandomFlip()) + + # Assume always applies to the training set. + dataset_names = cfg.DATASETS.TRAIN + meta = MetadataCatalog.get(dataset_names[0]) + ignore_label = meta.ignore_label + + ret = { + "is_train": is_train, + "meta": meta, + "name": dataset_names[0], + "num_queries": cfg.MODEL.ONE_FORMER.NUM_OBJECT_QUERIES - cfg.MODEL.TEXT_ENCODER.N_CTX, + "task_seq_len": cfg.INPUT.TASK_SEQ_LEN, + "max_seq_len": cfg.INPUT.MAX_SEQ_LEN, + "augmentations": augs, + "image_format": cfg.INPUT.FORMAT, + "ignore_label": ignore_label, + "size_divisibility": cfg.INPUT.SIZE_DIVISIBILITY, + "semantic_prob": cfg.INPUT.TASK_PROB.SEMANTIC, + "instance_prob": cfg.INPUT.TASK_PROB.INSTANCE, + } + return ret + + def _get_semantic_dict(self, pan_seg_gt, image_shape, segments_info, num_class_obj): + pan_seg_gt = pan_seg_gt.numpy() + instances = Instances(image_shape) + + classes = [] + texts = ["a semantic photo"] * self.num_queries + masks = [] + label = np.ones_like(pan_seg_gt) * self.ignore_label + + for segment_info in segments_info: + class_id = segment_info["category_id"] + if not segment_info["iscrowd"]: + mask = pan_seg_gt == segment_info["id"] + if not np.all(mask == False): + if class_id not in classes: + cls_name = self.class_names[class_id] + classes.append(class_id) + masks.append(mask) + num_class_obj[cls_name] += 1 + else: + idx = classes.index(class_id) + masks[idx] += mask + masks[idx] = np.clip(masks[idx], 0, 1).astype(np.bool) + label[mask] = class_id + + num = 0 + for i, cls_name in enumerate(self.class_names): + if num_class_obj[cls_name] > 0: + for _ in range(num_class_obj[cls_name]): + if num >= len(texts): + break + texts[num] = f"a photo with a {cls_name}" + num += 1 + + classes = np.array(classes) + instances.gt_classes = torch.tensor(classes, dtype=torch.int64) + if len(masks) == 0: + # Some image does not have annotation (all ignored) + instances.gt_masks = torch.zeros((0, pan_seg_gt.shape[-2], pan_seg_gt.shape[-1])) + instances.gt_bboxes = torch.zeros((0, 4)) + else: + masks = BitMasks( + 
torch.stack([torch.from_numpy(np.ascontiguousarray(x.copy())) for x in masks]) + ) + instances.gt_masks = masks.tensor + # Placeholder bounding boxes for stuff regions. Note that these are not used during training. + instances.gt_bboxes = torch.stack([torch.tensor([0., 0., 1., 1.])] * instances.gt_masks.shape[0]) + return instances, texts, label + + def _get_instance_dict(self, pan_seg_gt, image_shape, segments_info, num_class_obj): + pan_seg_gt = pan_seg_gt.numpy() + instances = Instances(image_shape) + + classes = [] + texts = ["an instance photo"] * self.num_queries + masks = [] + label = np.ones_like(pan_seg_gt) * self.ignore_label + + for segment_info in segments_info: + class_id = segment_info["category_id"] + if class_id in self.things: + if not segment_info["iscrowd"]: + mask = pan_seg_gt == segment_info["id"] + if not np.all(mask == False): + cls_name = self.class_names[class_id] + classes.append(class_id) + masks.append(mask) + num_class_obj[cls_name] += 1 + label[mask] = class_id + + num = 0 + for i, cls_name in enumerate(self.class_names): + if num_class_obj[cls_name] > 0: + for _ in range(num_class_obj[cls_name]): + if num >= len(texts): + break + texts[num] = f"a photo with a {cls_name}" + num += 1 + + classes = np.array(classes) + instances.gt_classes = torch.tensor(classes, dtype=torch.int64) + if len(masks) == 0: + # Some image does not have annotation (all ignored) + instances.gt_masks = torch.zeros((0, pan_seg_gt.shape[-2], pan_seg_gt.shape[-1])) + instances.gt_bboxes = torch.zeros((0, 4)) + else: + masks = BitMasks( + torch.stack([torch.from_numpy(np.ascontiguousarray(x.copy())) for x in masks]) + ) + instances.gt_masks = masks.tensor + instances.gt_bboxes = masks_to_boxes(instances.gt_masks) + return instances, texts, label + + def _get_panoptic_dict(self, pan_seg_gt, image_shape, segments_info, num_class_obj): + pan_seg_gt = pan_seg_gt.numpy() + instances = Instances(image_shape) + + classes = [] + texts = ["a panoptic photo"] * self.num_queries + masks = [] + label = np.ones_like(pan_seg_gt) * self.ignore_label + + for segment_info in segments_info: + class_id = segment_info["category_id"] + if not segment_info["iscrowd"]: + mask = pan_seg_gt == segment_info["id"] + if not np.all(mask == False): + cls_name = self.class_names[class_id] + classes.append(class_id) + masks.append(mask) + num_class_obj[cls_name] += 1 + label[mask] = class_id + + num = 0 + for i, cls_name in enumerate(self.class_names): + if num_class_obj[cls_name] > 0: + for _ in range(num_class_obj[cls_name]): + if num >= len(texts): + break + texts[num] = f"a photo with a {cls_name}" + num += 1 + + classes = np.array(classes) + instances.gt_classes = torch.tensor(classes, dtype=torch.int64) + if len(masks) == 0: + # Some image does not have annotation (all ignored) + instances.gt_masks = torch.zeros((0, pan_seg_gt.shape[-2], pan_seg_gt.shape[-1])) + instances.gt_bboxes = torch.zeros((0, 4)) + else: + masks = BitMasks( + torch.stack([torch.from_numpy(np.ascontiguousarray(x.copy())) for x in masks]) + ) + instances.gt_masks = masks.tensor + instances.gt_bboxes = masks_to_boxes(instances.gt_masks) + for i in range(instances.gt_classes.shape[0]): + # Placeholder bounding boxes for stuff regions. Note that these are not used during training. 
+ if instances.gt_classes[i].item() not in self.things: + instances.gt_bboxes[i] = torch.tensor([0., 0., 1., 1.]) + return instances, texts, label + + def __call__(self, dataset_dict): + """ + Args: + dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. + + Returns: + dict: a format that builtin models in detectron2 accept + """ + assert self.is_train, "OneFormerUnifiedDatasetMapper should only be used for training!" + + dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below + image = utils.read_image(dataset_dict["file_name"], format=self.img_format) + utils.check_image_size(dataset_dict, image) + + # semantic segmentation + if "sem_seg_file_name" in dataset_dict: + # PyTorch transformation not implemented for uint16, so converting it to double first + sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name")).astype("double") + else: + sem_seg_gt = None + + # panoptic segmentation + if "pan_seg_file_name" in dataset_dict: + pan_seg_gt = utils.read_image(dataset_dict.pop("pan_seg_file_name"), "RGB") + segments_info = dataset_dict["segments_info"] + else: + pan_seg_gt = None + segments_info = None + + if pan_seg_gt is None: + raise ValueError( + "Cannot find 'pan_seg_file_name' for panoptic segmentation dataset {}.".format( + dataset_dict["file_name"] + ) + ) + + aug_input = T.AugInput(image, sem_seg=sem_seg_gt) + aug_input, transforms = T.apply_transform_gens(self.tfm_gens, aug_input) + image = aug_input.image + if sem_seg_gt is not None: + sem_seg_gt = aug_input.sem_seg + + # apply the same transformation to panoptic segmentation + pan_seg_gt = transforms.apply_segmentation(pan_seg_gt) + + from panopticapi.utils import rgb2id + + pan_seg_gt = rgb2id(pan_seg_gt) + + # Pad image and segmentation label here! + image = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) + if sem_seg_gt is not None: + sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long")) + pan_seg_gt = torch.as_tensor(pan_seg_gt.astype("long")) + + if self.size_divisibility > 0: + image_size = (image.shape[-2], image.shape[-1]) + padding_size = [ + 0, + self.size_divisibility - image_size[1], + 0, + self.size_divisibility - image_size[0], + ] + image = F.pad(image, padding_size, value=128).contiguous() + if sem_seg_gt is not None: + sem_seg_gt = F.pad(sem_seg_gt, padding_size, value=self.ignore_label).contiguous() + pan_seg_gt = F.pad( + pan_seg_gt, padding_size, value=0 + ).contiguous() # 0 is the VOID panoptic label + + image_shape = (image.shape[-2], image.shape[-1]) # h, w + + # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, + # but not efficient on large generic data structures due to the use of pickle & mp.Queue. + # Therefore it's important to use torch.Tensor. + dataset_dict["image"] = image + + if "annotations" in dataset_dict: + raise ValueError("Pemantic segmentation dataset should not have 'annotations'.") + + prob_task = np.random.uniform(0,1.) 
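For orientation, the uniform draw at the end of the chunk above is bucketed by the two config probabilities acting as cumulative thresholds; a minimal, self-contained sketch of that selection (the 0.33/0.66 defaults are invented for illustration, not taken from this patch):

import numpy as np

def sample_task(semantic_prob=0.33, instance_prob=0.66):
    # One uniform sample in [0, 1) is compared against cumulative thresholds:
    #   [0, semantic_prob)             -> semantic ground truth
    #   [semantic_prob, instance_prob) -> instance ground truth
    #   [instance_prob, 1)             -> panoptic ground truth
    p = np.random.uniform(0.0, 1.0)
    if p < semantic_prob:
        return "The task is semantic"
    elif p < instance_prob:
        return "The task is instance"
    return "The task is panoptic"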
+ + num_class_obj = {} + + for name in self.class_names: + num_class_obj[name] = 0 + + if prob_task < self.semantic_prob: + task = "The task is semantic" + instances, text, sem_seg = self._get_semantic_dict(pan_seg_gt, image_shape, segments_info, num_class_obj) + elif prob_task < self.instance_prob: + task = "The task is instance" + instances, text, sem_seg = self._get_instance_dict(pan_seg_gt, image_shape, segments_info, num_class_obj) + else: + task = "The task is panoptic" + instances, text, sem_seg = self._get_panoptic_dict(pan_seg_gt, image_shape, segments_info, num_class_obj) + + dataset_dict["sem_seg"] = torch.from_numpy(sem_seg).long() + dataset_dict["instances"] = instances + dataset_dict["orig_shape"] = image_shape + dataset_dict["task"] = task + dataset_dict["text"] = text + dataset_dict["thing_ids"] = self.things + + return dataset_dict diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/data/datasets/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/data/datasets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..59ce30713f63d056107b2a06ecd434eb27a30b7d --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/data/datasets/__init__.py @@ -0,0 +1,7 @@ +from . import ( + register_ade20k_panoptic, + register_cityscapes_panoptic, + register_coco_panoptic_annos_semseg, + register_ade20k_instance, + register_coco_panoptic2instance, +) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/data/datasets/register_ade20k_instance.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/data/datasets/register_ade20k_instance.py new file mode 100644 index 0000000000000000000000000000000000000000..e32d2b0bf5e2a937ac0ecf46b76239d6bc889ab8 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/data/datasets/register_ade20k_instance.py @@ -0,0 +1,56 @@ +# ------------------------------------------------------------------------------ +# Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/data/datasets/register_ade20k_instance.py +# ------------------------------------------------------------------------------ + +import json +import logging +import numpy as np +import os +from PIL import Image + +from annotator.oneformer.detectron2.data import DatasetCatalog, MetadataCatalog +from annotator.oneformer.detectron2.data.datasets.coco import load_coco_json, register_coco_instances +from annotator.oneformer.detectron2.utils.file_io import PathManager + +ADE_CATEGORIES = [{'id': 7, 'name': 'bed'}, {'id': 8, 'name': 'windowpane'}, {'id': 10, 'name': 'cabinet'}, {'id': 12, 'name': 'person'}, {'id': 14, 'name': 'door'}, {'id': 15, 'name': 'table'}, {'id': 18, 'name': 'curtain'}, {'id': 19, 'name': 'chair'}, {'id': 20, 'name': 'car'}, {'id': 22, 'name': 'painting'}, {'id': 23, 'name': 'sofa'}, {'id': 24, 'name': 'shelf'}, {'id': 27, 'name': 'mirror'}, {'id': 30, 'name': 'armchair'}, {'id': 31, 'name': 'seat'}, {'id': 32, 'name': 'fence'}, {'id': 33, 'name': 'desk'}, {'id': 35, 'name': 'wardrobe'}, {'id': 36, 'name': 'lamp'}, {'id': 37, 'name': 'bathtub'}, {'id': 38, 'name': 'railing'}, {'id': 39, 'name': 'cushion'}, {'id': 41, 'name': 'box'}, {'id': 42, 'name': 'column'}, {'id': 43, 'name': 'signboard'}, {'id': 44, 'name': 'chest of drawers'}, {'id': 45, 'name': 'counter'}, {'id': 47, 'name': 'sink'}, {'id': 49, 'name': 'fireplace'}, 
{'id': 50, 'name': 'refrigerator'}, {'id': 53, 'name': 'stairs'}, {'id': 55, 'name': 'case'}, {'id': 56, 'name': 'pool table'}, {'id': 57, 'name': 'pillow'}, {'id': 58, 'name': 'screen door'}, {'id': 62, 'name': 'bookcase'}, {'id': 64, 'name': 'coffee table'}, {'id': 65, 'name': 'toilet'}, {'id': 66, 'name': 'flower'}, {'id': 67, 'name': 'book'}, {'id': 69, 'name': 'bench'}, {'id': 70, 'name': 'countertop'}, {'id': 71, 'name': 'stove'}, {'id': 72, 'name': 'palm'}, {'id': 73, 'name': 'kitchen island'}, {'id': 74, 'name': 'computer'}, {'id': 75, 'name': 'swivel chair'}, {'id': 76, 'name': 'boat'}, {'id': 78, 'name': 'arcade machine'}, {'id': 80, 'name': 'bus'}, {'id': 81, 'name': 'towel'}, {'id': 82, 'name': 'light'}, {'id': 83, 'name': 'truck'}, {'id': 85, 'name': 'chandelier'}, {'id': 86, 'name': 'awning'}, {'id': 87, 'name': 'streetlight'}, {'id': 88, 'name': 'booth'}, {'id': 89, 'name': 'television receiver'}, {'id': 90, 'name': 'airplane'}, {'id': 92, 'name': 'apparel'}, {'id': 93, 'name': 'pole'}, {'id': 95, 'name': 'bannister'}, {'id': 97, 'name': 'ottoman'}, {'id': 98, 'name': 'bottle'}, {'id': 102, 'name': 'van'}, {'id': 103, 'name': 'ship'}, {'id': 104, 'name': 'fountain'}, {'id': 107, 'name': 'washer'}, {'id': 108, 'name': 'plaything'}, {'id': 110, 'name': 'stool'}, {'id': 111, 'name': 'barrel'}, {'id': 112, 'name': 'basket'}, {'id': 115, 'name': 'bag'}, {'id': 116, 'name': 'minibike'}, {'id': 118, 'name': 'oven'}, {'id': 119, 'name': 'ball'}, {'id': 120, 'name': 'food'}, {'id': 121, 'name': 'step'}, {'id': 123, 'name': 'trade name'}, {'id': 124, 'name': 'microwave'}, {'id': 125, 'name': 'pot'}, {'id': 126, 'name': 'animal'}, {'id': 127, 'name': 'bicycle'}, {'id': 129, 'name': 'dishwasher'}, {'id': 130, 'name': 'screen'}, {'id': 132, 'name': 'sculpture'}, {'id': 133, 'name': 'hood'}, {'id': 134, 'name': 'sconce'}, {'id': 135, 'name': 'vase'}, {'id': 136, 'name': 'traffic light'}, {'id': 137, 'name': 'tray'}, {'id': 138, 'name': 'ashcan'}, {'id': 139, 'name': 'fan'}, {'id': 142, 'name': 'plate'}, {'id': 143, 'name': 'monitor'}, {'id': 144, 'name': 'bulletin board'}, {'id': 146, 'name': 'radiator'}, {'id': 147, 'name': 'glass'}, {'id': 148, 'name': 'clock'}, {'id': 149, 'name': 'flag'}] + + +_PREDEFINED_SPLITS = { + # point annotations without masks + "ade20k_instance_train": ( + "ADEChallengeData2016/images/training", + "ADEChallengeData2016/ade20k_instance_train.json", + ), + "ade20k_instance_val": ( + "ADEChallengeData2016/images/validation", + "ADEChallengeData2016/ade20k_instance_val.json", + ), +} + + +def _get_ade_instances_meta(): + thing_ids = [k["id"] for k in ADE_CATEGORIES] + assert len(thing_ids) == 100, len(thing_ids) + # Mapping from the incontiguous ADE category id to an id in [0, 99] + thing_dataset_id_to_contiguous_id = {k: i for i, k in enumerate(thing_ids)} + thing_classes = [k["name"] for k in ADE_CATEGORIES] + ret = { + "thing_dataset_id_to_contiguous_id": thing_dataset_id_to_contiguous_id, + "thing_classes": thing_classes, + } + return ret + + +def register_all_ade20k_instance(root): + for key, (image_root, json_file) in _PREDEFINED_SPLITS.items(): + # Assume pre-defined datasets live in `./datasets`. 
+ register_coco_instances( + key, + _get_ade_instances_meta(), + os.path.join(root, json_file) if "://" not in json_file else json_file, + os.path.join(root, image_root), + ) + + +_root = os.getenv("DETECTRON2_DATASETS", "datasets") +register_all_ade20k_instance(_root) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/data/datasets/register_ade20k_panoptic.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/data/datasets/register_ade20k_panoptic.py new file mode 100644 index 0000000000000000000000000000000000000000..05094a617b0103b0f0250eb32e555df994e5331b --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/data/datasets/register_ade20k_panoptic.py @@ -0,0 +1,394 @@ +# ------------------------------------------------------------------------------ +# Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/data/datasets/register_ade20k_panoptic.py +# Modified by Jitesh Jain (https://github.com/praeclarumjj3) +# ------------------------------------------------------------------------------ + +import json +import os + +from annotator.oneformer.detectron2.data import DatasetCatalog, MetadataCatalog +from annotator.oneformer.detectron2.utils.file_io import PathManager + +ADE20K_150_CATEGORIES = [ + {"color": [120, 120, 120], "id": 0, "isthing": 0, "name": "wall"}, + {"color": [180, 120, 120], "id": 1, "isthing": 0, "name": "building"}, + {"color": [6, 230, 230], "id": 2, "isthing": 0, "name": "sky"}, + {"color": [80, 50, 50], "id": 3, "isthing": 0, "name": "floor"}, + {"color": [4, 200, 3], "id": 4, "isthing": 0, "name": "tree"}, + {"color": [120, 120, 80], "id": 5, "isthing": 0, "name": "ceiling"}, + {"color": [140, 140, 140], "id": 6, "isthing": 0, "name": "road, route"}, + {"color": [204, 5, 255], "id": 7, "isthing": 1, "name": "bed"}, + {"color": [230, 230, 230], "id": 8, "isthing": 1, "name": "window "}, + {"color": [4, 250, 7], "id": 9, "isthing": 0, "name": "grass"}, + {"color": [224, 5, 255], "id": 10, "isthing": 1, "name": "cabinet"}, + {"color": [235, 255, 7], "id": 11, "isthing": 0, "name": "sidewalk, pavement"}, + {"color": [150, 5, 61], "id": 12, "isthing": 1, "name": "person"}, + {"color": [120, 120, 70], "id": 13, "isthing": 0, "name": "earth, ground"}, + {"color": [8, 255, 51], "id": 14, "isthing": 1, "name": "door"}, + {"color": [255, 6, 82], "id": 15, "isthing": 1, "name": "table"}, + {"color": [143, 255, 140], "id": 16, "isthing": 0, "name": "mountain, mount"}, + {"color": [204, 255, 4], "id": 17, "isthing": 0, "name": "plant"}, + {"color": [255, 51, 7], "id": 18, "isthing": 1, "name": "curtain"}, + {"color": [204, 70, 3], "id": 19, "isthing": 1, "name": "chair"}, + {"color": [0, 102, 200], "id": 20, "isthing": 1, "name": "car"}, + {"color": [61, 230, 250], "id": 21, "isthing": 0, "name": "water"}, + {"color": [255, 6, 51], "id": 22, "isthing": 1, "name": "painting, picture"}, + {"color": [11, 102, 255], "id": 23, "isthing": 1, "name": "sofa"}, + {"color": [255, 7, 71], "id": 24, "isthing": 1, "name": "shelf"}, + {"color": [255, 9, 224], "id": 25, "isthing": 0, "name": "house"}, + {"color": [9, 7, 230], "id": 26, "isthing": 0, "name": "sea"}, + {"color": [220, 220, 220], "id": 27, "isthing": 1, "name": "mirror"}, + {"color": [255, 9, 92], "id": 28, "isthing": 0, "name": "rug"}, + {"color": [112, 9, 255], "id": 29, "isthing": 0, "name": "field"}, + {"color": [8, 255, 214], "id": 30, "isthing": 1, "name": "armchair"}, + {"color": [7, 255, 224], 
"id": 31, "isthing": 1, "name": "seat"}, + {"color": [255, 184, 6], "id": 32, "isthing": 1, "name": "fence"}, + {"color": [10, 255, 71], "id": 33, "isthing": 1, "name": "desk"}, + {"color": [255, 41, 10], "id": 34, "isthing": 0, "name": "rock, stone"}, + {"color": [7, 255, 255], "id": 35, "isthing": 1, "name": "wardrobe, closet, press"}, + {"color": [224, 255, 8], "id": 36, "isthing": 1, "name": "lamp"}, + {"color": [102, 8, 255], "id": 37, "isthing": 1, "name": "tub"}, + {"color": [255, 61, 6], "id": 38, "isthing": 1, "name": "rail"}, + {"color": [255, 194, 7], "id": 39, "isthing": 1, "name": "cushion"}, + {"color": [255, 122, 8], "id": 40, "isthing": 0, "name": "base, pedestal, stand"}, + {"color": [0, 255, 20], "id": 41, "isthing": 1, "name": "box"}, + {"color": [255, 8, 41], "id": 42, "isthing": 1, "name": "column, pillar"}, + {"color": [255, 5, 153], "id": 43, "isthing": 1, "name": "signboard, sign"}, + { + "color": [6, 51, 255], + "id": 44, + "isthing": 1, + "name": "chest of drawers, chest, bureau, dresser", + }, + {"color": [235, 12, 255], "id": 45, "isthing": 1, "name": "counter"}, + {"color": [160, 150, 20], "id": 46, "isthing": 0, "name": "sand"}, + {"color": [0, 163, 255], "id": 47, "isthing": 1, "name": "sink"}, + {"color": [140, 140, 140], "id": 48, "isthing": 0, "name": "skyscraper"}, + {"color": [250, 10, 15], "id": 49, "isthing": 1, "name": "fireplace"}, + {"color": [20, 255, 0], "id": 50, "isthing": 1, "name": "refrigerator, icebox"}, + {"color": [31, 255, 0], "id": 51, "isthing": 0, "name": "grandstand, covered stand"}, + {"color": [255, 31, 0], "id": 52, "isthing": 0, "name": "path"}, + {"color": [255, 224, 0], "id": 53, "isthing": 1, "name": "stairs"}, + {"color": [153, 255, 0], "id": 54, "isthing": 0, "name": "runway"}, + {"color": [0, 0, 255], "id": 55, "isthing": 1, "name": "case, display case, showcase, vitrine"}, + { + "color": [255, 71, 0], + "id": 56, + "isthing": 1, + "name": "pool table, billiard table, snooker table", + }, + {"color": [0, 235, 255], "id": 57, "isthing": 1, "name": "pillow"}, + {"color": [0, 173, 255], "id": 58, "isthing": 1, "name": "screen door, screen"}, + {"color": [31, 0, 255], "id": 59, "isthing": 0, "name": "stairway, staircase"}, + {"color": [11, 200, 200], "id": 60, "isthing": 0, "name": "river"}, + {"color": [255, 82, 0], "id": 61, "isthing": 0, "name": "bridge, span"}, + {"color": [0, 255, 245], "id": 62, "isthing": 1, "name": "bookcase"}, + {"color": [0, 61, 255], "id": 63, "isthing": 0, "name": "blind, screen"}, + {"color": [0, 255, 112], "id": 64, "isthing": 1, "name": "coffee table"}, + { + "color": [0, 255, 133], + "id": 65, + "isthing": 1, + "name": "toilet, can, commode, crapper, pot, potty, stool, throne", + }, + {"color": [255, 0, 0], "id": 66, "isthing": 1, "name": "flower"}, + {"color": [255, 163, 0], "id": 67, "isthing": 1, "name": "book"}, + {"color": [255, 102, 0], "id": 68, "isthing": 0, "name": "hill"}, + {"color": [194, 255, 0], "id": 69, "isthing": 1, "name": "bench"}, + {"color": [0, 143, 255], "id": 70, "isthing": 1, "name": "countertop"}, + {"color": [51, 255, 0], "id": 71, "isthing": 1, "name": "stove"}, + {"color": [0, 82, 255], "id": 72, "isthing": 1, "name": "palm, palm tree"}, + {"color": [0, 255, 41], "id": 73, "isthing": 1, "name": "kitchen island"}, + {"color": [0, 255, 173], "id": 74, "isthing": 1, "name": "computer"}, + {"color": [10, 0, 255], "id": 75, "isthing": 1, "name": "swivel chair"}, + {"color": [173, 255, 0], "id": 76, "isthing": 1, "name": "boat"}, + {"color": [0, 255, 153], "id": 77, 
"isthing": 0, "name": "bar"}, + {"color": [255, 92, 0], "id": 78, "isthing": 1, "name": "arcade machine"}, + {"color": [255, 0, 255], "id": 79, "isthing": 0, "name": "hovel, hut, hutch, shack, shanty"}, + {"color": [255, 0, 245], "id": 80, "isthing": 1, "name": "bus"}, + {"color": [255, 0, 102], "id": 81, "isthing": 1, "name": "towel"}, + {"color": [255, 173, 0], "id": 82, "isthing": 1, "name": "light"}, + {"color": [255, 0, 20], "id": 83, "isthing": 1, "name": "truck"}, + {"color": [255, 184, 184], "id": 84, "isthing": 0, "name": "tower"}, + {"color": [0, 31, 255], "id": 85, "isthing": 1, "name": "chandelier"}, + {"color": [0, 255, 61], "id": 86, "isthing": 1, "name": "awning, sunshade, sunblind"}, + {"color": [0, 71, 255], "id": 87, "isthing": 1, "name": "street lamp"}, + {"color": [255, 0, 204], "id": 88, "isthing": 1, "name": "booth"}, + {"color": [0, 255, 194], "id": 89, "isthing": 1, "name": "tv"}, + {"color": [0, 255, 82], "id": 90, "isthing": 1, "name": "plane"}, + {"color": [0, 10, 255], "id": 91, "isthing": 0, "name": "dirt track"}, + {"color": [0, 112, 255], "id": 92, "isthing": 1, "name": "clothes"}, + {"color": [51, 0, 255], "id": 93, "isthing": 1, "name": "pole"}, + {"color": [0, 194, 255], "id": 94, "isthing": 0, "name": "land, ground, soil"}, + { + "color": [0, 122, 255], + "id": 95, + "isthing": 1, + "name": "bannister, banister, balustrade, balusters, handrail", + }, + { + "color": [0, 255, 163], + "id": 96, + "isthing": 0, + "name": "escalator, moving staircase, moving stairway", + }, + { + "color": [255, 153, 0], + "id": 97, + "isthing": 1, + "name": "ottoman, pouf, pouffe, puff, hassock", + }, + {"color": [0, 255, 10], "id": 98, "isthing": 1, "name": "bottle"}, + {"color": [255, 112, 0], "id": 99, "isthing": 0, "name": "buffet, counter, sideboard"}, + { + "color": [143, 255, 0], + "id": 100, + "isthing": 0, + "name": "poster, posting, placard, notice, bill, card", + }, + {"color": [82, 0, 255], "id": 101, "isthing": 0, "name": "stage"}, + {"color": [163, 255, 0], "id": 102, "isthing": 1, "name": "van"}, + {"color": [255, 235, 0], "id": 103, "isthing": 1, "name": "ship"}, + {"color": [8, 184, 170], "id": 104, "isthing": 1, "name": "fountain"}, + { + "color": [133, 0, 255], + "id": 105, + "isthing": 0, + "name": "conveyer belt, conveyor belt, conveyer, conveyor, transporter", + }, + {"color": [0, 255, 92], "id": 106, "isthing": 0, "name": "canopy"}, + { + "color": [184, 0, 255], + "id": 107, + "isthing": 1, + "name": "washer, automatic washer, washing machine", + }, + {"color": [255, 0, 31], "id": 108, "isthing": 1, "name": "plaything, toy"}, + {"color": [0, 184, 255], "id": 109, "isthing": 0, "name": "pool"}, + {"color": [0, 214, 255], "id": 110, "isthing": 1, "name": "stool"}, + {"color": [255, 0, 112], "id": 111, "isthing": 1, "name": "barrel, cask"}, + {"color": [92, 255, 0], "id": 112, "isthing": 1, "name": "basket, handbasket"}, + {"color": [0, 224, 255], "id": 113, "isthing": 0, "name": "falls"}, + {"color": [112, 224, 255], "id": 114, "isthing": 0, "name": "tent"}, + {"color": [70, 184, 160], "id": 115, "isthing": 1, "name": "bag"}, + {"color": [163, 0, 255], "id": 116, "isthing": 1, "name": "minibike, motorbike"}, + {"color": [153, 0, 255], "id": 117, "isthing": 0, "name": "cradle"}, + {"color": [71, 255, 0], "id": 118, "isthing": 1, "name": "oven"}, + {"color": [255, 0, 163], "id": 119, "isthing": 1, "name": "ball"}, + {"color": [255, 204, 0], "id": 120, "isthing": 1, "name": "food, solid food"}, + {"color": [255, 0, 143], "id": 121, "isthing": 1, "name": 
"step, stair"}, + {"color": [0, 255, 235], "id": 122, "isthing": 0, "name": "tank, storage tank"}, + {"color": [133, 255, 0], "id": 123, "isthing": 1, "name": "trade name"}, + {"color": [255, 0, 235], "id": 124, "isthing": 1, "name": "microwave"}, + {"color": [245, 0, 255], "id": 125, "isthing": 1, "name": "pot"}, + {"color": [255, 0, 122], "id": 126, "isthing": 1, "name": "animal"}, + {"color": [255, 245, 0], "id": 127, "isthing": 1, "name": "bicycle"}, + {"color": [10, 190, 212], "id": 128, "isthing": 0, "name": "lake"}, + {"color": [214, 255, 0], "id": 129, "isthing": 1, "name": "dishwasher"}, + {"color": [0, 204, 255], "id": 130, "isthing": 1, "name": "screen"}, + {"color": [20, 0, 255], "id": 131, "isthing": 0, "name": "blanket, cover"}, + {"color": [255, 255, 0], "id": 132, "isthing": 1, "name": "sculpture"}, + {"color": [0, 153, 255], "id": 133, "isthing": 1, "name": "hood, exhaust hood"}, + {"color": [0, 41, 255], "id": 134, "isthing": 1, "name": "sconce"}, + {"color": [0, 255, 204], "id": 135, "isthing": 1, "name": "vase"}, + {"color": [41, 0, 255], "id": 136, "isthing": 1, "name": "traffic light"}, + {"color": [41, 255, 0], "id": 137, "isthing": 1, "name": "tray"}, + {"color": [173, 0, 255], "id": 138, "isthing": 1, "name": "trash can"}, + {"color": [0, 245, 255], "id": 139, "isthing": 1, "name": "fan"}, + {"color": [71, 0, 255], "id": 140, "isthing": 0, "name": "pier"}, + {"color": [122, 0, 255], "id": 141, "isthing": 0, "name": "crt screen"}, + {"color": [0, 255, 184], "id": 142, "isthing": 1, "name": "plate"}, + {"color": [0, 92, 255], "id": 143, "isthing": 1, "name": "monitor"}, + {"color": [184, 255, 0], "id": 144, "isthing": 1, "name": "bulletin board"}, + {"color": [0, 133, 255], "id": 145, "isthing": 0, "name": "shower"}, + {"color": [255, 214, 0], "id": 146, "isthing": 1, "name": "radiator"}, + {"color": [25, 194, 194], "id": 147, "isthing": 1, "name": "glass, drinking glass"}, + {"color": [102, 255, 0], "id": 148, "isthing": 1, "name": "clock"}, + {"color": [92, 0, 255], "id": 149, "isthing": 1, "name": "flag"}, +] + +ADE20k_COLORS = [k["color"] for k in ADE20K_150_CATEGORIES] + +MetadataCatalog.get("ade20k_sem_seg_train").set( + stuff_colors=ADE20k_COLORS[:], +) + +MetadataCatalog.get("ade20k_sem_seg_val").set( + stuff_colors=ADE20k_COLORS[:], +) + + +def load_ade20k_panoptic_json(json_file, image_dir, gt_dir, semseg_dir, meta): + """ + Args: + image_dir (str): path to the raw dataset. e.g., "~/coco/train2017". + gt_dir (str): path to the raw annotations. e.g., "~/coco/panoptic_train2017". + json_file (str): path to the json file. e.g., "~/coco/annotations/panoptic_train2017.json". + Returns: + list[dict]: a list of dicts in Detectron2 standard format. (See + `Using Custom Datasets `_ ) + """ + + def _convert_category_id(segment_info, meta): + if segment_info["category_id"] in meta["thing_dataset_id_to_contiguous_id"]: + segment_info["category_id"] = meta["thing_dataset_id_to_contiguous_id"][ + segment_info["category_id"] + ] + segment_info["isthing"] = True + else: + segment_info["category_id"] = meta["stuff_dataset_id_to_contiguous_id"][ + segment_info["category_id"] + ] + segment_info["isthing"] = False + return segment_info + + with PathManager.open(json_file) as f: + json_info = json.load(f) + + ret = [] + for ann in json_info["annotations"]: + image_id = ann["image_id"] + # TODO: currently we assume image and label has the same filename but + # different extension, and images have extension ".jpg" for COCO. 
Need + # to make image extension a user-provided argument if we extend this + # function to support other COCO-like datasets. + image_file = os.path.join(image_dir, os.path.splitext(ann["file_name"])[0] + ".jpg") + label_file = os.path.join(gt_dir, ann["file_name"]) + sem_label_file = os.path.join(semseg_dir, ann["file_name"]) + segments_info = [_convert_category_id(x, meta) for x in ann["segments_info"]] + ret.append( + { + "file_name": image_file, + "image_id": image_id, + "pan_seg_file_name": label_file, + "sem_seg_file_name": sem_label_file, + "segments_info": segments_info, + } + ) + assert len(ret), f"No images found in {image_dir}!" + assert PathManager.isfile(ret[0]["file_name"]), ret[0]["file_name"] + assert PathManager.isfile(ret[0]["pan_seg_file_name"]), ret[0]["pan_seg_file_name"] + assert PathManager.isfile(ret[0]["sem_seg_file_name"]), ret[0]["sem_seg_file_name"] + return ret + + +def register_ade20k_panoptic( + name, metadata, image_root, panoptic_root, semantic_root, panoptic_json, instances_json=None, +): + """ + Register a "standard" version of ADE20k panoptic segmentation dataset named `name`. + The dictionaries in this registered dataset follows detectron2's standard format. + Hence it's called "standard". + Args: + name (str): the name that identifies a dataset, + e.g. "ade20k_panoptic_train" + metadata (dict): extra metadata associated with this dataset. + image_root (str): directory which contains all the images + panoptic_root (str): directory which contains panoptic annotation images in COCO format + panoptic_json (str): path to the json panoptic annotation file in COCO format + sem_seg_root (none): not used, to be consistent with + `register_coco_panoptic_separated`. + instances_json (str): path to the json instance annotation file + """ + panoptic_name = name + DatasetCatalog.register( + panoptic_name, + lambda: load_ade20k_panoptic_json( + panoptic_json, image_root, panoptic_root, semantic_root, metadata + ), + ) + MetadataCatalog.get(panoptic_name).set( + panoptic_root=panoptic_root, + image_root=image_root, + panoptic_json=panoptic_json, + json_file=instances_json, + evaluator_type="ade20k_panoptic_seg", + ignore_label=255, + label_divisor=1000, + **metadata, + ) + + +_PREDEFINED_SPLITS_ADE20K_PANOPTIC = { + "ade20k_panoptic_train": ( + "ADEChallengeData2016/images/training", + "ADEChallengeData2016/ade20k_panoptic_train", + "ADEChallengeData2016/ade20k_panoptic_train.json", + "ADEChallengeData2016/annotations_detectron2/training", + "ADEChallengeData2016/ade20k_instance_train.json", + ), + "ade20k_panoptic_val": ( + "ADEChallengeData2016/images/validation", + "ADEChallengeData2016/ade20k_panoptic_val", + "ADEChallengeData2016/ade20k_panoptic_val.json", + "ADEChallengeData2016/annotations_detectron2/validation", + "ADEChallengeData2016/ade20k_instance_val.json", + ), +} + + +def get_metadata(): + meta = {} + # The following metadata maps contiguous id from [0, #thing categories + + # #stuff categories) to their names and colors. We have to replica of the + # same name and color under "thing_*" and "stuff_*" because the current + # visualization function in D2 handles thing and class classes differently + # due to some heuristic used in Panoptic FPN. We keep the same naming to + # enable reusing existing visualization functions. 
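A toy version of the remapping loop that follows, using three categories copied from the ADE20K list above: only "thing" ids receive an entry in the thing mapping, while every id, thing or stuff, receives an entry in the stuff mapping so the semantic-segmentation evaluator can be reused.

ADE_SUBSET = [
    {"id": 0, "isthing": 0, "name": "wall"},
    {"id": 7, "isthing": 1, "name": "bed"},
    {"id": 9, "isthing": 0, "name": "grass"},
]
thing_map, stuff_map = {}, {}
for i, cat in enumerate(ADE_SUBSET):
    if cat["isthing"]:
        thing_map[cat["id"]] = i   # contiguous id for "thing" classes only
    stuff_map[cat["id"]] = i       # contiguous id for every class (used for sem_seg evaluation)
assert thing_map == {7: 1}
assert stuff_map == {0: 0, 7: 1, 9: 2}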
+ thing_classes = [k["name"] for k in ADE20K_150_CATEGORIES if k["isthing"] == 1] + thing_colors = [k["color"] for k in ADE20K_150_CATEGORIES if k["isthing"] == 1] + stuff_classes = [k["name"] for k in ADE20K_150_CATEGORIES] + stuff_colors = [k["color"] for k in ADE20K_150_CATEGORIES] + + meta["thing_classes"] = thing_classes + meta["thing_colors"] = thing_colors + meta["stuff_classes"] = stuff_classes + meta["stuff_colors"] = stuff_colors + + # Convert category id for training: + # category id: like semantic segmentation, it is the class id for each + # pixel. Since there are some classes not used in evaluation, the category + # id is not always contiguous and thus we have two set of category ids: + # - original category id: category id in the original dataset, mainly + # used for evaluation. + # - contiguous category id: [0, #classes), in order to train the linear + # softmax classifier. + thing_dataset_id_to_contiguous_id = {} + stuff_dataset_id_to_contiguous_id = {} + + for i, cat in enumerate(ADE20K_150_CATEGORIES): + if cat["isthing"]: + thing_dataset_id_to_contiguous_id[cat["id"]] = i + # else: + # stuff_dataset_id_to_contiguous_id[cat["id"]] = i + + # in order to use sem_seg evaluator + stuff_dataset_id_to_contiguous_id[cat["id"]] = i + + meta["thing_dataset_id_to_contiguous_id"] = thing_dataset_id_to_contiguous_id + meta["stuff_dataset_id_to_contiguous_id"] = stuff_dataset_id_to_contiguous_id + + return meta + + +def register_all_ade20k_panoptic(root): + metadata = get_metadata() + for ( + prefix, + (image_root, panoptic_root, panoptic_json, semantic_root, instance_json), + ) in _PREDEFINED_SPLITS_ADE20K_PANOPTIC.items(): + # The "standard" version of COCO panoptic segmentation dataset, + # e.g. used by Panoptic-DeepLab + register_ade20k_panoptic( + prefix, + metadata, + os.path.join(root, image_root), + os.path.join(root, panoptic_root), + os.path.join(root, semantic_root), + os.path.join(root, panoptic_json), + os.path.join(root, instance_json), + ) + + +_root = os.getenv("DETECTRON2_DATASETS", "datasets") +register_all_ade20k_panoptic(_root) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/data/datasets/register_cityscapes_panoptic.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/data/datasets/register_cityscapes_panoptic.py new file mode 100644 index 0000000000000000000000000000000000000000..5f2c2a69e8c396b4b6fa8eb4125d76b9d1f3a101 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/data/datasets/register_cityscapes_panoptic.py @@ -0,0 +1,199 @@ +# ------------------------------------------------------------------------------ +# Reference: https://github.com/facebookresearch/detectron2/blob/main/detectron2/data/datasets/cityscapes_panoptic.py +# Modified by Jitesh Jain (https://github.com/praeclarumjj3) +# ------------------------------------------------------------------------------ + +import json +import logging +import os + +from annotator.oneformer.detectron2.data import DatasetCatalog, MetadataCatalog +from annotator.oneformer.detectron2.data.datasets.builtin_meta import CITYSCAPES_CATEGORIES +from annotator.oneformer.detectron2.utils.file_io import PathManager + +""" +This file contains functions to register the Cityscapes panoptic dataset to the DatasetCatalog. 
+""" + + +logger = logging.getLogger(__name__) + + +def get_cityscapes_panoptic_files(image_dir, gt_dir, json_info): + files = [] + # scan through the directory + cities = PathManager.ls(image_dir) + logger.info(f"{len(cities)} cities found in '{image_dir}'.") + image_dict = {} + for city in cities: + city_img_dir = os.path.join(image_dir, city) + for basename in PathManager.ls(city_img_dir): + image_file = os.path.join(city_img_dir, basename) + + suffix = "_leftImg8bit.png" + assert basename.endswith(suffix), basename + basename = os.path.basename(basename)[: -len(suffix)] + + image_dict[basename] = image_file + + for ann in json_info["annotations"]: + image_file = image_dict.get(ann["image_id"], None) + assert image_file is not None, "No image {} found for annotation {}".format( + ann["image_id"], ann["file_name"] + ) + label_file = os.path.join(gt_dir, ann["file_name"]) + segments_info = ann["segments_info"] + files.append((image_file, label_file, segments_info)) + + assert len(files), "No images found in {}".format(image_dir) + assert PathManager.isfile(files[0][0]), files[0][0] + assert PathManager.isfile(files[0][1]), files[0][1] + return files + + +def load_cityscapes_panoptic(image_dir, gt_dir, gt_json, meta): + """ + Args: + image_dir (str): path to the raw dataset. e.g., "~/cityscapes/leftImg8bit/train". + gt_dir (str): path to the raw annotations. e.g., + "~/cityscapes/gtFine/cityscapes_panoptic_train". + gt_json (str): path to the json file. e.g., + "~/cityscapes/gtFine/cityscapes_panoptic_train.json". + meta (dict): dictionary containing "thing_dataset_id_to_contiguous_id" + and "stuff_dataset_id_to_contiguous_id" to map category ids to + contiguous ids for training. + + Returns: + list[dict]: a list of dicts in Detectron2 standard format. (See + `Using Custom Datasets `_ ) + """ + + def _convert_category_id(segment_info, meta): + if segment_info["category_id"] in meta["thing_dataset_id_to_contiguous_id"]: + segment_info["category_id"] = meta["thing_dataset_id_to_contiguous_id"][ + segment_info["category_id"] + ] + else: + segment_info["category_id"] = meta["stuff_dataset_id_to_contiguous_id"][ + segment_info["category_id"] + ] + return segment_info + + assert os.path.exists( + gt_json + ), "Please run `python cityscapesscripts/preparation/createPanopticImgs.py` to generate label files." # noqa + + + with open(gt_json) as f: + json_info = json.load(f) + + files = get_cityscapes_panoptic_files(image_dir, gt_dir, json_info) + ret = [] + for image_file, label_file, segments_info in files: + sem_label_file = ( + image_file.replace("leftImg8bit", "gtFine").split(".")[0] + "_labelTrainIds.png" + ) + segments_info = [_convert_category_id(x, meta) for x in segments_info] + ret.append( + { + "file_name": image_file, + "image_id": "_".join( + os.path.splitext(os.path.basename(image_file))[0].split("_")[:3] + ), + "sem_seg_file_name": sem_label_file, + "pan_seg_file_name": label_file, + "segments_info": segments_info, + } + ) + assert len(ret), f"No images found in {image_dir}!" 
+ assert PathManager.isfile( + ret[0]["sem_seg_file_name"] + ), "Please generate labelTrainIds.png with cityscapesscripts/preparation/createTrainIdLabelImgs.py" # noqa + assert PathManager.isfile( + ret[0]["pan_seg_file_name"] + ), "Please generate panoptic annotation with python cityscapesscripts/preparation/createPanopticImgs.py" # noqa + return ret + + +_RAW_CITYSCAPES_PANOPTIC_SPLITS = { + "cityscapes_fine_panoptic_train": ( + "cityscapes/leftImg8bit/train", + "cityscapes/gtFine/cityscapes_panoptic_train", + "cityscapes/gtFine/cityscapes_panoptic_train.json", + ), + "cityscapes_fine_panoptic_val": ( + "cityscapes/leftImg8bit/val", + "cityscapes/gtFine/cityscapes_panoptic_val", + "cityscapes/gtFine/cityscapes_panoptic_val.json", + ), + # "cityscapes_fine_panoptic_test": not supported yet +} + + +def register_all_cityscapes_panoptic(root): + meta = {} + # The following metadata maps contiguous id from [0, #thing categories + + # #stuff categories) to their names and colors. We have to replica of the + # same name and color under "thing_*" and "stuff_*" because the current + # visualization function in D2 handles thing and class classes differently + # due to some heuristic used in Panoptic FPN. We keep the same naming to + # enable reusing existing visualization functions. + thing_classes = [k["name"] for k in CITYSCAPES_CATEGORIES] + thing_colors = [k["color"] for k in CITYSCAPES_CATEGORIES] + stuff_classes = [k["name"] for k in CITYSCAPES_CATEGORIES] + stuff_colors = [k["color"] for k in CITYSCAPES_CATEGORIES] + + meta["thing_classes"] = thing_classes + meta["thing_colors"] = thing_colors + meta["stuff_classes"] = stuff_classes + meta["stuff_colors"] = stuff_colors + + # There are three types of ids in cityscapes panoptic segmentation: + # (1) category id: like semantic segmentation, it is the class id for each + # pixel. Since there are some classes not used in evaluation, the category + # id is not always contiguous and thus we have two set of category ids: + # - original category id: category id in the original dataset, mainly + # used for evaluation. + # - contiguous category id: [0, #classes), in order to train the classifier + # (2) instance id: this id is used to differentiate different instances from + # the same category. For "stuff" classes, the instance id is always 0; for + # "thing" classes, the instance id starts from 1 and 0 is reserved for + # ignored instances (e.g. crowd annotation). + # (3) panoptic id: this is the compact id that encode both category and + # instance id by: category_id * 1000 + instance_id. 
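A one-line worked example of the panoptic id encoding described in the comment above (the numbers are arbitrary):

category_id, instance_id = 26, 3
panoptic_id = category_id * 1000 + instance_id   # 26003 packs both fields into one id
assert divmod(panoptic_id, 1000) == (category_id, instance_id)   # decoding recovers them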
+ thing_dataset_id_to_contiguous_id = {} + stuff_dataset_id_to_contiguous_id = {} + + for k in CITYSCAPES_CATEGORIES: + if k["isthing"] == 1: + thing_dataset_id_to_contiguous_id[k["id"]] = k["trainId"] + else: + stuff_dataset_id_to_contiguous_id[k["id"]] = k["trainId"] + + meta["thing_dataset_id_to_contiguous_id"] = thing_dataset_id_to_contiguous_id + meta["stuff_dataset_id_to_contiguous_id"] = stuff_dataset_id_to_contiguous_id + + for key, (image_dir, gt_dir, gt_json) in _RAW_CITYSCAPES_PANOPTIC_SPLITS.items(): + image_dir = os.path.join(root, image_dir) + gt_dir = os.path.join(root, gt_dir) + gt_json = os.path.join(root, gt_json) + + if key in DatasetCatalog.list(): + DatasetCatalog.remove(key) + + DatasetCatalog.register( + key, lambda x=image_dir, y=gt_dir, z=gt_json: load_cityscapes_panoptic(x, y, z, meta) + ) + MetadataCatalog.get(key).set( + panoptic_root=gt_dir, + image_root=image_dir, + panoptic_json=gt_json, + gt_dir=gt_dir.replace("cityscapes_panoptic_", ""), + evaluator_type="cityscapes_panoptic_seg", + ignore_label=255, + label_divisor=1000, + **meta, + ) + +_root = os.getenv("DETECTRON2_DATASETS", "datasets") +register_all_cityscapes_panoptic(_root) \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/data/datasets/register_coco_panoptic2instance.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/data/datasets/register_coco_panoptic2instance.py new file mode 100644 index 0000000000000000000000000000000000000000..511c5b66fa1a1814baf6f83bf048622723551e7d --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/data/datasets/register_coco_panoptic2instance.py @@ -0,0 +1,44 @@ +# ------------------------------------------------------------------------------ +# Reference: https://github.com/facebookresearch/detectron2/blob/main/detectron2/data/datasets/builtin.py +# Modified by Jitesh Jain (https://github.com/praeclarumjj3) +# ------------------------------------------------------------------------------ + + +""" +This file registers pre-defined datasets at hard-coded paths, and their metadata. + +We hard-code metadata for common datasets. This will enable: +1. Consistency check when loading the datasets +2. Use models on these standard datasets directly and run demos, + without having to download the dataset annotations + +We hard-code some paths to the dataset that's assumed to +exist in "./datasets/". + +Users SHOULD NOT use this file to create new dataset / metadata for new dataset. +To add new dataset, refer to the tutorial "docs/DATASETS.md". +""" + +import os +from annotator.oneformer.detectron2.data.datasets.builtin_meta import _get_builtin_metadata +from annotator.oneformer.detectron2.data.datasets.coco import register_coco_instances + + +_PREDEFINED_SPLITS_COCO = { + "coco_2017_val_panoptic2instance": ("coco/val2017", "coco/annotations/panoptic2instances_val2017.json"), +} + + +def register_panoptic2instances_coco(root): + for key, (image_root, json_file) in _PREDEFINED_SPLITS_COCO.items(): + # Assume pre-defined datasets live in `./datasets`. 
+ register_coco_instances( + key, + _get_builtin_metadata("coco"), + os.path.join(root, json_file) if "://" not in json_file else json_file, + os.path.join(root, image_root), + ) + + +_root = os.path.expanduser(os.getenv("DETECTRON2_DATASETS", "datasets")) +register_panoptic2instances_coco(_root) \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/data/datasets/register_coco_panoptic_annos_semseg.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/data/datasets/register_coco_panoptic_annos_semseg.py new file mode 100644 index 0000000000000000000000000000000000000000..170daf3ee71d9302220370d70f7c0160a4c2a235 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/data/datasets/register_coco_panoptic_annos_semseg.py @@ -0,0 +1,367 @@ +# ------------------------------------------------------------------------------ +# Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/data/datasets/register_coco_panoptic_annos_semseg.py +# Modified by Jitesh Jain (https://github.com/praeclarumjj3) +# ------------------------------------------------------------------------------ + +import json +import os + +from annotator.oneformer.detectron2.data import DatasetCatalog, MetadataCatalog +from annotator.oneformer.detectron2.data.datasets import load_sem_seg +from annotator.oneformer.detectron2.data.datasets.builtin_meta import COCO_CATEGORIES +from annotator.oneformer.detectron2.utils.file_io import PathManager +import contextlib +import logging +import io +from fvcore.common.timer import Timer +import annotator.oneformer.pycocotools.mask as mask_util +from annotator.oneformer.detectron2.structures import BoxMode + + +logger = logging.getLogger(__name__) + + +_PREDEFINED_SPLITS_COCO_PANOPTIC = { + "coco_2017_train_panoptic": ( + # This is the original panoptic annotation directory + "coco/panoptic_train2017", + "coco/annotations/panoptic_train2017.json", + # This directory contains semantic annotations that are + # converted from panoptic annotations. + # It is used by PanopticFPN. + # You can use the script at detectron2/datasets/prepare_panoptic_fpn.py + # to create these directories. + "coco/panoptic_semseg_train2017", + ), + "coco_2017_val_panoptic": ( + "coco/panoptic_val2017", + "coco/annotations/panoptic_val2017.json", + "coco/panoptic_semseg_val2017", + ), +} + +def load_coco_instance_json(json_file, image_root, dataset_name=None): + from annotator.oneformer.pycocotools.coco import COCO + + timer = Timer() + json_file = PathManager.get_local_path(json_file) + with contextlib.redirect_stdout(io.StringIO()): + coco_api = COCO(json_file) + if timer.seconds() > 1: + logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds())) + + id_map = None + if dataset_name is not None: + meta = MetadataCatalog.get(dataset_name) + cat_ids = sorted(coco_api.getCatIds()) + cats = coco_api.loadCats(cat_ids) + # The categories in a custom json file may not be sorted. + thing_classes = [c["name"] for c in sorted(cats, key=lambda x: x["id"])] + meta.thing_classes = thing_classes + + # In COCO, certain category ids are artificially removed, + # and by convention they are always ignored. + # We deal with COCO's id issue and translate + # the category ids to contiguous ids in [0, 80). 
+ + # It works by looking at the "categories" field in the json, therefore + # if users' own json also have incontiguous ids, we'll + # apply this mapping as well but print a warning. + if not (min(cat_ids) == 1 and max(cat_ids) == len(cat_ids)): + if "coco" not in dataset_name: + logger.warning( + """ +Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you. +""" + ) + id_map = {v: i for i, v in enumerate(cat_ids)} + meta.thing_dataset_id_to_contiguous_id = id_map + + # sort indices for reproducible results + img_ids = sorted(coco_api.imgs.keys()) + # imgs is a list of dicts, each looks something like: + # {'license': 4, + # 'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg', + # 'file_name': 'COCO_val2014_000000001268.jpg', + # 'height': 427, + # 'width': 640, + # 'date_captured': '2013-11-17 05:57:24', + # 'id': 1268} + imgs = coco_api.loadImgs(img_ids) + # anns is a list[list[dict]], where each dict is an annotation + # record for an object. The inner list enumerates the objects in an image + # and the outer list enumerates over images. Example of anns[0]: + # [{'segmentation': [[192.81, + # 247.09, + # ... + # 219.03, + # 249.06]], + # 'area': 1035.749, + # 'iscrowd': 0, + # 'image_id': 1268, + # 'bbox': [192.81, 224.8, 74.73, 33.43], + # 'category_id': 16, + # 'id': 42986}, + # ...] + anns = [coco_api.imgToAnns[img_id] for img_id in img_ids] + total_num_valid_anns = sum([len(x) for x in anns]) + total_num_anns = len(coco_api.anns) + if total_num_valid_anns < total_num_anns: + logger.warning( + f"{json_file} contains {total_num_anns} annotations, but only " + f"{total_num_valid_anns} of them match to images in the file." + ) + + if "minival" not in json_file: + # The popular valminusminival & minival annotations for COCO2014 contain this bug. + # However the ratio of buggy annotations there is tiny and does not affect accuracy. + # Therefore we explicitly white-list them. + ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image] + assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique!".format( + json_file + ) + + imgs_anns = list(zip(imgs, anns)) + logger.info("Loaded {} images in COCO format from {}".format(len(imgs_anns), json_file)) + + dataset_dicts = {} + + ann_keys = ["iscrowd", "bbox", "keypoints", "category_id"] + + num_instances_without_valid_segmentation = 0 + + for (img_dict, anno_dict_list) in imgs_anns: + record = {} + record["file_name"] = os.path.join(image_root, img_dict["file_name"]) + record["height"] = img_dict["height"] + record["width"] = img_dict["width"] + image_id = record["image_id"] = img_dict["id"] + + objs = [] + for anno in anno_dict_list: + # Check that the image_id in this annotation is the same as + # the image_id we're looking at. + # This fails only when the data parsing logic or the annotation file is buggy. + + # The original COCO valminusminival2014 & minival2014 annotation files + # actually contains bugs that, together with certain ways of using COCO API, + # can trigger this assertion. + assert anno["image_id"] == image_id + + assert anno.get("ignore", 0) == 0, '"ignore" in COCO json file is not supported.' + + obj = {key: anno[key] for key in ann_keys if key in anno} + if "bbox" in obj and len(obj["bbox"]) == 0: + raise ValueError( + f"One annotation of image {image_id} contains empty 'bbox' value! " + "This json does not have valid COCO format." 
+ ) + + segm = anno.get("segmentation", None) + if segm: # either list[list[float]] or dict(RLE) + if isinstance(segm, dict): + if isinstance(segm["counts"], list): + # convert to compressed RLE + segm = mask_util.frPyObjects(segm, *segm["size"]) + else: + # filter out invalid polygons (< 3 points) + segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6] + if len(segm) == 0: + num_instances_without_valid_segmentation += 1 + continue # ignore this instance + obj["segmentation"] = segm + + keypts = anno.get("keypoints", None) + if keypts: # list[int] + for idx, v in enumerate(keypts): + if idx % 3 != 2: + # COCO's segmentation coordinates are floating points in [0, H or W], + # but keypoint coordinates are integers in [0, H-1 or W-1] + # Therefore we assume the coordinates are "pixel indices" and + # add 0.5 to convert to floating point coordinates. + keypts[idx] = v + 0.5 + obj["keypoints"] = keypts + + obj["bbox_mode"] = BoxMode.XYWH_ABS + if id_map: + annotation_category_id = obj["category_id"] + try: + obj["category_id"] = id_map[annotation_category_id] + except KeyError as e: + raise KeyError( + f"Encountered category_id={annotation_category_id} " + "but this id does not exist in 'categories' of the json file." + ) from e + objs.append(obj) + record["annotations"] = objs + dataset_dicts[image_id] = record + + if num_instances_without_valid_segmentation > 0: + logger.warning( + "Filtered out {} instances without valid segmentation. ".format( + num_instances_without_valid_segmentation + ) + + "There might be issues in your dataset generation process. Please " + "check https://detectron2.readthedocs.io/en/latest/tutorials/datasets.html carefully" + ) + return dataset_dicts + +def get_metadata(): + meta = {} + # The following metadata maps contiguous id from [0, #thing categories + + # #stuff categories) to their names and colors. We have to replica of the + # same name and color under "thing_*" and "stuff_*" because the current + # visualization function in D2 handles thing and class classes differently + # due to some heuristic used in Panoptic FPN. We keep the same naming to + # enable reusing existing visualization functions. + thing_classes = [k["name"] for k in COCO_CATEGORIES if k["isthing"] == 1] + thing_colors = [k["color"] for k in COCO_CATEGORIES if k["isthing"] == 1] + stuff_classes = [k["name"] for k in COCO_CATEGORIES] + stuff_colors = [k["color"] for k in COCO_CATEGORIES] + + meta["thing_classes"] = thing_classes + meta["thing_colors"] = thing_colors + meta["stuff_classes"] = stuff_classes + meta["stuff_colors"] = stuff_colors + + # Convert category id for training: + # category id: like semantic segmentation, it is the class id for each + # pixel. Since there are some classes not used in evaluation, the category + # id is not always contiguous and thus we have two set of category ids: + # - original category id: category id in the original dataset, mainly + # used for evaluation. + # - contiguous category id: [0, #classes), in order to train the linear + # softmax classifier. 
+ thing_dataset_id_to_contiguous_id = {} + stuff_dataset_id_to_contiguous_id = {} + + for i, cat in enumerate(COCO_CATEGORIES): + if cat["isthing"]: + thing_dataset_id_to_contiguous_id[cat["id"]] = i + # else: + # stuff_dataset_id_to_contiguous_id[cat["id"]] = i + + # in order to use sem_seg evaluator + stuff_dataset_id_to_contiguous_id[cat["id"]] = i + + meta["thing_dataset_id_to_contiguous_id"] = thing_dataset_id_to_contiguous_id + meta["stuff_dataset_id_to_contiguous_id"] = stuff_dataset_id_to_contiguous_id + + return meta + + +def load_coco_panoptic_json(json_file, instances_json, instances_name, image_dir, gt_dir, semseg_dir, meta): + """ + Args: + image_dir (str): path to the raw dataset. e.g., "~/coco/train2017". + gt_dir (str): path to the raw annotations. e.g., "~/coco/panoptic_train2017". + json_file (str): path to the json file. e.g., "~/coco/annotations/panoptic_train2017.json". + Returns: + list[dict]: a list of dicts in Detectron2 standard format. (See + `Using Custom Datasets `_ ) + """ + + def _convert_category_id(segment_info, meta): + if segment_info["category_id"] in meta["thing_dataset_id_to_contiguous_id"]: + segment_info["category_id"] = meta["thing_dataset_id_to_contiguous_id"][ + segment_info["category_id"] + ] + segment_info["isthing"] = True + else: + segment_info["category_id"] = meta["stuff_dataset_id_to_contiguous_id"][ + segment_info["category_id"] + ] + segment_info["isthing"] = False + return segment_info + + with PathManager.open(json_file) as f: + json_info = json.load(f) + + instance_data_dicts = load_coco_instance_json(instances_json, image_dir.replace("panoptic_", ""), instances_name) + + ret = [] + for ann in json_info["annotations"]: + image_id = int(ann["image_id"]) + # TODO: currently we assume image and label has the same filename but + # different extension, and images have extension ".jpg" for COCO. Need + # to make image extension a user-provided argument if we extend this + # function to support other COCO-like datasets. + image_file = os.path.join(image_dir, os.path.splitext(ann["file_name"])[0] + ".jpg") + label_file = os.path.join(gt_dir, ann["file_name"]) + sem_label_file = os.path.join(semseg_dir, ann["file_name"]) + segments_info = [_convert_category_id(x, meta) for x in ann["segments_info"]] + ret.append( + { + "file_name": image_file, + "image_id": image_id, + "pan_seg_file_name": label_file, + "sem_seg_file_name": sem_label_file, + "segments_info": segments_info, + "annotations": instance_data_dicts[image_id]["annotations"], + } + ) + assert len(ret), f"No images found in {image_dir}!" 
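The loop above attaches per-image instance annotations to each panoptic record by looking them up with the integer image id; a minimal sketch of that join (both dicts are invented placeholders):

instance_data_dicts = {9: {"annotations": [{"category_id": 0, "bbox": [10, 20, 30, 40]}]}}
panoptic_ann = {"image_id": "9", "file_name": "000000000009.png"}
image_id = int(panoptic_ann["image_id"])
record = {
    "image_id": image_id,
    # instance annotations joined from the instances json, keyed by the same image id
    "annotations": instance_data_dicts[image_id]["annotations"],
}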
+ assert PathManager.isfile(ret[0]["file_name"]), ret[0]["file_name"] + assert PathManager.isfile(ret[0]["pan_seg_file_name"]), ret[0]["pan_seg_file_name"] + assert PathManager.isfile(ret[0]["sem_seg_file_name"]), ret[0]["sem_seg_file_name"] + return ret + + +def register_coco_panoptic_annos_sem_seg( + name, metadata, image_root, panoptic_root, panoptic_json, sem_seg_root, instances_json, instances_name, +): + panoptic_name = name + delattr(MetadataCatalog.get(panoptic_name), "thing_classes") + delattr(MetadataCatalog.get(panoptic_name), "thing_colors") + MetadataCatalog.get(panoptic_name).set( + thing_classes=metadata["thing_classes"], + thing_colors=metadata["thing_colors"], + # thing_dataset_id_to_contiguous_id=metadata["thing_dataset_id_to_contiguous_id"], + ) + + # the name is "coco_2017_train_panoptic_with_sem_seg" and "coco_2017_val_panoptic_with_sem_seg" + semantic_name = name + "_with_sem_seg" + DatasetCatalog.register( + semantic_name, + lambda: load_coco_panoptic_json(panoptic_json, instances_json, instances_name, image_root, panoptic_root, sem_seg_root, metadata), + ) + MetadataCatalog.get(semantic_name).set( + sem_seg_root=sem_seg_root, + panoptic_root=panoptic_root, + image_root=image_root, + panoptic_json=panoptic_json, + json_file=instances_json, + evaluator_type="coco_panoptic_seg", + ignore_label=255, + label_divisor=1000, + **metadata, + ) + + +def register_all_coco_panoptic_annos_sem_seg(root): + for ( + prefix, + (panoptic_root, panoptic_json, semantic_root), + ) in _PREDEFINED_SPLITS_COCO_PANOPTIC.items(): + + prefix_instances = prefix[: -len("_panoptic")] + instances_meta = MetadataCatalog.get(prefix_instances) + image_root, instances_json = instances_meta.image_root, instances_meta.json_file + + if 'val' in instances_json: + instances_json = instances_json.replace('instances_', 'panoptic2instances_') + + register_coco_panoptic_annos_sem_seg( + prefix, + get_metadata(), + image_root, + os.path.join(root, panoptic_root), + os.path.join(root, panoptic_json), + os.path.join(root, semantic_root), + instances_json, + prefix_instances, + ) + + +_root = os.getenv("DETECTRON2_DATASETS", "datasets") +register_all_coco_panoptic_annos_sem_seg(_root) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/data/tokenizer.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/data/tokenizer.py new file mode 100644 index 0000000000000000000000000000000000000000..05d4c29c2d1ed03e5748e7346eeea494a2cd9144 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/data/tokenizer.py @@ -0,0 +1,192 @@ +# ------------------------------------------------------------------------- +# MIT License +# +# Copyright (c) 2021 OpenAI +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +# Modified by Jiarui Xu +# ------------------------------------------------------------------------- + +import gzip +import html +import os +from functools import lru_cache + +import ftfy +import regex as re +import torch + + +@lru_cache() +def default_bpe(): + return os.path.join(os.path.dirname(os.path.abspath(__file__)), 'bpe_simple_vocab_16e6.txt.gz') + +@lru_cache() +def bytes_to_unicode(): + """Returns list of utf-8 byte and a corresponding list of unicode strings. + + The reversible bpe codes work on unicode strings. This means you need a large # of unicode characters in your vocab + if you want to avoid UNKs. When you're at something like a 10B token dataset you end up needing around 5K for decent + coverage. This is a significant percentage of your normal, say, 32K bpe vocab. To avoid that, we want lookup tables + between utf-8 bytes and unicode strings. And avoids mapping to whitespace/control characters the bpe code barfs on. + """ + bs = list(range(ord('!'), ord('~') + 1)) + list(range(ord('¡'), ord('¬') + 1)) + list(range(ord('®'), ord('ÿ') + 1)) + cs = bs[:] + n = 0 + for b in range(2**8): + if b not in bs: + bs.append(b) + cs.append(2**8 + n) + n += 1 + cs = [chr(n) for n in cs] + return dict(zip(bs, cs)) + + +def get_pairs(word): + """Return set of symbol pairs in a word. + + Word is represented as tuple of symbols (symbols being variable-length strings). 
+ """ + pairs = set() + prev_char = word[0] + for char in word[1:]: + pairs.add((prev_char, char)) + prev_char = char + return pairs + + +def basic_clean(text): + text = ftfy.fix_text(text) + text = html.unescape(html.unescape(text)) + return text.strip() + + +def whitespace_clean(text): + text = re.sub(r'\s+', ' ', text) + text = text.strip() + return text + +class Tokenize: + + def __init__(self, tokenizer, max_seq_len=77, truncate=True): + self.tokenizer = tokenizer + self.max_seq_len = max_seq_len + self.truncate = truncate + + def __call__(self, texts): + expanded_dim = False + if isinstance(texts, str): + texts = [texts] + expanded_dim = True + + sot_token = self.tokenizer.encoder['<|startoftext|>'] + eot_token = self.tokenizer.encoder['<|endoftext|>'] + all_tokens = [[sot_token] + self.tokenizer.encode(text) + [eot_token] for text in texts] + result = torch.zeros(len(all_tokens), self.max_seq_len, dtype=torch.long) + + for i, tokens in enumerate(all_tokens): + if len(tokens) > self.max_seq_len: + if self.truncate: + tokens = tokens[:self.max_seq_len] + tokens[-1] = eot_token + else: + raise RuntimeError(f'Input {texts[i]} is too long for context length {self.max_seq_len}') + result[i, :len(tokens)] = torch.tensor(tokens) + + if expanded_dim: + return result[0] + + return result + + +class SimpleTokenizer(object): + + def __init__(self, bpe_path: str = default_bpe()): + self.byte_encoder = bytes_to_unicode() + self.byte_decoder = {v: k for k, v in self.byte_encoder.items()} + merges = gzip.open(bpe_path).read().decode('utf-8').split('\n') + merges = merges[1:49152 - 256 - 2 + 1] + merges = [tuple(merge.split()) for merge in merges] + vocab = list(bytes_to_unicode().values()) + vocab = vocab + [v + '' for v in vocab] + for merge in merges: + vocab.append(''.join(merge)) + vocab.extend(['<|startoftext|>', '<|endoftext|>']) + self.encoder = dict(zip(vocab, range(len(vocab)))) + self.decoder = {v: k for k, v in self.encoder.items()} + self.bpe_ranks = dict(zip(merges, range(len(merges)))) + self.cache = {'<|startoftext|>': '<|startoftext|>', '<|endoftext|>': '<|endoftext|>'} + self.pat = re.compile( + r"""<\|startoftext\|>|<\|endoftext\|>|'s|'t|'re|'ve|'m|'ll|'d|[\p{L}]+|[\p{N}]|[^\s\p{L}\p{N}]+""", + re.IGNORECASE) + + def bpe(self, token): + if token in self.cache: + return self.cache[token] + word = tuple(token[:-1]) + (token[-1] + '', ) + pairs = get_pairs(word) + + if not pairs: + return token + '' + + while True: + bigram = min(pairs, key=lambda pair: self.bpe_ranks.get(pair, float('inf'))) + if bigram not in self.bpe_ranks: + break + first, second = bigram + new_word = [] + i = 0 + while i < len(word): + try: + j = word.index(first, i) + new_word.extend(word[i:j]) + i = j + except: # noqa: E722 + new_word.extend(word[i:]) + break + + if word[i] == first and i < len(word) - 1 and word[i + 1] == second: + new_word.append(first + second) + i += 2 + else: + new_word.append(word[i]) + i += 1 + new_word = tuple(new_word) + word = new_word + if len(word) == 1: + break + else: + pairs = get_pairs(word) + word = ' '.join(word) + self.cache[token] = word + return word + + def encode(self, text): + bpe_tokens = [] + text = whitespace_clean(basic_clean(text)).lower() + for token in re.findall(self.pat, text): + token = ''.join(self.byte_encoder[b] for b in token.encode('utf-8')) + bpe_tokens.extend(self.encoder[bpe_token] for bpe_token in self.bpe(token).split(' ')) + return bpe_tokens + + def decode(self, tokens): + text = ''.join([self.decoder[token] for token in tokens]) + text = 
bytearray([self.byte_decoder[c] for c in text]).decode('utf-8', errors='replace').replace('', ' ') + return text \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/demo/colormap.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/demo/colormap.py new file mode 100644 index 0000000000000000000000000000000000000000..3eff9a46d37a1926c48ef0ad6e3308128438140f --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/demo/colormap.py @@ -0,0 +1,170 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +""" +An awesome colormap for really neat visualizations. +Copied from Detectron, and removed gray colors. +""" + +import numpy as np +import random +random.seed(0) + +__all__ = ["colormap", "random_color", "random_colors"] + +# fmt: off +# RGB: +# _COLORS = np.array( +# [ +# 0.000, 0.447, 0.741, +# 0.850, 0.325, 0.098, +# 0.929, 0.694, 0.125, +# 0.494, 0.184, 0.556, +# 0.466, 0.674, 0.188, +# 0.301, 0.745, 0.933, +# 0.635, 0.078, 0.184, +# 0.300, 0.300, 0.300, +# 0.600, 0.600, 0.600, +# 1.000, 0.000, 0.000, +# 1.000, 0.500, 0.000, +# 0.749, 0.749, 0.000, +# 0.000, 1.000, 0.000, +# 0.000, 0.000, 1.000, +# 0.667, 0.000, 1.000, +# 0.333, 0.333, 0.000, +# 0.333, 0.667, 0.000, +# 0.333, 1.000, 0.000, +# 0.667, 0.333, 0.000, +# 0.667, 0.667, 0.000, +# 0.667, 1.000, 0.000, +# 1.000, 0.333, 0.000, +# 1.000, 0.667, 0.000, +# 1.000, 1.000, 0.000, +# 0.000, 0.333, 0.500, +# 0.000, 0.667, 0.500, +# 0.000, 1.000, 0.500, +# 0.333, 0.000, 0.500, +# 0.333, 0.333, 0.500, +# 0.333, 0.667, 0.500, +# 0.333, 1.000, 0.500, +# 0.667, 0.000, 0.500, +# 0.667, 0.333, 0.500, +# 0.667, 0.667, 0.500, +# 0.667, 1.000, 0.500, +# 1.000, 0.000, 0.500, +# 1.000, 0.333, 0.500, +# 1.000, 0.667, 0.500, +# 1.000, 1.000, 0.500, +# 0.000, 0.333, 1.000, +# 0.000, 0.667, 1.000, +# 0.000, 1.000, 1.000, +# 0.333, 0.000, 1.000, +# 0.333, 0.333, 1.000, +# 0.333, 0.667, 1.000, +# 0.333, 1.000, 1.000, +# 0.667, 0.000, 1.000, +# 0.667, 0.333, 1.000, +# 0.667, 0.667, 1.000, +# 0.667, 1.000, 1.000, +# 1.000, 0.000, 1.000, +# 1.000, 0.333, 1.000, +# 1.000, 0.667, 1.000, +# 0.333, 0.000, 0.000, +# 0.500, 0.000, 0.000, +# 0.667, 0.000, 0.000, +# 0.833, 0.000, 0.000, +# 1.000, 0.000, 0.000, +# 0.000, 0.167, 0.000, +# 0.000, 0.333, 0.000, +# 0.000, 0.500, 0.000, +# 0.000, 0.667, 0.000, +# 0.000, 0.833, 0.000, +# 0.000, 1.000, 0.000, +# 0.000, 0.000, 0.167, +# 0.000, 0.000, 0.333, +# 0.000, 0.000, 0.500, +# 0.000, 0.000, 0.667, +# 0.000, 0.000, 0.833, +# 0.000, 0.000, 1.000, +# 0.000, 0.000, 0.000, +# 0.143, 0.143, 0.143, +# 0.857, 0.857, 0.857, +# 1.000, 1.000, 1.000 +# ] +# ).astype(np.float32).reshape(-1, 3) +# fmt: on + +_COLORS = [] + + +def gen_color(): + color = tuple(np.round(np.random.choice(range(256), size=3)/255, 3)) + if color not in _COLORS and np.mean(color) != 0.0: + _COLORS.append(color) + else: + gen_color() + + +for _ in range(300): + gen_color() + + +def colormap(rgb=False, maximum=255): + """ + Args: + rgb (bool): whether to return RGB colors or BGR colors. + maximum (int): either 255 or 1 + Returns: + ndarray: a float32 array of Nx3 colors, in range [0, 255] or [0, 1] + """ + assert maximum in [255, 1], maximum + c = _COLORS * maximum + if not rgb: + c = c[:, ::-1] + return c + + +def random_color(rgb=False, maximum=255): + """ + Args: + rgb (bool): whether to return RGB colors or BGR colors. 
+ maximum (int): either 255 or 1 + Returns: + ndarray: a vector of 3 numbers + """ + idx = np.random.randint(0, len(_COLORS)) + ret = _COLORS[idx] * maximum + if not rgb: + ret = ret[::-1] + return ret + + +def random_colors(N, rgb=False, maximum=255): + """ + Args: + N (int): number of unique colors needed + rgb (bool): whether to return RGB colors or BGR colors. + maximum (int): either 255 or 1 + Returns: + ndarray: a list of random_color + """ + indices = random.sample(range(len(_COLORS)), N) + ret = [_COLORS[i] * maximum for i in indices] + if not rgb: + ret = [x[::-1] for x in ret] + return ret + + +if __name__ == "__main__": + import cv2 + + size = 100 + H, W = 10, 10 + canvas = np.random.rand(H * size, W * size, 3).astype("float32") + for h in range(H): + for w in range(W): + idx = h * W + w + if idx >= len(_COLORS): + break + canvas[h * size : (h + 1) * size, w * size : (w + 1) * size] = _COLORS[idx] + cv2.imshow("a", canvas) + cv2.waitKey(0) \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/demo/defaults.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/demo/defaults.py new file mode 100644 index 0000000000000000000000000000000000000000..ba99129950ce16adba975f8138d73d6883865f42 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/demo/defaults.py @@ -0,0 +1,77 @@ +import torch +import annotator.oneformer.detectron2.data.transforms as T +from annotator.oneformer.detectron2.checkpoint import DetectionCheckpointer +from annotator.oneformer.detectron2.data import ( + MetadataCatalog, +) +from annotator.oneformer.detectron2.modeling import build_model + + +__all__ = [ + "DefaultPredictor", +] + + +class DefaultPredictor: + """ + Create a simple end-to-end predictor with the given config that runs on + single device for a single input image. + Compared to using the model directly, this class does the following additions: + 1. Load checkpoint from `cfg.MODEL.WEIGHTS`. + 2. Always take BGR image as the input and apply conversion defined by `cfg.INPUT.FORMAT`. + 3. Apply resizing defined by `cfg.INPUT.{MIN,MAX}_SIZE_TEST`. + 4. Take one input image and produce a single output, instead of a batch. + This is meant for simple demo purposes, so it does the above steps automatically. + This is not meant for benchmarks or running complicated inference logic. + If you'd like to do anything more complicated, please refer to its source code as + examples to build and use the model manually. + Attributes: + metadata (Metadata): the metadata of the underlying dataset, obtained from + cfg.DATASETS.TEST. + Examples: + :: + pred = DefaultPredictor(cfg) + inputs = cv2.imread("input.jpg") + outputs = pred(inputs) + """ + + def __init__(self, cfg): + self.cfg = cfg.clone() # cfg can be modified by model + self.model = build_model(self.cfg) + self.model.eval() + if len(cfg.DATASETS.TEST): + self.metadata = MetadataCatalog.get(cfg.DATASETS.TEST[0]) + + checkpointer = DetectionCheckpointer(self.model) + checkpointer.load(cfg.MODEL.WEIGHTS) + + self.aug = T.ResizeShortestEdge( + [cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MIN_SIZE_TEST], cfg.INPUT.MAX_SIZE_TEST + ) + + self.input_format = cfg.INPUT.FORMAT + assert self.input_format in ["RGB", "BGR"], self.input_format + + def __call__(self, original_image, task): + """ + Args: + original_image (np.ndarray): an image of shape (H, W, C) (in BGR order). + Returns: + predictions (dict): + the output of the model for one image only. 
+ See :doc:`/tutorials/models` for details about the format. + """ + with torch.no_grad(): # https://github.com/sphinx-doc/sphinx/issues/4258 + # Apply pre-processing to image. + if self.input_format == "RGB": + # whether the model expects BGR inputs or RGB + original_image = original_image[:, :, ::-1] + height, width = original_image.shape[:2] + image = self.aug.get_transform(original_image).apply_image(original_image) + image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1)) + + task = f"The task is {task}" + + inputs = {"image": image, "height": height, "width": width, "task": task} + predictions = self.model([inputs])[0] + return predictions \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/demo/predictor.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/demo/predictor.py new file mode 100644 index 0000000000000000000000000000000000000000..4b2de6c7db1baca4ea5d234b1f1ae99d341eef65 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/demo/predictor.py @@ -0,0 +1,190 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# Copied from: https://github.com/facebookresearch/detectron2/blob/master/demo/predictor.py +import atexit +import bisect +import multiprocessing as mp +from collections import deque + +import cv2 +import torch + +from annotator.oneformer.detectron2.data import MetadataCatalog +from defaults import DefaultPredictor +from annotator.oneformer.detectron2.utils.video_visualizer import VideoVisualizer +from visualizer import ColorMode, Visualizer + + +class VisualizationDemo(object): + def __init__(self, cfg, instance_mode=ColorMode.IMAGE, parallel=False): + """ + Args: + cfg (CfgNode): + instance_mode (ColorMode): + parallel (bool): whether to run the model in different processes from visualization. + Useful since the visualization logic can be slow. + """ + self.metadata = MetadataCatalog.get( + cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused" + ) + if 'cityscapes_fine_sem_seg_val' in cfg.DATASETS.TEST[0]: + from cityscapesscripts.helpers.labels import labels + stuff_colors = [k.color for k in labels if k.trainId != 255] + self.metadata = self.metadata.set(stuff_colors=stuff_colors) + self.cpu_device = torch.device("cpu") + self.instance_mode = instance_mode + + self.parallel = parallel + if parallel: + num_gpu = torch.cuda.device_count() + self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu) + else: + self.predictor = DefaultPredictor(cfg) + + def run_on_image(self, image, task, sem_gt, pan_gt, ins_gt, box_gt): + """ + Args: + image (np.ndarray): an image of shape (H, W, C) (in BGR order). + This is the format used by OpenCV. + Returns: + predictions (dict): the output of the model. + vis_output (VisImage): the visualized image output. + """ + vis_output = None + # Convert image from OpenCV BGR format to Matplotlib RGB format. 
+ image = image[:, :, ::-1] + vis_output = {} + + if task == 'panoptic': + visualizer = Visualizer(image, metadata=self.metadata, instance_mode=0) + predictions = self.predictor(image, "panoptic") + panoptic_seg, segments_info = predictions["panoptic_seg"] + vis_output['panoptic'] = visualizer.draw_panoptic_seg_predictions( + panoptic_seg.to(self.cpu_device), segments_info, alpha=1 + ) + + # visualizer = Visualizer(image, metadata=self.metadata, instance_mode=0) + # vis_output['pan_gt'] = visualizer.draw_panoptic_seg( + # pan_gt[0].to(self.cpu_device), pan_gt[1], alpha=1 + # ) + + if task == 'panoptic' or task == 'semantic': + visualizer = Visualizer(image, metadata=self.metadata, instance_mode=1) + predictions = self.predictor(image, "semantic") + vis_output['semantic'] = visualizer.draw_sem_seg( + predictions["sem_seg"].argmax(dim=0).to(self.cpu_device), alpha=1 + ) + + # visualizer = Visualizer(image, metadata=self.metadata, instance_mode=1) + # vis_output['gt_sem'] = visualizer.draw_sem_seg( + # sem_gt.to(self.cpu_device), alpha=1 + # ) + + if task == 'panoptic' or task == 'instance': + visualizer = Visualizer(image, metadata=self.metadata, instance_mode=2) + predictions = self.predictor(image, "instance") + instances = predictions["instances"].to(self.cpu_device) + vis_output['instance'] = visualizer.draw_instance_predictions(predictions=instances, alpha=1) + + if 'boxes' in predictions: + boxes, labels, scores = predictions["boxes"] + visualizer = Visualizer(image, False, metadata=self.metadata, instance_mode=0) + vis_output['boxes'] = visualizer.draw_box_predictions( + boxes.to(self.cpu_device), labels.to(self.cpu_device), scores.to(self.cpu_device)) + + + # visualizer = Visualizer(image, metadata=self.metadata, instance_mode=2) + # vis_output['ins_gt'] = visualizer.draw_instance_predictions(predictions=ins_gt.to(self.cpu_device), alpha=1) + # vis_output['input'] = visualizer.get_image(image) + + return predictions, vis_output + + +class AsyncPredictor: + """ + A predictor that runs the model asynchronously, possibly on >1 GPUs. + Because rendering the visualization takes considerably amount of time, + this helps improve throughput a little bit when rendering videos. 
+ """ + + class _StopToken: + pass + + class _PredictWorker(mp.Process): + def __init__(self, cfg, task_queue, result_queue): + self.cfg = cfg + self.task_queue = task_queue + self.result_queue = result_queue + super().__init__() + + def run(self): + predictor = DefaultPredictor(self.cfg) + + while True: + task = self.task_queue.get() + if isinstance(task, AsyncPredictor._StopToken): + break + idx, data = task + result = predictor(data) + self.result_queue.put((idx, result)) + + def __init__(self, cfg, num_gpus: int = 1): + """ + Args: + cfg (CfgNode): + num_gpus (int): if 0, will run on CPU + """ + num_workers = max(num_gpus, 1) + self.task_queue = mp.Queue(maxsize=num_workers * 3) + self.result_queue = mp.Queue(maxsize=num_workers * 3) + self.procs = [] + for gpuid in range(max(num_gpus, 1)): + cfg = cfg.clone() + cfg.defrost() + cfg.MODEL.DEVICE = "cuda:{}".format(gpuid) if num_gpus > 0 else "cpu" + self.procs.append( + AsyncPredictor._PredictWorker(cfg, self.task_queue, self.result_queue) + ) + + self.put_idx = 0 + self.get_idx = 0 + self.result_rank = [] + self.result_data = [] + + for p in self.procs: + p.start() + atexit.register(self.shutdown) + + def put(self, image): + self.put_idx += 1 + self.task_queue.put((self.put_idx, image)) + + def get(self): + self.get_idx += 1 # the index needed for this request + if len(self.result_rank) and self.result_rank[0] == self.get_idx: + res = self.result_data[0] + del self.result_data[0], self.result_rank[0] + return res + + while True: + # make sure the results are returned in the correct order + idx, res = self.result_queue.get() + if idx == self.get_idx: + return res + insert = bisect.bisect(self.result_rank, idx) + self.result_rank.insert(insert, idx) + self.result_data.insert(insert, res) + + def __len__(self): + return self.put_idx - self.get_idx + + def __call__(self, image): + self.put(image) + return self.get() + + def shutdown(self): + for _ in self.procs: + self.task_queue.put(AsyncPredictor._StopToken()) + + @property + def default_buffer_size(self): + return len(self.procs) * 5 diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/demo/visualizer.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/demo/visualizer.py new file mode 100644 index 0000000000000000000000000000000000000000..91246d2f8cc3a26e3640449360f627c901e7dd3c --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/demo/visualizer.py @@ -0,0 +1,1350 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+import colorsys +import logging +import math +import numpy as np +from enum import Enum, unique +import cv2 +import matplotlib as mpl +import matplotlib.colors as mplc +import matplotlib.figure as mplfigure +import annotator.oneformer.pycocotools.mask as mask_util +import torch +from matplotlib.backends.backend_agg import FigureCanvasAgg +from PIL import Image + +from annotator.oneformer.detectron2.data import MetadataCatalog +from annotator.oneformer.detectron2.structures import BitMasks, Boxes, BoxMode, Keypoints, PolygonMasks, RotatedBoxes +from annotator.oneformer.detectron2.utils.file_io import PathManager +import random +random.seed(0) +from .colormap import random_color, _COLORS +logger = logging.getLogger(__name__) + +__all__ = ["ColorMode", "VisImage", "Visualizer"] + + +_SMALL_OBJECT_AREA_THRESH = 1000 +_LARGE_MASK_AREA_THRESH = 120000 +_OFF_WHITE = (1.0, 1.0, 1.0) +_BLACK = (0, 0, 0) +_RED = (1.0, 0, 0) + +_KEYPOINT_THRESHOLD = 0.05 + + +def instance_color(rgb=False, idx=1, maximum=255): + """ + Args: + rgb (bool): whether to return RGB colors or BGR colors. + maximum (int): either 255 or 1 + Returns: + ndarray: a vector of 3 numbers + """ + ret = _COLORS[idx] * maximum + if not rgb: + ret = ret[::-1] + return ret + +@unique +class ColorMode(Enum): + """ + Enum of different color modes to use for instance visualizations. + """ + + IMAGE = 0 + """ + Picks a random color for every instance and overlay segmentations with low opacity. + """ + SEGMENTATION = 1 + """ + Let instances of the same category have similar colors + (from metadata.thing_colors), and overlay them with + high opacity. This provides more attention on the quality of segmentation. + """ + IMAGE_BW = 2 + """ + Same as IMAGE, but convert all areas without masks to gray-scale. + Only available for drawing per-instance mask predictions. + """ + + +class GenericMask: + """ + Attribute: + polygons (list[ndarray]): list[ndarray]: polygons for this mask. + Each ndarray has format [x, y, x, y, ...] 
+ mask (ndarray): a binary mask + """ + + def __init__(self, mask_or_polygons, height, width): + self._mask = self._polygons = self._has_holes = None + self.height = height + self.width = width + + m = mask_or_polygons + if isinstance(m, dict): + # RLEs + assert "counts" in m and "size" in m + if isinstance(m["counts"], list): # uncompressed RLEs + h, w = m["size"] + assert h == height and w == width + m = mask_util.frPyObjects(m, h, w) + self._mask = mask_util.decode(m)[:, :] + return + + if isinstance(m, list): # list[ndarray] + self._polygons = [np.asarray(x).reshape(-1) for x in m] + return + + if isinstance(m, np.ndarray): # assumed to be a binary mask + assert m.shape[1] != 2, m.shape + assert m.shape == ( + height, + width, + ), f"mask shape: {m.shape}, target dims: {height}, {width}" + self._mask = m.astype("uint8") + return + + raise ValueError("GenericMask cannot handle object {} of type '{}'".format(m, type(m))) + + @property + def mask(self): + if self._mask is None: + self._mask = self.polygons_to_mask(self._polygons) + return self._mask + + @property + def polygons(self): + if self._polygons is None: + self._polygons, self._has_holes = self.mask_to_polygons(self._mask) + return self._polygons + + @property + def has_holes(self): + if self._has_holes is None: + if self._mask is not None: + self._polygons, self._has_holes = self.mask_to_polygons(self._mask) + else: + self._has_holes = False # if original format is polygon, does not have holes + return self._has_holes + + def mask_to_polygons(self, mask): + # cv2.RETR_CCOMP flag retrieves all the contours and arranges them to a 2-level + # hierarchy. External contours (boundary) of the object are placed in hierarchy-1. + # Internal contours (holes) are placed in hierarchy-2. + # cv2.CHAIN_APPROX_NONE flag gets vertices of polygons from contours. + mask = np.ascontiguousarray(mask) # some versions of cv2 does not support incontiguous arr + res = cv2.findContours(mask.astype("uint8"), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE) + hierarchy = res[-1] + if hierarchy is None: # empty mask + return [], False + has_holes = (hierarchy.reshape(-1, 4)[:, 3] >= 0).sum() > 0 + res = res[-2] + res = [x.flatten() for x in res] + # These coordinates from OpenCV are integers in range [0, W-1 or H-1]. + # We add 0.5 to turn them into real-value coordinate space. A better solution + # would be to first +0.5 and then dilate the returned polygon by 0.5. + res = [x + 0.5 for x in res if len(x) >= 6] + return res, has_holes + + def polygons_to_mask(self, polygons): + rle = mask_util.frPyObjects(polygons, self.height, self.width) + rle = mask_util.merge(rle) + return mask_util.decode(rle)[:, :] + + def area(self): + return self.mask.sum() + + def bbox(self): + p = mask_util.frPyObjects(self.polygons, self.height, self.width) + p = mask_util.merge(p) + bbox = mask_util.toBbox(p) + bbox[2] += bbox[0] + bbox[3] += bbox[1] + return bbox + + +class _PanopticPrediction: + """ + Unify different panoptic annotation/prediction formats + """ + + def __init__(self, panoptic_seg, segments_info, metadata=None): + if segments_info is None: + assert metadata is not None + # If "segments_info" is None, we assume "panoptic_img" is a + # H*W int32 image storing the panoptic_id in the format of + # category_id * label_divisor + instance_id. We reserve -1 for + # VOID label. + label_divisor = metadata.label_divisor + segments_info = [] + for panoptic_label in np.unique(panoptic_seg.numpy()): + if panoptic_label == -1: + # VOID region. 
+ continue + pred_class = panoptic_label // label_divisor + isthing = pred_class in metadata.thing_dataset_id_to_contiguous_id.values() + segments_info.append( + { + "id": int(panoptic_label), + "category_id": int(pred_class), + "isthing": bool(isthing), + } + ) + del metadata + + self._seg = panoptic_seg + + self._sinfo = {s["id"]: s for s in segments_info} # seg id -> seg info + segment_ids, areas = torch.unique(panoptic_seg, sorted=True, return_counts=True) + areas = areas.numpy() + sorted_idxs = np.argsort(-areas) + self._seg_ids, self._seg_areas = segment_ids[sorted_idxs], areas[sorted_idxs] + self._seg_ids = self._seg_ids.tolist() + for sid, area in zip(self._seg_ids, self._seg_areas): + if sid in self._sinfo: + self._sinfo[sid]["area"] = float(area) + + def non_empty_mask(self): + """ + Returns: + (H, W) array, a mask for all pixels that have a prediction + """ + empty_ids = [] + for id in self._seg_ids: + if id not in self._sinfo: + empty_ids.append(id) + if len(empty_ids) == 0: + return np.zeros(self._seg.shape, dtype=np.uint8) + assert ( + len(empty_ids) == 1 + ), ">1 ids corresponds to no labels. This is currently not supported" + return (self._seg != empty_ids[0]).numpy().astype(np.bool) + + def semantic_masks(self): + for sid in self._seg_ids: + sinfo = self._sinfo.get(sid) + if sinfo is None or sinfo["isthing"]: + # Some pixels (e.g. id 0 in PanopticFPN) have no instance or semantic predictions. + continue + yield (self._seg == sid).numpy().astype(np.bool), sinfo + + def instance_masks(self): + for sid in self._seg_ids: + sinfo = self._sinfo.get(sid) + if sinfo is None or not sinfo["isthing"]: + continue + mask = (self._seg == sid).numpy().astype(np.bool) + if mask.sum() > 0: + yield mask, sinfo + + +def _create_text_labels(classes, scores, class_names, is_crowd=None): + """ + Args: + classes (list[int] or None): + scores (list[float] or None): + class_names (list[str] or None): + is_crowd (list[bool] or None): + Returns: + list[str] or None + """ + labels = None + if classes is not None: + if class_names is not None and len(class_names) > 0: + labels = [class_names[i] for i in classes] + else: + labels = [str(i) for i in classes] + if scores is not None: + if labels is None: + labels = ["{:.0f}%".format(s * 100) for s in scores] + else: + labels = ["{} {:.0f}%".format(l, s * 100) for l, s in zip(labels, scores)] + if labels is not None and is_crowd is not None: + labels = [l + ("|crowd" if crowd else "") for l, crowd in zip(labels, is_crowd)] + return labels + + +class VisImage: + def __init__(self, img, scale=1.0): + """ + Args: + img (ndarray): an RGB image of shape (H, W, 3) in range [0, 255]. + scale (float): scale the input image + """ + self.img = img + self.scale = scale + self.width, self.height = img.shape[1], img.shape[0] + self._setup_figure(img) + + def _setup_figure(self, img): + """ + Args: + Same as in :meth:`__init__()`. + Returns: + fig (matplotlib.pyplot.figure): top level container for all the image plot elements. + ax (matplotlib.pyplot.Axes): contains figure elements and sets the coordinate system. 
+ """ + fig = mplfigure.Figure(frameon=False) + self.dpi = fig.get_dpi() + # add a small 1e-2 to avoid precision lost due to matplotlib's truncation + # (https://github.com/matplotlib/matplotlib/issues/15363) + fig.set_size_inches( + (self.width * self.scale + 1e-2) / self.dpi, + (self.height * self.scale + 1e-2) / self.dpi, + ) + self.canvas = FigureCanvasAgg(fig) + # self.canvas = mpl.backends.backend_cairo.FigureCanvasCairo(fig) + ax = fig.add_axes([0.0, 0.0, 1.0, 1.0]) + ax.axis("off") + self.fig = fig + self.ax = ax + self.reset_image(img) + + def reset_image(self, img): + """ + Args: + img: same as in __init__ + """ + img = img.astype("uint8") + self.ax.imshow(img, extent=(0, self.width, self.height, 0), interpolation="nearest") + + def save(self, filepath): + """ + Args: + filepath (str): a string that contains the absolute path, including the file name, where + the visualized image will be saved. + """ + self.fig.savefig(filepath) + + def get_image(self): + """ + Returns: + ndarray: + the visualized image of shape (H, W, 3) (RGB) in uint8 type. + The shape is scaled w.r.t the input image using the given `scale` argument. + """ + canvas = self.canvas + s, (width, height) = canvas.print_to_buffer() + # buf = io.BytesIO() # works for cairo backend + # canvas.print_rgba(buf) + # width, height = self.width, self.height + # s = buf.getvalue() + + buffer = np.frombuffer(s, dtype="uint8") + + img_rgba = buffer.reshape(height, width, 4) + rgb, alpha = np.split(img_rgba, [3], axis=2) + return rgb.astype("uint8") + + +class Visualizer: + """ + Visualizer that draws data about detection/segmentation on images. + It contains methods like `draw_{text,box,circle,line,binary_mask,polygon}` + that draw primitive objects to images, as well as high-level wrappers like + `draw_{instance_predictions,sem_seg,panoptic_seg_predictions,dataset_dict}` + that draw composite data in some pre-defined style. + Note that the exact visualization style for the high-level wrappers are subject to change. + Style such as color, opacity, label contents, visibility of labels, or even the visibility + of objects themselves (e.g. when the object is too small) may change according + to different heuristics, as long as the results still look visually reasonable. + To obtain a consistent style, you can implement custom drawing functions with the + abovementioned primitive methods instead. If you need more customized visualization + styles, you can process the data yourself following their format documented in + tutorials (:doc:`/tutorials/models`, :doc:`/tutorials/datasets`). This class does not + intend to satisfy everyone's preference on drawing styles. + This visualizer focuses on high rendering quality rather than performance. It is not + designed to be used for real-time applications. + """ + + # TODO implement a fast, rasterized version using OpenCV + + def __init__(self, img_rgb, is_img=True, metadata=None, scale=1.0, instance_mode=ColorMode.IMAGE): + """ + Args: + img_rgb: a numpy array of shape (H, W, C), where H and W correspond to + the height and width of the image respectively. C is the number of + color channels. The image is required to be in RGB format since that + is a requirement of the Matplotlib library. The image is also expected + to be in the range [0, 255]. + metadata (Metadata): dataset metadata (e.g. class names and colors) + instance_mode (ColorMode): defines one of the pre-defined style for drawing + instances on an image. 
+ """ + if is_img: + self.img = np.asarray(img_rgb).clip(0, 255).astype(np.uint8) + else: + self.img = np.zeros_like(img_rgb).clip(0, 255).astype(np.uint8) + 255 + if metadata is None: + metadata = MetadataCatalog.get("__nonexist__") + self.metadata = metadata + self.output = VisImage(self.img, scale=scale) + self.cpu_device = torch.device("cpu") + + # too small texts are useless, therefore clamp to 9 + self._default_font_size = max( + np.sqrt(self.output.height * self.output.width) // 90, 10 // scale + ) + self._instance_mode = instance_mode + self.keypoint_threshold = _KEYPOINT_THRESHOLD + + def get_image(self, img): + img = np.asarray(img).clip(0, 255).astype(np.uint8) + return VisImage(img, scale=1.0) + + def draw_box_predictions( + self, + boxes=None, + labels=None, + scores=None, + assigned_colors=None + ): + """ + Args: + boxes (Boxes, RotatedBoxes or ndarray): either a :class:`Boxes`, + or an Nx4 numpy array of XYXY_ABS format for the N objects in a single image, + or a :class:`RotatedBoxes`, + or an Nx5 numpy array of (x_center, y_center, width, height, angle_degrees) format + for the N objects in a single image, + labels (list[str]): the text to be displayed for each instance. + assigned_colors (list[matplotlib.colors]): a list of colors, where each color + corresponds to each mask or box in the image. Refer to 'matplotlib.colors' + for full list of formats that the colors are accepted in. + Returns: + output (VisImage): image object with visualizations. + """ + num_instances = 0 + boxes = self._convert_boxes(boxes) + classes = labels.tolist() + scores = scores.tolist() + labels = _create_text_labels(classes, scores, self.metadata.get("stuff_classes", None)) + num_instances = len(boxes) + assert len(labels) == num_instances + if assigned_colors is None: + # assigned_colors = [random_color(rgb=True, maximum=1) for _ in range(num_instances)] + assigned_colors = [instance_color(rgb=True, idx=i, maximum=1) for i in range(num_instances)] + if num_instances == 0: + return self.output + + # Display in largest to smallest order to reduce occlusion. + areas = None + areas = np.prod(boxes[:, 2:] - boxes[:, :2], axis=1) + + if areas is not None: + sorted_idxs = np.argsort(-areas).tolist() + # Re-order overlapped instances in descending order. + boxes = boxes[sorted_idxs] if boxes is not None else None + labels = [labels[k] for k in sorted_idxs] if labels is not None else None + assigned_colors = [assigned_colors[idx] for idx in sorted_idxs] + + for i in range(num_instances): + color = assigned_colors[i] + if boxes is not None: + self.draw_box(boxes[i], edge_color=color) + + if labels is not None: + # first get a box + if boxes is not None: + x0, y0, x1, y1 = boxes[i] + text_pos = (x0, y0) # if drawing boxes, put text on the box corner. + horiz_align = "left" + else: + continue # drawing the box confidence for keypoints isn't very useful. 
+ # for small objects, draw text at the side to avoid occlusion + instance_area = (y1 - y0) * (x1 - x0) + if ( + instance_area < _SMALL_OBJECT_AREA_THRESH * self.output.scale + or y1 - y0 < 40 * self.output.scale + ): + if y1 >= self.output.height - 5: + text_pos = (x1, y0) + else: + text_pos = (x0, y1) + + height_ratio = (y1 - y0) / np.sqrt(self.output.height * self.output.width) + lighter_color = self._change_color_brightness(color, brightness_factor=0.7) + font_size = ( + np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2) + * 0.5 + * self._default_font_size + ) + self.draw_text( + labels[i], + text_pos, + color=lighter_color, + horizontal_alignment=horiz_align, + font_size=font_size, + ) + + return self.output + + + def draw_instance_predictions(self, predictions, alpha=0.8, is_text=True): + """ + Draw instance-level prediction results on an image. + Args: + predictions (Instances): the output of an instance detection/segmentation + model. Following fields will be used to draw: + "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle"). + Returns: + output (VisImage): image object with visualizations. + """ + boxes = predictions.pred_boxes if predictions.has("pred_boxes") else None + scores = predictions.scores if predictions.has("scores") else None + classes = predictions.pred_classes.tolist() if predictions.has("pred_classes") else None + labels = _create_text_labels(classes, scores, self.metadata.get("stuff_classes", None)) + keypoints = predictions.pred_keypoints if predictions.has("pred_keypoints") else None + + if predictions.has("pred_masks"): + masks = np.asarray(predictions.pred_masks) + masks = [GenericMask(x, self.output.height, self.output.width) for x in masks] + else: + masks = None + + if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get("stuff_colors"): + # colors = [ + # self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) for c in classes + # ] + colors = [ + instance_color(rgb=True, idx=c, maximum=1) for c in classes + ] + else: + colors = None + + if self._instance_mode == ColorMode.IMAGE_BW: + self.output.reset_image( + self._create_grayscale_image( + (predictions.pred_masks.any(dim=0) > 0).numpy() + if predictions.has("pred_masks") + else None + ) + ) + + self.overlay_instances( + masks=masks, + boxes=boxes, + labels=labels, + keypoints=keypoints, + assigned_colors=colors, + alpha=alpha, + is_text=is_text, + ) + return self.output + + def draw_sem_seg(self, sem_seg, area_threshold=None, alpha=0.8, is_text=True, edge_color=_OFF_WHITE): + """ + Draw semantic segmentation predictions/labels. + Args: + sem_seg (Tensor or ndarray): the segmentation of shape (H, W). + Each value is the integer label of the pixel. + area_threshold (int): segments with less than `area_threshold` are not drawn. + alpha (float): the larger it is, the more opaque the segmentations are. + Returns: + output (VisImage): image object with visualizations. 
+ """ + if isinstance(sem_seg, torch.Tensor): + sem_seg = sem_seg.numpy() + labels, areas = np.unique(sem_seg, return_counts=True) + sorted_idxs = np.argsort(-areas).tolist() + labels = labels[sorted_idxs] + for label in filter(lambda l: l < len(self.metadata.stuff_classes), labels): + try: + mask_color = [x / 255 for x in self.metadata.stuff_colors[label]] + except (AttributeError, IndexError): + mask_color = None + + binary_mask = (sem_seg == label).astype(np.uint8) + text = self.metadata.stuff_classes[label] + self.draw_binary_mask( + binary_mask, + color=mask_color, + edge_color=edge_color, + text=text, + alpha=alpha, + area_threshold=area_threshold, + is_text=is_text, + ) + return self.output + + def draw_panoptic_seg(self, panoptic_seg, segments_info, area_threshold=None, alpha=0.7, is_text=True,): + """ + Draw panoptic prediction annotations or results. + Args: + panoptic_seg (Tensor): of shape (height, width) where the values are ids for each + segment. + segments_info (list[dict] or None): Describe each segment in `panoptic_seg`. + If it is a ``list[dict]``, each dict contains keys "id", "category_id". + If None, category id of each pixel is computed by + ``pixel // metadata.label_divisor``. + area_threshold (int): stuff segments with less than `area_threshold` are not drawn. + Returns: + output (VisImage): image object with visualizations. + """ + pred = _PanopticPrediction(panoptic_seg, segments_info, self.metadata) + + if self._instance_mode == ColorMode.IMAGE_BW: + self.output.reset_image(self._create_grayscale_image(pred.non_empty_mask())) + + # draw mask for all semantic segments first i.e. "stuff" + for mask, sinfo in pred.semantic_masks(): + category_idx = sinfo["category_id"] + try: + mask_color = [x / 255 for x in self.metadata.stuff_colors[category_idx]] + except AttributeError: + mask_color = None + + text = self.metadata.stuff_classes[category_idx] + self.draw_binary_mask( + mask, + color=mask_color, + edge_color=_OFF_WHITE, + text=text, + alpha=alpha, + area_threshold=area_threshold, + is_text=is_text, + ) + + # draw mask for all instances second + all_instances = list(pred.instance_masks()) + if len(all_instances) == 0: + return self.output + masks, sinfo = list(zip(*all_instances)) + category_ids = [x["category_id"] for x in sinfo] + + try: + scores = [x["score"] for x in sinfo] + except KeyError: + scores = None + labels = _create_text_labels( + category_ids, scores, self.metadata.stuff_classes, [x.get("iscrowd", 0) for x in sinfo] + ) + + try: + colors = [ + self._jitter([x / 255 for x in self.metadata.stuff_colors[c]]) for c in category_ids + ] + except AttributeError: + colors = None + self.overlay_instances(masks=masks, labels=labels, assigned_colors=colors, alpha=alpha, is_text=is_text) + + return self.output + + draw_panoptic_seg_predictions = draw_panoptic_seg # backward compatibility + + def draw_dataset_dict(self, dic): + """ + Draw annotations/segmentaions in Detectron2 Dataset format. + Args: + dic (dict): annotation/segmentation data of one image, in Detectron2 Dataset format. + Returns: + output (VisImage): image object with visualizations. 
+ """ + annos = dic.get("annotations", None) + if annos: + if "segmentation" in annos[0]: + masks = [x["segmentation"] for x in annos] + else: + masks = None + if "keypoints" in annos[0]: + keypts = [x["keypoints"] for x in annos] + keypts = np.array(keypts).reshape(len(annos), -1, 3) + else: + keypts = None + + boxes = [ + BoxMode.convert(x["bbox"], x["bbox_mode"], BoxMode.XYXY_ABS) + if len(x["bbox"]) == 4 + else x["bbox"] + for x in annos + ] + + colors = None + category_ids = [x["category_id"] for x in annos] + if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get("stuff_colors"): + colors = [ + self._jitter([x / 255 for x in self.metadata.stuff_colors[c]]) + for c in category_ids + ] + names = self.metadata.get("stuff_classes", None) + labels = _create_text_labels( + category_ids, + scores=None, + class_names=names, + is_crowd=[x.get("iscrowd", 0) for x in annos], + ) + self.overlay_instances( + labels=labels, boxes=boxes, masks=masks, keypoints=keypts, assigned_colors=colors + ) + + sem_seg = dic.get("sem_seg", None) + if sem_seg is None and "sem_seg_file_name" in dic: + with PathManager.open(dic["sem_seg_file_name"], "rb") as f: + sem_seg = Image.open(f) + sem_seg = np.asarray(sem_seg, dtype="uint8") + if sem_seg is not None: + self.draw_sem_seg(sem_seg, area_threshold=0, alpha=0.5) + + pan_seg = dic.get("pan_seg", None) + # if pan_seg is None and "pan_seg_file_name" in dic: + # with PathManager.open(dic["pan_seg_file_name"], "rb") as f: + # pan_seg = Image.open(f) + # pan_seg = np.asarray(pan_seg) + # from panopticapi.utils import rgb2id + # + # pan_seg = rgb2id(pan_seg) + if pan_seg is not None: + segments_info = dic["segments_info"] + pan_seg = torch.tensor(pan_seg) + self.draw_panoptic_seg(pan_seg, segments_info, area_threshold=0, alpha=0.5) + return self.output + + def overlay_instances( + self, + *, + boxes=None, + labels=None, + masks=None, + keypoints=None, + assigned_colors=None, + alpha=0.5, + is_text=True, + ): + """ + Args: + boxes (Boxes, RotatedBoxes or ndarray): either a :class:`Boxes`, + or an Nx4 numpy array of XYXY_ABS format for the N objects in a single image, + or a :class:`RotatedBoxes`, + or an Nx5 numpy array of (x_center, y_center, width, height, angle_degrees) format + for the N objects in a single image, + labels (list[str]): the text to be displayed for each instance. + masks (masks-like object): Supported types are: + * :class:`detectron2.structures.PolygonMasks`, + :class:`detectron2.structures.BitMasks`. + * list[list[ndarray]]: contains the segmentation masks for all objects in one image. + The first level of the list corresponds to individual instances. The second + level to all the polygon that compose the instance, and the third level + to the polygon coordinates. The third level should have the format of + [x0, y0, x1, y1, ..., xn, yn] (n >= 3). + * list[ndarray]: each ndarray is a binary mask of shape (H, W). + * list[dict]: each dict is a COCO-style RLE. + keypoints (Keypoint or array like): an array-like object of shape (N, K, 3), + where the N is the number of instances and K is the number of keypoints. + The last dimension corresponds to (x, y, visibility or score). + assigned_colors (list[matplotlib.colors]): a list of colors, where each color + corresponds to each mask or box in the image. Refer to 'matplotlib.colors' + for full list of formats that the colors are accepted in. + Returns: + output (VisImage): image object with visualizations. 
+ """ + num_instances = 0 + if boxes is not None: + boxes = self._convert_boxes(boxes) + num_instances = len(boxes) + if masks is not None: + masks = self._convert_masks(masks) + if num_instances: + assert len(masks) == num_instances + else: + num_instances = len(masks) + if keypoints is not None: + if num_instances: + assert len(keypoints) == num_instances + else: + num_instances = len(keypoints) + keypoints = self._convert_keypoints(keypoints) + if labels is not None: + assert len(labels) == num_instances + if assigned_colors is None: + # assigned_colors = [random_color(rgb=True, maximum=1) for _ in range(num_instances)] + assigned_colors = [instance_color(rgb=True, idx=i, maximum=1) for i in range(num_instances)] + if num_instances == 0: + return self.output + if boxes is not None and boxes.shape[1] == 5: + return self.overlay_rotated_instances( + boxes=boxes, labels=labels, assigned_colors=assigned_colors + ) + + # Display in largest to smallest order to reduce occlusion. + areas = None + if boxes is not None: + areas = np.prod(boxes[:, 2:] - boxes[:, :2], axis=1) + elif masks is not None: + areas = np.asarray([x.area() for x in masks]) + + if areas is not None: + sorted_idxs = np.argsort(-areas).tolist() + # Re-order overlapped instances in descending order. + boxes = boxes[sorted_idxs] if boxes is not None else None + labels = [labels[k] for k in sorted_idxs] if labels is not None else None + masks = [masks[idx] for idx in sorted_idxs] if masks is not None else None + assigned_colors = [assigned_colors[idx] for idx in sorted_idxs] + keypoints = keypoints[sorted_idxs] if keypoints is not None else None + + for i in range(num_instances): + color = assigned_colors[i] + if boxes is not None: + self.draw_box(boxes[i], edge_color=color) + + if masks is not None: + for segment in masks[i].polygons: + self.draw_polygon(segment.reshape(-1, 2), color, alpha=alpha) + + if labels is not None: + # first get a box + if boxes is not None: + x0, y0, x1, y1 = boxes[i] + text_pos = (x0, y0) # if drawing boxes, put text on the box corner. + horiz_align = "left" + elif masks is not None: + # skip small mask without polygon + if len(masks[i].polygons) == 0: + continue + + x0, y0, x1, y1 = masks[i].bbox() + + # draw text in the center (defined by median) when box is not drawn + # median is less sensitive to outliers. + text_pos = np.median(masks[i].mask.nonzero(), axis=1)[::-1] + horiz_align = "center" + else: + continue # drawing the box confidence for keypoints isn't very useful. 
+ # for small objects, draw text at the side to avoid occlusion + instance_area = (y1 - y0) * (x1 - x0) + if ( + instance_area < _SMALL_OBJECT_AREA_THRESH * self.output.scale + or y1 - y0 < 40 * self.output.scale + ): + if y1 >= self.output.height - 5: + text_pos = (x1, y0) + else: + text_pos = (x0, y1) + + height_ratio = (y1 - y0) / np.sqrt(self.output.height * self.output.width) + lighter_color = self._change_color_brightness(color, brightness_factor=0.7) + font_size = ( + np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2) + * 0.5 + * self._default_font_size + ) + if is_text: + self.draw_text( + labels[i], + text_pos, + color=lighter_color, + horizontal_alignment=horiz_align, + font_size=font_size, + ) + + # draw keypoints + if keypoints is not None: + for keypoints_per_instance in keypoints: + self.draw_and_connect_keypoints(keypoints_per_instance) + + return self.output + + def overlay_rotated_instances(self, boxes=None, labels=None, assigned_colors=None): + """ + Args: + boxes (ndarray): an Nx5 numpy array of + (x_center, y_center, width, height, angle_degrees) format + for the N objects in a single image. + labels (list[str]): the text to be displayed for each instance. + assigned_colors (list[matplotlib.colors]): a list of colors, where each color + corresponds to each mask or box in the image. Refer to 'matplotlib.colors' + for full list of formats that the colors are accepted in. + Returns: + output (VisImage): image object with visualizations. + """ + num_instances = len(boxes) + + if assigned_colors is None: + # assigned_colors = [random_color(rgb=True, maximum=1) for _ in range(num_instances)] + assigned_colors = [instance_color(rgb=True, idx=i, maximum=1) for i in range(num_instances)] + if num_instances == 0: + return self.output + + # Display in largest to smallest order to reduce occlusion. + if boxes is not None: + areas = boxes[:, 2] * boxes[:, 3] + + sorted_idxs = np.argsort(-areas).tolist() + # Re-order overlapped instances in descending order. + boxes = boxes[sorted_idxs] + labels = [labels[k] for k in sorted_idxs] if labels is not None else None + colors = [assigned_colors[idx] for idx in sorted_idxs] + + for i in range(num_instances): + self.draw_rotated_box_with_label( + boxes[i], edge_color=colors[i], label=labels[i] if labels is not None else None + ) + + return self.output + + def draw_and_connect_keypoints(self, keypoints): + """ + Draws keypoints of an instance and follows the rules for keypoint connections + to draw lines between appropriate keypoints. This follows color heuristics for + line color. + Args: + keypoints (Tensor): a tensor of shape (K, 3), where K is the number of keypoints + and the last dimension corresponds to (x, y, probability). + Returns: + output (VisImage): image object with visualizations. 
+ """ + visible = {} + keypoint_names = self.metadata.get("keypoint_names") + for idx, keypoint in enumerate(keypoints): + + # draw keypoint + x, y, prob = keypoint + if prob > self.keypoint_threshold: + self.draw_circle((x, y), color=_RED) + if keypoint_names: + keypoint_name = keypoint_names[idx] + visible[keypoint_name] = (x, y) + + if self.metadata.get("keypoint_connection_rules"): + for kp0, kp1, color in self.metadata.keypoint_connection_rules: + if kp0 in visible and kp1 in visible: + x0, y0 = visible[kp0] + x1, y1 = visible[kp1] + color = tuple(x / 255.0 for x in color) + self.draw_line([x0, x1], [y0, y1], color=color) + + # draw lines from nose to mid-shoulder and mid-shoulder to mid-hip + # Note that this strategy is specific to person keypoints. + # For other keypoints, it should just do nothing + try: + ls_x, ls_y = visible["left_shoulder"] + rs_x, rs_y = visible["right_shoulder"] + mid_shoulder_x, mid_shoulder_y = (ls_x + rs_x) / 2, (ls_y + rs_y) / 2 + except KeyError: + pass + else: + # draw line from nose to mid-shoulder + nose_x, nose_y = visible.get("nose", (None, None)) + if nose_x is not None: + self.draw_line([nose_x, mid_shoulder_x], [nose_y, mid_shoulder_y], color=_RED) + + try: + # draw line from mid-shoulder to mid-hip + lh_x, lh_y = visible["left_hip"] + rh_x, rh_y = visible["right_hip"] + except KeyError: + pass + else: + mid_hip_x, mid_hip_y = (lh_x + rh_x) / 2, (lh_y + rh_y) / 2 + self.draw_line([mid_hip_x, mid_shoulder_x], [mid_hip_y, mid_shoulder_y], color=_RED) + return self.output + + """ + Primitive drawing functions: + """ + + def draw_text( + self, + text, + position, + *, + font_size=None, + color="g", + horizontal_alignment="center", + rotation=0, + ): + """ + Args: + text (str): class label + position (tuple): a tuple of the x and y coordinates to place text on image. + font_size (int, optional): font of the text. If not provided, a font size + proportional to the image width is calculated and used. + color: color of the text. Refer to `matplotlib.colors` for full list + of formats that are accepted. + horizontal_alignment (str): see `matplotlib.text.Text` + rotation: rotation angle in degrees CCW + Returns: + output (VisImage): image object with text drawn. + """ + if not font_size: + font_size = self._default_font_size + + # since the text background is dark, we don't want the text to be dark + color = np.maximum(list(mplc.to_rgb(color)), 0.2) + color[np.argmax(color)] = max(0.8, np.max(color)) + + x, y = position + self.output.ax.text( + x, + y, + text, + size=font_size * self.output.scale, + family="sans-serif", + bbox={"facecolor": "black", "alpha": 0.8, "pad": 0.7, "edgecolor": "none"}, + verticalalignment="top", + horizontalalignment=horizontal_alignment, + color=color, + zorder=10, + rotation=rotation, + ) + return self.output + + def draw_box(self, box_coord, alpha=1.0, edge_color="g", line_style="-"): + """ + Args: + box_coord (tuple): a tuple containing x0, y0, x1, y1 coordinates, where x0 and y0 + are the coordinates of the image's top left corner. x1 and y1 are the + coordinates of the image's bottom right corner. + alpha (float): blending efficient. Smaller values lead to more transparent masks. + edge_color: color of the outline of the box. Refer to `matplotlib.colors` + for full list of formats that are accepted. + line_style (string): the string to use to create the outline of the boxes. + Returns: + output (VisImage): image object with box drawn. 
+ """ + x0, y0, x1, y1 = box_coord + width = x1 - x0 + height = y1 - y0 + + linewidth = 2 + + self.output.ax.add_patch( + mpl.patches.Rectangle( + (x0, y0), + width, + height, + fill=False, + edgecolor=edge_color, + linewidth=linewidth * self.output.scale, + alpha=alpha, + linestyle=line_style, + ) + ) + return self.output + + def draw_rotated_box_with_label( + self, rotated_box, alpha=0.5, edge_color="g", line_style="-", label=None + ): + """ + Draw a rotated box with label on its top-left corner. + Args: + rotated_box (tuple): a tuple containing (cnt_x, cnt_y, w, h, angle), + where cnt_x and cnt_y are the center coordinates of the box. + w and h are the width and height of the box. angle represents how + many degrees the box is rotated CCW with regard to the 0-degree box. + alpha (float): blending efficient. Smaller values lead to more transparent masks. + edge_color: color of the outline of the box. Refer to `matplotlib.colors` + for full list of formats that are accepted. + line_style (string): the string to use to create the outline of the boxes. + label (string): label for rotated box. It will not be rendered when set to None. + Returns: + output (VisImage): image object with box drawn. + """ + cnt_x, cnt_y, w, h, angle = rotated_box + area = w * h + # use thinner lines when the box is small + linewidth = self._default_font_size / ( + 6 if area < _SMALL_OBJECT_AREA_THRESH * self.output.scale else 3 + ) + + theta = angle * math.pi / 180.0 + c = math.cos(theta) + s = math.sin(theta) + rect = [(-w / 2, h / 2), (-w / 2, -h / 2), (w / 2, -h / 2), (w / 2, h / 2)] + # x: left->right ; y: top->down + rotated_rect = [(s * yy + c * xx + cnt_x, c * yy - s * xx + cnt_y) for (xx, yy) in rect] + for k in range(4): + j = (k + 1) % 4 + self.draw_line( + [rotated_rect[k][0], rotated_rect[j][0]], + [rotated_rect[k][1], rotated_rect[j][1]], + color=edge_color, + linestyle="--" if k == 1 else line_style, + linewidth=linewidth, + ) + + if label is not None: + text_pos = rotated_rect[1] # topleft corner + + height_ratio = h / np.sqrt(self.output.height * self.output.width) + label_color = self._change_color_brightness(edge_color, brightness_factor=0.7) + font_size = ( + np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2) * 0.5 * self._default_font_size + ) + self.draw_text(label, text_pos, color=label_color, font_size=font_size, rotation=angle) + + return self.output + + def draw_circle(self, circle_coord, color, radius=3): + """ + Args: + circle_coord (list(int) or tuple(int)): contains the x and y coordinates + of the center of the circle. + color: color of the polygon. Refer to `matplotlib.colors` for a full list of + formats that are accepted. + radius (int): radius of the circle. + Returns: + output (VisImage): image object with box drawn. + """ + x, y = circle_coord + self.output.ax.add_patch( + mpl.patches.Circle(circle_coord, radius=radius, fill=True, color=color) + ) + return self.output + + def draw_line(self, x_data, y_data, color, linestyle="-", linewidth=None): + """ + Args: + x_data (list[int]): a list containing x values of all the points being drawn. + Length of list should match the length of y_data. + y_data (list[int]): a list containing y values of all the points being drawn. + Length of list should match the length of x_data. + color: color of the line. Refer to `matplotlib.colors` for a full list of + formats that are accepted. + linestyle: style of the line. Refer to `matplotlib.lines.Line2D` + for a full list of formats that are accepted. 
+ linewidth (float or None): width of the line. When it's None, + a default value will be computed and used. + Returns: + output (VisImage): image object with line drawn. + """ + if linewidth is None: + linewidth = self._default_font_size / 3 + linewidth = max(linewidth, 1) + self.output.ax.add_line( + mpl.lines.Line2D( + x_data, + y_data, + linewidth=linewidth * self.output.scale, + color=color, + linestyle=linestyle, + ) + ) + return self.output + + def draw_binary_mask( + self, binary_mask, color=None, *, edge_color=None, text=None, alpha=0.5, area_threshold=10, is_text=True, + ): + """ + Args: + binary_mask (ndarray): numpy array of shape (H, W), where H is the image height and + W is the image width. Each value in the array is either a 0 or 1 value of uint8 + type. + color: color of the mask. Refer to `matplotlib.colors` for a full list of + formats that are accepted. If None, will pick a random color. + edge_color: color of the polygon edges. Refer to `matplotlib.colors` for a + full list of formats that are accepted. + text (str): if None, will be drawn on the object + alpha (float): blending efficient. Smaller values lead to more transparent masks. + area_threshold (float): a connected component smaller than this area will not be shown. + Returns: + output (VisImage): image object with mask drawn. + """ + if color is None: + color = random_color(rgb=True, maximum=1) + color = mplc.to_rgb(color) + + has_valid_segment = False + binary_mask = binary_mask.astype("uint8") # opencv needs uint8 + mask = GenericMask(binary_mask, self.output.height, self.output.width) + shape2d = (binary_mask.shape[0], binary_mask.shape[1]) + + if not mask.has_holes: + # draw polygons for regular masks + for segment in mask.polygons: + # area = mask_util.area(mask_util.frPyObjects([segment], shape2d[0], shape2d[1])) + # if area < (area_threshold or 0): + # continue + has_valid_segment = True + segment = segment.reshape(-1, 2) + self.draw_polygon(segment, color=color, edge_color=edge_color, alpha=alpha) + else: + # TODO: Use Path/PathPatch to draw vector graphics: + # https://stackoverflow.com/questions/8919719/how-to-plot-a-complex-polygon + rgba = np.zeros(shape2d + (4,), dtype="float32") + rgba[:, :, :3] = color + rgba[:, :, 3] = (mask.mask == 1).astype("float32") * alpha + has_valid_segment = True + self.output.ax.imshow(rgba, extent=(0, self.output.width, self.output.height, 0)) + + if is_text: + if text is not None and has_valid_segment: + lighter_color = self._change_color_brightness(color, brightness_factor=0.7) + self._draw_text_in_mask(binary_mask, text, lighter_color) + return self.output + + def draw_soft_mask(self, soft_mask, color=None, *, text=None, alpha=0.5): + """ + Args: + soft_mask (ndarray): float array of shape (H, W), each value in [0, 1]. + color: color of the mask. Refer to `matplotlib.colors` for a full list of + formats that are accepted. If None, will pick a random color. + text (str): if None, will be drawn on the object + alpha (float): blending efficient. Smaller values lead to more transparent masks. + Returns: + output (VisImage): image object with mask drawn. 
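Both the has_holes branch of draw_binary_mask above and draw_soft_mask below paint the mask as a single RGBA image instead of polygons: the RGB channels carry the chosen color and the alpha channel carries the mask scaled by alpha. A NumPy-only sketch of that composition; the alpha_blend helper is illustrative (the real code hands the RGBA array to Matplotlib's imshow):

import numpy as np

def mask_to_rgba(mask, color=(0.0, 1.0, 0.0), alpha=0.5):
    """mask: float array (H, W) in [0, 1]; returns an (H, W, 4) RGBA overlay."""
    rgba = np.zeros(mask.shape + (4,), dtype="float32")
    rgba[:, :, :3] = color          # constant mask color
    rgba[:, :, 3] = mask * alpha    # transparency follows the mask values
    return rgba

def alpha_blend(background, rgba):
    """Plain alpha compositing, only to make the effect visible in a print-out."""
    a = rgba[:, :, 3:4]
    return background * (1 - a) + rgba[:, :, :3] * a

bg = np.full((4, 4, 3), 0.2, dtype="float32")   # dark background image
soft = np.zeros((4, 4), dtype="float32")
soft[1:3, 1:3] = 0.8                            # a soft blob in the middle
print(alpha_blend(bg, mask_to_rgba(soft)).round(2))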
+ """ + if color is None: + color = random_color(rgb=True, maximum=1) + color = mplc.to_rgb(color) + + shape2d = (soft_mask.shape[0], soft_mask.shape[1]) + rgba = np.zeros(shape2d + (4,), dtype="float32") + rgba[:, :, :3] = color + rgba[:, :, 3] = soft_mask * alpha + self.output.ax.imshow(rgba, extent=(0, self.output.width, self.output.height, 0)) + + if text is not None: + lighter_color = self._change_color_brightness(color, brightness_factor=0.7) + binary_mask = (soft_mask > 0.5).astype("uint8") + # self._draw_text_in_mask(binary_mask, text, lighter_color) + return self.output + + def draw_polygon(self, segment, color, edge_color=None, alpha=0.5): + """ + Args: + segment: numpy array of shape Nx2, containing all the points in the polygon. + color: color of the polygon. Refer to `matplotlib.colors` for a full list of + formats that are accepted. + edge_color: color of the polygon edges. Refer to `matplotlib.colors` for a + full list of formats that are accepted. If not provided, a darker shade + of the polygon color will be used instead. + alpha (float): blending efficient. Smaller values lead to more transparent masks. + Returns: + output (VisImage): image object with polygon drawn. + """ + if edge_color is None: + # make edge color darker than the polygon color + if alpha > 0.8: + edge_color = self._change_color_brightness(color, brightness_factor=-0.7) + else: + edge_color = color + edge_color = mplc.to_rgb(edge_color) + (1,) + + polygon = mpl.patches.Polygon( + segment, + fill=True, + facecolor=mplc.to_rgb(color) + (alpha,), + edgecolor=edge_color, + linewidth=max(self._default_font_size // 15 * self.output.scale, 1), + ) + self.output.ax.add_patch(polygon) + return self.output + + """ + Internal methods: + """ + + def _jitter(self, color): + """ + Randomly modifies given color to produce a slightly different color than the color given. + Args: + color (tuple[double]): a tuple of 3 elements, containing the RGB values of the color + picked. The values in the list are in the [0.0, 1.0] range. + Returns: + jittered_color (tuple[double]): a tuple of 3 elements, containing the RGB values of the + color after being jittered. The values in the list are in the [0.0, 1.0] range. + """ + color = mplc.to_rgb(color) + vec = np.random.rand(3) + # better to do it in another color space + vec = vec / np.linalg.norm(vec) * 0.5 + res = np.clip(vec + color, 0, 1) + return tuple(res) + + def _create_grayscale_image(self, mask=None): + """ + Create a grayscale version of the original image. + The colors in masked area, if given, will be kept. + """ + img_bw = self.img.astype("f4").mean(axis=2) + img_bw = np.stack([img_bw] * 3, axis=2) + if mask is not None: + img_bw[mask] = self.img[mask] + return img_bw + + def _change_color_brightness(self, color, brightness_factor): + """ + Depending on the brightness_factor, gives a lighter or darker color i.e. a color with + less or more saturation than the original color. + Args: + color: color of the polygon. Refer to `matplotlib.colors` for a full list of + formats that are accepted. + brightness_factor (float): a value in [-1.0, 1.0] range. A lightness factor of + 0 will correspond to no change, a factor in [-1.0, 0) range will result in + a darker color and a factor in (0, 1.0] range will result in a lighter color. + Returns: + modified_color (tuple[double]): a tuple containing the RGB values of the + modified color. Each value in the tuple is in the [0.0, 1.0] range. 
+ """ + assert brightness_factor >= -1.0 and brightness_factor <= 1.0 + color = mplc.to_rgb(color) + polygon_color = colorsys.rgb_to_hls(*mplc.to_rgb(color)) + modified_lightness = polygon_color[1] + (brightness_factor * polygon_color[1]) + modified_lightness = 0.0 if modified_lightness < 0.0 else modified_lightness + modified_lightness = 1.0 if modified_lightness > 1.0 else modified_lightness + modified_color = colorsys.hls_to_rgb(polygon_color[0], modified_lightness, polygon_color[2]) + return modified_color + + def _convert_boxes(self, boxes): + """ + Convert different format of boxes to an NxB array, where B = 4 or 5 is the box dimension. + """ + if isinstance(boxes, Boxes) or isinstance(boxes, RotatedBoxes): + return boxes.tensor.detach().numpy() + else: + return np.asarray(boxes) + + def _convert_masks(self, masks_or_polygons): + """ + Convert different format of masks or polygons to a tuple of masks and polygons. + Returns: + list[GenericMask]: + """ + + m = masks_or_polygons + if isinstance(m, PolygonMasks): + m = m.polygons + if isinstance(m, BitMasks): + m = m.tensor.numpy() + if isinstance(m, torch.Tensor): + m = m.numpy() + ret = [] + for x in m: + if isinstance(x, GenericMask): + ret.append(x) + else: + ret.append(GenericMask(x, self.output.height, self.output.width)) + return ret + + def _draw_text_in_mask(self, binary_mask, text, color): + """ + Find proper places to draw text given a binary mask. + """ + # TODO sometimes drawn on wrong objects. the heuristics here can improve. + _num_cc, cc_labels, stats, centroids = cv2.connectedComponentsWithStats(binary_mask, 8) + if stats[1:, -1].size == 0: + return + largest_component_id = np.argmax(stats[1:, -1]) + 1 + + # draw text on the largest component, as well as other very large components. + for cid in range(1, _num_cc): + if cid == largest_component_id or stats[cid, -1] > _LARGE_MASK_AREA_THRESH: + # median is more stable than centroid + # center = centroids[largest_component_id] + center = np.median((cc_labels == cid).nonzero(), axis=1)[::-1] + self.draw_text(text, center, color=color) + + def _convert_keypoints(self, keypoints): + if isinstance(keypoints, Keypoints): + keypoints = keypoints.tensor + keypoints = np.asarray(keypoints) + return keypoints + + def get_output(self): + """ + Returns: + output (VisImage): the image output containing the visualizations added + to the image. 
+ """ + return self.output \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/evaluation/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/evaluation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..49f62369cca38a3c85884f8dea6baea674cb9060 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/evaluation/__init__.py @@ -0,0 +1,3 @@ +from .detection_coco_evaluator import * +from .coco_evaluator import * +from .cityscapes_evaluation import CityscapesInstanceEvaluator \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/evaluation/cityscapes_evaluation.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/evaluation/cityscapes_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..19b1cb779e5f493cf75c8e6913a90da5c174735f --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/evaluation/cityscapes_evaluation.py @@ -0,0 +1,201 @@ +# ------------------------------------------------------------------------------ +# Reference: https://github.com/facebookresearch/detectron2/blob/main/detectron2/evaluation/cityscapes_evaluation.py +# Modified by Jitesh Jain (https://github.com/praeclarumjj3) +# ------------------------------------------------------------------------------ + +import glob +import logging +import numpy as np +import os +import tempfile +from collections import OrderedDict +import torch +from PIL import Image + +from annotator.oneformer.detectron2.data import MetadataCatalog +from annotator.oneformer.detectron2.utils import comm +from annotator.oneformer.detectron2.utils.file_io import PathManager + +from .evaluator import DatasetEvaluator + + +class CityscapesEvaluator(DatasetEvaluator): + """ + Base class for evaluation using cityscapes API. + """ + + def __init__(self, dataset_name): + """ + Args: + dataset_name (str): the name of the dataset. + It must have the following metadata associated with it: + "thing_classes", "gt_dir". + """ + self._metadata = MetadataCatalog.get(dataset_name) + self._cpu_device = torch.device("cpu") + self._logger = logging.getLogger(__name__) + + def reset(self): + self._working_dir = tempfile.TemporaryDirectory(prefix="cityscapes_eval_") + self._temp_dir = self._working_dir.name + # All workers will write to the same results directory + # TODO this does not work in distributed training + assert ( + comm.get_local_size() == comm.get_world_size() + ), "CityscapesEvaluator currently do not work with multiple machines." + self._temp_dir = comm.all_gather(self._temp_dir)[0] + if self._temp_dir != self._working_dir.name: + self._working_dir.cleanup() + self._logger.info( + "Writing cityscapes results to temporary directory {} ...".format(self._temp_dir) + ) + + +class CityscapesInstanceEvaluator(CityscapesEvaluator): + """ + Evaluate instance segmentation results on cityscapes dataset using cityscapes API. + + Note: + * It does not work in multi-machine distributed training. + * It contains a synchronization, therefore has to be used on all ranks. + * Only the main process runs evaluation. 
+ """ + + def process(self, inputs, outputs): + from cityscapesscripts.helpers.labels import name2label + + for input, output in zip(inputs, outputs): + file_name = input["file_name"] + basename = os.path.splitext(os.path.basename(file_name))[0] + pred_txt = os.path.join(self._temp_dir, basename + "_pred.txt") + + if "instances" in output: + output = output["instances"].to(self._cpu_device) + num_instances = len(output) + with open(pred_txt, "w") as fout: + for i in range(num_instances): + pred_class = output.pred_classes[i] + classes = self._metadata.stuff_classes[pred_class] + class_id = name2label[classes].id + score = output.scores[i] + mask = output.pred_masks[i].numpy().astype("uint8") + png_filename = os.path.join( + self._temp_dir, basename + "_{}_{}.png".format(i, classes) + ) + + Image.fromarray(mask * 255).save(png_filename) + fout.write( + "{} {} {}\n".format(os.path.basename(png_filename), class_id, score) + ) + else: + # Cityscapes requires a prediction file for every ground truth image. + with open(pred_txt, "w") as fout: + pass + + def evaluate(self): + """ + Returns: + dict: has a key "segm", whose value is a dict of "AP" and "AP50". + """ + comm.synchronize() + if comm.get_rank() > 0: + return + import cityscapesscripts.evaluation.evalInstanceLevelSemanticLabeling as cityscapes_eval + + self._logger.info("Evaluating results under {} ...".format(self._temp_dir)) + + # set some global states in cityscapes evaluation API, before evaluating + cityscapes_eval.args.predictionPath = os.path.abspath(self._temp_dir) + cityscapes_eval.args.predictionWalk = None + cityscapes_eval.args.JSONOutput = False + cityscapes_eval.args.colorized = False + cityscapes_eval.args.gtInstancesFile = os.path.join(self._temp_dir, "gtInstances.json") + + # These lines are adopted from + # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/evalInstanceLevelSemanticLabeling.py # noqa + gt_dir = PathManager.get_local_path(self._metadata.gt_dir) + groundTruthImgList = glob.glob(os.path.join(gt_dir, "*", "*_gtFine_instanceIds.png")) + assert len( + groundTruthImgList + ), "Cannot find any ground truth images to use for evaluation. Searched for: {}".format( + cityscapes_eval.args.groundTruthSearch + ) + predictionImgList = [] + for gt in groundTruthImgList: + predictionImgList.append(cityscapes_eval.getPrediction(gt, cityscapes_eval.args)) + results = cityscapes_eval.evaluateImgLists( + predictionImgList, groundTruthImgList, cityscapes_eval.args + )["averages"] + + ret = OrderedDict() + ret["segm"] = {"AP": results["allAp"] * 100, "AP50": results["allAp50%"] * 100} + self._working_dir.cleanup() + return ret + + +class CityscapesSemSegEvaluator(CityscapesEvaluator): + """ + Evaluate semantic segmentation results on cityscapes dataset using cityscapes API. + + Note: + * It does not work in multi-machine distributed training. + * It contains a synchronization, therefore has to be used on all ranks. + * Only the main process runs evaluation. 
+ """ + + def process(self, inputs, outputs): + from cityscapesscripts.helpers.labels import trainId2label + + for input, output in zip(inputs, outputs): + file_name = input["file_name"] + basename = os.path.splitext(os.path.basename(file_name))[0] + pred_filename = os.path.join(self._temp_dir, basename + "_pred.png") + + output = output["sem_seg"].argmax(dim=0).to(self._cpu_device).numpy() + pred = 255 * np.ones(output.shape, dtype=np.uint8) + for train_id, label in trainId2label.items(): + if label.ignoreInEval: + continue + pred[output == train_id] = label.id + Image.fromarray(pred).save(pred_filename) + + def evaluate(self): + comm.synchronize() + if comm.get_rank() > 0: + return + # Load the Cityscapes eval script *after* setting the required env var, + # since the script reads CITYSCAPES_DATASET into global variables at load time. + import cityscapesscripts.evaluation.evalPixelLevelSemanticLabeling as cityscapes_eval + + self._logger.info("Evaluating results under {} ...".format(self._temp_dir)) + + # set some global states in cityscapes evaluation API, before evaluating + cityscapes_eval.args.predictionPath = os.path.abspath(self._temp_dir) + cityscapes_eval.args.predictionWalk = None + cityscapes_eval.args.JSONOutput = False + cityscapes_eval.args.colorized = False + + # These lines are adopted from + # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/evalPixelLevelSemanticLabeling.py # noqa + gt_dir = PathManager.get_local_path(self._metadata.gt_dir) + groundTruthImgList = glob.glob(os.path.join(gt_dir, "*", "*_gtFine_labelIds.png")) + assert len( + groundTruthImgList + ), "Cannot find any ground truth images to use for evaluation. Searched for: {}".format( + cityscapes_eval.args.groundTruthSearch + ) + predictionImgList = [] + for gt in groundTruthImgList: + predictionImgList.append(cityscapes_eval.getPrediction(cityscapes_eval.args, gt)) + results = cityscapes_eval.evaluateImgLists( + predictionImgList, groundTruthImgList, cityscapes_eval.args + ) + ret = OrderedDict() + ret["sem_seg"] = { + "IoU": 100.0 * results["averageScoreClasses"], + "iIoU": 100.0 * results["averageScoreInstClasses"], + "IoU_sup": 100.0 * results["averageScoreCategories"], + "iIoU_sup": 100.0 * results["averageScoreInstCategories"], + } + self._working_dir.cleanup() + return ret diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/evaluation/coco_evaluator.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/evaluation/coco_evaluator.py new file mode 100644 index 0000000000000000000000000000000000000000..c26107ee1bdec1e6f83831d6c9a0aaaf0b9cedf1 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/evaluation/coco_evaluator.py @@ -0,0 +1,563 @@ +# ------------------------------------------------------------------------------ +# Reference: https://github.com/facebookresearch/detectron2/blob/main/detectron2/evaluation/coco_evaluation.py +# Modified by Jitesh Jain (https://github.com/praeclarumjj3) +# ------------------------------------------------------------------------------ + +import contextlib +import copy +import io +import itertools +import json +import logging +import numpy as np +import os +import pickle +from collections import OrderedDict +import annotator.oneformer.pycocotools.mask as mask_util +import torch +from annotator.oneformer.pycocotools.coco import COCO +from annotator.oneformer.pycocotools.cocoeval import COCOeval +from tabulate import tabulate + 
+import annotator.oneformer.detectron2.utils.comm as comm +from annotator.oneformer.detectron2.config import CfgNode +from annotator.oneformer.detectron2.data import MetadataCatalog +from annotator.oneformer.detectron2.data.datasets.coco import convert_to_coco_json +from annotator.oneformer.detectron2.structures import Boxes, BoxMode, pairwise_iou +from annotator.oneformer.detectron2.utils.file_io import PathManager +from annotator.oneformer.detectron2.utils.logger import create_small_table + +from .evaluator import DatasetEvaluator + +try: + from annotator.oneformer.detectron2.evaluation.fast_eval_api import COCOeval_opt +except ImportError: + COCOeval_opt = COCOeval + + +class COCOEvaluator(DatasetEvaluator): + """ + Evaluate AP for instance detection/segmentation, AP + for keypoint detection outputs using COCO's metrics. + See http://cocodataset.org/#detection-eval and + http://cocodataset.org/#keypoints-eval to understand its metrics. + The metrics range from 0 to 100 (instead of 0 to 1), where a -1 or NaN means + the metric cannot be computed (e.g. due to no predictions made). + + In addition to COCO, this evaluator is able to support any bounding box detection, + instance segmentation, or keypoint detection dataset. + """ + + def __init__( + self, + dataset_name, + tasks=None, + distributed=True, + output_dir=None, + *, + max_dets_per_image=None, + use_fast_impl=True, + kpt_oks_sigmas=(), + allow_cached_coco=True, + ): + """ + Args: + dataset_name (str): name of the dataset to be evaluated. + It must have either the following corresponding metadata: + + "json_file": the path to the COCO format annotation + + Or it must be in detectron2's standard dataset format + so it can be converted to COCO format automatically. + tasks (tuple[str]): tasks that can be evaluated under the given + configuration. A task is one of "bbox", "segm", "keypoints". + By default, will infer this automatically from predictions. + distributed (True): if True, will collect results from all ranks and run evaluation + in the main process. + Otherwise, will only evaluate the results in the current process. + output_dir (str): optional, an output directory to dump all + results predicted on the dataset. The dump contains two files: + + 1. "instances_predictions.pth" a file that can be loaded with `torch.load` and + contains all the results in the format they are produced by the model. + 2. "coco_instances_results.json" a json file in COCO's result format. + max_dets_per_image (int): limit on the maximum number of detections per image. + By default in COCO, this limit is to 100, but this can be customized + to be greater, as is needed in evaluation metrics AP fixed and AP pool + (see https://arxiv.org/pdf/2102.01066.pdf) + This doesn't affect keypoint evaluation. + use_fast_impl (bool): use a fast but **unofficial** implementation to compute AP. + Although the results should be very close to the official implementation in COCO + API, it is still recommended to compute results with the official API for use in + papers. The faster implementation also uses more RAM. + kpt_oks_sigmas (list[float]): The sigmas used to calculate keypoint OKS. + See http://cocodataset.org/#keypoints-eval + When empty, it will use the defaults in COCO. + Otherwise it should be the same length as ROI_KEYPOINT_HEAD.NUM_KEYPOINTS. + allow_cached_coco (bool): Whether to use cached coco json from previous validation + runs. You should set this to False if you need to use different validation data. + Defaults to True. 
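The constructor body (below) turns the single integer max_dets_per_image into the three-element maxDets list that COCOeval expects, keeping [1, 10] and replacing only the AP limit; a third value other than 100 later routes evaluation through COCOevalMaxDets. A trivial sketch of that normalization, with the helper name being illustrative:

def normalize_max_dets(max_dets_per_image=None):
    """COCOeval wants a 3-element maxDets list; the COCO default is [1, 10, 100]."""
    if max_dets_per_image is None:
        return [1, 10, 100]
    return [1, 10, max_dets_per_image]        # only the AP limit is customized

print(normalize_max_dets())        # [1, 10, 100] -> the stock COCOeval path
print(normalize_max_dets(1000))    # [1, 10, 1000] -> COCOevalMaxDets is used instead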
+ """ + self._logger = logging.getLogger(__name__) + self._distributed = distributed + self._output_dir = output_dir + + if use_fast_impl and (COCOeval_opt is COCOeval): + self._logger.info("Fast COCO eval is not built. Falling back to official COCO eval.") + use_fast_impl = False + self._use_fast_impl = use_fast_impl + + # COCOeval requires the limit on the number of detections per image (maxDets) to be a list + # with at least 3 elements. The default maxDets in COCOeval is [1, 10, 100], in which the + # 3rd element (100) is used as the limit on the number of detections per image when + # evaluating AP. COCOEvaluator expects an integer for max_dets_per_image, so for COCOeval, + # we reformat max_dets_per_image into [1, 10, max_dets_per_image], based on the defaults. + if max_dets_per_image is None: + max_dets_per_image = [1, 10, 100] + else: + max_dets_per_image = [1, 10, max_dets_per_image] + self._max_dets_per_image = max_dets_per_image + + if tasks is not None and isinstance(tasks, CfgNode): + kpt_oks_sigmas = ( + tasks.TEST.KEYPOINT_OKS_SIGMAS if not kpt_oks_sigmas else kpt_oks_sigmas + ) + self._logger.warn( + "COCO Evaluator instantiated using config, this is deprecated behavior." + " Please pass in explicit arguments instead." + ) + self._tasks = None # Infering it from predictions should be better + else: + self._tasks = tasks + + self._cpu_device = torch.device("cpu") + + self._metadata = MetadataCatalog.get(dataset_name) + if not hasattr(self._metadata, "json_file"): + if output_dir is None: + raise ValueError( + "output_dir must be provided to COCOEvaluator " + "for datasets not in COCO format." + ) + self._logger.info(f"Trying to convert '{dataset_name}' to COCO format ...") + + cache_path = os.path.join(output_dir, f"{dataset_name}_coco_format.json") + self._metadata.json_file = cache_path + convert_to_coco_json(dataset_name, cache_path, allow_cached=allow_cached_coco) + + json_file = PathManager.get_local_path(self._metadata.json_file) + with contextlib.redirect_stdout(io.StringIO()): + self._coco_api = COCO(json_file) + + # Test set json files do not contain annotations (evaluation must be + # performed using the COCO evaluation server). + self._do_evaluation = "annotations" in self._coco_api.dataset + if self._do_evaluation: + self._kpt_oks_sigmas = kpt_oks_sigmas + + def reset(self): + self._predictions = [] + + def process(self, inputs, outputs): + """ + Args: + inputs: the inputs to a COCO model (e.g., GeneralizedRCNN). + It is a list of dict. Each dict corresponds to an image and + contains keys like "height", "width", "file_name", "image_id". + outputs: the outputs of a COCO model. It is a list of dicts with key + "instances" that contains :class:`Instances`. + """ + for input, output in zip(inputs, outputs): + prediction = {"image_id": input["image_id"]} + + if "instances" in output: + instances = output["instances"].to(self._cpu_device) + prediction["instances"] = instances_to_coco_json(instances, input["image_id"]) + if len(prediction) > 1: + self._predictions.append(prediction) + + def evaluate(self, img_ids=None): + """ + Args: + img_ids: a list of image IDs to evaluate on. 
Default to None for the whole dataset + """ + if self._distributed: + comm.synchronize() + predictions = comm.gather(self._predictions, dst=0) + predictions = list(itertools.chain(*predictions)) + + if not comm.is_main_process(): + return {} + else: + predictions = self._predictions + + if len(predictions) == 0: + self._logger.warning("[COCOEvaluator] Did not receive valid predictions.") + return {} + + if self._output_dir: + PathManager.mkdirs(self._output_dir) + file_path = os.path.join(self._output_dir, "instances_predictions.pth") + with PathManager.open(file_path, "wb") as f: + torch.save(predictions, f) + + self._results = OrderedDict() + if "instances" in predictions[0]: + self._eval_predictions(predictions, img_ids=img_ids) + # Copy so the caller can do whatever with results + return copy.deepcopy(self._results) + + def _tasks_from_predictions(self, predictions): + """ + Get COCO API "tasks" (i.e. iou_type) from COCO-format predictions. + """ + for pred in predictions: + if "segmentation" in pred: + tasks = {"segm"} + if "keypoints" in pred: + tasks.add("keypoints") + return sorted(tasks) + + def _eval_predictions(self, predictions, img_ids=None): + """ + Evaluate predictions. Fill self._results with the metrics of the tasks. + """ + self._logger.info("Preparing results for COCO format ...") + coco_results = list(itertools.chain(*[x["instances"] for x in predictions])) + tasks = self._tasks or self._tasks_from_predictions(coco_results) + + # unmap the category ids for COCO + if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"): + dataset_id_to_contiguous_id = self._metadata.thing_dataset_id_to_contiguous_id + all_contiguous_ids = list(dataset_id_to_contiguous_id.values()) + num_classes = len(all_contiguous_ids) + assert min(all_contiguous_ids) == 0 and max(all_contiguous_ids) == num_classes - 1 + + reverse_id_mapping = {v: k for k, v in dataset_id_to_contiguous_id.items()} + for result in coco_results: + category_id = result["category_id"] + assert category_id < num_classes, ( + f"A prediction has class={category_id}, " + f"but the dataset only has {num_classes} classes and " + f"predicted class id should be in [0, {num_classes - 1}]." + ) + result["category_id"] = reverse_id_mapping[category_id] + + if self._output_dir: + file_path = os.path.join(self._output_dir, "coco_instances_results.json") + self._logger.info("Saving results to {}".format(file_path)) + with PathManager.open(file_path, "w") as f: + f.write(json.dumps(coco_results)) + f.flush() + + if not self._do_evaluation: + self._logger.info("Annotations are not available for evaluation.") + return + + self._logger.info( + "Evaluating predictions with {} COCO API...".format( + "unofficial" if self._use_fast_impl else "official" + ) + ) + for task in sorted(tasks): + assert task in {"segm", "keypoints"}, f"Got unknown task: {task}!" + coco_eval = ( + _evaluate_predictions_on_coco( + self._coco_api, + coco_results, + task, + kpt_oks_sigmas=self._kpt_oks_sigmas, + use_fast_impl=self._use_fast_impl, + img_ids=img_ids, + max_dets_per_image=self._max_dets_per_image, + ) + if len(coco_results) > 0 + else None # cocoapi does not handle empty results very well + ) + + res = self._derive_coco_results( + coco_eval, task, class_names=self._metadata.get("thing_classes") + ) + self._results[task] = res + + def _derive_coco_results(self, coco_eval, iou_type, class_names=None): + """ + Derive the desired score numbers from summarized COCOeval. + + Args: + coco_eval (None or COCOEval): None represents no predictions from model. 
+ iou_type (str): + class_names (None or list[str]): if provided, will use it to predict + per-category AP. + + Returns: + a dict of {metric name: score} + """ + + metrics = { + "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl"], + "keypoints": ["AP", "AP50", "AP75", "APm", "APl"], + }[iou_type] + + if coco_eval is None: + self._logger.warn("No predictions from the model!") + return {metric: float("nan") for metric in metrics} + + # the standard metrics + results = { + metric: float(coco_eval.stats[idx] * 100 if coco_eval.stats[idx] >= 0 else "nan") + for idx, metric in enumerate(metrics) + } + self._logger.info( + "Evaluation results for {}: \n".format(iou_type) + create_small_table(results) + ) + if not np.isfinite(sum(results.values())): + self._logger.info("Some metrics cannot be computed and is shown as NaN.") + + if class_names is None or len(class_names) <= 1: + return results + # Compute per-category AP + # from https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L222-L252 # noqa + precisions = coco_eval.eval["precision"] + # precision has dims (iou, recall, cls, area range, max dets) + assert len(class_names) == precisions.shape[2] + + results_per_category = [] + for idx, name in enumerate(class_names): + # area range index 0: all area ranges + # max dets index -1: typically 100 per image + precision = precisions[:, :, idx, 0, -1] + precision = precision[precision > -1] + ap = np.mean(precision) if precision.size else float("nan") + results_per_category.append(("{}".format(name), float(ap * 100))) + + # tabulate it + N_COLS = min(6, len(results_per_category) * 2) + results_flatten = list(itertools.chain(*results_per_category)) + results_2d = itertools.zip_longest(*[results_flatten[i::N_COLS] for i in range(N_COLS)]) + table = tabulate( + results_2d, + tablefmt="pipe", + floatfmt=".3f", + headers=["category", "AP"] * (N_COLS // 2), + numalign="left", + ) + self._logger.info("Per-category {} AP: \n".format(iou_type) + table) + + results.update({"AP-" + name: ap for name, ap in results_per_category}) + return results + + +def instances_to_coco_json(instances, img_id): + """ + Dump an "Instances" object to a COCO-format json that's used for evaluation. + + Args: + instances (Instances): + img_id (int): the image id + + Returns: + list[dict]: list of json annotations in COCO format. + """ + num_instance = len(instances) + if num_instance == 0: + return [] + + scores = instances.scores.tolist() + classes = instances.pred_classes.tolist() + + has_mask = instances.has("pred_masks") + if has_mask: + # use RLE to encode the masks, because they are too large and takes memory + # since this evaluator stores outputs of the entire dataset + rles = [ + mask_util.encode(np.array(mask[:, :, None], order="F", dtype="uint8"))[0] + for mask in instances.pred_masks + ] + for rle in rles: + # "counts" is an array encoded by mask_util as a byte-stream. Python3's + # json writer which always produces strings cannot serialize a bytestream + # unless you decode it. Thankfully, utf-8 works out (which is also what + # the annotator.oneformer.pycocotools/_mask.pyx does). 
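_derive_coco_results above pulls per-category AP out of COCOeval's precision array, whose dimensions are (iou threshold, recall, class, area range, max detections); area index 0 means all areas, maxdets index -1 is the largest detection limit, and -1 entries are treated as invalid and dropped. A small NumPy sketch of that slice-and-average on a fake precision tensor of the usual shape:

import numpy as np

def per_category_ap(precisions, class_idx):
    p = precisions[:, :, class_idx, 0, -1]     # all areas, highest maxDets
    p = p[p > -1]                              # drop invalid entries
    return float(np.mean(p) * 100) if p.size else float("nan")

fake = np.random.rand(10, 101, 3, 4, 3)        # 10 IoUs, 101 recall points, 3 classes
print(round(per_category_ap(fake, class_idx=1), 2))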
+ rle["counts"] = rle["counts"].decode("utf-8") + + has_keypoints = instances.has("pred_keypoints") + if has_keypoints: + keypoints = instances.pred_keypoints + + results = [] + for k in range(num_instance): + result = { + "image_id": img_id, + "category_id": classes[k], + "score": scores[k], + } + if has_mask: + result["segmentation"] = rles[k] + if has_keypoints: + # In COCO annotations, + # keypoints coordinates are pixel indices. + # However our predictions are floating point coordinates. + # Therefore we subtract 0.5 to be consistent with the annotation format. + # This is the inverse of data loading logic in `datasets/coco.py`. + keypoints[k][:, :2] -= 0.5 + result["keypoints"] = keypoints[k].flatten().tolist() + results.append(result) + return results + +def _evaluate_predictions_on_coco( + coco_gt, + coco_results, + iou_type, + kpt_oks_sigmas=None, + use_fast_impl=True, + img_ids=None, + max_dets_per_image=None, +): + """ + Evaluate the coco results using COCOEval API. + """ + assert len(coco_results) > 0 + + if iou_type == "segm": + coco_results = copy.deepcopy(coco_results) + # When evaluating mask AP, if the results contain bbox, cocoapi will + # use the box area as the area of the instance, instead of the mask area. + # This leads to a different definition of small/medium/large. + # We remove the bbox field to let mask AP use mask area. + for c in coco_results: + c.pop("bbox", None) + + coco_dt = coco_gt.loadRes(coco_results) + coco_eval = (COCOeval_opt if use_fast_impl else COCOeval)(coco_gt, coco_dt, iou_type) + # For COCO, the default max_dets_per_image is [1, 10, 100]. + if max_dets_per_image is None: + max_dets_per_image = [1, 10, 100] # Default from COCOEval + else: + assert ( + len(max_dets_per_image) >= 3 + ), "COCOeval requires maxDets (and max_dets_per_image) to have length at least 3" + # In the case that user supplies a custom input for max_dets_per_image, + # apply COCOevalMaxDets to evaluate AP with the custom input. + if max_dets_per_image[2] != 100: + coco_eval = COCOevalMaxDets(coco_gt, coco_dt, iou_type) + if iou_type != "keypoints": + coco_eval.params.maxDets = max_dets_per_image + + if img_ids is not None: + coco_eval.params.imgIds = img_ids + + if iou_type == "keypoints": + # Use the COCO default keypoint OKS sigmas unless overrides are specified + if kpt_oks_sigmas: + assert hasattr(coco_eval.params, "kpt_oks_sigmas"), "annotator.oneformer.pycocotools is too old!" + coco_eval.params.kpt_oks_sigmas = np.array(kpt_oks_sigmas) + # COCOAPI requires every detection and every gt to have keypoints, so + # we just take the first entry from both + num_keypoints_dt = len(coco_results[0]["keypoints"]) // 3 + num_keypoints_gt = len(next(iter(coco_gt.anns.values()))["keypoints"]) // 3 + num_keypoints_oks = len(coco_eval.params.kpt_oks_sigmas) + assert num_keypoints_oks == num_keypoints_dt == num_keypoints_gt, ( + f"[COCOEvaluator] Prediction contain {num_keypoints_dt} keypoints. " + f"Ground truth contains {num_keypoints_gt} keypoints. " + f"The length of cfg.TEST.KEYPOINT_OKS_SIGMAS is {num_keypoints_oks}. " + "They have to agree with each other. For meaning of OKS, please refer to " + "http://cocodataset.org/#keypoints-eval." 
+ ) + + coco_eval.evaluate() + coco_eval.accumulate() + coco_eval.summarize() + + return coco_eval + + +class COCOevalMaxDets(COCOeval): + """ + Modified version of COCOeval for evaluating AP with a custom + maxDets (by default for COCO, maxDets is 100) + """ + + def summarize(self): + """ + Compute and display summary metrics for evaluation results given + a custom value for max_dets_per_image + """ + + def _summarize(ap=1, iouThr=None, areaRng="all", maxDets=100): + p = self.params + iStr = " {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}" + titleStr = "Average Precision" if ap == 1 else "Average Recall" + typeStr = "(AP)" if ap == 1 else "(AR)" + iouStr = ( + "{:0.2f}:{:0.2f}".format(p.iouThrs[0], p.iouThrs[-1]) + if iouThr is None + else "{:0.2f}".format(iouThr) + ) + + aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng] + mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets] + if ap == 1: + # dimension of precision: [TxRxKxAxM] + s = self.eval["precision"] + # IoU + if iouThr is not None: + t = np.where(iouThr == p.iouThrs)[0] + s = s[t] + s = s[:, :, :, aind, mind] + else: + # dimension of recall: [TxKxAxM] + s = self.eval["recall"] + if iouThr is not None: + t = np.where(iouThr == p.iouThrs)[0] + s = s[t] + s = s[:, :, aind, mind] + if len(s[s > -1]) == 0: + mean_s = -1 + else: + mean_s = np.mean(s[s > -1]) + print(iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s)) + return mean_s + + def _summarizeDets(): + stats = np.zeros((12,)) + # Evaluate AP using the custom limit on maximum detections per image + stats[0] = _summarize(1, maxDets=self.params.maxDets[2]) + stats[1] = _summarize(1, iouThr=0.5, maxDets=self.params.maxDets[2]) + stats[2] = _summarize(1, iouThr=0.75, maxDets=self.params.maxDets[2]) + stats[3] = _summarize(1, areaRng="small", maxDets=self.params.maxDets[2]) + stats[4] = _summarize(1, areaRng="medium", maxDets=self.params.maxDets[2]) + stats[5] = _summarize(1, areaRng="large", maxDets=self.params.maxDets[2]) + stats[6] = _summarize(0, maxDets=self.params.maxDets[0]) + stats[7] = _summarize(0, maxDets=self.params.maxDets[1]) + stats[8] = _summarize(0, maxDets=self.params.maxDets[2]) + stats[9] = _summarize(0, areaRng="small", maxDets=self.params.maxDets[2]) + stats[10] = _summarize(0, areaRng="medium", maxDets=self.params.maxDets[2]) + stats[11] = _summarize(0, areaRng="large", maxDets=self.params.maxDets[2]) + return stats + + def _summarizeKps(): + stats = np.zeros((10,)) + stats[0] = _summarize(1, maxDets=20) + stats[1] = _summarize(1, maxDets=20, iouThr=0.5) + stats[2] = _summarize(1, maxDets=20, iouThr=0.75) + stats[3] = _summarize(1, maxDets=20, areaRng="medium") + stats[4] = _summarize(1, maxDets=20, areaRng="large") + stats[5] = _summarize(0, maxDets=20) + stats[6] = _summarize(0, maxDets=20, iouThr=0.5) + stats[7] = _summarize(0, maxDets=20, iouThr=0.75) + stats[8] = _summarize(0, maxDets=20, areaRng="medium") + stats[9] = _summarize(0, maxDets=20, areaRng="large") + return stats + + if not self.eval: + raise Exception("Please run accumulate() first") + iouType = self.params.iouType + if iouType == "segm": + summarize = _summarizeDets + elif iouType == "keypoints": + summarize = _summarizeKps + self.stats = summarize() + + def __str__(self): + self.summarize() \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/evaluation/detection_coco_evaluator.py 
b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/evaluation/detection_coco_evaluator.py new file mode 100644 index 0000000000000000000000000000000000000000..8ea65b7f2226bfea3a884a5f4aa37f6e658b7e83 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/evaluation/detection_coco_evaluator.py @@ -0,0 +1,723 @@ +# ------------------------------------------------------------------------------ +# Reference: https://github.com/facebookresearch/detectron2/blob/main/detectron2/evaluation/coco_evaluation.py +# Modified by Jitesh Jain (https://github.com/praeclarumjj3) +# ------------------------------------------------------------------------------ + +import contextlib +import copy +import io +import itertools +import json +import logging +import numpy as np +import os +import pickle +from collections import OrderedDict +import annotator.oneformer.pycocotools.mask as mask_util +import torch +from annotator.oneformer.pycocotools.coco import COCO +from annotator.oneformer.pycocotools.cocoeval import COCOeval +from tabulate import tabulate + +import annotator.oneformer.detectron2.utils.comm as comm +from annotator.oneformer.detectron2.config import CfgNode +from annotator.oneformer.detectron2.data import MetadataCatalog +from annotator.oneformer.detectron2.data.datasets.coco import convert_to_coco_json +from annotator.oneformer.detectron2.structures import Boxes, BoxMode, pairwise_iou +from annotator.oneformer.detectron2.utils.file_io import PathManager +from annotator.oneformer.detectron2.utils.logger import create_small_table + +from .evaluator import DatasetEvaluator + +try: + from annotator.oneformer.detectron2.evaluation.fast_eval_api import COCOeval_opt +except ImportError: + COCOeval_opt = COCOeval + + +class DetectionCOCOEvaluator(DatasetEvaluator): + """ + Evaluate AR for object proposals, AP for instance detection/segmentation, AP + for keypoint detection outputs using COCO's metrics. + See http://cocodataset.org/#detection-eval and + http://cocodataset.org/#keypoints-eval to understand its metrics. + The metrics range from 0 to 100 (instead of 0 to 1), where a -1 or NaN means + the metric cannot be computed (e.g. due to no predictions made). + + In addition to COCO, this evaluator is able to support any bounding box detection, + instance segmentation, or keypoint detection dataset. + """ + + def __init__( + self, + dataset_name, + tasks=None, + distributed=True, + output_dir=None, + *, + max_dets_per_image=None, + use_fast_impl=True, + kpt_oks_sigmas=(), + allow_cached_coco=True, + ): + """ + Args: + dataset_name (str): name of the dataset to be evaluated. + It must have either the following corresponding metadata: + + "json_file": the path to the COCO format annotation + + Or it must be in detectron2's standard dataset format + so it can be converted to COCO format automatically. + tasks (tuple[str]): tasks that can be evaluated under the given + configuration. A task is one of "bbox", "segm", "keypoints". + By default, will infer this automatically from predictions. + distributed (True): if True, will collect results from all ranks and run evaluation + in the main process. + Otherwise, will only evaluate the results in the current process. + output_dir (str): optional, an output directory to dump all + results predicted on the dataset. The dump contains two files: + + 1. "instances_predictions.pth" a file that can be loaded with `torch.load` and + contains all the results in the format they are produced by the model. 
+ 2. "coco_instances_results.json" a json file in COCO's result format. + max_dets_per_image (int): limit on the maximum number of detections per image. + By default in COCO, this limit is to 100, but this can be customized + to be greater, as is needed in evaluation metrics AP fixed and AP pool + (see https://arxiv.org/pdf/2102.01066.pdf) + This doesn't affect keypoint evaluation. + use_fast_impl (bool): use a fast but **unofficial** implementation to compute AP. + Although the results should be very close to the official implementation in COCO + API, it is still recommended to compute results with the official API for use in + papers. The faster implementation also uses more RAM. + kpt_oks_sigmas (list[float]): The sigmas used to calculate keypoint OKS. + See http://cocodataset.org/#keypoints-eval + When empty, it will use the defaults in COCO. + Otherwise it should be the same length as ROI_KEYPOINT_HEAD.NUM_KEYPOINTS. + allow_cached_coco (bool): Whether to use cached coco json from previous validation + runs. You should set this to False if you need to use different validation data. + Defaults to True. + """ + self._logger = logging.getLogger(__name__) + self._distributed = distributed + self._output_dir = output_dir + + if use_fast_impl and (COCOeval_opt is COCOeval): + self._logger.info("Fast COCO eval is not built. Falling back to official COCO eval.") + use_fast_impl = False + self._use_fast_impl = use_fast_impl + + # COCOeval requires the limit on the number of detections per image (maxDets) to be a list + # with at least 3 elements. The default maxDets in COCOeval is [1, 10, 100], in which the + # 3rd element (100) is used as the limit on the number of detections per image when + # evaluating AP. COCOEvaluator expects an integer for max_dets_per_image, so for COCOeval, + # we reformat max_dets_per_image into [1, 10, max_dets_per_image], based on the defaults. + if max_dets_per_image is None: + max_dets_per_image = [1, 10, 100] + else: + max_dets_per_image = [1, 10, max_dets_per_image] + self._max_dets_per_image = max_dets_per_image + + if tasks is not None and isinstance(tasks, CfgNode): + kpt_oks_sigmas = ( + tasks.TEST.KEYPOINT_OKS_SIGMAS if not kpt_oks_sigmas else kpt_oks_sigmas + ) + self._logger.warn( + "COCO Evaluator instantiated using config, this is deprecated behavior." + " Please pass in explicit arguments instead." + ) + self._tasks = None # Infering it from predictions should be better + else: + self._tasks = tasks + + self._cpu_device = torch.device("cpu") + + self._metadata = MetadataCatalog.get(dataset_name) + if not hasattr(self._metadata, "json_file"): + if output_dir is None: + raise ValueError( + "output_dir must be provided to COCOEvaluator " + "for datasets not in COCO format." + ) + self._logger.info(f"Trying to convert '{dataset_name}' to COCO format ...") + + cache_path = os.path.join(output_dir, f"{dataset_name}_coco_format.json") + self._metadata.json_file = cache_path + convert_to_coco_json(dataset_name, cache_path, allow_cached=allow_cached_coco) + + json_file = PathManager.get_local_path(self._metadata.json_file) + with contextlib.redirect_stdout(io.StringIO()): + self._coco_api = COCO(json_file) + + # Test set json files do not contain annotations (evaluation must be + # performed using the COCO evaluation server). 
+ self._do_evaluation = "annotations" in self._coco_api.dataset + if self._do_evaluation: + self._kpt_oks_sigmas = kpt_oks_sigmas + + def reset(self): + self._predictions = [] + + def process(self, inputs, outputs): + """ + Args: + inputs: the inputs to a COCO model (e.g., GeneralizedRCNN). + It is a list of dict. Each dict corresponds to an image and + contains keys like "height", "width", "file_name", "image_id". + outputs: the outputs of a COCO model. It is a list of dicts with key + "box_instances" that contains :class:`Instances`. + """ + for input, output in zip(inputs, outputs): + prediction = {"image_id": input["image_id"]} + + if "box_instances" in output: + instances = output["box_instances"].to(self._cpu_device) + prediction["box_instances"] = instances_to_coco_json(instances, input["image_id"]) + if "proposals" in output: + prediction["proposals"] = output["proposals"].to(self._cpu_device) + if len(prediction) > 1: + self._predictions.append(prediction) + + def evaluate(self, img_ids=None): + """ + Args: + img_ids: a list of image IDs to evaluate on. Default to None for the whole dataset + """ + if self._distributed: + comm.synchronize() + predictions = comm.gather(self._predictions, dst=0) + predictions = list(itertools.chain(*predictions)) + + if not comm.is_main_process(): + return {} + else: + predictions = self._predictions + + if len(predictions) == 0: + self._logger.warning("[COCOEvaluator] Did not receive valid predictions.") + return {} + + if self._output_dir: + PathManager.mkdirs(self._output_dir) + file_path = os.path.join(self._output_dir, "instances_predictions.pth") + with PathManager.open(file_path, "wb") as f: + torch.save(predictions, f) + + self._results = OrderedDict() + if "proposals" in predictions[0]: + self._eval_box_proposals(predictions) + if "box_instances" in predictions[0]: + self._eval_predictions(predictions, img_ids=img_ids) + # Copy so the caller can do whatever with results + return copy.deepcopy(self._results) + + def _tasks_from_predictions(self, predictions): + """ + Get COCO API "tasks" (i.e. iou_type) from COCO-format predictions. + """ + tasks = {"bbox"} + for pred in predictions: + if "keypoints" in pred: + tasks.add("keypoints") + return sorted(tasks) + + def _eval_predictions(self, predictions, img_ids=None): + """ + Evaluate predictions. Fill self._results with the metrics of the tasks. + """ + self._logger.info("Preparing results for COCO format ...") + coco_results = list(itertools.chain(*[x["box_instances"] for x in predictions])) + tasks = self._tasks or self._tasks_from_predictions(coco_results) + + # unmap the category ids for COCO + if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"): + dataset_id_to_contiguous_id = self._metadata.thing_dataset_id_to_contiguous_id + all_contiguous_ids = list(dataset_id_to_contiguous_id.values()) + num_classes = len(all_contiguous_ids) + assert min(all_contiguous_ids) == 0 and max(all_contiguous_ids) == num_classes - 1 + + reverse_id_mapping = {v: k for k, v in dataset_id_to_contiguous_id.items()} + for result in coco_results: + category_id = result["category_id"] + assert category_id < num_classes, ( + f"A prediction has class={category_id}, " + f"but the dataset only has {num_classes} classes and " + f"predicted class id should be in [0, {num_classes - 1}]." 
+ ) + result["category_id"] = reverse_id_mapping[category_id] + + if self._output_dir: + file_path = os.path.join(self._output_dir, "coco_instances_results.json") + self._logger.info("Saving results to {}".format(file_path)) + with PathManager.open(file_path, "w") as f: + f.write(json.dumps(coco_results)) + f.flush() + + if not self._do_evaluation: + self._logger.info("Annotations are not available for evaluation.") + return + + self._logger.info( + "Evaluating predictions with {} COCO API...".format( + "unofficial" if self._use_fast_impl else "official" + ) + ) + for task in sorted(tasks): + assert task in {"bbox", "keypoints"}, f"Got unknown task: {task}!" + coco_eval = ( + _evaluate_predictions_on_coco( + self._coco_api, + coco_results, + task, + kpt_oks_sigmas=self._kpt_oks_sigmas, + use_fast_impl=self._use_fast_impl, + img_ids=img_ids, + max_dets_per_image=self._max_dets_per_image, + ) + if len(coco_results) > 0 + else None # cocoapi does not handle empty results very well + ) + + res = self._derive_coco_results( + coco_eval, task, class_names=self._metadata.get("thing_classes") + ) + self._results[task] = res + + def _eval_box_proposals(self, predictions): + """ + Evaluate the box proposals in predictions. + Fill self._results with the metrics for "box_proposals" task. + """ + if self._output_dir: + # Saving generated box proposals to file. + # Predicted box_proposals are in XYXY_ABS mode. + bbox_mode = BoxMode.XYXY_ABS.value + ids, boxes, objectness_logits = [], [], [] + for prediction in predictions: + ids.append(prediction["image_id"]) + boxes.append(prediction["proposals"].proposal_boxes.tensor.numpy()) + objectness_logits.append(prediction["proposals"].objectness_logits.numpy()) + + proposal_data = { + "boxes": boxes, + "objectness_logits": objectness_logits, + "ids": ids, + "bbox_mode": bbox_mode, + } + with PathManager.open(os.path.join(self._output_dir, "box_proposals.pkl"), "wb") as f: + pickle.dump(proposal_data, f) + + if not self._do_evaluation: + self._logger.info("Annotations are not available for evaluation.") + return + + self._logger.info("Evaluating bbox proposals ...") + res = {} + areas = {"all": "", "small": "s", "medium": "m", "large": "l"} + for limit in [100, 1000]: + for area, suffix in areas.items(): + stats = _evaluate_box_proposals(predictions, self._coco_api, area=area, limit=limit) + key = "AR{}@{:d}".format(suffix, limit) + res[key] = float(stats["ar"].item() * 100) + self._logger.info("Proposal metrics: \n" + create_small_table(res)) + self._results["box_proposals"] = res + + def _derive_coco_results(self, coco_eval, iou_type, class_names=None): + """ + Derive the desired score numbers from summarized COCOeval. + + Args: + coco_eval (None or COCOEval): None represents no predictions from model. + iou_type (str): + class_names (None or list[str]): if provided, will use it to predict + per-category AP. 
+ + Returns: + a dict of {metric name: score} + """ + + metrics = { + "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl"], + "keypoints": ["AP", "AP50", "AP75", "APm", "APl"], + }[iou_type] + + if coco_eval is None: + self._logger.warn("No predictions from the model!") + return {metric: float("nan") for metric in metrics} + + # the standard metrics + results = { + metric: float(coco_eval.stats[idx] * 100 if coco_eval.stats[idx] >= 0 else "nan") + for idx, metric in enumerate(metrics) + } + self._logger.info( + "Evaluation results for {}: \n".format(iou_type) + create_small_table(results) + ) + if not np.isfinite(sum(results.values())): + self._logger.info("Some metrics cannot be computed and is shown as NaN.") + + if class_names is None or len(class_names) <= 1: + return results + # Compute per-category AP + # from https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L222-L252 # noqa + precisions = coco_eval.eval["precision"] + # precision has dims (iou, recall, cls, area range, max dets) + assert len(class_names) == precisions.shape[2] + + results_per_category = [] + for idx, name in enumerate(class_names): + # area range index 0: all area ranges + # max dets index -1: typically 100 per image + precision = precisions[:, :, idx, 0, -1] + precision = precision[precision > -1] + ap = np.mean(precision) if precision.size else float("nan") + results_per_category.append(("{}".format(name), float(ap * 100))) + + # tabulate it + N_COLS = min(6, len(results_per_category) * 2) + results_flatten = list(itertools.chain(*results_per_category)) + results_2d = itertools.zip_longest(*[results_flatten[i::N_COLS] for i in range(N_COLS)]) + table = tabulate( + results_2d, + tablefmt="pipe", + floatfmt=".3f", + headers=["category", "AP"] * (N_COLS // 2), + numalign="left", + ) + self._logger.info("Per-category {} AP: \n".format(iou_type) + table) + + results.update({"AP-" + name: ap for name, ap in results_per_category}) + return results + + +def instances_to_coco_json(instances, img_id): + """ + Dump an "Instances" object to a COCO-format json that's used for evaluation. + + Args: + instances (Instances): + img_id (int): the image id + + Returns: + list[dict]: list of json annotations in COCO format. + """ + num_instance = len(instances) + if num_instance == 0: + return [] + + boxes = instances.pred_boxes.tensor.numpy() + boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS) + boxes = boxes.tolist() + scores = instances.scores.tolist() + classes = instances.pred_classes.tolist() + + has_mask = instances.has("pred_masks") + if has_mask: + # use RLE to encode the masks, because they are too large and takes memory + # since this evaluator stores outputs of the entire dataset + rles = [ + mask_util.encode(np.array(mask[:, :, None], order="F", dtype="uint8"))[0] + for mask in instances.pred_masks + ] + for rle in rles: + # "counts" is an array encoded by mask_util as a byte-stream. Python3's + # json writer which always produces strings cannot serialize a bytestream + # unless you decode it. Thankfully, utf-8 works out (which is also what + # the annotator.oneformer.pycocotools/_mask.pyx does). 
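instances_to_coco_json above converts predicted boxes from detectron2's XYXY_ABS corner format to the XYWH_ABS form that COCO result json expects before serializing them; BoxMode.convert performs the same arithmetic for this pair of modes. A one-function NumPy sketch of that conversion:

import numpy as np

def xyxy_to_xywh(boxes):
    """boxes: (N, 4) array of [x0, y0, x1, y1]; returns [x0, y0, width, height]."""
    boxes = np.asarray(boxes, dtype="float64").copy()
    boxes[:, 2] -= boxes[:, 0]
    boxes[:, 3] -= boxes[:, 1]
    return boxes

print(xyxy_to_xywh([[10.0, 20.0, 50.0, 80.0]]))   # -> [[10. 20. 40. 60.]]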
+ rle["counts"] = rle["counts"].decode("utf-8") + + has_keypoints = instances.has("pred_keypoints") + if has_keypoints: + keypoints = instances.pred_keypoints + + results = [] + for k in range(num_instance): + result = { + "image_id": img_id, + "category_id": classes[k], + "bbox": boxes[k], + "score": scores[k], + } + if has_mask: + result["segmentation"] = rles[k] + if has_keypoints: + # In COCO annotations, + # keypoints coordinates are pixel indices. + # However our predictions are floating point coordinates. + # Therefore we subtract 0.5 to be consistent with the annotation format. + # This is the inverse of data loading logic in `datasets/coco.py`. + keypoints[k][:, :2] -= 0.5 + result["keypoints"] = keypoints[k].flatten().tolist() + results.append(result) + return results + + +# inspired from Detectron: +# https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L255 # noqa +def _evaluate_box_proposals(dataset_predictions, coco_api, thresholds=None, area="all", limit=None): + """ + Evaluate detection proposal recall metrics. This function is a much + faster alternative to the official COCO API recall evaluation code. However, + it produces slightly different results. + """ + # Record max overlap value for each gt box + # Return vector of overlap values + areas = { + "all": 0, + "small": 1, + "medium": 2, + "large": 3, + "96-128": 4, + "128-256": 5, + "256-512": 6, + "512-inf": 7, + } + area_ranges = [ + [0**2, 1e5**2], # all + [0**2, 32**2], # small + [32**2, 96**2], # medium + [96**2, 1e5**2], # large + [96**2, 128**2], # 96-128 + [128**2, 256**2], # 128-256 + [256**2, 512**2], # 256-512 + [512**2, 1e5**2], + ] # 512-inf + assert area in areas, "Unknown area range: {}".format(area) + area_range = area_ranges[areas[area]] + gt_overlaps = [] + num_pos = 0 + + for prediction_dict in dataset_predictions: + predictions = prediction_dict["proposals"] + + # sort predictions in descending order + # TODO maybe remove this and make it explicit in the documentation + inds = predictions.objectness_logits.sort(descending=True)[1] + predictions = predictions[inds] + + ann_ids = coco_api.getAnnIds(imgIds=prediction_dict["image_id"]) + anno = coco_api.loadAnns(ann_ids) + gt_boxes = [ + BoxMode.convert(obj["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) + for obj in anno + if obj["iscrowd"] == 0 + ] + gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4) # guard against no boxes + gt_boxes = Boxes(gt_boxes) + gt_areas = torch.as_tensor([obj["area"] for obj in anno if obj["iscrowd"] == 0]) + + if len(gt_boxes) == 0 or len(predictions) == 0: + continue + + valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <= area_range[1]) + gt_boxes = gt_boxes[valid_gt_inds] + + num_pos += len(gt_boxes) + + if len(gt_boxes) == 0: + continue + + if limit is not None and len(predictions) > limit: + predictions = predictions[:limit] + + overlaps = pairwise_iou(predictions.proposal_boxes, gt_boxes) + + _gt_overlaps = torch.zeros(len(gt_boxes)) + for j in range(min(len(predictions), len(gt_boxes))): + # find which proposal box maximally covers each gt box + # and get the iou amount of coverage for each gt box + max_overlaps, argmax_overlaps = overlaps.max(dim=0) + + # find which gt box is 'best' covered (i.e. 
'best' = most iou) + gt_ovr, gt_ind = max_overlaps.max(dim=0) + assert gt_ovr >= 0 + # find the proposal box that covers the best covered gt box + box_ind = argmax_overlaps[gt_ind] + # record the iou coverage of this gt box + _gt_overlaps[j] = overlaps[box_ind, gt_ind] + assert _gt_overlaps[j] == gt_ovr + # mark the proposal box and the gt box as used + overlaps[box_ind, :] = -1 + overlaps[:, gt_ind] = -1 + + # append recorded iou coverage level + gt_overlaps.append(_gt_overlaps) + gt_overlaps = ( + torch.cat(gt_overlaps, dim=0) if len(gt_overlaps) else torch.zeros(0, dtype=torch.float32) + ) + gt_overlaps, _ = torch.sort(gt_overlaps) + + if thresholds is None: + step = 0.05 + thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32) + recalls = torch.zeros_like(thresholds) + # compute recall for each iou threshold + for i, t in enumerate(thresholds): + recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos) + # ar = 2 * np.trapz(recalls, thresholds) + ar = recalls.mean() + return { + "ar": ar, + "recalls": recalls, + "thresholds": thresholds, + "gt_overlaps": gt_overlaps, + "num_pos": num_pos, + } + + +def _evaluate_predictions_on_coco( + coco_gt, + coco_results, + iou_type, + kpt_oks_sigmas=None, + use_fast_impl=True, + img_ids=None, + max_dets_per_image=None, +): + """ + Evaluate the coco results using COCOEval API. + """ + assert len(coco_results) > 0 + + if iou_type == "segm": + coco_results = copy.deepcopy(coco_results) + # When evaluating mask AP, if the results contain bbox, cocoapi will + # use the box area as the area of the instance, instead of the mask area. + # This leads to a different definition of small/medium/large. + # We remove the bbox field to let mask AP use mask area. + for c in coco_results: + c.pop("bbox", None) + + coco_dt = coco_gt.loadRes(coco_results) + coco_eval = (COCOeval_opt if use_fast_impl else COCOeval)(coco_gt, coco_dt, iou_type) + # For COCO, the default max_dets_per_image is [1, 10, 100]. + if max_dets_per_image is None: + max_dets_per_image = [1, 10, 100] # Default from COCOEval + else: + assert ( + len(max_dets_per_image) >= 3 + ), "COCOeval requires maxDets (and max_dets_per_image) to have length at least 3" + # In the case that user supplies a custom input for max_dets_per_image, + # apply COCOevalMaxDets to evaluate AP with the custom input. + if max_dets_per_image[2] != 100: + coco_eval = COCOevalMaxDets(coco_gt, coco_dt, iou_type) + if iou_type != "keypoints": + coco_eval.params.maxDets = max_dets_per_image + + if img_ids is not None: + coco_eval.params.imgIds = img_ids + + if iou_type == "keypoints": + # Use the COCO default keypoint OKS sigmas unless overrides are specified + if kpt_oks_sigmas: + assert hasattr(coco_eval.params, "kpt_oks_sigmas"), "annotator.oneformer.pycocotools is too old!" + coco_eval.params.kpt_oks_sigmas = np.array(kpt_oks_sigmas) + # COCOAPI requires every detection and every gt to have keypoints, so + # we just take the first entry from both + num_keypoints_dt = len(coco_results[0]["keypoints"]) // 3 + num_keypoints_gt = len(next(iter(coco_gt.anns.values()))["keypoints"]) // 3 + num_keypoints_oks = len(coco_eval.params.kpt_oks_sigmas) + assert num_keypoints_oks == num_keypoints_dt == num_keypoints_gt, ( + f"[COCOEvaluator] Prediction contain {num_keypoints_dt} keypoints. " + f"Ground truth contains {num_keypoints_gt} keypoints. " + f"The length of cfg.TEST.KEYPOINT_OKS_SIGMAS is {num_keypoints_oks}. " + "They have to agree with each other. 
For meaning of OKS, please refer to " + "http://cocodataset.org/#keypoints-eval." + ) + + coco_eval.evaluate() + coco_eval.accumulate() + coco_eval.summarize() + + return coco_eval + + +class COCOevalMaxDets(COCOeval): + """ + Modified version of COCOeval for evaluating AP with a custom + maxDets (by default for COCO, maxDets is 100) + """ + + def summarize(self): + """ + Compute and display summary metrics for evaluation results given + a custom value for max_dets_per_image + """ + + def _summarize(ap=1, iouThr=None, areaRng="all", maxDets=100): + p = self.params + iStr = " {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}" + titleStr = "Average Precision" if ap == 1 else "Average Recall" + typeStr = "(AP)" if ap == 1 else "(AR)" + iouStr = ( + "{:0.2f}:{:0.2f}".format(p.iouThrs[0], p.iouThrs[-1]) + if iouThr is None + else "{:0.2f}".format(iouThr) + ) + + aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng] + mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets] + if ap == 1: + # dimension of precision: [TxRxKxAxM] + s = self.eval["precision"] + # IoU + if iouThr is not None: + t = np.where(iouThr == p.iouThrs)[0] + s = s[t] + s = s[:, :, :, aind, mind] + else: + # dimension of recall: [TxKxAxM] + s = self.eval["recall"] + if iouThr is not None: + t = np.where(iouThr == p.iouThrs)[0] + s = s[t] + s = s[:, :, aind, mind] + if len(s[s > -1]) == 0: + mean_s = -1 + else: + mean_s = np.mean(s[s > -1]) + print(iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s)) + return mean_s + + def _summarizeDets(): + stats = np.zeros((12,)) + # Evaluate AP using the custom limit on maximum detections per image + stats[0] = _summarize(1, maxDets=self.params.maxDets[2]) + stats[1] = _summarize(1, iouThr=0.5, maxDets=self.params.maxDets[2]) + stats[2] = _summarize(1, iouThr=0.75, maxDets=self.params.maxDets[2]) + stats[3] = _summarize(1, areaRng="small", maxDets=self.params.maxDets[2]) + stats[4] = _summarize(1, areaRng="medium", maxDets=self.params.maxDets[2]) + stats[5] = _summarize(1, areaRng="large", maxDets=self.params.maxDets[2]) + stats[6] = _summarize(0, maxDets=self.params.maxDets[0]) + stats[7] = _summarize(0, maxDets=self.params.maxDets[1]) + stats[8] = _summarize(0, maxDets=self.params.maxDets[2]) + stats[9] = _summarize(0, areaRng="small", maxDets=self.params.maxDets[2]) + stats[10] = _summarize(0, areaRng="medium", maxDets=self.params.maxDets[2]) + stats[11] = _summarize(0, areaRng="large", maxDets=self.params.maxDets[2]) + return stats + + def _summarizeKps(): + stats = np.zeros((10,)) + stats[0] = _summarize(1, maxDets=20) + stats[1] = _summarize(1, maxDets=20, iouThr=0.5) + stats[2] = _summarize(1, maxDets=20, iouThr=0.75) + stats[3] = _summarize(1, maxDets=20, areaRng="medium") + stats[4] = _summarize(1, maxDets=20, areaRng="large") + stats[5] = _summarize(0, maxDets=20) + stats[6] = _summarize(0, maxDets=20, iouThr=0.5) + stats[7] = _summarize(0, maxDets=20, iouThr=0.75) + stats[8] = _summarize(0, maxDets=20, areaRng="medium") + stats[9] = _summarize(0, maxDets=20, areaRng="large") + return stats + + if not self.eval: + raise Exception("Please run accumulate() first") + iouType = self.params.iouType + if iouType == "segm" or iouType == "bbox": + summarize = _summarizeDets + elif iouType == "keypoints": + summarize = _summarizeKps + self.stats = summarize() + + def __str__(self): + self.summarize() \ No newline at end of file diff --git 
a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/evaluation/evaluator.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/evaluation/evaluator.py new file mode 100644 index 0000000000000000000000000000000000000000..2c85d90eaa5236773a901a68c43c28d42bfd47ec --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/evaluation/evaluator.py @@ -0,0 +1,228 @@ +# ------------------------------------------------------------------------------ +# Reference: https://github.com/facebookresearch/detectron2/blob/main/detectron2/evaluation/evaluator.py +# Modified by Jitesh Jain (https://github.com/praeclarumjj3) +# ------------------------------------------------------------------------------ + +import datetime +import logging +import time +from collections import OrderedDict, abc +from contextlib import ExitStack, contextmanager +from typing import List, Union +import torch +from torch import nn + +from annotator.oneformer.detectron2.utils.comm import get_world_size, is_main_process +from annotator.oneformer.detectron2.utils.logger import log_every_n_seconds + + +class DatasetEvaluator: + """ + Base class for a dataset evaluator. + + The function :func:`inference_on_dataset` runs the model over + all samples in the dataset, and have a DatasetEvaluator to process the inputs/outputs. + + This class will accumulate information of the inputs/outputs (by :meth:`process`), + and produce evaluation results in the end (by :meth:`evaluate`). + """ + + def reset(self): + """ + Preparation for a new round of evaluation. + Should be called before starting a round of evaluation. + """ + pass + + def process(self, inputs, outputs): + """ + Process the pair of inputs and outputs. + If they contain batches, the pairs can be consumed one-by-one using `zip`: + + .. code-block:: python + + for input_, output in zip(inputs, outputs): + # do evaluation on single input/output pair + ... + + Args: + inputs (list): the inputs that's used to call the model. + outputs (list): the return value of `model(inputs)` + """ + pass + + def evaluate(self): + """ + Evaluate/summarize the performance, after processing all input/output pairs. + + Returns: + dict: + A new evaluator class can return a dict of arbitrary format + as long as the user can process the results. + In our train_net.py, we expect the following format: + + * key: the name of the task (e.g., bbox) + * value: a dict of {metric name: score}, e.g.: {"AP50": 80} + """ + pass + + +class DatasetEvaluators(DatasetEvaluator): + """ + Wrapper class to combine multiple :class:`DatasetEvaluator` instances. + + This class dispatches every evaluation call to + all of its :class:`DatasetEvaluator`. + """ + + def __init__(self, evaluators): + """ + Args: + evaluators (list): the evaluators to combine. 
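+
+        Example (an illustrative sketch; ``coco_evaluator`` and ``proposal_evaluator`` are
+        hypothetical evaluator instances, not names defined in this file):
+
+        .. code-block:: python
+
+            evaluator = DatasetEvaluators([coco_evaluator, proposal_evaluator])
+            evaluator.reset()
+            evaluator.process(inputs, outputs)   # forwarded to every wrapped evaluator
+            results = evaluator.evaluate()       # merged dict; duplicate keys raise an assertion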
+ """ + super().__init__() + self._evaluators = evaluators + + def reset(self): + for evaluator in self._evaluators: + evaluator.reset() + + def process(self, inputs, outputs): + for evaluator in self._evaluators: + evaluator.process(inputs, outputs) + + def evaluate(self): + results = OrderedDict() + for evaluator in self._evaluators: + result = evaluator.evaluate() + if is_main_process() and result is not None: + for k, v in result.items(): + assert ( + k not in results + ), "Different evaluators produce results with the same key {}".format(k) + results[k] = v + return results + + +def inference_on_dataset( + model, data_loader, evaluator: Union[DatasetEvaluator, List[DatasetEvaluator], None] +): + """ + Run model on the data_loader and evaluate the metrics with evaluator. + Also benchmark the inference speed of `model.__call__` accurately. + The model will be used in eval mode. + + Args: + model (callable): a callable which takes an object from + `data_loader` and returns some outputs. + + If it's an nn.Module, it will be temporarily set to `eval` mode. + If you wish to evaluate a model in `training` mode instead, you can + wrap the given model and override its behavior of `.eval()` and `.train()`. + data_loader: an iterable object with a length. + The elements it generates will be the inputs to the model. + evaluator: the evaluator(s) to run. Use `None` if you only want to benchmark, + but don't want to do any evaluation. + + Returns: + The return value of `evaluator.evaluate()` + """ + num_devices = get_world_size() + logger = logging.getLogger(__name__) + logger.info("Start inference on {} batches".format(len(data_loader))) + + total = len(data_loader) # inference data loader must have a fixed length + if evaluator is None: + # create a no-op evaluator + evaluator = DatasetEvaluators([]) + if isinstance(evaluator, abc.MutableSequence): + evaluator = DatasetEvaluators(evaluator) + evaluator.reset() + + num_warmup = min(5, total - 1) + start_time = time.perf_counter() + total_data_time = 0 + total_compute_time = 0 + total_eval_time = 0 + with ExitStack() as stack: + if isinstance(model, nn.Module): + stack.enter_context(inference_context(model)) + stack.enter_context(torch.no_grad()) + + start_data_time = time.perf_counter() + for idx, inputs in enumerate(data_loader): + total_data_time += time.perf_counter() - start_data_time + if idx == num_warmup: + start_time = time.perf_counter() + total_data_time = 0 + total_compute_time = 0 + total_eval_time = 0 + + start_compute_time = time.perf_counter() + outputs = model(inputs) + if torch.cuda.is_available(): + torch.cuda.synchronize() + total_compute_time += time.perf_counter() - start_compute_time + + start_eval_time = time.perf_counter() + evaluator.process(inputs, outputs) + total_eval_time += time.perf_counter() - start_eval_time + + iters_after_start = idx + 1 - num_warmup * int(idx >= num_warmup) + data_seconds_per_iter = total_data_time / iters_after_start + compute_seconds_per_iter = total_compute_time / iters_after_start + eval_seconds_per_iter = total_eval_time / iters_after_start + total_seconds_per_iter = (time.perf_counter() - start_time) / iters_after_start + if idx >= num_warmup * 2 or compute_seconds_per_iter > 5: + eta = datetime.timedelta(seconds=int(total_seconds_per_iter * (total - idx - 1))) + log_every_n_seconds( + logging.INFO, + ( + f"Inference done {idx + 1}/{total}. " + f"Dataloading: {data_seconds_per_iter:.4f} s/iter. " + f"Inference: {compute_seconds_per_iter:.4f} s/iter. 
" + f"Eval: {eval_seconds_per_iter:.4f} s/iter. " + f"Total: {total_seconds_per_iter:.4f} s/iter. " + f"ETA={eta}" + ), + n=5, + ) + start_data_time = time.perf_counter() + + # Measure the time only for this worker (before the synchronization barrier) + total_time = time.perf_counter() - start_time + total_time_str = str(datetime.timedelta(seconds=total_time)) + # NOTE this format is parsed by grep + logger.info( + "Total inference time: {} ({:.6f} s / iter per device, on {} devices)".format( + total_time_str, total_time / (total - num_warmup), num_devices + ) + ) + total_compute_time_str = str(datetime.timedelta(seconds=int(total_compute_time))) + logger.info( + "Total inference pure compute time: {} ({:.6f} s / iter per device, on {} devices)".format( + total_compute_time_str, total_compute_time / (total - num_warmup), num_devices + ) + ) + + results = evaluator.evaluate() + # An evaluator may return None when not in main process. + # Replace it by an empty dict instead to make it easier for downstream code to handle + if results is None: + results = {} + return results + + +@contextmanager +def inference_context(model): + """ + A context where the model is temporarily changed to eval mode, + and restored to previous mode afterwards. + + Args: + model: a torch Module + """ + training_mode = model.training + model.eval() + yield + model.train(training_mode) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/evaluation/instance_evaluation.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/evaluation/instance_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..12ce1d722987d1b6daa030423bb6aed4624e8310 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/evaluation/instance_evaluation.py @@ -0,0 +1,110 @@ +# ------------------------------------------------------------------------------ +# Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/evaluation/instance_evaluation.py +# ------------------------------------------------------------------------------ + +import contextlib +import copy +import io +import itertools +import json +import logging +import numpy as np +import os +import pickle +from collections import OrderedDict +import annotator.oneformer.pycocotools.mask as mask_util +import torch +from annotator.oneformer.pycocotools.coco import COCO +from annotator.oneformer.pycocotools.cocoeval import COCOeval +from tabulate import tabulate + +import annotator.oneformer.detectron2.utils.comm as comm +from annotator.oneformer.detectron2.config import CfgNode +from annotator.oneformer.detectron2.data import MetadataCatalog +from annotator.oneformer.detectron2.data.datasets.coco import convert_to_coco_json +from annotator.oneformer.detectron2.evaluation.coco_evaluation import COCOEvaluator, _evaluate_predictions_on_coco +from annotator.oneformer.detectron2.evaluation.fast_eval_api import COCOeval_opt +from annotator.oneformer.detectron2.structures import Boxes, BoxMode, pairwise_iou +from annotator.oneformer.detectron2.utils.file_io import PathManager +from annotator.oneformer.detectron2.utils.logger import create_small_table + + +# modified from COCOEvaluator for instance segmetnat +class InstanceSegEvaluator(COCOEvaluator): + """ + Evaluate AR for object proposals, AP for instance detection/segmentation, AP + for keypoint detection outputs using COCO's metrics. 
+ See http://cocodataset.org/#detection-eval and + http://cocodataset.org/#keypoints-eval to understand its metrics. + The metrics range from 0 to 100 (instead of 0 to 1), where a -1 or NaN means + the metric cannot be computed (e.g. due to no predictions made). + + In addition to COCO, this evaluator is able to support any bounding box detection, + instance segmentation, or keypoint detection dataset. + """ + + def _eval_predictions(self, predictions, img_ids=None): + """ + Evaluate predictions. Fill self._results with the metrics of the tasks. + """ + self._logger.info("Preparing results for COCO format ...") + coco_results = list(itertools.chain(*[x["instances"] for x in predictions])) + tasks = self._tasks or self._tasks_from_predictions(coco_results) + + # unmap the category ids for COCO + if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"): + dataset_id_to_contiguous_id = self._metadata.thing_dataset_id_to_contiguous_id + # all_contiguous_ids = list(dataset_id_to_contiguous_id.values()) + # num_classes = len(all_contiguous_ids) + # assert min(all_contiguous_ids) == 0 and max(all_contiguous_ids) == num_classes - 1 + + reverse_id_mapping = {v: k for k, v in dataset_id_to_contiguous_id.items()} + for result in coco_results: + category_id = result["category_id"] + # assert category_id < num_classes, ( + # f"A prediction has class={category_id}, " + # f"but the dataset only has {num_classes} classes and " + # f"predicted class id should be in [0, {num_classes - 1}]." + # ) + assert category_id in reverse_id_mapping, ( + f"A prediction has class={category_id}, " + f"but the dataset only has class ids in {dataset_id_to_contiguous_id}." + ) + result["category_id"] = reverse_id_mapping[category_id] + + if self._output_dir: + file_path = os.path.join(self._output_dir, "coco_instances_results.json") + self._logger.info("Saving results to {}".format(file_path)) + with PathManager.open(file_path, "w") as f: + f.write(json.dumps(coco_results)) + f.flush() + + if not self._do_evaluation: + self._logger.info("Annotations are not available for evaluation.") + return + + self._logger.info( + "Evaluating predictions with {} COCO API...".format( + "unofficial" if self._use_fast_impl else "official" + ) + ) + for task in sorted(tasks): + assert task in {"bbox", "segm", "keypoints"}, f"Got unknown task: {task}!" 
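+            # Editor's note (descriptive only, behaviour unchanged): for each task the predictions
+            # are scored with _evaluate_predictions_on_coco (or skipped when there are no results)
+            # and then flattened by _derive_coco_results into a {metric: score} dict on the 0-100
+            # scale, roughly of the form
+            #
+            #     {"AP": 43.2, "AP50": 63.1, "AP75": 46.8, "APs": 24.0, "APm": 46.5, "APl": 57.3,
+            #      "AP-person": 55.1, "AP-car": 44.7, ...}   # per-category keys are "AP-<class>"
+            #
+            # The numbers above are placeholders for illustration, not measured results.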
+ coco_eval = ( + _evaluate_predictions_on_coco( + self._coco_api, + coco_results, + task, + kpt_oks_sigmas=self._kpt_oks_sigmas, + use_fast_impl=self._use_fast_impl, + img_ids=img_ids, + max_dets_per_image=self._max_dets_per_image, + ) + if len(coco_results) > 0 + else None # cocoapi does not handle empty results very well + ) + + res = self._derive_coco_results( + coco_eval, task, class_names=self._metadata.get("thing_classes") + ) + self._results[task] = res diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4e1338369a958062d6ca4a122435b2be6ad27315 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/__init__.py @@ -0,0 +1,5 @@ +from .backbone.swin import D2SwinTransformer +from .backbone.dinat import D2DiNAT +from .pixel_decoder.fpn import BasePixelDecoder +from .pixel_decoder.msdeformattn import MSDeformAttnPixelDecoder +from .meta_arch.oneformer_head import OneFormerHead diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/backbone/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/backbone/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9020c2df23e2af280b7bb168b996ae9eaf312eb8 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/backbone/__init__.py @@ -0,0 +1 @@ +# Copyright (c) Facebook, Inc. and its affiliates. diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/backbone/dinat.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/backbone/dinat.py new file mode 100644 index 0000000000000000000000000000000000000000..17027574c7acc49aa6c452cb546736737ff1c752 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/backbone/dinat.py @@ -0,0 +1,324 @@ +# -------------------------------------------------------- +# Neighborhood Attention Transformer +# Licensed under The MIT License +# Written by Ali Hassani +# -------------------------------------------------------- + +# Modified by Jitesh Jain + +import torch +import torch.nn as nn +from timm.models.layers import DropPath +from annotator.oneformer.detectron2.modeling import BACKBONE_REGISTRY, Backbone, ShapeSpec + +class NeighborhoodAttention(nn.Module): + """ + Neighborhood Attention 2D Module + """ + + def __init__( + self, + dim, + num_heads, + kernel_size, + dilation=1, + bias=True, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + ): + super().__init__() + + + def forward(self, x): + + return x + + def extra_repr(self) -> str: + return ( + f"head_dim={self.head_dim}, num_heads={self.num_heads}, " + + f"kernel_size={self.kernel_size}, dilation={self.dilation}, " + + f"rel_pos_bias={self.rpb is not None}" + ) + +class ConvTokenizer(nn.Module): + def __init__(self, in_chans=3, embed_dim=96, norm_layer=None): + super().__init__() + self.proj = nn.Sequential( + nn.Conv2d(in_chans, embed_dim // 2, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)), + nn.Conv2d(embed_dim // 2, embed_dim, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)), + ) + if norm_layer is not None: + self.norm = norm_layer(embed_dim) + else: + self.norm = None + + def 
forward(self, x): + x = self.proj(x).permute(0, 2, 3, 1) + if self.norm is not None: + x = self.norm(x) + return x + + +class ConvDownsampler(nn.Module): + def __init__(self, dim, norm_layer=nn.LayerNorm): + super().__init__() + self.reduction = nn.Conv2d(dim, 2 * dim, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) + self.norm = norm_layer(2 * dim) + + def forward(self, x): + x = self.reduction(x.permute(0, 3, 1, 2)).permute(0, 2, 3, 1) + x = self.norm(x) + return x + + +class Mlp(nn.Module): + def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +class NATLayer(nn.Module): + def __init__(self, dim, num_heads, kernel_size=7, dilation=None, + mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0., drop_path=0., + act_layer=nn.GELU, norm_layer=nn.LayerNorm, layer_scale=None): + super().__init__() + self.dim = dim + self.num_heads = num_heads + self.mlp_ratio = mlp_ratio + + self.norm1 = norm_layer(dim) + self.attn = NeighborhoodAttention( + dim, kernel_size=kernel_size, dilation=dilation, num_heads=num_heads, + qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop) + + self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() + self.norm2 = norm_layer(dim) + self.mlp = Mlp(in_features=dim, hidden_features=int(dim * mlp_ratio), act_layer=act_layer, drop=drop) + self.layer_scale = False + if layer_scale is not None and type(layer_scale) in [int, float]: + self.layer_scale = True + self.gamma1 = nn.Parameter(layer_scale * torch.ones(dim), requires_grad=True) + self.gamma2 = nn.Parameter(layer_scale * torch.ones(dim), requires_grad=True) + + def forward(self, x): + if not self.layer_scale: + shortcut = x + x = self.norm1(x) + x = self.attn(x) + x = shortcut + self.drop_path(x) + x = x + self.drop_path(self.mlp(self.norm2(x))) + return x + shortcut = x + x = self.norm1(x) + x = self.attn(x) + x = shortcut + self.drop_path(self.gamma1 * x) + x = x + self.drop_path(self.gamma2 * self.mlp(self.norm2(x))) + return x + + + +class NATBlock(nn.Module): + def __init__(self, dim, depth, num_heads, kernel_size, dilations=None, + downsample=True, + mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0., + drop_path=0., norm_layer=nn.LayerNorm, layer_scale=None): + super().__init__() + self.dim = dim + self.depth = depth + + self.blocks = nn.ModuleList([ + NATLayer(dim=dim, + num_heads=num_heads, + kernel_size=kernel_size, + dilation=None if dilations is None else dilations[i], + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, qk_scale=qk_scale, + drop=drop, attn_drop=attn_drop, + drop_path=drop_path[i] if isinstance(drop_path, list) else drop_path, + norm_layer=norm_layer, + layer_scale=layer_scale) + for i in range(depth)]) + + self.downsample = None if not downsample else ConvDownsampler(dim=dim, norm_layer=norm_layer) + + def forward(self, x): + for blk in self.blocks: + x = blk(x) + if self.downsample is None: + return x, x + return self.downsample(x), x + + +class DiNAT(nn.Module): + def __init__(self, + embed_dim, + mlp_ratio, + depths, + num_heads, + drop_path_rate=0.2, + 
in_chans=3, + kernel_size=7, + dilations=None, + out_indices=(0, 1, 2, 3), + qkv_bias=True, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + norm_layer=nn.LayerNorm, + frozen_stages=-1, + layer_scale=None, + **kwargs): + super().__init__() + self.num_levels = len(depths) + self.embed_dim = embed_dim + self.num_features = [int(embed_dim * 2 ** i) for i in range(self.num_levels)] + self.mlp_ratio = mlp_ratio + + self.patch_embed = ConvTokenizer(in_chans=in_chans, embed_dim=embed_dim, norm_layer=norm_layer) + + self.pos_drop = nn.Dropout(p=drop_rate) + + dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))] + self.levels = nn.ModuleList() + for i in range(self.num_levels): + level = NATBlock(dim=int(embed_dim * 2 ** i), + depth=depths[i], + num_heads=num_heads[i], + kernel_size=kernel_size, + dilations=None if dilations is None else dilations[i], + mlp_ratio=self.mlp_ratio, + qkv_bias=qkv_bias, qk_scale=qk_scale, + drop=drop_rate, attn_drop=attn_drop_rate, + drop_path=dpr[sum(depths[:i]):sum(depths[:i + 1])], + norm_layer=norm_layer, + downsample=(i < self.num_levels - 1), + layer_scale=layer_scale) + self.levels.append(level) + + # add a norm layer for each output + self.out_indices = out_indices + for i_layer in self.out_indices: + layer = norm_layer(self.num_features[i_layer]) + layer_name = f'norm{i_layer}' + self.add_module(layer_name, layer) + + self.frozen_stages = frozen_stages + + def _freeze_stages(self): + if self.frozen_stages >= 0: + self.patch_embed.eval() + for param in self.patch_embed.parameters(): + param.requires_grad = False + + if self.frozen_stages >= 2: + for i in range(0, self.frozen_stages - 1): + m = self.network[i] + m.eval() + for param in m.parameters(): + param.requires_grad = False + + def train(self, mode=True): + super(DiNAT, self).train(mode) + self._freeze_stages() + + def forward_embeddings(self, x): + x = self.patch_embed(x) + return x + + def forward_tokens(self, x): + outs = {} + for idx, level in enumerate(self.levels): + x, xo = level(x) + if idx in self.out_indices: + norm_layer = getattr(self, f'norm{idx}') + x_out = norm_layer(xo) + outs["res{}".format(idx + 2)] = x_out.permute(0, 3, 1, 2).contiguous() + return outs + + def forward(self, x): + x = self.forward_embeddings(x) + return self.forward_tokens(x) + + +@BACKBONE_REGISTRY.register() +class D2DiNAT(DiNAT, Backbone): + def __init__(self, cfg, input_shape): + + embed_dim = cfg.MODEL.DiNAT.EMBED_DIM + mlp_ratio = cfg.MODEL.DiNAT.MLP_RATIO + depths = cfg.MODEL.DiNAT.DEPTHS + num_heads = cfg.MODEL.DiNAT.NUM_HEADS + drop_path_rate = cfg.MODEL.DiNAT.DROP_PATH_RATE + kernel_size = cfg.MODEL.DiNAT.KERNEL_SIZE + out_indices = cfg.MODEL.DiNAT.OUT_INDICES + dilations = cfg.MODEL.DiNAT.DILATIONS + + super().__init__( + embed_dim=embed_dim, + mlp_ratio=mlp_ratio, + depths=depths, + num_heads=num_heads, + drop_path_rate=drop_path_rate, + kernel_size=kernel_size, + out_indices=out_indices, + dilations=dilations, + ) + + self._out_features = cfg.MODEL.DiNAT.OUT_FEATURES + + self._out_feature_strides = { + "res2": 4, + "res3": 8, + "res4": 16, + "res5": 32, + } + self._out_feature_channels = { + "res2": self.num_features[0], + "res3": self.num_features[1], + "res4": self.num_features[2], + "res5": self.num_features[3], + } + + def forward(self, x): + """ + Args: + x: Tensor of shape (N,C,H,W). H, W must be a multiple of ``self.size_divisibility``. 
+ Returns: + dict[str->Tensor]: names and the corresponding features + """ + assert ( + x.dim() == 4 + ), f"DiNAT takes an input of shape (N, C, H, W). Got {x.shape} instead!" + outputs = {} + y = super().forward(x) + for k in y.keys(): + if k in self._out_features: + outputs[k] = y[k] + return outputs + + def output_shape(self): + return { + name: ShapeSpec( + channels=self._out_feature_channels[name], stride=self._out_feature_strides[name] + ) + for name in self._out_features + } + + @property + def size_divisibility(self): + return 32 diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/backbone/swin.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/backbone/swin.py new file mode 100644 index 0000000000000000000000000000000000000000..2380cde59570e5d5b8fb2536d0961f8e27a07fd4 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/backbone/swin.py @@ -0,0 +1,771 @@ +# -------------------------------------------------------- +# Swin Transformer +# Copyright (c) 2021 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ze Liu, Yutong Lin, Yixuan Wei +# -------------------------------------------------------- + +# ------------------------------------------------------------------------------ +# Reference: https://github.com/facebookresearch/Mask2Former +# ------------------------------------------------------------------------------ + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.checkpoint as checkpoint +from timm.models.layers import DropPath, to_2tuple, trunc_normal_ + +from annotator.oneformer.detectron2.modeling import BACKBONE_REGISTRY, Backbone, ShapeSpec + + +class Mlp(nn.Module): + """Multilayer perceptron.""" + + def __init__( + self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.0 + ): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +def window_partition(x, window_size): + """ + Args: + x: (B, H, W, C) + window_size (int): window size + Returns: + windows: (num_windows*B, window_size, window_size, C) + """ + B, H, W, C = x.shape + x = x.view(B, H // window_size, window_size, W // window_size, window_size, C) + windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C) + return windows + + +def window_reverse(windows, window_size, H, W): + """ + Args: + windows: (num_windows*B, window_size, window_size, C) + window_size (int): Window size + H (int): Height of image + W (int): Width of image + Returns: + x: (B, H, W, C) + """ + B = int(windows.shape[0] / (H * W / window_size / window_size)) + x = windows.view(B, H // window_size, W // window_size, window_size, window_size, -1) + x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) + return x + + +class WindowAttention(nn.Module): + """Window based multi-head self attention (W-MSA) module with relative position bias. + It supports both of shifted and non-shifted window. + Args: + dim (int): Number of input channels. + window_size (tuple[int]): The height and width of the window. 
+ num_heads (int): Number of attention heads. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set + attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0 + proj_drop (float, optional): Dropout ratio of output. Default: 0.0 + """ + + def __init__( + self, + dim, + window_size, + num_heads, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + ): + + super().__init__() + self.dim = dim + self.window_size = window_size # Wh, Ww + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = qk_scale or head_dim ** -0.5 + + # define a parameter table of relative position bias + self.relative_position_bias_table = nn.Parameter( + torch.zeros((2 * window_size[0] - 1) * (2 * window_size[1] - 1), num_heads) + ) # 2*Wh-1 * 2*Ww-1, nH + + # get pair-wise relative position index for each token inside the window + coords_h = torch.arange(self.window_size[0]) + coords_w = torch.arange(self.window_size[1]) + coords = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, Wh, Ww + coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww + relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] # 2, Wh*Ww, Wh*Ww + relative_coords = relative_coords.permute(1, 2, 0).contiguous() # Wh*Ww, Wh*Ww, 2 + relative_coords[:, :, 0] += self.window_size[0] - 1 # shift to start from 0 + relative_coords[:, :, 1] += self.window_size[1] - 1 + relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1 + relative_position_index = relative_coords.sum(-1) # Wh*Ww, Wh*Ww + self.register_buffer("relative_position_index", relative_position_index) + + self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + + trunc_normal_(self.relative_position_bias_table, std=0.02) + self.softmax = nn.Softmax(dim=-1) + + def forward(self, x, mask=None): + """Forward function. + Args: + x: input features with shape of (num_windows*B, N, C) + mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None + """ + B_, N, C = x.shape + qkv = ( + self.qkv(x) + .reshape(B_, N, 3, self.num_heads, C // self.num_heads) + .permute(2, 0, 3, 1, 4) + ) + q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple) + + q = q * self.scale + attn = q @ k.transpose(-2, -1) + + relative_position_bias = self.relative_position_bias_table[ + self.relative_position_index.view(-1) + ].view( + self.window_size[0] * self.window_size[1], self.window_size[0] * self.window_size[1], -1 + ) # Wh*Ww,Wh*Ww,nH + relative_position_bias = relative_position_bias.permute( + 2, 0, 1 + ).contiguous() # nH, Wh*Ww, Wh*Ww + attn = attn + relative_position_bias.unsqueeze(0) + + if mask is not None: + nW = mask.shape[0] + attn = attn.view(B_ // nW, nW, self.num_heads, N, N) + mask.unsqueeze(1).unsqueeze(0) + attn = attn.view(-1, self.num_heads, N, N) + attn = self.softmax(attn) + else: + attn = self.softmax(attn) + + attn = self.attn_drop(attn) + + x = (attn @ v).transpose(1, 2).reshape(B_, N, C) + x = self.proj(x) + x = self.proj_drop(x) + return x + + +class SwinTransformerBlock(nn.Module): + """Swin Transformer Block. + Args: + dim (int): Number of input channels. + num_heads (int): Number of attention heads. + window_size (int): Window size. + shift_size (int): Shift size for SW-MSA. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. 
+ qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float, optional): Stochastic depth rate. Default: 0.0 + act_layer (nn.Module, optional): Activation layer. Default: nn.GELU + norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm + """ + + def __init__( + self, + dim, + num_heads, + window_size=7, + shift_size=0, + mlp_ratio=4.0, + qkv_bias=True, + qk_scale=None, + drop=0.0, + attn_drop=0.0, + drop_path=0.0, + act_layer=nn.GELU, + norm_layer=nn.LayerNorm, + ): + super().__init__() + self.dim = dim + self.num_heads = num_heads + self.window_size = window_size + self.shift_size = shift_size + self.mlp_ratio = mlp_ratio + assert 0 <= self.shift_size < self.window_size, "shift_size must in 0-window_size" + + self.norm1 = norm_layer(dim) + self.attn = WindowAttention( + dim, + window_size=to_2tuple(self.window_size), + num_heads=num_heads, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop=attn_drop, + proj_drop=drop, + ) + + self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp( + in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop + ) + + self.H = None + self.W = None + + def forward(self, x, mask_matrix): + """Forward function. + Args: + x: Input feature, tensor size (B, H*W, C). + H, W: Spatial resolution of the input feature. + mask_matrix: Attention mask for cyclic shift. + """ + B, L, C = x.shape + H, W = self.H, self.W + assert L == H * W, "input feature has wrong size" + + shortcut = x + x = self.norm1(x) + x = x.view(B, H, W, C) + + # pad feature maps to multiples of window size + pad_l = pad_t = 0 + pad_r = (self.window_size - W % self.window_size) % self.window_size + pad_b = (self.window_size - H % self.window_size) % self.window_size + x = F.pad(x, (0, 0, pad_l, pad_r, pad_t, pad_b)) + _, Hp, Wp, _ = x.shape + + # cyclic shift + if self.shift_size > 0: + shifted_x = torch.roll(x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2)) + attn_mask = mask_matrix + else: + shifted_x = x + attn_mask = None + + # partition windows + x_windows = window_partition( + shifted_x, self.window_size + ) # nW*B, window_size, window_size, C + x_windows = x_windows.view( + -1, self.window_size * self.window_size, C + ) # nW*B, window_size*window_size, C + + # W-MSA/SW-MSA + attn_windows = self.attn(x_windows, mask=attn_mask) # nW*B, window_size*window_size, C + + # merge windows + attn_windows = attn_windows.view(-1, self.window_size, self.window_size, C) + shifted_x = window_reverse(attn_windows, self.window_size, Hp, Wp) # B H' W' C + + # reverse cyclic shift + if self.shift_size > 0: + x = torch.roll(shifted_x, shifts=(self.shift_size, self.shift_size), dims=(1, 2)) + else: + x = shifted_x + + if pad_r > 0 or pad_b > 0: + x = x[:, :H, :W, :].contiguous() + + x = x.view(B, H * W, C) + + # FFN + x = shortcut + self.drop_path(x) + x = x + self.drop_path(self.mlp(self.norm2(x))) + + return x + + +class PatchMerging(nn.Module): + """Patch Merging Layer + Args: + dim (int): Number of input channels. + norm_layer (nn.Module, optional): Normalization layer. 
Default: nn.LayerNorm + """ + + def __init__(self, dim, norm_layer=nn.LayerNorm): + super().__init__() + self.dim = dim + self.reduction = nn.Linear(4 * dim, 2 * dim, bias=False) + self.norm = norm_layer(4 * dim) + + def forward(self, x, H, W): + """Forward function. + Args: + x: Input feature, tensor size (B, H*W, C). + H, W: Spatial resolution of the input feature. + """ + B, L, C = x.shape + assert L == H * W, "input feature has wrong size" + + x = x.view(B, H, W, C) + + # padding + pad_input = (H % 2 == 1) or (W % 2 == 1) + if pad_input: + x = F.pad(x, (0, 0, 0, W % 2, 0, H % 2)) + + x0 = x[:, 0::2, 0::2, :] # B H/2 W/2 C + x1 = x[:, 1::2, 0::2, :] # B H/2 W/2 C + x2 = x[:, 0::2, 1::2, :] # B H/2 W/2 C + x3 = x[:, 1::2, 1::2, :] # B H/2 W/2 C + x = torch.cat([x0, x1, x2, x3], -1) # B H/2 W/2 4*C + x = x.view(B, -1, 4 * C) # B H/2*W/2 4*C + + x = self.norm(x) + x = self.reduction(x) + + return x + + +class BasicLayer(nn.Module): + """A basic Swin Transformer layer for one stage. + Args: + dim (int): Number of feature channels + depth (int): Depths of this stage. + num_heads (int): Number of attention head. + window_size (int): Local window size. Default: 7. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0 + norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm + downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None + use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False. + """ + + def __init__( + self, + dim, + depth, + num_heads, + window_size=7, + mlp_ratio=4.0, + qkv_bias=True, + qk_scale=None, + drop=0.0, + attn_drop=0.0, + drop_path=0.0, + norm_layer=nn.LayerNorm, + downsample=None, + use_checkpoint=False, + ): + super().__init__() + self.window_size = window_size + self.shift_size = window_size // 2 + self.depth = depth + self.use_checkpoint = use_checkpoint + + # build blocks + self.blocks = nn.ModuleList( + [ + SwinTransformerBlock( + dim=dim, + num_heads=num_heads, + window_size=window_size, + shift_size=0 if (i % 2 == 0) else window_size // 2, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop, + attn_drop=attn_drop, + drop_path=drop_path[i] if isinstance(drop_path, list) else drop_path, + norm_layer=norm_layer, + ) + for i in range(depth) + ] + ) + + # patch merging layer + if downsample is not None: + self.downsample = downsample(dim=dim, norm_layer=norm_layer) + else: + self.downsample = None + + def forward(self, x, H, W): + """Forward function. + Args: + x: Input feature, tensor size (B, H*W, C). + H, W: Spatial resolution of the input feature. 
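+        Returns:
+            A 6-tuple ``(x, H, W, x_down, Wh, Ww)``: the features of this stage with their
+            resolution, followed by the downsampled features and resolution for the next stage
+            (identical to the first three values when this stage has no ``downsample`` module).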
+ """ + + # calculate attention mask for SW-MSA + Hp = int(np.ceil(H / self.window_size)) * self.window_size + Wp = int(np.ceil(W / self.window_size)) * self.window_size + img_mask = torch.zeros((1, Hp, Wp, 1), device=x.device) # 1 Hp Wp 1 + h_slices = ( + slice(0, -self.window_size), + slice(-self.window_size, -self.shift_size), + slice(-self.shift_size, None), + ) + w_slices = ( + slice(0, -self.window_size), + slice(-self.window_size, -self.shift_size), + slice(-self.shift_size, None), + ) + cnt = 0 + for h in h_slices: + for w in w_slices: + img_mask[:, h, w, :] = cnt + cnt += 1 + + mask_windows = window_partition( + img_mask, self.window_size + ) # nW, window_size, window_size, 1 + mask_windows = mask_windows.view(-1, self.window_size * self.window_size) + attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) + attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill( + attn_mask == 0, float(0.0) + ) + + for blk in self.blocks: + blk.H, blk.W = H, W + if self.use_checkpoint: + x = checkpoint.checkpoint(blk, x, attn_mask) + else: + x = blk(x, attn_mask) + if self.downsample is not None: + x_down = self.downsample(x, H, W) + Wh, Ww = (H + 1) // 2, (W + 1) // 2 + return x, H, W, x_down, Wh, Ww + else: + return x, H, W, x, H, W + + +class PatchEmbed(nn.Module): + """Image to Patch Embedding + Args: + patch_size (int): Patch token size. Default: 4. + in_chans (int): Number of input image channels. Default: 3. + embed_dim (int): Number of linear projection output channels. Default: 96. + norm_layer (nn.Module, optional): Normalization layer. Default: None + """ + + def __init__(self, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None): + super().__init__() + patch_size = to_2tuple(patch_size) + self.patch_size = patch_size + + self.in_chans = in_chans + self.embed_dim = embed_dim + + self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) + if norm_layer is not None: + self.norm = norm_layer(embed_dim) + else: + self.norm = None + + def forward(self, x): + """Forward function.""" + # padding + _, _, H, W = x.size() + if W % self.patch_size[1] != 0: + x = F.pad(x, (0, self.patch_size[1] - W % self.patch_size[1])) + if H % self.patch_size[0] != 0: + x = F.pad(x, (0, 0, 0, self.patch_size[0] - H % self.patch_size[0])) + + x = self.proj(x) # B C Wh Ww + if self.norm is not None: + Wh, Ww = x.size(2), x.size(3) + x = x.flatten(2).transpose(1, 2) + x = self.norm(x) + x = x.transpose(1, 2).view(-1, self.embed_dim, Wh, Ww) + + return x + + +class SwinTransformer(nn.Module): + """Swin Transformer backbone. + A PyTorch impl of : `Swin Transformer: Hierarchical Vision Transformer using Shifted Windows` - + https://arxiv.org/pdf/2103.14030 + Args: + pretrain_img_size (int): Input image size for training the pretrained model, + used in absolute postion embedding. Default 224. + patch_size (int | tuple(int)): Patch size. Default: 4. + in_chans (int): Number of input image channels. Default: 3. + embed_dim (int): Number of linear projection output channels. Default: 96. + depths (tuple[int]): Depths of each Swin Transformer stage. + num_heads (tuple[int]): Number of attention head of each stage. + window_size (int): Window size. Default: 7. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4. + qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float): Override default qk scale of head_dim ** -0.5 if set. + drop_rate (float): Dropout rate. 
+ attn_drop_rate (float): Attention dropout rate. Default: 0. + drop_path_rate (float): Stochastic depth rate. Default: 0.2. + norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm. + ape (bool): If True, add absolute position embedding to the patch embedding. Default: False. + patch_norm (bool): If True, add normalization after patch embedding. Default: True. + out_indices (Sequence[int]): Output from which stages. + frozen_stages (int): Stages to be frozen (stop grad and set eval mode). + -1 means not freezing any parameters. + use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False. + """ + + def __init__( + self, + pretrain_img_size=224, + patch_size=4, + in_chans=3, + embed_dim=96, + depths=[2, 2, 6, 2], + num_heads=[3, 6, 12, 24], + window_size=7, + mlp_ratio=4.0, + qkv_bias=True, + qk_scale=None, + drop_rate=0.0, + attn_drop_rate=0.0, + drop_path_rate=0.2, + norm_layer=nn.LayerNorm, + ape=False, + patch_norm=True, + out_indices=(0, 1, 2, 3), + frozen_stages=-1, + use_checkpoint=False, + ): + super().__init__() + + self.pretrain_img_size = pretrain_img_size + self.num_layers = len(depths) + self.embed_dim = embed_dim + self.ape = ape + self.patch_norm = patch_norm + self.out_indices = out_indices + self.frozen_stages = frozen_stages + + # split image into non-overlapping patches + self.patch_embed = PatchEmbed( + patch_size=patch_size, + in_chans=in_chans, + embed_dim=embed_dim, + norm_layer=norm_layer if self.patch_norm else None, + ) + + # absolute position embedding + if self.ape: + pretrain_img_size = to_2tuple(pretrain_img_size) + patch_size = to_2tuple(patch_size) + patches_resolution = [ + pretrain_img_size[0] // patch_size[0], + pretrain_img_size[1] // patch_size[1], + ] + + self.absolute_pos_embed = nn.Parameter( + torch.zeros(1, embed_dim, patches_resolution[0], patches_resolution[1]) + ) + trunc_normal_(self.absolute_pos_embed, std=0.02) + + self.pos_drop = nn.Dropout(p=drop_rate) + + # stochastic depth + dpr = [ + x.item() for x in torch.linspace(0, drop_path_rate, sum(depths)) + ] # stochastic depth decay rule + + # build layers + self.layers = nn.ModuleList() + for i_layer in range(self.num_layers): + layer = BasicLayer( + dim=int(embed_dim * 2 ** i_layer), + depth=depths[i_layer], + num_heads=num_heads[i_layer], + window_size=window_size, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[sum(depths[:i_layer]) : sum(depths[: i_layer + 1])], + norm_layer=norm_layer, + downsample=PatchMerging if (i_layer < self.num_layers - 1) else None, + use_checkpoint=use_checkpoint, + ) + self.layers.append(layer) + + num_features = [int(embed_dim * 2 ** i) for i in range(self.num_layers)] + self.num_features = num_features + + # add a norm layer for each output + for i_layer in out_indices: + layer = norm_layer(num_features[i_layer]) + layer_name = f"norm{i_layer}" + self.add_module(layer_name, layer) + + self._freeze_stages() + + def _freeze_stages(self): + if self.frozen_stages >= 0: + self.patch_embed.eval() + for param in self.patch_embed.parameters(): + param.requires_grad = False + + if self.frozen_stages >= 1 and self.ape: + self.absolute_pos_embed.requires_grad = False + + if self.frozen_stages >= 2: + self.pos_drop.eval() + for i in range(0, self.frozen_stages - 1): + m = self.layers[i] + m.eval() + for param in m.parameters(): + param.requires_grad = False + + def init_weights(self, pretrained=None): + """Initialize the weights in backbone. 
+ Args: + pretrained (str, optional): Path to pre-trained weights. + Defaults to None. + """ + + def _init_weights(m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=0.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + + def forward(self, x): + """Forward function.""" + x = self.patch_embed(x) + + Wh, Ww = x.size(2), x.size(3) + if self.ape: + # interpolate the position embedding to the corresponding size + absolute_pos_embed = F.interpolate( + self.absolute_pos_embed, size=(Wh, Ww), mode="bicubic" + ) + x = (x + absolute_pos_embed).flatten(2).transpose(1, 2) # B Wh*Ww C + else: + x = x.flatten(2).transpose(1, 2) + x = self.pos_drop(x) + + outs = {} + for i in range(self.num_layers): + layer = self.layers[i] + x_out, H, W, x, Wh, Ww = layer(x, Wh, Ww) + + if i in self.out_indices: + norm_layer = getattr(self, f"norm{i}") + x_out = norm_layer(x_out) + + out = x_out.view(-1, H, W, self.num_features[i]).permute(0, 3, 1, 2).contiguous() + outs["res{}".format(i + 2)] = out + + return outs + + def train(self, mode=True): + """Convert the model into training mode while keep layers freezed.""" + super(SwinTransformer, self).train(mode) + self._freeze_stages() + + +@BACKBONE_REGISTRY.register() +class D2SwinTransformer(SwinTransformer, Backbone): + def __init__(self, cfg, input_shape): + + pretrain_img_size = cfg.MODEL.SWIN.PRETRAIN_IMG_SIZE + patch_size = cfg.MODEL.SWIN.PATCH_SIZE + in_chans = 3 + embed_dim = cfg.MODEL.SWIN.EMBED_DIM + depths = cfg.MODEL.SWIN.DEPTHS + num_heads = cfg.MODEL.SWIN.NUM_HEADS + window_size = cfg.MODEL.SWIN.WINDOW_SIZE + mlp_ratio = cfg.MODEL.SWIN.MLP_RATIO + qkv_bias = cfg.MODEL.SWIN.QKV_BIAS + qk_scale = cfg.MODEL.SWIN.QK_SCALE + drop_rate = cfg.MODEL.SWIN.DROP_RATE + attn_drop_rate = cfg.MODEL.SWIN.ATTN_DROP_RATE + drop_path_rate = cfg.MODEL.SWIN.DROP_PATH_RATE + norm_layer = nn.LayerNorm + ape = cfg.MODEL.SWIN.APE + patch_norm = cfg.MODEL.SWIN.PATCH_NORM + use_checkpoint = cfg.MODEL.SWIN.USE_CHECKPOINT + + super().__init__( + pretrain_img_size, + patch_size, + in_chans, + embed_dim, + depths, + num_heads, + window_size, + mlp_ratio, + qkv_bias, + qk_scale, + drop_rate, + attn_drop_rate, + drop_path_rate, + norm_layer, + ape, + patch_norm, + use_checkpoint=use_checkpoint, + ) + + self._out_features = cfg.MODEL.SWIN.OUT_FEATURES + + self._out_feature_strides = { + "res2": 4, + "res3": 8, + "res4": 16, + "res5": 32, + } + self._out_feature_channels = { + "res2": self.num_features[0], + "res3": self.num_features[1], + "res4": self.num_features[2], + "res5": self.num_features[3], + } + + def forward(self, x): + """ + Args: + x: Tensor of shape (N,C,H,W). H, W must be a multiple of ``self.size_divisibility``. + Returns: + dict[str->Tensor]: names and the corresponding features + """ + assert ( + x.dim() == 4 + ), f"SwinTransformer takes an input of shape (N, C, H, W). Got {x.shape} instead!" 
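+        # Editor's note (descriptive only): the parent SwinTransformer.forward returns features
+        # named "res2".."res5" with strides 4/8/16/32; only the names listed in
+        # cfg.MODEL.SWIN.OUT_FEATURES are kept below. A rough shape sketch, assuming the
+        # SwinTransformer defaults (embed_dim=96, depths=[2, 2, 6, 2], i.e. Swin-T) and a
+        # 224x224 input:
+        #
+        #     "res2": (N,  96, 56, 56)
+        #     "res3": (N, 192, 28, 28)
+        #     "res4": (N, 384, 14, 14)
+        #     "res5": (N, 768,  7,  7)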
+ outputs = {} + y = super().forward(x) + for k in y.keys(): + if k in self._out_features: + outputs[k] = y[k] + return outputs + + def output_shape(self): + return { + name: ShapeSpec( + channels=self._out_feature_channels[name], stride=self._out_feature_strides[name] + ) + for name in self._out_features + } + + @property + def size_divisibility(self): + return 32 diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/matcher.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/matcher.py new file mode 100644 index 0000000000000000000000000000000000000000..4dba337a0f99ccd394931f52b063c8fb575bafbd --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/matcher.py @@ -0,0 +1,212 @@ +# ------------------------------------------------------------------------------ +# Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/modeling/matcher.py +# Modified by Jitesh Jain (https://github.com/praeclarumjj3) +# ------------------------------------------------------------------------------ + +""" +Modules to compute the matching cost and solve the corresponding LSAP. +""" +import torch +import torch.nn.functional as F +from scipy.optimize import linear_sum_assignment +from torch import nn +from torch.cuda.amp import autocast +import numpy as np + +# from annotator.oneformer.detectron2.projects.point_rend.point_features import point_sample + + +def linear_sum_assignment_with_nan(cost_matrix): + cost_matrix = np.asarray(cost_matrix) + nan = np.isnan(cost_matrix).any() + nan_all = np.isnan(cost_matrix).all() + empty = cost_matrix.size == 0 + + if not empty: + if nan_all: + print('Matrix contains all NaN values!') + elif nan: + print('Matrix contains NaN values!') + + if nan_all: + cost_matrix = np.empty(shape=(0, 0)) + elif nan: + cost_matrix[np.isnan(cost_matrix)] = 100 + + return linear_sum_assignment(cost_matrix) + +def batch_dice_loss(inputs: torch.Tensor, targets: torch.Tensor): + """ + Compute the DICE loss, similar to generalized IOU for masks + Args: + inputs: A float tensor of arbitrary shape. + The predictions for each example. + targets: A float tensor with the same shape as inputs. Stores the binary + classification label for each element in inputs + (0 for the negative class and 1 for the positive class). + """ + inputs = inputs.sigmoid() + inputs = inputs.flatten(1) + numerator = 2 * torch.einsum("nc,mc->nm", inputs, targets) + denominator = inputs.sum(-1)[:, None] + targets.sum(-1)[None, :] + loss = 1 - (numerator + 1) / (denominator + 1) + return loss + + +batch_dice_loss_jit = torch.jit.script( + batch_dice_loss +) # type: torch.jit.ScriptModule + + +def batch_sigmoid_ce_loss(inputs: torch.Tensor, targets: torch.Tensor): + """ + Args: + inputs: A float tensor of arbitrary shape. + The predictions for each example. + targets: A float tensor with the same shape as inputs. Stores the binary + classification label for each element in inputs + (0 for the negative class and 1 for the positive class). 
+ Returns: + Loss tensor + """ + hw = inputs.shape[1] + + pos = F.binary_cross_entropy_with_logits( + inputs, torch.ones_like(inputs), reduction="none" + ) + neg = F.binary_cross_entropy_with_logits( + inputs, torch.zeros_like(inputs), reduction="none" + ) + + loss = torch.einsum("nc,mc->nm", pos, targets) + torch.einsum( + "nc,mc->nm", neg, (1 - targets) + ) + + return loss / hw + + +batch_sigmoid_ce_loss_jit = torch.jit.script( + batch_sigmoid_ce_loss +) # type: torch.jit.ScriptModule + + +class HungarianMatcher(nn.Module): + """This class computes an assignment between the targets and the predictions of the network + + For efficiency reasons, the targets don't include the no_object. Because of this, in general, + there are more predictions than targets. In this case, we do a 1-to-1 matching of the best predictions, + while the others are un-matched (and thus treated as non-objects). + """ + + def __init__(self, cost_class: float = 1, cost_mask: float = 1, + cost_dice: float = 1, num_points: int = 0): + """Creates the matcher + + Params: + cost_class: This is the relative weight of the classification error in the matching cost + cost_mask: This is the relative weight of the focal loss of the binary mask in the matching cost + cost_dice: This is the relative weight of the dice loss of the binary mask in the matching cost + """ + super().__init__() + self.cost_class = cost_class + self.cost_mask = cost_mask + self.cost_dice = cost_dice + + assert cost_class != 0 or cost_mask != 0 or cost_dice != 0, "all costs cant be 0" + + self.num_points = num_points + + @torch.no_grad() + def memory_efficient_forward(self, outputs, targets): + """More memory-friendly matching""" + bs, num_queries = outputs["pred_logits"].shape[:2] + + indices = [] + + # Iterate through batch size + for b in range(bs): + out_prob = outputs["pred_logits"][b].softmax(-1) # [num_queries, num_classes] + tgt_ids = targets[b]["labels"] + + # Compute the classification cost. Contrary to the loss, we don't use the NLL, + # but approximate it in 1 - proba[target class]. + # The 1 is a constant that doesn't change the matching, it can be ommitted. + cost_class = -out_prob[:, tgt_ids] + + out_mask = outputs["pred_masks"][b] # [num_queries, H_pred, W_pred] + # gt masks are already padded when preparing target + tgt_mask = targets[b]["masks"].to(out_mask) + + out_mask = out_mask[:, None] + tgt_mask = tgt_mask[:, None] + # all masks share the same set of points for efficient matching! 
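+            # Editor's note (hedged sketch, not part of the original code): `point_sample` used
+            # below is detectron2's point_rend helper, but its import is commented out at the top
+            # of this file, so this training-time path assumes it is provided elsewhere. A rough
+            # equivalent in terms of F.grid_sample, for an (N, C, H, W) input and (N, P, 2)
+            # normalized point coordinates in [0, 1]:
+            #
+            #     def point_sample(input, point_coords, **kwargs):
+            #         out = F.grid_sample(input, 2.0 * point_coords.unsqueeze(2) - 1.0, **kwargs)
+            #         return out.squeeze(3)   # (N, C, P)
+            #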
+ point_coords = torch.rand(1, self.num_points, 2, device=out_mask.device) + # get gt labels + tgt_mask = point_sample( + tgt_mask, + point_coords.repeat(tgt_mask.shape[0], 1, 1), + align_corners=False, + ).squeeze(1) + + out_mask = point_sample( + out_mask, + point_coords.repeat(out_mask.shape[0], 1, 1), + align_corners=False, + ).squeeze(1) + + with autocast(enabled=False): + out_mask = out_mask.float() + tgt_mask = tgt_mask.float() + # Compute the focal loss between masks + cost_mask = batch_sigmoid_ce_loss_jit(out_mask, tgt_mask) + # Compute the dice loss betwen masks + cost_dice = batch_dice_loss(out_mask, tgt_mask) + + # Final cost matrix + C = ( + self.cost_mask * cost_mask + + self.cost_class * cost_class + + self.cost_dice * cost_dice + ) + C = C.reshape(num_queries, -1).cpu() + + indices.append(linear_sum_assignment_with_nan(C)) + + return [ + (torch.as_tensor(i, dtype=torch.int64), torch.as_tensor(j, dtype=torch.int64)) + for i, j in indices + ] + + @torch.no_grad() + def forward(self, outputs, targets): + """Performs the matching + + Params: + outputs: This is a dict that contains at least these entries: + "pred_logits": Tensor of dim [batch_size, num_queries, num_classes] with the classification logits + "pred_masks": Tensor of dim [batch_size, num_queries, H_pred, W_pred] with the predicted masks + + targets: This is a list of targets (len(targets) = batch_size), where each target is a dict containing: + "labels": Tensor of dim [num_target_boxes] (where num_target_boxes is the number of ground-truth + objects in the target) containing the class labels + "masks": Tensor of dim [num_target_boxes, H_gt, W_gt] containing the target masks + + Returns: + A list of size batch_size, containing tuples of (index_i, index_j) where: + - index_i is the indices of the selected predictions (in order) + - index_j is the indices of the corresponding selected targets (in order) + For each batch element, it holds: + len(index_i) = len(index_j) = min(num_queries, num_target_boxes) + """ + + return self.memory_efficient_forward(outputs, targets) + + def __repr__(self, _repr_indent=4): + head = "Matcher " + self.__class__.__name__ + body = [ + "cost_class: {}".format(self.cost_class), + "cost_mask: {}".format(self.cost_mask), + "cost_dice: {}".format(self.cost_dice), + ] + lines = [head] + [" " * _repr_indent + line for line in body] + return "\n".join(lines) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/meta_arch/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/meta_arch/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/meta_arch/__init__.py @@ -0,0 +1 @@ + diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/meta_arch/oneformer_head.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/meta_arch/oneformer_head.py new file mode 100644 index 0000000000000000000000000000000000000000..f8f8eb11b95838d2b61de5fa249a318877182c01 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/meta_arch/oneformer_head.py @@ -0,0 +1,135 @@ +# ------------------------------------------------------------------------------ +# Reference: 
https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/modeling/meta_arch/mask_former_head.py +# Modified by Jitesh Jain (https://github.com/praeclarumjj3) +# ------------------------------------------------------------------------------ + +import logging +from copy import deepcopy +from typing import Callable, Dict, List, Optional, Tuple, Union + +import fvcore.nn.weight_init as weight_init +from torch import nn +from torch.nn import functional as F + +from annotator.oneformer.detectron2.config import configurable +from annotator.oneformer.detectron2.layers import Conv2d, ShapeSpec, get_norm +from annotator.oneformer.detectron2.modeling import SEM_SEG_HEADS_REGISTRY +from ..pixel_decoder.fpn import build_pixel_decoder +from ..transformer_decoder.oneformer_transformer_decoder import build_transformer_decoder + +@SEM_SEG_HEADS_REGISTRY.register() +class OneFormerHead(nn.Module): + + _version = 2 + + def _load_from_state_dict( + self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs + ): + version = local_metadata.get("version", None) + if version is None or version < 2: + # Do not warn if train from scratch + scratch = True + logger = logging.getLogger(__name__) + for k in list(state_dict.keys()): + newk = k + if "sem_seg_head" in k and not k.startswith(prefix + "predictor"): + newk = k.replace(prefix, prefix + "pixel_decoder.") + # logger.debug(f"{k} ==> {newk}") + if newk != k: + state_dict[newk] = state_dict[k] + del state_dict[k] + scratch = False + + if not scratch: + logger.warning( + f"Weight format of {self.__class__.__name__} have changed! " + "Please upgrade your models. Applying automatic conversion now ..." + ) + + @configurable + def __init__( + self, + input_shape: Dict[str, ShapeSpec], + *, + num_classes: int, + pixel_decoder: nn.Module, + loss_weight: float = 1.0, + ignore_value: int = -1, + # extra parameters + transformer_predictor: nn.Module, + transformer_in_feature: str, + ): + """ + NOTE: this interface is experimental. + Args: + input_shape: shapes (channels and stride) of the input features + num_classes: number of classes to predict + pixel_decoder: the pixel decoder module + loss_weight: loss weight + ignore_value: category id to be ignored during training. 
+ transformer_predictor: the transformer decoder that makes prediction + transformer_in_feature: input feature name to the transformer_predictor + """ + super().__init__() + input_shape = sorted(input_shape.items(), key=lambda x: x[1].stride) + self.in_features = [k for k, v in input_shape] + feature_strides = [v.stride for k, v in input_shape] + feature_channels = [v.channels for k, v in input_shape] + + self.ignore_value = ignore_value + self.common_stride = 4 + self.loss_weight = loss_weight + + self.pixel_decoder = pixel_decoder + self.predictor = transformer_predictor + self.transformer_in_feature = transformer_in_feature + + self.num_classes = num_classes + + @classmethod + def from_config(cls, cfg, input_shape: Dict[str, ShapeSpec]): + # figure out in_channels to transformer predictor + if cfg.MODEL.ONE_FORMER.TRANSFORMER_IN_FEATURE == "transformer_encoder": + transformer_predictor_in_channels = cfg.MODEL.SEM_SEG_HEAD.CONVS_DIM + elif cfg.MODEL.ONE_FORMER.TRANSFORMER_IN_FEATURE == "pixel_embedding": + transformer_predictor_in_channels = cfg.MODEL.SEM_SEG_HEAD.MASK_DIM + elif cfg.MODEL.ONE_FORMER.TRANSFORMER_IN_FEATURE == "multi_scale_pixel_decoder": + transformer_predictor_in_channels = cfg.MODEL.SEM_SEG_HEAD.CONVS_DIM + else: + transformer_predictor_in_channels = input_shape[cfg.MODEL.ONE_FORMER.TRANSFORMER_IN_FEATURE].channels + + return { + "input_shape": { + k: v for k, v in input_shape.items() if k in cfg.MODEL.SEM_SEG_HEAD.IN_FEATURES + }, + "ignore_value": cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE, + "num_classes": cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES, + "pixel_decoder": build_pixel_decoder(cfg, input_shape), + "loss_weight": cfg.MODEL.SEM_SEG_HEAD.LOSS_WEIGHT, + "transformer_in_feature": cfg.MODEL.ONE_FORMER.TRANSFORMER_IN_FEATURE, + "transformer_predictor": build_transformer_decoder( + cfg, + transformer_predictor_in_channels, + mask_classification=True, + ), + } + + def forward(self, features, tasks, mask=None): + return self.layers(features, tasks, mask) + + def layers(self, features, tasks, mask=None): + mask_features, transformer_encoder_features, multi_scale_features, _, _ = self.pixel_decoder.forward_features(features) + + if self.transformer_in_feature == "multi_scale_pixel_decoder": + predictions = self.predictor(multi_scale_features, mask_features, tasks, mask) + else: + if self.transformer_in_feature == "transformer_encoder": + assert ( + transformer_encoder_features is not None + ), "Please use the TransformerEncoderPixelDecoder." + predictions = self.predictor(transformer_encoder_features, mask_features, mask) + elif self.transformer_in_feature == "pixel_embedding": + predictions = self.predictor(mask_features, mask_features, mask) + else: + predictions = self.predictor(features[self.transformer_in_feature], mask_features, mask) + return predictions diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9020c2df23e2af280b7bb168b996ae9eaf312eb8 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/__init__.py @@ -0,0 +1 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
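The HungarianMatcher in matcher.py above builds a (num_queries x num_targets) cost matrix from three terms (classification, pairwise sigmoid cross-entropy on sampled mask points, and pairwise dice) and hands it to SciPy's linear_sum_assignment. The following is a minimal, self-contained sketch of that computation on random tensors; it is illustrative only, is not part of this diff, and the cost weights, shapes and point count are placeholder values.

import torch
import torch.nn.functional as F
from scipy.optimize import linear_sum_assignment

num_queries, num_targets, num_points, num_classes = 5, 3, 16, 10
w_class, w_mask, w_dice = 2.0, 5.0, 5.0                         # placeholder weights

out_prob = torch.rand(num_queries, num_classes).softmax(-1)     # predicted class probabilities
tgt_ids = torch.randint(0, num_classes, (num_targets,))         # ground-truth labels
out_mask = torch.randn(num_queries, num_points)                 # mask logits at shared sample points
tgt_mask = (torch.rand(num_targets, num_points) > 0.5).float()  # ground-truth masks at the same points

# classification cost: approximate -log p with 1 - p (the constant 1 is dropped)
cost_class = -out_prob[:, tgt_ids]

# pairwise sigmoid cross-entropy cost, split into positive/negative parts as in batch_sigmoid_ce_loss
pos = F.binary_cross_entropy_with_logits(out_mask, torch.ones_like(out_mask), reduction="none")
neg = F.binary_cross_entropy_with_logits(out_mask, torch.zeros_like(out_mask), reduction="none")
cost_mask = (torch.einsum("nc,mc->nm", pos, tgt_mask)
             + torch.einsum("nc,mc->nm", neg, 1 - tgt_mask)) / num_points

# pairwise dice cost, as in batch_dice_loss
probs = out_mask.sigmoid()
numerator = 2 * torch.einsum("nc,mc->nm", probs, tgt_mask)
denominator = probs.sum(-1)[:, None] + tgt_mask.sum(-1)[None, :]
cost_dice = 1 - (numerator + 1) / (denominator + 1)

# combined cost matrix and optimal 1-to-1 assignment (query index -> target index)
C = w_mask * cost_mask + w_class * cost_class + w_dice * cost_dice
row_ind, col_ind = linear_sum_assignment(C.numpy())
print(list(zip(row_ind.tolist(), col_ind.tolist())))

The same matrix C is what memory_efficient_forward feeds to linear_sum_assignment_with_nan, which only adds the NaN handling seen above before delegating to SciPy.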
diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/fpn.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/fpn.py new file mode 100644 index 0000000000000000000000000000000000000000..0f724775b5e237cd01f2b369950a04b38059f9bf --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/fpn.py @@ -0,0 +1,312 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import logging +import numpy as np +from typing import Callable, Dict, List, Optional, Tuple, Union + +import fvcore.nn.weight_init as weight_init +import torch +from torch import nn +from torch.nn import functional as F +from torch.nn.init import xavier_uniform_, constant_, uniform_, normal_ +from torch.cuda.amp import autocast + +from annotator.oneformer.detectron2.config import configurable +from annotator.oneformer.detectron2.layers import Conv2d, DeformConv, ShapeSpec, get_norm +from annotator.oneformer.detectron2.modeling import SEM_SEG_HEADS_REGISTRY + +from ..transformer_decoder.position_encoding import PositionEmbeddingSine +from ..transformer_decoder.transformer import TransformerEncoder, TransformerEncoderLayer, _get_clones, _get_activation_fn + + +def build_pixel_decoder(cfg, input_shape): + """ + Build a pixel decoder from `cfg.MODEL.MASK_FORMER.PIXEL_DECODER_NAME`. + """ + name = cfg.MODEL.SEM_SEG_HEAD.PIXEL_DECODER_NAME + model = SEM_SEG_HEADS_REGISTRY.get(name)(cfg, input_shape) + forward_features = getattr(model, "forward_features", None) + if not callable(forward_features): + raise ValueError( + "Only SEM_SEG_HEADS with forward_features method can be used as pixel decoder. " + f"Please implement forward_features for {name} to only return mask features." + ) + return model + + +# This is a modified FPN decoder. +@SEM_SEG_HEADS_REGISTRY.register() +class BasePixelDecoder(nn.Module): + @configurable + def __init__( + self, + input_shape: Dict[str, ShapeSpec], + *, + conv_dim: int, + mask_dim: int, + norm: Optional[Union[str, Callable]] = None, + ): + """ + NOTE: this interface is experimental. + Args: + input_shape: shapes (channels and stride) of the input features + conv_dims: number of output channels for the intermediate conv layers. + mask_dim: number of output channels for the final conv layer. 
+ norm (str or callable): normalization for all conv layers + """ + super().__init__() + + input_shape = sorted(input_shape.items(), key=lambda x: x[1].stride) + self.in_features = [k for k, v in input_shape] # starting from "res2" to "res5" + feature_channels = [v.channels for k, v in input_shape] + + lateral_convs = [] + output_convs = [] + + use_bias = norm == "" + for idx, in_channels in enumerate(feature_channels): + if idx == len(self.in_features) - 1: + output_norm = get_norm(norm, conv_dim) + output_conv = Conv2d( + in_channels, + conv_dim, + kernel_size=3, + stride=1, + padding=1, + bias=use_bias, + norm=output_norm, + activation=F.relu, + ) + weight_init.c2_xavier_fill(output_conv) + self.add_module("layer_{}".format(idx + 1), output_conv) + + lateral_convs.append(None) + output_convs.append(output_conv) + else: + lateral_norm = get_norm(norm, conv_dim) + output_norm = get_norm(norm, conv_dim) + + lateral_conv = Conv2d( + in_channels, conv_dim, kernel_size=1, bias=use_bias, norm=lateral_norm + ) + output_conv = Conv2d( + conv_dim, + conv_dim, + kernel_size=3, + stride=1, + padding=1, + bias=use_bias, + norm=output_norm, + activation=F.relu, + ) + weight_init.c2_xavier_fill(lateral_conv) + weight_init.c2_xavier_fill(output_conv) + self.add_module("adapter_{}".format(idx + 1), lateral_conv) + self.add_module("layer_{}".format(idx + 1), output_conv) + + lateral_convs.append(lateral_conv) + output_convs.append(output_conv) + # Place convs into top-down order (from low to high resolution) + # to make the top-down computation in forward clearer. + self.lateral_convs = lateral_convs[::-1] + self.output_convs = output_convs[::-1] + + self.mask_dim = mask_dim + self.mask_features = Conv2d( + conv_dim, + mask_dim, + kernel_size=3, + stride=1, + padding=1, + ) + weight_init.c2_xavier_fill(self.mask_features) + + self.oneformer_num_feature_levels = 3 # always use 3 scales + + @classmethod + def from_config(cls, cfg, input_shape: Dict[str, ShapeSpec]): + ret = {} + ret["input_shape"] = { + k: v for k, v in input_shape.items() if k in cfg.MODEL.SEM_SEG_HEAD.IN_FEATURES + } + ret["conv_dim"] = cfg.MODEL.SEM_SEG_HEAD.CONVS_DIM + ret["mask_dim"] = cfg.MODEL.SEM_SEG_HEAD.MASK_DIM + ret["norm"] = cfg.MODEL.SEM_SEG_HEAD.NORM + return ret + + def forward_features(self, features): + multi_scale_features = [] + num_cur_levels = 0 + # Reverse feature maps into top-down order (from low to high resolution) + for idx, f in enumerate(self.in_features[::-1]): + x = features[f] + lateral_conv = self.lateral_convs[idx] + output_conv = self.output_convs[idx] + if lateral_conv is None: + y = output_conv(x) + else: + cur_fpn = lateral_conv(x) + # Following FPN implementation, we use nearest upsampling here + y = cur_fpn + F.interpolate(y, size=cur_fpn.shape[-2:], mode="nearest") + y = output_conv(y) + if num_cur_levels < self.oneformer_num_feature_levels: + multi_scale_features.append(y) + num_cur_levels += 1 + return self.mask_features(y), None, multi_scale_features + + def forward(self, features, targets=None): + logger = logging.getLogger(__name__) + logger.warning("Calling forward() may cause unpredicted behavior of PixelDecoder module.") + return self.forward_features(features) + + +class TransformerEncoderOnly(nn.Module): + def __init__( + self, + d_model=512, + nhead=8, + num_encoder_layers=6, + dim_feedforward=2048, + dropout=0.1, + activation="relu", + normalize_before=False, + ): + super().__init__() + + encoder_layer = TransformerEncoderLayer( + d_model, nhead, dim_feedforward, dropout, activation, 
normalize_before + ) + encoder_norm = nn.LayerNorm(d_model) if normalize_before else None + self.encoder = TransformerEncoder(encoder_layer, num_encoder_layers, encoder_norm) + + self._reset_parameters() + + self.d_model = d_model + self.nhead = nhead + + def _reset_parameters(self): + for p in self.parameters(): + if p.dim() > 1: + nn.init.xavier_uniform_(p) + + def forward(self, src, mask, pos_embed): + # flatten NxCxHxW to HWxNxC + bs, c, h, w = src.shape + src = src.flatten(2).permute(2, 0, 1) + pos_embed = pos_embed.flatten(2).permute(2, 0, 1) + if mask is not None: + mask = mask.flatten(1) + + memory = self.encoder(src, src_key_padding_mask=mask, pos=pos_embed) + return memory.permute(1, 2, 0).view(bs, c, h, w) + + +# This is a modified FPN decoder with extra Transformer encoder that processes the lowest-resolution feature map. +@SEM_SEG_HEADS_REGISTRY.register() +class TransformerEncoderPixelDecoder(BasePixelDecoder): + @configurable + def __init__( + self, + input_shape: Dict[str, ShapeSpec], + *, + transformer_dropout: float, + transformer_nheads: int, + transformer_dim_feedforward: int, + transformer_enc_layers: int, + transformer_pre_norm: bool, + conv_dim: int, + mask_dim: int, + norm: Optional[Union[str, Callable]] = None, + ): + """ + NOTE: this interface is experimental. + Args: + input_shape: shapes (channels and stride) of the input features + transformer_dropout: dropout probability in transformer + transformer_nheads: number of heads in transformer + transformer_dim_feedforward: dimension of feedforward network + transformer_enc_layers: number of transformer encoder layers + transformer_pre_norm: whether to use pre-layernorm or not + conv_dims: number of output channels for the intermediate conv layers. + mask_dim: number of output channels for the final conv layer. 
+ norm (str or callable): normalization for all conv layers + """ + super().__init__(input_shape, conv_dim=conv_dim, mask_dim=mask_dim, norm=norm) + + input_shape = sorted(input_shape.items(), key=lambda x: x[1].stride) + self.in_features = [k for k, v in input_shape] # starting from "res2" to "res5" + feature_strides = [v.stride for k, v in input_shape] + feature_channels = [v.channels for k, v in input_shape] + + in_channels = feature_channels[len(self.in_features) - 1] + self.input_proj = Conv2d(in_channels, conv_dim, kernel_size=1) + weight_init.c2_xavier_fill(self.input_proj) + self.transformer = TransformerEncoderOnly( + d_model=conv_dim, + dropout=transformer_dropout, + nhead=transformer_nheads, + dim_feedforward=transformer_dim_feedforward, + num_encoder_layers=transformer_enc_layers, + normalize_before=transformer_pre_norm, + ) + N_steps = conv_dim // 2 + self.pe_layer = PositionEmbeddingSine(N_steps, normalize=True) + + # update layer + use_bias = norm == "" + output_norm = get_norm(norm, conv_dim) + output_conv = Conv2d( + conv_dim, + conv_dim, + kernel_size=3, + stride=1, + padding=1, + bias=use_bias, + norm=output_norm, + activation=F.relu, + ) + weight_init.c2_xavier_fill(output_conv) + delattr(self, "layer_{}".format(len(self.in_features))) + self.add_module("layer_{}".format(len(self.in_features)), output_conv) + self.output_convs[0] = output_conv + + @classmethod + def from_config(cls, cfg, input_shape: Dict[str, ShapeSpec]): + ret = super().from_config(cfg, input_shape) + ret["transformer_dropout"] = cfg.MODEL.MASK_FORMER.DROPOUT + ret["transformer_nheads"] = cfg.MODEL.MASK_FORMER.NHEADS + ret["transformer_dim_feedforward"] = cfg.MODEL.MASK_FORMER.DIM_FEEDFORWARD + ret[ + "transformer_enc_layers" + ] = cfg.MODEL.SEM_SEG_HEAD.TRANSFORMER_ENC_LAYERS # a separate config + ret["transformer_pre_norm"] = cfg.MODEL.MASK_FORMER.PRE_NORM + return ret + + def forward_features(self, features): + multi_scale_features = [] + num_cur_levels = 0 + # Reverse feature maps into top-down order (from low to high resolution) + for idx, f in enumerate(self.in_features[::-1]): + x = features[f] + lateral_conv = self.lateral_convs[idx] + output_conv = self.output_convs[idx] + if lateral_conv is None: + transformer = self.input_proj(x) + pos = self.pe_layer(x) + transformer = self.transformer(transformer, None, pos) + y = output_conv(transformer) + # save intermediate feature as input to Transformer decoder + transformer_encoder_features = transformer + else: + cur_fpn = lateral_conv(x) + # Following FPN implementation, we use nearest upsampling here + y = cur_fpn + F.interpolate(y, size=cur_fpn.shape[-2:], mode="nearest") + y = output_conv(y) + if num_cur_levels < self.oneformer_num_feature_levels: + multi_scale_features.append(y) + num_cur_levels += 1 + return self.mask_features(y), transformer_encoder_features, multi_scale_features + + def forward(self, features, targets=None): + logger = logging.getLogger(__name__) + logger.warning("Calling forward() may cause unpredicted behavior of PixelDecoder module.") + return self.forward_features(features) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/msdeformattn.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/msdeformattn.py new file mode 100644 index 0000000000000000000000000000000000000000..007051d713fd89a622154f5e0edc9902627cca14 --- /dev/null +++ 
b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/msdeformattn.py @@ -0,0 +1,358 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import logging +import numpy as np +from typing import Callable, Dict, List, Optional, Tuple, Union + +import fvcore.nn.weight_init as weight_init +import torch +from torch import nn +from torch.nn import functional as F +from torch.nn.init import xavier_uniform_, constant_, uniform_, normal_ +from torch.cuda.amp import autocast + +from annotator.oneformer.detectron2.config import configurable +from annotator.oneformer.detectron2.layers import Conv2d, ShapeSpec, get_norm +from annotator.oneformer.detectron2.modeling import SEM_SEG_HEADS_REGISTRY + +from ..transformer_decoder.position_encoding import PositionEmbeddingSine +from ..transformer_decoder.transformer import _get_clones, _get_activation_fn +from .ops.modules import MSDeformAttn + + +# MSDeformAttn Transformer encoder in deformable detr +class MSDeformAttnTransformerEncoderOnly(nn.Module): + def __init__(self, d_model=256, nhead=8, + num_encoder_layers=6, dim_feedforward=1024, dropout=0.1, + activation="relu", + num_feature_levels=4, enc_n_points=4, + ): + super().__init__() + + self.d_model = d_model + self.nhead = nhead + + encoder_layer = MSDeformAttnTransformerEncoderLayer(d_model, dim_feedforward, + dropout, activation, + num_feature_levels, nhead, enc_n_points) + self.encoder = MSDeformAttnTransformerEncoder(encoder_layer, num_encoder_layers) + + self.level_embed = nn.Parameter(torch.Tensor(num_feature_levels, d_model)) + + self._reset_parameters() + + def _reset_parameters(self): + for p in self.parameters(): + if p.dim() > 1: + nn.init.xavier_uniform_(p) + for m in self.modules(): + if isinstance(m, MSDeformAttn): + m._reset_parameters() + normal_(self.level_embed) + + def get_valid_ratio(self, mask): + _, H, W = mask.shape + valid_H = torch.sum(~mask[:, :, 0], 1) + valid_W = torch.sum(~mask[:, 0, :], 1) + valid_ratio_h = valid_H.float() / H + valid_ratio_w = valid_W.float() / W + valid_ratio = torch.stack([valid_ratio_w, valid_ratio_h], -1) + return valid_ratio + + def forward(self, srcs, pos_embeds): + masks = [torch.zeros((x.size(0), x.size(2), x.size(3)), device=x.device, dtype=torch.bool) for x in srcs] + # prepare input for encoder + src_flatten = [] + mask_flatten = [] + lvl_pos_embed_flatten = [] + spatial_shapes = [] + for lvl, (src, mask, pos_embed) in enumerate(zip(srcs, masks, pos_embeds)): + bs, c, h, w = src.shape + spatial_shape = (h, w) + spatial_shapes.append(spatial_shape) + src = src.flatten(2).transpose(1, 2) + mask = mask.flatten(1) + pos_embed = pos_embed.flatten(2).transpose(1, 2) + lvl_pos_embed = pos_embed + self.level_embed[lvl].view(1, 1, -1) + lvl_pos_embed_flatten.append(lvl_pos_embed) + src_flatten.append(src) + mask_flatten.append(mask) + src_flatten = torch.cat(src_flatten, 1) + mask_flatten = torch.cat(mask_flatten, 1) + lvl_pos_embed_flatten = torch.cat(lvl_pos_embed_flatten, 1) + spatial_shapes = torch.as_tensor(spatial_shapes, dtype=torch.long, device=src_flatten.device) + level_start_index = torch.cat((spatial_shapes.new_zeros((1, )), spatial_shapes.prod(1).cumsum(0)[:-1])) + valid_ratios = torch.stack([self.get_valid_ratio(m) for m in masks], 1) + + # encoder + memory = self.encoder(src_flatten, spatial_shapes, level_start_index, valid_ratios, lvl_pos_embed_flatten, mask_flatten) + + return memory, spatial_shapes, level_start_index, valid_ratios + + +class 
MSDeformAttnTransformerEncoderLayer(nn.Module): + def __init__(self, + d_model=256, d_ffn=1024, + dropout=0.1, activation="relu", + n_levels=4, n_heads=8, n_points=4): + super().__init__() + + # self attention + self.self_attn = MSDeformAttn(d_model, n_levels, n_heads, n_points) + self.dropout1 = nn.Dropout(dropout) + self.norm1 = nn.LayerNorm(d_model) + + # ffn + self.linear1 = nn.Linear(d_model, d_ffn) + self.activation = _get_activation_fn(activation) + self.dropout2 = nn.Dropout(dropout) + self.linear2 = nn.Linear(d_ffn, d_model) + self.dropout3 = nn.Dropout(dropout) + self.norm2 = nn.LayerNorm(d_model) + + @staticmethod + def with_pos_embed(tensor, pos): + return tensor if pos is None else tensor + pos + + def forward_ffn(self, src): + src2 = self.linear2(self.dropout2(self.activation(self.linear1(src)))) + src = src + self.dropout3(src2) + src = self.norm2(src) + return src + + def forward(self, src, pos, reference_points, spatial_shapes, level_start_index, padding_mask=None): + # self attention + src2 = self.self_attn(self.with_pos_embed(src, pos), reference_points, src, spatial_shapes, level_start_index, padding_mask) + src = src + self.dropout1(src2) + src = self.norm1(src) + + # ffn + src = self.forward_ffn(src) + + return src + + +class MSDeformAttnTransformerEncoder(nn.Module): + def __init__(self, encoder_layer, num_layers): + super().__init__() + self.layers = _get_clones(encoder_layer, num_layers) + self.num_layers = num_layers + + @staticmethod + def get_reference_points(spatial_shapes, valid_ratios, device): + reference_points_list = [] + for lvl, (H_, W_) in enumerate(spatial_shapes): + + ref_y, ref_x = torch.meshgrid(torch.linspace(0.5, H_ - 0.5, H_, dtype=torch.float32, device=device), + torch.linspace(0.5, W_ - 0.5, W_, dtype=torch.float32, device=device)) + ref_y = ref_y.reshape(-1)[None] / (valid_ratios[:, None, lvl, 1] * H_) + ref_x = ref_x.reshape(-1)[None] / (valid_ratios[:, None, lvl, 0] * W_) + ref = torch.stack((ref_x, ref_y), -1) + reference_points_list.append(ref) + reference_points = torch.cat(reference_points_list, 1) + reference_points = reference_points[:, :, None] * valid_ratios[:, None] + return reference_points + + def forward(self, src, spatial_shapes, level_start_index, valid_ratios, pos=None, padding_mask=None): + output = src + reference_points = self.get_reference_points(spatial_shapes, valid_ratios, device=src.device) + for _, layer in enumerate(self.layers): + output = layer(output, pos, reference_points, spatial_shapes, level_start_index, padding_mask) + + return output + + +@SEM_SEG_HEADS_REGISTRY.register() +class MSDeformAttnPixelDecoder(nn.Module): + @configurable + def __init__( + self, + input_shape: Dict[str, ShapeSpec], + *, + transformer_dropout: float, + transformer_nheads: int, + transformer_dim_feedforward: int, + transformer_enc_layers: int, + conv_dim: int, + mask_dim: int, + norm: Optional[Union[str, Callable]] = None, + # deformable transformer encoder args + transformer_in_features: List[str], + common_stride: int, + ): + """ + NOTE: this interface is experimental. + Args: + input_shape: shapes (channels and stride) of the input features + transformer_dropout: dropout probability in transformer + transformer_nheads: number of heads in transformer + transformer_dim_feedforward: dimension of feedforward network + transformer_enc_layers: number of transformer encoder layers + conv_dims: number of output channels for the intermediate conv layers. + mask_dim: number of output channels for the final conv layer. 
+ norm (str or callable): normalization for all conv layers + """ + super().__init__() + transformer_input_shape = { + k: v for k, v in input_shape.items() if k in transformer_in_features + } + + # this is the input shape of pixel decoder + input_shape = sorted(input_shape.items(), key=lambda x: x[1].stride) + self.in_features = [k for k, v in input_shape] # starting from "res2" to "res5" + self.feature_strides = [v.stride for k, v in input_shape] + self.feature_channels = [v.channels for k, v in input_shape] + + # this is the input shape of transformer encoder (could use less features than pixel decoder + transformer_input_shape = sorted(transformer_input_shape.items(), key=lambda x: x[1].stride) + self.transformer_in_features = [k for k, v in transformer_input_shape] # starting from "res2" to "res5" + transformer_in_channels = [v.channels for k, v in transformer_input_shape] + self.transformer_feature_strides = [v.stride for k, v in transformer_input_shape] # to decide extra FPN layers + + self.transformer_num_feature_levels = len(self.transformer_in_features) + if self.transformer_num_feature_levels > 1: + input_proj_list = [] + # from low resolution to high resolution (res5 -> res2) + for in_channels in transformer_in_channels[::-1]: + input_proj_list.append(nn.Sequential( + nn.Conv2d(in_channels, conv_dim, kernel_size=1), + nn.GroupNorm(32, conv_dim), + )) + self.input_proj = nn.ModuleList(input_proj_list) + else: + self.input_proj = nn.ModuleList([ + nn.Sequential( + nn.Conv2d(transformer_in_channels[-1], conv_dim, kernel_size=1), + nn.GroupNorm(32, conv_dim), + )]) + + for proj in self.input_proj: + nn.init.xavier_uniform_(proj[0].weight, gain=1) + nn.init.constant_(proj[0].bias, 0) + + self.transformer = MSDeformAttnTransformerEncoderOnly( + d_model=conv_dim, + dropout=transformer_dropout, + nhead=transformer_nheads, + dim_feedforward=transformer_dim_feedforward, + num_encoder_layers=transformer_enc_layers, + num_feature_levels=self.transformer_num_feature_levels, + ) + N_steps = conv_dim // 2 + self.pe_layer = PositionEmbeddingSine(N_steps, normalize=True) + + self.mask_dim = mask_dim + # use 1x1 conv instead + self.mask_features = Conv2d( + conv_dim, + mask_dim, + kernel_size=1, + stride=1, + padding=0, + ) + weight_init.c2_xavier_fill(self.mask_features) + + self.oneformer_num_feature_levels = 3 # always use 3 scales + self.common_stride = common_stride + + # extra fpn levels + stride = min(self.transformer_feature_strides) + self.num_fpn_levels = int(np.log2(stride) - np.log2(self.common_stride)) + + lateral_convs = [] + output_convs = [] + + use_bias = norm == "" + for idx, in_channels in enumerate(self.feature_channels[:self.num_fpn_levels]): + lateral_norm = get_norm(norm, conv_dim) + output_norm = get_norm(norm, conv_dim) + + lateral_conv = Conv2d( + in_channels, conv_dim, kernel_size=1, bias=use_bias, norm=lateral_norm + ) + output_conv = Conv2d( + conv_dim, + conv_dim, + kernel_size=3, + stride=1, + padding=1, + bias=use_bias, + norm=output_norm, + activation=F.relu, + ) + weight_init.c2_xavier_fill(lateral_conv) + weight_init.c2_xavier_fill(output_conv) + self.add_module("adapter_{}".format(idx + 1), lateral_conv) + self.add_module("layer_{}".format(idx + 1), output_conv) + + lateral_convs.append(lateral_conv) + output_convs.append(output_conv) + # Place convs into top-down order (from low to high resolution) + # to make the top-down computation in forward clearer. 
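+ # (editor note, not in the upstream file) Only the lowest-stride backbone features,
+ # i.e. feature_channels[:num_fpn_levels] (just "res2" when common_stride is 4 and the
+ # smallest transformer stride is 8), receive these FPN lateral/output convs; the
+ # remaining levels are produced by the deformable transformer encoder above.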
+ self.lateral_convs = lateral_convs[::-1] + self.output_convs = output_convs[::-1] + + @classmethod + def from_config(cls, cfg, input_shape: Dict[str, ShapeSpec]): + ret = {} + ret["input_shape"] = { + k: v for k, v in input_shape.items() if k in cfg.MODEL.SEM_SEG_HEAD.IN_FEATURES + } + ret["conv_dim"] = cfg.MODEL.SEM_SEG_HEAD.CONVS_DIM + ret["mask_dim"] = cfg.MODEL.SEM_SEG_HEAD.MASK_DIM + ret["norm"] = cfg.MODEL.SEM_SEG_HEAD.NORM + ret["transformer_dropout"] = cfg.MODEL.ONE_FORMER.DROPOUT + ret["transformer_nheads"] = cfg.MODEL.ONE_FORMER.NHEADS + # ret["transformer_dim_feedforward"] = cfg.MODEL.ONE_FORMER.DIM_FEEDFORWARD + ret["transformer_dim_feedforward"] = 1024 # use 1024 for deformable transformer encoder + ret[ + "transformer_enc_layers" + ] = cfg.MODEL.SEM_SEG_HEAD.TRANSFORMER_ENC_LAYERS # a separate config + ret["transformer_in_features"] = cfg.MODEL.SEM_SEG_HEAD.DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES + ret["common_stride"] = cfg.MODEL.SEM_SEG_HEAD.COMMON_STRIDE + return ret + + @autocast(enabled=False) + def forward_features(self, features): + srcs = [] + pos = [] + # Reverse feature maps into top-down order (from low to high resolution) + for idx, f in enumerate(self.transformer_in_features[::-1]): + x = features[f].float() # deformable detr does not support half precision + srcs.append(self.input_proj[idx](x)) + pos.append(self.pe_layer(x)) + + y, spatial_shapes, level_start_index, valid_ratios = self.transformer(srcs, pos) + bs = y.shape[0] + + split_size_or_sections = [None] * self.transformer_num_feature_levels + for i in range(self.transformer_num_feature_levels): + if i < self.transformer_num_feature_levels - 1: + split_size_or_sections[i] = level_start_index[i + 1] - level_start_index[i] + else: + split_size_or_sections[i] = y.shape[1] - level_start_index[i] + y = torch.split(y, split_size_or_sections, dim=1) + + out = [] + multi_scale_features = [] + num_cur_levels = 0 + for i, z in enumerate(y): + out.append(z.transpose(1, 2).view(bs, -1, spatial_shapes[i][0], spatial_shapes[i][1])) + + # append `out` with extra FPN levels + # Reverse feature maps into top-down order (from low to high resolution) + for idx, f in enumerate(self.in_features[:self.num_fpn_levels][::-1]): + x = features[f].float() + lateral_conv = self.lateral_convs[idx] + output_conv = self.output_convs[idx] + cur_fpn = lateral_conv(x) + # Following FPN implementation, we use nearest upsampling here + y = cur_fpn + F.interpolate(out[-1], size=cur_fpn.shape[-2:], mode="bilinear", align_corners=False) + y = output_conv(y) + out.append(y) + + for o in out: + if num_cur_levels < self.oneformer_num_feature_levels: + multi_scale_features.append(o) + num_cur_levels += 1 + + return self.mask_features(out[-1]), out[0], multi_scale_features, spatial_shapes, level_start_index diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/ops/functions/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/ops/functions/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2b06b5ac538b63bdb9a6c82e4635b95bb5491d5b --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/ops/functions/__init__.py @@ -0,0 +1,13 @@ +# ------------------------------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. 
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +# Copyright (c) Facebook, Inc. and its affiliates. +# Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR + +from .ms_deform_attn_func import MSDeformAttnFunction + diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/ops/functions/ms_deform_attn_func.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/ops/functions/ms_deform_attn_func.py new file mode 100644 index 0000000000000000000000000000000000000000..e074eb69819151add821a8ff9ed215ed9b874070 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/ops/functions/ms_deform_attn_func.py @@ -0,0 +1,77 @@ +# ------------------------------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +# Copyright (c) Facebook, Inc. and its affiliates. +# Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR + + +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import torch +import torch.nn.functional as F +from torch.autograd import Function +from torch.autograd.function import once_differentiable + +# if torch.cuda.is_available(): +# try: +# import MultiScaleDeformableAttention as MSDA +# except ModuleNotFoundError as e: +# info_string = ( +# "\n\nPlease compile MultiScaleDeformableAttention CUDA op with the following commands:\n" +# "\t`cd oneformer/modeling/pixel_decoder/ops`\n" +# "\t`sh make.sh`\n" +# ) +# raise ModuleNotFoundError(info_string) +# else: +# MultiScaleDeformableAttention = None + + + +class MSDeformAttnFunction(Function): + @staticmethod + def forward(ctx, value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, im2col_step): + # ctx.im2col_step = im2col_step + output = ms_deform_attn_core_pytorch( + value, value_spatial_shapes, sampling_locations, attention_weights) + # ctx.save_for_backward(value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights) + return output + + # @staticmethod + # @once_differentiable + # def backward(ctx, grad_output): + # value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights = ctx.saved_tensors + # grad_value, grad_sampling_loc, grad_attn_weight = \ + # MSDA.ms_deform_attn_backward( + # value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, grad_output, ctx.im2col_step) + # + # return grad_value, None, None, grad_sampling_loc, grad_attn_weight, None + + +def ms_deform_attn_core_pytorch(value, value_spatial_shapes, sampling_locations, 
attention_weights): + # for debug and test only, + # need to use cuda version instead + N_, S_, M_, D_ = value.shape + _, Lq_, M_, L_, P_, _ = sampling_locations.shape + value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes], dim=1) + sampling_grids = 2 * sampling_locations - 1 + sampling_value_list = [] + for lid_, (H_, W_) in enumerate(value_spatial_shapes): + # N_, H_*W_, M_, D_ -> N_, H_*W_, M_*D_ -> N_, M_*D_, H_*W_ -> N_*M_, D_, H_, W_ + value_l_ = value_list[lid_].flatten(2).transpose(1, 2).reshape(N_*M_, D_, H_, W_) + # N_, Lq_, M_, P_, 2 -> N_, M_, Lq_, P_, 2 -> N_*M_, Lq_, P_, 2 + sampling_grid_l_ = sampling_grids[:, :, :, lid_].transpose(1, 2).flatten(0, 1) + # N_*M_, D_, Lq_, P_ + sampling_value_l_ = F.grid_sample(value_l_, sampling_grid_l_, + mode='bilinear', padding_mode='zeros', align_corners=False) + sampling_value_list.append(sampling_value_l_) + # (N_, Lq_, M_, L_, P_) -> (N_, M_, Lq_, L_, P_) -> (N_, M_, 1, Lq_, L_*P_) + attention_weights = attention_weights.transpose(1, 2).reshape(N_*M_, 1, Lq_, L_*P_) + output = (torch.stack(sampling_value_list, dim=-2).flatten(-2) * attention_weights).sum(-1).view(N_, M_*D_, Lq_) + return output.transpose(1, 2).contiguous() diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/ops/make.sh b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/ops/make.sh new file mode 100644 index 0000000000000000000000000000000000000000..ca5c0b469da786c847ba04d437bb31ee0fc938da --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/ops/make.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash +# ------------------------------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +# Copyright (c) Facebook, Inc. and its affiliates. +# Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR + +FORCE_CUDA=1 python setup.py build install diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/ops/modules/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/ops/modules/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6fdbf03359958f3d67ab00f879bf6b61a6c8f06a --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/ops/modules/__init__.py @@ -0,0 +1,12 @@ +# ------------------------------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. 
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +# Copyright (c) Facebook, Inc. and its affiliates. +# Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR + +from .ms_deform_attn import MSDeformAttn diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/ops/modules/ms_deform_attn.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/ops/modules/ms_deform_attn.py new file mode 100644 index 0000000000000000000000000000000000000000..5bc471d2da550c839a3446a6041e40d338425129 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/ops/modules/ms_deform_attn.py @@ -0,0 +1,120 @@ +# ------------------------------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +# Copyright (c) Facebook, Inc. and its affiliates. +# Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR + +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import warnings +import math + +import torch +from torch import nn +import torch.nn.functional as F +from torch.nn.init import xavier_uniform_, constant_ + +MSDeformAttnFunction = None +from ..functions.ms_deform_attn_func import ms_deform_attn_core_pytorch + + +def _is_power_of_2(n): + if (not isinstance(n, int)) or (n < 0): + raise ValueError("invalid input for _is_power_of_2: {} (type: {})".format(n, type(n))) + return (n & (n-1) == 0) and n != 0 + + +class MSDeformAttn(nn.Module): + def __init__(self, d_model=256, n_levels=4, n_heads=8, n_points=4): + """ + Multi-Scale Deformable Attention Module + :param d_model hidden dimension + :param n_levels number of feature levels + :param n_heads number of attention heads + :param n_points number of sampling points per attention head per feature level + """ + super().__init__() + if d_model % n_heads != 0: + raise ValueError('d_model must be divisible by n_heads, but got {} and {}'.format(d_model, n_heads)) + _d_per_head = d_model // n_heads + # you'd better set _d_per_head to a power of 2 which is more efficient in our CUDA implementation + if not _is_power_of_2(_d_per_head): + warnings.warn("You'd better set d_model in MSDeformAttn to make the dimension of each attention head a power of 2 " + "which is more efficient in our CUDA implementation.") + + self.im2col_step = 128 + + self.d_model = d_model + self.n_levels = n_levels + self.n_heads = n_heads + self.n_points = n_points + + self.sampling_offsets = nn.Linear(d_model, n_heads * n_levels * n_points * 2) + self.attention_weights = nn.Linear(d_model, n_heads * n_levels * n_points) + self.value_proj = 
nn.Linear(d_model, d_model) + self.output_proj = nn.Linear(d_model, d_model) + + self._reset_parameters() + + def _reset_parameters(self): + constant_(self.sampling_offsets.weight.data, 0.) + thetas = torch.arange(self.n_heads, dtype=torch.float32) * (2.0 * math.pi / self.n_heads) + grid_init = torch.stack([thetas.cos(), thetas.sin()], -1) + grid_init = (grid_init / grid_init.abs().max(-1, keepdim=True)[0]).view(self.n_heads, 1, 1, 2).repeat(1, self.n_levels, self.n_points, 1) + for i in range(self.n_points): + grid_init[:, :, i, :] *= i + 1 + with torch.no_grad(): + self.sampling_offsets.bias = nn.Parameter(grid_init.view(-1)) + constant_(self.attention_weights.weight.data, 0.) + constant_(self.attention_weights.bias.data, 0.) + xavier_uniform_(self.value_proj.weight.data) + constant_(self.value_proj.bias.data, 0.) + xavier_uniform_(self.output_proj.weight.data) + constant_(self.output_proj.bias.data, 0.) + + def forward(self, query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, input_padding_mask=None): + """ + :param query (N, Length_{query}, C) + :param reference_points (N, Length_{query}, n_levels, 2), range in [0, 1], top-left (0,0), bottom-right (1, 1), including padding area + or (N, Length_{query}, n_levels, 4), add additional (w, h) to form reference boxes + :param input_flatten (N, \sum_{l=0}^{L-1} H_l \cdot W_l, C) + :param input_spatial_shapes (n_levels, 2), [(H_0, W_0), (H_1, W_1), ..., (H_{L-1}, W_{L-1})] + :param input_level_start_index (n_levels, ), [0, H_0*W_0, H_0*W_0+H_1*W_1, H_0*W_0+H_1*W_1+H_2*W_2, ..., H_0*W_0+H_1*W_1+...+H_{L-1}*W_{L-1}] + :param input_padding_mask (N, \sum_{l=0}^{L-1} H_l \cdot W_l), True for padding elements, False for non-padding elements + :return output (N, Length_{query}, C) + """ + N, Len_q, _ = query.shape + N, Len_in, _ = input_flatten.shape + assert (input_spatial_shapes[:, 0] * input_spatial_shapes[:, 1]).sum() == Len_in + + value = self.value_proj(input_flatten) + if input_padding_mask is not None: + value = value.masked_fill(input_padding_mask[..., None], float(0)) + value = value.view(N, Len_in, self.n_heads, self.d_model // self.n_heads) + sampling_offsets = self.sampling_offsets(query).view(N, Len_q, self.n_heads, self.n_levels, self.n_points, 2) + attention_weights = self.attention_weights(query).view(N, Len_q, self.n_heads, self.n_levels * self.n_points) + attention_weights = F.softmax(attention_weights, -1).view(N, Len_q, self.n_heads, self.n_levels, self.n_points) + # N, Len_q, n_heads, n_levels, n_points, 2 + if reference_points.shape[-1] == 2: + offset_normalizer = torch.stack([input_spatial_shapes[..., 1], input_spatial_shapes[..., 0]], -1) + sampling_locations = reference_points[:, :, None, :, None, :] \ + + sampling_offsets / offset_normalizer[None, None, None, :, None, :] + elif reference_points.shape[-1] == 4: + sampling_locations = reference_points[:, :, None, :, None, :2] \ + + sampling_offsets / self.n_points * reference_points[:, :, None, :, None, 2:] * 0.5 + else: + raise ValueError( + 'Last dim of reference_points must be 2 or 4, but get {} instead.'.format(reference_points.shape[-1])) + # try: + output = ms_deform_attn_core_pytorch(value, input_spatial_shapes, sampling_locations, attention_weights) + # # For FLOPs calculation only + # output = ms_deform_attn_core_pytorch(value, input_spatial_shapes, sampling_locations, attention_weights) + output = self.output_proj(output) + return output \ No newline at end of file diff --git 
a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/ops/setup.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/ops/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..3b57ad313ac8f9b6586892142da8ba943e516cec --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/ops/setup.py @@ -0,0 +1,78 @@ +# ------------------------------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +# Copyright (c) Facebook, Inc. and its affiliates. +# Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR + +import os +import glob + +import torch + +from torch.utils.cpp_extension import CUDA_HOME +from torch.utils.cpp_extension import CppExtension +from torch.utils.cpp_extension import CUDAExtension + +from setuptools import find_packages +from setuptools import setup + +requirements = ["torch", "torchvision"] + +def get_extensions(): + this_dir = os.path.dirname(os.path.abspath(__file__)) + extensions_dir = os.path.join(this_dir, "src") + + main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) + source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) + source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) + + sources = main_file + source_cpu + extension = CppExtension + extra_compile_args = {"cxx": []} + define_macros = [] + + # Force cuda since torch ask for a device, not if cuda is in fact available. + if (os.environ.get('FORCE_CUDA') or torch.cuda.is_available()) and CUDA_HOME is not None: + extension = CUDAExtension + sources += source_cuda + define_macros += [("WITH_CUDA", None)] + extra_compile_args["nvcc"] = [ + "-DCUDA_HAS_FP16=1", + "-D__CUDA_NO_HALF_OPERATORS__", + "-D__CUDA_NO_HALF_CONVERSIONS__", + "-D__CUDA_NO_HALF2_OPERATORS__", + ] + else: + if CUDA_HOME is None: + raise NotImplementedError('CUDA_HOME is None. Please set environment variable CUDA_HOME.') + else: + raise NotImplementedError('No CUDA runtime is found. 
Please set FORCE_CUDA=1 or test it by running torch.cuda.is_available().') + + sources = [os.path.join(extensions_dir, s) for s in sources] + include_dirs = [extensions_dir] + ext_modules = [ + extension( + "MultiScaleDeformableAttention", + sources, + include_dirs=include_dirs, + define_macros=define_macros, + extra_compile_args=extra_compile_args, + ) + ] + return ext_modules + +setup( + name="MultiScaleDeformableAttention", + version="1.0", + author="Weijie Su", + url="https://github.com/fundamentalvision/Deformable-DETR", + description="PyTorch Wrapper for CUDA Functions of Multi-Scale Deformable Attention", + packages=find_packages(exclude=("configs", "tests",)), + ext_modules=get_extensions(), + cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, +) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/ops/src/cpu/ms_deform_attn_cpu.cpp b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/ops/src/cpu/ms_deform_attn_cpu.cpp new file mode 100644 index 0000000000000000000000000000000000000000..48757e2b0156b2c1513b615d2a17e5aee5172ae7 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/ops/src/cpu/ms_deform_attn_cpu.cpp @@ -0,0 +1,46 @@ +/*! +************************************************************************************************** +* Deformable DETR +* Copyright (c) 2020 SenseTime. All Rights Reserved. +* Licensed under the Apache License, Version 2.0 [see LICENSE for details] +************************************************************************************************** +* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +************************************************************************************************** +*/ + +/*! +* Copyright (c) Facebook, Inc. and its affiliates. +* Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR +*/ + +#include + +#include +#include + + +at::Tensor +ms_deform_attn_cpu_forward( + const at::Tensor &value, + const at::Tensor &spatial_shapes, + const at::Tensor &level_start_index, + const at::Tensor &sampling_loc, + const at::Tensor &attn_weight, + const int im2col_step) +{ + AT_ERROR("Not implement on cpu"); +} + +std::vector +ms_deform_attn_cpu_backward( + const at::Tensor &value, + const at::Tensor &spatial_shapes, + const at::Tensor &level_start_index, + const at::Tensor &sampling_loc, + const at::Tensor &attn_weight, + const at::Tensor &grad_output, + const int im2col_step) +{ + AT_ERROR("Not implement on cpu"); +} + diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/ops/src/cpu/ms_deform_attn_cpu.h b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/ops/src/cpu/ms_deform_attn_cpu.h new file mode 100644 index 0000000000000000000000000000000000000000..51bb27e9ee828f967e8aa854c2d55574040c6d7e --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/ops/src/cpu/ms_deform_attn_cpu.h @@ -0,0 +1,38 @@ +/*! +************************************************************************************************** +* Deformable DETR +* Copyright (c) 2020 SenseTime. All Rights Reserved. 
+* Licensed under the Apache License, Version 2.0 [see LICENSE for details] +************************************************************************************************** +* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +************************************************************************************************** +*/ + +/*! +* Copyright (c) Facebook, Inc. and its affiliates. +* Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR +*/ + +#pragma once +#include + +at::Tensor +ms_deform_attn_cpu_forward( + const at::Tensor &value, + const at::Tensor &spatial_shapes, + const at::Tensor &level_start_index, + const at::Tensor &sampling_loc, + const at::Tensor &attn_weight, + const int im2col_step); + +std::vector +ms_deform_attn_cpu_backward( + const at::Tensor &value, + const at::Tensor &spatial_shapes, + const at::Tensor &level_start_index, + const at::Tensor &sampling_loc, + const at::Tensor &attn_weight, + const at::Tensor &grad_output, + const int im2col_step); + + diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/ops/src/cuda/ms_deform_attn_cuda.cu b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/ops/src/cuda/ms_deform_attn_cuda.cu new file mode 100644 index 0000000000000000000000000000000000000000..0c465dab3d636dfd6a44523c63f148b6e15084d9 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/ops/src/cuda/ms_deform_attn_cuda.cu @@ -0,0 +1,158 @@ +/*! +************************************************************************************************** +* Deformable DETR +* Copyright (c) 2020 SenseTime. All Rights Reserved. +* Licensed under the Apache License, Version 2.0 [see LICENSE for details] +************************************************************************************************** +* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +************************************************************************************************** +*/ + +/*! +* Copyright (c) Facebook, Inc. and its affiliates. 
+* Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR
+*/
+
+#include <vector>
+#include "cuda/ms_deform_im2col_cuda.cuh"
+
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+
+at::Tensor ms_deform_attn_cuda_forward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const int im2col_step)
+{
+    AT_ASSERTM(value.is_contiguous(), "value tensor has to be contiguous");
+    AT_ASSERTM(spatial_shapes.is_contiguous(), "spatial_shapes tensor has to be contiguous");
+    AT_ASSERTM(level_start_index.is_contiguous(), "level_start_index tensor has to be contiguous");
+    AT_ASSERTM(sampling_loc.is_contiguous(), "sampling_loc tensor has to be contiguous");
+    AT_ASSERTM(attn_weight.is_contiguous(), "attn_weight tensor has to be contiguous");
+
+    AT_ASSERTM(value.type().is_cuda(), "value must be a CUDA tensor");
+    AT_ASSERTM(spatial_shapes.type().is_cuda(), "spatial_shapes must be a CUDA tensor");
+    AT_ASSERTM(level_start_index.type().is_cuda(), "level_start_index must be a CUDA tensor");
+    AT_ASSERTM(sampling_loc.type().is_cuda(), "sampling_loc must be a CUDA tensor");
+    AT_ASSERTM(attn_weight.type().is_cuda(), "attn_weight must be a CUDA tensor");
+
+    const int batch = value.size(0);
+    const int spatial_size = value.size(1);
+    const int num_heads = value.size(2);
+    const int channels = value.size(3);
+
+    const int num_levels = spatial_shapes.size(0);
+
+    const int num_query = sampling_loc.size(1);
+    const int num_point = sampling_loc.size(4);
+
+    const int im2col_step_ = std::min(batch, im2col_step);
+
+    AT_ASSERTM(batch % im2col_step_ == 0, "batch(%d) must divide im2col_step(%d)", batch, im2col_step_);
+
+    auto output = at::zeros({batch, num_query, num_heads, channels}, value.options());
+
+    const int batch_n = im2col_step_;
+    auto output_n = output.view({batch/im2col_step_, batch_n, num_query, num_heads, channels});
+    auto per_value_size = spatial_size * num_heads * channels;
+    auto per_sample_loc_size = num_query * num_heads * num_levels * num_point * 2;
+    auto per_attn_weight_size = num_query * num_heads * num_levels * num_point;
+    for (int n = 0; n < batch/im2col_step_; ++n)
+    {
+        auto columns = output_n.select(0, n);
+        AT_DISPATCH_FLOATING_TYPES(value.type(), "ms_deform_attn_forward_cuda", ([&] {
+            ms_deformable_im2col_cuda(at::cuda::getCurrentCUDAStream(),
+                value.data<scalar_t>() + n * im2col_step_ * per_value_size,
+                spatial_shapes.data<int64_t>(),
+                level_start_index.data<int64_t>(),
+                sampling_loc.data<scalar_t>() + n * im2col_step_ * per_sample_loc_size,
+                attn_weight.data<scalar_t>() + n * im2col_step_ * per_attn_weight_size,
+                batch_n, spatial_size, num_heads, channels, num_levels, num_query, num_point,
+                columns.data<scalar_t>());
+
+        }));
+    }
+
+    output = output.view({batch, num_query, num_heads*channels});
+
+    return output;
+}
+
+
+std::vector<at::Tensor> ms_deform_attn_cuda_backward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const at::Tensor &grad_output,
+    const int im2col_step)
+{
+
+    AT_ASSERTM(value.is_contiguous(), "value tensor has to be contiguous");
+    AT_ASSERTM(spatial_shapes.is_contiguous(), "spatial_shapes tensor has to be contiguous");
+    AT_ASSERTM(level_start_index.is_contiguous(), "level_start_index tensor has to be contiguous");
+    AT_ASSERTM(sampling_loc.is_contiguous(), "sampling_loc tensor has to be contiguous");
+    AT_ASSERTM(attn_weight.is_contiguous(), "attn_weight tensor has to be contiguous");
+    AT_ASSERTM(grad_output.is_contiguous(), "grad_output tensor has to be contiguous");
+
+    AT_ASSERTM(value.type().is_cuda(), "value must be a CUDA tensor");
+    AT_ASSERTM(spatial_shapes.type().is_cuda(), "spatial_shapes must be a CUDA tensor");
+    AT_ASSERTM(level_start_index.type().is_cuda(), "level_start_index must be a CUDA tensor");
+    AT_ASSERTM(sampling_loc.type().is_cuda(), "sampling_loc must be a CUDA tensor");
+    AT_ASSERTM(attn_weight.type().is_cuda(), "attn_weight must be a CUDA tensor");
+    AT_ASSERTM(grad_output.type().is_cuda(), "grad_output must be a CUDA tensor");
+
+    const int batch = value.size(0);
+    const int spatial_size = value.size(1);
+    const int num_heads = value.size(2);
+    const int channels = value.size(3);
+
+    const int num_levels = spatial_shapes.size(0);
+
+    const int num_query = sampling_loc.size(1);
+    const int num_point = sampling_loc.size(4);
+
+    const int im2col_step_ = std::min(batch, im2col_step);
+
+    AT_ASSERTM(batch % im2col_step_ == 0, "batch(%d) must divide im2col_step(%d)", batch, im2col_step_);
+
+    auto grad_value = at::zeros_like(value);
+    auto grad_sampling_loc = at::zeros_like(sampling_loc);
+    auto grad_attn_weight = at::zeros_like(attn_weight);
+
+    const int batch_n = im2col_step_;
+    auto per_value_size = spatial_size * num_heads * channels;
+    auto per_sample_loc_size = num_query * num_heads * num_levels * num_point * 2;
+    auto per_attn_weight_size = num_query * num_heads * num_levels * num_point;
+    auto grad_output_n = grad_output.view({batch/im2col_step_, batch_n, num_query, num_heads, channels});
+
+    for (int n = 0; n < batch/im2col_step_; ++n)
+    {
+        auto grad_output_g = grad_output_n.select(0, n);
+        AT_DISPATCH_FLOATING_TYPES(value.type(), "ms_deform_attn_backward_cuda", ([&] {
+            ms_deformable_col2im_cuda(at::cuda::getCurrentCUDAStream(),
+                grad_output_g.data<scalar_t>(),
+                value.data<scalar_t>() + n * im2col_step_ * per_value_size,
+                spatial_shapes.data<int64_t>(),
+                level_start_index.data<int64_t>(),
+                sampling_loc.data<scalar_t>() + n * im2col_step_ * per_sample_loc_size,
+                attn_weight.data<scalar_t>() + n * im2col_step_ * per_attn_weight_size,
+                batch_n, spatial_size, num_heads, channels, num_levels, num_query, num_point,
+                grad_value.data<scalar_t>() + n * im2col_step_ * per_value_size,
+                grad_sampling_loc.data<scalar_t>() + n * im2col_step_ * per_sample_loc_size,
+                grad_attn_weight.data<scalar_t>() + n * im2col_step_ * per_attn_weight_size);
+
+        }));
+    }
+
+    return {
+        grad_value, grad_sampling_loc, grad_attn_weight
+    };
+}
\ No newline at end of file
diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/ops/src/cuda/ms_deform_attn_cuda.h b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/ops/src/cuda/ms_deform_attn_cuda.h
new file mode 100644
index 0000000000000000000000000000000000000000..4f0658e8668a11f0e7d71deff9adac71884f2e87
--- /dev/null
+++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/ops/src/cuda/ms_deform_attn_cuda.h
@@ -0,0 +1,35 @@
+/*!
+**************************************************************************************************
+* Deformable DETR
+* Copyright (c) 2020 SenseTime. All Rights Reserved.
+* Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+**************************************************************************************************
+* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+**************************************************************************************************
+*/
+
+/*!
+* Copyright (c) Facebook, Inc. and its affiliates.
+* Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR
+*/
+
+#pragma once
+#include <torch/extension.h>
+
+at::Tensor ms_deform_attn_cuda_forward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const int im2col_step);
+
+std::vector<at::Tensor> ms_deform_attn_cuda_backward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const at::Tensor &grad_output,
+    const int im2col_step);
+
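On the Python side, the forward/backward pair declared above is typically tied together by a torch.autograd.Function so that sampling_loc and attn_weight receive gradients from ms_deform_attn_cuda_backward. The repository ships its own wrapper (the functions/ms_deform_attn_func.py module that test.py imports below); the sketch here only illustrates the pattern, and the compiled module name MultiScaleDeformableAttention is an assumption, since the actual name is whatever TORCH_EXTENSION_NAME resolves to when vision.cpp (later in this diff) is built:

    import torch
    # Module name assumed; it is the pybind11 module defined in vision.cpp.
    import MultiScaleDeformableAttention as MSDA

    class MSDeformAttnFunction(torch.autograd.Function):
        @staticmethod
        def forward(ctx, value, spatial_shapes, level_start_index, sampling_loc, attn_weight, im2col_step):
            ctx.im2col_step = im2col_step
            output = MSDA.ms_deform_attn_forward(
                value, spatial_shapes, level_start_index, sampling_loc, attn_weight, ctx.im2col_step)
            ctx.save_for_backward(value, spatial_shapes, level_start_index, sampling_loc, attn_weight)
            return output

        @staticmethod
        def backward(ctx, grad_output):
            value, spatial_shapes, level_start_index, sampling_loc, attn_weight = ctx.saved_tensors
            grad_value, grad_sampling_loc, grad_attn_weight = MSDA.ms_deform_attn_backward(
                value, spatial_shapes, level_start_index, sampling_loc, attn_weight,
                grad_output.contiguous(), ctx.im2col_step)
            # spatial_shapes, level_start_index and im2col_step get no gradient
            return grad_value, None, None, grad_sampling_loc, grad_attn_weight, None

Note that grad_output is made contiguous before the call, matching the is_contiguous assertions in the backward entry point above.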
diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/ops/src/cuda/ms_deform_im2col_cuda.cuh b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/ops/src/cuda/ms_deform_im2col_cuda.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..c04e0d4ab97d25c1756fcd8d08dd1e5a6d280b7c
--- /dev/null
+++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/ops/src/cuda/ms_deform_im2col_cuda.cuh
@@ -0,0 +1,1332 @@
+/*!
+**************************************************************************
+* Deformable DETR
+* Copyright (c) 2020 SenseTime. All Rights Reserved.
+* Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+**************************************************************************
+* Modified from DCN (https://github.com/msracver/Deformable-ConvNets)
+* Copyright (c) 2018 Microsoft
+**************************************************************************
+*/
+
+/*!
+* Copyright (c) Facebook, Inc. and its affiliates.
+* Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR
+*/
+
+#include <cstdio>
+#include <algorithm>
+#include <cstring>
+
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAApplyUtils.cuh>
+
+#include <THC/THCAtomics.cuh>
+
+#define CUDA_KERNEL_LOOP(i, n)                          \
+  for (int i = blockIdx.x * blockDim.x + threadIdx.x;   \
+      i < (n);                                          \
+      i += blockDim.x * gridDim.x)
+
+const int CUDA_NUM_THREADS = 1024;
+inline int GET_BLOCKS(const int N, const int num_threads)
+{
+  return (N + num_threads - 1) / num_threads;
+}
+
+
+template <typename scalar_t>
+__device__ scalar_t ms_deform_attn_im2col_bilinear(const scalar_t* &bottom_data,
+                                                   const int &height, const int &width, const int &nheads, const int &channels,
+                                                   const scalar_t &h, const scalar_t &w, const int &m, const int &c)
+{
+  const int h_low = floor(h);
+  const int w_low = floor(w);
+  const int h_high = h_low + 1;
+  const int w_high = w_low + 1;
+
+  const scalar_t lh = h - h_low;
+  const scalar_t lw = w - w_low;
+  const scalar_t hh = 1 - lh, hw = 1 - lw;
+
+  const int w_stride = nheads * channels;
+  const int h_stride = width * w_stride;
+  const int h_low_ptr_offset = h_low * h_stride;
+  const int h_high_ptr_offset = h_low_ptr_offset + h_stride;
+  const int w_low_ptr_offset = w_low * w_stride;
+  const int w_high_ptr_offset = w_low_ptr_offset + w_stride;
+  const int base_ptr = m * channels + c;
+
+  scalar_t v1 = 0;
+  if (h_low >= 0 && w_low >= 0)
+  {
+    const int ptr1 = h_low_ptr_offset + w_low_ptr_offset + base_ptr;
+    v1 = bottom_data[ptr1];
+  }
+  scalar_t v2 = 0;
+  if (h_low >= 0 && w_high <= width - 1)
+  {
+    const int ptr2 = h_low_ptr_offset + w_high_ptr_offset + base_ptr;
+    v2 = bottom_data[ptr2];
+  }
+  scalar_t v3 = 0;
+  if (h_high <= height - 1 && w_low >= 0)
+  {
+    const int ptr3 = h_high_ptr_offset + w_low_ptr_offset + base_ptr;
+    v3 = bottom_data[ptr3];
+  }
+  scalar_t v4 = 0;
+  if (h_high <= height - 1 && w_high <= width - 1)
+  {
+    const int ptr4 = h_high_ptr_offset + w_high_ptr_offset + base_ptr;
+    v4 = bottom_data[ptr4];
+  }
+
+  const scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;
+
+  const scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
+  return val;
+}
+
+
+template <typename scalar_t>
+__device__ void ms_deform_attn_col2im_bilinear(const scalar_t* &bottom_data,
+                                               const int &height, const int &width, const int &nheads, const int &channels,
+                                               const scalar_t &h, const scalar_t &w, const int &m, const int &c,
+                                               const scalar_t &top_grad,
+                                               const scalar_t &attn_weight,
+                                               scalar_t* &grad_value,
+                                               scalar_t* grad_sampling_loc,
+                                               scalar_t* grad_attn_weight)
+{
+  const int h_low = floor(h);
+  const int w_low = floor(w);
+  const int h_high = h_low + 1;
+  const int w_high = w_low + 1;
+
+  const scalar_t lh = h - h_low;
+  const scalar_t lw = w - w_low;
+  const scalar_t hh = 1 - lh, hw = 1 - lw;
+
+  const int w_stride = nheads * channels;
+  const int h_stride = width * w_stride;
+  const int h_low_ptr_offset = h_low * h_stride;
+  const int h_high_ptr_offset = h_low_ptr_offset + h_stride;
+  const int w_low_ptr_offset = w_low * w_stride;
+  const int w_high_ptr_offset = w_low_ptr_offset + w_stride;
+  const int base_ptr = m * channels + c;
+
+  const scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;
+  const scalar_t top_grad_value = top_grad * attn_weight;
+  scalar_t grad_h_weight = 0, grad_w_weight = 0;
+
+  scalar_t v1 = 0;
+  if (h_low >= 0 && w_low >= 0)
+  {
+    const int ptr1 = h_low_ptr_offset + w_low_ptr_offset + base_ptr;
+    v1 = bottom_data[ptr1];
+    grad_h_weight -= hw * v1;
+    grad_w_weight -= hh * v1;
+    atomicAdd(grad_value+ptr1, w1*top_grad_value);
+  }
+  scalar_t v2 = 0;
+  if (h_low >= 0 && w_high <= width - 1)
+  {
const int ptr2 = h_low_ptr_offset + w_high_ptr_offset + base_ptr; + v2 = bottom_data[ptr2]; + grad_h_weight -= lw * v2; + grad_w_weight += hh * v2; + atomicAdd(grad_value+ptr2, w2*top_grad_value); + } + scalar_t v3 = 0; + if (h_high <= height - 1 && w_low >= 0) + { + const int ptr3 = h_high_ptr_offset + w_low_ptr_offset + base_ptr; + v3 = bottom_data[ptr3]; + grad_h_weight += hw * v3; + grad_w_weight -= lh * v3; + atomicAdd(grad_value+ptr3, w3*top_grad_value); + } + scalar_t v4 = 0; + if (h_high <= height - 1 && w_high <= width - 1) + { + const int ptr4 = h_high_ptr_offset + w_high_ptr_offset + base_ptr; + v4 = bottom_data[ptr4]; + grad_h_weight += lw * v4; + grad_w_weight += lh * v4; + atomicAdd(grad_value+ptr4, w4*top_grad_value); + } + + const scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + *grad_attn_weight = top_grad * val; + *grad_sampling_loc = width * grad_w_weight * top_grad_value; + *(grad_sampling_loc + 1) = height * grad_h_weight * top_grad_value; +} + + +template +__device__ void ms_deform_attn_col2im_bilinear_gm(const scalar_t* &bottom_data, + const int &height, const int &width, const int &nheads, const int &channels, + const scalar_t &h, const scalar_t &w, const int &m, const int &c, + const scalar_t &top_grad, + const scalar_t &attn_weight, + scalar_t* &grad_value, + scalar_t* grad_sampling_loc, + scalar_t* grad_attn_weight) +{ + const int h_low = floor(h); + const int w_low = floor(w); + const int h_high = h_low + 1; + const int w_high = w_low + 1; + + const scalar_t lh = h - h_low; + const scalar_t lw = w - w_low; + const scalar_t hh = 1 - lh, hw = 1 - lw; + + const int w_stride = nheads * channels; + const int h_stride = width * w_stride; + const int h_low_ptr_offset = h_low * h_stride; + const int h_high_ptr_offset = h_low_ptr_offset + h_stride; + const int w_low_ptr_offset = w_low * w_stride; + const int w_high_ptr_offset = w_low_ptr_offset + w_stride; + const int base_ptr = m * channels + c; + + const scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw; + const scalar_t top_grad_value = top_grad * attn_weight; + scalar_t grad_h_weight = 0, grad_w_weight = 0; + + scalar_t v1 = 0; + if (h_low >= 0 && w_low >= 0) + { + const int ptr1 = h_low_ptr_offset + w_low_ptr_offset + base_ptr; + v1 = bottom_data[ptr1]; + grad_h_weight -= hw * v1; + grad_w_weight -= hh * v1; + atomicAdd(grad_value+ptr1, w1*top_grad_value); + } + scalar_t v2 = 0; + if (h_low >= 0 && w_high <= width - 1) + { + const int ptr2 = h_low_ptr_offset + w_high_ptr_offset + base_ptr; + v2 = bottom_data[ptr2]; + grad_h_weight -= lw * v2; + grad_w_weight += hh * v2; + atomicAdd(grad_value+ptr2, w2*top_grad_value); + } + scalar_t v3 = 0; + if (h_high <= height - 1 && w_low >= 0) + { + const int ptr3 = h_high_ptr_offset + w_low_ptr_offset + base_ptr; + v3 = bottom_data[ptr3]; + grad_h_weight += hw * v3; + grad_w_weight -= lh * v3; + atomicAdd(grad_value+ptr3, w3*top_grad_value); + } + scalar_t v4 = 0; + if (h_high <= height - 1 && w_high <= width - 1) + { + const int ptr4 = h_high_ptr_offset + w_high_ptr_offset + base_ptr; + v4 = bottom_data[ptr4]; + grad_h_weight += lw * v4; + grad_w_weight += lh * v4; + atomicAdd(grad_value+ptr4, w4*top_grad_value); + } + + const scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + atomicAdd(grad_attn_weight, top_grad * val); + atomicAdd(grad_sampling_loc, width * grad_w_weight * top_grad_value); + atomicAdd(grad_sampling_loc + 1, height * grad_h_weight * top_grad_value); +} + + +template +__global__ void ms_deformable_im2col_gpu_kernel(const int n, + 
const scalar_t *data_value, + const int64_t *data_spatial_shapes, + const int64_t *data_level_start_index, + const scalar_t *data_sampling_loc, + const scalar_t *data_attn_weight, + const int batch_size, + const int spatial_size, + const int num_heads, + const int channels, + const int num_levels, + const int num_query, + const int num_point, + scalar_t *data_col) +{ + CUDA_KERNEL_LOOP(index, n) + { + int _temp = index; + const int c_col = _temp % channels; + _temp /= channels; + const int sampling_index = _temp; + const int m_col = _temp % num_heads; + _temp /= num_heads; + const int q_col = _temp % num_query; + _temp /= num_query; + const int b_col = _temp; + + scalar_t *data_col_ptr = data_col + index; + int data_weight_ptr = sampling_index * num_levels * num_point; + int data_loc_w_ptr = data_weight_ptr << 1; + const int qid_stride = num_heads * channels; + const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride; + scalar_t col = 0; + + for (int l_col=0; l_col < num_levels; ++l_col) + { + const int level_start_id = data_level_start_index[l_col]; + const int spatial_h_ptr = l_col << 1; + const int spatial_h = data_spatial_shapes[spatial_h_ptr]; + const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1]; + const scalar_t *data_value_ptr = data_value + (data_value_ptr_init_offset + level_start_id * qid_stride); + for (int p_col=0; p_col < num_point; ++p_col) + { + const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr]; + const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1]; + const scalar_t weight = data_attn_weight[data_weight_ptr]; + + const scalar_t h_im = loc_h * spatial_h - 0.5; + const scalar_t w_im = loc_w * spatial_w - 0.5; + + if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) + { + col += ms_deform_attn_im2col_bilinear(data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col) * weight; + } + + data_weight_ptr += 1; + data_loc_w_ptr += 2; + } + } + *data_col_ptr = col; + } +} + +template +__global__ void ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1(const int n, + const scalar_t *grad_col, + const scalar_t *data_value, + const int64_t *data_spatial_shapes, + const int64_t *data_level_start_index, + const scalar_t *data_sampling_loc, + const scalar_t *data_attn_weight, + const int batch_size, + const int spatial_size, + const int num_heads, + const int channels, + const int num_levels, + const int num_query, + const int num_point, + scalar_t *grad_value, + scalar_t *grad_sampling_loc, + scalar_t *grad_attn_weight) +{ + CUDA_KERNEL_LOOP(index, n) + { + __shared__ scalar_t cache_grad_sampling_loc[blockSize * 2]; + __shared__ scalar_t cache_grad_attn_weight[blockSize]; + unsigned int tid = threadIdx.x; + int _temp = index; + const int c_col = _temp % channels; + _temp /= channels; + const int sampling_index = _temp; + const int m_col = _temp % num_heads; + _temp /= num_heads; + const int q_col = _temp % num_query; + _temp /= num_query; + const int b_col = _temp; + + const scalar_t top_grad = grad_col[index]; + + int data_weight_ptr = sampling_index * num_levels * num_point; + int data_loc_w_ptr = data_weight_ptr << 1; + const int grad_sampling_ptr = data_weight_ptr; + grad_sampling_loc += grad_sampling_ptr << 1; + grad_attn_weight += grad_sampling_ptr; + const int grad_weight_stride = 1; + const int grad_loc_stride = 2; + const int qid_stride = num_heads * channels; + const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride; + + for (int l_col=0; l_col < num_levels; ++l_col) + { 
+ const int level_start_id = data_level_start_index[l_col]; + const int spatial_h_ptr = l_col << 1; + const int spatial_h = data_spatial_shapes[spatial_h_ptr]; + const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1]; + const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride; + const scalar_t *data_value_ptr = data_value + value_ptr_offset; + scalar_t *grad_value_ptr = grad_value + value_ptr_offset; + + for (int p_col=0; p_col < num_point; ++p_col) + { + const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr]; + const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1]; + const scalar_t weight = data_attn_weight[data_weight_ptr]; + + const scalar_t h_im = loc_h * spatial_h - 0.5; + const scalar_t w_im = loc_w * spatial_w - 0.5; + *(cache_grad_sampling_loc+(threadIdx.x << 1)) = 0; + *(cache_grad_sampling_loc+((threadIdx.x << 1) + 1)) = 0; + *(cache_grad_attn_weight+threadIdx.x)=0; + if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) + { + ms_deform_attn_col2im_bilinear( + data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col, + top_grad, weight, grad_value_ptr, + cache_grad_sampling_loc+(threadIdx.x << 1), cache_grad_attn_weight+threadIdx.x); + } + + __syncthreads(); + if (tid == 0) + { + scalar_t _grad_w=cache_grad_sampling_loc[0], _grad_h=cache_grad_sampling_loc[1], _grad_a=cache_grad_attn_weight[0]; + int sid=2; + for (unsigned int tid = 1; tid < blockSize; ++tid) + { + _grad_w += cache_grad_sampling_loc[sid]; + _grad_h += cache_grad_sampling_loc[sid + 1]; + _grad_a += cache_grad_attn_weight[tid]; + sid += 2; + } + + + *grad_sampling_loc = _grad_w; + *(grad_sampling_loc + 1) = _grad_h; + *grad_attn_weight = _grad_a; + } + __syncthreads(); + + data_weight_ptr += 1; + data_loc_w_ptr += 2; + grad_attn_weight += grad_weight_stride; + grad_sampling_loc += grad_loc_stride; + } + } + } +} + + +template +__global__ void ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2(const int n, + const scalar_t *grad_col, + const scalar_t *data_value, + const int64_t *data_spatial_shapes, + const int64_t *data_level_start_index, + const scalar_t *data_sampling_loc, + const scalar_t *data_attn_weight, + const int batch_size, + const int spatial_size, + const int num_heads, + const int channels, + const int num_levels, + const int num_query, + const int num_point, + scalar_t *grad_value, + scalar_t *grad_sampling_loc, + scalar_t *grad_attn_weight) +{ + CUDA_KERNEL_LOOP(index, n) + { + __shared__ scalar_t cache_grad_sampling_loc[blockSize * 2]; + __shared__ scalar_t cache_grad_attn_weight[blockSize]; + unsigned int tid = threadIdx.x; + int _temp = index; + const int c_col = _temp % channels; + _temp /= channels; + const int sampling_index = _temp; + const int m_col = _temp % num_heads; + _temp /= num_heads; + const int q_col = _temp % num_query; + _temp /= num_query; + const int b_col = _temp; + + const scalar_t top_grad = grad_col[index]; + + int data_weight_ptr = sampling_index * num_levels * num_point; + int data_loc_w_ptr = data_weight_ptr << 1; + const int grad_sampling_ptr = data_weight_ptr; + grad_sampling_loc += grad_sampling_ptr << 1; + grad_attn_weight += grad_sampling_ptr; + const int grad_weight_stride = 1; + const int grad_loc_stride = 2; + const int qid_stride = num_heads * channels; + const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride; + + for (int l_col=0; l_col < num_levels; ++l_col) + { + const int level_start_id = data_level_start_index[l_col]; + const int spatial_h_ptr = 
l_col << 1; + const int spatial_h = data_spatial_shapes[spatial_h_ptr]; + const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1]; + const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride; + const scalar_t *data_value_ptr = data_value + value_ptr_offset; + scalar_t *grad_value_ptr = grad_value + value_ptr_offset; + + for (int p_col=0; p_col < num_point; ++p_col) + { + const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr]; + const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1]; + const scalar_t weight = data_attn_weight[data_weight_ptr]; + + const scalar_t h_im = loc_h * spatial_h - 0.5; + const scalar_t w_im = loc_w * spatial_w - 0.5; + *(cache_grad_sampling_loc+(threadIdx.x << 1)) = 0; + *(cache_grad_sampling_loc+((threadIdx.x << 1) + 1)) = 0; + *(cache_grad_attn_weight+threadIdx.x)=0; + if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) + { + ms_deform_attn_col2im_bilinear( + data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col, + top_grad, weight, grad_value_ptr, + cache_grad_sampling_loc+(threadIdx.x << 1), cache_grad_attn_weight+threadIdx.x); + } + + __syncthreads(); + + for (unsigned int s=blockSize/2; s>0; s>>=1) + { + if (tid < s) { + const unsigned int xid1 = tid << 1; + const unsigned int xid2 = (tid + s) << 1; + cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + s]; + cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2]; + cache_grad_sampling_loc[xid1 + 1] += cache_grad_sampling_loc[xid2 + 1]; + } + __syncthreads(); + } + + if (tid == 0) + { + *grad_sampling_loc = cache_grad_sampling_loc[0]; + *(grad_sampling_loc + 1) = cache_grad_sampling_loc[1]; + *grad_attn_weight = cache_grad_attn_weight[0]; + } + __syncthreads(); + + data_weight_ptr += 1; + data_loc_w_ptr += 2; + grad_attn_weight += grad_weight_stride; + grad_sampling_loc += grad_loc_stride; + } + } + } +} + + +template +__global__ void ms_deformable_col2im_gpu_kernel_shm_reduce_v1(const int n, + const scalar_t *grad_col, + const scalar_t *data_value, + const int64_t *data_spatial_shapes, + const int64_t *data_level_start_index, + const scalar_t *data_sampling_loc, + const scalar_t *data_attn_weight, + const int batch_size, + const int spatial_size, + const int num_heads, + const int channels, + const int num_levels, + const int num_query, + const int num_point, + scalar_t *grad_value, + scalar_t *grad_sampling_loc, + scalar_t *grad_attn_weight) +{ + CUDA_KERNEL_LOOP(index, n) + { + extern __shared__ int _s[]; + scalar_t* cache_grad_sampling_loc = (scalar_t*)_s; + scalar_t* cache_grad_attn_weight = cache_grad_sampling_loc + 2 * blockDim.x; + unsigned int tid = threadIdx.x; + int _temp = index; + const int c_col = _temp % channels; + _temp /= channels; + const int sampling_index = _temp; + const int m_col = _temp % num_heads; + _temp /= num_heads; + const int q_col = _temp % num_query; + _temp /= num_query; + const int b_col = _temp; + + const scalar_t top_grad = grad_col[index]; + + int data_weight_ptr = sampling_index * num_levels * num_point; + int data_loc_w_ptr = data_weight_ptr << 1; + const int grad_sampling_ptr = data_weight_ptr; + grad_sampling_loc += grad_sampling_ptr << 1; + grad_attn_weight += grad_sampling_ptr; + const int grad_weight_stride = 1; + const int grad_loc_stride = 2; + const int qid_stride = num_heads * channels; + const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride; + + for (int l_col=0; l_col < num_levels; ++l_col) + { + const int level_start_id = 
data_level_start_index[l_col]; + const int spatial_h_ptr = l_col << 1; + const int spatial_h = data_spatial_shapes[spatial_h_ptr]; + const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1]; + const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride; + const scalar_t *data_value_ptr = data_value + value_ptr_offset; + scalar_t *grad_value_ptr = grad_value + value_ptr_offset; + + for (int p_col=0; p_col < num_point; ++p_col) + { + const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr]; + const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1]; + const scalar_t weight = data_attn_weight[data_weight_ptr]; + + const scalar_t h_im = loc_h * spatial_h - 0.5; + const scalar_t w_im = loc_w * spatial_w - 0.5; + *(cache_grad_sampling_loc+(threadIdx.x << 1)) = 0; + *(cache_grad_sampling_loc+((threadIdx.x << 1) + 1)) = 0; + *(cache_grad_attn_weight+threadIdx.x)=0; + if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) + { + ms_deform_attn_col2im_bilinear( + data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col, + top_grad, weight, grad_value_ptr, + cache_grad_sampling_loc+(threadIdx.x << 1), cache_grad_attn_weight+threadIdx.x); + } + + __syncthreads(); + if (tid == 0) + { + scalar_t _grad_w=cache_grad_sampling_loc[0], _grad_h=cache_grad_sampling_loc[1], _grad_a=cache_grad_attn_weight[0]; + int sid=2; + for (unsigned int tid = 1; tid < blockDim.x; ++tid) + { + _grad_w += cache_grad_sampling_loc[sid]; + _grad_h += cache_grad_sampling_loc[sid + 1]; + _grad_a += cache_grad_attn_weight[tid]; + sid += 2; + } + + + *grad_sampling_loc = _grad_w; + *(grad_sampling_loc + 1) = _grad_h; + *grad_attn_weight = _grad_a; + } + __syncthreads(); + + data_weight_ptr += 1; + data_loc_w_ptr += 2; + grad_attn_weight += grad_weight_stride; + grad_sampling_loc += grad_loc_stride; + } + } + } +} + +template +__global__ void ms_deformable_col2im_gpu_kernel_shm_reduce_v2(const int n, + const scalar_t *grad_col, + const scalar_t *data_value, + const int64_t *data_spatial_shapes, + const int64_t *data_level_start_index, + const scalar_t *data_sampling_loc, + const scalar_t *data_attn_weight, + const int batch_size, + const int spatial_size, + const int num_heads, + const int channels, + const int num_levels, + const int num_query, + const int num_point, + scalar_t *grad_value, + scalar_t *grad_sampling_loc, + scalar_t *grad_attn_weight) +{ + CUDA_KERNEL_LOOP(index, n) + { + extern __shared__ int _s[]; + scalar_t* cache_grad_sampling_loc = (scalar_t*)_s; + scalar_t* cache_grad_attn_weight = cache_grad_sampling_loc + 2 * blockDim.x; + unsigned int tid = threadIdx.x; + int _temp = index; + const int c_col = _temp % channels; + _temp /= channels; + const int sampling_index = _temp; + const int m_col = _temp % num_heads; + _temp /= num_heads; + const int q_col = _temp % num_query; + _temp /= num_query; + const int b_col = _temp; + + const scalar_t top_grad = grad_col[index]; + + int data_weight_ptr = sampling_index * num_levels * num_point; + int data_loc_w_ptr = data_weight_ptr << 1; + const int grad_sampling_ptr = data_weight_ptr; + grad_sampling_loc += grad_sampling_ptr << 1; + grad_attn_weight += grad_sampling_ptr; + const int grad_weight_stride = 1; + const int grad_loc_stride = 2; + const int qid_stride = num_heads * channels; + const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride; + + for (int l_col=0; l_col < num_levels; ++l_col) + { + const int level_start_id = data_level_start_index[l_col]; + const int spatial_h_ptr = 
l_col << 1; + const int spatial_h = data_spatial_shapes[spatial_h_ptr]; + const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1]; + const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride; + const scalar_t *data_value_ptr = data_value + value_ptr_offset; + scalar_t *grad_value_ptr = grad_value + value_ptr_offset; + + for (int p_col=0; p_col < num_point; ++p_col) + { + const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr]; + const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1]; + const scalar_t weight = data_attn_weight[data_weight_ptr]; + + const scalar_t h_im = loc_h * spatial_h - 0.5; + const scalar_t w_im = loc_w * spatial_w - 0.5; + *(cache_grad_sampling_loc+(threadIdx.x << 1)) = 0; + *(cache_grad_sampling_loc+((threadIdx.x << 1) + 1)) = 0; + *(cache_grad_attn_weight+threadIdx.x)=0; + if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) + { + ms_deform_attn_col2im_bilinear( + data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col, + top_grad, weight, grad_value_ptr, + cache_grad_sampling_loc+(threadIdx.x << 1), cache_grad_attn_weight+threadIdx.x); + } + + __syncthreads(); + + for (unsigned int s=blockDim.x/2, spre=blockDim.x; s>0; s>>=1, spre>>=1) + { + if (tid < s) { + const unsigned int xid1 = tid << 1; + const unsigned int xid2 = (tid + s) << 1; + cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + s]; + cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2]; + cache_grad_sampling_loc[xid1 + 1] += cache_grad_sampling_loc[xid2 + 1]; + if (tid + (s << 1) < spre) + { + cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + (s << 1)]; + cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2 + (s << 1)]; + cache_grad_sampling_loc[xid1 + 1] += cache_grad_sampling_loc[xid2 + 1 + (s << 1)]; + } + } + __syncthreads(); + } + + if (tid == 0) + { + *grad_sampling_loc = cache_grad_sampling_loc[0]; + *(grad_sampling_loc + 1) = cache_grad_sampling_loc[1]; + *grad_attn_weight = cache_grad_attn_weight[0]; + } + __syncthreads(); + + data_weight_ptr += 1; + data_loc_w_ptr += 2; + grad_attn_weight += grad_weight_stride; + grad_sampling_loc += grad_loc_stride; + } + } + } +} + +template +__global__ void ms_deformable_col2im_gpu_kernel_shm_reduce_v2_multi_blocks(const int n, + const scalar_t *grad_col, + const scalar_t *data_value, + const int64_t *data_spatial_shapes, + const int64_t *data_level_start_index, + const scalar_t *data_sampling_loc, + const scalar_t *data_attn_weight, + const int batch_size, + const int spatial_size, + const int num_heads, + const int channels, + const int num_levels, + const int num_query, + const int num_point, + scalar_t *grad_value, + scalar_t *grad_sampling_loc, + scalar_t *grad_attn_weight) +{ + CUDA_KERNEL_LOOP(index, n) + { + extern __shared__ int _s[]; + scalar_t* cache_grad_sampling_loc = (scalar_t*)_s; + scalar_t* cache_grad_attn_weight = cache_grad_sampling_loc + 2 * blockDim.x; + unsigned int tid = threadIdx.x; + int _temp = index; + const int c_col = _temp % channels; + _temp /= channels; + const int sampling_index = _temp; + const int m_col = _temp % num_heads; + _temp /= num_heads; + const int q_col = _temp % num_query; + _temp /= num_query; + const int b_col = _temp; + + const scalar_t top_grad = grad_col[index]; + + int data_weight_ptr = sampling_index * num_levels * num_point; + int data_loc_w_ptr = data_weight_ptr << 1; + const int grad_sampling_ptr = data_weight_ptr; + grad_sampling_loc += grad_sampling_ptr << 1; + grad_attn_weight += 
grad_sampling_ptr; + const int grad_weight_stride = 1; + const int grad_loc_stride = 2; + const int qid_stride = num_heads * channels; + const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride; + + for (int l_col=0; l_col < num_levels; ++l_col) + { + const int level_start_id = data_level_start_index[l_col]; + const int spatial_h_ptr = l_col << 1; + const int spatial_h = data_spatial_shapes[spatial_h_ptr]; + const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1]; + const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride; + const scalar_t *data_value_ptr = data_value + value_ptr_offset; + scalar_t *grad_value_ptr = grad_value + value_ptr_offset; + + for (int p_col=0; p_col < num_point; ++p_col) + { + const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr]; + const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1]; + const scalar_t weight = data_attn_weight[data_weight_ptr]; + + const scalar_t h_im = loc_h * spatial_h - 0.5; + const scalar_t w_im = loc_w * spatial_w - 0.5; + *(cache_grad_sampling_loc+(threadIdx.x << 1)) = 0; + *(cache_grad_sampling_loc+((threadIdx.x << 1) + 1)) = 0; + *(cache_grad_attn_weight+threadIdx.x)=0; + if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) + { + ms_deform_attn_col2im_bilinear( + data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col, + top_grad, weight, grad_value_ptr, + cache_grad_sampling_loc+(threadIdx.x << 1), cache_grad_attn_weight+threadIdx.x); + } + + __syncthreads(); + + for (unsigned int s=blockDim.x/2, spre=blockDim.x; s>0; s>>=1, spre>>=1) + { + if (tid < s) { + const unsigned int xid1 = tid << 1; + const unsigned int xid2 = (tid + s) << 1; + cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + s]; + cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2]; + cache_grad_sampling_loc[xid1 + 1] += cache_grad_sampling_loc[xid2 + 1]; + if (tid + (s << 1) < spre) + { + cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + (s << 1)]; + cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2 + (s << 1)]; + cache_grad_sampling_loc[xid1 + 1] += cache_grad_sampling_loc[xid2 + 1 + (s << 1)]; + } + } + __syncthreads(); + } + + if (tid == 0) + { + atomicAdd(grad_sampling_loc, cache_grad_sampling_loc[0]); + atomicAdd(grad_sampling_loc + 1, cache_grad_sampling_loc[1]); + atomicAdd(grad_attn_weight, cache_grad_attn_weight[0]); + } + __syncthreads(); + + data_weight_ptr += 1; + data_loc_w_ptr += 2; + grad_attn_weight += grad_weight_stride; + grad_sampling_loc += grad_loc_stride; + } + } + } +} + + +template +__global__ void ms_deformable_col2im_gpu_kernel_gm(const int n, + const scalar_t *grad_col, + const scalar_t *data_value, + const int64_t *data_spatial_shapes, + const int64_t *data_level_start_index, + const scalar_t *data_sampling_loc, + const scalar_t *data_attn_weight, + const int batch_size, + const int spatial_size, + const int num_heads, + const int channels, + const int num_levels, + const int num_query, + const int num_point, + scalar_t *grad_value, + scalar_t *grad_sampling_loc, + scalar_t *grad_attn_weight) +{ + CUDA_KERNEL_LOOP(index, n) + { + int _temp = index; + const int c_col = _temp % channels; + _temp /= channels; + const int sampling_index = _temp; + const int m_col = _temp % num_heads; + _temp /= num_heads; + const int q_col = _temp % num_query; + _temp /= num_query; + const int b_col = _temp; + + const scalar_t top_grad = grad_col[index]; + + int data_weight_ptr = sampling_index * num_levels * num_point; + 
int data_loc_w_ptr = data_weight_ptr << 1; + const int grad_sampling_ptr = data_weight_ptr; + grad_sampling_loc += grad_sampling_ptr << 1; + grad_attn_weight += grad_sampling_ptr; + const int grad_weight_stride = 1; + const int grad_loc_stride = 2; + const int qid_stride = num_heads * channels; + const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride; + + for (int l_col=0; l_col < num_levels; ++l_col) + { + const int level_start_id = data_level_start_index[l_col]; + const int spatial_h_ptr = l_col << 1; + const int spatial_h = data_spatial_shapes[spatial_h_ptr]; + const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1]; + const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride; + const scalar_t *data_value_ptr = data_value + value_ptr_offset; + scalar_t *grad_value_ptr = grad_value + value_ptr_offset; + + for (int p_col=0; p_col < num_point; ++p_col) + { + const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr]; + const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1]; + const scalar_t weight = data_attn_weight[data_weight_ptr]; + + const scalar_t h_im = loc_h * spatial_h - 0.5; + const scalar_t w_im = loc_w * spatial_w - 0.5; + if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) + { + ms_deform_attn_col2im_bilinear_gm( + data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col, + top_grad, weight, grad_value_ptr, + grad_sampling_loc, grad_attn_weight); + } + data_weight_ptr += 1; + data_loc_w_ptr += 2; + grad_attn_weight += grad_weight_stride; + grad_sampling_loc += grad_loc_stride; + } + } + } +} + + +template +void ms_deformable_im2col_cuda(cudaStream_t stream, + const scalar_t* data_value, + const int64_t* data_spatial_shapes, + const int64_t* data_level_start_index, + const scalar_t* data_sampling_loc, + const scalar_t* data_attn_weight, + const int batch_size, + const int spatial_size, + const int num_heads, + const int channels, + const int num_levels, + const int num_query, + const int num_point, + scalar_t* data_col) +{ + const int num_kernels = batch_size * num_query * num_heads * channels; + const int num_actual_kernels = batch_size * num_query * num_heads * channels; + const int num_threads = CUDA_NUM_THREADS; + ms_deformable_im2col_gpu_kernel + <<>>( + num_kernels, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight, + batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point, data_col); + + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) + { + printf("error in ms_deformable_im2col_cuda: %s\n", cudaGetErrorString(err)); + } + +} + +template +void ms_deformable_col2im_cuda(cudaStream_t stream, + const scalar_t* grad_col, + const scalar_t* data_value, + const int64_t * data_spatial_shapes, + const int64_t * data_level_start_index, + const scalar_t * data_sampling_loc, + const scalar_t * data_attn_weight, + const int batch_size, + const int spatial_size, + const int num_heads, + const int channels, + const int num_levels, + const int num_query, + const int num_point, + scalar_t* grad_value, + scalar_t* grad_sampling_loc, + scalar_t* grad_attn_weight) +{ + const int num_threads = (channels > CUDA_NUM_THREADS)?CUDA_NUM_THREADS:channels; + const int num_kernels = batch_size * num_query * num_heads * channels; + const int num_actual_kernels = batch_size * num_query * num_heads * channels; + if (channels > 1024) + { + if ((channels & 1023) == 0) + { + 
ms_deformable_col2im_gpu_kernel_shm_reduce_v2_multi_blocks + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + } + else + { + ms_deformable_col2im_gpu_kernel_gm + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + } + } + else{ + switch(channels) + { + case 1: + ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1 + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + break; + case 2: + ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1 + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + break; + case 4: + ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1 + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + break; + case 8: + ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1 + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + break; + case 16: + ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1 + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + break; + case 32: + ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1 + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + break; + case 64: + ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2 + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + break; + case 128: + ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2 + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + 
data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + break; + case 256: + ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2 + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + break; + case 512: + ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2 + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + break; + case 1024: + ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2 + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + break; + default: + if (channels < 64) + { + ms_deformable_col2im_gpu_kernel_shm_reduce_v1 + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + } + else + { + ms_deformable_col2im_gpu_kernel_shm_reduce_v2 + <<>>( + num_kernels, + grad_col, + data_value, + data_spatial_shapes, + data_level_start_index, + data_sampling_loc, + data_attn_weight, + batch_size, + spatial_size, + num_heads, + channels, + num_levels, + num_query, + num_point, + grad_value, + grad_sampling_loc, + grad_attn_weight); + } + } + } + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) + { + printf("error in ms_deformable_col2im_cuda: %s\n", cudaGetErrorString(err)); + } + +} \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/ops/src/ms_deform_attn.h b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/ops/src/ms_deform_attn.h new file mode 100644 index 0000000000000000000000000000000000000000..2f80a1b294c55b37d13bb3558ff7aeadba3b37de --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/ops/src/ms_deform_attn.h @@ -0,0 +1,67 @@ +/*! +************************************************************************************************** +* Deformable DETR +* Copyright (c) 2020 SenseTime. All Rights Reserved. +* Licensed under the Apache License, Version 2.0 [see LICENSE for details] +************************************************************************************************** +* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +************************************************************************************************** +*/ + +/*! +* Copyright (c) Facebook, Inc. and its affiliates. 
+* Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR +*/ + +#pragma once + +#include "cpu/ms_deform_attn_cpu.h" + +#ifdef WITH_CUDA +#include "cuda/ms_deform_attn_cuda.h" +#endif + + +at::Tensor +ms_deform_attn_forward( + const at::Tensor &value, + const at::Tensor &spatial_shapes, + const at::Tensor &level_start_index, + const at::Tensor &sampling_loc, + const at::Tensor &attn_weight, + const int im2col_step) +{ + if (value.type().is_cuda()) + { +#ifdef WITH_CUDA + return ms_deform_attn_cuda_forward( + value, spatial_shapes, level_start_index, sampling_loc, attn_weight, im2col_step); +#else + AT_ERROR("Not compiled with GPU support"); +#endif + } + AT_ERROR("Not implemented on the CPU"); +} + +std::vector +ms_deform_attn_backward( + const at::Tensor &value, + const at::Tensor &spatial_shapes, + const at::Tensor &level_start_index, + const at::Tensor &sampling_loc, + const at::Tensor &attn_weight, + const at::Tensor &grad_output, + const int im2col_step) +{ + if (value.type().is_cuda()) + { +#ifdef WITH_CUDA + return ms_deform_attn_cuda_backward( + value, spatial_shapes, level_start_index, sampling_loc, attn_weight, grad_output, im2col_step); +#else + AT_ERROR("Not compiled with GPU support"); +#endif + } + AT_ERROR("Not implemented on the CPU"); +} + diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/ops/src/vision.cpp b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/ops/src/vision.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4a08821e0121a77556aa7a263ec8ebfa928b13b6 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/ops/src/vision.cpp @@ -0,0 +1,21 @@ +/*! +************************************************************************************************** +* Deformable DETR +* Copyright (c) 2020 SenseTime. All Rights Reserved. +* Licensed under the Apache License, Version 2.0 [see LICENSE for details] +************************************************************************************************** +* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +************************************************************************************************** +*/ + +/*! +* Copyright (c) Facebook, Inc. and its affiliates. +* Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR +*/ + +#include "ms_deform_attn.h" + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("ms_deform_attn_forward", &ms_deform_attn_forward, "ms_deform_attn_forward"); + m.def("ms_deform_attn_backward", &ms_deform_attn_backward, "ms_deform_attn_backward"); +} diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/ops/test.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/ops/test.py new file mode 100644 index 0000000000000000000000000000000000000000..6e1b545459f6fd3235767e721eb5a1090ae14bef --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/pixel_decoder/ops/test.py @@ -0,0 +1,92 @@ +# ------------------------------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. 
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +# Copyright (c) Facebook, Inc. and its affiliates. +# Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR + +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import time +import torch +import torch.nn as nn +from torch.autograd import gradcheck + +from functions.ms_deform_attn_func import MSDeformAttnFunction, ms_deform_attn_core_pytorch + + +N, M, D = 1, 2, 2 +Lq, L, P = 2, 2, 2 +shapes = torch.as_tensor([(6, 4), (3, 2)], dtype=torch.long).cuda() +level_start_index = torch.cat((shapes.new_zeros((1, )), shapes.prod(1).cumsum(0)[:-1])) +S = sum([(H*W).item() for H, W in shapes]) + + +torch.manual_seed(3) + + +@torch.no_grad() +def check_forward_equal_with_pytorch_double(): + value = torch.rand(N, S, M, D).cuda() * 0.01 + sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda() + attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5 + attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True) + im2col_step = 2 + output_pytorch = ms_deform_attn_core_pytorch(value.double(), shapes, sampling_locations.double(), attention_weights.double()).detach().cpu() + output_cuda = MSDeformAttnFunction.apply(value.double(), shapes, level_start_index, sampling_locations.double(), attention_weights.double(), im2col_step).detach().cpu() + fwdok = torch.allclose(output_cuda, output_pytorch) + max_abs_err = (output_cuda - output_pytorch).abs().max() + max_rel_err = ((output_cuda - output_pytorch).abs() / output_pytorch.abs()).max() + + print(f'* {fwdok} check_forward_equal_with_pytorch_double: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}') + + +@torch.no_grad() +def check_forward_equal_with_pytorch_float(): + value = torch.rand(N, S, M, D).cuda() * 0.01 + sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda() + attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5 + attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True) + im2col_step = 2 + output_pytorch = ms_deform_attn_core_pytorch(value, shapes, sampling_locations, attention_weights).detach().cpu() + output_cuda = MSDeformAttnFunction.apply(value, shapes, level_start_index, sampling_locations, attention_weights, im2col_step).detach().cpu() + fwdok = torch.allclose(output_cuda, output_pytorch, rtol=1e-2, atol=1e-3) + max_abs_err = (output_cuda - output_pytorch).abs().max() + max_rel_err = ((output_cuda - output_pytorch).abs() / output_pytorch.abs()).max() + + print(f'* {fwdok} check_forward_equal_with_pytorch_float: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}') + + +def check_gradient_numerical(channels=4, grad_value=True, grad_sampling_loc=True, grad_attn_weight=True): + + value = torch.rand(N, S, M, channels).cuda() * 0.01 + sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda() + attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5 + attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True) + im2col_step = 2 + func = MSDeformAttnFunction.apply + + value.requires_grad = grad_value + sampling_locations.requires_grad = grad_sampling_loc + 
attention_weights.requires_grad = grad_attn_weight + + gradok = gradcheck(func, (value.double(), shapes, level_start_index, sampling_locations.double(), attention_weights.double(), im2col_step)) + + print(f'* {gradok} check_gradient_numerical(D={channels})') + + +if __name__ == '__main__': + check_forward_equal_with_pytorch_double() + check_forward_equal_with_pytorch_float() + + for channels in [30, 32, 64, 71, 1025, 2048, 3096]: + check_gradient_numerical(channels, True, True, True) + + + diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/transformer_decoder/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/transformer_decoder/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b84bd4ecb48f134ccc218c4d5f02c50f7033bcd9 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/transformer_decoder/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from .oneformer_transformer_decoder import ContrastiveMultiScaleMaskedTransformerDecoder \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/transformer_decoder/oneformer_transformer_decoder.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/transformer_decoder/oneformer_transformer_decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..2887c7718f864f5c64f245c7eee307c04835c41f --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/transformer_decoder/oneformer_transformer_decoder.py @@ -0,0 +1,528 @@ +# ------------------------------------------------------------------------------ +# Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/modeling/transformer_decoder/mask2former_transformer_decoder.py +# Modified by Jitesh Jain (https://github.com/praeclarumjj3) +# ------------------------------------------------------------------------------ + +import logging +import fvcore.nn.weight_init as weight_init +from typing import Optional +import torch +from torch import nn, Tensor +from torch.nn import functional as F + +from annotator.oneformer.detectron2.config import configurable +from annotator.oneformer.detectron2.layers import Conv2d + +from .position_encoding import PositionEmbeddingSine +from .transformer import Transformer + +from annotator.oneformer.detectron2.utils.registry import Registry + + +TRANSFORMER_DECODER_REGISTRY = Registry("TRANSFORMER_MODULE") +TRANSFORMER_DECODER_REGISTRY.__doc__ = """ +Registry for transformer module in OneFormer. +""" + + +def build_transformer_decoder(cfg, in_channels, mask_classification=True): + """ + Build a instance embedding branch from `cfg.MODEL.INS_EMBED_HEAD.NAME`. 
+ """ + name = cfg.MODEL.ONE_FORMER.TRANSFORMER_DECODER_NAME + return TRANSFORMER_DECODER_REGISTRY.get(name)(cfg, in_channels, mask_classification) + + +class SelfAttentionLayer(nn.Module): + + def __init__(self, d_model, nhead, dropout=0.0, + activation="relu", normalize_before=False): + super().__init__() + self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout) + + self.norm = nn.LayerNorm(d_model) + self.dropout = nn.Dropout(dropout) + + self.activation = _get_activation_fn(activation) + self.normalize_before = normalize_before + + self._reset_parameters() + + def _reset_parameters(self): + for p in self.parameters(): + if p.dim() > 1: + nn.init.xavier_uniform_(p) + + def with_pos_embed(self, tensor, pos: Optional[Tensor]): + return tensor if pos is None else tensor + pos + + def forward_post(self, tgt, + tgt_mask: Optional[Tensor] = None, + tgt_key_padding_mask: Optional[Tensor] = None, + query_pos: Optional[Tensor] = None): + q = k = self.with_pos_embed(tgt, query_pos) + tgt2 = self.self_attn(q, k, value=tgt, attn_mask=tgt_mask, + key_padding_mask=tgt_key_padding_mask)[0] + tgt = tgt + self.dropout(tgt2) + tgt = self.norm(tgt) + + return tgt + + def forward_pre(self, tgt, + tgt_mask: Optional[Tensor] = None, + tgt_key_padding_mask: Optional[Tensor] = None, + query_pos: Optional[Tensor] = None): + tgt2 = self.norm(tgt) + q = k = self.with_pos_embed(tgt2, query_pos) + tgt2 = self.self_attn(q, k, value=tgt2, attn_mask=tgt_mask, + key_padding_mask=tgt_key_padding_mask)[0] + tgt = tgt + self.dropout(tgt2) + + return tgt + + def forward(self, tgt, + tgt_mask: Optional[Tensor] = None, + tgt_key_padding_mask: Optional[Tensor] = None, + query_pos: Optional[Tensor] = None): + if self.normalize_before: + return self.forward_pre(tgt, tgt_mask, + tgt_key_padding_mask, query_pos) + return self.forward_post(tgt, tgt_mask, + tgt_key_padding_mask, query_pos) + + +class CrossAttentionLayer(nn.Module): + + def __init__(self, d_model, nhead, dropout=0.0, + activation="relu", normalize_before=False): + super().__init__() + self.multihead_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout) + + self.norm = nn.LayerNorm(d_model) + self.dropout = nn.Dropout(dropout) + + self.activation = _get_activation_fn(activation) + self.normalize_before = normalize_before + + self._reset_parameters() + + def _reset_parameters(self): + for p in self.parameters(): + if p.dim() > 1: + nn.init.xavier_uniform_(p) + + def with_pos_embed(self, tensor, pos: Optional[Tensor]): + return tensor if pos is None else tensor + pos + + def forward_post(self, tgt, memory, + memory_mask: Optional[Tensor] = None, + memory_key_padding_mask: Optional[Tensor] = None, + pos: Optional[Tensor] = None, + query_pos: Optional[Tensor] = None): + tgt2 = self.multihead_attn(query=self.with_pos_embed(tgt, query_pos), + key=self.with_pos_embed(memory, pos), + value=memory, attn_mask=memory_mask, + key_padding_mask=memory_key_padding_mask)[0] + tgt = tgt + self.dropout(tgt2) + tgt = self.norm(tgt) + + return tgt + + def forward_pre(self, tgt, memory, + memory_mask: Optional[Tensor] = None, + memory_key_padding_mask: Optional[Tensor] = None, + pos: Optional[Tensor] = None, + query_pos: Optional[Tensor] = None): + tgt2 = self.norm(tgt) + tgt2 = self.multihead_attn(query=self.with_pos_embed(tgt2, query_pos), + key=self.with_pos_embed(memory, pos), + value=memory, attn_mask=memory_mask, + key_padding_mask=memory_key_padding_mask)[0] + tgt = tgt + self.dropout(tgt2) + + return tgt + + def forward(self, tgt, memory, + memory_mask: 
Optional[Tensor] = None, + memory_key_padding_mask: Optional[Tensor] = None, + pos: Optional[Tensor] = None, + query_pos: Optional[Tensor] = None): + if self.normalize_before: + return self.forward_pre(tgt, memory, memory_mask, + memory_key_padding_mask, pos, query_pos) + return self.forward_post(tgt, memory, memory_mask, + memory_key_padding_mask, pos, query_pos) + + +class FFNLayer(nn.Module): + + def __init__(self, d_model, dim_feedforward=2048, dropout=0.0, + activation="relu", normalize_before=False): + super().__init__() + # Implementation of Feedforward model + self.linear1 = nn.Linear(d_model, dim_feedforward) + self.dropout = nn.Dropout(dropout) + self.linear2 = nn.Linear(dim_feedforward, d_model) + + self.norm = nn.LayerNorm(d_model) + + self.activation = _get_activation_fn(activation) + self.normalize_before = normalize_before + + self._reset_parameters() + + def _reset_parameters(self): + for p in self.parameters(): + if p.dim() > 1: + nn.init.xavier_uniform_(p) + + def with_pos_embed(self, tensor, pos: Optional[Tensor]): + return tensor if pos is None else tensor + pos + + def forward_post(self, tgt): + tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt)))) + tgt = tgt + self.dropout(tgt2) + tgt = self.norm(tgt) + return tgt + + def forward_pre(self, tgt): + tgt2 = self.norm(tgt) + tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt2)))) + tgt = tgt + self.dropout(tgt2) + return tgt + + def forward(self, tgt): + if self.normalize_before: + return self.forward_pre(tgt) + return self.forward_post(tgt) + + +def _get_activation_fn(activation): + """Return an activation function given a string""" + if activation == "relu": + return F.relu + if activation == "gelu": + return F.gelu + if activation == "glu": + return F.glu + raise RuntimeError(F"activation should be relu/gelu, not {activation}.") + + +class MLP(nn.Module): + """ Very simple multi-layer perceptron (also called FFN)""" + + def __init__(self, input_dim, hidden_dim, output_dim, num_layers): + super().__init__() + self.num_layers = num_layers + h = [hidden_dim] * (num_layers - 1) + self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim])) + + def forward(self, x): + for i, layer in enumerate(self.layers): + x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x) + return x + + +@TRANSFORMER_DECODER_REGISTRY.register() +class ContrastiveMultiScaleMaskedTransformerDecoder(nn.Module): + + _version = 2 + + def _load_from_state_dict( + self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs + ): + version = local_metadata.get("version", None) + if version is None or version < 2: + # Do not warn if train from scratch + scratch = True + logger = logging.getLogger(__name__) + for k in list(state_dict.keys()): + newk = k + if "static_query" in k: + newk = k.replace("static_query", "query_feat") + if newk != k: + state_dict[newk] = state_dict[k] + del state_dict[k] + scratch = False + + if not scratch: + logger.warning( + f"Weight format of {self.__class__.__name__} have changed! " + "Please upgrade your models. Applying automatic conversion now ..." 
+ ) + + @configurable + def __init__( + self, + in_channels, + mask_classification=True, + *, + num_classes: int, + hidden_dim: int, + num_queries: int, + nheads: int, + dropout: float, + dim_feedforward: int, + enc_layers: int, + is_train: bool, + dec_layers: int, + class_dec_layers: int, + pre_norm: bool, + mask_dim: int, + enforce_input_project: bool, + use_task_norm: bool, + ): + """ + NOTE: this interface is experimental. + Args: + in_channels: channels of the input features + mask_classification: whether to add mask classifier or not + num_classes: number of classes + hidden_dim: Transformer feature dimension + num_queries: number of queries + nheads: number of heads + dim_feedforward: feature dimension in feedforward network + enc_layers: number of Transformer encoder layers + dec_layers: number of Transformer decoder layers + pre_norm: whether to use pre-LayerNorm or not + mask_dim: mask feature dimension + enforce_input_project: add input project 1x1 conv even if input + channels and hidden dim is identical + """ + super().__init__() + + assert mask_classification, "Only support mask classification model" + self.mask_classification = mask_classification + self.is_train = is_train + self.use_task_norm = use_task_norm + + # positional encoding + N_steps = hidden_dim // 2 + self.pe_layer = PositionEmbeddingSine(N_steps, normalize=True) + + self.class_transformer = Transformer( + d_model=hidden_dim, + dropout=dropout, + nhead=nheads, + dim_feedforward=dim_feedforward, + num_encoder_layers=enc_layers, + num_decoder_layers=class_dec_layers, + normalize_before=pre_norm, + return_intermediate_dec=False, + ) + + # define Transformer decoder here + self.num_heads = nheads + self.num_layers = dec_layers + self.transformer_self_attention_layers = nn.ModuleList() + self.transformer_cross_attention_layers = nn.ModuleList() + self.transformer_ffn_layers = nn.ModuleList() + + for _ in range(self.num_layers): + self.transformer_self_attention_layers.append( + SelfAttentionLayer( + d_model=hidden_dim, + nhead=nheads, + dropout=0.0, + normalize_before=pre_norm, + ) + ) + + self.transformer_cross_attention_layers.append( + CrossAttentionLayer( + d_model=hidden_dim, + nhead=nheads, + dropout=0.0, + normalize_before=pre_norm, + ) + ) + + self.transformer_ffn_layers.append( + FFNLayer( + d_model=hidden_dim, + dim_feedforward=dim_feedforward, + dropout=0.0, + normalize_before=pre_norm, + ) + ) + + self.decoder_norm = nn.LayerNorm(hidden_dim) + + self.num_queries = num_queries + # learnable query p.e. 
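+ # query_embed provides one learnable positional embedding per object query; it is
+ # broadcast over the batch in forward() and added to the queries at every
+ # self-/cross-attention layer via `query_pos`.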
+ self.query_embed = nn.Embedding(num_queries, hidden_dim) + + # level embedding (we always use 3 scales) + self.num_feature_levels = 3 + self.level_embed = nn.Embedding(self.num_feature_levels, hidden_dim) + self.input_proj = nn.ModuleList() + for _ in range(self.num_feature_levels): + if in_channels != hidden_dim or enforce_input_project: + self.input_proj.append(Conv2d(in_channels, hidden_dim, kernel_size=1)) + weight_init.c2_xavier_fill(self.input_proj[-1]) + else: + self.input_proj.append(nn.Sequential()) + + self.class_input_proj = Conv2d(in_channels, hidden_dim, kernel_size=1) + weight_init.c2_xavier_fill(self.class_input_proj) + + # output FFNs + if self.mask_classification: + self.class_embed = nn.Linear(hidden_dim, num_classes + 1) + self.mask_embed = MLP(hidden_dim, hidden_dim, mask_dim, 3) + + @classmethod + def from_config(cls, cfg, in_channels, mask_classification): + ret = {} + ret["in_channels"] = in_channels + ret["mask_classification"] = mask_classification + + ret["num_classes"] = cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES + ret["hidden_dim"] = cfg.MODEL.ONE_FORMER.HIDDEN_DIM + ret["num_queries"] = cfg.MODEL.ONE_FORMER.NUM_OBJECT_QUERIES + # Transformer parameters: + ret["nheads"] = cfg.MODEL.ONE_FORMER.NHEADS + ret["dim_feedforward"] = cfg.MODEL.ONE_FORMER.DIM_FEEDFORWARD + + # NOTE: because we add learnable query features which requires supervision, + # we add minus 1 to decoder layers to be consistent with our loss + # implementation: that is, number of auxiliary losses is always + # equal to number of decoder layers. With learnable query features, the number of + # auxiliary losses equals number of decoders plus 1. + assert cfg.MODEL.ONE_FORMER.DEC_LAYERS >= 1 + ret["dec_layers"] = cfg.MODEL.ONE_FORMER.DEC_LAYERS - 1 + ret["class_dec_layers"] = cfg.MODEL.ONE_FORMER.CLASS_DEC_LAYERS + ret["enc_layers"] = cfg.MODEL.ONE_FORMER.ENC_LAYERS + ret["dropout"] = cfg.MODEL.ONE_FORMER.DROPOUT + ret["pre_norm"] = cfg.MODEL.ONE_FORMER.PRE_NORM + ret["enforce_input_project"] = cfg.MODEL.ONE_FORMER.ENFORCE_INPUT_PROJ + ret["is_train"] = cfg.MODEL.IS_TRAIN + ret["mask_dim"] = cfg.MODEL.SEM_SEG_HEAD.MASK_DIM + ret["use_task_norm"] = cfg.MODEL.ONE_FORMER.USE_TASK_NORM + + return ret + + def forward(self, x, mask_features, tasks, mask = None): + # x is a list of multi-scale feature + assert len(x) == self.num_feature_levels + src = [] + pos = [] + size_list = [] + + # disable mask, it does not affect performance + del mask + + for i in range(self.num_feature_levels): + size_list.append(x[i].shape[-2:]) + pos.append(self.pe_layer(x[i], None).flatten(2)) + src.append(self.input_proj[i](x[i]).flatten(2) + self.level_embed.weight[i][None, :, None]) + + # flatten NxCxHxW to HWxNxC + pos[-1] = pos[-1].permute(2, 0, 1) + src[-1] = src[-1].permute(2, 0, 1) + + _, bs, _ = src[0].shape + + # QxNxC + query_embed = self.query_embed.weight.unsqueeze(1).repeat(1, bs, 1) + tasks = tasks.unsqueeze(0) + if self.use_task_norm: + tasks = self.decoder_norm(tasks) + + feats = self.pe_layer(mask_features, None) + + out_t, _ = self.class_transformer(feats, None, + self.query_embed.weight[:-1], + self.class_input_proj(mask_features), + tasks if self.use_task_norm else None) + out_t = out_t[0].permute(1, 0, 2) + + out = torch.cat([out_t, tasks], dim=0) + + output = out.clone() + + predictions_class = [] + predictions_mask = [] + + # prediction heads on learnable query features + outputs_class, outputs_mask, attn_mask = self.forward_prediction_heads(output, mask_features, attn_mask_target_size=size_list[0], i=0) + 
predictions_class.append(outputs_class) + predictions_mask.append(outputs_mask) + + for i in range(self.num_layers): + level_index = i % self.num_feature_levels + attn_mask[torch.where(attn_mask.sum(-1) == attn_mask.shape[-1])] = False + # attention: cross-attention first + output = self.transformer_cross_attention_layers[i]( + output, src[level_index], + memory_mask=attn_mask, + memory_key_padding_mask=None, # here we do not apply masking on padded region + pos=pos[level_index], query_pos=query_embed + ) + + output = self.transformer_self_attention_layers[i]( + output, tgt_mask=None, + tgt_key_padding_mask=None, + query_pos=query_embed + ) + + # FFN + output = self.transformer_ffn_layers[i]( + output + ) + + outputs_class, outputs_mask, attn_mask = self.forward_prediction_heads(output, mask_features, attn_mask_target_size=size_list[(i + 1) % self.num_feature_levels], i=i+1) + predictions_class.append(outputs_class) + predictions_mask.append(outputs_mask) + + assert len(predictions_class) == self.num_layers + 1 + if self.is_train: + query_class = out.permute(1, 0, 2) + else: + query_class = None + out = { + 'contrastive_logits': query_class, + 'pred_logits': predictions_class[-1], + 'pred_masks': predictions_mask[-1], + 'aux_outputs': self._set_aux_loss( + predictions_class if self.mask_classification else None, + predictions_mask, + ) + } + + return out + + def forward_prediction_heads(self, output, mask_features, attn_mask_target_size, i): + decoder_output = self.decoder_norm(output) + decoder_output = decoder_output.transpose(0, 1) + outputs_class = self.class_embed(decoder_output) + mask_embed = self.mask_embed(decoder_output) + outputs_mask = torch.einsum("bqc,bchw->bqhw", mask_embed, mask_features) + + # NOTE: prediction is of higher-resolution + # [B, Q, H, W] -> [B, Q, H*W] -> [B, h, Q, H*W] -> [B*h, Q, HW] + attn_mask = F.interpolate(outputs_mask, size=attn_mask_target_size, mode="bilinear", align_corners=False) + + # save_attn_masks(attn_mask.sigmoid() < 0.5, fname=f'demo/maps/{i}_pre_bool') + + # must use bool type + # If a BoolTensor is provided, positions with ``True`` are not allowed to attend while ``False`` values will be unchanged. + attn_mask = (attn_mask.sigmoid().flatten(2).unsqueeze(1).repeat(1, self.num_heads, 1, 1).flatten(0, 1) < 0.5).bool() + attn_mask = attn_mask.detach() + + return outputs_class, outputs_mask, attn_mask + + @torch.jit.unused + def _set_aux_loss(self, outputs_class, outputs_seg_masks): + # this is a workaround to make torchscript happy, as torchscript + # doesn't support dictionary with non-homogeneous values, such + # as a dict having both a Tensor and a list. 
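+ # Every intermediate decoder layer's predictions (all entries except the last) are
+ # exposed as auxiliary outputs so a deep-supervision loss can be applied per layer;
+ # the final layer's predictions are returned separately as 'pred_logits' / 'pred_masks'.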
+ if self.mask_classification: + aux_list = [ + {"pred_logits": a, "pred_masks": b} + for a, b in zip(outputs_class[:-1], outputs_seg_masks[:-1]) + ] + else: + aux_list = [{"pred_masks": b} for b, in outputs_seg_masks[:-1]] + + return aux_list \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/transformer_decoder/position_encoding.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/transformer_decoder/position_encoding.py new file mode 100644 index 0000000000000000000000000000000000000000..051984d9ea6e04e834f6fae3daf7d8317c2f0819 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/transformer_decoder/position_encoding.py @@ -0,0 +1,67 @@ +# ------------------------------------------------------------------------------ +# Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/modeling/transformer_decoder/position_encoding.py +# Modified by Jitesh Jain (https://github.com/praeclarumjj3) +# ------------------------------------------------------------------------------ + +""" +Various positional encodings for the transformer. +""" +import math + +import torch +from torch import nn + + +class PositionEmbeddingSine(nn.Module): + """ + This is a more standard version of the position embedding, very similar to the one + used by the Attention is all you need paper, generalized to work on images. + """ + + def __init__(self, num_pos_feats=64, temperature=10000, normalize=False, scale=None): + super().__init__() + self.num_pos_feats = num_pos_feats + self.temperature = temperature + self.normalize = normalize + if scale is not None and normalize is False: + raise ValueError("normalize should be True if scale is passed") + if scale is None: + scale = 2 * math.pi + self.scale = scale + + def forward(self, x, mask=None): + if mask is None: + mask = torch.zeros((x.size(0), x.size(2), x.size(3)), device=x.device, dtype=torch.bool) + not_mask = ~mask + y_embed = not_mask.cumsum(1, dtype=torch.float32) + x_embed = not_mask.cumsum(2, dtype=torch.float32) + if self.normalize: + eps = 1e-6 + y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale + x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale + + dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device) + dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats) + + pos_x = x_embed[:, :, :, None] / dim_t + pos_y = y_embed[:, :, :, None] / dim_t + pos_x = torch.stack( + (pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4 + ).flatten(3) + pos_y = torch.stack( + (pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4 + ).flatten(3) + pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2) + return pos + + def __repr__(self, _repr_indent=4): + head = "Positional encoding " + self.__class__.__name__ + body = [ + "num_pos_feats: {}".format(self.num_pos_feats), + "temperature: {}".format(self.temperature), + "normalize: {}".format(self.normalize), + "scale: {}".format(self.scale), + ] + # _repr_indent = 4 + lines = [head] + [" " * _repr_indent + line for line in body] + return "\n".join(lines) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/transformer_decoder/text_transformer.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/transformer_decoder/text_transformer.py new file mode 100644 index 
0000000000000000000000000000000000000000..d0b7292018ecfbf4111c0da9c90444d0e1e41cb6 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/transformer_decoder/text_transformer.py @@ -0,0 +1,257 @@ +# ------------------------------------------------------------------------- +# MIT License +# +# Copyright (c) 2021 OpenAI +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +# ------------------------------------------------------------------------- + +import torch +import torch.utils.checkpoint as checkpoint +from torch import nn +from collections import OrderedDict +from timm.models.layers import trunc_normal_ + +class Attention(nn.Module): + def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0.): + super().__init__() + self.num_heads = num_heads + head_dim = dim // num_heads + # NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights + self.scale = qk_scale or head_dim ** -0.5 + + self.q_proj = nn.Linear(dim, dim, bias=qkv_bias) + self.k_proj = nn.Linear(dim, dim, bias=qkv_bias) + self.v_proj = nn.Linear(dim, dim, bias=qkv_bias) + + + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + + def forward(self, q, k, v): + B, N, C = q.shape + assert k.shape == v.shape + B, M, C = k.shape + q = self.q_proj(q).reshape(B, N, self.num_heads, C // self.num_heads) + k = self.k_proj(k).reshape(B, M, self.num_heads, C // self.num_heads) + v = self.v_proj(v).reshape(B, M, self.num_heads, C // self.num_heads) + + attn = torch.einsum('bnkc,bmkc->bknm', q, k) * self.scale + + attn = attn.softmax(dim=-1) + + x = torch.einsum('bknm,bmkc->bnkc', attn, v).reshape(B, N, C) + + x = self.proj(x) + x = self.proj_drop(x) + return x + +class TransformerDecoderLayer(nn.Module): + def __init__( + self, + d_model, + nhead, + dropout=0.1, + ): + super().__init__() + self.self_attn = Attention(d_model, nhead, proj_drop=dropout) + self.cross_attn = Attention(d_model, nhead, proj_drop=dropout) + + self.norm1 = nn.LayerNorm(d_model) + self.norm2 = nn.LayerNorm(d_model) + self.norm3 = nn.LayerNorm(d_model) + self.dropout = nn.Dropout(dropout) + + self.mlp = nn.Sequential( + nn.Linear(d_model, d_model * 4), + nn.GELU(), + nn.Dropout(dropout), + nn.Linear(d_model * 4, d_model) + ) + + def forward(self, x, mem): + q = k = v = self.norm1(x) + x = x + self.self_attn(q, k, v) + q = self.norm2(x) + x = x + self.cross_attn(q, mem, 
mem) + x = x + self.dropout(self.mlp(self.norm3(x))) + return x + + +class ContextDecoder(nn.Module): + def __init__(self, + transformer_width=256, + transformer_heads=4, + transformer_layers=6, + visual_dim=1024, + dropout=0.1, + **kwargs): + super().__init__() + + self.memory_proj = nn.Sequential( + nn.LayerNorm(visual_dim), + nn.Linear(visual_dim, transformer_width), + nn.LayerNorm(transformer_width), + ) + + self.text_proj = nn.Sequential( + nn.LayerNorm(visual_dim), + nn.Linear(visual_dim, transformer_width), + ) + + self.decoder = nn.ModuleList([ + TransformerDecoderLayer(transformer_width, transformer_heads, dropout) for _ in range(transformer_layers) + ]) + + self.out_proj = nn.Sequential( + nn.LayerNorm(transformer_width), + nn.Linear(transformer_width, visual_dim) + ) + + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + + + def forward(self, text, visual): + B, N, C = visual.shape + visual = self.memory_proj(visual) + x = self.text_proj(text) + + for layer in self.decoder: + x = layer(x, visual) + + return self.out_proj(x) + + +class QuickGELU(nn.Module): + + def forward(self, x: torch.Tensor): + return x * torch.sigmoid(1.702 * x) + + +class ResidualAttentionBlock(nn.Module): + + def __init__(self, d_model: int, n_head: int, attn_mask: torch.Tensor = None): + super().__init__() + + self.attn = nn.MultiheadAttention(d_model, n_head) + self.ln_1 = nn.LayerNorm(d_model) + self.mlp = nn.Sequential( + OrderedDict([('c_fc', nn.Linear(d_model, d_model * 4)), ('gelu', QuickGELU()), + ('c_proj', nn.Linear(d_model * 4, d_model))])) + self.ln_2 = nn.LayerNorm(d_model) + self.attn_mask = attn_mask + + def attention(self, x: torch.Tensor, key_padding_mask: torch.Tensor): + self.attn_mask = self.attn_mask.to(dtype=x.dtype, device=x.device) if self.attn_mask is not None else None + return self.attn(x, x, x, need_weights=False, attn_mask=self.attn_mask, key_padding_mask=key_padding_mask)[0] + + def forward(self, x: torch.Tensor, key_padding_mask=None): + x = x + self.attention(self.ln_1(x), key_padding_mask=key_padding_mask) + x = x + self.mlp(self.ln_2(x)) + return x + +class Transformer(nn.Module): + + def __init__(self, width: int, layers: int, heads: int, attn_mask: torch.Tensor = None, use_checkpoint=False): + super().__init__() + self.width = width + self.layers = layers + self.resblocks = nn.Sequential(*[ResidualAttentionBlock(width, heads, attn_mask) for _ in range(layers)]) + proj_std = (self.width**-0.5) * ((2 * self.layers)**-0.5) + attn_std = self.width**-0.5 + fc_std = (2 * self.width)**-0.5 + for block in self.resblocks: + nn.init.normal_(block.attn.in_proj_weight, std=attn_std) + nn.init.normal_(block.attn.out_proj.weight, std=proj_std) + nn.init.normal_(block.mlp.c_fc.weight, std=fc_std) + nn.init.normal_(block.mlp.c_proj.weight, std=proj_std) + + self.use_checkpoint = use_checkpoint + + def forward(self, x: torch.Tensor): + for resblock in self.resblocks: + if self.use_checkpoint: + x = checkpoint.checkpoint(resblock, x) + else: + x = resblock(x) + return x + + +class TextTransformer(nn.Module): + + def __init__( + self, + context_length: int, + width: int, + layers: int, + vocab_size, + use_checkpoint=False, + ): + + super().__init__() + heads = width // 64 + self.context_length = context_length + self.width = 
width + self.transformer = Transformer( + width=width, + layers=layers, + heads=heads, + attn_mask=self.build_attention_mask(), + use_checkpoint=use_checkpoint) + + self.positional_embedding = nn.Parameter(torch.empty(self.context_length, width)) + self.ln_final = nn.LayerNorm(width) + self.token_embedding = nn.Embedding(vocab_size, width) + nn.init.normal_(self.token_embedding.weight, std=0.02) + + # initialization + nn.init.normal_(self.positional_embedding, std=0.01) + + def build_attention_mask(self): + # lazily create causal attention mask, with full attention between the vision tokens + # pytorch uses additive attention mask; fill with -inf + mask = torch.empty(self.context_length, self.context_length) + mask.fill_(float('-inf')) + mask.triu_(1) # zero out the lower diagonal + return mask + + def forward(self, text): + x = self.token_embedding(text) + x = x + self.positional_embedding + x = x.permute(1, 0, 2) # NLD -> LND + x = self.transformer(x) + x = x.permute(1, 0, 2) # LND -> NLD + x = self.ln_final(x) + + # x.shape = [batch_size, n_ctx, transformer.width] + # take features from the eot embedding (eot_token is the highest number in each sequence) + x = x[torch.arange(x.shape[0]), text.argmax(dim=-1)] + + return x \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/transformer_decoder/transformer.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/transformer_decoder/transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..cd07525673b9b1165e1fdd0c9990a8f29c84f199 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/modeling/transformer_decoder/transformer.py @@ -0,0 +1,376 @@ +# ------------------------------------------------------------------------------ +# Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/modeling/transformer_decoder/transformer.py +# Modified by Jitesh Jain (https://github.com/praeclarumjj3) +# ------------------------------------------------------------------------------ + +""" +Transformer class. 
+ +Copy-paste from torch.nn.Transformer with modifications: + * positional encodings are passed in MHattention + * extra LN at the end of encoder is removed + * decoder returns a stack of activations from all decoding layers +""" +import copy +from typing import List, Optional + +import torch +import torch.nn.functional as F +from torch import Tensor, nn + + +class Transformer(nn.Module): + def __init__( + self, + d_model=512, + nhead=8, + num_encoder_layers=6, + num_decoder_layers=6, + dim_feedforward=2048, + dropout=0.1, + activation="relu", + normalize_before=False, + return_intermediate_dec=False, + ): + super().__init__() + + encoder_layer = TransformerEncoderLayer( + d_model, nhead, dim_feedforward, dropout, activation, normalize_before + ) + encoder_norm = nn.LayerNorm(d_model) if normalize_before else None + self.encoder = TransformerEncoder(encoder_layer, num_encoder_layers, encoder_norm) + + decoder_layer = TransformerDecoderLayer( + d_model, nhead, dim_feedforward, dropout, activation, normalize_before + ) + decoder_norm = nn.LayerNorm(d_model) + self.decoder = TransformerDecoder( + decoder_layer, + num_decoder_layers, + decoder_norm, + return_intermediate=return_intermediate_dec, + ) + + self._reset_parameters() + + self.d_model = d_model + self.nhead = nhead + + def _reset_parameters(self): + for p in self.parameters(): + if p.dim() > 1: + nn.init.xavier_uniform_(p) + + def forward(self, src, mask, query_embed, pos_embed, task_token=None): + # flatten NxCxHxW to HWxNxC + bs, c, h, w = src.shape + src = src.flatten(2).permute(2, 0, 1) + pos_embed = pos_embed.flatten(2).permute(2, 0, 1) + query_embed = query_embed.unsqueeze(1).repeat(1, bs, 1) + if mask is not None: + mask = mask.flatten(1) + + if task_token is None: + tgt = torch.zeros_like(query_embed) + else: + tgt = task_token.repeat(query_embed.shape[0], 1, 1) + + memory = self.encoder(src, src_key_padding_mask=mask, pos=pos_embed) + hs = self.decoder( + tgt, memory, memory_key_padding_mask=mask, pos=pos_embed, query_pos=query_embed + ) + return hs.transpose(1, 2), memory.permute(1, 2, 0).view(bs, c, h, w) + + +class TransformerEncoder(nn.Module): + def __init__(self, encoder_layer, num_layers, norm=None): + super().__init__() + self.layers = _get_clones(encoder_layer, num_layers) + self.num_layers = num_layers + self.norm = norm + + def forward( + self, + src, + mask: Optional[Tensor] = None, + src_key_padding_mask: Optional[Tensor] = None, + pos: Optional[Tensor] = None, + ): + output = src + + for layer in self.layers: + output = layer( + output, src_mask=mask, src_key_padding_mask=src_key_padding_mask, pos=pos + ) + + if self.norm is not None: + output = self.norm(output) + + return output + + +class TransformerDecoder(nn.Module): + def __init__(self, decoder_layer, num_layers, norm=None, return_intermediate=False): + super().__init__() + self.layers = _get_clones(decoder_layer, num_layers) + self.num_layers = num_layers + self.norm = norm + self.return_intermediate = return_intermediate + + def forward( + self, + tgt, + memory, + tgt_mask: Optional[Tensor] = None, + memory_mask: Optional[Tensor] = None, + tgt_key_padding_mask: Optional[Tensor] = None, + memory_key_padding_mask: Optional[Tensor] = None, + pos: Optional[Tensor] = None, + query_pos: Optional[Tensor] = None, + ): + output = tgt + + intermediate = [] + + for layer in self.layers: + output = layer( + output, + memory, + tgt_mask=tgt_mask, + memory_mask=memory_mask, + tgt_key_padding_mask=tgt_key_padding_mask, + 
memory_key_padding_mask=memory_key_padding_mask, + pos=pos, + query_pos=query_pos, + ) + if self.return_intermediate: + intermediate.append(self.norm(output)) + + if self.norm is not None: + output = self.norm(output) + if self.return_intermediate: + intermediate.pop() + intermediate.append(output) + + if self.return_intermediate: + return torch.stack(intermediate) + + return output.unsqueeze(0) + + +class TransformerEncoderLayer(nn.Module): + def __init__( + self, + d_model, + nhead, + dim_feedforward=2048, + dropout=0.1, + activation="relu", + normalize_before=False, + ): + super().__init__() + self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout) + # Implementation of Feedforward model + self.linear1 = nn.Linear(d_model, dim_feedforward) + self.dropout = nn.Dropout(dropout) + self.linear2 = nn.Linear(dim_feedforward, d_model) + + self.norm1 = nn.LayerNorm(d_model) + self.norm2 = nn.LayerNorm(d_model) + self.dropout1 = nn.Dropout(dropout) + self.dropout2 = nn.Dropout(dropout) + + self.activation = _get_activation_fn(activation) + self.normalize_before = normalize_before + + def with_pos_embed(self, tensor, pos: Optional[Tensor]): + return tensor if pos is None else tensor + pos + + def forward_post( + self, + src, + src_mask: Optional[Tensor] = None, + src_key_padding_mask: Optional[Tensor] = None, + pos: Optional[Tensor] = None, + ): + q = k = self.with_pos_embed(src, pos) + src2 = self.self_attn( + q, k, value=src, attn_mask=src_mask, key_padding_mask=src_key_padding_mask + )[0] + src = src + self.dropout1(src2) + src = self.norm1(src) + src2 = self.linear2(self.dropout(self.activation(self.linear1(src)))) + src = src + self.dropout2(src2) + src = self.norm2(src) + return src + + def forward_pre( + self, + src, + src_mask: Optional[Tensor] = None, + src_key_padding_mask: Optional[Tensor] = None, + pos: Optional[Tensor] = None, + ): + src2 = self.norm1(src) + q = k = self.with_pos_embed(src2, pos) + src2 = self.self_attn( + q, k, value=src2, attn_mask=src_mask, key_padding_mask=src_key_padding_mask + )[0] + src = src + self.dropout1(src2) + src2 = self.norm2(src) + src2 = self.linear2(self.dropout(self.activation(self.linear1(src2)))) + src = src + self.dropout2(src2) + return src + + def forward( + self, + src, + src_mask: Optional[Tensor] = None, + src_key_padding_mask: Optional[Tensor] = None, + pos: Optional[Tensor] = None, + ): + if self.normalize_before: + return self.forward_pre(src, src_mask, src_key_padding_mask, pos) + return self.forward_post(src, src_mask, src_key_padding_mask, pos) + + +class TransformerDecoderLayer(nn.Module): + def __init__( + self, + d_model, + nhead, + dim_feedforward=2048, + dropout=0.1, + activation="relu", + normalize_before=False, + ): + super().__init__() + self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout) + self.multihead_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout) + # Implementation of Feedforward model + self.linear1 = nn.Linear(d_model, dim_feedforward) + self.dropout = nn.Dropout(dropout) + self.linear2 = nn.Linear(dim_feedforward, d_model) + + self.norm1 = nn.LayerNorm(d_model) + self.norm2 = nn.LayerNorm(d_model) + self.norm3 = nn.LayerNorm(d_model) + self.dropout1 = nn.Dropout(dropout) + self.dropout2 = nn.Dropout(dropout) + self.dropout3 = nn.Dropout(dropout) + + self.activation = _get_activation_fn(activation) + self.normalize_before = normalize_before + + def with_pos_embed(self, tensor, pos: Optional[Tensor]): + return tensor if pos is None else tensor + pos + + def forward_post( 
+ self, + tgt, + memory, + tgt_mask: Optional[Tensor] = None, + memory_mask: Optional[Tensor] = None, + tgt_key_padding_mask: Optional[Tensor] = None, + memory_key_padding_mask: Optional[Tensor] = None, + pos: Optional[Tensor] = None, + query_pos: Optional[Tensor] = None, + ): + q = k = self.with_pos_embed(tgt, query_pos) + tgt2 = self.self_attn( + q, k, value=tgt, attn_mask=tgt_mask, key_padding_mask=tgt_key_padding_mask + )[0] + tgt = tgt + self.dropout1(tgt2) + tgt = self.norm1(tgt) + tgt2 = self.multihead_attn( + query=self.with_pos_embed(tgt, query_pos), + key=self.with_pos_embed(memory, pos), + value=memory, + attn_mask=memory_mask, + key_padding_mask=memory_key_padding_mask, + )[0] + tgt = tgt + self.dropout2(tgt2) + tgt = self.norm2(tgt) + tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt)))) + tgt = tgt + self.dropout3(tgt2) + tgt = self.norm3(tgt) + return tgt + + def forward_pre( + self, + tgt, + memory, + tgt_mask: Optional[Tensor] = None, + memory_mask: Optional[Tensor] = None, + tgt_key_padding_mask: Optional[Tensor] = None, + memory_key_padding_mask: Optional[Tensor] = None, + pos: Optional[Tensor] = None, + query_pos: Optional[Tensor] = None, + ): + tgt2 = self.norm1(tgt) + q = k = self.with_pos_embed(tgt2, query_pos) + tgt2 = self.self_attn( + q, k, value=tgt2, attn_mask=tgt_mask, key_padding_mask=tgt_key_padding_mask + )[0] + tgt = tgt + self.dropout1(tgt2) + tgt2 = self.norm2(tgt) + tgt2 = self.multihead_attn( + query=self.with_pos_embed(tgt2, query_pos), + key=self.with_pos_embed(memory, pos), + value=memory, + attn_mask=memory_mask, + key_padding_mask=memory_key_padding_mask, + )[0] + tgt = tgt + self.dropout2(tgt2) + tgt2 = self.norm3(tgt) + tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt2)))) + tgt = tgt + self.dropout3(tgt2) + return tgt + + def forward( + self, + tgt, + memory, + tgt_mask: Optional[Tensor] = None, + memory_mask: Optional[Tensor] = None, + tgt_key_padding_mask: Optional[Tensor] = None, + memory_key_padding_mask: Optional[Tensor] = None, + pos: Optional[Tensor] = None, + query_pos: Optional[Tensor] = None, + ): + if self.normalize_before: + return self.forward_pre( + tgt, + memory, + tgt_mask, + memory_mask, + tgt_key_padding_mask, + memory_key_padding_mask, + pos, + query_pos, + ) + return self.forward_post( + tgt, + memory, + tgt_mask, + memory_mask, + tgt_key_padding_mask, + memory_key_padding_mask, + pos, + query_pos, + ) + + +def _get_clones(module, N): + return nn.ModuleList([copy.deepcopy(module) for i in range(N)]) + + +def _get_activation_fn(activation): + """Return an activation function given a string""" + if activation == "relu": + return F.relu + if activation == "gelu": + return F.gelu + if activation == "glu": + return F.glu + raise RuntimeError(f"activation should be relu/gelu, not {activation}.") diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/oneformer_model.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/oneformer_model.py new file mode 100644 index 0000000000000000000000000000000000000000..8bb18a85a8ecdfa6a7bef912bd6eb038e79e5251 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/oneformer_model.py @@ -0,0 +1,470 @@ +# ------------------------------------------------------------------------------ +# Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/maskformer_model.py +# Modified by Jitesh Jain (https://github.com/praeclarumjj3) +# 
------------------------------------------------------------------------------ + +from typing import Tuple + +import torch +from torch import nn +from torch.nn import functional as F + +from annotator.oneformer.detectron2.config import configurable +from annotator.oneformer.detectron2.data import MetadataCatalog +from annotator.oneformer.detectron2.modeling import META_ARCH_REGISTRY, build_backbone, build_sem_seg_head +from annotator.oneformer.detectron2.modeling.backbone import Backbone +from annotator.oneformer.detectron2.modeling.postprocessing import sem_seg_postprocess +from annotator.oneformer.detectron2.structures import Boxes, ImageList, Instances, BitMasks +from annotator.oneformer.detectron2.utils.memory import retry_if_cuda_oom + +from .modeling.matcher import HungarianMatcher +from einops import rearrange +from .modeling.transformer_decoder.text_transformer import TextTransformer +from .modeling.transformer_decoder.oneformer_transformer_decoder import MLP +from annotator.oneformer.oneformer.data.tokenizer import SimpleTokenizer, Tokenize + +@META_ARCH_REGISTRY.register() +class OneFormer(nn.Module): + """ + Main class for mask classification semantic segmentation architectures. + """ + + @configurable + def __init__( + self, + *, + backbone: Backbone, + sem_seg_head: nn.Module, + task_mlp: nn.Module, + text_encoder: nn.Module, + text_projector: nn.Module, + prompt_ctx: nn.Embedding, + num_queries: int, + object_mask_threshold: float, + overlap_threshold: float, + metadata, + size_divisibility: int, + sem_seg_postprocess_before_inference: bool, + pixel_mean: Tuple[float], + pixel_std: Tuple[float], + # inference + semantic_on: bool, + panoptic_on: bool, + instance_on: bool, + detection_on: bool, + test_topk_per_image: int, + task_seq_len: int, + max_seq_len: int, + is_demo: bool, + ): + """ + Args: + backbone: a backbone module, must follow detectron2's backbone interface + sem_seg_head: a module that predicts semantic segmentation from backbone features + criterion: a module that defines the loss + num_queries: int, number of queries + object_mask_threshold: float, threshold to filter query based on classification score + for panoptic segmentation inference + overlap_threshold: overlap threshold used in general inference for panoptic segmentation + metadata: dataset meta, get `thing` and `stuff` category names for panoptic + segmentation inference + size_divisibility: Some backbones require the input height and width to be divisible by a + specific integer. We can use this to override such requirement. + sem_seg_postprocess_before_inference: whether to resize the prediction back + to original input size before semantic segmentation inference or after. + For high-resolution dataset like Mapillary, resizing predictions before + inference will cause OOM error. 
+ pixel_mean, pixel_std: list or tuple with #channels element, representing + the per-channel mean and std to be used to normalize the input image + semantic_on: bool, whether to output semantic segmentation prediction + instance_on: bool, whether to output instance segmentation prediction + panoptic_on: bool, whether to output panoptic segmentation prediction + test_topk_per_image: int, instance segmentation parameter, keep topk instances per image + """ + super().__init__() + self.backbone = backbone + self.sem_seg_head = sem_seg_head + self.task_mlp = task_mlp + self.text_encoder = text_encoder + self.text_projector = text_projector + self.prompt_ctx = prompt_ctx + self.num_queries = num_queries + self.overlap_threshold = overlap_threshold + self.object_mask_threshold = object_mask_threshold + self.metadata = metadata + if size_divisibility < 0: + # use backbone size_divisibility if not set + size_divisibility = self.backbone.size_divisibility + self.size_divisibility = size_divisibility + self.sem_seg_postprocess_before_inference = sem_seg_postprocess_before_inference + self.register_buffer("pixel_mean", torch.Tensor(pixel_mean).view(-1, 1, 1), False) + self.register_buffer("pixel_std", torch.Tensor(pixel_std).view(-1, 1, 1), False) + + # additional args + self.semantic_on = semantic_on + self.instance_on = instance_on + self.panoptic_on = panoptic_on + self.detection_on = detection_on + self.test_topk_per_image = test_topk_per_image + + self.text_tokenizer = Tokenize(SimpleTokenizer(), max_seq_len=max_seq_len) + self.task_tokenizer = Tokenize(SimpleTokenizer(), max_seq_len=task_seq_len) + self.is_demo = is_demo + + self.thing_indices = [k for k in self.metadata.thing_dataset_id_to_contiguous_id.keys()] + + if not self.semantic_on: + assert self.sem_seg_postprocess_before_inference + + @classmethod + def from_config(cls, cfg): + backbone = build_backbone(cfg) + sem_seg_head = build_sem_seg_head(cfg, backbone.output_shape()) + + if cfg.MODEL.IS_TRAIN: + text_encoder = TextTransformer(context_length=cfg.MODEL.TEXT_ENCODER.CONTEXT_LENGTH, + width=cfg.MODEL.TEXT_ENCODER.WIDTH, + layers=cfg.MODEL.TEXT_ENCODER.NUM_LAYERS, + vocab_size=cfg.MODEL.TEXT_ENCODER.VOCAB_SIZE) + text_projector = MLP(text_encoder.width, cfg.MODEL.ONE_FORMER.HIDDEN_DIM, + cfg.MODEL.ONE_FORMER.HIDDEN_DIM, cfg.MODEL.TEXT_ENCODER.PROJ_NUM_LAYERS) + if cfg.MODEL.TEXT_ENCODER.N_CTX > 0: + prompt_ctx = nn.Embedding(cfg.MODEL.TEXT_ENCODER.N_CTX, cfg.MODEL.TEXT_ENCODER.WIDTH) + else: + prompt_ctx = None + else: + text_encoder = None + text_projector = None + prompt_ctx = None + + task_mlp = MLP(cfg.INPUT.TASK_SEQ_LEN, cfg.MODEL.ONE_FORMER.HIDDEN_DIM, + cfg.MODEL.ONE_FORMER.HIDDEN_DIM, 2) + + # Loss parameters: + deep_supervision = cfg.MODEL.ONE_FORMER.DEEP_SUPERVISION + no_object_weight = cfg.MODEL.ONE_FORMER.NO_OBJECT_WEIGHT + + # loss weights + class_weight = cfg.MODEL.ONE_FORMER.CLASS_WEIGHT + dice_weight = cfg.MODEL.ONE_FORMER.DICE_WEIGHT + mask_weight = cfg.MODEL.ONE_FORMER.MASK_WEIGHT + contrastive_weight = cfg.MODEL.ONE_FORMER.CONTRASTIVE_WEIGHT + + # building criterion + matcher = HungarianMatcher( + cost_class=class_weight, + cost_mask=mask_weight, + cost_dice=dice_weight, + num_points=cfg.MODEL.ONE_FORMER.TRAIN_NUM_POINTS, + ) + + weight_dict = {"loss_ce": class_weight, "loss_mask": mask_weight, + "loss_dice": dice_weight, "loss_contrastive": contrastive_weight} + + + if deep_supervision: + dec_layers = cfg.MODEL.ONE_FORMER.DEC_LAYERS + aux_weight_dict = {} + for i in range(dec_layers - 1): + 
aux_weight_dict.update({k + f"_{i}": v for k, v in weight_dict.items()}) + weight_dict.update(aux_weight_dict) + + losses = ["labels", "masks", "contrastive"] + + return { + "backbone": backbone, + "sem_seg_head": sem_seg_head, + "task_mlp": task_mlp, + "prompt_ctx": prompt_ctx, + "text_encoder": text_encoder, + "text_projector": text_projector, + "num_queries": cfg.MODEL.ONE_FORMER.NUM_OBJECT_QUERIES, + "object_mask_threshold": cfg.MODEL.TEST.OBJECT_MASK_THRESHOLD, + "overlap_threshold": cfg.MODEL.TEST.OVERLAP_THRESHOLD, + "metadata": MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), + "size_divisibility": cfg.MODEL.ONE_FORMER.SIZE_DIVISIBILITY, + "sem_seg_postprocess_before_inference": ( + cfg.MODEL.TEST.SEM_SEG_POSTPROCESSING_BEFORE_INFERENCE + or cfg.MODEL.TEST.PANOPTIC_ON + or cfg.MODEL.TEST.INSTANCE_ON + ), + "pixel_mean": cfg.MODEL.PIXEL_MEAN, + "pixel_std": cfg.MODEL.PIXEL_STD, + # inference + "semantic_on": cfg.MODEL.TEST.SEMANTIC_ON, + "instance_on": cfg.MODEL.TEST.INSTANCE_ON, + "panoptic_on": cfg.MODEL.TEST.PANOPTIC_ON, + "detection_on": cfg.MODEL.TEST.DETECTION_ON, + "test_topk_per_image": cfg.TEST.DETECTIONS_PER_IMAGE, + "task_seq_len": cfg.INPUT.TASK_SEQ_LEN, + "max_seq_len": cfg.INPUT.MAX_SEQ_LEN, + "is_demo": cfg.MODEL.IS_DEMO, + } + + @property + def device(self): + return self.pixel_mean.device + + def encode_text(self, text): + assert text.ndim in [2, 3], text.ndim + b = text.shape[0] + squeeze_dim = False + num_text = 1 + if text.ndim == 3: + num_text = text.shape[1] + text = rearrange(text, 'b n l -> (b n) l', n=num_text) + squeeze_dim = True + + # [B, C] + x = self.text_encoder(text) + + text_x = self.text_projector(x) + + if squeeze_dim: + text_x = rearrange(text_x, '(b n) c -> b n c', n=num_text) + if self.prompt_ctx is not None: + text_ctx = self.prompt_ctx.weight.unsqueeze(0).repeat(text_x.shape[0], 1, 1) + text_x = torch.cat([text_x, text_ctx], dim=1) + + return {"texts": text_x} + + def forward(self, batched_inputs): + """ + Args: + batched_inputs: a list, batched outputs of :class:`DatasetMapper`. + Each item in the list contains the inputs for one image. + For now, each item in the list is a dict that contains: + * "image": Tensor, image in (C, H, W) format. + * "instances": per-region ground truth + * Other information that's included in the original dicts, such as: + "height", "width" (int): the output resolution of the model (may be different + from input resolution), used in inference. + Returns: + list[dict]: + each dict has the results for one image. The dict contains the following keys: + * "sem_seg": + A Tensor that represents the + per-pixel segmentation prediced by the head. + The prediction has shape KxHxW that represents the logits of + each class for each pixel. + * "panoptic_seg": + A tuple that represent panoptic output + panoptic_seg (Tensor): of shape (height, width) where the values are ids for each segment. + segments_info (list[dict]): Describe each segment in `panoptic_seg`. + Each dict contains keys "id", "category_id", "isthing". 
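+ Note: each input dict is also expected to carry a "task" string (and, during
+ training, "text" prompts); these are tokenized and passed through the task MLP
+ and text encoder below.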
+ """ + images = [x["image"].to(self.device) for x in batched_inputs] + images = [(x - self.pixel_mean) / self.pixel_std for x in images] + images = ImageList.from_tensors(images, self.size_divisibility) + + tasks = torch.cat([self.task_tokenizer(x["task"]).to(self.device).unsqueeze(0) for x in batched_inputs], dim=0) + tasks = self.task_mlp(tasks.float()) + + features = self.backbone(images.tensor) + outputs = self.sem_seg_head(features, tasks) + + if self.training: + texts = torch.cat([self.text_tokenizer(x["text"]).to(self.device).unsqueeze(0) for x in batched_inputs], dim=0) + texts_x = self.encode_text(texts) + + outputs = {**outputs, **texts_x} + + # mask classification target + if "instances" in batched_inputs[0]: + gt_instances = [x["instances"].to(self.device) for x in batched_inputs] + targets = self.prepare_targets(gt_instances, images) + else: + targets = None + + # bipartite matching-based loss + losses = self.criterion(outputs, targets) + + for k in list(losses.keys()): + if k in self.criterion.weight_dict: + losses[k] *= self.criterion.weight_dict[k] + else: + # remove this loss if not specified in `weight_dict` + losses.pop(k) + return losses + else: + mask_cls_results = outputs["pred_logits"] + mask_pred_results = outputs["pred_masks"] + # upsample masks + mask_pred_results = F.interpolate( + mask_pred_results, + size=(images.tensor.shape[-2], images.tensor.shape[-1]), + mode="bilinear", + align_corners=False, + ) + + del outputs + + processed_results = [] + for i, data in enumerate(zip( + mask_cls_results, mask_pred_results, batched_inputs, images.image_sizes + )): + mask_cls_result, mask_pred_result, input_per_image, image_size = data + height = input_per_image.get("height", image_size[0]) + width = input_per_image.get("width", image_size[1]) + processed_results.append({}) + + if self.sem_seg_postprocess_before_inference: + mask_pred_result = retry_if_cuda_oom(sem_seg_postprocess)( + mask_pred_result, image_size, height, width + ) + mask_cls_result = mask_cls_result.to(mask_pred_result) + + # semantic segmentation inference + if self.semantic_on: + r = retry_if_cuda_oom(self.semantic_inference)(mask_cls_result, mask_pred_result) + if not self.sem_seg_postprocess_before_inference: + r = retry_if_cuda_oom(sem_seg_postprocess)(r, image_size, height, width) + processed_results[-1]["sem_seg"] = r + + # panoptic segmentation inference + if self.panoptic_on: + panoptic_r = retry_if_cuda_oom(self.panoptic_inference)(mask_cls_result, mask_pred_result) + processed_results[-1]["panoptic_seg"] = panoptic_r + + # instance segmentation inference + if self.instance_on: + instance_r = retry_if_cuda_oom(self.instance_inference)(mask_cls_result, mask_pred_result) + processed_results[-1]["instances"] = instance_r + + if self.detection_on: + bbox_r = retry_if_cuda_oom(self.instance_inference)(mask_cls_result, mask_pred_result) + processed_results[-1]["box_instances"] = bbox_r + + return processed_results + + def prepare_targets(self, targets, images): + h_pad, w_pad = images.tensor.shape[-2:] + new_targets = [] + for targets_per_image in targets: + # pad gt + gt_masks = targets_per_image.gt_masks + padded_masks = torch.zeros((gt_masks.shape[0], h_pad, w_pad), dtype=gt_masks.dtype, device=gt_masks.device) + padded_masks[:, : gt_masks.shape[1], : gt_masks.shape[2]] = gt_masks + new_targets.append( + { + "labels": targets_per_image.gt_classes, + "masks": padded_masks, + } + ) + return new_targets + + def semantic_inference(self, mask_cls, mask_pred): + mask_cls = F.softmax(mask_cls, 
dim=-1)[..., :-1] + mask_pred = mask_pred.sigmoid() + semseg = torch.einsum("qc,qhw->chw", mask_cls, mask_pred) + return semseg + + def panoptic_inference(self, mask_cls, mask_pred): + scores, labels = F.softmax(mask_cls, dim=-1).max(-1) + mask_pred = mask_pred.sigmoid() + + keep = labels.ne(self.sem_seg_head.num_classes) & (scores > self.object_mask_threshold) + cur_scores = scores[keep] + cur_classes = labels[keep] + cur_masks = mask_pred[keep] + cur_mask_cls = mask_cls[keep] + cur_mask_cls = cur_mask_cls[:, :-1] + + cur_prob_masks = cur_scores.view(-1, 1, 1) * cur_masks + + h, w = cur_masks.shape[-2:] + panoptic_seg = torch.zeros((h, w), dtype=torch.int32, device=cur_masks.device) + segments_info = [] + + current_segment_id = 0 + + if cur_masks.shape[0] == 0: + # We didn't detect any mask :( + return panoptic_seg, segments_info + else: + # take argmax + cur_mask_ids = cur_prob_masks.argmax(0) + stuff_memory_list = {} + for k in range(cur_classes.shape[0]): + pred_class = cur_classes[k].item() + isthing = pred_class in self.metadata.thing_dataset_id_to_contiguous_id.values() + mask_area = (cur_mask_ids == k).sum().item() + original_area = (cur_masks[k] >= 0.5).sum().item() + mask = (cur_mask_ids == k) & (cur_masks[k] >= 0.5) + + if mask_area > 0 and original_area > 0 and mask.sum().item() > 0: + if mask_area / original_area < self.overlap_threshold: + continue + + # merge stuff regions + if not isthing: + if int(pred_class) in stuff_memory_list.keys(): + panoptic_seg[mask] = stuff_memory_list[int(pred_class)] + continue + else: + stuff_memory_list[int(pred_class)] = current_segment_id + 1 + + current_segment_id += 1 + panoptic_seg[mask] = current_segment_id + + segments_info.append( + { + "id": current_segment_id, + "isthing": bool(isthing), + "category_id": int(pred_class), + } + ) + + return panoptic_seg, segments_info + + def instance_inference(self, mask_cls, mask_pred): + # mask_pred is already processed to have the same shape as original input + image_size = mask_pred.shape[-2:] + + # [Q, K] + scores = F.softmax(mask_cls, dim=-1)[:, :-1] + labels = torch.arange(self.sem_seg_head.num_classes, device=self.device).unsqueeze(0).repeat(self.num_queries, 1).flatten(0, 1) + + # scores_per_image, topk_indices = scores.flatten(0, 1).topk(self.num_queries, sorted=False) + scores_per_image, topk_indices = scores.flatten(0, 1).topk(self.test_topk_per_image, sorted=False) + labels_per_image = labels[topk_indices] + + topk_indices = topk_indices // self.sem_seg_head.num_classes + # mask_pred = mask_pred.unsqueeze(1).repeat(1, self.sem_seg_head.num_classes, 1).flatten(0, 1) + mask_pred = mask_pred[topk_indices] + + # Only consider scores with confidence over [self.object_mask_threshold] for demo + if self.is_demo: + keep = scores_per_image > self.object_mask_threshold + scores_per_image = scores_per_image[keep] + labels_per_image = labels_per_image[keep] + mask_pred = mask_pred[keep] + + # if this is panoptic segmentation, we only keep the "thing" classes + if self.panoptic_on: + keep = torch.zeros_like(scores_per_image).bool() + for i, lab in enumerate(labels_per_image): + keep[i] = lab in self.metadata.thing_dataset_id_to_contiguous_id.values() + + scores_per_image = scores_per_image[keep] + labels_per_image = labels_per_image[keep] + mask_pred = mask_pred[keep] + + if 'ade20k' in self.metadata.name: + for i in range(labels_per_image.shape[0]): + labels_per_image[i] = self.thing_indices.index(labels_per_image[i].item()) + + result = Instances(image_size) + # mask (before sigmoid) + 
result.pred_masks = (mask_pred > 0).float() + if self.detection_on: + # Uncomment the following to get boxes from masks (this is slow) + result.pred_boxes = BitMasks(mask_pred > 0).get_bounding_boxes() + else: + result.pred_boxes = Boxes(torch.zeros(mask_pred.size(0), 4)) + + # calculate average mask prob + mask_scores_per_image = (mask_pred.sigmoid().flatten(1) * result.pred_masks.flatten(1)).sum(1) / (result.pred_masks.flatten(1).sum(1) + 1e-6) + result.scores = scores_per_image * mask_scores_per_image + result.pred_classes = labels_per_image + return result \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/utils/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..130d3011b032f91df1a9cf965625e54922f6c81b --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/utils/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from .events import setup_wandb, WandbWriter \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/utils/box_ops.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/utils/box_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..a2b62ad99ed1fc35cdb10a9e11acdeb0ff1abcc4 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/utils/box_ops.py @@ -0,0 +1,133 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +""" +Utilities for bounding box manipulation and GIoU. +""" +import torch, os +from torchvision.ops.boxes import box_area + + +def box_cxcywh_to_xyxy(x): + x_c, y_c, w, h = x.unbind(-1) + b = [(x_c - 0.5 * w), (y_c - 0.5 * h), + (x_c + 0.5 * w), (y_c + 0.5 * h)] + return torch.stack(b, dim=-1) + + +def box_xyxy_to_cxcywh(x): + x0, y0, x1, y1 = x.unbind(-1) + b = [(x0 + x1) / 2, (y0 + y1) / 2, + (x1 - x0), (y1 - y0)] + return torch.stack(b, dim=-1) + + +# modified from torchvision to also return the union +def box_iou(boxes1, boxes2): + area1 = box_area(boxes1) + area2 = box_area(boxes2) + + # import ipdb; ipdb.set_trace() + lt = torch.max(boxes1[:, None, :2], boxes2[:, :2]) # [N,M,2] + rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:]) # [N,M,2] + + wh = (rb - lt).clamp(min=0) # [N,M,2] + inter = wh[:, :, 0] * wh[:, :, 1] # [N,M] + + union = area1[:, None] + area2 - inter + + iou = inter / (union + 1e-6) + return iou, union + + +def generalized_box_iou(boxes1, boxes2): + """ + Generalized IoU from https://giou.stanford.edu/ + The boxes should be in [x0, y0, x1, y1] format + Returns a [N, M] pairwise matrix, where N = len(boxes1) + and M = len(boxes2) + """ + # degenerate boxes gives inf / nan results + # so do an early check + assert (boxes1[:, 2:] >= boxes1[:, :2]).all() + assert (boxes2[:, 2:] >= boxes2[:, :2]).all() + # except: + # import ipdb; ipdb.set_trace() + iou, union = box_iou(boxes1, boxes2) + + lt = torch.min(boxes1[:, None, :2], boxes2[:, :2]) + rb = torch.max(boxes1[:, None, 2:], boxes2[:, 2:]) + + wh = (rb - lt).clamp(min=0) # [N,M,2] + area = wh[:, :, 0] * wh[:, :, 1] + + return iou - (area - union) / (area + 1e-6) + + + +# modified from torchvision to also return the union +def box_iou_pairwise(boxes1, boxes2): + area1 = box_area(boxes1) + area2 = box_area(boxes2) + + lt = torch.max(boxes1[:, :2], boxes2[:, :2]) # [N,2] + rb = 
torch.min(boxes1[:, 2:], boxes2[:, 2:]) # [N,2] + + wh = (rb - lt).clamp(min=0) # [N,2] + inter = wh[:, 0] * wh[:, 1] # [N] + + union = area1 + area2 - inter + + iou = inter / union + return iou, union + + +def generalized_box_iou_pairwise(boxes1, boxes2): + """ + Generalized IoU from https://giou.stanford.edu/ + Input: + - boxes1, boxes2: N,4 + Output: + - giou: N, 4 + """ + # degenerate boxes gives inf / nan results + # so do an early check + assert (boxes1[:, 2:] >= boxes1[:, :2]).all() + assert (boxes2[:, 2:] >= boxes2[:, :2]).all() + assert boxes1.shape == boxes2.shape + iou, union = box_iou_pairwise(boxes1, boxes2) # N, 4 + + lt = torch.min(boxes1[:, :2], boxes2[:, :2]) + rb = torch.max(boxes1[:, 2:], boxes2[:, 2:]) + + wh = (rb - lt).clamp(min=0) # [N,2] + area = wh[:, 0] * wh[:, 1] + + return iou - (area - union) / area + +def masks_to_boxes(masks): + """Compute the bounding boxes around the provided masks + The masks should be in format [N, H, W] where N is the number of masks, (H, W) are the spatial dimensions. + Returns a [N, 4] tensors, with the boxes in xyxy format + """ + if masks.numel() == 0: + return torch.zeros((0, 4), device=masks.device) + + h, w = masks.shape[-2:] + + y = torch.arange(0, h, dtype=torch.float) + x = torch.arange(0, w, dtype=torch.float) + y, x = torch.meshgrid(y, x) + + x_mask = (masks * x.unsqueeze(0)) + x_max = x_mask.flatten(1).max(-1)[0] + x_min = x_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0] + + y_mask = (masks * y.unsqueeze(0)) + y_max = y_mask.flatten(1).max(-1)[0] + y_min = y_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0] + + return torch.stack([x_min, y_min, x_max, y_max], 1) + +if __name__ == '__main__': + x = torch.rand(5, 4) + y = torch.rand(3, 4) + iou, union = box_iou(x, y) \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/utils/events.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/utils/events.py new file mode 100644 index 0000000000000000000000000000000000000000..3c81e8f65ce8cc4428dd2f24d1ff72a424362d5d --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/utils/events.py @@ -0,0 +1,120 @@ +import os +import wandb +from annotator.oneformer.detectron2.utils import comm +from annotator.oneformer.detectron2.utils.events import EventWriter, get_event_storage + + +def setup_wandb(cfg, args): + if comm.is_main_process(): + init_args = { + k.lower(): v + for k, v in cfg.WANDB.items() + if isinstance(k, str) and k not in ["config"] + } + # only include most related part to avoid too big table + # TODO: add configurable params to select which part of `cfg` should be saved in config + if "config_exclude_keys" in init_args: + init_args["config"] = cfg + init_args["config"]["cfg_file"] = args.config_file + else: + init_args["config"] = { + "model": cfg.MODEL, + "solver": cfg.SOLVER, + "cfg_file": args.config_file, + } + if ("name" not in init_args) or (init_args["name"] is None): + init_args["name"] = os.path.basename(args.config_file) + else: + init_args["name"] = init_args["name"] + '_' + os.path.basename(args.config_file) + wandb.init(**init_args) + + +class BaseRule(object): + def __call__(self, target): + return target + + +class IsIn(BaseRule): + def __init__(self, keyword: str): + self.keyword = keyword + + def __call__(self, target): + return self.keyword in target + + +class Prefix(BaseRule): + def __init__(self, keyword: str): + self.keyword = keyword + + def 
__call__(self, target): + return "/".join([self.keyword, target]) + + +class WandbWriter(EventWriter): + """ + Write all scalars to a tensorboard file. + """ + + def __init__(self): + """ + Args: + log_dir (str): the directory to save the output events + kwargs: other arguments passed to `torch.utils.tensorboard.SummaryWriter(...)` + """ + self._last_write = -1 + self._group_rules = [ + (IsIn("/"), BaseRule()), + (IsIn("loss"), Prefix("train")), + ] + + def write(self): + + storage = get_event_storage() + + def _group_name(scalar_name): + for (rule, op) in self._group_rules: + if rule(scalar_name): + return op(scalar_name) + return scalar_name + + stats = { + _group_name(name): scalars[0] + for name, scalars in storage.latest().items() + if scalars[1] > self._last_write + } + if len(stats) > 0: + self._last_write = max([v[1] for k, v in storage.latest().items()]) + + # storage.put_{image,histogram} is only meant to be used by + # tensorboard writer. So we access its internal fields directly from here. + if len(storage._vis_data) >= 1: + stats["image"] = [ + wandb.Image(img, caption=img_name) + for img_name, img, step_num in storage._vis_data + ] + # Storage stores all image data and rely on this writer to clear them. + # As a result it assumes only one writer will use its image data. + # An alternative design is to let storage store limited recent + # data (e.g. only the most recent image) that all writers can access. + # In that case a writer may not see all image data if its period is long. + storage.clear_images() + + if len(storage._histograms) >= 1: + + def create_bar(tag, bucket_limits, bucket_counts, **kwargs): + data = [ + [label, val] for (label, val) in zip(bucket_limits, bucket_counts) + ] + table = wandb.Table(data=data, columns=["label", "value"]) + return wandb.plot.bar(table, "label", "value", title=tag) + + stats["hist"] = [create_bar(**params) for params in storage._histograms] + + storage.clear_histograms() + + if len(stats) == 0: + return + wandb.log(stats, step=storage.iter) + + def close(self): + wandb.finish() \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/utils/misc.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/utils/misc.py new file mode 100644 index 0000000000000000000000000000000000000000..f2bca7733278c3a4b1f145bd7e5da23683b74961 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/utils/misc.py @@ -0,0 +1,197 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# Modified by Bowen Cheng from https://github.com/facebookresearch/detr/blob/master/util/misc.py +""" +Misc functions, including distributed helpers. + +Mostly copy-paste from torchvision references. +""" +from typing import List, Optional + +import torch +import torch.distributed as dist +import torchvision +from torch import Tensor +import warnings +import torch.nn.functional as F +import math + +def inverse_sigmoid(x, eps=1e-3): + x = x.clamp(min=0, max=1) + x1 = x.clamp(min=eps) + x2 = (1 - x).clamp(min=eps) + return torch.log(x1/x2) + +def _no_grad_trunc_normal_(tensor, mean, std, a, b): + # Cut & paste from PyTorch official master until it's in a few official releases - RW + # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf + def norm_cdf(x): + # Computes standard normal cumulative distribution function + return (1. + math.erf(x / math.sqrt(2.))) / 2. 
+ + if (mean < a - 2 * std) or (mean > b + 2 * std): + warnings.warn("mean is more than 2 std from [a, b] in nn.init.trunc_normal_. " + "The distribution of values may be incorrect.", + stacklevel=2) + + with torch.no_grad(): + # Values are generated by using a truncated uniform distribution and + # then using the inverse CDF for the normal distribution. + # Get upper and lower cdf values + l = norm_cdf((a - mean) / std) + u = norm_cdf((b - mean) / std) + + # Uniformly fill tensor with values from [l, u], then translate to + # [2l-1, 2u-1]. + tensor.uniform_(2 * l - 1, 2 * u - 1) + + # Use inverse cdf transform for normal distribution to get truncated + # standard normal + tensor.erfinv_() + + # Transform to proper mean, std + tensor.mul_(std * math.sqrt(2.)) + tensor.add_(mean) + + # Clamp to ensure it's in the proper range + tensor.clamp_(min=a, max=b) + return tensor + +def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.): + # type: (Tensor, float, float, float, float) -> Tensor + r"""Fills the input Tensor with values drawn from a truncated + normal distribution. The values are effectively drawn from the + normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` + with values outside :math:`[a, b]` redrawn until they are within + the bounds. The method used for generating the random values works + best when :math:`a \leq \text{mean} \leq b`. + Args: + tensor: an n-dimensional `torch.Tensor` + mean: the mean of the normal distribution + std: the standard deviation of the normal distribution + a: the minimum cutoff value + b: the maximum cutoff value + Examples: + >>> w = torch.empty(3, 5) + >>> nn.init.trunc_normal_(w) + """ + return _no_grad_trunc_normal_(tensor, mean, std, a, b) + +def resize(input, + size=None, + scale_factor=None, + mode='nearest', + align_corners=None, + warning=True): + if warning: + if size is not None and align_corners: + input_h, input_w = tuple(int(x) for x in input.shape[2:]) + output_h, output_w = tuple(int(x) for x in size) + if output_h > input_h or output_w > output_h: + if ((output_h > 1 and output_w > 1 and input_h > 1 + and input_w > 1) and (output_h - 1) % (input_h - 1) + and (output_w - 1) % (input_w - 1)): + warnings.warn( + f'When align_corners={align_corners}, ' + 'the output would more aligned if ' + f'input size {(input_h, input_w)} is `x+1` and ' + f'out size {(output_h, output_w)} is `nx+1`') + if isinstance(size, torch.Size): + size = tuple(int(x) for x in size) + return F.interpolate(input, size, scale_factor, mode, align_corners) + +def _max_by_axis(the_list): + # type: (List[List[int]]) -> List[int] + maxes = the_list[0] + for sublist in the_list[1:]: + for index, item in enumerate(sublist): + maxes[index] = max(maxes[index], item) + return maxes + + +class NestedTensor(object): + def __init__(self, tensors, mask: Optional[Tensor]): + self.tensors = tensors + self.mask = mask + + def to(self, device): + # type: (Device) -> NestedTensor # noqa + cast_tensor = self.tensors.to(device) + mask = self.mask + if mask is not None: + assert mask is not None + cast_mask = mask.to(device) + else: + cast_mask = None + return NestedTensor(cast_tensor, cast_mask) + + def decompose(self): + return self.tensors, self.mask + + def __repr__(self): + return str(self.tensors) + + +def nested_tensor_from_tensor_list(tensor_list: List[Tensor]): + # TODO make this more general + if tensor_list[0].ndim == 3: + if torchvision._is_tracing(): + # nested_tensor_from_tensor_list() does not export well to ONNX + # call 
_onnx_nested_tensor_from_tensor_list() instead + return _onnx_nested_tensor_from_tensor_list(tensor_list) + + # TODO make it support different-sized images + max_size = _max_by_axis([list(img.shape) for img in tensor_list]) + # min_size = tuple(min(s) for s in zip(*[img.shape for img in tensor_list])) + batch_shape = [len(tensor_list)] + max_size + b, c, h, w = batch_shape + dtype = tensor_list[0].dtype + device = tensor_list[0].device + tensor = torch.zeros(batch_shape, dtype=dtype, device=device) + mask = torch.ones((b, h, w), dtype=torch.bool, device=device) + for img, pad_img, m in zip(tensor_list, tensor, mask): + pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) + m[: img.shape[1], : img.shape[2]] = False + else: + raise ValueError("not supported") + return NestedTensor(tensor, mask) + + +# _onnx_nested_tensor_from_tensor_list() is an implementation of +# nested_tensor_from_tensor_list() that is supported by ONNX tracing. +@torch.jit.unused +def _onnx_nested_tensor_from_tensor_list(tensor_list: List[Tensor]) -> NestedTensor: + max_size = [] + for i in range(tensor_list[0].dim()): + max_size_i = torch.max( + torch.stack([img.shape[i] for img in tensor_list]).to(torch.float32) + ).to(torch.int64) + max_size.append(max_size_i) + max_size = tuple(max_size) + + # work around for + # pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) + # m[: img.shape[1], :img.shape[2]] = False + # which is not yet supported in onnx + padded_imgs = [] + padded_masks = [] + for img in tensor_list: + padding = [(s1 - s2) for s1, s2 in zip(max_size, tuple(img.shape))] + padded_img = torch.nn.functional.pad(img, (0, padding[2], 0, padding[1], 0, padding[0])) + padded_imgs.append(padded_img) + + m = torch.zeros_like(img[0], dtype=torch.int, device=img.device) + padded_mask = torch.nn.functional.pad(m, (0, padding[2], 0, padding[1]), "constant", 1) + padded_masks.append(padded_mask.to(torch.bool)) + + tensor = torch.stack(padded_imgs) + mask = torch.stack(padded_masks) + + return NestedTensor(tensor, mask=mask) + + +def is_dist_avail_and_initialized(): + if not dist.is_available(): + return False + if not dist.is_initialized(): + return False + return True diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/utils/pos_embed.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/utils/pos_embed.py new file mode 100644 index 0000000000000000000000000000000000000000..aa11d60db65fa98c140e7d75bdf985ff7ece8f18 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/oneformer/utils/pos_embed.py @@ -0,0 +1,122 @@ +# -------------------------------------------------------- +# Position embedding utils +# -------------------------------------------------------- + +from typing import Tuple + +import numpy as np +import torch + + +# -------------------------------------------------------- +# 2D sine-cosine position embedding +# References: +# Transformer: https://github.com/tensorflow/models/blob/master/official/nlp/transformer/model_utils.py +# MoCo v3: https://github.com/facebookresearch/moco-v3 +# -------------------------------------------------------- +def get_2d_sincos_pos_embed(embed_dim, grid_size, cls_token=False): + """ + grid_size: int of the grid height and width + return: + pos_embed: [grid_size*grid_size, embed_dim] or [1+grid_size*grid_size, embed_dim] (w/ or w/o cls_token) + """ + grid_h = np.arange(grid_size, dtype=np.float32) + grid_w = np.arange(grid_size, dtype=np.float32) + grid = 
np.meshgrid(grid_w, grid_h) # here w goes first + grid = np.stack(grid, axis=0) + + grid = grid.reshape([2, 1, grid_size, grid_size]) + pos_embed = get_2d_sincos_pos_embed_from_grid(embed_dim, grid) + if cls_token: + pos_embed = np.concatenate([np.zeros([1, embed_dim]), pos_embed], axis=0) + return pos_embed + + +def get_2d_sincos_pos_embed_from_grid(embed_dim, grid): + assert embed_dim % 2 == 0 + + # use half of dimensions to encode grid_h + emb_h = get_1d_sincos_pos_embed_from_grid(embed_dim // 2, grid[0]) # (H*W, D/2) + emb_w = get_1d_sincos_pos_embed_from_grid(embed_dim // 2, grid[1]) # (H*W, D/2) + + emb = np.concatenate([emb_h, emb_w], axis=1) # (H*W, D) + return emb + + +def get_1d_sincos_pos_embed_from_grid(embed_dim, pos): + """ + embed_dim: output dimension for each position + pos: a list of positions to be encoded: size (M,) + out: (M, D) + """ + assert embed_dim % 2 == 0 + omega = np.arange(embed_dim // 2, dtype=np.float) + omega /= embed_dim / 2.0 + omega = 1.0 / 10000 ** omega # (D/2,) + + pos = pos.reshape(-1) # (M,) + out = np.einsum("m,d->md", pos, omega) # (M, D/2), outer product + + emb_sin = np.sin(out) # (M, D/2) + emb_cos = np.cos(out) # (M, D/2) + + emb = np.concatenate([emb_sin, emb_cos], axis=1) # (M, D) + return emb + + +# -------------------------------------------------------- +# Interpolate position embeddings for high-resolution +# References: +# DeiT: https://github.com/facebookresearch/deit +# -------------------------------------------------------- +def interpolate_pos_embed(model, checkpoint_model, pos_embed_key): + if pos_embed_key in checkpoint_model: + pos_embed_checkpoint = checkpoint_model[pos_embed_key] + embedding_size = pos_embed_checkpoint.shape[-1] + num_patches = model.num_patches + if pos_embed_key.startswith("decoder"): + num_extra_tokens = model.decoder_pos_embed.shape[-2] - num_patches + else: + num_extra_tokens = model.pos_embed.shape[-2] - num_patches + # height (== width) for the checkpoint position embedding + orig_size = int((pos_embed_checkpoint.shape[-2] - num_extra_tokens) ** 0.5) + # height (== width) for the new position embedding + new_size = int(num_patches ** 0.5) + # class_token and dist_token are kept unchanged + if orig_size != new_size: + print( + "Position interpolate from %dx%d to %dx%d" + % (orig_size, orig_size, new_size, new_size) + ) + extra_tokens = pos_embed_checkpoint[:, :num_extra_tokens] + # only the position tokens are interpolated + pos_tokens = pos_embed_checkpoint[:, num_extra_tokens:] + pos_tokens = pos_tokens.reshape( + -1, orig_size, orig_size, embedding_size + ).permute(0, 3, 1, 2) + pos_tokens = torch.nn.functional.interpolate( + pos_tokens, + size=(new_size, new_size), + mode="bicubic", + align_corners=False, + ) + pos_tokens = pos_tokens.permute(0, 2, 3, 1).flatten(1, 2) + new_pos_embed = torch.cat((extra_tokens, pos_tokens), dim=1) + checkpoint_model[pos_embed_key] = new_pos_embed + + +def interpolate_pos_embed_online( + pos_embed, orig_size: Tuple[int], new_size: Tuple[int], num_extra_tokens: int +): + extra_tokens = pos_embed[:, :num_extra_tokens] + pos_tokens = pos_embed[:, num_extra_tokens:] + embedding_size = pos_tokens.shape[-1] + pos_tokens = pos_tokens.reshape( + -1, orig_size[0], orig_size[1], embedding_size + ).permute(0, 3, 1, 2) + pos_tokens = torch.nn.functional.interpolate( + pos_tokens, size=new_size, mode="bicubic", align_corners=False, + ) + pos_tokens = pos_tokens.permute(0, 2, 3, 1).flatten(1, 2) + new_pos_embed = torch.cat((extra_tokens, pos_tokens), dim=1) + return new_pos_embed 
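The position-embedding helpers above reduce to one formula: for each grid coordinate, half of the channels store sin(pos / 10000^(2i/D)) and the other half the matching cosine, and the encodings of the two grid axes are concatenated. Below is a minimal self-contained sketch of that construction, not part of the patch; the helper name sincos_1d and the 4x4 toy grid are illustrative only, and np.float is replaced by plain float so it runs on current NumPy.
import numpy as np

def sincos_1d(embed_dim, pos):
    # Same recipe as get_1d_sincos_pos_embed_from_grid above.
    assert embed_dim % 2 == 0
    omega = np.arange(embed_dim // 2, dtype=float)
    omega /= embed_dim / 2.0
    omega = 1.0 / 10000 ** omega                         # (D/2,)
    out = np.einsum("m,d->md", pos.reshape(-1), omega)   # (M, D/2) outer product
    return np.concatenate([np.sin(out), np.cos(out)], axis=1)  # (M, D)

# 4x4 grid, 16 channels: encode each axis with half the channels and concatenate,
# mirroring get_2d_sincos_pos_embed / get_2d_sincos_pos_embed_from_grid.
ys, xs = np.meshgrid(np.arange(4, dtype=np.float32),
                     np.arange(4, dtype=np.float32), indexing="ij")
pos_embed = np.concatenate([sincos_1d(8, ys), sincos_1d(8, xs)], axis=1)
print(pos_embed.shape)  # (16, 16): one embedding vector per grid cell
interpolate_pos_embed_online then simply reshapes such a table back to (H, W, D), resizes it bicubically to the new grid, and flattens it again, keeping any extra (class) tokens untouched.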
diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/pycocotools/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/pycocotools/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3f7d85bba884ea8f83fc6ab2a1e6ade80d98d4d9 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/pycocotools/__init__.py @@ -0,0 +1 @@ +__author__ = 'tylin' diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/pycocotools/coco.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/pycocotools/coco.py new file mode 100644 index 0000000000000000000000000000000000000000..1ecb6c1b00325b3073c67dd5081bd514f568ece5 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/pycocotools/coco.py @@ -0,0 +1,444 @@ +__author__ = 'tylin' +__version__ = '2.0' +# Interface for accessing the Microsoft COCO dataset. + +# Microsoft COCO is a large image dataset designed for object detection, +# segmentation, and caption generation. annotator.oneformer.pycocotools is a Python API that +# assists in loading, parsing and visualizing the annotations in COCO. +# Please visit http://mscoco.org/ for more information on COCO, including +# for the data, paper, and tutorials. The exact format of the annotations +# is also described on the COCO website. For example usage of the annotator.oneformer.pycocotools +# please see annotator.oneformer.pycocotools_demo.ipynb. In addition to this API, please download both +# the COCO images and annotations in order to run the demo. + +# An alternative to using the API is to load the annotations directly +# into Python dictionary +# Using the API provides additional utility functions. Note that this API +# supports both *instance* and *caption* annotations. In the case of +# captions not all functions are defined (e.g. categories are undefined). + +# The following API functions are defined: +# COCO - COCO api class that loads COCO annotation file and prepare data structures. +# decodeMask - Decode binary mask M encoded via run-length encoding. +# encodeMask - Encode binary mask M using run-length encoding. +# getAnnIds - Get ann ids that satisfy given filter conditions. +# getCatIds - Get cat ids that satisfy given filter conditions. +# getImgIds - Get img ids that satisfy given filter conditions. +# loadAnns - Load anns with the specified ids. +# loadCats - Load cats with the specified ids. +# loadImgs - Load imgs with the specified ids. +# annToMask - Convert segmentation in an annotation to binary mask. +# showAnns - Display the specified annotations. +# loadRes - Load algorithm results and create API for accessing them. +# download - Download COCO images from mscoco.org server. +# Throughout the API "ann"=annotation, "cat"=category, and "img"=image. +# Help on each functions can be accessed by: "help COCO>function". + +# See also COCO>decodeMask, +# COCO>encodeMask, COCO>getAnnIds, COCO>getCatIds, +# COCO>getImgIds, COCO>loadAnns, COCO>loadCats, +# COCO>loadImgs, COCO>annToMask, COCO>showAnns + +# Microsoft COCO Toolbox. version 2.0 +# Data, paper, and tutorials available at: http://mscoco.org/ +# Code written by Piotr Dollar and Tsung-Yi Lin, 2014. +# Licensed under the Simplified BSD License [see bsd.txt] + +import json +import time +import numpy as np +import copy +import itertools +from . 
import mask as maskUtils +import os +from collections import defaultdict +import sys +PYTHON_VERSION = sys.version_info[0] +if PYTHON_VERSION == 2: + from urllib import urlretrieve +elif PYTHON_VERSION == 3: + from urllib.request import urlretrieve + + +def _isArrayLike(obj): + return hasattr(obj, '__iter__') and hasattr(obj, '__len__') + + +class COCO: + def __init__(self, annotation_file=None): + """ + Constructor of Microsoft COCO helper class for reading and visualizing annotations. + :param annotation_file (str): location of annotation file + :param image_folder (str): location to the folder that hosts images. + :return: + """ + # load dataset + self.dataset,self.anns,self.cats,self.imgs = dict(),dict(),dict(),dict() + self.imgToAnns, self.catToImgs = defaultdict(list), defaultdict(list) + if not annotation_file == None: + print('loading annotations into memory...') + tic = time.time() + with open(annotation_file, 'r') as f: + dataset = json.load(f) + assert type(dataset)==dict, 'annotation file format {} not supported'.format(type(dataset)) + print('Done (t={:0.2f}s)'.format(time.time()- tic)) + self.dataset = dataset + self.createIndex() + + def createIndex(self): + # create index + print('creating index...') + anns, cats, imgs = {}, {}, {} + imgToAnns,catToImgs = defaultdict(list),defaultdict(list) + if 'annotations' in self.dataset: + for ann in self.dataset['annotations']: + imgToAnns[ann['image_id']].append(ann) + anns[ann['id']] = ann + + if 'images' in self.dataset: + for img in self.dataset['images']: + imgs[img['id']] = img + + if 'categories' in self.dataset: + for cat in self.dataset['categories']: + cats[cat['id']] = cat + + if 'annotations' in self.dataset and 'categories' in self.dataset: + for ann in self.dataset['annotations']: + catToImgs[ann['category_id']].append(ann['image_id']) + + print('index created!') + + # create class members + self.anns = anns + self.imgToAnns = imgToAnns + self.catToImgs = catToImgs + self.imgs = imgs + self.cats = cats + + def info(self): + """ + Print information about the annotation file. + :return: + """ + for key, value in self.dataset['info'].items(): + print('{}: {}'.format(key, value)) + + def getAnnIds(self, imgIds=[], catIds=[], areaRng=[], iscrowd=None): + """ + Get ann ids that satisfy given filter conditions. default skips that filter + :param imgIds (int array) : get anns for given imgs + catIds (int array) : get anns for given cats + areaRng (float array) : get anns for given area range (e.g. [0 inf]) + iscrowd (boolean) : get anns for given crowd label (False or True) + :return: ids (int array) : integer array of ann ids + """ + imgIds = imgIds if _isArrayLike(imgIds) else [imgIds] + catIds = catIds if _isArrayLike(catIds) else [catIds] + + if len(imgIds) == len(catIds) == len(areaRng) == 0: + anns = self.dataset['annotations'] + else: + if not len(imgIds) == 0: + lists = [self.imgToAnns[imgId] for imgId in imgIds if imgId in self.imgToAnns] + anns = list(itertools.chain.from_iterable(lists)) + else: + anns = self.dataset['annotations'] + anns = anns if len(catIds) == 0 else [ann for ann in anns if ann['category_id'] in catIds] + anns = anns if len(areaRng) == 0 else [ann for ann in anns if ann['area'] > areaRng[0] and ann['area'] < areaRng[1]] + if not iscrowd == None: + ids = [ann['id'] for ann in anns if ann['iscrowd'] == iscrowd] + else: + ids = [ann['id'] for ann in anns] + return ids + + def getCatIds(self, catNms=[], supNms=[], catIds=[]): + """ + filtering parameters. default skips that filter. 
+ :param catNms (str array) : get cats for given cat names + :param supNms (str array) : get cats for given supercategory names + :param catIds (int array) : get cats for given cat ids + :return: ids (int array) : integer array of cat ids + """ + catNms = catNms if _isArrayLike(catNms) else [catNms] + supNms = supNms if _isArrayLike(supNms) else [supNms] + catIds = catIds if _isArrayLike(catIds) else [catIds] + + if len(catNms) == len(supNms) == len(catIds) == 0: + cats = self.dataset['categories'] + else: + cats = self.dataset['categories'] + cats = cats if len(catNms) == 0 else [cat for cat in cats if cat['name'] in catNms] + cats = cats if len(supNms) == 0 else [cat for cat in cats if cat['supercategory'] in supNms] + cats = cats if len(catIds) == 0 else [cat for cat in cats if cat['id'] in catIds] + ids = [cat['id'] for cat in cats] + return ids + + def getImgIds(self, imgIds=[], catIds=[]): + ''' + Get img ids that satisfy given filter conditions. + :param imgIds (int array) : get imgs for given ids + :param catIds (int array) : get imgs with all given cats + :return: ids (int array) : integer array of img ids + ''' + imgIds = imgIds if _isArrayLike(imgIds) else [imgIds] + catIds = catIds if _isArrayLike(catIds) else [catIds] + + if len(imgIds) == len(catIds) == 0: + ids = self.imgs.keys() + else: + ids = set(imgIds) + for i, catId in enumerate(catIds): + if i == 0 and len(ids) == 0: + ids = set(self.catToImgs[catId]) + else: + ids &= set(self.catToImgs[catId]) + return list(ids) + + def loadAnns(self, ids=[]): + """ + Load anns with the specified ids. + :param ids (int array) : integer ids specifying anns + :return: anns (object array) : loaded ann objects + """ + if _isArrayLike(ids): + return [self.anns[id] for id in ids] + elif type(ids) == int: + return [self.anns[ids]] + + def loadCats(self, ids=[]): + """ + Load cats with the specified ids. + :param ids (int array) : integer ids specifying cats + :return: cats (object array) : loaded cat objects + """ + if _isArrayLike(ids): + return [self.cats[id] for id in ids] + elif type(ids) == int: + return [self.cats[ids]] + + def loadImgs(self, ids=[]): + """ + Load anns with the specified ids. + :param ids (int array) : integer ids specifying img + :return: imgs (object array) : loaded img objects + """ + if _isArrayLike(ids): + return [self.imgs[id] for id in ids] + elif type(ids) == int: + return [self.imgs[ids]] + + def showAnns(self, anns, draw_bbox=False): + """ + Display the specified annotations. 
+ :param anns (array of object): annotations to display + :return: None + """ + if len(anns) == 0: + return 0 + if 'segmentation' in anns[0] or 'keypoints' in anns[0]: + datasetType = 'instances' + elif 'caption' in anns[0]: + datasetType = 'captions' + else: + raise Exception('datasetType not supported') + if datasetType == 'instances': + import matplotlib.pyplot as plt + from matplotlib.collections import PatchCollection + from matplotlib.patches import Polygon + + ax = plt.gca() + ax.set_autoscale_on(False) + polygons = [] + color = [] + for ann in anns: + c = (np.random.random((1, 3))*0.6+0.4).tolist()[0] + if 'segmentation' in ann: + if type(ann['segmentation']) == list: + # polygon + for seg in ann['segmentation']: + poly = np.array(seg).reshape((int(len(seg)/2), 2)) + polygons.append(Polygon(poly)) + color.append(c) + else: + # mask + t = self.imgs[ann['image_id']] + if type(ann['segmentation']['counts']) == list: + rle = maskUtils.frPyObjects([ann['segmentation']], t['height'], t['width']) + else: + rle = [ann['segmentation']] + m = maskUtils.decode(rle) + img = np.ones( (m.shape[0], m.shape[1], 3) ) + if ann['iscrowd'] == 1: + color_mask = np.array([2.0,166.0,101.0])/255 + if ann['iscrowd'] == 0: + color_mask = np.random.random((1, 3)).tolist()[0] + for i in range(3): + img[:,:,i] = color_mask[i] + ax.imshow(np.dstack( (img, m*0.5) )) + if 'keypoints' in ann and type(ann['keypoints']) == list: + # turn skeleton into zero-based index + sks = np.array(self.loadCats(ann['category_id'])[0]['skeleton'])-1 + kp = np.array(ann['keypoints']) + x = kp[0::3] + y = kp[1::3] + v = kp[2::3] + for sk in sks: + if np.all(v[sk]>0): + plt.plot(x[sk],y[sk], linewidth=3, color=c) + plt.plot(x[v>0], y[v>0],'o',markersize=8, markerfacecolor=c, markeredgecolor='k',markeredgewidth=2) + plt.plot(x[v>1], y[v>1],'o',markersize=8, markerfacecolor=c, markeredgecolor=c, markeredgewidth=2) + + if draw_bbox: + [bbox_x, bbox_y, bbox_w, bbox_h] = ann['bbox'] + poly = [[bbox_x, bbox_y], [bbox_x, bbox_y+bbox_h], [bbox_x+bbox_w, bbox_y+bbox_h], [bbox_x+bbox_w, bbox_y]] + np_poly = np.array(poly).reshape((4,2)) + polygons.append(Polygon(np_poly)) + color.append(c) + + p = PatchCollection(polygons, facecolor=color, linewidths=0, alpha=0.4) + ax.add_collection(p) + p = PatchCollection(polygons, facecolor='none', edgecolors=color, linewidths=2) + ax.add_collection(p) + elif datasetType == 'captions': + for ann in anns: + print(ann['caption']) + + def loadRes(self, resFile): + """ + Load result file and return a result api object. 
+ :param resFile (str) : file name of result file + :return: res (obj) : result api object + """ + res = COCO() + res.dataset['images'] = [img for img in self.dataset['images']] + + print('Loading and preparing results...') + tic = time.time() + if type(resFile) == str or (PYTHON_VERSION == 2 and type(resFile) == unicode): + with open(resFile) as f: + anns = json.load(f) + elif type(resFile) == np.ndarray: + anns = self.loadNumpyAnnotations(resFile) + else: + anns = resFile + assert type(anns) == list, 'results in not an array of objects' + annsImgIds = [ann['image_id'] for ann in anns] + assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \ + 'Results do not correspond to current coco set' + if 'caption' in anns[0]: + imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns]) + res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds] + for id, ann in enumerate(anns): + ann['id'] = id+1 + elif 'bbox' in anns[0] and not anns[0]['bbox'] == []: + res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) + for id, ann in enumerate(anns): + bb = ann['bbox'] + x1, x2, y1, y2 = [bb[0], bb[0]+bb[2], bb[1], bb[1]+bb[3]] + if not 'segmentation' in ann: + ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]] + ann['area'] = bb[2]*bb[3] + ann['id'] = id+1 + ann['iscrowd'] = 0 + elif 'segmentation' in anns[0]: + res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) + for id, ann in enumerate(anns): + # now only support compressed RLE format as segmentation results + ann['area'] = maskUtils.area(ann['segmentation']) + if not 'bbox' in ann: + ann['bbox'] = maskUtils.toBbox(ann['segmentation']) + ann['id'] = id+1 + ann['iscrowd'] = 0 + elif 'keypoints' in anns[0]: + res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) + for id, ann in enumerate(anns): + s = ann['keypoints'] + x = s[0::3] + y = s[1::3] + x0,x1,y0,y1 = np.min(x), np.max(x), np.min(y), np.max(y) + ann['area'] = (x1-x0)*(y1-y0) + ann['id'] = id + 1 + ann['bbox'] = [x0,y0,x1-x0,y1-y0] + print('DONE (t={:0.2f}s)'.format(time.time()- tic)) + + res.dataset['annotations'] = anns + res.createIndex() + return res + + def download(self, tarDir = None, imgIds = [] ): + ''' + Download COCO images from mscoco.org server. 
+ :param tarDir (str): COCO results directory name + imgIds (list): images to be downloaded + :return: + ''' + if tarDir is None: + print('Please specify target directory') + return -1 + if len(imgIds) == 0: + imgs = self.imgs.values() + else: + imgs = self.loadImgs(imgIds) + N = len(imgs) + if not os.path.exists(tarDir): + os.makedirs(tarDir) + for i, img in enumerate(imgs): + tic = time.time() + fname = os.path.join(tarDir, img['file_name']) + if not os.path.exists(fname): + urlretrieve(img['coco_url'], fname) + print('downloaded {}/{} images (t={:0.1f}s)'.format(i, N, time.time()- tic)) + + def loadNumpyAnnotations(self, data): + """ + Convert result data from a numpy array [Nx7] where each row contains {imageID,x1,y1,w,h,score,class} + :param data (numpy.ndarray) + :return: annotations (python nested list) + """ + print('Converting ndarray to lists...') + assert(type(data) == np.ndarray) + print(data.shape) + assert(data.shape[1] == 7) + N = data.shape[0] + ann = [] + for i in range(N): + if i % 1000000 == 0: + print('{}/{}'.format(i,N)) + ann += [{ + 'image_id' : int(data[i, 0]), + 'bbox' : [ data[i, 1], data[i, 2], data[i, 3], data[i, 4] ], + 'score' : data[i, 5], + 'category_id': int(data[i, 6]), + }] + return ann + + def annToRLE(self, ann): + """ + Convert annotation which can be polygons, uncompressed RLE to RLE. + :return: binary mask (numpy 2D array) + """ + t = self.imgs[ann['image_id']] + h, w = t['height'], t['width'] + segm = ann['segmentation'] + if type(segm) == list: + # polygon -- a single object might consist of multiple parts + # we merge all parts into one mask rle code + rles = maskUtils.frPyObjects(segm, h, w) + rle = maskUtils.merge(rles) + elif type(segm['counts']) == list: + # uncompressed RLE + rle = maskUtils.frPyObjects(segm, h, w) + else: + # rle + rle = ann['segmentation'] + return rle + + def annToMask(self, ann): + """ + Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask. + :return: binary mask (numpy 2D array) + """ + rle = self.annToRLE(ann) + m = maskUtils.decode(rle) + return m diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/pycocotools/cocoeval.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/pycocotools/cocoeval.py new file mode 100644 index 0000000000000000000000000000000000000000..89c251e1652a0cfc7e8ff1bbb1024a801ed2ebe7 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/pycocotools/cocoeval.py @@ -0,0 +1,534 @@ +__author__ = 'tsungyi' + +import numpy as np +import datetime +import time +from collections import defaultdict +from . import mask as maskUtils +import copy + +class COCOeval: + # Interface for evaluating detection on the Microsoft COCO dataset. + # + # The usage for CocoEval is as follows: + # cocoGt=..., cocoDt=... # load dataset and results + # E = CocoEval(cocoGt,cocoDt); # initialize CocoEval object + # E.params.recThrs = ...; # set parameters as desired + # E.evaluate(); # run per image evaluation + # E.accumulate(); # accumulate per image results + # E.summarize(); # display summary metrics of results + # For example usage see evalDemo.m and http://mscoco.org/. + # + # The evaluation parameters are as follows (defaults in brackets): + # imgIds - [all] N img ids to use for evaluation + # catIds - [all] K cat ids to use for evaluation + # iouThrs - [.5:.05:.95] T=10 IoU thresholds for evaluation + # recThrs - [0:.01:1] R=101 recall thresholds for evaluation + # areaRng - [...] 
A=4 object area ranges for evaluation + # maxDets - [1 10 100] M=3 thresholds on max detections per image + # iouType - ['segm'] set iouType to 'segm', 'bbox' or 'keypoints' + # iouType replaced the now DEPRECATED useSegm parameter. + # useCats - [1] if true use category labels for evaluation + # Note: if useCats=0 category labels are ignored as in proposal scoring. + # Note: multiple areaRngs [Ax2] and maxDets [Mx1] can be specified. + # + # evaluate(): evaluates detections on every image and every category and + # concats the results into the "evalImgs" with fields: + # dtIds - [1xD] id for each of the D detections (dt) + # gtIds - [1xG] id for each of the G ground truths (gt) + # dtMatches - [TxD] matching gt id at each IoU or 0 + # gtMatches - [TxG] matching dt id at each IoU or 0 + # dtScores - [1xD] confidence of each dt + # gtIgnore - [1xG] ignore flag for each gt + # dtIgnore - [TxD] ignore flag for each dt at each IoU + # + # accumulate(): accumulates the per-image, per-category evaluation + # results in "evalImgs" into the dictionary "eval" with fields: + # params - parameters used for evaluation + # date - date evaluation was performed + # counts - [T,R,K,A,M] parameter dimensions (see above) + # precision - [TxRxKxAxM] precision for every evaluation setting + # recall - [TxKxAxM] max recall for every evaluation setting + # Note: precision and recall==-1 for settings with no gt objects. + # + # See also coco, mask, pycocoDemo, pycocoEvalDemo + # + # Microsoft COCO Toolbox. version 2.0 + # Data, paper, and tutorials available at: http://mscoco.org/ + # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. + # Licensed under the Simplified BSD License [see coco/license.txt] + def __init__(self, cocoGt=None, cocoDt=None, iouType='segm'): + ''' + Initialize CocoEval using coco APIs for gt and dt + :param cocoGt: coco object with ground truth annotations + :param cocoDt: coco object with detection results + :return: None + ''' + if not iouType: + print('iouType not specified. 
use default iouType segm') + self.cocoGt = cocoGt # ground truth COCO API + self.cocoDt = cocoDt # detections COCO API + self.evalImgs = defaultdict(list) # per-image per-category evaluation results [KxAxI] elements + self.eval = {} # accumulated evaluation results + self._gts = defaultdict(list) # gt for evaluation + self._dts = defaultdict(list) # dt for evaluation + self.params = Params(iouType=iouType) # parameters + self._paramsEval = {} # parameters for evaluation + self.stats = [] # result summarization + self.ious = {} # ious between all gts and dts + if not cocoGt is None: + self.params.imgIds = sorted(cocoGt.getImgIds()) + self.params.catIds = sorted(cocoGt.getCatIds()) + + + def _prepare(self): + ''' + Prepare ._gts and ._dts for evaluation based on params + :return: None + ''' + def _toMask(anns, coco): + # modify ann['segmentation'] by reference + for ann in anns: + rle = coco.annToRLE(ann) + ann['segmentation'] = rle + p = self.params + if p.useCats: + gts=self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds)) + dts=self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds)) + else: + gts=self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds)) + dts=self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds)) + + # convert ground truth to mask if iouType == 'segm' + if p.iouType == 'segm': + _toMask(gts, self.cocoGt) + _toMask(dts, self.cocoDt) + # set ignore flag + for gt in gts: + gt['ignore'] = gt['ignore'] if 'ignore' in gt else 0 + gt['ignore'] = 'iscrowd' in gt and gt['iscrowd'] + if p.iouType == 'keypoints': + gt['ignore'] = (gt['num_keypoints'] == 0) or gt['ignore'] + self._gts = defaultdict(list) # gt for evaluation + self._dts = defaultdict(list) # dt for evaluation + for gt in gts: + self._gts[gt['image_id'], gt['category_id']].append(gt) + for dt in dts: + self._dts[dt['image_id'], dt['category_id']].append(dt) + self.evalImgs = defaultdict(list) # per-image per-category evaluation results + self.eval = {} # accumulated evaluation results + + def evaluate(self): + ''' + Run per image evaluation on given images and store results (a list of dict) in self.evalImgs + :return: None + ''' + tic = time.time() + print('Running per image evaluation...') + p = self.params + # add backward compatibility if useSegm is specified in params + if not p.useSegm is None: + p.iouType = 'segm' if p.useSegm == 1 else 'bbox' + print('useSegm (deprecated) is not None. 
Running {} evaluation'.format(p.iouType)) + print('Evaluate annotation type *{}*'.format(p.iouType)) + p.imgIds = list(np.unique(p.imgIds)) + if p.useCats: + p.catIds = list(np.unique(p.catIds)) + p.maxDets = sorted(p.maxDets) + self.params=p + + self._prepare() + # loop through images, area range, max detection number + catIds = p.catIds if p.useCats else [-1] + + if p.iouType == 'segm' or p.iouType == 'bbox': + computeIoU = self.computeIoU + elif p.iouType == 'keypoints': + computeIoU = self.computeOks + self.ious = {(imgId, catId): computeIoU(imgId, catId) \ + for imgId in p.imgIds + for catId in catIds} + + evaluateImg = self.evaluateImg + maxDet = p.maxDets[-1] + self.evalImgs = [evaluateImg(imgId, catId, areaRng, maxDet) + for catId in catIds + for areaRng in p.areaRng + for imgId in p.imgIds + ] + self._paramsEval = copy.deepcopy(self.params) + toc = time.time() + print('DONE (t={:0.2f}s).'.format(toc-tic)) + + def computeIoU(self, imgId, catId): + p = self.params + if p.useCats: + gt = self._gts[imgId,catId] + dt = self._dts[imgId,catId] + else: + gt = [_ for cId in p.catIds for _ in self._gts[imgId,cId]] + dt = [_ for cId in p.catIds for _ in self._dts[imgId,cId]] + if len(gt) == 0 and len(dt) ==0: + return [] + inds = np.argsort([-d['score'] for d in dt], kind='mergesort') + dt = [dt[i] for i in inds] + if len(dt) > p.maxDets[-1]: + dt=dt[0:p.maxDets[-1]] + + if p.iouType == 'segm': + g = [g['segmentation'] for g in gt] + d = [d['segmentation'] for d in dt] + elif p.iouType == 'bbox': + g = [g['bbox'] for g in gt] + d = [d['bbox'] for d in dt] + else: + raise Exception('unknown iouType for iou computation') + + # compute iou between each dt and gt region + iscrowd = [int(o['iscrowd']) for o in gt] + ious = maskUtils.iou(d,g,iscrowd) + return ious + + def computeOks(self, imgId, catId): + p = self.params + # dimention here should be Nxm + gts = self._gts[imgId, catId] + dts = self._dts[imgId, catId] + inds = np.argsort([-d['score'] for d in dts], kind='mergesort') + dts = [dts[i] for i in inds] + if len(dts) > p.maxDets[-1]: + dts = dts[0:p.maxDets[-1]] + # if len(gts) == 0 and len(dts) == 0: + if len(gts) == 0 or len(dts) == 0: + return [] + ious = np.zeros((len(dts), len(gts))) + sigmas = p.kpt_oks_sigmas + vars = (sigmas * 2)**2 + k = len(sigmas) + # compute oks between each detection and ground truth object + for j, gt in enumerate(gts): + # create bounds for ignore regions(double the gt bbox) + g = np.array(gt['keypoints']) + xg = g[0::3]; yg = g[1::3]; vg = g[2::3] + k1 = np.count_nonzero(vg > 0) + bb = gt['bbox'] + x0 = bb[0] - bb[2]; x1 = bb[0] + bb[2] * 2 + y0 = bb[1] - bb[3]; y1 = bb[1] + bb[3] * 2 + for i, dt in enumerate(dts): + d = np.array(dt['keypoints']) + xd = d[0::3]; yd = d[1::3] + if k1>0: + # measure the per-keypoint distance if keypoints visible + dx = xd - xg + dy = yd - yg + else: + # measure minimum distance to keypoints in (x0,y0) & (x1,y1) + z = np.zeros((k)) + dx = np.max((z, x0-xd),axis=0)+np.max((z, xd-x1),axis=0) + dy = np.max((z, y0-yd),axis=0)+np.max((z, yd-y1),axis=0) + e = (dx**2 + dy**2) / vars / (gt['area']+np.spacing(1)) / 2 + if k1 > 0: + e=e[vg > 0] + ious[i, j] = np.sum(np.exp(-e)) / e.shape[0] + return ious + + def evaluateImg(self, imgId, catId, aRng, maxDet): + ''' + perform evaluation for single category and image + :return: dict (single image results) + ''' + p = self.params + if p.useCats: + gt = self._gts[imgId,catId] + dt = self._dts[imgId,catId] + else: + gt = [_ for cId in p.catIds for _ in self._gts[imgId,cId]] + dt = [_ for cId 
in p.catIds for _ in self._dts[imgId,cId]] + if len(gt) == 0 and len(dt) ==0: + return None + + for g in gt: + if g['ignore'] or (g['area']<aRng[0] or g['area']>aRng[1]): + g['_ignore'] = 1 + else: + g['_ignore'] = 0 + + # sort dt highest score first, sort gt ignore last + gtind = np.argsort([g['_ignore'] for g in gt], kind='mergesort') + gt = [gt[i] for i in gtind] + dtind = np.argsort([-d['score'] for d in dt], kind='mergesort') + dt = [dt[i] for i in dtind[0:maxDet]] + iscrowd = [int(o['iscrowd']) for o in gt] + # load computed ious + ious = self.ious[imgId, catId][:, gtind] if len(self.ious[imgId, catId]) > 0 else self.ious[imgId, catId] + + T = len(p.iouThrs) + G = len(gt) + D = len(dt) + gtm = np.zeros((T,G)) + dtm = np.zeros((T,D)) + gtIg = np.array([g['_ignore'] for g in gt]) + dtIg = np.zeros((T,D)) + if not len(ious)==0: + for tind, t in enumerate(p.iouThrs): + for dind, d in enumerate(dt): + # information about best match so far (m=-1 -> unmatched) + iou = min([t,1-1e-10]) + m = -1 + for gind, g in enumerate(gt): + # if this gt already matched, and not a crowd, continue + if gtm[tind,gind]>0 and not iscrowd[gind]: + continue + # if dt matched to reg gt, and on ignore gt, stop + if m>-1 and gtIg[m]==0 and gtIg[gind]==1: + break + # continue to next gt unless better match made + if ious[dind,gind] < iou: + continue + # if match successful and best so far, store appropriately + iou=ious[dind,gind] + m=gind + # if match made store id of match for both dt and gt + if m ==-1: + continue + dtIg[tind,dind] = gtIg[m] + dtm[tind,dind] = gt[m]['id'] + gtm[tind,m] = d['id'] + # set unmatched detections outside of area range to ignore + a = np.array([d['area']<aRng[0] or d['area']>aRng[1] for d in dt]).reshape((1, len(dt))) + dtIg = np.logical_or(dtIg, np.logical_and(dtm==0, np.repeat(a,T,0))) + # store results for given image and category + return { + 'image_id': imgId, + 'category_id': catId, + 'aRng': aRng, + 'maxDet': maxDet, + 'dtIds': [d['id'] for d in dt], + 'gtIds': [g['id'] for g in gt], + 'dtMatches': dtm, + 'gtMatches': gtm, + 'dtScores': [d['score'] for d in dt], + 'gtIgnore': gtIg, + 'dtIgnore': dtIg, + } + + def accumulate(self, p = None): + ''' + Accumulate per image evaluation results and store the result in self.eval + :param p: input params for evaluation + :return: None + ''' + print('Accumulating evaluation results...') + tic = time.time() + if not self.evalImgs: + print('Please run evaluate() first') + # allows input customized parameters + if p is None: + p = self.params + p.catIds = p.catIds if p.useCats == 1 else [-1] + T = len(p.iouThrs) + R = len(p.recThrs) + K = len(p.catIds) if p.useCats else 1 + A = len(p.areaRng) + M = len(p.maxDets) + precision = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories + recall = -np.ones((T,K,A,M)) + scores = -np.ones((T,R,K,A,M)) + + # create dictionary for future indexing + _pe = self._paramsEval + catIds = _pe.catIds if _pe.useCats else [-1] + setK = set(catIds) + setA = set(map(tuple, _pe.areaRng)) + setM = set(_pe.maxDets) + setI = set(_pe.imgIds) + # get inds to evaluate + k_list = [n for n, k in enumerate(p.catIds) if k in setK] + m_list = [m for n, m in enumerate(p.maxDets) if m in setM] + a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] + i_list = [n for n, i in enumerate(p.imgIds) if i in setI] + I0 = len(_pe.imgIds) + A0 = len(_pe.areaRng) + # retrieve E at each category, area range, and max number of detections + for k, k0 in enumerate(k_list): + Nk = k0*A0*I0 + for a, a0 in enumerate(a_list): + Na = a0*I0 + for m, 
maxDet in enumerate(m_list): + E = [self.evalImgs[Nk + Na + i] for i in i_list] + E = [e for e in E if not e is None] + if len(E) == 0: + continue + dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E]) + + # different sorting method generates slightly different results. + # mergesort is used to be consistent as Matlab implementation. + inds = np.argsort(-dtScores, kind='mergesort') + dtScoresSorted = dtScores[inds] + + dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds] + dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds] + gtIg = np.concatenate([e['gtIgnore'] for e in E]) + npig = np.count_nonzero(gtIg==0 ) + if npig == 0: + continue + tps = np.logical_and( dtm, np.logical_not(dtIg) ) + fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) ) + + tp_sum = np.cumsum(tps, axis=1).astype(dtype=float) + fp_sum = np.cumsum(fps, axis=1).astype(dtype=float) + for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): + tp = np.array(tp) + fp = np.array(fp) + nd = len(tp) + rc = tp / npig + pr = tp / (fp+tp+np.spacing(1)) + q = np.zeros((R,)) + ss = np.zeros((R,)) + + if nd: + recall[t,k,a,m] = rc[-1] + else: + recall[t,k,a,m] = 0 + + # numpy is slow without cython optimization for accessing elements + # use python array gets significant speed improvement + pr = pr.tolist(); q = q.tolist() + + for i in range(nd-1, 0, -1): + if pr[i] > pr[i-1]: + pr[i-1] = pr[i] + + inds = np.searchsorted(rc, p.recThrs, side='left') + try: + for ri, pi in enumerate(inds): + q[ri] = pr[pi] + ss[ri] = dtScoresSorted[pi] + except: + pass + precision[t,:,k,a,m] = np.array(q) + scores[t,:,k,a,m] = np.array(ss) + self.eval = { + 'params': p, + 'counts': [T, R, K, A, M], + 'date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), + 'precision': precision, + 'recall': recall, + 'scores': scores, + } + toc = time.time() + print('DONE (t={:0.2f}s).'.format( toc-tic)) + + def summarize(self): + ''' + Compute and display summary metrics for evaluation results. 
+ Note this functin can *only* be applied on the default parameter setting + ''' + def _summarize( ap=1, iouThr=None, areaRng='all', maxDets=100 ): + p = self.params + iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}' + titleStr = 'Average Precision' if ap == 1 else 'Average Recall' + typeStr = '(AP)' if ap==1 else '(AR)' + iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \ + if iouThr is None else '{:0.2f}'.format(iouThr) + + aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng] + mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets] + if ap == 1: + # dimension of precision: [TxRxKxAxM] + s = self.eval['precision'] + # IoU + if iouThr is not None: + t = np.where(iouThr == p.iouThrs)[0] + s = s[t] + s = s[:,:,:,aind,mind] + else: + # dimension of recall: [TxKxAxM] + s = self.eval['recall'] + if iouThr is not None: + t = np.where(iouThr == p.iouThrs)[0] + s = s[t] + s = s[:,:,aind,mind] + if len(s[s>-1])==0: + mean_s = -1 + else: + mean_s = np.mean(s[s>-1]) + print(iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s)) + return mean_s + def _summarizeDets(): + stats = np.zeros((12,)) + stats[0] = _summarize(1) + stats[1] = _summarize(1, iouThr=.5, maxDets=self.params.maxDets[2]) + stats[2] = _summarize(1, iouThr=.75, maxDets=self.params.maxDets[2]) + stats[3] = _summarize(1, areaRng='small', maxDets=self.params.maxDets[2]) + stats[4] = _summarize(1, areaRng='medium', maxDets=self.params.maxDets[2]) + stats[5] = _summarize(1, areaRng='large', maxDets=self.params.maxDets[2]) + stats[6] = _summarize(0, maxDets=self.params.maxDets[0]) + stats[7] = _summarize(0, maxDets=self.params.maxDets[1]) + stats[8] = _summarize(0, maxDets=self.params.maxDets[2]) + stats[9] = _summarize(0, areaRng='small', maxDets=self.params.maxDets[2]) + stats[10] = _summarize(0, areaRng='medium', maxDets=self.params.maxDets[2]) + stats[11] = _summarize(0, areaRng='large', maxDets=self.params.maxDets[2]) + return stats + def _summarizeKps(): + stats = np.zeros((10,)) + stats[0] = _summarize(1, maxDets=20) + stats[1] = _summarize(1, maxDets=20, iouThr=.5) + stats[2] = _summarize(1, maxDets=20, iouThr=.75) + stats[3] = _summarize(1, maxDets=20, areaRng='medium') + stats[4] = _summarize(1, maxDets=20, areaRng='large') + stats[5] = _summarize(0, maxDets=20) + stats[6] = _summarize(0, maxDets=20, iouThr=.5) + stats[7] = _summarize(0, maxDets=20, iouThr=.75) + stats[8] = _summarize(0, maxDets=20, areaRng='medium') + stats[9] = _summarize(0, maxDets=20, areaRng='large') + return stats + if not self.eval: + raise Exception('Please run accumulate() first') + iouType = self.params.iouType + if iouType == 'segm' or iouType == 'bbox': + summarize = _summarizeDets + elif iouType == 'keypoints': + summarize = _summarizeKps + self.stats = summarize() + + def __str__(self): + self.summarize() + +class Params: + ''' + Params for coco evaluation api + ''' + def setDetParams(self): + self.imgIds = [] + self.catIds = [] + # np.arange causes trouble. 
the data point on arange is slightly larger than the true value + self.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True) + self.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, endpoint=True) + self.maxDets = [1, 10, 100] + self.areaRng = [[0 ** 2, 1e5 ** 2], [0 ** 2, 32 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]] + self.areaRngLbl = ['all', 'small', 'medium', 'large'] + self.useCats = 1 + + def setKpParams(self): + self.imgIds = [] + self.catIds = [] + # np.arange causes trouble. the data point on arange is slightly larger than the true value + self.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True) + self.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, endpoint=True) + self.maxDets = [20] + self.areaRng = [[0 ** 2, 1e5 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]] + self.areaRngLbl = ['all', 'medium', 'large'] + self.useCats = 1 + self.kpt_oks_sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62,.62, 1.07, 1.07, .87, .87, .89, .89])/10.0 + + def __init__(self, iouType='segm'): + if iouType == 'segm' or iouType == 'bbox': + self.setDetParams() + elif iouType == 'keypoints': + self.setKpParams() + else: + raise Exception('iouType not supported') + self.iouType = iouType + # useSegm is deprecated + self.useSegm = None diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/pycocotools/mask.py b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/pycocotools/mask.py new file mode 100644 index 0000000000000000000000000000000000000000..85a5643aadd5c3c5f02609aa918c38d6da14a929 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/oneformer/pycocotools/mask.py @@ -0,0 +1,107 @@ +__author__ = 'tsungyi' + +# import annotator.oneformer.pycocotools._mask as _mask + +# Interface for manipulating masks stored in RLE format. +# +# RLE is a simple yet efficient format for storing binary masks. RLE +# first divides a vector (or vectorized image) into a series of piecewise +# constant regions and then for each piece simply stores the length of +# that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would +# be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] +# (note that the odd counts are always the numbers of zeros). Instead of +# storing the counts directly, additional compression is achieved with a +# variable bitrate representation based on a common scheme called LEB128. +# +# Compression is greatest given large piecewise constant regions. +# Specifically, the size of the RLE is proportional to the number of +# *boundaries* in M (or for an image the number of boundaries in the y +# direction). Assuming fairly simple shapes, the RLE representation is +# O(sqrt(n)) where n is number of pixels in the object. Hence space usage +# is substantially lower, especially for large simple objects (large n). +# +# Many common operations on masks can be computed directly using the RLE +# (without need for decoding). This includes computations such as area, +# union, intersection, etc. All of these operations are linear in the +# size of the RLE, in other words they are O(sqrt(n)) where n is the area +# of the object. Computing these operations on the original mask is O(n). +# Thus, using the RLE can result in substantial computational savings. +# +# The following API functions are defined: +# encode - Encode binary masks using RLE. +# decode - Decode binary masks encoded via RLE. 
+# merge - Compute union or intersection of encoded masks. +# iou - Compute intersection over union between masks. +# area - Compute area of encoded masks. +# toBbox - Get bounding boxes surrounding encoded masks. +# frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. +# +# Usage: +# Rs = encode( masks ) +# masks = decode( Rs ) +# R = merge( Rs, intersect=false ) +# o = iou( dt, gt, iscrowd ) +# a = area( Rs ) +# bbs = toBbox( Rs ) +# Rs = frPyObjects( [pyObjects], h, w ) +# +# In the API the following formats are used: +# Rs - [dict] Run-length encoding of binary masks +# R - dict Run-length encoding of binary mask +# masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) +# iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore +# bbs - [nx4] Bounding box(es) stored as [x y w h] +# poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) +# dt,gt - May be either bounding boxes or encoded masks +# Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). +# +# Finally, a note about the intersection over union (iou) computation. +# The standard iou of a ground truth (gt) and detected (dt) object is +# iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) +# For "crowd" regions, we use a modified criteria. If a gt object is +# marked as "iscrowd", we allow a dt to match any subregion of the gt. +# Choosing gt' in the crowd gt that best matches the dt can be done using +# gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing +# iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) +# For crowd gt regions we use this modified criteria above for the iou. +# +# To compile run "python setup.py build_ext --inplace" +# Please do not contact us for help with compiling. +# +# Microsoft COCO Toolbox. version 2.0 +# Data, paper, and tutorials available at: http://mscoco.org/ +# Code written by Piotr Dollar and Tsung-Yi Lin, 2015. +# Licensed under the Simplified BSD License [see coco/license.txt] + +# iou = _mask.iou +# merge = _mask.merge +# frPyObjects = _mask.frPyObjects + +def encode(bimask): + pass + # if len(bimask.shape) == 3: + # return _mask.encode(bimask) + # elif len(bimask.shape) == 2: + # h, w = bimask.shape + # return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0] + +def decode(rleObjs): + pass + # if type(rleObjs) == list: + # return _mask.decode(rleObjs) + # else: + # return _mask.decode([rleObjs])[:,:,0] + +def area(rleObjs): + pass + # if type(rleObjs) == list: + # return _mask.area(rleObjs) + # else: + # return _mask.area([rleObjs])[0] + +def toBbox(rleObjs): + pass + # if type(rleObjs) == list: + # return _mask.toBbox(rleObjs) + # else: + # return _mask.toBbox([rleObjs])[0] \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/openpose/LICENSE b/extensions-builtin/forge_legacy_preprocessors/annotator/openpose/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..6f60b76d35fa1012809985780964a5068adce4fd --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/openpose/LICENSE @@ -0,0 +1,108 @@ +OPENPOSE: MULTIPERSON KEYPOINT DETECTION +SOFTWARE LICENSE AGREEMENT +ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY + +BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT. IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE. 
+ +This is a license agreement ("Agreement") between your academic institution or non-profit organization or self (called "Licensee" or "You" in this Agreement) and Carnegie Mellon University (called "Licensor" in this Agreement). All rights not specifically granted to you in this Agreement are reserved for Licensor. + +RESERVATION OF OWNERSHIP AND GRANT OF LICENSE: +Licensor retains exclusive ownership of any copy of the Software (as defined below) licensed under this Agreement and hereby grants to Licensee a personal, non-exclusive, +non-transferable license to use the Software for noncommercial research purposes, without the right to sublicense, pursuant to the terms and conditions of this Agreement. As used in this Agreement, the term "Software" means (i) the actual copy of all or any portion of code for program routines made accessible to Licensee by Licensor pursuant to this Agreement, inclusive of backups, updates, and/or merged copies permitted hereunder or subsequently supplied by Licensor, including all or any file structures, programming instructions, user interfaces and screen formats and sequences as well as any and all documentation and instructions related to it, and (ii) all or any derivatives and/or modifications created or made by You to any of the items specified in (i). + +CONFIDENTIALITY: Licensee acknowledges that the Software is proprietary to Licensor, and as such, Licensee agrees to receive all such materials in confidence and use the Software only in accordance with the terms of this Agreement. Licensee agrees to use reasonable effort to protect the Software from unauthorized use, reproduction, distribution, or publication. + +COPYRIGHT: The Software is owned by Licensor and is protected by United +States copyright laws and applicable international treaties and/or conventions. + +PERMITTED USES: The Software may be used for your own noncommercial internal research purposes. You understand and agree that Licensor is not obligated to implement any suggestions and/or feedback you might provide regarding the Software, but to the extent Licensor does so, you are not entitled to any compensation related thereto. + +DERIVATIVES: You may create derivatives of or make modifications to the Software, however, You agree that all and any such derivatives and modifications will be owned by Licensor and become a part of the Software licensed to You under this Agreement. You may only use such derivatives and modifications for your own noncommercial internal research purposes, and you may not otherwise use, distribute or copy such derivatives and modifications in violation of this Agreement. + +BACKUPS: If Licensee is an organization, it may make that number of copies of the Software necessary for internal noncommercial use at a single site within its organization provided that all information appearing in or on the original labels, including the copyright and trademark notices are copied onto the labels of the copies. + +USES NOT PERMITTED: You may not distribute, copy or use the Software except as explicitly permitted herein. Licensee has not been granted any trademark license as part of this Agreement and may not use the name or mark “OpenPose", "Carnegie Mellon" or any renditions thereof without the prior written permission of Licensor. + +You may not sell, rent, lease, sublicense, lend, time-share or transfer, in whole or in part, or provide third parties access to prior or present versions (or any parts thereof) of the Software. 
+ +ASSIGNMENT: You may not assign this Agreement or your rights hereunder without the prior written consent of Licensor. Any attempted assignment without such consent shall be null and void. + +TERM: The term of the license granted by this Agreement is from Licensee's acceptance of this Agreement by downloading the Software or by using the Software until terminated as provided below. + +The Agreement automatically terminates without notice if you fail to comply with any provision of this Agreement. Licensee may terminate this Agreement by ceasing using the Software. Upon any termination of this Agreement, Licensee will delete any and all copies of the Software. You agree that all provisions which operate to protect the proprietary rights of Licensor shall remain in force should breach occur and that the obligation of confidentiality described in this Agreement is binding in perpetuity and, as such, survives the term of the Agreement. + +FEE: Provided Licensee abides completely by the terms and conditions of this Agreement, there is no fee due to Licensor for Licensee's use of the Software in accordance with this Agreement. + +DISCLAIMER OF WARRANTIES: THE SOFTWARE IS PROVIDED "AS-IS" WITHOUT WARRANTY OF ANY KIND INCLUDING ANY WARRANTIES OF PERFORMANCE OR MERCHANTABILITY OR FITNESS FOR A PARTICULAR USE OR PURPOSE OR OF NON-INFRINGEMENT. LICENSEE BEARS ALL RISK RELATING TO QUALITY AND PERFORMANCE OF THE SOFTWARE AND RELATED MATERIALS. + +SUPPORT AND MAINTENANCE: No Software support or training by the Licensor is provided as part of this Agreement. + +EXCLUSIVE REMEDY AND LIMITATION OF LIABILITY: To the maximum extent permitted under applicable law, Licensor shall not be liable for direct, indirect, special, incidental, or consequential damages or lost profits related to Licensee's use of and/or inability to use the Software, even if Licensor is advised of the possibility of such damage. + +EXPORT REGULATION: Licensee agrees to comply with any and all applicable +U.S. export control laws, regulations, and/or other laws related to embargoes and sanction programs administered by the Office of Foreign Assets Control. + +SEVERABILITY: If any provision(s) of this Agreement shall be held to be invalid, illegal, or unenforceable by a court or other tribunal of competent jurisdiction, the validity, legality and enforceability of the remaining provisions shall not in any way be affected or impaired thereby. + +NO IMPLIED WAIVERS: No failure or delay by Licensor in enforcing any right or remedy under this Agreement shall be construed as a waiver of any future or other exercise of such right or remedy by Licensor. + +GOVERNING LAW: This Agreement shall be construed and enforced in accordance with the laws of the Commonwealth of Pennsylvania without reference to conflict of laws principles. You consent to the personal jurisdiction of the courts of this County and waive their rights to venue outside of Allegheny County, Pennsylvania. + +ENTIRE AGREEMENT AND AMENDMENTS: This Agreement constitutes the sole and entire agreement between Licensee and Licensor as to the matter set forth herein and supersedes any previous agreements, understandings, and arrangements between the parties relating hereto. + + + +************************************************************************ + +THIRD-PARTY SOFTWARE NOTICES AND INFORMATION + +This project incorporates material from the project(s) listed below (collectively, "Third Party Code"). 
This Third Party Code is licensed to you under their original license terms set forth below. We reserves all other rights not expressly granted, whether by implication, estoppel or otherwise. + +1. Caffe, version 1.0.0, (https://github.com/BVLC/caffe/) + +COPYRIGHT + +All contributions by the University of California: +Copyright (c) 2014-2017 The Regents of the University of California (Regents) +All rights reserved. + +All other contributions: +Copyright (c) 2014-2017, the respective contributors +All rights reserved. + +Caffe uses a shared copyright model: each contributor holds copyright over +their contributions to Caffe. The project versioning records all such +contribution and copyright details. If a contributor wants to further mark +their specific copyright on a particular contribution, they should indicate +their copyright solely in the commit message of the change when it is +committed. + +LICENSE + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +CONTRIBUTION AGREEMENT + +By contributing to the BVLC/caffe repository through pull-request, comment, +or otherwise, the contributor releases their content to the +license and copyright terms herein. + +************END OF THIRD-PARTY SOFTWARE NOTICES AND INFORMATION********** \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/openpose/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/openpose/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..97558b94e736981a44c87f2650854e46937896b2 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/openpose/__init__.py @@ -0,0 +1,463 @@ +# Openpose +# Original from CMU https://github.com/CMU-Perceptual-Computing-Lab/openpose +# 2nd Edited by https://github.com/Hzzone/pytorch-openpose +# 3rd Edited by ControlNet +# 4th Edited by ControlNet (added face and correct hands) +# 5th Edited by ControlNet (Improved JSON serialization/deserialization, and lots of bug fixs) +# This preprocessor is licensed by CMU for non-commercial use only. + + +import os + +os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" + +import torch +import numpy as np +from . 
import util +from .body import Body, BodyResult, Keypoint +from .hand import Hand +from .face import Face +from .types import HandResult, FaceResult, HumanPoseResult, AnimalPoseResult +from modules import devices +from annotator.annotator_path import models_path +from .animalpose import draw_animalposes + +from typing import Tuple, List, Callable, Union, Optional + +body_model_path = ( + "https://huggingface.co/lllyasviel/Annotators/resolve/main/body_pose_model.pth" +) +hand_model_path = ( + "https://huggingface.co/lllyasviel/Annotators/resolve/main/hand_pose_model.pth" +) +face_model_path = ( + "https://huggingface.co/lllyasviel/Annotators/resolve/main/facenet.pth" +) + +remote_onnx_det = "https://huggingface.co/yzd-v/DWPose/resolve/main/yolox_l.onnx" +remote_onnx_pose = ( + "https://huggingface.co/yzd-v/DWPose/resolve/main/dw-ll_ucoco_384.onnx" +) + +animal_onnx_pose = "https://huggingface.co/bdsqlsz/qinglong_controlnet-lllite/resolve/main/Annotators/rtmpose-m_simcc-ap10k_pt-aic-coco_210e-256x256-7a041aa1_20230206.onnx" + + +def draw_poses( + poses: List[HumanPoseResult], H, W, draw_body=True, draw_hand=True, draw_face=True +): + """ + Draw the detected poses on an empty canvas. + + Args: + poses (List[HumanPoseResult]): A list of HumanPoseResult objects containing the detected poses. + H (int): The height of the canvas. + W (int): The width of the canvas. + draw_body (bool, optional): Whether to draw body keypoints. Defaults to True. + draw_hand (bool, optional): Whether to draw hand keypoints. Defaults to True. + draw_face (bool, optional): Whether to draw face keypoints. Defaults to True. + + Returns: + numpy.ndarray: A 3D numpy array representing the canvas with the drawn poses. + """ + canvas = np.zeros(shape=(H, W, 3), dtype=np.uint8) + + for pose in poses: + if draw_body: + canvas = util.draw_bodypose(canvas, pose.body.keypoints) + + if draw_hand: + canvas = util.draw_handpose(canvas, pose.left_hand) + canvas = util.draw_handpose(canvas, pose.right_hand) + + if draw_face: + canvas = util.draw_facepose(canvas, pose.face) + + return canvas + + +def decode_json_as_poses( + pose_json: dict, +) -> Tuple[List[HumanPoseResult], List[AnimalPoseResult], int, int]: + """Decode the json_string complying with the openpose JSON output format + to poses that controlnet recognizes. + https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/doc/02_output.md + + Args: + json_string: The json string to decode. 
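+            Note: in this version the argument is ``pose_json``, an already
+            parsed ``dict``, rather than a raw JSON string.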
+ + Returns: + human_poses + animal_poses + canvas_height + canvas_width + """ + height = pose_json["canvas_height"] + width = pose_json["canvas_width"] + + def chunks(lst, n): + """Yield successive n-sized chunks from lst.""" + for i in range(0, len(lst), n): + yield lst[i : i + n] + + def decompress_keypoints( + numbers: Optional[List[float]], + ) -> Optional[List[Optional[Keypoint]]]: + if not numbers: + return None + + assert len(numbers) % 3 == 0 + + def create_keypoint(x, y, c): + if c < 1.0: + return None + keypoint = Keypoint(x, y) + return keypoint + + return [create_keypoint(x, y, c) for x, y, c in chunks(numbers, n=3)] + + return ( + [ + HumanPoseResult( + body=BodyResult( + keypoints=decompress_keypoints(pose.get("pose_keypoints_2d")) + ), + left_hand=decompress_keypoints(pose.get("hand_left_keypoints_2d")), + right_hand=decompress_keypoints(pose.get("hand_right_keypoints_2d")), + face=decompress_keypoints(pose.get("face_keypoints_2d")), + ) + for pose in pose_json.get("people", []) + ], + [decompress_keypoints(pose) for pose in pose_json.get("animals", [])], + height, + width, + ) + + +def encode_poses_as_json( + poses: List[HumanPoseResult], + animals: List[AnimalPoseResult], + canvas_height: int, + canvas_width: int, +) -> dict: + """Encode the pose as a JSON compatible dict following openpose JSON output format: + https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/doc/02_output.md + """ + + def compress_keypoints( + keypoints: Union[List[Keypoint], None] + ) -> Union[List[float], None]: + if not keypoints: + return None + + return [ + value + for keypoint in keypoints + for value in ( + [float(keypoint.x), float(keypoint.y), 1.0] + if keypoint is not None + else [0.0, 0.0, 0.0] + ) + ] + + return { + "people": [ + { + "pose_keypoints_2d": compress_keypoints(pose.body.keypoints), + "face_keypoints_2d": compress_keypoints(pose.face), + "hand_left_keypoints_2d": compress_keypoints(pose.left_hand), + "hand_right_keypoints_2d": compress_keypoints(pose.right_hand), + } + for pose in poses + ], + "animals": [compress_keypoints(animal) for animal in animals], + "canvas_height": canvas_height, + "canvas_width": canvas_width, + } + + +class OpenposeDetector: + """ + A class for detecting human poses in images using the Openpose model. + + Attributes: + model_dir (str): Path to the directory where the pose models are stored. + """ + + model_dir = os.path.join(models_path, "openpose") + + def __init__(self): + self.device = devices.get_device_for("controlnet") + self.body_estimation = None + self.hand_estimation = None + self.face_estimation = None + + self.dw_pose_estimation = None + self.animal_pose_estimation = None + + def load_model(self): + """ + Load the Openpose body, hand, and face models. 
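+        Any weight file missing from ``model_dir`` is first downloaded from its
+        remote URL before the Body, Hand, and Face estimators are constructed.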
+ """ + body_modelpath = os.path.join(self.model_dir, "body_pose_model.pth") + hand_modelpath = os.path.join(self.model_dir, "hand_pose_model.pth") + face_modelpath = os.path.join(self.model_dir, "facenet.pth") + + if not os.path.exists(body_modelpath): + from modules.modelloader import load_file_from_url + + load_file_from_url(body_model_path, model_dir=self.model_dir) + + if not os.path.exists(hand_modelpath): + from modules.modelloader import load_file_from_url + + load_file_from_url(hand_model_path, model_dir=self.model_dir) + + if not os.path.exists(face_modelpath): + from modules.modelloader import load_file_from_url + + load_file_from_url(face_model_path, model_dir=self.model_dir) + + self.body_estimation = Body(body_modelpath) + self.hand_estimation = Hand(hand_modelpath) + self.face_estimation = Face(face_modelpath) + + def load_dw_model(self): + from .wholebody import Wholebody # DW Pose + + def load_model(filename: str, remote_url: str): + local_path = os.path.join(self.model_dir, filename) + if not os.path.exists(local_path): + from modules.modelloader import load_file_from_url + + load_file_from_url(remote_url, model_dir=self.model_dir) + return local_path + + onnx_det = load_model("yolox_l.onnx", remote_onnx_det) + onnx_pose = load_model("dw-ll_ucoco_384.onnx", remote_onnx_pose) + self.dw_pose_estimation = Wholebody(onnx_det, onnx_pose) + + def load_animalpose_model(self): + from .animalpose import AnimalPose # Animalpose + + def load_model(filename: str, remote_url: str): + """ + Load the model from the specified filename and remote URL if it doesn't exist locally. + + Args: + filename (str): The filename of the model. + remote_url (str): The remote URL of the model. + """ + local_path = os.path.join(self.model_dir, filename) + if not os.path.exists(local_path): + from modules.modelloader import load_file_from_url + + load_file_from_url(remote_url, model_dir=self.model_dir) + return local_path + + onnx_det = load_model("yolox_l.onnx", remote_onnx_det) + onnx_pose = load_model( + "rtmpose-m_simcc-ap10k_pt-aic-coco_210e-256x256-7a041aa1_20230206.onnx", + animal_onnx_pose, + ) + self.animal_pose_estimation = AnimalPose(onnx_det, onnx_pose) + + def unload_model(self): + """ + Unload the Openpose models by moving them to the CPU. + Note: DW Pose models always run on CPU, so no need to `unload` them. 
+ """ + if self.body_estimation is not None: + self.body_estimation.model.to("cpu") + self.hand_estimation.model.to("cpu") + self.face_estimation.model.to("cpu") + + def detect_hands( + self, body: BodyResult, oriImg + ) -> Tuple[Union[HandResult, None], Union[HandResult, None]]: + left_hand = None + right_hand = None + H, W, _ = oriImg.shape + for x, y, w, is_left in util.handDetect(body, oriImg): + peaks = self.hand_estimation(oriImg[y : y + w, x : x + w, :]).astype( + np.float32 + ) + if peaks.ndim == 2 and peaks.shape[1] == 2: + peaks[:, 0] = np.where(peaks[:, 0] < 1e-6, -1, peaks[:, 0] + x) / float( + W + ) + peaks[:, 1] = np.where(peaks[:, 1] < 1e-6, -1, peaks[:, 1] + y) / float( + H + ) + + hand_result = [Keypoint(x=peak[0], y=peak[1]) for peak in peaks] + + if is_left: + left_hand = hand_result + else: + right_hand = hand_result + + return left_hand, right_hand + + def detect_face(self, body: BodyResult, oriImg) -> Union[FaceResult, None]: + face = util.faceDetect(body, oriImg) + if face is None: + return None + + x, y, w = face + H, W, _ = oriImg.shape + heatmaps = self.face_estimation(oriImg[y : y + w, x : x + w, :]) + peaks = self.face_estimation.compute_peaks_from_heatmaps(heatmaps).astype( + np.float32 + ) + if peaks.ndim == 2 and peaks.shape[1] == 2: + peaks[:, 0] = np.where(peaks[:, 0] < 1e-6, -1, peaks[:, 0] + x) / float(W) + peaks[:, 1] = np.where(peaks[:, 1] < 1e-6, -1, peaks[:, 1] + y) / float(H) + return [Keypoint(x=peak[0], y=peak[1]) for peak in peaks] + + return None + + def detect_poses( + self, oriImg, include_hand=False, include_face=False + ) -> List[HumanPoseResult]: + """ + Detect poses in the given image. + Args: + oriImg (numpy.ndarray): The input image for pose detection. + include_hand (bool, optional): Whether to include hand detection. Defaults to False. + include_face (bool, optional): Whether to include face detection. Defaults to False. + + Returns: + List[HumanPoseResult]: A list of HumanPoseResult objects containing the detected poses. + """ + if self.body_estimation is None: + self.load_model() + + self.body_estimation.model.to(self.device) + self.hand_estimation.model.to(self.device) + self.face_estimation.model.to(self.device) + + self.body_estimation.cn_device = self.device + self.hand_estimation.cn_device = self.device + self.face_estimation.cn_device = self.device + + oriImg = oriImg[:, :, ::-1].copy() + H, W, C = oriImg.shape + with torch.no_grad(): + candidate, subset = self.body_estimation(oriImg) + bodies = self.body_estimation.format_body_result(candidate, subset) + + results = [] + for body in bodies: + left_hand, right_hand, face = (None,) * 3 + if include_hand: + left_hand, right_hand = self.detect_hands(body, oriImg) + if include_face: + face = self.detect_face(body, oriImg) + + results.append( + HumanPoseResult( + BodyResult( + keypoints=[ + Keypoint( + x=keypoint.x / float(W), y=keypoint.y / float(H) + ) + if keypoint is not None + else None + for keypoint in body.keypoints + ], + total_score=body.total_score, + total_parts=body.total_parts, + ), + left_hand, + right_hand, + face, + ) + ) + + return results + + def detect_poses_dw(self, oriImg) -> List[HumanPoseResult]: + """ + Detect poses in the given image using DW Pose: + https://github.com/IDEA-Research/DWPose + + Args: + oriImg (numpy.ndarray): The input image for pose detection. + + Returns: + List[HumanPoseResult]: A list of HumanPoseResult objects containing the detected poses. 
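+
+        Example (hypothetical usage sketch; ``img`` stands for an HxWx3 uint8
+        image loaded elsewhere):
+            >>> detector = OpenposeDetector()
+            >>> poses = detector.detect_poses_dw(img)
+            >>> canvas = draw_poses(poses, img.shape[0], img.shape[1])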
+ """ + from .wholebody import Wholebody # DW Pose + + self.load_dw_model() + + with torch.no_grad(): + keypoints_info = self.dw_pose_estimation(oriImg.copy()) + return Wholebody.format_result(keypoints_info) + + def detect_poses_animal(self, oriImg) -> List[AnimalPoseResult]: + """ + Detect poses in the given image using RTMPose AP10k model: + https://github.com/abehonest/ControlNet_AnimalPose + + Args: + oriImg (numpy.ndarray): The input image for pose detection. + + Returns: + A list of AnimalPoseResult objects containing the detected animal poses. + """ + + self.load_animalpose_model() + + with torch.no_grad(): + return self.animal_pose_estimation(oriImg.copy()) + + def __call__( + self, + oriImg, + include_body=True, + include_hand=False, + include_face=False, + use_dw_pose=False, + use_animal_pose=False, + json_pose_callback: Callable[[str], None] = None, + ): + """ + Detect and draw poses in the given image. + + Args: + oriImg (numpy.ndarray): The input image for pose detection and drawing. + include_body (bool, optional): Whether to include body keypoints. Defaults to True. + include_hand (bool, optional): Whether to include hand keypoints. Defaults to False. + include_face (bool, optional): Whether to include face keypoints. Defaults to False. + use_dw_pose (bool, optional): Whether to use DW pose detection algorithm. Defaults to False. + json_pose_callback (Callable, optional): A callback that accepts the pose JSON string. + + Returns: + numpy.ndarray: The image with detected and drawn poses. + """ + H, W, _ = oriImg.shape + animals = [] + poses = [] + if use_animal_pose: + animals = self.detect_poses_animal(oriImg) + elif use_dw_pose: + poses = self.detect_poses_dw(oriImg) + else: + poses = self.detect_poses(oriImg, include_hand, include_face) + + if json_pose_callback: + json_pose_callback(encode_poses_as_json(poses, animals, H, W)) + + if poses: + assert len(animals) == 0 + return draw_poses( + poses, + H, + W, + draw_body=include_body, + draw_hand=include_hand, + draw_face=include_face, + ) + else: + return draw_animalposes(animals, H, W) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/openpose/animalpose.py b/extensions-builtin/forge_legacy_preprocessors/annotator/openpose/animalpose.py new file mode 100644 index 0000000000000000000000000000000000000000..46fec4a4e1555784d73c5b5440cdce032675f406 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/openpose/animalpose.py @@ -0,0 +1,131 @@ +import cv2 +import numpy as np +from typing import List + +from .cv_ox_det import inference_detector +from .cv_ox_pose import inference_pose + +from .types import AnimalPoseResult, Keypoint + + +def draw_animalposes(animals: List[List[Keypoint]], H: int, W: int) -> np.ndarray: + canvas = np.zeros(shape=(H, W, 3), dtype=np.uint8) + for animal_pose in animals: + canvas = draw_animalpose(canvas, animal_pose) + return canvas + + +def draw_animalpose(canvas: np.ndarray, keypoints: List[Keypoint]) -> np.ndarray: + # order of the keypoints for AP10k and a standardized list of colors for limbs + keypointPairsList = [ + (1, 2), + (2, 3), + (1, 3), + (3, 4), + (4, 9), + (9, 10), + (10, 11), + (4, 6), + (6, 7), + (7, 8), + (4, 5), + (5, 15), + (15, 16), + (16, 17), + (5, 12), + (12, 13), + (13, 14), + ] + colorsList = [ + (255, 255, 255), + (100, 255, 100), + (150, 255, 255), + (100, 50, 255), + (50, 150, 200), + (0, 255, 255), + (0, 150, 0), + (0, 0, 255), + (0, 0, 150), + (255, 50, 255), + (255, 0, 255), + (255, 0, 0), + (150, 0, 0), + (255, 255, 100), + 
(0, 150, 0), + (255, 255, 0), + (150, 150, 150), + ] # 16 colors needed + + for ind, (i, j) in enumerate(keypointPairsList): + p1 = keypoints[i - 1] + p2 = keypoints[j - 1] + + if p1 is not None and p2 is not None: + cv2.line( + canvas, + (int(p1.x), int(p1.y)), + (int(p2.x), int(p2.y)), + colorsList[ind], + 5, + ) + return canvas + + +class AnimalPose: + def __init__( + self, + onnx_det: str, + onnx_pose: str, + ): + self.onnx_det = onnx_det + self.onnx_pose = onnx_pose + self.model_input_size = (256, 256) + + # Always loads to CPU to avoid building OpenCV. + device = 'cpu' + backend = cv2.dnn.DNN_BACKEND_OPENCV if device == 'cpu' else cv2.dnn.DNN_BACKEND_CUDA + # You need to manually build OpenCV through cmake to work with your GPU. + providers = cv2.dnn.DNN_TARGET_CPU if device == 'cpu' else cv2.dnn.DNN_TARGET_CUDA + + self.session_det = cv2.dnn.readNetFromONNX(onnx_det) + self.session_det.setPreferableBackend(backend) + self.session_det.setPreferableTarget(providers) + + self.session_pose = cv2.dnn.readNetFromONNX(onnx_pose) + self.session_pose.setPreferableBackend(backend) + self.session_pose.setPreferableTarget(providers) + + def __call__(self, oriImg) -> List[AnimalPoseResult]: + detect_classes = list( + range(14, 23 + 1) + ) # https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml + + det_result = inference_detector( + self.session_det, + oriImg, + detect_classes=detect_classes, + ) + + if (det_result is None) or (det_result.shape[0] == 0): + return [] + + keypoint_sets, scores = inference_pose( + self.session_pose, + det_result, + oriImg, + self.model_input_size, + ) + + animals = [] + for idx, keypoints in enumerate(keypoint_sets): + score = scores[idx, ..., None] + score[score > 1.0] = 1.0 + score[score < 0.0] = 0.0 + animals.append( + [ + Keypoint(x, y, c) + for x, y, c in np.concatenate((keypoints, score), axis=-1).tolist() + ] + ) + + return animals diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/openpose/body.py b/extensions-builtin/forge_legacy_preprocessors/annotator/openpose/body.py new file mode 100644 index 0000000000000000000000000000000000000000..d3ec1cb9169441bfd5b1ca20486ea88c8c67834f --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/openpose/body.py @@ -0,0 +1,258 @@ +import cv2 +import numpy as np +import math +import time +from scipy.ndimage import gaussian_filter +import matplotlib.pyplot as plt +import matplotlib +import torch +from torchvision import transforms +from typing import NamedTuple, List, Union + +from . 
import util +from .model import bodypose_model +from .types import Keypoint, BodyResult + +class Body(object): + def __init__(self, model_path): + self.model = bodypose_model() + model_dict = util.transfer(self.model, torch.load(model_path)) + self.model.load_state_dict(model_dict) + self.model.eval() + + def __call__(self, oriImg): + # scale_search = [0.5, 1.0, 1.5, 2.0] + scale_search = [0.5] + boxsize = 368 + stride = 8 + padValue = 128 + thre1 = 0.1 + thre2 = 0.05 + multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search] + heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 19)) + paf_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 38)) + + for m in range(len(multiplier)): + scale = multiplier[m] + imageToTest = util.smart_resize_k(oriImg, fx=scale, fy=scale) + imageToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, padValue) + im = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5 + im = np.ascontiguousarray(im) + + data = torch.from_numpy(im).float() + if torch.cuda.is_available(): + data = data.cuda() + # data = data.permute([2, 0, 1]).unsqueeze(0).float() + with torch.no_grad(): + data = data.to(self.cn_device) + Mconv7_stage6_L1, Mconv7_stage6_L2 = self.model(data) + Mconv7_stage6_L1 = Mconv7_stage6_L1.cpu().numpy() + Mconv7_stage6_L2 = Mconv7_stage6_L2.cpu().numpy() + + # extract outputs, resize, and remove padding + # heatmap = np.transpose(np.squeeze(net.blobs[output_blobs.keys()[1]].data), (1, 2, 0)) # output 1 is heatmaps + heatmap = np.transpose(np.squeeze(Mconv7_stage6_L2), (1, 2, 0)) # output 1 is heatmaps + heatmap = util.smart_resize_k(heatmap, fx=stride, fy=stride) + heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :] + heatmap = util.smart_resize(heatmap, (oriImg.shape[0], oriImg.shape[1])) + + # paf = np.transpose(np.squeeze(net.blobs[output_blobs.keys()[0]].data), (1, 2, 0)) # output 0 is PAFs + paf = np.transpose(np.squeeze(Mconv7_stage6_L1), (1, 2, 0)) # output 0 is PAFs + paf = util.smart_resize_k(paf, fx=stride, fy=stride) + paf = paf[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :] + paf = util.smart_resize(paf, (oriImg.shape[0], oriImg.shape[1])) + + heatmap_avg += heatmap_avg + heatmap / len(multiplier) + paf_avg += + paf / len(multiplier) + + all_peaks = [] + peak_counter = 0 + + for part in range(18): + map_ori = heatmap_avg[:, :, part] + one_heatmap = gaussian_filter(map_ori, sigma=3) + + map_left = np.zeros(one_heatmap.shape) + map_left[1:, :] = one_heatmap[:-1, :] + map_right = np.zeros(one_heatmap.shape) + map_right[:-1, :] = one_heatmap[1:, :] + map_up = np.zeros(one_heatmap.shape) + map_up[:, 1:] = one_heatmap[:, :-1] + map_down = np.zeros(one_heatmap.shape) + map_down[:, :-1] = one_heatmap[:, 1:] + + peaks_binary = np.logical_and.reduce( + (one_heatmap >= map_left, one_heatmap >= map_right, one_heatmap >= map_up, one_heatmap >= map_down, one_heatmap > thre1)) + peaks = list(zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0])) # note reverse + peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks] + peak_id = range(peak_counter, peak_counter + len(peaks)) + peaks_with_score_and_id = [peaks_with_score[i] + (peak_id[i],) for i in range(len(peak_id))] + + all_peaks.append(peaks_with_score_and_id) + peak_counter += len(peaks) + + # find connection in the specified sequence, center 29 is in the position 15 + limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], \ + 
[10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17], \ + [1, 16], [16, 18], [3, 17], [6, 18]] + # the middle joints heatmap correpondence + mapIdx = [[31, 32], [39, 40], [33, 34], [35, 36], [41, 42], [43, 44], [19, 20], [21, 22], \ + [23, 24], [25, 26], [27, 28], [29, 30], [47, 48], [49, 50], [53, 54], [51, 52], \ + [55, 56], [37, 38], [45, 46]] + + connection_all = [] + special_k = [] + mid_num = 10 + + for k in range(len(mapIdx)): + score_mid = paf_avg[:, :, [x - 19 for x in mapIdx[k]]] + candA = all_peaks[limbSeq[k][0] - 1] + candB = all_peaks[limbSeq[k][1] - 1] + nA = len(candA) + nB = len(candB) + indexA, indexB = limbSeq[k] + if (nA != 0 and nB != 0): + connection_candidate = [] + for i in range(nA): + for j in range(nB): + vec = np.subtract(candB[j][:2], candA[i][:2]) + norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1]) + norm = max(0.001, norm) + vec = np.divide(vec, norm) + + startend = list(zip(np.linspace(candA[i][0], candB[j][0], num=mid_num), \ + np.linspace(candA[i][1], candB[j][1], num=mid_num))) + + vec_x = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 0] \ + for I in range(len(startend))]) + vec_y = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 1] \ + for I in range(len(startend))]) + + score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1]) + score_with_dist_prior = sum(score_midpts) / len(score_midpts) + min( + 0.5 * oriImg.shape[0] / norm - 1, 0) + criterion1 = len(np.nonzero(score_midpts > thre2)[0]) > 0.8 * len(score_midpts) + criterion2 = score_with_dist_prior > 0 + if criterion1 and criterion2: + connection_candidate.append( + [i, j, score_with_dist_prior, score_with_dist_prior + candA[i][2] + candB[j][2]]) + + connection_candidate = sorted(connection_candidate, key=lambda x: x[2], reverse=True) + connection = np.zeros((0, 5)) + for c in range(len(connection_candidate)): + i, j, s = connection_candidate[c][0:3] + if (i not in connection[:, 3] and j not in connection[:, 4]): + connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]]) + if (len(connection) >= min(nA, nB)): + break + + connection_all.append(connection) + else: + special_k.append(k) + connection_all.append([]) + + # last number in each row is the total parts number of that person + # the second last number in each row is the score of the overall configuration + subset = -1 * np.ones((0, 20)) + candidate = np.array([item for sublist in all_peaks for item in sublist]) + + for k in range(len(mapIdx)): + if k not in special_k: + partAs = connection_all[k][:, 0] + partBs = connection_all[k][:, 1] + indexA, indexB = np.array(limbSeq[k]) - 1 + + for i in range(len(connection_all[k])): # = 1:size(temp,1) + found = 0 + subset_idx = [-1, -1] + for j in range(len(subset)): # 1:size(subset,1): + if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]: + subset_idx[found] = j + found += 1 + + if found == 1: + j = subset_idx[0] + if subset[j][indexB] != partBs[i]: + subset[j][indexB] = partBs[i] + subset[j][-1] += 1 + subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2] + elif found == 2: # if found 2 and disjoint, merge them + j1, j2 = subset_idx + membership = ((subset[j1] >= 0).astype(int) + (subset[j2] >= 0).astype(int))[:-2] + if len(np.nonzero(membership == 2)[0]) == 0: # merge + subset[j1][:-2] += (subset[j2][:-2] + 1) + subset[j1][-2:] += subset[j2][-2:] + subset[j1][-2] += connection_all[k][i][2] + subset = np.delete(subset, j2, 0) + else: # as like found == 1 + 
subset[j1][indexB] = partBs[i] + subset[j1][-1] += 1 + subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2] + + # if find no partA in the subset, create a new subset + elif not found and k < 17: + row = -1 * np.ones(20) + row[indexA] = partAs[i] + row[indexB] = partBs[i] + row[-1] = 2 + row[-2] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) + connection_all[k][i][2] + subset = np.vstack([subset, row]) + # delete some rows of subset which has few parts occur + deleteIdx = [] + for i in range(len(subset)): + if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4: + deleteIdx.append(i) + subset = np.delete(subset, deleteIdx, axis=0) + + # subset: n*20 array, 0-17 is the index in candidate, 18 is the total score, 19 is the total parts + # candidate: x, y, score, id + return candidate, subset + + @staticmethod + def format_body_result(candidate: np.ndarray, subset: np.ndarray) -> List[BodyResult]: + """ + Format the body results from the candidate and subset arrays into a list of BodyResult objects. + + Args: + candidate (np.ndarray): An array of candidates containing the x, y coordinates, score, and id + for each body part. + subset (np.ndarray): An array of subsets containing indices to the candidate array for each + person detected. The last two columns of each row hold the total score and total parts + of the person. + + Returns: + List[BodyResult]: A list of BodyResult objects, where each object represents a person with + detected keypoints, total score, and total parts. + """ + return [ + BodyResult( + keypoints=[ + Keypoint( + x=candidate[candidate_index][0], + y=candidate[candidate_index][1], + score=candidate[candidate_index][2], + id=candidate[candidate_index][3] + ) if candidate_index != -1 else None + for candidate_index in person[:18].astype(int) + ], + total_score=person[18], + total_parts=person[19] + ) + for person in subset + ] + + +if __name__ == "__main__": + body_estimation = Body('../model/body_pose_model.pth') + + test_image = '../images/ski.jpg' + oriImg = cv2.imread(test_image) # B,G,R order + candidate, subset = body_estimation(oriImg) + bodies = body_estimation.format_body_result(candidate, subset) + + canvas = oriImg + for body in bodies: + canvas = util.draw_bodypose(canvas, body) + + plt.imshow(canvas[:, :, [2, 1, 0]]) + plt.show() \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/openpose/cv_ox_det.py b/extensions-builtin/forge_legacy_preprocessors/annotator/openpose/cv_ox_det.py new file mode 100644 index 0000000000000000000000000000000000000000..0c210a5307bb571410efaf1bbcce43f1d213cf3c --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/openpose/cv_ox_det.py @@ -0,0 +1,128 @@ +import cv2 +import numpy as np + +def nms(boxes, scores, nms_thr): + """Single class NMS implemented in Numpy.""" + x1 = boxes[:, 0] + y1 = boxes[:, 1] + x2 = boxes[:, 2] + y2 = boxes[:, 3] + + areas = (x2 - x1 + 1) * (y2 - y1 + 1) + order = scores.argsort()[::-1] + + keep = [] + while order.size > 0: + i = order[0] + keep.append(i) + xx1 = np.maximum(x1[i], x1[order[1:]]) + yy1 = np.maximum(y1[i], y1[order[1:]]) + xx2 = np.minimum(x2[i], x2[order[1:]]) + yy2 = np.minimum(y2[i], y2[order[1:]]) + + w = np.maximum(0.0, xx2 - xx1 + 1) + h = np.maximum(0.0, yy2 - yy1 + 1) + inter = w * h + ovr = inter / (areas[i] + areas[order[1:]] - inter) + + inds = np.where(ovr <= nms_thr)[0] + order = order[inds + 1] + + return keep + +def multiclass_nms(boxes, scores, nms_thr, score_thr): + 
"""Multiclass NMS implemented in Numpy. Class-aware version.""" + final_dets = [] + num_classes = scores.shape[1] + for cls_ind in range(num_classes): + cls_scores = scores[:, cls_ind] + valid_score_mask = cls_scores > score_thr + if valid_score_mask.sum() == 0: + continue + else: + valid_scores = cls_scores[valid_score_mask] + valid_boxes = boxes[valid_score_mask] + keep = nms(valid_boxes, valid_scores, nms_thr) + if len(keep) > 0: + cls_inds = np.ones((len(keep), 1)) * cls_ind + dets = np.concatenate( + [valid_boxes[keep], valid_scores[keep, None], cls_inds], 1 + ) + final_dets.append(dets) + if len(final_dets) == 0: + return None + return np.concatenate(final_dets, 0) + +def demo_postprocess(outputs, img_size, p6=False): + grids = [] + expanded_strides = [] + strides = [8, 16, 32] if not p6 else [8, 16, 32, 64] + + hsizes = [img_size[0] // stride for stride in strides] + wsizes = [img_size[1] // stride for stride in strides] + + for hsize, wsize, stride in zip(hsizes, wsizes, strides): + xv, yv = np.meshgrid(np.arange(wsize), np.arange(hsize)) + grid = np.stack((xv, yv), 2).reshape(1, -1, 2) + grids.append(grid) + shape = grid.shape[:2] + expanded_strides.append(np.full((*shape, 1), stride)) + + grids = np.concatenate(grids, 1) + expanded_strides = np.concatenate(expanded_strides, 1) + outputs[..., :2] = (outputs[..., :2] + grids) * expanded_strides + outputs[..., 2:4] = np.exp(outputs[..., 2:4]) * expanded_strides + + return outputs + +def preprocess(img, input_size, swap=(2, 0, 1)): + if len(img.shape) == 3: + padded_img = np.ones((input_size[0], input_size[1], 3), dtype=np.uint8) * 114 + else: + padded_img = np.ones(input_size, dtype=np.uint8) * 114 + + r = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1]) + resized_img = cv2.resize( + img, + (int(img.shape[1] * r), int(img.shape[0] * r)), + interpolation=cv2.INTER_LINEAR, + ).astype(np.uint8) + padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img + + padded_img = padded_img.transpose(swap) + padded_img = np.ascontiguousarray(padded_img, dtype=np.float32) + return padded_img, r + +def inference_detector(session, oriImg, detect_classes=[0]): + input_shape = (640,640) + img, ratio = preprocess(oriImg, input_shape) + + input = img[None, :, :, :] + if "InferenceSession" in type(session).__name__: + input_name = session.get_inputs()[0].name + output = session.run(None, {input_name: input}) + else: + outNames = session.getUnconnectedOutLayersNames() + session.setInput(input) + output = session.forward(outNames) + + predictions = demo_postprocess(output[0], input_shape)[0] + + boxes = predictions[:, :4] + scores = predictions[:, 4:5] * predictions[:, 5:] + + boxes_xyxy = np.ones_like(boxes) + boxes_xyxy[:, 0] = boxes[:, 0] - boxes[:, 2]/2. + boxes_xyxy[:, 1] = boxes[:, 1] - boxes[:, 3]/2. + boxes_xyxy[:, 2] = boxes[:, 0] + boxes[:, 2]/2. + boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3]/2. 
+ boxes_xyxy /= ratio + dets = multiclass_nms(boxes_xyxy, scores, nms_thr=0.45, score_thr=0.1) + if dets is None: + return None + final_boxes, final_scores, final_cls_inds = dets[:, :4], dets[:, 4], dets[:, 5] + isscore = final_scores>0.3 + iscat = np.isin(final_cls_inds, detect_classes) + isbbox = [ i and j for (i, j) in zip(isscore, iscat)] + final_boxes = final_boxes[isbbox] + return final_boxes diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/openpose/cv_ox_pose.py b/extensions-builtin/forge_legacy_preprocessors/annotator/openpose/cv_ox_pose.py new file mode 100644 index 0000000000000000000000000000000000000000..529404ca82e49fe67b6f3a57609bd41b9022a94c --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/openpose/cv_ox_pose.py @@ -0,0 +1,364 @@ +from typing import List, Tuple + +import cv2 +import numpy as np + +def preprocess( + img: np.ndarray, out_bbox, input_size: Tuple[int, int] = (192, 256) +) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + """Do preprocessing for DWPose model inference. + + Args: + img (np.ndarray): Input image in shape. + input_size (tuple): Input image size in shape (w, h). + + Returns: + tuple: + - resized_img (np.ndarray): Preprocessed image. + - center (np.ndarray): Center of image. + - scale (np.ndarray): Scale of image. + """ + # get shape of image + img_shape = img.shape[:2] + out_img, out_center, out_scale = [], [], [] + if len(out_bbox) == 0: + out_bbox = [[0, 0, img_shape[1], img_shape[0]]] + for i in range(len(out_bbox)): + x0 = out_bbox[i][0] + y0 = out_bbox[i][1] + x1 = out_bbox[i][2] + y1 = out_bbox[i][3] + bbox = np.array([x0, y0, x1, y1]) + + # get center and scale + center, scale = bbox_xyxy2cs(bbox, padding=1.25) + + # do affine transformation + resized_img, scale = top_down_affine(input_size, scale, center, img) + + # normalize image + mean = np.array([123.675, 116.28, 103.53]) + std = np.array([58.395, 57.12, 57.375]) + resized_img = (resized_img - mean) / std + + out_img.append(resized_img) + out_center.append(center) + out_scale.append(scale) + + return out_img, out_center, out_scale + + +def inference(sess, img): + """Inference DWPose model. + + Args: + sess : ONNXRuntime session. + img : Input image in shape. + + Returns: + outputs : Output of DWPose model. + """ + all_out = [] + # build input + input = np.stack(img, axis=0).transpose(0, 3, 1, 2) + input = input.astype(np.float32) + if "InferenceSession" in type(sess).__name__: + input_name = sess.get_inputs()[0].name + all_outputs = sess.run(None, {input_name: input}) + for batch_idx in range(len(all_outputs[0])): + outputs = [all_outputs[i][batch_idx:batch_idx+1,...] for i in range(len(all_outputs))] + all_out.append(outputs) + return all_out + + for i in range(len(img)): + + input = img[i].transpose(2, 0, 1) + input = input[None, :, :, :] + + outNames = sess.getUnconnectedOutLayersNames() + sess.setInput(input) + outputs = sess.forward(outNames) + all_out.append(outputs) + + return all_out + + +def postprocess(outputs: List[np.ndarray], + model_input_size: Tuple[int, int], + center: Tuple[int, int], + scale: Tuple[int, int], + simcc_split_ratio: float = 2.0 + ) -> Tuple[np.ndarray, np.ndarray]: + """Postprocess for DWPose model output. + + Args: + outputs (np.ndarray): Output of RTMPose model. + model_input_size (tuple): RTMPose model Input image size. + center (tuple): Center of bbox in shape (x, y). + scale (tuple): Scale of bbox in shape (w, h). + simcc_split_ratio (float): Split ratio of simcc. 
+ + Returns: + tuple: + - keypoints (np.ndarray): Rescaled keypoints. + - scores (np.ndarray): Model predict scores. + """ + all_key = [] + all_score = [] + for i in range(len(outputs)): + # use simcc to decode + simcc_x, simcc_y = outputs[i] + keypoints, scores = decode(simcc_x, simcc_y, simcc_split_ratio) + + # rescale keypoints + keypoints = keypoints / model_input_size * scale[i] + center[i] - scale[i] / 2 + all_key.append(keypoints[0]) + all_score.append(scores[0]) + + return np.array(all_key), np.array(all_score) + + +def bbox_xyxy2cs(bbox: np.ndarray, + padding: float = 1.) -> Tuple[np.ndarray, np.ndarray]: + """Transform the bbox format from (x,y,w,h) into (center, scale) + + Args: + bbox (ndarray): Bounding box(es) in shape (4,) or (n, 4), formatted + as (left, top, right, bottom) + padding (float): BBox padding factor that will be multilied to scale. + Default: 1.0 + + Returns: + tuple: A tuple containing center and scale. + - np.ndarray[float32]: Center (x, y) of the bbox in shape (2,) or + (n, 2) + - np.ndarray[float32]: Scale (w, h) of the bbox in shape (2,) or + (n, 2) + """ + # convert single bbox from (4, ) to (1, 4) + dim = bbox.ndim + if dim == 1: + bbox = bbox[None, :] + + # get bbox center and scale + x1, y1, x2, y2 = np.hsplit(bbox, [1, 2, 3]) + center = np.hstack([x1 + x2, y1 + y2]) * 0.5 + scale = np.hstack([x2 - x1, y2 - y1]) * padding + + if dim == 1: + center = center[0] + scale = scale[0] + + return center, scale + + +def _fix_aspect_ratio(bbox_scale: np.ndarray, + aspect_ratio: float) -> np.ndarray: + """Extend the scale to match the given aspect ratio. + + Args: + scale (np.ndarray): The image scale (w, h) in shape (2, ) + aspect_ratio (float): The ratio of ``w/h`` + + Returns: + np.ndarray: The reshaped image scale in (2, ) + """ + w, h = np.hsplit(bbox_scale, [1]) + bbox_scale = np.where(w > h * aspect_ratio, + np.hstack([w, w / aspect_ratio]), + np.hstack([h * aspect_ratio, h])) + return bbox_scale + + +def _rotate_point(pt: np.ndarray, angle_rad: float) -> np.ndarray: + """Rotate a point by an angle. + + Args: + pt (np.ndarray): 2D point coordinates (x, y) in shape (2, ) + angle_rad (float): rotation angle in radian + + Returns: + np.ndarray: Rotated point in shape (2, ) + """ + sn, cs = np.sin(angle_rad), np.cos(angle_rad) + rot_mat = np.array([[cs, -sn], [sn, cs]]) + return rot_mat @ pt + + +def _get_3rd_point(a: np.ndarray, b: np.ndarray) -> np.ndarray: + """To calculate the affine matrix, three pairs of points are required. This + function is used to get the 3rd point, given 2D points a & b. + + The 3rd point is defined by rotating vector `a - b` by 90 degrees + anticlockwise, using b as the rotation center. + + Args: + a (np.ndarray): The 1st point (x,y) in shape (2, ) + b (np.ndarray): The 2nd point (x,y) in shape (2, ) + + Returns: + np.ndarray: The 3rd point. + """ + direction = a - b + c = b + np.r_[-direction[1], direction[0]] + return c + + +def get_warp_matrix(center: np.ndarray, + scale: np.ndarray, + rot: float, + output_size: Tuple[int, int], + shift: Tuple[float, float] = (0., 0.), + inv: bool = False) -> np.ndarray: + """Calculate the affine transformation matrix that can warp the bbox area + in the input image to the output size. + + Args: + center (np.ndarray[2, ]): Center of the bounding box (x, y). + scale (np.ndarray[2, ]): Scale of the bounding box + wrt [width, height]. + rot (float): Rotation angle (degree). + output_size (np.ndarray[2, ] | list(2,)): Size of the + destination heatmaps. 
+ shift (0-100%): Shift translation ratio wrt the width/height. + Default (0., 0.). + inv (bool): Option to inverse the affine transform direction. + (inv=False: src->dst or inv=True: dst->src) + + Returns: + np.ndarray: A 2x3 transformation matrix + """ + shift = np.array(shift) + src_w = scale[0] + dst_w = output_size[0] + dst_h = output_size[1] + + # compute transformation matrix + rot_rad = np.deg2rad(rot) + src_dir = _rotate_point(np.array([0., src_w * -0.5]), rot_rad) + dst_dir = np.array([0., dst_w * -0.5]) + + # get four corners of the src rectangle in the original image + src = np.zeros((3, 2), dtype=np.float32) + src[0, :] = center + scale * shift + src[1, :] = center + src_dir + scale * shift + src[2, :] = _get_3rd_point(src[0, :], src[1, :]) + + # get four corners of the dst rectangle in the input image + dst = np.zeros((3, 2), dtype=np.float32) + dst[0, :] = [dst_w * 0.5, dst_h * 0.5] + dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir + dst[2, :] = _get_3rd_point(dst[0, :], dst[1, :]) + + if inv: + warp_mat = cv2.getAffineTransform(np.float32(dst), np.float32(src)) + else: + warp_mat = cv2.getAffineTransform(np.float32(src), np.float32(dst)) + + return warp_mat + + +def top_down_affine(input_size: dict, bbox_scale: dict, bbox_center: dict, + img: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: + """Get the bbox image as the model input by affine transform. + + Args: + input_size (dict): The input size of the model. + bbox_scale (dict): The bbox scale of the img. + bbox_center (dict): The bbox center of the img. + img (np.ndarray): The original image. + + Returns: + tuple: A tuple containing center and scale. + - np.ndarray[float32]: img after affine transform. + - np.ndarray[float32]: bbox scale after affine transform. + """ + w, h = input_size + warp_size = (int(w), int(h)) + + # reshape bbox to fixed aspect ratio + bbox_scale = _fix_aspect_ratio(bbox_scale, aspect_ratio=w / h) + + # get the affine matrix + center = bbox_center + scale = bbox_scale + rot = 0 + warp_mat = get_warp_matrix(center, scale, rot, output_size=(w, h)) + + # do affine transform + img = cv2.warpAffine(img, warp_mat, warp_size, flags=cv2.INTER_LINEAR) + + return img, bbox_scale + + +def get_simcc_maximum(simcc_x: np.ndarray, + simcc_y: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: + """Get maximum response location and value from simcc representations. + + Note: + instance number: N + num_keypoints: K + heatmap height: H + heatmap width: W + + Args: + simcc_x (np.ndarray): x-axis SimCC in shape (K, Wx) or (N, K, Wx) + simcc_y (np.ndarray): y-axis SimCC in shape (K, Wy) or (N, K, Wy) + + Returns: + tuple: + - locs (np.ndarray): locations of maximum heatmap responses in shape + (K, 2) or (N, K, 2) + - vals (np.ndarray): values of maximum heatmap responses in shape + (K,) or (N, K) + """ + N, K, Wx = simcc_x.shape + simcc_x = simcc_x.reshape(N * K, -1) + simcc_y = simcc_y.reshape(N * K, -1) + + # get maximum value locations + x_locs = np.argmax(simcc_x, axis=1) + y_locs = np.argmax(simcc_y, axis=1) + locs = np.stack((x_locs, y_locs), axis=-1).astype(np.float32) + max_val_x = np.amax(simcc_x, axis=1) + max_val_y = np.amax(simcc_y, axis=1) + + # get maximum value across x and y axis + mask = max_val_x > max_val_y + max_val_x[mask] = max_val_y[mask] + vals = max_val_x + locs[vals <= 0.] 
= -1 + + # reshape + locs = locs.reshape(N, K, 2) + vals = vals.reshape(N, K) + + return locs, vals + + +def decode(simcc_x: np.ndarray, simcc_y: np.ndarray, + simcc_split_ratio) -> Tuple[np.ndarray, np.ndarray]: + """Modulate simcc distribution with Gaussian. + + Args: + simcc_x (np.ndarray[K, Wx]): model predicted simcc in x. + simcc_y (np.ndarray[K, Wy]): model predicted simcc in y. + simcc_split_ratio (int): The split ratio of simcc. + + Returns: + tuple: A tuple containing center and scale. + - np.ndarray[float32]: keypoints in shape (K, 2) or (n, K, 2) + - np.ndarray[float32]: scores in shape (K,) or (n, K) + """ + keypoints, scores = get_simcc_maximum(simcc_x, simcc_y) + keypoints /= simcc_split_ratio + + return keypoints, scores + + +def inference_pose(session, out_bbox, oriImg, model_input_size: Tuple[int, int]= (288, 384) ): + resized_img, center, scale = preprocess(oriImg, out_bbox, model_input_size) + outputs = inference(session, resized_img) + keypoints, scores = postprocess(outputs, model_input_size, center, scale) + + return keypoints, scores \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/openpose/face.py b/extensions-builtin/forge_legacy_preprocessors/annotator/openpose/face.py new file mode 100644 index 0000000000000000000000000000000000000000..f3c46d77664aa9fa91c63785a1485a396f05cacc --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/openpose/face.py @@ -0,0 +1,362 @@ +import logging +import numpy as np +from torchvision.transforms import ToTensor, ToPILImage +import torch +import torch.nn.functional as F +import cv2 + +from . import util +from torch.nn import Conv2d, Module, ReLU, MaxPool2d, init + + +class FaceNet(Module): + """Model the cascading heatmaps. 
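+
+    A VGG-style feature extractor followed by six prediction stages; stages 2-6
+    each refine the 71-channel heatmaps by concatenating the previous stage's
+    output with the shared 128-channel feature map.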
""" + def __init__(self): + super(FaceNet, self).__init__() + # cnn to make feature map + self.relu = ReLU() + self.max_pooling_2d = MaxPool2d(kernel_size=2, stride=2) + self.conv1_1 = Conv2d(in_channels=3, out_channels=64, + kernel_size=3, stride=1, padding=1) + self.conv1_2 = Conv2d( + in_channels=64, out_channels=64, kernel_size=3, stride=1, + padding=1) + self.conv2_1 = Conv2d( + in_channels=64, out_channels=128, kernel_size=3, stride=1, + padding=1) + self.conv2_2 = Conv2d( + in_channels=128, out_channels=128, kernel_size=3, stride=1, + padding=1) + self.conv3_1 = Conv2d( + in_channels=128, out_channels=256, kernel_size=3, stride=1, + padding=1) + self.conv3_2 = Conv2d( + in_channels=256, out_channels=256, kernel_size=3, stride=1, + padding=1) + self.conv3_3 = Conv2d( + in_channels=256, out_channels=256, kernel_size=3, stride=1, + padding=1) + self.conv3_4 = Conv2d( + in_channels=256, out_channels=256, kernel_size=3, stride=1, + padding=1) + self.conv4_1 = Conv2d( + in_channels=256, out_channels=512, kernel_size=3, stride=1, + padding=1) + self.conv4_2 = Conv2d( + in_channels=512, out_channels=512, kernel_size=3, stride=1, + padding=1) + self.conv4_3 = Conv2d( + in_channels=512, out_channels=512, kernel_size=3, stride=1, + padding=1) + self.conv4_4 = Conv2d( + in_channels=512, out_channels=512, kernel_size=3, stride=1, + padding=1) + self.conv5_1 = Conv2d( + in_channels=512, out_channels=512, kernel_size=3, stride=1, + padding=1) + self.conv5_2 = Conv2d( + in_channels=512, out_channels=512, kernel_size=3, stride=1, + padding=1) + self.conv5_3_CPM = Conv2d( + in_channels=512, out_channels=128, kernel_size=3, stride=1, + padding=1) + + # stage1 + self.conv6_1_CPM = Conv2d( + in_channels=128, out_channels=512, kernel_size=1, stride=1, + padding=0) + self.conv6_2_CPM = Conv2d( + in_channels=512, out_channels=71, kernel_size=1, stride=1, + padding=0) + + # stage2 + self.Mconv1_stage2 = Conv2d( + in_channels=199, out_channels=128, kernel_size=7, stride=1, + padding=3) + self.Mconv2_stage2 = Conv2d( + in_channels=128, out_channels=128, kernel_size=7, stride=1, + padding=3) + self.Mconv3_stage2 = Conv2d( + in_channels=128, out_channels=128, kernel_size=7, stride=1, + padding=3) + self.Mconv4_stage2 = Conv2d( + in_channels=128, out_channels=128, kernel_size=7, stride=1, + padding=3) + self.Mconv5_stage2 = Conv2d( + in_channels=128, out_channels=128, kernel_size=7, stride=1, + padding=3) + self.Mconv6_stage2 = Conv2d( + in_channels=128, out_channels=128, kernel_size=1, stride=1, + padding=0) + self.Mconv7_stage2 = Conv2d( + in_channels=128, out_channels=71, kernel_size=1, stride=1, + padding=0) + + # stage3 + self.Mconv1_stage3 = Conv2d( + in_channels=199, out_channels=128, kernel_size=7, stride=1, + padding=3) + self.Mconv2_stage3 = Conv2d( + in_channels=128, out_channels=128, kernel_size=7, stride=1, + padding=3) + self.Mconv3_stage3 = Conv2d( + in_channels=128, out_channels=128, kernel_size=7, stride=1, + padding=3) + self.Mconv4_stage3 = Conv2d( + in_channels=128, out_channels=128, kernel_size=7, stride=1, + padding=3) + self.Mconv5_stage3 = Conv2d( + in_channels=128, out_channels=128, kernel_size=7, stride=1, + padding=3) + self.Mconv6_stage3 = Conv2d( + in_channels=128, out_channels=128, kernel_size=1, stride=1, + padding=0) + self.Mconv7_stage3 = Conv2d( + in_channels=128, out_channels=71, kernel_size=1, stride=1, + padding=0) + + # stage4 + self.Mconv1_stage4 = Conv2d( + in_channels=199, out_channels=128, kernel_size=7, stride=1, + padding=3) + self.Mconv2_stage4 = Conv2d( + 
in_channels=128, out_channels=128, kernel_size=7, stride=1, + padding=3) + self.Mconv3_stage4 = Conv2d( + in_channels=128, out_channels=128, kernel_size=7, stride=1, + padding=3) + self.Mconv4_stage4 = Conv2d( + in_channels=128, out_channels=128, kernel_size=7, stride=1, + padding=3) + self.Mconv5_stage4 = Conv2d( + in_channels=128, out_channels=128, kernel_size=7, stride=1, + padding=3) + self.Mconv6_stage4 = Conv2d( + in_channels=128, out_channels=128, kernel_size=1, stride=1, + padding=0) + self.Mconv7_stage4 = Conv2d( + in_channels=128, out_channels=71, kernel_size=1, stride=1, + padding=0) + + # stage5 + self.Mconv1_stage5 = Conv2d( + in_channels=199, out_channels=128, kernel_size=7, stride=1, + padding=3) + self.Mconv2_stage5 = Conv2d( + in_channels=128, out_channels=128, kernel_size=7, stride=1, + padding=3) + self.Mconv3_stage5 = Conv2d( + in_channels=128, out_channels=128, kernel_size=7, stride=1, + padding=3) + self.Mconv4_stage5 = Conv2d( + in_channels=128, out_channels=128, kernel_size=7, stride=1, + padding=3) + self.Mconv5_stage5 = Conv2d( + in_channels=128, out_channels=128, kernel_size=7, stride=1, + padding=3) + self.Mconv6_stage5 = Conv2d( + in_channels=128, out_channels=128, kernel_size=1, stride=1, + padding=0) + self.Mconv7_stage5 = Conv2d( + in_channels=128, out_channels=71, kernel_size=1, stride=1, + padding=0) + + # stage6 + self.Mconv1_stage6 = Conv2d( + in_channels=199, out_channels=128, kernel_size=7, stride=1, + padding=3) + self.Mconv2_stage6 = Conv2d( + in_channels=128, out_channels=128, kernel_size=7, stride=1, + padding=3) + self.Mconv3_stage6 = Conv2d( + in_channels=128, out_channels=128, kernel_size=7, stride=1, + padding=3) + self.Mconv4_stage6 = Conv2d( + in_channels=128, out_channels=128, kernel_size=7, stride=1, + padding=3) + self.Mconv5_stage6 = Conv2d( + in_channels=128, out_channels=128, kernel_size=7, stride=1, + padding=3) + self.Mconv6_stage6 = Conv2d( + in_channels=128, out_channels=128, kernel_size=1, stride=1, + padding=0) + self.Mconv7_stage6 = Conv2d( + in_channels=128, out_channels=71, kernel_size=1, stride=1, + padding=0) + + for m in self.modules(): + if isinstance(m, Conv2d): + init.constant_(m.bias, 0) + + def forward(self, x): + """Return a list of heatmaps.""" + heatmaps = [] + + h = self.relu(self.conv1_1(x)) + h = self.relu(self.conv1_2(h)) + h = self.max_pooling_2d(h) + h = self.relu(self.conv2_1(h)) + h = self.relu(self.conv2_2(h)) + h = self.max_pooling_2d(h) + h = self.relu(self.conv3_1(h)) + h = self.relu(self.conv3_2(h)) + h = self.relu(self.conv3_3(h)) + h = self.relu(self.conv3_4(h)) + h = self.max_pooling_2d(h) + h = self.relu(self.conv4_1(h)) + h = self.relu(self.conv4_2(h)) + h = self.relu(self.conv4_3(h)) + h = self.relu(self.conv4_4(h)) + h = self.relu(self.conv5_1(h)) + h = self.relu(self.conv5_2(h)) + h = self.relu(self.conv5_3_CPM(h)) + feature_map = h + + # stage1 + h = self.relu(self.conv6_1_CPM(h)) + h = self.conv6_2_CPM(h) + heatmaps.append(h) + + # stage2 + h = torch.cat([h, feature_map], dim=1) # channel concat + h = self.relu(self.Mconv1_stage2(h)) + h = self.relu(self.Mconv2_stage2(h)) + h = self.relu(self.Mconv3_stage2(h)) + h = self.relu(self.Mconv4_stage2(h)) + h = self.relu(self.Mconv5_stage2(h)) + h = self.relu(self.Mconv6_stage2(h)) + h = self.Mconv7_stage2(h) + heatmaps.append(h) + + # stage3 + h = torch.cat([h, feature_map], dim=1) # channel concat + h = self.relu(self.Mconv1_stage3(h)) + h = self.relu(self.Mconv2_stage3(h)) + h = self.relu(self.Mconv3_stage3(h)) + h = 
self.relu(self.Mconv4_stage3(h)) + h = self.relu(self.Mconv5_stage3(h)) + h = self.relu(self.Mconv6_stage3(h)) + h = self.Mconv7_stage3(h) + heatmaps.append(h) + + # stage4 + h = torch.cat([h, feature_map], dim=1) # channel concat + h = self.relu(self.Mconv1_stage4(h)) + h = self.relu(self.Mconv2_stage4(h)) + h = self.relu(self.Mconv3_stage4(h)) + h = self.relu(self.Mconv4_stage4(h)) + h = self.relu(self.Mconv5_stage4(h)) + h = self.relu(self.Mconv6_stage4(h)) + h = self.Mconv7_stage4(h) + heatmaps.append(h) + + # stage5 + h = torch.cat([h, feature_map], dim=1) # channel concat + h = self.relu(self.Mconv1_stage5(h)) + h = self.relu(self.Mconv2_stage5(h)) + h = self.relu(self.Mconv3_stage5(h)) + h = self.relu(self.Mconv4_stage5(h)) + h = self.relu(self.Mconv5_stage5(h)) + h = self.relu(self.Mconv6_stage5(h)) + h = self.Mconv7_stage5(h) + heatmaps.append(h) + + # stage6 + h = torch.cat([h, feature_map], dim=1) # channel concat + h = self.relu(self.Mconv1_stage6(h)) + h = self.relu(self.Mconv2_stage6(h)) + h = self.relu(self.Mconv3_stage6(h)) + h = self.relu(self.Mconv4_stage6(h)) + h = self.relu(self.Mconv5_stage6(h)) + h = self.relu(self.Mconv6_stage6(h)) + h = self.Mconv7_stage6(h) + heatmaps.append(h) + + return heatmaps + + +LOG = logging.getLogger(__name__) +TOTEN = ToTensor() +TOPIL = ToPILImage() + + +params = { + 'gaussian_sigma': 2.5, + 'inference_img_size': 736, # 368, 736, 1312 + 'heatmap_peak_thresh': 0.1, + 'crop_scale': 1.5, + 'line_indices': [ + [0, 1], [1, 2], [2, 3], [3, 4], [4, 5], [5, 6], + [6, 7], [7, 8], [8, 9], [9, 10], [10, 11], [11, 12], [12, 13], + [13, 14], [14, 15], [15, 16], + [17, 18], [18, 19], [19, 20], [20, 21], + [22, 23], [23, 24], [24, 25], [25, 26], + [27, 28], [28, 29], [29, 30], + [31, 32], [32, 33], [33, 34], [34, 35], + [36, 37], [37, 38], [38, 39], [39, 40], [40, 41], [41, 36], + [42, 43], [43, 44], [44, 45], [45, 46], [46, 47], [47, 42], + [48, 49], [49, 50], [50, 51], [51, 52], [52, 53], [53, 54], + [54, 55], [55, 56], [56, 57], [57, 58], [58, 59], [59, 48], + [60, 61], [61, 62], [62, 63], [63, 64], [64, 65], [65, 66], + [66, 67], [67, 60] + ], +} + + +class Face(object): + """ + The OpenPose face landmark detector model. 
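+    Runs FaceNet on a cropped face region and returns per-landmark heatmaps,
+    from which peak coordinates are extracted by ``compute_peaks_from_heatmaps``.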
+ + Args: + inference_size: set the size of the inference image size, suggested: + 368, 736, 1312, default 736 + gaussian_sigma: blur the heatmaps, default 2.5 + heatmap_peak_thresh: return landmark if over threshold, default 0.1 + + """ + def __init__(self, face_model_path, + inference_size=None, + gaussian_sigma=None, + heatmap_peak_thresh=None): + self.inference_size = inference_size or params["inference_img_size"] + self.sigma = gaussian_sigma or params['gaussian_sigma'] + self.threshold = heatmap_peak_thresh or params["heatmap_peak_thresh"] + self.model = FaceNet() + self.model.load_state_dict(torch.load(face_model_path)) + # if torch.cuda.is_available(): + # self.model = self.model.cuda() + # print('cuda') + self.model.eval() + + def __call__(self, face_img): + H, W, C = face_img.shape + + w_size = 384 + x_data = torch.from_numpy(util.smart_resize(face_img, (w_size, w_size))).permute([2, 0, 1]) / 256.0 - 0.5 + + x_data = x_data.to(self.cn_device) + + with torch.no_grad(): + hs = self.model(x_data[None, ...]) + heatmaps = F.interpolate( + hs[-1], + (H, W), + mode='bilinear', align_corners=True).cpu().numpy()[0] + return heatmaps + + def compute_peaks_from_heatmaps(self, heatmaps): + all_peaks = [] + for part in range(heatmaps.shape[0]): + map_ori = heatmaps[part].copy() + binary = np.ascontiguousarray(map_ori > 0.05, dtype=np.uint8) + + if np.sum(binary) == 0: + continue + + positions = np.where(binary > 0.5) + intensities = map_ori[positions] + mi = np.argmax(intensities) + y, x = positions[0][mi], positions[1][mi] + all_peaks.append([x, y]) + + return np.array(all_peaks) \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/openpose/hand.py b/extensions-builtin/forge_legacy_preprocessors/annotator/openpose/hand.py new file mode 100644 index 0000000000000000000000000000000000000000..03a218d02ac0021e587c1f2666dbdfea439a051c --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/openpose/hand.py @@ -0,0 +1,94 @@ +import cv2 +import json +import numpy as np +import math +import time +from scipy.ndimage import gaussian_filter +import matplotlib.pyplot as plt +import matplotlib +import torch +from skimage.measure import label + +from .model import handpose_model +from . 
import util + +class Hand(object): + def __init__(self, model_path): + self.model = handpose_model() + # if torch.cuda.is_available(): + # self.model = self.model.cuda() + # print('cuda') + model_dict = util.transfer(self.model, torch.load(model_path)) + self.model.load_state_dict(model_dict) + self.model.eval() + + def __call__(self, oriImgRaw): + scale_search = [0.5, 1.0, 1.5, 2.0] + # scale_search = [0.5] + boxsize = 368 + stride = 8 + padValue = 128 + thre = 0.05 + multiplier = [x * boxsize for x in scale_search] + + wsize = 128 + heatmap_avg = np.zeros((wsize, wsize, 22)) + + Hr, Wr, Cr = oriImgRaw.shape + + oriImg = cv2.GaussianBlur(oriImgRaw, (0, 0), 0.8) + + for m in range(len(multiplier)): + scale = multiplier[m] + imageToTest = util.smart_resize(oriImg, (scale, scale)) + + imageToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, padValue) + im = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5 + im = np.ascontiguousarray(im) + + data = torch.from_numpy(im).float() + if torch.cuda.is_available(): + data = data.cuda() + + with torch.no_grad(): + data = data.to(self.cn_device) + output = self.model(data).cpu().numpy() + + # extract outputs, resize, and remove padding + heatmap = np.transpose(np.squeeze(output), (1, 2, 0)) # output 1 is heatmaps + heatmap = util.smart_resize_k(heatmap, fx=stride, fy=stride) + heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :] + heatmap = util.smart_resize(heatmap, (wsize, wsize)) + + heatmap_avg += heatmap / len(multiplier) + + all_peaks = [] + for part in range(21): + map_ori = heatmap_avg[:, :, part] + one_heatmap = gaussian_filter(map_ori, sigma=3) + binary = np.ascontiguousarray(one_heatmap > thre, dtype=np.uint8) + + if np.sum(binary) == 0: + all_peaks.append([0, 0]) + continue + label_img, label_numbers = label(binary, return_num=True, connectivity=binary.ndim) + max_index = np.argmax([np.sum(map_ori[label_img == i]) for i in range(1, label_numbers + 1)]) + 1 + label_img[label_img != max_index] = 0 + map_ori[label_img == 0] = 0 + + y, x = util.npmax(map_ori) + y = int(float(y) * float(Hr) / float(wsize)) + x = int(float(x) * float(Wr) / float(wsize)) + all_peaks.append([x, y]) + return np.array(all_peaks) + +if __name__ == "__main__": + hand_estimation = Hand('../model/hand_pose_model.pth') + + # test_image = '../images/hand.jpg' + test_image = '../images/hand.jpg' + oriImg = cv2.imread(test_image) # B,G,R order + peaks = hand_estimation(oriImg) + canvas = util.draw_handpose(oriImg, peaks, True) + cv2.imshow('', canvas) + cv2.waitKey(0) \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/openpose/model.py b/extensions-builtin/forge_legacy_preprocessors/annotator/openpose/model.py new file mode 100644 index 0000000000000000000000000000000000000000..72dc79ad857933a7c108d21494d6395572b816e6 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/openpose/model.py @@ -0,0 +1,218 @@ +import torch +from collections import OrderedDict + +import torch +import torch.nn as nn + +def make_layers(block, no_relu_layers): + layers = [] + for layer_name, v in block.items(): + if 'pool' in layer_name: + layer = nn.MaxPool2d(kernel_size=v[0], stride=v[1], + padding=v[2]) + layers.append((layer_name, layer)) + else: + conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1], + kernel_size=v[2], stride=v[3], + padding=v[4]) + layers.append((layer_name, conv2d)) + if layer_name not in 
no_relu_layers: + layers.append(('relu_'+layer_name, nn.ReLU(inplace=True))) + + return nn.Sequential(OrderedDict(layers)) + +class bodypose_model(nn.Module): + def __init__(self): + super(bodypose_model, self).__init__() + + # these layers have no relu layer + no_relu_layers = ['conv5_5_CPM_L1', 'conv5_5_CPM_L2', 'Mconv7_stage2_L1',\ + 'Mconv7_stage2_L2', 'Mconv7_stage3_L1', 'Mconv7_stage3_L2',\ + 'Mconv7_stage4_L1', 'Mconv7_stage4_L2', 'Mconv7_stage5_L1',\ + 'Mconv7_stage5_L2', 'Mconv7_stage6_L1', 'Mconv7_stage6_L1'] + blocks = {} + block0 = OrderedDict([ + ('conv1_1', [3, 64, 3, 1, 1]), + ('conv1_2', [64, 64, 3, 1, 1]), + ('pool1_stage1', [2, 2, 0]), + ('conv2_1', [64, 128, 3, 1, 1]), + ('conv2_2', [128, 128, 3, 1, 1]), + ('pool2_stage1', [2, 2, 0]), + ('conv3_1', [128, 256, 3, 1, 1]), + ('conv3_2', [256, 256, 3, 1, 1]), + ('conv3_3', [256, 256, 3, 1, 1]), + ('conv3_4', [256, 256, 3, 1, 1]), + ('pool3_stage1', [2, 2, 0]), + ('conv4_1', [256, 512, 3, 1, 1]), + ('conv4_2', [512, 512, 3, 1, 1]), + ('conv4_3_CPM', [512, 256, 3, 1, 1]), + ('conv4_4_CPM', [256, 128, 3, 1, 1]) + ]) + + + # Stage 1 + block1_1 = OrderedDict([ + ('conv5_1_CPM_L1', [128, 128, 3, 1, 1]), + ('conv5_2_CPM_L1', [128, 128, 3, 1, 1]), + ('conv5_3_CPM_L1', [128, 128, 3, 1, 1]), + ('conv5_4_CPM_L1', [128, 512, 1, 1, 0]), + ('conv5_5_CPM_L1', [512, 38, 1, 1, 0]) + ]) + + block1_2 = OrderedDict([ + ('conv5_1_CPM_L2', [128, 128, 3, 1, 1]), + ('conv5_2_CPM_L2', [128, 128, 3, 1, 1]), + ('conv5_3_CPM_L2', [128, 128, 3, 1, 1]), + ('conv5_4_CPM_L2', [128, 512, 1, 1, 0]), + ('conv5_5_CPM_L2', [512, 19, 1, 1, 0]) + ]) + blocks['block1_1'] = block1_1 + blocks['block1_2'] = block1_2 + + self.model0 = make_layers(block0, no_relu_layers) + + # Stages 2 - 6 + for i in range(2, 7): + blocks['block%d_1' % i] = OrderedDict([ + ('Mconv1_stage%d_L1' % i, [185, 128, 7, 1, 3]), + ('Mconv2_stage%d_L1' % i, [128, 128, 7, 1, 3]), + ('Mconv3_stage%d_L1' % i, [128, 128, 7, 1, 3]), + ('Mconv4_stage%d_L1' % i, [128, 128, 7, 1, 3]), + ('Mconv5_stage%d_L1' % i, [128, 128, 7, 1, 3]), + ('Mconv6_stage%d_L1' % i, [128, 128, 1, 1, 0]), + ('Mconv7_stage%d_L1' % i, [128, 38, 1, 1, 0]) + ]) + + blocks['block%d_2' % i] = OrderedDict([ + ('Mconv1_stage%d_L2' % i, [185, 128, 7, 1, 3]), + ('Mconv2_stage%d_L2' % i, [128, 128, 7, 1, 3]), + ('Mconv3_stage%d_L2' % i, [128, 128, 7, 1, 3]), + ('Mconv4_stage%d_L2' % i, [128, 128, 7, 1, 3]), + ('Mconv5_stage%d_L2' % i, [128, 128, 7, 1, 3]), + ('Mconv6_stage%d_L2' % i, [128, 128, 1, 1, 0]), + ('Mconv7_stage%d_L2' % i, [128, 19, 1, 1, 0]) + ]) + + for k in blocks.keys(): + blocks[k] = make_layers(blocks[k], no_relu_layers) + + self.model1_1 = blocks['block1_1'] + self.model2_1 = blocks['block2_1'] + self.model3_1 = blocks['block3_1'] + self.model4_1 = blocks['block4_1'] + self.model5_1 = blocks['block5_1'] + self.model6_1 = blocks['block6_1'] + + self.model1_2 = blocks['block1_2'] + self.model2_2 = blocks['block2_2'] + self.model3_2 = blocks['block3_2'] + self.model4_2 = blocks['block4_2'] + self.model5_2 = blocks['block5_2'] + self.model6_2 = blocks['block6_2'] + + + def forward(self, x): + + out1 = self.model0(x) + + out1_1 = self.model1_1(out1) + out1_2 = self.model1_2(out1) + out2 = torch.cat([out1_1, out1_2, out1], 1) + + out2_1 = self.model2_1(out2) + out2_2 = self.model2_2(out2) + out3 = torch.cat([out2_1, out2_2, out1], 1) + + out3_1 = self.model3_1(out3) + out3_2 = self.model3_2(out3) + out4 = torch.cat([out3_1, out3_2, out1], 1) + + out4_1 = self.model4_1(out4) + out4_2 = self.model4_2(out4) + out5 = 
torch.cat([out4_1, out4_2, out1], 1) + + out5_1 = self.model5_1(out5) + out5_2 = self.model5_2(out5) + out6 = torch.cat([out5_1, out5_2, out1], 1) + + out6_1 = self.model6_1(out6) + out6_2 = self.model6_2(out6) + + return out6_1, out6_2 + +class handpose_model(nn.Module): + def __init__(self): + super(handpose_model, self).__init__() + + # these layers have no relu layer + no_relu_layers = ['conv6_2_CPM', 'Mconv7_stage2', 'Mconv7_stage3',\ + 'Mconv7_stage4', 'Mconv7_stage5', 'Mconv7_stage6'] + # stage 1 + block1_0 = OrderedDict([ + ('conv1_1', [3, 64, 3, 1, 1]), + ('conv1_2', [64, 64, 3, 1, 1]), + ('pool1_stage1', [2, 2, 0]), + ('conv2_1', [64, 128, 3, 1, 1]), + ('conv2_2', [128, 128, 3, 1, 1]), + ('pool2_stage1', [2, 2, 0]), + ('conv3_1', [128, 256, 3, 1, 1]), + ('conv3_2', [256, 256, 3, 1, 1]), + ('conv3_3', [256, 256, 3, 1, 1]), + ('conv3_4', [256, 256, 3, 1, 1]), + ('pool3_stage1', [2, 2, 0]), + ('conv4_1', [256, 512, 3, 1, 1]), + ('conv4_2', [512, 512, 3, 1, 1]), + ('conv4_3', [512, 512, 3, 1, 1]), + ('conv4_4', [512, 512, 3, 1, 1]), + ('conv5_1', [512, 512, 3, 1, 1]), + ('conv5_2', [512, 512, 3, 1, 1]), + ('conv5_3_CPM', [512, 128, 3, 1, 1]) + ]) + + block1_1 = OrderedDict([ + ('conv6_1_CPM', [128, 512, 1, 1, 0]), + ('conv6_2_CPM', [512, 22, 1, 1, 0]) + ]) + + blocks = {} + blocks['block1_0'] = block1_0 + blocks['block1_1'] = block1_1 + + # stage 2-6 + for i in range(2, 7): + blocks['block%d' % i] = OrderedDict([ + ('Mconv1_stage%d' % i, [150, 128, 7, 1, 3]), + ('Mconv2_stage%d' % i, [128, 128, 7, 1, 3]), + ('Mconv3_stage%d' % i, [128, 128, 7, 1, 3]), + ('Mconv4_stage%d' % i, [128, 128, 7, 1, 3]), + ('Mconv5_stage%d' % i, [128, 128, 7, 1, 3]), + ('Mconv6_stage%d' % i, [128, 128, 1, 1, 0]), + ('Mconv7_stage%d' % i, [128, 22, 1, 1, 0]) + ]) + + for k in blocks.keys(): + blocks[k] = make_layers(blocks[k], no_relu_layers) + + self.model1_0 = blocks['block1_0'] + self.model1_1 = blocks['block1_1'] + self.model2 = blocks['block2'] + self.model3 = blocks['block3'] + self.model4 = blocks['block4'] + self.model5 = blocks['block5'] + self.model6 = blocks['block6'] + + def forward(self, x): + out1_0 = self.model1_0(x) + out1_1 = self.model1_1(out1_0) + concat_stage2 = torch.cat([out1_1, out1_0], 1) + out_stage2 = self.model2(concat_stage2) + concat_stage3 = torch.cat([out_stage2, out1_0], 1) + out_stage3 = self.model3(concat_stage3) + concat_stage4 = torch.cat([out_stage3, out1_0], 1) + out_stage4 = self.model4(concat_stage4) + concat_stage5 = torch.cat([out_stage4, out1_0], 1) + out_stage5 = self.model5(concat_stage5) + concat_stage6 = torch.cat([out_stage5, out1_0], 1) + out_stage6 = self.model6(concat_stage6) + return out_stage6 + diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/openpose/types.py b/extensions-builtin/forge_legacy_preprocessors/annotator/openpose/types.py new file mode 100644 index 0000000000000000000000000000000000000000..3136612f8535517de5acf053d9f5851d29bbcdba --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/openpose/types.py @@ -0,0 +1,31 @@ +from typing import NamedTuple, List, Optional, Union + + +class Keypoint(NamedTuple): + x: float + y: float + score: float = 1.0 + id: int = -1 + + +class BodyResult(NamedTuple): + # Note: Using `Optional` instead of `|` operator as the ladder is a Python + # 3.10 feature. + # Annotator code should be Python 3.8 Compatible, as controlnet repo uses + # Python 3.8 environment. 
+ # https://github.com/lllyasviel/ControlNet/blob/d3284fcd0972c510635a4f5abe2eeb71dc0de524/environment.yaml#L6 + keypoints: List[Optional[Keypoint]] + total_score: float = 0.0 + total_parts: int = 0 + + +HandResult = List[Keypoint] +FaceResult = List[Keypoint] +AnimalPoseResult = List[Keypoint] + + +class HumanPoseResult(NamedTuple): + body: BodyResult + left_hand: Optional[HandResult] + right_hand: Optional[HandResult] + face: Optional[FaceResult] diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/openpose/util.py b/extensions-builtin/forge_legacy_preprocessors/annotator/openpose/util.py new file mode 100644 index 0000000000000000000000000000000000000000..00a88084ef0e30ade2baab935d5c66c59be1c0be --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/openpose/util.py @@ -0,0 +1,411 @@ +import math +import numpy as np +import matplotlib +import cv2 +from typing import List, Tuple, Union, Optional + +from .body import BodyResult, Keypoint + +eps = 0.01 + + +def smart_resize(x, s): + Ht, Wt = s + if x.ndim == 2: + Ho, Wo = x.shape + Co = 1 + else: + Ho, Wo, Co = x.shape + if Co == 3 or Co == 1: + k = float(Ht + Wt) / float(Ho + Wo) + return cv2.resize(x, (int(Wt), int(Ht)), interpolation=cv2.INTER_AREA if k < 1 else cv2.INTER_LANCZOS4) + else: + return np.stack([smart_resize(x[:, :, i], s) for i in range(Co)], axis=2) + + +def smart_resize_k(x, fx, fy): + if x.ndim == 2: + Ho, Wo = x.shape + Co = 1 + else: + Ho, Wo, Co = x.shape + Ht, Wt = Ho * fy, Wo * fx + if Co == 3 or Co == 1: + k = float(Ht + Wt) / float(Ho + Wo) + return cv2.resize(x, (int(Wt), int(Ht)), interpolation=cv2.INTER_AREA if k < 1 else cv2.INTER_LANCZOS4) + else: + return np.stack([smart_resize_k(x[:, :, i], fx, fy) for i in range(Co)], axis=2) + + +def padRightDownCorner(img, stride, padValue): + h = img.shape[0] + w = img.shape[1] + + pad = 4 * [None] + pad[0] = 0 # up + pad[1] = 0 # left + pad[2] = 0 if (h % stride == 0) else stride - (h % stride) # down + pad[3] = 0 if (w % stride == 0) else stride - (w % stride) # right + + img_padded = img + pad_up = np.tile(img_padded[0:1, :, :]*0 + padValue, (pad[0], 1, 1)) + img_padded = np.concatenate((pad_up, img_padded), axis=0) + pad_left = np.tile(img_padded[:, 0:1, :]*0 + padValue, (1, pad[1], 1)) + img_padded = np.concatenate((pad_left, img_padded), axis=1) + pad_down = np.tile(img_padded[-2:-1, :, :]*0 + padValue, (pad[2], 1, 1)) + img_padded = np.concatenate((img_padded, pad_down), axis=0) + pad_right = np.tile(img_padded[:, -2:-1, :]*0 + padValue, (1, pad[3], 1)) + img_padded = np.concatenate((img_padded, pad_right), axis=1) + + return img_padded, pad + + +def transfer(model, model_weights): + transfered_model_weights = {} + for weights_name in model.state_dict().keys(): + transfered_model_weights[weights_name] = model_weights['.'.join(weights_name.split('.')[1:])] + return transfered_model_weights + + +def is_normalized(keypoints: List[Optional[Keypoint]]) -> bool: + point_normalized = [ + 0 <= abs(k.x) <= 1 and 0 <= abs(k.y) <= 1 + for k in keypoints + if k is not None + ] + if not point_normalized: + return False + return all(point_normalized) + + +def draw_bodypose(canvas: np.ndarray, keypoints: List[Keypoint]) -> np.ndarray: + """ + Draw keypoints and limbs representing body pose on a given canvas. + + Args: + canvas (np.ndarray): A 3D numpy array representing the canvas (image) on which to draw the body pose. + keypoints (List[Keypoint]): A list of Keypoint objects representing the body keypoints to be drawn. 
+ + Returns: + np.ndarray: A 3D numpy array representing the modified canvas with the drawn body pose. + + Note: + The function expects the x and y coordinates of the keypoints to be normalized between 0 and 1. + """ + if not is_normalized(keypoints): + H, W = 1.0, 1.0 + else: + H, W, _ = canvas.shape + + stickwidth = 4 + + limbSeq = [ + [2, 3], [2, 6], [3, 4], [4, 5], + [6, 7], [7, 8], [2, 9], [9, 10], + [10, 11], [2, 12], [12, 13], [13, 14], + [2, 1], [1, 15], [15, 17], [1, 16], + [16, 18], + ] + + colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], \ + [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], \ + [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]] + + for (k1_index, k2_index), color in zip(limbSeq, colors): + keypoint1 = keypoints[k1_index - 1] + keypoint2 = keypoints[k2_index - 1] + + if keypoint1 is None or keypoint2 is None: + continue + + Y = np.array([keypoint1.x, keypoint2.x]) * float(W) + X = np.array([keypoint1.y, keypoint2.y]) * float(H) + mX = np.mean(X) + mY = np.mean(Y) + length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5 + angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1])) + polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0, 360, 1) + cv2.fillConvexPoly(canvas, polygon, [int(float(c) * 0.6) for c in color]) + + for keypoint, color in zip(keypoints, colors): + if keypoint is None: + continue + + x, y = keypoint.x, keypoint.y + x = int(x * W) + y = int(y * H) + cv2.circle(canvas, (int(x), int(y)), 4, color, thickness=-1) + + return canvas + + +def draw_handpose(canvas: np.ndarray, keypoints: Union[List[Keypoint], None]) -> np.ndarray: + """ + Draw keypoints and connections representing hand pose on a given canvas. + + Args: + canvas (np.ndarray): A 3D numpy array representing the canvas (image) on which to draw the hand pose. + keypoints (List[Keypoint]| None): A list of Keypoint objects representing the hand keypoints to be drawn + or None if no keypoints are present. + + Returns: + np.ndarray: A 3D numpy array representing the modified canvas with the drawn hand pose. + + Note: + The function expects the x and y coordinates of the keypoints to be normalized between 0 and 1. + """ + if not keypoints: + return canvas + + if not is_normalized(keypoints): + H, W = 1.0, 1.0 + else: + H, W, _ = canvas.shape + + edges = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10], \ + [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]] + + for ie, (e1, e2) in enumerate(edges): + k1 = keypoints[e1] + k2 = keypoints[e2] + if k1 is None or k2 is None: + continue + + x1 = int(k1.x * W) + y1 = int(k1.y * H) + x2 = int(k2.x * W) + y2 = int(k2.y * H) + if x1 > eps and y1 > eps and x2 > eps and y2 > eps: + cv2.line(canvas, (x1, y1), (x2, y2), matplotlib.colors.hsv_to_rgb([ie / float(len(edges)), 1.0, 1.0]) * 255, thickness=2) + + for keypoint in keypoints: + if keypoint is None: + continue + + x, y = keypoint.x, keypoint.y + x = int(x * W) + y = int(y * H) + if x > eps and y > eps: + cv2.circle(canvas, (x, y), 4, (0, 0, 255), thickness=-1) + return canvas + + +def draw_facepose(canvas: np.ndarray, keypoints: Union[List[Keypoint], None]) -> np.ndarray: + """ + Draw keypoints representing face pose on a given canvas. + + Args: + canvas (np.ndarray): A 3D numpy array representing the canvas (image) on which to draw the face pose. 
+ keypoints (List[Keypoint]| None): A list of Keypoint objects representing the face keypoints to be drawn + or None if no keypoints are present. + + Returns: + np.ndarray: A 3D numpy array representing the modified canvas with the drawn face pose. + + Note: + The function expects the x and y coordinates of the keypoints to be normalized between 0 and 1. + """ + if not keypoints: + return canvas + + if not is_normalized(keypoints): + H, W = 1.0, 1.0 + else: + H, W, _ = canvas.shape + + for keypoint in keypoints: + if keypoint is None: + continue + + x, y = keypoint.x, keypoint.y + x = int(x * W) + y = int(y * H) + if x > eps and y > eps: + cv2.circle(canvas, (x, y), 3, (255, 255, 255), thickness=-1) + return canvas + + +# detect hand according to body pose keypoints +# please refer to https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/src/openpose/hand/handDetector.cpp +def handDetect(body: BodyResult, oriImg) -> List[Tuple[int, int, int, bool]]: + """ + Detect hands in the input body pose keypoints and calculate the bounding box for each hand. + + Args: + body (BodyResult): A BodyResult object containing the detected body pose keypoints. + oriImg (numpy.ndarray): A 3D numpy array representing the original input image. + + Returns: + List[Tuple[int, int, int, bool]]: A list of tuples, each containing the coordinates (x, y) of the top-left + corner of the bounding box, the width (height) of the bounding box, and + a boolean flag indicating whether the hand is a left hand (True) or a + right hand (False). + + Notes: + - The width and height of the bounding boxes are equal since the network requires squared input. + - The minimum bounding box size is 20 pixels. + """ + ratioWristElbow = 0.33 + detect_result = [] + image_height, image_width = oriImg.shape[0:2] + + keypoints = body.keypoints + # right hand: wrist 4, elbow 3, shoulder 2 + # left hand: wrist 7, elbow 6, shoulder 5 + left_shoulder = keypoints[5] + left_elbow = keypoints[6] + left_wrist = keypoints[7] + right_shoulder = keypoints[2] + right_elbow = keypoints[3] + right_wrist = keypoints[4] + + # if any of three not detected + has_left = all(keypoint is not None for keypoint in (left_shoulder, left_elbow, left_wrist)) + has_right = all(keypoint is not None for keypoint in (right_shoulder, right_elbow, right_wrist)) + if not (has_left or has_right): + return [] + + hands = [] + #left hand + if has_left: + hands.append([ + left_shoulder.x, left_shoulder.y, + left_elbow.x, left_elbow.y, + left_wrist.x, left_wrist.y, + True + ]) + # right hand + if has_right: + hands.append([ + right_shoulder.x, right_shoulder.y, + right_elbow.x, right_elbow.y, + right_wrist.x, right_wrist.y, + False + ]) + + for x1, y1, x2, y2, x3, y3, is_left in hands: + # pos_hand = pos_wrist + ratio * (pos_wrist - pos_elbox) = (1 + ratio) * pos_wrist - ratio * pos_elbox + # handRectangle.x = posePtr[wrist*3] + ratioWristElbow * (posePtr[wrist*3] - posePtr[elbow*3]); + # handRectangle.y = posePtr[wrist*3+1] + ratioWristElbow * (posePtr[wrist*3+1] - posePtr[elbow*3+1]); + # const auto distanceWristElbow = getDistance(poseKeypoints, person, wrist, elbow); + # const auto distanceElbowShoulder = getDistance(poseKeypoints, person, elbow, shoulder); + # handRectangle.width = 1.5f * fastMax(distanceWristElbow, 0.9f * distanceElbowShoulder); + x = x3 + ratioWristElbow * (x3 - x2) + y = y3 + ratioWristElbow * (y3 - y2) + distanceWristElbow = math.sqrt((x3 - x2) ** 2 + (y3 - y2) ** 2) + distanceElbowShoulder = math.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2) + 
width = 1.5 * max(distanceWristElbow, 0.9 * distanceElbowShoulder) + # x-y refers to the center --> offset to topLeft point + # handRectangle.x -= handRectangle.width / 2.f; + # handRectangle.y -= handRectangle.height / 2.f; + x -= width / 2 + y -= width / 2 # width = height + # overflow the image + if x < 0: x = 0 + if y < 0: y = 0 + width1 = width + width2 = width + if x + width > image_width: width1 = image_width - x + if y + width > image_height: width2 = image_height - y + width = min(width1, width2) + # the max hand box value is 20 pixels + if width >= 20: + detect_result.append((int(x), int(y), int(width), is_left)) + + ''' + return value: [[x, y, w, True if left hand else False]]. + width=height since the network require squared input. + x, y is the coordinate of top left + ''' + return detect_result + + +# Written by Lvmin +def faceDetect(body: BodyResult, oriImg) -> Union[Tuple[int, int, int], None]: + """ + Detect the face in the input body pose keypoints and calculate the bounding box for the face. + + Args: + body (BodyResult): A BodyResult object containing the detected body pose keypoints. + oriImg (numpy.ndarray): A 3D numpy array representing the original input image. + + Returns: + Tuple[int, int, int] | None: A tuple containing the coordinates (x, y) of the top-left corner of the + bounding box and the width (height) of the bounding box, or None if the + face is not detected or the bounding box width is less than 20 pixels. + + Notes: + - The width and height of the bounding box are equal. + - The minimum bounding box size is 20 pixels. + """ + # left right eye ear 14 15 16 17 + image_height, image_width = oriImg.shape[0:2] + + keypoints = body.keypoints + head = keypoints[0] + left_eye = keypoints[14] + right_eye = keypoints[15] + left_ear = keypoints[16] + right_ear = keypoints[17] + + if head is None or all(keypoint is None for keypoint in (left_eye, right_eye, left_ear, right_ear)): + return None + + width = 0.0 + x0, y0 = head.x, head.y + + if left_eye is not None: + x1, y1 = left_eye.x, left_eye.y + d = max(abs(x0 - x1), abs(y0 - y1)) + width = max(width, d * 3.0) + + if right_eye is not None: + x1, y1 = right_eye.x, right_eye.y + d = max(abs(x0 - x1), abs(y0 - y1)) + width = max(width, d * 3.0) + + if left_ear is not None: + x1, y1 = left_ear.x, left_ear.y + d = max(abs(x0 - x1), abs(y0 - y1)) + width = max(width, d * 1.5) + + if right_ear is not None: + x1, y1 = right_ear.x, right_ear.y + d = max(abs(x0 - x1), abs(y0 - y1)) + width = max(width, d * 1.5) + + x, y = x0, y0 + + x -= width + y -= width + + if x < 0: + x = 0 + + if y < 0: + y = 0 + + width1 = width * 2 + width2 = width * 2 + + if x + width > image_width: + width1 = image_width - x + + if y + width > image_height: + width2 = image_height - y + + width = min(width1, width2) + + if width >= 20: + return int(x), int(y), int(width) + else: + return None + + +# get max index of 2d array +def npmax(array): + arrayindex = array.argmax(1) + arrayvalue = array.max(1) + i = arrayvalue.argmax() + j = arrayindex[i] + return i, j \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/openpose/wholebody.py b/extensions-builtin/forge_legacy_preprocessors/annotator/openpose/wholebody.py new file mode 100644 index 0000000000000000000000000000000000000000..37bf1178ce401a7c919c51e4dec76b802fc64ed1 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/openpose/wholebody.py @@ -0,0 +1,100 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
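+# DWPose-style whole-body estimation: an ONNX person detector and an ONNX pose model are run through OpenCV's DNN module (inference_detector / inference_pose below).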
+import cv2 +import numpy as np + +from .cv_ox_det import inference_detector +from .cv_ox_pose import inference_pose + +from typing import List, Optional +from .types import HumanPoseResult, BodyResult, Keypoint + + +class Wholebody: + def __init__(self, onnx_det: str, onnx_pose: str): + # Always loads to CPU to avoid building OpenCV. + device = 'cpu' + backend = cv2.dnn.DNN_BACKEND_OPENCV if device == 'cpu' else cv2.dnn.DNN_BACKEND_CUDA + # You need to manually build OpenCV through cmake to work with your GPU. + providers = cv2.dnn.DNN_TARGET_CPU if device == 'cpu' else cv2.dnn.DNN_TARGET_CUDA + + self.session_det = cv2.dnn.readNetFromONNX(onnx_det) + self.session_det.setPreferableBackend(backend) + self.session_det.setPreferableTarget(providers) + + self.session_pose = cv2.dnn.readNetFromONNX(onnx_pose) + self.session_pose.setPreferableBackend(backend) + self.session_pose.setPreferableTarget(providers) + + def __call__(self, oriImg) -> Optional[np.ndarray]: + det_result = inference_detector(self.session_det, oriImg) + if det_result is None: + return None + + keypoints, scores = inference_pose(self.session_pose, det_result, oriImg) + + keypoints_info = np.concatenate( + (keypoints, scores[..., None]), axis=-1) + # compute neck joint + neck = np.mean(keypoints_info[:, [5, 6]], axis=1) + # neck score when visualizing pred + neck[:, 2:4] = np.logical_and( + keypoints_info[:, 5, 2:4] > 0.3, + keypoints_info[:, 6, 2:4] > 0.3).astype(int) + new_keypoints_info = np.insert( + keypoints_info, 17, neck, axis=1) + mmpose_idx = [ + 17, 6, 8, 10, 7, 9, 12, 14, 16, 13, 15, 2, 1, 4, 3 + ] + openpose_idx = [ + 1, 2, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17 + ] + new_keypoints_info[:, openpose_idx] = \ + new_keypoints_info[:, mmpose_idx] + keypoints_info = new_keypoints_info + + return keypoints_info + + @staticmethod + def format_result(keypoints_info: Optional[np.ndarray]) -> List[HumanPoseResult]: + def format_keypoint_part( + part: np.ndarray, + ) -> Optional[List[Optional[Keypoint]]]: + keypoints = [ + Keypoint(x, y, score, i) if score >= 0.3 else None + for i, (x, y, score) in enumerate(part) + ] + return ( + None if all(keypoint is None for keypoint in keypoints) else keypoints + ) + + def total_score(keypoints: Optional[List[Optional[Keypoint]]]) -> float: + return ( + sum(keypoint.score for keypoint in keypoints if keypoint is not None) + if keypoints is not None + else 0.0 + ) + + pose_results = [] + if keypoints_info is None: + return pose_results + + for instance in keypoints_info: + body_keypoints = format_keypoint_part(instance[:18]) or ([None] * 18) + left_hand = format_keypoint_part(instance[92:113]) + right_hand = format_keypoint_part(instance[113:134]) + face = format_keypoint_part(instance[24:92]) + + # Openpose face consists of 70 points in total, while DWPose only + # provides 68 points. Padding the last 2 points. 
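+            # The two padded points reuse the body's eye keypoints (indices 14 and 15).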
+ if face is not None: + # left eye + face.append(body_keypoints[14]) + # right eye + face.append(body_keypoints[15]) + + body = BodyResult( + body_keypoints, total_score(body_keypoints), len(body_keypoints) + ) + pose_results.append(HumanPoseResult(body, left_hand, right_hand, face)) + + return pose_results diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/pidinet/LICENSE b/extensions-builtin/forge_legacy_preprocessors/annotator/pidinet/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..913b6cf92c19d37b6ee4f7bc99c65f655e7f840c --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/pidinet/LICENSE @@ -0,0 +1,21 @@ +It is just for research purpose, and commercial use should be contacted with authors first. + +Copyright (c) 2021 Zhuo Su + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
\ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/pidinet/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/pidinet/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..64195e9bc71865c6dcbc29b14d5d3a599ebe60de --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/pidinet/__init__.py @@ -0,0 +1,51 @@ +import os +import torch +import numpy as np +from einops import rearrange +from annotator.pidinet.model import pidinet +from annotator.util import safe_step +from modules import devices +from annotator.annotator_path import models_path +from ldm_patched.modules.utils import load_torch_file + +netNetwork = None +remote_model_path = "https://huggingface.co/lllyasviel/Annotators/resolve/main/table5_pidinet.pth" +modeldir = os.path.join(models_path, "pidinet") +old_modeldir = os.path.dirname(os.path.realpath(__file__)) + +def apply_pidinet(input_image, is_safe=False, apply_fliter=False): + global netNetwork + if netNetwork is None: + modelpath = os.path.join(modeldir, "table5_pidinet.pth") + old_modelpath = os.path.join(old_modeldir, "table5_pidinet.pth") + if os.path.exists(old_modelpath): + modelpath = old_modelpath + elif not os.path.exists(modelpath): + from modules.modelloader import load_file_from_url + load_file_from_url(remote_model_path, model_dir=modeldir) + netNetwork = pidinet() + ckp = load_torch_file(modelpath) + netNetwork.load_state_dict({k.replace('module.',''):v for k, v in ckp.items()}) + + netNetwork = netNetwork.to(devices.get_device_for("controlnet")) + netNetwork.eval() + assert input_image.ndim == 3 + input_image = input_image[:, :, ::-1].copy() + with torch.no_grad(): + image_pidi = torch.from_numpy(input_image).float().to(devices.get_device_for("controlnet")) + image_pidi = image_pidi / 255.0 + image_pidi = rearrange(image_pidi, 'h w c -> 1 c h w') + edge = netNetwork(image_pidi)[-1] + edge = edge.cpu().numpy() + if apply_fliter: + edge = edge > 0.5 + if is_safe: + edge = safe_step(edge) + edge = (edge * 255.0).clip(0, 255).astype(np.uint8) + + return edge[0][0] + +def unload_pid_model(): + global netNetwork + if netNetwork is not None: + netNetwork.cpu() \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/pidinet/model.py b/extensions-builtin/forge_legacy_preprocessors/annotator/pidinet/model.py new file mode 100644 index 0000000000000000000000000000000000000000..c778b89c1fe0cce25c21db1ed22a90377fc8d1c8 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/pidinet/model.py @@ -0,0 +1,654 @@ +""" +Author: Zhuo Su, Wenzhe Liu +Date: Feb 18, 2021 +""" + +import math + +import cv2 +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from modules import devices +from basicsr.utils import img2tensor + +nets = { + 'baseline': { + 'layer0': 'cv', + 'layer1': 'cv', + 'layer2': 'cv', + 'layer3': 'cv', + 'layer4': 'cv', + 'layer5': 'cv', + 'layer6': 'cv', + 'layer7': 'cv', + 'layer8': 'cv', + 'layer9': 'cv', + 'layer10': 'cv', + 'layer11': 'cv', + 'layer12': 'cv', + 'layer13': 'cv', + 'layer14': 'cv', + 'layer15': 'cv', + }, + 'c-v15': { + 'layer0': 'cd', + 'layer1': 'cv', + 'layer2': 'cv', + 'layer3': 'cv', + 'layer4': 'cv', + 'layer5': 'cv', + 'layer6': 'cv', + 'layer7': 'cv', + 'layer8': 'cv', + 'layer9': 'cv', + 'layer10': 'cv', + 'layer11': 'cv', + 'layer12': 'cv', + 'layer13': 'cv', + 'layer14': 'cv', + 'layer15': 'cv', + }, + 'a-v15': { + 'layer0': 'ad', + 
'layer1': 'cv', + 'layer2': 'cv', + 'layer3': 'cv', + 'layer4': 'cv', + 'layer5': 'cv', + 'layer6': 'cv', + 'layer7': 'cv', + 'layer8': 'cv', + 'layer9': 'cv', + 'layer10': 'cv', + 'layer11': 'cv', + 'layer12': 'cv', + 'layer13': 'cv', + 'layer14': 'cv', + 'layer15': 'cv', + }, + 'r-v15': { + 'layer0': 'rd', + 'layer1': 'cv', + 'layer2': 'cv', + 'layer3': 'cv', + 'layer4': 'cv', + 'layer5': 'cv', + 'layer6': 'cv', + 'layer7': 'cv', + 'layer8': 'cv', + 'layer9': 'cv', + 'layer10': 'cv', + 'layer11': 'cv', + 'layer12': 'cv', + 'layer13': 'cv', + 'layer14': 'cv', + 'layer15': 'cv', + }, + 'cvvv4': { + 'layer0': 'cd', + 'layer1': 'cv', + 'layer2': 'cv', + 'layer3': 'cv', + 'layer4': 'cd', + 'layer5': 'cv', + 'layer6': 'cv', + 'layer7': 'cv', + 'layer8': 'cd', + 'layer9': 'cv', + 'layer10': 'cv', + 'layer11': 'cv', + 'layer12': 'cd', + 'layer13': 'cv', + 'layer14': 'cv', + 'layer15': 'cv', + }, + 'avvv4': { + 'layer0': 'ad', + 'layer1': 'cv', + 'layer2': 'cv', + 'layer3': 'cv', + 'layer4': 'ad', + 'layer5': 'cv', + 'layer6': 'cv', + 'layer7': 'cv', + 'layer8': 'ad', + 'layer9': 'cv', + 'layer10': 'cv', + 'layer11': 'cv', + 'layer12': 'ad', + 'layer13': 'cv', + 'layer14': 'cv', + 'layer15': 'cv', + }, + 'rvvv4': { + 'layer0': 'rd', + 'layer1': 'cv', + 'layer2': 'cv', + 'layer3': 'cv', + 'layer4': 'rd', + 'layer5': 'cv', + 'layer6': 'cv', + 'layer7': 'cv', + 'layer8': 'rd', + 'layer9': 'cv', + 'layer10': 'cv', + 'layer11': 'cv', + 'layer12': 'rd', + 'layer13': 'cv', + 'layer14': 'cv', + 'layer15': 'cv', + }, + 'cccv4': { + 'layer0': 'cd', + 'layer1': 'cd', + 'layer2': 'cd', + 'layer3': 'cv', + 'layer4': 'cd', + 'layer5': 'cd', + 'layer6': 'cd', + 'layer7': 'cv', + 'layer8': 'cd', + 'layer9': 'cd', + 'layer10': 'cd', + 'layer11': 'cv', + 'layer12': 'cd', + 'layer13': 'cd', + 'layer14': 'cd', + 'layer15': 'cv', + }, + 'aaav4': { + 'layer0': 'ad', + 'layer1': 'ad', + 'layer2': 'ad', + 'layer3': 'cv', + 'layer4': 'ad', + 'layer5': 'ad', + 'layer6': 'ad', + 'layer7': 'cv', + 'layer8': 'ad', + 'layer9': 'ad', + 'layer10': 'ad', + 'layer11': 'cv', + 'layer12': 'ad', + 'layer13': 'ad', + 'layer14': 'ad', + 'layer15': 'cv', + }, + 'rrrv4': { + 'layer0': 'rd', + 'layer1': 'rd', + 'layer2': 'rd', + 'layer3': 'cv', + 'layer4': 'rd', + 'layer5': 'rd', + 'layer6': 'rd', + 'layer7': 'cv', + 'layer8': 'rd', + 'layer9': 'rd', + 'layer10': 'rd', + 'layer11': 'cv', + 'layer12': 'rd', + 'layer13': 'rd', + 'layer14': 'rd', + 'layer15': 'cv', + }, + 'c16': { + 'layer0': 'cd', + 'layer1': 'cd', + 'layer2': 'cd', + 'layer3': 'cd', + 'layer4': 'cd', + 'layer5': 'cd', + 'layer6': 'cd', + 'layer7': 'cd', + 'layer8': 'cd', + 'layer9': 'cd', + 'layer10': 'cd', + 'layer11': 'cd', + 'layer12': 'cd', + 'layer13': 'cd', + 'layer14': 'cd', + 'layer15': 'cd', + }, + 'a16': { + 'layer0': 'ad', + 'layer1': 'ad', + 'layer2': 'ad', + 'layer3': 'ad', + 'layer4': 'ad', + 'layer5': 'ad', + 'layer6': 'ad', + 'layer7': 'ad', + 'layer8': 'ad', + 'layer9': 'ad', + 'layer10': 'ad', + 'layer11': 'ad', + 'layer12': 'ad', + 'layer13': 'ad', + 'layer14': 'ad', + 'layer15': 'ad', + }, + 'r16': { + 'layer0': 'rd', + 'layer1': 'rd', + 'layer2': 'rd', + 'layer3': 'rd', + 'layer4': 'rd', + 'layer5': 'rd', + 'layer6': 'rd', + 'layer7': 'rd', + 'layer8': 'rd', + 'layer9': 'rd', + 'layer10': 'rd', + 'layer11': 'rd', + 'layer12': 'rd', + 'layer13': 'rd', + 'layer14': 'rd', + 'layer15': 'rd', + }, + 'carv4': { + 'layer0': 'cd', + 'layer1': 'ad', + 'layer2': 'rd', + 'layer3': 'cv', + 'layer4': 'cd', + 'layer5': 'ad', + 'layer6': 'rd', + 'layer7': 'cv', + 
'layer8': 'cd', + 'layer9': 'ad', + 'layer10': 'rd', + 'layer11': 'cv', + 'layer12': 'cd', + 'layer13': 'ad', + 'layer14': 'rd', + 'layer15': 'cv', + }, + } + +def createConvFunc(op_type): + assert op_type in ['cv', 'cd', 'ad', 'rd'], 'unknown op type: %s' % str(op_type) + if op_type == 'cv': + return F.conv2d + + if op_type == 'cd': + def func(x, weights, bias=None, stride=1, padding=0, dilation=1, groups=1): + assert dilation in [1, 2], 'dilation for cd_conv should be in 1 or 2' + assert weights.size(2) == 3 and weights.size(3) == 3, 'kernel size for cd_conv should be 3x3' + assert padding == dilation, 'padding for cd_conv set wrong' + + weights_c = weights.sum(dim=[2, 3], keepdim=True) + yc = F.conv2d(x, weights_c, stride=stride, padding=0, groups=groups) + y = F.conv2d(x, weights, bias, stride=stride, padding=padding, dilation=dilation, groups=groups) + return y - yc + return func + elif op_type == 'ad': + def func(x, weights, bias=None, stride=1, padding=0, dilation=1, groups=1): + assert dilation in [1, 2], 'dilation for ad_conv should be in 1 or 2' + assert weights.size(2) == 3 and weights.size(3) == 3, 'kernel size for ad_conv should be 3x3' + assert padding == dilation, 'padding for ad_conv set wrong' + + shape = weights.shape + weights = weights.view(shape[0], shape[1], -1) + weights_conv = (weights - weights[:, :, [3, 0, 1, 6, 4, 2, 7, 8, 5]]).view(shape) # clock-wise + y = F.conv2d(x, weights_conv, bias, stride=stride, padding=padding, dilation=dilation, groups=groups) + return y + return func + elif op_type == 'rd': + def func(x, weights, bias=None, stride=1, padding=0, dilation=1, groups=1): + assert dilation in [1, 2], 'dilation for rd_conv should be in 1 or 2' + assert weights.size(2) == 3 and weights.size(3) == 3, 'kernel size for rd_conv should be 3x3' + padding = 2 * dilation + + shape = weights.shape + if weights.is_cuda: + buffer = torch.cuda.FloatTensor(shape[0], shape[1], 5 * 5).fill_(0).to(devices.get_device_for("controlnet")) + else: + buffer = torch.zeros(shape[0], shape[1], 5 * 5).to(devices.get_device_for("controlnet")) + weights = weights.view(shape[0], shape[1], -1) + buffer[:, :, [0, 2, 4, 10, 14, 20, 22, 24]] = weights[:, :, 1:] + buffer[:, :, [6, 7, 8, 11, 13, 16, 17, 18]] = -weights[:, :, 1:] + buffer[:, :, 12] = 0 + buffer = buffer.view(shape[0], shape[1], 5, 5) + y = F.conv2d(x, buffer, bias, stride=stride, padding=padding, dilation=dilation, groups=groups) + return y + return func + else: + print('impossible to be here unless you force that') + return None + +class Conv2d(nn.Module): + def __init__(self, pdc, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=False): + super(Conv2d, self).__init__() + if in_channels % groups != 0: + raise ValueError('in_channels must be divisible by groups') + if out_channels % groups != 0: + raise ValueError('out_channels must be divisible by groups') + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = kernel_size + self.stride = stride + self.padding = padding + self.dilation = dilation + self.groups = groups + self.weight = nn.Parameter(torch.Tensor(out_channels, in_channels // groups, kernel_size, kernel_size)) + if bias: + self.bias = nn.Parameter(torch.Tensor(out_channels)) + else: + self.register_parameter('bias', None) + self.reset_parameters() + self.pdc = pdc + + def reset_parameters(self): + nn.init.kaiming_uniform_(self.weight, a=math.sqrt(5)) + if self.bias is not None: + fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.weight) 
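+            # bias is initialized uniformly in [-1/sqrt(fan_in), 1/sqrt(fan_in)], the same rule torch.nn.Conv2d uses by default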
+ bound = 1 / math.sqrt(fan_in) + nn.init.uniform_(self.bias, -bound, bound) + + def forward(self, input): + + return self.pdc(input, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups) + +class CSAM(nn.Module): + """ + Compact Spatial Attention Module + """ + def __init__(self, channels): + super(CSAM, self).__init__() + + mid_channels = 4 + self.relu1 = nn.ReLU() + self.conv1 = nn.Conv2d(channels, mid_channels, kernel_size=1, padding=0) + self.conv2 = nn.Conv2d(mid_channels, 1, kernel_size=3, padding=1, bias=False) + self.sigmoid = nn.Sigmoid() + nn.init.constant_(self.conv1.bias, 0) + + def forward(self, x): + y = self.relu1(x) + y = self.conv1(y) + y = self.conv2(y) + y = self.sigmoid(y) + + return x * y + +class CDCM(nn.Module): + """ + Compact Dilation Convolution based Module + """ + def __init__(self, in_channels, out_channels): + super(CDCM, self).__init__() + + self.relu1 = nn.ReLU() + self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, padding=0) + self.conv2_1 = nn.Conv2d(out_channels, out_channels, kernel_size=3, dilation=5, padding=5, bias=False) + self.conv2_2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, dilation=7, padding=7, bias=False) + self.conv2_3 = nn.Conv2d(out_channels, out_channels, kernel_size=3, dilation=9, padding=9, bias=False) + self.conv2_4 = nn.Conv2d(out_channels, out_channels, kernel_size=3, dilation=11, padding=11, bias=False) + nn.init.constant_(self.conv1.bias, 0) + + def forward(self, x): + x = self.relu1(x) + x = self.conv1(x) + x1 = self.conv2_1(x) + x2 = self.conv2_2(x) + x3 = self.conv2_3(x) + x4 = self.conv2_4(x) + return x1 + x2 + x3 + x4 + + +class MapReduce(nn.Module): + """ + Reduce feature maps into a single edge map + """ + def __init__(self, channels): + super(MapReduce, self).__init__() + self.conv = nn.Conv2d(channels, 1, kernel_size=1, padding=0) + nn.init.constant_(self.conv.bias, 0) + + def forward(self, x): + return self.conv(x) + + +class PDCBlock(nn.Module): + def __init__(self, pdc, inplane, ouplane, stride=1): + super(PDCBlock, self).__init__() + self.stride=stride + + self.stride=stride + if self.stride > 1: + self.pool = nn.MaxPool2d(kernel_size=2, stride=2) + self.shortcut = nn.Conv2d(inplane, ouplane, kernel_size=1, padding=0) + self.conv1 = Conv2d(pdc, inplane, inplane, kernel_size=3, padding=1, groups=inplane, bias=False) + self.relu2 = nn.ReLU() + self.conv2 = nn.Conv2d(inplane, ouplane, kernel_size=1, padding=0, bias=False) + + def forward(self, x): + if self.stride > 1: + x = self.pool(x) + y = self.conv1(x) + y = self.relu2(y) + y = self.conv2(y) + if self.stride > 1: + x = self.shortcut(x) + y = y + x + return y + +class PDCBlock_converted(nn.Module): + """ + CPDC, APDC can be converted to vanilla 3x3 convolution + RPDC can be converted to vanilla 5x5 convolution + """ + def __init__(self, pdc, inplane, ouplane, stride=1): + super(PDCBlock_converted, self).__init__() + self.stride=stride + + if self.stride > 1: + self.pool = nn.MaxPool2d(kernel_size=2, stride=2) + self.shortcut = nn.Conv2d(inplane, ouplane, kernel_size=1, padding=0) + if pdc == 'rd': + self.conv1 = nn.Conv2d(inplane, inplane, kernel_size=5, padding=2, groups=inplane, bias=False) + else: + self.conv1 = nn.Conv2d(inplane, inplane, kernel_size=3, padding=1, groups=inplane, bias=False) + self.relu2 = nn.ReLU() + self.conv2 = nn.Conv2d(inplane, ouplane, kernel_size=1, padding=0, bias=False) + + def forward(self, x): + if self.stride > 1: + x = self.pool(x) + y = self.conv1(x) + y = self.relu2(y) + y = 
self.conv2(y) + if self.stride > 1: + x = self.shortcut(x) + y = y + x + return y + +class PiDiNet(nn.Module): + def __init__(self, inplane, pdcs, dil=None, sa=False, convert=False): + super(PiDiNet, self).__init__() + self.sa = sa + if dil is not None: + assert isinstance(dil, int), 'dil should be an int' + self.dil = dil + + self.fuseplanes = [] + + self.inplane = inplane + if convert: + if pdcs[0] == 'rd': + init_kernel_size = 5 + init_padding = 2 + else: + init_kernel_size = 3 + init_padding = 1 + self.init_block = nn.Conv2d(3, self.inplane, + kernel_size=init_kernel_size, padding=init_padding, bias=False) + block_class = PDCBlock_converted + else: + self.init_block = Conv2d(pdcs[0], 3, self.inplane, kernel_size=3, padding=1) + block_class = PDCBlock + + self.block1_1 = block_class(pdcs[1], self.inplane, self.inplane) + self.block1_2 = block_class(pdcs[2], self.inplane, self.inplane) + self.block1_3 = block_class(pdcs[3], self.inplane, self.inplane) + self.fuseplanes.append(self.inplane) # C + + inplane = self.inplane + self.inplane = self.inplane * 2 + self.block2_1 = block_class(pdcs[4], inplane, self.inplane, stride=2) + self.block2_2 = block_class(pdcs[5], self.inplane, self.inplane) + self.block2_3 = block_class(pdcs[6], self.inplane, self.inplane) + self.block2_4 = block_class(pdcs[7], self.inplane, self.inplane) + self.fuseplanes.append(self.inplane) # 2C + + inplane = self.inplane + self.inplane = self.inplane * 2 + self.block3_1 = block_class(pdcs[8], inplane, self.inplane, stride=2) + self.block3_2 = block_class(pdcs[9], self.inplane, self.inplane) + self.block3_3 = block_class(pdcs[10], self.inplane, self.inplane) + self.block3_4 = block_class(pdcs[11], self.inplane, self.inplane) + self.fuseplanes.append(self.inplane) # 4C + + self.block4_1 = block_class(pdcs[12], self.inplane, self.inplane, stride=2) + self.block4_2 = block_class(pdcs[13], self.inplane, self.inplane) + self.block4_3 = block_class(pdcs[14], self.inplane, self.inplane) + self.block4_4 = block_class(pdcs[15], self.inplane, self.inplane) + self.fuseplanes.append(self.inplane) # 4C + + self.conv_reduces = nn.ModuleList() + if self.sa and self.dil is not None: + self.attentions = nn.ModuleList() + self.dilations = nn.ModuleList() + for i in range(4): + self.dilations.append(CDCM(self.fuseplanes[i], self.dil)) + self.attentions.append(CSAM(self.dil)) + self.conv_reduces.append(MapReduce(self.dil)) + elif self.sa: + self.attentions = nn.ModuleList() + for i in range(4): + self.attentions.append(CSAM(self.fuseplanes[i])) + self.conv_reduces.append(MapReduce(self.fuseplanes[i])) + elif self.dil is not None: + self.dilations = nn.ModuleList() + for i in range(4): + self.dilations.append(CDCM(self.fuseplanes[i], self.dil)) + self.conv_reduces.append(MapReduce(self.dil)) + else: + for i in range(4): + self.conv_reduces.append(MapReduce(self.fuseplanes[i])) + + self.classifier = nn.Conv2d(4, 1, kernel_size=1) # has bias + nn.init.constant_(self.classifier.weight, 0.25) + nn.init.constant_(self.classifier.bias, 0) + + # print('initialization done') + + def get_weights(self): + conv_weights = [] + bn_weights = [] + relu_weights = [] + for pname, p in self.named_parameters(): + if 'bn' in pname: + bn_weights.append(p) + elif 'relu' in pname: + relu_weights.append(p) + else: + conv_weights.append(p) + + return conv_weights, bn_weights, relu_weights + + def forward(self, x): + H, W = x.size()[2:] + + x = self.init_block(x) + + x1 = self.block1_1(x) + x1 = self.block1_2(x1) + x1 = self.block1_3(x1) + + x2 = self.block2_1(x1) 
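+        # block2_1 above downsamples by 2 (max-pool) and doubles the channels to 2C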
+ x2 = self.block2_2(x2) + x2 = self.block2_3(x2) + x2 = self.block2_4(x2) + + x3 = self.block3_1(x2) + x3 = self.block3_2(x3) + x3 = self.block3_3(x3) + x3 = self.block3_4(x3) + + x4 = self.block4_1(x3) + x4 = self.block4_2(x4) + x4 = self.block4_3(x4) + x4 = self.block4_4(x4) + + x_fuses = [] + if self.sa and self.dil is not None: + for i, xi in enumerate([x1, x2, x3, x4]): + x_fuses.append(self.attentions[i](self.dilations[i](xi))) + elif self.sa: + for i, xi in enumerate([x1, x2, x3, x4]): + x_fuses.append(self.attentions[i](xi)) + elif self.dil is not None: + for i, xi in enumerate([x1, x2, x3, x4]): + x_fuses.append(self.dilations[i](xi)) + else: + x_fuses = [x1, x2, x3, x4] + + e1 = self.conv_reduces[0](x_fuses[0]) + e1 = F.interpolate(e1, (H, W), mode="bilinear", align_corners=False) + + e2 = self.conv_reduces[1](x_fuses[1]) + e2 = F.interpolate(e2, (H, W), mode="bilinear", align_corners=False) + + e3 = self.conv_reduces[2](x_fuses[2]) + e3 = F.interpolate(e3, (H, W), mode="bilinear", align_corners=False) + + e4 = self.conv_reduces[3](x_fuses[3]) + e4 = F.interpolate(e4, (H, W), mode="bilinear", align_corners=False) + + outputs = [e1, e2, e3, e4] + + output = self.classifier(torch.cat(outputs, dim=1)) + #if not self.training: + # return torch.sigmoid(output) + + outputs.append(output) + outputs = [torch.sigmoid(r) for r in outputs] + return outputs + +def config_model(model): + model_options = list(nets.keys()) + assert model in model_options, \ + 'unrecognized model, please choose from %s' % str(model_options) + + # print(str(nets[model])) + + pdcs = [] + for i in range(16): + layer_name = 'layer%d' % i + op = nets[model][layer_name] + pdcs.append(createConvFunc(op)) + + return pdcs + +def pidinet(): + pdcs = config_model('carv4') + dil = 24 #if args.dil else None + return PiDiNet(60, pdcs, dil=dil, sa=True) + + +if __name__ == '__main__': + model = pidinet() + ckp = torch.load('table5_pidinet.pth')['state_dict'] + model.load_state_dict({k.replace('module.',''):v for k, v in ckp.items()}) + im = cv2.imread('examples/test_my/cat_v4.png') + im = img2tensor(im).unsqueeze(0)/255. 
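+    # the last element of the returned list is the fused edge map (sigmoid already applied)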
+ res = model(im)[-1] + res = res>0.5 + res = res.float() + res = (res[0,0].cpu().data.numpy()*255.).astype(np.uint8) + print(res.shape) + cv2.imwrite('edge.png', res) \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/shuffle/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/shuffle/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..92bf1182f498606b4a6ec7f1c664a1f6b6690b62 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/shuffle/__init__.py @@ -0,0 +1,18 @@ +import cv2 +import numpy as np +from annotator.util import make_noise_disk + + +class ContentShuffleDetector: + def __call__(self, img, h=None, w=None, f=None): + H, W, C = img.shape + if h is None: + h = H + if w is None: + w = W + if f is None: + f = 256 + x = make_noise_disk(h, w, 1, f) * float(W - 1) + y = make_noise_disk(h, w, 1, f) * float(H - 1) + flow = np.concatenate([x, y], axis=2).astype(np.float32) + return cv2.remap(img, flow, None, cv2.INTER_LINEAR) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/teed/Fmish.py b/extensions-builtin/forge_legacy_preprocessors/annotator/teed/Fmish.py new file mode 100644 index 0000000000000000000000000000000000000000..40c867a272bdaf948d435a46a6aaa70478036994 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/teed/Fmish.py @@ -0,0 +1,17 @@ +""" +Script provides functional interface for Mish activation function. +""" + +# import pytorch +import torch +import torch.nn.functional as F + + +@torch.jit.script +def mish(input): + """ + Applies the mish function element-wise: + mish(x) = x * tanh(softplus(x)) = x * tanh(ln(1 + exp(x))) + See additional documentation for mish class. + """ + return input * torch.tanh(F.softplus(input)) \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/teed/Fsmish.py b/extensions-builtin/forge_legacy_preprocessors/annotator/teed/Fsmish.py new file mode 100644 index 0000000000000000000000000000000000000000..eb8c55cad89953f202384eee81173e2b2ae10712 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/teed/Fsmish.py @@ -0,0 +1,20 @@ +""" +Script based on: +Wang, Xueliang, Honge Ren, and Achuan Wang. + "Smish: A Novel Activation Function for Deep Learning Methods. + " Electronics 11.4 (2022): 540. +""" + +# import pytorch +import torch +import torch.nn.functional as F + + +@torch.jit.script +def smish(input): + """ + Applies the mish function element-wise: + mish(x) = x * tanh(softplus(x)) = x * tanh(ln(1 + exp(sigmoid(x)))) + See additional documentation for mish class. 
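+    Note: the return below uses the sigmoid form, smish(x) = x * tanh(ln(1 + sigmoid(x))).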
+ """ + return input * torch.tanh(torch.log(1+torch.sigmoid(input))) \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/teed/LICENSE.txt b/extensions-builtin/forge_legacy_preprocessors/annotator/teed/LICENSE.txt new file mode 100644 index 0000000000000000000000000000000000000000..4a99ffdd7372b1bfa44ea302330343cb7370d0e9 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/teed/LICENSE.txt @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022 Xavier Soria Poma + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/teed/Xmish.py b/extensions-builtin/forge_legacy_preprocessors/annotator/teed/Xmish.py new file mode 100644 index 0000000000000000000000000000000000000000..15e84ed98d165ab6eb4db672dbfdf50ef0953e31 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/teed/Xmish.py @@ -0,0 +1,43 @@ +""" +Applies the mish function element-wise: +mish(x) = x * tanh(softplus(x)) = x * tanh(ln(1 + exp(x))) +""" + +# import pytorch +import torch +import torch.nn.functional as F +from torch import nn + +# import activation functions +from .Fmish import mish + + +class Mish(nn.Module): + """ + Applies the mish function element-wise: + mish(x) = x * tanh(softplus(x)) = x * tanh(ln(1 + exp(x))) + Shape: + - Input: (N, *) where * means, any number of additional + dimensions + - Output: (N, *), same shape as the input + Examples: + >>> m = Mish() + >>> input = torch.randn(2) + >>> output = m(input) + Reference: https://pytorch.org/docs/stable/generated/torch.nn.Mish.html + """ + + def __init__(self): + """ + Init method. + """ + super().__init__() + + def forward(self, input): + """ + Forward pass of the function. + """ + if torch.__version__ >= "1.9": + return F.mish(input) + else: + return mish(input) \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/teed/Xsmish.py b/extensions-builtin/forge_legacy_preprocessors/annotator/teed/Xsmish.py new file mode 100644 index 0000000000000000000000000000000000000000..df75bee4d3d3585b1b265435a713ef0186b1e701 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/teed/Xsmish.py @@ -0,0 +1,43 @@ +""" +Script based on: +Wang, Xueliang, Honge Ren, and Achuan Wang. + "Smish: A Novel Activation Function for Deep Learning Methods. + " Electronics 11.4 (2022): 540. 
+smish(x) = x * tanh(ln(1 + sigmoid(x))) +""" + +# import pytorch +import torch +import torch.nn.functional as F +from torch import nn + +# import activation functions +from .Fsmish import smish + + +class Smish(nn.Module): + """ + Applies the smish function element-wise: + smish(x) = x * tanh(ln(1 + sigmoid(x))) + Shape: + - Input: (N, *) where * means any number of additional + dimensions + - Output: (N, *), same shape as the input + Examples: + >>> m = Smish() + >>> input = torch.randn(2) + >>> output = m(input) + Reference: https://pytorch.org/docs/stable/generated/torch.nn.Mish.html + """ + + def __init__(self): + """ + Init method. + """ + super().__init__() + + def forward(self, input): + """ + Forward pass of the function. + """ + return smish(input) \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/teed/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/teed/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..47517043f7856318be11ee802ddfdbe846a1cd52 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/teed/__init__.py @@ -0,0 +1,55 @@ +""" +Hello, welcome on board, +""" +from __future__ import print_function + +import os +import cv2 +import numpy as np + +import torch + +from annotator.teed.ted import TED # TEED architecture +from einops import rearrange +from modules import devices +from annotator.util import load_model, safe_step +from annotator.annotator_path import models_path + +class TEEDDector: + """https://github.com/xavysp/TEED""" + + model_dir = os.path.join(models_path, "TEED") + + def __init__(self): + self.device = devices.get_device_for("controlnet") + self.model = TED().to(self.device).eval() + remote_url = os.environ.get( + "CONTROLNET_TEED_MODEL_URL", + "https://huggingface.co/bdsqlsz/qinglong_controlnet-lllite/resolve/main/Annotators/7_model.pth", + ) + model_path = load_model( + "7_model.pth", remote_url=remote_url, model_dir=self.model_dir + ) + self.model.load_state_dict(torch.load(model_path)) + + def unload_model(self): + if self.model is not None: + self.model.cpu() + + def __call__(self, image: np.ndarray, safe_steps: int = 2) -> np.ndarray: + + self.model.to(self.device) + + H, W, _ = image.shape + with torch.no_grad(): + image_teed = torch.from_numpy(image.copy()).float().to(self.device) + image_teed = rearrange(image_teed, 'h w c -> 1 c h w') + edges = self.model(image_teed) + edges = [e.detach().cpu().numpy().astype(np.float32)[0, 0] for e in edges] + edges = [cv2.resize(e, (W, H), interpolation=cv2.INTER_LINEAR) for e in edges] + edges = np.stack(edges, axis=2) + edge = 1 / (1 + np.exp(-np.mean(edges, axis=2).astype(np.float64))) + if safe_steps != 0: + edge = safe_step(edge, safe_steps) + edge = (edge * 255.0).clip(0, 255).astype(np.uint8) + return edge \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/teed/ted.py b/extensions-builtin/forge_legacy_preprocessors/annotator/teed/ted.py new file mode 100644 index 0000000000000000000000000000000000000000..ff347d5acf767126cc95022a2c2036c0262db40d --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/teed/ted.py @@ -0,0 +1,296 @@ +# TEED: a Tiny but Efficient Edge Detector, derived from LDC-B3 +# with a slight modification +# LDC parameters: +# 155665 +# TED > 58K + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from .Fsmish import smish as Fsmish 
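+# Smish activation wrapper (see Fsmish.smish): smish(x) = x * tanh(ln(1 + sigmoid(x)))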
+from .Xsmish import Smish + + +def weight_init(m): + if isinstance(m, (nn.Conv2d,)): + torch.nn.init.xavier_normal_(m.weight, gain=1.0) + + if m.bias is not None: + torch.nn.init.zeros_(m.bias) + + # for fusion layer + if isinstance(m, (nn.ConvTranspose2d,)): + torch.nn.init.xavier_normal_(m.weight, gain=1.0) + if m.bias is not None: + torch.nn.init.zeros_(m.bias) + +class CoFusion(nn.Module): + # from LDC + + def __init__(self, in_ch, out_ch): + super(CoFusion, self).__init__() + self.conv1 = nn.Conv2d(in_ch, 32, kernel_size=3, + stride=1, padding=1) # before 64 + self.conv3= nn.Conv2d(32, out_ch, kernel_size=3, + stride=1, padding=1)# before 64 instead of 32 + self.relu = nn.ReLU() + self.norm_layer1 = nn.GroupNorm(4, 32) # before 64 + + def forward(self, x): + # fusecat = torch.cat(x, dim=1) + attn = self.relu(self.norm_layer1(self.conv1(x))) + attn = F.softmax(self.conv3(attn), dim=1) + return ((x * attn).sum(1)).unsqueeze(1) + + +class CoFusion2(nn.Module): + # TEDv14-3 + def __init__(self, in_ch, out_ch): + super(CoFusion2, self).__init__() + self.conv1 = nn.Conv2d(in_ch, 32, kernel_size=3, + stride=1, padding=1) # before 64 + # self.conv2 = nn.Conv2d(32, 32, kernel_size=3, + # stride=1, padding=1)# before 64 + self.conv3 = nn.Conv2d(32, out_ch, kernel_size=3, + stride=1, padding=1)# before 64 instead of 32 + self.smish= Smish()#nn.ReLU(inplace=True) + + + def forward(self, x): + # fusecat = torch.cat(x, dim=1) + attn = self.conv1(self.smish(x)) + attn = self.conv3(self.smish(attn)) # before , )dim=1) + + # return ((fusecat * attn).sum(1)).unsqueeze(1) + return ((x * attn).sum(1)).unsqueeze(1) + +class DoubleFusion(nn.Module): + # TED fusion before the final edge map prediction + def __init__(self, in_ch, out_ch): + super(DoubleFusion, self).__init__() + self.DWconv1 = nn.Conv2d(in_ch, in_ch*8, kernel_size=3, + stride=1, padding=1, groups=in_ch) # before 64 + self.PSconv1 = nn.PixelShuffle(1) + + self.DWconv2 = nn.Conv2d(24, 24*1, kernel_size=3, + stride=1, padding=1,groups=24)# before 64 instead of 32 + + self.AF= Smish()#XAF() #nn.Tanh()# XAF() # # Smish()# + + + def forward(self, x): + # fusecat = torch.cat(x, dim=1) + attn = self.PSconv1(self.DWconv1(self.AF(x))) # #TEED best res TEDv14 [8, 32, 352, 352] + + attn2 = self.PSconv1(self.DWconv2(self.AF(attn))) # #TEED best res TEDv14[8, 3, 352, 352] + + return Fsmish(((attn2 +attn).sum(1)).unsqueeze(1)) #TED best res + +class _DenseLayer(nn.Sequential): + def __init__(self, input_features, out_features): + super(_DenseLayer, self).__init__() + + self.add_module('conv1', nn.Conv2d(input_features, out_features, + kernel_size=3, stride=1, padding=2, bias=True)), + self.add_module('smish1', Smish()), + self.add_module('conv2', nn.Conv2d(out_features, out_features, + kernel_size=3, stride=1, bias=True)) + def forward(self, x): + x1, x2 = x + + new_features = super(_DenseLayer, self).forward(Fsmish(x1)) # F.relu() + + return 0.5 * (new_features + x2), x2 + + +class _DenseBlock(nn.Sequential): + def __init__(self, num_layers, input_features, out_features): + super(_DenseBlock, self).__init__() + for i in range(num_layers): + layer = _DenseLayer(input_features, out_features) + self.add_module('denselayer%d' % (i + 1), layer) + input_features = out_features + + +class UpConvBlock(nn.Module): + def __init__(self, in_features, up_scale): + super(UpConvBlock, self).__init__() + self.up_factor = 2 + self.constant_features = 16 + + layers = self.make_deconv_layers(in_features, up_scale) + assert layers is not None, layers + self.features = 
nn.Sequential(*layers) + + def make_deconv_layers(self, in_features, up_scale): + layers = [] + all_pads=[0,0,1,3,7] + for i in range(up_scale): + kernel_size = 2 ** up_scale + pad = all_pads[up_scale] # kernel_size-1 + out_features = self.compute_out_features(i, up_scale) + layers.append(nn.Conv2d(in_features, out_features, 1)) + layers.append(Smish()) + layers.append(nn.ConvTranspose2d( + out_features, out_features, kernel_size, stride=2, padding=pad)) + in_features = out_features + return layers + + def compute_out_features(self, idx, up_scale): + return 1 if idx == up_scale - 1 else self.constant_features + + def forward(self, x): + return self.features(x) + + +class SingleConvBlock(nn.Module): + def __init__(self, in_features, out_features, stride, use_ac=False): + super(SingleConvBlock, self).__init__() + # self.use_bn = use_bs + self.use_ac=use_ac + self.conv = nn.Conv2d(in_features, out_features, 1, stride=stride, + bias=True) + if self.use_ac: + self.smish = Smish() + + def forward(self, x): + x = self.conv(x) + if self.use_ac: + return self.smish(x) + else: + return x + +class DoubleConvBlock(nn.Module): + def __init__(self, in_features, mid_features, + out_features=None, + stride=1, + use_act=True): + super(DoubleConvBlock, self).__init__() + + self.use_act = use_act + if out_features is None: + out_features = mid_features + self.conv1 = nn.Conv2d(in_features, mid_features, + 3, padding=1, stride=stride) + self.conv2 = nn.Conv2d(mid_features, out_features, 3, padding=1) + self.smish= Smish()#nn.ReLU(inplace=True) + + def forward(self, x): + x = self.conv1(x) + x = self.smish(x) + x = self.conv2(x) + if self.use_act: + x = self.smish(x) + return x + + +class TED(nn.Module): + """ Definition of Tiny and Efficient Edge Detector + model + """ + + def __init__(self): + super(TED, self).__init__() + self.block_1 = DoubleConvBlock(3, 16, 16, stride=2,) + self.block_2 = DoubleConvBlock(16, 32, use_act=False) + self.dblock_3 = _DenseBlock(1, 32, 48) # [32,48,100,100] before (2, 32, 64) + + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + + # skip1 connection, see fig. 2 + self.side_1 = SingleConvBlock(16, 32, 2) + + # skip2 connection, see fig. 
2 + self.pre_dense_3 = SingleConvBlock(32, 48, 1) # before (32, 64, 1) + + # USNet + self.up_block_1 = UpConvBlock(16, 1) + self.up_block_2 = UpConvBlock(32, 1) + self.up_block_3 = UpConvBlock(48, 2) # (32, 64, 1) + + self.block_cat = DoubleFusion(3,3) # TEED: DoubleFusion + + self.apply(weight_init) + + def slice(self, tensor, slice_shape): + t_shape = tensor.shape + img_h, img_w = slice_shape + if img_w!=t_shape[-1] or img_h!=t_shape[2]: + new_tensor = F.interpolate( + tensor, size=(img_h, img_w), mode='bicubic',align_corners=False) + + else: + new_tensor=tensor + # tensor[..., :height, :width] + return new_tensor + def resize_input(self,tensor): + t_shape = tensor.shape + if t_shape[2] % 8 != 0 or t_shape[3] % 8 != 0: + img_w= ((t_shape[3]// 8) + 1) * 8 + img_h = ((t_shape[2] // 8) + 1) * 8 + new_tensor = F.interpolate( + tensor, size=(img_h, img_w), mode='bicubic', align_corners=False) + else: + new_tensor = tensor + return new_tensor + + def crop_bdcn(data1, h, w, crop_h, crop_w): + # Based on BDCN Implementation @ https://github.com/pkuCactus/BDCN + _, _, h1, w1 = data1.size() + assert (h <= h1 and w <= w1) + data = data1[:, :, crop_h:crop_h + h, crop_w:crop_w + w] + return data + + + def forward(self, x, single_test=False): + assert x.ndim == 4, x.shape + # supose the image size is 352x352 + + # Block 1 + block_1 = self.block_1(x) # [8,16,176,176] + block_1_side = self.side_1(block_1) # 16 [8,32,88,88] + + # Block 2 + block_2 = self.block_2(block_1) # 32 # [8,32,176,176] + block_2_down = self.maxpool(block_2) # [8,32,88,88] + block_2_add = block_2_down + block_1_side # [8,32,88,88] + + # Block 3 + block_3_pre_dense = self.pre_dense_3(block_2_down) # [8,64,88,88] block 3 L connection + block_3, _ = self.dblock_3([block_2_add, block_3_pre_dense]) # [8,64,88,88] + + # upsampling blocks + out_1 = self.up_block_1(block_1) + out_2 = self.up_block_2(block_2) + out_3 = self.up_block_3(block_3) + + results = [out_1, out_2, out_3] + + # concatenate multiscale outputs + block_cat = torch.cat(results, dim=1) # Bx6xHxW + block_cat = self.block_cat(block_cat) # Bx1xHxW DoubleFusion + + results.append(block_cat) + return results + + +if __name__ == '__main__': + batch_size = 8 + img_height = 352 + img_width = 352 + + # device = "cuda" if torch.cuda.is_available() else "cpu" + device = "cpu" + input = torch.rand(batch_size, 3, img_height, img_width).to(device) + # target = torch.rand(batch_size, 1, img_height, img_width).to(device) + print(f"input shape: {input.shape}") + model = TED().to(device) + output = model(input) + print(f"output shapes: {[t.shape for t in output]}") + + # for i in range(20000): + # print(i) + # output = model(input) + # loss = nn.MSELoss()(output[-1], target) + # loss.backward() diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/LICENSE b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..c38dc639e6e238fbf59608f80b3a6ff1928ac429 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/LICENSE @@ -0,0 +1,203 @@ +Copyright 2022 SenseTime X-Lab. All rights reserved. + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2022 SenseTime X-Lab. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
\ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0b0eb67b48f84aad9404ee7d7e53e4a87f51b3c9 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/__init__.py @@ -0,0 +1,56 @@ +import os +from annotator.annotator_path import models_path +from modules import devices +from annotator.uniformer.inference import init_segmentor, inference_segmentor, show_result_pyplot + +try: + from mmseg.core.evaluation import get_palette +except ImportError: + from annotator.mmpkg.mmseg.core.evaluation import get_palette + +modeldir = os.path.join(models_path, "uniformer") +checkpoint_file = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/upernet_global_small.pth" +config_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), "upernet_global_small.py") +old_modeldir = os.path.dirname(os.path.realpath(__file__)) +model = None + +def unload_uniformer_model(): + global model + if model is not None: + model = model.cpu() + +def apply_uniformer(img): + global model + if model is None: + modelpath = os.path.join(modeldir, "upernet_global_small.pth") + old_modelpath = os.path.join(old_modeldir, "upernet_global_small.pth") + if os.path.exists(old_modelpath): + modelpath = old_modelpath + elif not os.path.exists(modelpath): + from modules.modelloader import load_file_from_url + load_file_from_url(checkpoint_file, model_dir=modeldir) + + model = init_segmentor(config_file, modelpath, device=devices.get_device_for("controlnet")) + model = model.to(devices.get_device_for("controlnet")) + + if devices.get_device_for("controlnet").type == 'mps': + # adaptive_avg_pool2d can fail on MPS, workaround with CPU + import torch.nn.functional + + orig_adaptive_avg_pool2d = torch.nn.functional.adaptive_avg_pool2d + def cpu_if_exception(input, *args, **kwargs): + try: + return orig_adaptive_avg_pool2d(input, *args, **kwargs) + except: + return orig_adaptive_avg_pool2d(input.cpu(), *args, **kwargs).to(input.device) + + try: + torch.nn.functional.adaptive_avg_pool2d = cpu_if_exception + result = inference_segmentor(model, img) + finally: + torch.nn.functional.adaptive_avg_pool2d = orig_adaptive_avg_pool2d + else: + result = inference_segmentor(model, img) + + res_img = show_result_pyplot(model, img, result, get_palette('ade'), opacity=1) + return res_img diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/datasets/ade20k.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/datasets/ade20k.py new file mode 100644 index 0000000000000000000000000000000000000000..efc8b4bb20c981f3db6df7eb52b3dc0744c94cc0 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/datasets/ade20k.py @@ -0,0 +1,54 @@ +# dataset settings +dataset_type = 'ADE20KDataset' +data_root = 'data/ade/ADEChallengeData2016' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', reduce_zero_label=True), + dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + 
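# normalize with the ImageNet mean/std defined in img_norm_cfg above +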
dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 512), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/training', + ann_dir='annotations/training', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline)) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/datasets/chase_db1.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/datasets/chase_db1.py new file mode 100644 index 0000000000000000000000000000000000000000..298594ea925f87f22b37094a2ec50e370aec96a0 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/datasets/chase_db1.py @@ -0,0 +1,59 @@ +# dataset settings +dataset_type = 'ChaseDB1Dataset' +data_root = 'data/CHASE_DB1' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_scale = (960, 999) +crop_size = (128, 128) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] + +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type='RepeatDataset', + times=40000, + dataset=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/training', + ann_dir='annotations/training', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline)) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/datasets/cityscapes.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/datasets/cityscapes.py new file mode 100644 index 
0000000000000000000000000000000000000000..f21867c63e1835f6fceb61f066e802fd8fd2a735 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/datasets/cityscapes.py @@ -0,0 +1,54 @@ +# dataset settings +dataset_type = 'CityscapesDataset' +data_root = 'data/cityscapes/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (512, 1024) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 1024), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/train', + ann_dir='gtFine/train', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/val', + ann_dir='gtFine/val', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/val', + ann_dir='gtFine/val', + pipeline=test_pipeline)) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/datasets/cityscapes_769x769.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/datasets/cityscapes_769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..336c7b254fe392b4703039fec86a83acdbd2e1a5 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/datasets/cityscapes_769x769.py @@ -0,0 +1,35 @@ +_base_ = './cityscapes.py' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (769, 769) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(2049, 1025), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2049, 1025), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git 
a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/datasets/drive.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/datasets/drive.py new file mode 100644 index 0000000000000000000000000000000000000000..06e8ff606e0d2a4514ec8b7d2c6c436a32efcbf4 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/datasets/drive.py @@ -0,0 +1,59 @@ +# dataset settings +dataset_type = 'DRIVEDataset' +data_root = 'data/DRIVE' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_scale = (584, 565) +crop_size = (64, 64) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] + +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type='RepeatDataset', + times=40000, + dataset=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/training', + ann_dir='annotations/training', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline)) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/datasets/hrf.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/datasets/hrf.py new file mode 100644 index 0000000000000000000000000000000000000000..242d790eb1b83e75cf6b7eaa7a35c674099311ad --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/datasets/hrf.py @@ -0,0 +1,59 @@ +# dataset settings +dataset_type = 'HRFDataset' +data_root = 'data/HRF' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_scale = (2336, 3504) +crop_size = (256, 256) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + 
dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] + +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type='RepeatDataset', + times=40000, + dataset=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/training', + ann_dir='annotations/training', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline)) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/datasets/pascal_context.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/datasets/pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..ff65bad1b86d7e3a5980bb5b9fc55798dc8df5f4 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/datasets/pascal_context.py @@ -0,0 +1,60 @@ +# dataset settings +dataset_type = 'PascalContextDataset' +data_root = 'data/VOCdevkit/VOC2010/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) + +img_scale = (520, 520) +crop_size = (480, 480) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/train.txt', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/val.txt', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/val.txt', + pipeline=test_pipeline)) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/datasets/pascal_context_59.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/datasets/pascal_context_59.py new file mode 100644 index 0000000000000000000000000000000000000000..37585abab89834b95cd5bdd993b994fca1db65f6 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/datasets/pascal_context_59.py @@ -0,0 +1,60 @@ +# dataset settings +dataset_type = 'PascalContextDataset59' +data_root 
= 'data/VOCdevkit/VOC2010/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) + +img_scale = (520, 520) +crop_size = (480, 480) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', reduce_zero_label=True), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/train.txt', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/val.txt', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/val.txt', + pipeline=test_pipeline)) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/datasets/pascal_voc12.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/datasets/pascal_voc12.py new file mode 100644 index 0000000000000000000000000000000000000000..ba1d42d0c5781f56dc177d860d856bb34adce555 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/datasets/pascal_voc12.py @@ -0,0 +1,57 @@ +# dataset settings +dataset_type = 'PascalVOCDataset' +data_root = 'data/VOCdevkit/VOC2012' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 512), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClass', + split='ImageSets/Segmentation/train.txt', + 
pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClass', + split='ImageSets/Segmentation/val.txt', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClass', + split='ImageSets/Segmentation/val.txt', + pipeline=test_pipeline)) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/datasets/pascal_voc12_aug.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/datasets/pascal_voc12_aug.py new file mode 100644 index 0000000000000000000000000000000000000000..3f23b6717d53ad29f02dd15046802a2631a5076b --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/datasets/pascal_voc12_aug.py @@ -0,0 +1,9 @@ +_base_ = './pascal_voc12.py' +# dataset settings +data = dict( + train=dict( + ann_dir=['SegmentationClass', 'SegmentationClassAug'], + split=[ + 'ImageSets/Segmentation/train.txt', + 'ImageSets/Segmentation/aug.txt' + ])) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/datasets/stare.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/datasets/stare.py new file mode 100644 index 0000000000000000000000000000000000000000..3f71b25488cc11a6b4d582ac52b5a24e1ad1cf8e --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/datasets/stare.py @@ -0,0 +1,59 @@ +# dataset settings +dataset_type = 'STAREDataset' +data_root = 'data/STARE' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_scale = (605, 700) +crop_size = (128, 128) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] + +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type='RepeatDataset', + times=40000, + dataset=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/training', + ann_dir='annotations/training', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline)) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/default_runtime.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/default_runtime.py new file mode 100644 index 
0000000000000000000000000000000000000000..b564cc4e7e7d9a67dacaaddecb100e4d8f5c005b --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/default_runtime.py @@ -0,0 +1,14 @@ +# yapf:disable +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook', by_epoch=False), + # dict(type='TensorboardLoggerHook') + ]) +# yapf:enable +dist_params = dict(backend='nccl') +log_level = 'INFO' +load_from = None +resume_from = None +workflow = [('train', 1)] +cudnn_benchmark = True diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/ann_r50-d8.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/ann_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..a2cb653827e44e6015b3b83bc578003e614a6aa1 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/ann_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='ANNHead', + in_channels=[1024, 2048], + in_index=[2, 3], + channels=512, + project_channels=256, + query_scales=(1, ), + key_pool_scales=(1, 3, 6, 8), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/apcnet_r50-d8.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/apcnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..c8f5316cbcf3896ba9de7ca2c801eba512f01d5e --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/apcnet_r50-d8.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='APCHead', + in_channels=2048, + in_index=3, + channels=512, + pool_scales=(1, 2, 3, 6), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', 
use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/ccnet_r50-d8.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/ccnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..794148f576b9e215c3c6963e73dffe98204b7717 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/ccnet_r50-d8.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='CCHead', + in_channels=2048, + in_index=3, + channels=512, + recurrence=2, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/cgnet.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/cgnet.py new file mode 100644 index 0000000000000000000000000000000000000000..eff8d9458c877c5db894957e0b1b4597e40da6ab --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/cgnet.py @@ -0,0 +1,35 @@ +# model settings +norm_cfg = dict(type='SyncBN', eps=1e-03, requires_grad=True) +model = dict( + type='EncoderDecoder', + backbone=dict( + type='CGNet', + norm_cfg=norm_cfg, + in_channels=3, + num_channels=(32, 64, 128), + num_blocks=(3, 21), + dilations=(2, 4), + reductions=(8, 16)), + decode_head=dict( + type='FCNHead', + in_channels=256, + in_index=2, + channels=256, + num_convs=0, + concat_input=False, + dropout_ratio=0, + num_classes=19, + norm_cfg=norm_cfg, + loss_decode=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0, + class_weight=[ + 2.5959933, 6.7415504, 3.5354059, 9.8663225, 9.690899, 9.369352, + 10.289121, 9.953208, 4.3097677, 9.490387, 7.674431, 9.396905, + 10.347791, 6.3927646, 10.226669, 10.241062, 10.280587, + 10.396974, 10.055647 + ])), + # model training and testing settings + train_cfg=dict(sampler=None), + test_cfg=dict(mode='whole')) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/danet_r50-d8.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/danet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..2c934939fac48525f22ad86f489a041dd7db7d09 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/danet_r50-d8.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + 
type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DAHead', + in_channels=2048, + in_index=3, + channels=512, + pam_channels=64, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/deeplabv3_r50-d8.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/deeplabv3_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..d7a43bee01422ad4795dd27874e0cd4bb6cbfecf --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/deeplabv3_r50-d8.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='ASPPHead', + in_channels=2048, + in_index=3, + channels=512, + dilations=(1, 12, 24, 36), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/deeplabv3_unet_s5-d16.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/deeplabv3_unet_s5-d16.py new file mode 100644 index 0000000000000000000000000000000000000000..0cd262999d8b2cb8e14a5c32190ae73f479d8e81 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/deeplabv3_unet_s5-d16.py @@ -0,0 +1,50 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained=None, + backbone=dict( + type='UNet', + in_channels=3, + base_channels=64, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1), + with_cp=False, + conv_cfg=None, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + upsample_cfg=dict(type='InterpConv'), + norm_eval=False), + 
decode_head=dict( + type='ASPPHead', + in_channels=64, + in_index=4, + channels=16, + dilations=(1, 12, 24, 36), + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=128, + in_index=3, + channels=64, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='slide', crop_size=256, stride=170)) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/deeplabv3plus_r50-d8.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/deeplabv3plus_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..050e39e091d816df9028d23aa3ecf9db74e441e1 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/deeplabv3plus_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DepthwiseSeparableASPPHead', + in_channels=2048, + in_index=3, + channels=512, + dilations=(1, 12, 24, 36), + c1_in_channels=256, + c1_channels=48, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/dmnet_r50-d8.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/dmnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..d22ba52640bebd805b3b8d07025e276dfb023759 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/dmnet_r50-d8.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DMHead', + in_channels=2048, + in_index=3, + channels=512, + filter_sizes=(1, 3, 5, 7), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + 
channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/dnl_r50-d8.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/dnl_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..edb4c174c51e34c103737ba39bfc48bf831e561d --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/dnl_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DNLHead', + in_channels=2048, + in_index=3, + channels=512, + dropout_ratio=0.1, + reduction=2, + use_scale=True, + mode='embedded_gaussian', + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/emanet_r50-d8.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/emanet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..26adcd430926de0862204a71d345f2543167f27b --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/emanet_r50-d8.py @@ -0,0 +1,47 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='EMAHead', + in_channels=2048, + in_index=3, + channels=256, + ema_channels=512, + num_bases=64, + num_stages=3, + momentum=0.1, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/encnet_r50-d8.py 
b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/encnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..be777123a886503172a95fe0719e956a147bbd68 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/encnet_r50-d8.py @@ -0,0 +1,48 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='EncHead', + in_channels=[512, 1024, 2048], + in_index=(1, 2, 3), + channels=512, + num_codes=32, + use_se_loss=True, + add_lateral=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_se_decode=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.2)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/fast_scnn.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/fast_scnn.py new file mode 100644 index 0000000000000000000000000000000000000000..32fdeb659355a5ce5ef2cc7c2f30742703811cdf --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/fast_scnn.py @@ -0,0 +1,57 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True, momentum=0.01) +model = dict( + type='EncoderDecoder', + backbone=dict( + type='FastSCNN', + downsample_dw_channels=(32, 48), + global_in_channels=64, + global_block_channels=(64, 96, 128), + global_block_strides=(2, 2, 1), + global_out_channels=128, + higher_in_channels=64, + lower_in_channels=128, + fusion_out_channels=128, + out_indices=(0, 1, 2), + norm_cfg=norm_cfg, + align_corners=False), + decode_head=dict( + type='DepthwiseSeparableFCNHead', + in_channels=128, + channels=128, + concat_input=False, + num_classes=19, + in_index=-1, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)), + auxiliary_head=[ + dict( + type='FCNHead', + in_channels=128, + channels=32, + num_convs=1, + num_classes=19, + in_index=-2, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)), + dict( + type='FCNHead', + in_channels=64, + channels=32, + num_convs=1, + num_classes=19, + in_index=-3, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)), + ], + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/fcn_hr18.py 
b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/fcn_hr18.py new file mode 100644 index 0000000000000000000000000000000000000000..c3e299bc89ada56ca14bbffcbdb08a586b8ed9e9 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/fcn_hr18.py @@ -0,0 +1,52 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://msra/hrnetv2_w18', + backbone=dict( + type='HRNet', + norm_cfg=norm_cfg, + norm_eval=False, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(18, 36)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(18, 36, 72)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(18, 36, 72, 144)))), + decode_head=dict( + type='FCNHead', + in_channels=[18, 36, 72, 144], + in_index=(0, 1, 2, 3), + channels=sum([18, 36, 72, 144]), + input_transform='resize_concat', + kernel_size=1, + num_convs=1, + concat_input=False, + dropout_ratio=-1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/fcn_r50-d8.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/fcn_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..5e98f6cc918b6146fc6d613c6918e825ef1355c3 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/fcn_r50-d8.py @@ -0,0 +1,45 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='FCNHead', + in_channels=2048, + in_index=3, + channels=512, + num_convs=2, + concat_input=True, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/fcn_unet_s5-d16.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/fcn_unet_s5-d16.py new file mode 100644 index 0000000000000000000000000000000000000000..a33e7972877f902d0e7d18401ca675e3e4e60a18 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/fcn_unet_s5-d16.py @@ -0,0 
+1,51 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained=None, + backbone=dict( + type='UNet', + in_channels=3, + base_channels=64, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1), + with_cp=False, + conv_cfg=None, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + upsample_cfg=dict(type='InterpConv'), + norm_eval=False), + decode_head=dict( + type='FCNHead', + in_channels=64, + in_index=4, + channels=64, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=128, + in_index=3, + channels=64, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='slide', crop_size=256, stride=170)) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/fpn_r50.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/fpn_r50.py new file mode 100644 index 0000000000000000000000000000000000000000..86ab327db92e44c14822d65f1c9277cb007f17c1 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/fpn_r50.py @@ -0,0 +1,36 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 1, 1), + strides=(1, 2, 2, 2), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=4), + decode_head=dict( + type='FPNHead', + in_channels=[256, 256, 256, 256], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/fpn_uniformer.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/fpn_uniformer.py new file mode 100644 index 0000000000000000000000000000000000000000..8aae98c5991055bfcc08e82ccdc09f8b1d9f8a8d --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/fpn_uniformer.py @@ -0,0 +1,35 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + backbone=dict( + type='UniFormer', + embed_dim=[64, 128, 320, 512], + layers=[3, 4, 8, 3], + head_dim=64, + mlp_ratio=4., + qkv_bias=True, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0.1), + neck=dict( + type='FPN', + in_channels=[64, 128, 320, 512], + out_channels=256, + num_outs=4), + decode_head=dict( + type='FPNHead', + in_channels=[256, 
256, 256, 256], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole') +) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/gcnet_r50-d8.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/gcnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..3d2ad69f5c22adfe79d5fdabf920217628987166 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/gcnet_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='GCHead', + in_channels=2048, + in_index=3, + channels=512, + ratio=1 / 4., + pooling_type='att', + fusion_types=('channel_add', ), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/lraspp_m-v3-d8.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/lraspp_m-v3-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..93258242a90695cc94a7c6bd41562d6a75988771 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/lraspp_m-v3-d8.py @@ -0,0 +1,25 @@ +# model settings +norm_cfg = dict(type='SyncBN', eps=0.001, requires_grad=True) +model = dict( + type='EncoderDecoder', + backbone=dict( + type='MobileNetV3', + arch='large', + out_indices=(1, 3, 16), + norm_cfg=norm_cfg), + decode_head=dict( + type='LRASPPHead', + in_channels=(16, 24, 960), + in_index=(0, 1, 2), + channels=128, + input_transform='multiple_select', + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/nonlocal_r50-d8.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/nonlocal_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..5674a39854cafd1f2e363bac99c58ccae62f24da --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/nonlocal_r50-d8.py @@ -0,0 +1,46 @@ +# model 
settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='NLHead', + in_channels=2048, + in_index=3, + channels=512, + dropout_ratio=0.1, + reduction=2, + use_scale=True, + mode='embedded_gaussian', + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/ocrnet_hr18.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/ocrnet_hr18.py new file mode 100644 index 0000000000000000000000000000000000000000..c60f62a7cdf3f5c5096a7a7e725e8268fddcb057 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/ocrnet_hr18.py @@ -0,0 +1,68 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='CascadeEncoderDecoder', + num_stages=2, + pretrained='open-mmlab://msra/hrnetv2_w18', + backbone=dict( + type='HRNet', + norm_cfg=norm_cfg, + norm_eval=False, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(18, 36)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(18, 36, 72)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(18, 36, 72, 144)))), + decode_head=[ + dict( + type='FCNHead', + in_channels=[18, 36, 72, 144], + channels=sum([18, 36, 72, 144]), + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + kernel_size=1, + num_convs=1, + concat_input=False, + dropout_ratio=-1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[18, 36, 72, 144], + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + channels=512, + ocr_channels=256, + dropout_ratio=-1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + ], + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/ocrnet_r50-d8.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/ocrnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..615aa3ff703942b6c22b2d6e9642504dd3e41ebd --- /dev/null +++ 
b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/ocrnet_r50-d8.py @@ -0,0 +1,47 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='CascadeEncoderDecoder', + num_stages=2, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=[ + dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=2048, + in_index=3, + channels=512, + ocr_channels=256, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ], + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/pointrend_r50.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/pointrend_r50.py new file mode 100644 index 0000000000000000000000000000000000000000..9d323dbf9466d41e0800aa57ef84045f3d874bdf --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/pointrend_r50.py @@ -0,0 +1,56 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='CascadeEncoderDecoder', + num_stages=2, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 1, 1), + strides=(1, 2, 2, 2), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=4), + decode_head=[ + dict( + type='FPNHead', + in_channels=[256, 256, 256, 256], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=-1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + dict( + type='PointHead', + in_channels=[256], + in_index=[0], + channels=256, + num_fcs=3, + coarse_pred_each_layer=True, + dropout_ratio=-1, + num_classes=19, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ], + # model training and testing settings + train_cfg=dict( + num_points=2048, oversample_ratio=3, importance_sample_ratio=0.75), + test_cfg=dict( + mode='whole', + subdivision_steps=2, + subdivision_num_points=8196, + scale_factor=2)) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/psanet_r50-d8.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/psanet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..689513fa9d2a40f14bf0ae4ae61f38f0dcc1b3da --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/psanet_r50-d8.py @@ -0,0 +1,49 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = 
dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='PSAHead', + in_channels=2048, + in_index=3, + channels=512, + mask_size=(97, 97), + psa_type='bi-direction', + compact=False, + shrink_factor=2, + normalization_factor=1.0, + psa_softmax=True, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/pspnet_r50-d8.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/pspnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..f451e08ad2eb0732dcb806b1851eb978d4acf136 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/pspnet_r50-d8.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='PSPHead', + in_channels=2048, + in_index=3, + channels=512, + pool_scales=(1, 2, 3, 6), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/pspnet_unet_s5-d16.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/pspnet_unet_s5-d16.py new file mode 100644 index 0000000000000000000000000000000000000000..fcff9ec4f41fad158344ecd77313dc14564f3682 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/pspnet_unet_s5-d16.py @@ -0,0 +1,50 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained=None, + backbone=dict( + type='UNet', + in_channels=3, + base_channels=64, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1), + with_cp=False, + conv_cfg=None, + 
norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + upsample_cfg=dict(type='InterpConv'), + norm_eval=False), + decode_head=dict( + type='PSPHead', + in_channels=64, + in_index=4, + channels=16, + pool_scales=(1, 2, 3, 6), + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=128, + in_index=3, + channels=64, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='slide', crop_size=256, stride=170)) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/upernet_r50.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/upernet_r50.py new file mode 100644 index 0000000000000000000000000000000000000000..10974962fdd7136031fd06de1700f497d355ceaa --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/upernet_r50.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 1, 1), + strides=(1, 2, 2, 2), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='UPerHead', + in_channels=[256, 512, 1024, 2048], + in_index=[0, 1, 2, 3], + pool_scales=(1, 2, 3, 6), + channels=512, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/upernet_uniformer.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/upernet_uniformer.py new file mode 100644 index 0000000000000000000000000000000000000000..41aa4db809dc6e2c508e98051f61807d07477903 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/models/upernet_uniformer.py @@ -0,0 +1,43 @@ +# model settings +norm_cfg = dict(type='BN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained=None, + backbone=dict( + type='UniFormer', + embed_dim=[64, 128, 320, 512], + layers=[3, 4, 8, 3], + head_dim=64, + mlp_ratio=4., + qkv_bias=True, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0.1), + decode_head=dict( + type='UPerHead', + in_channels=[64, 128, 320, 512], + in_index=[0, 1, 2, 3], + pool_scales=(1, 2, 3, 6), + channels=512, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=320, + in_index=2, + 
channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/schedules/schedule_160k.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/schedules/schedule_160k.py new file mode 100644 index 0000000000000000000000000000000000000000..52603890b10f25faf8eec9f9e5a4468fae09b811 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/schedules/schedule_160k.py @@ -0,0 +1,9 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) +# runtime settings +runner = dict(type='IterBasedRunner', max_iters=160000) +checkpoint_config = dict(by_epoch=False, interval=16000) +evaluation = dict(interval=16000, metric='mIoU') diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/schedules/schedule_20k.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/schedules/schedule_20k.py new file mode 100644 index 0000000000000000000000000000000000000000..bf780a1b6f6521833c6a5859675147824efa599d --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/schedules/schedule_20k.py @@ -0,0 +1,9 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) +# runtime settings +runner = dict(type='IterBasedRunner', max_iters=20000) +checkpoint_config = dict(by_epoch=False, interval=2000) +evaluation = dict(interval=2000, metric='mIoU') diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/schedules/schedule_40k.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/schedules/schedule_40k.py new file mode 100644 index 0000000000000000000000000000000000000000..cdbf841abcb26eed87bf76ab816aff4bae0630ee --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/schedules/schedule_40k.py @@ -0,0 +1,9 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) +# runtime settings +runner = dict(type='IterBasedRunner', max_iters=40000) +checkpoint_config = dict(by_epoch=False, interval=4000) +evaluation = dict(interval=4000, metric='mIoU') diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/schedules/schedule_80k.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/schedules/schedule_80k.py new file mode 100644 index 0000000000000000000000000000000000000000..c190cee6bdc7922b688ea75dc8f152fa15c24617 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/configs/_base_/schedules/schedule_80k.py @@ -0,0 +1,9 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optimizer_config = dict() +# 
learning policy +lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) +# runtime settings +runner = dict(type='IterBasedRunner', max_iters=80000) +checkpoint_config = dict(by_epoch=False, interval=8000) +evaluation = dict(interval=8000, metric='mIoU') diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/inference.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..667d3e7ed5761bbe742226eb82f85bd952ca13bd --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/inference.py @@ -0,0 +1,144 @@ + +import torch + +try: + import mmcv as mmcv + from mmcv.parallel import collate, scatter + from mmcv.runner import load_checkpoint + from mmseg.datasets.pipelines import Compose + from mmseg.models import build_segmentor +except ImportError: + import annotator.mmpkg.mmcv as mmcv + from annotator.mmpkg.mmcv.parallel import collate, scatter + from annotator.mmpkg.mmcv.runner import load_checkpoint + from annotator.mmpkg.mmseg.datasets.pipelines import Compose + from annotator.mmpkg.mmseg.models import build_segmentor + +def init_segmentor(config, checkpoint=None, device='cuda:0'): + """Initialize a segmentor from config file. + + Args: + config (str or :obj:`mmcv.Config`): Config file path or the config + object. + checkpoint (str, optional): Checkpoint path. If left as None, the model + will not load any weights. + device (str, optional) CPU/CUDA device option. Default 'cuda:0'. + Use 'cpu' for loading model on CPU. + Returns: + nn.Module: The constructed segmentor. + """ + if isinstance(config, str): + config = mmcv.Config.fromfile(config) + elif not isinstance(config, mmcv.Config): + raise TypeError('config must be a filename or Config object, ' + 'but got {}'.format(type(config))) + config.model.pretrained = None + config.model.train_cfg = None + model = build_segmentor(config.model, test_cfg=config.get('test_cfg')) + if checkpoint is not None: + checkpoint = load_checkpoint(model, checkpoint, map_location='cpu') + model.CLASSES = checkpoint['meta']['CLASSES'] + model.PALETTE = checkpoint['meta']['PALETTE'] + model.cfg = config # save the config in the model for convenience + model.to(device) + model.eval() + return model + + +class LoadImage: + """A simple pipeline to load image.""" + + def __call__(self, results): + """Call function to load images into results. + + Args: + results (dict): A result dict contains the file name + of the image to be read. + + Returns: + dict: ``results`` will be returned containing loaded image. + """ + + if isinstance(results['img'], str): + results['filename'] = results['img'] + results['ori_filename'] = results['img'] + else: + results['filename'] = None + results['ori_filename'] = None + img = mmcv.imread(results['img']) + results['img'] = img + results['img_shape'] = img.shape + results['ori_shape'] = img.shape + return results + + +def inference_segmentor(model, img): + """Inference image(s) with the segmentor. + + Args: + model (nn.Module): The loaded segmentor. + imgs (str/ndarray or list[str/ndarray]): Either image files or loaded + images. + + Returns: + (list[Tensor]): The segmentation result. 
+ """ + cfg = model.cfg + device = next(model.parameters()).device # model device + # build the data pipeline + test_pipeline = [LoadImage()] + cfg.data.test.pipeline[1:] + test_pipeline = Compose(test_pipeline) + # prepare data + data = dict(img=img) + data = test_pipeline(data) + data = collate([data], samples_per_gpu=1) + if next(model.parameters()).is_cuda: + # scatter to specified GPU + data = scatter(data, [device])[0] + else: + data['img_metas'] = [i.data[0] for i in data['img_metas']] + + data['img'] = [x.to(device) for x in data['img']] + + # forward the model + with torch.no_grad(): + result = model(return_loss=False, rescale=True, **data) + return result + + +def show_result_pyplot(model, + img, + result, + palette=None, + fig_size=(15, 10), + opacity=0.5, + title='', + block=True): + """Visualize the segmentation results on the image. + + Args: + model (nn.Module): The loaded segmentor. + img (str or np.ndarray): Image filename or loaded image. + result (list): The segmentation result. + palette (list[list[int]]] | None): The palette of segmentation + map. If None is given, random palette will be generated. + Default: None + fig_size (tuple): Figure size of the pyplot figure. + opacity(float): Opacity of painted segmentation map. + Default 0.5. + Must be in (0, 1] range. + title (str): The title of pyplot figure. + Default is ''. + block (bool): Whether to block the pyplot figure. + Default is True. + """ + if hasattr(model, 'module'): + model = model.module + img = model.show_result( + img, result, palette=palette, show=False, opacity=opacity) + # plt.figure(figsize=fig_size) + # plt.imshow(mmcv.bgr2rgb(img)) + # plt.title(title) + # plt.tight_layout() + # plt.show(block=block) + return mmcv.bgr2rgb(img) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/mmcv_custom/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/mmcv_custom/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4b958738b9fd93bfcec239c550df1d9a44b8c536 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/mmcv_custom/__init__.py @@ -0,0 +1,5 @@ +# -*- coding: utf-8 -*- + +from .checkpoint import load_checkpoint + +__all__ = ['load_checkpoint'] \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/mmcv_custom/checkpoint.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/mmcv_custom/checkpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..48c1b16b53107cb1301edf6cc07ccfe6f7010da6 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/mmcv_custom/checkpoint.py @@ -0,0 +1,508 @@ +# Copyright (c) Open-MMLab. All rights reserved. 
+import io +import os +import os.path as osp +import pkgutil +import time +import warnings +from collections import OrderedDict +from importlib import import_module +from tempfile import TemporaryDirectory + +import torch +import torchvision +from torch.optim import Optimizer +from torch.utils import model_zoo +from torch.nn import functional as F + +try: + import mmcv as mmcv + from mmcv.fileio import FileClient + from mmcv.fileio import load as load_file + from mmcv.parallel import is_module_wrapper + from mmcv.utils import mkdir_or_exist + from mmcv.runner import get_dist_info +except ImportError: + import annotator.mmpkg.mmcv as mmcv + from annotator.mmpkg.mmcv.fileio import FileClient + from annotator.mmpkg.mmcv.fileio import load as load_file + from annotator.mmpkg.mmcv.parallel import is_module_wrapper + from annotator.mmpkg.mmcv.utils import mkdir_or_exist + from annotator.mmpkg.mmcv.runner import get_dist_info + +ENV_MMCV_HOME = 'MMCV_HOME' +ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME' +DEFAULT_CACHE_DIR = '~/.cache' + + +def _get_mmcv_home(): + mmcv_home = os.path.expanduser( + os.getenv( + ENV_MMCV_HOME, + os.path.join( + os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'mmcv'))) + + mkdir_or_exist(mmcv_home) + return mmcv_home + + +def load_state_dict(module, state_dict, strict=False, logger=None): + """Load state_dict to a module. + + This method is modified from :meth:`torch.nn.Module.load_state_dict`. + Default value for ``strict`` is set to ``False`` and the message for + param mismatch will be shown even if strict is False. + + Args: + module (Module): Module that receives the state_dict. + state_dict (OrderedDict): Weights. + strict (bool): whether to strictly enforce that the keys + in :attr:`state_dict` match the keys returned by this module's + :meth:`~torch.nn.Module.state_dict` function. Default: ``False``. + logger (:obj:`logging.Logger`, optional): Logger to log the error + message. If not specified, print function will be used. 
+ """ + unexpected_keys = [] + all_missing_keys = [] + err_msg = [] + + metadata = getattr(state_dict, '_metadata', None) + state_dict = state_dict.copy() + if metadata is not None: + state_dict._metadata = metadata + + # use _load_from_state_dict to enable checkpoint version control + def load(module, prefix=''): + # recursively check parallel module in case that the model has a + # complicated structure, e.g., nn.Module(nn.Module(DDP)) + if is_module_wrapper(module): + module = module.module + local_metadata = {} if metadata is None else metadata.get( + prefix[:-1], {}) + module._load_from_state_dict(state_dict, prefix, local_metadata, True, + all_missing_keys, unexpected_keys, + err_msg) + for name, child in module._modules.items(): + if child is not None: + load(child, prefix + name + '.') + + load(module) + load = None # break load->load reference cycle + + # ignore "num_batches_tracked" of BN layers + missing_keys = [ + key for key in all_missing_keys if 'num_batches_tracked' not in key + ] + + if unexpected_keys: + err_msg.append('unexpected key in source ' + f'state_dict: {", ".join(unexpected_keys)}\n') + if missing_keys: + err_msg.append( + f'missing keys in source state_dict: {", ".join(missing_keys)}\n') + + rank, _ = get_dist_info() + if len(err_msg) > 0 and rank == 0: + err_msg.insert( + 0, 'The model and loaded state dict do not match exactly\n') + err_msg = '\n'.join(err_msg) + if strict: + raise RuntimeError(err_msg) + elif logger is not None: + logger.warning(err_msg) + else: + print(err_msg) + + +def load_url_dist(url, model_dir=None): + """In distributed setting, this function only download checkpoint at local + rank 0.""" + rank, world_size = get_dist_info() + rank = int(os.environ.get('LOCAL_RANK', rank)) + if rank == 0: + checkpoint = model_zoo.load_url(url, model_dir=model_dir) + if world_size > 1: + torch.distributed.barrier() + if rank > 0: + checkpoint = model_zoo.load_url(url, model_dir=model_dir) + return checkpoint + + +def load_pavimodel_dist(model_path, map_location=None): + """In distributed setting, this function only download checkpoint at local + rank 0.""" + try: + from pavi import modelcloud + except ImportError: + raise ImportError( + 'Please install pavi to load checkpoint from modelcloud.') + rank, world_size = get_dist_info() + rank = int(os.environ.get('LOCAL_RANK', rank)) + if rank == 0: + model = modelcloud.get(model_path) + with TemporaryDirectory() as tmp_dir: + downloaded_file = osp.join(tmp_dir, model.name) + model.download(downloaded_file) + checkpoint = torch.load(downloaded_file, map_location=map_location) + if world_size > 1: + torch.distributed.barrier() + if rank > 0: + model = modelcloud.get(model_path) + with TemporaryDirectory() as tmp_dir: + downloaded_file = osp.join(tmp_dir, model.name) + model.download(downloaded_file) + checkpoint = torch.load( + downloaded_file, map_location=map_location) + return checkpoint + + +def load_fileclient_dist(filename, backend, map_location): + """In distributed setting, this function only download checkpoint at local + rank 0.""" + rank, world_size = get_dist_info() + rank = int(os.environ.get('LOCAL_RANK', rank)) + allowed_backends = ['ceph'] + if backend not in allowed_backends: + raise ValueError(f'Load from Backend {backend} is not supported.') + if rank == 0: + fileclient = FileClient(backend=backend) + buffer = io.BytesIO(fileclient.get(filename)) + checkpoint = torch.load(buffer, map_location=map_location) + if world_size > 1: + torch.distributed.barrier() + if rank > 0: + fileclient = 
FileClient(backend=backend) + buffer = io.BytesIO(fileclient.get(filename)) + checkpoint = torch.load(buffer, map_location=map_location) + return checkpoint + + +def get_torchvision_models(): + model_urls = dict() + for _, name, ispkg in pkgutil.walk_packages(torchvision.models.__path__): + if ispkg: + continue + _zoo = import_module(f'torchvision.models.{name}') + if hasattr(_zoo, 'model_urls'): + _urls = getattr(_zoo, 'model_urls') + model_urls.update(_urls) + return model_urls + + +def get_external_models(): + mmcv_home = _get_mmcv_home() + default_json_path = osp.join(mmcv.__path__[0], 'model_zoo/open_mmlab.json') + default_urls = load_file(default_json_path) + assert isinstance(default_urls, dict) + external_json_path = osp.join(mmcv_home, 'open_mmlab.json') + if osp.exists(external_json_path): + external_urls = load_file(external_json_path) + assert isinstance(external_urls, dict) + default_urls.update(external_urls) + + return default_urls + + +def get_mmcls_models(): + mmcls_json_path = osp.join(mmcv.__path__[0], 'model_zoo/mmcls.json') + mmcls_urls = load_file(mmcls_json_path) + + return mmcls_urls + + +def get_deprecated_model_names(): + deprecate_json_path = osp.join(mmcv.__path__[0], + 'model_zoo/deprecated.json') + deprecate_urls = load_file(deprecate_json_path) + assert isinstance(deprecate_urls, dict) + + return deprecate_urls + + +def _process_mmcls_checkpoint(checkpoint): + state_dict = checkpoint['state_dict'] + new_state_dict = OrderedDict() + for k, v in state_dict.items(): + if k.startswith('backbone.'): + new_state_dict[k[9:]] = v + new_checkpoint = dict(state_dict=new_state_dict) + + return new_checkpoint + + +def _load_checkpoint(filename, map_location=None): + """Load checkpoint from somewhere (modelzoo, file, url). + + Args: + filename (str): Accept local filepath, URL, ``torchvision://xxx``, + ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for + details. + map_location (str | None): Same as :func:`torch.load`. Default: None. + + Returns: + dict | OrderedDict: The loaded checkpoint. It can be either an + OrderedDict storing model weights or a dict containing other + information, which depends on the checkpoint. 
+ """ + if filename.startswith('modelzoo://'): + warnings.warn('The URL scheme of "modelzoo://" is deprecated, please ' + 'use "torchvision://" instead') + model_urls = get_torchvision_models() + model_name = filename[11:] + checkpoint = load_url_dist(model_urls[model_name]) + elif filename.startswith('torchvision://'): + model_urls = get_torchvision_models() + model_name = filename[14:] + checkpoint = load_url_dist(model_urls[model_name]) + elif filename.startswith('open-mmlab://'): + model_urls = get_external_models() + model_name = filename[13:] + deprecated_urls = get_deprecated_model_names() + if model_name in deprecated_urls: + warnings.warn(f'open-mmlab://{model_name} is deprecated in favor ' + f'of open-mmlab://{deprecated_urls[model_name]}') + model_name = deprecated_urls[model_name] + model_url = model_urls[model_name] + # check if is url + if model_url.startswith(('http://', 'https://')): + checkpoint = load_url_dist(model_url) + else: + filename = osp.join(_get_mmcv_home(), model_url) + if not osp.isfile(filename): + raise IOError(f'{filename} is not a checkpoint file') + checkpoint = torch.load(filename, map_location=map_location) + elif filename.startswith('mmcls://'): + model_urls = get_mmcls_models() + model_name = filename[8:] + checkpoint = load_url_dist(model_urls[model_name]) + checkpoint = _process_mmcls_checkpoint(checkpoint) + elif filename.startswith(('http://', 'https://')): + checkpoint = load_url_dist(filename) + elif filename.startswith('pavi://'): + model_path = filename[7:] + checkpoint = load_pavimodel_dist(model_path, map_location=map_location) + elif filename.startswith('s3://'): + checkpoint = load_fileclient_dist( + filename, backend='ceph', map_location=map_location) + else: + if not osp.isfile(filename): + raise IOError(f'{filename} is not a checkpoint file') + checkpoint = torch.load(filename, map_location=map_location) + return checkpoint + + +def load_checkpoint(model, + filename, + map_location='cpu', + strict=False, + logger=None): + """Load checkpoint from a file or URI. + + Args: + model (Module): Module to load checkpoint. + filename (str): Accept local filepath, URL, ``torchvision://xxx``, + ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for + details. + map_location (str): Same as :func:`torch.load`. + strict (bool): Whether to allow different params for the model and + checkpoint. + logger (:mod:`logging.Logger` or None): The logger for error message. + + Returns: + dict or OrderedDict: The loaded checkpoint. 
+ """ + checkpoint = _load_checkpoint(filename, map_location) + # OrderedDict is a subclass of dict + if not isinstance(checkpoint, dict): + raise RuntimeError( + f'No state_dict found in checkpoint file {filename}') + # get state_dict from checkpoint + if 'state_dict' in checkpoint: + state_dict = checkpoint['state_dict'] + elif 'model' in checkpoint: + state_dict = checkpoint['model'] + else: + state_dict = checkpoint + # strip prefix of state_dict + if list(state_dict.keys())[0].startswith('module.'): + state_dict = {k[7:]: v for k, v in state_dict.items()} + + # for MoBY, load model of online branch + if sorted(list(state_dict.keys()))[0].startswith('encoder'): + state_dict = {k.replace('encoder.', ''): v for k, v in state_dict.items() if k.startswith('encoder.')} + + # reshape absolute position embedding + if state_dict.get('absolute_pos_embed') is not None: + absolute_pos_embed = state_dict['absolute_pos_embed'] + N1, L, C1 = absolute_pos_embed.size() + N2, C2, H, W = model.absolute_pos_embed.size() + if N1 != N2 or C1 != C2 or L != H*W: + logger.warning("Error in loading absolute_pos_embed, pass") + else: + state_dict['absolute_pos_embed'] = absolute_pos_embed.view(N2, H, W, C2).permute(0, 3, 1, 2) + + # interpolate position bias table if needed + relative_position_bias_table_keys = [k for k in state_dict.keys() if "relative_position_bias_table" in k] + for table_key in relative_position_bias_table_keys: + table_pretrained = state_dict[table_key] + table_current = model.state_dict()[table_key] + L1, nH1 = table_pretrained.size() + L2, nH2 = table_current.size() + if nH1 != nH2: + logger.warning(f"Error in loading {table_key}, pass") + else: + if L1 != L2: + S1 = int(L1 ** 0.5) + S2 = int(L2 ** 0.5) + table_pretrained_resized = F.interpolate( + table_pretrained.permute(1, 0).view(1, nH1, S1, S1), + size=(S2, S2), mode='bicubic') + state_dict[table_key] = table_pretrained_resized.view(nH2, L2).permute(1, 0) + + # load state_dict + load_state_dict(model, state_dict, strict, logger) + return checkpoint + + +def weights_to_cpu(state_dict): + """Copy a model state_dict to cpu. + + Args: + state_dict (OrderedDict): Model weights on GPU. + + Returns: + OrderedDict: Model weights on GPU. + """ + state_dict_cpu = OrderedDict() + for key, val in state_dict.items(): + state_dict_cpu[key] = val.cpu() + return state_dict_cpu + + +def _save_to_state_dict(module, destination, prefix, keep_vars): + """Saves module state to `destination` dictionary. + + This method is modified from :meth:`torch.nn.Module._save_to_state_dict`. + + Args: + module (nn.Module): The module to generate state_dict. + destination (dict): A dict where state will be stored. + prefix (str): The prefix for parameters and buffers used in this + module. + """ + for name, param in module._parameters.items(): + if param is not None: + destination[prefix + name] = param if keep_vars else param.detach() + for name, buf in module._buffers.items(): + # remove check of _non_persistent_buffers_set to allow nn.BatchNorm2d + if buf is not None: + destination[prefix + name] = buf if keep_vars else buf.detach() + + +def get_state_dict(module, destination=None, prefix='', keep_vars=False): + """Returns a dictionary containing a whole state of the module. + + Both parameters and persistent buffers (e.g. running averages) are + included. Keys are corresponding parameter and buffer names. 
+ + This method is modified from :meth:`torch.nn.Module.state_dict` to + recursively check parallel module in case that the model has a complicated + structure, e.g., nn.Module(nn.Module(DDP)). + + Args: + module (nn.Module): The module to generate state_dict. + destination (OrderedDict): Returned dict for the state of the + module. + prefix (str): Prefix of the key. + keep_vars (bool): Whether to keep the variable property of the + parameters. Default: False. + + Returns: + dict: A dictionary containing a whole state of the module. + """ + # recursively check parallel module in case that the model has a + # complicated structure, e.g., nn.Module(nn.Module(DDP)) + if is_module_wrapper(module): + module = module.module + + # below is the same as torch.nn.Module.state_dict() + if destination is None: + destination = OrderedDict() + destination._metadata = OrderedDict() + destination._metadata[prefix[:-1]] = local_metadata = dict( + version=module._version) + _save_to_state_dict(module, destination, prefix, keep_vars) + for name, child in module._modules.items(): + if child is not None: + get_state_dict( + child, destination, prefix + name + '.', keep_vars=keep_vars) + for hook in module._state_dict_hooks.values(): + hook_result = hook(module, destination, prefix, local_metadata) + if hook_result is not None: + destination = hook_result + return destination + + +def save_checkpoint(model, filename, optimizer=None, meta=None): + """Save checkpoint to file. + + The checkpoint will have 3 fields: ``meta``, ``state_dict`` and + ``optimizer``. By default ``meta`` will contain version and time info. + + Args: + model (Module): Module whose params are to be saved. + filename (str): Checkpoint filename. + optimizer (:obj:`Optimizer`, optional): Optimizer to be saved. + meta (dict, optional): Metadata to be saved in checkpoint. 
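+
+    Example (illustrative sketch; the path and objects are placeholders):
+
+        >>> model = torch.nn.Linear(2, 2)
+        >>> optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
+        >>> save_checkpoint(model, '/tmp/example.pth', optimizer=optimizer,
+        ...                 meta=dict(epoch=3, iter=1200))
+        >>> sorted(torch.load('/tmp/example.pth').keys())
+        ['meta', 'optimizer', 'state_dict']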
+ """ + if meta is None: + meta = {} + elif not isinstance(meta, dict): + raise TypeError(f'meta must be a dict or None, but got {type(meta)}') + meta.update(mmcv_version=mmcv.__version__, time=time.asctime()) + + if is_module_wrapper(model): + model = model.module + + if hasattr(model, 'CLASSES') and model.CLASSES is not None: + # save class name to the meta + meta.update(CLASSES=model.CLASSES) + + checkpoint = { + 'meta': meta, + 'state_dict': weights_to_cpu(get_state_dict(model)) + } + # save optimizer state dict in the checkpoint + if isinstance(optimizer, Optimizer): + checkpoint['optimizer'] = optimizer.state_dict() + elif isinstance(optimizer, dict): + checkpoint['optimizer'] = {} + for name, optim in optimizer.items(): + checkpoint['optimizer'][name] = optim.state_dict() + + if filename.startswith('pavi://'): + try: + from pavi import modelcloud + from pavi.exception import NodeNotFoundError + except ImportError: + raise ImportError( + 'Please install pavi to load checkpoint from modelcloud.') + model_path = filename[7:] + root = modelcloud.Folder() + model_dir, model_name = osp.split(model_path) + try: + model = modelcloud.get(model_dir) + except NodeNotFoundError: + model = root.create_training_model(model_dir) + with TemporaryDirectory() as tmp_dir: + checkpoint_file = osp.join(tmp_dir, model_name) + with open(checkpoint_file, 'wb') as f: + torch.save(checkpoint, f) + f.flush() + model.create_file(checkpoint_file, name=model_name) + else: + mmcv.mkdir_or_exist(osp.dirname(filename)) + # immediately flush buffer + with open(filename, 'wb') as f: + torch.save(checkpoint, f) + f.flush() \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/uniformer.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/uniformer.py new file mode 100644 index 0000000000000000000000000000000000000000..f5726fbe63888e0d7a85563308ffd2ab526fed32 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/uniformer.py @@ -0,0 +1,426 @@ +# -------------------------------------------------------- +# UniFormer +# Copyright (c) 2022 SenseTime X-Lab +# Licensed under The MIT License [see LICENSE for details] +# Written by Kunchang Li +# -------------------------------------------------------- + + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.checkpoint as checkpoint + +from functools import partial +from collections import OrderedDict +from timm.models.layers import DropPath, to_2tuple, trunc_normal_ + +try: + from mmseg.utils import get_root_logger + from mmseg.models.builder import BACKBONES +except ImportError: + from annotator.mmpkg.mmseg.utils import get_root_logger + from annotator.mmpkg.mmseg.models.builder import BACKBONES + +from annotator.uniformer.mmcv_custom import load_checkpoint + + +class Mlp(nn.Module): + def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +class CMlp(nn.Module): + def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): + super().__init__() 
+ out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Conv2d(in_features, hidden_features, 1) + self.act = act_layer() + self.fc2 = nn.Conv2d(hidden_features, out_features, 1) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +class CBlock(nn.Module): + def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0., + drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm): + super().__init__() + self.pos_embed = nn.Conv2d(dim, dim, 3, padding=1, groups=dim) + self.norm1 = nn.BatchNorm2d(dim) + self.conv1 = nn.Conv2d(dim, dim, 1) + self.conv2 = nn.Conv2d(dim, dim, 1) + self.attn = nn.Conv2d(dim, dim, 5, padding=2, groups=dim) + # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here + self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() + self.norm2 = nn.BatchNorm2d(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = CMlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) + + def forward(self, x): + x = x + self.pos_embed(x) + x = x + self.drop_path(self.conv2(self.attn(self.conv1(self.norm1(x))))) + x = x + self.drop_path(self.mlp(self.norm2(x))) + return x + + +class Attention(nn.Module): + def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0.): + super().__init__() + self.num_heads = num_heads + head_dim = dim // num_heads + # NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights + self.scale = qk_scale or head_dim ** -0.5 + + self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + + def forward(self, x): + B, N, C = x.shape + qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) + q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple) + + attn = (q @ k.transpose(-2, -1)) * self.scale + attn = attn.softmax(dim=-1) + attn = self.attn_drop(attn) + + x = (attn @ v).transpose(1, 2).reshape(B, N, C) + x = self.proj(x) + x = self.proj_drop(x) + return x + + +class SABlock(nn.Module): + def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0., + drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm): + super().__init__() + self.pos_embed = nn.Conv2d(dim, dim, 3, padding=1, groups=dim) + self.norm1 = norm_layer(dim) + self.attn = Attention( + dim, + num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, + attn_drop=attn_drop, proj_drop=drop) + # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here + self.drop_path = DropPath(drop_path) if drop_path > 0. 
else nn.Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) + + def forward(self, x): + x = x + self.pos_embed(x) + B, N, H, W = x.shape + x = x.flatten(2).transpose(1, 2) + x = x + self.drop_path(self.attn(self.norm1(x))) + x = x + self.drop_path(self.mlp(self.norm2(x))) + x = x.transpose(1, 2).reshape(B, N, H, W) + return x + + +def window_partition(x, window_size): + """ + Args: + x: (B, H, W, C) + window_size (int): window size + Returns: + windows: (num_windows*B, window_size, window_size, C) + """ + B, H, W, C = x.shape + x = x.view(B, H // window_size, window_size, W // window_size, window_size, C) + windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C) + return windows + + +def window_reverse(windows, window_size, H, W): + """ + Args: + windows: (num_windows*B, window_size, window_size, C) + window_size (int): Window size + H (int): Height of image + W (int): Width of image + Returns: + x: (B, H, W, C) + """ + B = int(windows.shape[0] / (H * W / window_size / window_size)) + x = windows.view(B, H // window_size, W // window_size, window_size, window_size, -1) + x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) + return x + + +class SABlock_Windows(nn.Module): + def __init__(self, dim, num_heads, window_size=14, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0., + drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm): + super().__init__() + self.window_size=window_size + self.pos_embed = nn.Conv2d(dim, dim, 3, padding=1, groups=dim) + self.norm1 = norm_layer(dim) + self.attn = Attention( + dim, + num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, + attn_drop=attn_drop, proj_drop=drop) + # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here + self.drop_path = DropPath(drop_path) if drop_path > 0. 
else nn.Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) + + def forward(self, x): + x = x + self.pos_embed(x) + x = x.permute(0, 2, 3, 1) + B, H, W, C = x.shape + shortcut = x + x = self.norm1(x) + + pad_l = pad_t = 0 + pad_r = (self.window_size - W % self.window_size) % self.window_size + pad_b = (self.window_size - H % self.window_size) % self.window_size + x = F.pad(x, (0, 0, pad_l, pad_r, pad_t, pad_b)) + _, Hp, Wp, _ = x.shape + + x_windows = window_partition(x, self.window_size) # nW*B, window_size, window_size, C + x_windows = x_windows.view(-1, self.window_size * self.window_size, C) # nW*B, window_size*window_size, C + + # W-MSA/SW-MSA + attn_windows = self.attn(x_windows) # nW*B, window_size*window_size, C + + # merge windows + attn_windows = attn_windows.view(-1, self.window_size, self.window_size, C) + x = window_reverse(attn_windows, self.window_size, Hp, Wp) # B H' W' C + + # reverse cyclic shift + if pad_r > 0 or pad_b > 0: + x = x[:, :H, :W, :].contiguous() + + x = shortcut + self.drop_path(x) + x = x + self.drop_path(self.mlp(self.norm2(x))) + x = x.permute(0, 3, 1, 2).reshape(B, C, H, W) + return x + + +class PatchEmbed(nn.Module): + """ Image to Patch Embedding + """ + def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768): + super().__init__() + img_size = to_2tuple(img_size) + patch_size = to_2tuple(patch_size) + num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0]) + self.img_size = img_size + self.patch_size = patch_size + self.num_patches = num_patches + self.norm = nn.LayerNorm(embed_dim) + self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) + + def forward(self, x): + B, _, H, W = x.shape + x = self.proj(x) + B, _, H, W = x.shape + x = x.flatten(2).transpose(1, 2) + x = self.norm(x) + x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous() + return x + + +@BACKBONES.register_module() +class UniFormer(nn.Module): + """ Vision Transformer + A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - + https://arxiv.org/abs/2010.11929 + """ + def __init__(self, layers=[3, 4, 8, 3], img_size=224, in_chans=3, num_classes=80, embed_dim=[64, 128, 320, 512], + head_dim=64, mlp_ratio=4., qkv_bias=True, qk_scale=None, representation_size=None, + drop_rate=0., attn_drop_rate=0., drop_path_rate=0., norm_layer=partial(nn.LayerNorm, eps=1e-6), + pretrained_path=None, use_checkpoint=False, checkpoint_num=[0, 0, 0, 0], + windows=False, hybrid=False, window_size=14): + """ + Args: + layer (list): number of block in each layer + img_size (int, tuple): input image size + in_chans (int): number of input channels + num_classes (int): number of classes for classification head + embed_dim (int): embedding dimension + head_dim (int): dimension of attention heads + mlp_ratio (int): ratio of mlp hidden dim to embedding dim + qkv_bias (bool): enable bias for qkv if True + qk_scale (float): override default qk scale of head_dim ** -0.5 if set + representation_size (Optional[int]): enable and set representation layer (pre-logits) to this value if set + drop_rate (float): dropout rate + attn_drop_rate (float): attention dropout rate + drop_path_rate (float): stochastic depth rate + norm_layer (nn.Module): normalization layer + pretrained_path (str): path of pretrained model + use_checkpoint (bool): whether use checkpoint + checkpoint_num (list): 
index for using checkpoint in every stage + windows (bool): whether use window MHRA + hybrid (bool): whether use hybrid MHRA + window_size (int): size of window (>14) + """ + super().__init__() + self.num_classes = num_classes + self.use_checkpoint = use_checkpoint + self.checkpoint_num = checkpoint_num + self.windows = windows + print(f'Use Checkpoint: {self.use_checkpoint}') + print(f'Checkpoint Number: {self.checkpoint_num}') + self.num_features = self.embed_dim = embed_dim # num_features for consistency with other models + norm_layer = norm_layer or partial(nn.LayerNorm, eps=1e-6) + + self.patch_embed1 = PatchEmbed( + img_size=img_size, patch_size=4, in_chans=in_chans, embed_dim=embed_dim[0]) + self.patch_embed2 = PatchEmbed( + img_size=img_size // 4, patch_size=2, in_chans=embed_dim[0], embed_dim=embed_dim[1]) + self.patch_embed3 = PatchEmbed( + img_size=img_size // 8, patch_size=2, in_chans=embed_dim[1], embed_dim=embed_dim[2]) + self.patch_embed4 = PatchEmbed( + img_size=img_size // 16, patch_size=2, in_chans=embed_dim[2], embed_dim=embed_dim[3]) + + self.pos_drop = nn.Dropout(p=drop_rate) + dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(layers))] # stochastic depth decay rule + num_heads = [dim // head_dim for dim in embed_dim] + self.blocks1 = nn.ModuleList([ + CBlock( + dim=embed_dim[0], num_heads=num_heads[0], mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, + drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i], norm_layer=norm_layer) + for i in range(layers[0])]) + self.norm1=norm_layer(embed_dim[0]) + self.blocks2 = nn.ModuleList([ + CBlock( + dim=embed_dim[1], num_heads=num_heads[1], mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, + drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i+layers[0]], norm_layer=norm_layer) + for i in range(layers[1])]) + self.norm2 = norm_layer(embed_dim[1]) + if self.windows: + print('Use local window for all blocks in stage3') + self.blocks3 = nn.ModuleList([ + SABlock_Windows( + dim=embed_dim[2], num_heads=num_heads[2], window_size=window_size, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, + drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i+layers[0]+layers[1]], norm_layer=norm_layer) + for i in range(layers[2])]) + elif hybrid: + print('Use hybrid window for blocks in stage3') + block3 = [] + for i in range(layers[2]): + if (i + 1) % 4 == 0: + block3.append(SABlock( + dim=embed_dim[2], num_heads=num_heads[2], mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, + drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i+layers[0]+layers[1]], norm_layer=norm_layer)) + else: + block3.append(SABlock_Windows( + dim=embed_dim[2], num_heads=num_heads[2], window_size=window_size, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, + drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i+layers[0]+layers[1]], norm_layer=norm_layer)) + self.blocks3 = nn.ModuleList(block3) + else: + print('Use global window for all blocks in stage3') + self.blocks3 = nn.ModuleList([ + SABlock( + dim=embed_dim[2], num_heads=num_heads[2], mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, + drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i+layers[0]+layers[1]], norm_layer=norm_layer) + for i in range(layers[2])]) + self.norm3 = norm_layer(embed_dim[2]) + self.blocks4 = nn.ModuleList([ + SABlock( + dim=embed_dim[3], num_heads=num_heads[3], mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, + drop=drop_rate, attn_drop=attn_drop_rate, 
drop_path=dpr[i+layers[0]+layers[1]+layers[2]], norm_layer=norm_layer) + for i in range(layers[3])]) + self.norm4 = norm_layer(embed_dim[3]) + + # Representation layer + if representation_size: + self.num_features = representation_size + self.pre_logits = nn.Sequential(OrderedDict([ + ('fc', nn.Linear(embed_dim, representation_size)), + ('act', nn.Tanh()) + ])) + else: + self.pre_logits = nn.Identity() + + self.apply(self._init_weights) + self.init_weights(pretrained=pretrained_path) + + def init_weights(self, pretrained): + if isinstance(pretrained, str): + logger = get_root_logger() + load_checkpoint(self, pretrained, map_location='cpu', strict=False, logger=logger) + print(f'Load pretrained model from {pretrained}') + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + + @torch.jit.ignore + def no_weight_decay(self): + return {'pos_embed', 'cls_token'} + + def get_classifier(self): + return self.head + + def reset_classifier(self, num_classes, global_pool=''): + self.num_classes = num_classes + self.head = nn.Linear(self.embed_dim, num_classes) if num_classes > 0 else nn.Identity() + + def forward_features(self, x): + out = [] + x = self.patch_embed1(x) + x = self.pos_drop(x) + for i, blk in enumerate(self.blocks1): + if self.use_checkpoint and i < self.checkpoint_num[0]: + x = checkpoint.checkpoint(blk, x) + else: + x = blk(x) + x_out = self.norm1(x.permute(0, 2, 3, 1)) + out.append(x_out.permute(0, 3, 1, 2).contiguous()) + x = self.patch_embed2(x) + for i, blk in enumerate(self.blocks2): + if self.use_checkpoint and i < self.checkpoint_num[1]: + x = checkpoint.checkpoint(blk, x) + else: + x = blk(x) + x_out = self.norm2(x.permute(0, 2, 3, 1)) + out.append(x_out.permute(0, 3, 1, 2).contiguous()) + x = self.patch_embed3(x) + for i, blk in enumerate(self.blocks3): + if self.use_checkpoint and i < self.checkpoint_num[2]: + x = checkpoint.checkpoint(blk, x) + else: + x = blk(x) + x_out = self.norm3(x.permute(0, 2, 3, 1)) + out.append(x_out.permute(0, 3, 1, 2).contiguous()) + x = self.patch_embed4(x) + for i, blk in enumerate(self.blocks4): + if self.use_checkpoint and i < self.checkpoint_num[3]: + x = checkpoint.checkpoint(blk, x) + else: + x = blk(x) + x_out = self.norm4(x.permute(0, 2, 3, 1)) + out.append(x_out.permute(0, 3, 1, 2).contiguous()) + return tuple(out) + + def forward(self, x): + x = self.forward_features(x) + return x diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/upernet_global_small.py b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/upernet_global_small.py new file mode 100644 index 0000000000000000000000000000000000000000..16b14768b80035b52a9a975af67c23c1c7693265 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/uniformer/upernet_global_small.py @@ -0,0 +1,44 @@ +_base_ = [ + 'configs/_base_/models/upernet_uniformer.py', + 'configs/_base_/datasets/ade20k.py', + 'configs/_base_/default_runtime.py', + 'configs/_base_/schedules/schedule_160k.py' +] + +custom_imports = dict( + imports=['annotator.uniformer.uniformer'], + allow_failed_imports=False +) + +model = dict( + backbone=dict( + type='UniFormer', + embed_dim=[64, 128, 320, 512], + layers=[3, 4, 8, 3], + head_dim=64, + drop_path_rate=0.25, + windows=False, + hybrid=False + ), + decode_head=dict( + in_channels=[64, 
128, 320, 512], + num_classes=150 + ), + auxiliary_head=dict( + in_channels=320, + num_classes=150 + )) + +# AdamW optimizer, no weight decay for position embedding & layer norm in backbone +optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.)})) + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + +data=dict(samples_per_gpu=2) \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/util.py b/extensions-builtin/forge_legacy_preprocessors/annotator/util.py new file mode 100644 index 0000000000000000000000000000000000000000..d4fa0473d037c183122105f4865254dfc2fc204f --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/util.py @@ -0,0 +1,79 @@ +import numpy as np +import cv2 +import os + + +def load_model(filename: str, remote_url: str, model_dir: str) -> str: + """ + Load the model from the specified filename and remote URL if it doesn't exist locally. + + Args: + filename (str): The filename of the model. + remote_url (str): The remote URL of the model. + """ + local_path = os.path.join(model_dir, filename) + if not os.path.exists(local_path): + from modules.modelloader import load_file_from_url + + load_file_from_url(remote_url, model_dir=model_dir) + return local_path + + +def HWC3(x): + assert x.dtype == np.uint8 + if x.ndim == 2: + x = x[:, :, None] + assert x.ndim == 3 + H, W, C = x.shape + assert C == 1 or C == 3 or C == 4 + if C == 3: + return x + if C == 1: + return np.concatenate([x, x, x], axis=2) + if C == 4: + color = x[:, :, 0:3].astype(np.float32) + alpha = x[:, :, 3:4].astype(np.float32) / 255.0 + y = color * alpha + 255.0 * (1.0 - alpha) + y = y.clip(0, 255).astype(np.uint8) + return y + + +def make_noise_disk(H, W, C, F): + noise = np.random.uniform(low=0, high=1, size=((H // F) + 2, (W // F) + 2, C)) + noise = cv2.resize(noise, (W + 2 * F, H + 2 * F), interpolation=cv2.INTER_CUBIC) + noise = noise[F: F + H, F: F + W] + noise -= np.min(noise) + noise /= np.max(noise) + if C == 1: + noise = noise[:, :, None] + return noise + + +def nms(x, t, s): + x = cv2.GaussianBlur(x.astype(np.float32), (0, 0), s) + + f1 = np.array([[0, 0, 0], [1, 1, 1], [0, 0, 0]], dtype=np.uint8) + f2 = np.array([[0, 1, 0], [0, 1, 0], [0, 1, 0]], dtype=np.uint8) + f3 = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.uint8) + f4 = np.array([[0, 0, 1], [0, 1, 0], [1, 0, 0]], dtype=np.uint8) + + y = np.zeros_like(x) + + for f in [f1, f2, f3, f4]: + np.putmask(y, cv2.dilate(x, kernel=f) == x, x) + + z = np.zeros_like(y, dtype=np.uint8) + z[y > t] = 255 + return z + + +def min_max_norm(x): + x -= np.min(x) + x /= np.maximum(np.max(x), 1e-5) + return x + + +def safe_step(x, step=2): + y = x.astype(np.float32) * float(step + 1) + y = y.astype(np.int32).astype(np.float32) / float(step) + return y diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/LICENSE b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..7a1e90d007836c327846ce8e5151013b115042ab --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022 Intelligent Systems Lab Org + +Permission is hereby 
granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b5f7f4d47f7924f5523580c2969f0cfe6f3f861d --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/__init__.py @@ -0,0 +1,59 @@ +import os +import cv2 +import numpy as np +import torch + +from einops import rearrange +from .zoedepth.models.zoedepth.zoedepth_v1 import ZoeDepth +from .zoedepth.utils.config import get_config +from modules import devices +from annotator.annotator_path import models_path + + +class ZoeDetector: + model_dir = os.path.join(models_path, "zoedepth") + + def __init__(self): + self.model = None + self.device = devices.get_device_for("controlnet") + + def load_model(self): + remote_model_path = "https://huggingface.co/lllyasviel/Annotators/resolve/main/ZoeD_M12_N.pt" + modelpath = os.path.join(self.model_dir, "ZoeD_M12_N.pt") + if not os.path.exists(modelpath): + from modules.modelloader import load_file_from_url + load_file_from_url(remote_model_path, model_dir=self.model_dir) + conf = get_config("zoedepth", "infer") + model = ZoeDepth.build_from_config(conf) + model.load_state_dict(torch.load(modelpath, map_location=model.device)['model'], strict=False) + model.eval() + self.model = model.to(self.device) + + def unload_model(self): + if self.model is not None: + self.model.cpu() + + def __call__(self, input_image): + if self.model is None: + self.load_model() + self.model.to(self.device) + + assert input_image.ndim == 3 + image_depth = input_image + with torch.no_grad(): + image_depth = torch.from_numpy(image_depth).float().to(self.device) + image_depth = image_depth / 255.0 + image_depth = rearrange(image_depth, 'h w c -> 1 c h w') + depth = self.model.infer(image_depth) + + depth = depth[0, 0].cpu().numpy() + + vmin = np.percentile(depth, 2) + vmax = np.percentile(depth, 85) + + depth -= vmin + depth /= vmax - vmin + depth = 1.0 - depth + depth_image = (depth * 255.0).clip(0, 255).astype(np.uint8) + + return depth_image diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5f2668792389157609abb2a0846fb620e7d67eb9 --- /dev/null +++ 
b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/__init__.py @@ -0,0 +1,24 @@ +# MIT License + +# Copyright (c) 2022 Intelligent Systems Lab Org + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# File author: Shariq Farooq Bhat + diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5f2668792389157609abb2a0846fb620e7d67eb9 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/__init__.py @@ -0,0 +1,24 @@ +# MIT License + +# Copyright (c) 2022 Intelligent Systems Lab Org + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +# File author: Shariq Farooq Bhat + diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas.py new file mode 100644 index 0000000000000000000000000000000000000000..172778b306b87fc056aaa0eaacdde269ec4f7cc2 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas.py @@ -0,0 +1,379 @@ +# MIT License +import os + +# Copyright (c) 2022 Intelligent Systems Lab Org + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# File author: Shariq Farooq Bhat + +import torch +import torch.nn as nn +import numpy as np +from torchvision.transforms import Normalize + + +def denormalize(x): + """Reverses the imagenet normalization applied to the input. + + Args: + x (torch.Tensor - shape(N,3,H,W)): input tensor + + Returns: + torch.Tensor - shape(N,3,H,W): Denormalized input + """ + mean = torch.Tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1).to(x.device) + std = torch.Tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1).to(x.device) + return x * std + mean + +def get_activation(name, bank): + def hook(model, input, output): + bank[name] = output + return hook + + +class Resize(object): + """Resize sample to given size (width, height). + """ + + def __init__( + self, + width, + height, + resize_target=True, + keep_aspect_ratio=False, + ensure_multiple_of=1, + resize_method="lower_bound", + ): + """Init. + Args: + width (int): desired output width + height (int): desired output height + resize_target (bool, optional): + True: Resize the full sample (image, mask, target). + False: Resize image only. + Defaults to True. + keep_aspect_ratio (bool, optional): + True: Keep the aspect ratio of the input sample. + Output sample might not have the given width and height, and + resize behaviour depends on the parameter 'resize_method'. + Defaults to False. + ensure_multiple_of (int, optional): + Output width and height is constrained to be multiple of this parameter. + Defaults to 1. + resize_method (str, optional): + "lower_bound": Output will be at least as large as the given size. + "upper_bound": Output will be at max as large as the given size. (Output size might be smaller than given size.) + "minimal": Scale as least as possible. (Output size might be smaller than given size.) + Defaults to "lower_bound". 
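+
+        Worked example (illustrative numbers): with ``width=384``, ``height=384``,
+        ``keep_aspect_ratio=True``, ``ensure_multiple_of=32`` and
+        ``resize_method="lower_bound"``, a 640x480 (WxH) input yields
+        scale_width = 384/640 = 0.6 and scale_height = 384/480 = 0.8; the larger
+        scale is kept so both sides stay at least 384, giving 0.8*640 = 512 and
+        0.8*480 = 384, i.e. an output of 512x384 (already multiples of 32).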
+ """ + print("Params passed to Resize transform:") + print("\twidth: ", width) + print("\theight: ", height) + print("\tresize_target: ", resize_target) + print("\tkeep_aspect_ratio: ", keep_aspect_ratio) + print("\tensure_multiple_of: ", ensure_multiple_of) + print("\tresize_method: ", resize_method) + + self.__width = width + self.__height = height + + self.__keep_aspect_ratio = keep_aspect_ratio + self.__multiple_of = ensure_multiple_of + self.__resize_method = resize_method + + def constrain_to_multiple_of(self, x, min_val=0, max_val=None): + y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int) + + if max_val is not None and y > max_val: + y = (np.floor(x / self.__multiple_of) + * self.__multiple_of).astype(int) + + if y < min_val: + y = (np.ceil(x / self.__multiple_of) + * self.__multiple_of).astype(int) + + return y + + def get_size(self, width, height): + # determine new height and width + scale_height = self.__height / height + scale_width = self.__width / width + + if self.__keep_aspect_ratio: + if self.__resize_method == "lower_bound": + # scale such that output size is lower bound + if scale_width > scale_height: + # fit width + scale_height = scale_width + else: + # fit height + scale_width = scale_height + elif self.__resize_method == "upper_bound": + # scale such that output size is upper bound + if scale_width < scale_height: + # fit width + scale_height = scale_width + else: + # fit height + scale_width = scale_height + elif self.__resize_method == "minimal": + # scale as least as possbile + if abs(1 - scale_width) < abs(1 - scale_height): + # fit width + scale_height = scale_width + else: + # fit height + scale_width = scale_height + else: + raise ValueError( + f"resize_method {self.__resize_method} not implemented" + ) + + if self.__resize_method == "lower_bound": + new_height = self.constrain_to_multiple_of( + scale_height * height, min_val=self.__height + ) + new_width = self.constrain_to_multiple_of( + scale_width * width, min_val=self.__width + ) + elif self.__resize_method == "upper_bound": + new_height = self.constrain_to_multiple_of( + scale_height * height, max_val=self.__height + ) + new_width = self.constrain_to_multiple_of( + scale_width * width, max_val=self.__width + ) + elif self.__resize_method == "minimal": + new_height = self.constrain_to_multiple_of(scale_height * height) + new_width = self.constrain_to_multiple_of(scale_width * width) + else: + raise ValueError( + f"resize_method {self.__resize_method} not implemented") + + return (new_width, new_height) + + def __call__(self, x): + width, height = self.get_size(*x.shape[-2:][::-1]) + return nn.functional.interpolate(x, (int(height), int(width)), mode='bilinear', align_corners=True) + +class PrepForMidas(object): + def __init__(self, resize_mode="minimal", keep_aspect_ratio=True, img_size=384, do_resize=True): + if isinstance(img_size, int): + img_size = (img_size, img_size) + net_h, net_w = img_size + self.normalization = Normalize( + mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) + self.resizer = Resize(net_w, net_h, keep_aspect_ratio=keep_aspect_ratio, ensure_multiple_of=32, resize_method=resize_mode) \ + if do_resize else nn.Identity() + + def __call__(self, x): + return self.normalization(self.resizer(x)) + + +class MidasCore(nn.Module): + def __init__(self, midas, trainable=False, fetch_features=True, layer_names=('out_conv', 'l4_rn', 'r4', 'r3', 'r2', 'r1'), freeze_bn=False, keep_aspect_ratio=True, + img_size=384, **kwargs): + """Midas Base model used for multi-scale feature 
extraction. + + Args: + midas (torch.nn.Module): Midas model. + trainable (bool, optional): Train midas model. Defaults to False. + fetch_features (bool, optional): Extract multi-scale features. Defaults to True. + layer_names (tuple, optional): Layers used for feature extraction. Order = (head output features, last layer features, ...decoder features). Defaults to ('out_conv', 'l4_rn', 'r4', 'r3', 'r2', 'r1'). + freeze_bn (bool, optional): Freeze BatchNorm. Generally results in better finetuning performance. Defaults to False. + keep_aspect_ratio (bool, optional): Keep the aspect ratio of input images while resizing. Defaults to True. + img_size (int, tuple, optional): Input resolution. Defaults to 384. + """ + super().__init__() + self.core = midas + self.output_channels = None + self.core_out = {} + self.trainable = trainable + self.fetch_features = fetch_features + # midas.scratch.output_conv = nn.Identity() + self.handles = [] + # self.layer_names = ['out_conv','l4_rn', 'r4', 'r3', 'r2', 'r1'] + self.layer_names = layer_names + + self.set_trainable(trainable) + self.set_fetch_features(fetch_features) + + self.prep = PrepForMidas(keep_aspect_ratio=keep_aspect_ratio, + img_size=img_size, do_resize=kwargs.get('do_resize', True)) + + if freeze_bn: + self.freeze_bn() + + def set_trainable(self, trainable): + self.trainable = trainable + if trainable: + self.unfreeze() + else: + self.freeze() + return self + + def set_fetch_features(self, fetch_features): + self.fetch_features = fetch_features + if fetch_features: + if len(self.handles) == 0: + self.attach_hooks(self.core) + else: + self.remove_hooks() + return self + + def freeze(self): + for p in self.parameters(): + p.requires_grad = False + self.trainable = False + return self + + def unfreeze(self): + for p in self.parameters(): + p.requires_grad = True + self.trainable = True + return self + + def freeze_bn(self): + for m in self.modules(): + if isinstance(m, nn.BatchNorm2d): + m.eval() + return self + + def forward(self, x, denorm=False, return_rel_depth=False): + with torch.no_grad(): + if denorm: + x = denormalize(x) + x = self.prep(x) + # print("Shape after prep: ", x.shape) + + with torch.set_grad_enabled(self.trainable): + + # print("Input size to Midascore", x.shape) + rel_depth = self.core(x) + # print("Output from midas shape", rel_depth.shape) + if not self.fetch_features: + return rel_depth + out = [self.core_out[k] for k in self.layer_names] + + if return_rel_depth: + return rel_depth, out + return out + + def get_rel_pos_params(self): + for name, p in self.core.pretrained.named_parameters(): + if "relative_position" in name: + yield p + + def get_enc_params_except_rel_pos(self): + for name, p in self.core.pretrained.named_parameters(): + if "relative_position" not in name: + yield p + + def freeze_encoder(self, freeze_rel_pos=False): + if freeze_rel_pos: + for p in self.core.pretrained.parameters(): + p.requires_grad = False + else: + for p in self.get_enc_params_except_rel_pos(): + p.requires_grad = False + return self + + def attach_hooks(self, midas): + if len(self.handles) > 0: + self.remove_hooks() + if "out_conv" in self.layer_names: + self.handles.append(list(midas.scratch.output_conv.children())[ + 3].register_forward_hook(get_activation("out_conv", self.core_out))) + if "r4" in self.layer_names: + self.handles.append(midas.scratch.refinenet4.register_forward_hook( + get_activation("r4", self.core_out))) + if "r3" in self.layer_names: + self.handles.append(midas.scratch.refinenet3.register_forward_hook( + 
get_activation("r3", self.core_out))) + if "r2" in self.layer_names: + self.handles.append(midas.scratch.refinenet2.register_forward_hook( + get_activation("r2", self.core_out))) + if "r1" in self.layer_names: + self.handles.append(midas.scratch.refinenet1.register_forward_hook( + get_activation("r1", self.core_out))) + if "l4_rn" in self.layer_names: + self.handles.append(midas.scratch.layer4_rn.register_forward_hook( + get_activation("l4_rn", self.core_out))) + + return self + + def remove_hooks(self): + for h in self.handles: + h.remove() + return self + + def __del__(self): + self.remove_hooks() + + def set_output_channels(self, model_type): + self.output_channels = MIDAS_SETTINGS[model_type] + + @staticmethod + def build(midas_model_type="DPT_BEiT_L_384", train_midas=False, use_pretrained_midas=True, fetch_features=False, freeze_bn=True, force_keep_ar=False, force_reload=False, **kwargs): + if midas_model_type not in MIDAS_SETTINGS: + raise ValueError( + f"Invalid model type: {midas_model_type}. Must be one of {list(MIDAS_SETTINGS.keys())}") + if "img_size" in kwargs: + kwargs = MidasCore.parse_img_size(kwargs) + img_size = kwargs.pop("img_size", [384, 384]) + print("img_size", img_size) + midas_path = os.path.join(os.path.dirname(__file__), 'midas_repo') + midas = torch.hub.load(midas_path, midas_model_type, + pretrained=use_pretrained_midas, force_reload=force_reload, source='local') + kwargs.update({'keep_aspect_ratio': force_keep_ar}) + midas_core = MidasCore(midas, trainable=train_midas, fetch_features=fetch_features, + freeze_bn=freeze_bn, img_size=img_size, **kwargs) + midas_core.set_output_channels(midas_model_type) + return midas_core + + @staticmethod + def build_from_config(config): + return MidasCore.build(**config) + + @staticmethod + def parse_img_size(config): + assert 'img_size' in config + if isinstance(config['img_size'], str): + assert "," in config['img_size'], "img_size should be a string with comma separated img_size=H,W" + config['img_size'] = list(map(int, config['img_size'].split(","))) + assert len( + config['img_size']) == 2, "img_size should be a string with comma separated img_size=H,W" + elif isinstance(config['img_size'], int): + config['img_size'] = [config['img_size'], config['img_size']] + else: + assert isinstance(config['img_size'], list) and len( + config['img_size']) == 2, "img_size should be a list of H,W" + return config + + +nchannels2models = { + tuple([256]*5): ["DPT_BEiT_L_384", "DPT_BEiT_L_512", "DPT_BEiT_B_384", "DPT_SwinV2_L_384", "DPT_SwinV2_B_384", "DPT_SwinV2_T_256", "DPT_Large", "DPT_Hybrid"], + (512, 256, 128, 64, 64): ["MiDaS_small"] +} + +# Model name to number of output channels +MIDAS_SETTINGS = {m: k for k, v in nchannels2models.items() + for m in v + } diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/.gitignore b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..a13c80028de3d297de4a3f09cee1b20759acc006 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/.gitignore @@ -0,0 +1,110 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# 
PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +*.png +*.pfm +*.jpg +*.jpeg +*.pt \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/Dockerfile b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..466bc94ba3128ea9cbe4bde82bd2fd1fc9daa8af --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/Dockerfile @@ -0,0 +1,29 @@ +# enables cuda support in docker +FROM nvidia/cuda:10.2-cudnn7-runtime-ubuntu18.04 + +# install python 3.6, pip and requirements for opencv-python +# (see https://github.com/NVIDIA/nvidia-docker/issues/864) +RUN apt-get update && apt-get -y install \ + python3 \ + python3-pip \ + libsm6 \ + libxext6 \ + libxrender-dev \ + curl \ + && rm -rf /var/lib/apt/lists/* + +# install python dependencies +RUN pip3 install --upgrade pip +RUN pip3 install torch~=1.8 torchvision opencv-python-headless~=3.4 timm + +# copy inference code +WORKDIR /opt/MiDaS +COPY ./midas ./midas +COPY ./*.py ./ + +# download model weights so the docker image can be used offline +RUN cd weights && {curl -OL https://github.com/isl-org/MiDaS/releases/download/v3/dpt_hybrid_384.pt; cd -; } +RUN python3 run.py --model_type dpt_hybrid; exit 0 + +# entrypoint (dont forget to mount input and output directories) +CMD python3 run.py --model_type dpt_hybrid diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/LICENSE b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..277b5c11be103f028a8d10985139f1da10c2f08e --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2019 Intel ISL (Intel Intelligent Systems Lab) + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice 
shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/README.md b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/README.md new file mode 100644 index 0000000000000000000000000000000000000000..9568ea71c755b6938ee5482ba9f09be722e75943 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/README.md @@ -0,0 +1,259 @@ +## Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer + +This repository contains code to compute depth from a single image. It accompanies our [paper](https://arxiv.org/abs/1907.01341v3): + +>Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer +René Ranftl, Katrin Lasinger, David Hafner, Konrad Schindler, Vladlen Koltun + + +and our [preprint](https://arxiv.org/abs/2103.13413): + +> Vision Transformers for Dense Prediction +> René Ranftl, Alexey Bochkovskiy, Vladlen Koltun + + +MiDaS was trained on up to 12 datasets (ReDWeb, DIML, Movies, MegaDepth, WSVD, TartanAir, HRWSI, ApolloScape, BlendedMVS, IRS, KITTI, NYU Depth V2) with +multi-objective optimization. +The original model that was trained on 5 datasets (`MIX 5` in the paper) can be found [here](https://github.com/isl-org/MiDaS/releases/tag/v2). +The figure below shows an overview of the different MiDaS models; the bubble size scales with number of parameters. 
+ +![](figures/Improvement_vs_FPS.png) + +### Setup + +1) Pick one or more models and download the corresponding weights to the `weights` folder: + +MiDaS 3.1 +- For highest quality: [dpt_beit_large_512](https://github.com/isl-org/MiDaS/releases/download/v3_1/dpt_beit_large_512.pt) +- For moderately less quality, but better speed-performance trade-off: [dpt_swin2_large_384](https://github.com/isl-org/MiDaS/releases/download/v3_1/dpt_swin2_large_384.pt) +- For embedded devices: [dpt_swin2_tiny_256](https://github.com/isl-org/MiDaS/releases/download/v3_1/dpt_swin2_tiny_256.pt), [dpt_levit_224](https://github.com/isl-org/MiDaS/releases/download/v3_1/dpt_levit_224.pt) +- For inference on Intel CPUs, OpenVINO may be used for the small legacy model: openvino_midas_v21_small [.xml](https://github.com/isl-org/MiDaS/releases/download/v3_1/openvino_midas_v21_small_256.xml), [.bin](https://github.com/isl-org/MiDaS/releases/download/v3_1/openvino_midas_v21_small_256.bin) + +MiDaS 3.0: Legacy transformer models [dpt_large_384](https://github.com/isl-org/MiDaS/releases/download/v3/dpt_large_384.pt) and [dpt_hybrid_384](https://github.com/isl-org/MiDaS/releases/download/v3/dpt_hybrid_384.pt) + +MiDaS 2.1: Legacy convolutional models [midas_v21_384](https://github.com/isl-org/MiDaS/releases/download/v2_1/midas_v21_384.pt) and [midas_v21_small_256](https://github.com/isl-org/MiDaS/releases/download/v2_1/midas_v21_small_256.pt) + +1) Set up dependencies: + + ```shell + conda env create -f environment.yaml + conda activate midas-py310 + ``` + +#### optional + +For the Next-ViT model, execute + +```shell +git submodule add https://github.com/isl-org/Next-ViT midas/external/next_vit +``` + +For the OpenVINO model, install + +```shell +pip install openvino +``` + +### Usage + +1) Place one or more input images in the folder `input`. + +2) Run the model with + + ```shell + python run.py --model_type --input_path input --output_path output + ``` + where `````` is chosen from [dpt_beit_large_512](#model_type), [dpt_beit_large_384](#model_type), + [dpt_beit_base_384](#model_type), [dpt_swin2_large_384](#model_type), [dpt_swin2_base_384](#model_type), + [dpt_swin2_tiny_256](#model_type), [dpt_swin_large_384](#model_type), [dpt_next_vit_large_384](#model_type), + [dpt_levit_224](#model_type), [dpt_large_384](#model_type), [dpt_hybrid_384](#model_type), + [midas_v21_384](#model_type), [midas_v21_small_256](#model_type), [openvino_midas_v21_small_256](#model_type). + +3) The resulting depth maps are written to the `output` folder. + +#### optional + +1) By default, the inference resizes the height of input images to the size of a model to fit into the encoder. This + size is given by the numbers in the model names of the [accuracy table](#accuracy). Some models do not only support a single + inference height but a range of different heights. Feel free to explore different heights by appending the extra + command line argument `--height`. Unsupported height values will throw an error. Note that using this argument may + decrease the model accuracy. +2) By default, the inference keeps the aspect ratio of input images when feeding them into the encoder if this is + supported by a model (all models except for Swin, Swin2, LeViT). In order to resize to a square resolution, + disregarding the aspect ratio while preserving the height, use the command line argument `--square`. 
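+
+For example, both optional arguments can be combined with a regular run (an illustrative invocation; choose a height that the selected model actually supports):
+
+```shell
+python run.py --model_type dpt_beit_large_512 --input_path input --output_path output --height 384 --square
+```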
+ +#### via Camera + + If you want the input images to be grabbed from the camera and shown in a window, leave the input and output paths + away and choose a model type as shown above: + + ```shell + python run.py --model_type --side + ``` + + The argument `--side` is optional and causes both the input RGB image and the output depth map to be shown + side-by-side for comparison. + +#### via Docker + +1) Make sure you have installed Docker and the + [NVIDIA Docker runtime](https://github.com/NVIDIA/nvidia-docker/wiki/Installation-\(Native-GPU-Support\)). + +2) Build the Docker image: + + ```shell + docker build -t midas . + ``` + +3) Run inference: + + ```shell + docker run --rm --gpus all -v $PWD/input:/opt/MiDaS/input -v $PWD/output:/opt/MiDaS/output -v $PWD/weights:/opt/MiDaS/weights midas + ``` + + This command passes through all of your NVIDIA GPUs to the container, mounts the + `input` and `output` directories and then runs the inference. + +#### via PyTorch Hub + +The pretrained model is also available on [PyTorch Hub](https://pytorch.org/hub/intelisl_midas_v2/) + +#### via TensorFlow or ONNX + +See [README](https://github.com/isl-org/MiDaS/tree/master/tf) in the `tf` subdirectory. + +Currently only supports MiDaS v2.1. + + +#### via Mobile (iOS / Android) + +See [README](https://github.com/isl-org/MiDaS/tree/master/mobile) in the `mobile` subdirectory. + +#### via ROS1 (Robot Operating System) + +See [README](https://github.com/isl-org/MiDaS/tree/master/ros) in the `ros` subdirectory. + +Currently only supports MiDaS v2.1. DPT-based models to be added. + + +### Accuracy + +We provide a **zero-shot error** $\epsilon_d$ which is evaluated for 6 different datasets +(see [paper](https://arxiv.org/abs/1907.01341v3)). **Lower error values are better**. +$\color{green}{\textsf{Overall model quality is represented by the improvement}}$ ([Imp.](#improvement)) with respect to +MiDaS 3.0 DPTL-384. The models are grouped by the height used for inference, whereas the square training resolution is given by +the numbers in the model names. The table also shows the **number of parameters** (in millions) and the +**frames per second** for inference at the training resolution (for GPU RTX 3090): + +| MiDaS Model | DIW
WHDR | Eth3d<br>AbsRel | Sintel<br>AbsRel | TUM<br>δ1 | KITTI<br>δ1 | NYUv2<br>δ1 | $\color{green}{\textsf{Imp.}}$<br>% | Par.<br>M | FPS
  | +|-----------------------------------------------------------------------------------------------------------------------|-------------------------:|-----------------------------:|------------------------------:|-------------------------:|-------------------------:|-------------------------:|-------------------------------------------------:|----------------------:|--------------------------:| +| **Inference height 512** | | | | | | | | | | +| [v3.1 BEiTL-512](https://github.com/isl-org/MiDaS/releases/download/v3_1/dpt_beit_large_512.pt) | 0.1137 | 0.0659 | 0.2366 | **6.13** | 11.56* | **1.86*** | $\color{green}{\textsf{19}}$ | **345** | **5.7** | +| [v3.1 BEiTL-512](https://github.com/isl-org/MiDaS/releases/download/v3_1/dpt_beit_large_512.pt)$\tiny{\square}$ | **0.1121** | **0.0614** | **0.2090** | 6.46 | **5.00*** | 1.90* | $\color{green}{\textsf{34}}$ | **345** | **5.7** | +| | | | | | | | | | | +| **Inference height 384** | | | | | | | | | | +| [v3.1 BEiTL-512](https://github.com/isl-org/MiDaS/releases/download/v3_1/dpt_beit_large_512.pt) | 0.1245 | 0.0681 | **0.2176** | **6.13** | 6.28* | **2.16*** | $\color{green}{\textsf{28}}$ | 345 | 12 | +| [v3.1 Swin2L-384](https://github.com/isl-org/MiDaS/releases/download/v3_1/dpt_swin2_large_384.pt)$\tiny{\square}$ | 0.1106 | 0.0732 | 0.2442 | 8.87 | **5.84*** | 2.92* | $\color{green}{\textsf{22}}$ | 213 | 41 | +| [v3.1 Swin2B-384](https://github.com/isl-org/MiDaS/releases/download/v3_1/dpt_swin2_base_384.pt)$\tiny{\square}$ | 0.1095 | 0.0790 | 0.2404 | 8.93 | 5.97* | 3.28* | $\color{green}{\textsf{22}}$ | 102 | 39 | +| [v3.1 SwinL-384](https://github.com/isl-org/MiDaS/releases/download/v3_1/dpt_swin_large_384.pt)$\tiny{\square}$ | 0.1126 | 0.0853 | 0.2428 | 8.74 | 6.60* | 3.34* | $\color{green}{\textsf{17}}$ | 213 | 49 | +| [v3.1 BEiTL-384](https://github.com/isl-org/MiDaS/releases/download/v3_1/dpt_beit_large_384.pt) | 0.1239 | **0.0667** | 0.2545 | 7.17 | 9.84* | 2.21* | $\color{green}{\textsf{17}}$ | 344 | 13 | +| [v3.1 Next-ViTL-384](https://github.com/isl-org/MiDaS/releases/download/v3_1/dpt_next_vit_large_384.pt) | **0.1031** | 0.0954 | 0.2295 | 9.21 | 6.89* | 3.47* | $\color{green}{\textsf{16}}$ | **72** | 30 | +| [v3.1 BEiTB-384](https://github.com/isl-org/MiDaS/releases/download/v3_1/dpt_beit_base_384.pt) | 0.1159 | 0.0967 | 0.2901 | 9.88 | 26.60* | 3.91* | $\color{green}{\textsf{-31}}$ | 112 | 31 | +| [v3.0 DPTL-384](https://github.com/isl-org/MiDaS/releases/download/v3/dpt_large_384.pt) | 0.1082 | 0.0888 | 0.2697 | 9.97 | 8.46 | 8.32 | $\color{green}{\textsf{0}}$ | 344 | **61** | +| [v3.0 DPTH-384](https://github.com/isl-org/MiDaS/releases/download/v3/dpt_hybrid_384.pt) | 0.1106 | 0.0934 | 0.2741 | 10.89 | 11.56 | 8.69 | $\color{green}{\textsf{-10}}$ | 123 | 50 | +| [v2.1 Large384](https://github.com/isl-org/MiDaS/releases/download/v2_1/midas_v21_384.pt) | 0.1295 | 0.1155 | 0.3285 | 12.51 | 16.08 | 8.71 | $\color{green}{\textsf{-32}}$ | 105 | 47 | +| | | | | | | | | | | +| **Inference height 256** | | | | | | | | | | +| [v3.1 Swin2T-256](https://github.com/isl-org/MiDaS/releases/download/v3_1/dpt_swin2_tiny_256.pt)$\tiny{\square}$ | **0.1211** | **0.1106** | **0.2868** | **13.43** | **10.13*** | **5.55*** | $\color{green}{\textsf{-11}}$ | 42 | 64 | +| [v2.1 Small256](https://github.com/isl-org/MiDaS/releases/download/v2_1/midas_v21_small_256.pt) | 0.1344 | 0.1344 | 0.3370 | 14.53 | 29.27 | 13.43 | $\color{green}{\textsf{-76}}$ | **21** | **90** | +| | | | | | | | | | | +| **Inference height 224** | | | | | | | | | | +| [v3.1 
LeViT224](https://github.com/isl-org/MiDaS/releases/download/v3_1/dpt_levit_224.pt)$\tiny{\square}$ | **0.1314** | **0.1206** | **0.3148** | **18.21** | **15.27*** | **8.64*** | $\color{green}{\textsf{-40}}$ | **51** | **73** | + +* No zero-shot error, because models are also trained on KITTI and NYU Depth V2\ +$\square$ Validation performed at **square resolution**, either because the transformer encoder backbone of a model +does not support non-square resolutions (Swin, Swin2, LeViT) or for comparison with these models. All other +validations keep the aspect ratio. A difference in resolution limits the comparability of the zero-shot error and the +improvement, because these quantities are averages over the pixels of an image and do not take into account the +advantage of more details due to a higher resolution.\ +Best values per column and same validation height in bold + +#### Improvement + +The improvement in the above table is defined as the relative zero-shot error with respect to MiDaS v3.0 +DPTL-384 and averaging over the datasets. So, if $\epsilon_d$ is the zero-shot error for dataset $d$, then +the $\color{green}{\textsf{improvement}}$ is given by $100(1-(1/6)\sum_d\epsilon_d/\epsilon_{d,\rm{DPT_{L-384}}})$%. + +Note that the improvements of 10% for MiDaS v2.0 → v2.1 and 21% for MiDaS v2.1 → v3.0 are not visible from the +improvement column (Imp.) in the table but would require an evaluation with respect to MiDaS v2.1 Large384 +and v2.0 Large384 respectively instead of v3.0 DPTL-384. + +### Depth map comparison + +Zoom in for better visibility +![](figures/Comparison.png) + +### Speed on Camera Feed + +Test configuration +- Windows 10 +- 11th Gen Intel Core i7-1185G7 3.00GHz +- 16GB RAM +- Camera resolution 640x480 +- openvino_midas_v21_small_256 + +Speed: 22 FPS + +### Changelog + +* [Dec 2022] Released MiDaS v3.1: + - New models based on 5 different types of transformers ([BEiT](https://arxiv.org/pdf/2106.08254.pdf), [Swin2](https://arxiv.org/pdf/2111.09883.pdf), [Swin](https://arxiv.org/pdf/2103.14030.pdf), [Next-ViT](https://arxiv.org/pdf/2207.05501.pdf), [LeViT](https://arxiv.org/pdf/2104.01136.pdf)) + - Training datasets extended from 10 to 12, including also KITTI and NYU Depth V2 using [BTS](https://github.com/cleinc/bts) split + - Best model, BEiTLarge 512, with resolution 512x512, is on average about [28% more accurate](#Accuracy) than MiDaS v3.0 + - Integrated live depth estimation from camera feed +* [Sep 2021] Integrated to [Huggingface Spaces](https://huggingface.co/spaces) with [Gradio](https://github.com/gradio-app/gradio). See [Gradio Web Demo](https://huggingface.co/spaces/akhaliq/DPT-Large). +* [Apr 2021] Released MiDaS v3.0: + - New models based on [Dense Prediction Transformers](https://arxiv.org/abs/2103.13413) are on average [21% more accurate](#Accuracy) than MiDaS v2.1 + - Additional models can be found [here](https://github.com/isl-org/DPT) +* [Nov 2020] Released MiDaS v2.1: + - New model that was trained on 10 datasets and is on average about [10% more accurate](#Accuracy) than [MiDaS v2.0](https://github.com/isl-org/MiDaS/releases/tag/v2) + - New light-weight model that achieves [real-time performance](https://github.com/isl-org/MiDaS/tree/master/mobile) on mobile platforms. 
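As a worked example of the improvement definition above, using only values already listed in the table: for v3.1 BEiTL-512 at inference height 512, the six error ratios relative to v3.0 DPTL-384 are $0.1137/0.1082$, $0.0659/0.0888$, $0.2366/0.2697$, $6.13/9.97$, $11.56/8.46$ and $1.86/8.32$. Their mean is about $0.81$, so the improvement is $100(1-0.81)\approx 19$%, which agrees with the value 19 reported in the Imp. column of that row.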
+ - Sample applications for [iOS](https://github.com/isl-org/MiDaS/tree/master/mobile/ios) and [Android](https://github.com/isl-org/MiDaS/tree/master/mobile/android) + - [ROS package](https://github.com/isl-org/MiDaS/tree/master/ros) for easy deployment on robots +* [Jul 2020] Added TensorFlow and ONNX code. Added [online demo](http://35.202.76.57/). +* [Dec 2019] Released new version of MiDaS - the new model is significantly more accurate and robust +* [Jul 2019] Initial release of MiDaS ([Link](https://github.com/isl-org/MiDaS/releases/tag/v1)) + +### Citation + +Please cite our paper if you use this code or any of the models: +``` +@ARTICLE {Ranftl2022, + author = "Ren\'{e} Ranftl and Katrin Lasinger and David Hafner and Konrad Schindler and Vladlen Koltun", + title = "Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-Shot Cross-Dataset Transfer", + journal = "IEEE Transactions on Pattern Analysis and Machine Intelligence", + year = "2022", + volume = "44", + number = "3" +} +``` + +If you use a DPT-based model, please also cite: + +``` +@article{Ranftl2021, + author = {Ren\'{e} Ranftl and Alexey Bochkovskiy and Vladlen Koltun}, + title = {Vision Transformers for Dense Prediction}, + journal = {ICCV}, + year = {2021}, +} +``` + +### Acknowledgements + +Our work builds on and uses code from [timm](https://github.com/rwightman/pytorch-image-models) and [Next-ViT](https://github.com/bytedance/Next-ViT). +We'd like to thank the authors for making these libraries available. + +### License + +MIT License diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/environment.yaml b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/environment.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b9abe5693b9e0de56b7d20728f4d0e6333c5822d --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/environment.yaml @@ -0,0 +1,16 @@ +name: midas-py310 +channels: + - pytorch + - defaults +dependencies: + - nvidia::cudatoolkit=11.7 + - python=3.10.8 + - pytorch::pytorch=1.13.0 + - torchvision=0.14.0 + - pip=22.3.1 + - numpy=1.23.4 + - pip: + - opencv-python==4.6.0.66 + - imutils==0.5.4 + - timm==0.6.12 + - einops==0.6.0 \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/hubconf.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/hubconf.py new file mode 100644 index 0000000000000000000000000000000000000000..0d638be5151c4e305daff0c47d1ea3fc8066377d --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/hubconf.py @@ -0,0 +1,435 @@ +dependencies = ["torch"] + +import torch + +from midas.dpt_depth import DPTDepthModel +from midas.midas_net import MidasNet +from midas.midas_net_custom import MidasNet_small + +def DPT_BEiT_L_512(pretrained=True, **kwargs): + """ # This docstring shows up in hub.help() + MiDaS DPT_BEiT_L_512 model for monocular depth estimation + pretrained (bool): load pretrained weights into model + """ + + model = DPTDepthModel( + path=None, + backbone="beitl16_512", + non_negative=True, + ) + + if pretrained: + checkpoint = ( + "https://github.com/isl-org/MiDaS/releases/download/v3_1/dpt_beit_large_512.pt" + ) + state_dict = torch.hub.load_state_dict_from_url( + checkpoint, 
map_location=torch.device('cpu'), progress=True, check_hash=True + ) + model.load_state_dict(state_dict) + + return model + +def DPT_BEiT_L_384(pretrained=True, **kwargs): + """ # This docstring shows up in hub.help() + MiDaS DPT_BEiT_L_384 model for monocular depth estimation + pretrained (bool): load pretrained weights into model + """ + + model = DPTDepthModel( + path=None, + backbone="beitl16_384", + non_negative=True, + ) + + if pretrained: + checkpoint = ( + "https://github.com/isl-org/MiDaS/releases/download/v3_1/dpt_beit_large_384.pt" + ) + state_dict = torch.hub.load_state_dict_from_url( + checkpoint, map_location=torch.device('cpu'), progress=True, check_hash=True + ) + model.load_state_dict(state_dict) + + return model + +def DPT_BEiT_B_384(pretrained=True, **kwargs): + """ # This docstring shows up in hub.help() + MiDaS DPT_BEiT_B_384 model for monocular depth estimation + pretrained (bool): load pretrained weights into model + """ + + model = DPTDepthModel( + path=None, + backbone="beitb16_384", + non_negative=True, + ) + + if pretrained: + checkpoint = ( + "https://github.com/isl-org/MiDaS/releases/download/v3_1/dpt_beit_base_384.pt" + ) + state_dict = torch.hub.load_state_dict_from_url( + checkpoint, map_location=torch.device('cpu'), progress=True, check_hash=True + ) + model.load_state_dict(state_dict) + + return model + +def DPT_SwinV2_L_384(pretrained=True, **kwargs): + """ # This docstring shows up in hub.help() + MiDaS DPT_SwinV2_L_384 model for monocular depth estimation + pretrained (bool): load pretrained weights into model + """ + + model = DPTDepthModel( + path=None, + backbone="swin2l24_384", + non_negative=True, + ) + + if pretrained: + checkpoint = ( + "https://github.com/isl-org/MiDaS/releases/download/v3_1/dpt_swin2_large_384.pt" + ) + state_dict = torch.hub.load_state_dict_from_url( + checkpoint, map_location=torch.device('cpu'), progress=True, check_hash=True + ) + model.load_state_dict(state_dict) + + return model + +def DPT_SwinV2_B_384(pretrained=True, **kwargs): + """ # This docstring shows up in hub.help() + MiDaS DPT_SwinV2_B_384 model for monocular depth estimation + pretrained (bool): load pretrained weights into model + """ + + model = DPTDepthModel( + path=None, + backbone="swin2b24_384", + non_negative=True, + ) + + if pretrained: + checkpoint = ( + "https://github.com/isl-org/MiDaS/releases/download/v3_1/dpt_swin2_base_384.pt" + ) + state_dict = torch.hub.load_state_dict_from_url( + checkpoint, map_location=torch.device('cpu'), progress=True, check_hash=True + ) + model.load_state_dict(state_dict) + + return model + +def DPT_SwinV2_T_256(pretrained=True, **kwargs): + """ # This docstring shows up in hub.help() + MiDaS DPT_SwinV2_T_256 model for monocular depth estimation + pretrained (bool): load pretrained weights into model + """ + + model = DPTDepthModel( + path=None, + backbone="swin2t16_256", + non_negative=True, + ) + + if pretrained: + checkpoint = ( + "https://github.com/isl-org/MiDaS/releases/download/v3_1/dpt_swin2_tiny_256.pt" + ) + state_dict = torch.hub.load_state_dict_from_url( + checkpoint, map_location=torch.device('cpu'), progress=True, check_hash=True + ) + model.load_state_dict(state_dict) + + return model + +def DPT_Swin_L_384(pretrained=True, **kwargs): + """ # This docstring shows up in hub.help() + MiDaS DPT_Swin_L_384 model for monocular depth estimation + pretrained (bool): load pretrained weights into model + """ + + model = DPTDepthModel( + path=None, + backbone="swinl12_384", + non_negative=True, + ) + + if pretrained: 
+ checkpoint = ( + "https://github.com/isl-org/MiDaS/releases/download/v3_1/dpt_swin_large_384.pt" + ) + state_dict = torch.hub.load_state_dict_from_url( + checkpoint, map_location=torch.device('cpu'), progress=True, check_hash=True + ) + model.load_state_dict(state_dict) + + return model + +def DPT_Next_ViT_L_384(pretrained=True, **kwargs): + """ # This docstring shows up in hub.help() + MiDaS DPT_Next_ViT_L_384 model for monocular depth estimation + pretrained (bool): load pretrained weights into model + """ + + model = DPTDepthModel( + path=None, + backbone="next_vit_large_6m", + non_negative=True, + ) + + if pretrained: + checkpoint = ( + "https://github.com/isl-org/MiDaS/releases/download/v3_1/dpt_next_vit_large_384.pt" + ) + state_dict = torch.hub.load_state_dict_from_url( + checkpoint, map_location=torch.device('cpu'), progress=True, check_hash=True + ) + model.load_state_dict(state_dict) + + return model + +def DPT_LeViT_224(pretrained=True, **kwargs): + """ # This docstring shows up in hub.help() + MiDaS DPT_LeViT_224 model for monocular depth estimation + pretrained (bool): load pretrained weights into model + """ + + model = DPTDepthModel( + path=None, + backbone="levit_384", + non_negative=True, + head_features_1=64, + head_features_2=8, + ) + + if pretrained: + checkpoint = ( + "https://github.com/isl-org/MiDaS/releases/download/v3_1/dpt_levit_224.pt" + ) + state_dict = torch.hub.load_state_dict_from_url( + checkpoint, map_location=torch.device('cpu'), progress=True, check_hash=True + ) + model.load_state_dict(state_dict) + + return model + +def DPT_Large(pretrained=True, **kwargs): + """ # This docstring shows up in hub.help() + MiDaS DPT-Large model for monocular depth estimation + pretrained (bool): load pretrained weights into model + """ + + model = DPTDepthModel( + path=None, + backbone="vitl16_384", + non_negative=True, + ) + + if pretrained: + checkpoint = ( + "https://github.com/isl-org/MiDaS/releases/download/v3/dpt_large_384.pt" + ) + state_dict = torch.hub.load_state_dict_from_url( + checkpoint, map_location=torch.device('cpu'), progress=True, check_hash=True + ) + model.load_state_dict(state_dict) + + return model + +def DPT_Hybrid(pretrained=True, **kwargs): + """ # This docstring shows up in hub.help() + MiDaS DPT-Hybrid model for monocular depth estimation + pretrained (bool): load pretrained weights into model + """ + + model = DPTDepthModel( + path=None, + backbone="vitb_rn50_384", + non_negative=True, + ) + + if pretrained: + checkpoint = ( + "https://github.com/isl-org/MiDaS/releases/download/v3/dpt_hybrid_384.pt" + ) + state_dict = torch.hub.load_state_dict_from_url( + checkpoint, map_location=torch.device('cpu'), progress=True, check_hash=True + ) + model.load_state_dict(state_dict) + + return model + +def MiDaS(pretrained=True, **kwargs): + """ # This docstring shows up in hub.help() + MiDaS v2.1 model for monocular depth estimation + pretrained (bool): load pretrained weights into model + """ + + model = MidasNet() + + if pretrained: + checkpoint = ( + "https://github.com/isl-org/MiDaS/releases/download/v2_1/midas_v21_384.pt" + ) + state_dict = torch.hub.load_state_dict_from_url( + checkpoint, map_location=torch.device('cpu'), progress=True, check_hash=True + ) + model.load_state_dict(state_dict) + + return model + +def MiDaS_small(pretrained=True, **kwargs): + """ # This docstring shows up in hub.help() + MiDaS v2.1 small model for monocular depth estimation on resource-constrained devices + pretrained (bool): load pretrained weights into model + """ + 
+ model = MidasNet_small(None, features=64, backbone="efficientnet_lite3", exportable=True, non_negative=True, blocks={'expand': True}) + + if pretrained: + checkpoint = ( + "https://github.com/isl-org/MiDaS/releases/download/v2_1/midas_v21_small_256.pt" + ) + state_dict = torch.hub.load_state_dict_from_url( + checkpoint, map_location=torch.device('cpu'), progress=True, check_hash=True + ) + model.load_state_dict(state_dict) + + return model + + +def transforms(): + import cv2 + from torchvision.transforms import Compose + from midas.transforms import Resize, NormalizeImage, PrepareForNet + from midas import transforms + + transforms.default_transform = Compose( + [ + lambda img: {"image": img / 255.0}, + Resize( + 384, + 384, + resize_target=None, + keep_aspect_ratio=True, + ensure_multiple_of=32, + resize_method="upper_bound", + image_interpolation_method=cv2.INTER_CUBIC, + ), + NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + PrepareForNet(), + lambda sample: torch.from_numpy(sample["image"]).unsqueeze(0), + ] + ) + + transforms.small_transform = Compose( + [ + lambda img: {"image": img / 255.0}, + Resize( + 256, + 256, + resize_target=None, + keep_aspect_ratio=True, + ensure_multiple_of=32, + resize_method="upper_bound", + image_interpolation_method=cv2.INTER_CUBIC, + ), + NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + PrepareForNet(), + lambda sample: torch.from_numpy(sample["image"]).unsqueeze(0), + ] + ) + + transforms.dpt_transform = Compose( + [ + lambda img: {"image": img / 255.0}, + Resize( + 384, + 384, + resize_target=None, + keep_aspect_ratio=True, + ensure_multiple_of=32, + resize_method="minimal", + image_interpolation_method=cv2.INTER_CUBIC, + ), + NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]), + PrepareForNet(), + lambda sample: torch.from_numpy(sample["image"]).unsqueeze(0), + ] + ) + + transforms.beit512_transform = Compose( + [ + lambda img: {"image": img / 255.0}, + Resize( + 512, + 512, + resize_target=None, + keep_aspect_ratio=True, + ensure_multiple_of=32, + resize_method="minimal", + image_interpolation_method=cv2.INTER_CUBIC, + ), + NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]), + PrepareForNet(), + lambda sample: torch.from_numpy(sample["image"]).unsqueeze(0), + ] + ) + + transforms.swin384_transform = Compose( + [ + lambda img: {"image": img / 255.0}, + Resize( + 384, + 384, + resize_target=None, + keep_aspect_ratio=False, + ensure_multiple_of=32, + resize_method="minimal", + image_interpolation_method=cv2.INTER_CUBIC, + ), + NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]), + PrepareForNet(), + lambda sample: torch.from_numpy(sample["image"]).unsqueeze(0), + ] + ) + + transforms.swin256_transform = Compose( + [ + lambda img: {"image": img / 255.0}, + Resize( + 256, + 256, + resize_target=None, + keep_aspect_ratio=False, + ensure_multiple_of=32, + resize_method="minimal", + image_interpolation_method=cv2.INTER_CUBIC, + ), + NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]), + PrepareForNet(), + lambda sample: torch.from_numpy(sample["image"]).unsqueeze(0), + ] + ) + + transforms.levit_transform = Compose( + [ + lambda img: {"image": img / 255.0}, + Resize( + 224, + 224, + resize_target=None, + keep_aspect_ratio=False, + ensure_multiple_of=32, + resize_method="minimal", + image_interpolation_method=cv2.INTER_CUBIC, + ), + NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]), + PrepareForNet(), + lambda sample: torch.from_numpy(sample["image"]).unsqueeze(0), + ] + ) + 
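    # Illustrative usage sketch for the hub entry points defined above. The entry-point
    # names ("DPT_BEiT_L_512", "transforms", "beit512_transform", ...) come from this file;
    # the "isl-org/MiDaS" repo path is an assumption based on the checkpoint URLs and may
    # need to be replaced by a local clone (torch.hub.load(..., source="local")) instead.
    #
    #     import cv2
    #     import torch
    #
    #     model = torch.hub.load("isl-org/MiDaS", "DPT_BEiT_L_512", pretrained=True)
    #     model.eval()
    #
    #     midas_transforms = torch.hub.load("isl-org/MiDaS", "transforms")
    #     img = cv2.cvtColor(cv2.imread("input.jpg"), cv2.COLOR_BGR2RGB)
    #     batch = midas_transforms.beit512_transform(img)   # 1 x 3 x H' x W' float tensor
    #
    #     with torch.no_grad():
    #         prediction = model(batch)                      # relative inverse depth, 1 x H' x W'
    #         prediction = torch.nn.functional.interpolate(
    #             prediction.unsqueeze(1), size=img.shape[:2],
    #             mode="bicubic", align_corners=False,
    #         ).squeeze()                                    # back to the original image resolution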
+ return transforms diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/input/.placeholder b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/input/.placeholder new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/beit.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/beit.py new file mode 100644 index 0000000000000000000000000000000000000000..642a8096a047c47cb7bd089b0d56e34497083236 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/beit.py @@ -0,0 +1,198 @@ +import timm +import torch +import types + +import numpy as np +import torch.nn.functional as F + +from .utils import forward_adapted_unflatten, make_backbone_default +from timm.models.beit import gen_relative_position_index +from torch.utils.checkpoint import checkpoint +from typing import Optional + + +def forward_beit(pretrained, x): + return forward_adapted_unflatten(pretrained, x, "forward_features") + + +def patch_embed_forward(self, x): + """ + Modification of timm.models.layers.patch_embed.py: PatchEmbed.forward to support arbitrary window sizes. + """ + x = self.proj(x) + if self.flatten: + x = x.flatten(2).transpose(1, 2) + x = self.norm(x) + return x + + +def _get_rel_pos_bias(self, window_size): + """ + Modification of timm.models.beit.py: Attention._get_rel_pos_bias to support arbitrary window sizes. + """ + old_height = 2 * self.window_size[0] - 1 + old_width = 2 * self.window_size[1] - 1 + + new_height = 2 * window_size[0] - 1 + new_width = 2 * window_size[1] - 1 + + old_relative_position_bias_table = self.relative_position_bias_table + + old_num_relative_distance = self.num_relative_distance + new_num_relative_distance = new_height * new_width + 3 + + old_sub_table = old_relative_position_bias_table[:old_num_relative_distance - 3] + + old_sub_table = old_sub_table.reshape(1, old_width, old_height, -1).permute(0, 3, 1, 2) + new_sub_table = F.interpolate(old_sub_table, size=(int(new_height), int(new_width)), mode="bilinear") + new_sub_table = new_sub_table.permute(0, 2, 3, 1).reshape(new_num_relative_distance - 3, -1) + + new_relative_position_bias_table = torch.cat( + [new_sub_table, old_relative_position_bias_table[old_num_relative_distance - 3:]]) + + key = str(window_size[1]) + "," + str(window_size[0]) + if key not in self.relative_position_indices.keys(): + self.relative_position_indices[key] = gen_relative_position_index(window_size) + + relative_position_bias = new_relative_position_bias_table[ + self.relative_position_indices[key].view(-1)].view( + window_size[0] * window_size[1] + 1, + window_size[0] * window_size[1] + 1, -1) # Wh*Ww,Wh*Ww,nH + relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww + return relative_position_bias.unsqueeze(0) + + +def attention_forward(self, x, resolution, shared_rel_pos_bias: Optional[torch.Tensor] = None): + """ + Modification of timm.models.beit.py: Attention.forward to support arbitrary window sizes. 
+ """ + B, N, C = x.shape + + qkv_bias = torch.cat((self.q_bias, self.k_bias, self.v_bias)) if self.q_bias is not None else None + qkv = F.linear(input=x, weight=self.qkv.weight, bias=qkv_bias) + qkv = qkv.reshape(B, N, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4) + q, k, v = qkv.unbind(0) # make torchscript happy (cannot use tensor as tuple) + + q = q * self.scale + attn = (q @ k.transpose(-2, -1)) + + if self.relative_position_bias_table is not None: + window_size = tuple(np.array(resolution) // 16) + attn = attn + self._get_rel_pos_bias(window_size) + if shared_rel_pos_bias is not None: + attn = attn + shared_rel_pos_bias + + attn = attn.softmax(dim=-1) + attn = self.attn_drop(attn) + + x = (attn @ v).transpose(1, 2).reshape(B, N, -1) + x = self.proj(x) + x = self.proj_drop(x) + return x + + +def block_forward(self, x, resolution, shared_rel_pos_bias: Optional[torch.Tensor] = None): + """ + Modification of timm.models.beit.py: Block.forward to support arbitrary window sizes. + """ + if hasattr(self, 'drop_path1') and not hasattr(self, 'drop_path'): + self.drop_path = self.drop_path1 + if self.gamma_1 is None: + x = x + self.drop_path(self.attn(self.norm1(x), resolution, shared_rel_pos_bias=shared_rel_pos_bias)) + x = x + self.drop_path(self.mlp(self.norm2(x))) + else: + x = x + self.drop_path(self.gamma_1 * self.attn(self.norm1(x), resolution, + shared_rel_pos_bias=shared_rel_pos_bias)) + x = x + self.drop_path(self.gamma_2 * self.mlp(self.norm2(x))) + return x + + +def beit_forward_features(self, x): + """ + Modification of timm.models.beit.py: Beit.forward_features to support arbitrary window sizes. + """ + resolution = x.shape[2:] + + x = self.patch_embed(x) + x = torch.cat((self.cls_token.expand(x.shape[0], -1, -1), x), dim=1) + if self.pos_embed is not None: + x = x + self.pos_embed + x = self.pos_drop(x) + + rel_pos_bias = self.rel_pos_bias() if self.rel_pos_bias is not None else None + for blk in self.blocks: + if self.grad_checkpointing and not torch.jit.is_scripting(): + x = checkpoint(blk, x, shared_rel_pos_bias=rel_pos_bias) + else: + x = blk(x, resolution, shared_rel_pos_bias=rel_pos_bias) + x = self.norm(x) + return x + + +def _make_beit_backbone( + model, + features=[96, 192, 384, 768], + size=[384, 384], + hooks=[0, 4, 8, 11], + vit_features=768, + use_readout="ignore", + start_index=1, + start_index_readout=1, +): + backbone = make_backbone_default(model, features, size, hooks, vit_features, use_readout, start_index, + start_index_readout) + + backbone.model.patch_embed.forward = types.MethodType(patch_embed_forward, backbone.model.patch_embed) + backbone.model.forward_features = types.MethodType(beit_forward_features, backbone.model) + + for block in backbone.model.blocks: + attn = block.attn + attn._get_rel_pos_bias = types.MethodType(_get_rel_pos_bias, attn) + attn.forward = types.MethodType(attention_forward, attn) + attn.relative_position_indices = {} + + block.forward = types.MethodType(block_forward, block) + + return backbone + + +def _make_pretrained_beitl16_512(pretrained, use_readout="ignore", hooks=None): + model = timm.create_model("beit_large_patch16_512", pretrained=pretrained) + + hooks = [5, 11, 17, 23] if hooks is None else hooks + + features = [256, 512, 1024, 1024] + + return _make_beit_backbone( + model, + features=features, + size=[512, 512], + hooks=hooks, + vit_features=1024, + use_readout=use_readout, + ) + + +def _make_pretrained_beitl16_384(pretrained, use_readout="ignore", hooks=None): + model = timm.create_model("beit_large_patch16_384", 
pretrained=pretrained) + + hooks = [5, 11, 17, 23] if hooks is None else hooks + return _make_beit_backbone( + model, + features=[256, 512, 1024, 1024], + hooks=hooks, + vit_features=1024, + use_readout=use_readout, + ) + + +def _make_pretrained_beitb16_384(pretrained, use_readout="ignore", hooks=None): + model = timm.create_model("beit_base_patch16_384", pretrained=pretrained) + + hooks = [2, 5, 8, 11] if hooks is None else hooks + return _make_beit_backbone( + model, + features=[96, 192, 384, 768], + hooks=hooks, + use_readout=use_readout, + ) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/levit.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/levit.py new file mode 100644 index 0000000000000000000000000000000000000000..6d023a98702a0451806d26f33f8bccf931814f10 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/levit.py @@ -0,0 +1,106 @@ +import timm +import torch +import torch.nn as nn +import numpy as np + +from .utils import activations, get_activation, Transpose + + +def forward_levit(pretrained, x): + pretrained.model.forward_features(x) + + layer_1 = pretrained.activations["1"] + layer_2 = pretrained.activations["2"] + layer_3 = pretrained.activations["3"] + + layer_1 = pretrained.act_postprocess1(layer_1) + layer_2 = pretrained.act_postprocess2(layer_2) + layer_3 = pretrained.act_postprocess3(layer_3) + + return layer_1, layer_2, layer_3 + + +def _make_levit_backbone( + model, + hooks=[3, 11, 21], + patch_grid=[14, 14] +): + pretrained = nn.Module() + + pretrained.model = model + pretrained.model.blocks[hooks[0]].register_forward_hook(get_activation("1")) + pretrained.model.blocks[hooks[1]].register_forward_hook(get_activation("2")) + pretrained.model.blocks[hooks[2]].register_forward_hook(get_activation("3")) + + pretrained.activations = activations + + patch_grid_size = np.array(patch_grid, dtype=int) + + pretrained.act_postprocess1 = nn.Sequential( + Transpose(1, 2), + nn.Unflatten(2, torch.Size(patch_grid_size.tolist())) + ) + pretrained.act_postprocess2 = nn.Sequential( + Transpose(1, 2), + nn.Unflatten(2, torch.Size((np.ceil(patch_grid_size / 2).astype(int)).tolist())) + ) + pretrained.act_postprocess3 = nn.Sequential( + Transpose(1, 2), + nn.Unflatten(2, torch.Size((np.ceil(patch_grid_size / 4).astype(int)).tolist())) + ) + + return pretrained + + +class ConvTransposeNorm(nn.Sequential): + """ + Modification of + https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/levit.py: ConvNorm + such that ConvTranspose2d is used instead of Conv2d. 
+ """ + + def __init__( + self, in_chs, out_chs, kernel_size=1, stride=1, pad=0, dilation=1, + groups=1, bn_weight_init=1): + super().__init__() + self.add_module('c', + nn.ConvTranspose2d(in_chs, out_chs, kernel_size, stride, pad, dilation, groups, bias=False)) + self.add_module('bn', nn.BatchNorm2d(out_chs)) + + nn.init.constant_(self.bn.weight, bn_weight_init) + + @torch.no_grad() + def fuse(self): + c, bn = self._modules.values() + w = bn.weight / (bn.running_var + bn.eps) ** 0.5 + w = c.weight * w[:, None, None, None] + b = bn.bias - bn.running_mean * bn.weight / (bn.running_var + bn.eps) ** 0.5 + m = nn.ConvTranspose2d( + w.size(1), w.size(0), w.shape[2:], stride=self.c.stride, + padding=self.c.padding, dilation=self.c.dilation, groups=self.c.groups) + m.weight.data.copy_(w) + m.bias.data.copy_(b) + return m + + +def stem_b4_transpose(in_chs, out_chs, activation): + """ + Modification of + https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/levit.py: stem_b16 + such that ConvTranspose2d is used instead of Conv2d and stem is also reduced to the half. + """ + return nn.Sequential( + ConvTransposeNorm(in_chs, out_chs, 3, 2, 1), + activation(), + ConvTransposeNorm(out_chs, out_chs // 2, 3, 2, 1), + activation()) + + +def _make_pretrained_levit_384(pretrained, hooks=None): + model = timm.create_model("levit_384", pretrained=pretrained) + + hooks = [3, 11, 21] if hooks == None else hooks + return _make_levit_backbone( + model, + hooks=hooks + ) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/next_vit.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/next_vit.py new file mode 100644 index 0000000000000000000000000000000000000000..8afdd8b743b5ab023a359dc3b721e601b1a40d11 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/next_vit.py @@ -0,0 +1,39 @@ +import timm + +import torch.nn as nn + +from pathlib import Path +from .utils import activations, forward_default, get_activation + +from ..external.next_vit.classification.nextvit import * + + +def forward_next_vit(pretrained, x): + return forward_default(pretrained, x, "forward") + + +def _make_next_vit_backbone( + model, + hooks=[2, 6, 36, 39], +): + pretrained = nn.Module() + + pretrained.model = model + pretrained.model.features[hooks[0]].register_forward_hook(get_activation("1")) + pretrained.model.features[hooks[1]].register_forward_hook(get_activation("2")) + pretrained.model.features[hooks[2]].register_forward_hook(get_activation("3")) + pretrained.model.features[hooks[3]].register_forward_hook(get_activation("4")) + + pretrained.activations = activations + + return pretrained + + +def _make_pretrained_next_vit_large_6m(hooks=None): + model = timm.create_model("nextvit_large") + + hooks = [2, 6, 36, 39] if hooks == None else hooks + return _make_next_vit_backbone( + model, + hooks=hooks, + ) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/swin.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/swin.py new file mode 100644 index 0000000000000000000000000000000000000000..f8c71367e3e78b087f80b2ab3e2f495a9c372f1a --- /dev/null +++ 
b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/swin.py @@ -0,0 +1,13 @@ +import timm + +from .swin_common import _make_swin_backbone + + +def _make_pretrained_swinl12_384(pretrained, hooks=None): + model = timm.create_model("swin_large_patch4_window12_384", pretrained=pretrained) + + hooks = [1, 1, 17, 1] if hooks == None else hooks + return _make_swin_backbone( + model, + hooks=hooks + ) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/swin2.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/swin2.py new file mode 100644 index 0000000000000000000000000000000000000000..ce4c8f1d6fc1807a207dc6b9a261c6f7b14a87a3 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/swin2.py @@ -0,0 +1,34 @@ +import timm + +from .swin_common import _make_swin_backbone + + +def _make_pretrained_swin2l24_384(pretrained, hooks=None): + model = timm.create_model("swinv2_large_window12to24_192to384_22kft1k", pretrained=pretrained) + + hooks = [1, 1, 17, 1] if hooks == None else hooks + return _make_swin_backbone( + model, + hooks=hooks + ) + + +def _make_pretrained_swin2b24_384(pretrained, hooks=None): + model = timm.create_model("swinv2_base_window12to24_192to384_22kft1k", pretrained=pretrained) + + hooks = [1, 1, 17, 1] if hooks == None else hooks + return _make_swin_backbone( + model, + hooks=hooks + ) + + +def _make_pretrained_swin2t16_256(pretrained, hooks=None): + model = timm.create_model("swinv2_tiny_window16_256", pretrained=pretrained) + + hooks = [1, 1, 5, 1] if hooks == None else hooks + return _make_swin_backbone( + model, + hooks=hooks, + patch_grid=[64, 64] + ) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/swin_common.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/swin_common.py new file mode 100644 index 0000000000000000000000000000000000000000..94d63d408f18511179d90b3ac6f697385d1e556d --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/swin_common.py @@ -0,0 +1,52 @@ +import torch + +import torch.nn as nn +import numpy as np + +from .utils import activations, forward_default, get_activation, Transpose + + +def forward_swin(pretrained, x): + return forward_default(pretrained, x) + + +def _make_swin_backbone( + model, + hooks=[1, 1, 17, 1], + patch_grid=[96, 96] +): + pretrained = nn.Module() + + pretrained.model = model + pretrained.model.layers[0].blocks[hooks[0]].register_forward_hook(get_activation("1")) + pretrained.model.layers[1].blocks[hooks[1]].register_forward_hook(get_activation("2")) + pretrained.model.layers[2].blocks[hooks[2]].register_forward_hook(get_activation("3")) + pretrained.model.layers[3].blocks[hooks[3]].register_forward_hook(get_activation("4")) + + pretrained.activations = activations + + if hasattr(model, "patch_grid"): + used_patch_grid = model.patch_grid + else: + used_patch_grid = patch_grid + + patch_grid_size = np.array(used_patch_grid, dtype=int) + + pretrained.act_postprocess1 = nn.Sequential( + Transpose(1, 2), + nn.Unflatten(2, torch.Size(patch_grid_size.tolist())) + ) + pretrained.act_postprocess2 = nn.Sequential( + Transpose(1, 2), + 
nn.Unflatten(2, torch.Size((patch_grid_size // 2).tolist())) + ) + pretrained.act_postprocess3 = nn.Sequential( + Transpose(1, 2), + nn.Unflatten(2, torch.Size((patch_grid_size // 4).tolist())) + ) + pretrained.act_postprocess4 = nn.Sequential( + Transpose(1, 2), + nn.Unflatten(2, torch.Size((patch_grid_size // 8).tolist())) + ) + + return pretrained diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/utils.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..0558899dddcfccec5f01a764d4f21738eb612149 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/utils.py @@ -0,0 +1,249 @@ +import torch + +import torch.nn as nn + + +class Slice(nn.Module): + def __init__(self, start_index=1): + super(Slice, self).__init__() + self.start_index = start_index + + def forward(self, x): + return x[:, self.start_index:] + + +class AddReadout(nn.Module): + def __init__(self, start_index=1): + super(AddReadout, self).__init__() + self.start_index = start_index + + def forward(self, x): + if self.start_index == 2: + readout = (x[:, 0] + x[:, 1]) / 2 + else: + readout = x[:, 0] + return x[:, self.start_index:] + readout.unsqueeze(1) + + +class ProjectReadout(nn.Module): + def __init__(self, in_features, start_index=1): + super(ProjectReadout, self).__init__() + self.start_index = start_index + + self.project = nn.Sequential(nn.Linear(2 * in_features, in_features), nn.GELU()) + + def forward(self, x): + readout = x[:, 0].unsqueeze(1).expand_as(x[:, self.start_index:]) + features = torch.cat((x[:, self.start_index:], readout), -1) + + return self.project(features) + + +class Transpose(nn.Module): + def __init__(self, dim0, dim1): + super(Transpose, self).__init__() + self.dim0 = dim0 + self.dim1 = dim1 + + def forward(self, x): + x = x.transpose(self.dim0, self.dim1) + return x + + +activations = {} + + +def get_activation(name): + def hook(model, input, output): + activations[name] = output + + return hook + + +def forward_default(pretrained, x, function_name="forward_features"): + exec(f"pretrained.model.{function_name}(x)") + + layer_1 = pretrained.activations["1"] + layer_2 = pretrained.activations["2"] + layer_3 = pretrained.activations["3"] + layer_4 = pretrained.activations["4"] + + if hasattr(pretrained, "act_postprocess1"): + layer_1 = pretrained.act_postprocess1(layer_1) + if hasattr(pretrained, "act_postprocess2"): + layer_2 = pretrained.act_postprocess2(layer_2) + if hasattr(pretrained, "act_postprocess3"): + layer_3 = pretrained.act_postprocess3(layer_3) + if hasattr(pretrained, "act_postprocess4"): + layer_4 = pretrained.act_postprocess4(layer_4) + + return layer_1, layer_2, layer_3, layer_4 + + +def forward_adapted_unflatten(pretrained, x, function_name="forward_features"): + b, c, h, w = x.shape + + exec(f"glob = pretrained.model.{function_name}(x)") + + layer_1 = pretrained.activations["1"] + layer_2 = pretrained.activations["2"] + layer_3 = pretrained.activations["3"] + layer_4 = pretrained.activations["4"] + + layer_1 = pretrained.act_postprocess1[0:2](layer_1) + layer_2 = pretrained.act_postprocess2[0:2](layer_2) + layer_3 = pretrained.act_postprocess3[0:2](layer_3) + layer_4 = pretrained.act_postprocess4[0:2](layer_4) + + unflatten = nn.Sequential( + nn.Unflatten( + 2, + torch.Size( + [ + h // 
pretrained.model.patch_size[1], + w // pretrained.model.patch_size[0], + ] + ), + ) + ) + + if layer_1.ndim == 3: + layer_1 = unflatten(layer_1) + if layer_2.ndim == 3: + layer_2 = unflatten(layer_2) + if layer_3.ndim == 3: + layer_3 = unflatten(layer_3) + if layer_4.ndim == 3: + layer_4 = unflatten(layer_4) + + layer_1 = pretrained.act_postprocess1[3: len(pretrained.act_postprocess1)](layer_1) + layer_2 = pretrained.act_postprocess2[3: len(pretrained.act_postprocess2)](layer_2) + layer_3 = pretrained.act_postprocess3[3: len(pretrained.act_postprocess3)](layer_3) + layer_4 = pretrained.act_postprocess4[3: len(pretrained.act_postprocess4)](layer_4) + + return layer_1, layer_2, layer_3, layer_4 + + +def get_readout_oper(vit_features, features, use_readout, start_index=1): + if use_readout == "ignore": + readout_oper = [Slice(start_index)] * len(features) + elif use_readout == "add": + readout_oper = [AddReadout(start_index)] * len(features) + elif use_readout == "project": + readout_oper = [ + ProjectReadout(vit_features, start_index) for out_feat in features + ] + else: + assert ( + False + ), "wrong operation for readout token, use_readout can be 'ignore', 'add', or 'project'" + + return readout_oper + + +def make_backbone_default( + model, + features=[96, 192, 384, 768], + size=[384, 384], + hooks=[2, 5, 8, 11], + vit_features=768, + use_readout="ignore", + start_index=1, + start_index_readout=1, +): + pretrained = nn.Module() + + pretrained.model = model + pretrained.model.blocks[hooks[0]].register_forward_hook(get_activation("1")) + pretrained.model.blocks[hooks[1]].register_forward_hook(get_activation("2")) + pretrained.model.blocks[hooks[2]].register_forward_hook(get_activation("3")) + pretrained.model.blocks[hooks[3]].register_forward_hook(get_activation("4")) + + pretrained.activations = activations + + readout_oper = get_readout_oper(vit_features, features, use_readout, start_index_readout) + + # 32, 48, 136, 384 + pretrained.act_postprocess1 = nn.Sequential( + readout_oper[0], + Transpose(1, 2), + nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])), + nn.Conv2d( + in_channels=vit_features, + out_channels=features[0], + kernel_size=1, + stride=1, + padding=0, + ), + nn.ConvTranspose2d( + in_channels=features[0], + out_channels=features[0], + kernel_size=4, + stride=4, + padding=0, + bias=True, + dilation=1, + groups=1, + ), + ) + + pretrained.act_postprocess2 = nn.Sequential( + readout_oper[1], + Transpose(1, 2), + nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])), + nn.Conv2d( + in_channels=vit_features, + out_channels=features[1], + kernel_size=1, + stride=1, + padding=0, + ), + nn.ConvTranspose2d( + in_channels=features[1], + out_channels=features[1], + kernel_size=2, + stride=2, + padding=0, + bias=True, + dilation=1, + groups=1, + ), + ) + + pretrained.act_postprocess3 = nn.Sequential( + readout_oper[2], + Transpose(1, 2), + nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])), + nn.Conv2d( + in_channels=vit_features, + out_channels=features[2], + kernel_size=1, + stride=1, + padding=0, + ), + ) + + pretrained.act_postprocess4 = nn.Sequential( + readout_oper[3], + Transpose(1, 2), + nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])), + nn.Conv2d( + in_channels=vit_features, + out_channels=features[3], + kernel_size=1, + stride=1, + padding=0, + ), + nn.Conv2d( + in_channels=features[3], + out_channels=features[3], + kernel_size=3, + stride=2, + padding=1, + ), + ) + + pretrained.model.start_index = start_index + pretrained.model.patch_size = 
[16, 16] + + return pretrained diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/vit.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/vit.py new file mode 100644 index 0000000000000000000000000000000000000000..413f9693bd4548342280e329c9128c1a52cea920 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/vit.py @@ -0,0 +1,221 @@ +import torch +import torch.nn as nn +import timm +import types +import math +import torch.nn.functional as F + +from .utils import (activations, forward_adapted_unflatten, get_activation, get_readout_oper, + make_backbone_default, Transpose) + + +def forward_vit(pretrained, x): + return forward_adapted_unflatten(pretrained, x, "forward_flex") + + +def _resize_pos_embed(self, posemb, gs_h, gs_w): + posemb_tok, posemb_grid = ( + posemb[:, : self.start_index], + posemb[0, self.start_index:], + ) + + gs_old = int(math.sqrt(len(posemb_grid))) + + posemb_grid = posemb_grid.reshape(1, gs_old, gs_old, -1).permute(0, 3, 1, 2) + posemb_grid = F.interpolate(posemb_grid, size=(gs_h, gs_w), mode="bilinear") + posemb_grid = posemb_grid.permute(0, 2, 3, 1).reshape(1, gs_h * gs_w, -1) + + posemb = torch.cat([posemb_tok, posemb_grid], dim=1) + + return posemb + + +def forward_flex(self, x): + b, c, h, w = x.shape + + pos_embed = self._resize_pos_embed( + self.pos_embed, h // self.patch_size[1], w // self.patch_size[0] + ) + + B = x.shape[0] + + if hasattr(self.patch_embed, "backbone"): + x = self.patch_embed.backbone(x) + if isinstance(x, (list, tuple)): + x = x[-1] # last feature if backbone outputs list/tuple of features + + x = self.patch_embed.proj(x).flatten(2).transpose(1, 2) + + if getattr(self, "dist_token", None) is not None: + cls_tokens = self.cls_token.expand( + B, -1, -1 + ) # stole cls_tokens impl from Phil Wang, thanks + dist_token = self.dist_token.expand(B, -1, -1) + x = torch.cat((cls_tokens, dist_token, x), dim=1) + else: + if self.no_embed_class: + x = x + pos_embed + cls_tokens = self.cls_token.expand( + B, -1, -1 + ) # stole cls_tokens impl from Phil Wang, thanks + x = torch.cat((cls_tokens, x), dim=1) + + if not self.no_embed_class: + x = x + pos_embed + x = self.pos_drop(x) + + for blk in self.blocks: + x = blk(x) + + x = self.norm(x) + + return x + + +def _make_vit_b16_backbone( + model, + features=[96, 192, 384, 768], + size=[384, 384], + hooks=[2, 5, 8, 11], + vit_features=768, + use_readout="ignore", + start_index=1, + start_index_readout=1, +): + pretrained = make_backbone_default(model, features, size, hooks, vit_features, use_readout, start_index, + start_index_readout) + + # We inject this function into the VisionTransformer instances so that + # we can use it with interpolated position embeddings without modifying the library source. 
+ pretrained.model.forward_flex = types.MethodType(forward_flex, pretrained.model) + pretrained.model._resize_pos_embed = types.MethodType( + _resize_pos_embed, pretrained.model + ) + + return pretrained + + +def _make_pretrained_vitl16_384(pretrained, use_readout="ignore", hooks=None): + model = timm.create_model("vit_large_patch16_384", pretrained=pretrained) + + hooks = [5, 11, 17, 23] if hooks == None else hooks + return _make_vit_b16_backbone( + model, + features=[256, 512, 1024, 1024], + hooks=hooks, + vit_features=1024, + use_readout=use_readout, + ) + + +def _make_pretrained_vitb16_384(pretrained, use_readout="ignore", hooks=None): + model = timm.create_model("vit_base_patch16_384", pretrained=pretrained) + + hooks = [2, 5, 8, 11] if hooks == None else hooks + return _make_vit_b16_backbone( + model, features=[96, 192, 384, 768], hooks=hooks, use_readout=use_readout + ) + + +def _make_vit_b_rn50_backbone( + model, + features=[256, 512, 768, 768], + size=[384, 384], + hooks=[0, 1, 8, 11], + vit_features=768, + patch_size=[16, 16], + number_stages=2, + use_vit_only=False, + use_readout="ignore", + start_index=1, +): + pretrained = nn.Module() + + pretrained.model = model + + used_number_stages = 0 if use_vit_only else number_stages + for s in range(used_number_stages): + pretrained.model.patch_embed.backbone.stages[s].register_forward_hook( + get_activation(str(s + 1)) + ) + for s in range(used_number_stages, 4): + pretrained.model.blocks[hooks[s]].register_forward_hook(get_activation(str(s + 1))) + + pretrained.activations = activations + + readout_oper = get_readout_oper(vit_features, features, use_readout, start_index) + + for s in range(used_number_stages): + value = nn.Sequential(nn.Identity(), nn.Identity(), nn.Identity()) + exec(f"pretrained.act_postprocess{s + 1}=value") + for s in range(used_number_stages, 4): + if s < number_stages: + final_layer = nn.ConvTranspose2d( + in_channels=features[s], + out_channels=features[s], + kernel_size=4 // (2 ** s), + stride=4 // (2 ** s), + padding=0, + bias=True, + dilation=1, + groups=1, + ) + elif s > number_stages: + final_layer = nn.Conv2d( + in_channels=features[3], + out_channels=features[3], + kernel_size=3, + stride=2, + padding=1, + ) + else: + final_layer = None + + layers = [ + readout_oper[s], + Transpose(1, 2), + nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])), + nn.Conv2d( + in_channels=vit_features, + out_channels=features[s], + kernel_size=1, + stride=1, + padding=0, + ), + ] + if final_layer is not None: + layers.append(final_layer) + + value = nn.Sequential(*layers) + exec(f"pretrained.act_postprocess{s + 1}=value") + + pretrained.model.start_index = start_index + pretrained.model.patch_size = patch_size + + # We inject this function into the VisionTransformer instances so that + # we can use it with interpolated position embeddings without modifying the library source. + pretrained.model.forward_flex = types.MethodType(forward_flex, pretrained.model) + + # We inject this function into the VisionTransformer instances so that + # we can use it with interpolated position embeddings without modifying the library source. 
+ pretrained.model._resize_pos_embed = types.MethodType( + _resize_pos_embed, pretrained.model + ) + + return pretrained + + +def _make_pretrained_vitb_rn50_384( + pretrained, use_readout="ignore", hooks=None, use_vit_only=False +): + model = timm.create_model("vit_base_resnet50_384", pretrained=pretrained) + + hooks = [0, 1, 8, 11] if hooks == None else hooks + return _make_vit_b_rn50_backbone( + model, + features=[256, 512, 768, 768], + size=[384, 384], + hooks=hooks, + use_vit_only=use_vit_only, + use_readout=use_readout, + ) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/base_model.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/base_model.py new file mode 100644 index 0000000000000000000000000000000000000000..5cf430239b47ec5ec07531263f26f5c24a2311cd --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/base_model.py @@ -0,0 +1,16 @@ +import torch + + +class BaseModel(torch.nn.Module): + def load(self, path): + """Load model from file. + + Args: + path (str): file path + """ + parameters = torch.load(path, map_location=torch.device('cpu')) + + if "optimizer" in parameters: + parameters = parameters["model"] + + self.load_state_dict(parameters) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/blocks.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/blocks.py new file mode 100644 index 0000000000000000000000000000000000000000..6d87a00680bb6ed9a6d7c3043ea30a1e90361794 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/blocks.py @@ -0,0 +1,439 @@ +import torch +import torch.nn as nn + +from .backbones.beit import ( + _make_pretrained_beitl16_512, + _make_pretrained_beitl16_384, + _make_pretrained_beitb16_384, + forward_beit, +) +from .backbones.swin_common import ( + forward_swin, +) +from .backbones.swin2 import ( + _make_pretrained_swin2l24_384, + _make_pretrained_swin2b24_384, + _make_pretrained_swin2t16_256, +) +from .backbones.swin import ( + _make_pretrained_swinl12_384, +) +from .backbones.levit import ( + _make_pretrained_levit_384, + forward_levit, +) +from .backbones.vit import ( + _make_pretrained_vitb_rn50_384, + _make_pretrained_vitl16_384, + _make_pretrained_vitb16_384, + forward_vit, +) + +def _make_encoder(backbone, features, use_pretrained, groups=1, expand=False, exportable=True, hooks=None, + use_vit_only=False, use_readout="ignore", in_features=[96, 256, 512, 1024]): + if backbone == "beitl16_512": + pretrained = _make_pretrained_beitl16_512( + use_pretrained, hooks=hooks, use_readout=use_readout + ) + scratch = _make_scratch( + [256, 512, 1024, 1024], features, groups=groups, expand=expand + ) # BEiT_512-L (backbone) + elif backbone == "beitl16_384": + pretrained = _make_pretrained_beitl16_384( + use_pretrained, hooks=hooks, use_readout=use_readout + ) + scratch = _make_scratch( + [256, 512, 1024, 1024], features, groups=groups, expand=expand + ) # BEiT_384-L (backbone) + elif backbone == "beitb16_384": + pretrained = _make_pretrained_beitb16_384( + use_pretrained, hooks=hooks, use_readout=use_readout + ) + scratch = _make_scratch( + [96, 192, 384, 768], features, groups=groups, expand=expand + ) # BEiT_384-B (backbone) + elif backbone == "swin2l24_384": + pretrained = 
_make_pretrained_swin2l24_384( + use_pretrained, hooks=hooks + ) + scratch = _make_scratch( + [192, 384, 768, 1536], features, groups=groups, expand=expand + ) # Swin2-L/12to24 (backbone) + elif backbone == "swin2b24_384": + pretrained = _make_pretrained_swin2b24_384( + use_pretrained, hooks=hooks + ) + scratch = _make_scratch( + [128, 256, 512, 1024], features, groups=groups, expand=expand + ) # Swin2-B/12to24 (backbone) + elif backbone == "swin2t16_256": + pretrained = _make_pretrained_swin2t16_256( + use_pretrained, hooks=hooks + ) + scratch = _make_scratch( + [96, 192, 384, 768], features, groups=groups, expand=expand + ) # Swin2-T/16 (backbone) + elif backbone == "swinl12_384": + pretrained = _make_pretrained_swinl12_384( + use_pretrained, hooks=hooks + ) + scratch = _make_scratch( + [192, 384, 768, 1536], features, groups=groups, expand=expand + ) # Swin-L/12 (backbone) + elif backbone == "next_vit_large_6m": + from .backbones.next_vit import _make_pretrained_next_vit_large_6m + pretrained = _make_pretrained_next_vit_large_6m(hooks=hooks) + scratch = _make_scratch( + in_features, features, groups=groups, expand=expand + ) # Next-ViT-L on ImageNet-1K-6M (backbone) + elif backbone == "levit_384": + pretrained = _make_pretrained_levit_384( + use_pretrained, hooks=hooks + ) + scratch = _make_scratch( + [384, 512, 768], features, groups=groups, expand=expand + ) # LeViT 384 (backbone) + elif backbone == "vitl16_384": + pretrained = _make_pretrained_vitl16_384( + use_pretrained, hooks=hooks, use_readout=use_readout + ) + scratch = _make_scratch( + [256, 512, 1024, 1024], features, groups=groups, expand=expand + ) # ViT-L/16 - 85.0% Top1 (backbone) + elif backbone == "vitb_rn50_384": + pretrained = _make_pretrained_vitb_rn50_384( + use_pretrained, + hooks=hooks, + use_vit_only=use_vit_only, + use_readout=use_readout, + ) + scratch = _make_scratch( + [256, 512, 768, 768], features, groups=groups, expand=expand + ) # ViT-H/16 - 85.0% Top1 (backbone) + elif backbone == "vitb16_384": + pretrained = _make_pretrained_vitb16_384( + use_pretrained, hooks=hooks, use_readout=use_readout + ) + scratch = _make_scratch( + [96, 192, 384, 768], features, groups=groups, expand=expand + ) # ViT-B/16 - 84.6% Top1 (backbone) + elif backbone == "resnext101_wsl": + pretrained = _make_pretrained_resnext101_wsl(use_pretrained) + scratch = _make_scratch([256, 512, 1024, 2048], features, groups=groups, expand=expand) # efficientnet_lite3 + elif backbone == "efficientnet_lite3": + pretrained = _make_pretrained_efficientnet_lite3(use_pretrained, exportable=exportable) + scratch = _make_scratch([32, 48, 136, 384], features, groups=groups, expand=expand) # efficientnet_lite3 + else: + print(f"Backbone '{backbone}' not implemented") + assert False + + return pretrained, scratch + + +def _make_scratch(in_shape, out_shape, groups=1, expand=False): + scratch = nn.Module() + + out_shape1 = out_shape + out_shape2 = out_shape + out_shape3 = out_shape + if len(in_shape) >= 4: + out_shape4 = out_shape + + if expand: + out_shape1 = out_shape + out_shape2 = out_shape*2 + out_shape3 = out_shape*4 + if len(in_shape) >= 4: + out_shape4 = out_shape*8 + + scratch.layer1_rn = nn.Conv2d( + in_shape[0], out_shape1, kernel_size=3, stride=1, padding=1, bias=False, groups=groups + ) + scratch.layer2_rn = nn.Conv2d( + in_shape[1], out_shape2, kernel_size=3, stride=1, padding=1, bias=False, groups=groups + ) + scratch.layer3_rn = nn.Conv2d( + in_shape[2], out_shape3, kernel_size=3, stride=1, padding=1, bias=False, groups=groups + ) + if 
len(in_shape) >= 4: + scratch.layer4_rn = nn.Conv2d( + in_shape[3], out_shape4, kernel_size=3, stride=1, padding=1, bias=False, groups=groups + ) + + return scratch + + +def _make_pretrained_efficientnet_lite3(use_pretrained, exportable=False): + efficientnet = torch.hub.load( + "rwightman/gen-efficientnet-pytorch", + "tf_efficientnet_lite3", + pretrained=use_pretrained, + exportable=exportable + ) + return _make_efficientnet_backbone(efficientnet) + + +def _make_efficientnet_backbone(effnet): + pretrained = nn.Module() + + pretrained.layer1 = nn.Sequential( + effnet.conv_stem, effnet.bn1, effnet.act1, *effnet.blocks[0:2] + ) + pretrained.layer2 = nn.Sequential(*effnet.blocks[2:3]) + pretrained.layer3 = nn.Sequential(*effnet.blocks[3:5]) + pretrained.layer4 = nn.Sequential(*effnet.blocks[5:9]) + + return pretrained + + +def _make_resnet_backbone(resnet): + pretrained = nn.Module() + pretrained.layer1 = nn.Sequential( + resnet.conv1, resnet.bn1, resnet.relu, resnet.maxpool, resnet.layer1 + ) + + pretrained.layer2 = resnet.layer2 + pretrained.layer3 = resnet.layer3 + pretrained.layer4 = resnet.layer4 + + return pretrained + + +def _make_pretrained_resnext101_wsl(use_pretrained): + resnet = torch.hub.load("facebookresearch/WSL-Images", "resnext101_32x8d_wsl") + return _make_resnet_backbone(resnet) + + + +class Interpolate(nn.Module): + """Interpolation module. + """ + + def __init__(self, scale_factor, mode, align_corners=False): + """Init. + + Args: + scale_factor (float): scaling + mode (str): interpolation mode + """ + super(Interpolate, self).__init__() + + self.interp = nn.functional.interpolate + self.scale_factor = scale_factor + self.mode = mode + self.align_corners = align_corners + + def forward(self, x): + """Forward pass. + + Args: + x (tensor): input + + Returns: + tensor: interpolated data + """ + + x = self.interp( + x, scale_factor=self.scale_factor, mode=self.mode, align_corners=self.align_corners + ) + + return x + + +class ResidualConvUnit(nn.Module): + """Residual convolution module. + """ + + def __init__(self, features): + """Init. + + Args: + features (int): number of features + """ + super().__init__() + + self.conv1 = nn.Conv2d( + features, features, kernel_size=3, stride=1, padding=1, bias=True + ) + + self.conv2 = nn.Conv2d( + features, features, kernel_size=3, stride=1, padding=1, bias=True + ) + + self.relu = nn.ReLU(inplace=True) + + def forward(self, x): + """Forward pass. + + Args: + x (tensor): input + + Returns: + tensor: output + """ + out = self.relu(x) + out = self.conv1(out) + out = self.relu(out) + out = self.conv2(out) + + return out + x + + +class FeatureFusionBlock(nn.Module): + """Feature fusion block. + """ + + def __init__(self, features): + """Init. + + Args: + features (int): number of features + """ + super(FeatureFusionBlock, self).__init__() + + self.resConfUnit1 = ResidualConvUnit(features) + self.resConfUnit2 = ResidualConvUnit(features) + + def forward(self, *xs): + """Forward pass. + + Returns: + tensor: output + """ + output = xs[0] + + if len(xs) == 2: + output += self.resConfUnit1(xs[1]) + + output = self.resConfUnit2(output) + + output = nn.functional.interpolate( + output, scale_factor=2, mode="bilinear", align_corners=True + ) + + return output + + + + +class ResidualConvUnit_custom(nn.Module): + """Residual convolution module. + """ + + def __init__(self, features, activation, bn): + """Init. 
+ + Args: + features (int): number of features + """ + super().__init__() + + self.bn = bn + + self.groups=1 + + self.conv1 = nn.Conv2d( + features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups + ) + + self.conv2 = nn.Conv2d( + features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups + ) + + if self.bn==True: + self.bn1 = nn.BatchNorm2d(features) + self.bn2 = nn.BatchNorm2d(features) + + self.activation = activation + + self.skip_add = nn.quantized.FloatFunctional() + + def forward(self, x): + """Forward pass. + + Args: + x (tensor): input + + Returns: + tensor: output + """ + + out = self.activation(x) + out = self.conv1(out) + if self.bn==True: + out = self.bn1(out) + + out = self.activation(out) + out = self.conv2(out) + if self.bn==True: + out = self.bn2(out) + + if self.groups > 1: + out = self.conv_merge(out) + + return self.skip_add.add(out, x) + + # return out + x + + +class FeatureFusionBlock_custom(nn.Module): + """Feature fusion block. + """ + + def __init__(self, features, activation, deconv=False, bn=False, expand=False, align_corners=True, size=None): + """Init. + + Args: + features (int): number of features + """ + super(FeatureFusionBlock_custom, self).__init__() + + self.deconv = deconv + self.align_corners = align_corners + + self.groups=1 + + self.expand = expand + out_features = features + if self.expand==True: + out_features = features//2 + + self.out_conv = nn.Conv2d(features, out_features, kernel_size=1, stride=1, padding=0, bias=True, groups=1) + + self.resConfUnit1 = ResidualConvUnit_custom(features, activation, bn) + self.resConfUnit2 = ResidualConvUnit_custom(features, activation, bn) + + self.skip_add = nn.quantized.FloatFunctional() + + self.size=size + + def forward(self, *xs, size=None): + """Forward pass. 
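+        Args:
+            *xs: one or two feature tensors with matching shapes; when a second tensor is
+                given it is passed through resConfUnit1 and added to the first.
+            size (tuple, optional): target spatial size for the bilinear upsampling; if neither
+                this nor self.size is set, the output is upscaled by a factor of 2.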
+ + Returns: + tensor: output + """ + output = xs[0] + + if len(xs) == 2: + res = self.resConfUnit1(xs[1]) + output = self.skip_add.add(output, res) + # output += res + + output = self.resConfUnit2(output) + + if (size is None) and (self.size is None): + modifier = {"scale_factor": 2} + elif size is None: + modifier = {"size": self.size} + else: + modifier = {"size": size} + + output = nn.functional.interpolate( + output, **modifier, mode="bilinear", align_corners=self.align_corners + ) + + output = self.out_conv(output) + + return output + diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/dpt_depth.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/dpt_depth.py new file mode 100644 index 0000000000000000000000000000000000000000..3129d09cb43a7c79b23916236991fabbedb78f55 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/dpt_depth.py @@ -0,0 +1,166 @@ +import torch +import torch.nn as nn + +from .base_model import BaseModel +from .blocks import ( + FeatureFusionBlock_custom, + Interpolate, + _make_encoder, + forward_beit, + forward_swin, + forward_levit, + forward_vit, +) +from .backbones.levit import stem_b4_transpose +from timm.models.layers import get_act_layer + + +def _make_fusion_block(features, use_bn, size = None): + return FeatureFusionBlock_custom( + features, + nn.ReLU(False), + deconv=False, + bn=use_bn, + expand=False, + align_corners=True, + size=size, + ) + + +class DPT(BaseModel): + def __init__( + self, + head, + features=256, + backbone="vitb_rn50_384", + readout="project", + channels_last=False, + use_bn=False, + **kwargs + ): + + super(DPT, self).__init__() + + self.channels_last = channels_last + + # For the Swin, Swin 2, LeViT and Next-ViT Transformers, the hierarchical architectures prevent setting the + # hooks freely. Instead, the hooks have to be chosen according to the ranges specified in the comments. 
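+        # Each entry maps a backbone name to the indices of the encoder blocks whose activations
+        # are tapped as the feature maps for the reassemble/fusion stages; len(hooks) is four for
+        # most backbones and three for LeViT, and sets self.number_layers below.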
+ hooks = { + "beitl16_512": [5, 11, 17, 23], + "beitl16_384": [5, 11, 17, 23], + "beitb16_384": [2, 5, 8, 11], + "swin2l24_384": [1, 1, 17, 1], # Allowed ranges: [0, 1], [0, 1], [ 0, 17], [ 0, 1] + "swin2b24_384": [1, 1, 17, 1], # [0, 1], [0, 1], [ 0, 17], [ 0, 1] + "swin2t16_256": [1, 1, 5, 1], # [0, 1], [0, 1], [ 0, 5], [ 0, 1] + "swinl12_384": [1, 1, 17, 1], # [0, 1], [0, 1], [ 0, 17], [ 0, 1] + "next_vit_large_6m": [2, 6, 36, 39], # [0, 2], [3, 6], [ 7, 36], [37, 39] + "levit_384": [3, 11, 21], # [0, 3], [6, 11], [14, 21] + "vitb_rn50_384": [0, 1, 8, 11], + "vitb16_384": [2, 5, 8, 11], + "vitl16_384": [5, 11, 17, 23], + }[backbone] + + if "next_vit" in backbone: + in_features = { + "next_vit_large_6m": [96, 256, 512, 1024], + }[backbone] + else: + in_features = None + + # Instantiate backbone and reassemble blocks + self.pretrained, self.scratch = _make_encoder( + backbone, + features, + False, # Set to true of you want to train from scratch, uses ImageNet weights + groups=1, + expand=False, + exportable=False, + hooks=hooks, + use_readout=readout, + in_features=in_features, + ) + + self.number_layers = len(hooks) if hooks is not None else 4 + size_refinenet3 = None + self.scratch.stem_transpose = None + + if "beit" in backbone: + self.forward_transformer = forward_beit + elif "swin" in backbone: + self.forward_transformer = forward_swin + elif "next_vit" in backbone: + from .backbones.next_vit import forward_next_vit + self.forward_transformer = forward_next_vit + elif "levit" in backbone: + self.forward_transformer = forward_levit + size_refinenet3 = 7 + self.scratch.stem_transpose = stem_b4_transpose(256, 128, get_act_layer("hard_swish")) + else: + self.forward_transformer = forward_vit + + self.scratch.refinenet1 = _make_fusion_block(features, use_bn) + self.scratch.refinenet2 = _make_fusion_block(features, use_bn) + self.scratch.refinenet3 = _make_fusion_block(features, use_bn, size_refinenet3) + if self.number_layers >= 4: + self.scratch.refinenet4 = _make_fusion_block(features, use_bn) + + self.scratch.output_conv = head + + + def forward(self, x): + if self.channels_last == True: + x.contiguous(memory_format=torch.channels_last) + + layers = self.forward_transformer(self.pretrained, x) + if self.number_layers == 3: + layer_1, layer_2, layer_3 = layers + else: + layer_1, layer_2, layer_3, layer_4 = layers + + layer_1_rn = self.scratch.layer1_rn(layer_1) + layer_2_rn = self.scratch.layer2_rn(layer_2) + layer_3_rn = self.scratch.layer3_rn(layer_3) + if self.number_layers >= 4: + layer_4_rn = self.scratch.layer4_rn(layer_4) + + if self.number_layers == 3: + path_3 = self.scratch.refinenet3(layer_3_rn, size=layer_2_rn.shape[2:]) + else: + path_4 = self.scratch.refinenet4(layer_4_rn, size=layer_3_rn.shape[2:]) + path_3 = self.scratch.refinenet3(path_4, layer_3_rn, size=layer_2_rn.shape[2:]) + path_2 = self.scratch.refinenet2(path_3, layer_2_rn, size=layer_1_rn.shape[2:]) + path_1 = self.scratch.refinenet1(path_2, layer_1_rn) + + if self.scratch.stem_transpose is not None: + path_1 = self.scratch.stem_transpose(path_1) + + out = self.scratch.output_conv(path_1) + + return out + + +class DPTDepthModel(DPT): + def __init__(self, path=None, non_negative=True, **kwargs): + features = kwargs["features"] if "features" in kwargs else 256 + head_features_1 = kwargs["head_features_1"] if "head_features_1" in kwargs else features + head_features_2 = kwargs["head_features_2"] if "head_features_2" in kwargs else 32 + kwargs.pop("head_features_1", None) + kwargs.pop("head_features_2", None) + + 
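+        # Output head: halve the channel count, upsample 2x, project down to head_features_2,
+        # then a 1x1 convolution produces the single-channel depth map; the trailing ReLU clamps
+        # the prediction to non-negative values when non_negative=True.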
head = nn.Sequential( + nn.Conv2d(head_features_1, head_features_1 // 2, kernel_size=3, stride=1, padding=1), + Interpolate(scale_factor=2, mode="bilinear", align_corners=True), + nn.Conv2d(head_features_1 // 2, head_features_2, kernel_size=3, stride=1, padding=1), + nn.ReLU(True), + nn.Conv2d(head_features_2, 1, kernel_size=1, stride=1, padding=0), + nn.ReLU(True) if non_negative else nn.Identity(), + nn.Identity(), + ) + + super().__init__(head, **kwargs) + + if path is not None: + self.load(path) + + def forward(self, x): + return super().forward(x).squeeze(dim=1) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/midas_net.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/midas_net.py new file mode 100644 index 0000000000000000000000000000000000000000..8a954977800b0a0f48807e80fa63041910e33c1f --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/midas_net.py @@ -0,0 +1,76 @@ +"""MidashNet: Network for monocular depth estimation trained by mixing several datasets. +This file contains code that is adapted from +https://github.com/thomasjpfan/pytorch_refinenet/blob/master/pytorch_refinenet/refinenet/refinenet_4cascade.py +""" +import torch +import torch.nn as nn + +from .base_model import BaseModel +from .blocks import FeatureFusionBlock, Interpolate, _make_encoder + + +class MidasNet(BaseModel): + """Network for monocular depth estimation. + """ + + def __init__(self, path=None, features=256, non_negative=True): + """Init. + + Args: + path (str, optional): Path to saved model. Defaults to None. + features (int, optional): Number of features. Defaults to 256. + backbone (str, optional): Backbone network for encoder. Defaults to resnet50 + """ + print("Loading weights: ", path) + + super(MidasNet, self).__init__() + + use_pretrained = False if path is None else True + + self.pretrained, self.scratch = _make_encoder(backbone="resnext101_wsl", features=features, use_pretrained=use_pretrained) + + self.scratch.refinenet4 = FeatureFusionBlock(features) + self.scratch.refinenet3 = FeatureFusionBlock(features) + self.scratch.refinenet2 = FeatureFusionBlock(features) + self.scratch.refinenet1 = FeatureFusionBlock(features) + + self.scratch.output_conv = nn.Sequential( + nn.Conv2d(features, 128, kernel_size=3, stride=1, padding=1), + Interpolate(scale_factor=2, mode="bilinear"), + nn.Conv2d(128, 32, kernel_size=3, stride=1, padding=1), + nn.ReLU(True), + nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0), + nn.ReLU(True) if non_negative else nn.Identity(), + ) + + if path: + self.load(path) + + def forward(self, x): + """Forward pass. 
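+        Runs the ResNeXt encoder, the four reassemble convolutions and the refinenet
+        fusion blocks, then the output head.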
+ + Args: + x (tensor): input data (image) + + Returns: + tensor: depth + """ + + layer_1 = self.pretrained.layer1(x) + layer_2 = self.pretrained.layer2(layer_1) + layer_3 = self.pretrained.layer3(layer_2) + layer_4 = self.pretrained.layer4(layer_3) + + layer_1_rn = self.scratch.layer1_rn(layer_1) + layer_2_rn = self.scratch.layer2_rn(layer_2) + layer_3_rn = self.scratch.layer3_rn(layer_3) + layer_4_rn = self.scratch.layer4_rn(layer_4) + + path_4 = self.scratch.refinenet4(layer_4_rn) + path_3 = self.scratch.refinenet3(path_4, layer_3_rn) + path_2 = self.scratch.refinenet2(path_3, layer_2_rn) + path_1 = self.scratch.refinenet1(path_2, layer_1_rn) + + out = self.scratch.output_conv(path_1) + + return torch.squeeze(out, dim=1) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/midas_net_custom.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/midas_net_custom.py new file mode 100644 index 0000000000000000000000000000000000000000..50e4acb5e53d5fabefe3dde16ab49c33c2b7797c --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/midas_net_custom.py @@ -0,0 +1,128 @@ +"""MidashNet: Network for monocular depth estimation trained by mixing several datasets. +This file contains code that is adapted from +https://github.com/thomasjpfan/pytorch_refinenet/blob/master/pytorch_refinenet/refinenet/refinenet_4cascade.py +""" +import torch +import torch.nn as nn + +from .base_model import BaseModel +from .blocks import FeatureFusionBlock, FeatureFusionBlock_custom, Interpolate, _make_encoder + + +class MidasNet_small(BaseModel): + """Network for monocular depth estimation. + """ + + def __init__(self, path=None, features=64, backbone="efficientnet_lite3", non_negative=True, exportable=True, channels_last=False, align_corners=True, + blocks={'expand': True}): + """Init. + + Args: + path (str, optional): Path to saved model. Defaults to None. + features (int, optional): Number of features. Defaults to 256. + backbone (str, optional): Backbone network for encoder. 
Defaults to resnet50 + """ + print("Loading weights: ", path) + + super(MidasNet_small, self).__init__() + + use_pretrained = False if path else True + + self.channels_last = channels_last + self.blocks = blocks + self.backbone = backbone + + self.groups = 1 + + features1=features + features2=features + features3=features + features4=features + self.expand = False + if "expand" in self.blocks and self.blocks['expand'] == True: + self.expand = True + features1=features + features2=features*2 + features3=features*4 + features4=features*8 + + self.pretrained, self.scratch = _make_encoder(self.backbone, features, use_pretrained, groups=self.groups, expand=self.expand, exportable=exportable) + + self.scratch.activation = nn.ReLU(False) + + self.scratch.refinenet4 = FeatureFusionBlock_custom(features4, self.scratch.activation, deconv=False, bn=False, expand=self.expand, align_corners=align_corners) + self.scratch.refinenet3 = FeatureFusionBlock_custom(features3, self.scratch.activation, deconv=False, bn=False, expand=self.expand, align_corners=align_corners) + self.scratch.refinenet2 = FeatureFusionBlock_custom(features2, self.scratch.activation, deconv=False, bn=False, expand=self.expand, align_corners=align_corners) + self.scratch.refinenet1 = FeatureFusionBlock_custom(features1, self.scratch.activation, deconv=False, bn=False, align_corners=align_corners) + + + self.scratch.output_conv = nn.Sequential( + nn.Conv2d(features, features//2, kernel_size=3, stride=1, padding=1, groups=self.groups), + Interpolate(scale_factor=2, mode="bilinear"), + nn.Conv2d(features//2, 32, kernel_size=3, stride=1, padding=1), + self.scratch.activation, + nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0), + nn.ReLU(True) if non_negative else nn.Identity(), + nn.Identity(), + ) + + if path: + self.load(path) + + + def forward(self, x): + """Forward pass. 
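+        Same encoder/refinenet/head pipeline as MidasNet.forward, with an optional
+        channels_last memory-format path controlled by self.channels_last.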
+ + Args: + x (tensor): input data (image) + + Returns: + tensor: depth + """ + if self.channels_last==True: + print("self.channels_last = ", self.channels_last) + x.contiguous(memory_format=torch.channels_last) + + + layer_1 = self.pretrained.layer1(x) + layer_2 = self.pretrained.layer2(layer_1) + layer_3 = self.pretrained.layer3(layer_2) + layer_4 = self.pretrained.layer4(layer_3) + + layer_1_rn = self.scratch.layer1_rn(layer_1) + layer_2_rn = self.scratch.layer2_rn(layer_2) + layer_3_rn = self.scratch.layer3_rn(layer_3) + layer_4_rn = self.scratch.layer4_rn(layer_4) + + + path_4 = self.scratch.refinenet4(layer_4_rn) + path_3 = self.scratch.refinenet3(path_4, layer_3_rn) + path_2 = self.scratch.refinenet2(path_3, layer_2_rn) + path_1 = self.scratch.refinenet1(path_2, layer_1_rn) + + out = self.scratch.output_conv(path_1) + + return torch.squeeze(out, dim=1) + + + +def fuse_model(m): + prev_previous_type = nn.Identity() + prev_previous_name = '' + previous_type = nn.Identity() + previous_name = '' + for name, module in m.named_modules(): + if prev_previous_type == nn.Conv2d and previous_type == nn.BatchNorm2d and type(module) == nn.ReLU: + # print("FUSED ", prev_previous_name, previous_name, name) + torch.quantization.fuse_modules(m, [prev_previous_name, previous_name, name], inplace=True) + elif prev_previous_type == nn.Conv2d and previous_type == nn.BatchNorm2d: + # print("FUSED ", prev_previous_name, previous_name) + torch.quantization.fuse_modules(m, [prev_previous_name, previous_name], inplace=True) + # elif previous_type == nn.Conv2d and type(module) == nn.ReLU: + # print("FUSED ", previous_name, name) + # torch.quantization.fuse_modules(m, [previous_name, name], inplace=True) + + prev_previous_type = previous_type + prev_previous_name = previous_name + previous_type = type(module) + previous_name = name \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/model_loader.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/model_loader.py new file mode 100644 index 0000000000000000000000000000000000000000..f1cd1f2d43054bfd3d650587c7b2ed35f1347c9e --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/model_loader.py @@ -0,0 +1,242 @@ +import cv2 +import torch + +from midas.dpt_depth import DPTDepthModel +from midas.midas_net import MidasNet +from midas.midas_net_custom import MidasNet_small +from midas.transforms import Resize, NormalizeImage, PrepareForNet + +from torchvision.transforms import Compose + +default_models = { + "dpt_beit_large_512": "weights/dpt_beit_large_512.pt", + "dpt_beit_large_384": "weights/dpt_beit_large_384.pt", + "dpt_beit_base_384": "weights/dpt_beit_base_384.pt", + "dpt_swin2_large_384": "weights/dpt_swin2_large_384.pt", + "dpt_swin2_base_384": "weights/dpt_swin2_base_384.pt", + "dpt_swin2_tiny_256": "weights/dpt_swin2_tiny_256.pt", + "dpt_swin_large_384": "weights/dpt_swin_large_384.pt", + "dpt_next_vit_large_384": "weights/dpt_next_vit_large_384.pt", + "dpt_levit_224": "weights/dpt_levit_224.pt", + "dpt_large_384": "weights/dpt_large_384.pt", + "dpt_hybrid_384": "weights/dpt_hybrid_384.pt", + "midas_v21_384": "weights/midas_v21_384.pt", + "midas_v21_small_256": "weights/midas_v21_small_256.pt", + "openvino_midas_v21_small_256": "weights/openvino_midas_v21_small_256.xml", +} + + +def load_model(device, model_path, model_type="dpt_large_384", 
optimize=True, height=None, square=False): + """Load the specified network. + + Args: + device (device): the torch device used + model_path (str): path to saved model + model_type (str): the type of the model to be loaded + optimize (bool): optimize the model to half-integer on CUDA? + height (int): inference encoder image height + square (bool): resize to a square resolution? + + Returns: + The loaded network, the transform which prepares images as input to the network and the dimensions of the + network input + """ + if "openvino" in model_type: + from openvino.runtime import Core + + keep_aspect_ratio = not square + + if model_type == "dpt_beit_large_512": + model = DPTDepthModel( + path=model_path, + backbone="beitl16_512", + non_negative=True, + ) + net_w, net_h = 512, 512 + resize_mode = "minimal" + normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) + + elif model_type == "dpt_beit_large_384": + model = DPTDepthModel( + path=model_path, + backbone="beitl16_384", + non_negative=True, + ) + net_w, net_h = 384, 384 + resize_mode = "minimal" + normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) + + elif model_type == "dpt_beit_base_384": + model = DPTDepthModel( + path=model_path, + backbone="beitb16_384", + non_negative=True, + ) + net_w, net_h = 384, 384 + resize_mode = "minimal" + normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) + + elif model_type == "dpt_swin2_large_384": + model = DPTDepthModel( + path=model_path, + backbone="swin2l24_384", + non_negative=True, + ) + net_w, net_h = 384, 384 + keep_aspect_ratio = False + resize_mode = "minimal" + normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) + + elif model_type == "dpt_swin2_base_384": + model = DPTDepthModel( + path=model_path, + backbone="swin2b24_384", + non_negative=True, + ) + net_w, net_h = 384, 384 + keep_aspect_ratio = False + resize_mode = "minimal" + normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) + + elif model_type == "dpt_swin2_tiny_256": + model = DPTDepthModel( + path=model_path, + backbone="swin2t16_256", + non_negative=True, + ) + net_w, net_h = 256, 256 + keep_aspect_ratio = False + resize_mode = "minimal" + normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) + + elif model_type == "dpt_swin_large_384": + model = DPTDepthModel( + path=model_path, + backbone="swinl12_384", + non_negative=True, + ) + net_w, net_h = 384, 384 + keep_aspect_ratio = False + resize_mode = "minimal" + normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) + + elif model_type == "dpt_next_vit_large_384": + model = DPTDepthModel( + path=model_path, + backbone="next_vit_large_6m", + non_negative=True, + ) + net_w, net_h = 384, 384 + resize_mode = "minimal" + normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) + + # We change the notation from dpt_levit_224 (MiDaS notation) to levit_384 (timm notation) here, where the 224 refers + # to the resolution 224x224 used by LeViT and 384 is the first entry of the embed_dim, see _cfg and model_cfgs of + # https://github.com/rwightman/pytorch-image-models/blob/main/timm/models/levit.py + # (commit id: 927f031293a30afb940fff0bee34b85d9c059b0e) + elif model_type == "dpt_levit_224": + model = DPTDepthModel( + path=model_path, + backbone="levit_384", + non_negative=True, + head_features_1=64, + head_features_2=8, + ) + net_w, net_h = 224, 224 + keep_aspect_ratio = False + resize_mode = "minimal" + normalization = 
NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) + + elif model_type == "dpt_large_384": + model = DPTDepthModel( + path=model_path, + backbone="vitl16_384", + non_negative=True, + ) + net_w, net_h = 384, 384 + resize_mode = "minimal" + normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) + + elif model_type == "dpt_hybrid_384": + model = DPTDepthModel( + path=model_path, + backbone="vitb_rn50_384", + non_negative=True, + ) + net_w, net_h = 384, 384 + resize_mode = "minimal" + normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) + + elif model_type == "midas_v21_384": + model = MidasNet(model_path, non_negative=True) + net_w, net_h = 384, 384 + resize_mode = "upper_bound" + normalization = NormalizeImage( + mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] + ) + + elif model_type == "midas_v21_small_256": + model = MidasNet_small(model_path, features=64, backbone="efficientnet_lite3", exportable=True, + non_negative=True, blocks={'expand': True}) + net_w, net_h = 256, 256 + resize_mode = "upper_bound" + normalization = NormalizeImage( + mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] + ) + + elif model_type == "openvino_midas_v21_small_256": + ie = Core() + uncompiled_model = ie.read_model(model=model_path) + model = ie.compile_model(uncompiled_model, "CPU") + net_w, net_h = 256, 256 + resize_mode = "upper_bound" + normalization = NormalizeImage( + mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] + ) + + else: + print(f"model_type '{model_type}' not implemented, use: --model_type large") + assert False + + if not "openvino" in model_type: + print("Model loaded, number of parameters = {:.0f}M".format(sum(p.numel() for p in model.parameters()) / 1e6)) + else: + print("Model loaded, optimized with OpenVINO") + + if "openvino" in model_type: + keep_aspect_ratio = False + + if height is not None: + net_w, net_h = height, height + + transform = Compose( + [ + Resize( + net_w, + net_h, + resize_target=None, + keep_aspect_ratio=keep_aspect_ratio, + ensure_multiple_of=32, + resize_method=resize_mode, + image_interpolation_method=cv2.INTER_CUBIC, + ), + normalization, + PrepareForNet(), + ] + ) + + if not "openvino" in model_type: + model.eval() + + if optimize and (device == torch.device("cuda")): + if not "openvino" in model_type: + model = model.to(memory_format=torch.channels_last) + model = model.half() + else: + print("Error: OpenVINO models are already optimized. No optimization to half-float possible.") + exit() + + if not "openvino" in model_type: + model.to(device) + + return model, transform, net_w, net_h diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/transforms.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/transforms.py new file mode 100644 index 0000000000000000000000000000000000000000..350cbc11662633ad7f8968eb10be2e7de6e384e9 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/transforms.py @@ -0,0 +1,234 @@ +import numpy as np +import cv2 +import math + + +def apply_min_size(sample, size, image_interpolation_method=cv2.INTER_AREA): + """Rezise the sample to ensure the given size. Keeps aspect ratio. 
+ + Args: + sample (dict): sample + size (tuple): image size + + Returns: + tuple: new size + """ + shape = list(sample["disparity"].shape) + + if shape[0] >= size[0] and shape[1] >= size[1]: + return sample + + scale = [0, 0] + scale[0] = size[0] / shape[0] + scale[1] = size[1] / shape[1] + + scale = max(scale) + + shape[0] = math.ceil(scale * shape[0]) + shape[1] = math.ceil(scale * shape[1]) + + # resize + sample["image"] = cv2.resize( + sample["image"], tuple(shape[::-1]), interpolation=image_interpolation_method + ) + + sample["disparity"] = cv2.resize( + sample["disparity"], tuple(shape[::-1]), interpolation=cv2.INTER_NEAREST + ) + sample["mask"] = cv2.resize( + sample["mask"].astype(np.float32), + tuple(shape[::-1]), + interpolation=cv2.INTER_NEAREST, + ) + sample["mask"] = sample["mask"].astype(bool) + + return tuple(shape) + + +class Resize(object): + """Resize sample to given size (width, height). + """ + + def __init__( + self, + width, + height, + resize_target=True, + keep_aspect_ratio=False, + ensure_multiple_of=1, + resize_method="lower_bound", + image_interpolation_method=cv2.INTER_AREA, + ): + """Init. + + Args: + width (int): desired output width + height (int): desired output height + resize_target (bool, optional): + True: Resize the full sample (image, mask, target). + False: Resize image only. + Defaults to True. + keep_aspect_ratio (bool, optional): + True: Keep the aspect ratio of the input sample. + Output sample might not have the given width and height, and + resize behaviour depends on the parameter 'resize_method'. + Defaults to False. + ensure_multiple_of (int, optional): + Output width and height is constrained to be multiple of this parameter. + Defaults to 1. + resize_method (str, optional): + "lower_bound": Output will be at least as large as the given size. + "upper_bound": Output will be at max as large as the given size. (Output size might be smaller than given size.) + "minimal": Scale as least as possible. (Output size might be smaller than given size.) + Defaults to "lower_bound". 
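+            image_interpolation_method (int, optional):
+                OpenCV interpolation flag used when resizing the image.
+                Defaults to cv2.INTER_AREA.
+
+        Example (illustrative; mirrors how model_loader.py builds its transform):
+            resize = Resize(384, 384, resize_target=None, keep_aspect_ratio=True,
+                            ensure_multiple_of=32, resize_method="minimal",
+                            image_interpolation_method=cv2.INTER_CUBIC)
+            sample = resize({"image": img})  # img: HxWx3 numpy image array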
+ """ + self.__width = width + self.__height = height + + self.__resize_target = resize_target + self.__keep_aspect_ratio = keep_aspect_ratio + self.__multiple_of = ensure_multiple_of + self.__resize_method = resize_method + self.__image_interpolation_method = image_interpolation_method + + def constrain_to_multiple_of(self, x, min_val=0, max_val=None): + y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int) + + if max_val is not None and y > max_val: + y = (np.floor(x / self.__multiple_of) * self.__multiple_of).astype(int) + + if y < min_val: + y = (np.ceil(x / self.__multiple_of) * self.__multiple_of).astype(int) + + return y + + def get_size(self, width, height): + # determine new height and width + scale_height = self.__height / height + scale_width = self.__width / width + + if self.__keep_aspect_ratio: + if self.__resize_method == "lower_bound": + # scale such that output size is lower bound + if scale_width > scale_height: + # fit width + scale_height = scale_width + else: + # fit height + scale_width = scale_height + elif self.__resize_method == "upper_bound": + # scale such that output size is upper bound + if scale_width < scale_height: + # fit width + scale_height = scale_width + else: + # fit height + scale_width = scale_height + elif self.__resize_method == "minimal": + # scale as least as possbile + if abs(1 - scale_width) < abs(1 - scale_height): + # fit width + scale_height = scale_width + else: + # fit height + scale_width = scale_height + else: + raise ValueError( + f"resize_method {self.__resize_method} not implemented" + ) + + if self.__resize_method == "lower_bound": + new_height = self.constrain_to_multiple_of( + scale_height * height, min_val=self.__height + ) + new_width = self.constrain_to_multiple_of( + scale_width * width, min_val=self.__width + ) + elif self.__resize_method == "upper_bound": + new_height = self.constrain_to_multiple_of( + scale_height * height, max_val=self.__height + ) + new_width = self.constrain_to_multiple_of( + scale_width * width, max_val=self.__width + ) + elif self.__resize_method == "minimal": + new_height = self.constrain_to_multiple_of(scale_height * height) + new_width = self.constrain_to_multiple_of(scale_width * width) + else: + raise ValueError(f"resize_method {self.__resize_method} not implemented") + + return (new_width, new_height) + + def __call__(self, sample): + width, height = self.get_size( + sample["image"].shape[1], sample["image"].shape[0] + ) + + # resize sample + sample["image"] = cv2.resize( + sample["image"], + (width, height), + interpolation=self.__image_interpolation_method, + ) + + if self.__resize_target: + if "disparity" in sample: + sample["disparity"] = cv2.resize( + sample["disparity"], + (width, height), + interpolation=cv2.INTER_NEAREST, + ) + + if "depth" in sample: + sample["depth"] = cv2.resize( + sample["depth"], (width, height), interpolation=cv2.INTER_NEAREST + ) + + sample["mask"] = cv2.resize( + sample["mask"].astype(np.float32), + (width, height), + interpolation=cv2.INTER_NEAREST, + ) + sample["mask"] = sample["mask"].astype(bool) + + return sample + + +class NormalizeImage(object): + """Normlize image by given mean and std. + """ + + def __init__(self, mean, std): + self.__mean = mean + self.__std = std + + def __call__(self, sample): + sample["image"] = (sample["image"] - self.__mean) / self.__std + + return sample + + +class PrepareForNet(object): + """Prepare sample for usage as network input. 
+ """ + + def __init__(self): + pass + + def __call__(self, sample): + image = np.transpose(sample["image"], (2, 0, 1)) + sample["image"] = np.ascontiguousarray(image).astype(np.float32) + + if "mask" in sample: + sample["mask"] = sample["mask"].astype(np.float32) + sample["mask"] = np.ascontiguousarray(sample["mask"]) + + if "disparity" in sample: + disparity = sample["disparity"].astype(np.float32) + sample["disparity"] = np.ascontiguousarray(disparity) + + if "depth" in sample: + depth = sample["depth"].astype(np.float32) + sample["depth"] = np.ascontiguousarray(depth) + + return sample diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/output/.placeholder b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/output/.placeholder new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/LICENSE b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..6606ec028d1c629986e7019fe3564f5b4bfe425d --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2020 Alexey + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/README.md b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1d43c2606767798ee46b34292e0483197424ec23 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/README.md @@ -0,0 +1,131 @@ +# MiDaS for ROS1 by using LibTorch in C++ + +### Requirements + +- Ubuntu 17.10 / 18.04 / 20.04, Debian Stretch +- ROS Melodic for Ubuntu (17.10 / 18.04) / Debian Stretch, ROS Noetic for Ubuntu 20.04 +- C++11 +- LibTorch >= 1.6 + +## Quick Start with a MiDaS Example + +MiDaS is a neural network to compute depth from a single image. 
+ +* input from `image_topic`: `sensor_msgs/Image` - `RGB8` image with any shape +* output to `midas_topic`: `sensor_msgs/Image` - `TYPE_32FC1` inverse relative depth maps in range [0 - 255] with original size and channels=1 + +### Install Dependecies + +* install ROS Melodic for Ubuntu 17.10 / 18.04: +```bash +wget https://raw.githubusercontent.com/isl-org/MiDaS/master/ros/additions/install_ros_melodic_ubuntu_17_18.sh +./install_ros_melodic_ubuntu_17_18.sh +``` + +or Noetic for Ubuntu 20.04: + +```bash +wget https://raw.githubusercontent.com/isl-org/MiDaS/master/ros/additions/install_ros_noetic_ubuntu_20.sh +./install_ros_noetic_ubuntu_20.sh +``` + + +* install LibTorch 1.7 with CUDA 11.0: + +On **Jetson (ARM)**: +```bash +wget https://nvidia.box.com/shared/static/wa34qwrwtk9njtyarwt5nvo6imenfy26.whl -O torch-1.7.0-cp36-cp36m-linux_aarch64.whl +sudo apt-get install python3-pip libopenblas-base libopenmpi-dev +pip3 install Cython +pip3 install numpy torch-1.7.0-cp36-cp36m-linux_aarch64.whl +``` +Or compile LibTorch from source: https://github.com/pytorch/pytorch#from-source + +On **Linux (x86_64)**: +```bash +cd ~/ +wget https://download.pytorch.org/libtorch/cu110/libtorch-cxx11-abi-shared-with-deps-1.7.0%2Bcu110.zip +unzip libtorch-cxx11-abi-shared-with-deps-1.7.0+cu110.zip +``` + +* create symlink for OpenCV: + +```bash +sudo ln -s /usr/include/opencv4 /usr/include/opencv +``` + +* download and install MiDaS: + +```bash +source ~/.bashrc +cd ~/ +mkdir catkin_ws +cd catkin_ws +git clone https://github.com/isl-org/MiDaS +mkdir src +cp -r MiDaS/ros/* src + +chmod +x src/additions/*.sh +chmod +x src/*.sh +chmod +x src/midas_cpp/scripts/*.py +cp src/additions/do_catkin_make.sh ./do_catkin_make.sh +./do_catkin_make.sh +./src/additions/downloads.sh +``` + +### Usage + +* run only `midas` node: `~/catkin_ws/src/launch_midas_cpp.sh` + +#### Test + +* Test - capture video and show result in the window: + * place any `test.mp4` video file to the directory `~/catkin_ws/src/` + * run `midas` node: `~/catkin_ws/src/launch_midas_cpp.sh` + * run test nodes in another terminal: `cd ~/catkin_ws/src && ./run_talker_listener_test.sh` and wait 30 seconds + + (to use Python 2, run command `sed -i 's/python3/python2/' ~/catkin_ws/src/midas_cpp/scripts/*.py` ) + +## Mobile version of MiDaS - Monocular Depth Estimation + +### Accuracy + +* MiDaS v2 small - ResNet50 default-decoder 384x384 +* MiDaS v2.1 small - EfficientNet-Lite3 small-decoder 256x256 + +**Zero-shot error** (the lower - the better): + +| Model | DIW WHDR | Eth3d AbsRel | Sintel AbsRel | Kitti δ>1.25 | NyuDepthV2 δ>1.25 | TUM δ>1.25 | +|---|---|---|---|---|---|---| +| MiDaS v2 small 384x384 | **0.1248** | 0.1550 | **0.3300** | **21.81** | 15.73 | 17.00 | +| MiDaS v2.1 small 256x256 | 0.1344 | **0.1344** | 0.3370 | 29.27 | **13.43** | **14.53** | +| Relative improvement, % | -8 % | **+13 %** | -2 % | -34 % | **+15 %** | **+15 %** | + +None of Train/Valid/Test subsets of datasets (DIW, Eth3d, Sintel, Kitti, NyuDepthV2, TUM) were not involved in Training or Fine Tuning. + +### Inference speed (FPS) on nVidia GPU + +Inference speed excluding pre and post processing, batch=1, **Frames Per Second** (the higher - the better): + +| Model | Jetson Nano, FPS | RTX 2080Ti, FPS | +|---|---|---| +| MiDaS v2 small 384x384 | 1.6 | 117 | +| MiDaS v2.1 small 256x256 | 8.1 | 232 | +| SpeedUp, X times | **5x** | **2x** | + +### Citation + +This repository contains code to compute depth from a single image. 
It accompanies our [paper](https://arxiv.org/abs/1907.01341v3): + +>Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer +René Ranftl, Katrin Lasinger, David Hafner, Konrad Schindler, Vladlen Koltun + +Please cite our paper if you use this code or any of the models: +``` +@article{Ranftl2020, + author = {Ren\'{e} Ranftl and Katrin Lasinger and David Hafner and Konrad Schindler and Vladlen Koltun}, + title = {Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer}, + journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)}, + year = {2020}, +} +``` diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/do_catkin_make.sh b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/do_catkin_make.sh new file mode 100644 index 0000000000000000000000000000000000000000..0d416fc00282aab146326bbba12a9274e1ba29b8 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/do_catkin_make.sh @@ -0,0 +1,5 @@ +mkdir src +catkin_make +source devel/setup.bash +echo $ROS_PACKAGE_PATH +chmod +x ./devel/setup.bash diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/downloads.sh b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/downloads.sh new file mode 100644 index 0000000000000000000000000000000000000000..9c967d4e2dc7997da26399a063b5a54ecc314eb1 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/downloads.sh @@ -0,0 +1,5 @@ +mkdir ~/.ros +wget https://github.com/isl-org/MiDaS/releases/download/v2_1/model-small-traced.pt +cp ./model-small-traced.pt ~/.ros/model-small-traced.pt + + diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/install_ros_melodic_ubuntu_17_18.sh b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/install_ros_melodic_ubuntu_17_18.sh new file mode 100644 index 0000000000000000000000000000000000000000..b868112631e9d9bc7bccb601407dfc857b8a99d5 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/install_ros_melodic_ubuntu_17_18.sh @@ -0,0 +1,34 @@ +#@title { display-mode: "code" } + +#from http://wiki.ros.org/indigo/Installation/Ubuntu + +#1.2 Setup sources.list +sudo sh -c 'echo "deb http://packages.ros.org/ros/ubuntu $(lsb_release -sc) main" > /etc/apt/sources.list.d/ros-latest.list' + +# 1.3 Setup keys +sudo apt-key adv --keyserver 'hkp://keyserver.ubuntu.com:80' --recv-key C1CF6E31E6BADE8868B172B4F42ED6FBAB17C654 +sudo apt-key adv --keyserver 'hkp://ha.pool.sks-keyservers.net:80' --recv-key 421C365BD9FF1F717815A3895523BAEEB01FA116 + +curl -sSL 'http://keyserver.ubuntu.com/pks/lookup?op=get&search=0xC1CF6E31E6BADE8868B172B4F42ED6FBAB17C654' | sudo apt-key add - + +# 1.4 Installation +sudo apt-get update +sudo apt-get upgrade + +# Desktop-Full Install: +sudo apt-get install ros-melodic-desktop-full + +printf "\nsource /opt/ros/melodic/setup.bash\n" >> ~/.bashrc + +# 1.5 Initialize rosdep +sudo rosdep init +rosdep update + + +# 1.7 Getting rosinstall 
(python) +sudo apt-get install python-rosinstall +sudo apt-get install python-catkin-tools +sudo apt-get install python-rospy +sudo apt-get install python-rosdep +sudo apt-get install python-roscd +sudo apt-get install python-pip \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/install_ros_noetic_ubuntu_20.sh b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/install_ros_noetic_ubuntu_20.sh new file mode 100644 index 0000000000000000000000000000000000000000..d73ea1a3d92359819167d735a92d2a650b9bc245 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/install_ros_noetic_ubuntu_20.sh @@ -0,0 +1,33 @@ +#@title { display-mode: "code" } + +#from http://wiki.ros.org/indigo/Installation/Ubuntu + +#1.2 Setup sources.list +sudo sh -c 'echo "deb http://packages.ros.org/ros/ubuntu $(lsb_release -sc) main" > /etc/apt/sources.list.d/ros-latest.list' + +# 1.3 Setup keys +sudo apt-key adv --keyserver 'hkp://keyserver.ubuntu.com:80' --recv-key C1CF6E31E6BADE8868B172B4F42ED6FBAB17C654 + +curl -sSL 'http://keyserver.ubuntu.com/pks/lookup?op=get&search=0xC1CF6E31E6BADE8868B172B4F42ED6FBAB17C654' | sudo apt-key add - + +# 1.4 Installation +sudo apt-get update +sudo apt-get upgrade + +# Desktop-Full Install: +sudo apt-get install ros-noetic-desktop-full + +printf "\nsource /opt/ros/noetic/setup.bash\n" >> ~/.bashrc + +# 1.5 Initialize rosdep +sudo rosdep init +rosdep update + + +# 1.7 Getting rosinstall (python) +sudo apt-get install python3-rosinstall +sudo apt-get install python3-catkin-tools +sudo apt-get install python3-rospy +sudo apt-get install python3-rosdep +sudo apt-get install python3-roscd +sudo apt-get install python3-pip \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/make_package_cpp.sh b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/make_package_cpp.sh new file mode 100644 index 0000000000000000000000000000000000000000..d0ef6073a9c9ce40744e1c81d557c1c68255b95e --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/make_package_cpp.sh @@ -0,0 +1,16 @@ +cd ~/catkin_ws/src +catkin_create_pkg midas_cpp std_msgs roscpp cv_bridge sensor_msgs image_transport +cd ~/catkin_ws +catkin_make + +chmod +x ~/catkin_ws/devel/setup.bash +printf "\nsource ~/catkin_ws/devel/setup.bash" >> ~/.bashrc +source ~/catkin_ws/devel/setup.bash + + +sudo rosdep init +rosdep update +#rospack depends1 midas_cpp +roscd midas_cpp +#cat package.xml +#rospack depends midas_cpp \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/launch_midas_cpp.sh b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/launch_midas_cpp.sh new file mode 100644 index 0000000000000000000000000000000000000000..5a0d1583fffdc49216c625dfd07af2ae3b01a7a0 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/launch_midas_cpp.sh @@ -0,0 +1,2 @@ +source ~/catkin_ws/devel/setup.bash +roslaunch midas_cpp midas_cpp.launch model_name:="model-small-traced.pt" 
input_topic:="image_topic" output_topic:="midas_topic" out_orig_size:="true" \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/CMakeLists.txt b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..885341691d217f9c4c8fcb1e4ff568d87788c7b8 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/CMakeLists.txt @@ -0,0 +1,189 @@ +cmake_minimum_required(VERSION 3.0.2) +project(midas_cpp) + +## Compile as C++11, supported in ROS Kinetic and newer +# add_compile_options(-std=c++11) + +## Find catkin macros and libraries +## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz) +## is used, also find other catkin packages +find_package(catkin REQUIRED COMPONENTS + cv_bridge + image_transport + roscpp + rospy + sensor_msgs + std_msgs +) + +## System dependencies are found with CMake's conventions +# find_package(Boost REQUIRED COMPONENTS system) + +list(APPEND CMAKE_PREFIX_PATH "~/libtorch") +list(APPEND CMAKE_PREFIX_PATH "/usr/local/lib/python3.6/dist-packages/torch/lib") +list(APPEND CMAKE_PREFIX_PATH "/usr/local/lib/python2.7/dist-packages/torch/lib") + +if(NOT EXISTS "~/libtorch") + if (EXISTS "/usr/local/lib/python3.6/dist-packages/torch") + include_directories(/usr/local/include) + include_directories(/usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include) + include_directories(/usr/local/lib/python3.6/dist-packages/torch/include) + + link_directories(/usr/local/lib) + link_directories(/usr/local/lib/python3.6/dist-packages/torch/lib) + + set(CMAKE_PREFIX_PATH /usr/local/lib/python3.6/dist-packages/torch) + set(Boost_USE_MULTITHREADED ON) + set(Torch_DIR /usr/local/lib/python3.6/dist-packages/torch) + + elseif (EXISTS "/usr/local/lib/python2.7/dist-packages/torch") + + include_directories(/usr/local/include) + include_directories(/usr/local/lib/python2.7/dist-packages/torch/include/torch/csrc/api/include) + include_directories(/usr/local/lib/python2.7/dist-packages/torch/include) + + link_directories(/usr/local/lib) + link_directories(/usr/local/lib/python2.7/dist-packages/torch/lib) + + set(CMAKE_PREFIX_PATH /usr/local/lib/python2.7/dist-packages/torch) + set(Boost_USE_MULTITHREADED ON) + set(Torch_DIR /usr/local/lib/python2.7/dist-packages/torch) + endif() +endif() + + + +find_package(Torch REQUIRED) +find_package(OpenCV REQUIRED) +include_directories( ${OpenCV_INCLUDE_DIRS} ) + +add_executable(midas_cpp src/main.cpp) +target_link_libraries(midas_cpp "${TORCH_LIBRARIES}" "${OpenCV_LIBS} ${catkin_LIBRARIES}") +set_property(TARGET midas_cpp PROPERTY CXX_STANDARD 14) + + + +################################### +## catkin specific configuration ## +################################### +## The catkin_package macro generates cmake config files for your package +## Declare things to be passed to dependent projects +## INCLUDE_DIRS: uncomment this if your package contains header files +## LIBRARIES: libraries you create in this project that dependent projects also need +## CATKIN_DEPENDS: catkin_packages dependent projects also need +## DEPENDS: system dependencies of this project that dependent projects also need +catkin_package( +# INCLUDE_DIRS include +# LIBRARIES midas_cpp +# CATKIN_DEPENDS cv_bridge image_transport roscpp sensor_msgs 
std_msgs +# DEPENDS system_lib +) + +########### +## Build ## +########### + +## Specify additional locations of header files +## Your package locations should be listed before other locations +include_directories( +# include + ${catkin_INCLUDE_DIRS} +) + +## Declare a C++ library +# add_library(${PROJECT_NAME} +# src/${PROJECT_NAME}/midas_cpp.cpp +# ) + +## Add cmake target dependencies of the library +## as an example, code may need to be generated before libraries +## either from message generation or dynamic reconfigure +# add_dependencies(${PROJECT_NAME} ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS}) + +## Declare a C++ executable +## With catkin_make all packages are built within a single CMake context +## The recommended prefix ensures that target names across packages don't collide +# add_executable(${PROJECT_NAME}_node src/midas_cpp_node.cpp) + +## Rename C++ executable without prefix +## The above recommended prefix causes long target names, the following renames the +## target back to the shorter version for ease of user use +## e.g. "rosrun someones_pkg node" instead of "rosrun someones_pkg someones_pkg_node" +# set_target_properties(${PROJECT_NAME}_node PROPERTIES OUTPUT_NAME node PREFIX "") + +## Add cmake target dependencies of the executable +## same as for the library above +# add_dependencies(${PROJECT_NAME}_node ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS}) + +## Specify libraries to link a library or executable target against +# target_link_libraries(${PROJECT_NAME}_node +# ${catkin_LIBRARIES} +# ) + +############# +## Install ## +############# + +# all install targets should use catkin DESTINATION variables +# See http://ros.org/doc/api/catkin/html/adv_user_guide/variables.html + +## Mark executable scripts (Python etc.) for installation +## in contrast to setup.py, you can choose the destination +# catkin_install_python(PROGRAMS +# scripts/my_python_script +# DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} +# ) + +## Mark executables for installation +## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_executables.html +# install(TARGETS ${PROJECT_NAME}_node +# RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} +# ) + +## Mark libraries for installation +## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_libraries.html +# install(TARGETS ${PROJECT_NAME} +# ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} +# LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} +# RUNTIME DESTINATION ${CATKIN_GLOBAL_BIN_DESTINATION} +# ) + +## Mark cpp header files for installation +# install(DIRECTORY include/${PROJECT_NAME}/ +# DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION} +# FILES_MATCHING PATTERN "*.h" +# PATTERN ".svn" EXCLUDE +# ) + +## Mark other files for installation (e.g. launch and bag files, etc.) 
+# install(FILES +# # myfile1 +# # myfile2 +# DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} +# ) + +############# +## Testing ## +############# + +## Add gtest based cpp test target and link libraries +# catkin_add_gtest(${PROJECT_NAME}-test test/test_midas_cpp.cpp) +# if(TARGET ${PROJECT_NAME}-test) +# target_link_libraries(${PROJECT_NAME}-test ${PROJECT_NAME}) +# endif() + +## Add folders to be run by python nosetests +# catkin_add_nosetests(test) + +install(TARGETS ${PROJECT_NAME} + ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} + LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} + RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} +) + +add_custom_command( + TARGET midas_cpp POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy + ${CMAKE_CURRENT_BINARY_DIR}/midas_cpp + ${CMAKE_SOURCE_DIR}/midas_cpp +) \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/launch/midas_cpp.launch b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/launch/midas_cpp.launch new file mode 100644 index 0000000000000000000000000000000000000000..88e86f42f668e76ad4976ec6794a8cb0f20cac65 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/launch/midas_cpp.launch @@ -0,0 +1,19 @@ + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/launch/midas_talker_listener.launch b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/launch/midas_talker_listener.launch new file mode 100644 index 0000000000000000000000000000000000000000..8817a4f4933c56986fe0edc0886b2fded3d3406d --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/launch/midas_talker_listener.launch @@ -0,0 +1,23 @@ + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/package.xml b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/package.xml new file mode 100644 index 0000000000000000000000000000000000000000..9cac90eba75409bd170f73531c54c83c52ff047a --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/package.xml @@ -0,0 +1,77 @@ + + + midas_cpp + 0.1.0 + The midas_cpp package + + Alexey Bochkovskiy + MIT + https://github.com/isl-org/MiDaS/tree/master/ros + + + + + + + TODO + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + catkin + cv_bridge + image_transport + roscpp + rospy + sensor_msgs + std_msgs + cv_bridge + image_transport + roscpp + rospy + sensor_msgs + std_msgs + cv_bridge + image_transport + roscpp + rospy + sensor_msgs + std_msgs + + + + + + + + diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/scripts/listener.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/scripts/listener.py new file mode 100644 index 
0000000000000000000000000000000000000000..6927ea7a83ac9309e5f883ee974a5dcfa8a2aa3b --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/scripts/listener.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python3 +from __future__ import print_function + +import roslib +#roslib.load_manifest('my_package') +import sys +import rospy +import cv2 +import numpy as np +from std_msgs.msg import String +from sensor_msgs.msg import Image +from cv_bridge import CvBridge, CvBridgeError + +class video_show: + + def __init__(self): + self.show_output = rospy.get_param('~show_output', True) + self.save_output = rospy.get_param('~save_output', False) + self.output_video_file = rospy.get_param('~output_video_file','result.mp4') + # rospy.loginfo(f"Listener - params: show_output={self.show_output}, save_output={self.save_output}, output_video_file={self.output_video_file}") + + self.bridge = CvBridge() + self.image_sub = rospy.Subscriber("midas_topic", Image, self.callback) + + def callback(self, data): + try: + cv_image = self.bridge.imgmsg_to_cv2(data) + except CvBridgeError as e: + print(e) + return + + if cv_image.size == 0: + return + + rospy.loginfo("Listener: Received new frame") + cv_image = cv_image.astype("uint8") + + if self.show_output==True: + cv2.imshow("video_show", cv_image) + cv2.waitKey(10) + + if self.save_output==True: + if self.video_writer_init==False: + fourcc = cv2.VideoWriter_fourcc(*'XVID') + self.out = cv2.VideoWriter(self.output_video_file, fourcc, 25, (cv_image.shape[1], cv_image.shape[0])) + + self.out.write(cv_image) + + + +def main(args): + rospy.init_node('listener', anonymous=True) + ic = video_show() + try: + rospy.spin() + except KeyboardInterrupt: + print("Shutting down") + cv2.destroyAllWindows() + +if __name__ == '__main__': + main(sys.argv) \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/scripts/listener_original.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/scripts/listener_original.py new file mode 100644 index 0000000000000000000000000000000000000000..20e235f6958d644b89383752ab18e9e2275f55e5 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/scripts/listener_original.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python3 +from __future__ import print_function + +import roslib +#roslib.load_manifest('my_package') +import sys +import rospy +import cv2 +import numpy as np +from std_msgs.msg import String +from sensor_msgs.msg import Image +from cv_bridge import CvBridge, CvBridgeError + +class video_show: + + def __init__(self): + self.show_output = rospy.get_param('~show_output', True) + self.save_output = rospy.get_param('~save_output', False) + self.output_video_file = rospy.get_param('~output_video_file','result.mp4') + # rospy.loginfo(f"Listener original - params: show_output={self.show_output}, save_output={self.save_output}, output_video_file={self.output_video_file}") + + self.bridge = CvBridge() + self.image_sub = rospy.Subscriber("image_topic", Image, self.callback) + + def callback(self, data): + try: + cv_image = self.bridge.imgmsg_to_cv2(data) + except CvBridgeError as e: + print(e) + return + + if cv_image.size == 0: + return + + rospy.loginfo("Listener_original: Received new frame") + cv_image = cv_image.astype("uint8") + + if 
self.show_output==True: + cv2.imshow("video_show_orig", cv_image) + cv2.waitKey(10) + + if self.save_output==True: + if self.video_writer_init==False: + fourcc = cv2.VideoWriter_fourcc(*'XVID') + self.out = cv2.VideoWriter(self.output_video_file, fourcc, 25, (cv_image.shape[1], cv_image.shape[0])) + + self.out.write(cv_image) + + + +def main(args): + rospy.init_node('listener_original', anonymous=True) + ic = video_show() + try: + rospy.spin() + except KeyboardInterrupt: + print("Shutting down") + cv2.destroyAllWindows() + +if __name__ == '__main__': + main(sys.argv) \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/scripts/talker.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/scripts/talker.py new file mode 100644 index 0000000000000000000000000000000000000000..8219cc8632484a2efd02984347c615efad6b78b2 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/scripts/talker.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python3 + + +import roslib +#roslib.load_manifest('my_package') +import sys +import rospy +import cv2 +from std_msgs.msg import String +from sensor_msgs.msg import Image +from cv_bridge import CvBridge, CvBridgeError + + +def talker(): + rospy.init_node('talker', anonymous=True) + + use_camera = rospy.get_param('~use_camera', False) + input_video_file = rospy.get_param('~input_video_file','test.mp4') + # rospy.loginfo(f"Talker - params: use_camera={use_camera}, input_video_file={input_video_file}") + + # rospy.loginfo("Talker: Trying to open a video stream") + if use_camera == True: + cap = cv2.VideoCapture(0) + else: + cap = cv2.VideoCapture(input_video_file) + + pub = rospy.Publisher('image_topic', Image, queue_size=1) + rate = rospy.Rate(30) # 30hz + bridge = CvBridge() + + while not rospy.is_shutdown(): + ret, cv_image = cap.read() + if ret==False: + print("Talker: Video is over") + rospy.loginfo("Video is over") + return + + try: + image = bridge.cv2_to_imgmsg(cv_image, "bgr8") + except CvBridgeError as e: + rospy.logerr("Talker: cv2image conversion failed: ", e) + print(e) + continue + + rospy.loginfo("Talker: Publishing frame") + pub.publish(image) + rate.sleep() + +if __name__ == '__main__': + try: + talker() + except rospy.ROSInterruptException: + pass diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/src/main.cpp b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/src/main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e4fc72c6955f66af71c9cb1fc7a7b1f643129685 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/src/main.cpp @@ -0,0 +1,285 @@ +#include +#include +#include +#include + +#include + +#include // One-stop header. 
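+// NOTE (added for clarity, not part of the original source): the angle-bracketed
+// header names appear to have been stripped from the #include lines in this copy.
+// Judging from the "One-stop header" comment above and the code below, they are the
+// libtorch one-stop header (torch/script.h) plus the ROS, image_transport, cv_bridge,
+// sensor_msgs and OpenCV headers used by this node.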
+ +#include +#include +#include +#include + +#include +#include + +// includes for OpenCV >= 3.x +#ifndef CV_VERSION_EPOCH +#include +#include +#include +#endif + +// OpenCV includes for OpenCV 2.x +#ifdef CV_VERSION_EPOCH +#include +#include +#include +#include +#endif + +static const std::string OPENCV_WINDOW = "Image window"; + +class Midas +{ + ros::NodeHandle nh_; + image_transport::ImageTransport it_; + image_transport::Subscriber image_sub_; + image_transport::Publisher image_pub_; + + torch::jit::script::Module module; + torch::Device device; + + auto ToTensor(cv::Mat img, bool show_output = false, bool unsqueeze = false, int unsqueeze_dim = 0) + { + //std::cout << "image shape: " << img.size() << std::endl; + at::Tensor tensor_image = torch::from_blob(img.data, { img.rows, img.cols, 3 }, at::kByte); + + if (unsqueeze) + { + tensor_image.unsqueeze_(unsqueeze_dim); + //std::cout << "tensors new shape: " << tensor_image.sizes() << std::endl; + } + + if (show_output) + { + std::cout << tensor_image.slice(2, 0, 1) << std::endl; + } + //std::cout << "tenor shape: " << tensor_image.sizes() << std::endl; + return tensor_image; + } + + auto ToInput(at::Tensor tensor_image) + { + // Create a vector of inputs. + return std::vector{tensor_image}; + } + + auto ToCvImage(at::Tensor tensor, int cv_type = CV_8UC3) + { + int width = tensor.sizes()[0]; + int height = tensor.sizes()[1]; + try + { + cv::Mat output_mat; + if (cv_type == CV_8UC4 || cv_type == CV_8UC3 || cv_type == CV_8UC2 || cv_type == CV_8UC1) { + cv::Mat cv_image(cv::Size{ height, width }, cv_type, tensor.data_ptr()); + output_mat = cv_image; + } + else if (cv_type == CV_32FC4 || cv_type == CV_32FC3 || cv_type == CV_32FC2 || cv_type == CV_32FC1) { + cv::Mat cv_image(cv::Size{ height, width }, cv_type, tensor.data_ptr()); + output_mat = cv_image; + } + else if (cv_type == CV_64FC4 || cv_type == CV_64FC3 || cv_type == CV_64FC2 || cv_type == CV_64FC1) { + cv::Mat cv_image(cv::Size{ height, width }, cv_type, tensor.data_ptr()); + output_mat = cv_image; + } + + //show_image(output_mat, "converted image from tensor"); + return output_mat.clone(); + } + catch (const c10::Error& e) + { + std::cout << "an error has occured : " << e.msg() << std::endl; + } + return cv::Mat(height, width, CV_8UC3); + } + + std::string input_topic, output_topic, model_name; + bool out_orig_size; + int net_width, net_height; + torch::NoGradGuard guard; + at::Tensor mean, std; + at::Tensor output, tensor; + +public: + Midas() + : nh_(), it_(nh_), device(torch::Device(torch::kCPU)) + { + ros::param::param("~input_topic", input_topic, "image_topic"); + ros::param::param("~output_topic", output_topic, "midas_topic"); + ros::param::param("~model_name", model_name, "model-small-traced.pt"); + ros::param::param("~out_orig_size", out_orig_size, true); + ros::param::param("~net_width", net_width, 256); + ros::param::param("~net_height", net_height, 256); + + std::cout << ", input_topic = " << input_topic << + ", output_topic = " << output_topic << + ", model_name = " << model_name << + ", out_orig_size = " << out_orig_size << + ", net_width = " << net_width << + ", net_height = " << net_height << + std::endl; + + // Subscrive to input video feed and publish output video feed + image_sub_ = it_.subscribe(input_topic, 1, &Midas::imageCb, this); + image_pub_ = it_.advertise(output_topic, 1); + + std::cout << "Try to load torchscript model \n"; + + try { + // Deserialize the ScriptModule from a file using torch::jit::load(). 
+ module = torch::jit::load(model_name); + } + catch (const c10::Error& e) { + std::cerr << "error loading the model\n"; + exit(0); + } + + std::cout << "ok\n"; + + try { + module.eval(); + torch::jit::getProfilingMode() = false; + torch::jit::setGraphExecutorOptimize(true); + + mean = torch::tensor({ 0.485, 0.456, 0.406 }); + std = torch::tensor({ 0.229, 0.224, 0.225 }); + + if (torch::hasCUDA()) { + std::cout << "cuda is available" << std::endl; + at::globalContext().setBenchmarkCuDNN(true); + device = torch::Device(torch::kCUDA); + module.to(device); + mean = mean.to(device); + std = std.to(device); + } + } + catch (const c10::Error& e) + { + std::cerr << " module initialization: " << e.msg() << std::endl; + } + } + + ~Midas() + { + } + + void imageCb(const sensor_msgs::ImageConstPtr& msg) + { + cv_bridge::CvImagePtr cv_ptr; + try + { + // sensor_msgs::Image to cv::Mat + cv_ptr = cv_bridge::toCvCopy(msg, sensor_msgs::image_encodings::RGB8); + } + catch (cv_bridge::Exception& e) + { + ROS_ERROR("cv_bridge exception: %s", e.what()); + return; + } + + // pre-processing + auto tensor_cpu = ToTensor(cv_ptr->image); // OpenCV-image -> Libtorch-tensor + + try { + tensor = tensor_cpu.to(device); // move to device (CPU or GPU) + + tensor = tensor.toType(c10::kFloat); + tensor = tensor.permute({ 2, 0, 1 }); // HWC -> CHW + tensor = tensor.unsqueeze(0); + tensor = at::upsample_bilinear2d(tensor, { net_height, net_width }, true); // resize + tensor = tensor.squeeze(0); + tensor = tensor.permute({ 1, 2, 0 }); // CHW -> HWC + + tensor = tensor.div(255).sub(mean).div(std); // normalization + tensor = tensor.permute({ 2, 0, 1 }); // HWC -> CHW + tensor.unsqueeze_(0); // CHW -> NCHW + } + catch (const c10::Error& e) + { + std::cerr << " pre-processing exception: " << e.msg() << std::endl; + return; + } + + auto input_to_net = ToInput(tensor); // input to the network + + // inference + output; + try { + output = module.forward(input_to_net).toTensor(); // run inference + } + catch (const c10::Error& e) + { + std::cerr << " module.forward() exception: " << e.msg() << std::endl; + return; + } + + output = output.detach().to(torch::kF32); + + // move to CPU temporary + at::Tensor output_tmp = output; + output_tmp = output_tmp.to(torch::kCPU); + + // normalization + float min_val = std::numeric_limits::max(); + float max_val = std::numeric_limits::min(); + + for (int i = 0; i < net_width * net_height; ++i) { + float val = output_tmp.data_ptr()[i]; + if (min_val > val) min_val = val; + if (max_val < val) max_val = val; + } + float range_val = max_val - min_val; + + output = output.sub(min_val).div(range_val).mul(255.0F).clamp(0, 255).to(torch::kF32); // .to(torch::kU8); + + // resize to the original size if required + if (out_orig_size) { + try { + output = at::upsample_bilinear2d(output.unsqueeze(0), { cv_ptr->image.size().height, cv_ptr->image.size().width }, true); + output = output.squeeze(0); + } + catch (const c10::Error& e) + { + std::cout << " upsample_bilinear2d() exception: " << e.msg() << std::endl; + return; + } + } + output = output.permute({ 1, 2, 0 }).to(torch::kCPU); + + int cv_type = CV_32FC1; // CV_8UC1; + auto cv_img = ToCvImage(output, cv_type); + + sensor_msgs::Image img_msg; + + try { + // cv::Mat -> sensor_msgs::Image + std_msgs::Header header; // empty header + header.seq = 0; // user defined counter + header.stamp = ros::Time::now();// time + //cv_bridge::CvImage img_bridge = cv_bridge::CvImage(header, sensor_msgs::image_encodings::MONO8, cv_img); + cv_bridge::CvImage img_bridge = 
cv_bridge::CvImage(header, sensor_msgs::image_encodings::TYPE_32FC1, cv_img); + + img_bridge.toImageMsg(img_msg); // cv_bridge -> sensor_msgs::Image + } + catch (cv_bridge::Exception& e) + { + ROS_ERROR("cv_bridge exception: %s", e.what()); + return; + } + + // Output modified video stream + image_pub_.publish(img_msg); + } +}; + +int main(int argc, char** argv) +{ + ros::init(argc, argv, "midas", ros::init_options::AnonymousName); + Midas ic; + ros::spin(); + return 0; +} \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/run_talker_listener_test.sh b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/run_talker_listener_test.sh new file mode 100644 index 0000000000000000000000000000000000000000..a997c4261072d0d627598fe06a723fcc7522d347 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/run_talker_listener_test.sh @@ -0,0 +1,16 @@ +# place any test.mp4 file near with this file + +# roscore +# rosnode kill -a + +source ~/catkin_ws/devel/setup.bash + +roscore & +P1=$! +rosrun midas_cpp talker.py & +P2=$! +rosrun midas_cpp listener_original.py & +P3=$! +rosrun midas_cpp listener.py & +P4=$! +wait $P1 $P2 $P3 $P4 \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/run.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/run.py new file mode 100644 index 0000000000000000000000000000000000000000..5696ef0547af093713ea416d18edd77d11879d0a --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/run.py @@ -0,0 +1,277 @@ +"""Compute depth maps for images in the input folder. +""" +import os +import glob +import torch +import utils +import cv2 +import argparse +import time + +import numpy as np + +from imutils.video import VideoStream +from midas.model_loader import default_models, load_model + +first_execution = True +def process(device, model, model_type, image, input_size, target_size, optimize, use_camera): + """ + Run the inference and interpolate. + + Args: + device (torch.device): the torch device used + model: the model used for inference + model_type: the type of the model + image: the image fed into the neural network + input_size: the size (width, height) of the neural network input (for OpenVINO) + target_size: the size (width, height) the neural network output is interpolated to + optimize: optimize the model to half-floats on CUDA? + use_camera: is the camera used? + + Returns: + the prediction + """ + global first_execution + + if "openvino" in model_type: + if first_execution or not use_camera: + print(f" Input resized to {input_size[0]}x{input_size[1]} before entering the encoder") + first_execution = False + + sample = [np.reshape(image, (1, 3, *input_size))] + prediction = model(sample)[model.output(0)][0] + prediction = cv2.resize(prediction, dsize=target_size, + interpolation=cv2.INTER_CUBIC) + else: + sample = torch.from_numpy(image).to(device).unsqueeze(0) + + if optimize and device == torch.device("cuda"): + if first_execution: + print(" Optimization to half-floats activated. 
Use with caution, because models like Swin require\n" + " float precision to work properly and may yield non-finite depth values to some extent for\n" + " half-floats.") + sample = sample.to(memory_format=torch.channels_last) + sample = sample.half() + + if first_execution or not use_camera: + height, width = sample.shape[2:] + print(f" Input resized to {width}x{height} before entering the encoder") + first_execution = False + + prediction = model.forward(sample) + prediction = ( + torch.nn.functional.interpolate( + prediction.unsqueeze(1), + size=target_size[::-1], + mode="bicubic", + align_corners=False, + ) + .squeeze() + .cpu() + .numpy() + ) + + return prediction + + +def create_side_by_side(image, depth, grayscale): + """ + Take an RGB image and depth map and place them side by side. This includes a proper normalization of the depth map + for better visibility. + + Args: + image: the RGB image + depth: the depth map + grayscale: use a grayscale colormap? + + Returns: + the image and depth map place side by side + """ + depth_min = depth.min() + depth_max = depth.max() + normalized_depth = 255 * (depth - depth_min) / (depth_max - depth_min) + normalized_depth *= 3 + + right_side = np.repeat(np.expand_dims(normalized_depth, 2), 3, axis=2) / 3 + if not grayscale: + right_side = cv2.applyColorMap(np.uint8(right_side), cv2.COLORMAP_INFERNO) + + if image is None: + return right_side + else: + return np.concatenate((image, right_side), axis=1) + + +def run(input_path, output_path, model_path, model_type="dpt_beit_large_512", optimize=False, side=False, height=None, + square=False, grayscale=False): + """Run MonoDepthNN to compute depth maps. + + Args: + input_path (str): path to input folder + output_path (str): path to output folder + model_path (str): path to saved model + model_type (str): the model type + optimize (bool): optimize the model to half-floats on CUDA? + side (bool): RGB and depth side by side in output images? + height (int): inference encoder image height + square (bool): resize to a square resolution? + grayscale (bool): use a grayscale colormap? + """ + print("Initialize") + + # select device + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + print("Device: %s" % device) + + model, transform, net_w, net_h = load_model(device, model_path, model_type, optimize, height, square) + + # get input + if input_path is not None: + image_names = glob.glob(os.path.join(input_path, "*")) + num_images = len(image_names) + else: + print("No input path specified. Grabbing images from camera.") + + # create output folder + if output_path is not None: + os.makedirs(output_path, exist_ok=True) + + print("Start processing") + + if input_path is not None: + if output_path is None: + print("Warning: No output path specified. 
Images will be processed but not shown or stored anywhere.") + for index, image_name in enumerate(image_names): + + print(" Processing {} ({}/{})".format(image_name, index + 1, num_images)) + + # input + original_image_rgb = utils.read_image(image_name) # in [0, 1] + image = transform({"image": original_image_rgb})["image"] + + # compute + with torch.no_grad(): + prediction = process(device, model, model_type, image, (net_w, net_h), original_image_rgb.shape[1::-1], + optimize, False) + + # output + if output_path is not None: + filename = os.path.join( + output_path, os.path.splitext(os.path.basename(image_name))[0] + '-' + model_type + ) + if not side: + utils.write_depth(filename, prediction, grayscale, bits=2) + else: + original_image_bgr = np.flip(original_image_rgb, 2) + content = create_side_by_side(original_image_bgr*255, prediction, grayscale) + cv2.imwrite(filename + ".png", content) + utils.write_pfm(filename + ".pfm", prediction.astype(np.float32)) + + else: + with torch.no_grad(): + fps = 1 + video = VideoStream(0).start() + time_start = time.time() + frame_index = 0 + while True: + frame = video.read() + if frame is not None: + original_image_rgb = np.flip(frame, 2) # in [0, 255] (flip required to get RGB) + image = transform({"image": original_image_rgb/255})["image"] + + prediction = process(device, model, model_type, image, (net_w, net_h), + original_image_rgb.shape[1::-1], optimize, True) + + original_image_bgr = np.flip(original_image_rgb, 2) if side else None + content = create_side_by_side(original_image_bgr, prediction, grayscale) + cv2.imshow('MiDaS Depth Estimation - Press Escape to close window ', content/255) + + if output_path is not None: + filename = os.path.join(output_path, 'Camera' + '-' + model_type + '_' + str(frame_index)) + cv2.imwrite(filename + ".png", content) + + alpha = 0.1 + if time.time()-time_start > 0: + fps = (1 - alpha) * fps + alpha * 1 / (time.time()-time_start) # exponential moving average + time_start = time.time() + print(f"\rFPS: {round(fps,2)}", end="") + + if cv2.waitKey(1) == 27: # Escape key + break + + frame_index += 1 + print() + + print("Finished") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + + parser.add_argument('-i', '--input_path', + default=None, + help='Folder with input images (if no input path is specified, images are tried to be grabbed ' + 'from camera)' + ) + + parser.add_argument('-o', '--output_path', + default=None, + help='Folder for output images' + ) + + parser.add_argument('-m', '--model_weights', + default=None, + help='Path to the trained weights of model' + ) + + parser.add_argument('-t', '--model_type', + default='dpt_beit_large_512', + help='Model type: ' + 'dpt_beit_large_512, dpt_beit_large_384, dpt_beit_base_384, dpt_swin2_large_384, ' + 'dpt_swin2_base_384, dpt_swin2_tiny_256, dpt_swin_large_384, dpt_next_vit_large_384, ' + 'dpt_levit_224, dpt_large_384, dpt_hybrid_384, midas_v21_384, midas_v21_small_256 or ' + 'openvino_midas_v21_small_256' + ) + + parser.add_argument('-s', '--side', + action='store_true', + help='Output images contain RGB and depth images side by side' + ) + + parser.add_argument('--optimize', dest='optimize', action='store_true', help='Use half-float optimization') + parser.set_defaults(optimize=False) + + parser.add_argument('--height', + type=int, default=None, + help='Preferred height of images feed into the encoder during inference. 
Note that the ' + 'preferred height may differ from the actual height, because an alignment to multiples of ' + '32 takes place. Many models support only the height chosen during training, which is ' + 'used automatically if this parameter is not set.' + ) + parser.add_argument('--square', + action='store_true', + help='Option to resize images to a square resolution by changing their widths when images are ' + 'fed into the encoder during inference. If this parameter is not set, the aspect ratio of ' + 'images is tried to be preserved if supported by the model.' + ) + parser.add_argument('--grayscale', + action='store_true', + help='Use a grayscale colormap instead of the inferno one. Although the inferno colormap, ' + 'which is used by default, is better for visibility, it does not allow storing 16-bit ' + 'depth values in PNGs but only 8-bit ones due to the precision limitation of this ' + 'colormap.' + ) + + args = parser.parse_args() + + + if args.model_weights is None: + args.model_weights = default_models[args.model_type] + + # set torch options + torch.backends.cudnn.enabled = True + torch.backends.cudnn.benchmark = True + + # compute depth maps + run(args.input_path, args.output_path, args.model_weights, args.model_type, args.optimize, args.side, args.height, + args.square, args.grayscale) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/README.md b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5b5fe0e63668eab45a55b140826cb3762862b17c --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/README.md @@ -0,0 +1,147 @@ +## Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer + +### TensorFlow inference using `.pb` and `.onnx` models + +1. [Run inference on TensorFlow-model by using TensorFlow](#run-inference-on-tensorflow-model-by-using-tensorFlow) + +2. [Run inference on ONNX-model by using TensorFlow](#run-inference-on-onnx-model-by-using-tensorflow) + +3. [Make ONNX model from downloaded Pytorch model file](#make-onnx-model-from-downloaded-pytorch-model-file) + + +### Run inference on TensorFlow-model by using TensorFlow + +1) Download the model weights [model-f6b98070.pb](https://github.com/isl-org/MiDaS/releases/download/v2_1/model-f6b98070.pb) +and [model-small.pb](https://github.com/isl-org/MiDaS/releases/download/v2_1/model-small.pb) and place the +file in the `/tf/` folder. + +2) Set up dependencies: + +```shell +# install OpenCV +pip install --upgrade pip +pip install opencv-python + +# install TensorFlow +pip install -I grpcio tensorflow==2.3.0 tensorflow-addons==0.11.2 numpy==1.18.0 +``` + +#### Usage + +1) Place one or more input images in the folder `tf/input`. + +2) Run the model: + + ```shell + python tf/run_pb.py + ``` + + Or run the small model: + + ```shell + python tf/run_pb.py --model_weights model-small.pb --model_type small + ``` + +3) The resulting inverse depth maps are written to the `tf/output` folder. + + +### Run inference on ONNX-model by using ONNX-Runtime + +1) Download the model weights [model-f6b98070.onnx](https://github.com/isl-org/MiDaS/releases/download/v2_1/model-f6b98070.onnx) +and [model-small.onnx](https://github.com/isl-org/MiDaS/releases/download/v2_1/model-small.onnx) and place the +file in the `/tf/` folder. 
+ +2) Set up dependencies: + +```shell +# install OpenCV +pip install --upgrade pip +pip install opencv-python + +# install ONNX +pip install onnx==1.7.0 + +# install ONNX Runtime +pip install onnxruntime==1.5.2 +``` + +#### Usage + +1) Place one or more input images in the folder `tf/input`. + +2) Run the model: + + ```shell + python tf/run_onnx.py + ``` + + Or run the small model: + + ```shell + python tf/run_onnx.py --model_weights model-small.onnx --model_type small + ``` + +3) The resulting inverse depth maps are written to the `tf/output` folder. + + + +### Make ONNX model from downloaded Pytorch model file + +1) Download the model weights [model-f6b98070.pt](https://github.com/isl-org/MiDaS/releases/download/v2_1/model-f6b98070.pt) and place the +file in the root folder. + +2) Set up dependencies: + +```shell +# install OpenCV +pip install --upgrade pip +pip install opencv-python + +# install PyTorch TorchVision +pip install -I torch==1.7.0 torchvision==0.8.0 + +# install TensorFlow +pip install -I grpcio tensorflow==2.3.0 tensorflow-addons==0.11.2 numpy==1.18.0 + +# install ONNX +pip install onnx==1.7.0 + +# install ONNX-TensorFlow +git clone https://github.com/onnx/onnx-tensorflow.git +cd onnx-tensorflow +git checkout 095b51b88e35c4001d70f15f80f31014b592b81e +pip install -e . +``` + +#### Usage + +1) Run the converter: + + ```shell + python tf/make_onnx_model.py + ``` + +2) The resulting `model-f6b98070.onnx` file is written to the `/tf/` folder. + + +### Requirements + + The code was tested with Python 3.6.9, PyTorch 1.5.1, TensorFlow 2.2.0, TensorFlow-addons 0.8.3, ONNX 1.7.0, ONNX-TensorFlow (GitHub-master-17.07.2020) and OpenCV 4.3.0. + +### Citation + +Please cite our paper if you use this code or any of the models: +``` +@article{Ranftl2019, + author = {Ren\'{e} Ranftl and Katrin Lasinger and David Hafner and Konrad Schindler and Vladlen Koltun}, + title = {Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer}, + journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)}, + year = {2020}, +} +``` + +### License + +MIT License + + diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/input/.placeholder b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/input/.placeholder new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/make_onnx_model.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/make_onnx_model.py new file mode 100644 index 0000000000000000000000000000000000000000..d14b0e4e1d2ea70fa315fd7ca7dfd72440a19376 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/make_onnx_model.py @@ -0,0 +1,112 @@ +"""Compute depth maps for images in the input folder. 
+""" +import os +import ntpath +import glob +import torch +import utils +import cv2 +import numpy as np +from torchvision.transforms import Compose, Normalize +from torchvision import transforms + +from shutil import copyfile +import fileinput +import sys +sys.path.append(os.getcwd() + '/..') + +def modify_file(): + modify_filename = '../midas/blocks.py' + copyfile(modify_filename, modify_filename+'.bak') + + with open(modify_filename, 'r') as file : + filedata = file.read() + + filedata = filedata.replace('align_corners=True', 'align_corners=False') + filedata = filedata.replace('import torch.nn as nn', 'import torch.nn as nn\nimport torchvision.models as models') + filedata = filedata.replace('torch.hub.load("facebookresearch/WSL-Images", "resnext101_32x8d_wsl")', 'models.resnext101_32x8d()') + + with open(modify_filename, 'w') as file: + file.write(filedata) + +def restore_file(): + modify_filename = '../midas/blocks.py' + copyfile(modify_filename+'.bak', modify_filename) + +modify_file() + +from midas.midas_net import MidasNet +from midas.transforms import Resize, NormalizeImage, PrepareForNet + +restore_file() + + +class MidasNet_preprocessing(MidasNet): + """Network for monocular depth estimation. + """ + def forward(self, x): + """Forward pass. + + Args: + x (tensor): input data (image) + + Returns: + tensor: depth + """ + + mean = torch.tensor([0.485, 0.456, 0.406]) + std = torch.tensor([0.229, 0.224, 0.225]) + x.sub_(mean[None, :, None, None]).div_(std[None, :, None, None]) + + return MidasNet.forward(self, x) + + +def run(model_path): + """Run MonoDepthNN to compute depth maps. + + Args: + model_path (str): path to saved model + """ + print("initialize") + + # select device + + # load network + #model = MidasNet(model_path, non_negative=True) + model = MidasNet_preprocessing(model_path, non_negative=True) + + model.eval() + + print("start processing") + + # input + img_input = np.zeros((3, 384, 384), np.float32) + + # compute + with torch.no_grad(): + sample = torch.from_numpy(img_input).unsqueeze(0) + prediction = model.forward(sample) + prediction = ( + torch.nn.functional.interpolate( + prediction.unsqueeze(1), + size=img_input.shape[:2], + mode="bicubic", + align_corners=False, + ) + .squeeze() + .cpu() + .numpy() + ) + + torch.onnx.export(model, sample, ntpath.basename(model_path).rsplit('.', 1)[0]+'.onnx', opset_version=9) + + print("finished") + + +if __name__ == "__main__": + # set paths + # MODEL_PATH = "model.pt" + MODEL_PATH = "../model-f6b98070.pt" + + # compute depth maps + run(MODEL_PATH) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/output/.placeholder b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/output/.placeholder new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/run_onnx.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/run_onnx.py new file mode 100644 index 0000000000000000000000000000000000000000..7107b99969a127f951814f743d5c562a436b2430 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/run_onnx.py @@ -0,0 +1,119 @@ +"""Compute depth maps for images in the input folder. 
+""" +import os +import glob +import utils +import cv2 +import sys +import numpy as np +import argparse + +import onnx +import onnxruntime as rt + +from transforms import Resize, NormalizeImage, PrepareForNet + + +def run(input_path, output_path, model_path, model_type="large"): + """Run MonoDepthNN to compute depth maps. + + Args: + input_path (str): path to input folder + output_path (str): path to output folder + model_path (str): path to saved model + """ + print("initialize") + + # select device + device = "CUDA:0" + #device = "CPU" + print("device: %s" % device) + + # network resolution + if model_type == "large": + net_w, net_h = 384, 384 + elif model_type == "small": + net_w, net_h = 256, 256 + else: + print(f"model_type '{model_type}' not implemented, use: --model_type large") + assert False + + # load network + print("loading model...") + model = rt.InferenceSession(model_path) + input_name = model.get_inputs()[0].name + output_name = model.get_outputs()[0].name + + resize_image = Resize( + net_w, + net_h, + resize_target=None, + keep_aspect_ratio=False, + ensure_multiple_of=32, + resize_method="upper_bound", + image_interpolation_method=cv2.INTER_CUBIC, + ) + + def compose2(f1, f2): + return lambda x: f2(f1(x)) + + transform = compose2(resize_image, PrepareForNet()) + + # get input + img_names = glob.glob(os.path.join(input_path, "*")) + num_images = len(img_names) + + # create output folder + os.makedirs(output_path, exist_ok=True) + + print("start processing") + + for ind, img_name in enumerate(img_names): + + print(" processing {} ({}/{})".format(img_name, ind + 1, num_images)) + + # input + img = utils.read_image(img_name) + img_input = transform({"image": img})["image"] + + # compute + output = model.run([output_name], {input_name: img_input.reshape(1, 3, net_h, net_w).astype(np.float32)})[0] + prediction = np.array(output).reshape(net_h, net_w) + prediction = cv2.resize(prediction, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_CUBIC) + + # output + filename = os.path.join( + output_path, os.path.splitext(os.path.basename(img_name))[0] + ) + utils.write_depth(filename, prediction, bits=2) + + print("finished") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + + parser.add_argument('-i', '--input_path', + default='input', + help='folder with input images' + ) + + parser.add_argument('-o', '--output_path', + default='output', + help='folder for output images' + ) + + parser.add_argument('-m', '--model_weights', + default='model-f6b98070.onnx', + help='path to the trained weights of model' + ) + + parser.add_argument('-t', '--model_type', + default='large', + help='model type: large or small' + ) + + args = parser.parse_args() + + # compute depth maps + run(args.input_path, args.output_path, args.model_weights, args.model_type) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/run_pb.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/run_pb.py new file mode 100644 index 0000000000000000000000000000000000000000..e46254f7b37f72e7d87672d70fd4b2f393ad7658 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/run_pb.py @@ -0,0 +1,135 @@ +"""Compute depth maps for images in the input folder. 
+""" +import os +import glob +import utils +import cv2 +import argparse + +import tensorflow as tf + +from transforms import Resize, NormalizeImage, PrepareForNet + +def run(input_path, output_path, model_path, model_type="large"): + """Run MonoDepthNN to compute depth maps. + + Args: + input_path (str): path to input folder + output_path (str): path to output folder + model_path (str): path to saved model + """ + print("initialize") + + # the runtime initialization will not allocate all memory on the device to avoid out of GPU memory + gpus = tf.config.experimental.list_physical_devices('GPU') + if gpus: + try: + for gpu in gpus: + #tf.config.experimental.set_memory_growth(gpu, True) + tf.config.experimental.set_virtual_device_configuration(gpu, + [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=4000)]) + except RuntimeError as e: + print(e) + + # network resolution + if model_type == "large": + net_w, net_h = 384, 384 + elif model_type == "small": + net_w, net_h = 256, 256 + else: + print(f"model_type '{model_type}' not implemented, use: --model_type large") + assert False + + # load network + graph_def = tf.compat.v1.GraphDef() + with tf.io.gfile.GFile(model_path, 'rb') as f: + graph_def.ParseFromString(f.read()) + tf.import_graph_def(graph_def, name='') + + + model_operations = tf.compat.v1.get_default_graph().get_operations() + input_node = '0:0' + output_layer = model_operations[len(model_operations) - 1].name + ':0' + print("Last layer name: ", output_layer) + + resize_image = Resize( + net_w, + net_h, + resize_target=None, + keep_aspect_ratio=False, + ensure_multiple_of=32, + resize_method="upper_bound", + image_interpolation_method=cv2.INTER_CUBIC, + ) + + def compose2(f1, f2): + return lambda x: f2(f1(x)) + + transform = compose2(resize_image, PrepareForNet()) + + # get input + img_names = glob.glob(os.path.join(input_path, "*")) + num_images = len(img_names) + + # create output folder + os.makedirs(output_path, exist_ok=True) + + print("start processing") + + with tf.compat.v1.Session() as sess: + try: + # load images + for ind, img_name in enumerate(img_names): + + print(" processing {} ({}/{})".format(img_name, ind + 1, num_images)) + + # input + img = utils.read_image(img_name) + img_input = transform({"image": img})["image"] + + # compute + prob_tensor = sess.graph.get_tensor_by_name(output_layer) + prediction, = sess.run(prob_tensor, {input_node: [img_input] }) + prediction = prediction.reshape(net_h, net_w) + prediction = cv2.resize(prediction, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_CUBIC) + + # output + filename = os.path.join( + output_path, os.path.splitext(os.path.basename(img_name))[0] + ) + utils.write_depth(filename, prediction, bits=2) + + except KeyError: + print ("Couldn't find input node: ' + input_node + ' or output layer: " + output_layer + ".") + exit(-1) + + print("finished") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + + parser.add_argument('-i', '--input_path', + default='input', + help='folder with input images' + ) + + parser.add_argument('-o', '--output_path', + default='output', + help='folder for output images' + ) + + parser.add_argument('-m', '--model_weights', + default='model-f6b98070.pb', + help='path to the trained weights of model' + ) + + parser.add_argument('-t', '--model_type', + default='large', + help='model type: large or small' + ) + + args = parser.parse_args() + + # compute depth maps + run(args.input_path, args.output_path, args.model_weights, args.model_type) diff --git 
a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/transforms.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/transforms.py new file mode 100644 index 0000000000000000000000000000000000000000..350cbc11662633ad7f8968eb10be2e7de6e384e9 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/transforms.py @@ -0,0 +1,234 @@ +import numpy as np +import cv2 +import math + + +def apply_min_size(sample, size, image_interpolation_method=cv2.INTER_AREA): + """Rezise the sample to ensure the given size. Keeps aspect ratio. + + Args: + sample (dict): sample + size (tuple): image size + + Returns: + tuple: new size + """ + shape = list(sample["disparity"].shape) + + if shape[0] >= size[0] and shape[1] >= size[1]: + return sample + + scale = [0, 0] + scale[0] = size[0] / shape[0] + scale[1] = size[1] / shape[1] + + scale = max(scale) + + shape[0] = math.ceil(scale * shape[0]) + shape[1] = math.ceil(scale * shape[1]) + + # resize + sample["image"] = cv2.resize( + sample["image"], tuple(shape[::-1]), interpolation=image_interpolation_method + ) + + sample["disparity"] = cv2.resize( + sample["disparity"], tuple(shape[::-1]), interpolation=cv2.INTER_NEAREST + ) + sample["mask"] = cv2.resize( + sample["mask"].astype(np.float32), + tuple(shape[::-1]), + interpolation=cv2.INTER_NEAREST, + ) + sample["mask"] = sample["mask"].astype(bool) + + return tuple(shape) + + +class Resize(object): + """Resize sample to given size (width, height). + """ + + def __init__( + self, + width, + height, + resize_target=True, + keep_aspect_ratio=False, + ensure_multiple_of=1, + resize_method="lower_bound", + image_interpolation_method=cv2.INTER_AREA, + ): + """Init. + + Args: + width (int): desired output width + height (int): desired output height + resize_target (bool, optional): + True: Resize the full sample (image, mask, target). + False: Resize image only. + Defaults to True. + keep_aspect_ratio (bool, optional): + True: Keep the aspect ratio of the input sample. + Output sample might not have the given width and height, and + resize behaviour depends on the parameter 'resize_method'. + Defaults to False. + ensure_multiple_of (int, optional): + Output width and height is constrained to be multiple of this parameter. + Defaults to 1. + resize_method (str, optional): + "lower_bound": Output will be at least as large as the given size. + "upper_bound": Output will be at max as large as the given size. (Output size might be smaller than given size.) + "minimal": Scale as least as possible. (Output size might be smaller than given size.) + Defaults to "lower_bound". 
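+
+            Illustrative example (added for clarity, not part of the original docstring):
+            with width = height = 384, keep_aspect_ratio=True, ensure_multiple_of=32 and
+            resize_method="upper_bound", get_size(1920, 1080) returns (384, 224): the common
+            scale is 384 / 1920 = 0.2, and 0.2 * 1080 = 216 is rounded to the nearest
+            multiple of 32 (capped at 384), giving 224.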
+ """ + self.__width = width + self.__height = height + + self.__resize_target = resize_target + self.__keep_aspect_ratio = keep_aspect_ratio + self.__multiple_of = ensure_multiple_of + self.__resize_method = resize_method + self.__image_interpolation_method = image_interpolation_method + + def constrain_to_multiple_of(self, x, min_val=0, max_val=None): + y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int) + + if max_val is not None and y > max_val: + y = (np.floor(x / self.__multiple_of) * self.__multiple_of).astype(int) + + if y < min_val: + y = (np.ceil(x / self.__multiple_of) * self.__multiple_of).astype(int) + + return y + + def get_size(self, width, height): + # determine new height and width + scale_height = self.__height / height + scale_width = self.__width / width + + if self.__keep_aspect_ratio: + if self.__resize_method == "lower_bound": + # scale such that output size is lower bound + if scale_width > scale_height: + # fit width + scale_height = scale_width + else: + # fit height + scale_width = scale_height + elif self.__resize_method == "upper_bound": + # scale such that output size is upper bound + if scale_width < scale_height: + # fit width + scale_height = scale_width + else: + # fit height + scale_width = scale_height + elif self.__resize_method == "minimal": + # scale as least as possbile + if abs(1 - scale_width) < abs(1 - scale_height): + # fit width + scale_height = scale_width + else: + # fit height + scale_width = scale_height + else: + raise ValueError( + f"resize_method {self.__resize_method} not implemented" + ) + + if self.__resize_method == "lower_bound": + new_height = self.constrain_to_multiple_of( + scale_height * height, min_val=self.__height + ) + new_width = self.constrain_to_multiple_of( + scale_width * width, min_val=self.__width + ) + elif self.__resize_method == "upper_bound": + new_height = self.constrain_to_multiple_of( + scale_height * height, max_val=self.__height + ) + new_width = self.constrain_to_multiple_of( + scale_width * width, max_val=self.__width + ) + elif self.__resize_method == "minimal": + new_height = self.constrain_to_multiple_of(scale_height * height) + new_width = self.constrain_to_multiple_of(scale_width * width) + else: + raise ValueError(f"resize_method {self.__resize_method} not implemented") + + return (new_width, new_height) + + def __call__(self, sample): + width, height = self.get_size( + sample["image"].shape[1], sample["image"].shape[0] + ) + + # resize sample + sample["image"] = cv2.resize( + sample["image"], + (width, height), + interpolation=self.__image_interpolation_method, + ) + + if self.__resize_target: + if "disparity" in sample: + sample["disparity"] = cv2.resize( + sample["disparity"], + (width, height), + interpolation=cv2.INTER_NEAREST, + ) + + if "depth" in sample: + sample["depth"] = cv2.resize( + sample["depth"], (width, height), interpolation=cv2.INTER_NEAREST + ) + + sample["mask"] = cv2.resize( + sample["mask"].astype(np.float32), + (width, height), + interpolation=cv2.INTER_NEAREST, + ) + sample["mask"] = sample["mask"].astype(bool) + + return sample + + +class NormalizeImage(object): + """Normlize image by given mean and std. + """ + + def __init__(self, mean, std): + self.__mean = mean + self.__std = std + + def __call__(self, sample): + sample["image"] = (sample["image"] - self.__mean) / self.__std + + return sample + + +class PrepareForNet(object): + """Prepare sample for usage as network input. 
+ """ + + def __init__(self): + pass + + def __call__(self, sample): + image = np.transpose(sample["image"], (2, 0, 1)) + sample["image"] = np.ascontiguousarray(image).astype(np.float32) + + if "mask" in sample: + sample["mask"] = sample["mask"].astype(np.float32) + sample["mask"] = np.ascontiguousarray(sample["mask"]) + + if "disparity" in sample: + disparity = sample["disparity"].astype(np.float32) + sample["disparity"] = np.ascontiguousarray(disparity) + + if "depth" in sample: + depth = sample["depth"].astype(np.float32) + sample["depth"] = np.ascontiguousarray(depth) + + return sample diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/utils.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..ff9a54bd55f5e31a90fad21242efbfda5a6cc1a7 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/utils.py @@ -0,0 +1,82 @@ +import numpy as np +import sys +import cv2 + + +def write_pfm(path, image, scale=1): + """Write pfm file. + Args: + path (str): pathto file + image (array): data + scale (int, optional): Scale. Defaults to 1. + """ + + with open(path, "wb") as file: + color = None + + if image.dtype.name != "float32": + raise Exception("Image dtype must be float32.") + + image = np.flipud(image) + + if len(image.shape) == 3 and image.shape[2] == 3: # color image + color = True + elif ( + len(image.shape) == 2 or len(image.shape) == 3 and image.shape[2] == 1 + ): # greyscale + color = False + else: + raise Exception("Image must have H x W x 3, H x W x 1 or H x W dimensions.") + + file.write("PF\n" if color else "Pf\n".encode()) + file.write("%d %d\n".encode() % (image.shape[1], image.shape[0])) + + endian = image.dtype.byteorder + + if endian == "<" or endian == "=" and sys.byteorder == "little": + scale = -scale + + file.write("%f\n".encode() % scale) + + image.tofile(file) + +def read_image(path): + """Read image and output RGB image (0-1). + Args: + path (str): path to file + Returns: + array: RGB image (0-1) + """ + img = cv2.imread(path) + + if img.ndim == 2: + img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) + + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0 + + return img + +def write_depth(path, depth, bits=1): + """Write depth map to pfm and png file. + Args: + path (str): filepath without extension + depth (array): depth + """ + write_pfm(path + ".pfm", depth.astype(np.float32)) + + depth_min = depth.min() + depth_max = depth.max() + + max_val = (2**(8*bits))-1 + + if depth_max - depth_min > np.finfo("float").eps: + out = max_val * (depth - depth_min) / (depth_max - depth_min) + else: + out = 0 + + if bits == 1: + cv2.imwrite(path + ".png", out.astype("uint8")) + elif bits == 2: + cv2.imwrite(path + ".png", out.astype("uint16")) + + return \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/utils.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7a3976fd97dfe6a9dc7d4fa144be8fcb0b18b2db --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/utils.py @@ -0,0 +1,199 @@ +"""Utils for monoDepth. 
+""" +import sys +import re +import numpy as np +import cv2 +import torch + + +def read_pfm(path): + """Read pfm file. + + Args: + path (str): path to file + + Returns: + tuple: (data, scale) + """ + with open(path, "rb") as file: + + color = None + width = None + height = None + scale = None + endian = None + + header = file.readline().rstrip() + if header.decode("ascii") == "PF": + color = True + elif header.decode("ascii") == "Pf": + color = False + else: + raise Exception("Not a PFM file: " + path) + + dim_match = re.match(r"^(\d+)\s(\d+)\s$", file.readline().decode("ascii")) + if dim_match: + width, height = list(map(int, dim_match.groups())) + else: + raise Exception("Malformed PFM header.") + + scale = float(file.readline().decode("ascii").rstrip()) + if scale < 0: + # little-endian + endian = "<" + scale = -scale + else: + # big-endian + endian = ">" + + data = np.fromfile(file, endian + "f") + shape = (height, width, 3) if color else (height, width) + + data = np.reshape(data, shape) + data = np.flipud(data) + + return data, scale + + +def write_pfm(path, image, scale=1): + """Write pfm file. + + Args: + path (str): pathto file + image (array): data + scale (int, optional): Scale. Defaults to 1. + """ + + with open(path, "wb") as file: + color = None + + if image.dtype.name != "float32": + raise Exception("Image dtype must be float32.") + + image = np.flipud(image) + + if len(image.shape) == 3 and image.shape[2] == 3: # color image + color = True + elif ( + len(image.shape) == 2 or len(image.shape) == 3 and image.shape[2] == 1 + ): # greyscale + color = False + else: + raise Exception("Image must have H x W x 3, H x W x 1 or H x W dimensions.") + + file.write("PF\n" if color else "Pf\n".encode()) + file.write("%d %d\n".encode() % (image.shape[1], image.shape[0])) + + endian = image.dtype.byteorder + + if endian == "<" or endian == "=" and sys.byteorder == "little": + scale = -scale + + file.write("%f\n".encode() % scale) + + image.tofile(file) + + +def read_image(path): + """Read image and output RGB image (0-1). + + Args: + path (str): path to file + + Returns: + array: RGB image (0-1) + """ + img = cv2.imread(path) + + if img.ndim == 2: + img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) + + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0 + + return img + + +def resize_image(img): + """Resize image and make it fit for network. + + Args: + img (array): image + + Returns: + tensor: data ready for network + """ + height_orig = img.shape[0] + width_orig = img.shape[1] + + if width_orig > height_orig: + scale = width_orig / 384 + else: + scale = height_orig / 384 + + height = (np.ceil(height_orig / scale / 32) * 32).astype(int) + width = (np.ceil(width_orig / scale / 32) * 32).astype(int) + + img_resized = cv2.resize(img, (width, height), interpolation=cv2.INTER_AREA) + + img_resized = ( + torch.from_numpy(np.transpose(img_resized, (2, 0, 1))).contiguous().float() + ) + img_resized = img_resized.unsqueeze(0) + + return img_resized + + +def resize_depth(depth, width, height): + """Resize depth map and bring to CPU (numpy). + + Args: + depth (tensor): depth + width (int): image width + height (int): image height + + Returns: + array: processed depth + """ + depth = torch.squeeze(depth[0, :, :, :]).to("cpu") + + depth_resized = cv2.resize( + depth.numpy(), (width, height), interpolation=cv2.INTER_CUBIC + ) + + return depth_resized + +def write_depth(path, depth, grayscale, bits=1): + """Write depth map to png file. 
+ + Args: + path (str): filepath without extension + depth (array): depth + grayscale (bool): use a grayscale colormap? + """ + if not grayscale: + bits = 1 + + if not np.isfinite(depth).all(): + depth=np.nan_to_num(depth, nan=0.0, posinf=0.0, neginf=0.0) + print("WARNING: Non-finite depth values present") + + depth_min = depth.min() + depth_max = depth.max() + + max_val = (2**(8*bits))-1 + + if depth_max - depth_min > np.finfo("float").eps: + out = max_val * (depth - depth_min) / (depth_max - depth_min) + else: + out = np.zeros(depth.shape, dtype=depth.dtype) + + if not grayscale: + out = cv2.applyColorMap(np.uint8(out), cv2.COLORMAP_INFERNO) + + if bits == 1: + cv2.imwrite(path + ".png", out.astype("uint8")) + elif bits == 2: + cv2.imwrite(path + ".png", out.astype("uint16")) + + return diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/weights/.placeholder b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/base_models/midas_repo/weights/.placeholder new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/builder.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/builder.py new file mode 100644 index 0000000000000000000000000000000000000000..0818311b642561712a03a66655c638ce09a04cca --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/builder.py @@ -0,0 +1,51 @@ +# MIT License + +# Copyright (c) 2022 Intelligent Systems Lab Org + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# File author: Shariq Farooq Bhat + +from importlib import import_module +from .depth_model import DepthModel + +def build_model(config) -> DepthModel: + """Builds a model from a config. The model is specified by the model name and version in the config. The model is then constructed using the build_from_config function of the model interface. + This function should be used to construct models for training and evaluation. + + Args: + config (dict): Config dict. Config is constructed in utils/config.py. Each model has its own config file(s) saved in its root model folder. 
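+
+        Example (illustrative sketch, not part of the original docstring; assumes the
+        ZoeDepth config helper in zoedepth/utils/config.py):
+
+            from zoedepth.utils.config import get_config
+            from zoedepth.models.builder import build_model
+
+            conf = get_config("zoedepth", "infer")  # picks the model name/version and pretrained weights
+            model = build_model(conf)               # imports zoedepth.models.zoedepth and builds that version
+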
+ + Returns: + torch.nn.Module: Model corresponding to name and version as specified in config + """ + module_name = f"zoedepth.models.{config.model}" + try: + module = import_module(module_name) + except ModuleNotFoundError as e: + # print the original error message + print(e) + raise ValueError( + f"Model {config.model} not found. Refer above error for details.") from e + try: + get_version = getattr(module, "get_version") + except AttributeError as e: + raise ValueError( + f"Model {config.model} has no get_version function.") from e + return get_version(config.version_name).build_from_config(config) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/depth_model.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/depth_model.py new file mode 100644 index 0000000000000000000000000000000000000000..fc421c108ea3928c9add62b4c190500d9bd4eda1 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/depth_model.py @@ -0,0 +1,152 @@ +# MIT License + +# Copyright (c) 2022 Intelligent Systems Lab Org + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# File author: Shariq Farooq Bhat + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from torchvision import transforms +import PIL.Image +from PIL import Image +from typing import Union + + +class DepthModel(nn.Module): + def __init__(self): + super().__init__() + self.device = 'cpu' + + def to(self, device) -> nn.Module: + self.device = device + return super().to(device) + + def forward(self, x, *args, **kwargs): + raise NotImplementedError + + def _infer(self, x: torch.Tensor): + """ + Inference interface for the model + Args: + x (torch.Tensor): input tensor of shape (b, c, h, w) + Returns: + torch.Tensor: output tensor of shape (b, 1, h, w) + """ + return self(x)['metric_depth'] + + def _infer_with_pad_aug(self, x: torch.Tensor, pad_input: bool=True, fh: float=3, fw: float=3, upsampling_mode: str='bicubic', padding_mode="reflect", **kwargs) -> torch.Tensor: + """ + Inference interface for the model with padding augmentation + Padding augmentation fixes the boundary artifacts in the output depth map. + Boundary artifacts are sometimes caused by the fact that the model is trained on NYU raw dataset which has a black or white border around the image. + This augmentation pads the input image and crops the prediction back to the original size / view. 
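+
+        Illustrative numbers (added for clarity, not part of the original docstring): for a
+        480x640 input with the default fh = fw = 3, the padding is pad_h = int(sqrt(480 / 2) * 3) = 46
+        and pad_w = int(sqrt(640 / 2) * 3) = 53, so the network sees a 572x746 tensor and the
+        prediction is cropped back to 480x640.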
+ + Note: This augmentation is not required for the models trained with 'avoid_boundary'=True. + Args: + x (torch.Tensor): input tensor of shape (b, c, h, w) + pad_input (bool, optional): whether to pad the input or not. Defaults to True. + fh (float, optional): height padding factor. The padding is calculated as sqrt(h/2) * fh. Defaults to 3. + fw (float, optional): width padding factor. The padding is calculated as sqrt(w/2) * fw. Defaults to 3. + upsampling_mode (str, optional): upsampling mode. Defaults to 'bicubic'. + padding_mode (str, optional): padding mode. Defaults to "reflect". + Returns: + torch.Tensor: output tensor of shape (b, 1, h, w) + """ + # assert x is nchw and c = 3 + assert x.dim() == 4, "x must be 4 dimensional, got {}".format(x.dim()) + assert x.shape[1] == 3, "x must have 3 channels, got {}".format(x.shape[1]) + + if pad_input: + assert fh > 0 or fw > 0, "atlease one of fh and fw must be greater than 0" + pad_h = int(np.sqrt(x.shape[2]/2) * fh) + pad_w = int(np.sqrt(x.shape[3]/2) * fw) + padding = [pad_w, pad_w] + if pad_h > 0: + padding += [pad_h, pad_h] + + x = F.pad(x, padding, mode=padding_mode, **kwargs) + out = self._infer(x) + if out.shape[-2:] != x.shape[-2:]: + out = F.interpolate(out, size=(x.shape[2], x.shape[3]), mode=upsampling_mode, align_corners=False) + if pad_input: + # crop to the original size, handling the case where pad_h and pad_w is 0 + if pad_h > 0: + out = out[:, :, pad_h:-pad_h,:] + if pad_w > 0: + out = out[:, :, :, pad_w:-pad_w] + return out + + def infer_with_flip_aug(self, x, pad_input: bool=True, **kwargs) -> torch.Tensor: + """ + Inference interface for the model with horizontal flip augmentation + Horizontal flip augmentation improves the accuracy of the model by averaging the output of the model with and without horizontal flip. + Args: + x (torch.Tensor): input tensor of shape (b, c, h, w) + pad_input (bool, optional): whether to use padding augmentation. Defaults to True. + Returns: + torch.Tensor: output tensor of shape (b, 1, h, w) + """ + # infer with horizontal flip and average + out = self._infer_with_pad_aug(x, pad_input=pad_input, **kwargs) + out_flip = self._infer_with_pad_aug(torch.flip(x, dims=[3]), pad_input=pad_input, **kwargs) + out = (out + torch.flip(out_flip, dims=[3])) / 2 + return out + + def infer(self, x, pad_input: bool=True, with_flip_aug: bool=True, **kwargs) -> torch.Tensor: + """ + Inference interface for the model + Args: + x (torch.Tensor): input tensor of shape (b, c, h, w) + pad_input (bool, optional): whether to use padding augmentation. Defaults to True. + with_flip_aug (bool, optional): whether to use horizontal flip augmentation. Defaults to True. + Returns: + torch.Tensor: output tensor of shape (b, 1, h, w) + """ + if with_flip_aug: + return self.infer_with_flip_aug(x, pad_input=pad_input, **kwargs) + else: + return self._infer_with_pad_aug(x, pad_input=pad_input, **kwargs) + + @torch.no_grad() + def infer_pil(self, pil_img, pad_input: bool=True, with_flip_aug: bool=True, output_type: str="numpy", **kwargs) -> Union[np.ndarray, PIL.Image.Image, torch.Tensor]: + """ + Inference interface for the model for PIL image + Args: + pil_img (PIL.Image.Image): input PIL image + pad_input (bool, optional): whether to use padding augmentation. Defaults to True. + with_flip_aug (bool, optional): whether to use horizontal flip augmentation. Defaults to True. + output_type (str, optional): output type. Supported values are 'numpy', 'pil' and 'tensor'. Defaults to "numpy". 
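+
+        Example (illustrative sketch, not part of the original docstring; the image path is hypothetical):
+
+            from PIL import Image
+            img = Image.open("room.jpg").convert("RGB")
+            depth = model.infer_pil(img)                          # numpy.ndarray of shape (h, w)
+            depth_img = model.infer_pil(img, output_type="pil")   # 16-bit depth PIL image
+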
+ """ + x = transforms.ToTensor()(pil_img).unsqueeze(0).to(self.device) + out_tensor = self.infer(x, pad_input=pad_input, with_flip_aug=with_flip_aug, **kwargs) + if output_type == "numpy": + return out_tensor.squeeze().cpu().numpy() + elif output_type == "pil": + # uint16 is required for depth pil image + out_16bit_numpy = (out_tensor.squeeze().cpu().numpy()*256).astype(np.uint16) + return Image.fromarray(out_16bit_numpy) + elif output_type == "tensor": + return out_tensor.squeeze().cpu() + else: + raise ValueError(f"output_type {output_type} not supported. Supported values are 'numpy', 'pil' and 'tensor'") + \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/layers/attractor.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/layers/attractor.py new file mode 100644 index 0000000000000000000000000000000000000000..2a8efe645adea1d88a12e2ac5cc6bb2a251eef9d --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/layers/attractor.py @@ -0,0 +1,208 @@ +# MIT License + +# Copyright (c) 2022 Intelligent Systems Lab Org + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# File author: Shariq Farooq Bhat + +import torch +import torch.nn as nn + + +@torch.jit.script +def exp_attractor(dx, alpha: float = 300, gamma: int = 2): + """Exponential attractor: dc = exp(-alpha*|dx|^gamma) * dx , where dx = a - c, a = attractor point, c = bin center, dc = shift in bin centermmary for exp_attractor + + Args: + dx (torch.Tensor): The difference tensor dx = Ai - Cj, where Ai is the attractor point and Cj is the bin center. + alpha (float, optional): Proportional Attractor strength. Determines the absolute strength. Lower alpha = greater attraction. Defaults to 300. + gamma (int, optional): Exponential Attractor strength. Determines the "region of influence" and indirectly number of bin centers affected. Lower gamma = farther reach. Defaults to 2. + + Returns: + torch.Tensor : Delta shifts - dc; New bin centers = Old bin centers + dc + """ + return torch.exp(-alpha*(torch.abs(dx)**gamma)) * (dx) + + +@torch.jit.script +def inv_attractor(dx, alpha: float = 300, gamma: int = 2): + """Inverse attractor: dc = dx / (1 + alpha*dx^gamma), where dx = a - c, a = attractor point, c = bin center, dc = shift in bin center + This is the default one according to the accompanying paper. 
+ + Args: + dx (torch.Tensor): The difference tensor dx = Ai - Cj, where Ai is the attractor point and Cj is the bin center. + alpha (float, optional): Proportional Attractor strength. Determines the absolute strength. Lower alpha = greater attraction. Defaults to 300. + gamma (int, optional): Exponential Attractor strength. Determines the "region of influence" and indirectly number of bin centers affected. Lower gamma = farther reach. Defaults to 2. + + Returns: + torch.Tensor: Delta shifts - dc; New bin centers = Old bin centers + dc + """ + return dx.div(1+alpha*dx.pow(gamma)) + + +class AttractorLayer(nn.Module): + def __init__(self, in_features, n_bins, n_attractors=16, mlp_dim=128, min_depth=1e-3, max_depth=10, + alpha=300, gamma=2, kind='sum', attractor_type='exp', memory_efficient=False): + """ + Attractor layer for bin centers. Bin centers are bounded on the interval (min_depth, max_depth) + """ + super().__init__() + + self.n_attractors = n_attractors + self.n_bins = n_bins + self.min_depth = min_depth + self.max_depth = max_depth + self.alpha = alpha + self.gamma = gamma + self.kind = kind + self.attractor_type = attractor_type + self.memory_efficient = memory_efficient + + self._net = nn.Sequential( + nn.Conv2d(in_features, mlp_dim, 1, 1, 0), + nn.ReLU(inplace=True), + nn.Conv2d(mlp_dim, n_attractors*2, 1, 1, 0), # x2 for linear norm + nn.ReLU(inplace=True) + ) + + def forward(self, x, b_prev, prev_b_embedding=None, interpolate=True, is_for_query=False): + """ + Args: + x (torch.Tensor) : feature block; shape - n, c, h, w + b_prev (torch.Tensor) : previous bin centers normed; shape - n, prev_nbins, h, w + + Returns: + tuple(torch.Tensor,torch.Tensor) : new bin centers normed and scaled; shape - n, nbins, h, w + """ + if prev_b_embedding is not None: + if interpolate: + prev_b_embedding = nn.functional.interpolate( + prev_b_embedding, x.shape[-2:], mode='bilinear', align_corners=True) + x = x + prev_b_embedding + + A = self._net(x) + eps = 1e-3 + A = A + eps + n, c, h, w = A.shape + A = A.view(n, self.n_attractors, 2, h, w) + A_normed = A / A.sum(dim=2, keepdim=True) # n, a, 2, h, w + A_normed = A[:, :, 0, ...] # n, na, h, w + + b_prev = nn.functional.interpolate( + b_prev, (h, w), mode='bilinear', align_corners=True) + b_centers = b_prev + + if self.attractor_type == 'exp': + dist = exp_attractor + else: + dist = inv_attractor + + if not self.memory_efficient: + func = {'mean': torch.mean, 'sum': torch.sum}[self.kind] + # .shape N, nbins, h, w + delta_c = func(dist(A_normed.unsqueeze( + 2) - b_centers.unsqueeze(1)), dim=1) + else: + delta_c = torch.zeros_like(b_centers, device=b_centers.device) + for i in range(self.n_attractors): + # .shape N, nbins, h, w + delta_c += dist(A_normed[:, i, ...].unsqueeze(1) - b_centers) + + if self.kind == 'mean': + delta_c = delta_c / self.n_attractors + + b_new_centers = b_centers + delta_c + B_centers = (self.max_depth - self.min_depth) * \ + b_new_centers + self.min_depth + B_centers, _ = torch.sort(B_centers, dim=1) + B_centers = torch.clip(B_centers, self.min_depth, self.max_depth) + return b_new_centers, B_centers + + +class AttractorLayerUnnormed(nn.Module): + def __init__(self, in_features, n_bins, n_attractors=16, mlp_dim=128, min_depth=1e-3, max_depth=10, + alpha=300, gamma=2, kind='sum', attractor_type='exp', memory_efficient=False): + """ + Attractor layer for bin centers. 
Bin centers are unbounded + """ + super().__init__() + + self.n_attractors = n_attractors + self.n_bins = n_bins + self.min_depth = min_depth + self.max_depth = max_depth + self.alpha = alpha + self.gamma = gamma + self.kind = kind + self.attractor_type = attractor_type + self.memory_efficient = memory_efficient + + self._net = nn.Sequential( + nn.Conv2d(in_features, mlp_dim, 1, 1, 0), + nn.ReLU(inplace=True), + nn.Conv2d(mlp_dim, n_attractors, 1, 1, 0), + nn.Softplus() + ) + + def forward(self, x, b_prev, prev_b_embedding=None, interpolate=True, is_for_query=False): + """ + Args: + x (torch.Tensor) : feature block; shape - n, c, h, w + b_prev (torch.Tensor) : previous bin centers normed; shape - n, prev_nbins, h, w + + Returns: + tuple(torch.Tensor,torch.Tensor) : new bin centers unbounded; shape - n, nbins, h, w. Two outputs just to keep the API consistent with the normed version + """ + if prev_b_embedding is not None: + if interpolate: + prev_b_embedding = nn.functional.interpolate( + prev_b_embedding, x.shape[-2:], mode='bilinear', align_corners=True) + x = x + prev_b_embedding + + A = self._net(x) + n, c, h, w = A.shape + + b_prev = nn.functional.interpolate( + b_prev, (h, w), mode='bilinear', align_corners=True) + b_centers = b_prev + + if self.attractor_type == 'exp': + dist = exp_attractor + else: + dist = inv_attractor + + if not self.memory_efficient: + func = {'mean': torch.mean, 'sum': torch.sum}[self.kind] + # .shape N, nbins, h, w + delta_c = func( + dist(A.unsqueeze(2) - b_centers.unsqueeze(1)), dim=1) + else: + delta_c = torch.zeros_like(b_centers, device=b_centers.device) + for i in range(self.n_attractors): + delta_c += dist(A[:, i, ...].unsqueeze(1) - + b_centers) # .shape N, nbins, h, w + + if self.kind == 'mean': + delta_c = delta_c / self.n_attractors + + b_new_centers = b_centers + delta_c + B_centers = b_new_centers + + return b_new_centers, B_centers diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/layers/dist_layers.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/layers/dist_layers.py new file mode 100644 index 0000000000000000000000000000000000000000..3208405dfb78fdfc28d5765e5a6d5dbe31967a23 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/layers/dist_layers.py @@ -0,0 +1,121 @@ +# MIT License + +# Copyright (c) 2022 Intelligent Systems Lab Org + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
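The two attractor functions above compute a per-bin shift dc from the differences dx = a - c between attractor points a and bin centers c; summing (or averaging, for kind='mean') the shifts over all attractors pulls every bin center toward the predicted attractor points. Below is a minimal standalone sketch of that update on toy 1-D tensors; it re-declares inv_attractor without the @torch.jit.script decorator and uses simplified shapes, whereas the AttractorLayer modules operate on (N, n_attractors, n_bins, H, W) volumes.

```python
# Minimal sketch (not part of the diff): the inverse-attractor update on toy 1-D bins.
import torch

def inv_attractor(dx, alpha: float = 300, gamma: int = 2):
    # dc = dx / (1 + alpha * dx^gamma): the default attractor per the docstring above
    return dx.div(1 + alpha * dx.pow(gamma))

bin_centers = torch.linspace(0.1, 10.0, steps=8)         # c: current bin centers
attractors = torch.tensor([2.0, 7.5])                     # a: attractor points
dx = attractors.unsqueeze(1) - bin_centers.unsqueeze(0)   # dx = a - c, shape (2, 8)

delta_c = inv_attractor(dx).sum(dim=0)                    # kind='sum' aggregation over attractors
new_centers = bin_centers + delta_c                       # each center is pulled toward the attractors
print(new_centers)
```

With alpha=300 the shifts stay small for distant centers; lowering alpha strengthens the pull, consistent with the "Lower alpha = greater attraction" note in the docstrings above.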
+ +# File author: Shariq Farooq Bhat + +import torch +import torch.nn as nn + + +def log_binom(n, k, eps=1e-7): + """ log(nCk) using stirling approximation """ + n = n + eps + k = k + eps + return n * torch.log(n) - k * torch.log(k) - (n-k) * torch.log(n-k+eps) + + +class LogBinomial(nn.Module): + def __init__(self, n_classes=256, act=torch.softmax): + """Compute log binomial distribution for n_classes + + Args: + n_classes (int, optional): number of output classes. Defaults to 256. + """ + super().__init__() + self.K = n_classes + self.act = act + self.register_buffer('k_idx', torch.arange( + 0, n_classes).view(1, -1, 1, 1)) + self.register_buffer('K_minus_1', torch.Tensor( + [self.K-1]).view(1, -1, 1, 1)) + + def forward(self, x, t=1., eps=1e-4): + """Compute log binomial distribution for x + + Args: + x (torch.Tensor - NCHW): probabilities + t (float, torch.Tensor - NCHW, optional): Temperature of distribution. Defaults to 1.. + eps (float, optional): Small number for numerical stability. Defaults to 1e-4. + + Returns: + torch.Tensor -NCHW: log binomial distribution logbinomial(p;t) + """ + if x.ndim == 3: + x = x.unsqueeze(1) # make it nchw + + one_minus_x = torch.clamp(1 - x, eps, 1) + x = torch.clamp(x, eps, 1) + y = log_binom(self.K_minus_1, self.k_idx) + self.k_idx * \ + torch.log(x) + (self.K - 1 - self.k_idx) * torch.log(one_minus_x) + return self.act(y/t, dim=1) + + +class ConditionalLogBinomial(nn.Module): + def __init__(self, in_features, condition_dim, n_classes=256, bottleneck_factor=2, p_eps=1e-4, max_temp=50, min_temp=1e-7, act=torch.softmax): + """Conditional Log Binomial distribution + + Args: + in_features (int): number of input channels in main feature + condition_dim (int): number of input channels in condition feature + n_classes (int, optional): Number of classes. Defaults to 256. + bottleneck_factor (int, optional): Hidden dim factor. Defaults to 2. + p_eps (float, optional): small eps value. Defaults to 1e-4. + max_temp (float, optional): Maximum temperature of output distribution. Defaults to 50. + min_temp (float, optional): Minimum temperature of output distribution. Defaults to 1e-7. + """ + super().__init__() + self.p_eps = p_eps + self.max_temp = max_temp + self.min_temp = min_temp + self.log_binomial_transform = LogBinomial(n_classes, act=act) + bottleneck = (in_features + condition_dim) // bottleneck_factor + self.mlp = nn.Sequential( + nn.Conv2d(in_features + condition_dim, bottleneck, + kernel_size=1, stride=1, padding=0), + nn.GELU(), + # 2 for p linear norm, 2 for t linear norm + nn.Conv2d(bottleneck, 2+2, kernel_size=1, stride=1, padding=0), + nn.Softplus() + ) + + def forward(self, x, cond): + """Forward pass + + Args: + x (torch.Tensor - NCHW): Main feature + cond (torch.Tensor - NCHW): condition feature + + Returns: + torch.Tensor: Output log binomial distribution + """ + pt = self.mlp(torch.concat((x, cond), dim=1)) + p, t = pt[:, :2, ...], pt[:, 2:, ...] + + p = p + self.p_eps + p = p[:, 0, ...] / (p[:, 0, ...] + p[:, 1, ...]) + + t = t + self.p_eps + t = t[:, 0, ...] / (t[:, 0, ...] 
+ t[:, 1, ...]) + t = t.unsqueeze(1) + t = (self.max_temp - self.min_temp) * t + self.min_temp + + return self.log_binomial_transform(p, t) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/layers/localbins_layers.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/layers/localbins_layers.py new file mode 100644 index 0000000000000000000000000000000000000000..f94481605c3e6958ce50e73b2eb31d9f0c07dc67 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/layers/localbins_layers.py @@ -0,0 +1,169 @@ +# MIT License + +# Copyright (c) 2022 Intelligent Systems Lab Org + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# File author: Shariq Farooq Bhat + +import torch +import torch.nn as nn + + +class SeedBinRegressor(nn.Module): + def __init__(self, in_features, n_bins=16, mlp_dim=256, min_depth=1e-3, max_depth=10): + """Bin center regressor network. Bin centers are bounded on (min_depth, max_depth) interval. + + Args: + in_features (int): input channels + n_bins (int, optional): Number of bin centers. Defaults to 16. + mlp_dim (int, optional): Hidden dimension. Defaults to 256. + min_depth (float, optional): Min depth value. Defaults to 1e-3. + max_depth (float, optional): Max depth value. Defaults to 10. + """ + super().__init__() + self.version = "1_1" + self.min_depth = min_depth + self.max_depth = max_depth + + self._net = nn.Sequential( + nn.Conv2d(in_features, mlp_dim, 1, 1, 0), + nn.ReLU(inplace=True), + nn.Conv2d(mlp_dim, n_bins, 1, 1, 0), + nn.ReLU(inplace=True) + ) + + def forward(self, x): + """ + Returns tensor of bin_width vectors (centers). One vector b for every pixel + """ + B = self._net(x) + eps = 1e-3 + B = B + eps + B_widths_normed = B / B.sum(dim=1, keepdim=True) + B_widths = (self.max_depth - self.min_depth) * \ + B_widths_normed # .shape NCHW + # pad has the form (left, right, top, bottom, front, back) + B_widths = nn.functional.pad( + B_widths, (0, 0, 0, 0, 1, 0), mode='constant', value=self.min_depth) + B_edges = torch.cumsum(B_widths, dim=1) # .shape NCHW + + B_centers = 0.5 * (B_edges[:, :-1, ...] + B_edges[:, 1:, ...]) + return B_widths_normed, B_centers + + +class SeedBinRegressorUnnormed(nn.Module): + def __init__(self, in_features, n_bins=16, mlp_dim=256, min_depth=1e-3, max_depth=10): + """Bin center regressor network. Bin centers are unbounded + + Args: + in_features (int): input channels + n_bins (int, optional): Number of bin centers. 
Defaults to 16. + mlp_dim (int, optional): Hidden dimension. Defaults to 256. + min_depth (float, optional): Not used. (for compatibility with SeedBinRegressor) + max_depth (float, optional): Not used. (for compatibility with SeedBinRegressor) + """ + super().__init__() + self.version = "1_1" + self._net = nn.Sequential( + nn.Conv2d(in_features, mlp_dim, 1, 1, 0), + nn.ReLU(inplace=True), + nn.Conv2d(mlp_dim, n_bins, 1, 1, 0), + nn.Softplus() + ) + + def forward(self, x): + """ + Returns tensor of bin_width vectors (centers). One vector b for every pixel + """ + B_centers = self._net(x) + return B_centers, B_centers + + +class Projector(nn.Module): + def __init__(self, in_features, out_features, mlp_dim=128): + """Projector MLP + + Args: + in_features (int): input channels + out_features (int): output channels + mlp_dim (int, optional): hidden dimension. Defaults to 128. + """ + super().__init__() + + self._net = nn.Sequential( + nn.Conv2d(in_features, mlp_dim, 1, 1, 0), + nn.ReLU(inplace=True), + nn.Conv2d(mlp_dim, out_features, 1, 1, 0), + ) + + def forward(self, x): + return self._net(x) + + + +class LinearSplitter(nn.Module): + def __init__(self, in_features, prev_nbins, split_factor=2, mlp_dim=128, min_depth=1e-3, max_depth=10): + super().__init__() + + self.prev_nbins = prev_nbins + self.split_factor = split_factor + self.min_depth = min_depth + self.max_depth = max_depth + + self._net = nn.Sequential( + nn.Conv2d(in_features, mlp_dim, 1, 1, 0), + nn.GELU(), + nn.Conv2d(mlp_dim, prev_nbins * split_factor, 1, 1, 0), + nn.ReLU() + ) + + def forward(self, x, b_prev, prev_b_embedding=None, interpolate=True, is_for_query=False): + """ + x : feature block; shape - n, c, h, w + b_prev : previous bin widths normed; shape - n, prev_nbins, h, w + """ + if prev_b_embedding is not None: + if interpolate: + prev_b_embedding = nn.functional.interpolate(prev_b_embedding, x.shape[-2:], mode='bilinear', align_corners=True) + x = x + prev_b_embedding + S = self._net(x) + eps = 1e-3 + S = S + eps + n, c, h, w = S.shape + S = S.view(n, self.prev_nbins, self.split_factor, h, w) + S_normed = S / S.sum(dim=2, keepdim=True) # fractional splits + + b_prev = nn.functional.interpolate(b_prev, (h,w), mode='bilinear', align_corners=True) + + + b_prev = b_prev / b_prev.sum(dim=1, keepdim=True) # renormalize for gurantees + # print(b_prev.shape, S_normed.shape) + # if is_for_query:(1).expand(-1, b_prev.size(0)//n, -1, -1, -1, -1).flatten(0,1) # TODO ? can replace all this with a single torch.repeat? + b = b_prev.unsqueeze(2) * S_normed + b = b.flatten(1,2) # .shape n, prev_nbins * split_factor, h, w + + # calculate bin centers for loss calculation + B_widths = (self.max_depth - self.min_depth) * b # .shape N, nprev * splitfactor, H, W + # pad has the form (left, right, top, bottom, front, back) + B_widths = nn.functional.pad(B_widths, (0,0,0,0,1,0), mode='constant', value=self.min_depth) + B_edges = torch.cumsum(B_widths, dim=1) # .shape NCHW + + B_centers = 0.5 * (B_edges[:, :-1, ...] 
+ B_edges[:,1:,...]) + return b, B_centers \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/layers/patch_transformer.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/layers/patch_transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..99d9e51a06b981bae45ce7dd64eaef19a4121991 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/layers/patch_transformer.py @@ -0,0 +1,91 @@ +# MIT License + +# Copyright (c) 2022 Intelligent Systems Lab Org + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# File author: Shariq Farooq Bhat + +import torch +import torch.nn as nn + + +class PatchTransformerEncoder(nn.Module): + def __init__(self, in_channels, patch_size=10, embedding_dim=128, num_heads=4, use_class_token=False): + """ViT-like transformer block + + Args: + in_channels (int): Input channels + patch_size (int, optional): patch size. Defaults to 10. + embedding_dim (int, optional): Embedding dimension in transformer model. Defaults to 128. + num_heads (int, optional): number of attention heads. Defaults to 4. + use_class_token (bool, optional): Whether to use extra token at the start for global accumulation (called as "class token"). Defaults to False. 
+ """ + super(PatchTransformerEncoder, self).__init__() + self.use_class_token = use_class_token + encoder_layers = nn.TransformerEncoderLayer( + embedding_dim, num_heads, dim_feedforward=1024) + self.transformer_encoder = nn.TransformerEncoder( + encoder_layers, num_layers=4) # takes shape S,N,E + + self.embedding_convPxP = nn.Conv2d(in_channels, embedding_dim, + kernel_size=patch_size, stride=patch_size, padding=0) + + def positional_encoding_1d(self, sequence_length, batch_size, embedding_dim, device='cpu'): + """Generate positional encodings + + Args: + sequence_length (int): Sequence length + embedding_dim (int): Embedding dimension + + Returns: + torch.Tensor SBE: Positional encodings + """ + position = torch.arange( + 0, sequence_length, dtype=torch.float32, device=device).unsqueeze(1) + index = torch.arange( + 0, embedding_dim, 2, dtype=torch.float32, device=device).unsqueeze(0) + div_term = torch.exp(index * (-torch.log(torch.tensor(10000.0, device=device)) / embedding_dim)) + pos_encoding = position * div_term + pos_encoding = torch.cat([torch.sin(pos_encoding), torch.cos(pos_encoding)], dim=1) + pos_encoding = pos_encoding.unsqueeze(1).repeat(1, batch_size, 1) + return pos_encoding + + + def forward(self, x): + """Forward pass + + Args: + x (torch.Tensor - NCHW): Input feature tensor + + Returns: + torch.Tensor - SNE: Transformer output embeddings. S - sequence length (=HW/patch_size^2), N - batch size, E - embedding dim + """ + embeddings = self.embedding_convPxP(x).flatten( + 2) # .shape = n,c,s = n, embedding_dim, s + if self.use_class_token: + # extra special token at start ? + embeddings = nn.functional.pad(embeddings, (1, 0)) + + # change to S,N,E format required by transformer + embeddings = embeddings.permute(2, 0, 1) + S, N, E = embeddings.shape + embeddings = embeddings + self.positional_encoding_1d(S, N, E, device=embeddings.device) + x = self.transformer_encoder(embeddings) # .shape = S, N, E + return x diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/model_io.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/model_io.py new file mode 100644 index 0000000000000000000000000000000000000000..78b6579631dd847ac76651238cb5a948b5a66286 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/model_io.py @@ -0,0 +1,92 @@ +# MIT License + +# Copyright (c) 2022 Intelligent Systems Lab Org + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
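PatchTransformerEncoder above patchifies an NCHW feature map with a strided convolution, optionally prepends a class token by padding the sequence dimension, adds fixed 1-D sinusoidal positional encodings, and runs a 4-layer nn.TransformerEncoder in S,N,E layout. A minimal shape-flow sketch follows; it assumes the package root "zoedepth" is importable as in the upstream project layout (the vendored copy here nests it under the extension), and the channel counts and spatial size are illustrative only.

```python
# Minimal shape-flow sketch (not part of the diff).
import torch
from zoedepth.models.layers.patch_transformer import PatchTransformerEncoder

encoder = PatchTransformerEncoder(in_channels=256, patch_size=10,
                                  embedding_dim=128, num_heads=4,
                                  use_class_token=True)

feat = torch.randn(2, 256, 30, 40)   # N, C, H, W bottleneck feature map
out = encoder(feat)                   # S, N, E layout expected by nn.TransformerEncoder

# A 30x40 map with patch_size=10 yields 3*4 = 12 patches, plus 1 class token
print(out.shape)                      # torch.Size([13, 2, 128])
```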
+ +# File author: Shariq Farooq Bhat + +import torch + +def load_state_dict(model, state_dict): + """Load state_dict into model, handling DataParallel and DistributedDataParallel. Also checks for "model" key in state_dict. + + DataParallel prefixes state_dict keys with 'module.' when saving. + If the model is not a DataParallel model but the state_dict is, then prefixes are removed. + If the model is a DataParallel model but the state_dict is not, then prefixes are added. + """ + state_dict = state_dict.get('model', state_dict) + # if model is a DataParallel model, then state_dict keys are prefixed with 'module.' + + do_prefix = isinstance( + model, (torch.nn.DataParallel, torch.nn.parallel.DistributedDataParallel)) + state = {} + for k, v in state_dict.items(): + if k.startswith('module.') and not do_prefix: + k = k[7:] + + if not k.startswith('module.') and do_prefix: + k = 'module.' + k + + state[k] = v + + model.load_state_dict(state) + print("Loaded successfully") + return model + + +def load_wts(model, checkpoint_path): + ckpt = torch.load(checkpoint_path, map_location='cpu') + return load_state_dict(model, ckpt) + + +def load_state_dict_from_url(model, url, **kwargs): + state_dict = torch.hub.load_state_dict_from_url(url, map_location='cpu', **kwargs) + return load_state_dict(model, state_dict) + + +def load_state_from_resource(model, resource: str): + """Loads weights to the model from a given resource. A resource can be of following types: + 1. URL. Prefixed with "url::" + e.g. url::http(s)://url.resource.com/ckpt.pt + + 2. Local path. Prefixed with "local::" + e.g. local::/path/to/ckpt.pt + + + Args: + model (torch.nn.Module): Model + resource (str): resource string + + Returns: + torch.nn.Module: Model with loaded weights + """ + print(f"Using pretrained resource {resource}") + + if resource.startswith('url::'): + url = resource.split('url::')[1] + return load_state_dict_from_url(model, url, progress=True) + + elif resource.startswith('local::'): + path = resource.split('local::')[1] + return load_wts(model, path) + + else: + raise ValueError("Invalid resource type, only url:: and local:: are supported") + \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/zoedepth/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/zoedepth/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..cc33f737d238766559f0e3a8def3c0b568f23b7f --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/zoedepth/__init__.py @@ -0,0 +1,31 @@ +# MIT License + +# Copyright (c) 2022 Intelligent Systems Lab Org + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
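load_state_from_resource above dispatches on a "url::" or "local::" prefix and funnels both paths through load_state_dict, which reconciles the "module." key prefix that (Distributed)DataParallel adds when saving. A minimal sketch of the convention, using a toy module and a hypothetical checkpoint path:

```python
# Minimal sketch (not part of the diff); the checkpoint path and URL are hypothetical.
import torch
import torch.nn as nn

model = nn.Sequential(nn.Conv2d(3, 8, 3, padding=1), nn.ReLU())

# "local::" goes through torch.load(..., map_location='cpu');
# "url::" goes through torch.hub.load_state_dict_from_url:
# model = load_state_from_resource(model, "local::/path/to/ckpt.pt")
# model = load_state_from_resource(model, "url::https://example.com/ckpt.pt")

# The prefix reconciliation performed by load_state_dict, condensed: keys saved from a
# (Distributed)DataParallel model carry a leading 'module.' that is stripped for a plain module.
saved = {"module.0.weight": torch.zeros(8, 3, 3, 3), "module.0.bias": torch.zeros(8)}
fixed = {k[len("module."):] if k.startswith("module.") else k: v for k, v in saved.items()}
model.load_state_dict(fixed)
```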
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# File author: Shariq Farooq Bhat + +from .zoedepth_v1 import ZoeDepth + +all_versions = { + "v1": ZoeDepth, +} + +get_version = lambda v : all_versions[v] \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/zoedepth/config_zoedepth.json b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/zoedepth/config_zoedepth.json new file mode 100644 index 0000000000000000000000000000000000000000..3112ed78c89f00e1d13f5d6e5be87cd3216b6dc7 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/zoedepth/config_zoedepth.json @@ -0,0 +1,58 @@ +{ + "model": { + "name": "ZoeDepth", + "version_name": "v1", + "n_bins": 64, + "bin_embedding_dim": 128, + "bin_centers_type": "softplus", + "n_attractors":[16, 8, 4, 1], + "attractor_alpha": 1000, + "attractor_gamma": 2, + "attractor_kind" : "mean", + "attractor_type" : "inv", + "midas_model_type" : "DPT_BEiT_L_384", + "min_temp": 0.0212, + "max_temp": 50.0, + "output_distribution": "logbinomial", + "memory_efficient": true, + "inverse_midas": false, + "img_size": [384, 512] + }, + + "train": { + "train_midas": true, + "use_pretrained_midas": true, + "trainer": "zoedepth", + "epochs": 5, + "bs": 16, + "optim_kwargs": {"lr": 0.000161, "wd": 0.01}, + "sched_kwargs": {"div_factor": 1, "final_div_factor": 10000, "pct_start": 0.7, "three_phase":false, "cycle_momentum": true}, + "same_lr": false, + "w_si": 1, + "w_domain": 0.2, + "w_reg": 0, + "w_grad": 0, + "avoid_boundary": false, + "random_crop": false, + "input_width": 640, + "input_height": 480, + "midas_lr_factor": 1, + "encoder_lr_factor":10, + "pos_enc_lr_factor":10, + "freeze_midas_bn": true + + }, + + "infer":{ + "train_midas": false, + "use_pretrained_midas": false, + "pretrained_resource" : null, + "force_keep_ar": true + }, + + "eval":{ + "train_midas": false, + "use_pretrained_midas": false, + "pretrained_resource" : null + } +} \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/zoedepth/config_zoedepth_kitti.json b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/zoedepth/config_zoedepth_kitti.json new file mode 100644 index 0000000000000000000000000000000000000000..b51802aa44b91c39e15aacaac4b5ab6bec884414 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/zoedepth/config_zoedepth_kitti.json @@ -0,0 +1,22 @@ +{ + "model": { + "bin_centers_type": "normed", + "img_size": [384, 768] + }, + + "train": { + }, + + "infer":{ + "train_midas": false, + "use_pretrained_midas": false, + "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_K.pt", + "force_keep_ar": true + }, + + "eval":{ + "train_midas": false, + "use_pretrained_midas": false, + "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_K.pt" + } +} \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/zoedepth/zoedepth_v1.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/zoedepth/zoedepth_v1.py new file mode 100644 index 
0000000000000000000000000000000000000000..bc931b059d6165c84e8ff4f09d5c62d19930cee9 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/zoedepth/zoedepth_v1.py @@ -0,0 +1,250 @@ +# MIT License + +# Copyright (c) 2022 Intelligent Systems Lab Org + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# File author: Shariq Farooq Bhat + +import itertools + +import torch +import torch.nn as nn +from ..depth_model import DepthModel +from ..base_models.midas import MidasCore +from ..layers.attractor import AttractorLayer, AttractorLayerUnnormed +from ..layers.dist_layers import ConditionalLogBinomial +from ..layers.localbins_layers import (Projector, SeedBinRegressor, + SeedBinRegressorUnnormed) +from ..model_io import load_state_from_resource + + +class ZoeDepth(DepthModel): + def __init__(self, core, n_bins=64, bin_centers_type="softplus", bin_embedding_dim=128, min_depth=1e-3, max_depth=10, + n_attractors=[16, 8, 4, 1], attractor_alpha=300, attractor_gamma=2, attractor_kind='sum', attractor_type='exp', min_temp=5, max_temp=50, train_midas=True, + midas_lr_factor=10, encoder_lr_factor=10, pos_enc_lr_factor=10, inverse_midas=False, **kwargs): + """ZoeDepth model. This is the version of ZoeDepth that has a single metric head + + Args: + core (models.base_models.midas.MidasCore): The base midas model that is used for extraction of "relative" features + n_bins (int, optional): Number of bin centers. Defaults to 64. + bin_centers_type (str, optional): "normed" or "softplus". Activation type used for bin centers. For "normed" bin centers, linear normalization trick is applied. This results in bounded bin centers. + For "softplus", softplus activation is used and thus are unbounded. Defaults to "softplus". + bin_embedding_dim (int, optional): bin embedding dimension. Defaults to 128. + min_depth (float, optional): Lower bound for normed bin centers. Defaults to 1e-3. + max_depth (float, optional): Upper bound for normed bin centers. Defaults to 10. + n_attractors (List[int], optional): Number of bin attractors at decoder layers. Defaults to [16, 8, 4, 1]. + attractor_alpha (int, optional): Proportional attractor strength. Refer to models.layers.attractor for more details. Defaults to 300. + attractor_gamma (int, optional): Exponential attractor strength. Refer to models.layers.attractor for more details. Defaults to 2. + attractor_kind (str, optional): Attraction aggregation "sum" or "mean". Defaults to 'sum'. 
+ attractor_type (str, optional): Type of attractor to use; "inv" (Inverse attractor) or "exp" (Exponential attractor). Defaults to 'exp'. + min_temp (int, optional): Lower bound for temperature of output probability distribution. Defaults to 5. + max_temp (int, optional): Upper bound for temperature of output probability distribution. Defaults to 50. + train_midas (bool, optional): Whether to train "core", the base midas model. Defaults to True. + midas_lr_factor (int, optional): Learning rate reduction factor for base midas model except its encoder and positional encodings. Defaults to 10. + encoder_lr_factor (int, optional): Learning rate reduction factor for the encoder in midas model. Defaults to 10. + pos_enc_lr_factor (int, optional): Learning rate reduction factor for positional encodings in the base midas model. Defaults to 10. + """ + super().__init__() + + self.core = core + self.max_depth = max_depth + self.min_depth = min_depth + self.min_temp = min_temp + self.bin_centers_type = bin_centers_type + + self.midas_lr_factor = midas_lr_factor + self.encoder_lr_factor = encoder_lr_factor + self.pos_enc_lr_factor = pos_enc_lr_factor + self.train_midas = train_midas + self.inverse_midas = inverse_midas + + if self.encoder_lr_factor <= 0: + self.core.freeze_encoder( + freeze_rel_pos=self.pos_enc_lr_factor <= 0) + + N_MIDAS_OUT = 32 + btlnck_features = self.core.output_channels[0] + num_out_features = self.core.output_channels[1:] + + self.conv2 = nn.Conv2d(btlnck_features, btlnck_features, + kernel_size=1, stride=1, padding=0) # btlnck conv + + if bin_centers_type == "normed": + SeedBinRegressorLayer = SeedBinRegressor + Attractor = AttractorLayer + elif bin_centers_type == "softplus": + SeedBinRegressorLayer = SeedBinRegressorUnnormed + Attractor = AttractorLayerUnnormed + elif bin_centers_type == "hybrid1": + SeedBinRegressorLayer = SeedBinRegressor + Attractor = AttractorLayerUnnormed + elif bin_centers_type == "hybrid2": + SeedBinRegressorLayer = SeedBinRegressorUnnormed + Attractor = AttractorLayer + else: + raise ValueError( + "bin_centers_type should be one of 'normed', 'softplus', 'hybrid1', 'hybrid2'") + + self.seed_bin_regressor = SeedBinRegressorLayer( + btlnck_features, n_bins=n_bins, min_depth=min_depth, max_depth=max_depth) + self.seed_projector = Projector(btlnck_features, bin_embedding_dim) + self.projectors = nn.ModuleList([ + Projector(num_out, bin_embedding_dim) + for num_out in num_out_features + ]) + self.attractors = nn.ModuleList([ + Attractor(bin_embedding_dim, n_bins, n_attractors=n_attractors[i], min_depth=min_depth, max_depth=max_depth, + alpha=attractor_alpha, gamma=attractor_gamma, kind=attractor_kind, attractor_type=attractor_type) + for i in range(len(num_out_features)) + ]) + + last_in = N_MIDAS_OUT + 1 # +1 for relative depth + + # use log binomial instead of softmax + self.conditional_log_binomial = ConditionalLogBinomial( + last_in, bin_embedding_dim, n_classes=n_bins, min_temp=min_temp, max_temp=max_temp) + + def forward(self, x, return_final_centers=False, denorm=False, return_probs=False, **kwargs): + """ + Args: + x (torch.Tensor): Input image tensor of shape (B, C, H, W) + return_final_centers (bool, optional): Whether to return the final bin centers. Defaults to False. + denorm (bool, optional): Whether to denormalize the input image. This reverses ImageNet normalization as midas normalization is different. Defaults to False. + return_probs (bool, optional): Whether to return the output probability distribution. Defaults to False. 
+ + Returns: + dict: Dictionary containing the following keys: + - rel_depth (torch.Tensor): Relative depth map of shape (B, H, W) + - metric_depth (torch.Tensor): Metric depth map of shape (B, 1, H, W) + - bin_centers (torch.Tensor): Bin centers of shape (B, n_bins). Present only if return_final_centers is True + - probs (torch.Tensor): Output probability distribution of shape (B, n_bins, H, W). Present only if return_probs is True + + """ + b, c, h, w = x.shape + # print("input shape ", x.shape) + self.orig_input_width = w + self.orig_input_height = h + rel_depth, out = self.core(x, denorm=denorm, return_rel_depth=True) + # print("output shapes", rel_depth.shape, out.shape) + + outconv_activation = out[0] + btlnck = out[1] + x_blocks = out[2:] + + x_d0 = self.conv2(btlnck) + x = x_d0 + _, seed_b_centers = self.seed_bin_regressor(x) + + if self.bin_centers_type == 'normed' or self.bin_centers_type == 'hybrid2': + b_prev = (seed_b_centers - self.min_depth) / \ + (self.max_depth - self.min_depth) + else: + b_prev = seed_b_centers + + prev_b_embedding = self.seed_projector(x) + + # unroll this loop for better performance + for projector, attractor, x in zip(self.projectors, self.attractors, x_blocks): + b_embedding = projector(x) + b, b_centers = attractor( + b_embedding, b_prev, prev_b_embedding, interpolate=True) + b_prev = b.clone() + prev_b_embedding = b_embedding.clone() + + last = outconv_activation + + if self.inverse_midas: + # invert depth followed by normalization + rel_depth = 1.0 / (rel_depth + 1e-6) + rel_depth = (rel_depth - rel_depth.min()) / \ + (rel_depth.max() - rel_depth.min()) + # concat rel depth with last. First interpolate rel depth to last size + rel_cond = rel_depth.unsqueeze(1) + rel_cond = nn.functional.interpolate( + rel_cond, size=last.shape[2:], mode='bilinear', align_corners=True) + last = torch.cat([last, rel_cond], dim=1) + + b_embedding = nn.functional.interpolate( + b_embedding, last.shape[-2:], mode='bilinear', align_corners=True) + x = self.conditional_log_binomial(last, b_embedding) + + # Now depth value is Sum px * cx , where cx are bin_centers from the last bin tensor + # print(x.shape, b_centers.shape) + b_centers = nn.functional.interpolate( + b_centers, x.shape[-2:], mode='bilinear', align_corners=True) + out = torch.sum(x * b_centers, dim=1, keepdim=True) + + # Structure output dict + output = dict(metric_depth=out) + if return_final_centers or return_probs: + output['bin_centers'] = b_centers + + if return_probs: + output['probs'] = x + + return output + + def get_lr_params(self, lr): + """ + Learning rate configuration for different layers of the model + Args: + lr (float) : Base learning rate + Returns: + list : list of parameters to optimize and their learning rates, in the format required by torch optimizers. 
+ """ + param_conf = [] + if self.train_midas: + if self.encoder_lr_factor > 0: + param_conf.append({'params': self.core.get_enc_params_except_rel_pos( + ), 'lr': lr / self.encoder_lr_factor}) + + if self.pos_enc_lr_factor > 0: + param_conf.append( + {'params': self.core.get_rel_pos_params(), 'lr': lr / self.pos_enc_lr_factor}) + + midas_params = self.core.core.scratch.parameters() + midas_lr_factor = self.midas_lr_factor + param_conf.append( + {'params': midas_params, 'lr': lr / midas_lr_factor}) + + remaining_modules = [] + for name, child in self.named_children(): + if name != 'core': + remaining_modules.append(child) + remaining_params = itertools.chain( + *[child.parameters() for child in remaining_modules]) + + param_conf.append({'params': remaining_params, 'lr': lr}) + + return param_conf + + @staticmethod + def build(midas_model_type="DPT_BEiT_L_384", pretrained_resource=None, use_pretrained_midas=False, train_midas=False, freeze_midas_bn=True, **kwargs): + core = MidasCore.build(midas_model_type=midas_model_type, use_pretrained_midas=use_pretrained_midas, + train_midas=train_midas, fetch_features=True, freeze_bn=freeze_midas_bn, **kwargs) + model = ZoeDepth(core, **kwargs) + if pretrained_resource: + assert isinstance(pretrained_resource, str), "pretrained_resource must be a string" + model = load_state_from_resource(model, pretrained_resource) + return model + + @staticmethod + def build_from_config(config): + return ZoeDepth.build(**config) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/zoedepth_nk/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/zoedepth_nk/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..513a278b939c10c010e3c0250ec73544d5663886 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/zoedepth_nk/__init__.py @@ -0,0 +1,31 @@ +# MIT License + +# Copyright (c) 2022 Intelligent Systems Lab Org + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
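ZoeDepth.get_lr_params above returns torch-optimizer parameter groups: the pretrained MiDaS encoder, its relative-position parameters, and the scratch decoder each receive the base learning rate divided by their respective *_lr_factor, while the newly added metric-depth head trains at the base rate. Below is a minimal sketch of how such groups feed an optimizer; the two toy modules stand in for the pretrained core and the new head, and the factor shown is illustrative rather than the exact value from the configs above.

```python
# Minimal sketch (not part of the diff): per-group learning rates in the style of get_lr_params.
import torch
import torch.nn as nn

core = nn.Conv2d(3, 8, 3, padding=1)    # stand-in for the pretrained MiDaS core
head = nn.Conv2d(8, 1, 1)               # stand-in for the new metric-depth head

base_lr = 1.61e-4                        # train.optim_kwargs.lr in config_zoedepth.json
midas_lr_factor = 10                     # pretrained parts learn at a reduced rate

param_conf = [
    {"params": core.parameters(), "lr": base_lr / midas_lr_factor},
    {"params": head.parameters(), "lr": base_lr},
]
optimizer = torch.optim.AdamW(param_conf, lr=base_lr, weight_decay=0.01)
print([group["lr"] for group in optimizer.param_groups])
```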
+ +# File author: Shariq Farooq Bhat + +from .zoedepth_nk_v1 import ZoeDepthNK + +all_versions = { + "v1": ZoeDepthNK, +} + +get_version = lambda v : all_versions[v] \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/zoedepth_nk/config_zoedepth_nk.json b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/zoedepth_nk/config_zoedepth_nk.json new file mode 100644 index 0000000000000000000000000000000000000000..42bab2a3ad159a09599a5aba270c491021a3cf1a --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/zoedepth_nk/config_zoedepth_nk.json @@ -0,0 +1,67 @@ +{ + "model": { + "name": "ZoeDepthNK", + "version_name": "v1", + "bin_conf" : [ + { + "name": "nyu", + "n_bins": 64, + "min_depth": 1e-3, + "max_depth": 10.0 + }, + { + "name": "kitti", + "n_bins": 64, + "min_depth": 1e-3, + "max_depth": 80.0 + } + ], + "bin_embedding_dim": 128, + "bin_centers_type": "softplus", + "n_attractors":[16, 8, 4, 1], + "attractor_alpha": 1000, + "attractor_gamma": 2, + "attractor_kind" : "mean", + "attractor_type" : "inv", + "min_temp": 0.0212, + "max_temp": 50.0, + "memory_efficient": true, + "midas_model_type" : "DPT_BEiT_L_384", + "img_size": [384, 512] + }, + + "train": { + "train_midas": true, + "use_pretrained_midas": true, + "trainer": "zoedepth_nk", + "epochs": 5, + "bs": 16, + "optim_kwargs": {"lr": 0.0002512, "wd": 0.01}, + "sched_kwargs": {"div_factor": 1, "final_div_factor": 10000, "pct_start": 0.7, "three_phase":false, "cycle_momentum": true}, + "same_lr": false, + "w_si": 1, + "w_domain": 100, + "avoid_boundary": false, + "random_crop": false, + "input_width": 640, + "input_height": 480, + "w_grad": 0, + "w_reg": 0, + "midas_lr_factor": 10, + "encoder_lr_factor":10, + "pos_enc_lr_factor":10 + }, + + "infer": { + "train_midas": false, + "pretrained_resource": "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_NK.pt", + "use_pretrained_midas": false, + "force_keep_ar": true + }, + + "eval": { + "train_midas": false, + "pretrained_resource": "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_NK.pt", + "use_pretrained_midas": false + } +} \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/zoedepth_nk/zoedepth_nk_v1.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/zoedepth_nk/zoedepth_nk_v1.py new file mode 100644 index 0000000000000000000000000000000000000000..7368ae8031188a9f946d9d3f29633c96e791e68e --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/models/zoedepth_nk/zoedepth_nk_v1.py @@ -0,0 +1,333 @@ +# MIT License + +# Copyright (c) 2022 Intelligent Systems Lab Org + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. 
+ +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# File author: Shariq Farooq Bhat + +import itertools + +import torch +import torch.nn as nn + +from zoedepth.models.depth_model import DepthModel +from zoedepth.models.base_models.midas import MidasCore +from zoedepth.models.layers.attractor import AttractorLayer, AttractorLayerUnnormed +from zoedepth.models.layers.dist_layers import ConditionalLogBinomial +from zoedepth.models.layers.localbins_layers import (Projector, SeedBinRegressor, + SeedBinRegressorUnnormed) +from zoedepth.models.layers.patch_transformer import PatchTransformerEncoder +from zoedepth.models.model_io import load_state_from_resource + + +class ZoeDepthNK(DepthModel): + def __init__(self, core, bin_conf, bin_centers_type="softplus", bin_embedding_dim=128, + n_attractors=[16, 8, 4, 1], attractor_alpha=300, attractor_gamma=2, attractor_kind='sum', attractor_type='exp', + min_temp=5, max_temp=50, + memory_efficient=False, train_midas=True, + is_midas_pretrained=True, midas_lr_factor=1, encoder_lr_factor=10, pos_enc_lr_factor=10, inverse_midas=False, **kwargs): + """ZoeDepthNK model. This is the version of ZoeDepth that has two metric heads and uses a learned router to route to experts. + + Args: + core (models.base_models.midas.MidasCore): The base midas model that is used for extraction of "relative" features + + bin_conf (List[dict]): A list of dictionaries that contain the bin configuration for each metric head. Each dictionary should contain the following keys: + "name" (str, typically same as the dataset name), "n_bins" (int), "min_depth" (float), "max_depth" (float) + + The length of this list determines the number of metric heads. + bin_centers_type (str, optional): "normed" or "softplus". Activation type used for bin centers. For "normed" bin centers, linear normalization trick is applied. This results in bounded bin centers. + For "softplus", softplus activation is used and thus are unbounded. Defaults to "normed". + bin_embedding_dim (int, optional): bin embedding dimension. Defaults to 128. + + n_attractors (List[int], optional): Number of bin attractors at decoder layers. Defaults to [16, 8, 4, 1]. + attractor_alpha (int, optional): Proportional attractor strength. Refer to models.layers.attractor for more details. Defaults to 300. + attractor_gamma (int, optional): Exponential attractor strength. Refer to models.layers.attractor for more details. Defaults to 2. + attractor_kind (str, optional): Attraction aggregation "sum" or "mean". Defaults to 'sum'. + attractor_type (str, optional): Type of attractor to use; "inv" (Inverse attractor) or "exp" (Exponential attractor). Defaults to 'exp'. + + min_temp (int, optional): Lower bound for temperature of output probability distribution. Defaults to 5. + max_temp (int, optional): Upper bound for temperature of output probability distribution. Defaults to 50. + + memory_efficient (bool, optional): Whether to use memory efficient version of attractor layers. Memory efficient version is slower but is recommended incase of multiple metric heads in order save GPU memory. Defaults to False. 
+ + train_midas (bool, optional): Whether to train "core", the base midas model. Defaults to True. + is_midas_pretrained (bool, optional): Is "core" pretrained? Defaults to True. + midas_lr_factor (int, optional): Learning rate reduction factor for base midas model except its encoder and positional encodings. Defaults to 10. + encoder_lr_factor (int, optional): Learning rate reduction factor for the encoder in midas model. Defaults to 10. + pos_enc_lr_factor (int, optional): Learning rate reduction factor for positional encodings in the base midas model. Defaults to 10. + + """ + + super().__init__() + + self.core = core + self.bin_conf = bin_conf + self.min_temp = min_temp + self.max_temp = max_temp + self.memory_efficient = memory_efficient + self.train_midas = train_midas + self.is_midas_pretrained = is_midas_pretrained + self.midas_lr_factor = midas_lr_factor + self.encoder_lr_factor = encoder_lr_factor + self.pos_enc_lr_factor = pos_enc_lr_factor + self.inverse_midas = inverse_midas + + N_MIDAS_OUT = 32 + btlnck_features = self.core.output_channels[0] + num_out_features = self.core.output_channels[1:] + # self.scales = [16, 8, 4, 2] # spatial scale factors + + self.conv2 = nn.Conv2d( + btlnck_features, btlnck_features, kernel_size=1, stride=1, padding=0) + + # Transformer classifier on the bottleneck + self.patch_transformer = PatchTransformerEncoder( + btlnck_features, 1, 128, use_class_token=True) + self.mlp_classifier = nn.Sequential( + nn.Linear(128, 128), + nn.ReLU(), + nn.Linear(128, 2) + ) + + if bin_centers_type == "normed": + SeedBinRegressorLayer = SeedBinRegressor + Attractor = AttractorLayer + elif bin_centers_type == "softplus": + SeedBinRegressorLayer = SeedBinRegressorUnnormed + Attractor = AttractorLayerUnnormed + elif bin_centers_type == "hybrid1": + SeedBinRegressorLayer = SeedBinRegressor + Attractor = AttractorLayerUnnormed + elif bin_centers_type == "hybrid2": + SeedBinRegressorLayer = SeedBinRegressorUnnormed + Attractor = AttractorLayer + else: + raise ValueError( + "bin_centers_type should be one of 'normed', 'softplus', 'hybrid1', 'hybrid2'") + self.bin_centers_type = bin_centers_type + # We have bins for each bin conf. 
+ # Create a map (ModuleDict) of 'name' -> seed_bin_regressor + self.seed_bin_regressors = nn.ModuleDict( + {conf['name']: SeedBinRegressorLayer(btlnck_features, conf["n_bins"], mlp_dim=bin_embedding_dim//2, min_depth=conf["min_depth"], max_depth=conf["max_depth"]) + for conf in bin_conf} + ) + + self.seed_projector = Projector( + btlnck_features, bin_embedding_dim, mlp_dim=bin_embedding_dim//2) + self.projectors = nn.ModuleList([ + Projector(num_out, bin_embedding_dim, mlp_dim=bin_embedding_dim//2) + for num_out in num_out_features + ]) + + # Create a map (ModuleDict) of 'name' -> attractors (ModuleList) + self.attractors = nn.ModuleDict( + {conf['name']: nn.ModuleList([ + Attractor(bin_embedding_dim, n_attractors[i], + mlp_dim=bin_embedding_dim, alpha=attractor_alpha, + gamma=attractor_gamma, kind=attractor_kind, + attractor_type=attractor_type, memory_efficient=memory_efficient, + min_depth=conf["min_depth"], max_depth=conf["max_depth"]) + for i in range(len(n_attractors)) + ]) + for conf in bin_conf} + ) + + last_in = N_MIDAS_OUT + # conditional log binomial for each bin conf + self.conditional_log_binomial = nn.ModuleDict( + {conf['name']: ConditionalLogBinomial(last_in, bin_embedding_dim, conf['n_bins'], bottleneck_factor=4, min_temp=self.min_temp, max_temp=self.max_temp) + for conf in bin_conf} + ) + + def forward(self, x, return_final_centers=False, denorm=False, return_probs=False, **kwargs): + """ + Args: + x (torch.Tensor): Input image tensor of shape (B, C, H, W). Assumes all images are from the same domain. + return_final_centers (bool, optional): Whether to return the final centers of the attractors. Defaults to False. + denorm (bool, optional): Whether to denormalize the input image. Defaults to False. + return_probs (bool, optional): Whether to return the probabilities of the bins. Defaults to False. + + Returns: + dict: Dictionary of outputs with keys: + - "rel_depth": Relative depth map of shape (B, 1, H, W) + - "metric_depth": Metric depth map of shape (B, 1, H, W) + - "domain_logits": Domain logits of shape (B, 2) + - "bin_centers": Bin centers of shape (B, N, H, W). Present only if return_final_centers is True + - "probs": Bin probabilities of shape (B, N, H, W). 
Present only if return_probs is True + """ + b, c, h, w = x.shape + self.orig_input_width = w + self.orig_input_height = h + rel_depth, out = self.core(x, denorm=denorm, return_rel_depth=True) + + outconv_activation = out[0] + btlnck = out[1] + x_blocks = out[2:] + + x_d0 = self.conv2(btlnck) + x = x_d0 + + # Predict which path to take + embedding = self.patch_transformer(x)[0] # N, E + domain_logits = self.mlp_classifier(embedding) # N, 2 + domain_vote = torch.softmax(domain_logits.sum( + dim=0, keepdim=True), dim=-1) # 1, 2 + + # Get the path + bin_conf_name = ["nyu", "kitti"][torch.argmax( + domain_vote, dim=-1).squeeze().item()] + + try: + conf = [c for c in self.bin_conf if c.name == bin_conf_name][0] + except IndexError: + raise ValueError( + f"bin_conf_name {bin_conf_name} not found in bin_confs") + + min_depth = conf['min_depth'] + max_depth = conf['max_depth'] + + seed_bin_regressor = self.seed_bin_regressors[bin_conf_name] + _, seed_b_centers = seed_bin_regressor(x) + if self.bin_centers_type == 'normed' or self.bin_centers_type == 'hybrid2': + b_prev = (seed_b_centers - min_depth)/(max_depth - min_depth) + else: + b_prev = seed_b_centers + prev_b_embedding = self.seed_projector(x) + + attractors = self.attractors[bin_conf_name] + for projector, attractor, x in zip(self.projectors, attractors, x_blocks): + b_embedding = projector(x) + b, b_centers = attractor( + b_embedding, b_prev, prev_b_embedding, interpolate=True) + b_prev = b + prev_b_embedding = b_embedding + + last = outconv_activation + + b_centers = nn.functional.interpolate( + b_centers, last.shape[-2:], mode='bilinear', align_corners=True) + b_embedding = nn.functional.interpolate( + b_embedding, last.shape[-2:], mode='bilinear', align_corners=True) + + clb = self.conditional_log_binomial[bin_conf_name] + x = clb(last, b_embedding) + + # Now depth value is Sum px * cx , where cx are bin_centers from the last bin tensor + # print(x.shape, b_centers.shape) + # b_centers = nn.functional.interpolate(b_centers, x.shape[-2:], mode='bilinear', align_corners=True) + out = torch.sum(x * b_centers, dim=1, keepdim=True) + + output = dict(domain_logits=domain_logits, metric_depth=out) + if return_final_centers or return_probs: + output['bin_centers'] = b_centers + + if return_probs: + output['probs'] = x + return output + + def get_lr_params(self, lr): + """ + Learning rate configuration for different layers of the model + + Args: + lr (float) : Base learning rate + Returns: + list : list of parameters to optimize and their learning rates, in the format required by torch optimizers. 
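            For example (a minimal sketch; `model` and `base_lr` are assumed names), the returned groups can be handed straight to a torch optimizer:

                optimizer = torch.optim.AdamW(model.get_lr_params(base_lr))

            With train_midas enabled this trains the encoder at base_lr / encoder_lr_factor, the relative positional encodings at base_lr / pos_enc_lr_factor, the rest of the MiDaS core at base_lr / midas_lr_factor, and the new depth head at base_lr.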
+ """ + param_conf = [] + if self.train_midas: + def get_rel_pos_params(): + for name, p in self.core.core.pretrained.named_parameters(): + if "relative_position" in name: + yield p + + def get_enc_params_except_rel_pos(): + for name, p in self.core.core.pretrained.named_parameters(): + if "relative_position" not in name: + yield p + + encoder_params = get_enc_params_except_rel_pos() + rel_pos_params = get_rel_pos_params() + midas_params = self.core.core.scratch.parameters() + midas_lr_factor = self.midas_lr_factor if self.is_midas_pretrained else 1.0 + param_conf.extend([ + {'params': encoder_params, 'lr': lr / self.encoder_lr_factor}, + {'params': rel_pos_params, 'lr': lr / self.pos_enc_lr_factor}, + {'params': midas_params, 'lr': lr / midas_lr_factor} + ]) + + remaining_modules = [] + for name, child in self.named_children(): + if name != 'core': + remaining_modules.append(child) + remaining_params = itertools.chain( + *[child.parameters() for child in remaining_modules]) + param_conf.append({'params': remaining_params, 'lr': lr}) + return param_conf + + def get_conf_parameters(self, conf_name): + """ + Returns parameters of all the ModuleDicts children that are exclusively used for the given bin configuration + """ + params = [] + for name, child in self.named_children(): + if isinstance(child, nn.ModuleDict): + for bin_conf_name, module in child.items(): + if bin_conf_name == conf_name: + params += list(module.parameters()) + return params + + def freeze_conf(self, conf_name): + """ + Freezes all the parameters of all the ModuleDicts children that are exclusively used for the given bin configuration + """ + for p in self.get_conf_parameters(conf_name): + p.requires_grad = False + + def unfreeze_conf(self, conf_name): + """ + Unfreezes all the parameters of all the ModuleDicts children that are exclusively used for the given bin configuration + """ + for p in self.get_conf_parameters(conf_name): + p.requires_grad = True + + def freeze_all_confs(self): + """ + Freezes all the parameters of all the ModuleDicts children + """ + for name, child in self.named_children(): + if isinstance(child, nn.ModuleDict): + for bin_conf_name, module in child.items(): + for p in module.parameters(): + p.requires_grad = False + + @staticmethod + def build(midas_model_type="DPT_BEiT_L_384", pretrained_resource=None, use_pretrained_midas=False, train_midas=False, freeze_midas_bn=True, **kwargs): + core = MidasCore.build(midas_model_type=midas_model_type, use_pretrained_midas=use_pretrained_midas, + train_midas=train_midas, fetch_features=True, freeze_bn=freeze_midas_bn, **kwargs) + model = ZoeDepthNK(core, **kwargs) + if pretrained_resource: + assert isinstance(pretrained_resource, str), "pretrained_resource must be a string" + model = load_state_from_resource(model, pretrained_resource) + return model + + @staticmethod + def build_from_config(config): + return ZoeDepthNK.build(**config) diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/utils/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5f2668792389157609abb2a0846fb620e7d67eb9 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/utils/__init__.py @@ -0,0 +1,24 @@ +# MIT License + +# Copyright (c) 2022 Intelligent Systems Lab Org + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the 
"Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# File author: Shariq Farooq Bhat + diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/utils/arg_utils.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/utils/arg_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..8a3004ec3679c0a40fd8961253733fb4343ad545 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/utils/arg_utils.py @@ -0,0 +1,33 @@ + + +def infer_type(x): # hacky way to infer type from string args + if not isinstance(x, str): + return x + + try: + x = int(x) + return x + except ValueError: + pass + + try: + x = float(x) + return x + except ValueError: + pass + + return x + + +def parse_unknown(unknown_args): + clean = [] + for a in unknown_args: + if "=" in a: + k, v = a.split("=") + clean.extend([k, v]) + else: + clean.append(a) + + keys = clean[::2] + values = clean[1::2] + return {k.replace("--", ""): infer_type(v) for k, v in zip(keys, values)} diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/utils/config.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/utils/config.py new file mode 100644 index 0000000000000000000000000000000000000000..84996564663dadf0e720de2a68ef8c53106ed666 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/utils/config.py @@ -0,0 +1,437 @@ +# MIT License + +# Copyright (c) 2022 Intelligent Systems Lab Org + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +# File author: Shariq Farooq Bhat + +import json +import os + +from .easydict import EasyDict as edict +from .arg_utils import infer_type + +import pathlib +import platform + +ROOT = pathlib.Path(__file__).parent.parent.resolve() + +HOME_DIR = os.path.expanduser("~") + +COMMON_CONFIG = { + "save_dir": os.path.expanduser("~/shortcuts/monodepth3_checkpoints"), + "project": "ZoeDepth", + "tags": '', + "notes": "", + "gpu": None, + "root": ".", + "uid": None, + "print_losses": False +} + +DATASETS_CONFIG = { + "kitti": { + "dataset": "kitti", + "min_depth": 0.001, + "max_depth": 80, + "data_path": os.path.join(HOME_DIR, "shortcuts/datasets/kitti/raw"), + "gt_path": os.path.join(HOME_DIR, "shortcuts/datasets/kitti/gts"), + "filenames_file": "./train_test_inputs/kitti_eigen_train_files_with_gt.txt", + "input_height": 352, + "input_width": 1216, # 704 + "data_path_eval": os.path.join(HOME_DIR, "shortcuts/datasets/kitti/raw"), + "gt_path_eval": os.path.join(HOME_DIR, "shortcuts/datasets/kitti/gts"), + "filenames_file_eval": "./train_test_inputs/kitti_eigen_test_files_with_gt.txt", + + "min_depth_eval": 1e-3, + "max_depth_eval": 80, + + "do_random_rotate": True, + "degree": 1.0, + "do_kb_crop": True, + "garg_crop": True, + "eigen_crop": False, + "use_right": False + }, + "kitti_test": { + "dataset": "kitti", + "min_depth": 0.001, + "max_depth": 80, + "data_path": os.path.join(HOME_DIR, "shortcuts/datasets/kitti/raw"), + "gt_path": os.path.join(HOME_DIR, "shortcuts/datasets/kitti/gts"), + "filenames_file": "./train_test_inputs/kitti_eigen_train_files_with_gt.txt", + "input_height": 352, + "input_width": 1216, + "data_path_eval": os.path.join(HOME_DIR, "shortcuts/datasets/kitti/raw"), + "gt_path_eval": os.path.join(HOME_DIR, "shortcuts/datasets/kitti/gts"), + "filenames_file_eval": "./train_test_inputs/kitti_eigen_test_files_with_gt.txt", + + "min_depth_eval": 1e-3, + "max_depth_eval": 80, + + "do_random_rotate": False, + "degree": 1.0, + "do_kb_crop": True, + "garg_crop": True, + "eigen_crop": False, + "use_right": False + }, + "nyu": { + "dataset": "nyu", + "avoid_boundary": False, + "min_depth": 1e-3, # originally 0.1 + "max_depth": 10, + "data_path": os.path.join(HOME_DIR, "shortcuts/datasets/nyu_depth_v2/sync/"), + "gt_path": os.path.join(HOME_DIR, "shortcuts/datasets/nyu_depth_v2/sync/"), + "filenames_file": "./train_test_inputs/nyudepthv2_train_files_with_gt.txt", + "input_height": 480, + "input_width": 640, + "data_path_eval": os.path.join(HOME_DIR, "shortcuts/datasets/nyu_depth_v2/official_splits/test/"), + "gt_path_eval": os.path.join(HOME_DIR, "shortcuts/datasets/nyu_depth_v2/official_splits/test/"), + "filenames_file_eval": "./train_test_inputs/nyudepthv2_test_files_with_gt.txt", + "min_depth_eval": 1e-3, + "max_depth_eval": 10, + "min_depth_diff": -10, + "max_depth_diff": 10, + + "do_random_rotate": True, + "degree": 1.0, + "do_kb_crop": False, + "garg_crop": False, + "eigen_crop": True + }, + "ibims": { + "dataset": "ibims", + "ibims_root": os.path.join(HOME_DIR, "shortcuts/datasets/ibims/ibims1_core_raw/"), + "eigen_crop": True, + "garg_crop": False, + "do_kb_crop": False, + "min_depth_eval": 0, + "max_depth_eval": 10, + "min_depth": 1e-3, + "max_depth": 10 + }, + "sunrgbd": { + "dataset": "sunrgbd", + "sunrgbd_root": os.path.join(HOME_DIR, "shortcuts/datasets/SUNRGBD/test/"), + "eigen_crop": True, + "garg_crop": False, + "do_kb_crop": False, + "min_depth_eval": 0, + "max_depth_eval": 8, + "min_depth": 1e-3, + "max_depth": 10 + }, + "diml_indoor": { + "dataset": "diml_indoor", + 
"diml_indoor_root": os.path.join(HOME_DIR, "shortcuts/datasets/diml_indoor_test/"), + "eigen_crop": True, + "garg_crop": False, + "do_kb_crop": False, + "min_depth_eval": 0, + "max_depth_eval": 10, + "min_depth": 1e-3, + "max_depth": 10 + }, + "diml_outdoor": { + "dataset": "diml_outdoor", + "diml_outdoor_root": os.path.join(HOME_DIR, "shortcuts/datasets/diml_outdoor_test/"), + "eigen_crop": False, + "garg_crop": True, + "do_kb_crop": False, + "min_depth_eval": 2, + "max_depth_eval": 80, + "min_depth": 1e-3, + "max_depth": 80 + }, + "diode_indoor": { + "dataset": "diode_indoor", + "diode_indoor_root": os.path.join(HOME_DIR, "shortcuts/datasets/diode_indoor/"), + "eigen_crop": True, + "garg_crop": False, + "do_kb_crop": False, + "min_depth_eval": 1e-3, + "max_depth_eval": 10, + "min_depth": 1e-3, + "max_depth": 10 + }, + "diode_outdoor": { + "dataset": "diode_outdoor", + "diode_outdoor_root": os.path.join(HOME_DIR, "shortcuts/datasets/diode_outdoor/"), + "eigen_crop": False, + "garg_crop": True, + "do_kb_crop": False, + "min_depth_eval": 1e-3, + "max_depth_eval": 80, + "min_depth": 1e-3, + "max_depth": 80 + }, + "hypersim_test": { + "dataset": "hypersim_test", + "hypersim_test_root": os.path.join(HOME_DIR, "shortcuts/datasets/hypersim_test/"), + "eigen_crop": True, + "garg_crop": False, + "do_kb_crop": False, + "min_depth_eval": 1e-3, + "max_depth_eval": 80, + "min_depth": 1e-3, + "max_depth": 10 + }, + "vkitti": { + "dataset": "vkitti", + "vkitti_root": os.path.join(HOME_DIR, "shortcuts/datasets/vkitti_test/"), + "eigen_crop": False, + "garg_crop": True, + "do_kb_crop": True, + "min_depth_eval": 1e-3, + "max_depth_eval": 80, + "min_depth": 1e-3, + "max_depth": 80 + }, + "vkitti2": { + "dataset": "vkitti2", + "vkitti2_root": os.path.join(HOME_DIR, "shortcuts/datasets/vkitti2/"), + "eigen_crop": False, + "garg_crop": True, + "do_kb_crop": True, + "min_depth_eval": 1e-3, + "max_depth_eval": 80, + "min_depth": 1e-3, + "max_depth": 80, + }, + "ddad": { + "dataset": "ddad", + "ddad_root": os.path.join(HOME_DIR, "shortcuts/datasets/ddad/ddad_val/"), + "eigen_crop": False, + "garg_crop": True, + "do_kb_crop": True, + "min_depth_eval": 1e-3, + "max_depth_eval": 80, + "min_depth": 1e-3, + "max_depth": 80, + }, +} + +ALL_INDOOR = ["nyu", "ibims", "sunrgbd", "diode_indoor", "hypersim_test"] +ALL_OUTDOOR = ["kitti", "diml_outdoor", "diode_outdoor", "vkitti2", "ddad"] +ALL_EVAL_DATASETS = ALL_INDOOR + ALL_OUTDOOR + +COMMON_TRAINING_CONFIG = { + "dataset": "nyu", + "distributed": True, + "workers": 16, + "clip_grad": 0.1, + "use_shared_dict": False, + "shared_dict": None, + "use_amp": False, + + "aug": True, + "random_crop": False, + "random_translate": False, + "translate_prob": 0.2, + "max_translation": 100, + + "validate_every": 0.25, + "log_images_every": 0.1, + "prefetch": False, +} + + +def flatten(config, except_keys=('bin_conf')): + def recurse(inp): + if isinstance(inp, dict): + for key, value in inp.items(): + if key in except_keys: + yield (key, value) + if isinstance(value, dict): + yield from recurse(value) + else: + yield (key, value) + + return dict(list(recurse(config))) + + +def split_combined_args(kwargs): + """Splits the arguments that are combined with '__' into multiple arguments. + Combined arguments should have equal number of keys and values. + Keys are separated by '__' and Values are separated with ';'. + For example, '__n_bins__lr=256;0.001' + + Args: + kwargs (dict): key-value pairs of arguments where key-value is optionally combined according to the above format. 
+ + Returns: + dict: Parsed dict with the combined arguments split into individual key-value pairs. + """ + new_kwargs = dict(kwargs) + for key, value in kwargs.items(): + if key.startswith("__"): + keys = key.split("__")[1:] + values = value.split(";") + assert len(keys) == len( + values), f"Combined arguments should have equal number of keys and values. Keys are separated by '__' and Values are separated with ';'. For example, '__n_bins__lr=256;0.001. Given (keys,values) is ({keys}, {values})" + for k, v in zip(keys, values): + new_kwargs[k] = v + return new_kwargs + + +def parse_list(config, key, dtype=int): + """Parse a list of values for the key if the value is a string. The values are separated by a comma. + Modifies the config in place. + """ + if key in config: + if isinstance(config[key], str): + config[key] = list(map(dtype, config[key].split(','))) + assert isinstance(config[key], list) and all([isinstance(e, dtype) for e in config[key]] + ), f"{key} should be a list of values dtype {dtype}. Given {config[key]} of type {type(config[key])} with values of type {[type(e) for e in config[key]]}." + + +def get_model_config(model_name, model_version=None): + """Find and parse the .json config file for the model. + + Args: + model_name (str): name of the model. The config file should be named config_{model_name}[_{model_version}].json under the models/{model_name} directory. + model_version (str, optional): Specific config version. If specified config_{model_name}_{model_version}.json is searched for and used. Otherwise config_{model_name}.json is used. Defaults to None. + + Returns: + easydict: the config dictionary for the model. + """ + config_fname = f"config_{model_name}_{model_version}.json" if model_version is not None else f"config_{model_name}.json" + config_file = os.path.join(ROOT, "models", model_name, config_fname) + if not os.path.exists(config_file): + return None + + with open(config_file, "r") as f: + config = edict(json.load(f)) + + # handle dictionary inheritance + # only training config is supported for inheritance + if "inherit" in config.train and config.train.inherit is not None: + inherit_config = get_model_config(config.train["inherit"]).train + for key, value in inherit_config.items(): + if key not in config.train: + config.train[key] = value + return edict(config) + + +def update_model_config(config, mode, model_name, model_version=None, strict=False): + model_config = get_model_config(model_name, model_version) + if model_config is not None: + config = {**config, ** + flatten({**model_config.model, **model_config[mode]})} + elif strict: + raise ValueError(f"Config file for model {model_name} not found.") + return config + + +def check_choices(name, value, choices): + # return # No checks in dev branch + if value not in choices: + raise ValueError(f"{name} {value} not in supported choices {choices}") + + +KEYS_TYPE_BOOL = ["use_amp", "distributed", "use_shared_dict", "same_lr", "aug", "three_phase", + "prefetch", "cycle_momentum"] # Casting is not necessary as their int casted values in config are 0 or 1 + + +def get_config(model_name, mode='train', dataset=None, **overwrite_kwargs): + """Main entry point to get the config for the model. + + Args: + model_name (str): name of the desired model. + mode (str, optional): "train" or "infer". Defaults to 'train'. + dataset (str, optional): If specified, the corresponding dataset configuration is loaded as well. Defaults to None. + + Keyword Args: key-value pairs of arguments to overwrite the default config. 
+ + The order of precedence for overwriting the config is (Higher precedence first): + # 1. overwrite_kwargs + # 2. "config_version": Config file version if specified in overwrite_kwargs. The corresponding config loaded is config_{model_name}_{config_version}.json + # 3. "version_name": Default Model version specific config specified in overwrite_kwargs. The corresponding config loaded is config_{model_name}_{version_name}.json + # 4. common_config: Default config for all models specified in COMMON_CONFIG + + Returns: + easydict: The config dictionary for the model. + """ + + + check_choices("Model", model_name, ["zoedepth", "zoedepth_nk"]) + check_choices("Mode", mode, ["train", "infer", "eval"]) + if mode == "train": + check_choices("Dataset", dataset, ["nyu", "kitti", "mix", None]) + + config = flatten({**COMMON_CONFIG, **COMMON_TRAINING_CONFIG}) + config = update_model_config(config, mode, model_name) + + # update with model version specific config + version_name = overwrite_kwargs.get("version_name", config["version_name"]) + config = update_model_config(config, mode, model_name, version_name) + + # update with config version if specified + config_version = overwrite_kwargs.get("config_version", None) + if config_version is not None: + print("Overwriting config with config_version", config_version) + config = update_model_config(config, mode, model_name, config_version) + + # update with overwrite_kwargs + # Combined args are useful for hyperparameter search + overwrite_kwargs = split_combined_args(overwrite_kwargs) + config = {**config, **overwrite_kwargs} + + # Casting to bool # TODO: Not necessary. Remove and test + for key in KEYS_TYPE_BOOL: + if key in config: + config[key] = bool(config[key]) + + # Model specific post processing of config + parse_list(config, "n_attractors") + + # adjust n_bins for each bin configuration if bin_conf is given and n_bins is passed in overwrite_kwargs + if 'bin_conf' in config and 'n_bins' in overwrite_kwargs: + bin_conf = config['bin_conf'] # list of dicts + n_bins = overwrite_kwargs['n_bins'] + new_bin_conf = [] + for conf in bin_conf: + conf['n_bins'] = n_bins + new_bin_conf.append(conf) + config['bin_conf'] = new_bin_conf + + if mode == "train": + orig_dataset = dataset + if dataset == "mix": + dataset = 'nyu' # Use nyu as default for mix. 
Dataset config is changed accordingly while loading the dataloader + if dataset is not None: + config['project'] = f"MonoDepth3-{orig_dataset}" # Set project for wandb + + if dataset is not None: + config['dataset'] = dataset + config = {**DATASETS_CONFIG[dataset], **config} + + + config['model'] = model_name + typed_config = {k: infer_type(v) for k, v in config.items()} + # add hostname to config + config['hostname'] = platform.node() + return edict(typed_config) + + +def change_dataset(config, new_dataset): + config.update(DATASETS_CONFIG[new_dataset]) + return config diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/utils/easydict/__init__.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/utils/easydict/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..15928179b0182c6045d98bc0a7be1c6ca45f675e --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/utils/easydict/__init__.py @@ -0,0 +1,158 @@ +""" +EasyDict +Copy/pasted from https://github.com/makinacorpus/easydict +Original author: Mathieu Leplatre +""" + +class EasyDict(dict): + """ + Get attributes + + >>> d = EasyDict({'foo':3}) + >>> d['foo'] + 3 + >>> d.foo + 3 + >>> d.bar + Traceback (most recent call last): + ... + AttributeError: 'EasyDict' object has no attribute 'bar' + + Works recursively + + >>> d = EasyDict({'foo':3, 'bar':{'x':1, 'y':2}}) + >>> isinstance(d.bar, dict) + True + >>> d.bar.x + 1 + + Bullet-proof + + >>> EasyDict({}) + {} + >>> EasyDict(d={}) + {} + >>> EasyDict(None) + {} + >>> d = {'a': 1} + >>> EasyDict(**d) + {'a': 1} + >>> EasyDict((('a', 1), ('b', 2))) + {'a': 1, 'b': 2} + + Set attributes + + >>> d = EasyDict() + >>> d.foo = 3 + >>> d.foo + 3 + >>> d.bar = {'prop': 'value'} + >>> d.bar.prop + 'value' + >>> d + {'foo': 3, 'bar': {'prop': 'value'}} + >>> d.bar.prop = 'newer' + >>> d.bar.prop + 'newer' + + + Values extraction + + >>> d = EasyDict({'foo':0, 'bar':[{'x':1, 'y':2}, {'x':3, 'y':4}]}) + >>> isinstance(d.bar, list) + True + >>> from operator import attrgetter + >>> list(map(attrgetter('x'), d.bar)) + [1, 3] + >>> list(map(attrgetter('y'), d.bar)) + [2, 4] + >>> d = EasyDict() + >>> list(d.keys()) + [] + >>> d = EasyDict(foo=3, bar=dict(x=1, y=2)) + >>> d.foo + 3 + >>> d.bar.x + 1 + + Still like a dict though + + >>> o = EasyDict({'clean':True}) + >>> list(o.items()) + [('clean', True)] + + And like a class + + >>> class Flower(EasyDict): + ... power = 1 + ... + >>> f = Flower() + >>> f.power + 1 + >>> f = Flower({'height': 12}) + >>> f.height + 12 + >>> f['power'] + 1 + >>> sorted(f.keys()) + ['height', 'power'] + + update and pop items + >>> d = EasyDict(a=1, b='2') + >>> e = EasyDict(c=3.0, a=9.0) + >>> d.update(e) + >>> d.c + 3.0 + >>> d['c'] + 3.0 + >>> d.get('c') + 3.0 + >>> d.update(a=4, b=4) + >>> d.b + 4 + >>> d.pop('a') + 4 + >>> d.a + Traceback (most recent call last): + ... 
+ AttributeError: 'EasyDict' object has no attribute 'a' + """ + def __init__(self, d=None, **kwargs): + if d is None: + d = {} + else: + d = dict(d) + if kwargs: + d.update(**kwargs) + for k, v in d.items(): + setattr(self, k, v) + # Class attributes + for k in self.__class__.__dict__.keys(): + if not (k.startswith('__') and k.endswith('__')) and not k in ('update', 'pop'): + setattr(self, k, getattr(self, k)) + + def __setattr__(self, name, value): + if isinstance(value, (list, tuple)): + value = [self.__class__(x) + if isinstance(x, dict) else x for x in value] + elif isinstance(value, dict) and not isinstance(value, self.__class__): + value = self.__class__(value) + super(EasyDict, self).__setattr__(name, value) + super(EasyDict, self).__setitem__(name, value) + + __setitem__ = __setattr__ + + def update(self, e=None, **f): + d = e or dict() + d.update(f) + for k in d: + setattr(self, k, d[k]) + + def pop(self, k, d=None): + delattr(self, k) + return super(EasyDict, self).pop(k, d) + + +if __name__ == "__main__": + import doctest + doctest.testmod() \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/utils/geometry.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/utils/geometry.py new file mode 100644 index 0000000000000000000000000000000000000000..e3da8c75b5a8e39b4b58a4dcd827b84d79b9115c --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/utils/geometry.py @@ -0,0 +1,98 @@ +# MIT License + +# Copyright (c) 2022 Intelligent Systems Lab Org + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# File author: Shariq Farooq Bhat + +import numpy as np + +def get_intrinsics(H,W): + """ + Intrinsics for a pinhole camera model. + Assume fov of 55 degrees and central principal point. 
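    For example, for a 640x480 image this gives f = 0.5 * 640 / tan(27.5°) ≈ 614.7, cx = 320 and cy = 240 (all in pixels).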
+ """ + f = 0.5 * W / np.tan(0.5 * 55 * np.pi / 180.0) + cx = 0.5 * W + cy = 0.5 * H + return np.array([[f, 0, cx], + [0, f, cy], + [0, 0, 1]]) + +def depth_to_points(depth, R=None, t=None): + + K = get_intrinsics(depth.shape[1], depth.shape[2]) + Kinv = np.linalg.inv(K) + if R is None: + R = np.eye(3) + if t is None: + t = np.zeros(3) + + # M converts from your coordinate to PyTorch3D's coordinate system + M = np.eye(3) + M[0, 0] = -1.0 + M[1, 1] = -1.0 + + height, width = depth.shape[1:3] + + x = np.arange(width) + y = np.arange(height) + coord = np.stack(np.meshgrid(x, y), -1) + coord = np.concatenate((coord, np.ones_like(coord)[:, :, [0]]), -1) # z=1 + coord = coord.astype(np.float32) + # coord = torch.as_tensor(coord, dtype=torch.float32, device=device) + coord = coord[None] # bs, h, w, 3 + + D = depth[:, :, :, None, None] + # print(D.shape, Kinv[None, None, None, ...].shape, coord[:, :, :, :, None].shape ) + pts3D_1 = D * Kinv[None, None, None, ...] @ coord[:, :, :, :, None] + # pts3D_1 live in your coordinate system. Convert them to Py3D's + pts3D_1 = M[None, None, None, ...] @ pts3D_1 + # from reference to targe tviewpoint + pts3D_2 = R[None, None, None, ...] @ pts3D_1 + t[None, None, None, :, None] + # pts3D_2 = pts3D_1 + # depth_2 = pts3D_2[:, :, :, 2, :] # b,1,h,w + return pts3D_2[:, :, :, :3, 0][0] + + +def create_triangles(h, w, mask=None): + """ + Reference: https://github.com/google-research/google-research/blob/e96197de06613f1b027d20328e06d69829fa5a89/infinite_nature/render_utils.py#L68 + Creates mesh triangle indices from a given pixel grid size. + This function is not and need not be differentiable as triangle indices are + fixed. + Args: + h: (int) denoting the height of the image. + w: (int) denoting the width of the image. + Returns: + triangles: 2D numpy array of indices (int) with shape (2(W-1)(H-1) x 3) + """ + x, y = np.meshgrid(range(w - 1), range(h - 1)) + tl = y * w + x + tr = y * w + x + 1 + bl = (y + 1) * w + x + br = (y + 1) * w + x + 1 + triangles = np.array([tl, bl, tr, br, tr, bl]) + triangles = np.transpose(triangles, (1, 2, 0)).reshape( + ((w - 1) * (h - 1) * 2, 3)) + if mask is not None: + mask = mask.reshape(-1) + triangles = triangles[mask[triangles].all(1)] + return triangles diff --git a/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/utils/misc.py b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/utils/misc.py new file mode 100644 index 0000000000000000000000000000000000000000..4bbe403d3669829eecdf658458c76aa5e87e2b33 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/annotator/zoe/zoedepth/utils/misc.py @@ -0,0 +1,368 @@ +# MIT License + +# Copyright (c) 2022 Intelligent Systems Lab Org + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# File author: Shariq Farooq Bhat + +"""Miscellaneous utility functions.""" + +from scipy import ndimage + +import base64 +import math +import re +from io import BytesIO + +import matplotlib +import matplotlib.cm +import numpy as np +import requests +import torch +import torch.distributed as dist +import torch.nn +import torch.nn as nn +import torch.utils.data.distributed +from PIL import Image +from torchvision.transforms import ToTensor + + +class RunningAverage: + def __init__(self): + self.avg = 0 + self.count = 0 + + def append(self, value): + self.avg = (value + self.count * self.avg) / (self.count + 1) + self.count += 1 + + def get_value(self): + return self.avg + + +def denormalize(x): + """Reverses the imagenet normalization applied to the input. + + Args: + x (torch.Tensor - shape(N,3,H,W)): input tensor + + Returns: + torch.Tensor - shape(N,3,H,W): Denormalized input + """ + mean = torch.Tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1).to(x.device) + std = torch.Tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1).to(x.device) + return x * std + mean + + +class RunningAverageDict: + """A dictionary of running averages.""" + def __init__(self): + self._dict = None + + def update(self, new_dict): + if new_dict is None: + return + + if self._dict is None: + self._dict = dict() + for key, value in new_dict.items(): + self._dict[key] = RunningAverage() + + for key, value in new_dict.items(): + self._dict[key].append(value) + + def get_value(self): + if self._dict is None: + return None + return {key: value.get_value() for key, value in self._dict.items()} + + +def colorize(value, vmin=None, vmax=None, cmap='gray_r', invalid_val=-99, invalid_mask=None, background_color=(128, 128, 128, 255), gamma_corrected=False, value_transform=None): + """Converts a depth map to a color image. + + Args: + value (torch.Tensor, numpy.ndarry): Input depth map. Shape: (H, W) or (1, H, W) or (1, 1, H, W). All singular dimensions are squeezed + vmin (float, optional): vmin-valued entries are mapped to start color of cmap. If None, value.min() is used. Defaults to None. + vmax (float, optional): vmax-valued entries are mapped to end color of cmap. If None, value.max() is used. Defaults to None. + cmap (str, optional): matplotlib colormap to use. Defaults to 'magma_r'. + invalid_val (int, optional): Specifies value of invalid pixels that should be colored as 'background_color'. Defaults to -99. + invalid_mask (numpy.ndarray, optional): Boolean mask for invalid regions. Defaults to None. + background_color (tuple[int], optional): 4-tuple RGB color to give to invalid pixels. Defaults to (128, 128, 128, 255). + gamma_corrected (bool, optional): Apply gamma correction to colored image. Defaults to False. + value_transform (Callable, optional): Apply transform function to valid pixels before coloring. Defaults to None. + + Returns: + numpy.ndarray, dtype - uint8: Colored depth map. 
Shape: (H, W, 4) + """ + if isinstance(value, torch.Tensor): + value = value.detach().cpu().numpy() + + value = value.squeeze() + if invalid_mask is None: + invalid_mask = value == invalid_val + mask = np.logical_not(invalid_mask) + + # normalize + vmin = np.percentile(value[mask],2) if vmin is None else vmin + vmax = np.percentile(value[mask],85) if vmax is None else vmax + if vmin != vmax: + value = (value - vmin) / (vmax - vmin) # vmin..vmax + else: + # Avoid 0-division + value = value * 0. + + # squeeze last dim if it exists + # grey out the invalid values + + value[invalid_mask] = np.nan + cmapper = matplotlib.cm.get_cmap(cmap) + if value_transform: + value = value_transform(value) + # value = value / value.max() + value = cmapper(value, bytes=True) # (nxmx4) + + # img = value[:, :, :] + img = value[...] + img[invalid_mask] = background_color + + # return img.transpose((2, 0, 1)) + if gamma_corrected: + # gamma correction + img = img / 255 + img = np.power(img, 2.2) + img = img * 255 + img = img.astype(np.uint8) + return img + + +def count_parameters(model, include_all=False): + return sum(p.numel() for p in model.parameters() if p.requires_grad or include_all) + + +def compute_errors(gt, pred): + """Compute metrics for 'pred' compared to 'gt' + + Args: + gt (numpy.ndarray): Ground truth values + pred (numpy.ndarray): Predicted values + + gt.shape should be equal to pred.shape + + Returns: + dict: Dictionary containing the following metrics: + 'a1': Delta1 accuracy: Fraction of pixels that are within a scale factor of 1.25 + 'a2': Delta2 accuracy: Fraction of pixels that are within a scale factor of 1.25^2 + 'a3': Delta3 accuracy: Fraction of pixels that are within a scale factor of 1.25^3 + 'abs_rel': Absolute relative error + 'rmse': Root mean squared error + 'log_10': Absolute log10 error + 'sq_rel': Squared relative error + 'rmse_log': Root mean squared error on the log scale + 'silog': Scale invariant log error + """ + thresh = np.maximum((gt / pred), (pred / gt)) + a1 = (thresh < 1.25).mean() + a2 = (thresh < 1.25 ** 2).mean() + a3 = (thresh < 1.25 ** 3).mean() + + abs_rel = np.mean(np.abs(gt - pred) / gt) + sq_rel = np.mean(((gt - pred) ** 2) / gt) + + rmse = (gt - pred) ** 2 + rmse = np.sqrt(rmse.mean()) + + rmse_log = (np.log(gt) - np.log(pred)) ** 2 + rmse_log = np.sqrt(rmse_log.mean()) + + err = np.log(pred) - np.log(gt) + silog = np.sqrt(np.mean(err ** 2) - np.mean(err) ** 2) * 100 + + log_10 = (np.abs(np.log10(gt) - np.log10(pred))).mean() + return dict(a1=a1, a2=a2, a3=a3, abs_rel=abs_rel, rmse=rmse, log_10=log_10, rmse_log=rmse_log, + silog=silog, sq_rel=sq_rel) + + +def compute_metrics(gt, pred, interpolate=True, garg_crop=False, eigen_crop=True, dataset='nyu', min_depth_eval=0.1, max_depth_eval=10, **kwargs): + """Compute metrics of predicted depth maps. Applies cropping and masking as necessary or specified via arguments. Refer to compute_errors for more details on metrics. 
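    A minimal usage sketch (the tensors and the NYU evaluation range are illustrative):

        metrics = compute_metrics(gt, pred, dataset='nyu', min_depth_eval=1e-3, max_depth_eval=10)
        # -> dict with keys 'a1', 'a2', 'a3', 'abs_rel', 'rmse', 'log_10', 'rmse_log', 'silog', 'sq_rel'

    gt and pred are torch tensors; pred is clipped to [min_depth_eval, max_depth_eval] and only pixels whose ground truth falls strictly inside that range (after the optional Garg/Eigen crop) are scored.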
+ """ + if 'config' in kwargs: + config = kwargs['config'] + garg_crop = config.garg_crop + eigen_crop = config.eigen_crop + min_depth_eval = config.min_depth_eval + max_depth_eval = config.max_depth_eval + + if gt.shape[-2:] != pred.shape[-2:] and interpolate: + pred = nn.functional.interpolate( + pred, gt.shape[-2:], mode='bilinear', align_corners=True) + + pred = pred.squeeze().cpu().numpy() + pred[pred < min_depth_eval] = min_depth_eval + pred[pred > max_depth_eval] = max_depth_eval + pred[np.isinf(pred)] = max_depth_eval + pred[np.isnan(pred)] = min_depth_eval + + gt_depth = gt.squeeze().cpu().numpy() + valid_mask = np.logical_and( + gt_depth > min_depth_eval, gt_depth < max_depth_eval) + + if garg_crop or eigen_crop: + gt_height, gt_width = gt_depth.shape + eval_mask = np.zeros(valid_mask.shape) + + if garg_crop: + eval_mask[int(0.40810811 * gt_height):int(0.99189189 * gt_height), + int(0.03594771 * gt_width):int(0.96405229 * gt_width)] = 1 + + elif eigen_crop: + # print("-"*10, " EIGEN CROP ", "-"*10) + if dataset == 'kitti': + eval_mask[int(0.3324324 * gt_height):int(0.91351351 * gt_height), + int(0.0359477 * gt_width):int(0.96405229 * gt_width)] = 1 + else: + # assert gt_depth.shape == (480, 640), "Error: Eigen crop is currently only valid for (480, 640) images" + eval_mask[45:471, 41:601] = 1 + else: + eval_mask = np.ones(valid_mask.shape) + valid_mask = np.logical_and(valid_mask, eval_mask) + return compute_errors(gt_depth[valid_mask], pred[valid_mask]) + + +#################################### Model uilts ################################################ + + +def parallelize(config, model, find_unused_parameters=True): + + if config.gpu is not None: + torch.cuda.set_device(config.gpu) + model = model.cuda(config.gpu) + + config.multigpu = False + if config.distributed: + # Use DDP + config.multigpu = True + config.rank = config.rank * config.ngpus_per_node + config.gpu + dist.init_process_group(backend=config.dist_backend, init_method=config.dist_url, + world_size=config.world_size, rank=config.rank) + config.batch_size = int(config.batch_size / config.ngpus_per_node) + # config.batch_size = 8 + config.workers = int( + (config.num_workers + config.ngpus_per_node - 1) / config.ngpus_per_node) + print("Device", config.gpu, "Rank", config.rank, "batch size", + config.batch_size, "Workers", config.workers) + torch.cuda.set_device(config.gpu) + model = nn.SyncBatchNorm.convert_sync_batchnorm(model) + model = model.cuda(config.gpu) + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[config.gpu], output_device=config.gpu, + find_unused_parameters=find_unused_parameters) + + elif config.gpu is None: + # Use DP + config.multigpu = True + model = model.cuda() + model = torch.nn.DataParallel(model) + + return model + + +################################################################################################# + + +##################################################################################################### + + +class colors: + '''Colors class: + Reset all colors with colors.reset + Two subclasses fg for foreground and bg for background. + Use as colors.subclass.colorname. + i.e. colors.fg.red or colors.bg.green + Also, the generic bold, disable, underline, reverse, strikethrough, + and invisible work with the main class + i.e. 
colors.bold + ''' + reset = '\033[0m' + bold = '\033[01m' + disable = '\033[02m' + underline = '\033[04m' + reverse = '\033[07m' + strikethrough = '\033[09m' + invisible = '\033[08m' + + class fg: + black = '\033[30m' + red = '\033[31m' + green = '\033[32m' + orange = '\033[33m' + blue = '\033[34m' + purple = '\033[35m' + cyan = '\033[36m' + lightgrey = '\033[37m' + darkgrey = '\033[90m' + lightred = '\033[91m' + lightgreen = '\033[92m' + yellow = '\033[93m' + lightblue = '\033[94m' + pink = '\033[95m' + lightcyan = '\033[96m' + + class bg: + black = '\033[40m' + red = '\033[41m' + green = '\033[42m' + orange = '\033[43m' + blue = '\033[44m' + purple = '\033[45m' + cyan = '\033[46m' + lightgrey = '\033[47m' + + +def printc(text, color): + print(f"{color}{text}{colors.reset}") + +############################################ + +def get_image_from_url(url): + response = requests.get(url) + img = Image.open(BytesIO(response.content)).convert("RGB") + return img + +def url_to_torch(url, size=(384, 384)): + img = get_image_from_url(url) + img = img.resize(size, Image.ANTIALIAS) + img = torch.from_numpy(np.asarray(img)).float() + img = img.permute(2, 0, 1) + img.div_(255) + return img + +def pil_to_batched_tensor(img): + return ToTensor()(img).unsqueeze(0) + +def save_raw_16bit(depth, fpath="raw.png"): + if isinstance(depth, torch.Tensor): + depth = depth.squeeze().cpu().numpy() + + assert isinstance(depth, np.ndarray), "Depth must be a torch tensor or numpy array" + assert depth.ndim == 2, "Depth must be 2D" + depth = depth * 256 # scale for 16-bit png + depth = depth.astype(np.uint16) + depth = Image.fromarray(depth) + depth.save(fpath) + print("Saved raw depth to", fpath) \ No newline at end of file diff --git a/extensions-builtin/forge_legacy_preprocessors/install.py b/extensions-builtin/forge_legacy_preprocessors/install.py new file mode 100644 index 0000000000000000000000000000000000000000..3a9bd11726ec2ccdbab6e64e2cbeb534ce7680f0 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/install.py @@ -0,0 +1,151 @@ +import launch +import pkg_resources +import sys +import os +import shutil +import platform +from pathlib import Path +from typing import Tuple, Optional + + +repo_root = Path(__file__).parent +main_req_file = repo_root / "requirements.txt" + + +def comparable_version(version: str) -> Tuple: + return tuple(version.split(".")) + + +def get_installed_version(package: str) -> Optional[str]: + try: + return pkg_resources.get_distribution(package).version + except Exception: + return None + + +def extract_base_package(package_string: str) -> str: + base_package = package_string.split("@git")[0] + return base_package + + +def install_requirements(req_file): + with open(req_file) as file: + for package in file: + try: + package = package.strip() + if "==" in package: + package_name, package_version = package.split("==") + installed_version = get_installed_version(package_name) + if installed_version != package_version: + launch.run_pip( + f"install -U {package}", + f"forge_legacy_preprocessor requirement: changing {package_name} version from {installed_version} to {package_version}", + ) + elif ">=" in package: + package_name, package_version = package.split(">=") + installed_version = get_installed_version(package_name) + if not installed_version or comparable_version( + installed_version + ) < comparable_version(package_version): + launch.run_pip( + f"install -U {package}", + f"forge_legacy_preprocessor requirement: changing {package_name} version from {installed_version} to 
{package_version}", + ) + elif not launch.is_installed(extract_base_package(package)): + launch.run_pip( + f"install {package}", + f"forge_legacy_preprocessor requirement: {package}", + ) + except Exception as e: + print(e) + print( + f"Warning: Failed to install {package}, some preprocessors may not work." + ) + + +def try_install_from_wheel(pkg_name: str, wheel_url: str, version: Optional[str] = None): + current_version = get_installed_version(pkg_name) + if current_version is not None: + # No version requirement. + if version is None: + return + # Version requirement already satisfied. + if comparable_version(current_version) >= comparable_version(version): + return + try: + launch.run_pip( + f"install -U {wheel_url}", + f"forge_legacy_preprocessor requirement: {pkg_name}", + ) + except Exception as e: + print(e) + print(f"Warning: Failed to install {pkg_name}. Some processors will not work.") + + +def try_install_insight_face(): + """Attempt to install insightface library. The library is necessary to use ip-adapter faceid. + Note: Building insightface library from source requires compiling C++ code, which should be avoided + in principle. Here the solution is to download a precompiled wheel.""" + if get_installed_version("insightface") is not None: + return + + default_win_wheel = "https://github.com/Gourieff/Assets/raw/main/Insightface/insightface-0.7.3-cp310-cp310-win_amd64.whl" + wheel_url = os.environ.get("INSIGHTFACE_WHEEL", default_win_wheel) + + system = platform.system().lower() + architecture = platform.machine().lower() + python_version = sys.version_info + if wheel_url != default_win_wheel or ( + system == "windows" + and "amd64" in architecture + and python_version.major == 3 + and python_version.minor == 10 + ): + try: + launch.run_pip( + f"install {wheel_url}", + "forge_legacy_preprocessor requirement: insightface", + ) + except Exception as e: + print(e) + print( + "Legacy Preprocessor init warning: Unable to install insightface automatically. " + ) + else: + print( + "Legacy Preprocessor init warning: Unable to install insightface automatically. " + "Please try run `pip install insightface` manually." + ) + + +def try_remove_legacy_submodule(): + """Try remove annotators/hand_refiner_portable submodule dir.""" + submodule = repo_root / "annotator" / "hand_refiner_portable" + if os.path.exists(submodule): + try: + shutil.rmtree(submodule) + except Exception as e: + print(e) + print( + f"Failed to remove submodule {submodule} automatically. You can manually delete the directory." 
+ ) + + +install_requirements(main_req_file) +try_install_insight_face() +try_install_from_wheel( + "handrefinerportable", + wheel_url=os.environ.get( + "HANDREFINER_WHEEL", + "https://github.com/huchenlei/HandRefinerPortable/releases/download/v1.0.1/handrefinerportable-2024.2.12.0-py2.py3-none-any.whl", + ), + version="2024.2.12.0", +) +try_install_from_wheel( + "depth_anything", + wheel_url=os.environ.get( + "DEPTH_ANYTHING_WHEEL", + "https://github.com/huchenlei/Depth-Anything/releases/download/v1.0.0/depth_anything-2024.1.22.0-py2.py3-none-any.whl", + ), +) +try_remove_legacy_submodule() diff --git a/extensions-builtin/forge_legacy_preprocessors/legacy_preprocessors/preprocessor.py b/extensions-builtin/forge_legacy_preprocessors/legacy_preprocessors/preprocessor.py new file mode 100644 index 0000000000000000000000000000000000000000..af6738b3923b4f3ad4aaa72f1d6402562c91c93d --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/legacy_preprocessors/preprocessor.py @@ -0,0 +1,862 @@ +import os +import cv2 +import numpy as np +import torch +import math +import functools + +from dataclasses import dataclass +from transformers.models.clip.modeling_clip import CLIPVisionModelOutput + +from annotator.util import HWC3 +from typing import Callable, Tuple, Union + +from modules.safe import Extra +from modules import devices + + +def torch_handler(module: str, name: str): + """ Allow all torch access. Bypass A1111 safety whitelist. """ + if module == 'torch': + return getattr(torch, name) + if module == 'torch._tensor': + # depth_anything dep. + return getattr(torch._tensor, name) + + +def pad64(x): + return int(np.ceil(float(x) / 64.0) * 64 - x) + + +def safer_memory(x): + # Fix many MAC/AMD problems + return np.ascontiguousarray(x.copy()).copy() + + +def resize_image_with_pad(input_image, resolution, skip_hwc3=False): + if skip_hwc3: + img = input_image + else: + img = HWC3(input_image) + H_raw, W_raw, _ = img.shape + k = float(resolution) / float(min(H_raw, W_raw)) + interpolation = cv2.INTER_CUBIC if k > 1 else cv2.INTER_AREA + H_target = int(np.round(float(H_raw) * k)) + W_target = int(np.round(float(W_raw) * k)) + img = cv2.resize(img, (W_target, H_target), interpolation=interpolation) + H_pad, W_pad = pad64(H_target), pad64(W_target) + img_padded = np.pad(img, [[0, H_pad], [0, W_pad], [0, 0]], mode='edge') + + def remove_pad(x): + return safer_memory(x[:H_target, :W_target]) + + return safer_memory(img_padded), remove_pad + + +model_canny = None + + +def canny(img, res=512, thr_a=100, thr_b=200, **kwargs): + l, h = thr_a, thr_b + img, remove_pad = resize_image_with_pad(img, res) + global model_canny + if model_canny is None: + from annotator.canny import apply_canny + model_canny = apply_canny + result = model_canny(img, l, h) + return remove_pad(result), True + + +def scribble_thr(img, res=512, **kwargs): + img, remove_pad = resize_image_with_pad(img, res) + result = np.zeros_like(img, dtype=np.uint8) + result[np.min(img, axis=2) < 127] = 255 + return remove_pad(result), True + + +def scribble_xdog(img, res=512, thr_a=32, **kwargs): + img, remove_pad = resize_image_with_pad(img, res) + g1 = cv2.GaussianBlur(img.astype(np.float32), (0, 0), 0.5) + g2 = cv2.GaussianBlur(img.astype(np.float32), (0, 0), 5.0) + dog = (255 - np.min(g2 - g1, axis=2)).clip(0, 255).astype(np.uint8) + result = np.zeros_like(img, dtype=np.uint8) + result[2 * (255 - dog) > thr_a] = 255 + return remove_pad(result), True + + +def tile_resample(img, res=512, thr_a=1.0, **kwargs): + img = HWC3(img) + if thr_a < 
1.1: + return img, True + H, W, C = img.shape + H = int(float(H) / float(thr_a)) + W = int(float(W) / float(thr_a)) + img = cv2.resize(img, (W, H), interpolation=cv2.INTER_AREA) + return img, True + + +def threshold(img, res=512, thr_a=127, **kwargs): + img, remove_pad = resize_image_with_pad(img, res) + result = np.zeros_like(img, dtype=np.uint8) + result[np.min(img, axis=2) > thr_a] = 255 + return remove_pad(result), True + + +def identity(img, **kwargs): + return img, True + + +def invert(img, res=512, **kwargs): + return 255 - HWC3(img), True + + +model_hed = None + + +def hed(img, res=512, **kwargs): + img, remove_pad = resize_image_with_pad(img, res) + global model_hed + if model_hed is None: + from annotator.hed import apply_hed + model_hed = apply_hed + result = model_hed(img) + return remove_pad(result), True + + +def hed_safe(img, res=512, **kwargs): + img, remove_pad = resize_image_with_pad(img, res) + global model_hed + if model_hed is None: + from annotator.hed import apply_hed + model_hed = apply_hed + result = model_hed(img, is_safe=True) + return remove_pad(result), True + + +def unload_hed(): + global model_hed + if model_hed is not None: + from annotator.hed import unload_hed_model + unload_hed_model() + + +def scribble_hed(img, res=512, **kwargs): + result, _ = hed(img, res) + import cv2 + from annotator.util import nms + result = nms(result, 127, 3.0) + result = cv2.GaussianBlur(result, (0, 0), 3.0) + result[result > 4] = 255 + result[result < 255] = 0 + return result, True + + +model_mediapipe_face = None + + +def mediapipe_face(img, res=512, thr_a: int = 10, thr_b: float = 0.5, **kwargs): + max_faces = int(thr_a) + min_confidence = thr_b + img, remove_pad = resize_image_with_pad(img, res) + global model_mediapipe_face + if model_mediapipe_face is None: + from annotator.mediapipe_face import apply_mediapipe_face + model_mediapipe_face = apply_mediapipe_face + result = model_mediapipe_face(img, max_faces=max_faces, min_confidence=min_confidence) + return remove_pad(result), True + + +model_mlsd = None + + +def mlsd(img, res=512, thr_a=0.1, thr_b=0.1, **kwargs): + thr_v, thr_d = thr_a, thr_b + img, remove_pad = resize_image_with_pad(img, res) + global model_mlsd + if model_mlsd is None: + from annotator.mlsd import apply_mlsd + model_mlsd = apply_mlsd + result = model_mlsd(img, thr_v, thr_d) + return remove_pad(result), True + + +def unload_mlsd(): + global model_mlsd + if model_mlsd is not None: + from annotator.mlsd import unload_mlsd_model + unload_mlsd_model() + + +model_depth_anything = None + + +def depth_anything(img, res:int = 512, colored:bool = True, **kwargs): + img, remove_pad = resize_image_with_pad(img, res) + global model_depth_anything + if model_depth_anything is None: + with Extra(torch_handler): + from annotator.depth_anything import DepthAnythingDetector + device = devices.get_device_for("controlnet") + model_depth_anything = DepthAnythingDetector(device) + return remove_pad(model_depth_anything(img, colored=colored)), True + + +def unload_depth_anything(): + if model_depth_anything is not None: + model_depth_anything.unload_model() + + +model_midas = None + + +def midas(img, res=512, a=np.pi * 2.0, **kwargs): + img, remove_pad = resize_image_with_pad(img, res) + global model_midas + if model_midas is None: + from annotator.midas import apply_midas + model_midas = apply_midas + result, _ = model_midas(img, a) + return remove_pad(result), True + + +def midas_normal(img, res=512, a=np.pi * 2.0, thr_a=0.4, **kwargs): # bg_th -> thr_a + bg_th = thr_a + 
img, remove_pad = resize_image_with_pad(img, res) + global model_midas + if model_midas is None: + from annotator.midas import apply_midas + model_midas = apply_midas + _, result = model_midas(img, a, bg_th) + return remove_pad(result), True + + +def unload_midas(): + global model_midas + if model_midas is not None: + from annotator.midas import unload_midas_model + unload_midas_model() + + +model_leres = None + + +def leres(img, res=512, a=np.pi * 2.0, thr_a=0, thr_b=0, boost=False, **kwargs): + img, remove_pad = resize_image_with_pad(img, res) + global model_leres + if model_leres is None: + from annotator.leres import apply_leres + model_leres = apply_leres + result = model_leres(img, thr_a, thr_b, boost=boost) + return remove_pad(result), True + + +def unload_leres(): + global model_leres + if model_leres is not None: + from annotator.leres import unload_leres_model + unload_leres_model() + + +class OpenposeModel(object): + def __init__(self) -> None: + self.model_openpose = None + + def run_model( + self, + img: np.ndarray, + include_body: bool, + include_hand: bool, + include_face: bool, + use_dw_pose: bool = False, + use_animal_pose: bool = False, + json_pose_callback: Callable[[str], None] = None, + res: int = 512, + **kwargs # Ignore rest of kwargs + ) -> Tuple[np.ndarray, bool]: + """Run the openpose model. Returns a tuple of + - result image + - is_image flag + + The JSON format pose string is passed to `json_pose_callback`. + """ + if json_pose_callback is None: + json_pose_callback = lambda x: None + + img, remove_pad = resize_image_with_pad(img, res) + + if self.model_openpose is None: + from annotator.openpose import OpenposeDetector + self.model_openpose = OpenposeDetector() + + return remove_pad(self.model_openpose( + img, + include_body=include_body, + include_hand=include_hand, + include_face=include_face, + use_dw_pose=use_dw_pose, + use_animal_pose=use_animal_pose, + json_pose_callback=json_pose_callback + )), True + + def unload(self): + if self.model_openpose is not None: + self.model_openpose.unload_model() + + +g_openpose_model = OpenposeModel() + +model_uniformer = None + + +def uniformer(img, res=512, **kwargs): + img, remove_pad = resize_image_with_pad(img, res) + global model_uniformer + if model_uniformer is None: + from annotator.uniformer import apply_uniformer + model_uniformer = apply_uniformer + result = model_uniformer(img) + return remove_pad(result), True + + +def unload_uniformer(): + global model_uniformer + if model_uniformer is not None: + from annotator.uniformer import unload_uniformer_model + unload_uniformer_model() + + +model_pidinet = None + + +def pidinet(img, res=512, **kwargs): + img, remove_pad = resize_image_with_pad(img, res) + global model_pidinet + if model_pidinet is None: + from annotator.pidinet import apply_pidinet + model_pidinet = apply_pidinet + result = model_pidinet(img) + return remove_pad(result), True + + +def pidinet_ts(img, res=512, **kwargs): + img, remove_pad = resize_image_with_pad(img, res) + global model_pidinet + if model_pidinet is None: + from annotator.pidinet import apply_pidinet + model_pidinet = apply_pidinet + result = model_pidinet(img, apply_fliter=True) + return remove_pad(result), True + + +def pidinet_safe(img, res=512, **kwargs): + img, remove_pad = resize_image_with_pad(img, res) + global model_pidinet + if model_pidinet is None: + from annotator.pidinet import apply_pidinet + model_pidinet = apply_pidinet + result = model_pidinet(img, is_safe=True) + return remove_pad(result), True + + +def 
scribble_pidinet(img, res=512, **kwargs): + result, _ = pidinet(img, res) + import cv2 + from annotator.util import nms + result = nms(result, 127, 3.0) + result = cv2.GaussianBlur(result, (0, 0), 3.0) + result[result > 4] = 255 + result[result < 255] = 0 + return result, True + + +def unload_pidinet(): + global model_pidinet + if model_pidinet is not None: + from annotator.pidinet import unload_pid_model + unload_pid_model() + + +clip_encoder = { + 'clip_g': None, + 'clip_h': None, + 'clip_vitl': None, +} + + +def clip(img, res=512, config='clip_vitl', low_vram=False, **kwargs): + img = HWC3(img) + global clip_encoder + if clip_encoder[config] is None: + from annotator.clipvision import ClipVisionDetector + if low_vram: + print("Loading CLIP model on CPU.") + clip_encoder[config] = ClipVisionDetector(config, low_vram) + result = clip_encoder[config](img) + return result, False + + +def unload_clip(config='clip_vitl'): + global clip_encoder + if clip_encoder[config] is not None: + clip_encoder[config].unload_model() + clip_encoder[config] = None + + +model_color = None + + +def color(img, res=512, **kwargs): + img = HWC3(img) + global model_color + if model_color is None: + from annotator.color import apply_color + model_color = apply_color + result = model_color(img, res=res) + return result, True + + +def lineart_standard(img, res=512, **kwargs): + img, remove_pad = resize_image_with_pad(img, res) + x = img.astype(np.float32) + g = cv2.GaussianBlur(x, (0, 0), 6.0) + intensity = np.min(g - x, axis=2).clip(0, 255) + intensity /= max(16, np.median(intensity[intensity > 8])) + intensity *= 127 + result = intensity.clip(0, 255).astype(np.uint8) + return remove_pad(result), True + + +model_lineart = None + + +def lineart(img, res=512, **kwargs): + img, remove_pad = resize_image_with_pad(img, res) + global model_lineart + if model_lineart is None: + from annotator.lineart import LineartDetector + model_lineart = LineartDetector(LineartDetector.model_default) + + # applied auto inversion + result = 255 - model_lineart(img) + return remove_pad(result), True + + +def unload_lineart(): + global model_lineart + if model_lineart is not None: + model_lineart.unload_model() + + +model_lineart_coarse = None + + +def lineart_coarse(img, res=512, **kwargs): + img, remove_pad = resize_image_with_pad(img, res) + global model_lineart_coarse + if model_lineart_coarse is None: + from annotator.lineart import LineartDetector + model_lineart_coarse = LineartDetector(LineartDetector.model_coarse) + + # applied auto inversion + result = 255 - model_lineart_coarse(img) + return remove_pad(result), True + + +def unload_lineart_coarse(): + global model_lineart_coarse + if model_lineart_coarse is not None: + model_lineart_coarse.unload_model() + + +model_lineart_anime = None + + +def lineart_anime(img, res=512, **kwargs): + img, remove_pad = resize_image_with_pad(img, res) + global model_lineart_anime + if model_lineart_anime is None: + from annotator.lineart_anime import LineartAnimeDetector + model_lineart_anime = LineartAnimeDetector() + + # applied auto inversion + result = 255 - model_lineart_anime(img) + return remove_pad(result), True + + +def unload_lineart_anime(): + global model_lineart_anime + if model_lineart_anime is not None: + model_lineart_anime.unload_model() + + +model_manga_line = None + + +def lineart_anime_denoise(img, res=512, **kwargs): + img, remove_pad = resize_image_with_pad(img, res) + global model_manga_line + if model_manga_line is None: + from annotator.manga_line import 
MangaLineExtration + model_manga_line = MangaLineExtration() + + # applied auto inversion + result = model_manga_line(img) + return remove_pad(result), True + + +def unload_lineart_anime_denoise(): + global model_manga_line + if model_manga_line is not None: + model_manga_line.unload_model() + + +model_lama = None + + +def lama_inpaint(img, res=512, **kwargs): + H, W, C = img.shape + raw_color = img[:, :, 0:3].copy() + raw_mask = img[:, :, 3:4].copy() + + res = 256 # Always use 256 since lama is trained on 256 + + img_res, remove_pad = resize_image_with_pad(img, res, skip_hwc3=True) + + global model_lama + if model_lama is None: + from annotator.lama import LamaInpainting + model_lama = LamaInpainting() + + # applied auto inversion + prd_color = model_lama(img_res) + prd_color = remove_pad(prd_color) + prd_color = cv2.resize(prd_color, (W, H)) + + alpha = raw_mask.astype(np.float32) / 255.0 + fin_color = prd_color.astype(np.float32) * alpha + raw_color.astype(np.float32) * (1 - alpha) + fin_color = fin_color.clip(0, 255).astype(np.uint8) + + result = np.concatenate([fin_color, raw_mask], axis=2) + + return result, True + + +def unload_lama_inpaint(): + global model_lama + if model_lama is not None: + model_lama.unload_model() + + +model_zoe_depth = None + + +def zoe_depth(img, res=512, **kwargs): + img, remove_pad = resize_image_with_pad(img, res) + global model_zoe_depth + if model_zoe_depth is None: + from annotator.zoe import ZoeDetector + model_zoe_depth = ZoeDetector() + result = model_zoe_depth(img) + return remove_pad(result), True + + +def unload_zoe_depth(): + global model_zoe_depth + if model_zoe_depth is not None: + model_zoe_depth.unload_model() + + +model_normal_bae = None + + +def normal_bae(img, res=512, **kwargs): + pass + + +def unload_normal_bae(): + pass + + +model_oneformer_coco = None + + +def oneformer_coco(img, res=512, **kwargs): + img, remove_pad = resize_image_with_pad(img, res) + global model_oneformer_coco + if model_oneformer_coco is None: + from annotator.oneformer import OneformerDetector + model_oneformer_coco = OneformerDetector(OneformerDetector.configs["coco"]) + result = model_oneformer_coco(img) + return remove_pad(result), True + + +def unload_oneformer_coco(): + global model_oneformer_coco + if model_oneformer_coco is not None: + model_oneformer_coco.unload_model() + + +model_oneformer_ade20k = None + + +def oneformer_ade20k(img, res=512, **kwargs): + img, remove_pad = resize_image_with_pad(img, res) + global model_oneformer_ade20k + if model_oneformer_ade20k is None: + from annotator.oneformer import OneformerDetector + model_oneformer_ade20k = OneformerDetector(OneformerDetector.configs["ade20k"]) + result = model_oneformer_ade20k(img) + return remove_pad(result), True + + +def unload_oneformer_ade20k(): + global model_oneformer_ade20k + if model_oneformer_ade20k is not None: + model_oneformer_ade20k.unload_model() + + +model_shuffle = None + + +def shuffle(img, res=512, **kwargs): + img, remove_pad = resize_image_with_pad(img, res) + img = remove_pad(img) + global model_shuffle + if model_shuffle is None: + from annotator.shuffle import ContentShuffleDetector + model_shuffle = ContentShuffleDetector() + result = model_shuffle(img) + return result, True + + +def recolor_luminance(img, res=512, thr_a=1.0, **kwargs): + result = cv2.cvtColor(HWC3(img), cv2.COLOR_BGR2LAB) + result = result[:, :, 0].astype(np.float32) / 255.0 + result = result ** thr_a + result = (result * 255.0).clip(0, 255).astype(np.uint8) + result = cv2.cvtColor(result, 
cv2.COLOR_GRAY2RGB) + return result, True + + +def recolor_intensity(img, res=512, thr_a=1.0, **kwargs): + result = cv2.cvtColor(HWC3(img), cv2.COLOR_BGR2HSV) + result = result[:, :, 2].astype(np.float32) / 255.0 + result = result ** thr_a + result = (result * 255.0).clip(0, 255).astype(np.uint8) + result = cv2.cvtColor(result, cv2.COLOR_GRAY2RGB) + return result, True + + +def blur_gaussian(img, res=512, thr_a=1.0, **kwargs): + img, remove_pad = resize_image_with_pad(img, res) + img = remove_pad(img) + result = cv2.GaussianBlur(img, (0, 0), float(thr_a)) + return result, True + + +model_anime_face_segment = None + + +def anime_face_segment(img, res=512, **kwargs): + img, remove_pad = resize_image_with_pad(img, res) + global model_anime_face_segment + if model_anime_face_segment is None: + from annotator.anime_face_segment import AnimeFaceSegment + model_anime_face_segment = AnimeFaceSegment() + + result = model_anime_face_segment(img) + return remove_pad(result), True + + +def unload_anime_face_segment(): + global model_anime_face_segment + if model_anime_face_segment is not None: + model_anime_face_segment.unload_model() + + + +def densepose(img, res=512, cmap="viridis", **kwargs): + img, remove_pad = resize_image_with_pad(img, res) + from annotator.densepose import apply_densepose + result = apply_densepose(img, cmap=cmap) + return remove_pad(result), True + + +def unload_densepose(): + from annotator.densepose import unload_model + unload_model() + +model_te_hed = None + +def te_hed(img, res=512, thr_a=2, **kwargs): + img, remove_pad = resize_image_with_pad(img, res) + global model_te_hed + if model_te_hed is None: + from annotator.teed import TEEDDector + model_te_hed = TEEDDector() + result = model_te_hed(img, safe_steps=int(thr_a)) + return remove_pad(result), True + +def unload_te_hed(): + if model_te_hed is not None: + model_te_hed.unload_model() + +class InsightFaceModel: + def __init__(self, face_analysis_model_name: str = "buffalo_l"): + self.model = None + self.face_analysis_model_name = face_analysis_model_name + self.antelopev2_installed = False + + def install_antelopev2(self): + """insightface's github release on antelopev2 model is down. 
Downloading + from huggingface mirror.""" + from modules.modelloader import load_file_from_url + from modules_forge.shared import models_path + model_root = os.path.join(models_path, "insightface", "models", "antelopev2") + if not os.path.exists(model_root): + os.makedirs(model_root, exist_ok=True) + for local_file, url in ( + ("1k3d68.onnx", "https://huggingface.co/DIAMONIK7777/antelopev2/resolve/main/1k3d68.onnx"), + ("2d106det.onnx", "https://huggingface.co/DIAMONIK7777/antelopev2/resolve/main/2d106det.onnx"), + ("genderage.onnx", "https://huggingface.co/DIAMONIK7777/antelopev2/resolve/main/genderage.onnx"), + ("glintr100.onnx", "https://huggingface.co/DIAMONIK7777/antelopev2/resolve/main/glintr100.onnx"), + ("scrfd_10g_bnkps.onnx", "https://huggingface.co/DIAMONIK7777/antelopev2/resolve/main/scrfd_10g_bnkps.onnx"), + ): + local_path = os.path.join(model_root, local_file) + if not os.path.exists(local_path): + load_file_from_url(url, model_dir=model_root) + self.antelopev2_installed = True + + def load_model(self): + if self.model is None: + from insightface.app import FaceAnalysis + from modules_forge.shared import models_path + self.model = FaceAnalysis( + name=self.face_analysis_model_name, + providers=['CUDAExecutionProvider', 'CPUExecutionProvider'], + root=os.path.join(models_path, "insightface"), + ) + self.model.prepare(ctx_id=0, det_size=(640, 640)) + + def run_model(self, img: np.ndarray, **kwargs) -> Tuple[torch.Tensor, bool]: + self.load_model() + img = HWC3(img) + faces = self.model.get(img) + if not faces: + raise Exception("Insightface: No face found in image.") + if len(faces) > 1: + print("Insightface: More than one face is detected in the image. " + "Only the first one will be used.") + return torch.from_numpy(faces[0].normed_embedding).unsqueeze(0), False + + def run_model_instant_id( + self, + img: np.ndarray, + res: int = 512, + return_keypoints: bool = False, + **kwargs + ) -> Tuple[Union[np.ndarray, torch.Tensor], bool]: + """Run the insightface model for instant_id. + Arguments: + - img: Input image in any size. + - res: Resolution used to resize image. + - return_keypoints: Whether to return keypoints image or face embedding. + """ + def draw_kps(img: np.ndarray, kps, color_list=[(255,0,0), (0,255,0), (0,0,255), (255,255,0), (255,0,255)]): + stickwidth = 4 + limbSeq = np.array([[0, 2], [1, 2], [3, 2], [4, 2]]) + kps = np.array(kps) + + h, w, _ = img.shape + out_img = np.zeros([h, w, 3]) + + for i in range(len(limbSeq)): + index = limbSeq[i] + color = color_list[index[0]] + + x = kps[index][:, 0] + y = kps[index][:, 1] + length = ((x[0] - x[1]) ** 2 + (y[0] - y[1]) ** 2) ** 0.5 + angle = math.degrees(math.atan2(y[0] - y[1], x[0] - x[1])) + polygon = cv2.ellipse2Poly((int(np.mean(x)), int(np.mean(y))), (int(length / 2), stickwidth), int(angle), 0, 360, 1) + out_img = cv2.fillConvexPoly(out_img.copy(), polygon, color) + out_img = (out_img * 0.6).astype(np.uint8) + + for idx_kp, kp in enumerate(kps): + color = color_list[idx_kp] + x, y = kp + out_img = cv2.circle(out_img.copy(), (int(x), int(y)), 10, color, -1) + + return out_img.astype(np.uint8) + + if not self.antelopev2_installed: + self.install_antelopev2() + self.load_model() + + img, remove_pad = resize_image_with_pad(img, res) + face_info = self.model.get(img) + if not face_info: + raise Exception("Insightface: No face found in image.") + if len(face_info) > 1: + print("Insightface: More than one face is detected in the image. 
" + f"Only the biggest one will be used.") + # only use the maximum face + face_info = sorted(face_info, key=lambda x:(x['bbox'][2]-x['bbox'][0])*x['bbox'][3]-x['bbox'][1])[-1] + if return_keypoints: + return remove_pad(draw_kps(img, face_info['kps'])), True + else: + return torch.from_numpy(face_info['embedding']), False + + +g_insight_face_model = InsightFaceModel() +g_insight_face_instant_id_model = InsightFaceModel(face_analysis_model_name="antelopev2") + + +@dataclass +class FaceIdPlusInput: + face_embed: torch.Tensor + clip_embed: CLIPVisionModelOutput + + +def face_id_plus(img, low_vram=False, **kwargs): + """ FaceID plus uses both face_embeding from insightface and clip_embeding from clip. """ + face_embed, _ = g_insight_face_model.run_model(img) + clip_embed, _ = clip(img, config='clip_h', low_vram=low_vram) + return FaceIdPlusInput(face_embed, clip_embed), False + + +class HandRefinerModel: + def __init__(self): + self.model = None + self.device = devices.get_device_for("controlnet") + + def load_model(self): + if self.model is None: + from annotator.annotator_path import models_path + from hand_refiner import MeshGraphormerDetector # installed via hand_refiner_portable + with Extra(torch_handler): + self.model = MeshGraphormerDetector.from_pretrained( + "hr16/ControlNet-HandRefiner-pruned", + cache_dir=os.path.join(models_path, "hand_refiner"), + device=self.device, + ) + else: + self.model.to(self.device) + + def unload(self): + if self.model is not None: + self.model.to("cpu") + + def run_model(self, img, res=512, **kwargs): + img, remove_pad = resize_image_with_pad(img, res) + self.load_model() + with Extra(torch_handler): + depth_map, mask, info = self.model( + img, output_type="np", + detect_resolution=res, + mask_bbox_padding=30, + ) + return remove_pad(depth_map), True + + +g_hand_refiner_model = HandRefinerModel() diff --git a/extensions-builtin/forge_legacy_preprocessors/legacy_preprocessors/preprocessor_compiled.py b/extensions-builtin/forge_legacy_preprocessors/legacy_preprocessors/preprocessor_compiled.py new file mode 100644 index 0000000000000000000000000000000000000000..13901cc6d4fc4400c8c9c304b90e9deabf5c02f1 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/legacy_preprocessors/preprocessor_compiled.py @@ -0,0 +1,1309 @@ +from legacy_preprocessors.preprocessor import * + + +legacy_preprocessors = { + # "none": { + # "label": "none", + # "call_function": lambda x, *args, **kwargs: (x, True), + # "unload_function": None, + # "managed_model": None, + # "model_free": False, + # "no_control_mode": False, + # "resolution": None, + # "slider_1": None, + # "slider_2": None, + # "slider_3": None, + # "priority": 100, + # "tags": [] + # }, + "invert (from white bg & black line)": { + "label": "invert (from white bg & black line)", + "call_function": invert, + "unload_function": None, + "managed_model": None, + "model_free": False, + "no_control_mode": False, + "resolution": None, + "slider_1": None, + "slider_2": None, + "slider_3": None, + "priority": 20, + "tags": [ + "Canny", "Lineart", "Scribble", "Sketch", "MLSD", + ] + }, + "animal_openpose": { + "label": "animal_openpose", + "call_function": functools.partial(g_openpose_model.run_model, include_body=True, include_hand=False, include_face=False, use_animal_pose=True), + "unload_function": g_openpose_model.unload, + "managed_model": "g_openpose_model", + "model_free": False, + "no_control_mode": False, + "resolution": { + "label": "Resolution", + "minimum": 64, + "maximum": 2048, + "value": 512 + }, + 
"slider_1": None, + "slider_2": None, + "slider_3": None, + "priority": 0, + "tags": [ + "OpenPose" + ] + }, + "blur_gaussian": { + "label": "blur_gaussian", + "call_function": blur_gaussian, + "unload_function": None, + "managed_model": None, + "model_free": False, + "no_control_mode": False, + "resolution": { + "label": "Resolution", + "value": 512, + "minimum": 64, + "maximum": 2048 + }, + "slider_1": { + "label": "Sigma", + "minimum": 0.01, + "maximum": 64.0, + "value": 9.0 + }, + "slider_2": None, + "slider_3": None, + "priority": 0, + "tags": [ + "Tile", "Blur", + ] + }, + # "canny": { + # "label": "canny", + # "call_function": canny, + # "unload_function": None, + # "managed_model": "model_canny", + # "model_free": False, + # "no_control_mode": False, + # "resolution": { + # "label": "Resolution", + # "value": 512, + # "minimum": 64, + # "maximum": 2048 + # }, + # "slider_1": { + # "label": "Canny Low Threshold", + # "value": 100, + # "minimum": 1, + # "maximum": 255 + # }, + # "slider_2": { + # "label": "Canny High Threshold", + # "value": 200, + # "minimum": 1, + # "maximum": 255 + # }, + # "slider_3": None, + # "priority": 100, + # "tags": [ + # "Canny" + # ] + # }, + "densepose (pruple bg & purple torso)": { + "label": "densepose (pruple bg & purple torso)", + "call_function": functools.partial(densepose, cmap="viridis"), + "unload_function": unload_densepose, + "managed_model": "unknown", + "model_free": False, + "no_control_mode": False, + "resolution": { + "label": "Resolution", + "minimum": 64, + "maximum": 2048, + "value": 512 + }, + "slider_1": None, + "slider_2": None, + "slider_3": None, + "priority": 0, + "tags": [ + "OpenPose" + ] + }, + "densepose_parula (black bg & blue torso)": { + "label": "densepose_parula (black bg & blue torso)", + "call_function": functools.partial(densepose, cmap="parula"), + "unload_function": unload_densepose, + "managed_model": "unknown", + "model_free": False, + "no_control_mode": False, + "resolution": { + "label": "Resolution", + "minimum": 64, + "maximum": 2048, + "value": 512 + }, + "slider_1": None, + "slider_2": None, + "slider_3": None, + "priority": 0, + "tags": [ + "OpenPose" + ] + }, + "depth_anything": { + "label": "depth_anything", + "call_function": functools.partial(depth_anything, colored=False), + "unload_function": unload_depth_anything, + "managed_model": "model_depth_anything", + "model_free": False, + "no_control_mode": False, + "resolution": None, + "slider_1": None, + "slider_2": None, + "slider_3": None, + "priority": 0, + "tags": [ + "Depth" + ] + }, + "depth_hand_refiner": { + "label": "depth_hand_refiner", + "call_function": g_hand_refiner_model.run_model, + "unload_function": g_hand_refiner_model.unload, + "managed_model": "g_hand_refiner_model", + "model_free": False, + "no_control_mode": False, + "resolution": { + "label": "Resolution", + "value": 512, + "minimum": 64, + "maximum": 2048 + }, + "slider_1": None, + "slider_2": None, + "slider_3": None, + "priority": 0, + "tags": [ + "Depth" + ] + }, + "depth_leres": { + "label": "depth_leres", + "call_function": functools.partial(leres, boost=False), + "unload_function": unload_leres, + "managed_model": "model_leres", + "model_free": False, + "no_control_mode": False, + "resolution": { + "label": "Resolution", + "minimum": 64, + "maximum": 2048, + "value": 512 + }, + "slider_1": { + "label": "Remove Near %", + "minimum": 0, + "maximum": 100, + "value": 0, + "step": 0.1 + }, + "slider_2": { + "label": "Remove Background %", + "minimum": 0, + "maximum": 100, + 
"value": 0, + "step": 0.1 + }, + "slider_3": None, + "priority": 0, + "tags": [ + "Depth" + ] + }, + "depth_leres++": { + "label": "depth_leres++", + "call_function": functools.partial(leres, boost=True), + "unload_function": None, + "managed_model": "model_leres", + "model_free": False, + "no_control_mode": False, + "resolution": { + "label": "Resolution", + "minimum": 64, + "maximum": 2048, + "value": 512 + }, + "slider_1": { + "label": "Remove Near %", + "minimum": 0, + "maximum": 100, + "value": 0, + "step": 0.1 + }, + "slider_2": { + "label": "Remove Background %", + "minimum": 0, + "maximum": 100, + "value": 0, + "step": 0.1 + }, + "slider_3": None, + "priority": 0, + "tags": [ + "Depth" + ] + }, + "depth_midas": { + "label": "depth_midas", + "call_function": midas, + "unload_function": unload_midas, + "managed_model": "model_midas", + "model_free": False, + "no_control_mode": False, + "resolution": { + "label": "Resolution", + "minimum": 64, + "maximum": 2048, + "value": 512 + }, + "slider_1": None, + "slider_2": None, + "slider_3": None, + "priority": 100, + "tags": [ + "Depth" + ] + }, + "depth_zoe": { + "label": "depth_zoe", + "call_function": zoe_depth, + "unload_function": unload_zoe_depth, + "managed_model": "model_zoe_depth", + "model_free": False, + "no_control_mode": False, + "resolution": None, + "slider_1": None, + "slider_2": None, + "slider_3": None, + "priority": 0, + "tags": [ + "Depth" + ] + }, + "dw_openpose_full": { + "label": "dw_openpose_full", + "call_function": functools.partial(g_openpose_model.run_model, include_body=True, include_hand=True, include_face=True, use_dw_pose=True), + "unload_function": g_openpose_model.unload, + "managed_model": 'g_openpose_model', + "model_free": False, + "no_control_mode": False, + "resolution": { + "label": "Resolution", + "minimum": 64, + "maximum": 2048, + "value": 512 + }, + "slider_1": None, + "slider_2": None, + "slider_3": None, + "priority": 0, + "tags": [ + "OpenPose" + ] + }, + # "inpaint_global_harmonious": { + # "label": "inpaint_global_harmonious", + # "call_function": identity, + # "unload_function": None, + # "managed_model": None, + # "model_free": False, + # "no_control_mode": False, + # "resolution": None, + # "slider_1": None, + # "slider_2": None, + # "slider_3": None, + # "priority": 0, + # "tags": [ + # "Inpaint" + # ] + # }, + # "inpaint_only": { + # "label": "inpaint_only", + # "call_function": identity, + # "unload_function": None, + # "managed_model": None, + # "model_free": False, + # "no_control_mode": False, + # "resolution": None, + # "slider_1": None, + # "slider_2": None, + # "slider_3": None, + # "priority": 100, + # "tags": [ + # "Inpaint" + # ] + # }, + # "inpaint_only+lama": { + # "label": "inpaint_only+lama", + # "call_function": lama_inpaint, + # "unload_function": unload_lama_inpaint, + # "managed_model": "model_lama", + # "model_free": False, + # "no_control_mode": False, + # "resolution": None, + # "slider_1": None, + # "slider_2": None, + # "slider_3": None, + # "priority": 0, + # "tags": [ + # "Inpaint" + # ] + # }, + # "instant_id_face_embedding": { + # "label": "instant_id_face_embedding", + # "call_function": functools.partial(g_insight_face_instant_id_model.run_model_instant_id, return_keypoints=False), + # "unload_function": None, + # "managed_model": "g_insight_face_instant_id_model", + # "model_free": False, + # "no_control_mode": False, + # "resolution": None, + # "slider_1": None, + # "slider_2": None, + # "slider_3": None, + # "priority": 0, + # "tags": [ + # "Instant-ID" + 
# ] + # }, + "instant_id_face_keypoints": { + "label": "instant_id_face_keypoints", + "call_function": functools.partial(g_insight_face_instant_id_model.run_model_instant_id, return_keypoints=True), + "unload_function": None, + "managed_model": "unknown", + "model_free": False, + "no_control_mode": False, + "resolution": None, + "slider_1": None, + "slider_2": None, + "slider_3": None, + "priority": 0, + "tags": [ + "Instant-ID" + ] + }, + # "ip-adapter_clip_sd15": { + # "label": "ip-adapter_clip_sd15", + # "call_function": functools.partial(clip, config='clip_h'), + # "unload_function": functools.partial(unload_clip, config='clip_h'), + # "managed_model": "unknown", + # "model_free": False, + # "no_control_mode": True, + # "resolution": None, + # "slider_1": None, + # "slider_2": None, + # "slider_3": None, + # "priority": 100, + # "tags": [ + # "IP-Adapter" + # ] + # }, + # "ip-adapter_clip_sdxl": { + # "label": "ip-adapter_clip_sdxl", + # "call_function": functools.partial(clip, config='clip_g'), + # "unload_function": functools.partial(unload_clip, config='clip_g'), + # "managed_model": "unknown", + # "model_free": False, + # "no_control_mode": True, + # "resolution": None, + # "slider_1": None, + # "slider_2": None, + # "slider_3": None, + # "priority": 0, + # "tags": [ + # "IP-Adapter" + # ] + # }, + # "ip-adapter_clip_sdxl_plus_vith": { + # "label": "ip-adapter_clip_sdxl_plus_vith", + # "call_function": functools.partial(clip, config='clip_h'), + # "unload_function": functools.partial(unload_clip, config='clip_h'), + # "managed_model": "unknown", + # "model_free": False, + # "no_control_mode": True, + # "resolution": None, + # "slider_1": None, + # "slider_2": None, + # "slider_3": None, + # "priority": 0, + # "tags": [ + # "IP-Adapter" + # ] + # }, + # "ip-adapter_face_id": { + # "label": "insight_face_face_id", + # "call_function": g_insight_face_model.run_model, + # "unload_function": None, + # "managed_model": "g_insight_face_model", + # "model_free": False, + # "no_control_mode": True, + # "resolution": None, + # "slider_1": None, + # "slider_2": None, + # "slider_3": None, + # "priority": 0, + # "tags": [ + # "IP-Adapter" + # ] + # }, + # "ip-adapter_face_id_plus": { + # "label": "ip-adapter_face_id_plus", + # "call_function": face_id_plus, + # "unload_function": functools.partial(unload_clip, config='clip_h'), + # "managed_model": "unknown", + # "model_free": False, + # "no_control_mode": True, + # "resolution": None, + # "slider_1": None, + # "slider_2": None, + # "slider_3": None, + # "priority": 0, + # "tags": [ + # "IP-Adapter" + # ] + # }, + "lineart_anime": { + "label": "lineart_anime", + "call_function": lineart_anime, + "unload_function": unload_lineart_anime, + "managed_model": "model_lineart_anime", + "model_free": False, + "no_control_mode": False, + "resolution": None, + "slider_1": None, + "slider_2": None, + "slider_3": None, + "priority": 0, + "tags": [ + "Lineart" + ] + }, + "lineart_anime_denoise": { + "label": "lineart_anime_denoise", + "call_function": lineart_anime_denoise, + "unload_function": unload_lineart_anime_denoise, + "managed_model": "model_manga_line", + "model_free": False, + "no_control_mode": False, + "resolution": None, + "slider_1": None, + "slider_2": None, + "slider_3": None, + "priority": 0, + "tags": [ + "Lineart" + ] + }, + "lineart_coarse": { + "label": "lineart_coarse", + "call_function": lineart_coarse, + "unload_function": unload_lineart_coarse, + "managed_model": "model_lineart_coarse", + "model_free": False, + "no_control_mode": 
False, + "resolution": None, + "slider_1": None, + "slider_2": None, + "slider_3": None, + "priority": 0, + "tags": [ + "Lineart" + ] + }, + "lineart_realistic": { + "label": "lineart_realistic", + "call_function": lineart, + "unload_function": unload_lineart, + "managed_model": "model_lineart", + "model_free": False, + "no_control_mode": False, + "resolution": None, + "slider_1": None, + "slider_2": None, + "slider_3": None, + "priority": 0, + "tags": [ + "Lineart" + ] + }, + "lineart_standard (from white bg & black line)": { + "label": "lineart_standard (from white bg & black line)", + "call_function": lineart_standard, + "unload_function": None, + "managed_model": None, + "model_free": False, + "no_control_mode": False, + "resolution": None, + "slider_1": None, + "slider_2": None, + "slider_3": None, + "priority": 100, + "tags": [ + "Lineart" + ] + }, + "mediapipe_face": { + "label": "mediapipe_face", + "call_function": mediapipe_face, + "unload_function": None, + "managed_model": "model_mediapipe_face", + "model_free": False, + "no_control_mode": False, + "resolution": { + "label": "Resolution", + "value": 512, + "minimum": 64, + "maximum": 2048 + }, + "slider_1": { + "label": "Max Faces", + "value": 1, + "minimum": 1, + "maximum": 10, + "step": 1 + }, + "slider_2": { + "label": "Min Face Confidence", + "value": 0.5, + "minimum": 0.01, + "maximum": 1.0, + "step": 0.01 + }, + "slider_3": None, + "priority": 0, + "tags": [] + }, + "mlsd": { + "label": "mlsd", + "call_function": mlsd, + "unload_function": unload_mlsd, + "managed_model": "model_mlsd", + "model_free": False, + "no_control_mode": False, + "resolution": { + "label": "Resolution", + "minimum": 64, + "maximum": 2048, + "value": 512 + }, + "slider_1": { + "label": "MLSD Value Threshold", + "minimum": 0.01, + "maximum": 2.0, + "value": 0.1, + "step": 0.01 + }, + "slider_2": { + "label": "MLSD Distance Threshold", + "minimum": 0.01, + "maximum": 20.0, + "value": 0.1, + "step": 0.01 + }, + "slider_3": None, + "priority": 100, + "tags": [ + "MLSD" + ], + "use_soft_projection_in_hr_fix": True + }, + # "normal_bae": { + # "label": "normal_bae", + # "call_function": normal_bae, + # "unload_function": unload_normal_bae, + # "managed_model": "model_normal_bae", + # "model_free": False, + # "no_control_mode": False, + # "resolution": None, + # "slider_1": None, + # "slider_2": None, + # "slider_3": None, + # "priority": 100, + # "tags": [ + # "NormalMap" + # ] + # }, + "normal_midas": { + "label": "normal_midas", + "call_function": midas_normal, + "unload_function": unload_midas, + "managed_model": "model_midas", + "model_free": False, + "no_control_mode": False, + "resolution": { + "label": "Resolution", + "minimum": 64, + "maximum": 2048, + "value": 512 + }, + "slider_1": { + "label": "Normal Background Threshold", + "minimum": 0.0, + "maximum": 1.0, + "value": 0.4, + "step": 0.01 + }, + "slider_2": None, + "slider_3": None, + "priority": 0, + "tags": [ + "NormalMap" + ] + }, + "openpose": { + "label": "openpose", + "call_function": functools.partial(g_openpose_model.run_model, include_body=True, include_hand=False, include_face=False), + "unload_function": g_openpose_model.unload, + "managed_model": "g_openpose_model", + "model_free": False, + "no_control_mode": False, + "resolution": { + "label": "Resolution", + "minimum": 64, + "maximum": 2048, + "value": 512 + }, + "slider_1": None, + "slider_2": None, + "slider_3": None, + "priority": 0, + "tags": [ + "OpenPose" + ] + }, + "openpose_face": { + "label": "openpose_face", + 
"call_function": functools.partial(g_openpose_model.run_model, include_body=True, include_hand=False, include_face=True), + "unload_function": g_openpose_model.unload, + "managed_model": "g_openpose_model", + "model_free": False, + "no_control_mode": False, + "resolution": None, + "slider_1": None, + "slider_2": None, + "slider_3": None, + "priority": 0, + "tags": [ + "OpenPose" + ] + }, + "openpose_faceonly": { + "label": "openpose_faceonly", + "call_function": functools.partial(g_openpose_model.run_model, include_body=False, include_hand=False, include_face=True), + "unload_function": None, + "managed_model": "g_openpose_model", + "model_free": False, + "no_control_mode": False, + "resolution": None, + "slider_1": None, + "slider_2": None, + "slider_3": None, + "priority": 0, + "tags": [ + "OpenPose" + ] + }, + "openpose_full": { + "label": "openpose_full", + "call_function": functools.partial(g_openpose_model.run_model, include_body=True, include_hand=True, include_face=True), + "unload_function": g_openpose_model.unload, + "managed_model": "g_openpose_model", + "model_free": False, + "no_control_mode": False, + "resolution": { + "label": "Resolution", + "minimum": 64, + "maximum": 2048, + "value": 512 + }, + "slider_1": None, + "slider_2": None, + "slider_3": None, + "priority": 100, + "tags": [ + "OpenPose" + ] + }, + "openpose_hand": { + "label": "openpose_hand", + "call_function": functools.partial(g_openpose_model.run_model, include_body=True, include_hand=True, include_face=False), + "unload_function": g_openpose_model.unload, + "managed_model": "g_openpose_model", + "model_free": False, + "no_control_mode": False, + "resolution": None, + "slider_1": None, + "slider_2": None, + "slider_3": None, + "priority": 0, + "tags": [ + "OpenPose" + ] + }, + # "recolor_intensity": { + # "label": "recolor_intensity", + # "call_function": recolor_intensity, + # "unload_function": None, + # "managed_model": None, + # "model_free": False, + # "no_control_mode": False, + # "resolution": None, + # "slider_1": { + # "label": "Gamma Correction", + # "value": 1.0, + # "minimum": 0.1, + # "maximum": 2.0, + # "step": 0.001 + # }, + # "slider_2": None, + # "slider_3": None, + # "priority": 0, + # "tags": [ + # "Recolor" + # ] + # }, + # "recolor_luminance": { + # "label": "recolor_luminance", + # "call_function": recolor_luminance, + # "unload_function": None, + # "managed_model": None, + # "model_free": False, + # "no_control_mode": False, + # "resolution": None, + # "slider_1": { + # "label": "Gamma Correction", + # "value": 1.0, + # "minimum": 0.1, + # "maximum": 2.0, + # "step": 0.001 + # }, + # "slider_2": None, + # "slider_3": None, + # "priority": 100, + # "tags": [ + # "Recolor" + # ] + # }, + # "reference_adain": { + # "label": "reference_adain", + # "call_function": identity, + # "unload_function": None, + # "managed_model": None, + # "model_free": True, + # "no_control_mode": False, + # "resolution": None, + # "slider_1": { + # "label": "Style Fidelity (only for Balanced mode)", + # "value": 0.5, + # "minimum": 0.0, + # "maximum": 1.0, + # "step": 0.01 + # }, + # "slider_2": None, + # "slider_3": None, + # "priority": 0, + # "tags": [ + # "Reference" + # ] + # }, + # "reference_adain+attn": { + # "label": "reference_adain+attn", + # "call_function": identity, + # "unload_function": None, + # "managed_model": None, + # "model_free": True, + # "no_control_mode": False, + # "resolution": None, + # "slider_1": { + # "label": "Style Fidelity (only for Balanced mode)", + # "value": 0.5, + # 
"minimum": 0.0, + # "maximum": 1.0, + # "step": 0.01 + # }, + # "slider_2": None, + # "slider_3": None, + # "priority": 0, + # "tags": [ + # "Reference" + # ] + # }, + # "reference_only": { + # "label": "reference_only", + # "call_function": identity, + # "unload_function": None, + # "managed_model": None, + # "model_free": True, + # "no_control_mode": False, + # "resolution": None, + # "slider_1": { + # "label": "Style Fidelity (only for Balanced mode)", + # "value": 0.5, + # "minimum": 0.0, + # "maximum": 1.0, + # "step": 0.01 + # }, + # "slider_2": None, + # "slider_3": None, + # "priority": 100, + # "tags": [ + # "Reference" + # ] + # }, + # "revision_clipvision": { + # "label": "revision_clipvision", + # "call_function": functools.partial(clip, config='clip_g'), + # "unload_function": functools.partial(unload_clip, config='clip_g'), + # "managed_model": None, + # "model_free": True, + # "no_control_mode": True, + # "resolution": None, + # "slider_1": { + # "label": "Noise Augmentation", + # "value": 0.0, + # "minimum": 0.0, + # "maximum": 1.0 + # }, + # "slider_2": None, + # "slider_3": None, + # "priority": 100, + # "tags": [ + # "Revision" + # ] + # }, + # "revision_ignore_prompt": { + # "label": "revision_ignore_prompt", + # "call_function": functools.partial(clip, config='clip_g'), + # "unload_function": functools.partial(unload_clip, config='clip_g'), + # "managed_model": None, + # "model_free": True, + # "no_control_mode": True, + # "resolution": None, + # "slider_1": { + # "label": "Noise Augmentation", + # "value": 0.0, + # "minimum": 0.0, + # "maximum": 1.0 + # }, + # "slider_2": None, + # "slider_3": None, + # "priority": 0, + # "tags": [ + # "Revision" + # ] + # }, + "scribble_hed": { + "label": "scribble_hed", + "call_function": scribble_hed, + "unload_function": None, + "managed_model": "model_hed", + "model_free": False, + "no_control_mode": False, + "resolution": { + "label": "Resolution", + "minimum": 64, + "maximum": 2048, + "value": 512 + }, + "slider_1": None, + "slider_2": None, + "slider_3": None, + "priority": 0, + "tags": [ + "Scribble", "Sketch", + ] + }, + "scribble_pidinet": { + "label": "scribble_pidinet", + "call_function": scribble_pidinet, + "unload_function": None, + "managed_model": "model_pidinet", + "model_free": False, + "no_control_mode": False, + "resolution": None, + "slider_1": None, + "slider_2": None, + "slider_3": None, + "priority": 100, + "tags": [ + "Scribble", "Sketch", + ] + }, + "scribble_xdog": { + "label": "scribble_xdog", + "call_function": scribble_xdog, + "unload_function": None, + "managed_model": None, + "model_free": False, + "no_control_mode": False, + "resolution": { + "label": "Resolution", + "value": 512, + "minimum": 64, + "maximum": 2048 + }, + "slider_1": { + "label": "XDoG Threshold", + "minimum": 1, + "maximum": 64, + "value": 32 + }, + "slider_2": None, + "slider_3": None, + "priority": 0, + "tags": [ + "Scribble", "Sketch", + ] + }, + "seg_anime_face": { + "label": "seg_anime_face", + "call_function": anime_face_segment, + "unload_function": unload_anime_face_segment, + "managed_model": "model_anime_face_segment", + "model_free": False, + "no_control_mode": False, + "resolution": { + "label": "Resolution", + "value": 512, + "minimum": 64, + "maximum": 2048 + }, + "slider_1": None, + "slider_2": None, + "slider_3": None, + "priority": 0, + "tags": [ + "Segmentation" + ] + }, + "seg_ofade20k": { + "label": "seg_ofade20k", + "call_function": oneformer_ade20k, + "unload_function": unload_oneformer_ade20k, + 
"managed_model": "model_oneformer_ade20k", + "model_free": False, + "no_control_mode": False, + "resolution": None, + "slider_1": None, + "slider_2": None, + "slider_3": None, + "priority": 100, + "tags": [ + "Segmentation" + ] + }, + "seg_ofcoco": { + "label": "seg_ofcoco", + "call_function": oneformer_coco, + "unload_function": unload_oneformer_coco, + "managed_model": "model_oneformer_coco", + "model_free": False, + "no_control_mode": False, + "resolution": None, + "slider_1": None, + "slider_2": None, + "slider_3": None, + "priority": 0, + "tags": [ + "Segmentation" + ] + }, + "seg_ufade20k": { + "label": "seg_ufade20k", + "call_function": uniformer, + "unload_function": unload_uniformer, + "managed_model": "model_uniformer", + "model_free": False, + "no_control_mode": False, + "resolution": { + "label": "Resolution", + "minimum": 64, + "maximum": 2048, + "value": 512 + }, + "slider_1": None, + "slider_2": None, + "slider_3": None, + "priority": 0, + "tags": [ + "Segmentation" + ] + }, + "shuffle": { + "label": "shuffle", + "call_function": shuffle, + "unload_function": None, + "managed_model": None, + "model_free": False, + "no_control_mode": False, + "resolution": None, + "slider_1": None, + "slider_2": None, + "slider_3": None, + "priority": 100, + "tags": [ + "Shuffle" + ] + }, + "softedge_hed": { + "label": "softedge_hed", + "call_function": hed, + "unload_function": unload_hed, + "managed_model": "model_hed", + "model_free": False, + "no_control_mode": False, + "resolution": { + "label": "Resolution", + "minimum": 64, + "maximum": 2048, + "value": 512 + }, + "slider_1": None, + "slider_2": None, + "slider_3": None, + "priority": 0, + "tags": [ + "SoftEdge" + ] + }, + "softedge_hedsafe": { + "label": "softedge_hedsafe", + "call_function": hed_safe, + "unload_function": None, + "managed_model": "model_hed", + "model_free": False, + "no_control_mode": False, + "resolution": { + "label": "Resolution", + "minimum": 64, + "maximum": 2048, + "value": 512 + }, + "slider_1": None, + "slider_2": None, + "slider_3": None, + "priority": 0, + "tags": [ + "SoftEdge" + ] + }, + "softedge_pidinet": { + "label": "softedge_pidinet", + "call_function": pidinet, + "unload_function": unload_pidinet, + "managed_model": "model_pidinet", + "model_free": False, + "no_control_mode": False, + "resolution": None, + "slider_1": None, + "slider_2": None, + "slider_3": None, + "priority": 100, + "tags": [ + "SoftEdge" + ] + }, + "softedge_pidisafe": { + "label": "softedge_pidisafe", + "call_function": pidinet_safe, + "unload_function": None, + "managed_model": "model_pidinet", + "model_free": False, + "no_control_mode": False, + "resolution": None, + "slider_1": None, + "slider_2": None, + "slider_3": None, + "priority": 0, + "tags": [ + "SoftEdge" + ] + }, + "softedge_teed": { + "label": "softedge_teed", + "call_function": te_hed, + "unload_function": unload_te_hed, + "managed_model": "model_te_hed", + "model_free": False, + "no_control_mode": False, + "resolution": { + "label": "Resolution", + "value": 512, + "minimum": 64, + "maximum": 2048 + }, + "slider_1": { + "label": "Safe Steps", + "minimum": 0, + "maximum": 10, + "value": 2, + "step": 1 + }, + "slider_2": None, + "slider_3": None, + "priority": 0, + "tags": [ + "SoftEdge" + ] + }, + "t2ia_color_grid": { + "label": "t2ia_color_grid", + "call_function": color, + "unload_function": None, + "managed_model": None, + "model_free": False, + "no_control_mode": False, + "resolution": { + "label": "Resolution", + "value": 512, + "minimum": 64, + "maximum": 
2048 + }, + "slider_1": None, + "slider_2": None, + "slider_3": None, + "priority": 0, + "tags": [ + "T2I-Adapter" + ] + }, + "t2ia_sketch_pidi": { + "label": "t2ia_sketch_pidi", + "call_function": pidinet_ts, + "unload_function": None, + "managed_model": "model_pidinet", + "model_free": False, + "no_control_mode": False, + "resolution": None, + "slider_1": None, + "slider_2": None, + "slider_3": None, + "priority": 0, + "tags": [ + "T2I-Adapter" + ] + }, + # "t2ia_style_clipvision": { + # "label": "t2ia_style_clipvision", + # "call_function": functools.partial(clip, config='clip_vitl'), + # "unload_function": functools.partial(unload_clip, config='clip_vitl'), + # "managed_model": "unknown", + # "model_free": False, + # "no_control_mode": True, + # "resolution": None, + # "slider_1": None, + # "slider_2": None, + # "slider_3": None, + # "priority": 0, + # "tags": [ + # "T2I-Adapter" + # ] + # }, + "threshold": { + "label": "threshold", + "call_function": threshold, + "unload_function": None, + "managed_model": None, + "model_free": False, + "no_control_mode": False, + "resolution": { + "label": "Resolution", + "value": 512, + "minimum": 64, + "maximum": 2048 + }, + "slider_1": { + "label": "Binarization Threshold", + "minimum": 0, + "maximum": 255, + "value": 127 + }, + "slider_2": None, + "slider_3": None, + "priority": 0, + "tags": [] + }, + # "tile_colorfix": { + # "label": "tile_colorfix", + # "call_function": identity, + # "unload_function": None, + # "managed_model": None, + # "model_free": False, + # "no_control_mode": False, + # "resolution": None, + # "slider_1": { + # "label": "Variation", + # "value": 8.0, + # "minimum": 3.0, + # "maximum": 32.0, + # "step": 1.0 + # }, + # "slider_2": None, + # "slider_3": None, + # "priority": 0, + # "tags": [ + # "Tile", "Blur", + # ] + # }, + # "tile_colorfix+sharp": { + # "label": "tile_colorfix+sharp", + # "call_function": identity, + # "unload_function": None, + # "managed_model": None, + # "model_free": False, + # "no_control_mode": False, + # "resolution": None, + # "slider_1": { + # "label": "Variation", + # "value": 8.0, + # "minimum": 3.0, + # "maximum": 32.0, + # "step": 1.0 + # }, + # "slider_2": { + # "label": "Sharpness", + # "value": 1.0, + # "minimum": 0.0, + # "maximum": 2.0, + # "step": 0.01 + # }, + # "slider_3": None, + # "priority": 0, + # "tags": [ + # "Tile", "Blur", + # ] + # }, + # "tile_resample": { + # "label": "tile_resample", + # "call_function": tile_resample, + # "unload_function": None, + # "managed_model": None, + # "model_free": False, + # "no_control_mode": False, + # "resolution": None, + # "slider_1": { + # "label": "Down Sampling Rate", + # "value": 1.0, + # "minimum": 1.0, + # "maximum": 8.0, + # "step": 0.01 + # }, + # "slider_2": None, + # "slider_3": None, + # "priority": 100, + # "tags": [ + # "Tile", "Blur", + # ] + # } +} diff --git a/extensions-builtin/forge_legacy_preprocessors/requirements.txt b/extensions-builtin/forge_legacy_preprocessors/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..d12e85b09d7b0d997bbb1a8f5d56bb2a2ec1ef01 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/requirements.txt @@ -0,0 +1,5 @@ +fvcore +mediapipe +onnxruntime +opencv-python>=4.8.0 +svglib diff --git a/extensions-builtin/forge_legacy_preprocessors/scripts/legacy_preprocessors.py b/extensions-builtin/forge_legacy_preprocessors/scripts/legacy_preprocessors.py new file mode 100644 index 
0000000000000000000000000000000000000000..b90fb61f5b872e5dc667308fe441747676a2b844 --- /dev/null +++ b/extensions-builtin/forge_legacy_preprocessors/scripts/legacy_preprocessors.py @@ -0,0 +1,120 @@ +# This is a Python script that converts all of the old preprocessors to the new format. +# However, the old preprocessors are not very memory-efficient, +# and eventually we should port all of them to the new format manually; +# see forge_preprocessor_normalbae/scripts/preprocessor_normalbae for +# how to write a better preprocessor implementation. +# No new preprocessors should be written in this legacy way. + +# Please never add new legacy preprocessors. +# The new forge_preprocessor_normalbae/scripts/preprocessor_normalbae +# is much more efficient and maintainable. + + +import contextlib + +from annotator.util import HWC3 +from modules_forge.ops import automatic_memory_management +from legacy_preprocessors.preprocessor_compiled import legacy_preprocessors +from modules_forge.supported_preprocessor import Preprocessor, PreprocessorParameter +from modules_forge.shared import add_supported_preprocessor + + +### + +# This file has a lot of unreasonable historical design and should be viewed as a frozen black-box library. + +# If you want to add a preprocessor, +# please look at `extensions-builtin/forge_preprocessor_normalbae/scripts/preprocessor_normalbae` instead. +# If you want to use a preprocessor, +# please use `from modules_forge.shared import supported_preprocessors` instead, +# and then use any preprocessor like: depth_midas = supported_preprocessors['depth_midas'] + +# Please do not hack/edit/modify/rely on any code in this file. + +# Never use methods in this file to add anything! +# This file will eventually be removed, but the workload is high and we need more time to do this.
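+# For example, a minimal usage sketch (an illustrative addition, not part of the original file; it assumes the registry has already been populated at webui startup and that `input_image` is an HWC uint8 numpy array): +# from modules_forge.shared import supported_preprocessors +# depth_midas = supported_preprocessors['depth_midas'] +# depth_map = depth_midas(input_image, resolution=512)  # returns an HWC uint8 depth map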
+ +### + + +class LegacyPreprocessor(Preprocessor): + def __init__(self, legacy_dict): + super().__init__() + self.name = legacy_dict['label'] + self.call_function = legacy_dict['call_function'] + self.unload_function = legacy_dict['unload_function'] + self.managed_model = legacy_dict['managed_model'] + self.do_not_need_model = legacy_dict['model_free'] + self.show_control_mode = not legacy_dict['no_control_mode'] + self.sorting_priority = legacy_dict['priority'] + self.tags = legacy_dict['tags'] + + filters_aliases = { + 'instructp2p': ['ip2p'], + 'segmentation': ['seg'], + 'normalmap': ['normal'], + 't2i-adapter': ['t2i_adapter', 't2iadapter', 't2ia'], + 'ip-adapter': ['ip_adapter', 'ipadapter'], + 'openpose': ['openpose', 'densepose'], + 'instant-id': ['instant_id', 'instantid'], + } + + if legacy_dict.get('use_soft_projection_in_hr_fix', False): + self.use_soft_projection_in_hr_fix = True + + self.model_filename_filters = [] + for tag in self.tags: + tag_lower = tag.lower() + self.model_filename_filters.append(tag_lower) + self.model_filename_filters += filters_aliases.get(tag_lower, []) + + if legacy_dict['resolution'] is None: + self.resolution = PreprocessorParameter(visible=False) + else: + legacy_dict['resolution']['label'] = 'Resolution' + legacy_dict['resolution']['step'] = 8 + self.resolution = PreprocessorParameter(**legacy_dict['resolution'], visible=True) + + if legacy_dict['slider_1'] is None: + self.slider_1 = PreprocessorParameter(visible=False) + else: + self.slider_1 = PreprocessorParameter(**legacy_dict['slider_1'], visible=True) + + if legacy_dict['slider_2'] is None: + self.slider_2 = PreprocessorParameter(visible=False) + else: + self.slider_2 = PreprocessorParameter(**legacy_dict['slider_2'], visible=True) + + if legacy_dict['slider_3'] is None: + self.slider_3 = PreprocessorParameter(visible=False) + else: + self.slider_3 = PreprocessorParameter(**legacy_dict['slider_3'], visible=True) + + def __call__(self, input_image, resolution, slider_1=None, slider_2=None, slider_3=None, **kwargs): + # Legacy preprocessors do not have slider 3 + del slider_3 + + if self.unload_function is not None or self.managed_model is not None: + context = automatic_memory_management + else: + context = contextlib.nullcontext + + with context(): + result, is_image = self.call_function(img=input_image, res=resolution, thr_a=slider_1, thr_b=slider_2, **kwargs) + + if is_image: + result = HWC3(result) + + if self.unload_function is not None: + self.unload_function() + + return result + + +for name, data in legacy_preprocessors.items(): + p = LegacyPreprocessor(data) + p.name = name + # Invert should not match any particular model.
+ if "invert" in name: + p.model_filename_filters = [] + add_supported_preprocessor(p) diff --git a/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/__init__.py b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/__init__.py b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/data/__init__.py b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/data/masks.py b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/data/masks.py new file mode 100644 index 0000000000000000000000000000000000000000..27cb9050fa67c40d7d8d492a7088a621ad1ba2ce --- /dev/null +++ b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/data/masks.py @@ -0,0 +1,332 @@ +import math +import random +import hashlib +import logging +from enum import Enum + +import cv2 +import numpy as np + +# from annotator.lama.saicinpainting.evaluation.masks.mask import SegmentationMask +from annotator.lama.saicinpainting.utils import LinearRamp + +LOGGER = logging.getLogger(__name__) + + +class DrawMethod(Enum): + LINE = 'line' + CIRCLE = 'circle' + SQUARE = 'square' + + +def make_random_irregular_mask(shape, max_angle=4, max_len=60, max_width=20, min_times=0, max_times=10, + draw_method=DrawMethod.LINE): + draw_method = DrawMethod(draw_method) + + height, width = shape + mask = np.zeros((height, width), np.float32) + times = np.random.randint(min_times, max_times + 1) + for i in range(times): + start_x = np.random.randint(width) + start_y = np.random.randint(height) + for j in range(1 + np.random.randint(5)): + angle = 0.01 + np.random.randint(max_angle) + if i % 2 == 0: + angle = 2 * 3.1415926 - angle + length = 10 + np.random.randint(max_len) + brush_w = 5 + np.random.randint(max_width) + end_x = np.clip((start_x + length * np.sin(angle)).astype(np.int32), 0, width) + end_y = np.clip((start_y + length * np.cos(angle)).astype(np.int32), 0, height) + if draw_method == DrawMethod.LINE: + cv2.line(mask, (start_x, start_y), (end_x, end_y), 1.0, brush_w) + elif draw_method == DrawMethod.CIRCLE: + cv2.circle(mask, (start_x, start_y), radius=brush_w, color=1., thickness=-1) + elif draw_method == DrawMethod.SQUARE: + radius = brush_w // 2 + mask[start_y - radius:start_y + radius, start_x - radius:start_x + radius] = 1 + start_x, start_y = end_x, end_y + return mask[None, ...] 
+ + +class RandomIrregularMaskGenerator: + def __init__(self, max_angle=4, max_len=60, max_width=20, min_times=0, max_times=10, ramp_kwargs=None, + draw_method=DrawMethod.LINE): + self.max_angle = max_angle + self.max_len = max_len + self.max_width = max_width + self.min_times = min_times + self.max_times = max_times + self.draw_method = draw_method + self.ramp = LinearRamp(**ramp_kwargs) if ramp_kwargs is not None else None + + def __call__(self, img, iter_i=None, raw_image=None): + coef = self.ramp(iter_i) if (self.ramp is not None) and (iter_i is not None) else 1 + cur_max_len = int(max(1, self.max_len * coef)) + cur_max_width = int(max(1, self.max_width * coef)) + cur_max_times = int(self.min_times + 1 + (self.max_times - self.min_times) * coef) + return make_random_irregular_mask(img.shape[1:], max_angle=self.max_angle, max_len=cur_max_len, + max_width=cur_max_width, min_times=self.min_times, max_times=cur_max_times, + draw_method=self.draw_method) + + +def make_random_rectangle_mask(shape, margin=10, bbox_min_size=30, bbox_max_size=100, min_times=0, max_times=3): + height, width = shape + mask = np.zeros((height, width), np.float32) + bbox_max_size = min(bbox_max_size, height - margin * 2, width - margin * 2) + times = np.random.randint(min_times, max_times + 1) + for i in range(times): + box_width = np.random.randint(bbox_min_size, bbox_max_size) + box_height = np.random.randint(bbox_min_size, bbox_max_size) + start_x = np.random.randint(margin, width - margin - box_width + 1) + start_y = np.random.randint(margin, height - margin - box_height + 1) + mask[start_y:start_y + box_height, start_x:start_x + box_width] = 1 + return mask[None, ...] + + +class RandomRectangleMaskGenerator: + def __init__(self, margin=10, bbox_min_size=30, bbox_max_size=100, min_times=0, max_times=3, ramp_kwargs=None): + self.margin = margin + self.bbox_min_size = bbox_min_size + self.bbox_max_size = bbox_max_size + self.min_times = min_times + self.max_times = max_times + self.ramp = LinearRamp(**ramp_kwargs) if ramp_kwargs is not None else None + + def __call__(self, img, iter_i=None, raw_image=None): + coef = self.ramp(iter_i) if (self.ramp is not None) and (iter_i is not None) else 1 + cur_bbox_max_size = int(self.bbox_min_size + 1 + (self.bbox_max_size - self.bbox_min_size) * coef) + cur_max_times = int(self.min_times + (self.max_times - self.min_times) * coef) + return make_random_rectangle_mask(img.shape[1:], margin=self.margin, bbox_min_size=self.bbox_min_size, + bbox_max_size=cur_bbox_max_size, min_times=self.min_times, + max_times=cur_max_times) + + +class RandomSegmentationMaskGenerator: + def __init__(self, **kwargs): + self.impl = None # will be instantiated in first call (effectively in subprocess) + self.kwargs = kwargs + + def __call__(self, img, iter_i=None, raw_image=None): + if self.impl is None: + self.impl = SegmentationMask(**self.kwargs) + + masks = self.impl.get_masks(np.transpose(img, (1, 2, 0))) + masks = [m for m in masks if len(np.unique(m)) > 1] + return np.random.choice(masks) + + +def make_random_superres_mask(shape, min_step=2, max_step=4, min_width=1, max_width=3): + height, width = shape + mask = np.zeros((height, width), np.float32) + step_x = np.random.randint(min_step, max_step + 1) + width_x = np.random.randint(min_width, min(step_x, max_width + 1)) + offset_x = np.random.randint(0, step_x) + + step_y = np.random.randint(min_step, max_step + 1) + width_y = np.random.randint(min_width, min(step_y, max_width + 1)) + offset_y = np.random.randint(0, step_y) + + for dy in 
range(width_y): + mask[offset_y + dy::step_y] = 1 + for dx in range(width_x): + mask[:, offset_x + dx::step_x] = 1 + return mask[None, ...] + + +class RandomSuperresMaskGenerator: + def __init__(self, **kwargs): + self.kwargs = kwargs + + def __call__(self, img, iter_i=None): + return make_random_superres_mask(img.shape[1:], **self.kwargs) + + +class DumbAreaMaskGenerator: + min_ratio = 0.1 + max_ratio = 0.35 + default_ratio = 0.225 + + def __init__(self, is_training): + #Parameters: + # is_training(bool): If true - random rectangular mask, if false - central square mask + self.is_training = is_training + + def _random_vector(self, dimension): + if self.is_training: + lower_limit = math.sqrt(self.min_ratio) + upper_limit = math.sqrt(self.max_ratio) + mask_side = round((random.random() * (upper_limit - lower_limit) + lower_limit) * dimension) + u = random.randint(0, dimension-mask_side-1) + v = u+mask_side + else: + margin = (math.sqrt(self.default_ratio) / 2) * dimension + u = round(dimension/2 - margin) + v = round(dimension/2 + margin) + return u, v + + def __call__(self, img, iter_i=None, raw_image=None): + c, height, width = img.shape + mask = np.zeros((height, width), np.float32) + x1, x2 = self._random_vector(width) + y1, y2 = self._random_vector(height) + mask[x1:x2, y1:y2] = 1 + return mask[None, ...] + + +class OutpaintingMaskGenerator: + def __init__(self, min_padding_percent:float=0.04, max_padding_percent:int=0.25, left_padding_prob:float=0.5, top_padding_prob:float=0.5, + right_padding_prob:float=0.5, bottom_padding_prob:float=0.5, is_fixed_randomness:bool=False): + """ + is_fixed_randomness - get identical paddings for the same image if args are the same + """ + self.min_padding_percent = min_padding_percent + self.max_padding_percent = max_padding_percent + self.probs = [left_padding_prob, top_padding_prob, right_padding_prob, bottom_padding_prob] + self.is_fixed_randomness = is_fixed_randomness + + assert self.min_padding_percent <= self.max_padding_percent + assert self.max_padding_percent > 0 + assert len([x for x in [self.min_padding_percent, self.max_padding_percent] if (x>=0 and x<=1)]) == 2, f"Padding percentage should be in [0,1]" + assert sum(self.probs) > 0, f"At least one of the padding probs should be greater than 0 - {self.probs}" + assert len([x for x in self.probs if (x >= 0) and (x <= 1)]) == 4, f"At least one of padding probs is not in [0,1] - {self.probs}" + if len([x for x in self.probs if x > 0]) == 1: + LOGGER.warning(f"Only one padding prob is greater than zero - {self.probs}. 
That means that the outpainting masks will be always on the same side") + + def apply_padding(self, mask, coord): + mask[int(coord[0][0]*self.img_h):int(coord[1][0]*self.img_h), + int(coord[0][1]*self.img_w):int(coord[1][1]*self.img_w)] = 1 + return mask + + def get_padding(self, size): + n1 = int(self.min_padding_percent*size) + n2 = int(self.max_padding_percent*size) + return self.rnd.randint(n1, n2) / size + + @staticmethod + def _img2rs(img): + arr = np.ascontiguousarray(img.astype(np.uint8)) + str_hash = hashlib.sha1(arr).hexdigest() + res = hash(str_hash)%(2**32) + return res + + def __call__(self, img, iter_i=None, raw_image=None): + c, self.img_h, self.img_w = img.shape + mask = np.zeros((self.img_h, self.img_w), np.float32) + at_least_one_mask_applied = False + + if self.is_fixed_randomness: + assert raw_image is not None, f"Cant calculate hash on raw_image=None" + rs = self._img2rs(raw_image) + self.rnd = np.random.RandomState(rs) + else: + self.rnd = np.random + + coords = [[ + (0,0), + (1,self.get_padding(size=self.img_h)) + ], + [ + (0,0), + (self.get_padding(size=self.img_w),1) + ], + [ + (0,1-self.get_padding(size=self.img_h)), + (1,1) + ], + [ + (1-self.get_padding(size=self.img_w),0), + (1,1) + ]] + + for pp, coord in zip(self.probs, coords): + if self.rnd.random() < pp: + at_least_one_mask_applied = True + mask = self.apply_padding(mask=mask, coord=coord) + + if not at_least_one_mask_applied: + idx = self.rnd.choice(range(len(coords)), p=np.array(self.probs)/sum(self.probs)) + mask = self.apply_padding(mask=mask, coord=coords[idx]) + return mask[None, ...] + + +class MixedMaskGenerator: + def __init__(self, irregular_proba=1/3, irregular_kwargs=None, + box_proba=1/3, box_kwargs=None, + segm_proba=1/3, segm_kwargs=None, + squares_proba=0, squares_kwargs=None, + superres_proba=0, superres_kwargs=None, + outpainting_proba=0, outpainting_kwargs=None, + invert_proba=0): + self.probas = [] + self.gens = [] + + if irregular_proba > 0: + self.probas.append(irregular_proba) + if irregular_kwargs is None: + irregular_kwargs = {} + else: + irregular_kwargs = dict(irregular_kwargs) + irregular_kwargs['draw_method'] = DrawMethod.LINE + self.gens.append(RandomIrregularMaskGenerator(**irregular_kwargs)) + + if box_proba > 0: + self.probas.append(box_proba) + if box_kwargs is None: + box_kwargs = {} + self.gens.append(RandomRectangleMaskGenerator(**box_kwargs)) + + if segm_proba > 0: + self.probas.append(segm_proba) + if segm_kwargs is None: + segm_kwargs = {} + self.gens.append(RandomSegmentationMaskGenerator(**segm_kwargs)) + + if squares_proba > 0: + self.probas.append(squares_proba) + if squares_kwargs is None: + squares_kwargs = {} + else: + squares_kwargs = dict(squares_kwargs) + squares_kwargs['draw_method'] = DrawMethod.SQUARE + self.gens.append(RandomIrregularMaskGenerator(**squares_kwargs)) + + if superres_proba > 0: + self.probas.append(superres_proba) + if superres_kwargs is None: + superres_kwargs = {} + self.gens.append(RandomSuperresMaskGenerator(**superres_kwargs)) + + if outpainting_proba > 0: + self.probas.append(outpainting_proba) + if outpainting_kwargs is None: + outpainting_kwargs = {} + self.gens.append(OutpaintingMaskGenerator(**outpainting_kwargs)) + + self.probas = np.array(self.probas, dtype='float32') + self.probas /= self.probas.sum() + self.invert_proba = invert_proba + + def __call__(self, img, iter_i=None, raw_image=None): + kind = np.random.choice(len(self.probas), p=self.probas) + gen = self.gens[kind] + result = gen(img, iter_i=iter_i, 
raw_image=raw_image) + if self.invert_proba > 0 and random.random() < self.invert_proba: + result = 1 - result + return result + + +def get_mask_generator(kind, kwargs): + if kind is None: + kind = "mixed" + if kwargs is None: + kwargs = {} + + if kind == "mixed": + cl = MixedMaskGenerator + elif kind == "outpainting": + cl = OutpaintingMaskGenerator + elif kind == "dumb": + cl = DumbAreaMaskGenerator + else: + raise NotImplementedError(f"No such generator kind = {kind}") + return cl(**kwargs) diff --git a/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/losses/__init__.py b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/losses/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/losses/adversarial.py b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/losses/adversarial.py new file mode 100644 index 0000000000000000000000000000000000000000..d6db2967ce5074d94ed3b4c51fc743ff2f7831b1 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/losses/adversarial.py @@ -0,0 +1,177 @@ +from typing import Tuple, Dict, Optional + +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class BaseAdversarialLoss: + def pre_generator_step(self, real_batch: torch.Tensor, fake_batch: torch.Tensor, + generator: nn.Module, discriminator: nn.Module): + """ + Prepare for generator step + :param real_batch: Tensor, a batch of real samples + :param fake_batch: Tensor, a batch of samples produced by generator + :param generator: + :param discriminator: + :return: None + """ + + def pre_discriminator_step(self, real_batch: torch.Tensor, fake_batch: torch.Tensor, + generator: nn.Module, discriminator: nn.Module): + """ + Prepare for discriminator step + :param real_batch: Tensor, a batch of real samples + :param fake_batch: Tensor, a batch of samples produced by generator + :param generator: + :param discriminator: + :return: None + """ + + def generator_loss(self, real_batch: torch.Tensor, fake_batch: torch.Tensor, + discr_real_pred: torch.Tensor, discr_fake_pred: torch.Tensor, + mask: Optional[torch.Tensor] = None) \ + -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]: + """ + Calculate generator loss + :param real_batch: Tensor, a batch of real samples + :param fake_batch: Tensor, a batch of samples produced by generator + :param discr_real_pred: Tensor, discriminator output for real_batch + :param discr_fake_pred: Tensor, discriminator output for fake_batch + :param mask: Tensor, actual mask, which was at input of generator when making fake_batch + :return: total generator loss along with some values that might be interesting to log + """ + raise NotImplemented() + + def discriminator_loss(self, real_batch: torch.Tensor, fake_batch: torch.Tensor, + discr_real_pred: torch.Tensor, discr_fake_pred: torch.Tensor, + mask: Optional[torch.Tensor] = None) \ + -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]: + """ + Calculate discriminator loss and call .backward() on it + :param real_batch: Tensor, a batch of real samples + :param fake_batch: Tensor, a batch of samples produced by generator + :param discr_real_pred: Tensor, discriminator output for real_batch + :param discr_fake_pred: Tensor, discriminator output for fake_batch + :param mask: Tensor, actual mask, which was at input 
of generator when making fake_batch + :return: total discriminator loss along with some values that might be interesting to log + """ + raise NotImplemented() + + def interpolate_mask(self, mask, shape): + assert mask is not None + assert self.allow_scale_mask or shape == mask.shape[-2:] + if shape != mask.shape[-2:] and self.allow_scale_mask: + if self.mask_scale_mode == 'maxpool': + mask = F.adaptive_max_pool2d(mask, shape) + else: + mask = F.interpolate(mask, size=shape, mode=self.mask_scale_mode) + return mask + +def make_r1_gp(discr_real_pred, real_batch): + if torch.is_grad_enabled(): + grad_real = torch.autograd.grad(outputs=discr_real_pred.sum(), inputs=real_batch, create_graph=True)[0] + grad_penalty = (grad_real.view(grad_real.shape[0], -1).norm(2, dim=1) ** 2).mean() + else: + grad_penalty = 0 + real_batch.requires_grad = False + + return grad_penalty + +class NonSaturatingWithR1(BaseAdversarialLoss): + def __init__(self, gp_coef=5, weight=1, mask_as_fake_target=False, allow_scale_mask=False, + mask_scale_mode='nearest', extra_mask_weight_for_gen=0, + use_unmasked_for_gen=True, use_unmasked_for_discr=True): + self.gp_coef = gp_coef + self.weight = weight + # use for discr => use for gen; + # otherwise we teach only the discr to pay attention to very small difference + assert use_unmasked_for_gen or (not use_unmasked_for_discr) + # mask as target => use unmasked for discr: + # if we don't care about unmasked regions at all + # then it doesn't matter if the value of mask_as_fake_target is true or false + assert use_unmasked_for_discr or (not mask_as_fake_target) + self.use_unmasked_for_gen = use_unmasked_for_gen + self.use_unmasked_for_discr = use_unmasked_for_discr + self.mask_as_fake_target = mask_as_fake_target + self.allow_scale_mask = allow_scale_mask + self.mask_scale_mode = mask_scale_mode + self.extra_mask_weight_for_gen = extra_mask_weight_for_gen + + def generator_loss(self, real_batch: torch.Tensor, fake_batch: torch.Tensor, + discr_real_pred: torch.Tensor, discr_fake_pred: torch.Tensor, + mask=None) \ + -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]: + fake_loss = F.softplus(-discr_fake_pred) + if (self.mask_as_fake_target and self.extra_mask_weight_for_gen > 0) or \ + not self.use_unmasked_for_gen: # == if masked region should be treated differently + mask = self.interpolate_mask(mask, discr_fake_pred.shape[-2:]) + if not self.use_unmasked_for_gen: + fake_loss = fake_loss * mask + else: + pixel_weights = 1 + mask * self.extra_mask_weight_for_gen + fake_loss = fake_loss * pixel_weights + + return fake_loss.mean() * self.weight, dict() + + def pre_discriminator_step(self, real_batch: torch.Tensor, fake_batch: torch.Tensor, + generator: nn.Module, discriminator: nn.Module): + real_batch.requires_grad = True + + def discriminator_loss(self, real_batch: torch.Tensor, fake_batch: torch.Tensor, + discr_real_pred: torch.Tensor, discr_fake_pred: torch.Tensor, + mask=None) \ + -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]: + + real_loss = F.softplus(-discr_real_pred) + grad_penalty = make_r1_gp(discr_real_pred, real_batch) * self.gp_coef + fake_loss = F.softplus(discr_fake_pred) + + if not self.use_unmasked_for_discr or self.mask_as_fake_target: + # == if masked region should be treated differently + mask = self.interpolate_mask(mask, discr_fake_pred.shape[-2:]) + # use_unmasked_for_discr=False only makes sense for fakes; + # for reals there is no difference beetween two regions + fake_loss = fake_loss * mask + if self.mask_as_fake_target: + fake_loss = fake_loss + (1 - 
mask) * F.softplus(-discr_fake_pred) + + sum_discr_loss = real_loss + grad_penalty + fake_loss + metrics = dict(discr_real_out=discr_real_pred.mean(), + discr_fake_out=discr_fake_pred.mean(), + discr_real_gp=grad_penalty) + return sum_discr_loss.mean(), metrics + +class BCELoss(BaseAdversarialLoss): + def __init__(self, weight): + self.weight = weight + self.bce_loss = nn.BCEWithLogitsLoss() + + def generator_loss(self, discr_fake_pred: torch.Tensor) -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]: + real_mask_gt = torch.zeros(discr_fake_pred.shape).to(discr_fake_pred.device) + fake_loss = self.bce_loss(discr_fake_pred, real_mask_gt) * self.weight + return fake_loss, dict() + + def pre_discriminator_step(self, real_batch: torch.Tensor, fake_batch: torch.Tensor, + generator: nn.Module, discriminator: nn.Module): + real_batch.requires_grad = True + + def discriminator_loss(self, + mask: torch.Tensor, + discr_real_pred: torch.Tensor, + discr_fake_pred: torch.Tensor) -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]: + + real_mask_gt = torch.zeros(discr_real_pred.shape).to(discr_real_pred.device) + sum_discr_loss = (self.bce_loss(discr_real_pred, real_mask_gt) + self.bce_loss(discr_fake_pred, mask)) / 2 + metrics = dict(discr_real_out=discr_real_pred.mean(), + discr_fake_out=discr_fake_pred.mean(), + discr_real_gp=0) + return sum_discr_loss, metrics + + +def make_discrim_loss(kind, **kwargs): + if kind == 'r1': + return NonSaturatingWithR1(**kwargs) + elif kind == 'bce': + return BCELoss(**kwargs) + raise ValueError(f'Unknown adversarial loss kind {kind}') diff --git a/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/losses/constants.py b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/losses/constants.py new file mode 100644 index 0000000000000000000000000000000000000000..ae3e5e151342232be8e2c2a77fe6fd5798dc2a8c --- /dev/null +++ b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/losses/constants.py @@ -0,0 +1,152 @@ +weights = {"ade20k": + [6.34517766497462, + 9.328358208955224, + 11.389521640091116, + 16.10305958132045, + 20.833333333333332, + 22.22222222222222, + 25.125628140703515, + 43.29004329004329, + 50.5050505050505, + 54.6448087431694, + 55.24861878453038, + 60.24096385542168, + 62.5, + 66.2251655629139, + 84.74576271186442, + 90.90909090909092, + 91.74311926605505, + 96.15384615384616, + 96.15384615384616, + 97.08737864077669, + 102.04081632653062, + 135.13513513513513, + 149.2537313432836, + 153.84615384615384, + 163.93442622950818, + 166.66666666666666, + 188.67924528301887, + 192.30769230769232, + 217.3913043478261, + 227.27272727272725, + 227.27272727272725, + 227.27272727272725, + 303.03030303030306, + 322.5806451612903, + 333.3333333333333, + 370.3703703703703, + 384.61538461538464, + 416.6666666666667, + 416.6666666666667, + 434.7826086956522, + 434.7826086956522, + 454.5454545454545, + 454.5454545454545, + 500.0, + 526.3157894736842, + 526.3157894736842, + 555.5555555555555, + 555.5555555555555, + 555.5555555555555, + 555.5555555555555, + 555.5555555555555, + 555.5555555555555, + 555.5555555555555, + 588.2352941176471, + 588.2352941176471, + 588.2352941176471, + 588.2352941176471, + 588.2352941176471, + 666.6666666666666, + 666.6666666666666, + 666.6666666666666, + 666.6666666666666, + 714.2857142857143, + 714.2857142857143, + 714.2857142857143, + 714.2857142857143, + 714.2857142857143, + 769.2307692307693, + 769.2307692307693, + 769.2307692307693, + 833.3333333333334, 
+ 833.3333333333334, + 833.3333333333334, + 833.3333333333334, + 909.090909090909, + 1000.0, + 1111.111111111111, + 1111.111111111111, + 1111.111111111111, + 1111.111111111111, + 1111.111111111111, + 1250.0, + 1250.0, + 1250.0, + 1250.0, + 1250.0, + 1428.5714285714287, + 1428.5714285714287, + 1428.5714285714287, + 1428.5714285714287, + 1428.5714285714287, + 1428.5714285714287, + 1428.5714285714287, + 1666.6666666666667, + 1666.6666666666667, + 1666.6666666666667, + 1666.6666666666667, + 1666.6666666666667, + 1666.6666666666667, + 1666.6666666666667, + 1666.6666666666667, + 1666.6666666666667, + 1666.6666666666667, + 1666.6666666666667, + 2000.0, + 2000.0, + 2000.0, + 2000.0, + 2000.0, + 2000.0, + 2000.0, + 2000.0, + 2000.0, + 2000.0, + 2000.0, + 2000.0, + 2000.0, + 2000.0, + 2000.0, + 2000.0, + 2000.0, + 2500.0, + 2500.0, + 2500.0, + 2500.0, + 2500.0, + 2500.0, + 2500.0, + 2500.0, + 2500.0, + 2500.0, + 2500.0, + 2500.0, + 2500.0, + 3333.3333333333335, + 3333.3333333333335, + 3333.3333333333335, + 3333.3333333333335, + 3333.3333333333335, + 3333.3333333333335, + 3333.3333333333335, + 3333.3333333333335, + 3333.3333333333335, + 3333.3333333333335, + 3333.3333333333335, + 3333.3333333333335, + 3333.3333333333335, + 5000.0, + 5000.0, + 5000.0] +} \ No newline at end of file diff --git a/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/losses/distance_weighting.py b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/losses/distance_weighting.py new file mode 100644 index 0000000000000000000000000000000000000000..90ce05bee5f633662057b3347d8791e1b4d115a0 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/losses/distance_weighting.py @@ -0,0 +1,126 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import torchvision + +from annotator.lama.saicinpainting.training.losses.perceptual import IMAGENET_STD, IMAGENET_MEAN + + +def dummy_distance_weighter(real_img, pred_img, mask): + return mask + + +def get_gauss_kernel(kernel_size, width_factor=1): + coords = torch.stack(torch.meshgrid(torch.arange(kernel_size), + torch.arange(kernel_size)), + dim=0).float() + diff = torch.exp(-((coords - kernel_size // 2) ** 2).sum(0) / kernel_size / width_factor) + diff /= diff.sum() + return diff + + +class BlurMask(nn.Module): + def __init__(self, kernel_size=5, width_factor=1): + super().__init__() + self.filter = nn.Conv2d(1, 1, kernel_size, padding=kernel_size // 2, padding_mode='replicate', bias=False) + self.filter.weight.data.copy_(get_gauss_kernel(kernel_size, width_factor=width_factor)) + + def forward(self, real_img, pred_img, mask): + with torch.no_grad(): + result = self.filter(mask) * mask + return result + + +class EmulatedEDTMask(nn.Module): + def __init__(self, dilate_kernel_size=5, blur_kernel_size=5, width_factor=1): + super().__init__() + self.dilate_filter = nn.Conv2d(1, 1, dilate_kernel_size, padding=dilate_kernel_size// 2, padding_mode='replicate', + bias=False) + self.dilate_filter.weight.data.copy_(torch.ones(1, 1, dilate_kernel_size, dilate_kernel_size, dtype=torch.float)) + self.blur_filter = nn.Conv2d(1, 1, blur_kernel_size, padding=blur_kernel_size // 2, padding_mode='replicate', bias=False) + self.blur_filter.weight.data.copy_(get_gauss_kernel(blur_kernel_size, width_factor=width_factor)) + + def forward(self, real_img, pred_img, mask): + with torch.no_grad(): + known_mask = 1 - mask + dilated_known_mask = (self.dilate_filter(known_mask) > 
1).float() + result = self.blur_filter(1 - dilated_known_mask) * mask + return result + + +class PropagatePerceptualSim(nn.Module): + def __init__(self, level=2, max_iters=10, temperature=500, erode_mask_size=3): + super().__init__() + vgg = torchvision.models.vgg19(pretrained=True).features + vgg_avg_pooling = [] + + for weights in vgg.parameters(): + weights.requires_grad = False + + cur_level_i = 0 + for module in vgg.modules(): + if module.__class__.__name__ == 'Sequential': + continue + elif module.__class__.__name__ == 'MaxPool2d': + vgg_avg_pooling.append(nn.AvgPool2d(kernel_size=2, stride=2, padding=0)) + else: + vgg_avg_pooling.append(module) + if module.__class__.__name__ == 'ReLU': + cur_level_i += 1 + if cur_level_i == level: + break + + self.features = nn.Sequential(*vgg_avg_pooling) + + self.max_iters = max_iters + self.temperature = temperature + self.do_erode = erode_mask_size > 0 + if self.do_erode: + self.erode_mask = nn.Conv2d(1, 1, erode_mask_size, padding=erode_mask_size // 2, bias=False) + self.erode_mask.weight.data.fill_(1) + + def forward(self, real_img, pred_img, mask): + with torch.no_grad(): + real_img = (real_img - IMAGENET_MEAN.to(real_img)) / IMAGENET_STD.to(real_img) + real_feats = self.features(real_img) + + vertical_sim = torch.exp(-(real_feats[:, :, 1:] - real_feats[:, :, :-1]).pow(2).sum(1, keepdim=True) + / self.temperature) + horizontal_sim = torch.exp(-(real_feats[:, :, :, 1:] - real_feats[:, :, :, :-1]).pow(2).sum(1, keepdim=True) + / self.temperature) + + mask_scaled = F.interpolate(mask, size=real_feats.shape[-2:], mode='bilinear', align_corners=False) + if self.do_erode: + mask_scaled = (self.erode_mask(mask_scaled) > 1).float() + + cur_knowness = 1 - mask_scaled + + for iter_i in range(self.max_iters): + new_top_knowness = F.pad(cur_knowness[:, :, :-1] * vertical_sim, (0, 0, 1, 0), mode='replicate') + new_bottom_knowness = F.pad(cur_knowness[:, :, 1:] * vertical_sim, (0, 0, 0, 1), mode='replicate') + + new_left_knowness = F.pad(cur_knowness[:, :, :, :-1] * horizontal_sim, (1, 0, 0, 0), mode='replicate') + new_right_knowness = F.pad(cur_knowness[:, :, :, 1:] * horizontal_sim, (0, 1, 0, 0), mode='replicate') + + new_knowness = torch.stack([new_top_knowness, new_bottom_knowness, + new_left_knowness, new_right_knowness], + dim=0).max(0).values + + cur_knowness = torch.max(cur_knowness, new_knowness) + + cur_knowness = F.interpolate(cur_knowness, size=mask.shape[-2:], mode='bilinear') + result = torch.min(mask, 1 - cur_knowness) + + return result + + +def make_mask_distance_weighter(kind='none', **kwargs): + if kind == 'none': + return dummy_distance_weighter + if kind == 'blur': + return BlurMask(**kwargs) + if kind == 'edt': + return EmulatedEDTMask(**kwargs) + if kind == 'pps': + return PropagatePerceptualSim(**kwargs) + raise ValueError(f'Unknown mask distance weighter kind {kind}') diff --git a/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/losses/feature_matching.py b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/losses/feature_matching.py new file mode 100644 index 0000000000000000000000000000000000000000..c019895c9178817837d1a6773367b178a861dc61 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/losses/feature_matching.py @@ -0,0 +1,33 @@ +from typing import List + +import torch +import torch.nn.functional as F + + +def masked_l2_loss(pred, target, mask, weight_known, weight_missing): + per_pixel_l2 = 
F.mse_loss(pred, target, reduction='none') + pixel_weights = mask * weight_missing + (1 - mask) * weight_known + return (pixel_weights * per_pixel_l2).mean() + + +def masked_l1_loss(pred, target, mask, weight_known, weight_missing): + per_pixel_l1 = F.l1_loss(pred, target, reduction='none') + pixel_weights = mask * weight_missing + (1 - mask) * weight_known + return (pixel_weights * per_pixel_l1).mean() + + +def feature_matching_loss(fake_features: List[torch.Tensor], target_features: List[torch.Tensor], mask=None): + if mask is None: + res = torch.stack([F.mse_loss(fake_feat, target_feat) + for fake_feat, target_feat in zip(fake_features, target_features)]).mean() + else: + res = 0 + norm = 0 + for fake_feat, target_feat in zip(fake_features, target_features): + cur_mask = F.interpolate(mask, size=fake_feat.shape[-2:], mode='bilinear', align_corners=False) + error_weights = 1 - cur_mask + cur_val = ((fake_feat - target_feat).pow(2) * error_weights).mean() + res = res + cur_val + norm += 1 + res = res / norm + return res diff --git a/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/losses/perceptual.py b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/losses/perceptual.py new file mode 100644 index 0000000000000000000000000000000000000000..5d8b0b309b2b8ba95172cb16af440033a4aeafae --- /dev/null +++ b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/losses/perceptual.py @@ -0,0 +1,113 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import torchvision + +# from models.ade20k import ModelBuilder +from annotator.lama.saicinpainting.utils import check_and_warn_input_range + + +IMAGENET_MEAN = torch.FloatTensor([0.485, 0.456, 0.406])[None, :, None, None] +IMAGENET_STD = torch.FloatTensor([0.229, 0.224, 0.225])[None, :, None, None] + + +class PerceptualLoss(nn.Module): + def __init__(self, normalize_inputs=True): + super(PerceptualLoss, self).__init__() + + self.normalize_inputs = normalize_inputs + self.mean_ = IMAGENET_MEAN + self.std_ = IMAGENET_STD + + vgg = torchvision.models.vgg19(pretrained=True).features + vgg_avg_pooling = [] + + for weights in vgg.parameters(): + weights.requires_grad = False + + for module in vgg.modules(): + if module.__class__.__name__ == 'Sequential': + continue + elif module.__class__.__name__ == 'MaxPool2d': + vgg_avg_pooling.append(nn.AvgPool2d(kernel_size=2, stride=2, padding=0)) + else: + vgg_avg_pooling.append(module) + + self.vgg = nn.Sequential(*vgg_avg_pooling) + + def do_normalize_inputs(self, x): + return (x - self.mean_.to(x.device)) / self.std_.to(x.device) + + def partial_losses(self, input, target, mask=None): + check_and_warn_input_range(target, 0, 1, 'PerceptualLoss target in partial_losses') + + # we expect input and target to be in [0, 1] range + losses = [] + + if self.normalize_inputs: + features_input = self.do_normalize_inputs(input) + features_target = self.do_normalize_inputs(target) + else: + features_input = input + features_target = target + + for layer in self.vgg[:30]: + + features_input = layer(features_input) + features_target = layer(features_target) + + if layer.__class__.__name__ == 'ReLU': + loss = F.mse_loss(features_input, features_target, reduction='none') + + if mask is not None: + cur_mask = F.interpolate(mask, size=features_input.shape[-2:], + mode='bilinear', align_corners=False) + loss = loss * (1 - cur_mask) + + loss = loss.mean(dim=tuple(range(1, len(loss.shape)))) + losses.append(loss) + 
+ return losses + + def forward(self, input, target, mask=None): + losses = self.partial_losses(input, target, mask=mask) + return torch.stack(losses).sum(dim=0) + + def get_global_features(self, input): + check_and_warn_input_range(input, 0, 1, 'PerceptualLoss input in get_global_features') + + if self.normalize_inputs: + features_input = self.do_normalize_inputs(input) + else: + features_input = input + + features_input = self.vgg(features_input) + return features_input + + +class ResNetPL(nn.Module): + def __init__(self, weight=1, + weights_path=None, arch_encoder='resnet50dilated', segmentation=True): + super().__init__() + self.impl = ModelBuilder.get_encoder(weights_path=weights_path, + arch_encoder=arch_encoder, + arch_decoder='ppm_deepsup', + fc_dim=2048, + segmentation=segmentation) + self.impl.eval() + for w in self.impl.parameters(): + w.requires_grad_(False) + + self.weight = weight + + def forward(self, pred, target): + pred = (pred - IMAGENET_MEAN.to(pred)) / IMAGENET_STD.to(pred) + target = (target - IMAGENET_MEAN.to(target)) / IMAGENET_STD.to(target) + + pred_feats = self.impl(pred, return_feature_maps=True) + target_feats = self.impl(target, return_feature_maps=True) + + result = torch.stack([F.mse_loss(cur_pred, cur_target) + for cur_pred, cur_target + in zip(pred_feats, target_feats)]).sum() * self.weight + return result diff --git a/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/losses/segmentation.py b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/losses/segmentation.py new file mode 100644 index 0000000000000000000000000000000000000000..3d4a9f94eaae84722db584277dbbf9bc41ede357 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/losses/segmentation.py @@ -0,0 +1,43 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from .constants import weights as constant_weights + + +class CrossEntropy2d(nn.Module): + def __init__(self, reduction="mean", ignore_label=255, weights=None, *args, **kwargs): + """ + weight (Tensor, optional): a manual rescaling weight given to each class. 
+ If given, has to be a Tensor of size "nclasses" + """ + super(CrossEntropy2d, self).__init__() + self.reduction = reduction + self.ignore_label = ignore_label + self.weights = weights + if self.weights is not None: + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + self.weights = torch.FloatTensor(constant_weights[weights]).to(device) + + def forward(self, predict, target): + """ + Args: + predict:(n, c, h, w) + target:(n, 1, h, w) + """ + target = target.long() + assert not target.requires_grad + assert predict.dim() == 4, "{0}".format(predict.size()) + assert target.dim() == 4, "{0}".format(target.size()) + assert predict.size(0) == target.size(0), "{0} vs {1} ".format(predict.size(0), target.size(0)) + assert target.size(1) == 1, "{0}".format(target.size(1)) + assert predict.size(2) == target.size(2), "{0} vs {1} ".format(predict.size(2), target.size(2)) + assert predict.size(3) == target.size(3), "{0} vs {1} ".format(predict.size(3), target.size(3)) + target = target.squeeze(1) + n, c, h, w = predict.size() + target_mask = (target >= 0) * (target != self.ignore_label) + target = target[target_mask] + predict = predict.transpose(1, 2).transpose(2, 3).contiguous() + predict = predict[target_mask.view(n, h, w, 1).repeat(1, 1, 1, c)].view(-1, c) + loss = F.cross_entropy(predict, target, weight=self.weights, reduction=self.reduction) + return loss diff --git a/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/losses/style_loss.py b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/losses/style_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..0bb42d7fbc5d17a47bec7365889868505f5fdfb5 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/losses/style_loss.py @@ -0,0 +1,155 @@ +import torch +import torch.nn as nn +import torchvision.models as models + + +class PerceptualLoss(nn.Module): + r""" + Perceptual loss, VGG-based + https://arxiv.org/abs/1603.08155 + https://github.com/dxyang/StyleTransfer/blob/master/utils.py + """ + + def __init__(self, weights=[1.0, 1.0, 1.0, 1.0, 1.0]): + super(PerceptualLoss, self).__init__() + self.add_module('vgg', VGG19()) + self.criterion = torch.nn.L1Loss() + self.weights = weights + + def __call__(self, x, y): + # Compute features + x_vgg, y_vgg = self.vgg(x), self.vgg(y) + + content_loss = 0.0 + content_loss += self.weights[0] * self.criterion(x_vgg['relu1_1'], y_vgg['relu1_1']) + content_loss += self.weights[1] * self.criterion(x_vgg['relu2_1'], y_vgg['relu2_1']) + content_loss += self.weights[2] * self.criterion(x_vgg['relu3_1'], y_vgg['relu3_1']) + content_loss += self.weights[3] * self.criterion(x_vgg['relu4_1'], y_vgg['relu4_1']) + content_loss += self.weights[4] * self.criterion(x_vgg['relu5_1'], y_vgg['relu5_1']) + + + return content_loss + + +class VGG19(torch.nn.Module): + def __init__(self): + super(VGG19, self).__init__() + features = models.vgg19(pretrained=True).features + self.relu1_1 = torch.nn.Sequential() + self.relu1_2 = torch.nn.Sequential() + + self.relu2_1 = torch.nn.Sequential() + self.relu2_2 = torch.nn.Sequential() + + self.relu3_1 = torch.nn.Sequential() + self.relu3_2 = torch.nn.Sequential() + self.relu3_3 = torch.nn.Sequential() + self.relu3_4 = torch.nn.Sequential() + + self.relu4_1 = torch.nn.Sequential() + self.relu4_2 = torch.nn.Sequential() + self.relu4_3 = torch.nn.Sequential() + self.relu4_4 = torch.nn.Sequential() + + self.relu5_1 = 
torch.nn.Sequential() + self.relu5_2 = torch.nn.Sequential() + self.relu5_3 = torch.nn.Sequential() + self.relu5_4 = torch.nn.Sequential() + + for x in range(2): + self.relu1_1.add_module(str(x), features[x]) + + for x in range(2, 4): + self.relu1_2.add_module(str(x), features[x]) + + for x in range(4, 7): + self.relu2_1.add_module(str(x), features[x]) + + for x in range(7, 9): + self.relu2_2.add_module(str(x), features[x]) + + for x in range(9, 12): + self.relu3_1.add_module(str(x), features[x]) + + for x in range(12, 14): + self.relu3_2.add_module(str(x), features[x]) + + for x in range(14, 16): + self.relu3_2.add_module(str(x), features[x]) + + for x in range(16, 18): + self.relu3_4.add_module(str(x), features[x]) + + for x in range(18, 21): + self.relu4_1.add_module(str(x), features[x]) + + for x in range(21, 23): + self.relu4_2.add_module(str(x), features[x]) + + for x in range(23, 25): + self.relu4_3.add_module(str(x), features[x]) + + for x in range(25, 27): + self.relu4_4.add_module(str(x), features[x]) + + for x in range(27, 30): + self.relu5_1.add_module(str(x), features[x]) + + for x in range(30, 32): + self.relu5_2.add_module(str(x), features[x]) + + for x in range(32, 34): + self.relu5_3.add_module(str(x), features[x]) + + for x in range(34, 36): + self.relu5_4.add_module(str(x), features[x]) + + # don't need the gradients, just want the features + for param in self.parameters(): + param.requires_grad = False + + def forward(self, x): + relu1_1 = self.relu1_1(x) + relu1_2 = self.relu1_2(relu1_1) + + relu2_1 = self.relu2_1(relu1_2) + relu2_2 = self.relu2_2(relu2_1) + + relu3_1 = self.relu3_1(relu2_2) + relu3_2 = self.relu3_2(relu3_1) + relu3_3 = self.relu3_3(relu3_2) + relu3_4 = self.relu3_4(relu3_3) + + relu4_1 = self.relu4_1(relu3_4) + relu4_2 = self.relu4_2(relu4_1) + relu4_3 = self.relu4_3(relu4_2) + relu4_4 = self.relu4_4(relu4_3) + + relu5_1 = self.relu5_1(relu4_4) + relu5_2 = self.relu5_2(relu5_1) + relu5_3 = self.relu5_3(relu5_2) + relu5_4 = self.relu5_4(relu5_3) + + out = { + 'relu1_1': relu1_1, + 'relu1_2': relu1_2, + + 'relu2_1': relu2_1, + 'relu2_2': relu2_2, + + 'relu3_1': relu3_1, + 'relu3_2': relu3_2, + 'relu3_3': relu3_3, + 'relu3_4': relu3_4, + + 'relu4_1': relu4_1, + 'relu4_2': relu4_2, + 'relu4_3': relu4_3, + 'relu4_4': relu4_4, + + 'relu5_1': relu5_1, + 'relu5_2': relu5_2, + 'relu5_3': relu5_3, + 'relu5_4': relu5_4, + } + return out diff --git a/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/modules/__init__.py b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/modules/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c5c56ad9965ec95f3ae28c35c2ab42456eb06066 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/modules/__init__.py @@ -0,0 +1,31 @@ +import logging + +from annotator.lama.saicinpainting.training.modules.ffc import FFCResNetGenerator +from annotator.lama.saicinpainting.training.modules.pix2pixhd import GlobalGenerator, MultiDilatedGlobalGenerator, \ + NLayerDiscriminator, MultidilatedNLayerDiscriminator + +def make_generator(config, kind, **kwargs): + logging.info(f'Make generator {kind}') + + if kind == 'pix2pixhd_multidilated': + return MultiDilatedGlobalGenerator(**kwargs) + + if kind == 'pix2pixhd_global': + return GlobalGenerator(**kwargs) + + if kind == 'ffc_resnet': + return FFCResNetGenerator(**kwargs) + + raise ValueError(f'Unknown generator kind {kind}') + + +def 
make_discriminator(kind, **kwargs): + logging.info(f'Make discriminator {kind}') + + if kind == 'pix2pixhd_nlayer_multidilated': + return MultidilatedNLayerDiscriminator(**kwargs) + + if kind == 'pix2pixhd_nlayer': + return NLayerDiscriminator(**kwargs) + + raise ValueError(f'Unknown discriminator kind {kind}') diff --git a/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/modules/base.py b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/modules/base.py new file mode 100644 index 0000000000000000000000000000000000000000..58c513987601d6a442ca8f066f82f1af46e28939 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/modules/base.py @@ -0,0 +1,80 @@ +import abc +from typing import Tuple, List + +import torch +import torch.nn as nn + +from annotator.lama.saicinpainting.training.modules.depthwise_sep_conv import DepthWiseSeperableConv +from annotator.lama.saicinpainting.training.modules.multidilated_conv import MultidilatedConv + + +class BaseDiscriminator(nn.Module): + @abc.abstractmethod + def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, List[torch.Tensor]]: + """ + Predict scores and get intermediate activations. Useful for feature matching loss + :return tuple (scores, list of intermediate activations) + """ + raise NotImplemented() + + +def get_conv_block_ctor(kind='default'): + if not isinstance(kind, str): + return kind + if kind == 'default': + return nn.Conv2d + if kind == 'depthwise': + return DepthWiseSeperableConv + if kind == 'multidilated': + return MultidilatedConv + raise ValueError(f'Unknown convolutional block kind {kind}') + + +def get_norm_layer(kind='bn'): + if not isinstance(kind, str): + return kind + if kind == 'bn': + return nn.BatchNorm2d + if kind == 'in': + return nn.InstanceNorm2d + raise ValueError(f'Unknown norm block kind {kind}') + + +def get_activation(kind='tanh'): + if kind == 'tanh': + return nn.Tanh() + if kind == 'sigmoid': + return nn.Sigmoid() + if kind is False: + return nn.Identity() + raise ValueError(f'Unknown activation kind {kind}') + + +class SimpleMultiStepGenerator(nn.Module): + def __init__(self, steps: List[nn.Module]): + super().__init__() + self.steps = nn.ModuleList(steps) + + def forward(self, x): + cur_in = x + outs = [] + for step in self.steps: + cur_out = step(cur_in) + outs.append(cur_out) + cur_in = torch.cat((cur_in, cur_out), dim=1) + return torch.cat(outs[::-1], dim=1) + +def deconv_factory(kind, ngf, mult, norm_layer, activation, max_features): + if kind == 'convtranspose': + return [nn.ConvTranspose2d(min(max_features, ngf * mult), + min(max_features, int(ngf * mult / 2)), + kernel_size=3, stride=2, padding=1, output_padding=1), + norm_layer(min(max_features, int(ngf * mult / 2))), activation] + elif kind == 'bilinear': + return [nn.Upsample(scale_factor=2, mode='bilinear'), + DepthWiseSeperableConv(min(max_features, ngf * mult), + min(max_features, int(ngf * mult / 2)), + kernel_size=3, stride=1, padding=1), + norm_layer(min(max_features, int(ngf * mult / 2))), activation] + else: + raise Exception(f"Invalid deconv kind: {kind}") \ No newline at end of file diff --git a/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/modules/depthwise_sep_conv.py b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/modules/depthwise_sep_conv.py new file mode 100644 index 
0000000000000000000000000000000000000000..83dd15c3df1d9f40baf0091a373fa224532c9ddd --- /dev/null +++ b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/modules/depthwise_sep_conv.py @@ -0,0 +1,17 @@ +import torch +import torch.nn as nn + +class DepthWiseSeperableConv(nn.Module): + def __init__(self, in_dim, out_dim, *args, **kwargs): + super().__init__() + if 'groups' in kwargs: + # ignoring groups for Depthwise Sep Conv + del kwargs['groups'] + + self.depthwise = nn.Conv2d(in_dim, in_dim, *args, groups=in_dim, **kwargs) + self.pointwise = nn.Conv2d(in_dim, out_dim, kernel_size=1) + + def forward(self, x): + out = self.depthwise(x) + out = self.pointwise(out) + return out \ No newline at end of file diff --git a/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/modules/fake_fakes.py b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/modules/fake_fakes.py new file mode 100644 index 0000000000000000000000000000000000000000..45c4ad559cef2730b771a709197e00ae1c87683c --- /dev/null +++ b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/modules/fake_fakes.py @@ -0,0 +1,47 @@ +import torch +from kornia import SamplePadding +from kornia.augmentation import RandomAffine, CenterCrop + + +class FakeFakesGenerator: + def __init__(self, aug_proba=0.5, img_aug_degree=30, img_aug_translate=0.2): + self.grad_aug = RandomAffine(degrees=360, + translate=0.2, + padding_mode=SamplePadding.REFLECTION, + keepdim=False, + p=1) + self.img_aug = RandomAffine(degrees=img_aug_degree, + translate=img_aug_translate, + padding_mode=SamplePadding.REFLECTION, + keepdim=True, + p=1) + self.aug_proba = aug_proba + + def __call__(self, input_images, masks): + blend_masks = self._fill_masks_with_gradient(masks) + blend_target = self._make_blend_target(input_images) + result = input_images * (1 - blend_masks) + blend_target * blend_masks + return result, blend_masks + + def _make_blend_target(self, input_images): + batch_size = input_images.shape[0] + permuted = input_images[torch.randperm(batch_size)] + augmented = self.img_aug(input_images) + is_aug = (torch.rand(batch_size, device=input_images.device)[:, None, None, None] < self.aug_proba).float() + result = augmented * is_aug + permuted * (1 - is_aug) + return result + + def _fill_masks_with_gradient(self, masks): + batch_size, _, height, width = masks.shape + grad = torch.linspace(0, 1, steps=width * 2, device=masks.device, dtype=masks.dtype) \ + .view(1, 1, 1, -1).expand(batch_size, 1, height * 2, width * 2) + grad = self.grad_aug(grad) + grad = CenterCrop((height, width))(grad) + grad *= masks + + grad_for_min = grad + (1 - masks) * 10 + grad -= grad_for_min.view(batch_size, -1).min(-1).values[:, None, None, None] + grad /= grad.view(batch_size, -1).max(-1).values[:, None, None, None] + 1e-6 + grad.clamp_(min=0, max=1) + + return grad diff --git a/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/modules/ffc.py b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/modules/ffc.py new file mode 100644 index 0000000000000000000000000000000000000000..e67ff9c832463e5518d6ccea2c6f27531ed778d4 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/modules/ffc.py @@ -0,0 +1,485 @@ +# Fast Fourier Convolution NeurIPS 2020 +# original implementation https://github.com/pkumivision/FFC/blob/main/model_zoo/ffc.py +# paper 
https://proceedings.neurips.cc/paper/2020/file/2fd5d41ec6cfab47e32164d5624269b1-Paper.pdf + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + +from annotator.lama.saicinpainting.training.modules.base import get_activation, BaseDiscriminator +from annotator.lama.saicinpainting.training.modules.spatial_transform import LearnableSpatialTransformWrapper +from annotator.lama.saicinpainting.training.modules.squeeze_excitation import SELayer +from annotator.lama.saicinpainting.utils import get_shape + + +class FFCSE_block(nn.Module): + + def __init__(self, channels, ratio_g): + super(FFCSE_block, self).__init__() + in_cg = int(channels * ratio_g) + in_cl = channels - in_cg + r = 16 + + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + self.conv1 = nn.Conv2d(channels, channels // r, + kernel_size=1, bias=True) + self.relu1 = nn.ReLU(inplace=True) + self.conv_a2l = None if in_cl == 0 else nn.Conv2d( + channels // r, in_cl, kernel_size=1, bias=True) + self.conv_a2g = None if in_cg == 0 else nn.Conv2d( + channels // r, in_cg, kernel_size=1, bias=True) + self.sigmoid = nn.Sigmoid() + + def forward(self, x): + x = x if type(x) is tuple else (x, 0) + id_l, id_g = x + + x = id_l if type(id_g) is int else torch.cat([id_l, id_g], dim=1) + x = self.avgpool(x) + x = self.relu1(self.conv1(x)) + + x_l = 0 if self.conv_a2l is None else id_l * \ + self.sigmoid(self.conv_a2l(x)) + x_g = 0 if self.conv_a2g is None else id_g * \ + self.sigmoid(self.conv_a2g(x)) + return x_l, x_g + + +class FourierUnit(nn.Module): + + def __init__(self, in_channels, out_channels, groups=1, spatial_scale_factor=None, spatial_scale_mode='bilinear', + spectral_pos_encoding=False, use_se=False, se_kwargs=None, ffc3d=False, fft_norm='ortho'): + # bn_layer not used + super(FourierUnit, self).__init__() + self.groups = groups + + self.conv_layer = torch.nn.Conv2d(in_channels=in_channels * 2 + (2 if spectral_pos_encoding else 0), + out_channels=out_channels * 2, + kernel_size=1, stride=1, padding=0, groups=self.groups, bias=False) + self.bn = torch.nn.BatchNorm2d(out_channels * 2) + self.relu = torch.nn.ReLU(inplace=True) + + # squeeze and excitation block + self.use_se = use_se + if use_se: + if se_kwargs is None: + se_kwargs = {} + self.se = SELayer(self.conv_layer.in_channels, **se_kwargs) + + self.spatial_scale_factor = spatial_scale_factor + self.spatial_scale_mode = spatial_scale_mode + self.spectral_pos_encoding = spectral_pos_encoding + self.ffc3d = ffc3d + self.fft_norm = fft_norm + + def forward(self, x): + batch = x.shape[0] + + if self.spatial_scale_factor is not None: + orig_size = x.shape[-2:] + x = F.interpolate(x, scale_factor=self.spatial_scale_factor, mode=self.spatial_scale_mode, align_corners=False) + + r_size = x.size() + # (batch, c, h, w/2+1, 2) + fft_dim = (-3, -2, -1) if self.ffc3d else (-2, -1) + ffted = torch.fft.rfftn(x, dim=fft_dim, norm=self.fft_norm) + ffted = torch.stack((ffted.real, ffted.imag), dim=-1) + ffted = ffted.permute(0, 1, 4, 2, 3).contiguous() # (batch, c, 2, h, w/2+1) + ffted = ffted.view((batch, -1,) + ffted.size()[3:]) + + if self.spectral_pos_encoding: + height, width = ffted.shape[-2:] + coords_vert = torch.linspace(0, 1, height)[None, None, :, None].expand(batch, 1, height, width).to(ffted) + coords_hor = torch.linspace(0, 1, width)[None, None, None, :].expand(batch, 1, height, width).to(ffted) + ffted = torch.cat((coords_vert, coords_hor, ffted), dim=1) + + if self.use_se: + ffted = self.se(ffted) + + ffted = self.conv_layer(ffted) # (batch, c*2, h, 
w/2+1) + ffted = self.relu(self.bn(ffted)) + + ffted = ffted.view((batch, -1, 2,) + ffted.size()[2:]).permute( + 0, 1, 3, 4, 2).contiguous() # (batch,c, t, h, w/2+1, 2) + ffted = torch.complex(ffted[..., 0], ffted[..., 1]) + + ifft_shape_slice = x.shape[-3:] if self.ffc3d else x.shape[-2:] + output = torch.fft.irfftn(ffted, s=ifft_shape_slice, dim=fft_dim, norm=self.fft_norm) + + if self.spatial_scale_factor is not None: + output = F.interpolate(output, size=orig_size, mode=self.spatial_scale_mode, align_corners=False) + + return output + + +class SeparableFourierUnit(nn.Module): + + def __init__(self, in_channels, out_channels, groups=1, kernel_size=3): + # bn_layer not used + super(SeparableFourierUnit, self).__init__() + self.groups = groups + row_out_channels = out_channels // 2 + col_out_channels = out_channels - row_out_channels + self.row_conv = torch.nn.Conv2d(in_channels=in_channels * 2, + out_channels=row_out_channels * 2, + kernel_size=(kernel_size, 1), # kernel size is always like this, but the data will be transposed + stride=1, padding=(kernel_size // 2, 0), + padding_mode='reflect', + groups=self.groups, bias=False) + self.col_conv = torch.nn.Conv2d(in_channels=in_channels * 2, + out_channels=col_out_channels * 2, + kernel_size=(kernel_size, 1), # kernel size is always like this, but the data will be transposed + stride=1, padding=(kernel_size // 2, 0), + padding_mode='reflect', + groups=self.groups, bias=False) + self.row_bn = torch.nn.BatchNorm2d(row_out_channels * 2) + self.col_bn = torch.nn.BatchNorm2d(col_out_channels * 2) + self.relu = torch.nn.ReLU(inplace=True) + + def process_branch(self, x, conv, bn): + batch = x.shape[0] + + r_size = x.size() + # (batch, c, h, w/2+1, 2) + ffted = torch.fft.rfft(x, norm="ortho") + ffted = torch.stack((ffted.real, ffted.imag), dim=-1) + ffted = ffted.permute(0, 1, 4, 2, 3).contiguous() # (batch, c, 2, h, w/2+1) + ffted = ffted.view((batch, -1,) + ffted.size()[3:]) + + ffted = self.relu(bn(conv(ffted))) + + ffted = ffted.view((batch, -1, 2,) + ffted.size()[2:]).permute( + 0, 1, 3, 4, 2).contiguous() # (batch,c, t, h, w/2+1, 2) + ffted = torch.complex(ffted[..., 0], ffted[..., 1]) + + output = torch.fft.irfft(ffted, s=x.shape[-1:], norm="ortho") + return output + + + def forward(self, x): + rowwise = self.process_branch(x, self.row_conv, self.row_bn) + colwise = self.process_branch(x.permute(0, 1, 3, 2), self.col_conv, self.col_bn).permute(0, 1, 3, 2) + out = torch.cat((rowwise, colwise), dim=1) + return out + + +class SpectralTransform(nn.Module): + + def __init__(self, in_channels, out_channels, stride=1, groups=1, enable_lfu=True, separable_fu=False, **fu_kwargs): + # bn_layer not used + super(SpectralTransform, self).__init__() + self.enable_lfu = enable_lfu + if stride == 2: + self.downsample = nn.AvgPool2d(kernel_size=(2, 2), stride=2) + else: + self.downsample = nn.Identity() + + self.stride = stride + self.conv1 = nn.Sequential( + nn.Conv2d(in_channels, out_channels // + 2, kernel_size=1, groups=groups, bias=False), + nn.BatchNorm2d(out_channels // 2), + nn.ReLU(inplace=True) + ) + fu_class = SeparableFourierUnit if separable_fu else FourierUnit + self.fu = fu_class( + out_channels // 2, out_channels // 2, groups, **fu_kwargs) + if self.enable_lfu: + self.lfu = fu_class( + out_channels // 2, out_channels // 2, groups) + self.conv2 = torch.nn.Conv2d( + out_channels // 2, out_channels, kernel_size=1, groups=groups, bias=False) + + def forward(self, x): + + x = self.downsample(x) + x = self.conv1(x) + output = self.fu(x) + + if 
self.enable_lfu: + n, c, h, w = x.shape + split_no = 2 + split_s = h // split_no + xs = torch.cat(torch.split( + x[:, :c // 4], split_s, dim=-2), dim=1).contiguous() + xs = torch.cat(torch.split(xs, split_s, dim=-1), + dim=1).contiguous() + xs = self.lfu(xs) + xs = xs.repeat(1, 1, split_no, split_no).contiguous() + else: + xs = 0 + + output = self.conv2(x + output + xs) + + return output + + +class FFC(nn.Module): + + def __init__(self, in_channels, out_channels, kernel_size, + ratio_gin, ratio_gout, stride=1, padding=0, + dilation=1, groups=1, bias=False, enable_lfu=True, + padding_type='reflect', gated=False, **spectral_kwargs): + super(FFC, self).__init__() + + assert stride == 1 or stride == 2, "Stride should be 1 or 2." + self.stride = stride + + in_cg = int(in_channels * ratio_gin) + in_cl = in_channels - in_cg + out_cg = int(out_channels * ratio_gout) + out_cl = out_channels - out_cg + #groups_g = 1 if groups == 1 else int(groups * ratio_gout) + #groups_l = 1 if groups == 1 else groups - groups_g + + self.ratio_gin = ratio_gin + self.ratio_gout = ratio_gout + self.global_in_num = in_cg + + module = nn.Identity if in_cl == 0 or out_cl == 0 else nn.Conv2d + self.convl2l = module(in_cl, out_cl, kernel_size, + stride, padding, dilation, groups, bias, padding_mode=padding_type) + module = nn.Identity if in_cl == 0 or out_cg == 0 else nn.Conv2d + self.convl2g = module(in_cl, out_cg, kernel_size, + stride, padding, dilation, groups, bias, padding_mode=padding_type) + module = nn.Identity if in_cg == 0 or out_cl == 0 else nn.Conv2d + self.convg2l = module(in_cg, out_cl, kernel_size, + stride, padding, dilation, groups, bias, padding_mode=padding_type) + module = nn.Identity if in_cg == 0 or out_cg == 0 else SpectralTransform + self.convg2g = module( + in_cg, out_cg, stride, 1 if groups == 1 else groups // 2, enable_lfu, **spectral_kwargs) + + self.gated = gated + module = nn.Identity if in_cg == 0 or out_cl == 0 or not self.gated else nn.Conv2d + self.gate = module(in_channels, 2, 1) + + def forward(self, x): + x_l, x_g = x if type(x) is tuple else (x, 0) + out_xl, out_xg = 0, 0 + + if self.gated: + total_input_parts = [x_l] + if torch.is_tensor(x_g): + total_input_parts.append(x_g) + total_input = torch.cat(total_input_parts, dim=1) + + gates = torch.sigmoid(self.gate(total_input)) + g2l_gate, l2g_gate = gates.chunk(2, dim=1) + else: + g2l_gate, l2g_gate = 1, 1 + + if self.ratio_gout != 1: + out_xl = self.convl2l(x_l) + self.convg2l(x_g) * g2l_gate + if self.ratio_gout != 0: + out_xg = self.convl2g(x_l) * l2g_gate + self.convg2g(x_g) + + return out_xl, out_xg + + +class FFC_BN_ACT(nn.Module): + + def __init__(self, in_channels, out_channels, + kernel_size, ratio_gin, ratio_gout, + stride=1, padding=0, dilation=1, groups=1, bias=False, + norm_layer=nn.BatchNorm2d, activation_layer=nn.Identity, + padding_type='reflect', + enable_lfu=True, **kwargs): + super(FFC_BN_ACT, self).__init__() + self.ffc = FFC(in_channels, out_channels, kernel_size, + ratio_gin, ratio_gout, stride, padding, dilation, + groups, bias, enable_lfu, padding_type=padding_type, **kwargs) + lnorm = nn.Identity if ratio_gout == 1 else norm_layer + gnorm = nn.Identity if ratio_gout == 0 else norm_layer + global_channels = int(out_channels * ratio_gout) + self.bn_l = lnorm(out_channels - global_channels) + self.bn_g = gnorm(global_channels) + + lact = nn.Identity if ratio_gout == 1 else activation_layer + gact = nn.Identity if ratio_gout == 0 else activation_layer + self.act_l = lact(inplace=True) + self.act_g = 
gact(inplace=True) + + def forward(self, x): + x_l, x_g = self.ffc(x) + x_l = self.act_l(self.bn_l(x_l)) + x_g = self.act_g(self.bn_g(x_g)) + return x_l, x_g + + +class FFCResnetBlock(nn.Module): + def __init__(self, dim, padding_type, norm_layer, activation_layer=nn.ReLU, dilation=1, + spatial_transform_kwargs=None, inline=False, **conv_kwargs): + super().__init__() + self.conv1 = FFC_BN_ACT(dim, dim, kernel_size=3, padding=dilation, dilation=dilation, + norm_layer=norm_layer, + activation_layer=activation_layer, + padding_type=padding_type, + **conv_kwargs) + self.conv2 = FFC_BN_ACT(dim, dim, kernel_size=3, padding=dilation, dilation=dilation, + norm_layer=norm_layer, + activation_layer=activation_layer, + padding_type=padding_type, + **conv_kwargs) + if spatial_transform_kwargs is not None: + self.conv1 = LearnableSpatialTransformWrapper(self.conv1, **spatial_transform_kwargs) + self.conv2 = LearnableSpatialTransformWrapper(self.conv2, **spatial_transform_kwargs) + self.inline = inline + + def forward(self, x): + if self.inline: + x_l, x_g = x[:, :-self.conv1.ffc.global_in_num], x[:, -self.conv1.ffc.global_in_num:] + else: + x_l, x_g = x if type(x) is tuple else (x, 0) + + id_l, id_g = x_l, x_g + + x_l, x_g = self.conv1((x_l, x_g)) + x_l, x_g = self.conv2((x_l, x_g)) + + x_l, x_g = id_l + x_l, id_g + x_g + out = x_l, x_g + if self.inline: + out = torch.cat(out, dim=1) + return out + + +class ConcatTupleLayer(nn.Module): + def forward(self, x): + assert isinstance(x, tuple) + x_l, x_g = x + assert torch.is_tensor(x_l) or torch.is_tensor(x_g) + if not torch.is_tensor(x_g): + return x_l + return torch.cat(x, dim=1) + + +class FFCResNetGenerator(nn.Module): + def __init__(self, input_nc, output_nc, ngf=64, n_downsampling=3, n_blocks=9, norm_layer=nn.BatchNorm2d, + padding_type='reflect', activation_layer=nn.ReLU, + up_norm_layer=nn.BatchNorm2d, up_activation=nn.ReLU(True), + init_conv_kwargs={}, downsample_conv_kwargs={}, resnet_conv_kwargs={}, + spatial_transform_layers=None, spatial_transform_kwargs={}, + add_out_act=True, max_features=1024, out_ffc=False, out_ffc_kwargs={}): + assert (n_blocks >= 0) + super().__init__() + + model = [nn.ReflectionPad2d(3), + FFC_BN_ACT(input_nc, ngf, kernel_size=7, padding=0, norm_layer=norm_layer, + activation_layer=activation_layer, **init_conv_kwargs)] + + ### downsample + for i in range(n_downsampling): + mult = 2 ** i + if i == n_downsampling - 1: + cur_conv_kwargs = dict(downsample_conv_kwargs) + cur_conv_kwargs['ratio_gout'] = resnet_conv_kwargs.get('ratio_gin', 0) + else: + cur_conv_kwargs = downsample_conv_kwargs + model += [FFC_BN_ACT(min(max_features, ngf * mult), + min(max_features, ngf * mult * 2), + kernel_size=3, stride=2, padding=1, + norm_layer=norm_layer, + activation_layer=activation_layer, + **cur_conv_kwargs)] + + mult = 2 ** n_downsampling + feats_num_bottleneck = min(max_features, ngf * mult) + + ### resnet blocks + for i in range(n_blocks): + cur_resblock = FFCResnetBlock(feats_num_bottleneck, padding_type=padding_type, activation_layer=activation_layer, + norm_layer=norm_layer, **resnet_conv_kwargs) + if spatial_transform_layers is not None and i in spatial_transform_layers: + cur_resblock = LearnableSpatialTransformWrapper(cur_resblock, **spatial_transform_kwargs) + model += [cur_resblock] + + model += [ConcatTupleLayer()] + + ### upsample + for i in range(n_downsampling): + mult = 2 ** (n_downsampling - i) + model += [nn.ConvTranspose2d(min(max_features, ngf * mult), + min(max_features, int(ngf * mult / 2)), + kernel_size=3, 
stride=2, padding=1, output_padding=1), + up_norm_layer(min(max_features, int(ngf * mult / 2))), + up_activation] + + if out_ffc: + model += [FFCResnetBlock(ngf, padding_type=padding_type, activation_layer=activation_layer, + norm_layer=norm_layer, inline=True, **out_ffc_kwargs)] + + model += [nn.ReflectionPad2d(3), + nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0)] + if add_out_act: + model.append(get_activation('tanh' if add_out_act is True else add_out_act)) + self.model = nn.Sequential(*model) + + def forward(self, input): + return self.model(input) + + +class FFCNLayerDiscriminator(BaseDiscriminator): + def __init__(self, input_nc, ndf=64, n_layers=3, norm_layer=nn.BatchNorm2d, max_features=512, + init_conv_kwargs={}, conv_kwargs={}): + super().__init__() + self.n_layers = n_layers + + def _act_ctor(inplace=True): + return nn.LeakyReLU(negative_slope=0.2, inplace=inplace) + + kw = 3 + padw = int(np.ceil((kw-1.0)/2)) + sequence = [[FFC_BN_ACT(input_nc, ndf, kernel_size=kw, padding=padw, norm_layer=norm_layer, + activation_layer=_act_ctor, **init_conv_kwargs)]] + + nf = ndf + for n in range(1, n_layers): + nf_prev = nf + nf = min(nf * 2, max_features) + + cur_model = [ + FFC_BN_ACT(nf_prev, nf, + kernel_size=kw, stride=2, padding=padw, + norm_layer=norm_layer, + activation_layer=_act_ctor, + **conv_kwargs) + ] + sequence.append(cur_model) + + nf_prev = nf + nf = min(nf * 2, 512) + + cur_model = [ + FFC_BN_ACT(nf_prev, nf, + kernel_size=kw, stride=1, padding=padw, + norm_layer=norm_layer, + activation_layer=lambda *args, **kwargs: nn.LeakyReLU(*args, negative_slope=0.2, **kwargs), + **conv_kwargs), + ConcatTupleLayer() + ] + sequence.append(cur_model) + + sequence += [[nn.Conv2d(nf, 1, kernel_size=kw, stride=1, padding=padw)]] + + for n in range(len(sequence)): + setattr(self, 'model'+str(n), nn.Sequential(*sequence[n])) + + def get_all_activations(self, x): + res = [x] + for n in range(self.n_layers + 2): + model = getattr(self, 'model' + str(n)) + res.append(model(res[-1])) + return res[1:] + + def forward(self, x): + act = self.get_all_activations(x) + feats = [] + for out in act[:-1]: + if isinstance(out, tuple): + if torch.is_tensor(out[1]): + out = torch.cat(out, dim=1) + else: + out = out[0] + feats.append(out) + return act[-1], feats diff --git a/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/modules/multidilated_conv.py b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/modules/multidilated_conv.py new file mode 100644 index 0000000000000000000000000000000000000000..c57d0b457d4b30aeeffcd8cba138a502ba7affc5 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/modules/multidilated_conv.py @@ -0,0 +1,98 @@ +import torch +import torch.nn as nn +import random +from annotator.lama.saicinpainting.training.modules.depthwise_sep_conv import DepthWiseSeperableConv + +class MultidilatedConv(nn.Module): + def __init__(self, in_dim, out_dim, kernel_size, dilation_num=3, comb_mode='sum', equal_dim=True, + shared_weights=False, padding=1, min_dilation=1, shuffle_in_channels=False, use_depthwise=False, **kwargs): + super().__init__() + convs = [] + self.equal_dim = equal_dim + assert comb_mode in ('cat_out', 'sum', 'cat_in', 'cat_both'), comb_mode + if comb_mode in ('cat_out', 'cat_both'): + self.cat_out = True + if equal_dim: + assert out_dim % dilation_num == 0 + out_dims = [out_dim // dilation_num] * dilation_num + self.index = sum([[i + j * (out_dims[0]) for j in 
range(dilation_num)] for i in range(out_dims[0])], []) + else: + out_dims = [out_dim // 2 ** (i + 1) for i in range(dilation_num - 1)] + out_dims.append(out_dim - sum(out_dims)) + index = [] + starts = [0] + out_dims[:-1] + lengths = [out_dims[i] // out_dims[-1] for i in range(dilation_num)] + for i in range(out_dims[-1]): + for j in range(dilation_num): + index += list(range(starts[j], starts[j] + lengths[j])) + starts[j] += lengths[j] + self.index = index + assert(len(index) == out_dim) + self.out_dims = out_dims + else: + self.cat_out = False + self.out_dims = [out_dim] * dilation_num + + if comb_mode in ('cat_in', 'cat_both'): + if equal_dim: + assert in_dim % dilation_num == 0 + in_dims = [in_dim // dilation_num] * dilation_num + else: + in_dims = [in_dim // 2 ** (i + 1) for i in range(dilation_num - 1)] + in_dims.append(in_dim - sum(in_dims)) + self.in_dims = in_dims + self.cat_in = True + else: + self.cat_in = False + self.in_dims = [in_dim] * dilation_num + + conv_type = DepthWiseSeperableConv if use_depthwise else nn.Conv2d + dilation = min_dilation + for i in range(dilation_num): + if isinstance(padding, int): + cur_padding = padding * dilation + else: + cur_padding = padding[i] + convs.append(conv_type( + self.in_dims[i], self.out_dims[i], kernel_size, padding=cur_padding, dilation=dilation, **kwargs + )) + if i > 0 and shared_weights: + convs[-1].weight = convs[0].weight + convs[-1].bias = convs[0].bias + dilation *= 2 + self.convs = nn.ModuleList(convs) + + self.shuffle_in_channels = shuffle_in_channels + if self.shuffle_in_channels: + # shuffle list as shuffling of tensors is nondeterministic + in_channels_permute = list(range(in_dim)) + random.shuffle(in_channels_permute) + # save as buffer so it is saved and loaded with checkpoint + self.register_buffer('in_channels_permute', torch.tensor(in_channels_permute)) + + def forward(self, x): + if self.shuffle_in_channels: + x = x[:, self.in_channels_permute] + + outs = [] + if self.cat_in: + if self.equal_dim: + x = x.chunk(len(self.convs), dim=1) + else: + new_x = [] + start = 0 + for dim in self.in_dims: + new_x.append(x[:, start:start+dim]) + start += dim + x = new_x + for i, conv in enumerate(self.convs): + if self.cat_in: + input = x[i] + else: + input = x + outs.append(conv(input)) + if self.cat_out: + out = torch.cat(outs, dim=1)[:, self.index] + else: + out = sum(outs) + return out diff --git a/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/modules/multiscale.py b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/modules/multiscale.py new file mode 100644 index 0000000000000000000000000000000000000000..3f41252f3c7509ee58b939215baef328cfbe48c8 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/modules/multiscale.py @@ -0,0 +1,244 @@ +from typing import List, Tuple, Union, Optional + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from annotator.lama.saicinpainting.training.modules.base import get_conv_block_ctor, get_activation +from annotator.lama.saicinpainting.training.modules.pix2pixhd import ResnetBlock + + +class ResNetHead(nn.Module): + def __init__(self, input_nc, ngf=64, n_downsampling=3, n_blocks=9, norm_layer=nn.BatchNorm2d, + padding_type='reflect', conv_kind='default', activation=nn.ReLU(True)): + assert (n_blocks >= 0) + super(ResNetHead, self).__init__() + + conv_layer = get_conv_block_ctor(conv_kind) + + model = [nn.ReflectionPad2d(3), + conv_layer(input_nc, 
ngf, kernel_size=7, padding=0), + norm_layer(ngf), + activation] + + ### downsample + for i in range(n_downsampling): + mult = 2 ** i + model += [conv_layer(ngf * mult, ngf * mult * 2, kernel_size=3, stride=2, padding=1), + norm_layer(ngf * mult * 2), + activation] + + mult = 2 ** n_downsampling + + ### resnet blocks + for i in range(n_blocks): + model += [ResnetBlock(ngf * mult, padding_type=padding_type, activation=activation, norm_layer=norm_layer, + conv_kind=conv_kind)] + + self.model = nn.Sequential(*model) + + def forward(self, input): + return self.model(input) + + +class ResNetTail(nn.Module): + def __init__(self, output_nc, ngf=64, n_downsampling=3, n_blocks=9, norm_layer=nn.BatchNorm2d, + padding_type='reflect', conv_kind='default', activation=nn.ReLU(True), + up_norm_layer=nn.BatchNorm2d, up_activation=nn.ReLU(True), add_out_act=False, out_extra_layers_n=0, + add_in_proj=None): + assert (n_blocks >= 0) + super(ResNetTail, self).__init__() + + mult = 2 ** n_downsampling + + model = [] + + if add_in_proj is not None: + model.append(nn.Conv2d(add_in_proj, ngf * mult, kernel_size=1)) + + ### resnet blocks + for i in range(n_blocks): + model += [ResnetBlock(ngf * mult, padding_type=padding_type, activation=activation, norm_layer=norm_layer, + conv_kind=conv_kind)] + + ### upsample + for i in range(n_downsampling): + mult = 2 ** (n_downsampling - i) + model += [nn.ConvTranspose2d(ngf * mult, int(ngf * mult / 2), kernel_size=3, stride=2, padding=1, + output_padding=1), + up_norm_layer(int(ngf * mult / 2)), + up_activation] + self.model = nn.Sequential(*model) + + out_layers = [] + for _ in range(out_extra_layers_n): + out_layers += [nn.Conv2d(ngf, ngf, kernel_size=1, padding=0), + up_norm_layer(ngf), + up_activation] + out_layers += [nn.ReflectionPad2d(3), + nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0)] + + if add_out_act: + out_layers.append(get_activation('tanh' if add_out_act is True else add_out_act)) + + self.out_proj = nn.Sequential(*out_layers) + + def forward(self, input, return_last_act=False): + features = self.model(input) + out = self.out_proj(features) + if return_last_act: + return out, features + else: + return out + + +class MultiscaleResNet(nn.Module): + def __init__(self, input_nc, output_nc, ngf=64, n_downsampling=2, n_blocks_head=2, n_blocks_tail=6, n_scales=3, + norm_layer=nn.BatchNorm2d, padding_type='reflect', conv_kind='default', activation=nn.ReLU(True), + up_norm_layer=nn.BatchNorm2d, up_activation=nn.ReLU(True), add_out_act=False, out_extra_layers_n=0, + out_cumulative=False, return_only_hr=False): + super().__init__() + + self.heads = nn.ModuleList([ResNetHead(input_nc, ngf=ngf, n_downsampling=n_downsampling, + n_blocks=n_blocks_head, norm_layer=norm_layer, padding_type=padding_type, + conv_kind=conv_kind, activation=activation) + for i in range(n_scales)]) + tail_in_feats = ngf * (2 ** n_downsampling) + ngf + self.tails = nn.ModuleList([ResNetTail(output_nc, + ngf=ngf, n_downsampling=n_downsampling, + n_blocks=n_blocks_tail, norm_layer=norm_layer, padding_type=padding_type, + conv_kind=conv_kind, activation=activation, up_norm_layer=up_norm_layer, + up_activation=up_activation, add_out_act=add_out_act, + out_extra_layers_n=out_extra_layers_n, + add_in_proj=None if (i == n_scales - 1) else tail_in_feats) + for i in range(n_scales)]) + + self.out_cumulative = out_cumulative + self.return_only_hr = return_only_hr + + @property + def num_scales(self): + return len(self.heads) + + def forward(self, ms_inputs: List[torch.Tensor], smallest_scales_num: 
Optional[int] = None) \ + -> Union[torch.Tensor, List[torch.Tensor]]: + """ + :param ms_inputs: List of inputs of different resolutions from HR to LR + :param smallest_scales_num: int or None, number of smallest scales to take at input + :return: Depending on return_only_hr: + True: Only the most HR output + False: List of outputs of different resolutions from HR to LR + """ + if smallest_scales_num is None: + assert len(self.heads) == len(ms_inputs), (len(self.heads), len(ms_inputs), smallest_scales_num) + smallest_scales_num = len(self.heads) + else: + assert smallest_scales_num == len(ms_inputs) <= len(self.heads), (len(self.heads), len(ms_inputs), smallest_scales_num) + + cur_heads = self.heads[-smallest_scales_num:] + ms_features = [cur_head(cur_inp) for cur_head, cur_inp in zip(cur_heads, ms_inputs)] + + all_outputs = [] + prev_tail_features = None + for i in range(len(ms_features)): + scale_i = -i - 1 + + cur_tail_input = ms_features[-i - 1] + if prev_tail_features is not None: + if prev_tail_features.shape != cur_tail_input.shape: + prev_tail_features = F.interpolate(prev_tail_features, size=cur_tail_input.shape[2:], + mode='bilinear', align_corners=False) + cur_tail_input = torch.cat((cur_tail_input, prev_tail_features), dim=1) + + cur_out, cur_tail_feats = self.tails[scale_i](cur_tail_input, return_last_act=True) + + prev_tail_features = cur_tail_feats + all_outputs.append(cur_out) + + if self.out_cumulative: + all_outputs_cum = [all_outputs[0]] + for i in range(1, len(ms_features)): + cur_out = all_outputs[i] + cur_out_cum = cur_out + F.interpolate(all_outputs_cum[-1], size=cur_out.shape[2:], + mode='bilinear', align_corners=False) + all_outputs_cum.append(cur_out_cum) + all_outputs = all_outputs_cum + + if self.return_only_hr: + return all_outputs[-1] + else: + return all_outputs[::-1] + + +class MultiscaleDiscriminatorSimple(nn.Module): + def __init__(self, ms_impl): + super().__init__() + self.ms_impl = nn.ModuleList(ms_impl) + + @property + def num_scales(self): + return len(self.ms_impl) + + def forward(self, ms_inputs: List[torch.Tensor], smallest_scales_num: Optional[int] = None) \ + -> List[Tuple[torch.Tensor, List[torch.Tensor]]]: + """ + :param ms_inputs: List of inputs of different resolutions from HR to LR + :param smallest_scales_num: int or None, number of smallest scales to take at input + :return: List of pairs (prediction, features) for different resolutions from HR to LR + """ + if smallest_scales_num is None: + assert len(self.ms_impl) == len(ms_inputs), (len(self.ms_impl), len(ms_inputs), smallest_scales_num) + smallest_scales_num = len(self.heads) + else: + assert smallest_scales_num == len(ms_inputs) <= len(self.ms_impl), \ + (len(self.ms_impl), len(ms_inputs), smallest_scales_num) + + return [cur_discr(cur_input) for cur_discr, cur_input in zip(self.ms_impl[-smallest_scales_num:], ms_inputs)] + + +class SingleToMultiScaleInputMixin: + def forward(self, x: torch.Tensor) -> List: + orig_height, orig_width = x.shape[2:] + factors = [2 ** i for i in range(self.num_scales)] + ms_inputs = [F.interpolate(x, size=(orig_height // f, orig_width // f), mode='bilinear', align_corners=False) + for f in factors] + return super().forward(ms_inputs) + + +class GeneratorMultiToSingleOutputMixin: + def forward(self, x): + return super().forward(x)[0] + + +class DiscriminatorMultiToSingleOutputMixin: + def forward(self, x): + out_feat_tuples = super().forward(x) + return out_feat_tuples[0][0], [f for _, flist in out_feat_tuples for f in flist] + + +class 
DiscriminatorMultiToSingleOutputStackedMixin: + def __init__(self, *args, return_feats_only_levels=None, **kwargs): + super().__init__(*args, **kwargs) + self.return_feats_only_levels = return_feats_only_levels + + def forward(self, x): + out_feat_tuples = super().forward(x) + outs = [out for out, _ in out_feat_tuples] + scaled_outs = [outs[0]] + [F.interpolate(cur_out, size=outs[0].shape[-2:], + mode='bilinear', align_corners=False) + for cur_out in outs[1:]] + out = torch.cat(scaled_outs, dim=1) + if self.return_feats_only_levels is not None: + feat_lists = [out_feat_tuples[i][1] for i in self.return_feats_only_levels] + else: + feat_lists = [flist for _, flist in out_feat_tuples] + feats = [f for flist in feat_lists for f in flist] + return out, feats + + +class MultiscaleDiscrSingleInput(SingleToMultiScaleInputMixin, DiscriminatorMultiToSingleOutputStackedMixin, MultiscaleDiscriminatorSimple): + pass + + +class MultiscaleResNetSingle(GeneratorMultiToSingleOutputMixin, SingleToMultiScaleInputMixin, MultiscaleResNet): + pass diff --git a/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/modules/pix2pixhd.py b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/modules/pix2pixhd.py new file mode 100644 index 0000000000000000000000000000000000000000..2e4fcfcff083f9ce4d3c7880ff0f74f8f745a251 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/modules/pix2pixhd.py @@ -0,0 +1,669 @@ +# original: https://github.com/NVIDIA/pix2pixHD/blob/master/models/networks.py +import collections +from functools import partial +import functools +import logging +from collections import defaultdict + +import numpy as np +import torch.nn as nn + +from annotator.lama.saicinpainting.training.modules.base import BaseDiscriminator, deconv_factory, get_conv_block_ctor, get_norm_layer, get_activation +from annotator.lama.saicinpainting.training.modules.ffc import FFCResnetBlock +from annotator.lama.saicinpainting.training.modules.multidilated_conv import MultidilatedConv + +class DotDict(defaultdict): + # https://stackoverflow.com/questions/2352181/how-to-use-a-dot-to-access-members-of-dictionary + """dot.notation access to dictionary attributes""" + __getattr__ = defaultdict.get + __setattr__ = defaultdict.__setitem__ + __delattr__ = defaultdict.__delitem__ + +class Identity(nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return x + + +class ResnetBlock(nn.Module): + def __init__(self, dim, padding_type, norm_layer, activation=nn.ReLU(True), use_dropout=False, conv_kind='default', + dilation=1, in_dim=None, groups=1, second_dilation=None): + super(ResnetBlock, self).__init__() + self.in_dim = in_dim + self.dim = dim + if second_dilation is None: + second_dilation = dilation + self.conv_block = self.build_conv_block(dim, padding_type, norm_layer, activation, use_dropout, + conv_kind=conv_kind, dilation=dilation, in_dim=in_dim, groups=groups, + second_dilation=second_dilation) + + if self.in_dim is not None: + self.input_conv = nn.Conv2d(in_dim, dim, 1) + + self.out_channnels = dim + + def build_conv_block(self, dim, padding_type, norm_layer, activation, use_dropout, conv_kind='default', + dilation=1, in_dim=None, groups=1, second_dilation=1): + conv_layer = get_conv_block_ctor(conv_kind) + + conv_block = [] + p = 0 + if padding_type == 'reflect': + conv_block += [nn.ReflectionPad2d(dilation)] + elif padding_type == 'replicate': + conv_block += 
[nn.ReplicationPad2d(dilation)] + elif padding_type == 'zero': + p = dilation + else: + raise NotImplementedError('padding [%s] is not implemented' % padding_type) + + if in_dim is None: + in_dim = dim + + conv_block += [conv_layer(in_dim, dim, kernel_size=3, padding=p, dilation=dilation), + norm_layer(dim), + activation] + if use_dropout: + conv_block += [nn.Dropout(0.5)] + + p = 0 + if padding_type == 'reflect': + conv_block += [nn.ReflectionPad2d(second_dilation)] + elif padding_type == 'replicate': + conv_block += [nn.ReplicationPad2d(second_dilation)] + elif padding_type == 'zero': + p = second_dilation + else: + raise NotImplementedError('padding [%s] is not implemented' % padding_type) + conv_block += [conv_layer(dim, dim, kernel_size=3, padding=p, dilation=second_dilation, groups=groups), + norm_layer(dim)] + + return nn.Sequential(*conv_block) + + def forward(self, x): + x_before = x + if self.in_dim is not None: + x = self.input_conv(x) + out = x + self.conv_block(x_before) + return out + +class ResnetBlock5x5(nn.Module): + def __init__(self, dim, padding_type, norm_layer, activation=nn.ReLU(True), use_dropout=False, conv_kind='default', + dilation=1, in_dim=None, groups=1, second_dilation=None): + super(ResnetBlock5x5, self).__init__() + self.in_dim = in_dim + self.dim = dim + if second_dilation is None: + second_dilation = dilation + self.conv_block = self.build_conv_block(dim, padding_type, norm_layer, activation, use_dropout, + conv_kind=conv_kind, dilation=dilation, in_dim=in_dim, groups=groups, + second_dilation=second_dilation) + + if self.in_dim is not None: + self.input_conv = nn.Conv2d(in_dim, dim, 1) + + self.out_channnels = dim + + def build_conv_block(self, dim, padding_type, norm_layer, activation, use_dropout, conv_kind='default', + dilation=1, in_dim=None, groups=1, second_dilation=1): + conv_layer = get_conv_block_ctor(conv_kind) + + conv_block = [] + p = 0 + if padding_type == 'reflect': + conv_block += [nn.ReflectionPad2d(dilation * 2)] + elif padding_type == 'replicate': + conv_block += [nn.ReplicationPad2d(dilation * 2)] + elif padding_type == 'zero': + p = dilation * 2 + else: + raise NotImplementedError('padding [%s] is not implemented' % padding_type) + + if in_dim is None: + in_dim = dim + + conv_block += [conv_layer(in_dim, dim, kernel_size=5, padding=p, dilation=dilation), + norm_layer(dim), + activation] + if use_dropout: + conv_block += [nn.Dropout(0.5)] + + p = 0 + if padding_type == 'reflect': + conv_block += [nn.ReflectionPad2d(second_dilation * 2)] + elif padding_type == 'replicate': + conv_block += [nn.ReplicationPad2d(second_dilation * 2)] + elif padding_type == 'zero': + p = second_dilation * 2 + else: + raise NotImplementedError('padding [%s] is not implemented' % padding_type) + conv_block += [conv_layer(dim, dim, kernel_size=5, padding=p, dilation=second_dilation, groups=groups), + norm_layer(dim)] + + return nn.Sequential(*conv_block) + + def forward(self, x): + x_before = x + if self.in_dim is not None: + x = self.input_conv(x) + out = x + self.conv_block(x_before) + return out + + +class MultidilatedResnetBlock(nn.Module): + def __init__(self, dim, padding_type, conv_layer, norm_layer, activation=nn.ReLU(True), use_dropout=False): + super().__init__() + self.conv_block = self.build_conv_block(dim, padding_type, conv_layer, norm_layer, activation, use_dropout) + + def build_conv_block(self, dim, padding_type, conv_layer, norm_layer, activation, use_dropout, dilation=1): + conv_block = [] + conv_block += [conv_layer(dim, dim, kernel_size=3, 
padding_mode=padding_type), + norm_layer(dim), + activation] + if use_dropout: + conv_block += [nn.Dropout(0.5)] + + conv_block += [conv_layer(dim, dim, kernel_size=3, padding_mode=padding_type), + norm_layer(dim)] + + return nn.Sequential(*conv_block) + + def forward(self, x): + out = x + self.conv_block(x) + return out + + +class MultiDilatedGlobalGenerator(nn.Module): + def __init__(self, input_nc, output_nc, ngf=64, n_downsampling=3, + n_blocks=3, norm_layer=nn.BatchNorm2d, + padding_type='reflect', conv_kind='default', + deconv_kind='convtranspose', activation=nn.ReLU(True), + up_norm_layer=nn.BatchNorm2d, affine=None, up_activation=nn.ReLU(True), + add_out_act=True, max_features=1024, multidilation_kwargs={}, + ffc_positions=None, ffc_kwargs={}): + assert (n_blocks >= 0) + super().__init__() + + conv_layer = get_conv_block_ctor(conv_kind) + resnet_conv_layer = functools.partial(get_conv_block_ctor('multidilated'), **multidilation_kwargs) + norm_layer = get_norm_layer(norm_layer) + if affine is not None: + norm_layer = partial(norm_layer, affine=affine) + up_norm_layer = get_norm_layer(up_norm_layer) + if affine is not None: + up_norm_layer = partial(up_norm_layer, affine=affine) + + model = [nn.ReflectionPad2d(3), + conv_layer(input_nc, ngf, kernel_size=7, padding=0), + norm_layer(ngf), + activation] + + identity = Identity() + ### downsample + for i in range(n_downsampling): + mult = 2 ** i + + model += [conv_layer(min(max_features, ngf * mult), + min(max_features, ngf * mult * 2), + kernel_size=3, stride=2, padding=1), + norm_layer(min(max_features, ngf * mult * 2)), + activation] + + mult = 2 ** n_downsampling + feats_num_bottleneck = min(max_features, ngf * mult) + + ### resnet blocks + for i in range(n_blocks): + if ffc_positions is not None and i in ffc_positions: + model += [FFCResnetBlock(feats_num_bottleneck, padding_type, norm_layer, activation_layer=nn.ReLU, + inline=True, **ffc_kwargs)] + model += [MultidilatedResnetBlock(feats_num_bottleneck, padding_type=padding_type, + conv_layer=resnet_conv_layer, activation=activation, + norm_layer=norm_layer)] + + ### upsample + for i in range(n_downsampling): + mult = 2 ** (n_downsampling - i) + model += deconv_factory(deconv_kind, ngf, mult, up_norm_layer, up_activation, max_features) + model += [nn.ReflectionPad2d(3), + nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0)] + if add_out_act: + model.append(get_activation('tanh' if add_out_act is True else add_out_act)) + self.model = nn.Sequential(*model) + + def forward(self, input): + return self.model(input) + +class ConfigGlobalGenerator(nn.Module): + def __init__(self, input_nc, output_nc, ngf=64, n_downsampling=3, + n_blocks=3, norm_layer=nn.BatchNorm2d, + padding_type='reflect', conv_kind='default', + deconv_kind='convtranspose', activation=nn.ReLU(True), + up_norm_layer=nn.BatchNorm2d, affine=None, up_activation=nn.ReLU(True), + add_out_act=True, max_features=1024, + manual_block_spec=[], + resnet_block_kind='multidilatedresnetblock', + resnet_conv_kind='multidilated', + resnet_dilation=1, + multidilation_kwargs={}): + assert (n_blocks >= 0) + super().__init__() + + conv_layer = get_conv_block_ctor(conv_kind) + resnet_conv_layer = functools.partial(get_conv_block_ctor(resnet_conv_kind), **multidilation_kwargs) + norm_layer = get_norm_layer(norm_layer) + if affine is not None: + norm_layer = partial(norm_layer, affine=affine) + up_norm_layer = get_norm_layer(up_norm_layer) + if affine is not None: + up_norm_layer = partial(up_norm_layer, affine=affine) + + model = 
[nn.ReflectionPad2d(3), + conv_layer(input_nc, ngf, kernel_size=7, padding=0), + norm_layer(ngf), + activation] + + identity = Identity() + + ### downsample + for i in range(n_downsampling): + mult = 2 ** i + model += [conv_layer(min(max_features, ngf * mult), + min(max_features, ngf * mult * 2), + kernel_size=3, stride=2, padding=1), + norm_layer(min(max_features, ngf * mult * 2)), + activation] + + mult = 2 ** n_downsampling + feats_num_bottleneck = min(max_features, ngf * mult) + + if len(manual_block_spec) == 0: + manual_block_spec = [ + DotDict(lambda : None, { + 'n_blocks': n_blocks, + 'use_default': True}) + ] + + ### resnet blocks + for block_spec in manual_block_spec: + def make_and_add_blocks(model, block_spec): + block_spec = DotDict(lambda : None, block_spec) + if not block_spec.use_default: + resnet_conv_layer = functools.partial(get_conv_block_ctor(block_spec.resnet_conv_kind), **block_spec.multidilation_kwargs) + resnet_conv_kind = block_spec.resnet_conv_kind + resnet_block_kind = block_spec.resnet_block_kind + if block_spec.resnet_dilation is not None: + resnet_dilation = block_spec.resnet_dilation + for i in range(block_spec.n_blocks): + if resnet_block_kind == "multidilatedresnetblock": + model += [MultidilatedResnetBlock(feats_num_bottleneck, padding_type=padding_type, + conv_layer=resnet_conv_layer, activation=activation, + norm_layer=norm_layer)] + if resnet_block_kind == "resnetblock": + model += [ResnetBlock(ngf * mult, padding_type=padding_type, activation=activation, norm_layer=norm_layer, + conv_kind=resnet_conv_kind)] + if resnet_block_kind == "resnetblock5x5": + model += [ResnetBlock5x5(ngf * mult, padding_type=padding_type, activation=activation, norm_layer=norm_layer, + conv_kind=resnet_conv_kind)] + if resnet_block_kind == "resnetblockdwdil": + model += [ResnetBlock(ngf * mult, padding_type=padding_type, activation=activation, norm_layer=norm_layer, + conv_kind=resnet_conv_kind, dilation=resnet_dilation, second_dilation=resnet_dilation)] + make_and_add_blocks(model, block_spec) + + ### upsample + for i in range(n_downsampling): + mult = 2 ** (n_downsampling - i) + model += deconv_factory(deconv_kind, ngf, mult, up_norm_layer, up_activation, max_features) + model += [nn.ReflectionPad2d(3), + nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0)] + if add_out_act: + model.append(get_activation('tanh' if add_out_act is True else add_out_act)) + self.model = nn.Sequential(*model) + + def forward(self, input): + return self.model(input) + + +def make_dil_blocks(dilated_blocks_n, dilation_block_kind, dilated_block_kwargs): + blocks = [] + for i in range(dilated_blocks_n): + if dilation_block_kind == 'simple': + blocks.append(ResnetBlock(**dilated_block_kwargs, dilation=2 ** (i + 1))) + elif dilation_block_kind == 'multi': + blocks.append(MultidilatedResnetBlock(**dilated_block_kwargs)) + else: + raise ValueError(f'dilation_block_kind could not be "{dilation_block_kind}"') + return blocks + + +class GlobalGenerator(nn.Module): + def __init__(self, input_nc, output_nc, ngf=64, n_downsampling=3, n_blocks=9, norm_layer=nn.BatchNorm2d, + padding_type='reflect', conv_kind='default', activation=nn.ReLU(True), + up_norm_layer=nn.BatchNorm2d, affine=None, + up_activation=nn.ReLU(True), dilated_blocks_n=0, dilated_blocks_n_start=0, + dilated_blocks_n_middle=0, + add_out_act=True, + max_features=1024, is_resblock_depthwise=False, + ffc_positions=None, ffc_kwargs={}, dilation=1, second_dilation=None, + dilation_block_kind='simple', multidilation_kwargs={}): + assert (n_blocks >= 
0) + super().__init__() + + conv_layer = get_conv_block_ctor(conv_kind) + norm_layer = get_norm_layer(norm_layer) + if affine is not None: + norm_layer = partial(norm_layer, affine=affine) + up_norm_layer = get_norm_layer(up_norm_layer) + if affine is not None: + up_norm_layer = partial(up_norm_layer, affine=affine) + + if ffc_positions is not None: + ffc_positions = collections.Counter(ffc_positions) + + model = [nn.ReflectionPad2d(3), + conv_layer(input_nc, ngf, kernel_size=7, padding=0), + norm_layer(ngf), + activation] + + identity = Identity() + ### downsample + for i in range(n_downsampling): + mult = 2 ** i + + model += [conv_layer(min(max_features, ngf * mult), + min(max_features, ngf * mult * 2), + kernel_size=3, stride=2, padding=1), + norm_layer(min(max_features, ngf * mult * 2)), + activation] + + mult = 2 ** n_downsampling + feats_num_bottleneck = min(max_features, ngf * mult) + + dilated_block_kwargs = dict(dim=feats_num_bottleneck, padding_type=padding_type, + activation=activation, norm_layer=norm_layer) + if dilation_block_kind == 'simple': + dilated_block_kwargs['conv_kind'] = conv_kind + elif dilation_block_kind == 'multi': + dilated_block_kwargs['conv_layer'] = functools.partial( + get_conv_block_ctor('multidilated'), **multidilation_kwargs) + + # dilated blocks at the start of the bottleneck sausage + if dilated_blocks_n_start is not None and dilated_blocks_n_start > 0: + model += make_dil_blocks(dilated_blocks_n_start, dilation_block_kind, dilated_block_kwargs) + + # resnet blocks + for i in range(n_blocks): + # dilated blocks at the middle of the bottleneck sausage + if i == n_blocks // 2 and dilated_blocks_n_middle is not None and dilated_blocks_n_middle > 0: + model += make_dil_blocks(dilated_blocks_n_middle, dilation_block_kind, dilated_block_kwargs) + + if ffc_positions is not None and i in ffc_positions: + for _ in range(ffc_positions[i]): # same position can occur more than once + model += [FFCResnetBlock(feats_num_bottleneck, padding_type, norm_layer, activation_layer=nn.ReLU, + inline=True, **ffc_kwargs)] + + if is_resblock_depthwise: + resblock_groups = feats_num_bottleneck + else: + resblock_groups = 1 + + model += [ResnetBlock(feats_num_bottleneck, padding_type=padding_type, activation=activation, + norm_layer=norm_layer, conv_kind=conv_kind, groups=resblock_groups, + dilation=dilation, second_dilation=second_dilation)] + + + # dilated blocks at the end of the bottleneck sausage + if dilated_blocks_n is not None and dilated_blocks_n > 0: + model += make_dil_blocks(dilated_blocks_n, dilation_block_kind, dilated_block_kwargs) + + # upsample + for i in range(n_downsampling): + mult = 2 ** (n_downsampling - i) + model += [nn.ConvTranspose2d(min(max_features, ngf * mult), + min(max_features, int(ngf * mult / 2)), + kernel_size=3, stride=2, padding=1, output_padding=1), + up_norm_layer(min(max_features, int(ngf * mult / 2))), + up_activation] + model += [nn.ReflectionPad2d(3), + nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0)] + if add_out_act: + model.append(get_activation('tanh' if add_out_act is True else add_out_act)) + self.model = nn.Sequential(*model) + + def forward(self, input): + return self.model(input) + + +class GlobalGeneratorGated(GlobalGenerator): + def __init__(self, *args, **kwargs): + real_kwargs=dict( + conv_kind='gated_bn_relu', + activation=nn.Identity(), + norm_layer=nn.Identity + ) + real_kwargs.update(kwargs) + super().__init__(*args, **real_kwargs) + + +class GlobalGeneratorFromSuperChannels(nn.Module): + def __init__(self, 
input_nc, output_nc, n_downsampling, n_blocks, super_channels, norm_layer="bn", padding_type='reflect', add_out_act=True): + super().__init__() + self.n_downsampling = n_downsampling + norm_layer = get_norm_layer(norm_layer) + if type(norm_layer) == functools.partial: + use_bias = (norm_layer.func == nn.InstanceNorm2d) + else: + use_bias = (norm_layer == nn.InstanceNorm2d) + + channels = self.convert_super_channels(super_channels) + self.channels = channels + + model = [nn.ReflectionPad2d(3), + nn.Conv2d(input_nc, channels[0], kernel_size=7, padding=0, bias=use_bias), + norm_layer(channels[0]), + nn.ReLU(True)] + + for i in range(n_downsampling): # add downsampling layers + mult = 2 ** i + model += [nn.Conv2d(channels[0+i], channels[1+i], kernel_size=3, stride=2, padding=1, bias=use_bias), + norm_layer(channels[1+i]), + nn.ReLU(True)] + + mult = 2 ** n_downsampling + + n_blocks1 = n_blocks // 3 + n_blocks2 = n_blocks1 + n_blocks3 = n_blocks - n_blocks1 - n_blocks2 + + for i in range(n_blocks1): + c = n_downsampling + dim = channels[c] + model += [ResnetBlock(dim, padding_type=padding_type, norm_layer=norm_layer)] + + for i in range(n_blocks2): + c = n_downsampling+1 + dim = channels[c] + kwargs = {} + if i == 0: + kwargs = {"in_dim": channels[c-1]} + model += [ResnetBlock(dim, padding_type=padding_type, norm_layer=norm_layer, **kwargs)] + + for i in range(n_blocks3): + c = n_downsampling+2 + dim = channels[c] + kwargs = {} + if i == 0: + kwargs = {"in_dim": channels[c-1]} + model += [ResnetBlock(dim, padding_type=padding_type, norm_layer=norm_layer, **kwargs)] + + for i in range(n_downsampling): # add upsampling layers + mult = 2 ** (n_downsampling - i) + model += [nn.ConvTranspose2d(channels[n_downsampling+3+i], + channels[n_downsampling+3+i+1], + kernel_size=3, stride=2, + padding=1, output_padding=1, + bias=use_bias), + norm_layer(channels[n_downsampling+3+i+1]), + nn.ReLU(True)] + model += [nn.ReflectionPad2d(3)] + model += [nn.Conv2d(channels[2*n_downsampling+3], output_nc, kernel_size=7, padding=0)] + + if add_out_act: + model.append(get_activation('tanh' if add_out_act is True else add_out_act)) + self.model = nn.Sequential(*model) + + def convert_super_channels(self, super_channels): + n_downsampling = self.n_downsampling + result = [] + cnt = 0 + + if n_downsampling == 2: + N1 = 10 + elif n_downsampling == 3: + N1 = 13 + else: + raise NotImplementedError + + for i in range(0, N1): + if i in [1,4,7,10]: + channel = super_channels[cnt] * (2 ** cnt) + config = {'channel': channel} + result.append(channel) + logging.info(f"Downsample channels {result[-1]}") + cnt += 1 + + for i in range(3): + for counter, j in enumerate(range(N1 + i * 3, N1 + 3 + i * 3)): + if len(super_channels) == 6: + channel = super_channels[3] * 4 + else: + channel = super_channels[i + 3] * 4 + config = {'channel': channel} + if counter == 0: + result.append(channel) + logging.info(f"Bottleneck channels {result[-1]}") + cnt = 2 + + for i in range(N1+9, N1+21): + if i in [22, 25,28]: + cnt -= 1 + if len(super_channels) == 6: + channel = super_channels[5 - cnt] * (2 ** cnt) + else: + channel = super_channels[7 - cnt] * (2 ** cnt) + result.append(int(channel)) + logging.info(f"Upsample channels {result[-1]}") + return result + + def forward(self, input): + return self.model(input) + + +# Defines the PatchGAN discriminator with the specified arguments. 
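+# Editor's illustrative sketch (not part of the upstream pix2pixHD/LaMa sources): the
+# discriminator below maps an image to a small spatial grid of patch logits plus the
+# list of intermediate activations that the training code consumes for feature-matching
+# losses. A hypothetical usage, assuming default kw=4 stride-2 convolutions and a
+# 3-channel input, would look like:
+#
+#     disc = NLayerDiscriminator(input_nc=3, ndf=64, n_layers=3)
+#     x = torch.randn(1, 3, 256, 256)
+#     patch_logits, feats = disc(x)
+#     # patch_logits: (1, 1, H', W') grid, one logit per overlapping receptive-field patch
+#     # feats: list of per-layer activations, ordered from shallow to deep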
+class NLayerDiscriminator(BaseDiscriminator): + def __init__(self, input_nc, ndf=64, n_layers=3, norm_layer=nn.BatchNorm2d,): + super().__init__() + self.n_layers = n_layers + + kw = 4 + padw = int(np.ceil((kw-1.0)/2)) + sequence = [[nn.Conv2d(input_nc, ndf, kernel_size=kw, stride=2, padding=padw), + nn.LeakyReLU(0.2, True)]] + + nf = ndf + for n in range(1, n_layers): + nf_prev = nf + nf = min(nf * 2, 512) + + cur_model = [] + cur_model += [ + nn.Conv2d(nf_prev, nf, kernel_size=kw, stride=2, padding=padw), + norm_layer(nf), + nn.LeakyReLU(0.2, True) + ] + sequence.append(cur_model) + + nf_prev = nf + nf = min(nf * 2, 512) + + cur_model = [] + cur_model += [ + nn.Conv2d(nf_prev, nf, kernel_size=kw, stride=1, padding=padw), + norm_layer(nf), + nn.LeakyReLU(0.2, True) + ] + sequence.append(cur_model) + + sequence += [[nn.Conv2d(nf, 1, kernel_size=kw, stride=1, padding=padw)]] + + for n in range(len(sequence)): + setattr(self, 'model'+str(n), nn.Sequential(*sequence[n])) + + def get_all_activations(self, x): + res = [x] + for n in range(self.n_layers + 2): + model = getattr(self, 'model' + str(n)) + res.append(model(res[-1])) + return res[1:] + + def forward(self, x): + act = self.get_all_activations(x) + return act[-1], act[:-1] + + +class MultidilatedNLayerDiscriminator(BaseDiscriminator): + def __init__(self, input_nc, ndf=64, n_layers=3, norm_layer=nn.BatchNorm2d, multidilation_kwargs={}): + super().__init__() + self.n_layers = n_layers + + kw = 4 + padw = int(np.ceil((kw-1.0)/2)) + sequence = [[nn.Conv2d(input_nc, ndf, kernel_size=kw, stride=2, padding=padw), + nn.LeakyReLU(0.2, True)]] + + nf = ndf + for n in range(1, n_layers): + nf_prev = nf + nf = min(nf * 2, 512) + + cur_model = [] + cur_model += [ + MultidilatedConv(nf_prev, nf, kernel_size=kw, stride=2, padding=[2, 3], **multidilation_kwargs), + norm_layer(nf), + nn.LeakyReLU(0.2, True) + ] + sequence.append(cur_model) + + nf_prev = nf + nf = min(nf * 2, 512) + + cur_model = [] + cur_model += [ + nn.Conv2d(nf_prev, nf, kernel_size=kw, stride=1, padding=padw), + norm_layer(nf), + nn.LeakyReLU(0.2, True) + ] + sequence.append(cur_model) + + sequence += [[nn.Conv2d(nf, 1, kernel_size=kw, stride=1, padding=padw)]] + + for n in range(len(sequence)): + setattr(self, 'model'+str(n), nn.Sequential(*sequence[n])) + + def get_all_activations(self, x): + res = [x] + for n in range(self.n_layers + 2): + model = getattr(self, 'model' + str(n)) + res.append(model(res[-1])) + return res[1:] + + def forward(self, x): + act = self.get_all_activations(x) + return act[-1], act[:-1] + + +class NLayerDiscriminatorAsGen(NLayerDiscriminator): + def forward(self, x): + return super().forward(x)[0] diff --git a/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/modules/spatial_transform.py b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/modules/spatial_transform.py new file mode 100644 index 0000000000000000000000000000000000000000..2de024ba08c549605a08b64d096f1f0db7b7722a --- /dev/null +++ b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/modules/spatial_transform.py @@ -0,0 +1,49 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from kornia.geometry.transform import rotate + + +class LearnableSpatialTransformWrapper(nn.Module): + def __init__(self, impl, pad_coef=0.5, angle_init_range=80, train_angle=True): + super().__init__() + self.impl = impl + self.angle = torch.rand(1) * angle_init_range + if train_angle: + 
self.angle = nn.Parameter(self.angle, requires_grad=True) + self.pad_coef = pad_coef + + def forward(self, x): + if torch.is_tensor(x): + return self.inverse_transform(self.impl(self.transform(x)), x) + elif isinstance(x, tuple): + x_trans = tuple(self.transform(elem) for elem in x) + y_trans = self.impl(x_trans) + return tuple(self.inverse_transform(elem, orig_x) for elem, orig_x in zip(y_trans, x)) + else: + raise ValueError(f'Unexpected input type {type(x)}') + + def transform(self, x): + height, width = x.shape[2:] + pad_h, pad_w = int(height * self.pad_coef), int(width * self.pad_coef) + x_padded = F.pad(x, [pad_w, pad_w, pad_h, pad_h], mode='reflect') + x_padded_rotated = rotate(x_padded, angle=self.angle.to(x_padded)) + return x_padded_rotated + + def inverse_transform(self, y_padded_rotated, orig_x): + height, width = orig_x.shape[2:] + pad_h, pad_w = int(height * self.pad_coef), int(width * self.pad_coef) + + y_padded = rotate(y_padded_rotated, angle=-self.angle.to(y_padded_rotated)) + y_height, y_width = y_padded.shape[2:] + y = y_padded[:, :, pad_h : y_height - pad_h, pad_w : y_width - pad_w] + return y + + +if __name__ == '__main__': + layer = LearnableSpatialTransformWrapper(nn.Identity()) + x = torch.arange(2* 3 * 15 * 15).view(2, 3, 15, 15).float() + y = layer(x) + assert x.shape == y.shape + assert torch.allclose(x[:, :, 1:, 1:][:, :, :-1, :-1], y[:, :, 1:, 1:][:, :, :-1, :-1]) + print('all ok') diff --git a/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/modules/squeeze_excitation.py b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/modules/squeeze_excitation.py new file mode 100644 index 0000000000000000000000000000000000000000..d1d902bb30c071acbc0fa919a134c80fed86bd6c --- /dev/null +++ b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/modules/squeeze_excitation.py @@ -0,0 +1,20 @@ +import torch.nn as nn + + +class SELayer(nn.Module): + def __init__(self, channel, reduction=16): + super(SELayer, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.fc = nn.Sequential( + nn.Linear(channel, channel // reduction, bias=False), + nn.ReLU(inplace=True), + nn.Linear(channel // reduction, channel, bias=False), + nn.Sigmoid() + ) + + def forward(self, x): + b, c, _, _ = x.size() + y = self.avg_pool(x).view(b, c) + y = self.fc(y).view(b, c, 1, 1) + res = x * y.expand_as(x) + return res diff --git a/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/trainers/__init__.py b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/trainers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8307cd31c2139db0ce581637403b3a95dc8cae59 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/trainers/__init__.py @@ -0,0 +1,29 @@ +import logging +import torch +from annotator.lama.saicinpainting.training.trainers.default import DefaultInpaintingTrainingModule + + +def get_training_model_class(kind): + if kind == 'default': + return DefaultInpaintingTrainingModule + + raise ValueError(f'Unknown trainer module {kind}') + + +def make_training_model(config): + kind = config.training_model.kind + kwargs = dict(config.training_model) + kwargs.pop('kind') + kwargs['use_ddp'] = config.trainer.kwargs.get('accelerator', None) == 'ddp' + + logging.info(f'Make training model {kind}') + + cls = get_training_model_class(kind) + return cls(config, 
**kwargs) + + +def load_checkpoint(train_config, path, map_location='cuda', strict=True): + model = make_training_model(train_config).generator + state = torch.load(path, map_location=map_location) + model.load_state_dict(state, strict=strict) + return model diff --git a/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/trainers/base.py b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/trainers/base.py new file mode 100644 index 0000000000000000000000000000000000000000..372dd879a22ff6c3929abf23bb59d6b8b66256b7 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/trainers/base.py @@ -0,0 +1,293 @@ +import copy +import logging +from typing import Dict, Tuple + +import pandas as pd +import pytorch_lightning as ptl +import torch +import torch.nn as nn +import torch.nn.functional as F +# from torch.utils.data import DistributedSampler + +# from annotator.lama.saicinpainting.evaluation import make_evaluator +# from annotator.lama.saicinpainting.training.data.datasets import make_default_train_dataloader, make_default_val_dataloader +# from annotator.lama.saicinpainting.training.losses.adversarial import make_discrim_loss +# from annotator.lama.saicinpainting.training.losses.perceptual import PerceptualLoss, ResNetPL +from annotator.lama.saicinpainting.training.modules import make_generator #, make_discriminator +# from annotator.lama.saicinpainting.training.visualizers import make_visualizer +from annotator.lama.saicinpainting.utils import add_prefix_to_keys, average_dicts, set_requires_grad, flatten_dict, \ + get_has_ddp_rank + +LOGGER = logging.getLogger(__name__) + + +def make_optimizer(parameters, kind='adamw', **kwargs): + if kind == 'adam': + optimizer_class = torch.optim.Adam + elif kind == 'adamw': + optimizer_class = torch.optim.AdamW + else: + raise ValueError(f'Unknown optimizer kind {kind}') + return optimizer_class(parameters, **kwargs) + + +def update_running_average(result: nn.Module, new_iterate_model: nn.Module, decay=0.999): + with torch.no_grad(): + res_params = dict(result.named_parameters()) + new_params = dict(new_iterate_model.named_parameters()) + + for k in res_params.keys(): + res_params[k].data.mul_(decay).add_(new_params[k].data, alpha=1 - decay) + + +def make_multiscale_noise(base_tensor, scales=6, scale_mode='bilinear'): + batch_size, _, height, width = base_tensor.shape + cur_height, cur_width = height, width + result = [] + align_corners = False if scale_mode in ('bilinear', 'bicubic') else None + for _ in range(scales): + cur_sample = torch.randn(batch_size, 1, cur_height, cur_width, device=base_tensor.device) + cur_sample_scaled = F.interpolate(cur_sample, size=(height, width), mode=scale_mode, align_corners=align_corners) + result.append(cur_sample_scaled) + cur_height //= 2 + cur_width //= 2 + return torch.cat(result, dim=1) + + +class BaseInpaintingTrainingModule(ptl.LightningModule): + def __init__(self, config, use_ddp, *args, predict_only=False, visualize_each_iters=100, + average_generator=False, generator_avg_beta=0.999, average_generator_start_step=30000, + average_generator_period=10, store_discr_outputs_for_vis=False, + **kwargs): + super().__init__(*args, **kwargs) + LOGGER.info('BaseInpaintingTrainingModule init called') + + self.config = config + + self.generator = make_generator(config, **self.config.generator) + self.use_ddp = use_ddp + + if not get_has_ddp_rank(): + LOGGER.info(f'Generator\n{self.generator}') + + # if not 
predict_only: + # self.save_hyperparameters(self.config) + # self.discriminator = make_discriminator(**self.config.discriminator) + # self.adversarial_loss = make_discrim_loss(**self.config.losses.adversarial) + # self.visualizer = make_visualizer(**self.config.visualizer) + # self.val_evaluator = make_evaluator(**self.config.evaluator) + # self.test_evaluator = make_evaluator(**self.config.evaluator) + # + # if not get_has_ddp_rank(): + # LOGGER.info(f'Discriminator\n{self.discriminator}') + # + # extra_val = self.config.data.get('extra_val', ()) + # if extra_val: + # self.extra_val_titles = list(extra_val) + # self.extra_evaluators = nn.ModuleDict({k: make_evaluator(**self.config.evaluator) + # for k in extra_val}) + # else: + # self.extra_evaluators = {} + # + # self.average_generator = average_generator + # self.generator_avg_beta = generator_avg_beta + # self.average_generator_start_step = average_generator_start_step + # self.average_generator_period = average_generator_period + # self.generator_average = None + # self.last_generator_averaging_step = -1 + # self.store_discr_outputs_for_vis = store_discr_outputs_for_vis + # + # if self.config.losses.get("l1", {"weight_known": 0})['weight_known'] > 0: + # self.loss_l1 = nn.L1Loss(reduction='none') + # + # if self.config.losses.get("mse", {"weight": 0})['weight'] > 0: + # self.loss_mse = nn.MSELoss(reduction='none') + # + # if self.config.losses.perceptual.weight > 0: + # self.loss_pl = PerceptualLoss() + # + # # if self.config.losses.get("resnet_pl", {"weight": 0})['weight'] > 0: + # # self.loss_resnet_pl = ResNetPL(**self.config.losses.resnet_pl) + # # else: + # # self.loss_resnet_pl = None + # + # self.loss_resnet_pl = None + + self.visualize_each_iters = visualize_each_iters + LOGGER.info('BaseInpaintingTrainingModule init done') + + def configure_optimizers(self): + discriminator_params = list(self.discriminator.parameters()) + return [ + dict(optimizer=make_optimizer(self.generator.parameters(), **self.config.optimizers.generator)), + dict(optimizer=make_optimizer(discriminator_params, **self.config.optimizers.discriminator)), + ] + + def train_dataloader(self): + kwargs = dict(self.config.data.train) + if self.use_ddp: + kwargs['ddp_kwargs'] = dict(num_replicas=self.trainer.num_nodes * self.trainer.num_processes, + rank=self.trainer.global_rank, + shuffle=True) + dataloader = make_default_train_dataloader(**self.config.data.train) + return dataloader + + def val_dataloader(self): + res = [make_default_val_dataloader(**self.config.data.val)] + + if self.config.data.visual_test is not None: + res = res + [make_default_val_dataloader(**self.config.data.visual_test)] + else: + res = res + res + + extra_val = self.config.data.get('extra_val', ()) + if extra_val: + res += [make_default_val_dataloader(**extra_val[k]) for k in self.extra_val_titles] + + return res + + def training_step(self, batch, batch_idx, optimizer_idx=None): + self._is_training_step = True + return self._do_step(batch, batch_idx, mode='train', optimizer_idx=optimizer_idx) + + def validation_step(self, batch, batch_idx, dataloader_idx): + extra_val_key = None + if dataloader_idx == 0: + mode = 'val' + elif dataloader_idx == 1: + mode = 'test' + else: + mode = 'extra_val' + extra_val_key = self.extra_val_titles[dataloader_idx - 2] + self._is_training_step = False + return self._do_step(batch, batch_idx, mode=mode, extra_val_key=extra_val_key) + + def training_step_end(self, batch_parts_outputs): + if self.training and self.average_generator \ + and self.global_step >= 
self.average_generator_start_step \ + and self.global_step >= self.last_generator_averaging_step + self.average_generator_period: + if self.generator_average is None: + self.generator_average = copy.deepcopy(self.generator) + else: + update_running_average(self.generator_average, self.generator, decay=self.generator_avg_beta) + self.last_generator_averaging_step = self.global_step + + full_loss = (batch_parts_outputs['loss'].mean() + if torch.is_tensor(batch_parts_outputs['loss']) # loss is not tensor when no discriminator used + else torch.tensor(batch_parts_outputs['loss']).float().requires_grad_(True)) + log_info = {k: v.mean() for k, v in batch_parts_outputs['log_info'].items()} + self.log_dict(log_info, on_step=True, on_epoch=False) + return full_loss + + def validation_epoch_end(self, outputs): + outputs = [step_out for out_group in outputs for step_out in out_group] + averaged_logs = average_dicts(step_out['log_info'] for step_out in outputs) + self.log_dict({k: v.mean() for k, v in averaged_logs.items()}) + + pd.set_option('display.max_columns', 500) + pd.set_option('display.width', 1000) + + # standard validation + val_evaluator_states = [s['val_evaluator_state'] for s in outputs if 'val_evaluator_state' in s] + val_evaluator_res = self.val_evaluator.evaluation_end(states=val_evaluator_states) + val_evaluator_res_df = pd.DataFrame(val_evaluator_res).stack(1).unstack(0) + val_evaluator_res_df.dropna(axis=1, how='all', inplace=True) + LOGGER.info(f'Validation metrics after epoch #{self.current_epoch}, ' + f'total {self.global_step} iterations:\n{val_evaluator_res_df}') + + for k, v in flatten_dict(val_evaluator_res).items(): + self.log(f'val_{k}', v) + + # standard visual test + test_evaluator_states = [s['test_evaluator_state'] for s in outputs + if 'test_evaluator_state' in s] + test_evaluator_res = self.test_evaluator.evaluation_end(states=test_evaluator_states) + test_evaluator_res_df = pd.DataFrame(test_evaluator_res).stack(1).unstack(0) + test_evaluator_res_df.dropna(axis=1, how='all', inplace=True) + LOGGER.info(f'Test metrics after epoch #{self.current_epoch}, ' + f'total {self.global_step} iterations:\n{test_evaluator_res_df}') + + for k, v in flatten_dict(test_evaluator_res).items(): + self.log(f'test_{k}', v) + + # extra validations + if self.extra_evaluators: + for cur_eval_title, cur_evaluator in self.extra_evaluators.items(): + cur_state_key = f'extra_val_{cur_eval_title}_evaluator_state' + cur_states = [s[cur_state_key] for s in outputs if cur_state_key in s] + cur_evaluator_res = cur_evaluator.evaluation_end(states=cur_states) + cur_evaluator_res_df = pd.DataFrame(cur_evaluator_res).stack(1).unstack(0) + cur_evaluator_res_df.dropna(axis=1, how='all', inplace=True) + LOGGER.info(f'Extra val {cur_eval_title} metrics after epoch #{self.current_epoch}, ' + f'total {self.global_step} iterations:\n{cur_evaluator_res_df}') + for k, v in flatten_dict(cur_evaluator_res).items(): + self.log(f'extra_val_{cur_eval_title}_{k}', v) + + def _do_step(self, batch, batch_idx, mode='train', optimizer_idx=None, extra_val_key=None): + if optimizer_idx == 0: # step for generator + set_requires_grad(self.generator, True) + set_requires_grad(self.discriminator, False) + elif optimizer_idx == 1: # step for discriminator + set_requires_grad(self.generator, False) + set_requires_grad(self.discriminator, True) + + batch = self(batch) + + total_loss = 0 + metrics = {} + + if optimizer_idx is None or optimizer_idx == 0: # step for generator + total_loss, metrics = self.generator_loss(batch) + + 
elif optimizer_idx is None or optimizer_idx == 1: # step for discriminator + if self.config.losses.adversarial.weight > 0: + total_loss, metrics = self.discriminator_loss(batch) + + if self.get_ddp_rank() in (None, 0) and (batch_idx % self.visualize_each_iters == 0 or mode == 'test'): + if self.config.losses.adversarial.weight > 0: + if self.store_discr_outputs_for_vis: + with torch.no_grad(): + self.store_discr_outputs(batch) + vis_suffix = f'_{mode}' + if mode == 'extra_val': + vis_suffix += f'_{extra_val_key}' + self.visualizer(self.current_epoch, batch_idx, batch, suffix=vis_suffix) + + metrics_prefix = f'{mode}_' + if mode == 'extra_val': + metrics_prefix += f'{extra_val_key}_' + result = dict(loss=total_loss, log_info=add_prefix_to_keys(metrics, metrics_prefix)) + if mode == 'val': + result['val_evaluator_state'] = self.val_evaluator.process_batch(batch) + elif mode == 'test': + result['test_evaluator_state'] = self.test_evaluator.process_batch(batch) + elif mode == 'extra_val': + result[f'extra_val_{extra_val_key}_evaluator_state'] = self.extra_evaluators[extra_val_key].process_batch(batch) + + return result + + def get_current_generator(self, no_average=False): + if not no_average and not self.training and self.average_generator and self.generator_average is not None: + return self.generator_average + return self.generator + + def forward(self, batch: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: + """Pass data through generator and obtain at leas 'predicted_image' and 'inpainted' keys""" + raise NotImplementedError() + + def generator_loss(self, batch) -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]: + raise NotImplementedError() + + def discriminator_loss(self, batch) -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]: + raise NotImplementedError() + + def store_discr_outputs(self, batch): + out_size = batch['image'].shape[2:] + discr_real_out, _ = self.discriminator(batch['image']) + discr_fake_out, _ = self.discriminator(batch['predicted_image']) + batch['discr_output_real'] = F.interpolate(discr_real_out, size=out_size, mode='nearest') + batch['discr_output_fake'] = F.interpolate(discr_fake_out, size=out_size, mode='nearest') + batch['discr_output_diff'] = batch['discr_output_real'] - batch['discr_output_fake'] + + def get_ddp_rank(self): + return self.trainer.global_rank if (self.trainer.num_nodes * self.trainer.num_processes) > 1 else None diff --git a/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/trainers/default.py b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/trainers/default.py new file mode 100644 index 0000000000000000000000000000000000000000..29cd10ec376d5fe3ebcd957d807d2d3f83b6ec59 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/trainers/default.py @@ -0,0 +1,175 @@ +import logging + +import torch +import torch.nn.functional as F +from omegaconf import OmegaConf + +# from annotator.lama.saicinpainting.training.data.datasets import make_constant_area_crop_params +from annotator.lama.saicinpainting.training.losses.distance_weighting import make_mask_distance_weighter +from annotator.lama.saicinpainting.training.losses.feature_matching import feature_matching_loss, masked_l1_loss +# from annotator.lama.saicinpainting.training.modules.fake_fakes import FakeFakesGenerator +from annotator.lama.saicinpainting.training.trainers.base import BaseInpaintingTrainingModule, make_multiscale_noise +from annotator.lama.saicinpainting.utils 
import add_prefix_to_keys, get_ramp + +LOGGER = logging.getLogger(__name__) + + +def make_constant_area_crop_batch(batch, **kwargs): + crop_y, crop_x, crop_height, crop_width = make_constant_area_crop_params(img_height=batch['image'].shape[2], + img_width=batch['image'].shape[3], + **kwargs) + batch['image'] = batch['image'][:, :, crop_y : crop_y + crop_height, crop_x : crop_x + crop_width] + batch['mask'] = batch['mask'][:, :, crop_y: crop_y + crop_height, crop_x: crop_x + crop_width] + return batch + + +class DefaultInpaintingTrainingModule(BaseInpaintingTrainingModule): + def __init__(self, *args, concat_mask=True, rescale_scheduler_kwargs=None, image_to_discriminator='predicted_image', + add_noise_kwargs=None, noise_fill_hole=False, const_area_crop_kwargs=None, + distance_weighter_kwargs=None, distance_weighted_mask_for_discr=False, + fake_fakes_proba=0, fake_fakes_generator_kwargs=None, + **kwargs): + super().__init__(*args, **kwargs) + self.concat_mask = concat_mask + self.rescale_size_getter = get_ramp(**rescale_scheduler_kwargs) if rescale_scheduler_kwargs is not None else None + self.image_to_discriminator = image_to_discriminator + self.add_noise_kwargs = add_noise_kwargs + self.noise_fill_hole = noise_fill_hole + self.const_area_crop_kwargs = const_area_crop_kwargs + self.refine_mask_for_losses = make_mask_distance_weighter(**distance_weighter_kwargs) \ + if distance_weighter_kwargs is not None else None + self.distance_weighted_mask_for_discr = distance_weighted_mask_for_discr + + self.fake_fakes_proba = fake_fakes_proba + if self.fake_fakes_proba > 1e-3: + self.fake_fakes_gen = FakeFakesGenerator(**(fake_fakes_generator_kwargs or {})) + + def forward(self, batch): + if self.training and self.rescale_size_getter is not None: + cur_size = self.rescale_size_getter(self.global_step) + batch['image'] = F.interpolate(batch['image'], size=cur_size, mode='bilinear', align_corners=False) + batch['mask'] = F.interpolate(batch['mask'], size=cur_size, mode='nearest') + + if self.training and self.const_area_crop_kwargs is not None: + batch = make_constant_area_crop_batch(batch, **self.const_area_crop_kwargs) + + img = batch['image'] + mask = batch['mask'] + + masked_img = img * (1 - mask) + + if self.add_noise_kwargs is not None: + noise = make_multiscale_noise(masked_img, **self.add_noise_kwargs) + if self.noise_fill_hole: + masked_img = masked_img + mask * noise[:, :masked_img.shape[1]] + masked_img = torch.cat([masked_img, noise], dim=1) + + if self.concat_mask: + masked_img = torch.cat([masked_img, mask], dim=1) + + batch['predicted_image'] = self.generator(masked_img) + batch['inpainted'] = mask * batch['predicted_image'] + (1 - mask) * batch['image'] + + if self.fake_fakes_proba > 1e-3: + if self.training and torch.rand(1).item() < self.fake_fakes_proba: + batch['fake_fakes'], batch['fake_fakes_masks'] = self.fake_fakes_gen(img, mask) + batch['use_fake_fakes'] = True + else: + batch['fake_fakes'] = torch.zeros_like(img) + batch['fake_fakes_masks'] = torch.zeros_like(mask) + batch['use_fake_fakes'] = False + + batch['mask_for_losses'] = self.refine_mask_for_losses(img, batch['predicted_image'], mask) \ + if self.refine_mask_for_losses is not None and self.training \ + else mask + + return batch + + def generator_loss(self, batch): + img = batch['image'] + predicted_img = batch[self.image_to_discriminator] + original_mask = batch['mask'] + supervised_mask = batch['mask_for_losses'] + + # L1 + l1_value = masked_l1_loss(predicted_img, img, supervised_mask, + 
self.config.losses.l1.weight_known, + self.config.losses.l1.weight_missing) + + total_loss = l1_value + metrics = dict(gen_l1=l1_value) + + # vgg-based perceptual loss + if self.config.losses.perceptual.weight > 0: + pl_value = self.loss_pl(predicted_img, img, mask=supervised_mask).sum() * self.config.losses.perceptual.weight + total_loss = total_loss + pl_value + metrics['gen_pl'] = pl_value + + # discriminator + # adversarial_loss calls backward by itself + mask_for_discr = supervised_mask if self.distance_weighted_mask_for_discr else original_mask + self.adversarial_loss.pre_generator_step(real_batch=img, fake_batch=predicted_img, + generator=self.generator, discriminator=self.discriminator) + discr_real_pred, discr_real_features = self.discriminator(img) + discr_fake_pred, discr_fake_features = self.discriminator(predicted_img) + adv_gen_loss, adv_metrics = self.adversarial_loss.generator_loss(real_batch=img, + fake_batch=predicted_img, + discr_real_pred=discr_real_pred, + discr_fake_pred=discr_fake_pred, + mask=mask_for_discr) + total_loss = total_loss + adv_gen_loss + metrics['gen_adv'] = adv_gen_loss + metrics.update(add_prefix_to_keys(adv_metrics, 'adv_')) + + # feature matching + if self.config.losses.feature_matching.weight > 0: + need_mask_in_fm = OmegaConf.to_container(self.config.losses.feature_matching).get('pass_mask', False) + mask_for_fm = supervised_mask if need_mask_in_fm else None + fm_value = feature_matching_loss(discr_fake_features, discr_real_features, + mask=mask_for_fm) * self.config.losses.feature_matching.weight + total_loss = total_loss + fm_value + metrics['gen_fm'] = fm_value + + if self.loss_resnet_pl is not None: + resnet_pl_value = self.loss_resnet_pl(predicted_img, img) + total_loss = total_loss + resnet_pl_value + metrics['gen_resnet_pl'] = resnet_pl_value + + return total_loss, metrics + + def discriminator_loss(self, batch): + total_loss = 0 + metrics = {} + + predicted_img = batch[self.image_to_discriminator].detach() + self.adversarial_loss.pre_discriminator_step(real_batch=batch['image'], fake_batch=predicted_img, + generator=self.generator, discriminator=self.discriminator) + discr_real_pred, discr_real_features = self.discriminator(batch['image']) + discr_fake_pred, discr_fake_features = self.discriminator(predicted_img) + adv_discr_loss, adv_metrics = self.adversarial_loss.discriminator_loss(real_batch=batch['image'], + fake_batch=predicted_img, + discr_real_pred=discr_real_pred, + discr_fake_pred=discr_fake_pred, + mask=batch['mask']) + total_loss = total_loss + adv_discr_loss + metrics['discr_adv'] = adv_discr_loss + metrics.update(add_prefix_to_keys(adv_metrics, 'adv_')) + + + if batch.get('use_fake_fakes', False): + fake_fakes = batch['fake_fakes'] + self.adversarial_loss.pre_discriminator_step(real_batch=batch['image'], fake_batch=fake_fakes, + generator=self.generator, discriminator=self.discriminator) + discr_fake_fakes_pred, _ = self.discriminator(fake_fakes) + fake_fakes_adv_discr_loss, fake_fakes_adv_metrics = self.adversarial_loss.discriminator_loss( + real_batch=batch['image'], + fake_batch=fake_fakes, + discr_real_pred=discr_real_pred, + discr_fake_pred=discr_fake_fakes_pred, + mask=batch['mask'] + ) + total_loss = total_loss + fake_fakes_adv_discr_loss + metrics['discr_adv_fake_fakes'] = fake_fakes_adv_discr_loss + metrics.update(add_prefix_to_keys(fake_fakes_adv_metrics, 'adv_')) + + return total_loss, metrics diff --git a/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/visualizers/__init__.py 
b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/visualizers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d280fd8d48428c249c40c341ecc3c36f34524c99 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/visualizers/__init__.py @@ -0,0 +1,15 @@ +import logging + +from annotator.lama.saicinpainting.training.visualizers.directory import DirectoryVisualizer +from annotator.lama.saicinpainting.training.visualizers.noop import NoopVisualizer + + +def make_visualizer(kind, **kwargs): + logging.info(f'Make visualizer {kind}') + + if kind == 'directory': + return DirectoryVisualizer(**kwargs) + if kind == 'noop': + return NoopVisualizer() + + raise ValueError(f'Unknown visualizer kind {kind}') diff --git a/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/visualizers/base.py b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/visualizers/base.py new file mode 100644 index 0000000000000000000000000000000000000000..675f01682ddf5e31b6cc341735378c6f3b242e49 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/visualizers/base.py @@ -0,0 +1,73 @@ +import abc +from typing import Dict, List + +import numpy as np +import torch +from skimage import color +from skimage.segmentation import mark_boundaries + +from . import colors + +COLORS, _ = colors.generate_colors(151) # 151 - max classes for semantic segmentation + + +class BaseVisualizer: + @abc.abstractmethod + def __call__(self, epoch_i, batch_i, batch, suffix='', rank=None): + """ + Take a batch, make an image from it and visualize + """ + raise NotImplementedError() + + +def visualize_mask_and_images(images_dict: Dict[str, np.ndarray], keys: List[str], + last_without_mask=True, rescale_keys=None, mask_only_first=None, + black_mask=False) -> np.ndarray: + mask = images_dict['mask'] > 0.5 + result = [] + for i, k in enumerate(keys): + img = images_dict[k] + img = np.transpose(img, (1, 2, 0)) + + if rescale_keys is not None and k in rescale_keys: + img = img - img.min() + img /= img.max() + 1e-5 + if len(img.shape) == 2: + img = np.expand_dims(img, 2) + + if img.shape[2] == 1: + img = np.repeat(img, 3, axis=2) + elif (img.shape[2] > 3): + img_classes = img.argmax(2) + img = color.label2rgb(img_classes, colors=COLORS) + + if mask_only_first: + need_mark_boundaries = i == 0 + else: + need_mark_boundaries = i < len(keys) - 1 or not last_without_mask + + if need_mark_boundaries: + if black_mask: + img = img * (1 - mask[0][..., None]) + img = mark_boundaries(img, + mask[0], + color=(1., 0., 0.), + outline_color=(1., 1., 1.), + mode='thick') + result.append(img) + return np.concatenate(result, axis=1) + + +def visualize_mask_and_images_batch(batch: Dict[str, torch.Tensor], keys: List[str], max_items=10, + last_without_mask=True, rescale_keys=None) -> np.ndarray: + batch = {k: tens.detach().cpu().numpy() for k, tens in batch.items() + if k in keys or k == 'mask'} + + batch_size = next(iter(batch.values())).shape[0] + items_to_vis = min(batch_size, max_items) + result = [] + for i in range(items_to_vis): + cur_dct = {k: tens[i] for k, tens in batch.items()} + result.append(visualize_mask_and_images(cur_dct, keys, last_without_mask=last_without_mask, + rescale_keys=rescale_keys)) + return np.concatenate(result, axis=0) diff --git 
a/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/visualizers/colors.py b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/visualizers/colors.py new file mode 100644 index 0000000000000000000000000000000000000000..9e9e39182c58cb06a1c5e97a7e6c497cc3388ebe --- /dev/null +++ b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/visualizers/colors.py @@ -0,0 +1,76 @@ +import random +import colorsys + +import numpy as np +import matplotlib +matplotlib.use('agg') +import matplotlib.pyplot as plt +from matplotlib.colors import LinearSegmentedColormap + + +def generate_colors(nlabels, type='bright', first_color_black=False, last_color_black=True, verbose=False): + # https://stackoverflow.com/questions/14720331/how-to-generate-random-colors-in-matplotlib + """ + Creates a random colormap to be used together with matplotlib. Useful for segmentation tasks + :param nlabels: Number of labels (size of colormap) + :param type: 'bright' for strong colors, 'soft' for pastel colors + :param first_color_black: Option to use first color as black, True or False + :param last_color_black: Option to use last color as black, True or False + :param verbose: Prints the number of labels and shows the colormap. True or False + :return: colormap for matplotlib + """ + if type not in ('bright', 'soft'): + print ('Please choose "bright" or "soft" for type') + return + + if verbose: + print('Number of labels: ' + str(nlabels)) + + # Generate color map for bright colors, based on hsv + if type == 'bright': + randHSVcolors = [(np.random.uniform(low=0.0, high=1), + np.random.uniform(low=0.2, high=1), + np.random.uniform(low=0.9, high=1)) for i in range(nlabels)] + + # Convert HSV list to RGB + randRGBcolors = [] + for HSVcolor in randHSVcolors: + randRGBcolors.append(colorsys.hsv_to_rgb(HSVcolor[0], HSVcolor[1], HSVcolor[2])) + + if first_color_black: + randRGBcolors[0] = [0, 0, 0] + + if last_color_black: + randRGBcolors[-1] = [0, 0, 0] + + random_colormap = LinearSegmentedColormap.from_list('new_map', randRGBcolors, N=nlabels) + + # Generate soft pastel colors, by limiting the RGB spectrum + if type == 'soft': + low = 0.6 + high = 0.95 + randRGBcolors = [(np.random.uniform(low=low, high=high), + np.random.uniform(low=low, high=high), + np.random.uniform(low=low, high=high)) for i in range(nlabels)] + + if first_color_black: + randRGBcolors[0] = [0, 0, 0] + + if last_color_black: + randRGBcolors[-1] = [0, 0, 0] + random_colormap = LinearSegmentedColormap.from_list('new_map', randRGBcolors, N=nlabels) + + # Display colorbar + if verbose: + from matplotlib import colors, colorbar + from matplotlib import pyplot as plt + fig, ax = plt.subplots(1, 1, figsize=(15, 0.5)) + + bounds = np.linspace(0, nlabels, nlabels + 1) + norm = colors.BoundaryNorm(bounds, nlabels) + + cb = colorbar.ColorbarBase(ax, cmap=random_colormap, norm=norm, spacing='proportional', ticks=None, + boundaries=bounds, format='%1i', orientation=u'horizontal') + + return randRGBcolors, random_colormap + diff --git a/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/visualizers/directory.py b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/visualizers/directory.py new file mode 100644 index 0000000000000000000000000000000000000000..a0a3b5eb93c0738784bf24083bdd54d50e4782f6 --- /dev/null +++ 
b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/visualizers/directory.py @@ -0,0 +1,36 @@ +import os + +import cv2 +import numpy as np + +from annotator.lama.saicinpainting.training.visualizers.base import BaseVisualizer, visualize_mask_and_images_batch +from annotator.lama.saicinpainting.utils import check_and_warn_input_range + + +class DirectoryVisualizer(BaseVisualizer): + DEFAULT_KEY_ORDER = 'image predicted_image inpainted'.split(' ') + + def __init__(self, outdir, key_order=DEFAULT_KEY_ORDER, max_items_in_batch=10, + last_without_mask=True, rescale_keys=None): + self.outdir = outdir + os.makedirs(self.outdir, exist_ok=True) + self.key_order = key_order + self.max_items_in_batch = max_items_in_batch + self.last_without_mask = last_without_mask + self.rescale_keys = rescale_keys + + def __call__(self, epoch_i, batch_i, batch, suffix='', rank=None): + check_and_warn_input_range(batch['image'], 0, 1, 'DirectoryVisualizer target image') + vis_img = visualize_mask_and_images_batch(batch, self.key_order, max_items=self.max_items_in_batch, + last_without_mask=self.last_without_mask, + rescale_keys=self.rescale_keys) + + vis_img = np.clip(vis_img * 255, 0, 255).astype('uint8') + + curoutdir = os.path.join(self.outdir, f'epoch{epoch_i:04d}{suffix}') + os.makedirs(curoutdir, exist_ok=True) + rank_suffix = f'_r{rank}' if rank is not None else '' + out_fname = os.path.join(curoutdir, f'batch{batch_i:07d}{rank_suffix}.jpg') + + vis_img = cv2.cvtColor(vis_img, cv2.COLOR_RGB2BGR) + cv2.imwrite(out_fname, vis_img) diff --git a/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/visualizers/noop.py b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/visualizers/noop.py new file mode 100644 index 0000000000000000000000000000000000000000..4479597baf33a817686a4f679b4576f83b6e5c31 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/training/visualizers/noop.py @@ -0,0 +1,9 @@ +from annotator.lama.saicinpainting.training.visualizers.base import BaseVisualizer + + +class NoopVisualizer(BaseVisualizer): + def __init__(self, *args, **kwargs): + pass + + def __call__(self, epoch_i, batch_i, batch, suffix='', rank=None): + pass diff --git a/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/utils.py b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..f36f5130d4c105b63689642da5321ce2e1863a9f --- /dev/null +++ b/extensions-builtin/forge_preprocessor_inpaint/annotator/lama/saicinpainting/utils.py @@ -0,0 +1,174 @@ +import bisect +import functools +import logging +import numbers +import os +import signal +import sys +import traceback +import warnings + +import torch +from pytorch_lightning import seed_everything + +LOGGER = logging.getLogger(__name__) + + +def check_and_warn_input_range(tensor, min_value, max_value, name): + actual_min = tensor.min() + actual_max = tensor.max() + if actual_min < min_value or actual_max > max_value: + warnings.warn(f"{name} must be in {min_value}..{max_value} range, but it ranges {actual_min}..{actual_max}") + + +def sum_dict_with_prefix(target, cur_dict, prefix, default=0): + for k, v in cur_dict.items(): + target_key = prefix + k + target[target_key] = target.get(target_key, default) + v + + +def average_dicts(dict_list): + result = {} + norm = 1e-3 + for dct in dict_list: + sum_dict_with_prefix(result, 
dct, '') + norm += 1 + for k in list(result): + result[k] /= norm + return result + + +def add_prefix_to_keys(dct, prefix): + return {prefix + k: v for k, v in dct.items()} + + +def set_requires_grad(module, value): + for param in module.parameters(): + param.requires_grad = value + + +def flatten_dict(dct): + result = {} + for k, v in dct.items(): + if isinstance(k, tuple): + k = '_'.join(k) + if isinstance(v, dict): + for sub_k, sub_v in flatten_dict(v).items(): + result[f'{k}_{sub_k}'] = sub_v + else: + result[k] = v + return result + + +class LinearRamp: + def __init__(self, start_value=0, end_value=1, start_iter=-1, end_iter=0): + self.start_value = start_value + self.end_value = end_value + self.start_iter = start_iter + self.end_iter = end_iter + + def __call__(self, i): + if i < self.start_iter: + return self.start_value + if i >= self.end_iter: + return self.end_value + part = (i - self.start_iter) / (self.end_iter - self.start_iter) + return self.start_value * (1 - part) + self.end_value * part + + +class LadderRamp: + def __init__(self, start_iters, values): + self.start_iters = start_iters + self.values = values + assert len(values) == len(start_iters) + 1, (len(values), len(start_iters)) + + def __call__(self, i): + segment_i = bisect.bisect_right(self.start_iters, i) + return self.values[segment_i] + + +def get_ramp(kind='ladder', **kwargs): + if kind == 'linear': + return LinearRamp(**kwargs) + if kind == 'ladder': + return LadderRamp(**kwargs) + raise ValueError(f'Unexpected ramp kind: {kind}') + + +def print_traceback_handler(sig, frame): + LOGGER.warning(f'Received signal {sig}') + bt = ''.join(traceback.format_stack()) + LOGGER.warning(f'Requested stack trace:\n{bt}') + + +def register_debug_signal_handlers(sig=None, handler=print_traceback_handler): + LOGGER.warning(f'Setting signal {sig} handler {handler}') + signal.signal(sig, handler) + + +def handle_deterministic_config(config): + seed = dict(config).get('seed', None) + if seed is None: + return False + + seed_everything(seed) + return True + + +def get_shape(t): + if torch.is_tensor(t): + return tuple(t.shape) + elif isinstance(t, dict): + return {n: get_shape(q) for n, q in t.items()} + elif isinstance(t, (list, tuple)): + return [get_shape(q) for q in t] + elif isinstance(t, numbers.Number): + return type(t) + else: + raise ValueError('unexpected type {}'.format(type(t))) + + +def get_has_ddp_rank(): + master_port = os.environ.get('MASTER_PORT', None) + node_rank = os.environ.get('NODE_RANK', None) + local_rank = os.environ.get('LOCAL_RANK', None) + world_size = os.environ.get('WORLD_SIZE', None) + has_rank = master_port is not None or node_rank is not None or local_rank is not None or world_size is not None + return has_rank + + +def handle_ddp_subprocess(): + def main_decorator(main_func): + @functools.wraps(main_func) + def new_main(*args, **kwargs): + # Trainer sets MASTER_PORT, NODE_RANK, LOCAL_RANK, WORLD_SIZE + parent_cwd = os.environ.get('TRAINING_PARENT_WORK_DIR', None) + has_parent = parent_cwd is not None + has_rank = get_has_ddp_rank() + assert has_parent == has_rank, f'Inconsistent state: has_parent={has_parent}, has_rank={has_rank}' + + if has_parent: + # we are in the worker + sys.argv.extend([ + f'hydra.run.dir={parent_cwd}', + # 'hydra/hydra_logging=disabled', + # 'hydra/job_logging=disabled' + ]) + # do nothing if this is a top-level process + # TRAINING_PARENT_WORK_DIR is set in handle_ddp_parent_process after hydra initialization + + main_func(*args, **kwargs) + return new_main + return 
main_decorator + + +def handle_ddp_parent_process(): + parent_cwd = os.environ.get('TRAINING_PARENT_WORK_DIR', None) + has_parent = parent_cwd is not None + has_rank = get_has_ddp_rank() + assert has_parent == has_rank, f'Inconsistent state: has_parent={has_parent}, has_rank={has_rank}' + + if parent_cwd is None: + os.environ['TRAINING_PARENT_WORK_DIR'] = os.getcwd() + + return has_parent diff --git a/extensions-builtin/forge_preprocessor_inpaint/scripts/lama_config.yaml b/extensions-builtin/forge_preprocessor_inpaint/scripts/lama_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..55fd91b5bcacd654e3045a2331e9c186818e6edc --- /dev/null +++ b/extensions-builtin/forge_preprocessor_inpaint/scripts/lama_config.yaml @@ -0,0 +1,157 @@ +run_title: b18_ffc075_batch8x15 +training_model: + kind: default + visualize_each_iters: 1000 + concat_mask: true + store_discr_outputs_for_vis: true +losses: + l1: + weight_missing: 0 + weight_known: 10 + perceptual: + weight: 0 + adversarial: + kind: r1 + weight: 10 + gp_coef: 0.001 + mask_as_fake_target: true + allow_scale_mask: true + feature_matching: + weight: 100 + resnet_pl: + weight: 30 + weights_path: ${env:TORCH_HOME} + +optimizers: + generator: + kind: adam + lr: 0.001 + discriminator: + kind: adam + lr: 0.0001 +visualizer: + key_order: + - image + - predicted_image + - discr_output_fake + - discr_output_real + - inpainted + rescale_keys: + - discr_output_fake + - discr_output_real + kind: directory + outdir: /group-volume/User-Driven-Content-Generation/r.suvorov/inpainting/experiments/r.suvorov_2021-04-30_14-41-12_train_simple_pix2pix2_gap_sdpl_novgg_large_b18_ffc075_batch8x15/samples +location: + data_root_dir: /group-volume/User-Driven-Content-Generation/datasets/inpainting_data_root_large + out_root_dir: /group-volume/User-Driven-Content-Generation/${env:USER}/inpainting/experiments + tb_dir: /group-volume/User-Driven-Content-Generation/${env:USER}/inpainting/tb_logs +data: + batch_size: 15 + val_batch_size: 2 + num_workers: 3 + train: + indir: ${location.data_root_dir}/train + out_size: 256 + mask_gen_kwargs: + irregular_proba: 1 + irregular_kwargs: + max_angle: 4 + max_len: 200 + max_width: 100 + max_times: 5 + min_times: 1 + box_proba: 1 + box_kwargs: + margin: 10 + bbox_min_size: 30 + bbox_max_size: 150 + max_times: 3 + min_times: 1 + segm_proba: 0 + segm_kwargs: + confidence_threshold: 0.5 + max_object_area: 0.5 + min_mask_area: 0.07 + downsample_levels: 6 + num_variants_per_mask: 1 + rigidness_mode: 1 + max_foreground_coverage: 0.3 + max_foreground_intersection: 0.7 + max_mask_intersection: 0.1 + max_hidden_area: 0.1 + max_scale_change: 0.25 + horizontal_flip: true + max_vertical_shift: 0.2 + position_shuffle: true + transform_variant: distortions + dataloader_kwargs: + batch_size: ${data.batch_size} + shuffle: true + num_workers: ${data.num_workers} + val: + indir: ${location.data_root_dir}/val + img_suffix: .png + dataloader_kwargs: + batch_size: ${data.val_batch_size} + shuffle: false + num_workers: ${data.num_workers} + visual_test: + indir: ${location.data_root_dir}/korean_test + img_suffix: _input.png + pad_out_to_modulo: 32 + dataloader_kwargs: + batch_size: 1 + shuffle: false + num_workers: ${data.num_workers} +generator: + kind: ffc_resnet + input_nc: 4 + output_nc: 3 + ngf: 64 + n_downsampling: 3 + n_blocks: 18 + add_out_act: sigmoid + init_conv_kwargs: + ratio_gin: 0 + ratio_gout: 0 + enable_lfu: false + downsample_conv_kwargs: + ratio_gin: ${generator.init_conv_kwargs.ratio_gout} + ratio_gout: 
${generator.downsample_conv_kwargs.ratio_gin} + enable_lfu: false + resnet_conv_kwargs: + ratio_gin: 0.75 + ratio_gout: ${generator.resnet_conv_kwargs.ratio_gin} + enable_lfu: false +discriminator: + kind: pix2pixhd_nlayer + input_nc: 3 + ndf: 64 + n_layers: 4 +evaluator: + kind: default + inpainted_key: inpainted + integral_kind: ssim_fid100_f1 +trainer: + kwargs: + gpus: -1 + accelerator: ddp + max_epochs: 200 + gradient_clip_val: 1 + log_gpu_memory: None + limit_train_batches: 25000 + val_check_interval: ${trainer.kwargs.limit_train_batches} + log_every_n_steps: 1000 + precision: 32 + terminate_on_nan: false + check_val_every_n_epoch: 1 + num_sanity_val_steps: 8 + limit_val_batches: 1000 + replace_sampler_ddp: false + checkpoint_kwargs: + verbose: true + save_top_k: 5 + save_last: true + period: 1 + monitor: val_ssim_fid100_f1_total_mean + mode: max diff --git a/extensions-builtin/forge_preprocessor_inpaint/scripts/preprocessor_inpaint.py b/extensions-builtin/forge_preprocessor_inpaint/scripts/preprocessor_inpaint.py new file mode 100644 index 0000000000000000000000000000000000000000..1ccb65fa022d8d736bd6dd9b3a1bb533f41c9a47 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_inpaint/scripts/preprocessor_inpaint.py @@ -0,0 +1,167 @@ +import os +import cv2 +import torch +import numpy as np +import yaml +import einops + +from omegaconf import OmegaConf +from modules_forge.supported_preprocessor import Preprocessor, PreprocessorParameter +from modules_forge.forge_util import numpy_to_pytorch, resize_image_with_pad +from modules_forge.shared import preprocessor_dir, add_supported_preprocessor +from modules.modelloader import load_file_from_url +from annotator.lama.saicinpainting.training.trainers import load_checkpoint + + +class PreprocessorInpaint(Preprocessor): + def __init__(self): + super().__init__() + self.name = 'inpaint_global_harmonious' + self.tags = ['Inpaint'] + self.model_filename_filters = ['inpaint'] + self.slider_resolution = PreprocessorParameter(visible=False) + self.fill_mask_with_one_when_resize_and_fill = True + self.expand_mask_when_resize_and_fill = True + + def process_before_every_sampling(self, process, cond, mask, *args, **kwargs): + mask = mask.round() + mixed_cond = cond * (1.0 - mask) - mask + return mixed_cond, None + + +class PreprocessorInpaintOnly(PreprocessorInpaint): + def __init__(self): + super().__init__() + self.name = 'inpaint_only' + self.image = None + self.mask = None + self.latent = None + + def process_before_every_sampling(self, process, cond, mask, *args, **kwargs): + mask = mask.round() + self.image = cond + self.mask = mask + + vae = process.sd_model.forge_objects.vae + # This is a powerful VAE with integrated memory management, bf16, and tiled fallback. 
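+ # The block below performs latent-space inpainting:
+ #   1. encode the whole input image with the VAE and convert it to the UNet's latent format;
+ #   2. bring the mask to latent resolution (upsample to pixel size, round, then 8x8 max-pool,
+ #      so any latent cell that touches the hole counts as masked);
+ #   3. hook the sampler: before every UNet call the known (mask == 0) region of the latent is
+ #      reset to a re-noised copy of the encoded image, and after CFG the denoised output in the
+ #      known region is replaced by the clean image latent, so only the masked region is generated.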
+ + latent_image = vae.encode(self.image.movedim(1, -1)) + latent_image = process.sd_model.forge_objects.unet.model.latent_format.process_in(latent_image) + + B, C, H, W = latent_image.shape + + latent_mask = self.mask + latent_mask = torch.nn.functional.interpolate(latent_mask, size=(H * 8, W * 8), mode="bilinear").round() + latent_mask = torch.nn.functional.max_pool2d(latent_mask, (8, 8)).round().to(latent_image) + + unet = process.sd_model.forge_objects.unet.clone() + + def pre_cfg(model, c, uc, x, timestep, model_options): + noisy_latent = latent_image.to(x) + timestep[:, None, None, None].to(x) * torch.randn_like(latent_image).to(x) + x = x * latent_mask.to(x) + noisy_latent.to(x) * (1.0 - latent_mask.to(x)) + return model, c, uc, x, timestep, model_options + + def post_cfg(args): + denoised = args['denoised'] + denoised = denoised * latent_mask.to(denoised) + latent_image.to(denoised) * (1.0 - latent_mask.to(denoised)) + return denoised + + unet.add_sampler_pre_cfg_function(pre_cfg) + unet.set_model_sampler_post_cfg_function(post_cfg) + + process.sd_model.forge_objects.unet = unet + + self.latent = latent_image + + mixed_cond = cond * (1.0 - mask) - mask + + return mixed_cond, None + + def process_after_every_sampling(self, process, params, *args, **kwargs): + a1111_batch_result = args[0] + new_results = [] + + for img in a1111_batch_result.images: + sigma = 7 + mask = self.mask[0, 0].detach().cpu().numpy().astype(np.float32) + mask = cv2.dilate(mask, np.ones((sigma, sigma), dtype=np.uint8)) + mask = cv2.blur(mask, (sigma, sigma))[None] + mask = torch.from_numpy(np.ascontiguousarray(mask).copy()).to(img).clip(0, 1) + raw = self.image[0].to(img).clip(0, 1) + img = img.clip(0, 1) + new_results.append(raw * (1.0 - mask) + img * mask) + + a1111_batch_result.images = new_results + return + + +class PreprocessorInpaintLama(PreprocessorInpaintOnly): + def __init__(self): + super().__init__() + self.name = 'inpaint_only+lama' + + def load_model(self): + remote_model_path = "https://huggingface.co/lllyasviel/Annotators/resolve/main/ControlNetLama.pth" + model_path = load_file_from_url(remote_model_path, model_dir=preprocessor_dir) + config_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'lama_config.yaml') + cfg = yaml.safe_load(open(config_path, 'rt')) + cfg = OmegaConf.create(cfg) + cfg.training_model.predict_only = True + cfg.visualizer.kind = 'noop' + model = load_checkpoint(cfg, os.path.abspath(model_path), strict=False, map_location='cpu') + self.setup_model_patcher(model) + return + + def __call__(self, input_image, resolution, slider_1=None, slider_2=None, slider_3=None, input_mask=None, **kwargs): + if input_mask is None: + return input_image + + H, W, C = input_image.shape + raw_color = input_image.copy() + raw_mask = input_mask.copy() + + input_image, remove_pad = resize_image_with_pad(input_image, 256) + input_mask, remove_pad = resize_image_with_pad(input_mask, 256) + input_mask = input_mask[..., :1] + + self.load_model() + + self.move_all_model_patchers_to_gpu() + + color = np.ascontiguousarray(input_image).astype(np.float32) / 255.0 + mask = np.ascontiguousarray(input_mask).astype(np.float32) / 255.0 + with torch.no_grad(): + color = self.send_tensor_to_model_device(torch.from_numpy(color)) + mask = self.send_tensor_to_model_device(torch.from_numpy(mask)) + mask = (mask > 0.5).float() + color = color * (1 - mask) + image_feed = torch.cat([color, mask], dim=2) + image_feed = einops.rearrange(image_feed, 'h w c -> 1 c h w') + prd_color = 
self.model_patcher.model(image_feed)[0] + prd_color = einops.rearrange(prd_color, 'c h w -> h w c') + prd_color = prd_color * mask + color * (1 - mask) + prd_color *= 255.0 + prd_color = prd_color.detach().cpu().numpy().clip(0, 255).astype(np.uint8) + + prd_color = remove_pad(prd_color) + prd_color = cv2.resize(prd_color, (W, H)) + + alpha = raw_mask.astype(np.float32) / 255.0 + fin_color = prd_color.astype(np.float32) * alpha + raw_color.astype(np.float32) * (1 - alpha) + fin_color = fin_color.clip(0, 255).astype(np.uint8) + + return fin_color + + def process_before_every_sampling(self, process, cond, mask, *args, **kwargs): + cond, mask = super().process_before_every_sampling(process, cond, mask, *args, **kwargs) + sigma_max = process.sd_model.forge_objects.unet.model.model_sampling.sigma_max + original_noise = kwargs['noise'] + process.modified_noise = original_noise + self.latent.to(original_noise) / sigma_max.to(original_noise) + return cond, mask + + +add_supported_preprocessor(PreprocessorInpaint()) + +add_supported_preprocessor(PreprocessorInpaintOnly()) + +add_supported_preprocessor(PreprocessorInpaintLama()) diff --git a/extensions-builtin/forge_preprocessor_marigold/marigold/model/__init__.py b/extensions-builtin/forge_preprocessor_marigold/marigold/model/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/extensions-builtin/forge_preprocessor_marigold/marigold/model/marigold_pipeline.py b/extensions-builtin/forge_preprocessor_marigold/marigold/model/marigold_pipeline.py new file mode 100644 index 0000000000000000000000000000000000000000..6a69b42e98f68260e5905e3f5d9c8f7afa679ae3 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_marigold/marigold/model/marigold_pipeline.py @@ -0,0 +1,313 @@ +# Author: Bingxin Ke +# Last modified: 2023-12-11 + +import logging +from typing import Dict + +import numpy as np +import torch +from diffusers import ( + DDIMScheduler, + DDPMScheduler, + PNDMScheduler, + DEISMultistepScheduler, + SchedulerMixin, + UNet2DConditionModel, +) +from torch import nn +from torch.nn import Conv2d +from torch.nn.parameter import Parameter +from tqdm.auto import tqdm +from transformers import CLIPTextModel, CLIPTokenizer + +from .rgb_encoder import RGBEncoder +from .stacked_depth_AE import StackedDepthAE + + +class MarigoldPipeline(nn.Module): + """ + Marigold monocular depth estimator. 
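+ Wraps a Stable Diffusion UNet (its conv_in is widened from 4 to 8 input channels so the
+ RGB latent and the depth latent can be concatenated), a frozen VAE-based RGB encoder, a
+ stacked depth autoencoder for encoding/decoding depth maps, a configurable noise scheduler,
+ and a cached text embedding for the empty prompt. Depth is predicted by iteratively
+ denoising the depth latent conditioned on the RGB latent.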
+ """ + + def __init__( + self, + unet_pretrained_path: Dict, # {path: xxx, subfolder: xxx} + rgb_encoder_pretrained_path: Dict, + depht_ae_pretrained_path: Dict, + noise_scheduler_pretrained_path: Dict, + tokenizer_pretrained_path: Dict, + text_encoder_pretrained_path: Dict, + empty_text_embed=None, + trainable_unet=False, + rgb_latent_scale_factor=0.18215, + depth_latent_scale_factor=0.18215, + noise_scheduler_type=None, + enable_gradient_checkpointing=False, + enable_xformers=True, + ) -> None: + super().__init__() + + self.rgb_latent_scale_factor = rgb_latent_scale_factor + self.depth_latent_scale_factor = depth_latent_scale_factor + self.device = "cpu" + + # ******* Initialize modules ******* + # Trainable modules + self.trainable_module_dic: Dict[str, nn.Module] = {} + self.trainable_unet = trainable_unet + + # Denoising UNet + self.unet: UNet2DConditionModel = UNet2DConditionModel.from_pretrained( + unet_pretrained_path["path"], subfolder=unet_pretrained_path["subfolder"] + ) + logging.info(f"pretrained UNet loaded from: {unet_pretrained_path}") + if 8 != self.unet.config["in_channels"]: + self._replace_unet_conv_in() + logging.warning("Unet conv_in layer is replaced") + if enable_xformers: + self.unet.enable_xformers_memory_efficient_attention() + else: + self.unet.disable_xformers_memory_efficient_attention() + + # Image encoder + self.rgb_encoder = RGBEncoder( + pretrained_path=rgb_encoder_pretrained_path["path"], + subfolder=rgb_encoder_pretrained_path["subfolder"], + ) + logging.info( + f"pretrained RGBEncoder loaded from: {rgb_encoder_pretrained_path}" + ) + self.rgb_encoder.requires_grad_(False) + + # Depth encoder-decoder + self.depth_ae = StackedDepthAE( + pretrained_path=depht_ae_pretrained_path["path"], + subfolder=depht_ae_pretrained_path["subfolder"], + ) + logging.info( + f"pretrained Depth Autoencoder loaded from: {rgb_encoder_pretrained_path}" + ) + + # Trainability + # unet + if self.trainable_unet: + self.unet.requires_grad_(True) + self.trainable_module_dic["unet"] = self.unet + logging.debug(f"UNet is set to trainable") + else: + self.unet.requires_grad_(False) + logging.debug(f"UNet is set to frozen") + + # Gradient checkpointing + if enable_gradient_checkpointing: + self.unet.enable_gradient_checkpointing() + self.depth_ae.vae.enable_gradient_checkpointing() + + # Noise scheduler + if "DDPMScheduler" == noise_scheduler_type: + self.noise_scheduler: SchedulerMixin = DDPMScheduler.from_pretrained( + noise_scheduler_pretrained_path["path"], + subfolder=noise_scheduler_pretrained_path["subfolder"], + ) + elif "DDIMScheduler" == noise_scheduler_type: + self.noise_scheduler: SchedulerMixin = DDIMScheduler.from_pretrained( + noise_scheduler_pretrained_path["path"], + subfolder=noise_scheduler_pretrained_path["subfolder"], + ) + elif "PNDMScheduler" == noise_scheduler_type: + self.noise_scheduler: SchedulerMixin = PNDMScheduler.from_pretrained( + noise_scheduler_pretrained_path["path"], + subfolder=noise_scheduler_pretrained_path["subfolder"], + ) + elif "DEISMultistepScheduler" == noise_scheduler_type: + self.noise_scheduler: SchedulerMixin = DEISMultistepScheduler.from_pretrained( + noise_scheduler_pretrained_path["path"], + subfolder=noise_scheduler_pretrained_path["subfolder"], + ) + else: + raise NotImplementedError + + # Text embed for empty prompt (always in CPU) + if empty_text_embed is None: + tokenizer: CLIPTokenizer = CLIPTokenizer.from_pretrained( + tokenizer_pretrained_path["path"], + subfolder=tokenizer_pretrained_path["subfolder"], + ) + text_encoder: 
CLIPTextModel = CLIPTextModel.from_pretrained( + text_encoder_pretrained_path["path"], + subfolder=text_encoder_pretrained_path["subfolder"], + ) + with torch.no_grad(): + self.empty_text_embed = self._encode_text( + "", tokenizer, text_encoder + ).detach()#.to(dtype=precision) # [1, 2, 1024] + else: + self.empty_text_embed = empty_text_embed + + def from_pretrained(pretrained_path, **kwargs): + return __class__( + unet_pretrained_path={"path": pretrained_path, "subfolder": "unet"}, + rgb_encoder_pretrained_path={"path": pretrained_path, "subfolder": "vae"}, + depht_ae_pretrained_path={"path": pretrained_path, "subfolder": "vae"}, + noise_scheduler_pretrained_path={ + "path": pretrained_path, + "subfolder": "scheduler", + }, + tokenizer_pretrained_path={ + "path": pretrained_path, + "subfolder": "tokenizer", + }, + text_encoder_pretrained_path={ + "path": pretrained_path, + "subfolder": "text_encoder", + }, + **kwargs, + ) + + def _replace_unet_conv_in(self): + # Replace the first layer to accept 8 in_channels. Only applied when loading pretrained SD U-Net + _weight = self.unet.conv_in.weight.clone() # [320, 4, 3, 3] + _bias = self.unet.conv_in.bias.clone() # [320] + _weight = _weight.repeat((1, 2, 1, 1)) # Keep selected channel(s) + # half the activation magnitude + _weight *= 0.5 + _bias *= 0.5 + # new conv_in channel + _n_convin_out_channel = self.unet.conv_in.out_channels + _new_conv_in = Conv2d( + 8, _n_convin_out_channel, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1) + ) + _new_conv_in.weight = Parameter(_weight) + _new_conv_in.bias = Parameter(_bias) + self.unet.conv_in = _new_conv_in + # replace config + self.unet.config["in_channels"] = 8 + return + + def to(self, device): + self.rgb_encoder.to(device) + self.depth_ae.to(device) + self.unet.to(device) + self.empty_text_embed = self.empty_text_embed.to(device) + self.device = device + return self + + def forward( + self, + rgb_in, + num_inference_steps: int = 50, + num_output_inter_results: int = 0, + show_pbar=False, + init_depth_latent=None, + return_depth_latent=False, + ): + device = rgb_in.device + precision = self.unet.dtype + # Set timesteps + self.noise_scheduler.set_timesteps(num_inference_steps, device=device) + timesteps = self.noise_scheduler.timesteps # [T] + + # Encode image + rgb_latent = self.encode_rgb(rgb_in) + + # Initial depth map (noise) + if init_depth_latent is not None: + init_depth_latent = init_depth_latent.to(dtype=precision) + assert ( + init_depth_latent.shape == rgb_latent.shape + ), "initial depth latent should be the size of [B, 4, H/8, W/8]" + depth_latent = init_depth_latent + depth_latent = torch.randn(rgb_latent.shape, device=device, dtype=precision) + else: + depth_latent = torch.randn(rgb_latent.shape, device=device) # [B, 4, h, w] + + # Expand text embeding for batch + batch_empty_text_embed = self.empty_text_embed.repeat( + (rgb_latent.shape[0], 1, 1) + ).to(device=device, dtype=precision) # [B, 2, 1024] + + # Export intermediate denoising steps + if num_output_inter_results > 0: + depth_latent_ls = [] + inter_steps = [] + _idx = ( + -1 + * ( + np.arange(0, num_output_inter_results) + * num_inference_steps + / num_output_inter_results + ) + .round() + .astype(int) + - 1 + ) + steps_to_output = timesteps[_idx] + + # Denoising loop + if show_pbar: + iterable = tqdm(enumerate(timesteps), total=len(timesteps), leave=False, desc="denoising") + else: + iterable = enumerate(timesteps) + for i, t in iterable: + unet_input = torch.cat( + [rgb_latent, depth_latent], dim=1 + ) # this order is 
important + unet_input = unet_input.to(dtype=precision) + # predict the noise residual + noise_pred = self.unet( + unet_input, t, encoder_hidden_states=batch_empty_text_embed + ).sample # [B, 4, h, w] + # compute the previous noisy sample x_t -> x_t-1 + depth_latent = self.noise_scheduler.step( + noise_pred, t, depth_latent + ).prev_sample.to(dtype=precision) + + + if num_output_inter_results > 0 and t in steps_to_output: + depth_latent_ls.append(depth_latent.detach().clone()) + #depth_latent_ls = depth_latent_ls.to(dtype=precision) + inter_steps.append(t - 1) + + # Decode depth latent + if num_output_inter_results > 0: + assert 0 in inter_steps + depth = [self.decode_depth(lat) for lat in depth_latent_ls] + if return_depth_latent: + return depth, inter_steps, depth_latent_ls + else: + return depth, inter_steps + else: + depth = self.decode_depth(depth_latent) + if return_depth_latent: + return depth, depth_latent + else: + return depth + + def encode_rgb(self, rgb_in): + rgb_latent = self.rgb_encoder(rgb_in) # [B, 4, h, w] + rgb_latent = rgb_latent * self.rgb_latent_scale_factor + return rgb_latent + + def encode_depth(self, depth_in): + depth_latent = self.depth_ae.encode(depth_in) + depth_latent = depth_latent * self.depth_latent_scale_factor + return depth_latent + + def decode_depth(self, depth_latent): + #depth_latent = depth_latent.to(dtype=torch.float16) + depth_latent = depth_latent / self.depth_latent_scale_factor + depth = self.depth_ae.decode(depth_latent) # [B, 1, H, W] + return depth + + @staticmethod + def _encode_text(prompt, tokenizer, text_encoder): + text_inputs = tokenizer( + prompt, + padding="do_not_pad", + max_length=tokenizer.model_max_length, + truncation=True, + return_tensors="pt", + ) + text_input_ids = text_inputs.input_ids.to(text_encoder.device) + text_embed = text_encoder(text_input_ids)[0] + return text_embed diff --git a/extensions-builtin/forge_preprocessor_marigold/marigold/model/rgb_encoder.py b/extensions-builtin/forge_preprocessor_marigold/marigold/model/rgb_encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..ea22947136e7cece553b6309fba516c041b305b2 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_marigold/marigold/model/rgb_encoder.py @@ -0,0 +1,36 @@ +# Author: Bingxin Ke +# Last modified: 2023-12-05 + +import torch +import torch.nn as nn +import logging +from diffusers import AutoencoderKL + + +class RGBEncoder(nn.Module): + """ + The encoder of pretrained Stable Diffusion VAE + """ + + def __init__(self, pretrained_path, subfolder=None) -> None: + super().__init__() + + vae: AutoencoderKL = AutoencoderKL.from_pretrained(pretrained_path, subfolder=subfolder) + logging.info(f"pretrained AutoencoderKL loaded from: {pretrained_path}") + + self.rgb_encoder = nn.Sequential( + vae.encoder, + vae.quant_conv, + ) + + def to(self, *args, **kwargs): + self.rgb_encoder.to(*args, **kwargs) + + def forward(self, rgb_in): + return self.encode(rgb_in) + + def encode(self, rgb_in): + moments = self.rgb_encoder(rgb_in) # [B, 8, H/8, W/8] + mean, logvar = torch.chunk(moments, 2, dim=1) + rgb_latent = mean + return rgb_latent \ No newline at end of file diff --git a/extensions-builtin/forge_preprocessor_marigold/marigold/model/stacked_depth_AE.py b/extensions-builtin/forge_preprocessor_marigold/marigold/model/stacked_depth_AE.py new file mode 100644 index 0000000000000000000000000000000000000000..9b85155e2922366c0027815d3c07c6ac669ed017 --- /dev/null +++ 
b/extensions-builtin/forge_preprocessor_marigold/marigold/model/stacked_depth_AE.py @@ -0,0 +1,52 @@ +# Author: Bingxin Ke +# Last modified: 2023-12-05 + +import torch +import torch.nn as nn +import logging +from diffusers import AutoencoderKL + + +class StackedDepthAE(nn.Module): + """ + Tailored pretrained image VAE for depth map. + Encode: Depth images are repeated into 3 channels. + Decode: The average of 3 chennels are taken as output. + """ + + def __init__(self, pretrained_path, subfolder=None) -> None: + super().__init__() + + self.vae: AutoencoderKL = AutoencoderKL.from_pretrained(pretrained_path, subfolder=subfolder) + logging.info(f"pretrained AutoencoderKL loaded from: {pretrained_path}") + + def forward(self, depth_in): + depth_latent = self.encode(depth_in) + depth_out = self.decode(depth_latent) + return depth_out + + def to(self, *args, **kwargs): + self.vae.to(*args, **kwargs) + + @staticmethod + def _stack_depth_images(depth_in): + if 4 == len(depth_in.shape): + stacked = depth_in.repeat(1, 3, 1, 1) + elif 3 == len(depth_in.shape): + stacked = depth_in.unsqueeze(1) + stacked = depth_in.repeat(1, 3, 1, 1) + return stacked + + def encode(self, depth_in): + stacked = self._stack_depth_images(depth_in) + h = self.vae.encoder(stacked) + moments = self.vae.quant_conv(h) + mean, logvar = torch.chunk(moments, 2, dim=1) + depth_latent = mean + return depth_latent + + def decode(self, depth_latent): + z = self.vae.post_quant_conv(depth_latent) + stacked = self.vae.decoder(z) + depth_mean = stacked.mean(dim=1, keepdim=True) + return depth_mean \ No newline at end of file diff --git a/extensions-builtin/forge_preprocessor_marigold/marigold/util/batchsize.py b/extensions-builtin/forge_preprocessor_marigold/marigold/util/batchsize.py new file mode 100644 index 0000000000000000000000000000000000000000..7740518ee4a0f4428add30e859895d51bd58fe3f --- /dev/null +++ b/extensions-builtin/forge_preprocessor_marigold/marigold/util/batchsize.py @@ -0,0 +1,38 @@ +# Author: Bingxin Ke +# Last modified: 2023-12-11 + +import torch +import math + + +# Search table for suggested max. 
inference batch size +bs_search_table = [ + # tested on A100-PCIE-80GB + {"res": 768, "total_vram": 79, "bs": 35}, + {"res": 1024, "total_vram": 79, "bs": 20}, + # tested on A100-PCIE-40GB + {"res": 768, "total_vram": 39, "bs": 15}, + {"res": 1024, "total_vram": 39, "bs": 8}, + # tested on RTX3090, RTX4090 + {"res": 512, "total_vram": 23, "bs": 20}, + {"res": 768, "total_vram": 23, "bs": 7}, + {"res": 1024, "total_vram": 23, "bs": 3}, + # tested on GTX1080Ti + {"res": 512, "total_vram": 10, "bs": 5}, + {"res": 768, "total_vram": 10, "bs": 2}, +] + + + +def find_batch_size(n_repeat, input_res): + total_vram = torch.cuda.mem_get_info()[1] / 1024.0**3 + + for settings in sorted(bs_search_table, key=lambda k: (k['res'], -k['total_vram'])): + if input_res <= settings['res'] and total_vram >= settings['total_vram']: + bs = settings['bs'] + if bs > n_repeat: + bs = n_repeat + elif bs > math.ceil(n_repeat / 2) and bs < n_repeat: + bs = math.ceil(n_repeat / 2) + return bs + return 1 \ No newline at end of file diff --git a/extensions-builtin/forge_preprocessor_marigold/marigold/util/ensemble.py b/extensions-builtin/forge_preprocessor_marigold/marigold/util/ensemble.py new file mode 100644 index 0000000000000000000000000000000000000000..85c8f7720dfc1ebe74152fd515db28a0d8128fc6 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_marigold/marigold/util/ensemble.py @@ -0,0 +1,103 @@ +# Test align depth images +# Author: Bingxin Ke +# Last modified: 2023-12-11 + +import numpy as np +import torch + +from scipy.optimize import minimize + +def inter_distances(tensors): + """ + To calculate the distance between each two depth maps. + """ + distances = [] + for i, j in torch.combinations(torch.arange(tensors.shape[0])): + arr1 = tensors[i:i+1] + arr2 = tensors[j:j+1] + distances.append(arr1 - arr2) + dist = torch.concatenate(distances, dim=0) + return dist + + +def ensemble_depths(input_images, regularizer_strength=0.02, max_iter=2, tol=1e-3, reduction='median', max_res=None, disp=False, device='cuda'): + """ + To ensemble multiple affine-invariant depth images (up to scale and shift), + by aligning estimating the scale and shift + """ + device = input_images.device + original_input = input_images.clone() + n_img = input_images.shape[0] + ori_shape = input_images.shape + + if max_res is not None: + scale_factor = torch.min(max_res / torch.tensor(ori_shape[-2:])) + if scale_factor < 1: + downscaler = torch.nn.Upsample(scale_factor=scale_factor, mode='nearest') + input_images = downscaler(torch.from_numpy(input_images)).numpy() + + # init guess + _min = np.min(input_images.reshape((n_img, -1)).cpu().numpy(), axis=1) + _max = np.max(input_images.reshape((n_img, -1)).cpu().numpy(), axis=1) + s_init = 1.0 / (_max - _min).reshape((-1, 1, 1)) + t_init = (-1 * s_init.flatten() * _min.flatten()).reshape((-1, 1, 1)) + x = np.concatenate([s_init, t_init]).reshape(-1) + + input_images = input_images.to(device) + + # objective function + def closure(x): + x = x.astype(np.float32) + l = len(x) + s = x[:int(l/2)] + t = x[int(l/2):] + s = torch.from_numpy(s).to(device) + t = torch.from_numpy(t).to(device) + + transformed_arrays = input_images * s.view((-1, 1, 1)) + t.view((-1, 1, 1)) + dists = inter_distances(transformed_arrays) + sqrt_dist = torch.sqrt(torch.mean(dists**2)) + + if 'mean' == reduction: + pred = torch.mean(transformed_arrays, dim=0) + elif 'median' == reduction: + pred = torch.median(transformed_arrays, dim=0).values + else: + raise ValueError + + near_err = torch.sqrt((0 - torch.min(pred))**2) + 
far_err = torch.sqrt((1 - torch.max(pred))**2) + + err = sqrt_dist + (near_err + far_err) * regularizer_strength + err = err.detach().cpu().numpy() + return err + + res = minimize(closure, x, method='BFGS', tol=tol, options={'maxiter': max_iter, 'disp': disp}) + x = res.x + l = len(x) + s = x[:int(l/2)] + t = x[int(l/2):] + + # Prediction + s = torch.from_numpy(s).to(device) + t = torch.from_numpy(t).to(device) + transformed_arrays = original_input * s.view(-1, 1, 1) + t.view(-1, 1, 1) + if 'mean' == reduction: + aligned_images = torch.mean(transformed_arrays, dim=0) + std = torch.std(transformed_arrays, dim=0) + uncertainty = std + elif 'median' == reduction: + aligned_images = torch.median(transformed_arrays, dim=0).values + # MAD (median absolute deviation) as uncertainty indicator + abs_dev = torch.abs(transformed_arrays - aligned_images) + mad = torch.median(abs_dev, dim=0).values + uncertainty = mad + else: + raise ValueError + + # Scale and shift to [0, 1] + _min = torch.min(aligned_images) + _max = torch.max(aligned_images) + aligned_images = (aligned_images - _min) / (_max - _min) + uncertainty /= (_max - _min) + return aligned_images, uncertainty diff --git a/extensions-builtin/forge_preprocessor_marigold/marigold/util/image_util.py b/extensions-builtin/forge_preprocessor_marigold/marigold/util/image_util.py new file mode 100644 index 0000000000000000000000000000000000000000..6a06d52b7ebf7165fde4ea274f97b26c73af34b3 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_marigold/marigold/util/image_util.py @@ -0,0 +1,66 @@ + +import matplotlib +import numpy as np +import torch +from PIL import Image + +def colorize_depth_maps(depth_map, min_depth, max_depth, cmap='Spectral', valid_mask=None): + """ + Colorize depth maps. + """ + assert len(depth_map.shape) >= 2, "Invalid dimension" + + if isinstance(depth_map, torch.Tensor): + depth = depth_map.detach().clone().squeeze().numpy() + elif isinstance(depth_map, np.ndarray): + depth = depth_map.copy().squeeze() + # reshape to [ (B,) H, W ] + if depth.ndim < 3: + depth = depth[np.newaxis, :, :] + + # colorize + cm = matplotlib.colormaps[cmap] + depth = ((depth - min_depth) / (max_depth - min_depth)).clip(0, 1) + img_colored_np = cm(depth, bytes=False)[:,:,:,0:3] # value from 0 to 1 + img_colored_np = np.rollaxis(img_colored_np, 3, 1) + + if valid_mask is not None: + if isinstance(depth_map, torch.Tensor): + valid_mask = valid_mask.detach().numpy() + valid_mask = valid_mask.squeeze() # [H, W] or [B, H, W] + if valid_mask.ndim < 3: + valid_mask = valid_mask[np.newaxis, np.newaxis, :, :] + else: + valid_mask = valid_mask[:, np.newaxis, :, :] + valid_mask = np.repeat(valid_mask, 3, axis=1) + img_colored_np[~valid_mask] = 0 + + if isinstance(depth_map, torch.Tensor): + img_colored = torch.from_numpy(img_colored_np).float() + elif isinstance(depth_map, np.ndarray): + img_colored = img_colored_np + + return img_colored + + +def chw2hwc(chw): + assert 3 == len(chw.shape) + if isinstance(chw, torch.Tensor): + hwc = torch.permute(chw, (1, 2, 0)) + elif isinstance(chw, np.ndarray): + hwc = np.moveaxis(chw, 0, -1) + return hwc + + +def resize_max_res(img: Image.Image, max_edge_resolution): + original_width, original_height = img.size + downscale_factor = min(max_edge_resolution / original_width, max_edge_resolution / original_height) + + new_width = int(original_width * downscale_factor) + new_height = int(original_height * downscale_factor) + + resized_img = img.resize((new_width, new_height)) + return resized_img + + + \ No newline at end of 
file diff --git a/extensions-builtin/forge_preprocessor_marigold/marigold/util/seed_all.py b/extensions-builtin/forge_preprocessor_marigold/marigold/util/seed_all.py new file mode 100644 index 0000000000000000000000000000000000000000..588ef7989bec25026cd6b2b2cfe74fad6966bdb5 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_marigold/marigold/util/seed_all.py @@ -0,0 +1,14 @@ + +import numpy as np +import random +import torch + + +def seed_all(seed: int = 0): + """ + Set random seeds of all components. + """ + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed_all(seed) \ No newline at end of file diff --git a/extensions-builtin/forge_preprocessor_marigold/scripts/preprocessor_marigold.py b/extensions-builtin/forge_preprocessor_marigold/scripts/preprocessor_marigold.py new file mode 100644 index 0000000000000000000000000000000000000000..a5b479a11b2a4601cc3c0e34e0e2ff8adfef09a2 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_marigold/scripts/preprocessor_marigold.py @@ -0,0 +1,68 @@ +from modules_forge.supported_preprocessor import Preprocessor, PreprocessorParameter +from modules_forge.shared import preprocessor_dir, add_supported_preprocessor +from modules_forge.forge_util import resize_image_with_pad + + +import os +import torch +import numpy as np + +from marigold.model.marigold_pipeline import MarigoldPipeline +from huggingface_hub import snapshot_download +from modules_forge.diffusers_patcher import DiffusersModelPatcher +from modules_forge.forge_util import numpy_to_pytorch, HWC3 + + +class PreprocessorMarigold(Preprocessor): + def __init__(self): + super().__init__() + self.name = 'depth_marigold' + self.tags = ['Depth'] + self.model_filename_filters = ['depth'] + self.slider_resolution = PreprocessorParameter( + label='Resolution', minimum=128, maximum=2048, value=768, step=8, visible=True) + self.slider_1 = PreprocessorParameter(visible=False) + self.slider_2 = PreprocessorParameter(visible=False) + self.slider_3 = PreprocessorParameter(visible=False) + self.show_control_mode = True + self.do_not_need_model = False + self.sorting_priority = 100 # higher goes to top in the list + self.diffusers_patcher = None + + def load_model(self): + if self.model_patcher is not None: + return + + self.diffusers_patcher = DiffusersModelPatcher( + pipeline_class=MarigoldPipeline, + pretrained_path="Bingxin/Marigold", + enable_xformers=False, + noise_scheduler_type='DDIMScheduler') + + return + + def __call__(self, input_image, resolution, slider_1=None, slider_2=None, slider_3=None, **kwargs): + input_image, remove_pad = resize_image_with_pad(input_image, resolution) + + self.load_model() + + H, W, C = input_image.shape + + self.diffusers_patcher.prepare_memory_before_sampling( + batchsize=1, latent_width=W // 8, latent_height=H // 8 + ) + + with torch.no_grad(): + img = numpy_to_pytorch(input_image).movedim(-1, 1) + img = self.diffusers_patcher.move_tensor_to_current_device(img) + + img = img * 2.0 - 1.0 + depth = self.diffusers_patcher.pipeline(img, num_inference_steps=20, show_pbar=False) + depth = 0.5 - depth * 0.5 + depth = depth.movedim(1, -1)[0].cpu().numpy() + depth_image = HWC3((depth * 255.0).clip(0, 255).astype(np.uint8)) + + return remove_pad(depth_image) + + +add_supported_preprocessor(PreprocessorMarigold()) diff --git a/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/LICENSE b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/LICENSE new file mode 100644 index 
0000000000000000000000000000000000000000..16a9d56a3d4c15e4f34ac5426459c58487b01520 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022 Caroline Chan + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/__init__.py b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c3d094b07d8a888b39e0dd8858c69ebbc2fd1f0e --- /dev/null +++ b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/__init__.py @@ -0,0 +1,16 @@ +import torch + + +def load_checkpoint(fpath, model): + ckpt = torch.load(fpath, map_location='cpu')['model'] + + load_dict = {} + for k, v in ckpt.items(): + if k.startswith('module.'): + k_ = k.replace('module.', '') + load_dict[k_] = v + else: + load_dict[k] = v + + model.load_state_dict(load_dict) + return model diff --git a/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/NNET.py b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/NNET.py new file mode 100644 index 0000000000000000000000000000000000000000..3ddbc50c3ac18aa4b7f16779fe3c0133981ecc7a --- /dev/null +++ b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/NNET.py @@ -0,0 +1,22 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from .submodules.encoder import Encoder +from .submodules.decoder import Decoder + + +class NNET(nn.Module): + def __init__(self, args): + super(NNET, self).__init__() + self.encoder = Encoder() + self.decoder = Decoder(args) + + def get_1x_lr_params(self): # lr/10 learning rate + return self.encoder.parameters() + + def get_10x_lr_params(self): # lr learning rate + return self.decoder.parameters() + + def forward(self, img, **kwargs): + return self.decoder(self.encoder(img), **kwargs) \ No newline at end of file diff --git a/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/baseline.py b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/baseline.py new file mode 100644 index 0000000000000000000000000000000000000000..602d0fbdac1acc9ede9bc1f2e10a5df78831ce9d --- /dev/null +++ b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/baseline.py @@ -0,0 +1,85 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from 
.submodules.submodules import UpSampleBN, norm_normalize + + +# This is the baseline encoder-decoder we used in the ablation study +class NNET(nn.Module): + def __init__(self, args=None): + super(NNET, self).__init__() + self.encoder = Encoder() + self.decoder = Decoder(num_classes=4) + + def forward(self, x, **kwargs): + out = self.decoder(self.encoder(x), **kwargs) + + # Bilinearly upsample the output to match the input resolution + up_out = F.interpolate(out, size=[x.size(2), x.size(3)], mode='bilinear', align_corners=False) + + # L2-normalize the first three channels / ensure positive value for concentration parameters (kappa) + up_out = norm_normalize(up_out) + return up_out + + def get_1x_lr_params(self): # lr/10 learning rate + return self.encoder.parameters() + + def get_10x_lr_params(self): # lr learning rate + modules = [self.decoder] + for m in modules: + yield from m.parameters() + + +# Encoder +class Encoder(nn.Module): + def __init__(self): + super(Encoder, self).__init__() + + basemodel_name = 'tf_efficientnet_b5_ap' + basemodel = torch.hub.load('rwightman/gen-efficientnet-pytorch', basemodel_name, pretrained=True) + + # Remove last layer + basemodel.global_pool = nn.Identity() + basemodel.classifier = nn.Identity() + + self.original_model = basemodel + + def forward(self, x): + features = [x] + for k, v in self.original_model._modules.items(): + if (k == 'blocks'): + for ki, vi in v._modules.items(): + features.append(vi(features[-1])) + else: + features.append(v(features[-1])) + return features + + +# Decoder (no pixel-wise MLP, no uncertainty-guided sampling) +class Decoder(nn.Module): + def __init__(self, num_classes=4): + super(Decoder, self).__init__() + self.conv2 = nn.Conv2d(2048, 2048, kernel_size=1, stride=1, padding=0) + self.up1 = UpSampleBN(skip_input=2048 + 176, output_features=1024) + self.up2 = UpSampleBN(skip_input=1024 + 64, output_features=512) + self.up3 = UpSampleBN(skip_input=512 + 40, output_features=256) + self.up4 = UpSampleBN(skip_input=256 + 24, output_features=128) + self.conv3 = nn.Conv2d(128, num_classes, kernel_size=3, stride=1, padding=1) + + def forward(self, features): + x_block0, x_block1, x_block2, x_block3, x_block4 = features[4], features[5], features[6], features[8], features[11] + x_d0 = self.conv2(x_block4) + x_d1 = self.up1(x_d0, x_block3) + x_d2 = self.up2(x_d1, x_block2) + x_d3 = self.up3(x_d2, x_block1) + x_d4 = self.up4(x_d3, x_block0) + out = self.conv3(x_d4) + return out + + +if __name__ == '__main__': + model = Baseline() + x = torch.rand(2, 3, 480, 640) + out = model(x) + print(out.shape) diff --git a/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/decoder.py b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..993203d1792311f1c492091eaea3c1ac9088187f --- /dev/null +++ b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/decoder.py @@ -0,0 +1,202 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from .submodules import UpSampleBN, UpSampleGN, norm_normalize, sample_points + + +class Decoder(nn.Module): + def __init__(self, args): + super(Decoder, self).__init__() + + # hyper-parameter for sampling + self.sampling_ratio = args.sampling_ratio + self.importance_ratio = args.importance_ratio + + # feature-map + self.conv2 = nn.Conv2d(2048, 2048, kernel_size=1, stride=1, padding=0) + if args.architecture == 'BN': + 
self.up1 = UpSampleBN(skip_input=2048 + 176, output_features=1024) + self.up2 = UpSampleBN(skip_input=1024 + 64, output_features=512) + self.up3 = UpSampleBN(skip_input=512 + 40, output_features=256) + self.up4 = UpSampleBN(skip_input=256 + 24, output_features=128) + + elif args.architecture == 'GN': + self.up1 = UpSampleGN(skip_input=2048 + 176, output_features=1024) + self.up2 = UpSampleGN(skip_input=1024 + 64, output_features=512) + self.up3 = UpSampleGN(skip_input=512 + 40, output_features=256) + self.up4 = UpSampleGN(skip_input=256 + 24, output_features=128) + + else: + raise Exception('invalid architecture') + + # produces 1/8 res output + self.out_conv_res8 = nn.Conv2d(512, 4, kernel_size=3, stride=1, padding=1) + + # produces 1/4 res output + self.out_conv_res4 = nn.Sequential( + nn.Conv1d(512 + 4, 128, kernel_size=1), nn.ReLU(), + nn.Conv1d(128, 128, kernel_size=1), nn.ReLU(), + nn.Conv1d(128, 128, kernel_size=1), nn.ReLU(), + nn.Conv1d(128, 4, kernel_size=1), + ) + + # produces 1/2 res output + self.out_conv_res2 = nn.Sequential( + nn.Conv1d(256 + 4, 128, kernel_size=1), nn.ReLU(), + nn.Conv1d(128, 128, kernel_size=1), nn.ReLU(), + nn.Conv1d(128, 128, kernel_size=1), nn.ReLU(), + nn.Conv1d(128, 4, kernel_size=1), + ) + + # produces 1/1 res output + self.out_conv_res1 = nn.Sequential( + nn.Conv1d(128 + 4, 128, kernel_size=1), nn.ReLU(), + nn.Conv1d(128, 128, kernel_size=1), nn.ReLU(), + nn.Conv1d(128, 128, kernel_size=1), nn.ReLU(), + nn.Conv1d(128, 4, kernel_size=1), + ) + + def forward(self, features, gt_norm_mask=None, mode='test'): + x_block0, x_block1, x_block2, x_block3, x_block4 = features[4], features[5], features[6], features[8], features[11] + + # generate feature-map + + x_d0 = self.conv2(x_block4) # x_d0 : [2, 2048, 15, 20] 1/32 res + x_d1 = self.up1(x_d0, x_block3) # x_d1 : [2, 1024, 30, 40] 1/16 res + x_d2 = self.up2(x_d1, x_block2) # x_d2 : [2, 512, 60, 80] 1/8 res + x_d3 = self.up3(x_d2, x_block1) # x_d3: [2, 256, 120, 160] 1/4 res + x_d4 = self.up4(x_d3, x_block0) # x_d4: [2, 128, 240, 320] 1/2 res + + # 1/8 res output + out_res8 = self.out_conv_res8(x_d2) # out_res8: [2, 4, 60, 80] 1/8 res output + out_res8 = norm_normalize(out_res8) # out_res8: [2, 4, 60, 80] 1/8 res output + + ################################################################################################################ + # out_res4 + ################################################################################################################ + + if mode == 'train': + # upsampling ... 
out_res8: [2, 4, 60, 80] -> out_res8_res4: [2, 4, 120, 160] + out_res8_res4 = F.interpolate(out_res8, scale_factor=2, mode='bilinear', align_corners=True) + B, _, H, W = out_res8_res4.shape + + # samples: [B, 1, N, 2] + point_coords_res4, rows_int, cols_int = sample_points(out_res8_res4.detach(), gt_norm_mask, + sampling_ratio=self.sampling_ratio, + beta=self.importance_ratio) + + # output (needed for evaluation / visualization) + out_res4 = out_res8_res4 + + # grid_sample feature-map + feat_res4 = F.grid_sample(x_d2, point_coords_res4, mode='bilinear', align_corners=True) # (B, 512, 1, N) + init_pred = F.grid_sample(out_res8, point_coords_res4, mode='bilinear', align_corners=True) # (B, 4, 1, N) + feat_res4 = torch.cat([feat_res4, init_pred], dim=1) # (B, 512+4, 1, N) + + # prediction (needed to compute loss) + samples_pred_res4 = self.out_conv_res4(feat_res4[:, :, 0, :]) # (B, 4, N) + samples_pred_res4 = norm_normalize(samples_pred_res4) # (B, 4, N) - normalized + + for i in range(B): + out_res4[i, :, rows_int[i, :], cols_int[i, :]] = samples_pred_res4[i, :, :] + + else: + # grid_sample feature-map + feat_map = F.interpolate(x_d2, scale_factor=2, mode='bilinear', align_corners=True) + init_pred = F.interpolate(out_res8, scale_factor=2, mode='bilinear', align_corners=True) + feat_map = torch.cat([feat_map, init_pred], dim=1) # (B, 512+4, H, W) + B, _, H, W = feat_map.shape + + # try all pixels + out_res4 = self.out_conv_res4(feat_map.view(B, 512 + 4, -1)) # (B, 4, N) + out_res4 = norm_normalize(out_res4) # (B, 4, N) - normalized + out_res4 = out_res4.view(B, 4, H, W) + samples_pred_res4 = point_coords_res4 = None + + ################################################################################################################ + # out_res2 + ################################################################################################################ + + if mode == 'train': + + # upsampling ... 
out_res4: [2, 4, 120, 160] -> out_res4_res2: [2, 4, 240, 320] + out_res4_res2 = F.interpolate(out_res4, scale_factor=2, mode='bilinear', align_corners=True) + B, _, H, W = out_res4_res2.shape + + # samples: [B, 1, N, 2] + point_coords_res2, rows_int, cols_int = sample_points(out_res4_res2.detach(), gt_norm_mask, + sampling_ratio=self.sampling_ratio, + beta=self.importance_ratio) + + # output (needed for evaluation / visualization) + out_res2 = out_res4_res2 + + # grid_sample feature-map + feat_res2 = F.grid_sample(x_d3, point_coords_res2, mode='bilinear', align_corners=True) # (B, 256, 1, N) + init_pred = F.grid_sample(out_res4, point_coords_res2, mode='bilinear', align_corners=True) # (B, 4, 1, N) + feat_res2 = torch.cat([feat_res2, init_pred], dim=1) # (B, 256+4, 1, N) + + # prediction (needed to compute loss) + samples_pred_res2 = self.out_conv_res2(feat_res2[:, :, 0, :]) # (B, 4, N) + samples_pred_res2 = norm_normalize(samples_pred_res2) # (B, 4, N) - normalized + + for i in range(B): + out_res2[i, :, rows_int[i, :], cols_int[i, :]] = samples_pred_res2[i, :, :] + + else: + # grid_sample feature-map + feat_map = F.interpolate(x_d3, scale_factor=2, mode='bilinear', align_corners=True) + init_pred = F.interpolate(out_res4, scale_factor=2, mode='bilinear', align_corners=True) + feat_map = torch.cat([feat_map, init_pred], dim=1) # (B, 512+4, H, W) + B, _, H, W = feat_map.shape + + out_res2 = self.out_conv_res2(feat_map.view(B, 256 + 4, -1)) # (B, 4, N) + out_res2 = norm_normalize(out_res2) # (B, 4, N) - normalized + out_res2 = out_res2.view(B, 4, H, W) + samples_pred_res2 = point_coords_res2 = None + + ################################################################################################################ + # out_res1 + ################################################################################################################ + + if mode == 'train': + # upsampling ... 
out_res4: [2, 4, 120, 160] -> out_res4_res2: [2, 4, 240, 320] + out_res2_res1 = F.interpolate(out_res2, scale_factor=2, mode='bilinear', align_corners=True) + B, _, H, W = out_res2_res1.shape + + # samples: [B, 1, N, 2] + point_coords_res1, rows_int, cols_int = sample_points(out_res2_res1.detach(), gt_norm_mask, + sampling_ratio=self.sampling_ratio, + beta=self.importance_ratio) + + # output (needed for evaluation / visualization) + out_res1 = out_res2_res1 + + # grid_sample feature-map + feat_res1 = F.grid_sample(x_d4, point_coords_res1, mode='bilinear', align_corners=True) # (B, 128, 1, N) + init_pred = F.grid_sample(out_res2, point_coords_res1, mode='bilinear', align_corners=True) # (B, 4, 1, N) + feat_res1 = torch.cat([feat_res1, init_pred], dim=1) # (B, 128+4, 1, N) + + # prediction (needed to compute loss) + samples_pred_res1 = self.out_conv_res1(feat_res1[:, :, 0, :]) # (B, 4, N) + samples_pred_res1 = norm_normalize(samples_pred_res1) # (B, 4, N) - normalized + + for i in range(B): + out_res1[i, :, rows_int[i, :], cols_int[i, :]] = samples_pred_res1[i, :, :] + + else: + # grid_sample feature-map + feat_map = F.interpolate(x_d4, scale_factor=2, mode='bilinear', align_corners=True) + init_pred = F.interpolate(out_res2, scale_factor=2, mode='bilinear', align_corners=True) + feat_map = torch.cat([feat_map, init_pred], dim=1) # (B, 512+4, H, W) + B, _, H, W = feat_map.shape + + out_res1 = self.out_conv_res1(feat_map.view(B, 128 + 4, -1)) # (B, 4, N) + out_res1 = norm_normalize(out_res1) # (B, 4, N) - normalized + out_res1 = out_res1.view(B, 4, H, W) + samples_pred_res1 = point_coords_res1 = None + + return [out_res8, out_res4, out_res2, out_res1], \ + [out_res8, samples_pred_res4, samples_pred_res2, samples_pred_res1], \ + [None, point_coords_res4, point_coords_res2, point_coords_res1] + diff --git a/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/.gitignore b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..f04e5fff91094d9b9c662bba977d762bf71516ac --- /dev/null +++ b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/.gitignore @@ -0,0 +1,109 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# pytorch stuff +*.pth +*.onnx +*.pb + +trained_models/ +.fuse_hidden* diff --git a/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/BENCHMARK.md b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/BENCHMARK.md new file mode 100644 index 0000000000000000000000000000000000000000..6ead7171ce5a5bbd2702f6b5c825dc9808ba5658 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/BENCHMARK.md @@ -0,0 +1,555 @@ +# Model Performance Benchmarks + +All benchmarks run as per: + +``` +python onnx_export.py --model mobilenetv3_100 ./mobilenetv3_100.onnx +python onnx_optimize.py ./mobilenetv3_100.onnx --output mobilenetv3_100-opt.onnx +python onnx_to_caffe.py ./mobilenetv3_100.onnx --c2-prefix mobilenetv3 +python onnx_to_caffe.py ./mobilenetv3_100-opt.onnx --c2-prefix mobilenetv3-opt +python caffe2_benchmark.py --c2-init ./mobilenetv3.init.pb --c2-predict ./mobilenetv3.predict.pb +python caffe2_benchmark.py --c2-init ./mobilenetv3-opt.init.pb --c2-predict ./mobilenetv3-opt.predict.pb +``` + +## EfficientNet-B0 + +### Unoptimized +``` +Main run finished. Milliseconds per iter: 49.2862. Iters per second: 20.2897 +Time per operator type: + 29.7378 ms. 60.5145%. Conv + 12.1785 ms. 24.7824%. Sigmoid + 3.62811 ms. 7.38297%. SpatialBN + 2.98444 ms. 6.07314%. Mul + 0.326902 ms. 0.665225%. AveragePool + 0.197317 ms. 0.401528%. FC + 0.0852877 ms. 0.173555%. Add + 0.0032607 ms. 0.00663532%. Squeeze + 49.1416 ms in Total +FLOP per operator type: + 0.76907 GFLOP. 95.2696%. Conv + 0.0269508 GFLOP. 3.33857%. SpatialBN + 0.00846444 GFLOP. 1.04855%. Mul + 0.002561 GFLOP. 0.317248%. FC + 0.000210112 GFLOP. 0.0260279%. Add + 0.807256 GFLOP in Total +Feature Memory Read per operator type: + 58.5253 MB. 43.0891%. Mul + 43.2015 MB. 31.807%. Conv + 27.2869 MB. 20.0899%. SpatialBN + 5.12912 MB. 3.77631%. FC + 1.6809 MB. 1.23756%. Add + 135.824 MB in Total +Feature Memory Written per operator type: + 33.8578 MB. 38.1965%. Mul + 26.9881 MB. 30.4465%. Conv + 26.9508 MB. 30.4044%. SpatialBN + 0.840448 MB. 0.948147%. Add + 0.004 MB. 0.00451258%. FC + 88.6412 MB in Total +Parameter Memory per operator type: + 15.8248 MB. 74.9391%. Conv + 5.124 MB. 24.265%. FC + 0.168064 MB. 0.795877%. SpatialBN + 0 MB. 0%. Add + 0 MB. 0%. Mul + 21.1168 MB in Total +``` +### Optimized +``` +Main run finished. Milliseconds per iter: 46.0838. Iters per second: 21.6996 +Time per operator type: + 29.776 ms. 65.002%. Conv + 12.2803 ms. 26.8084%. Sigmoid + 3.15073 ms. 6.87815%. Mul + 0.328651 ms. 0.717456%. AveragePool + 0.186237 ms. 0.406563%. FC + 0.0832429 ms. 
0.181722%. Add + 0.0026184 ms. 0.00571606%. Squeeze + 45.8078 ms in Total +FLOP per operator type: + 0.76907 GFLOP. 98.5601%. Conv + 0.00846444 GFLOP. 1.08476%. Mul + 0.002561 GFLOP. 0.328205%. FC + 0.000210112 GFLOP. 0.0269269%. Add + 0.780305 GFLOP in Total +Feature Memory Read per operator type: + 58.5253 MB. 53.8803%. Mul + 43.2855 MB. 39.8501%. Conv + 5.12912 MB. 4.72204%. FC + 1.6809 MB. 1.54749%. Add + 108.621 MB in Total +Feature Memory Written per operator type: + 33.8578 MB. 54.8834%. Mul + 26.9881 MB. 43.7477%. Conv + 0.840448 MB. 1.36237%. Add + 0.004 MB. 0.00648399%. FC + 61.6904 MB in Total +Parameter Memory per operator type: + 15.8248 MB. 75.5403%. Conv + 5.124 MB. 24.4597%. FC + 0 MB. 0%. Add + 0 MB. 0%. Mul + 20.9488 MB in Total +``` + +## EfficientNet-B1 +### Optimized +``` +Main run finished. Milliseconds per iter: 71.8102. Iters per second: 13.9256 +Time per operator type: + 45.7915 ms. 66.3206%. Conv + 17.8718 ms. 25.8841%. Sigmoid + 4.44132 ms. 6.43244%. Mul + 0.51001 ms. 0.738658%. AveragePool + 0.233283 ms. 0.337868%. Add + 0.194986 ms. 0.282402%. FC + 0.00268255 ms. 0.00388519%. Squeeze + 69.0456 ms in Total +FLOP per operator type: + 1.37105 GFLOP. 98.7673%. Conv + 0.0138759 GFLOP. 0.99959%. Mul + 0.002561 GFLOP. 0.184489%. FC + 0.000674432 GFLOP. 0.0485847%. Add + 1.38816 GFLOP in Total +Feature Memory Read per operator type: + 94.624 MB. 54.0789%. Mul + 69.8255 MB. 39.9062%. Conv + 5.39546 MB. 3.08357%. Add + 5.12912 MB. 2.93136%. FC + 174.974 MB in Total +Feature Memory Written per operator type: + 55.5035 MB. 54.555%. Mul + 43.5333 MB. 42.7894%. Conv + 2.69773 MB. 2.65163%. Add + 0.004 MB. 0.00393165%. FC + 101.739 MB in Total +Parameter Memory per operator type: + 25.7479 MB. 83.4024%. Conv + 5.124 MB. 16.5976%. FC + 0 MB. 0%. Add + 0 MB. 0%. Mul + 30.8719 MB in Total +``` + +## EfficientNet-B2 +### Optimized +``` +Main run finished. Milliseconds per iter: 92.28. Iters per second: 10.8366 +Time per operator type: + 61.4627 ms. 67.5845%. Conv + 22.7458 ms. 25.0113%. Sigmoid + 5.59931 ms. 6.15701%. Mul + 0.642567 ms. 0.706568%. AveragePool + 0.272795 ms. 0.299965%. Add + 0.216178 ms. 0.237709%. FC + 0.00268895 ms. 0.00295677%. Squeeze + 90.942 ms in Total +FLOP per operator type: + 1.98431 GFLOP. 98.9343%. Conv + 0.0177039 GFLOP. 0.882686%. Mul + 0.002817 GFLOP. 0.140451%. FC + 0.000853984 GFLOP. 0.0425782%. Add + 2.00568 GFLOP in Total +Feature Memory Read per operator type: + 120.609 MB. 54.9637%. Mul + 86.3512 MB. 39.3519%. Conv + 6.83187 MB. 3.11341%. Add + 5.64163 MB. 2.571%. FC + 219.433 MB in Total +Feature Memory Written per operator type: + 70.8155 MB. 54.6573%. Mul + 55.3273 MB. 42.7031%. Conv + 3.41594 MB. 2.63651%. Add + 0.004 MB. 0.00308731%. FC + 129.563 MB in Total +Parameter Memory per operator type: + 30.4721 MB. 84.3913%. Conv + 5.636 MB. 15.6087%. FC + 0 MB. 0%. Add + 0 MB. 0%. Mul + 36.1081 MB in Total +``` + +## MixNet-M +### Optimized +``` +Main run finished. Milliseconds per iter: 63.1122. Iters per second: 15.8448 +Time per operator type: + 48.1139 ms. 75.2052%. Conv + 7.1341 ms. 11.1511%. Sigmoid + 2.63706 ms. 4.12189%. SpatialBN + 1.73186 ms. 2.70701%. Mul + 1.38707 ms. 2.16809%. Split + 1.29322 ms. 2.02139%. Concat + 1.00093 ms. 1.56452%. Relu + 0.235309 ms. 0.367803%. Add + 0.221579 ms. 0.346343%. FC + 0.219315 ms. 0.342803%. AveragePool + 0.00250145 ms. 0.00390993%. Squeeze + 63.9768 ms in Total +FLOP per operator type: + 0.675273 GFLOP. 95.5827%. Conv + 0.0221072 GFLOP. 3.12921%. SpatialBN + 0.00538445 GFLOP. 0.762152%. 
Mul + 0.003073 GFLOP. 0.434973%. FC + 0.000642488 GFLOP. 0.0909421%. Add + 0 GFLOP. 0%. Concat + 0 GFLOP. 0%. Relu + 0.70648 GFLOP in Total +Feature Memory Read per operator type: + 46.8424 MB. 30.502%. Conv + 36.8626 MB. 24.0036%. Mul + 22.3152 MB. 14.5309%. SpatialBN + 22.1074 MB. 14.3955%. Concat + 14.1496 MB. 9.21372%. Relu + 6.15414 MB. 4.00735%. FC + 5.1399 MB. 3.34692%. Add + 153.571 MB in Total +Feature Memory Written per operator type: + 32.7672 MB. 28.4331%. Conv + 22.1072 MB. 19.1831%. Concat + 22.1072 MB. 19.1831%. SpatialBN + 21.5378 MB. 18.689%. Mul + 14.1496 MB. 12.2781%. Relu + 2.56995 MB. 2.23003%. Add + 0.004 MB. 0.00347092%. FC + 115.243 MB in Total +Parameter Memory per operator type: + 13.7059 MB. 68.674%. Conv + 6.148 MB. 30.8049%. FC + 0.104 MB. 0.521097%. SpatialBN + 0 MB. 0%. Add + 0 MB. 0%. Concat + 0 MB. 0%. Mul + 0 MB. 0%. Relu + 19.9579 MB in Total +``` + +## TF MobileNet-V3 Large 1.0 + +### Optimized +``` +Main run finished. Milliseconds per iter: 22.0495. Iters per second: 45.3525 +Time per operator type: + 17.437 ms. 80.0087%. Conv + 1.27662 ms. 5.8577%. Add + 1.12759 ms. 5.17387%. Div + 0.701155 ms. 3.21721%. Mul + 0.562654 ms. 2.58171%. Relu + 0.431144 ms. 1.97828%. Clip + 0.156902 ms. 0.719936%. FC + 0.0996858 ms. 0.457402%. AveragePool + 0.00112455 ms. 0.00515993%. Flatten + 21.7939 ms in Total +FLOP per operator type: + 0.43062 GFLOP. 98.1484%. Conv + 0.002561 GFLOP. 0.583713%. FC + 0.00210867 GFLOP. 0.480616%. Mul + 0.00193868 GFLOP. 0.441871%. Add + 0.00151532 GFLOP. 0.345377%. Div + 0 GFLOP. 0%. Relu + 0.438743 GFLOP in Total +Feature Memory Read per operator type: + 34.7967 MB. 43.9391%. Conv + 14.496 MB. 18.3046%. Mul + 9.44828 MB. 11.9307%. Add + 9.26157 MB. 11.6949%. Relu + 6.0614 MB. 7.65395%. Div + 5.12912 MB. 6.47673%. FC + 79.193 MB in Total +Feature Memory Written per operator type: + 17.6247 MB. 35.8656%. Conv + 9.26157 MB. 18.847%. Relu + 8.43469 MB. 17.1643%. Mul + 7.75472 MB. 15.7806%. Add + 6.06128 MB. 12.3345%. Div + 0.004 MB. 0.00813985%. FC + 49.1409 MB in Total +Parameter Memory per operator type: + 16.6851 MB. 76.5052%. Conv + 5.124 MB. 23.4948%. FC + 0 MB. 0%. Add + 0 MB. 0%. Div + 0 MB. 0%. Mul + 0 MB. 0%. Relu + 21.8091 MB in Total +``` + +## MobileNet-V3 (RW) + +### Unoptimized +``` +Main run finished. Milliseconds per iter: 24.8316. Iters per second: 40.2712 +Time per operator type: + 15.9266 ms. 69.2624%. Conv + 2.36551 ms. 10.2873%. SpatialBN + 1.39102 ms. 6.04936%. Add + 1.30327 ms. 5.66773%. Div + 0.737014 ms. 3.20517%. Mul + 0.639697 ms. 2.78195%. Relu + 0.375681 ms. 1.63378%. Clip + 0.153126 ms. 0.665921%. FC + 0.0993787 ms. 0.432184%. AveragePool + 0.0032632 ms. 0.0141912%. Squeeze + 22.9946 ms in Total +FLOP per operator type: + 0.430616 GFLOP. 94.4041%. Conv + 0.0175992 GFLOP. 3.85829%. SpatialBN + 0.002561 GFLOP. 0.561449%. FC + 0.00210961 GFLOP. 0.46249%. Mul + 0.00173891 GFLOP. 0.381223%. Add + 0.00151626 GFLOP. 0.33241%. Div + 0 GFLOP. 0%. Relu + 0.456141 GFLOP in Total +Feature Memory Read per operator type: + 34.7354 MB. 36.4363%. Conv + 17.7944 MB. 18.6658%. SpatialBN + 14.5035 MB. 15.2137%. Mul + 9.25778 MB. 9.71113%. Relu + 7.84641 MB. 8.23064%. Add + 6.06516 MB. 6.36216%. Div + 5.12912 MB. 5.38029%. FC + 95.3317 MB in Total +Feature Memory Written per operator type: + 17.6246 MB. 26.7264%. Conv + 17.5992 MB. 26.6878%. SpatialBN + 9.25778 MB. 14.0387%. Relu + 8.43843 MB. 12.7962%. Mul + 6.95565 MB. 10.5477%. Add + 6.06502 MB. 9.19713%. Div + 0.004 MB. 0.00606568%. 
FC + 65.9447 MB in Total +Parameter Memory per operator type: + 16.6778 MB. 76.1564%. Conv + 5.124 MB. 23.3979%. FC + 0.0976 MB. 0.445674%. SpatialBN + 0 MB. 0%. Add + 0 MB. 0%. Div + 0 MB. 0%. Mul + 0 MB. 0%. Relu + 21.8994 MB in Total + +``` +### Optimized + +``` +Main run finished. Milliseconds per iter: 22.0981. Iters per second: 45.2527 +Time per operator type: + 17.146 ms. 78.8965%. Conv + 1.38453 ms. 6.37084%. Add + 1.30991 ms. 6.02749%. Div + 0.685417 ms. 3.15391%. Mul + 0.532589 ms. 2.45068%. Relu + 0.418263 ms. 1.92461%. Clip + 0.15128 ms. 0.696106%. FC + 0.102065 ms. 0.469648%. AveragePool + 0.0022143 ms. 0.010189%. Squeeze + 21.7323 ms in Total +FLOP per operator type: + 0.430616 GFLOP. 98.1927%. Conv + 0.002561 GFLOP. 0.583981%. FC + 0.00210961 GFLOP. 0.481051%. Mul + 0.00173891 GFLOP. 0.396522%. Add + 0.00151626 GFLOP. 0.34575%. Div + 0 GFLOP. 0%. Relu + 0.438542 GFLOP in Total +Feature Memory Read per operator type: + 34.7842 MB. 44.833%. Conv + 14.5035 MB. 18.6934%. Mul + 9.25778 MB. 11.9323%. Relu + 7.84641 MB. 10.1132%. Add + 6.06516 MB. 7.81733%. Div + 5.12912 MB. 6.61087%. FC + 77.5861 MB in Total +Feature Memory Written per operator type: + 17.6246 MB. 36.4556%. Conv + 9.25778 MB. 19.1492%. Relu + 8.43843 MB. 17.4544%. Mul + 6.95565 MB. 14.3874%. Add + 6.06502 MB. 12.5452%. Div + 0.004 MB. 0.00827378%. FC + 48.3455 MB in Total +Parameter Memory per operator type: + 16.6778 MB. 76.4973%. Conv + 5.124 MB. 23.5027%. FC + 0 MB. 0%. Add + 0 MB. 0%. Div + 0 MB. 0%. Mul + 0 MB. 0%. Relu + 21.8018 MB in Total + +``` + +## MnasNet-A1 + +### Unoptimized +``` +Main run finished. Milliseconds per iter: 30.0892. Iters per second: 33.2345 +Time per operator type: + 24.4656 ms. 79.0905%. Conv + 4.14958 ms. 13.4144%. SpatialBN + 1.60598 ms. 5.19169%. Relu + 0.295219 ms. 0.95436%. Mul + 0.187609 ms. 0.606486%. FC + 0.120556 ms. 0.389724%. AveragePool + 0.09036 ms. 0.292109%. Add + 0.015727 ms. 0.050841%. Sigmoid + 0.00306205 ms. 0.00989875%. Squeeze + 30.9337 ms in Total +FLOP per operator type: + 0.620598 GFLOP. 95.6434%. Conv + 0.0248873 GFLOP. 3.8355%. SpatialBN + 0.002561 GFLOP. 0.394688%. FC + 0.000597408 GFLOP. 0.0920695%. Mul + 0.000222656 GFLOP. 0.0343146%. Add + 0 GFLOP. 0%. Relu + 0.648867 GFLOP in Total +Feature Memory Read per operator type: + 35.5457 MB. 38.4109%. Conv + 25.1552 MB. 27.1829%. SpatialBN + 22.5235 MB. 24.339%. Relu + 5.12912 MB. 5.54256%. FC + 2.40586 MB. 2.59978%. Mul + 1.78125 MB. 1.92483%. Add + 92.5406 MB in Total +Feature Memory Written per operator type: + 24.9042 MB. 32.9424%. Conv + 24.8873 MB. 32.92%. SpatialBN + 22.5235 MB. 29.7932%. Relu + 2.38963 MB. 3.16092%. Mul + 0.890624 MB. 1.17809%. Add + 0.004 MB. 0.00529106%. FC + 75.5993 MB in Total +Parameter Memory per operator type: + 10.2732 MB. 66.1459%. Conv + 5.124 MB. 32.9917%. FC + 0.133952 MB. 0.86247%. SpatialBN + 0 MB. 0%. Add + 0 MB. 0%. Mul + 0 MB. 0%. Relu + 15.5312 MB in Total +``` + +### Optimized +``` +Main run finished. Milliseconds per iter: 24.2367. Iters per second: 41.2597 +Time per operator type: + 22.0547 ms. 91.1375%. Conv + 1.49096 ms. 6.16116%. Relu + 0.253417 ms. 1.0472%. Mul + 0.18506 ms. 0.76473%. FC + 0.112942 ms. 0.466717%. AveragePool + 0.086769 ms. 0.358559%. Add + 0.0127889 ms. 0.0528479%. Sigmoid + 0.0027346 ms. 0.0113003%. Squeeze + 24.1994 ms in Total +FLOP per operator type: + 0.620598 GFLOP. 99.4581%. Conv + 0.002561 GFLOP. 0.41043%. FC + 0.000597408 GFLOP. 0.0957417%. Mul + 0.000222656 GFLOP. 0.0356832%. Add + 0 GFLOP. 0%. 
Relu + 0.623979 GFLOP in Total +Feature Memory Read per operator type: + 35.6127 MB. 52.7968%. Conv + 22.5235 MB. 33.3917%. Relu + 5.12912 MB. 7.60406%. FC + 2.40586 MB. 3.56675%. Mul + 1.78125 MB. 2.64075%. Add + 67.4524 MB in Total +Feature Memory Written per operator type: + 24.9042 MB. 49.1092%. Conv + 22.5235 MB. 44.4145%. Relu + 2.38963 MB. 4.71216%. Mul + 0.890624 MB. 1.75624%. Add + 0.004 MB. 0.00788768%. FC + 50.712 MB in Total +Parameter Memory per operator type: + 10.2732 MB. 66.7213%. Conv + 5.124 MB. 33.2787%. FC + 0 MB. 0%. Add + 0 MB. 0%. Mul + 0 MB. 0%. Relu + 15.3972 MB in Total +``` +## MnasNet-B1 + +### Unoptimized +``` +Main run finished. Milliseconds per iter: 28.3109. Iters per second: 35.322 +Time per operator type: + 29.1121 ms. 83.3081%. Conv + 4.14959 ms. 11.8746%. SpatialBN + 1.35823 ms. 3.88675%. Relu + 0.186188 ms. 0.532802%. FC + 0.116244 ms. 0.332647%. Add + 0.018641 ms. 0.0533437%. AveragePool + 0.0040904 ms. 0.0117052%. Squeeze + 34.9451 ms in Total +FLOP per operator type: + 0.626272 GFLOP. 96.2088%. Conv + 0.0218266 GFLOP. 3.35303%. SpatialBN + 0.002561 GFLOP. 0.393424%. FC + 0.000291648 GFLOP. 0.0448034%. Add + 0 GFLOP. 0%. Relu + 0.650951 GFLOP in Total +Feature Memory Read per operator type: + 34.4354 MB. 41.3788%. Conv + 22.1299 MB. 26.5921%. SpatialBN + 19.1923 MB. 23.0622%. Relu + 5.12912 MB. 6.16333%. FC + 2.33318 MB. 2.80364%. Add + 83.2199 MB in Total +Feature Memory Written per operator type: + 21.8266 MB. 34.0955%. Conv + 21.8266 MB. 34.0955%. SpatialBN + 19.1923 MB. 29.9805%. Relu + 1.16659 MB. 1.82234%. Add + 0.004 MB. 0.00624844%. FC + 64.016 MB in Total +Parameter Memory per operator type: + 12.2576 MB. 69.9104%. Conv + 5.124 MB. 29.2245%. FC + 0.15168 MB. 0.865099%. SpatialBN + 0 MB. 0%. Add + 0 MB. 0%. Relu + 17.5332 MB in Total +``` + +### Optimized +``` +Main run finished. Milliseconds per iter: 26.6364. Iters per second: 37.5426 +Time per operator type: + 24.9888 ms. 94.0962%. Conv + 1.26147 ms. 4.75011%. Relu + 0.176234 ms. 0.663619%. FC + 0.113309 ms. 0.426672%. Add + 0.0138708 ms. 0.0522311%. AveragePool + 0.00295685 ms. 0.0111341%. Squeeze + 26.5566 ms in Total +FLOP per operator type: + 0.626272 GFLOP. 99.5466%. Conv + 0.002561 GFLOP. 0.407074%. FC + 0.000291648 GFLOP. 0.0463578%. Add + 0 GFLOP. 0%. Relu + 0.629124 GFLOP in Total +Feature Memory Read per operator type: + 34.5112 MB. 56.4224%. Conv + 19.1923 MB. 31.3775%. Relu + 5.12912 MB. 8.3856%. FC + 2.33318 MB. 3.81452%. Add + 61.1658 MB in Total +Feature Memory Written per operator type: + 21.8266 MB. 51.7346%. Conv + 19.1923 MB. 45.4908%. Relu + 1.16659 MB. 2.76513%. Add + 0.004 MB. 0.00948104%. FC + 42.1895 MB in Total +Parameter Memory per operator type: + 12.2576 MB. 70.5205%. Conv + 5.124 MB. 29.4795%. FC + 0 MB. 0%. Add + 0 MB. 0%. Relu + 17.3816 MB in Total +``` diff --git a/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/LICENSE b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..80e7d15508202f3262a50db27f5198460d7f509f --- /dev/null +++ b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2020 Ross Wightman + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
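The NormalBAE `Decoder` added in `decoder.py` above refines its coarse surface-normal map in three stages; during training it re-predicts only a subset of pixels chosen by `sample_points`, gathers the matching decoder features with `F.grid_sample`, runs them through a pixel-wise `Conv1d` MLP, and scatters the refined predictions back into the upsampled map. The sketch below restates that pattern in isolation. It is an illustration, not the module's actual code: the function name `refine_at_points` and the argument names are hypothetical, the real coordinates come from `sample_points`, and the MLPs are `out_conv_res4` / `out_conv_res2` / `out_conv_res1`.

```python
import torch
import torch.nn.functional as F


def refine_at_points(feat_map, coarse_pred, coords, rows, cols, mlp):
    """Sketch of one refinement stage (names are illustrative).

    feat_map:    (B, C, H, W) decoder feature map (e.g. x_d3)
    coarse_pred: (B, 4, H, W) bilinearly upsampled prediction from the previous stage
    coords:      (B, 1, N, 2) sampled point coordinates in [-1, 1] (from sample_points)
    rows, cols:  (B, N) long tensors with the integer pixel indices of those points
    mlp:         pixel-wise Conv1d head (out_conv_res4 / out_conv_res2 / out_conv_res1)
    """
    out = coarse_pred.clone()

    # Gather features and the coarse prediction at the sampled points only.
    feat = F.grid_sample(feat_map, coords, mode='bilinear', align_corners=True)     # (B, C, 1, N)
    init = F.grid_sample(coarse_pred, coords, mode='bilinear', align_corners=True)  # (B, 4, 1, N)
    x = torch.cat([feat, init], dim=1)[:, :, 0, :]                                  # (B, C + 4, N)

    # Re-predict those points; the real code additionally applies norm_normalize
    # so the first three channels stay unit-length and kappa stays positive.
    pred = mlp(x)                                                                   # (B, 4, N)

    # Scatter the refined points back into the dense prediction map.
    for i in range(out.shape[0]):
        out[i, :, rows[i], cols[i]] = pred[i]
    return out
```

At test time the same MLP heads are instead applied densely to every pixel (the `else` branches in `decoder.py`), so training and inference share weights but differ in where the pixel-wise MLP is evaluated.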
diff --git a/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/README.md b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/README.md new file mode 100644 index 0000000000000000000000000000000000000000..463368280d6a5015060eb73d20fe6512f8e04c50 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/README.md @@ -0,0 +1,323 @@ +# (Generic) EfficientNets for PyTorch + +A 'generic' implementation of EfficientNet, MixNet, MobileNetV3, etc. that covers most of the compute/parameter efficient architectures derived from the MobileNet V1/V2 block sequence, including those found via automated neural architecture search. + +All models are implemented by GenEfficientNet or MobileNetV3 classes, with string based architecture definitions to configure the block layouts (idea from [here](https://github.com/tensorflow/tpu/blob/master/models/official/mnasnet/mnasnet_models.py)) + +## What's New + +### Aug 19, 2020 +* Add updated PyTorch trained EfficientNet-B3 weights trained by myself with `timm` (82.1 top-1) +* Add PyTorch trained EfficientNet-Lite0 contributed by [@hal-314](https://github.com/hal-314) (75.5 top-1) +* Update ONNX and Caffe2 export / utility scripts to work with latest PyTorch / ONNX +* ONNX runtime based validation script added +* activations (mostly) brought in sync with `timm` equivalents + + +### April 5, 2020 +* Add some newly trained MobileNet-V2 models trained with latest h-params, rand augment. They compare quite favourably to EfficientNet-Lite + * 3.5M param MobileNet-V2 100 @ 73% + * 4.5M param MobileNet-V2 110d @ 75% + * 6.1M param MobileNet-V2 140 @ 76.5% + * 5.8M param MobileNet-V2 120d @ 77.3% + +### March 23, 2020 + * Add EfficientNet-Lite models w/ weights ported from [Tensorflow TPU](https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet/lite) + * Add PyTorch trained MobileNet-V3 Large weights with 75.77% top-1 + * IMPORTANT CHANGE (if training from scratch) - weight init changed to better match Tensorflow impl, set `fix_group_fanout=False` in `initialize_weight_goog` for old behavior + +### Feb 12, 2020 + * Add EfficientNet-L2 and B0-B7 NoisyStudent weights ported from [Tensorflow TPU](https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet) + * Port new EfficientNet-B8 (RandAugment) weights from TF TPU, these are different than the B8 AdvProp, different input normalization. + * Add RandAugment PyTorch trained EfficientNet-ES (EdgeTPU-Small) weights with 78.1 top-1. Trained by [Andrew Lavin](https://github.com/andravin) + +### Jan 22, 2020 + * Update weights for EfficientNet B0, B2, B3 and MixNet-XL with latest RandAugment trained weights. Trained with (https://github.com/rwightman/pytorch-image-models) + * Fix torchscript compatibility for PyTorch 1.4, add torchscript support for MixedConv2d using ModuleDict + * Test models, torchscript, onnx export with PyTorch 1.4 -- no issues + +### Nov 22, 2019 + * New top-1 high! Ported official TF EfficientNet AdvProp (https://arxiv.org/abs/1911.09665) weights and B8 model spec. Created a new set of `ap` models since they use a different + preprocessing (Inception mean/std) from the original EfficientNet base/AA/RA weights. 
+ +### Nov 15, 2019 + * Ported official TF MobileNet-V3 float32 large/small/minimalistic weights + * Modifications to MobileNet-V3 model and components to support some additional config needed for differences between TF MobileNet-V3 and mine + +### Oct 30, 2019 + * Many of the models will now work with torch.jit.script, MixNet being the biggest exception + * Improved interface for enabling torchscript or ONNX export compatible modes (via config) + * Add JIT optimized mem-efficient Swish/Mish autograd.fn in addition to memory-efficient autgrad.fn + * Activation factory to select best version of activation by name or override one globally + * Add pretrained checkpoint load helper that handles input conv and classifier changes + +### Oct 27, 2019 + * Add CondConv EfficientNet variants ported from https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet/condconv + * Add RandAug weights for TF EfficientNet B5 and B7 from https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet + * Bring over MixNet-XL model and depth scaling algo from my pytorch-image-models code base + * Switch activations and global pooling to modules + * Add memory-efficient Swish/Mish impl + * Add as_sequential() method to all models and allow as an argument in entrypoint fns + * Move MobileNetV3 into own file since it has a different head + * Remove ChamNet, MobileNet V2/V1 since they will likely never be used here + +## Models + +Implemented models include: + * EfficientNet NoisyStudent (B0-B7, L2) (https://arxiv.org/abs/1911.04252) + * EfficientNet AdvProp (B0-B8) (https://arxiv.org/abs/1911.09665) + * EfficientNet (B0-B8) (https://arxiv.org/abs/1905.11946) + * EfficientNet-EdgeTPU (S, M, L) (https://ai.googleblog.com/2019/08/efficientnet-edgetpu-creating.html) + * EfficientNet-CondConv (https://arxiv.org/abs/1904.04971) + * EfficientNet-Lite (https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet/lite) + * MixNet (https://arxiv.org/abs/1907.09595) + * MNASNet B1, A1 (Squeeze-Excite), and Small (https://arxiv.org/abs/1807.11626) + * MobileNet-V3 (https://arxiv.org/abs/1905.02244) + * FBNet-C (https://arxiv.org/abs/1812.03443) + * Single-Path NAS (https://arxiv.org/abs/1904.02877) + +I originally implemented and trained some these models with code [here](https://github.com/rwightman/pytorch-image-models), this repository contains just the GenEfficientNet models, validation, and associated ONNX/Caffe2 export code. + +## Pretrained + +I've managed to train several of the models to accuracies close to or above the originating papers and official impl. 
My training code is here: https://github.com/rwightman/pytorch-image-models + + +|Model | Prec@1 (Err) | Prec@5 (Err) | Param#(M) | MAdds(M) | Image Scaling | Resolution | Crop | +|---|---|---|---|---|---|---|---| +| efficientnet_b3 | 82.240 (17.760) | 96.116 (3.884) | 12.23 | TBD | bicubic | 320 | 1.0 | +| efficientnet_b3 | 82.076 (17.924) | 96.020 (3.980) | 12.23 | TBD | bicubic | 300 | 0.904 | +| mixnet_xl | 81.074 (18.926) | 95.282 (4.718) | 11.90 | TBD | bicubic | 256 | 1.0 | +| efficientnet_b2 | 80.612 (19.388) | 95.318 (4.682) | 9.1 | TBD | bicubic | 288 | 1.0 | +| mixnet_xl | 80.476 (19.524) | 94.936 (5.064) | 11.90 | TBD | bicubic | 224 | 0.875 | +| efficientnet_b2 | 80.288 (19.712) | 95.166 (4.834) | 9.1 | 1003 | bicubic | 260 | 0.890 | +| mixnet_l | 78.976 (21.024 | 94.184 (5.816) | 7.33 | TBD | bicubic | 224 | 0.875 | +| efficientnet_b1 | 78.692 (21.308) | 94.086 (5.914) | 7.8 | 694 | bicubic | 240 | 0.882 | +| efficientnet_es | 78.066 (21.934) | 93.926 (6.074) | 5.44 | TBD | bicubic | 224 | 0.875 | +| efficientnet_b0 | 77.698 (22.302) | 93.532 (6.468) | 5.3 | 390 | bicubic | 224 | 0.875 | +| mobilenetv2_120d | 77.294 (22.706 | 93.502 (6.498) | 5.8 | TBD | bicubic | 224 | 0.875 | +| mixnet_m | 77.256 (22.744) | 93.418 (6.582) | 5.01 | 353 | bicubic | 224 | 0.875 | +| mobilenetv2_140 | 76.524 (23.476) | 92.990 (7.010) | 6.1 | TBD | bicubic | 224 | 0.875 | +| mixnet_s | 75.988 (24.012) | 92.794 (7.206) | 4.13 | TBD | bicubic | 224 | 0.875 | +| mobilenetv3_large_100 | 75.766 (24.234) | 92.542 (7.458) | 5.5 | TBD | bicubic | 224 | 0.875 | +| mobilenetv3_rw | 75.634 (24.366) | 92.708 (7.292) | 5.5 | 219 | bicubic | 224 | 0.875 | +| efficientnet_lite0 | 75.472 (24.528) | 92.520 (7.480) | 4.65 | TBD | bicubic | 224 | 0.875 | +| mnasnet_a1 | 75.448 (24.552) | 92.604 (7.396) | 3.9 | 312 | bicubic | 224 | 0.875 | +| fbnetc_100 | 75.124 (24.876) | 92.386 (7.614) | 5.6 | 385 | bilinear | 224 | 0.875 | +| mobilenetv2_110d | 75.052 (24.948) | 92.180 (7.820) | 4.5 | TBD | bicubic | 224 | 0.875 | +| mnasnet_b1 | 74.658 (25.342) | 92.114 (7.886) | 4.4 | 315 | bicubic | 224 | 0.875 | +| spnasnet_100 | 74.084 (25.916) | 91.818 (8.182) | 4.4 | TBD | bilinear | 224 | 0.875 | +| mobilenetv2_100 | 72.978 (27.022) | 91.016 (8.984) | 3.5 | TBD | bicubic | 224 | 0.875 | + + +More pretrained models to come... + + +## Ported Weights + +The weights ported from Tensorflow checkpoints for the EfficientNet models do pretty much match accuracy in Tensorflow once a SAME convolution padding equivalent is added, and the same crop factors, image scaling, etc (see table) are used via cmd line args. + +**IMPORTANT:** +* Tensorflow ported weights for EfficientNet AdvProp (AP), EfficientNet EdgeTPU, EfficientNet-CondConv, EfficientNet-Lite, and MobileNet-V3 models use Inception style (0.5, 0.5, 0.5) for mean and std. +* Enabling the Tensorflow preprocessing pipeline with `--tf-preprocessing` at validation time will improve scores by 0.1-0.5%, very close to original TF impl. 
+ +To run validation for tf_efficientnet_b5: +`python validate.py /path/to/imagenet/validation/ --model tf_efficientnet_b5 -b 64 --img-size 456 --crop-pct 0.934 --interpolation bicubic` + +To run validation w/ TF preprocessing for tf_efficientnet_b5: +`python validate.py /path/to/imagenet/validation/ --model tf_efficientnet_b5 -b 64 --img-size 456 --tf-preprocessing` + +To run validation for a model with Inception preprocessing, ie EfficientNet-B8 AdvProp: +`python validate.py /path/to/imagenet/validation/ --model tf_efficientnet_b8_ap -b 48 --num-gpu 2 --img-size 672 --crop-pct 0.954 --mean 0.5 --std 0.5` + +|Model | Prec@1 (Err) | Prec@5 (Err) | Param # | Image Scaling | Image Size | Crop | +|---|---|---|---|---|---|---| +| tf_efficientnet_l2_ns *tfp | 88.352 (11.648) | 98.652 (1.348) | 480 | bicubic | 800 | N/A | +| tf_efficientnet_l2_ns | TBD | TBD | 480 | bicubic | 800 | 0.961 | +| tf_efficientnet_l2_ns_475 | 88.234 (11.766) | 98.546 (1.454) | 480 | bicubic | 475 | 0.936 | +| tf_efficientnet_l2_ns_475 *tfp | 88.172 (11.828) | 98.566 (1.434) | 480 | bicubic | 475 | N/A | +| tf_efficientnet_b7_ns *tfp | 86.844 (13.156) | 98.084 (1.916) | 66.35 | bicubic | 600 | N/A | +| tf_efficientnet_b7_ns | 86.840 (13.160) | 98.094 (1.906) | 66.35 | bicubic | 600 | N/A | +| tf_efficientnet_b6_ns | 86.452 (13.548) | 97.882 (2.118) | 43.04 | bicubic | 528 | N/A | +| tf_efficientnet_b6_ns *tfp | 86.444 (13.556) | 97.880 (2.120) | 43.04 | bicubic | 528 | N/A | +| tf_efficientnet_b5_ns *tfp | 86.064 (13.936) | 97.746 (2.254) | 30.39 | bicubic | 456 | N/A | +| tf_efficientnet_b5_ns | 86.088 (13.912) | 97.752 (2.248) | 30.39 | bicubic | 456 | N/A | +| tf_efficientnet_b8_ap *tfp | 85.436 (14.564) | 97.272 (2.728) | 87.4 | bicubic | 672 | N/A | +| tf_efficientnet_b8 *tfp | 85.384 (14.616) | 97.394 (2.606) | 87.4 | bicubic | 672 | N/A | +| tf_efficientnet_b8 | 85.370 (14.630) | 97.390 (2.610) | 87.4 | bicubic | 672 | 0.954 | +| tf_efficientnet_b8_ap | 85.368 (14.632) | 97.294 (2.706) | 87.4 | bicubic | 672 | 0.954 | +| tf_efficientnet_b4_ns *tfp | 85.298 (14.702) | 97.504 (2.496) | 19.34 | bicubic | 380 | N/A | +| tf_efficientnet_b4_ns | 85.162 (14.838) | 97.470 (2.530) | 19.34 | bicubic | 380 | 0.922 | +| tf_efficientnet_b7_ap *tfp | 85.154 (14.846) | 97.244 (2.756) | 66.35 | bicubic | 600 | N/A | +| tf_efficientnet_b7_ap | 85.118 (14.882) | 97.252 (2.748) | 66.35 | bicubic | 600 | 0.949 | +| tf_efficientnet_b7 *tfp | 84.940 (15.060) | 97.214 (2.786) | 66.35 | bicubic | 600 | N/A | +| tf_efficientnet_b7 | 84.932 (15.068) | 97.208 (2.792) | 66.35 | bicubic | 600 | 0.949 | +| tf_efficientnet_b6_ap | 84.786 (15.214) | 97.138 (2.862) | 43.04 | bicubic | 528 | 0.942 | +| tf_efficientnet_b6_ap *tfp | 84.760 (15.240) | 97.124 (2.876) | 43.04 | bicubic | 528 | N/A | +| tf_efficientnet_b5_ap *tfp | 84.276 (15.724) | 96.932 (3.068) | 30.39 | bicubic | 456 | N/A | +| tf_efficientnet_b5_ap | 84.254 (15.746) | 96.976 (3.024) | 30.39 | bicubic | 456 | 0.934 | +| tf_efficientnet_b6 *tfp | 84.140 (15.860) | 96.852 (3.148) | 43.04 | bicubic | 528 | N/A | +| tf_efficientnet_b6 | 84.110 (15.890) | 96.886 (3.114) | 43.04 | bicubic | 528 | 0.942 | +| tf_efficientnet_b3_ns *tfp | 84.054 (15.946) | 96.918 (3.082) | 12.23 | bicubic | 300 | N/A | +| tf_efficientnet_b3_ns | 84.048 (15.952) | 96.910 (3.090) | 12.23 | bicubic | 300 | .904 | +| tf_efficientnet_b5 *tfp | 83.822 (16.178) | 96.756 (3.244) | 30.39 | bicubic | 456 | N/A | +| tf_efficientnet_b5 | 83.812 (16.188) | 96.748 (3.252) | 30.39 | bicubic | 456 | 0.934 | +| 
tf_efficientnet_b4_ap *tfp | 83.278 (16.722) | 96.376 (3.624) | 19.34 | bicubic | 380 | N/A | +| tf_efficientnet_b4_ap | 83.248 (16.752) | 96.388 (3.612) | 19.34 | bicubic | 380 | 0.922 | +| tf_efficientnet_b4 | 83.022 (16.978) | 96.300 (3.700) | 19.34 | bicubic | 380 | 0.922 | +| tf_efficientnet_b4 *tfp | 82.948 (17.052) | 96.308 (3.692) | 19.34 | bicubic | 380 | N/A | +| tf_efficientnet_b2_ns *tfp | 82.436 (17.564) | 96.268 (3.732) | 9.11 | bicubic | 260 | N/A | +| tf_efficientnet_b2_ns | 82.380 (17.620) | 96.248 (3.752) | 9.11 | bicubic | 260 | 0.89 | +| tf_efficientnet_b3_ap *tfp | 81.882 (18.118) | 95.662 (4.338) | 12.23 | bicubic | 300 | N/A | +| tf_efficientnet_b3_ap | 81.828 (18.172) | 95.624 (4.376) | 12.23 | bicubic | 300 | 0.904 | +| tf_efficientnet_b3 | 81.636 (18.364) | 95.718 (4.282) | 12.23 | bicubic | 300 | 0.904 | +| tf_efficientnet_b3 *tfp | 81.576 (18.424) | 95.662 (4.338) | 12.23 | bicubic | 300 | N/A | +| tf_efficientnet_lite4 | 81.528 (18.472) | 95.668 (4.332) | 13.00 | bilinear | 380 | 0.92 | +| tf_efficientnet_b1_ns *tfp | 81.514 (18.486) | 95.776 (4.224) | 7.79 | bicubic | 240 | N/A | +| tf_efficientnet_lite4 *tfp | 81.502 (18.498) | 95.676 (4.324) | 13.00 | bilinear | 380 | N/A | +| tf_efficientnet_b1_ns | 81.388 (18.612) | 95.738 (4.262) | 7.79 | bicubic | 240 | 0.88 | +| tf_efficientnet_el | 80.534 (19.466) | 95.190 (4.810) | 10.59 | bicubic | 300 | 0.904 | +| tf_efficientnet_el *tfp | 80.476 (19.524) | 95.200 (4.800) | 10.59 | bicubic | 300 | N/A | +| tf_efficientnet_b2_ap *tfp | 80.420 (19.580) | 95.040 (4.960) | 9.11 | bicubic | 260 | N/A | +| tf_efficientnet_b2_ap | 80.306 (19.694) | 95.028 (4.972) | 9.11 | bicubic | 260 | 0.890 | +| tf_efficientnet_b2 *tfp | 80.188 (19.812) | 94.974 (5.026) | 9.11 | bicubic | 260 | N/A | +| tf_efficientnet_b2 | 80.086 (19.914) | 94.908 (5.092) | 9.11 | bicubic | 260 | 0.890 | +| tf_efficientnet_lite3 | 79.812 (20.188) | 94.914 (5.086) | 8.20 | bilinear | 300 | 0.904 | +| tf_efficientnet_lite3 *tfp | 79.734 (20.266) | 94.838 (5.162) | 8.20 | bilinear | 300 | N/A | +| tf_efficientnet_b1_ap *tfp | 79.532 (20.468) | 94.378 (5.622) | 7.79 | bicubic | 240 | N/A | +| tf_efficientnet_cc_b1_8e *tfp | 79.464 (20.536)| 94.492 (5.508) | 39.7 | bicubic | 240 | 0.88 | +| tf_efficientnet_cc_b1_8e | 79.298 (20.702) | 94.364 (5.636) | 39.7 | bicubic | 240 | 0.88 | +| tf_efficientnet_b1_ap | 79.278 (20.722) | 94.308 (5.692) | 7.79 | bicubic | 240 | 0.88 | +| tf_efficientnet_b1 *tfp | 79.172 (20.828) | 94.450 (5.550) | 7.79 | bicubic | 240 | N/A | +| tf_efficientnet_em *tfp | 78.958 (21.042) | 94.458 (5.542) | 6.90 | bicubic | 240 | N/A | +| tf_efficientnet_b0_ns *tfp | 78.806 (21.194) | 94.496 (5.504) | 5.29 | bicubic | 224 | N/A | +| tf_mixnet_l *tfp | 78.846 (21.154) | 94.212 (5.788) | 7.33 | bilinear | 224 | N/A | +| tf_efficientnet_b1 | 78.826 (21.174) | 94.198 (5.802) | 7.79 | bicubic | 240 | 0.88 | +| tf_mixnet_l | 78.770 (21.230) | 94.004 (5.996) | 7.33 | bicubic | 224 | 0.875 | +| tf_efficientnet_em | 78.742 (21.258) | 94.332 (5.668) | 6.90 | bicubic | 240 | 0.875 | +| tf_efficientnet_b0_ns | 78.658 (21.342) | 94.376 (5.624) | 5.29 | bicubic | 224 | 0.875 | +| tf_efficientnet_cc_b0_8e *tfp | 78.314 (21.686) | 93.790 (6.210) | 24.0 | bicubic | 224 | 0.875 | +| tf_efficientnet_cc_b0_8e | 77.908 (22.092) | 93.656 (6.344) | 24.0 | bicubic | 224 | 0.875 | +| tf_efficientnet_cc_b0_4e *tfp | 77.746 (22.254) | 93.552 (6.448) | 13.3 | bicubic | 224 | 0.875 | +| tf_efficientnet_cc_b0_4e | 77.304 (22.696) | 93.332 (6.668) | 13.3 | bicubic | 224 
| 0.875 | +| tf_efficientnet_es *tfp | 77.616 (22.384) | 93.750 (6.250) | 5.44 | bicubic | 224 | N/A | +| tf_efficientnet_lite2 *tfp | 77.544 (22.456) | 93.800 (6.200) | 6.09 | bilinear | 260 | N/A | +| tf_efficientnet_lite2 | 77.460 (22.540) | 93.746 (6.254) | 6.09 | bicubic | 260 | 0.89 | +| tf_efficientnet_b0_ap *tfp | 77.514 (22.486) | 93.576 (6.424) | 5.29 | bicubic | 224 | N/A | +| tf_efficientnet_es | 77.264 (22.736) | 93.600 (6.400) | 5.44 | bicubic | 224 | N/A | +| tf_efficientnet_b0 *tfp | 77.258 (22.742) | 93.478 (6.522) | 5.29 | bicubic | 224 | N/A | +| tf_efficientnet_b0_ap | 77.084 (22.916) | 93.254 (6.746) | 5.29 | bicubic | 224 | 0.875 | +| tf_mixnet_m *tfp | 77.072 (22.928) | 93.368 (6.632) | 5.01 | bilinear | 224 | N/A | +| tf_mixnet_m | 76.950 (23.050) | 93.156 (6.844) | 5.01 | bicubic | 224 | 0.875 | +| tf_efficientnet_b0 | 76.848 (23.152) | 93.228 (6.772) | 5.29 | bicubic | 224 | 0.875 | +| tf_efficientnet_lite1 *tfp | 76.764 (23.236) | 93.326 (6.674) | 5.42 | bilinear | 240 | N/A | +| tf_efficientnet_lite1 | 76.638 (23.362) | 93.232 (6.768) | 5.42 | bicubic | 240 | 0.882 | +| tf_mixnet_s *tfp | 75.800 (24.200) | 92.788 (7.212) | 4.13 | bilinear | 224 | N/A | +| tf_mobilenetv3_large_100 *tfp | 75.768 (24.232) | 92.710 (7.290) | 5.48 | bilinear | 224 | N/A | +| tf_mixnet_s | 75.648 (24.352) | 92.636 (7.364) | 4.13 | bicubic | 224 | 0.875 | +| tf_mobilenetv3_large_100 | 75.516 (24.484) | 92.600 (7.400) | 5.48 | bilinear | 224 | 0.875 | +| tf_efficientnet_lite0 *tfp | 75.074 (24.926) | 92.314 (7.686) | 4.65 | bilinear | 224 | N/A | +| tf_efficientnet_lite0 | 74.842 (25.158) | 92.170 (7.830) | 4.65 | bicubic | 224 | 0.875 | +| tf_mobilenetv3_large_075 *tfp | 73.730 (26.270) | 91.616 (8.384) | 3.99 | bilinear | 224 |N/A | +| tf_mobilenetv3_large_075 | 73.442 (26.558) | 91.352 (8.648) | 3.99 | bilinear | 224 | 0.875 | +| tf_mobilenetv3_large_minimal_100 *tfp | 72.678 (27.322) | 90.860 (9.140) | 3.92 | bilinear | 224 | N/A | +| tf_mobilenetv3_large_minimal_100 | 72.244 (27.756) | 90.636 (9.364) | 3.92 | bilinear | 224 | 0.875 | +| tf_mobilenetv3_small_100 *tfp | 67.918 (32.082) | 87.958 (12.042 | 2.54 | bilinear | 224 | N/A | +| tf_mobilenetv3_small_100 | 67.918 (32.082) | 87.662 (12.338) | 2.54 | bilinear | 224 | 0.875 | +| tf_mobilenetv3_small_075 *tfp | 66.142 (33.858) | 86.498 (13.502) | 2.04 | bilinear | 224 | N/A | +| tf_mobilenetv3_small_075 | 65.718 (34.282) | 86.136 (13.864) | 2.04 | bilinear | 224 | 0.875 | +| tf_mobilenetv3_small_minimal_100 *tfp | 63.378 (36.622) | 84.802 (15.198) | 2.04 | bilinear | 224 | N/A | +| tf_mobilenetv3_small_minimal_100 | 62.898 (37.102) | 84.230 (15.770) | 2.04 | bilinear | 224 | 0.875 | + + +*tfp models validated with `tf-preprocessing` pipeline + +Google tf and tflite weights ported from official Tensorflow repositories +* https://github.com/tensorflow/tpu/tree/master/models/official/mnasnet +* https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet +* https://github.com/tensorflow/models/tree/master/research/slim/nets/mobilenet + +## Usage + +### Environment + +All development and testing has been done in Conda Python 3 environments on Linux x86-64 systems, specifically Python 3.6.x, 3.7.x, 3.8.x. + +Users have reported that a Python 3 Anaconda install in Windows works. I have not verified this myself. + +PyTorch versions 1.4, 1.5, 1.6 have been tested with this code. 
+ +I've tried to keep the dependencies minimal, the setup is as per the PyTorch default install instructions for Conda: +``` +conda create -n torch-env +conda activate torch-env +conda install -c pytorch pytorch torchvision cudatoolkit=10.2 +``` + +### PyTorch Hub + +Models can be accessed via the PyTorch Hub API + +``` +>>> torch.hub.list('rwightman/gen-efficientnet-pytorch') +['efficientnet_b0', ...] +>>> model = torch.hub.load('rwightman/gen-efficientnet-pytorch', 'efficientnet_b0', pretrained=True) +>>> model.eval() +>>> output = model(torch.randn(1,3,224,224)) +``` + +### Pip +This package can be installed via pip. + +Install (after conda env/install): +``` +pip install geffnet +``` + +Eval use: +``` +>>> import geffnet +>>> m = geffnet.create_model('mobilenetv3_large_100', pretrained=True) +>>> m.eval() +``` + +Train use: +``` +>>> import geffnet +>>> # models can also be created by using the entrypoint directly +>>> m = geffnet.efficientnet_b2(pretrained=True, drop_rate=0.25, drop_connect_rate=0.2) +>>> m.train() +``` + +Create in a nn.Sequential container, for fast.ai, etc: +``` +>>> import geffnet +>>> m = geffnet.mixnet_l(pretrained=True, drop_rate=0.25, drop_connect_rate=0.2, as_sequential=True) +``` + +### Exporting + +Scripts are included to +* export models to ONNX (`onnx_export.py`) +* optimized ONNX graph (`onnx_optimize.py` or `onnx_validate.py` w/ `--onnx-output-opt` arg) +* validate with ONNX runtime (`onnx_validate.py`) +* convert ONNX model to Caffe2 (`onnx_to_caffe.py`) +* validate in Caffe2 (`caffe2_validate.py`) +* benchmark in Caffe2 w/ FLOPs, parameters output (`caffe2_benchmark.py`) + +As an example, to export the MobileNet-V3 pretrained model and then run an Imagenet validation: +``` +python onnx_export.py --model mobilenetv3_large_100 ./mobilenetv3_100.onnx +python onnx_validate.py /imagenet/validation/ --onnx-input ./mobilenetv3_100.onnx +``` + +These scripts were tested to be working as of PyTorch 1.6 and ONNX 1.7 w/ ONNX runtime 1.4. Caffe2 compatible +export now requires additional args mentioned in the export script (not needed in earlier versions). + +#### Export Notes +1. The TF ported weights with the 'SAME' conv padding activated cannot be exported to ONNX unless `_EXPORTABLE` flag in `config.py` is set to True. Use `config.set_exportable(True)` as in the `onnx_export.py` script. +2. TF ported models with 'SAME' padding will have the padding fixed at export time to the resolution used for export. Even though dynamic padding is supported in opset >= 11, I can't get it working. +3. ONNX optimize facility doesn't work reliably in PyTorch 1.6 / ONNX 1.7. Fortunately, the onnxruntime based inference is working very well now and includes on the fly optimization. +3. ONNX / Caffe2 export/import frequently breaks with different PyTorch and ONNX version releases. Please check their respective issue trackers before filing issues here. + + diff --git a/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/caffe2_benchmark.py b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/caffe2_benchmark.py new file mode 100644 index 0000000000000000000000000000000000000000..93f28a1e63d9f7287ca02997c7991fe66dd0aeb9 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/caffe2_benchmark.py @@ -0,0 +1,65 @@ +""" Caffe2 validation script + +This script runs Caffe2 benchmark on exported ONNX model. 
+It is a useful tool for reporting model FLOPS. + +Copyright 2020 Ross Wightman +""" +import argparse +from caffe2.python import core, workspace, model_helper +from caffe2.proto import caffe2_pb2 + + +parser = argparse.ArgumentParser(description='Caffe2 Model Benchmark') +parser.add_argument('--c2-prefix', default='', type=str, metavar='NAME', + help='caffe2 model pb name prefix') +parser.add_argument('--c2-init', default='', type=str, metavar='PATH', + help='caffe2 model init .pb') +parser.add_argument('--c2-predict', default='', type=str, metavar='PATH', + help='caffe2 model predict .pb') +parser.add_argument('-b', '--batch-size', default=1, type=int, + metavar='N', help='mini-batch size (default: 1)') +parser.add_argument('--img-size', default=224, type=int, + metavar='N', help='Input image dimension, uses model default if empty') + + +def main(): + args = parser.parse_args() + args.gpu_id = 0 + if args.c2_prefix: + args.c2_init = args.c2_prefix + '.init.pb' + args.c2_predict = args.c2_prefix + '.predict.pb' + + model = model_helper.ModelHelper(name="le_net", init_params=False) + + # Bring in the init net from init_net.pb + init_net_proto = caffe2_pb2.NetDef() + with open(args.c2_init, "rb") as f: + init_net_proto.ParseFromString(f.read()) + model.param_init_net = core.Net(init_net_proto) + + # bring in the predict net from predict_net.pb + predict_net_proto = caffe2_pb2.NetDef() + with open(args.c2_predict, "rb") as f: + predict_net_proto.ParseFromString(f.read()) + model.net = core.Net(predict_net_proto) + + # CUDA performance not impressive + #device_opts = core.DeviceOption(caffe2_pb2.PROTO_CUDA, args.gpu_id) + #model.net.RunAllOnGPU(gpu_id=args.gpu_id, use_cudnn=True) + #model.param_init_net.RunAllOnGPU(gpu_id=args.gpu_id, use_cudnn=True) + + input_blob = model.net.external_inputs[0] + model.param_init_net.GaussianFill( + [], + input_blob.GetUnscopedName(), + shape=(args.batch_size, 3, args.img_size, args.img_size), + mean=0.0, + std=1.0) + workspace.RunNetOnce(model.param_init_net) + workspace.CreateNet(model.net, overwrite=True) + workspace.BenchmarkNet(model.net.Proto().name, 5, 20, True) + + +if __name__ == '__main__': + main() diff --git a/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/caffe2_validate.py b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/caffe2_validate.py new file mode 100644 index 0000000000000000000000000000000000000000..7cfaab38c095663fe32e4addbdf06b57bcb53614 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/caffe2_validate.py @@ -0,0 +1,138 @@ +""" Caffe2 validation script + +This script is created to verify exported ONNX models running in Caffe2 +It utilizes the same PyTorch dataloader/processing pipeline for a +fair comparison against the originals. 
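+
+Illustrative invocation (dataset path and .pb prefix are assumptions; see the args below):
+
+    python caffe2_validate.py /imagenet/validation/ --c2-prefix mobilenetv3_100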
+ +Copyright 2020 Ross Wightman +""" +import argparse +import numpy as np +from caffe2.python import core, workspace, model_helper +from caffe2.proto import caffe2_pb2 +from data import create_loader, resolve_data_config, Dataset +from utils import AverageMeter +import time + +parser = argparse.ArgumentParser(description='Caffe2 ImageNet Validation') +parser.add_argument('data', metavar='DIR', + help='path to dataset') +parser.add_argument('--c2-prefix', default='', type=str, metavar='NAME', + help='caffe2 model pb name prefix') +parser.add_argument('--c2-init', default='', type=str, metavar='PATH', + help='caffe2 model init .pb') +parser.add_argument('--c2-predict', default='', type=str, metavar='PATH', + help='caffe2 model predict .pb') +parser.add_argument('-j', '--workers', default=2, type=int, metavar='N', + help='number of data loading workers (default: 2)') +parser.add_argument('-b', '--batch-size', default=256, type=int, + metavar='N', help='mini-batch size (default: 256)') +parser.add_argument('--img-size', default=None, type=int, + metavar='N', help='Input image dimension, uses model default if empty') +parser.add_argument('--mean', type=float, nargs='+', default=None, metavar='MEAN', + help='Override mean pixel value of dataset') +parser.add_argument('--std', type=float, nargs='+', default=None, metavar='STD', + help='Override std deviation of of dataset') +parser.add_argument('--crop-pct', type=float, default=None, metavar='PCT', + help='Override default crop pct of 0.875') +parser.add_argument('--interpolation', default='', type=str, metavar='NAME', + help='Image resize interpolation type (overrides model)') +parser.add_argument('--tf-preprocessing', dest='tf_preprocessing', action='store_true', + help='use tensorflow mnasnet preporcessing') +parser.add_argument('--print-freq', '-p', default=10, type=int, + metavar='N', help='print frequency (default: 10)') + + +def main(): + args = parser.parse_args() + args.gpu_id = 0 + if args.c2_prefix: + args.c2_init = args.c2_prefix + '.init.pb' + args.c2_predict = args.c2_prefix + '.predict.pb' + + model = model_helper.ModelHelper(name="validation_net", init_params=False) + + # Bring in the init net from init_net.pb + init_net_proto = caffe2_pb2.NetDef() + with open(args.c2_init, "rb") as f: + init_net_proto.ParseFromString(f.read()) + model.param_init_net = core.Net(init_net_proto) + + # bring in the predict net from predict_net.pb + predict_net_proto = caffe2_pb2.NetDef() + with open(args.c2_predict, "rb") as f: + predict_net_proto.ParseFromString(f.read()) + model.net = core.Net(predict_net_proto) + + data_config = resolve_data_config(None, args) + loader = create_loader( + Dataset(args.data, load_bytes=args.tf_preprocessing), + input_size=data_config['input_size'], + batch_size=args.batch_size, + use_prefetcher=False, + interpolation=data_config['interpolation'], + mean=data_config['mean'], + std=data_config['std'], + num_workers=args.workers, + crop_pct=data_config['crop_pct'], + tensorflow_preprocessing=args.tf_preprocessing) + + # this is so obvious, wonderful interface + input_blob = model.net.external_inputs[0] + output_blob = model.net.external_outputs[0] + + if True: + device_opts = None + else: + # CUDA is crashing, no idea why, awesome error message, give it a try for kicks + device_opts = core.DeviceOption(caffe2_pb2.PROTO_CUDA, args.gpu_id) + model.net.RunAllOnGPU(gpu_id=args.gpu_id, use_cudnn=True) + model.param_init_net.RunAllOnGPU(gpu_id=args.gpu_id, use_cudnn=True) + + model.param_init_net.GaussianFill( + [], 
input_blob.GetUnscopedName(), + shape=(1,) + data_config['input_size'], mean=0.0, std=1.0) + workspace.RunNetOnce(model.param_init_net) + workspace.CreateNet(model.net, overwrite=True) + + batch_time = AverageMeter() + top1 = AverageMeter() + top5 = AverageMeter() + end = time.time() + for i, (input, target) in enumerate(loader): + # run the net and return prediction + caffe2_in = input.data.numpy() + workspace.FeedBlob(input_blob, caffe2_in, device_opts) + workspace.RunNet(model.net, num_iter=1) + output = workspace.FetchBlob(output_blob) + + # measure accuracy and record loss + prec1, prec5 = accuracy_np(output.data, target.numpy()) + top1.update(prec1.item(), input.size(0)) + top5.update(prec5.item(), input.size(0)) + + # measure elapsed time + batch_time.update(time.time() - end) + end = time.time() + + if i % args.print_freq == 0: + print('Test: [{0}/{1}]\t' + 'Time {batch_time.val:.3f} ({batch_time.avg:.3f}, {rate_avg:.3f}/s, {ms_avg:.3f} ms/sample) \t' + 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t' + 'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format( + i, len(loader), batch_time=batch_time, rate_avg=input.size(0) / batch_time.avg, + ms_avg=100 * batch_time.avg / input.size(0), top1=top1, top5=top5)) + + print(' * Prec@1 {top1.avg:.3f} ({top1a:.3f}) Prec@5 {top5.avg:.3f} ({top5a:.3f})'.format( + top1=top1, top1a=100-top1.avg, top5=top5, top5a=100.-top5.avg)) + + +def accuracy_np(output, target): + max_indices = np.argsort(output, axis=1)[:, ::-1] + top5 = 100 * np.equal(max_indices[:, :5], target[:, np.newaxis]).sum(axis=1).mean() + top1 = 100 * np.equal(max_indices[:, 0], target).mean() + return top1, top5 + + +if __name__ == '__main__': + main() diff --git a/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/__init__.py b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2e441a5838d1e972823b9668ac8d459445f6f6ce --- /dev/null +++ b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/__init__.py @@ -0,0 +1,5 @@ +from .gen_efficientnet import * +from .mobilenetv3 import * +from .model_factory import create_model +from .config import is_exportable, is_scriptable, set_exportable, set_scriptable +from .activations import * \ No newline at end of file diff --git a/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/activations/__init__.py b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/activations/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..813421a743ffc33b8eb53ebf62dd4a03d831b654 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/activations/__init__.py @@ -0,0 +1,137 @@ +from geffnet import config +from geffnet.activations.activations_me import * +from geffnet.activations.activations_jit import * +from geffnet.activations.activations import * +import torch + +_has_silu = 'silu' in dir(torch.nn.functional) + +_ACT_FN_DEFAULT = dict( + silu=F.silu if _has_silu else swish, + swish=F.silu if _has_silu else swish, + mish=mish, + relu=F.relu, + relu6=F.relu6, + sigmoid=sigmoid, + tanh=tanh, + hard_sigmoid=hard_sigmoid, + hard_swish=hard_swish, +) + +_ACT_FN_JIT = dict( + silu=F.silu if _has_silu 
else swish_jit, + swish=F.silu if _has_silu else swish_jit, + mish=mish_jit, +) + +_ACT_FN_ME = dict( + silu=F.silu if _has_silu else swish_me, + swish=F.silu if _has_silu else swish_me, + mish=mish_me, + hard_swish=hard_swish_me, + hard_sigmoid_jit=hard_sigmoid_me, +) + +_ACT_LAYER_DEFAULT = dict( + silu=nn.SiLU if _has_silu else Swish, + swish=nn.SiLU if _has_silu else Swish, + mish=Mish, + relu=nn.ReLU, + relu6=nn.ReLU6, + sigmoid=Sigmoid, + tanh=Tanh, + hard_sigmoid=HardSigmoid, + hard_swish=HardSwish, +) + +_ACT_LAYER_JIT = dict( + silu=nn.SiLU if _has_silu else SwishJit, + swish=nn.SiLU if _has_silu else SwishJit, + mish=MishJit, +) + +_ACT_LAYER_ME = dict( + silu=nn.SiLU if _has_silu else SwishMe, + swish=nn.SiLU if _has_silu else SwishMe, + mish=MishMe, + hard_swish=HardSwishMe, + hard_sigmoid=HardSigmoidMe +) + +_OVERRIDE_FN = dict() +_OVERRIDE_LAYER = dict() + + +def add_override_act_fn(name, fn): + global _OVERRIDE_FN + _OVERRIDE_FN[name] = fn + + +def update_override_act_fn(overrides): + assert isinstance(overrides, dict) + global _OVERRIDE_FN + _OVERRIDE_FN.update(overrides) + + +def clear_override_act_fn(): + global _OVERRIDE_FN + _OVERRIDE_FN = dict() + + +def add_override_act_layer(name, fn): + _OVERRIDE_LAYER[name] = fn + + +def update_override_act_layer(overrides): + assert isinstance(overrides, dict) + global _OVERRIDE_LAYER + _OVERRIDE_LAYER.update(overrides) + + +def clear_override_act_layer(): + global _OVERRIDE_LAYER + _OVERRIDE_LAYER = dict() + + +def get_act_fn(name='relu'): + """ Activation Function Factory + Fetching activation fns by name with this function allows export or torch script friendly + functions to be returned dynamically based on current config. + """ + if name in _OVERRIDE_FN: + return _OVERRIDE_FN[name] + use_me = not (config.is_exportable() or config.is_scriptable() or config.is_no_jit()) + if use_me and name in _ACT_FN_ME: + # If not exporting or scripting the model, first look for a memory optimized version + # activation with custom autograd, then fallback to jit scripted, then a Python or Torch builtin + return _ACT_FN_ME[name] + if config.is_exportable() and name in ('silu', 'swish'): + # FIXME PyTorch SiLU doesn't ONNX export, this is a temp hack + return swish + use_jit = not (config.is_exportable() or config.is_no_jit()) + # NOTE: export tracing should work with jit scripted components, but I keep running into issues + if use_jit and name in _ACT_FN_JIT: # jit scripted models should be okay for export/scripting + return _ACT_FN_JIT[name] + return _ACT_FN_DEFAULT[name] + + +def get_act_layer(name='relu'): + """ Activation Layer Factory + Fetching activation layers by name with this function allows export or torch script friendly + functions to be returned dynamically based on current config. 
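+
+    Illustrative example (which class is returned depends on the exportable/scriptable/no_jit config):
+        act_layer = get_act_layer('hard_swish')
+        act = act_layer(inplace=True)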
+ """ + if name in _OVERRIDE_LAYER: + return _OVERRIDE_LAYER[name] + use_me = not (config.is_exportable() or config.is_scriptable() or config.is_no_jit()) + if use_me and name in _ACT_LAYER_ME: + return _ACT_LAYER_ME[name] + if config.is_exportable() and name in ('silu', 'swish'): + # FIXME PyTorch SiLU doesn't ONNX export, this is a temp hack + return Swish + use_jit = not (config.is_exportable() or config.is_no_jit()) + # NOTE: export tracing should work with jit scripted components, but I keep running into issues + if use_jit and name in _ACT_FN_JIT: # jit scripted models should be okay for export/scripting + return _ACT_LAYER_JIT[name] + return _ACT_LAYER_DEFAULT[name] + + diff --git a/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/activations/activations.py b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/activations/activations.py new file mode 100644 index 0000000000000000000000000000000000000000..bdea692d1397673b2513d898c33edbcb37d94240 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/activations/activations.py @@ -0,0 +1,102 @@ +""" Activations + +A collection of activations fn and modules with a common interface so that they can +easily be swapped. All have an `inplace` arg even if not used. + +Copyright 2020 Ross Wightman +""" +from torch import nn as nn +from torch.nn import functional as F + + +def swish(x, inplace: bool = False): + """Swish - Described originally as SiLU (https://arxiv.org/abs/1702.03118v3) + and also as Swish (https://arxiv.org/abs/1710.05941). + + TODO Rename to SiLU with addition to PyTorch + """ + return x.mul_(x.sigmoid()) if inplace else x.mul(x.sigmoid()) + + +class Swish(nn.Module): + def __init__(self, inplace: bool = False): + super(Swish, self).__init__() + self.inplace = inplace + + def forward(self, x): + return swish(x, self.inplace) + + +def mish(x, inplace: bool = False): + """Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681 + """ + return x.mul(F.softplus(x).tanh()) + + +class Mish(nn.Module): + def __init__(self, inplace: bool = False): + super(Mish, self).__init__() + self.inplace = inplace + + def forward(self, x): + return mish(x, self.inplace) + + +def sigmoid(x, inplace: bool = False): + return x.sigmoid_() if inplace else x.sigmoid() + + +# PyTorch has this, but not with a consistent inplace argmument interface +class Sigmoid(nn.Module): + def __init__(self, inplace: bool = False): + super(Sigmoid, self).__init__() + self.inplace = inplace + + def forward(self, x): + return x.sigmoid_() if self.inplace else x.sigmoid() + + +def tanh(x, inplace: bool = False): + return x.tanh_() if inplace else x.tanh() + + +# PyTorch has this, but not with a consistent inplace argmument interface +class Tanh(nn.Module): + def __init__(self, inplace: bool = False): + super(Tanh, self).__init__() + self.inplace = inplace + + def forward(self, x): + return x.tanh_() if self.inplace else x.tanh() + + +def hard_swish(x, inplace: bool = False): + inner = F.relu6(x + 3.).div_(6.) 
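+    # hard_swish(x) = x * relu6(x + 3) / 6, the piecewise-linear approximation of swish used by MobileNetV3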
+ return x.mul_(inner) if inplace else x.mul(inner) + + +class HardSwish(nn.Module): + def __init__(self, inplace: bool = False): + super(HardSwish, self).__init__() + self.inplace = inplace + + def forward(self, x): + return hard_swish(x, self.inplace) + + +def hard_sigmoid(x, inplace: bool = False): + if inplace: + return x.add_(3.).clamp_(0., 6.).div_(6.) + else: + return F.relu6(x + 3.) / 6. + + +class HardSigmoid(nn.Module): + def __init__(self, inplace: bool = False): + super(HardSigmoid, self).__init__() + self.inplace = inplace + + def forward(self, x): + return hard_sigmoid(x, self.inplace) + + diff --git a/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/activations/activations_jit.py b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/activations/activations_jit.py new file mode 100644 index 0000000000000000000000000000000000000000..7176b05e779787528a47f20d55d64d4a0f219360 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/activations/activations_jit.py @@ -0,0 +1,79 @@ +""" Activations (jit) + +A collection of jit-scripted activations fn and modules with a common interface so that they can +easily be swapped. All have an `inplace` arg even if not used. + +All jit scripted activations are lacking in-place variations on purpose, scripted kernel fusion does not +currently work across in-place op boundaries, thus performance is equal to or less than the non-scripted +versions if they contain in-place ops. + +Copyright 2020 Ross Wightman +""" + +import torch +from torch import nn as nn +from torch.nn import functional as F + +__all__ = ['swish_jit', 'SwishJit', 'mish_jit', 'MishJit', + 'hard_sigmoid_jit', 'HardSigmoidJit', 'hard_swish_jit', 'HardSwishJit'] + + +@torch.jit.script +def swish_jit(x, inplace: bool = False): + """Swish - Described originally as SiLU (https://arxiv.org/abs/1702.03118v3) + and also as Swish (https://arxiv.org/abs/1710.05941). + + TODO Rename to SiLU with addition to PyTorch + """ + return x.mul(x.sigmoid()) + + +@torch.jit.script +def mish_jit(x, _inplace: bool = False): + """Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681 + """ + return x.mul(F.softplus(x).tanh()) + + +class SwishJit(nn.Module): + def __init__(self, inplace: bool = False): + super(SwishJit, self).__init__() + + def forward(self, x): + return swish_jit(x) + + +class MishJit(nn.Module): + def __init__(self, inplace: bool = False): + super(MishJit, self).__init__() + + def forward(self, x): + return mish_jit(x) + + +@torch.jit.script +def hard_sigmoid_jit(x, inplace: bool = False): + # return F.relu6(x + 3.) / 6. + return (x + 3).clamp(min=0, max=6).div(6.) # clamp seems ever so slightly faster? + + +class HardSigmoidJit(nn.Module): + def __init__(self, inplace: bool = False): + super(HardSigmoidJit, self).__init__() + + def forward(self, x): + return hard_sigmoid_jit(x) + + +@torch.jit.script +def hard_swish_jit(x, inplace: bool = False): + # return x * (F.relu6(x + 3.) / 6) + return x * (x + 3).clamp(min=0, max=6).div(6.) # clamp seems ever so slightly faster? 
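+# NOTE: as with the other jit-scripted activations above, in-place variants are deliberately omitted;
+# TorchScript kernel fusion does not cross in-place op boundaries, so they would bring no benefit here.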
+ + +class HardSwishJit(nn.Module): + def __init__(self, inplace: bool = False): + super(HardSwishJit, self).__init__() + + def forward(self, x): + return hard_swish_jit(x) diff --git a/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/activations/activations_me.py b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/activations/activations_me.py new file mode 100644 index 0000000000000000000000000000000000000000..e91df5a50fdbe40bc386e2541a4fda743ad95e9a --- /dev/null +++ b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/activations/activations_me.py @@ -0,0 +1,174 @@ +""" Activations (memory-efficient w/ custom autograd) + +A collection of activations fn and modules with a common interface so that they can +easily be swapped. All have an `inplace` arg even if not used. + +These activations are not compatible with jit scripting or ONNX export of the model, please use either +the JIT or basic versions of the activations. + +Copyright 2020 Ross Wightman +""" + +import torch +from torch import nn as nn +from torch.nn import functional as F + + +__all__ = ['swish_me', 'SwishMe', 'mish_me', 'MishMe', + 'hard_sigmoid_me', 'HardSigmoidMe', 'hard_swish_me', 'HardSwishMe'] + + +@torch.jit.script +def swish_jit_fwd(x): + return x.mul(torch.sigmoid(x)) + + +@torch.jit.script +def swish_jit_bwd(x, grad_output): + x_sigmoid = torch.sigmoid(x) + return grad_output * (x_sigmoid * (1 + x * (1 - x_sigmoid))) + + +class SwishJitAutoFn(torch.autograd.Function): + """ torch.jit.script optimised Swish w/ memory-efficient checkpoint + Inspired by conversation btw Jeremy Howard & Adam Pazske + https://twitter.com/jeremyphoward/status/1188251041835315200 + + Swish - Described originally as SiLU (https://arxiv.org/abs/1702.03118v3) + and also as Swish (https://arxiv.org/abs/1710.05941). 
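+    The memory saving comes from stashing only the raw input for backward (save_for_backward) and
+    recomputing sigmoid(x) in swish_jit_bwd, rather than keeping the intermediate sigmoid output alive.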
+ + TODO Rename to SiLU with addition to PyTorch + """ + + @staticmethod + def forward(ctx, x): + ctx.save_for_backward(x) + return swish_jit_fwd(x) + + @staticmethod + def backward(ctx, grad_output): + x = ctx.saved_tensors[0] + return swish_jit_bwd(x, grad_output) + + +def swish_me(x, inplace=False): + return SwishJitAutoFn.apply(x) + + +class SwishMe(nn.Module): + def __init__(self, inplace: bool = False): + super(SwishMe, self).__init__() + + def forward(self, x): + return SwishJitAutoFn.apply(x) + + +@torch.jit.script +def mish_jit_fwd(x): + return x.mul(torch.tanh(F.softplus(x))) + + +@torch.jit.script +def mish_jit_bwd(x, grad_output): + x_sigmoid = torch.sigmoid(x) + x_tanh_sp = F.softplus(x).tanh() + return grad_output.mul(x_tanh_sp + x * x_sigmoid * (1 - x_tanh_sp * x_tanh_sp)) + + +class MishJitAutoFn(torch.autograd.Function): + """ Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681 + A memory efficient, jit scripted variant of Mish + """ + @staticmethod + def forward(ctx, x): + ctx.save_for_backward(x) + return mish_jit_fwd(x) + + @staticmethod + def backward(ctx, grad_output): + x = ctx.saved_tensors[0] + return mish_jit_bwd(x, grad_output) + + +def mish_me(x, inplace=False): + return MishJitAutoFn.apply(x) + + +class MishMe(nn.Module): + def __init__(self, inplace: bool = False): + super(MishMe, self).__init__() + + def forward(self, x): + return MishJitAutoFn.apply(x) + + +@torch.jit.script +def hard_sigmoid_jit_fwd(x, inplace: bool = False): + return (x + 3).clamp(min=0, max=6).div(6.) + + +@torch.jit.script +def hard_sigmoid_jit_bwd(x, grad_output): + m = torch.ones_like(x) * ((x >= -3.) & (x <= 3.)) / 6. + return grad_output * m + + +class HardSigmoidJitAutoFn(torch.autograd.Function): + @staticmethod + def forward(ctx, x): + ctx.save_for_backward(x) + return hard_sigmoid_jit_fwd(x) + + @staticmethod + def backward(ctx, grad_output): + x = ctx.saved_tensors[0] + return hard_sigmoid_jit_bwd(x, grad_output) + + +def hard_sigmoid_me(x, inplace: bool = False): + return HardSigmoidJitAutoFn.apply(x) + + +class HardSigmoidMe(nn.Module): + def __init__(self, inplace: bool = False): + super(HardSigmoidMe, self).__init__() + + def forward(self, x): + return HardSigmoidJitAutoFn.apply(x) + + +@torch.jit.script +def hard_swish_jit_fwd(x): + return x * (x + 3).clamp(min=0, max=6).div(6.) + + +@torch.jit.script +def hard_swish_jit_bwd(x, grad_output): + m = torch.ones_like(x) * (x >= 3.) + m = torch.where((x >= -3.) & (x <= 3.), x / 3. 
+ .5, m) + return grad_output * m + + +class HardSwishJitAutoFn(torch.autograd.Function): + """A memory efficient, jit-scripted HardSwish activation""" + @staticmethod + def forward(ctx, x): + ctx.save_for_backward(x) + return hard_swish_jit_fwd(x) + + @staticmethod + def backward(ctx, grad_output): + x = ctx.saved_tensors[0] + return hard_swish_jit_bwd(x, grad_output) + + +def hard_swish_me(x, inplace=False): + return HardSwishJitAutoFn.apply(x) + + +class HardSwishMe(nn.Module): + def __init__(self, inplace: bool = False): + super(HardSwishMe, self).__init__() + + def forward(self, x): + return HardSwishJitAutoFn.apply(x) diff --git a/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/config.py b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/config.py new file mode 100644 index 0000000000000000000000000000000000000000..27d5307fd9ee0246f1e35f41520f17385d23f1dd --- /dev/null +++ b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/config.py @@ -0,0 +1,123 @@ +""" Global layer config state +""" +from typing import Any, Optional + +__all__ = [ + 'is_exportable', 'is_scriptable', 'is_no_jit', 'layer_config_kwargs', + 'set_exportable', 'set_scriptable', 'set_no_jit', 'set_layer_config' +] + +# Set to True if prefer to have layers with no jit optimization (includes activations) +_NO_JIT = False + +# Set to True if prefer to have activation layers with no jit optimization +# NOTE not currently used as no difference between no_jit and no_activation jit as only layers obeying +# the jit flags so far are activations. This will change as more layers are updated and/or added. +_NO_ACTIVATION_JIT = False + +# Set to True if exporting a model with Same padding via ONNX +_EXPORTABLE = False + +# Set to True if wanting to use torch.jit.script on a model +_SCRIPTABLE = False + + +def is_no_jit(): + return _NO_JIT + + +class set_no_jit: + def __init__(self, mode: bool) -> None: + global _NO_JIT + self.prev = _NO_JIT + _NO_JIT = mode + + def __enter__(self) -> None: + pass + + def __exit__(self, *args: Any) -> bool: + global _NO_JIT + _NO_JIT = self.prev + return False + + +def is_exportable(): + return _EXPORTABLE + + +class set_exportable: + def __init__(self, mode: bool) -> None: + global _EXPORTABLE + self.prev = _EXPORTABLE + _EXPORTABLE = mode + + def __enter__(self) -> None: + pass + + def __exit__(self, *args: Any) -> bool: + global _EXPORTABLE + _EXPORTABLE = self.prev + return False + + +def is_scriptable(): + return _SCRIPTABLE + + +class set_scriptable: + def __init__(self, mode: bool) -> None: + global _SCRIPTABLE + self.prev = _SCRIPTABLE + _SCRIPTABLE = mode + + def __enter__(self) -> None: + pass + + def __exit__(self, *args: Any) -> bool: + global _SCRIPTABLE + _SCRIPTABLE = self.prev + return False + + +class set_layer_config: + """ Layer config context manager that allows setting all layer config flags at once. + If a flag arg is None, it will not change the current value. 
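+
+    Illustrative usage (wrapping model creation this way is an assumption based on how the flags below are read):
+        with set_layer_config(exportable=True):
+            model = geffnet.create_model('mobilenetv3_large_100', pretrained=True)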
+ """ + def __init__( + self, + scriptable: Optional[bool] = None, + exportable: Optional[bool] = None, + no_jit: Optional[bool] = None, + no_activation_jit: Optional[bool] = None): + global _SCRIPTABLE + global _EXPORTABLE + global _NO_JIT + global _NO_ACTIVATION_JIT + self.prev = _SCRIPTABLE, _EXPORTABLE, _NO_JIT, _NO_ACTIVATION_JIT + if scriptable is not None: + _SCRIPTABLE = scriptable + if exportable is not None: + _EXPORTABLE = exportable + if no_jit is not None: + _NO_JIT = no_jit + if no_activation_jit is not None: + _NO_ACTIVATION_JIT = no_activation_jit + + def __enter__(self) -> None: + pass + + def __exit__(self, *args: Any) -> bool: + global _SCRIPTABLE + global _EXPORTABLE + global _NO_JIT + global _NO_ACTIVATION_JIT + _SCRIPTABLE, _EXPORTABLE, _NO_JIT, _NO_ACTIVATION_JIT = self.prev + return False + + +def layer_config_kwargs(kwargs): + """ Consume config kwargs and return contextmgr obj """ + return set_layer_config( + scriptable=kwargs.pop('scriptable', None), + exportable=kwargs.pop('exportable', None), + no_jit=kwargs.pop('no_jit', None)) diff --git a/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/conv2d_layers.py b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/conv2d_layers.py new file mode 100644 index 0000000000000000000000000000000000000000..d8467460c4b36e54c83ce2dcd3ebe91d3432cad2 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/conv2d_layers.py @@ -0,0 +1,304 @@ +""" Conv2D w/ SAME padding, CondConv, MixedConv + +A collection of conv layers and padding helpers needed by EfficientNet, MixNet, and +MobileNetV3 models that maintain weight compatibility with original Tensorflow models. 
+ +Copyright 2020 Ross Wightman +""" +import collections.abc +import math +from functools import partial +from itertools import repeat +from typing import Tuple, Optional + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + +from .config import * + + +# From PyTorch internals +def _ntuple(n): + def parse(x): + if isinstance(x, collections.abc.Iterable): + return x + return tuple(repeat(x, n)) + return parse + + +_single = _ntuple(1) +_pair = _ntuple(2) +_triple = _ntuple(3) +_quadruple = _ntuple(4) + + +def _is_static_pad(kernel_size, stride=1, dilation=1, **_): + return stride == 1 and (dilation * (kernel_size - 1)) % 2 == 0 + + +def _get_padding(kernel_size, stride=1, dilation=1, **_): + padding = ((stride - 1) + dilation * (kernel_size - 1)) // 2 + return padding + + +def _calc_same_pad(i: int, k: int, s: int, d: int): + return max((-(i // -s) - 1) * s + (k - 1) * d + 1 - i, 0) + + +def _same_pad_arg(input_size, kernel_size, stride, dilation): + ih, iw = input_size + kh, kw = kernel_size + pad_h = _calc_same_pad(ih, kh, stride[0], dilation[0]) + pad_w = _calc_same_pad(iw, kw, stride[1], dilation[1]) + return [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2] + + +def _split_channels(num_chan, num_groups): + split = [num_chan // num_groups for _ in range(num_groups)] + split[0] += num_chan - sum(split) + return split + + +def conv2d_same( + x, weight: torch.Tensor, bias: Optional[torch.Tensor] = None, stride: Tuple[int, int] = (1, 1), + padding: Tuple[int, int] = (0, 0), dilation: Tuple[int, int] = (1, 1), groups: int = 1): + ih, iw = x.size()[-2:] + kh, kw = weight.size()[-2:] + pad_h = _calc_same_pad(ih, kh, stride[0], dilation[0]) + pad_w = _calc_same_pad(iw, kw, stride[1], dilation[1]) + x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2]) + return F.conv2d(x, weight, bias, stride, (0, 0), dilation, groups) + + +class Conv2dSame(nn.Conv2d): + """ Tensorflow like 'SAME' convolution wrapper for 2D convolutions + """ + + # pylint: disable=unused-argument + def __init__(self, in_channels, out_channels, kernel_size, stride=1, + padding=0, dilation=1, groups=1, bias=True): + super(Conv2dSame, self).__init__( + in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias) + + def forward(self, x): + return conv2d_same(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups) + + +class Conv2dSameExport(nn.Conv2d): + """ ONNX export friendly Tensorflow like 'SAME' convolution wrapper for 2D convolutions + + NOTE: This does not currently work with torch.jit.script + """ + + # pylint: disable=unused-argument + def __init__(self, in_channels, out_channels, kernel_size, stride=1, + padding=0, dilation=1, groups=1, bias=True): + super(Conv2dSameExport, self).__init__( + in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias) + self.pad = None + self.pad_input_size = (0, 0) + + def forward(self, x): + input_size = x.size()[-2:] + if self.pad is None: + pad_arg = _same_pad_arg(input_size, self.weight.size()[-2:], self.stride, self.dilation) + self.pad = nn.ZeroPad2d(pad_arg) + self.pad_input_size = input_size + + if self.pad is not None: + x = self.pad(x) + return F.conv2d( + x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups) + + +def get_padding_value(padding, kernel_size, **kwargs): + dynamic = False + if isinstance(padding, str): + # for any string padding, the padding will be calculated for you, one of three ways + padding = 
padding.lower() + if padding == 'same': + # TF compatible 'SAME' padding, has a performance and GPU memory allocation impact + if _is_static_pad(kernel_size, **kwargs): + # static case, no extra overhead + padding = _get_padding(kernel_size, **kwargs) + else: + # dynamic padding + padding = 0 + dynamic = True + elif padding == 'valid': + # 'VALID' padding, same as padding=0 + padding = 0 + else: + # Default to PyTorch style 'same'-ish symmetric padding + padding = _get_padding(kernel_size, **kwargs) + return padding, dynamic + + +def create_conv2d_pad(in_chs, out_chs, kernel_size, **kwargs): + padding = kwargs.pop('padding', '') + kwargs.setdefault('bias', False) + padding, is_dynamic = get_padding_value(padding, kernel_size, **kwargs) + if is_dynamic: + if is_exportable(): + assert not is_scriptable() + return Conv2dSameExport(in_chs, out_chs, kernel_size, **kwargs) + else: + return Conv2dSame(in_chs, out_chs, kernel_size, **kwargs) + else: + return nn.Conv2d(in_chs, out_chs, kernel_size, padding=padding, **kwargs) + + +class MixedConv2d(nn.ModuleDict): + """ Mixed Grouped Convolution + Based on MDConv and GroupedConv in MixNet impl: + https://github.com/tensorflow/tpu/blob/master/models/official/mnasnet/mixnet/custom_layers.py + """ + + def __init__(self, in_channels, out_channels, kernel_size=3, + stride=1, padding='', dilation=1, depthwise=False, **kwargs): + super(MixedConv2d, self).__init__() + + kernel_size = kernel_size if isinstance(kernel_size, list) else [kernel_size] + num_groups = len(kernel_size) + in_splits = _split_channels(in_channels, num_groups) + out_splits = _split_channels(out_channels, num_groups) + self.in_channels = sum(in_splits) + self.out_channels = sum(out_splits) + for idx, (k, in_ch, out_ch) in enumerate(zip(kernel_size, in_splits, out_splits)): + conv_groups = out_ch if depthwise else 1 + self.add_module( + str(idx), + create_conv2d_pad( + in_ch, out_ch, k, stride=stride, + padding=padding, dilation=dilation, groups=conv_groups, **kwargs) + ) + self.splits = in_splits + + def forward(self, x): + x_split = torch.split(x, self.splits, 1) + x_out = [conv(x_split[i]) for i, conv in enumerate(self.values())] + x = torch.cat(x_out, 1) + return x + + +def get_condconv_initializer(initializer, num_experts, expert_shape): + def condconv_initializer(weight): + """CondConv initializer function.""" + num_params = np.prod(expert_shape) + if (len(weight.shape) != 2 or weight.shape[0] != num_experts or + weight.shape[1] != num_params): + raise (ValueError( + 'CondConv variables must have shape [num_experts, num_params]')) + for i in range(num_experts): + initializer(weight[i].view(expert_shape)) + return condconv_initializer + + +class CondConv2d(nn.Module): + """ Conditional Convolution + Inspired by: https://github.com/tensorflow/tpu/blob/master/models/official/efficientnet/condconv/condconv_layers.py + + Grouped convolution hackery for parallel execution of the per-sample kernel filters inspired by this discussion: + https://github.com/pytorch/pytorch/issues/17983 + """ + __constants__ = ['bias', 'in_channels', 'out_channels', 'dynamic_padding'] + + def __init__(self, in_channels, out_channels, kernel_size=3, + stride=1, padding='', dilation=1, groups=1, bias=False, num_experts=4): + super(CondConv2d, self).__init__() + + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = _pair(kernel_size) + self.stride = _pair(stride) + padding_val, is_padding_dynamic = get_padding_value( + padding, kernel_size, stride=stride, dilation=dilation) + 
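+        # A 'same' padding request that cannot be expressed as a fixed amount falls back to dynamic
+        # padding: conv2d_same() pads each input at runtime instead of using a static nn.Conv2d pad.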
self.dynamic_padding = is_padding_dynamic # if in forward to work with torchscript + self.padding = _pair(padding_val) + self.dilation = _pair(dilation) + self.groups = groups + self.num_experts = num_experts + + self.weight_shape = (self.out_channels, self.in_channels // self.groups) + self.kernel_size + weight_num_param = 1 + for wd in self.weight_shape: + weight_num_param *= wd + self.weight = torch.nn.Parameter(torch.Tensor(self.num_experts, weight_num_param)) + + if bias: + self.bias_shape = (self.out_channels,) + self.bias = torch.nn.Parameter(torch.Tensor(self.num_experts, self.out_channels)) + else: + self.register_parameter('bias', None) + + self.reset_parameters() + + def reset_parameters(self): + init_weight = get_condconv_initializer( + partial(nn.init.kaiming_uniform_, a=math.sqrt(5)), self.num_experts, self.weight_shape) + init_weight(self.weight) + if self.bias is not None: + fan_in = np.prod(self.weight_shape[1:]) + bound = 1 / math.sqrt(fan_in) + init_bias = get_condconv_initializer( + partial(nn.init.uniform_, a=-bound, b=bound), self.num_experts, self.bias_shape) + init_bias(self.bias) + + def forward(self, x, routing_weights): + B, C, H, W = x.shape + weight = torch.matmul(routing_weights, self.weight) + new_weight_shape = (B * self.out_channels, self.in_channels // self.groups) + self.kernel_size + weight = weight.view(new_weight_shape) + bias = None + if self.bias is not None: + bias = torch.matmul(routing_weights, self.bias) + bias = bias.view(B * self.out_channels) + # move batch elements with channels so each batch element can be efficiently convolved with separate kernel + x = x.view(1, B * C, H, W) + if self.dynamic_padding: + out = conv2d_same( + x, weight, bias, stride=self.stride, padding=self.padding, + dilation=self.dilation, groups=self.groups * B) + else: + out = F.conv2d( + x, weight, bias, stride=self.stride, padding=self.padding, + dilation=self.dilation, groups=self.groups * B) + out = out.permute([1, 0, 2, 3]).view(B, self.out_channels, out.shape[-2], out.shape[-1]) + + # Literal port (from TF definition) + # x = torch.split(x, 1, 0) + # weight = torch.split(weight, 1, 0) + # if self.bias is not None: + # bias = torch.matmul(routing_weights, self.bias) + # bias = torch.split(bias, 1, 0) + # else: + # bias = [None] * B + # out = [] + # for xi, wi, bi in zip(x, weight, bias): + # wi = wi.view(*self.weight_shape) + # if bi is not None: + # bi = bi.view(*self.bias_shape) + # out.append(self.conv_fn( + # xi, wi, bi, stride=self.stride, padding=self.padding, + # dilation=self.dilation, groups=self.groups)) + # out = torch.cat(out, 0) + return out + + +def select_conv2d(in_chs, out_chs, kernel_size, **kwargs): + assert 'groups' not in kwargs # only use 'depthwise' bool arg + if isinstance(kernel_size, list): + assert 'num_experts' not in kwargs # MixNet + CondConv combo not supported currently + # We're going to use only lists for defining the MixedConv2d kernel groups, + # ints, tuples, other iterables will continue to pass to normal conv and specify h, w. 
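+        # e.g. kernel_size=[3, 5, 7] splits the channels into three groups, convolved with 3x3, 5x5 and 7x7 kernels (MixNet-style)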
+ m = MixedConv2d(in_chs, out_chs, kernel_size, **kwargs) + else: + depthwise = kwargs.pop('depthwise', False) + groups = out_chs if depthwise else 1 + if 'num_experts' in kwargs and kwargs['num_experts'] > 0: + m = CondConv2d(in_chs, out_chs, kernel_size, groups=groups, **kwargs) + else: + m = create_conv2d_pad(in_chs, out_chs, kernel_size, groups=groups, **kwargs) + return m diff --git a/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/efficientnet_builder.py b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/efficientnet_builder.py new file mode 100644 index 0000000000000000000000000000000000000000..95dd63d400e70d70664c5a433a2772363f865e61 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/efficientnet_builder.py @@ -0,0 +1,683 @@ +""" EfficientNet / MobileNetV3 Blocks and Builder + +Copyright 2020 Ross Wightman +""" +import re +from copy import deepcopy + +from .conv2d_layers import * +from geffnet.activations import * + +__all__ = ['get_bn_args_tf', 'resolve_bn_args', 'resolve_se_args', 'resolve_act_layer', 'make_divisible', + 'round_channels', 'drop_connect', 'SqueezeExcite', 'ConvBnAct', 'DepthwiseSeparableConv', + 'InvertedResidual', 'CondConvResidual', 'EdgeResidual', 'EfficientNetBuilder', 'decode_arch_def', + 'initialize_weight_default', 'initialize_weight_goog', 'BN_MOMENTUM_TF_DEFAULT', 'BN_EPS_TF_DEFAULT' +] + +# Defaults used for Google/Tensorflow training of mobile networks /w RMSprop as per +# papers and TF reference implementations. PT momentum equiv for TF decay is (1 - TF decay) +# NOTE: momentum varies btw .99 and .9997 depending on source +# .99 in official TF TPU impl +# .9997 (/w .999 in search space) for paper +# +# PyTorch defaults are momentum = .1, eps = 1e-5 +# +BN_MOMENTUM_TF_DEFAULT = 1 - 0.99 +BN_EPS_TF_DEFAULT = 1e-3 +_BN_ARGS_TF = dict(momentum=BN_MOMENTUM_TF_DEFAULT, eps=BN_EPS_TF_DEFAULT) + + +def get_bn_args_tf(): + return _BN_ARGS_TF.copy() + + +def resolve_bn_args(kwargs): + bn_args = get_bn_args_tf() if kwargs.pop('bn_tf', False) else {} + bn_momentum = kwargs.pop('bn_momentum', None) + if bn_momentum is not None: + bn_args['momentum'] = bn_momentum + bn_eps = kwargs.pop('bn_eps', None) + if bn_eps is not None: + bn_args['eps'] = bn_eps + return bn_args + + +_SE_ARGS_DEFAULT = dict( + gate_fn=sigmoid, + act_layer=None, # None == use containing block's activation layer + reduce_mid=False, + divisor=1) + + +def resolve_se_args(kwargs, in_chs, act_layer=None): + se_kwargs = kwargs.copy() if kwargs is not None else {} + # fill in args that aren't specified with the defaults + for k, v in _SE_ARGS_DEFAULT.items(): + se_kwargs.setdefault(k, v) + # some models, like MobilNetV3, calculate SE reduction chs from the containing block's mid_ch instead of in_ch + if not se_kwargs.pop('reduce_mid'): + se_kwargs['reduced_base_chs'] = in_chs + # act_layer override, if it remains None, the containing block's act_layer will be used + if se_kwargs['act_layer'] is None: + assert act_layer is not None + se_kwargs['act_layer'] = act_layer + return se_kwargs + + +def resolve_act_layer(kwargs, default='relu'): + act_layer = kwargs.pop('act_layer', default) + if isinstance(act_layer, str): + act_layer = get_act_layer(act_layer) + return act_layer + + +def make_divisible(v: int, divisor: int = 8, min_value: int = None): + min_value = min_value or divisor + new_v = 
max(min_value, int(v + divisor / 2) // divisor * divisor) + if new_v < 0.9 * v: # ensure round down does not go down by more than 10%. + new_v += divisor + return new_v + + +def round_channels(channels, multiplier=1.0, divisor=8, channel_min=None): + """Round number of filters based on depth multiplier.""" + if not multiplier: + return channels + channels *= multiplier + return make_divisible(channels, divisor, channel_min) + + +def drop_connect(inputs, training: bool = False, drop_connect_rate: float = 0.): + """Apply drop connect.""" + if not training: + return inputs + + keep_prob = 1 - drop_connect_rate + random_tensor = keep_prob + torch.rand( + (inputs.size()[0], 1, 1, 1), dtype=inputs.dtype, device=inputs.device) + random_tensor.floor_() # binarize + output = inputs.div(keep_prob) * random_tensor + return output + + +class SqueezeExcite(nn.Module): + + def __init__(self, in_chs, se_ratio=0.25, reduced_base_chs=None, act_layer=nn.ReLU, gate_fn=sigmoid, divisor=1): + super(SqueezeExcite, self).__init__() + reduced_chs = make_divisible((reduced_base_chs or in_chs) * se_ratio, divisor) + self.conv_reduce = nn.Conv2d(in_chs, reduced_chs, 1, bias=True) + self.act1 = act_layer(inplace=True) + self.conv_expand = nn.Conv2d(reduced_chs, in_chs, 1, bias=True) + self.gate_fn = gate_fn + + def forward(self, x): + x_se = x.mean((2, 3), keepdim=True) + x_se = self.conv_reduce(x_se) + x_se = self.act1(x_se) + x_se = self.conv_expand(x_se) + x = x * self.gate_fn(x_se) + return x + + +class ConvBnAct(nn.Module): + def __init__(self, in_chs, out_chs, kernel_size, + stride=1, pad_type='', act_layer=nn.ReLU, norm_layer=nn.BatchNorm2d, norm_kwargs=None): + super(ConvBnAct, self).__init__() + assert stride in [1, 2] + norm_kwargs = norm_kwargs or {} + self.conv = select_conv2d(in_chs, out_chs, kernel_size, stride=stride, padding=pad_type) + self.bn1 = norm_layer(out_chs, **norm_kwargs) + self.act1 = act_layer(inplace=True) + + def forward(self, x): + x = self.conv(x) + x = self.bn1(x) + x = self.act1(x) + return x + + +class DepthwiseSeparableConv(nn.Module): + """ DepthwiseSeparable block + Used for DS convs in MobileNet-V1 and in the place of IR blocks with an expansion + factor of 1.0. This is an alternative to having a IR with optional first pw conv. 
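+    Structure: depthwise conv -> BN -> act -> (optional SE) -> pointwise 1x1 conv -> BN -> (optional act),
+    with a residual connection when stride == 1, in_chs == out_chs and noskip is not set.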
+ """ + def __init__(self, in_chs, out_chs, dw_kernel_size=3, + stride=1, pad_type='', act_layer=nn.ReLU, noskip=False, + pw_kernel_size=1, pw_act=False, se_ratio=0., se_kwargs=None, + norm_layer=nn.BatchNorm2d, norm_kwargs=None, drop_connect_rate=0.): + super(DepthwiseSeparableConv, self).__init__() + assert stride in [1, 2] + norm_kwargs = norm_kwargs or {} + self.has_residual = (stride == 1 and in_chs == out_chs) and not noskip + self.drop_connect_rate = drop_connect_rate + + self.conv_dw = select_conv2d( + in_chs, in_chs, dw_kernel_size, stride=stride, padding=pad_type, depthwise=True) + self.bn1 = norm_layer(in_chs, **norm_kwargs) + self.act1 = act_layer(inplace=True) + + # Squeeze-and-excitation + if se_ratio is not None and se_ratio > 0.: + se_kwargs = resolve_se_args(se_kwargs, in_chs, act_layer) + self.se = SqueezeExcite(in_chs, se_ratio=se_ratio, **se_kwargs) + else: + self.se = nn.Identity() + + self.conv_pw = select_conv2d(in_chs, out_chs, pw_kernel_size, padding=pad_type) + self.bn2 = norm_layer(out_chs, **norm_kwargs) + self.act2 = act_layer(inplace=True) if pw_act else nn.Identity() + + def forward(self, x): + residual = x + + x = self.conv_dw(x) + x = self.bn1(x) + x = self.act1(x) + + x = self.se(x) + + x = self.conv_pw(x) + x = self.bn2(x) + x = self.act2(x) + + if self.has_residual: + if self.drop_connect_rate > 0.: + x = drop_connect(x, self.training, self.drop_connect_rate) + x += residual + return x + + +class InvertedResidual(nn.Module): + """ Inverted residual block w/ optional SE""" + + def __init__(self, in_chs, out_chs, dw_kernel_size=3, + stride=1, pad_type='', act_layer=nn.ReLU, noskip=False, + exp_ratio=1.0, exp_kernel_size=1, pw_kernel_size=1, + se_ratio=0., se_kwargs=None, norm_layer=nn.BatchNorm2d, norm_kwargs=None, + conv_kwargs=None, drop_connect_rate=0.): + super(InvertedResidual, self).__init__() + norm_kwargs = norm_kwargs or {} + conv_kwargs = conv_kwargs or {} + mid_chs: int = make_divisible(in_chs * exp_ratio) + self.has_residual = (in_chs == out_chs and stride == 1) and not noskip + self.drop_connect_rate = drop_connect_rate + + # Point-wise expansion + self.conv_pw = select_conv2d(in_chs, mid_chs, exp_kernel_size, padding=pad_type, **conv_kwargs) + self.bn1 = norm_layer(mid_chs, **norm_kwargs) + self.act1 = act_layer(inplace=True) + + # Depth-wise convolution + self.conv_dw = select_conv2d( + mid_chs, mid_chs, dw_kernel_size, stride=stride, padding=pad_type, depthwise=True, **conv_kwargs) + self.bn2 = norm_layer(mid_chs, **norm_kwargs) + self.act2 = act_layer(inplace=True) + + # Squeeze-and-excitation + if se_ratio is not None and se_ratio > 0.: + se_kwargs = resolve_se_args(se_kwargs, in_chs, act_layer) + self.se = SqueezeExcite(mid_chs, se_ratio=se_ratio, **se_kwargs) + else: + self.se = nn.Identity() # for jit.script compat + + # Point-wise linear projection + self.conv_pwl = select_conv2d(mid_chs, out_chs, pw_kernel_size, padding=pad_type, **conv_kwargs) + self.bn3 = norm_layer(out_chs, **norm_kwargs) + + def forward(self, x): + residual = x + + # Point-wise expansion + x = self.conv_pw(x) + x = self.bn1(x) + x = self.act1(x) + + # Depth-wise convolution + x = self.conv_dw(x) + x = self.bn2(x) + x = self.act2(x) + + # Squeeze-and-excitation + x = self.se(x) + + # Point-wise linear projection + x = self.conv_pwl(x) + x = self.bn3(x) + + if self.has_residual: + if self.drop_connect_rate > 0.: + x = drop_connect(x, self.training, self.drop_connect_rate) + x += residual + return x + + +class CondConvResidual(InvertedResidual): + """ Inverted 
residual block w/ CondConv routing""" + + def __init__(self, in_chs, out_chs, dw_kernel_size=3, + stride=1, pad_type='', act_layer=nn.ReLU, noskip=False, + exp_ratio=1.0, exp_kernel_size=1, pw_kernel_size=1, + se_ratio=0., se_kwargs=None, norm_layer=nn.BatchNorm2d, norm_kwargs=None, + num_experts=0, drop_connect_rate=0.): + + self.num_experts = num_experts + conv_kwargs = dict(num_experts=self.num_experts) + + super(CondConvResidual, self).__init__( + in_chs, out_chs, dw_kernel_size=dw_kernel_size, stride=stride, pad_type=pad_type, + act_layer=act_layer, noskip=noskip, exp_ratio=exp_ratio, exp_kernel_size=exp_kernel_size, + pw_kernel_size=pw_kernel_size, se_ratio=se_ratio, se_kwargs=se_kwargs, + norm_layer=norm_layer, norm_kwargs=norm_kwargs, conv_kwargs=conv_kwargs, + drop_connect_rate=drop_connect_rate) + + self.routing_fn = nn.Linear(in_chs, self.num_experts) + + def forward(self, x): + residual = x + + # CondConv routing + pooled_inputs = F.adaptive_avg_pool2d(x, 1).flatten(1) + routing_weights = torch.sigmoid(self.routing_fn(pooled_inputs)) + + # Point-wise expansion + x = self.conv_pw(x, routing_weights) + x = self.bn1(x) + x = self.act1(x) + + # Depth-wise convolution + x = self.conv_dw(x, routing_weights) + x = self.bn2(x) + x = self.act2(x) + + # Squeeze-and-excitation + x = self.se(x) + + # Point-wise linear projection + x = self.conv_pwl(x, routing_weights) + x = self.bn3(x) + + if self.has_residual: + if self.drop_connect_rate > 0.: + x = drop_connect(x, self.training, self.drop_connect_rate) + x += residual + return x + + +class EdgeResidual(nn.Module): + """ EdgeTPU Residual block with expansion convolution followed by pointwise-linear w/ stride""" + + def __init__(self, in_chs, out_chs, exp_kernel_size=3, exp_ratio=1.0, fake_in_chs=0, + stride=1, pad_type='', act_layer=nn.ReLU, noskip=False, pw_kernel_size=1, + se_ratio=0., se_kwargs=None, norm_layer=nn.BatchNorm2d, norm_kwargs=None, drop_connect_rate=0.): + super(EdgeResidual, self).__init__() + norm_kwargs = norm_kwargs or {} + mid_chs = make_divisible(fake_in_chs * exp_ratio) if fake_in_chs > 0 else make_divisible(in_chs * exp_ratio) + self.has_residual = (in_chs == out_chs and stride == 1) and not noskip + self.drop_connect_rate = drop_connect_rate + + # Expansion convolution + self.conv_exp = select_conv2d(in_chs, mid_chs, exp_kernel_size, padding=pad_type) + self.bn1 = norm_layer(mid_chs, **norm_kwargs) + self.act1 = act_layer(inplace=True) + + # Squeeze-and-excitation + if se_ratio is not None and se_ratio > 0.: + se_kwargs = resolve_se_args(se_kwargs, in_chs, act_layer) + self.se = SqueezeExcite(mid_chs, se_ratio=se_ratio, **se_kwargs) + else: + self.se = nn.Identity() + + # Point-wise linear projection + self.conv_pwl = select_conv2d(mid_chs, out_chs, pw_kernel_size, stride=stride, padding=pad_type) + self.bn2 = nn.BatchNorm2d(out_chs, **norm_kwargs) + + def forward(self, x): + residual = x + + # Expansion convolution + x = self.conv_exp(x) + x = self.bn1(x) + x = self.act1(x) + + # Squeeze-and-excitation + x = self.se(x) + + # Point-wise linear projection + x = self.conv_pwl(x) + x = self.bn2(x) + + if self.has_residual: + if self.drop_connect_rate > 0.: + x = drop_connect(x, self.training, self.drop_connect_rate) + x += residual + + return x + + +class EfficientNetBuilder: + """ Build Trunk Blocks for Efficient/Mobile Networks + + This ended up being somewhat of a cross between + https://github.com/tensorflow/tpu/blob/master/models/official/mnasnet/mnasnet_models.py + and + 
https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/modeling/backbone/fbnet_builder.py + + """ + + def __init__(self, channel_multiplier=1.0, channel_divisor=8, channel_min=None, + pad_type='', act_layer=None, se_kwargs=None, + norm_layer=nn.BatchNorm2d, norm_kwargs=None, drop_connect_rate=0.): + self.channel_multiplier = channel_multiplier + self.channel_divisor = channel_divisor + self.channel_min = channel_min + self.pad_type = pad_type + self.act_layer = act_layer + self.se_kwargs = se_kwargs + self.norm_layer = norm_layer + self.norm_kwargs = norm_kwargs + self.drop_connect_rate = drop_connect_rate + + # updated during build + self.in_chs = None + self.block_idx = 0 + self.block_count = 0 + + def _round_channels(self, chs): + return round_channels(chs, self.channel_multiplier, self.channel_divisor, self.channel_min) + + def _make_block(self, ba): + bt = ba.pop('block_type') + ba['in_chs'] = self.in_chs + ba['out_chs'] = self._round_channels(ba['out_chs']) + if 'fake_in_chs' in ba and ba['fake_in_chs']: + # FIXME this is a hack to work around mismatch in origin impl input filters for EdgeTPU + ba['fake_in_chs'] = self._round_channels(ba['fake_in_chs']) + ba['norm_layer'] = self.norm_layer + ba['norm_kwargs'] = self.norm_kwargs + ba['pad_type'] = self.pad_type + # block act fn overrides the model default + ba['act_layer'] = ba['act_layer'] if ba['act_layer'] is not None else self.act_layer + assert ba['act_layer'] is not None + if bt == 'ir': + ba['drop_connect_rate'] = self.drop_connect_rate * self.block_idx / self.block_count + ba['se_kwargs'] = self.se_kwargs + if ba.get('num_experts', 0) > 0: + block = CondConvResidual(**ba) + else: + block = InvertedResidual(**ba) + elif bt == 'ds' or bt == 'dsa': + ba['drop_connect_rate'] = self.drop_connect_rate * self.block_idx / self.block_count + ba['se_kwargs'] = self.se_kwargs + block = DepthwiseSeparableConv(**ba) + elif bt == 'er': + ba['drop_connect_rate'] = self.drop_connect_rate * self.block_idx / self.block_count + ba['se_kwargs'] = self.se_kwargs + block = EdgeResidual(**ba) + elif bt == 'cn': + block = ConvBnAct(**ba) + else: + assert False, 'Uknkown block type (%s) while building model.' 
% bt + self.in_chs = ba['out_chs'] # update in_chs for arg of next block + return block + + def _make_stack(self, stack_args): + blocks = [] + # each stack (stage) contains a list of block arguments + for i, ba in enumerate(stack_args): + if i >= 1: + # only the first block in any stack can have a stride > 1 + ba['stride'] = 1 + block = self._make_block(ba) + blocks.append(block) + self.block_idx += 1 # incr global idx (across all stacks) + return nn.Sequential(*blocks) + + def __call__(self, in_chs, block_args): + """ Build the blocks + Args: + in_chs: Number of input-channels passed to first block + block_args: A list of lists, outer list defines stages, inner + list contains strings defining block configuration(s) + Return: + List of block stacks (each stack wrapped in nn.Sequential) + """ + self.in_chs = in_chs + self.block_count = sum([len(x) for x in block_args]) + self.block_idx = 0 + blocks = [] + # outer list of block_args defines the stacks ('stages' by some conventions) + for stack_idx, stack in enumerate(block_args): + assert isinstance(stack, list) + stack = self._make_stack(stack) + blocks.append(stack) + return blocks + + +def _parse_ksize(ss): + if ss.isdigit(): + return int(ss) + else: + return [int(k) for k in ss.split('.')] + + +def _decode_block_str(block_str): + """ Decode block definition string + + Gets a list of block arg (dicts) through a string notation of arguments. + E.g. ir_r2_k3_s2_e1_i32_o16_se0.25_noskip + + All args can exist in any order with the exception of the leading string which + is assumed to indicate the block type. + + leading string - block type ( + ir = InvertedResidual, ds = DepthwiseSep, dsa = DeptwhiseSep with pw act, cn = ConvBnAct) + r - number of repeat blocks, + k - kernel size, + s - strides (1-9), + e - expansion ratio, + c - output channels, + se - squeeze/excitation ratio + n - activation fn ('re', 'r6', 'hs', or 'sw') + Args: + block_str: a string representation of block arguments. 
+ Returns: + A list of block args (dicts) + Raises: + ValueError: if the string def not properly specified (TODO) + """ + assert isinstance(block_str, str) + ops = block_str.split('_') + block_type = ops[0] # take the block type off the front + ops = ops[1:] + options = {} + noskip = False + for op in ops: + # string options being checked on individual basis, combine if they grow + if op == 'noskip': + noskip = True + elif op.startswith('n'): + # activation fn + key = op[0] + v = op[1:] + if v == 're': + value = get_act_layer('relu') + elif v == 'r6': + value = get_act_layer('relu6') + elif v == 'hs': + value = get_act_layer('hard_swish') + elif v == 'sw': + value = get_act_layer('swish') + else: + continue + options[key] = value + else: + # all numeric options + splits = re.split(r'(\d.*)', op) + if len(splits) >= 2: + key, value = splits[:2] + options[key] = value + + # if act_layer is None, the model default (passed to model init) will be used + act_layer = options['n'] if 'n' in options else None + exp_kernel_size = _parse_ksize(options['a']) if 'a' in options else 1 + pw_kernel_size = _parse_ksize(options['p']) if 'p' in options else 1 + fake_in_chs = int(options['fc']) if 'fc' in options else 0 # FIXME hack to deal with in_chs issue in TPU def + + num_repeat = int(options['r']) + # each type of block has different valid arguments, fill accordingly + if block_type == 'ir': + block_args = dict( + block_type=block_type, + dw_kernel_size=_parse_ksize(options['k']), + exp_kernel_size=exp_kernel_size, + pw_kernel_size=pw_kernel_size, + out_chs=int(options['c']), + exp_ratio=float(options['e']), + se_ratio=float(options['se']) if 'se' in options else None, + stride=int(options['s']), + act_layer=act_layer, + noskip=noskip, + ) + if 'cc' in options: + block_args['num_experts'] = int(options['cc']) + elif block_type == 'ds' or block_type == 'dsa': + block_args = dict( + block_type=block_type, + dw_kernel_size=_parse_ksize(options['k']), + pw_kernel_size=pw_kernel_size, + out_chs=int(options['c']), + se_ratio=float(options['se']) if 'se' in options else None, + stride=int(options['s']), + act_layer=act_layer, + pw_act=block_type == 'dsa', + noskip=block_type == 'dsa' or noskip, + ) + elif block_type == 'er': + block_args = dict( + block_type=block_type, + exp_kernel_size=_parse_ksize(options['k']), + pw_kernel_size=pw_kernel_size, + out_chs=int(options['c']), + exp_ratio=float(options['e']), + fake_in_chs=fake_in_chs, + se_ratio=float(options['se']) if 'se' in options else None, + stride=int(options['s']), + act_layer=act_layer, + noskip=noskip, + ) + elif block_type == 'cn': + block_args = dict( + block_type=block_type, + kernel_size=int(options['k']), + out_chs=int(options['c']), + stride=int(options['s']), + act_layer=act_layer, + ) + else: + assert False, 'Unknown block type (%s)' % block_type + + return block_args, num_repeat + + +def _scale_stage_depth(stack_args, repeats, depth_multiplier=1.0, depth_trunc='ceil'): + """ Per-stage depth scaling + Scales the block repeats in each stage. This depth scaling impl maintains + compatibility with the EfficientNet scaling method, while allowing sensible + scaling for other models that may have multiple block arg definitions in each stage. + """ + + # We scale the total repeat count for each stage, there may be multiple + # block arg defs per stage so we need to sum. + num_repeat = sum(repeats) + if depth_trunc == 'round': + # Truncating to int by rounding allows stages with few repeats to remain + # proportionally smaller for longer. 
This is a good choice when stage definitions + # include single repeat stages that we'd prefer to keep that way as long as possible + num_repeat_scaled = max(1, round(num_repeat * depth_multiplier)) + else: + # The default for EfficientNet truncates repeats to int via 'ceil'. + # Any multiplier > 1.0 will result in an increased depth for every stage. + num_repeat_scaled = int(math.ceil(num_repeat * depth_multiplier)) + + # Proportionally distribute repeat count scaling to each block definition in the stage. + # Allocation is done in reverse as it results in the first block being less likely to be scaled. + # The first block makes less sense to repeat in most of the arch definitions. + repeats_scaled = [] + for r in repeats[::-1]: + rs = max(1, round((r / num_repeat * num_repeat_scaled))) + repeats_scaled.append(rs) + num_repeat -= r + num_repeat_scaled -= rs + repeats_scaled = repeats_scaled[::-1] + + # Apply the calculated scaling to each block arg in the stage + sa_scaled = [] + for ba, rep in zip(stack_args, repeats_scaled): + sa_scaled.extend([deepcopy(ba) for _ in range(rep)]) + return sa_scaled + + +def decode_arch_def(arch_def, depth_multiplier=1.0, depth_trunc='ceil', experts_multiplier=1, fix_first_last=False): + arch_args = [] + for stack_idx, block_strings in enumerate(arch_def): + assert isinstance(block_strings, list) + stack_args = [] + repeats = [] + for block_str in block_strings: + assert isinstance(block_str, str) + ba, rep = _decode_block_str(block_str) + if ba.get('num_experts', 0) > 0 and experts_multiplier > 1: + ba['num_experts'] *= experts_multiplier + stack_args.append(ba) + repeats.append(rep) + if fix_first_last and (stack_idx == 0 or stack_idx == len(arch_def) - 1): + arch_args.append(_scale_stage_depth(stack_args, repeats, 1.0, depth_trunc)) + else: + arch_args.append(_scale_stage_depth(stack_args, repeats, depth_multiplier, depth_trunc)) + return arch_args + + +def initialize_weight_goog(m, n='', fix_group_fanout=True): + # weight init as per Tensorflow Official impl + # https://github.com/tensorflow/tpu/blob/master/models/official/mnasnet/mnasnet_model.py + if isinstance(m, CondConv2d): + fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + if fix_group_fanout: + fan_out //= m.groups + init_weight_fn = get_condconv_initializer( + lambda w: w.data.normal_(0, math.sqrt(2.0 / fan_out)), m.num_experts, m.weight_shape) + init_weight_fn(m.weight) + if m.bias is not None: + m.bias.data.zero_() + elif isinstance(m, nn.Conv2d): + fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + if fix_group_fanout: + fan_out //= m.groups + m.weight.data.normal_(0, math.sqrt(2.0 / fan_out)) + if m.bias is not None: + m.bias.data.zero_() + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1.0) + m.bias.data.zero_() + elif isinstance(m, nn.Linear): + fan_out = m.weight.size(0) # fan-out + fan_in = 0 + if 'routing_fn' in n: + fan_in = m.weight.size(1) + init_range = 1.0 / math.sqrt(fan_in + fan_out) + m.weight.data.uniform_(-init_range, init_range) + m.bias.data.zero_() + + +def initialize_weight_default(m, n=''): + if isinstance(m, CondConv2d): + init_fn = get_condconv_initializer(partial( + nn.init.kaiming_normal_, mode='fan_out', nonlinearity='relu'), m.num_experts, m.weight_shape) + init_fn(m.weight) + elif isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1.0) + m.bias.data.zero_() + elif isinstance(m, nn.Linear): + 
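        # linear layers (e.g. the classifier): Kaiming uniform init, fan_in mode, 'linear' gain +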
nn.init.kaiming_uniform_(m.weight, mode='fan_in', nonlinearity='linear') diff --git a/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/gen_efficientnet.py b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/gen_efficientnet.py new file mode 100644 index 0000000000000000000000000000000000000000..cd170d4cc5bed6ca82b61539902b470d3320c691 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/gen_efficientnet.py @@ -0,0 +1,1450 @@ +""" Generic Efficient Networks + +A generic MobileNet class with building blocks to support a variety of models: + +* EfficientNet (B0-B8, L2 + Tensorflow pretrained AutoAug/RandAug/AdvProp/NoisyStudent ports) + - EfficientNet: Rethinking Model Scaling for CNNs - https://arxiv.org/abs/1905.11946 + - CondConv: Conditionally Parameterized Convolutions for Efficient Inference - https://arxiv.org/abs/1904.04971 + - Adversarial Examples Improve Image Recognition - https://arxiv.org/abs/1911.09665 + - Self-training with Noisy Student improves ImageNet classification - https://arxiv.org/abs/1911.04252 + +* EfficientNet-Lite + +* MixNet (Small, Medium, and Large) + - MixConv: Mixed Depthwise Convolutional Kernels - https://arxiv.org/abs/1907.09595 + +* MNasNet B1, A1 (SE), Small + - MnasNet: Platform-Aware Neural Architecture Search for Mobile - https://arxiv.org/abs/1807.11626 + +* FBNet-C + - FBNet: Hardware-Aware Efficient ConvNet Design via Differentiable NAS - https://arxiv.org/abs/1812.03443 + +* Single-Path NAS Pixel1 + - Single-Path NAS: Designing Hardware-Efficient ConvNets - https://arxiv.org/abs/1904.02877 + +* And likely more... 
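+
+Example usage (illustrative; the factory functions defined below are the entry points,
+names match __all__):
+
+    model = efficientnet_b0(pretrained=True)
+    model.eval()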
+ +Hacked together by / Copyright 2020 Ross Wightman +""" +import torch.nn as nn +import torch.nn.functional as F + +from .config import layer_config_kwargs, is_scriptable +from .conv2d_layers import select_conv2d +from .helpers import load_pretrained +from .efficientnet_builder import * + +__all__ = ['GenEfficientNet', 'mnasnet_050', 'mnasnet_075', 'mnasnet_100', 'mnasnet_b1', 'mnasnet_140', + 'semnasnet_050', 'semnasnet_075', 'semnasnet_100', 'mnasnet_a1', 'semnasnet_140', 'mnasnet_small', + 'mobilenetv2_100', 'mobilenetv2_140', 'mobilenetv2_110d', 'mobilenetv2_120d', + 'fbnetc_100', 'spnasnet_100', 'efficientnet_b0', 'efficientnet_b1', 'efficientnet_b2', 'efficientnet_b3', + 'efficientnet_b4', 'efficientnet_b5', 'efficientnet_b6', 'efficientnet_b7', 'efficientnet_b8', + 'efficientnet_l2', 'efficientnet_es', 'efficientnet_em', 'efficientnet_el', + 'efficientnet_cc_b0_4e', 'efficientnet_cc_b0_8e', 'efficientnet_cc_b1_8e', + 'efficientnet_lite0', 'efficientnet_lite1', 'efficientnet_lite2', 'efficientnet_lite3', 'efficientnet_lite4', + 'tf_efficientnet_b0', 'tf_efficientnet_b1', 'tf_efficientnet_b2', 'tf_efficientnet_b3', + 'tf_efficientnet_b4', 'tf_efficientnet_b5', 'tf_efficientnet_b6', 'tf_efficientnet_b7', 'tf_efficientnet_b8', + 'tf_efficientnet_b0_ap', 'tf_efficientnet_b1_ap', 'tf_efficientnet_b2_ap', 'tf_efficientnet_b3_ap', + 'tf_efficientnet_b4_ap', 'tf_efficientnet_b5_ap', 'tf_efficientnet_b6_ap', 'tf_efficientnet_b7_ap', + 'tf_efficientnet_b8_ap', 'tf_efficientnet_b0_ns', 'tf_efficientnet_b1_ns', 'tf_efficientnet_b2_ns', + 'tf_efficientnet_b3_ns', 'tf_efficientnet_b4_ns', 'tf_efficientnet_b5_ns', 'tf_efficientnet_b6_ns', + 'tf_efficientnet_b7_ns', 'tf_efficientnet_l2_ns', 'tf_efficientnet_l2_ns_475', + 'tf_efficientnet_es', 'tf_efficientnet_em', 'tf_efficientnet_el', + 'tf_efficientnet_cc_b0_4e', 'tf_efficientnet_cc_b0_8e', 'tf_efficientnet_cc_b1_8e', + 'tf_efficientnet_lite0', 'tf_efficientnet_lite1', 'tf_efficientnet_lite2', 'tf_efficientnet_lite3', + 'tf_efficientnet_lite4', + 'mixnet_s', 'mixnet_m', 'mixnet_l', 'mixnet_xl', 'tf_mixnet_s', 'tf_mixnet_m', 'tf_mixnet_l'] + + +model_urls = { + 'mnasnet_050': None, + 'mnasnet_075': None, + 'mnasnet_100': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mnasnet_b1-74cb7081.pth', + 'mnasnet_140': None, + 'mnasnet_small': None, + + 'semnasnet_050': None, + 'semnasnet_075': None, + 'semnasnet_100': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mnasnet_a1-d9418771.pth', + 'semnasnet_140': None, + + 'mobilenetv2_100': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mobilenetv2_100_ra-b33bc2c4.pth', + 'mobilenetv2_110d': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mobilenetv2_110d_ra-77090ade.pth', + 'mobilenetv2_120d': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mobilenetv2_120d_ra-5987e2ed.pth', + 'mobilenetv2_140': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mobilenetv2_140_ra-21a4e913.pth', + + 'fbnetc_100': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/fbnetc_100-c345b898.pth', + 'spnasnet_100': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/spnasnet_100-048bc3f4.pth', + + 'efficientnet_b0': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnet_b0_ra-3dd342df.pth', + 'efficientnet_b1': + 
'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnet_b1-533bc792.pth', + 'efficientnet_b2': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnet_b2_ra-bcdf34b7.pth', + 'efficientnet_b3': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnet_b3_ra2-cf984f9c.pth', + 'efficientnet_b4': None, + 'efficientnet_b5': None, + 'efficientnet_b6': None, + 'efficientnet_b7': None, + 'efficientnet_b8': None, + 'efficientnet_l2': None, + + 'efficientnet_es': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnet_es_ra-f111e99c.pth', + 'efficientnet_em': None, + 'efficientnet_el': None, + + 'efficientnet_cc_b0_4e': None, + 'efficientnet_cc_b0_8e': None, + 'efficientnet_cc_b1_8e': None, + + 'efficientnet_lite0': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnet_lite0_ra-37913777.pth', + 'efficientnet_lite1': None, + 'efficientnet_lite2': None, + 'efficientnet_lite3': None, + 'efficientnet_lite4': None, + + 'tf_efficientnet_b0': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b0_aa-827b6e33.pth', + 'tf_efficientnet_b1': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b1_aa-ea7a6ee0.pth', + 'tf_efficientnet_b2': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b2_aa-60c94f97.pth', + 'tf_efficientnet_b3': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b3_aa-84b4657e.pth', + 'tf_efficientnet_b4': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b4_aa-818f208c.pth', + 'tf_efficientnet_b5': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b5_ra-9a3e5369.pth', + 'tf_efficientnet_b6': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b6_aa-80ba17e4.pth', + 'tf_efficientnet_b7': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b7_ra-6c08e654.pth', + 'tf_efficientnet_b8': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b8_ra-572d5dd9.pth', + + 'tf_efficientnet_b0_ap': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b0_ap-f262efe1.pth', + 'tf_efficientnet_b1_ap': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b1_ap-44ef0a3d.pth', + 'tf_efficientnet_b2_ap': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b2_ap-2f8e7636.pth', + 'tf_efficientnet_b3_ap': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b3_ap-aad25bdd.pth', + 'tf_efficientnet_b4_ap': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b4_ap-dedb23e6.pth', + 'tf_efficientnet_b5_ap': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b5_ap-9e82fae8.pth', + 'tf_efficientnet_b6_ap': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b6_ap-4ffb161f.pth', + 'tf_efficientnet_b7_ap': + 
'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b7_ap-ddb28fec.pth', + 'tf_efficientnet_b8_ap': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b8_ap-00e169fa.pth', + + 'tf_efficientnet_b0_ns': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b0_ns-c0e6a31c.pth', + 'tf_efficientnet_b1_ns': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b1_ns-99dd0c41.pth', + 'tf_efficientnet_b2_ns': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b2_ns-00306e48.pth', + 'tf_efficientnet_b3_ns': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b3_ns-9d44bf68.pth', + 'tf_efficientnet_b4_ns': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b4_ns-d6313a46.pth', + 'tf_efficientnet_b5_ns': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b5_ns-6f26d0cf.pth', + 'tf_efficientnet_b6_ns': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b6_ns-51548356.pth', + 'tf_efficientnet_b7_ns': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b7_ns-1dbc32de.pth', + 'tf_efficientnet_l2_ns_475': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_l2_ns_475-bebbd00a.pth', + 'tf_efficientnet_l2_ns': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_l2_ns-df73bb44.pth', + + 'tf_efficientnet_es': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_es-ca1afbfe.pth', + 'tf_efficientnet_em': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_em-e78cfe58.pth', + 'tf_efficientnet_el': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_el-5143854e.pth', + + 'tf_efficientnet_cc_b0_4e': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_cc_b0_4e-4362b6b2.pth', + 'tf_efficientnet_cc_b0_8e': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_cc_b0_8e-66184a25.pth', + 'tf_efficientnet_cc_b1_8e': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_cc_b1_8e-f7c79ae1.pth', + + 'tf_efficientnet_lite0': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_lite0-0aa007d2.pth', + 'tf_efficientnet_lite1': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_lite1-bde8b488.pth', + 'tf_efficientnet_lite2': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_lite2-dcccb7df.pth', + 'tf_efficientnet_lite3': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_lite3-b733e338.pth', + 'tf_efficientnet_lite4': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_lite4-741542c3.pth', + + 'mixnet_s': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mixnet_s-a907afbc.pth', + 'mixnet_m': 
'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mixnet_m-4647fc68.pth', + 'mixnet_l': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mixnet_l-5a9a2ed8.pth', + 'mixnet_xl': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mixnet_xl_ra-aac3c00c.pth', + + 'tf_mixnet_s': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mixnet_s-89d3354b.pth', + 'tf_mixnet_m': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mixnet_m-0f4d8805.pth', + 'tf_mixnet_l': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mixnet_l-6c92e0c8.pth', +} + + +class GenEfficientNet(nn.Module): + """ Generic EfficientNets + + An implementation of mobile optimized networks that covers: + * EfficientNet (B0-B8, L2, CondConv, EdgeTPU) + * MixNet (Small, Medium, and Large, XL) + * MNASNet A1, B1, and small + * FBNet C + * Single-Path NAS Pixel1 + """ + + def __init__(self, block_args, num_classes=1000, in_chans=3, num_features=1280, stem_size=32, fix_stem=False, + channel_multiplier=1.0, channel_divisor=8, channel_min=None, + pad_type='', act_layer=nn.ReLU, drop_rate=0., drop_connect_rate=0., + se_kwargs=None, norm_layer=nn.BatchNorm2d, norm_kwargs=None, + weight_init='goog'): + super(GenEfficientNet, self).__init__() + self.drop_rate = drop_rate + + if not fix_stem: + stem_size = round_channels(stem_size, channel_multiplier, channel_divisor, channel_min) + self.conv_stem = select_conv2d(in_chans, stem_size, 3, stride=2, padding=pad_type) + self.bn1 = norm_layer(stem_size, **norm_kwargs) + self.act1 = act_layer(inplace=True) + in_chs = stem_size + + builder = EfficientNetBuilder( + channel_multiplier, channel_divisor, channel_min, + pad_type, act_layer, se_kwargs, norm_layer, norm_kwargs, drop_connect_rate) + self.blocks = nn.Sequential(*builder(in_chs, block_args)) + in_chs = builder.in_chs + + self.conv_head = select_conv2d(in_chs, num_features, 1, padding=pad_type) + self.bn2 = norm_layer(num_features, **norm_kwargs) + self.act2 = act_layer(inplace=True) + self.global_pool = nn.AdaptiveAvgPool2d(1) + self.classifier = nn.Linear(num_features, num_classes) + + for n, m in self.named_modules(): + if weight_init == 'goog': + initialize_weight_goog(m, n) + else: + initialize_weight_default(m, n) + + def features(self, x): + x = self.conv_stem(x) + x = self.bn1(x) + x = self.act1(x) + x = self.blocks(x) + x = self.conv_head(x) + x = self.bn2(x) + x = self.act2(x) + return x + + def as_sequential(self): + layers = [self.conv_stem, self.bn1, self.act1] + layers.extend(self.blocks) + layers.extend([ + self.conv_head, self.bn2, self.act2, + self.global_pool, nn.Flatten(), nn.Dropout(self.drop_rate), self.classifier]) + return nn.Sequential(*layers) + + def forward(self, x): + x = self.features(x) + x = self.global_pool(x) + x = x.flatten(1) + if self.drop_rate > 0.: + x = F.dropout(x, p=self.drop_rate, training=self.training) + return self.classifier(x) + + +def _create_model(model_kwargs, variant, pretrained=False): + as_sequential = model_kwargs.pop('as_sequential', False) + model = GenEfficientNet(**model_kwargs) + if pretrained: + load_pretrained(model, model_urls[variant]) + if as_sequential: + model = model.as_sequential() + return model + + +def _gen_mnasnet_a1(variant, channel_multiplier=1.0, pretrained=False, **kwargs): + """Creates a mnasnet-a1 model. 
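+    (SE variant of MnasNet; the semnasnet_* and mnasnet_a1 entry points below build it,
+    e.g. semnasnet_100() at channel_multiplier=1.0.)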
+ + Ref impl: https://github.com/tensorflow/tpu/tree/master/models/official/mnasnet + Paper: https://arxiv.org/pdf/1807.11626.pdf. + + Args: + channel_multiplier: multiplier to number of channels per layer. + """ + arch_def = [ + # stage 0, 112x112 in + ['ds_r1_k3_s1_e1_c16_noskip'], + # stage 1, 112x112 in + ['ir_r2_k3_s2_e6_c24'], + # stage 2, 56x56 in + ['ir_r3_k5_s2_e3_c40_se0.25'], + # stage 3, 28x28 in + ['ir_r4_k3_s2_e6_c80'], + # stage 4, 14x14in + ['ir_r2_k3_s1_e6_c112_se0.25'], + # stage 5, 14x14in + ['ir_r3_k5_s2_e6_c160_se0.25'], + # stage 6, 7x7 in + ['ir_r1_k3_s1_e6_c320'], + ] + with layer_config_kwargs(kwargs): + model_kwargs = dict( + block_args=decode_arch_def(arch_def), + stem_size=32, + channel_multiplier=channel_multiplier, + act_layer=resolve_act_layer(kwargs, 'relu'), + norm_kwargs=resolve_bn_args(kwargs), + **kwargs + ) + model = _create_model(model_kwargs, variant, pretrained) + return model + + +def _gen_mnasnet_b1(variant, channel_multiplier=1.0, pretrained=False, **kwargs): + """Creates a mnasnet-b1 model. + + Ref impl: https://github.com/tensorflow/tpu/tree/master/models/official/mnasnet + Paper: https://arxiv.org/pdf/1807.11626.pdf. + + Args: + channel_multiplier: multiplier to number of channels per layer. + """ + arch_def = [ + # stage 0, 112x112 in + ['ds_r1_k3_s1_c16_noskip'], + # stage 1, 112x112 in + ['ir_r3_k3_s2_e3_c24'], + # stage 2, 56x56 in + ['ir_r3_k5_s2_e3_c40'], + # stage 3, 28x28 in + ['ir_r3_k5_s2_e6_c80'], + # stage 4, 14x14in + ['ir_r2_k3_s1_e6_c96'], + # stage 5, 14x14in + ['ir_r4_k5_s2_e6_c192'], + # stage 6, 7x7 in + ['ir_r1_k3_s1_e6_c320_noskip'] + ] + with layer_config_kwargs(kwargs): + model_kwargs = dict( + block_args=decode_arch_def(arch_def), + stem_size=32, + channel_multiplier=channel_multiplier, + act_layer=resolve_act_layer(kwargs, 'relu'), + norm_kwargs=resolve_bn_args(kwargs), + **kwargs + ) + model = _create_model(model_kwargs, variant, pretrained) + return model + + +def _gen_mnasnet_small(variant, channel_multiplier=1.0, pretrained=False, **kwargs): + """Creates a mnasnet-b1 model. + + Ref impl: https://github.com/tensorflow/tpu/tree/master/models/official/mnasnet + Paper: https://arxiv.org/pdf/1807.11626.pdf. + + Args: + channel_multiplier: multiplier to number of channels per layer. 
+ """ + arch_def = [ + ['ds_r1_k3_s1_c8'], + ['ir_r1_k3_s2_e3_c16'], + ['ir_r2_k3_s2_e6_c16'], + ['ir_r4_k5_s2_e6_c32_se0.25'], + ['ir_r3_k3_s1_e6_c32_se0.25'], + ['ir_r3_k5_s2_e6_c88_se0.25'], + ['ir_r1_k3_s1_e6_c144'] + ] + with layer_config_kwargs(kwargs): + model_kwargs = dict( + block_args=decode_arch_def(arch_def), + stem_size=8, + channel_multiplier=channel_multiplier, + act_layer=resolve_act_layer(kwargs, 'relu'), + norm_kwargs=resolve_bn_args(kwargs), + **kwargs + ) + model = _create_model(model_kwargs, variant, pretrained) + return model + + +def _gen_mobilenet_v2( + variant, channel_multiplier=1.0, depth_multiplier=1.0, fix_stem_head=False, pretrained=False, **kwargs): + """ Generate MobileNet-V2 network + Ref impl: https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet_v2.py + Paper: https://arxiv.org/abs/1801.04381 + """ + arch_def = [ + ['ds_r1_k3_s1_c16'], + ['ir_r2_k3_s2_e6_c24'], + ['ir_r3_k3_s2_e6_c32'], + ['ir_r4_k3_s2_e6_c64'], + ['ir_r3_k3_s1_e6_c96'], + ['ir_r3_k3_s2_e6_c160'], + ['ir_r1_k3_s1_e6_c320'], + ] + with layer_config_kwargs(kwargs): + model_kwargs = dict( + block_args=decode_arch_def(arch_def, depth_multiplier=depth_multiplier, fix_first_last=fix_stem_head), + num_features=1280 if fix_stem_head else round_channels(1280, channel_multiplier, 8, None), + stem_size=32, + fix_stem=fix_stem_head, + channel_multiplier=channel_multiplier, + norm_kwargs=resolve_bn_args(kwargs), + act_layer=nn.ReLU6, + **kwargs + ) + model = _create_model(model_kwargs, variant, pretrained) + return model + + +def _gen_fbnetc(variant, channel_multiplier=1.0, pretrained=False, **kwargs): + """ FBNet-C + + Paper: https://arxiv.org/abs/1812.03443 + Ref Impl: https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/modeling/backbone/fbnet_modeldef.py + + NOTE: the impl above does not relate to the 'C' variant here, that was derived from paper, + it was used to confirm some building block details + """ + arch_def = [ + ['ir_r1_k3_s1_e1_c16'], + ['ir_r1_k3_s2_e6_c24', 'ir_r2_k3_s1_e1_c24'], + ['ir_r1_k5_s2_e6_c32', 'ir_r1_k5_s1_e3_c32', 'ir_r1_k5_s1_e6_c32', 'ir_r1_k3_s1_e6_c32'], + ['ir_r1_k5_s2_e6_c64', 'ir_r1_k5_s1_e3_c64', 'ir_r2_k5_s1_e6_c64'], + ['ir_r3_k5_s1_e6_c112', 'ir_r1_k5_s1_e3_c112'], + ['ir_r4_k5_s2_e6_c184'], + ['ir_r1_k3_s1_e6_c352'], + ] + with layer_config_kwargs(kwargs): + model_kwargs = dict( + block_args=decode_arch_def(arch_def), + stem_size=16, + num_features=1984, # paper suggests this, but is not 100% clear + channel_multiplier=channel_multiplier, + act_layer=resolve_act_layer(kwargs, 'relu'), + norm_kwargs=resolve_bn_args(kwargs), + **kwargs + ) + model = _create_model(model_kwargs, variant, pretrained) + return model + + +def _gen_spnasnet(variant, channel_multiplier=1.0, pretrained=False, **kwargs): + """Creates the Single-Path NAS model from search targeted for Pixel1 phone. + + Paper: https://arxiv.org/abs/1904.02877 + + Args: + channel_multiplier: multiplier to number of channels per layer. 
+ """ + arch_def = [ + # stage 0, 112x112 in + ['ds_r1_k3_s1_c16_noskip'], + # stage 1, 112x112 in + ['ir_r3_k3_s2_e3_c24'], + # stage 2, 56x56 in + ['ir_r1_k5_s2_e6_c40', 'ir_r3_k3_s1_e3_c40'], + # stage 3, 28x28 in + ['ir_r1_k5_s2_e6_c80', 'ir_r3_k3_s1_e3_c80'], + # stage 4, 14x14in + ['ir_r1_k5_s1_e6_c96', 'ir_r3_k5_s1_e3_c96'], + # stage 5, 14x14in + ['ir_r4_k5_s2_e6_c192'], + # stage 6, 7x7 in + ['ir_r1_k3_s1_e6_c320_noskip'] + ] + with layer_config_kwargs(kwargs): + model_kwargs = dict( + block_args=decode_arch_def(arch_def), + stem_size=32, + channel_multiplier=channel_multiplier, + act_layer=resolve_act_layer(kwargs, 'relu'), + norm_kwargs=resolve_bn_args(kwargs), + **kwargs + ) + model = _create_model(model_kwargs, variant, pretrained) + return model + + +def _gen_efficientnet(variant, channel_multiplier=1.0, depth_multiplier=1.0, pretrained=False, **kwargs): + """Creates an EfficientNet model. + + Ref impl: https://github.com/tensorflow/tpu/blob/master/models/official/efficientnet/efficientnet_model.py + Paper: https://arxiv.org/abs/1905.11946 + + EfficientNet params + name: (channel_multiplier, depth_multiplier, resolution, dropout_rate) + 'efficientnet-b0': (1.0, 1.0, 224, 0.2), + 'efficientnet-b1': (1.0, 1.1, 240, 0.2), + 'efficientnet-b2': (1.1, 1.2, 260, 0.3), + 'efficientnet-b3': (1.2, 1.4, 300, 0.3), + 'efficientnet-b4': (1.4, 1.8, 380, 0.4), + 'efficientnet-b5': (1.6, 2.2, 456, 0.4), + 'efficientnet-b6': (1.8, 2.6, 528, 0.5), + 'efficientnet-b7': (2.0, 3.1, 600, 0.5), + 'efficientnet-b8': (2.2, 3.6, 672, 0.5), + + Args: + channel_multiplier: multiplier to number of channels per layer + depth_multiplier: multiplier to number of repeats per stage + + """ + arch_def = [ + ['ds_r1_k3_s1_e1_c16_se0.25'], + ['ir_r2_k3_s2_e6_c24_se0.25'], + ['ir_r2_k5_s2_e6_c40_se0.25'], + ['ir_r3_k3_s2_e6_c80_se0.25'], + ['ir_r3_k5_s1_e6_c112_se0.25'], + ['ir_r4_k5_s2_e6_c192_se0.25'], + ['ir_r1_k3_s1_e6_c320_se0.25'], + ] + with layer_config_kwargs(kwargs): + model_kwargs = dict( + block_args=decode_arch_def(arch_def, depth_multiplier), + num_features=round_channels(1280, channel_multiplier, 8, None), + stem_size=32, + channel_multiplier=channel_multiplier, + act_layer=resolve_act_layer(kwargs, 'swish'), + norm_kwargs=resolve_bn_args(kwargs), + **kwargs, + ) + model = _create_model(model_kwargs, variant, pretrained) + return model + + +def _gen_efficientnet_edge(variant, channel_multiplier=1.0, depth_multiplier=1.0, pretrained=False, **kwargs): + arch_def = [ + # NOTE `fc` is present to override a mismatch between stem channels and in chs not + # present in other models + ['er_r1_k3_s1_e4_c24_fc24_noskip'], + ['er_r2_k3_s2_e8_c32'], + ['er_r4_k3_s2_e8_c48'], + ['ir_r5_k5_s2_e8_c96'], + ['ir_r4_k5_s1_e8_c144'], + ['ir_r2_k5_s2_e8_c192'], + ] + with layer_config_kwargs(kwargs): + model_kwargs = dict( + block_args=decode_arch_def(arch_def, depth_multiplier), + num_features=round_channels(1280, channel_multiplier, 8, None), + stem_size=32, + channel_multiplier=channel_multiplier, + act_layer=resolve_act_layer(kwargs, 'relu'), + norm_kwargs=resolve_bn_args(kwargs), + **kwargs, + ) + model = _create_model(model_kwargs, variant, pretrained) + return model + + +def _gen_efficientnet_condconv( + variant, channel_multiplier=1.0, depth_multiplier=1.0, experts_multiplier=1, pretrained=False, **kwargs): + """Creates an efficientnet-condconv model.""" + arch_def = [ + ['ds_r1_k3_s1_e1_c16_se0.25'], + ['ir_r2_k3_s2_e6_c24_se0.25'], + ['ir_r2_k5_s2_e6_c40_se0.25'], + ['ir_r3_k3_s2_e6_c80_se0.25'], + 
['ir_r3_k5_s1_e6_c112_se0.25_cc4'], + ['ir_r4_k5_s2_e6_c192_se0.25_cc4'], + ['ir_r1_k3_s1_e6_c320_se0.25_cc4'], + ] + with layer_config_kwargs(kwargs): + model_kwargs = dict( + block_args=decode_arch_def(arch_def, depth_multiplier, experts_multiplier=experts_multiplier), + num_features=round_channels(1280, channel_multiplier, 8, None), + stem_size=32, + channel_multiplier=channel_multiplier, + act_layer=resolve_act_layer(kwargs, 'swish'), + norm_kwargs=resolve_bn_args(kwargs), + **kwargs, + ) + model = _create_model(model_kwargs, variant, pretrained) + return model + + +def _gen_efficientnet_lite(variant, channel_multiplier=1.0, depth_multiplier=1.0, pretrained=False, **kwargs): + """Creates an EfficientNet-Lite model. + + Ref impl: https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet/lite + Paper: https://arxiv.org/abs/1905.11946 + + EfficientNet params + name: (channel_multiplier, depth_multiplier, resolution, dropout_rate) + 'efficientnet-lite0': (1.0, 1.0, 224, 0.2), + 'efficientnet-lite1': (1.0, 1.1, 240, 0.2), + 'efficientnet-lite2': (1.1, 1.2, 260, 0.3), + 'efficientnet-lite3': (1.2, 1.4, 280, 0.3), + 'efficientnet-lite4': (1.4, 1.8, 300, 0.3), + + Args: + channel_multiplier: multiplier to number of channels per layer + depth_multiplier: multiplier to number of repeats per stage + """ + arch_def = [ + ['ds_r1_k3_s1_e1_c16'], + ['ir_r2_k3_s2_e6_c24'], + ['ir_r2_k5_s2_e6_c40'], + ['ir_r3_k3_s2_e6_c80'], + ['ir_r3_k5_s1_e6_c112'], + ['ir_r4_k5_s2_e6_c192'], + ['ir_r1_k3_s1_e6_c320'], + ] + with layer_config_kwargs(kwargs): + model_kwargs = dict( + block_args=decode_arch_def(arch_def, depth_multiplier, fix_first_last=True), + num_features=1280, + stem_size=32, + fix_stem=True, + channel_multiplier=channel_multiplier, + act_layer=nn.ReLU6, + norm_kwargs=resolve_bn_args(kwargs), + **kwargs, + ) + model = _create_model(model_kwargs, variant, pretrained) + return model + + +def _gen_mixnet_s(variant, channel_multiplier=1.0, pretrained=False, **kwargs): + """Creates a MixNet Small model. + + Ref impl: https://github.com/tensorflow/tpu/tree/master/models/official/mnasnet/mixnet + Paper: https://arxiv.org/abs/1907.09595 + """ + arch_def = [ + # stage 0, 112x112 in + ['ds_r1_k3_s1_e1_c16'], # relu + # stage 1, 112x112 in + ['ir_r1_k3_a1.1_p1.1_s2_e6_c24', 'ir_r1_k3_a1.1_p1.1_s1_e3_c24'], # relu + # stage 2, 56x56 in + ['ir_r1_k3.5.7_s2_e6_c40_se0.5_nsw', 'ir_r3_k3.5_a1.1_p1.1_s1_e6_c40_se0.5_nsw'], # swish + # stage 3, 28x28 in + ['ir_r1_k3.5.7_p1.1_s2_e6_c80_se0.25_nsw', 'ir_r2_k3.5_p1.1_s1_e6_c80_se0.25_nsw'], # swish + # stage 4, 14x14in + ['ir_r1_k3.5.7_a1.1_p1.1_s1_e6_c120_se0.5_nsw', 'ir_r2_k3.5.7.9_a1.1_p1.1_s1_e3_c120_se0.5_nsw'], # swish + # stage 5, 14x14in + ['ir_r1_k3.5.7.9.11_s2_e6_c200_se0.5_nsw', 'ir_r2_k3.5.7.9_p1.1_s1_e6_c200_se0.5_nsw'], # swish + # 7x7 + ] + with layer_config_kwargs(kwargs): + model_kwargs = dict( + block_args=decode_arch_def(arch_def), + num_features=1536, + stem_size=16, + channel_multiplier=channel_multiplier, + act_layer=resolve_act_layer(kwargs, 'relu'), + norm_kwargs=resolve_bn_args(kwargs), + **kwargs + ) + model = _create_model(model_kwargs, variant, pretrained) + return model + + +def _gen_mixnet_m(variant, channel_multiplier=1.0, depth_multiplier=1.0, pretrained=False, **kwargs): + """Creates a MixNet Medium-Large model. 
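+    (Shared generator: mixnet_m/l/xl/xxl below all call it with different channel and
+    depth multipliers; depth scaling uses depth_trunc='round'.)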
+ + Ref impl: https://github.com/tensorflow/tpu/tree/master/models/official/mnasnet/mixnet + Paper: https://arxiv.org/abs/1907.09595 + """ + arch_def = [ + # stage 0, 112x112 in + ['ds_r1_k3_s1_e1_c24'], # relu + # stage 1, 112x112 in + ['ir_r1_k3.5.7_a1.1_p1.1_s2_e6_c32', 'ir_r1_k3_a1.1_p1.1_s1_e3_c32'], # relu + # stage 2, 56x56 in + ['ir_r1_k3.5.7.9_s2_e6_c40_se0.5_nsw', 'ir_r3_k3.5_a1.1_p1.1_s1_e6_c40_se0.5_nsw'], # swish + # stage 3, 28x28 in + ['ir_r1_k3.5.7_s2_e6_c80_se0.25_nsw', 'ir_r3_k3.5.7.9_a1.1_p1.1_s1_e6_c80_se0.25_nsw'], # swish + # stage 4, 14x14in + ['ir_r1_k3_s1_e6_c120_se0.5_nsw', 'ir_r3_k3.5.7.9_a1.1_p1.1_s1_e3_c120_se0.5_nsw'], # swish + # stage 5, 14x14in + ['ir_r1_k3.5.7.9_s2_e6_c200_se0.5_nsw', 'ir_r3_k3.5.7.9_p1.1_s1_e6_c200_se0.5_nsw'], # swish + # 7x7 + ] + with layer_config_kwargs(kwargs): + model_kwargs = dict( + block_args=decode_arch_def(arch_def, depth_multiplier, depth_trunc='round'), + num_features=1536, + stem_size=24, + channel_multiplier=channel_multiplier, + act_layer=resolve_act_layer(kwargs, 'relu'), + norm_kwargs=resolve_bn_args(kwargs), + **kwargs + ) + model = _create_model(model_kwargs, variant, pretrained) + return model + + +def mnasnet_050(pretrained=False, **kwargs): + """ MNASNet B1, depth multiplier of 0.5. """ + model = _gen_mnasnet_b1('mnasnet_050', 0.5, pretrained=pretrained, **kwargs) + return model + + +def mnasnet_075(pretrained=False, **kwargs): + """ MNASNet B1, depth multiplier of 0.75. """ + model = _gen_mnasnet_b1('mnasnet_075', 0.75, pretrained=pretrained, **kwargs) + return model + + +def mnasnet_100(pretrained=False, **kwargs): + """ MNASNet B1, depth multiplier of 1.0. """ + model = _gen_mnasnet_b1('mnasnet_100', 1.0, pretrained=pretrained, **kwargs) + return model + + +def mnasnet_b1(pretrained=False, **kwargs): + """ MNASNet B1, depth multiplier of 1.0. """ + return mnasnet_100(pretrained, **kwargs) + + +def mnasnet_140(pretrained=False, **kwargs): + """ MNASNet B1, depth multiplier of 1.4 """ + model = _gen_mnasnet_b1('mnasnet_140', 1.4, pretrained=pretrained, **kwargs) + return model + + +def semnasnet_050(pretrained=False, **kwargs): + """ MNASNet A1 (w/ SE), depth multiplier of 0.5 """ + model = _gen_mnasnet_a1('semnasnet_050', 0.5, pretrained=pretrained, **kwargs) + return model + + +def semnasnet_075(pretrained=False, **kwargs): + """ MNASNet A1 (w/ SE), depth multiplier of 0.75. """ + model = _gen_mnasnet_a1('semnasnet_075', 0.75, pretrained=pretrained, **kwargs) + return model + + +def semnasnet_100(pretrained=False, **kwargs): + """ MNASNet A1 (w/ SE), depth multiplier of 1.0. """ + model = _gen_mnasnet_a1('semnasnet_100', 1.0, pretrained=pretrained, **kwargs) + return model + + +def mnasnet_a1(pretrained=False, **kwargs): + """ MNASNet A1 (w/ SE), depth multiplier of 1.0. """ + return semnasnet_100(pretrained, **kwargs) + + +def semnasnet_140(pretrained=False, **kwargs): + """ MNASNet A1 (w/ SE), depth multiplier of 1.4. """ + model = _gen_mnasnet_a1('semnasnet_140', 1.4, pretrained=pretrained, **kwargs) + return model + + +def mnasnet_small(pretrained=False, **kwargs): + """ MNASNet Small, depth multiplier of 1.0. 
""" + model = _gen_mnasnet_small('mnasnet_small', 1.0, pretrained=pretrained, **kwargs) + return model + + +def mobilenetv2_100(pretrained=False, **kwargs): + """ MobileNet V2 w/ 1.0 channel multiplier """ + model = _gen_mobilenet_v2('mobilenetv2_100', 1.0, pretrained=pretrained, **kwargs) + return model + + +def mobilenetv2_140(pretrained=False, **kwargs): + """ MobileNet V2 w/ 1.4 channel multiplier """ + model = _gen_mobilenet_v2('mobilenetv2_140', 1.4, pretrained=pretrained, **kwargs) + return model + + +def mobilenetv2_110d(pretrained=False, **kwargs): + """ MobileNet V2 w/ 1.1 channel, 1.2 depth multipliers""" + model = _gen_mobilenet_v2( + 'mobilenetv2_110d', 1.1, depth_multiplier=1.2, fix_stem_head=True, pretrained=pretrained, **kwargs) + return model + + +def mobilenetv2_120d(pretrained=False, **kwargs): + """ MobileNet V2 w/ 1.2 channel, 1.4 depth multipliers """ + model = _gen_mobilenet_v2( + 'mobilenetv2_120d', 1.2, depth_multiplier=1.4, fix_stem_head=True, pretrained=pretrained, **kwargs) + return model + + +def fbnetc_100(pretrained=False, **kwargs): + """ FBNet-C """ + if pretrained: + # pretrained model trained with non-default BN epsilon + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + model = _gen_fbnetc('fbnetc_100', 1.0, pretrained=pretrained, **kwargs) + return model + + +def spnasnet_100(pretrained=False, **kwargs): + """ Single-Path NAS Pixel1""" + model = _gen_spnasnet('spnasnet_100', 1.0, pretrained=pretrained, **kwargs) + return model + + +def efficientnet_b0(pretrained=False, **kwargs): + """ EfficientNet-B0 """ + # NOTE for train set drop_rate=0.2, drop_connect_rate=0.2 + model = _gen_efficientnet( + 'efficientnet_b0', channel_multiplier=1.0, depth_multiplier=1.0, pretrained=pretrained, **kwargs) + return model + + +def efficientnet_b1(pretrained=False, **kwargs): + """ EfficientNet-B1 """ + # NOTE for train set drop_rate=0.2, drop_connect_rate=0.2 + model = _gen_efficientnet( + 'efficientnet_b1', channel_multiplier=1.0, depth_multiplier=1.1, pretrained=pretrained, **kwargs) + return model + + +def efficientnet_b2(pretrained=False, **kwargs): + """ EfficientNet-B2 """ + # NOTE for train set drop_rate=0.3, drop_connect_rate=0.2 + model = _gen_efficientnet( + 'efficientnet_b2', channel_multiplier=1.1, depth_multiplier=1.2, pretrained=pretrained, **kwargs) + return model + + +def efficientnet_b3(pretrained=False, **kwargs): + """ EfficientNet-B3 """ + # NOTE for train set drop_rate=0.3, drop_connect_rate=0.2 + model = _gen_efficientnet( + 'efficientnet_b3', channel_multiplier=1.2, depth_multiplier=1.4, pretrained=pretrained, **kwargs) + return model + + +def efficientnet_b4(pretrained=False, **kwargs): + """ EfficientNet-B4 """ + # NOTE for train set drop_rate=0.4, drop_connect_rate=0.2 + model = _gen_efficientnet( + 'efficientnet_b4', channel_multiplier=1.4, depth_multiplier=1.8, pretrained=pretrained, **kwargs) + return model + + +def efficientnet_b5(pretrained=False, **kwargs): + """ EfficientNet-B5 """ + # NOTE for train set drop_rate=0.4, drop_connect_rate=0.2 + model = _gen_efficientnet( + 'efficientnet_b5', channel_multiplier=1.6, depth_multiplier=2.2, pretrained=pretrained, **kwargs) + return model + + +def efficientnet_b6(pretrained=False, **kwargs): + """ EfficientNet-B6 """ + # NOTE for train set drop_rate=0.5, drop_connect_rate=0.2 + model = _gen_efficientnet( + 'efficientnet_b6', channel_multiplier=1.8, depth_multiplier=2.6, pretrained=pretrained, **kwargs) + return model + + +def efficientnet_b7(pretrained=False, **kwargs): + """ EfficientNet-B7 """ + # NOTE 
for train set drop_rate=0.5, drop_connect_rate=0.2 + model = _gen_efficientnet( + 'efficientnet_b7', channel_multiplier=2.0, depth_multiplier=3.1, pretrained=pretrained, **kwargs) + return model + + +def efficientnet_b8(pretrained=False, **kwargs): + """ EfficientNet-B8 """ + # NOTE for train set drop_rate=0.5, drop_connect_rate=0.2 + model = _gen_efficientnet( + 'efficientnet_b8', channel_multiplier=2.2, depth_multiplier=3.6, pretrained=pretrained, **kwargs) + return model + + +def efficientnet_l2(pretrained=False, **kwargs): + """ EfficientNet-L2. """ + # NOTE for train, drop_rate should be 0.5 + model = _gen_efficientnet( + 'efficientnet_l2', channel_multiplier=4.3, depth_multiplier=5.3, pretrained=pretrained, **kwargs) + return model + + +def efficientnet_es(pretrained=False, **kwargs): + """ EfficientNet-Edge Small. """ + model = _gen_efficientnet_edge( + 'efficientnet_es', channel_multiplier=1.0, depth_multiplier=1.0, pretrained=pretrained, **kwargs) + return model + + +def efficientnet_em(pretrained=False, **kwargs): + """ EfficientNet-Edge-Medium. """ + model = _gen_efficientnet_edge( + 'efficientnet_em', channel_multiplier=1.0, depth_multiplier=1.1, pretrained=pretrained, **kwargs) + return model + + +def efficientnet_el(pretrained=False, **kwargs): + """ EfficientNet-Edge-Large. """ + model = _gen_efficientnet_edge( + 'efficientnet_el', channel_multiplier=1.2, depth_multiplier=1.4, pretrained=pretrained, **kwargs) + return model + + +def efficientnet_cc_b0_4e(pretrained=False, **kwargs): + """ EfficientNet-CondConv-B0 w/ 8 Experts """ + # NOTE for train set drop_rate=0.25, drop_connect_rate=0.2 + model = _gen_efficientnet_condconv( + 'efficientnet_cc_b0_4e', channel_multiplier=1.0, depth_multiplier=1.0, pretrained=pretrained, **kwargs) + return model + + +def efficientnet_cc_b0_8e(pretrained=False, **kwargs): + """ EfficientNet-CondConv-B0 w/ 8 Experts """ + # NOTE for train set drop_rate=0.25, drop_connect_rate=0.2 + model = _gen_efficientnet_condconv( + 'efficientnet_cc_b0_8e', channel_multiplier=1.0, depth_multiplier=1.0, experts_multiplier=2, + pretrained=pretrained, **kwargs) + return model + + +def efficientnet_cc_b1_8e(pretrained=False, **kwargs): + """ EfficientNet-CondConv-B1 w/ 8 Experts """ + # NOTE for train set drop_rate=0.25, drop_connect_rate=0.2 + model = _gen_efficientnet_condconv( + 'efficientnet_cc_b1_8e', channel_multiplier=1.0, depth_multiplier=1.1, experts_multiplier=2, + pretrained=pretrained, **kwargs) + return model + + +def efficientnet_lite0(pretrained=False, **kwargs): + """ EfficientNet-Lite0 """ + model = _gen_efficientnet_lite( + 'efficientnet_lite0', channel_multiplier=1.0, depth_multiplier=1.0, pretrained=pretrained, **kwargs) + return model + + +def efficientnet_lite1(pretrained=False, **kwargs): + """ EfficientNet-Lite1 """ + model = _gen_efficientnet_lite( + 'efficientnet_lite1', channel_multiplier=1.0, depth_multiplier=1.1, pretrained=pretrained, **kwargs) + return model + + +def efficientnet_lite2(pretrained=False, **kwargs): + """ EfficientNet-Lite2 """ + model = _gen_efficientnet_lite( + 'efficientnet_lite2', channel_multiplier=1.1, depth_multiplier=1.2, pretrained=pretrained, **kwargs) + return model + + +def efficientnet_lite3(pretrained=False, **kwargs): + """ EfficientNet-Lite3 """ + model = _gen_efficientnet_lite( + 'efficientnet_lite3', channel_multiplier=1.2, depth_multiplier=1.4, pretrained=pretrained, **kwargs) + return model + + +def efficientnet_lite4(pretrained=False, **kwargs): + """ EfficientNet-Lite4 """ + model = 
_gen_efficientnet_lite( + 'efficientnet_lite4', channel_multiplier=1.4, depth_multiplier=1.8, pretrained=pretrained, **kwargs) + return model + + +def tf_efficientnet_b0(pretrained=False, **kwargs): + """ EfficientNet-B0 AutoAug. Tensorflow compatible variant """ + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_efficientnet( + 'tf_efficientnet_b0', channel_multiplier=1.0, depth_multiplier=1.0, pretrained=pretrained, **kwargs) + return model + + +def tf_efficientnet_b1(pretrained=False, **kwargs): + """ EfficientNet-B1 AutoAug. Tensorflow compatible variant """ + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_efficientnet( + 'tf_efficientnet_b1', channel_multiplier=1.0, depth_multiplier=1.1, pretrained=pretrained, **kwargs) + return model + + +def tf_efficientnet_b2(pretrained=False, **kwargs): + """ EfficientNet-B2 AutoAug. Tensorflow compatible variant """ + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_efficientnet( + 'tf_efficientnet_b2', channel_multiplier=1.1, depth_multiplier=1.2, pretrained=pretrained, **kwargs) + return model + + +def tf_efficientnet_b3(pretrained=False, **kwargs): + """ EfficientNet-B3 AutoAug. Tensorflow compatible variant """ + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_efficientnet( + 'tf_efficientnet_b3', channel_multiplier=1.2, depth_multiplier=1.4, pretrained=pretrained, **kwargs) + return model + + +def tf_efficientnet_b4(pretrained=False, **kwargs): + """ EfficientNet-B4 AutoAug. Tensorflow compatible variant """ + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_efficientnet( + 'tf_efficientnet_b4', channel_multiplier=1.4, depth_multiplier=1.8, pretrained=pretrained, **kwargs) + return model + + +def tf_efficientnet_b5(pretrained=False, **kwargs): + """ EfficientNet-B5 RandAug. Tensorflow compatible variant """ + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_efficientnet( + 'tf_efficientnet_b5', channel_multiplier=1.6, depth_multiplier=2.2, pretrained=pretrained, **kwargs) + return model + + +def tf_efficientnet_b6(pretrained=False, **kwargs): + """ EfficientNet-B6 AutoAug. Tensorflow compatible variant """ + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_efficientnet( + 'tf_efficientnet_b6', channel_multiplier=1.8, depth_multiplier=2.6, pretrained=pretrained, **kwargs) + return model + + +def tf_efficientnet_b7(pretrained=False, **kwargs): + """ EfficientNet-B7 RandAug. Tensorflow compatible variant """ + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_efficientnet( + 'tf_efficientnet_b7', channel_multiplier=2.0, depth_multiplier=3.1, pretrained=pretrained, **kwargs) + return model + + +def tf_efficientnet_b8(pretrained=False, **kwargs): + """ EfficientNet-B8 RandAug. Tensorflow compatible variant """ + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_efficientnet( + 'tf_efficientnet_b8', channel_multiplier=2.2, depth_multiplier=3.6, pretrained=pretrained, **kwargs) + return model + + +def tf_efficientnet_b0_ap(pretrained=False, **kwargs): + """ EfficientNet-B0 AdvProp. 
Tensorflow compatible variant + Paper: Adversarial Examples Improve Image Recognition (https://arxiv.org/abs/1911.09665) + """ + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_efficientnet( + 'tf_efficientnet_b0_ap', channel_multiplier=1.0, depth_multiplier=1.0, pretrained=pretrained, **kwargs) + return model + + +def tf_efficientnet_b1_ap(pretrained=False, **kwargs): + """ EfficientNet-B1 AdvProp. Tensorflow compatible variant + Paper: Adversarial Examples Improve Image Recognition (https://arxiv.org/abs/1911.09665) + """ + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_efficientnet( + 'tf_efficientnet_b1_ap', channel_multiplier=1.0, depth_multiplier=1.1, pretrained=pretrained, **kwargs) + return model + + +def tf_efficientnet_b2_ap(pretrained=False, **kwargs): + """ EfficientNet-B2 AdvProp. Tensorflow compatible variant + Paper: Adversarial Examples Improve Image Recognition (https://arxiv.org/abs/1911.09665) + """ + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_efficientnet( + 'tf_efficientnet_b2_ap', channel_multiplier=1.1, depth_multiplier=1.2, pretrained=pretrained, **kwargs) + return model + + +def tf_efficientnet_b3_ap(pretrained=False, **kwargs): + """ EfficientNet-B3 AdvProp. Tensorflow compatible variant + Paper: Adversarial Examples Improve Image Recognition (https://arxiv.org/abs/1911.09665) + """ + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_efficientnet( + 'tf_efficientnet_b3_ap', channel_multiplier=1.2, depth_multiplier=1.4, pretrained=pretrained, **kwargs) + return model + + +def tf_efficientnet_b4_ap(pretrained=False, **kwargs): + """ EfficientNet-B4 AdvProp. Tensorflow compatible variant + Paper: Adversarial Examples Improve Image Recognition (https://arxiv.org/abs/1911.09665) + """ + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_efficientnet( + 'tf_efficientnet_b4_ap', channel_multiplier=1.4, depth_multiplier=1.8, pretrained=pretrained, **kwargs) + return model + + +def tf_efficientnet_b5_ap(pretrained=False, **kwargs): + """ EfficientNet-B5 AdvProp. Tensorflow compatible variant + Paper: Adversarial Examples Improve Image Recognition (https://arxiv.org/abs/1911.09665) + """ + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_efficientnet( + 'tf_efficientnet_b5_ap', channel_multiplier=1.6, depth_multiplier=2.2, pretrained=pretrained, **kwargs) + return model + + +def tf_efficientnet_b6_ap(pretrained=False, **kwargs): + """ EfficientNet-B6 AdvProp. Tensorflow compatible variant + Paper: Adversarial Examples Improve Image Recognition (https://arxiv.org/abs/1911.09665) + """ + # NOTE for train, drop_rate should be 0.5 + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_efficientnet( + 'tf_efficientnet_b6_ap', channel_multiplier=1.8, depth_multiplier=2.6, pretrained=pretrained, **kwargs) + return model + + +def tf_efficientnet_b7_ap(pretrained=False, **kwargs): + """ EfficientNet-B7 AdvProp. 
Tensorflow compatible variant + Paper: Adversarial Examples Improve Image Recognition (https://arxiv.org/abs/1911.09665) + """ + # NOTE for train, drop_rate should be 0.5 + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_efficientnet( + 'tf_efficientnet_b7_ap', channel_multiplier=2.0, depth_multiplier=3.1, pretrained=pretrained, **kwargs) + return model + + +def tf_efficientnet_b8_ap(pretrained=False, **kwargs): + """ EfficientNet-B8 AdvProp. Tensorflow compatible variant + Paper: Adversarial Examples Improve Image Recognition (https://arxiv.org/abs/1911.09665) + """ + # NOTE for train, drop_rate should be 0.5 + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_efficientnet( + 'tf_efficientnet_b8_ap', channel_multiplier=2.2, depth_multiplier=3.6, pretrained=pretrained, **kwargs) + return model + + +def tf_efficientnet_b0_ns(pretrained=False, **kwargs): + """ EfficientNet-B0 NoisyStudent. Tensorflow compatible variant + Paper: Self-training with Noisy Student improves ImageNet classification (https://arxiv.org/abs/1911.04252) + """ + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_efficientnet( + 'tf_efficientnet_b0_ns', channel_multiplier=1.0, depth_multiplier=1.0, pretrained=pretrained, **kwargs) + return model + + +def tf_efficientnet_b1_ns(pretrained=False, **kwargs): + """ EfficientNet-B1 NoisyStudent. Tensorflow compatible variant + Paper: Self-training with Noisy Student improves ImageNet classification (https://arxiv.org/abs/1911.04252) + """ + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_efficientnet( + 'tf_efficientnet_b1_ns', channel_multiplier=1.0, depth_multiplier=1.1, pretrained=pretrained, **kwargs) + return model + + +def tf_efficientnet_b2_ns(pretrained=False, **kwargs): + """ EfficientNet-B2 NoisyStudent. Tensorflow compatible variant + Paper: Self-training with Noisy Student improves ImageNet classification (https://arxiv.org/abs/1911.04252) + """ + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_efficientnet( + 'tf_efficientnet_b2_ns', channel_multiplier=1.1, depth_multiplier=1.2, pretrained=pretrained, **kwargs) + return model + + +def tf_efficientnet_b3_ns(pretrained=False, **kwargs): + """ EfficientNet-B3 NoisyStudent. Tensorflow compatible variant + Paper: Self-training with Noisy Student improves ImageNet classification (https://arxiv.org/abs/1911.04252) + """ + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_efficientnet( + 'tf_efficientnet_b3_ns', channel_multiplier=1.2, depth_multiplier=1.4, pretrained=pretrained, **kwargs) + return model + + +def tf_efficientnet_b4_ns(pretrained=False, **kwargs): + """ EfficientNet-B4 NoisyStudent. Tensorflow compatible variant + Paper: Self-training with Noisy Student improves ImageNet classification (https://arxiv.org/abs/1911.04252) + """ + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_efficientnet( + 'tf_efficientnet_b4_ns', channel_multiplier=1.4, depth_multiplier=1.8, pretrained=pretrained, **kwargs) + return model + + +def tf_efficientnet_b5_ns(pretrained=False, **kwargs): + """ EfficientNet-B5 NoisyStudent. 
Tensorflow compatible variant + Paper: Self-training with Noisy Student improves ImageNet classification (https://arxiv.org/abs/1911.04252) + """ + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_efficientnet( + 'tf_efficientnet_b5_ns', channel_multiplier=1.6, depth_multiplier=2.2, pretrained=pretrained, **kwargs) + return model + + +def tf_efficientnet_b6_ns(pretrained=False, **kwargs): + """ EfficientNet-B6 NoisyStudent. Tensorflow compatible variant + Paper: Self-training with Noisy Student improves ImageNet classification (https://arxiv.org/abs/1911.04252) + """ + # NOTE for train, drop_rate should be 0.5 + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_efficientnet( + 'tf_efficientnet_b6_ns', channel_multiplier=1.8, depth_multiplier=2.6, pretrained=pretrained, **kwargs) + return model + + +def tf_efficientnet_b7_ns(pretrained=False, **kwargs): + """ EfficientNet-B7 NoisyStudent. Tensorflow compatible variant + Paper: Self-training with Noisy Student improves ImageNet classification (https://arxiv.org/abs/1911.04252) + """ + # NOTE for train, drop_rate should be 0.5 + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_efficientnet( + 'tf_efficientnet_b7_ns', channel_multiplier=2.0, depth_multiplier=3.1, pretrained=pretrained, **kwargs) + return model + + +def tf_efficientnet_l2_ns_475(pretrained=False, **kwargs): + """ EfficientNet-L2 NoisyStudent @ 475x475. Tensorflow compatible variant + Paper: Self-training with Noisy Student improves ImageNet classification (https://arxiv.org/abs/1911.04252) + """ + # NOTE for train, drop_rate should be 0.5 + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_efficientnet( + 'tf_efficientnet_l2_ns_475', channel_multiplier=4.3, depth_multiplier=5.3, pretrained=pretrained, **kwargs) + return model + + +def tf_efficientnet_l2_ns(pretrained=False, **kwargs): + """ EfficientNet-L2 NoisyStudent. Tensorflow compatible variant + Paper: Self-training with Noisy Student improves ImageNet classification (https://arxiv.org/abs/1911.04252) + """ + # NOTE for train, drop_rate should be 0.5 + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_efficientnet( + 'tf_efficientnet_l2_ns', channel_multiplier=4.3, depth_multiplier=5.3, pretrained=pretrained, **kwargs) + return model + + +def tf_efficientnet_es(pretrained=False, **kwargs): + """ EfficientNet-Edge Small. Tensorflow compatible variant """ + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_efficientnet_edge( + 'tf_efficientnet_es', channel_multiplier=1.0, depth_multiplier=1.0, pretrained=pretrained, **kwargs) + return model + + +def tf_efficientnet_em(pretrained=False, **kwargs): + """ EfficientNet-Edge-Medium. Tensorflow compatible variant """ + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_efficientnet_edge( + 'tf_efficientnet_em', channel_multiplier=1.0, depth_multiplier=1.1, pretrained=pretrained, **kwargs) + return model + + +def tf_efficientnet_el(pretrained=False, **kwargs): + """ EfficientNet-Edge-Large. 
Tensorflow compatible variant """ + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_efficientnet_edge( + 'tf_efficientnet_el', channel_multiplier=1.2, depth_multiplier=1.4, pretrained=pretrained, **kwargs) + return model + + +def tf_efficientnet_cc_b0_4e(pretrained=False, **kwargs): + """ EfficientNet-CondConv-B0 w/ 4 Experts """ + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_efficientnet_condconv( + 'tf_efficientnet_cc_b0_4e', channel_multiplier=1.0, depth_multiplier=1.0, pretrained=pretrained, **kwargs) + return model + + +def tf_efficientnet_cc_b0_8e(pretrained=False, **kwargs): + """ EfficientNet-CondConv-B0 w/ 8 Experts """ + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_efficientnet_condconv( + 'tf_efficientnet_cc_b0_8e', channel_multiplier=1.0, depth_multiplier=1.0, experts_multiplier=2, + pretrained=pretrained, **kwargs) + return model + + +def tf_efficientnet_cc_b1_8e(pretrained=False, **kwargs): + """ EfficientNet-CondConv-B1 w/ 8 Experts """ + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_efficientnet_condconv( + 'tf_efficientnet_cc_b1_8e', channel_multiplier=1.0, depth_multiplier=1.1, experts_multiplier=2, + pretrained=pretrained, **kwargs) + return model + + +def tf_efficientnet_lite0(pretrained=False, **kwargs): + """ EfficientNet-Lite0. Tensorflow compatible variant """ + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_efficientnet_lite( + 'tf_efficientnet_lite0', channel_multiplier=1.0, depth_multiplier=1.0, pretrained=pretrained, **kwargs) + return model + + +def tf_efficientnet_lite1(pretrained=False, **kwargs): + """ EfficientNet-Lite1. Tensorflow compatible variant """ + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_efficientnet_lite( + 'tf_efficientnet_lite1', channel_multiplier=1.0, depth_multiplier=1.1, pretrained=pretrained, **kwargs) + return model + + +def tf_efficientnet_lite2(pretrained=False, **kwargs): + """ EfficientNet-Lite2. Tensorflow compatible variant """ + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_efficientnet_lite( + 'tf_efficientnet_lite2', channel_multiplier=1.1, depth_multiplier=1.2, pretrained=pretrained, **kwargs) + return model + + +def tf_efficientnet_lite3(pretrained=False, **kwargs): + """ EfficientNet-Lite3. Tensorflow compatible variant """ + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_efficientnet_lite( + 'tf_efficientnet_lite3', channel_multiplier=1.2, depth_multiplier=1.4, pretrained=pretrained, **kwargs) + return model + + +def tf_efficientnet_lite4(pretrained=False, **kwargs): + """ EfficientNet-Lite4. Tensorflow compatible variant """ + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_efficientnet_lite( + 'tf_efficientnet_lite4', channel_multiplier=1.4, depth_multiplier=1.8, pretrained=pretrained, **kwargs) + return model + + +def mixnet_s(pretrained=False, **kwargs): + """Creates a MixNet Small model. + """ + # NOTE for train set drop_rate=0.2 + model = _gen_mixnet_s( + 'mixnet_s', channel_multiplier=1.0, pretrained=pretrained, **kwargs) + return model + + +def mixnet_m(pretrained=False, **kwargs): + """Creates a MixNet Medium model. 
+ """ + # NOTE for train set drop_rate=0.25 + model = _gen_mixnet_m( + 'mixnet_m', channel_multiplier=1.0, pretrained=pretrained, **kwargs) + return model + + +def mixnet_l(pretrained=False, **kwargs): + """Creates a MixNet Large model. + """ + # NOTE for train set drop_rate=0.25 + model = _gen_mixnet_m( + 'mixnet_l', channel_multiplier=1.3, pretrained=pretrained, **kwargs) + return model + + +def mixnet_xl(pretrained=False, **kwargs): + """Creates a MixNet Extra-Large model. + Not a paper spec, experimental def by RW w/ depth scaling. + """ + # NOTE for train set drop_rate=0.25, drop_connect_rate=0.2 + model = _gen_mixnet_m( + 'mixnet_xl', channel_multiplier=1.6, depth_multiplier=1.2, pretrained=pretrained, **kwargs) + return model + + +def mixnet_xxl(pretrained=False, **kwargs): + """Creates a MixNet Double Extra Large model. + Not a paper spec, experimental def by RW w/ depth scaling. + """ + # NOTE for train set drop_rate=0.3, drop_connect_rate=0.2 + model = _gen_mixnet_m( + 'mixnet_xxl', channel_multiplier=2.4, depth_multiplier=1.3, pretrained=pretrained, **kwargs) + return model + + +def tf_mixnet_s(pretrained=False, **kwargs): + """Creates a MixNet Small model. Tensorflow compatible variant + """ + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_mixnet_s( + 'tf_mixnet_s', channel_multiplier=1.0, pretrained=pretrained, **kwargs) + return model + + +def tf_mixnet_m(pretrained=False, **kwargs): + """Creates a MixNet Medium model. Tensorflow compatible variant + """ + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_mixnet_m( + 'tf_mixnet_m', channel_multiplier=1.0, pretrained=pretrained, **kwargs) + return model + + +def tf_mixnet_l(pretrained=False, **kwargs): + """Creates a MixNet Large model. 
Tensorflow compatible variant + """ + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_mixnet_m( + 'tf_mixnet_l', channel_multiplier=1.3, pretrained=pretrained, **kwargs) + return model diff --git a/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/helpers.py b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/helpers.py new file mode 100644 index 0000000000000000000000000000000000000000..3f83a07d690c7ad681c777c19b1e7a5bb95da007 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/helpers.py @@ -0,0 +1,71 @@ +""" Checkpoint loading / state_dict helpers +Copyright 2020 Ross Wightman +""" +import torch +import os +from collections import OrderedDict +try: + from torch.hub import load_state_dict_from_url +except ImportError: + from torch.utils.model_zoo import load_url as load_state_dict_from_url + + +def load_checkpoint(model, checkpoint_path): + if checkpoint_path and os.path.isfile(checkpoint_path): + print("=> Loading checkpoint '{}'".format(checkpoint_path)) + checkpoint = torch.load(checkpoint_path) + if isinstance(checkpoint, dict) and 'state_dict' in checkpoint: + new_state_dict = OrderedDict() + for k, v in checkpoint['state_dict'].items(): + if k.startswith('module'): + name = k[7:] # remove `module.` + else: + name = k + new_state_dict[name] = v + model.load_state_dict(new_state_dict) + else: + model.load_state_dict(checkpoint) + print("=> Loaded checkpoint '{}'".format(checkpoint_path)) + else: + print("=> Error: No checkpoint found at '{}'".format(checkpoint_path)) + raise FileNotFoundError() + + +def load_pretrained(model, url, filter_fn=None, strict=True): + if not url: + print("=> Warning: Pretrained model URL is empty, using random initialization.") + return + + state_dict = load_state_dict_from_url(url, progress=False, map_location='cpu') + + input_conv = 'conv_stem' + classifier = 'classifier' + in_chans = getattr(model, input_conv).weight.shape[1] + num_classes = getattr(model, classifier).weight.shape[0] + + input_conv_weight = input_conv + '.weight' + pretrained_in_chans = state_dict[input_conv_weight].shape[1] + if in_chans != pretrained_in_chans: + if in_chans == 1: + print('=> Converting pretrained input conv {} from {} to 1 channel'.format( + input_conv_weight, pretrained_in_chans)) + conv1_weight = state_dict[input_conv_weight] + state_dict[input_conv_weight] = conv1_weight.sum(dim=1, keepdim=True) + else: + print('=> Discarding pretrained input conv {} since input channel count != {}'.format( + input_conv_weight, pretrained_in_chans)) + del state_dict[input_conv_weight] + strict = False + + classifier_weight = classifier + '.weight' + pretrained_num_classes = state_dict[classifier_weight].shape[0] + if num_classes != pretrained_num_classes: + print('=> Discarding pretrained classifier since num_classes != {}'.format(pretrained_num_classes)) + del state_dict[classifier_weight] + del state_dict[classifier + '.bias'] + strict = False + + if filter_fn is not None: + state_dict = filter_fn(state_dict) + + model.load_state_dict(state_dict, strict=strict) diff --git a/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/mobilenetv3.py b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/mobilenetv3.py new file mode 100644 
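load_pretrained above already adapts grayscale inputs (summing the RGB stem weights) and drops a mismatched classifier; the filter_fn hook is the remaining escape hatch for rewriting checkpoint keys before load_state_dict. A sketch of such a filter, assuming a hypothetical 'module.' prefix left over from DataParallel training:

def strip_module_prefix(state_dict, prefix='module.'):
    # Illustrative filter_fn: 'module.conv_stem.weight' -> 'conv_stem.weight'.
    return {k[len(prefix):] if k.startswith(prefix) else k: v
            for k, v in state_dict.items()}

# load_pretrained(model, url, filter_fn=strip_module_prefix)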
index 0000000000000000000000000000000000000000..b5966c28f7207e98ee50745b1bc8f3663c650f9d --- /dev/null +++ b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/mobilenetv3.py @@ -0,0 +1,364 @@ +""" MobileNet-V3 + +A PyTorch impl of MobileNet-V3, compatible with TF weights from official impl. + +Paper: Searching for MobileNetV3 - https://arxiv.org/abs/1905.02244 + +Hacked together by / Copyright 2020 Ross Wightman +""" +import torch.nn as nn +import torch.nn.functional as F + +from .activations import get_act_fn, get_act_layer, HardSwish +from .config import layer_config_kwargs +from .conv2d_layers import select_conv2d +from .helpers import load_pretrained +from .efficientnet_builder import * + +__all__ = ['mobilenetv3_rw', 'mobilenetv3_large_075', 'mobilenetv3_large_100', 'mobilenetv3_large_minimal_100', + 'mobilenetv3_small_075', 'mobilenetv3_small_100', 'mobilenetv3_small_minimal_100', + 'tf_mobilenetv3_large_075', 'tf_mobilenetv3_large_100', 'tf_mobilenetv3_large_minimal_100', + 'tf_mobilenetv3_small_075', 'tf_mobilenetv3_small_100', 'tf_mobilenetv3_small_minimal_100'] + +model_urls = { + 'mobilenetv3_rw': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mobilenetv3_100-35495452.pth', + 'mobilenetv3_large_075': None, + 'mobilenetv3_large_100': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mobilenetv3_large_100_ra-f55367f5.pth', + 'mobilenetv3_large_minimal_100': None, + 'mobilenetv3_small_075': None, + 'mobilenetv3_small_100': None, + 'mobilenetv3_small_minimal_100': None, + 'tf_mobilenetv3_large_075': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_large_075-150ee8b0.pth', + 'tf_mobilenetv3_large_100': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_large_100-427764d5.pth', + 'tf_mobilenetv3_large_minimal_100': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_large_minimal_100-8596ae28.pth', + 'tf_mobilenetv3_small_075': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_small_075-da427f52.pth', + 'tf_mobilenetv3_small_100': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_small_100-37f49e2b.pth', + 'tf_mobilenetv3_small_minimal_100': + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_small_minimal_100-922a7843.pth', +} + + +class MobileNetV3(nn.Module): + """ MobileNet-V3 + + A this model utilizes the MobileNet-v3 specific 'efficient head', where global pooling is done before the + head convolution without a final batch-norm layer before the classifier. 
+ + Paper: https://arxiv.org/abs/1905.02244 + """ + + def __init__(self, block_args, num_classes=1000, in_chans=3, stem_size=16, num_features=1280, head_bias=True, + channel_multiplier=1.0, pad_type='', act_layer=HardSwish, drop_rate=0., drop_connect_rate=0., + se_kwargs=None, norm_layer=nn.BatchNorm2d, norm_kwargs=None, weight_init='goog'): + super(MobileNetV3, self).__init__() + self.drop_rate = drop_rate + + stem_size = round_channels(stem_size, channel_multiplier) + self.conv_stem = select_conv2d(in_chans, stem_size, 3, stride=2, padding=pad_type) + self.bn1 = nn.BatchNorm2d(stem_size, **norm_kwargs) + self.act1 = act_layer(inplace=True) + in_chs = stem_size + + builder = EfficientNetBuilder( + channel_multiplier, pad_type=pad_type, act_layer=act_layer, se_kwargs=se_kwargs, + norm_layer=norm_layer, norm_kwargs=norm_kwargs, drop_connect_rate=drop_connect_rate) + self.blocks = nn.Sequential(*builder(in_chs, block_args)) + in_chs = builder.in_chs + + self.global_pool = nn.AdaptiveAvgPool2d(1) + self.conv_head = select_conv2d(in_chs, num_features, 1, padding=pad_type, bias=head_bias) + self.act2 = act_layer(inplace=True) + self.classifier = nn.Linear(num_features, num_classes) + + for m in self.modules(): + if weight_init == 'goog': + initialize_weight_goog(m) + else: + initialize_weight_default(m) + + def as_sequential(self): + layers = [self.conv_stem, self.bn1, self.act1] + layers.extend(self.blocks) + layers.extend([ + self.global_pool, self.conv_head, self.act2, + nn.Flatten(), nn.Dropout(self.drop_rate), self.classifier]) + return nn.Sequential(*layers) + + def features(self, x): + x = self.conv_stem(x) + x = self.bn1(x) + x = self.act1(x) + x = self.blocks(x) + x = self.global_pool(x) + x = self.conv_head(x) + x = self.act2(x) + return x + + def forward(self, x): + x = self.features(x) + x = x.flatten(1) + if self.drop_rate > 0.: + x = F.dropout(x, p=self.drop_rate, training=self.training) + return self.classifier(x) + + +def _create_model(model_kwargs, variant, pretrained=False): + as_sequential = model_kwargs.pop('as_sequential', False) + model = MobileNetV3(**model_kwargs) + if pretrained and model_urls[variant]: + load_pretrained(model, model_urls[variant]) + if as_sequential: + model = model.as_sequential() + return model + + +def _gen_mobilenet_v3_rw(variant, channel_multiplier=1.0, pretrained=False, **kwargs): + """Creates a MobileNet-V3 model (RW variant). + + Paper: https://arxiv.org/abs/1905.02244 + + This was my first attempt at reproducing the MobileNet-V3 from paper alone. It came close to the + eventual Tensorflow reference impl but has a few differences: + 1. This model has no bias on the head convolution + 2. This model forces no residual (noskip) on the first DWS block, this is different than MnasNet + 3. This model always uses ReLU for the SE activation layer, other models in the family inherit their act layer + from their parent block + 4. This model does not enforce divisible by 8 limitation on the SE reduction channel count + + Overall the changes are fairly minor and result in a very small parameter count difference and no + top-1/5 + + Args: + channel_multiplier: multiplier to number of channels per layer. 
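The 'efficient head' mentioned in the class docstring means global pooling happens inside features(), before the 1x1 conv_head, so the classifier sees an already-pooled 1x1 map with no batch norm after the head conv. A small sketch of both entry points (pretrained=False; shapes assume a 224x224 input):

import torch
import geffnet

m = geffnet.mobilenetv3_large_100(pretrained=False)
m.eval()
x = torch.randn(1, 3, 224, 224)

with torch.no_grad():
    pooled = m.features(x)   # (1, 1280, 1, 1): pooled before conv_head
    logits = m(x)            # (1, 1000)
print(pooled.shape, logits.shape)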
+ """ + arch_def = [ + # stage 0, 112x112 in + ['ds_r1_k3_s1_e1_c16_nre_noskip'], # relu + # stage 1, 112x112 in + ['ir_r1_k3_s2_e4_c24_nre', 'ir_r1_k3_s1_e3_c24_nre'], # relu + # stage 2, 56x56 in + ['ir_r3_k5_s2_e3_c40_se0.25_nre'], # relu + # stage 3, 28x28 in + ['ir_r1_k3_s2_e6_c80', 'ir_r1_k3_s1_e2.5_c80', 'ir_r2_k3_s1_e2.3_c80'], # hard-swish + # stage 4, 14x14in + ['ir_r2_k3_s1_e6_c112_se0.25'], # hard-swish + # stage 5, 14x14in + ['ir_r3_k5_s2_e6_c160_se0.25'], # hard-swish + # stage 6, 7x7 in + ['cn_r1_k1_s1_c960'], # hard-swish + ] + with layer_config_kwargs(kwargs): + model_kwargs = dict( + block_args=decode_arch_def(arch_def), + head_bias=False, # one of my mistakes + channel_multiplier=channel_multiplier, + act_layer=resolve_act_layer(kwargs, 'hard_swish'), + se_kwargs=dict(gate_fn=get_act_fn('hard_sigmoid'), reduce_mid=True), + norm_kwargs=resolve_bn_args(kwargs), + **kwargs, + ) + model = _create_model(model_kwargs, variant, pretrained) + return model + + +def _gen_mobilenet_v3(variant, channel_multiplier=1.0, pretrained=False, **kwargs): + """Creates a MobileNet-V3 large/small/minimal models. + + Ref impl: https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet_v3.py + Paper: https://arxiv.org/abs/1905.02244 + + Args: + channel_multiplier: multiplier to number of channels per layer. + """ + if 'small' in variant: + num_features = 1024 + if 'minimal' in variant: + act_layer = 'relu' + arch_def = [ + # stage 0, 112x112 in + ['ds_r1_k3_s2_e1_c16'], + # stage 1, 56x56 in + ['ir_r1_k3_s2_e4.5_c24', 'ir_r1_k3_s1_e3.67_c24'], + # stage 2, 28x28 in + ['ir_r1_k3_s2_e4_c40', 'ir_r2_k3_s1_e6_c40'], + # stage 3, 14x14 in + ['ir_r2_k3_s1_e3_c48'], + # stage 4, 14x14in + ['ir_r3_k3_s2_e6_c96'], + # stage 6, 7x7 in + ['cn_r1_k1_s1_c576'], + ] + else: + act_layer = 'hard_swish' + arch_def = [ + # stage 0, 112x112 in + ['ds_r1_k3_s2_e1_c16_se0.25_nre'], # relu + # stage 1, 56x56 in + ['ir_r1_k3_s2_e4.5_c24_nre', 'ir_r1_k3_s1_e3.67_c24_nre'], # relu + # stage 2, 28x28 in + ['ir_r1_k5_s2_e4_c40_se0.25', 'ir_r2_k5_s1_e6_c40_se0.25'], # hard-swish + # stage 3, 14x14 in + ['ir_r2_k5_s1_e3_c48_se0.25'], # hard-swish + # stage 4, 14x14in + ['ir_r3_k5_s2_e6_c96_se0.25'], # hard-swish + # stage 6, 7x7 in + ['cn_r1_k1_s1_c576'], # hard-swish + ] + else: + num_features = 1280 + if 'minimal' in variant: + act_layer = 'relu' + arch_def = [ + # stage 0, 112x112 in + ['ds_r1_k3_s1_e1_c16'], + # stage 1, 112x112 in + ['ir_r1_k3_s2_e4_c24', 'ir_r1_k3_s1_e3_c24'], + # stage 2, 56x56 in + ['ir_r3_k3_s2_e3_c40'], + # stage 3, 28x28 in + ['ir_r1_k3_s2_e6_c80', 'ir_r1_k3_s1_e2.5_c80', 'ir_r2_k3_s1_e2.3_c80'], + # stage 4, 14x14in + ['ir_r2_k3_s1_e6_c112'], + # stage 5, 14x14in + ['ir_r3_k3_s2_e6_c160'], + # stage 6, 7x7 in + ['cn_r1_k1_s1_c960'], + ] + else: + act_layer = 'hard_swish' + arch_def = [ + # stage 0, 112x112 in + ['ds_r1_k3_s1_e1_c16_nre'], # relu + # stage 1, 112x112 in + ['ir_r1_k3_s2_e4_c24_nre', 'ir_r1_k3_s1_e3_c24_nre'], # relu + # stage 2, 56x56 in + ['ir_r3_k5_s2_e3_c40_se0.25_nre'], # relu + # stage 3, 28x28 in + ['ir_r1_k3_s2_e6_c80', 'ir_r1_k3_s1_e2.5_c80', 'ir_r2_k3_s1_e2.3_c80'], # hard-swish + # stage 4, 14x14in + ['ir_r2_k3_s1_e6_c112_se0.25'], # hard-swish + # stage 5, 14x14in + ['ir_r3_k5_s2_e6_c160_se0.25'], # hard-swish + # stage 6, 7x7 in + ['cn_r1_k1_s1_c960'], # hard-swish + ] + with layer_config_kwargs(kwargs): + model_kwargs = dict( + block_args=decode_arch_def(arch_def), + num_features=num_features, + stem_size=16, + 
channel_multiplier=channel_multiplier, + act_layer=resolve_act_layer(kwargs, act_layer), + se_kwargs=dict( + act_layer=get_act_layer('relu'), gate_fn=get_act_fn('hard_sigmoid'), reduce_mid=True, divisor=8), + norm_kwargs=resolve_bn_args(kwargs), + **kwargs, + ) + model = _create_model(model_kwargs, variant, pretrained) + return model + + +def mobilenetv3_rw(pretrained=False, **kwargs): + """ MobileNet-V3 RW + Attn: See note in gen function for this variant. + """ + # NOTE for train set drop_rate=0.2 + if pretrained: + # pretrained model trained with non-default BN epsilon + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + model = _gen_mobilenet_v3_rw('mobilenetv3_rw', 1.0, pretrained=pretrained, **kwargs) + return model + + +def mobilenetv3_large_075(pretrained=False, **kwargs): + """ MobileNet V3 Large 0.75""" + # NOTE for train set drop_rate=0.2 + model = _gen_mobilenet_v3('mobilenetv3_large_075', 0.75, pretrained=pretrained, **kwargs) + return model + + +def mobilenetv3_large_100(pretrained=False, **kwargs): + """ MobileNet V3 Large 1.0 """ + # NOTE for train set drop_rate=0.2 + model = _gen_mobilenet_v3('mobilenetv3_large_100', 1.0, pretrained=pretrained, **kwargs) + return model + + +def mobilenetv3_large_minimal_100(pretrained=False, **kwargs): + """ MobileNet V3 Large (Minimalistic) 1.0 """ + # NOTE for train set drop_rate=0.2 + model = _gen_mobilenet_v3('mobilenetv3_large_minimal_100', 1.0, pretrained=pretrained, **kwargs) + return model + + +def mobilenetv3_small_075(pretrained=False, **kwargs): + """ MobileNet V3 Small 0.75 """ + model = _gen_mobilenet_v3('mobilenetv3_small_075', 0.75, pretrained=pretrained, **kwargs) + return model + + +def mobilenetv3_small_100(pretrained=False, **kwargs): + """ MobileNet V3 Small 1.0 """ + model = _gen_mobilenet_v3('mobilenetv3_small_100', 1.0, pretrained=pretrained, **kwargs) + return model + + +def mobilenetv3_small_minimal_100(pretrained=False, **kwargs): + """ MobileNet V3 Small (Minimalistic) 1.0 """ + model = _gen_mobilenet_v3('mobilenetv3_small_minimal_100', 1.0, pretrained=pretrained, **kwargs) + return model + + +def tf_mobilenetv3_large_075(pretrained=False, **kwargs): + """ MobileNet V3 Large 0.75. Tensorflow compat variant. """ + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_mobilenet_v3('tf_mobilenetv3_large_075', 0.75, pretrained=pretrained, **kwargs) + return model + + +def tf_mobilenetv3_large_100(pretrained=False, **kwargs): + """ MobileNet V3 Large 1.0. Tensorflow compat variant. """ + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_mobilenet_v3('tf_mobilenetv3_large_100', 1.0, pretrained=pretrained, **kwargs) + return model + + +def tf_mobilenetv3_large_minimal_100(pretrained=False, **kwargs): + """ MobileNet V3 Large Minimalistic 1.0. Tensorflow compat variant. """ + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_mobilenet_v3('tf_mobilenetv3_large_minimal_100', 1.0, pretrained=pretrained, **kwargs) + return model + + +def tf_mobilenetv3_small_075(pretrained=False, **kwargs): + """ MobileNet V3 Small 0.75. Tensorflow compat variant. """ + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_mobilenet_v3('tf_mobilenetv3_small_075', 0.75, pretrained=pretrained, **kwargs) + return model + + +def tf_mobilenetv3_small_100(pretrained=False, **kwargs): + """ MobileNet V3 Small 1.0. 
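The arch_def lists above are built from compact block-definition strings such as 'ir_r1_k3_s2_e4_c24_nre': block type, repeat count (r), kernel size (k), stride (s), expansion ratio (e), output channels (c), plus optional flags like se0.25 (squeeze-excite ratio) or nre (force ReLU instead of the model's default activation, matching the '# relu' comments). decode_arch_def in efficientnet_builder.py is the real parser; the fragment below is only an illustrative tokenizer for reading those strings by eye:

import re

def peek_block_def(s):
    # Illustrative only; the authoritative parser is decode_arch_def() in efficientnet_builder.py.
    parts = s.split('_')
    out = {'type': parts[0]}
    for p in parts[1:]:
        m = re.match(r'([a-z]+)([\d.]+)$', p)
        if m:
            out[m.group(1)] = float(m.group(2))
        else:
            out[p] = True   # bare flags such as 'nre' or 'noskip'
    return out

print(peek_block_def('ir_r1_k3_s2_e4_c24_nre'))
# {'type': 'ir', 'r': 1.0, 'k': 3.0, 's': 2.0, 'e': 4.0, 'c': 24.0, 'nre': True}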
Tensorflow compat variant.""" + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_mobilenet_v3('tf_mobilenetv3_small_100', 1.0, pretrained=pretrained, **kwargs) + return model + + +def tf_mobilenetv3_small_minimal_100(pretrained=False, **kwargs): + """ MobileNet V3 Small Minimalistic 1.0. Tensorflow compat variant. """ + kwargs['bn_eps'] = BN_EPS_TF_DEFAULT + kwargs['pad_type'] = 'same' + model = _gen_mobilenet_v3('tf_mobilenetv3_small_minimal_100', 1.0, pretrained=pretrained, **kwargs) + return model diff --git a/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/model_factory.py b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/model_factory.py new file mode 100644 index 0000000000000000000000000000000000000000..4d46ea8baedaf3d787826eb3bb314b4230514647 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/model_factory.py @@ -0,0 +1,27 @@ +from .config import set_layer_config +from .helpers import load_checkpoint + +from .gen_efficientnet import * +from .mobilenetv3 import * + + +def create_model( + model_name='mnasnet_100', + pretrained=None, + num_classes=1000, + in_chans=3, + checkpoint_path='', + **kwargs): + + model_kwargs = dict(num_classes=num_classes, in_chans=in_chans, pretrained=pretrained, **kwargs) + + if model_name in globals(): + create_fn = globals()[model_name] + model = create_fn(**model_kwargs) + else: + raise RuntimeError('Unknown model (%s)' % model_name) + + if checkpoint_path and not pretrained: + load_checkpoint(model, checkpoint_path) + + return model diff --git a/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/version.py b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/version.py new file mode 100644 index 0000000000000000000000000000000000000000..a6221b3de7b1490c5e712e8b5fcc94c3d9d04295 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/version.py @@ -0,0 +1 @@ +__version__ = '1.0.2' diff --git a/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/hubconf.py b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/hubconf.py new file mode 100644 index 0000000000000000000000000000000000000000..45b17b99bbeba34596569e6e50f6e8a2ebc45c54 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/hubconf.py @@ -0,0 +1,84 @@ +dependencies = ['torch', 'math'] + +from geffnet import efficientnet_b0 +from geffnet import efficientnet_b1 +from geffnet import efficientnet_b2 +from geffnet import efficientnet_b3 + +from geffnet import efficientnet_es + +from geffnet import efficientnet_lite0 + +from geffnet import mixnet_s +from geffnet import mixnet_m +from geffnet import mixnet_l +from geffnet import mixnet_xl + +from geffnet import mobilenetv2_100 +from geffnet import mobilenetv2_110d +from geffnet import mobilenetv2_120d +from geffnet import mobilenetv2_140 + +from geffnet import mobilenetv3_large_100 +from geffnet import mobilenetv3_rw +from geffnet import mnasnet_a1 +from geffnet import mnasnet_b1 +from geffnet import fbnetc_100 +from geffnet import spnasnet_100 + +from 
geffnet import tf_efficientnet_b0 +from geffnet import tf_efficientnet_b1 +from geffnet import tf_efficientnet_b2 +from geffnet import tf_efficientnet_b3 +from geffnet import tf_efficientnet_b4 +from geffnet import tf_efficientnet_b5 +from geffnet import tf_efficientnet_b6 +from geffnet import tf_efficientnet_b7 +from geffnet import tf_efficientnet_b8 + +from geffnet import tf_efficientnet_b0_ap +from geffnet import tf_efficientnet_b1_ap +from geffnet import tf_efficientnet_b2_ap +from geffnet import tf_efficientnet_b3_ap +from geffnet import tf_efficientnet_b4_ap +from geffnet import tf_efficientnet_b5_ap +from geffnet import tf_efficientnet_b6_ap +from geffnet import tf_efficientnet_b7_ap +from geffnet import tf_efficientnet_b8_ap + +from geffnet import tf_efficientnet_b0_ns +from geffnet import tf_efficientnet_b1_ns +from geffnet import tf_efficientnet_b2_ns +from geffnet import tf_efficientnet_b3_ns +from geffnet import tf_efficientnet_b4_ns +from geffnet import tf_efficientnet_b5_ns +from geffnet import tf_efficientnet_b6_ns +from geffnet import tf_efficientnet_b7_ns +from geffnet import tf_efficientnet_l2_ns_475 +from geffnet import tf_efficientnet_l2_ns + +from geffnet import tf_efficientnet_es +from geffnet import tf_efficientnet_em +from geffnet import tf_efficientnet_el + +from geffnet import tf_efficientnet_cc_b0_4e +from geffnet import tf_efficientnet_cc_b0_8e +from geffnet import tf_efficientnet_cc_b1_8e + +from geffnet import tf_efficientnet_lite0 +from geffnet import tf_efficientnet_lite1 +from geffnet import tf_efficientnet_lite2 +from geffnet import tf_efficientnet_lite3 +from geffnet import tf_efficientnet_lite4 + +from geffnet import tf_mixnet_s +from geffnet import tf_mixnet_m +from geffnet import tf_mixnet_l + +from geffnet import tf_mobilenetv3_large_075 +from geffnet import tf_mobilenetv3_large_100 +from geffnet import tf_mobilenetv3_large_minimal_100 +from geffnet import tf_mobilenetv3_small_075 +from geffnet import tf_mobilenetv3_small_100 +from geffnet import tf_mobilenetv3_small_minimal_100 + diff --git a/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/onnx_export.py b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/onnx_export.py new file mode 100644 index 0000000000000000000000000000000000000000..7a5162ce214830df501bdb81edb66c095122f69d --- /dev/null +++ b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/onnx_export.py @@ -0,0 +1,120 @@ +""" ONNX export script + +Export PyTorch models as ONNX graphs. + +This export script originally started as an adaptation of code snippets found at +https://pytorch.org/tutorials/advanced/super_resolution_with_onnxruntime.html + +The default parameters work with PyTorch 1.6 and ONNX 1.7 and produce an optimal ONNX graph +for hosting in the ONNX runtime (see onnx_validate.py). To export an ONNX model compatible +with caffe2 (see caffe2_benchmark.py and caffe2_validate.py), the --keep-init and --aten-fallback +flags are currently required. + +Older versions of PyTorch/ONNX (tested PyTorch 1.4, ONNX 1.5) do not need extra flags for +caffe2 compatibility, but they produce a model that isn't as fast running on ONNX runtime. + +Most new release of PyTorch and ONNX cause some sort of breakage in the export / usage of ONNX models. +Please do your research and search ONNX and PyTorch issue tracker before asking me. Thanks. 
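create_model in model_factory.py (a little further up) resolves a variant name against the constructors re-exported here, so models can be built by string; and because hubconf.py lists the same entry points, a checkout of this efficientnet_repo also works as a local torch.hub source. A sketch of both routes ('path/to/efficientnet_repo' is a placeholder for the checkout directory containing hubconf.py):

import torch
import geffnet

# By-name creation through the factory.
m1 = geffnet.create_model('tf_mobilenetv3_small_100', pretrained=False, num_classes=10)

# The same model through torch.hub, treating the checkout as a local hub source.
m2 = torch.hub.load('path/to/efficientnet_repo', 'tf_mobilenetv3_small_100',
                    source='local', pretrained=False)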
+ +Copyright 2020 Ross Wightman +""" +import argparse +import torch +import numpy as np + +import onnx +import geffnet + +parser = argparse.ArgumentParser(description='PyTorch ImageNet Validation') +parser.add_argument('output', metavar='ONNX_FILE', + help='output model filename') +parser.add_argument('--model', '-m', metavar='MODEL', default='mobilenetv3_large_100', + help='model architecture (default: mobilenetv3_large_100)') +parser.add_argument('--opset', type=int, default=10, + help='ONNX opset to use (default: 10)') +parser.add_argument('--keep-init', action='store_true', default=False, + help='Keep initializers as input. Needed for Caffe2 compatible export in newer PyTorch/ONNX.') +parser.add_argument('--aten-fallback', action='store_true', default=False, + help='Fallback to ATEN ops. Helps fix AdaptiveAvgPool issue with Caffe2 in newer PyTorch/ONNX.') +parser.add_argument('--dynamic-size', action='store_true', default=False, + help='Export model width dynamic width/height. Not recommended for "tf" models with SAME padding.') +parser.add_argument('-b', '--batch-size', default=1, type=int, + metavar='N', help='mini-batch size (default: 1)') +parser.add_argument('--img-size', default=None, type=int, + metavar='N', help='Input image dimension, uses model default if empty') +parser.add_argument('--mean', type=float, nargs='+', default=None, metavar='MEAN', + help='Override mean pixel value of dataset') +parser.add_argument('--std', type=float, nargs='+', default=None, metavar='STD', + help='Override std deviation of of dataset') +parser.add_argument('--num-classes', type=int, default=1000, + help='Number classes in dataset') +parser.add_argument('--checkpoint', default='', type=str, metavar='PATH', + help='path to checkpoint (default: none)') + + +def main(): + args = parser.parse_args() + + args.pretrained = True + if args.checkpoint: + args.pretrained = False + + print("==> Creating PyTorch {} model".format(args.model)) + # NOTE exportable=True flag disables autofn/jit scripted activations and uses Conv2dSameExport layers + # for models using SAME padding + model = geffnet.create_model( + args.model, + num_classes=args.num_classes, + in_chans=3, + pretrained=args.pretrained, + checkpoint_path=args.checkpoint, + exportable=True) + + model.eval() + + example_input = torch.randn((args.batch_size, 3, args.img_size or 224, args.img_size or 224), requires_grad=True) + + # Run model once before export trace, sets padding for models with Conv2dSameExport. This means + # that the padding for models with Conv2dSameExport (most models with tf_ prefix) is fixed for + # the input img_size specified in this script. + # Opset >= 11 should allow for dynamic padding, however I cannot get it to work due to + # issues in the tracing of the dynamic padding or errors attempting to export the model after jit + # scripting it (an approach that should work). Perhaps in a future PyTorch or ONNX versions... 
+ model(example_input) + + print("==> Exporting model to ONNX format at '{}'".format(args.output)) + input_names = ["input0"] + output_names = ["output0"] + dynamic_axes = {'input0': {0: 'batch'}, 'output0': {0: 'batch'}} + if args.dynamic_size: + dynamic_axes['input0'][2] = 'height' + dynamic_axes['input0'][3] = 'width' + if args.aten_fallback: + export_type = torch.onnx.OperatorExportTypes.ONNX_ATEN_FALLBACK + else: + export_type = torch.onnx.OperatorExportTypes.ONNX + + torch_out = torch.onnx._export( + model, example_input, args.output, export_params=True, verbose=True, input_names=input_names, + output_names=output_names, keep_initializers_as_inputs=args.keep_init, dynamic_axes=dynamic_axes, + opset_version=args.opset, operator_export_type=export_type) + + print("==> Loading and checking exported model from '{}'".format(args.output)) + onnx_model = onnx.load(args.output) + onnx.checker.check_model(onnx_model) # assuming throw on error + print("==> Passed") + + if args.keep_init and args.aten_fallback: + import caffe2.python.onnx.backend as onnx_caffe2 + # Caffe2 loading only works properly in newer PyTorch/ONNX combos when + # keep_initializers_as_inputs and aten_fallback are set to True. + print("==> Loading model into Caffe2 backend and comparing forward pass.".format(args.output)) + caffe2_backend = onnx_caffe2.prepare(onnx_model) + B = {onnx_model.graph.input[0].name: x.data.numpy()} + c2_out = caffe2_backend.run(B)[0] + np.testing.assert_almost_equal(torch_out.data.numpy(), c2_out, decimal=5) + print("==> Passed") + + +if __name__ == '__main__': + main() diff --git a/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/onnx_optimize.py b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/onnx_optimize.py new file mode 100644 index 0000000000000000000000000000000000000000..ee20bbf9f0f9473370489512eb96ca0b570b5388 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/onnx_optimize.py @@ -0,0 +1,84 @@ +""" ONNX optimization script + +Run ONNX models through the optimizer to prune unneeded nodes, fuse batchnorm layers into conv, etc. + +NOTE: This isn't working consistently in recent PyTorch/ONNX combos (ie PyTorch 1.6 and ONNX 1.7), +it seems time to switch to using the onnxruntime online optimizer (can also be saved for offline). 
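onnx_export.py above runs the model once before tracing so that Conv2dSameExport padding is frozen for the chosen --img-size, then validates the exported graph with onnx.checker. A follow-up sanity check with onnxruntime, using an illustrative output filename and the script's default 'input0' input name:

# Illustrative export invocation:
#   python onnx_export.py --model tf_efficientnet_b0 --img-size 224 tf_efficientnet_b0.onnx

import numpy as np
import onnxruntime

sess = onnxruntime.InferenceSession('tf_efficientnet_b0.onnx', providers=['CPUExecutionProvider'])
out = sess.run(None, {'input0': np.random.randn(1, 3, 224, 224).astype(np.float32)})[0]
print(out.shape)   # (1, 1000)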
+ +Copyright 2020 Ross Wightman +""" +import argparse +import warnings + +import onnx +from onnx import optimizer + + +parser = argparse.ArgumentParser(description="Optimize ONNX model") + +parser.add_argument("model", help="The ONNX model") +parser.add_argument("--output", required=True, help="The optimized model output filename") + + +def traverse_graph(graph, prefix=''): + content = [] + indent = prefix + ' ' + graphs = [] + num_nodes = 0 + for node in graph.node: + pn, gs = onnx.helper.printable_node(node, indent, subgraphs=True) + assert isinstance(gs, list) + content.append(pn) + graphs.extend(gs) + num_nodes += 1 + for g in graphs: + g_count, g_str = traverse_graph(g) + content.append('\n' + g_str) + num_nodes += g_count + return num_nodes, '\n'.join(content) + + +def main(): + args = parser.parse_args() + onnx_model = onnx.load(args.model) + num_original_nodes, original_graph_str = traverse_graph(onnx_model.graph) + + # Optimizer passes to perform + passes = [ + #'eliminate_deadend', + 'eliminate_identity', + 'eliminate_nop_dropout', + 'eliminate_nop_pad', + 'eliminate_nop_transpose', + 'eliminate_unused_initializer', + 'extract_constant_to_initializer', + 'fuse_add_bias_into_conv', + 'fuse_bn_into_conv', + 'fuse_consecutive_concats', + 'fuse_consecutive_reduce_unsqueeze', + 'fuse_consecutive_squeezes', + 'fuse_consecutive_transposes', + #'fuse_matmul_add_bias_into_gemm', + 'fuse_pad_into_conv', + #'fuse_transpose_into_gemm', + #'lift_lexical_references', + ] + + # Apply the optimization on the original serialized model + # WARNING I've had issues with optimizer in recent versions of PyTorch / ONNX causing + # 'duplicate definition of name' errors, see: https://github.com/onnx/onnx/issues/2401 + # It may be better to rely on onnxruntime optimizations, see onnx_validate.py script. + warnings.warn("I've had issues with optimizer in recent versions of PyTorch / ONNX." + "Try onnxruntime optimization if this doesn't work.") + optimized_model = optimizer.optimize(onnx_model, passes) + + num_optimized_nodes, optimzied_graph_str = traverse_graph(optimized_model.graph) + print('==> The model after optimization:\n{}\n'.format(optimzied_graph_str)) + print('==> The optimized model has {} nodes, the original had {}.'.format(num_optimized_nodes, num_original_nodes)) + + # Save the ONNX model + onnx.save(optimized_model, args.output) + + +if __name__ == "__main__": + main() diff --git a/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/onnx_to_caffe.py b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/onnx_to_caffe.py new file mode 100644 index 0000000000000000000000000000000000000000..44399aafababcdf6b84147a0613eb0909730db4b --- /dev/null +++ b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/onnx_to_caffe.py @@ -0,0 +1,27 @@ +import argparse + +import onnx +from caffe2.python.onnx.backend import Caffe2Backend + + +parser = argparse.ArgumentParser(description="Convert ONNX to Caffe2") + +parser.add_argument("model", help="The ONNX model") +parser.add_argument("--c2-prefix", required=True, + help="The output file prefix for the caffe2 model init and predict file. 
") + + +def main(): + args = parser.parse_args() + onnx_model = onnx.load(args.model) + caffe2_init, caffe2_predict = Caffe2Backend.onnx_graph_to_caffe2_net(onnx_model) + caffe2_init_str = caffe2_init.SerializeToString() + with open(args.c2_prefix + '.init.pb', "wb") as f: + f.write(caffe2_init_str) + caffe2_predict_str = caffe2_predict.SerializeToString() + with open(args.c2_prefix + '.predict.pb', "wb") as f: + f.write(caffe2_predict_str) + + +if __name__ == "__main__": + main() diff --git a/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/onnx_validate.py b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/onnx_validate.py new file mode 100644 index 0000000000000000000000000000000000000000..ab3e4fb141b6ef660dcc5b447fd9f368a2ea19a0 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/onnx_validate.py @@ -0,0 +1,112 @@ +""" ONNX-runtime validation script + +This script was created to verify accuracy and performance of exported ONNX +models running with the onnxruntime. It utilizes the PyTorch dataloader/processing +pipeline for a fair comparison against the originals. + +Copyright 2020 Ross Wightman +""" +import argparse +import numpy as np +import onnxruntime +from data import create_loader, resolve_data_config, Dataset +from utils import AverageMeter +import time + +parser = argparse.ArgumentParser(description='Caffe2 ImageNet Validation') +parser.add_argument('data', metavar='DIR', + help='path to dataset') +parser.add_argument('--onnx-input', default='', type=str, metavar='PATH', + help='path to onnx model/weights file') +parser.add_argument('--onnx-output-opt', default='', type=str, metavar='PATH', + help='path to output optimized onnx graph') +parser.add_argument('--profile', action='store_true', default=False, + help='Enable profiler output.') +parser.add_argument('-j', '--workers', default=2, type=int, metavar='N', + help='number of data loading workers (default: 2)') +parser.add_argument('-b', '--batch-size', default=256, type=int, + metavar='N', help='mini-batch size (default: 256)') +parser.add_argument('--img-size', default=None, type=int, + metavar='N', help='Input image dimension, uses model default if empty') +parser.add_argument('--mean', type=float, nargs='+', default=None, metavar='MEAN', + help='Override mean pixel value of dataset') +parser.add_argument('--std', type=float, nargs='+', default=None, metavar='STD', + help='Override std deviation of of dataset') +parser.add_argument('--crop-pct', type=float, default=None, metavar='PCT', + help='Override default crop pct of 0.875') +parser.add_argument('--interpolation', default='', type=str, metavar='NAME', + help='Image resize interpolation type (overrides model)') +parser.add_argument('--tf-preprocessing', dest='tf_preprocessing', action='store_true', + help='use tensorflow mnasnet preporcessing') +parser.add_argument('--print-freq', '-p', default=10, type=int, + metavar='N', help='print frequency (default: 10)') + + +def main(): + args = parser.parse_args() + args.gpu_id = 0 + + # Set graph optimization level + sess_options = onnxruntime.SessionOptions() + sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL + if args.profile: + sess_options.enable_profiling = True + if args.onnx_output_opt: + sess_options.optimized_model_filepath = args.onnx_output_opt + + session = 
onnxruntime.InferenceSession(args.onnx_input, sess_options) + + data_config = resolve_data_config(None, args) + loader = create_loader( + Dataset(args.data, load_bytes=args.tf_preprocessing), + input_size=data_config['input_size'], + batch_size=args.batch_size, + use_prefetcher=False, + interpolation=data_config['interpolation'], + mean=data_config['mean'], + std=data_config['std'], + num_workers=args.workers, + crop_pct=data_config['crop_pct'], + tensorflow_preprocessing=args.tf_preprocessing) + + input_name = session.get_inputs()[0].name + + batch_time = AverageMeter() + top1 = AverageMeter() + top5 = AverageMeter() + end = time.time() + for i, (input, target) in enumerate(loader): + # run the net and return prediction + output = session.run([], {input_name: input.data.numpy()}) + output = output[0] + + # measure accuracy and record loss + prec1, prec5 = accuracy_np(output, target.numpy()) + top1.update(prec1.item(), input.size(0)) + top5.update(prec5.item(), input.size(0)) + + # measure elapsed time + batch_time.update(time.time() - end) + end = time.time() + + if i % args.print_freq == 0: + print('Test: [{0}/{1}]\t' + 'Time {batch_time.val:.3f} ({batch_time.avg:.3f}, {rate_avg:.3f}/s, {ms_avg:.3f} ms/sample) \t' + 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t' + 'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format( + i, len(loader), batch_time=batch_time, rate_avg=input.size(0) / batch_time.avg, + ms_avg=100 * batch_time.avg / input.size(0), top1=top1, top5=top5)) + + print(' * Prec@1 {top1.avg:.3f} ({top1a:.3f}) Prec@5 {top5.avg:.3f} ({top5a:.3f})'.format( + top1=top1, top1a=100-top1.avg, top5=top5, top5a=100.-top5.avg)) + + +def accuracy_np(output, target): + max_indices = np.argsort(output, axis=1)[:, ::-1] + top5 = 100 * np.equal(max_indices[:, :5], target[:, np.newaxis]).sum(axis=1).mean() + top1 = 100 * np.equal(max_indices[:, 0], target).mean() + return top1, top5 + + +if __name__ == '__main__': + main() diff --git a/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/requirements.txt b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..ac3ffc13bae15f9b11f7cbe3705760056ecd7f13 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/requirements.txt @@ -0,0 +1,2 @@ +torch>=1.2.0 +torchvision>=0.4.0 diff --git a/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/setup.py b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..023e4c30f98164595964423e3a83eefaf7ffdad6 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/setup.py @@ -0,0 +1,47 @@ +""" Setup +""" +from setuptools import setup, find_packages +from codecs import open +from os import path + +here = path.abspath(path.dirname(__file__)) + +# Get the long description from the README file +with open(path.join(here, 'README.md'), encoding='utf-8') as f: + long_description = f.read() + +exec(open('geffnet/version.py').read()) +setup( + name='geffnet', + version=__version__, + description='(Generic) EfficientNets for PyTorch', + long_description=long_description, + long_description_content_type='text/markdown', + 
url='https://github.com/rwightman/gen-efficientnet-pytorch', + author='Ross Wightman', + author_email='hello@rwightman.com', + classifiers=[ + # How mature is this project? Common values are + # 3 - Alpha + # 4 - Beta + # 5 - Production/Stable + 'Development Status :: 3 - Alpha', + 'Intended Audience :: Education', + 'Intended Audience :: Science/Research', + 'License :: OSI Approved :: Apache Software License', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'Topic :: Scientific/Engineering', + 'Topic :: Scientific/Engineering :: Artificial Intelligence', + 'Topic :: Software Development', + 'Topic :: Software Development :: Libraries', + 'Topic :: Software Development :: Libraries :: Python Modules', + ], + + # Note that this is a string of words separated by whitespace, not a list. + keywords='pytorch pretrained models efficientnet mixnet mobilenetv3 mnasnet', + packages=find_packages(exclude=['data']), + install_requires=['torch >= 1.4', 'torchvision'], + python_requires='>=3.6', +) diff --git a/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/utils.py b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..d327e8bd8120c5cd09ae6c15c3991ccbe27f6c1f --- /dev/null +++ b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/utils.py @@ -0,0 +1,52 @@ +import os + + +class AverageMeter: + """Computes and stores the average and current value""" + def __init__(self): + self.reset() + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + self.val = val + self.sum += val * n + self.count += n + self.avg = self.sum / self.count + + +def accuracy(output, target, topk=(1,)): + """Computes the precision@k for the specified values of k""" + maxk = max(topk) + batch_size = target.size(0) + + _, pred = output.topk(maxk, 1, True, True) + pred = pred.t() + correct = pred.eq(target.view(1, -1).expand_as(pred)) + + res = [] + for k in topk: + correct_k = correct[:k].reshape(-1).float().sum(0) + res.append(correct_k.mul_(100.0 / batch_size)) + return res + + +def get_outdir(path, *paths, inc=False): + outdir = os.path.join(path, *paths) + if not os.path.exists(outdir): + os.makedirs(outdir) + elif inc: + count = 1 + outdir_inc = outdir + '-' + str(count) + while os.path.exists(outdir_inc): + count = count + 1 + outdir_inc = outdir + '-' + str(count) + assert count < 100 + outdir = outdir_inc + os.makedirs(outdir) + return outdir + diff --git a/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/validate.py b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/validate.py new file mode 100644 index 0000000000000000000000000000000000000000..5fd44fbb3165ef81ef81251b6299f6aaa80bf2c2 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/efficientnet_repo/validate.py @@ -0,0 +1,166 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import time +import torch +import torch.nn as nn +import torch.nn.parallel +from contextlib import suppress + +import geffnet +from data import Dataset, create_loader, 
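utils.py above is the shared toolbox for the validation scripts: accuracy() returns top-k precision as percentages and AverageMeter keeps the running averages they print (onnx_validate.py carries a numpy twin, accuracy_np). A tiny self-contained check, run from this directory so utils.py is importable:

import torch
from utils import AverageMeter, accuracy

logits = torch.tensor([[0.1, 0.9, 0.0],    # argmax 1, target 1 -> correct
                       [0.8, 0.1, 0.1]])   # argmax 0, target 2 -> wrong
targets = torch.tensor([1, 2])

top1, = accuracy(logits, targets, topk=(1,))
meter = AverageMeter()
meter.update(top1.item(), n=targets.size(0))
print(top1.item(), meter.avg)   # 50.0 50.0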
resolve_data_config +from utils import accuracy, AverageMeter + +has_native_amp = False +try: + if getattr(torch.cuda.amp, 'autocast') is not None: + has_native_amp = True +except AttributeError: + pass + +torch.backends.cudnn.benchmark = True + +parser = argparse.ArgumentParser(description='PyTorch ImageNet Validation') +parser.add_argument('data', metavar='DIR', + help='path to dataset') +parser.add_argument('--model', '-m', metavar='MODEL', default='spnasnet1_00', + help='model architecture (default: dpn92)') +parser.add_argument('-j', '--workers', default=4, type=int, metavar='N', + help='number of data loading workers (default: 2)') +parser.add_argument('-b', '--batch-size', default=256, type=int, + metavar='N', help='mini-batch size (default: 256)') +parser.add_argument('--img-size', default=None, type=int, + metavar='N', help='Input image dimension, uses model default if empty') +parser.add_argument('--mean', type=float, nargs='+', default=None, metavar='MEAN', + help='Override mean pixel value of dataset') +parser.add_argument('--std', type=float, nargs='+', default=None, metavar='STD', + help='Override std deviation of of dataset') +parser.add_argument('--crop-pct', type=float, default=None, metavar='PCT', + help='Override default crop pct of 0.875') +parser.add_argument('--interpolation', default='', type=str, metavar='NAME', + help='Image resize interpolation type (overrides model)') +parser.add_argument('--num-classes', type=int, default=1000, + help='Number classes in dataset') +parser.add_argument('--print-freq', '-p', default=10, type=int, + metavar='N', help='print frequency (default: 10)') +parser.add_argument('--checkpoint', default='', type=str, metavar='PATH', + help='path to latest checkpoint (default: none)') +parser.add_argument('--pretrained', dest='pretrained', action='store_true', + help='use pre-trained model') +parser.add_argument('--torchscript', dest='torchscript', action='store_true', + help='convert model torchscript for inference') +parser.add_argument('--num-gpu', type=int, default=1, + help='Number of GPUS to use') +parser.add_argument('--tf-preprocessing', dest='tf_preprocessing', action='store_true', + help='use tensorflow mnasnet preporcessing') +parser.add_argument('--no-cuda', dest='no_cuda', action='store_true', + help='') +parser.add_argument('--channels-last', action='store_true', default=False, + help='Use channels_last memory layout') +parser.add_argument('--amp', action='store_true', default=False, + help='Use native Torch AMP mixed precision.') + + +def main(): + args = parser.parse_args() + + if not args.checkpoint and not args.pretrained: + args.pretrained = True + + amp_autocast = suppress # do nothing + if args.amp: + if not has_native_amp: + print("Native Torch AMP is not available (requires torch >= 1.6), using FP32.") + else: + amp_autocast = torch.cuda.amp.autocast + + # create model + model = geffnet.create_model( + args.model, + num_classes=args.num_classes, + in_chans=3, + pretrained=args.pretrained, + checkpoint_path=args.checkpoint, + scriptable=args.torchscript) + + if args.channels_last: + model = model.to(memory_format=torch.channels_last) + + if args.torchscript: + torch.jit.optimized_execution(True) + model = torch.jit.script(model) + + print('Model %s created, param count: %d' % + (args.model, sum([m.numel() for m in model.parameters()]))) + + data_config = resolve_data_config(model, args) + + criterion = nn.CrossEntropyLoss() + + if not args.no_cuda: + if args.num_gpu > 1: + model = torch.nn.DataParallel(model, 
device_ids=list(range(args.num_gpu))).cuda() + else: + model = model.cuda() + criterion = criterion.cuda() + + loader = create_loader( + Dataset(args.data, load_bytes=args.tf_preprocessing), + input_size=data_config['input_size'], + batch_size=args.batch_size, + use_prefetcher=not args.no_cuda, + interpolation=data_config['interpolation'], + mean=data_config['mean'], + std=data_config['std'], + num_workers=args.workers, + crop_pct=data_config['crop_pct'], + tensorflow_preprocessing=args.tf_preprocessing) + + batch_time = AverageMeter() + losses = AverageMeter() + top1 = AverageMeter() + top5 = AverageMeter() + + model.eval() + end = time.time() + with torch.no_grad(): + for i, (input, target) in enumerate(loader): + if not args.no_cuda: + target = target.cuda() + input = input.cuda() + if args.channels_last: + input = input.contiguous(memory_format=torch.channels_last) + + # compute output + with amp_autocast(): + output = model(input) + loss = criterion(output, target) + + # measure accuracy and record loss + prec1, prec5 = accuracy(output.data, target, topk=(1, 5)) + losses.update(loss.item(), input.size(0)) + top1.update(prec1.item(), input.size(0)) + top5.update(prec5.item(), input.size(0)) + + # measure elapsed time + batch_time.update(time.time() - end) + end = time.time() + + if i % args.print_freq == 0: + print('Test: [{0}/{1}]\t' + 'Time {batch_time.val:.3f} ({batch_time.avg:.3f}, {rate_avg:.3f}/s) \t' + 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' + 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t' + 'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format( + i, len(loader), batch_time=batch_time, + rate_avg=input.size(0) / batch_time.avg, + loss=losses, top1=top1, top5=top5)) + + print(' * Prec@1 {top1.avg:.3f} ({top1a:.3f}) Prec@5 {top5.avg:.3f} ({top5a:.3f})'.format( + top1=top1, top1a=100-top1.avg, top5=top5, top5a=100.-top5.avg)) + + +if __name__ == '__main__': + main() diff --git a/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/encoder.py b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..7f7149ca3c0cf2b6e019105af7e645cfbb3eda11 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/encoder.py @@ -0,0 +1,34 @@ +import os +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class Encoder(nn.Module): + def __init__(self): + super(Encoder, self).__init__() + + basemodel_name = 'tf_efficientnet_b5_ap' + print('Loading base model ()...'.format(basemodel_name), end='') + repo_path = os.path.join(os.path.dirname(__file__), 'efficientnet_repo') + basemodel = torch.hub.load(repo_path, basemodel_name, pretrained=False, source='local') + print('Done.') + + # Remove last layer + print('Removing last two layers (global_pool & classifier).') + basemodel.global_pool = nn.Identity() + basemodel.classifier = nn.Identity() + + self.original_model = basemodel + + def forward(self, x): + features = [x] + for k, v in self.original_model._modules.items(): + if (k == 'blocks'): + for ki, vi in v._modules.items(): + features.append(vi(features[-1])) + else: + features.append(v(features[-1])) + return features + + diff --git a/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/submodules.py b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/submodules.py new file mode 100644 index 
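Encoder.forward above appends an activation after every top-level module of the wrapped tf_efficientnet_b5_ap (and after each entry of its 'blocks'), producing the multi-resolution feature list that the NormalBAE decoder consumes; global_pool and classifier are replaced by Identity so nothing is pooled away. A rough inspection sketch, assuming it is run next to this submodules package so the local torch.hub load in Encoder.__init__ succeeds (weights are loaded later by the preprocessor, so random init is fine here):

import torch
from encoder import Encoder   # the module above

enc = Encoder()
enc.eval()
with torch.no_grad():
    feats = enc(torch.randn(1, 3, 512, 512))

# feats[0] is the input itself; later entries are progressively lower-resolution maps.
for i, f in enumerate(feats):
    print(i, tuple(f.shape))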
0000000000000000000000000000000000000000..409733351bd6ab5d191c800aff1bc05bfa4cb6f8 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_normalbae/annotator/normalbae/models/submodules/submodules.py @@ -0,0 +1,140 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + + +######################################################################################################################## + + +# Upsample + BatchNorm +class UpSampleBN(nn.Module): + def __init__(self, skip_input, output_features): + super(UpSampleBN, self).__init__() + + self._net = nn.Sequential(nn.Conv2d(skip_input, output_features, kernel_size=3, stride=1, padding=1), + nn.BatchNorm2d(output_features), + nn.LeakyReLU(), + nn.Conv2d(output_features, output_features, kernel_size=3, stride=1, padding=1), + nn.BatchNorm2d(output_features), + nn.LeakyReLU()) + + def forward(self, x, concat_with): + up_x = F.interpolate(x, size=[concat_with.size(2), concat_with.size(3)], mode='bilinear', align_corners=True) + f = torch.cat([up_x, concat_with], dim=1) + return self._net(f) + + +# Upsample + GroupNorm + Weight Standardization +class UpSampleGN(nn.Module): + def __init__(self, skip_input, output_features): + super(UpSampleGN, self).__init__() + + self._net = nn.Sequential(Conv2d(skip_input, output_features, kernel_size=3, stride=1, padding=1), + nn.GroupNorm(8, output_features), + nn.LeakyReLU(), + Conv2d(output_features, output_features, kernel_size=3, stride=1, padding=1), + nn.GroupNorm(8, output_features), + nn.LeakyReLU()) + + def forward(self, x, concat_with): + up_x = F.interpolate(x, size=[concat_with.size(2), concat_with.size(3)], mode='bilinear', align_corners=True) + f = torch.cat([up_x, concat_with], dim=1) + return self._net(f) + + +# Conv2d with weight standardization +class Conv2d(nn.Conv2d): + def __init__(self, in_channels, out_channels, kernel_size, stride=1, + padding=0, dilation=1, groups=1, bias=True): + super(Conv2d, self).__init__(in_channels, out_channels, kernel_size, stride, + padding, dilation, groups, bias) + + def forward(self, x): + weight = self.weight + weight_mean = weight.mean(dim=1, keepdim=True).mean(dim=2, + keepdim=True).mean(dim=3, keepdim=True) + weight = weight - weight_mean + std = weight.view(weight.size(0), -1).std(dim=1).view(-1, 1, 1, 1) + 1e-5 + weight = weight / std.expand_as(weight) + return F.conv2d(x, weight, self.bias, self.stride, + self.padding, self.dilation, self.groups) + + +# normalize +def norm_normalize(norm_out): + min_kappa = 0.01 + norm_x, norm_y, norm_z, kappa = torch.split(norm_out, 1, dim=1) + norm = torch.sqrt(norm_x ** 2.0 + norm_y ** 2.0 + norm_z ** 2.0) + 1e-10 + kappa = F.elu(kappa) + 1.0 + min_kappa + final_out = torch.cat([norm_x / norm, norm_y / norm, norm_z / norm, kappa], dim=1) + return final_out + + +# uncertainty-guided sampling (only used during training) +@torch.no_grad() +def sample_points(init_normal, gt_norm_mask, sampling_ratio, beta): + device = init_normal.device + B, _, H, W = init_normal.shape + N = int(sampling_ratio * H * W) + beta = beta + + # uncertainty map + uncertainty_map = -1 * init_normal[:, 3, :, :] # B, H, W + + # gt_invalid_mask (B, H, W) + if gt_norm_mask is not None: + gt_invalid_mask = F.interpolate(gt_norm_mask.float(), size=[H, W], mode='nearest') + gt_invalid_mask = gt_invalid_mask[:, 0, :, :] < 0.5 + uncertainty_map[gt_invalid_mask] = -1e4 + + # (B, H*W) + _, idx = uncertainty_map.view(B, -1).sort(1, descending=True) + + # importance sampling + if int(beta * N) > 0: + importance = idx[:, :int(beta * 
N)] # B, beta*N + + # remaining + remaining = idx[:, int(beta * N):] # B, H*W - beta*N + + # coverage + num_coverage = N - int(beta * N) + + if num_coverage <= 0: + samples = importance + else: + coverage_list = [] + for i in range(B): + idx_c = torch.randperm(remaining.size()[1]) # shuffles "H*W - beta*N" + coverage_list.append(remaining[i, :][idx_c[:num_coverage]].view(1, -1)) # 1, N-beta*N + coverage = torch.cat(coverage_list, dim=0) # B, N-beta*N + samples = torch.cat((importance, coverage), dim=1) # B, N + + else: + # remaining + remaining = idx[:, :] # B, H*W + + # coverage + num_coverage = N + + coverage_list = [] + for i in range(B): + idx_c = torch.randperm(remaining.size()[1]) # shuffles "H*W - beta*N" + coverage_list.append(remaining[i, :][idx_c[:num_coverage]].view(1, -1)) # 1, N-beta*N + coverage = torch.cat(coverage_list, dim=0) # B, N-beta*N + samples = coverage + + # point coordinates + rows_int = samples // W # 0 for first row, H-1 for last row + rows_float = rows_int / float(H-1) # 0 to 1.0 + rows_float = (rows_float * 2.0) - 1.0 # -1.0 to 1.0 + + cols_int = samples % W # 0 for first column, W-1 for last column + cols_float = cols_int / float(W-1) # 0 to 1.0 + cols_float = (cols_float * 2.0) - 1.0 # -1.0 to 1.0 + + point_coords = torch.zeros(B, 1, N, 2) + point_coords[:, 0, :, 0] = cols_float # x coord + point_coords[:, 0, :, 1] = rows_float # y coord + point_coords = point_coords.to(device) + return point_coords, rows_int, cols_int \ No newline at end of file diff --git a/extensions-builtin/forge_preprocessor_normalbae/scripts/preprocessor_normalbae.py b/extensions-builtin/forge_preprocessor_normalbae/scripts/preprocessor_normalbae.py new file mode 100644 index 0000000000000000000000000000000000000000..1913398363c2e8cac9809a7846e607e308b6954a --- /dev/null +++ b/extensions-builtin/forge_preprocessor_normalbae/scripts/preprocessor_normalbae.py @@ -0,0 +1,77 @@ +from modules_forge.supported_preprocessor import Preprocessor, PreprocessorParameter +from modules_forge.shared import preprocessor_dir, add_supported_preprocessor +from modules_forge.forge_util import resize_image_with_pad +from modules.modelloader import load_file_from_url + +import types +import torch +import numpy as np + +from einops import rearrange +from annotator.normalbae.models.NNET import NNET +from annotator.normalbae import load_checkpoint +from torchvision import transforms + + +class PreprocessorNormalBae(Preprocessor): + def __init__(self): + super().__init__() + self.name = 'normalbae' + self.tags = ['NormalMap'] + self.model_filename_filters = ['normal'] + self.slider_resolution = PreprocessorParameter( + label='Resolution', minimum=128, maximum=2048, value=512, step=8, visible=True) + self.slider_1 = PreprocessorParameter(visible=False) + self.slider_2 = PreprocessorParameter(visible=False) + self.slider_3 = PreprocessorParameter(visible=False) + self.show_control_mode = True + self.do_not_need_model = False + self.sorting_priority = 100 # higher goes to top in the list + + def load_model(self): + if self.model_patcher is not None: + return + + model_path = load_file_from_url( + "https://huggingface.co/lllyasviel/Annotators/resolve/main/scannet.pt", + model_dir=preprocessor_dir) + + args = types.SimpleNamespace() + args.mode = 'client' + args.architecture = 'BN' + args.pretrained = 'scannet' + args.sampling_ratio = 0.4 + args.importance_ratio = 0.7 + model = NNET(args) + model = load_checkpoint(model_path, model) + self.norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 
0.225]) + + self.model_patcher = self.setup_model_patcher(model) + + def __call__(self, input_image, resolution, slider_1=None, slider_2=None, slider_3=None, **kwargs): + input_image, remove_pad = resize_image_with_pad(input_image, resolution) + + self.load_model() + + self.move_all_model_patchers_to_gpu() + + assert input_image.ndim == 3 + image_normal = input_image + + with torch.no_grad(): + image_normal = self.send_tensor_to_model_device(torch.from_numpy(image_normal)) + image_normal = image_normal / 255.0 + image_normal = rearrange(image_normal, 'h w c -> 1 c h w') + image_normal = self.norm(image_normal) + + normal = self.model_patcher.model(image_normal) + normal = normal[0][-1][:, :3] + normal = ((normal + 1) * 0.5).clip(0, 1) + + normal = rearrange(normal[0], 'c h w -> h w c').cpu().numpy() + normal_image = (normal * 255.0).clip(0, 255).astype(np.uint8) + + return remove_pad(normal_image) + + +add_supported_preprocessor(PreprocessorNormalBae()) diff --git a/extensions-builtin/forge_preprocessor_recolor/scripts/preprocessor_recolor.py b/extensions-builtin/forge_preprocessor_recolor/scripts/preprocessor_recolor.py new file mode 100644 index 0000000000000000000000000000000000000000..b377091ed1efb8d0128cb4aaf47168fddb8f517d --- /dev/null +++ b/extensions-builtin/forge_preprocessor_recolor/scripts/preprocessor_recolor.py @@ -0,0 +1,67 @@ +import cv2 +import numpy as np + +from modules_forge.supported_preprocessor import Preprocessor, PreprocessorParameter +from modules_forge.shared import add_supported_preprocessor + + +class PreprocessorRecolor(Preprocessor): + def __init__(self, name, use_intensity): + super().__init__() + self.name = name + self.use_intensity = use_intensity + self.tags = ['Recolor'] + self.model_filename_filters = ['color', 'recolor', 'grey', 'gray'] + self.slider_resolution = PreprocessorParameter(visible=False) + self.slider_1 = PreprocessorParameter( + visible=True, + label="Gamma Correction", + value=1.0, + minimum=0.1, + maximum=2.0, + step=0.001 + ) + self.current_cond = None + + def __call__(self, input_image, resolution, slider_1=None, slider_2=None, slider_3=None, **kwargs): + gamma = slider_1 + + if self.use_intensity: + result = cv2.cvtColor(input_image, cv2.COLOR_BGR2HSV) + result = result[:, :, 2].astype(np.float32) / 255.0 + else: + result = cv2.cvtColor(input_image, cv2.COLOR_BGR2LAB) + result = result[:, :, 0].astype(np.float32) / 255.0 + + result = result ** gamma + result = (result * 255.0).clip(0, 255).astype(np.uint8) + result = cv2.cvtColor(result, cv2.COLOR_GRAY2RGB) + return result + + def process_before_every_sampling(self, process, cond, mask, *args, **kwargs): + self.current_cond = cond + return cond, mask + + def process_after_every_sampling(self, process, params, *args, **kwargs): + a1111_batch_result = args[0] + new_results = [] + + for img in a1111_batch_result.images: + new_mean = self.current_cond[0].mean(dim=0, keepdim=True) + img = img - img.mean(dim=0, keepdim=True) + new_mean + img = img.clip(0, 1) + new_results.append(img) + + a1111_batch_result.images = new_results + return + + +add_supported_preprocessor(PreprocessorRecolor( + name="recolor_intensity", + use_intensity=True +)) + +add_supported_preprocessor(PreprocessorRecolor( + name="recolor_luminance", + use_intensity=False +)) diff --git a/extensions-builtin/forge_preprocessor_reference/scripts/forge_reference.py b/extensions-builtin/forge_preprocessor_reference/scripts/forge_reference.py new file mode 100644 index 
0000000000000000000000000000000000000000..69550665bcd7f8548202baf6a9a8d2402038a200 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_reference/scripts/forge_reference.py @@ -0,0 +1,213 @@ +import torch + +from modules_forge.supported_preprocessor import Preprocessor, PreprocessorParameter +from modules_forge.shared import add_supported_preprocessor +from ldm_patched.modules.samplers import sampling_function +import ldm_patched.ldm.modules.attention as attention + + +def sdp(q, k, v, transformer_options): + if q.shape[0] == 0: + return q + + return attention.optimized_attention(q, k, v, heads=transformer_options["n_heads"], mask=None) + + +def adain(x, target_std, target_mean): + if x.shape[0] == 0: + return x + + std, mean = torch.std_mean(x, dim=(2, 3), keepdim=True, correction=0) + return (((x - mean) / std) * target_std) + target_mean + + +def zero_cat(a, b, dim): + if a.shape[0] == 0: + return b + if b.shape[0] == 0: + return a + return torch.cat([a, b], dim=dim) + + +class PreprocessorReference(Preprocessor): + def __init__(self, name, use_attn=True, use_adain=True, priority=0): + super().__init__() + self.name = name + self.use_attn = use_attn + self.use_adain = use_adain + self.sorting_priority = priority + self.tags = ['Reference'] + self.slider_resolution = PreprocessorParameter(visible=False) + self.slider_1 = PreprocessorParameter(label='Style Fidelity', value=0.5, minimum=0.0, maximum=1.0, step=0.01, visible=True) + self.show_control_mode = False + self.corp_image_with_a1111_mask_when_in_img2img_inpaint_tab = False + self.do_not_need_model = True + + self.is_recording_style = False + self.recorded_attn1 = {} + self.recorded_h = {} + + def process_before_every_sampling(self, process, cond, mask, *args, **kwargs): + unit = kwargs['unit'] + weight = float(unit.weight) + style_fidelity = float(unit.threshold_a) + start_percent = float(unit.guidance_start) + end_percent = float(unit.guidance_end) + + if process.sd_model.is_sdxl: + style_fidelity = style_fidelity ** 3.0 # sdxl is very sensitive to reference so we lower the weights + + vae = process.sd_model.forge_objects.vae + # This is a powerful VAE with integrated memory management, bf16, and tiled fallback. 
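(Aside on the helpers defined above: `adain` is a standard per-channel statistics transfer, and it is the mechanism behind the `reference_adain` modes below. A minimal, self-contained sketch of what it does to a feature map, using only torch and independent of the webui, might look like this:)

```python
import torch

def adain(x, target_std, target_mean):
    # replace the per-channel statistics of x with the reference statistics
    std, mean = torch.std_mean(x, dim=(2, 3), keepdim=True, correction=0)
    return (((x - mean) / std) * target_std) + target_mean

# toy feature maps: batch 1, 4 channels, 8x8
x = torch.randn(1, 4, 8, 8)
reference = torch.randn(1, 4, 8, 8) * 3.0 + 5.0

ref_std, ref_mean = torch.std_mean(reference, dim=(2, 3), keepdim=True, correction=0)
y = adain(x, ref_std, ref_mean)

new_std, new_mean = torch.std_mean(y, dim=(2, 3), correction=0)
print(new_mean)  # matches the reference channel means (around 5.0)
print(new_std)   # matches the reference channel stds (around 3.0)
```

(In the sampling hooks below, the statistics recorded during the reference pass are applied to the conditional batch in exactly this way, with `style_fidelity` blending between the adapted and unmodified unconditional outputs.)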
+ + latent_image = vae.encode(cond.movedim(1, -1)) + latent_image = process.sd_model.forge_objects.unet.model.latent_format.process_in(latent_image) + + gen_seed = process.seeds[0] + 1 + gen_cpu = torch.Generator().manual_seed(gen_seed) + + unet = process.sd_model.forge_objects.unet.clone() + sigma_max = unet.model.model_sampling.percent_to_sigma(start_percent) + sigma_min = unet.model.model_sampling.percent_to_sigma(end_percent) + + self.recorded_attn1 = {} + self.recorded_h = {} + + def conditioning_modifier(model, x, timestep, uncond, cond, cond_scale, model_options, seed): + sigma = timestep[0].item() + if not (sigma_min <= sigma <= sigma_max): + return model, x, timestep, uncond, cond, cond_scale, model_options, seed + + self.is_recording_style = True + + xt = latent_image.to(x) + torch.randn(x.size(), dtype=x.dtype, generator=gen_cpu).to(x) * sigma + sampling_function(model, xt, timestep, uncond, cond, 1, model_options, seed) + + self.is_recording_style = False + + return model, x, timestep, uncond, cond, cond_scale, model_options, seed + + def block_proc(h, flag, transformer_options): + if not self.use_adain: + return h + + if flag != 'after': + return h + + location = transformer_options['block'] + + sigma = transformer_options["sigmas"][0].item() + if not (sigma_min <= sigma <= sigma_max): + return h + + channel = int(h.shape[1]) + minimal_channel = 1500 - 1000 * weight + + if channel < minimal_channel: + return h + + if self.is_recording_style: + self.recorded_h[location] = torch.std_mean(h, dim=(2, 3), keepdim=True, correction=0) + return h + else: + cond_indices = transformer_options['cond_indices'] + uncond_indices = transformer_options['uncond_indices'] + cond_or_uncond = transformer_options['cond_or_uncond'] + r_std, r_mean = self.recorded_h[location] + + h_c = h[cond_indices] + h_uc = h[uncond_indices] + + o_c = adain(h_c, r_std, r_mean) + o_uc_strong = h_uc + o_uc_weak = adain(h_uc, r_std, r_mean) + o_uc = o_uc_weak + (o_uc_strong - o_uc_weak) * style_fidelity + + recon = [] + for cx in cond_or_uncond: + if cx == 0: + recon.append(o_c) + else: + recon.append(o_uc) + + o = torch.cat(recon, dim=0) + return o + + def attn1_proc(q, k, v, transformer_options): + if not self.use_attn: + return sdp(q, k, v, transformer_options) + + sigma = transformer_options["sigmas"][0].item() + if not (sigma_min <= sigma <= sigma_max): + return sdp(q, k, v, transformer_options) + + location = (transformer_options['block'][0], transformer_options['block'][1], + transformer_options['block_index']) + + channel = int(q.shape[2]) + minimal_channel = 1500 - 1280 * weight + + if channel < minimal_channel: + return sdp(q, k, v, transformer_options) + + if self.is_recording_style: + self.recorded_attn1[location] = (k, v) + return sdp(q, k, v, transformer_options) + else: + cond_indices = transformer_options['cond_indices'] + uncond_indices = transformer_options['uncond_indices'] + cond_or_uncond = transformer_options['cond_or_uncond'] + + q_c = q[cond_indices] + q_uc = q[uncond_indices] + + k_c = k[cond_indices] + k_uc = k[uncond_indices] + + v_c = v[cond_indices] + v_uc = v[uncond_indices] + + k_r, v_r = self.recorded_attn1[location] + + o_c = sdp(q_c, zero_cat(k_c, k_r, dim=1), zero_cat(v_c, v_r, dim=1), transformer_options) + o_uc_strong = sdp(q_uc, k_uc, v_uc, transformer_options) + o_uc_weak = sdp(q_uc, zero_cat(k_uc, k_r, dim=1), zero_cat(v_uc, v_r, dim=1), transformer_options) + o_uc = o_uc_weak + (o_uc_strong - o_uc_weak) * style_fidelity + + recon = [] + for cx in cond_or_uncond: + if cx == 0: 
+ recon.append(o_c) + else: + recon.append(o_uc) + + o = torch.cat(recon, dim=0) + return o + + unet.add_block_modifier(block_proc) + unet.add_conditioning_modifier(conditioning_modifier) + unet.set_model_replace_all(attn1_proc, 'attn1') + + process.sd_model.forge_objects.unet = unet + + return cond, mask + + +add_supported_preprocessor(PreprocessorReference( + name='reference_only', + use_attn=True, + use_adain=False, + priority=100 +)) + +add_supported_preprocessor(PreprocessorReference( + name='reference_adain', + use_attn=False, + use_adain=True +)) + +add_supported_preprocessor(PreprocessorReference( + name='reference_adain+attn', + use_attn=True, + use_adain=True +)) diff --git a/extensions-builtin/forge_preprocessor_revision/scripts/preprocessor_revision.py b/extensions-builtin/forge_preprocessor_revision/scripts/preprocessor_revision.py new file mode 100644 index 0000000000000000000000000000000000000000..a2268f9d43e97b9e258aee49d45e6c71abd6d9eb --- /dev/null +++ b/extensions-builtin/forge_preprocessor_revision/scripts/preprocessor_revision.py @@ -0,0 +1,103 @@ +import torch +import copy + +from modules_forge.supported_preprocessor import PreprocessorClipVision, PreprocessorParameter +from modules_forge.shared import add_supported_preprocessor + + +def revision_conditioning_modifier(model, x, timestep, uncond, cond, cond_scale, model_options, seed): + revision_conditions = model_options['revision_conditions'] + noise_augmentor = model.noise_augmentor + noise_augment_merge = 0.0 + ignore_prompt = False + + adm_inputs = [] + weights = [] + noise_aug = [] + for revision_condition in revision_conditions: + adm_cond = revision_condition['cond'].image_embeds + weight = revision_condition["weight"] + noise_augment = revision_condition["noise_aug"] + noise_level = round((noise_augmentor.max_noise_level - 1) * noise_augment) + c_adm, noise_level_emb = noise_augmentor(adm_cond.to(x.device), + noise_level=torch.tensor([noise_level], device=x.device), seed=seed) + adm_out = torch.cat((c_adm, noise_level_emb), 1) * weight + weights.append(weight) + noise_aug.append(noise_augment) + adm_inputs.append(adm_out) + if revision_condition["ignore_prompt"]: + ignore_prompt = True + + if len(noise_aug) > 1: + adm_out = torch.stack(adm_inputs).sum(0) + noise_augment = noise_augment_merge + noise_level = round((noise_augmentor.max_noise_level - 1) * noise_augment) + c_adm, noise_level_emb = noise_augmentor(adm_out[:, :noise_augmentor.time_embed.dim], + noise_level=torch.tensor([noise_level], device=x.device)) + adm_out = torch.cat((c_adm, noise_level_emb), 1) + + new_y = adm_out[:, :1280] + cond = copy.deepcopy(cond) + uncond = copy.deepcopy(uncond) + + for c in cond: + c['model_conds']['y'].cond[:, :1280] = new_y.clone() + + for c in uncond: + c['model_conds']['y'].cond[:, :1280] = torch.zeros_like(new_y) + + if ignore_prompt: + for c in cond + uncond: + c['model_conds']['c_crossattn'].cond = torch.zeros_like(c['model_conds']['c_crossattn'].cond) + + return model, x, timestep, uncond, cond, cond_scale, model_options, seed + + +class PreprocessorClipVisionForRevision(PreprocessorClipVision): + def __init__(self, name, url, filename, ignore_prompt=False): + super().__init__(name, url, filename) + self.tags = ['Revision'] + self.model_filename_filters = ['Revision'] + self.do_not_need_model = True + self.ignore_prompt = ignore_prompt + self.slider_1 = PreprocessorParameter( + label="Noise Augmentation", minimum=0.0, maximum=1.0, value=0.0, visible=True) + + def process_before_every_sampling(self, process, 
cond, mask, *args, **kwargs): + unit = kwargs['unit'] + + weight = float(unit.weight) + noise_aug = float(unit.threshold_a) + + unet = process.sd_model.forge_objects.unet.clone() + + if 'revision_conditions' not in unet.model_options: + unet.model_options['revision_conditions'] = [] + + unet.model_options['revision_conditions'].append(dict( + cond=cond, + weight=weight, + noise_aug=noise_aug, + ignore_prompt=self.ignore_prompt + )) + + unet.add_conditioning_modifier(revision_conditioning_modifier, ensure_uniqueness=True) + + process.sd_model.forge_objects.unet = unet + + return cond, mask + + +add_supported_preprocessor(PreprocessorClipVisionForRevision( + name='CLIP-G (Revision)', + url='https://huggingface.co/h94/IP-Adapter/resolve/main/sdxl_models/image_encoder/model.safetensors', + filename='CLIP-ViT-bigG.safetensors', + ignore_prompt=False +)) + +add_supported_preprocessor(PreprocessorClipVisionForRevision( + name='CLIP-G (Revision ignore prompt)', + url='https://huggingface.co/h94/IP-Adapter/resolve/main/sdxl_models/image_encoder/model.safetensors', + filename='CLIP-ViT-bigG.safetensors', + ignore_prompt=True +)) diff --git a/extensions-builtin/forge_preprocessor_tile/scripts/preprocessor_tile.py b/extensions-builtin/forge_preprocessor_tile/scripts/preprocessor_tile.py new file mode 100644 index 0000000000000000000000000000000000000000..083a3e8ff9803f242d4c63d2de0e8da1139faad4 --- /dev/null +++ b/extensions-builtin/forge_preprocessor_tile/scripts/preprocessor_tile.py @@ -0,0 +1,102 @@ +import torch + +from modules_forge.supported_preprocessor import Preprocessor, PreprocessorParameter +from modules_forge.shared import add_supported_preprocessor + + +def blur(x, k): + y = torch.nn.functional.pad(x, (k, k, k, k), mode='replicate') + y = torch.nn.functional.avg_pool2d(y, (k * 2 + 1, k * 2 + 1), stride=(1, 1)) + return y + + +class PreprocessorTile(Preprocessor): + def __init__(self): + super().__init__() + self.name = 'tile_resample' + self.tags = ['Tile'] + self.model_filename_filters = ['tile'] + self.slider_resolution = PreprocessorParameter(visible=False) + self.latent = None + + def register_latent(self, process, cond): + vae = process.sd_model.forge_objects.vae + # This is a powerful VAE with integrated memory management, bf16, and tiled fallback. 
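(A quick note on the `blur` helper defined above: replicate padding by k followed by average pooling with a (2k+1) x (2k+1) window at stride 1 is simply a size-preserving box filter over the latent. A small standalone check, torch only and independent of the webui, could look like this:)

```python
import torch
import torch.nn.functional as F

def blur(x, k):
    # pad by k on every side so the (2k+1)-wide averaging window keeps the spatial size
    y = F.pad(x, (k, k, k, k), mode='replicate')
    y = F.avg_pool2d(y, (k * 2 + 1, k * 2 + 1), stride=(1, 1))
    return y

x = torch.arange(16.0).reshape(1, 1, 4, 4)
y = blur(x, 1)

print(y.shape)        # torch.Size([1, 1, 4, 4]) -- same spatial size as the input
print(y[0, 0, 0, 0])  # mean of the replicate-padded 3x3 neighborhood of the top-left pixel
```

(The colorfix variant below uses this filter to swap the low-frequency component of the denoised latent for that of the reference latent, which is what keeps colors anchored to the input tile.)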
+ + latent_image = vae.encode(cond.movedim(1, -1)) + latent_image = process.sd_model.forge_objects.unet.model.latent_format.process_in(latent_image) + self.latent = latent_image + return self.latent + + +class PreprocessorTileColorFix(PreprocessorTile): + def __init__(self): + super().__init__() + self.name = 'tile_colorfix' + self.slider_1 = PreprocessorParameter(label='Variation', value=8.0, minimum=3.0, maximum=32.0, step=1.0, visible=True) + self.variation = 8 + self.sharpness = None + + def process_before_every_sampling(self, process, cond, mask, *args, **kwargs): + self.variation = int(kwargs['unit'].threshold_a) + + latent = self.register_latent(process, cond) + + unet = process.sd_model.forge_objects.unet.clone() + sigma_data = process.sd_model.forge_objects.unet.model.model_sampling.sigma_data + + if getattr(process, 'is_hr_pass', False): + k = int(self.variation * 2) + else: + k = int(self.variation) + + def block_proc(h, flag, transformer_options): + location, block_id = transformer_options['block'] + cond_mark = transformer_options['cond_mark'][:, None, None, None] # cond is 0 + + if location == 'input' and block_id == 0 and flag == 'before': + sigma = transformer_options['sigmas'].to(h) + self.x_input = h[:, :4] # Inpaint fix + self.x_input_sigma_space = self.x_input * (sigma ** 2 + sigma_data ** 2) ** 0.5 + + if location == 'last' and block_id == 0 and flag == 'after': + sigma = transformer_options['sigmas'].to(h) + eps_estimation = h[:, :4] + denoised = self.x_input_sigma_space - eps_estimation * sigma + + denoised = denoised - blur(denoised, k) + blur(latent.to(denoised), k) + + if isinstance(self.sharpness, float): + detail_weight = float(self.sharpness) * 0.01 + neg = detail_weight * blur(denoised, k) + (1 - detail_weight) * denoised + denoised = (1 - cond_mark) * denoised + cond_mark * neg + + eps_modified = (self.x_input_sigma_space - denoised) / sigma + + return eps_modified + + return h + + unet.add_block_modifier(block_proc) + + process.sd_model.forge_objects.unet = unet + + return cond, mask + + +class PreprocessorTileColorFixSharp(PreprocessorTileColorFix): + def __init__(self): + super().__init__() + self.name = 'tile_colorfix+sharp' + self.slider_2 = PreprocessorParameter(label='Sharpness', value=1.0, minimum=0.0, maximum=2.0, step=0.01, visible=True) + + def process_before_every_sampling(self, process, cond, mask, *args, **kwargs): + self.sharpness = float(kwargs['unit'].threshold_b) + return super().process_before_every_sampling(process, cond, mask, *args, **kwargs) + + +add_supported_preprocessor(PreprocessorTile()) + +add_supported_preprocessor(PreprocessorTileColorFix()) + +add_supported_preprocessor(PreprocessorTileColorFixSharp()) diff --git a/extensions-builtin/mobile/javascript/mobile.js b/extensions-builtin/mobile/javascript/mobile.js new file mode 100644 index 0000000000000000000000000000000000000000..bff1acedff37d3683a01eb2bd6be76a8d33c0296 --- /dev/null +++ b/extensions-builtin/mobile/javascript/mobile.js @@ -0,0 +1,34 @@ +var isSetupForMobile = false; + +function isMobile() { + for (var tab of ["txt2img", "img2img"]) { + var imageTab = gradioApp().getElementById(tab + '_results'); + if (imageTab && imageTab.offsetParent && imageTab.offsetLeft == 0) { + return true; + } + } + + return false; +} + +function reportWindowSize() { + if (gradioApp().querySelector('.toprow-compact-tools')) return; // not applicable for compact prompt layout + + var currentlyMobile = isMobile(); + if (currentlyMobile == isSetupForMobile) return; + isSetupForMobile = 
currentlyMobile; + + for (var tab of ["txt2img", "img2img"]) { + var button = gradioApp().getElementById(tab + '_generate_box'); + var target = gradioApp().getElementById(currentlyMobile ? tab + '_results' : tab + '_actions_column'); + target.insertBefore(button, target.firstElementChild); + + gradioApp().getElementById(tab + '_results').classList.toggle('mobile', currentlyMobile); + } +} + +window.addEventListener("resize", reportWindowSize); + +onUiLoaded(function() { + reportWindowSize(); +}); diff --git a/extensions-builtin/prompt-bracket-checker/javascript/prompt-bracket-checker.js b/extensions-builtin/prompt-bracket-checker/javascript/prompt-bracket-checker.js new file mode 100644 index 0000000000000000000000000000000000000000..114cf94ccbf69b473757f2fc46443a39723a9269 --- /dev/null +++ b/extensions-builtin/prompt-bracket-checker/javascript/prompt-bracket-checker.js @@ -0,0 +1,42 @@ +// Stable Diffusion WebUI - Bracket checker +// By Hingashi no Florin/Bwin4L & @akx +// Counts open and closed brackets (round, square, curly) in the prompt and negative prompt text boxes in the txt2img and img2img tabs. +// If there's a mismatch, the keyword counter turns red and if you hover on it, a tooltip tells you what's wrong. + +function checkBrackets(textArea, counterElt) { + var counts = {}; + (textArea.value.match(/[(){}[\]]/g) || []).forEach(bracket => { + counts[bracket] = (counts[bracket] || 0) + 1; + }); + var errors = []; + + function checkPair(open, close, kind) { + if (counts[open] !== counts[close]) { + errors.push( + `${open}...${close} - Detected ${counts[open] || 0} opening and ${counts[close] || 0} closing ${kind}.` + ); + } + } + + checkPair('(', ')', 'round brackets'); + checkPair('[', ']', 'square brackets'); + checkPair('{', '}', 'curly brackets'); + counterElt.title = errors.join('\n'); + counterElt.classList.toggle('error', errors.length !== 0); +} + +function setupBracketChecking(id_prompt, id_counter) { + var textarea = gradioApp().querySelector("#" + id_prompt + " > label > textarea"); + var counter = gradioApp().getElementById(id_counter); + + if (textarea && counter) { + textarea.addEventListener("input", () => checkBrackets(textarea, counter)); + } +} + +onUiLoaded(function() { + setupBracketChecking('txt2img_prompt', 'txt2img_token_counter'); + setupBracketChecking('txt2img_neg_prompt', 'txt2img_negative_token_counter'); + setupBracketChecking('img2img_prompt', 'img2img_token_counter'); + setupBracketChecking('img2img_neg_prompt', 'img2img_negative_token_counter'); +}); diff --git a/extensions-builtin/sd_forge_controlllite/lib_controllllite/lib_controllllite.py b/extensions-builtin/sd_forge_controlllite/lib_controllllite/lib_controllllite.py new file mode 100644 index 0000000000000000000000000000000000000000..73c459215f0d5814c0cb2286b4dba3e061e7844a --- /dev/null +++ b/extensions-builtin/sd_forge_controlllite/lib_controllllite/lib_controllllite.py @@ -0,0 +1,271 @@ +import math +import torch +import os + +import ldm_patched.modules + + +def extra_options_to_module_prefix(extra_options): + # extra_options = {'transformer_index': 2, 'block_index': 8, 'original_shape': [2, 4, 128, 128], 'block': ('input', 7), 'n_heads': 20, 'dim_head': 64} + + # block is: [('input', 4), ('input', 5), ('input', 7), ('input', 8), ('middle', 0), + # ('output', 0), ('output', 1), ('output', 2), ('output', 3), ('output', 4), ('output', 5)] + # transformer_index is: [0, 1, 2, 3, 4, 5, 6, 7, 8], for each block + # block_index is: 0-1 or 0-9, depends on the block + # input 7 and 8, middle has 10 
blocks + + # make module name from extra_options + block = extra_options["block"] + block_index = extra_options["block_index"] + if block[0] == "input": + module_pfx = f"lllite_unet_input_blocks_{block[1]}_1_transformer_blocks_{block_index}" + elif block[0] == "middle": + module_pfx = f"lllite_unet_middle_block_1_transformer_blocks_{block_index}" + elif block[0] == "output": + module_pfx = f"lllite_unet_output_blocks_{block[1]}_1_transformer_blocks_{block_index}" + else: + raise Exception("invalid block name") + return module_pfx + + +def load_control_net_lllite_patch(ctrl_sd, cond_image, multiplier, num_steps, start_percent, end_percent): + # calculate start and end step + start_step = math.floor(num_steps * start_percent) if start_percent > 0 else 0 + end_step = math.floor(num_steps * end_percent) if end_percent > 0 else num_steps + + # split each weights for each module + module_weights = {} + for key, value in ctrl_sd.items(): + fragments = key.split(".") + module_name = fragments[0] + weight_name = ".".join(fragments[1:]) + + if module_name not in module_weights: + module_weights[module_name] = {} + module_weights[module_name][weight_name] = value + + # load each module + modules = {} + for module_name, weights in module_weights.items(): + # TODO: improve this automatic depth detection + if "conditioning1.4.weight" in weights: + depth = 3 + elif weights["conditioning1.2.weight"].shape[-1] == 4: + depth = 2 + else: + depth = 1 + + module = LLLiteModule( + name=module_name, + is_conv2d=weights["down.0.weight"].ndim == 4, + in_dim=weights["down.0.weight"].shape[1], + depth=depth, + cond_emb_dim=weights["conditioning1.0.weight"].shape[0] * 2, + mlp_dim=weights["down.0.weight"].shape[0], + multiplier=multiplier, + num_steps=num_steps, + start_step=start_step, + end_step=end_step, + ) + info = module.load_state_dict(weights) + modules[module_name] = module + if len(modules) == 1: + module.is_first = True + + print(f"{len(modules)} modules") + + # set the cond image + cond_image = cond_image.permute(0, 3, 1, 2) # b,h,w,3 -> b,3,h,w + cond_image = cond_image * 2.0 - 1.0 # 0-1 -> -1-+1 + + for module in modules.values(): + module.set_cond_image(cond_image) + + class control_net_lllite_patch: + def __init__(self, modules): + self.modules = modules + + def __call__(self, q, k, v, extra_options): + module_pfx = extra_options_to_module_prefix(extra_options) + + is_attn1 = q.shape[-1] == k.shape[-1] # self attention + if is_attn1: + module_pfx = module_pfx + "_attn1" + else: + module_pfx = module_pfx + "_attn2" + + module_pfx_to_q = module_pfx + "_to_q" + module_pfx_to_k = module_pfx + "_to_k" + module_pfx_to_v = module_pfx + "_to_v" + + if module_pfx_to_q in self.modules: + q = q + self.modules[module_pfx_to_q](q) + if module_pfx_to_k in self.modules: + k = k + self.modules[module_pfx_to_k](k) + if module_pfx_to_v in self.modules: + v = v + self.modules[module_pfx_to_v](v) + + return q, k, v + + def to(self, device): + for d in self.modules.keys(): + self.modules[d] = self.modules[d].to(device) + return self + + return control_net_lllite_patch(modules) + + +class LLLiteModule(torch.nn.Module): + def __init__( + self, + name: str, + is_conv2d: bool, + in_dim: int, + depth: int, + cond_emb_dim: int, + mlp_dim: int, + multiplier: int, + num_steps: int, + start_step: int, + end_step: int, + ): + super().__init__() + self.name = name + self.is_conv2d = is_conv2d + self.multiplier = multiplier + self.num_steps = num_steps + self.start_step = start_step + self.end_step = end_step + self.is_first = False + + modules = [] + 
modules.append(torch.nn.Conv2d(3, cond_emb_dim // 2, kernel_size=4, stride=4, padding=0)) # to latent (from VAE) size*2 + if depth == 1: + modules.append(torch.nn.ReLU(inplace=True)) + modules.append(torch.nn.Conv2d(cond_emb_dim // 2, cond_emb_dim, kernel_size=2, stride=2, padding=0)) + elif depth == 2: + modules.append(torch.nn.ReLU(inplace=True)) + modules.append(torch.nn.Conv2d(cond_emb_dim // 2, cond_emb_dim, kernel_size=4, stride=4, padding=0)) + elif depth == 3: + # kernel size 8 is too large, so set it to 4 + modules.append(torch.nn.ReLU(inplace=True)) + modules.append(torch.nn.Conv2d(cond_emb_dim // 2, cond_emb_dim // 2, kernel_size=4, stride=4, padding=0)) + modules.append(torch.nn.ReLU(inplace=True)) + modules.append(torch.nn.Conv2d(cond_emb_dim // 2, cond_emb_dim, kernel_size=2, stride=2, padding=0)) + + self.conditioning1 = torch.nn.Sequential(*modules) + + if self.is_conv2d: + self.down = torch.nn.Sequential( + torch.nn.Conv2d(in_dim, mlp_dim, kernel_size=1, stride=1, padding=0), + torch.nn.ReLU(inplace=True), + ) + self.mid = torch.nn.Sequential( + torch.nn.Conv2d(mlp_dim + cond_emb_dim, mlp_dim, kernel_size=1, stride=1, padding=0), + torch.nn.ReLU(inplace=True), + ) + self.up = torch.nn.Sequential( + torch.nn.Conv2d(mlp_dim, in_dim, kernel_size=1, stride=1, padding=0), + ) + else: + self.down = torch.nn.Sequential( + torch.nn.Linear(in_dim, mlp_dim), + torch.nn.ReLU(inplace=True), + ) + self.mid = torch.nn.Sequential( + torch.nn.Linear(mlp_dim + cond_emb_dim, mlp_dim), + torch.nn.ReLU(inplace=True), + ) + self.up = torch.nn.Sequential( + torch.nn.Linear(mlp_dim, in_dim), + ) + + self.depth = depth + self.cond_image = None + self.cond_emb = None + self.current_step = 0 + + # @torch.inference_mode() + def set_cond_image(self, cond_image): + # print("set_cond_image", self.name) + self.cond_image = cond_image + self.cond_emb = None + self.current_step = 0 + + def forward(self, x): + if self.num_steps > 0: + if self.current_step < self.start_step: + self.current_step += 1 + return torch.zeros_like(x) + elif self.current_step >= self.end_step: + if self.is_first and self.current_step == self.end_step: + print(f"end LLLite: step {self.current_step}") + self.current_step += 1 + if self.current_step >= self.num_steps: + self.current_step = 0 # reset + return torch.zeros_like(x) + else: + if self.is_first and self.current_step == self.start_step: + print(f"start LLLite: step {self.current_step}") + self.current_step += 1 + if self.current_step >= self.num_steps: + self.current_step = 0 # reset + + if self.cond_emb is None: + # print(f"cond_emb is None, {self.name}") + cx = self.conditioning1(self.cond_image.to(x.device, dtype=x.dtype)) + if not self.is_conv2d: + # reshape / b,c,h,w -> b,h*w,c + n, c, h, w = cx.shape + cx = cx.view(n, c, h * w).permute(0, 2, 1) + self.cond_emb = cx + + cx = self.cond_emb + # print(f"forward {self.name}, {cx.shape}, {x.shape}") + + # with uncond/cond, x has twice the batch size of the cond embedding + if x.shape[0] != cx.shape[0]: + if self.is_conv2d: + cx = cx.repeat(x.shape[0] // cx.shape[0], 1, 1, 1) + else: + # print("x.shape[0] != cx.shape[0]", x.shape[0], cx.shape[0]) + cx = cx.repeat(x.shape[0] // cx.shape[0], 1, 1) + + cx = torch.cat([cx, self.down(x)], dim=1 if self.is_conv2d else 2) + cx = self.mid(cx) + cx = self.up(cx) + return cx * self.multiplier + + +class LLLiteLoader: + def __init__(self): + pass + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "model": ("MODEL",), + "model_name": None, + "cond_image": ("IMAGE",), + "strength": 
("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}), + "steps": ("INT", {"default": 0, "min": 0, "max": 200, "step": 1}), + "start_percent": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 100.0, "step": 0.1}), + "end_percent": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 100.0, "step": 0.1}), + } + } + + RETURN_TYPES = ("MODEL",) + FUNCTION = "load_lllite" + CATEGORY = "loaders" + + def load_lllite(self, model, state_dict, cond_image, strength, steps, start_percent, end_percent): + # cond_image is b,h,w,3, 0-1 + + model_lllite = model.clone() + patch = load_control_net_lllite_patch(state_dict, cond_image, strength, steps, start_percent, end_percent) + if patch is not None: + model_lllite.set_model_attn1_patch(patch) + model_lllite.set_model_attn2_patch(patch) + + return (model_lllite,) diff --git a/extensions-builtin/sd_forge_controlllite/scripts/forge_controllllite.py b/extensions-builtin/sd_forge_controlllite/scripts/forge_controllllite.py new file mode 100644 index 0000000000000000000000000000000000000000..4a83a915b58d51480bf8e67be27b84d51314ce2c --- /dev/null +++ b/extensions-builtin/sd_forge_controlllite/scripts/forge_controllllite.py @@ -0,0 +1,38 @@ +from modules_forge.shared import add_supported_control_model +from modules_forge.supported_controlnet import ControlModelPatcher +from lib_controllllite.lib_controllllite import LLLiteLoader + + +opLLLiteLoader = LLLiteLoader().load_lllite + + +class ControlLLLitePatcher(ControlModelPatcher): + @staticmethod + def try_build_from_state_dict(state_dict, ckpt_path): + if not any('lllite' in k for k in state_dict.keys()): + return None + return ControlLLLitePatcher(state_dict) + + def __init__(self, state_dict): + super().__init__() + self.state_dict = state_dict + return + + def process_before_every_sampling(self, process, cond, mask, *args, **kwargs): + unet = process.sd_model.forge_objects.unet + + unet = opLLLiteLoader( + model=unet, + state_dict=self.state_dict, + cond_image=cond.movedim(1, -1), + strength=self.strength, + steps=process.steps, + start_percent=self.start_percent, + end_percent=self.end_percent + )[0] + + process.sd_model.forge_objects.unet = unet + return + + +add_supported_control_model(ControlLLLitePatcher) diff --git a/extensions-builtin/sd_forge_controlnet/.gitignore b/extensions-builtin/sd_forge_controlnet/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..60d06e51ec71848d6700eac9c6f3db544ef3c1a0 --- /dev/null +++ b/extensions-builtin/sd_forge_controlnet/.gitignore @@ -0,0 +1,185 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea +*.pt +*.pth +*.ckpt +*.bin +*.safetensors + +# Editor setting metadata +.idea/ +.vscode/ +detected_maps/ +annotator/downloads/ + +# test results and expectations +web_tests/results/ +web_tests/expectations/ +tests/web_api/full_coverage/results/ +tests/web_api/full_coverage/expectations/ + +*_diff.png + +# Presets +presets/ + +# Ignore existing dir of hand refiner if exists. 
+annotator/hand_refiner_portable \ No newline at end of file diff --git a/extensions-builtin/sd_forge_controlnet/LICENSE b/extensions-builtin/sd_forge_controlnet/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..f288702d2fa16d3cdf0035b15a9fcbc552cd88e7 --- /dev/null +++ b/extensions-builtin/sd_forge_controlnet/LICENSE @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. 
+States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. 
+ + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. 
+ + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. 
+ + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. 
+ + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. 
If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). 
To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. 
+ + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. 
+ + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + <program> Copyright (C) <year> <name of author> + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +<https://www.gnu.org/licenses/>. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +<https://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/extensions-builtin/sd_forge_controlnet/install.py b/extensions-builtin/sd_forge_controlnet/install.py new file mode 100644 index 0000000000000000000000000000000000000000..5370d2213b8a2ac8319dc1995305cf4325e423a9 --- /dev/null +++ b/extensions-builtin/sd_forge_controlnet/install.py @@ -0,0 +1,66 @@ +import launch +import pkg_resources +import sys +import os +import shutil +import platform +from pathlib import Path +from typing import Tuple, Optional + + +repo_root = Path(__file__).parent +main_req_file = repo_root / "requirements.txt" + + +def comparable_version(version: str) -> Tuple: + return tuple(version.split(".")) + + +def get_installed_version(package: str) -> Optional[str]: + try: + return pkg_resources.get_distribution(package).version + except Exception: + return None + + +def extract_base_package(package_string: str) -> str: + base_package = package_string.split("@git")[0] + return base_package + + +def install_requirements(req_file): + with open(req_file) as file: + for package in file: + try: + package = package.strip() + if "==" in package: + package_name, package_version = package.split("==") + installed_version = get_installed_version(package_name) + if installed_version != package_version: + launch.run_pip( + f"install -U {package}", + f"sd-forge-controlnet requirement: changing {package_name} version from {installed_version} to {package_version}", + ) + elif ">=" in package: + package_name, package_version = package.split(">=") + installed_version = get_installed_version(package_name) + if not installed_version or comparable_version( + installed_version + ) < comparable_version(package_version): + launch.run_pip( + f"install -U {package}", + f"sd-forge-controlnet requirement: changing {package_name} version from {installed_version} to {package_version}", + ) + elif not launch.is_installed(extract_base_package(package)): + launch.run_pip( + f"install {package}", + f"sd-forge-controlnet requirement: {package}", + ) + except Exception as e: + print(e) + print( + f"Warning: Failed to install {package}, some preprocessors may not work." + ) + + +install_requirements(main_req_file) diff --git a/extensions-builtin/sd_forge_controlnet/javascript/active_units.js b/extensions-builtin/sd_forge_controlnet/javascript/active_units.js new file mode 100644 index 0000000000000000000000000000000000000000..a3ba0fc3adf5a357c5a02465d938bae860f864b0 --- /dev/null +++ b/extensions-builtin/sd_forge_controlnet/javascript/active_units.js @@ -0,0 +1,422 @@ +/** + * Give a badge on ControlNet Accordion indicating total number of active + * units. + * Make active unit's tab name green. + * Append control type to tab name. + * Disable resize mode selection when A1111 img2img input is used. 
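The comparable_version() helper in install.py above turns a dotted version into a tuple of strings, so the ">=" requirement check is lexicographic and, for example, treats "0.10.0" as older than "0.9.0". A numeric variant is sketched below purely as an illustration (it is not part of the diff, and pre-release suffixes such as "1.0.0rc1" would still need packaging.version.parse for robust handling):

def comparable_version(version: str) -> tuple:
    # Compare dotted versions numerically: (0, 10, 0) > (0, 9, 0), unlike the
    # string tuples produced by the helper shipped in the diff above.
    return tuple(int(part) for part in version.split("."))

assert comparable_version("0.10.0") > comparable_version("0.9.0")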
+ */ +(function () { + const cnetAllAccordions = new Set(); + onUiUpdate(() => { + const ImgChangeType = { + NO_CHANGE: 0, + REMOVE: 1, + ADD: 2, + SRC_CHANGE: 3, + }; + + function imgChangeObserved(mutationsList) { + // Iterate over all mutations that just occured + for (let mutation of mutationsList) { + // Check if the mutation is an addition or removal of a node + if (mutation.type === 'childList') { + // Check if nodes were added + if (mutation.addedNodes.length > 0) { + for (const node of mutation.addedNodes) { + if (node.tagName === 'IMG') { + return ImgChangeType.ADD; + } + } + } + + // Check if nodes were removed + if (mutation.removedNodes.length > 0) { + for (const node of mutation.removedNodes) { + if (node.tagName === 'IMG') { + return ImgChangeType.REMOVE; + } + } + } + } + // Check if the mutation is a change of an attribute + else if (mutation.type === 'attributes') { + if (mutation.target.tagName === 'IMG' && mutation.attributeName === 'src') { + return ImgChangeType.SRC_CHANGE; + } + } + } + return ImgChangeType.NO_CHANGE; + } + + function childIndex(element) { + // Get all child nodes of the parent + let children = Array.from(element.parentNode.childNodes); + + // Filter out non-element nodes (like text nodes and comments) + children = children.filter(child => child.nodeType === Node.ELEMENT_NODE); + + return children.indexOf(element); + } + + function imageInputDisabledAlert() { + alert('Inpaint control type must use a1111 input in img2img mode.'); + } + + class ControlNetUnitTab { + constructor(tab, accordion) { + this.tab = tab; + this.tabOpen = false; // Whether the tab is open. + this.accordion = accordion; + this.isImg2Img = tab.querySelector('.cnet-mask-upload').id.includes('img2img'); + + this.enabledAccordionCheckbox = tab.querySelector('.input-accordion-checkbox'); + this.enabledCheckbox = tab.querySelector('.cnet-unit-enabled input'); + this.inputImage = tab.querySelector('.cnet-input-image-group .cnet-image input[type="file"]'); + this.inputImageContainer = tab.querySelector('.cnet-input-image-group .cnet-image'); + this.generatedImageGroup = tab.querySelector('.cnet-generated-image-group'); + this.maskImageGroup = tab.querySelector('.cnet-mask-image-group'); + this.inputImageGroup = tab.querySelector('.cnet-input-image-group'); + this.controlTypeRadios = tab.querySelectorAll('.controlnet_control_type_filter_group input[type="radio"]'); + this.resizeModeRadios = tab.querySelectorAll('.controlnet_resize_mode_radio input[type="radio"]'); + this.runPreprocessorButton = tab.querySelector('.cnet-run-preprocessor'); + + this.tabs = tab.parentNode; + this.tabIndex = childIndex(tab); + + // By default the InputAccordion checkbox is linked with the state + // of accordion's open/close state. To disable this link, we can + // simulate click to check the checkbox and uncheck it. + this.enabledAccordionCheckbox.click(); + this.enabledAccordionCheckbox.click(); + + this.sync_enabled_checkbox(); + this.attachEnabledButtonListener(); + this.attachControlTypeRadioListener(); + this.attachImageUploadListener(); + this.attachImageStateChangeObserver(); + this.attachA1111SendInfoObserver(); + this.attachPresetDropdownObserver(); + this.attachAccordionStateObserver(); + } + + /** + * Sync the states of enabledCheckbox and enabledAccordionCheckbox. 
+ */ + sync_enabled_checkbox() { + this.enabledCheckbox.addEventListener("change", () => { + if (this.enabledAccordionCheckbox.checked != this.enabledCheckbox.checked) { + this.enabledAccordionCheckbox.click(); + } + }); + this.enabledAccordionCheckbox.addEventListener("change", () => { + if (this.enabledCheckbox.checked != this.enabledAccordionCheckbox.checked) { + this.enabledCheckbox.click(); + } + }); + } + /** + * Get the span that has text "Unit {X}". + */ + getUnitHeaderTextElement() { + return this.tab.querySelector( + `:nth-child(${this.tabIndex + 1}) span.svelte-s1r2yt` + ); + } + + getActiveControlType() { + for (let radio of this.controlTypeRadios) { + if (radio.checked) { + return radio.value; + } + } + return undefined; + } + + updateActiveState() { + const unitHeader = this.getUnitHeaderTextElement(); + if (!unitHeader) return; + + if (this.enabledCheckbox.checked) { + unitHeader.classList.add('cnet-unit-active'); + } else { + unitHeader.classList.remove('cnet-unit-active'); + } + } + + updateActiveUnitCount() { + function getActiveUnitCount(checkboxes) { + let activeUnitCount = 0; + for (const checkbox of checkboxes) { + if (checkbox.checked) + activeUnitCount++; + } + return activeUnitCount; + } + + const checkboxes = this.accordion.querySelectorAll('.cnet-unit-enabled input'); + const span = this.accordion.querySelector('.label-wrap span'); + + // Remove existing badge. + if (span.childNodes.length !== 1) { + span.removeChild(span.lastChild); + } + // Add new badge if necessary. + const activeUnitCount = getActiveUnitCount(checkboxes); + if (activeUnitCount > 0) { + const div = document.createElement('div'); + div.classList.add('cnet-badge'); + div.classList.add('primary'); + div.innerHTML = `${activeUnitCount} unit${activeUnitCount > 1 ? 's' : ''}`; + span.appendChild(div); + } + } + + /** + * Add the active control type to tab displayed text. + */ + updateActiveControlType() { + const unitHeader = this.getUnitHeaderTextElement(); + if (!unitHeader) return; + + // Remove the control if exists + const controlTypeSuffix = unitHeader.querySelector('.control-type-suffix'); + if (controlTypeSuffix) controlTypeSuffix.remove(); + + // Add new suffix. + const controlType = this.getActiveControlType(); + if (controlType === 'All') return; + + const span = document.createElement('span'); + span.innerHTML = `[${controlType}]`; + span.classList.add('control-type-suffix'); + unitHeader.appendChild(span); + } + getInputImageSrc() { + const img = this.inputImageGroup.querySelector('.cnet-image img'); + return img ? img.src : null; + } + getPreprocessorPreviewImageSrc() { + const img = this.generatedImageGroup.querySelector('.cnet-image img'); + return img ? img.src : null; + } + getMaskImageSrc() { + function isEmptyCanvas(canvas) { + if (!canvas) return true; + const ctx = canvas.getContext('2d'); + // Get the image data + const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height); + const data = imageData.data; // This is a Uint8ClampedArray + // Check each pixel + let isPureBlack = true; + for (let i = 0; i < data.length; i += 4) { + if (data[i] !== 0 || data[i + 1] !== 0 || data[i + 2] !== 0) { // Check RGB values + isPureBlack = false; + break; + } + } + return isPureBlack; + } + const maskImg = this.maskImageGroup.querySelector('.cnet-mask-image img'); + // Hand-drawn mask on mask upload. + const handDrawnMaskCanvas = this.maskImageGroup.querySelector('.cnet-mask-image canvas[key="mask"]'); + // Hand-drawn mask on input image upload. 
+ const inputImageHandDrawnMaskCanvas = this.inputImageGroup.querySelector('.cnet-image canvas[key="mask"]'); + if (!isEmptyCanvas(handDrawnMaskCanvas)) { + return handDrawnMaskCanvas.toDataURL(); + } else if (maskImg) { + return maskImg.src; + } else if (!isEmptyCanvas(inputImageHandDrawnMaskCanvas)) { + return inputImageHandDrawnMaskCanvas.toDataURL(); + } else { + return null; + } + } + setThumbnail(imgSrc, maskSrc) { + if (!imgSrc) return; + const unitHeader = this.getUnitHeaderTextElement(); + if (!unitHeader) return; + const img = document.createElement('img'); + img.src = imgSrc; + img.classList.add('cnet-thumbnail'); + unitHeader.appendChild(img); + + if (maskSrc) { + const mask = document.createElement('img'); + mask.src = maskSrc; + mask.classList.add('cnet-thumbnail'); + unitHeader.appendChild(mask); + } + } + removeThumbnail() { + const unitHeader = this.getUnitHeaderTextElement(); + if (!unitHeader) return; + const imgs = unitHeader.querySelectorAll('.cnet-thumbnail'); + for (const img of imgs) { + img.remove(); + } + } + /** + * When the accordion is folded, display a thumbnail of input image + * and mask on the accordion header. + */ + updateInputImageThumbnail() { + if (!opts.controlnet_input_thumbnail) return; + if (this.tabOpen) { + this.removeThumbnail(); + } else { + this.setThumbnail(this.getInputImageSrc(), this.getMaskImageSrc()); + } + } + + attachEnabledButtonListener() { + this.enabledCheckbox.addEventListener('change', () => { + this.updateActiveState(); + this.updateActiveUnitCount(); + }); + } + + attachControlTypeRadioListener() { + for (const radio of this.controlTypeRadios) { + radio.addEventListener('change', () => { + this.updateActiveControlType(); + }); + } + } + + attachImageUploadListener() { + // Automatically check `enable` checkbox when image is uploaded. + this.inputImage.addEventListener('change', (event) => { + if (!event.target.files) return; + if (!this.enabledCheckbox.checked) + this.enabledCheckbox.click(); + }); + + // Automatically check `enable` checkbox when JSON pose file is uploaded. + this.tab.querySelector('.cnet-upload-pose input').addEventListener('change', (event) => { + if (!event.target.files) return; + if (!this.enabledCheckbox.checked) + this.enabledCheckbox.click(); + }); + } + + attachImageStateChangeObserver() { + new MutationObserver((mutationsList) => { + const changeObserved = imgChangeObserved(mutationsList); + + if (changeObserved === ImgChangeType.ADD) { + // enabling the run preprocessor button + this.runPreprocessorButton.removeAttribute("disabled"); + this.runPreprocessorButton.title = 'Run preprocessor'; + } + + if (changeObserved === ImgChangeType.REMOVE) { + // disabling the run preprocessor button + this.runPreprocessorButton.setAttribute("disabled", true); + this.runPreprocessorButton.title = "No ControlNet input image available"; + } + }).observe(this.inputImageContainer, { + childList: true, + subtree: true, + }); + } + + /** + * Observe send PNG info buttons in A1111, as they can also directly + * set states of ControlNetUnit. + */ + attachA1111SendInfoObserver() { + const pasteButtons = gradioApp().querySelectorAll('#paste'); + const pngButtons = gradioApp().querySelectorAll( + this.isImg2Img ? + '#img2img_tab, #inpaint_tab' : + '#txt2img_tab' + ); + + for (const button of [...pasteButtons, ...pngButtons]) { + button.addEventListener('click', () => { + // The paste/send img generation info feature goes + // though gradio, which is pretty slow. 
Ideally we should + // observe the event when gradio has done the job, but + // that is not an easy task. + // Here we just do a 2 second delay until the refresh. + setTimeout(() => { + this.updateActiveState(); + this.updateActiveUnitCount(); + }, 2000); + }); + } + } + + attachPresetDropdownObserver() { + const presetDropDown = this.tab.querySelector('.cnet-preset-dropdown'); + + new MutationObserver((mutationsList) => { + for (const mutation of mutationsList) { + if (mutation.removedNodes.length > 0) { + setTimeout(() => { + this.updateActiveState(); + this.updateActiveUnitCount(); + this.updateActiveControlType(); + }, 1000); + return; + } + } + }).observe(presetDropDown, { + childList: true, + subtree: true, + }); + } + /** + * Observer that triggers when the ControlNetUnit's accordion(tab) closes. + */ + attachAccordionStateObserver() { + new MutationObserver((mutationsList) => { + for(const mutation of mutationsList) { + if (mutation.type === 'attributes' && mutation.attributeName === 'class') { + const newState = mutation.target.classList.contains('open'); + if (this.tabOpen != newState) { + this.tabOpen = newState; + if (newState) { + this.onAccordionOpen(); + } else { + this.onAccordionClose(); + } + } + } + } + }).observe(this.tab.querySelector('.label-wrap'), { attributes: true, attributeFilter: ['class'] }); + } + + onAccordionOpen() { + this.updateInputImageThumbnail(); + } + + onAccordionClose() { + this.updateInputImageThumbnail(); + } + } + + gradioApp().querySelectorAll('#controlnet').forEach(accordion => { + if (cnetAllAccordions.has(accordion)) return; + const tabs = [...accordion.querySelectorAll('.input-accordion')] + .map(tab => new ControlNetUnitTab(tab, accordion)); + + // On open of main extension accordion, if no unit is enabled, + // open unit 0 for edit. 
+ const labelWrap = accordion.querySelector('.label-wrap'); + const observerAccordionOpen = new MutationObserver(function (mutations) { + for (const mutation of mutations) { + if (mutation.target.classList.contains('open') && + tabs.every(tab => !tab.enabledCheckbox.checked && + !tab.tab.querySelector('.label-wrap').classList.contains('open')) + ) { + tabs[0].tab.querySelector('.label-wrap').click(); + } + } + }); + observerAccordionOpen.observe(labelWrap, { attributes: true, attributeFilter: ['class'] }); + + cnetAllAccordions.add(accordion); + }); + }); +})(); \ No newline at end of file diff --git a/extensions-builtin/sd_forge_controlnet/javascript/canvas.js b/extensions-builtin/sd_forge_controlnet/javascript/canvas.js new file mode 100644 index 0000000000000000000000000000000000000000..a122c9fa24adb71b0050abaadf650b4d6da4e5f1 --- /dev/null +++ b/extensions-builtin/sd_forge_controlnet/javascript/canvas.js @@ -0,0 +1,17 @@ +(function () { + var hasApplied = false; + onUiUpdate(function () { + if (!hasApplied) { + if (typeof window.applyZoomAndPanIntegration === "function") { + hasApplied = true; + window.applyZoomAndPanIntegration("#txt2img_controlnet", Array.from({ length: 20 }, (_, i) => `#txt2img_controlnet_ControlNet-${i}_input_image`)); + window.applyZoomAndPanIntegration("#img2img_controlnet", Array.from({ length: 20 }, (_, i) => `#img2img_controlnet_ControlNet-${i}_input_image`)); + window.applyZoomAndPanIntegration("#txt2img_controlnet", ["#txt2img_controlnet_ControlNet_input_image"]); + window.applyZoomAndPanIntegration("#img2img_controlnet", ["#img2img_controlnet_ControlNet_input_image"]); + //console.log("window.applyZoomAndPanIntegration applied."); + } else { + //console.log("window.applyZoomAndPanIntegration is not available."); + } + } + }); +})(); diff --git a/extensions-builtin/sd_forge_controlnet/javascript/modal.js b/extensions-builtin/sd_forge_controlnet/javascript/modal.js new file mode 100644 index 0000000000000000000000000000000000000000..dc6190de86f70a9a773dd63963c3b79998b13119 --- /dev/null +++ b/extensions-builtin/sd_forge_controlnet/javascript/modal.js @@ -0,0 +1,33 @@ +(function () { + const cnetModalRegisteredElements = new Set(); + onUiUpdate(() => { + // Get all the buttons that open a modal + const btns = gradioApp().querySelectorAll(".cnet-modal-open"); + + // Get all the elements that close a modal + const spans = document.querySelectorAll(".cnet-modal-close"); + + // For each button, add a click event listener that opens the corresponding modal + btns.forEach((btn) => { + if (cnetModalRegisteredElements.has(btn)) return; + cnetModalRegisteredElements.add(btn); + + const modalId = btn.id.replace('cnet-modal-open-', ''); + const modal = document.getElementById("cnet-modal-" + modalId); + btn.addEventListener('click', () => { + modal.style.display = "block"; + }); + }); + + // For each element, add a click event listener that closes the corresponding modal + spans.forEach((span) => { + if (cnetModalRegisteredElements.has(span)) return; + cnetModalRegisteredElements.add(span); + + const modal = span.parentNode; + span.addEventListener('click', () => { + modal.style.display = "none"; + }); + }); + }); +})(); diff --git a/extensions-builtin/sd_forge_controlnet/javascript/openpose_editor.js b/extensions-builtin/sd_forge_controlnet/javascript/openpose_editor.js new file mode 100644 index 0000000000000000000000000000000000000000..350b831bb0373fab550f2cf2e8549cc0f9df85ed --- /dev/null +++ b/extensions-builtin/sd_forge_controlnet/javascript/openpose_editor.js @@ 
-0,0 +1,152 @@ +(function () { + async function checkEditorAvailable() { + const LOCAL_EDITOR_PATH = '/openpose_editor_index'; + const REMOTE_EDITOR_PATH = 'https://huchenlei.github.io/sd-webui-openpose-editor/'; + + async function testEditorPath(path) { + const res = await fetch(path); + return res.status === 200 ? path : null; + } + + // Use local editor if the user has the extension installed. Fallback + // onto remote editor if the local editor is not ready yet. + // See https://github.com/huchenlei/sd-webui-openpose-editor/issues/53 + // for more details. + return await testEditorPath(LOCAL_EDITOR_PATH) || await testEditorPath(REMOTE_EDITOR_PATH); + } + + const cnetOpenposeEditorRegisteredElements = new Set(); + let editorURL = null; + function loadOpenposeEditor() { + // Simulate an `input` DOM event for Gradio Textbox component. Needed after you edit its contents in javascript, otherwise your edits + // will only visible on web page and not sent to python. + function updateInput(target) { + let e = new Event("input", { bubbles: true }) + Object.defineProperty(e, "target", { value: target }) + target.dispatchEvent(e); + } + + function navigateIframe(iframe, editorURL) { + function getPathname(rawURL) { + try { + return new URL(rawURL).pathname; + } catch (e) { + return rawURL; + } + } + + return new Promise((resolve) => { + const darkThemeParam = document.body.classList.contains('dark') ? + new URLSearchParams({ theme: 'dark' }).toString() : + ''; + + window.addEventListener('message', (event) => { + const message = event.data; + if (message['ready']) resolve(); + }, { once: true }); + + if ((editorURL.startsWith("http") ? iframe.src : getPathname(iframe.src)) !== editorURL) { + iframe.src = `${editorURL}?${darkThemeParam}`; + // By default assume 5 second is enough for the openpose editor + // to load. + setTimeout(resolve, 5000); + } else { + // If no navigation is required, immediately return. + resolve(); + } + }); + } + const tabs = gradioApp().querySelectorAll('#controlnet .input-accordion'); + tabs.forEach(tab => { + if (cnetOpenposeEditorRegisteredElements.has(tab)) return; + cnetOpenposeEditorRegisteredElements.add(tab); + + const generatedImageGroup = tab.querySelector('.cnet-generated-image-group'); + const editButton = generatedImageGroup.querySelector('.cnet-edit-pose'); + + editButton.addEventListener('click', async () => { + const inputImageGroup = tab.querySelector('.cnet-input-image-group'); + const inputImage = inputImageGroup.querySelector('.cnet-image img'); + const downloadLink = generatedImageGroup.querySelector('.cnet-download-pose a'); + const modalId = editButton.id.replace('cnet-modal-open-', ''); + const modalIframe = generatedImageGroup.querySelector('.cnet-modal iframe'); + + if (!editorURL) { + editorURL = await checkEditorAvailable(); + if (!editorURL) { + alert("No openpose editor available.") + } + } + + await navigateIframe(modalIframe, editorURL); + modalIframe.contentWindow.postMessage({ + modalId, + imageURL: inputImage ? inputImage.src : undefined, + poseURL: downloadLink.href, + }, '*'); + // Focus the iframe so that the focus is no longer on the `Edit` button. + // Pressing space when the focus is on `Edit` button will trigger + // the click again to resend the frame message. + modalIframe.contentWindow.focus(); + }); + /* + * Writes the pose data URL to an link element on input image group. + * Click a hidden button to trigger a backend rendering of the pose JSON. 
+ * + * The backend should: + * - Set the rendered pose image as preprocessor generated image. + */ + function updatePreviewPose(poseURL) { + const downloadLink = generatedImageGroup.querySelector('.cnet-download-pose a'); + const renderButton = generatedImageGroup.querySelector('.cnet-render-pose'); + const poseTextbox = generatedImageGroup.querySelector('.cnet-pose-json textarea'); + const allowPreviewCheckbox = tab.querySelector('.cnet-allow-preview input'); + + if (!allowPreviewCheckbox.checked) + allowPreviewCheckbox.click(); + + // Only set href when download link exists and needs an update. `downloadLink` + // can be null when user closes preview and click `Upload JSON` button again. + // https://github.com/Mikubill/sd-webui-controlnet/issues/2308 + if (downloadLink !== null) + downloadLink.href = poseURL; + + poseTextbox.value = poseURL; + updateInput(poseTextbox); + renderButton.click(); + } + + // Updates preview image when edit is done. + window.addEventListener('message', (event) => { + const message = event.data; + const modalId = editButton.id.replace('cnet-modal-open-', ''); + if (message.modalId !== modalId) return; + updatePreviewPose(message.poseURL); + + const closeModalButton = generatedImageGroup.querySelector('.cnet-modal .cnet-modal-close'); + closeModalButton.click(); + }); + + const inputImageGroup = tab.querySelector('.cnet-input-image-group'); + const uploadButton = inputImageGroup.querySelector('.cnet-upload-pose input'); + // Updates preview image when JSON file is uploaded. + uploadButton.addEventListener('change', (event) => { + const file = event.target.files[0]; + if (!file) + return; + + const reader = new FileReader(); + reader.onload = function (e) { + const contents = e.target.result; + const poseURL = `data:application/json;base64,${btoa(contents)}`; + updatePreviewPose(poseURL); + }; + reader.readAsText(file); + // Reset the file input value so that uploading the same file still triggers callback. + event.target.value = ''; + }); + }); + } + + onUiUpdate(loadOpenposeEditor); +})(); \ No newline at end of file diff --git a/extensions-builtin/sd_forge_controlnet/javascript/photopea.js b/extensions-builtin/sd_forge_controlnet/javascript/photopea.js new file mode 100644 index 0000000000000000000000000000000000000000..f765fc3afa9763d9fdbe70b61dec65438fc15241 --- /dev/null +++ b/extensions-builtin/sd_forge_controlnet/javascript/photopea.js @@ -0,0 +1,435 @@ +(function () { + /* + MIT LICENSE + Copyright 2011 Jon Leighton + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and + associated documentation files (the "Software"), to deal in the Software without restriction, + including without limitation the rights to use, copy, modify, merge, publish, distribute, + sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR + PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + // From: https://gist.github.com/jonleighton/958841 + function base64ArrayBuffer(arrayBuffer) { + var base64 = '' + var encodings = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/' + + var bytes = new Uint8Array(arrayBuffer) + var byteLength = bytes.byteLength + var byteRemainder = byteLength % 3 + var mainLength = byteLength - byteRemainder + + var a, b, c, d + var chunk + + // Main loop deals with bytes in chunks of 3 + for (var i = 0; i < mainLength; i = i + 3) { + // Combine the three bytes into a single integer + chunk = (bytes[i] << 16) | (bytes[i + 1] << 8) | bytes[i + 2] + + // Use bitmasks to extract 6-bit segments from the triplet + a = (chunk & 16515072) >> 18 // 16515072 = (2^6 - 1) << 18 + b = (chunk & 258048) >> 12 // 258048 = (2^6 - 1) << 12 + c = (chunk & 4032) >> 6 // 4032 = (2^6 - 1) << 6 + d = chunk & 63 // 63 = 2^6 - 1 + + // Convert the raw binary segments to the appropriate ASCII encoding + base64 += encodings[a] + encodings[b] + encodings[c] + encodings[d] + } + + // Deal with the remaining bytes and padding + if (byteRemainder == 1) { + chunk = bytes[mainLength] + + a = (chunk & 252) >> 2 // 252 = (2^6 - 1) << 2 + + // Set the 4 least significant bits to zero + b = (chunk & 3) << 4 // 3 = 2^2 - 1 + + base64 += encodings[a] + encodings[b] + '==' + } else if (byteRemainder == 2) { + chunk = (bytes[mainLength] << 8) | bytes[mainLength + 1] + + a = (chunk & 64512) >> 10 // 64512 = (2^6 - 1) << 10 + b = (chunk & 1008) >> 4 // 1008 = (2^6 - 1) << 4 + + // Set the 2 least significant bits to zero + c = (chunk & 15) << 2 // 15 = 2^4 - 1 + + base64 += encodings[a] + encodings[b] + encodings[c] + '=' + } + + return base64 + } + + // Turn a base64 string into a blob. + // From https://gist.github.com/gauravmehla/7a7dfd87dd7d1b13697b6e894426615f + function b64toBlob(b64Data, contentType, sliceSize) { + var contentType = contentType || ''; + var sliceSize = sliceSize || 512; + var byteCharacters = atob(b64Data); + var byteArrays = []; + for (var offset = 0; offset < byteCharacters.length; offset += sliceSize) { + var slice = byteCharacters.slice(offset, offset + sliceSize); + var byteNumbers = new Array(slice.length); + for (var i = 0; i < slice.length; i++) { + byteNumbers[i] = slice.charCodeAt(i); + } + var byteArray = new Uint8Array(byteNumbers); + byteArrays.push(byteArray); + } + return new Blob(byteArrays, { type: contentType }); + } + + function createBlackImageBase64(width, height) { + // Create a canvas element + var canvas = document.createElement('canvas'); + canvas.width = width; + canvas.height = height; + + // Get the context of the canvas + var ctx = canvas.getContext('2d'); + + // Fill the canvas with black color + ctx.fillStyle = 'black'; + ctx.fillRect(0, 0, width, height); + + // Get the base64 encoded string + var base64Image = canvas.toDataURL('image/png'); + + return base64Image; + } + + // Functions to be called within photopea context. 
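The magic numbers in base64ArrayBuffer() above are the 6-bit mask 2^6 - 1 = 63 shifted into each output digit's position. A standalone check of those constants and of the expected output, written in Python purely for illustration:

# 6-bit masks used to slice each 24-bit chunk into four base64 digits
assert (2**6 - 1) << 18 == 16515072
assert (2**6 - 1) << 12 == 258048
assert (2**6 - 1) << 6 == 4032
# masks used in the one- and two-byte remainder branches
assert (2**6 - 1) << 10 == 64512
assert (2**6 - 1) << 4 == 1008

import base64
# Reference value a standards-compliant encoder (such as the JS routine above)
# should produce for the 7 bytes 0x00..0x06, exercising the '==' padding branch.
assert base64.b64encode(bytes(range(7))).decode() == "AAECAwQFBg=="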
+ // Start of photopea functions + function pasteImage(base64image) { + app.open(base64image, null, /* asSmart */ true); + app.echoToOE("success"); + } + + function setLayerNames(names) { + const layers = app.activeDocument.layers; + if (layers.length !== names.length) { + console.error("layer length does not match names length"); + echoToOE("error"); + return; + } + + for (let i = 0; i < names.length; i++) { + const layer = layers[i]; + layer.name = names[i]; + } + app.echoToOE("success"); + } + + function removeLayersWithNames(names) { + const layers = app.activeDocument.layers; + for (let i = 0; i < layers.length; i++) { + const layer = layers[i]; + if (names.includes(layer.name)) { + layer.remove(); + } + } + app.echoToOE("success"); + } + + function getAllLayerNames() { + const layers = app.activeDocument.layers; + const names = []; + for (let i = 0; i < layers.length; i++) { + const layer = layers[i]; + names.push(layer.name); + } + app.echoToOE(JSON.stringify(names)); + } + + // Hides all layers except the current one, outputs the whole image, then restores the previous + // layers state. + function exportSelectedLayerOnly(format, layerName) { + // Gets all layers recursively, including the ones inside folders. + function getAllArtLayers(document) { + let allArtLayers = []; + + for (let i = 0; i < document.layers.length; i++) { + const currentLayer = document.layers[i]; + allArtLayers.push(currentLayer); + if (currentLayer.typename === "LayerSet") { + allArtLayers = allArtLayers.concat(getAllArtLayers(currentLayer)); + } + } + return allArtLayers; + } + + function makeLayerVisible(layer) { + let currentLayer = layer; + while (currentLayer != app.activeDocument) { + currentLayer.visible = true; + if (currentLayer.parent.typename != 'Document') { + currentLayer = currentLayer.parent; + } else { + break; + } + } + } + + + const allLayers = getAllArtLayers(app.activeDocument); + // Make all layers except the currently selected one invisible, and store + // their initial state. + const layerStates = []; + for (let i = 0; i < allLayers.length; i++) { + const layer = allLayers[i]; + layerStates.push(layer.visible); + } + // Hide all layers to begin with + for (let i = 0; i < allLayers.length; i++) { + const layer = allLayers[i]; + layer.visible = false; + } + for (let i = 0; i < allLayers.length; i++) { + const layer = allLayers[i]; + const selected = layer.name === layerName; + if (selected) { + makeLayerVisible(layer); + } + } + app.activeDocument.saveToOE(format); + + for (let i = 0; i < allLayers.length; i++) { + const layer = allLayers[i]; + layer.visible = layerStates[i]; + } + } + + function hasActiveDocument() { + app.echoToOE(app.documents.length > 0 ? "true" : "false"); + } + // End of photopea functions + + const MESSAGE_END_ACK = "done"; + const MESSAGE_ERROR = "error"; + const PHOTOPEA_URL = "https://www.photopea.com/"; + class PhotopeaContext { + constructor(photopeaIframe) { + this.photopeaIframe = photopeaIframe; + this.timeout = 1000; + } + + navigateIframe() { + const iframe = this.photopeaIframe; + const editorURL = PHOTOPEA_URL; + + return new Promise(async (resolve) => { + if (iframe.src !== editorURL) { + iframe.src = editorURL; + // Stop waiting after 10s. + setTimeout(resolve, 10000); + + // Testing whether photopea is able to accept message. 
+ while (true) { + try { + await this.invoke(hasActiveDocument); + break; + } catch (e) { + console.log("Keep waiting for photopea to accept message."); + } + } + this.timeout = 5000; // Restore to a longer timeout in normal messaging. + } + resolve(); + }); + } + + // From https://github.com/huchenlei/stable-diffusion-ps-pea/blob/main/src/Photopea.ts + postMessageToPhotopea(message) { + return new Promise((resolve, reject) => { + const responseDataPieces = []; + let hasError = false; + const photopeaMessageHandle = (event) => { + if (event.source !== this.photopeaIframe.contentWindow) { + return; + } + // Filter out the ping messages + if (typeof event.data === 'string' && event.data.includes('MSFAPI#')) { + return; + } + // Ignore "done" when no data has been received. The "done" can come from + // MSFAPI ping. + if (event.data === MESSAGE_END_ACK && responseDataPieces.length === 0) { + return; + } + if (event.data === MESSAGE_END_ACK) { + window.removeEventListener("message", photopeaMessageHandle); + if (hasError) { + reject('Photopea Error.'); + } else { + resolve(responseDataPieces.length === 1 ? responseDataPieces[0] : responseDataPieces); + } + } else if (event.data === MESSAGE_ERROR) { + responseDataPieces.push(event.data); + hasError = true; + } else { + responseDataPieces.push(event.data); + } + }; + + window.addEventListener("message", photopeaMessageHandle); + setTimeout(() => reject("Photopea message timeout"), this.timeout); + this.photopeaIframe.contentWindow.postMessage(message, "*"); + }); + } + + // From https://github.com/huchenlei/stable-diffusion-ps-pea/blob/main/src/Photopea.ts + async invoke(func, ...args) { + await this.navigateIframe(); + const message = `${func.toString()} ${func.name}(${args.map(arg => JSON.stringify(arg)).join(',')});`; + try { + return await this.postMessageToPhotopea(message); + } catch (e) { + throw `Failed to invoke ${func.name}. ${e}.`; + } + } + + /** + * Fetch detected maps from each ControlNet units. + * Create a new photopea document. + * Add those detected maps to the created document. + */ + async fetchFromControlNet(tabs) { + if (tabs.length === 0) return; + const isImg2Img = tabs[0].querySelector('.cnet-mask-upload').id.includes('img2img'); + const generationType = isImg2Img ? 'img2img' : 'txt2img'; + const width = gradioApp().querySelector(`#${generationType}_width input[type=number]`).value; + const height = gradioApp().querySelector(`#${generationType}_height input[type=number]`).value; + + const layerNames = ["background"]; + await this.invoke(pasteImage, createBlackImageBase64(width, height)); + await new Promise(r => setTimeout(r, 200)); + for (const [i, tab] of tabs.entries()) { + const generatedImage = tab.querySelector('.cnet-generated-image-group .cnet-image img'); + if (!generatedImage) continue; + await this.invoke(pasteImage, generatedImage.src); + // Wait 200ms for pasting to fully complete so that we do not ended up with 2 separate + // documents. + await new Promise(r => setTimeout(r, 200)); + layerNames.push(`unit-${i}`); + } + await this.invoke(removeLayersWithNames, layerNames); + await this.invoke(setLayerNames, layerNames.reverse()); + } + + /** + * Send the images in the active photopea document back to each ControlNet units. + */ + async sendToControlNet(tabs) { + // Gradio's image widgets are inputs. To set the image in one, we set the image on the input and + // force it to refresh. + function setImageOnInput(imageInput, file) { + // Createa a data transfer element to set as the data in the input. 
+ const dt = new DataTransfer(); + dt.items.add(file); + const list = dt.files; + + // Actually set the image in the image widget. + imageInput.files = list; + + // Foce the image widget to update with the new image, after setting its source files. + const event = new Event('change', { + 'bubbles': true, + "composed": true + }); + imageInput.dispatchEvent(event); + } + + function sendToControlNetUnit(b64Image, index) { + const tab = tabs[index]; + // Upload image to output image element. + const outputImage = tab.querySelector('.cnet-photopea-output'); + const outputImageUpload = outputImage.querySelector('input[type="file"]'); + setImageOnInput(outputImageUpload, new File([b64toBlob(b64Image, "image/png")], "photopea_output.png")); + + // Make sure `UsePreviewAsInput` checkbox is checked. + const checkbox = tab.querySelector('.cnet-preview-as-input input[type="checkbox"]'); + if (!checkbox.checked) { + checkbox.click(); + } + } + + const layerNames = + JSON.parse(await this.invoke(getAllLayerNames)) + .filter(name => /unit-\d+/.test(name)); + + for (const layerName of layerNames) { + const arrayBuffer = await this.invoke(exportSelectedLayerOnly, 'PNG', layerName); + const b64Image = base64ArrayBuffer(arrayBuffer); + const layerIndex = Number.parseInt(layerName.split('-')[1]); + sendToControlNetUnit(b64Image, layerIndex); + } + } + } + + let photopeaWarningShown = false; + + function firstTimeUserPrompt() { + if (opts.controlnet_photopea_warning){ + const photopeaPopupMsg = "you are about to connect to https://photopea.com\n" + + "- Click OK: proceed.\n" + + "- Click Cancel: abort.\n" + + "Photopea integration can be disabled in Settings > ControlNet > Disable photopea edit.\n" + + "This popup can be disabled in Settings > ControlNet > Photopea popup warning."; + if (photopeaWarningShown || confirm(photopeaPopupMsg)) photopeaWarningShown = true; + else return false; + } + return true; + } + + const cnetRegisteredAccordions = new Set(); + function loadPhotopea() { + function registerCallbacks(accordion) { + const photopeaMainTrigger = accordion.querySelector('.cnet-photopea-main-trigger'); + // Photopea edit feature disabled. 
+ if (!photopeaMainTrigger) { + console.log("ControlNet photopea edit disabled."); + return; + } + + const closeModalButton = accordion.querySelector('.cnet-photopea-edit .cnet-modal-close'); + const tabs = accordion.querySelectorAll('.controlnet .input-accordion'); + const photopeaIframe = accordion.querySelector('.photopea-iframe'); + const photopeaContext = new PhotopeaContext(photopeaIframe, tabs); + + tabs.forEach(tab => { + const photopeaChildTrigger = tab.querySelector('.cnet-photopea-child-trigger'); + photopeaChildTrigger.addEventListener('click', async () => { + if (!firstTimeUserPrompt()) return; + + photopeaMainTrigger.click(); + if (await photopeaContext.invoke(hasActiveDocument) === "false") { + await photopeaContext.fetchFromControlNet(tabs); + } + }); + }); + accordion.querySelector('.photopea-fetch').addEventListener('click', () => photopeaContext.fetchFromControlNet(tabs)); + accordion.querySelector('.photopea-send').addEventListener('click', () => { + photopeaContext.sendToControlNet(tabs) + closeModalButton.click(); + }); + } + + const accordions = gradioApp().querySelectorAll('#controlnet'); + accordions.forEach(accordion => { + if (cnetRegisteredAccordions.has(accordion)) return; + registerCallbacks(accordion); + cnetRegisteredAccordions.add(accordion); + }); + } + + onUiUpdate(loadPhotopea); +})(); \ No newline at end of file diff --git a/extensions-builtin/sd_forge_controlnet/lib_controlnet/api.py b/extensions-builtin/sd_forge_controlnet/lib_controlnet/api.py new file mode 100644 index 0000000000000000000000000000000000000000..29e7d662d2100423e86acd2193b1c5d87bb9ed0e --- /dev/null +++ b/extensions-builtin/sd_forge_controlnet/lib_controlnet/api.py @@ -0,0 +1,112 @@ +from typing import List + +import numpy as np +from fastapi import FastAPI, Body +from fastapi.exceptions import HTTPException +from PIL import Image +import gradio as gr + +from modules.api import api +from .global_state import ( + get_all_preprocessor_names, + get_all_controlnet_names, + get_preprocessor, +) +from .utils import judge_image_type +from .logging import logger + + +def encode_to_base64(image): + if isinstance(image, str): + return image + elif not judge_image_type(image): + return "Detect result is not image" + elif isinstance(image, Image.Image): + return api.encode_pil_to_base64(image) + elif isinstance(image, np.ndarray): + return encode_np_to_base64(image) + else: + logger.warn("Unable to encode image.") + return "" + + +def encode_np_to_base64(image): + pil = Image.fromarray(image) + return api.encode_pil_to_base64(pil) + + +def controlnet_api(_: gr.Blocks, app: FastAPI): + @app.get("/controlnet/model_list") + async def model_list(): + up_to_date_model_list = get_all_controlnet_names() + logger.debug(up_to_date_model_list) + return {"model_list": up_to_date_model_list} + + @app.get("/controlnet/module_list") + async def module_list(): + module_list = get_all_preprocessor_names() + logger.debug(module_list) + + return { + "module_list": module_list, + # TODO: Add back module detail. 
+ # "module_detail": external_code.get_modules_detail(alias_names), + } + + @app.post("/controlnet/detect") + async def detect( + controlnet_module: str = Body("none", title="Controlnet Module"), + controlnet_input_images: List[str] = Body([], title="Controlnet Input Images"), + controlnet_processor_res: int = Body( + 512, title="Controlnet Processor Resolution" + ), + controlnet_threshold_a: float = Body(64, title="Controlnet Threshold a"), + controlnet_threshold_b: float = Body(64, title="Controlnet Threshold b"), + ): + processor_module = get_preprocessor(controlnet_module) + if processor_module is None: + raise HTTPException(status_code=422, detail="Module not available") + + if len(controlnet_input_images) == 0: + raise HTTPException(status_code=422, detail="No image selected") + + logger.debug( + f"Detecting {str(len(controlnet_input_images))} images with the {controlnet_module} module." + ) + + results = [] + poses = [] + + for input_image in controlnet_input_images: + img = np.array(api.decode_base64_to_image(input_image)).astype('uint8') + + class JsonAcceptor: + def __init__(self) -> None: + self.value = None + + def accept(self, json_dict: dict) -> None: + self.value = json_dict + + json_acceptor = JsonAcceptor() + + results.append( + processor_module( + img, + resolution=controlnet_processor_res, + slider_1=controlnet_threshold_a, + slider_2=controlnet_threshold_b, + json_pose_callback=json_acceptor.accept, + ) + ) + + if "openpose" in controlnet_module: + assert json_acceptor.value is not None + poses.append(json_acceptor.value) + + results64 = [encode_to_base64(img) for img in results] + res = {"images": results64, "info": "Success"} + if poses: + res["poses"] = poses + + return res + diff --git a/extensions-builtin/sd_forge_controlnet/lib_controlnet/controlnet_ui/controlnet_ui_group.py b/extensions-builtin/sd_forge_controlnet/lib_controlnet/controlnet_ui/controlnet_ui_group.py new file mode 100644 index 0000000000000000000000000000000000000000..b3e16df6868fd110d99208c88a985193746fe6d1 --- /dev/null +++ b/extensions-builtin/sd_forge_controlnet/lib_controlnet/controlnet_ui/controlnet_ui_group.py @@ -0,0 +1,1263 @@ +import json +import gradio as gr +import functools +from copy import copy +from typing import List, Optional, Union, Callable, Dict, Tuple, Literal +from dataclasses import dataclass +import numpy as np + +from lib_controlnet.utils import svg_preprocess, read_image, judge_image_type +from lib_controlnet import ( + global_state, + external_code, +) +from lib_controlnet.logging import logger +from lib_controlnet.controlnet_ui.openpose_editor import OpenposeEditor +from lib_controlnet.controlnet_ui.preset import ControlNetPresetUI +from lib_controlnet.controlnet_ui.tool_button import ToolButton +from lib_controlnet.controlnet_ui.photopea import Photopea +from lib_controlnet.enums import InputMode, HiResFixOption +from modules import shared, script_callbacks +from modules.ui_components import FormRow +from modules_forge.forge_util import HWC3 +from lib_controlnet.external_code import UiControlNetUnit + + +@dataclass +class A1111Context: + """Contains all components from A1111.""" + + img2img_batch_input_dir: Optional[gr.components.IOComponent] = None + img2img_batch_output_dir: Optional[gr.components.IOComponent] = None + txt2img_submit_button: Optional[gr.components.IOComponent] = None + img2img_submit_button: Optional[gr.components.IOComponent] = None + + # Slider controls from A1111 WebUI. 
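The /controlnet/detect endpoint defined above accepts base64-encoded input images plus preprocessor settings and returns base64-encoded detected maps, with pose JSON added when an openpose module is used. A minimal client sketch follows; it assumes the WebUI is running with its API enabled on the default 127.0.0.1:7860 address and that a "canny" preprocessor is installed, neither of which is guaranteed by this diff:

import base64
import requests

with open("input.png", "rb") as f:
    img_b64 = base64.b64encode(f.read()).decode()

resp = requests.post(
    "http://127.0.0.1:7860/controlnet/detect",  # assumed default host/port
    json={
        "controlnet_module": "canny",           # assumed module name
        "controlnet_input_images": [img_b64],
        "controlnet_processor_res": 512,
        "controlnet_threshold_a": 100,
        "controlnet_threshold_b": 200,
    },
    timeout=60,
)
resp.raise_for_status()
payload = resp.json()
# payload["images"] holds the base64-encoded detected maps;
# payload["poses"] is only present for openpose modules.
with open("detected_map.png", "wb") as f:
    f.write(base64.b64decode(payload["images"][0]))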
+ txt2img_w_slider: Optional[gr.components.IOComponent] = None + txt2img_h_slider: Optional[gr.components.IOComponent] = None + img2img_w_slider: Optional[gr.components.IOComponent] = None + img2img_h_slider: Optional[gr.components.IOComponent] = None + + img2img_img2img_tab: Optional[gr.components.IOComponent] = None + img2img_img2img_sketch_tab: Optional[gr.components.IOComponent] = None + img2img_batch_tab: Optional[gr.components.IOComponent] = None + img2img_inpaint_tab: Optional[gr.components.IOComponent] = None + img2img_inpaint_sketch_tab: Optional[gr.components.IOComponent] = None + img2img_inpaint_upload_tab: Optional[gr.components.IOComponent] = None + + img2img_inpaint_area: Optional[gr.components.IOComponent] = None + txt2img_enable_hr: Optional[gr.components.IOComponent] = None + + @property + def img2img_inpaint_tabs(self) -> Tuple[gr.components.IOComponent]: + return ( + self.img2img_inpaint_tab, + self.img2img_inpaint_sketch_tab, + self.img2img_inpaint_upload_tab, + ) + + @property + def img2img_non_inpaint_tabs(self) -> Tuple[gr.components.IOComponent]: + return ( + self.img2img_img2img_tab, + self.img2img_img2img_sketch_tab, + self.img2img_batch_tab, + ) + + @property + def ui_initialized(self) -> bool: + optional_components = { + # Optional components are only available after A1111 v1.7.0. + "img2img_img2img_tab": "img2img_img2img_tab", + "img2img_img2img_sketch_tab": "img2img_img2img_sketch_tab", + "img2img_batch_tab": "img2img_batch_tab", + "img2img_inpaint_tab": "img2img_inpaint_tab", + "img2img_inpaint_sketch_tab": "img2img_inpaint_sketch_tab", + "img2img_inpaint_upload_tab": "img2img_inpaint_upload_tab", + } + return all( + c + for name, c in vars(self).items() + if name not in optional_components.values() + ) + + def set_component(self, component: gr.components.IOComponent): + id_mapping = { + "img2img_batch_input_dir": "img2img_batch_input_dir", + "img2img_batch_output_dir": "img2img_batch_output_dir", + "txt2img_generate": "txt2img_submit_button", + "img2img_generate": "img2img_submit_button", + "txt2img_width": "txt2img_w_slider", + "txt2img_height": "txt2img_h_slider", + "img2img_width": "img2img_w_slider", + "img2img_height": "img2img_h_slider", + "img2img_img2img_tab": "img2img_img2img_tab", + "img2img_img2img_sketch_tab": "img2img_img2img_sketch_tab", + "img2img_batch_tab": "img2img_batch_tab", + "img2img_inpaint_tab": "img2img_inpaint_tab", + "img2img_inpaint_sketch_tab": "img2img_inpaint_sketch_tab", + "img2img_inpaint_upload_tab": "img2img_inpaint_upload_tab", + "img2img_inpaint_full_res": "img2img_inpaint_area", + "txt2img_hr-checkbox": "txt2img_enable_hr", + } + elem_id = getattr(component, "elem_id", None) + # Do not set component if it has already been set. + # https://github.com/Mikubill/sd-webui-controlnet/issues/2587 + if elem_id in id_mapping and getattr(self, id_mapping[elem_id]) is None: + setattr(self, id_mapping[elem_id], component) + logger.debug(f"Setting {elem_id}.") + logger.debug( + f"A1111 initialized {sum(c is not None for c in vars(self).values())}/{len(vars(self).keys())}." 
+ ) + + +class ControlNetUiGroup(object): + refresh_symbol = "\U0001f504" # 🔄 + switch_values_symbol = "\U000021C5" # ⇅ + camera_symbol = "\U0001F4F7" # 📷 + reverse_symbol = "\U000021C4" # ⇄ + tossup_symbol = "\u2934" + trigger_symbol = "\U0001F4A5" # 💥 + open_symbol = "\U0001F4DD" # 📝 + + tooltips = { + "🔄": "Refresh", + "\u2934": "Send dimensions to stable diffusion", + "💥": "Run preprocessor", + "📝": "Open new canvas", + "📷": "Enable webcam", + "⇄": "Mirror webcam", + } + + global_batch_input_dir = gr.Textbox( + label="Controlnet input directory", + placeholder="Leave empty to use input directory", + **shared.hide_dirs, + elem_id="controlnet_batch_input_dir", + ) + a1111_context = A1111Context() + # All ControlNetUiGroup instances created. + all_ui_groups: List["ControlNetUiGroup"] = [] + + @property + def width_slider(self): + if self.is_img2img: + return ControlNetUiGroup.a1111_context.img2img_w_slider + else: + return ControlNetUiGroup.a1111_context.txt2img_w_slider + + @property + def height_slider(self): + if self.is_img2img: + return ControlNetUiGroup.a1111_context.img2img_h_slider + else: + return ControlNetUiGroup.a1111_context.txt2img_h_slider + + def __init__( + self, + is_img2img: bool, + default_unit: external_code.ControlNetUnit, + photopea: Optional[Photopea] = None, + ): + # Whether callbacks have been registered. + self.callbacks_registered: bool = False + # Whether the render method on this object has been called. + self.ui_initialized: bool = False + + self.is_img2img = is_img2img + self.default_unit = default_unit + self.photopea = photopea + self.webcam_enabled = False + self.webcam_mirrored = False + + # Note: All gradio elements declared in `render` will be defined as member variable. + # Update counter to trigger a force update of UiControlNetUnit. + # dummy_gradio_update_trigger is useful when a field with no event subscriber available changes. + # e.g. gr.Gallery, gr.State, etc. After an update to gr.State / gr.Gallery, please increment + # this counter to trigger a sync update of UiControlNetUnit. 
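+ # (See `register_multi_images_upload` below, which bumps this trigger after gallery uploads/clears.)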
+ self.dummy_gradio_update_trigger = None + self.enabled = None + self.upload_tab = None + self.image = None + self.generated_image_group = None + self.generated_image = None + self.mask_image_group = None + self.mask_image = None + self.batch_tab = None + self.batch_image_dir = None + self.merge_tab = None + self.batch_input_gallery = None + self.merge_upload_button = None + self.merge_clear_button = None + self.create_canvas = None + self.canvas_width = None + self.canvas_height = None + self.canvas_create_button = None + self.canvas_cancel_button = None + self.open_new_canvas_button = None + self.webcam_enable = None + self.webcam_mirror = None + self.send_dimen_button = None + self.pixel_perfect = None + self.preprocessor_preview = None + self.mask_upload = None + self.type_filter = None + self.module = None + self.trigger_preprocessor = None + self.model = None + self.refresh_models = None + self.weight = None + self.guidance_start = None + self.guidance_end = None + self.advanced = None + self.processor_res = None + self.threshold_a = None + self.threshold_b = None + self.control_mode = None + self.resize_mode = None + self.use_preview_as_input = None + self.openpose_editor = None + self.preset_panel = None + self.upload_independent_img_in_img2img = None + self.image_upload_panel = None + self.save_detected_map = None + self.input_mode = gr.State(InputMode.SIMPLE) + self.hr_option = None + self.batch_image_dir_state = None + self.output_dir_state = None + + # Internal states for UI state pasting. + self.prevent_next_n_module_update = 0 + self.prevent_next_n_slider_value_update = 0 + + ControlNetUiGroup.all_ui_groups.append(self) + + def render(self, tabname: str, elem_id_tabname: str) -> None: + """The pure HTML structure of a single ControlNetUnit. Calling this + function will populate `self` with all gradio element declared + in local scope. + + Args: + tabname: + elem_id_tabname: + + Returns: + None + """ + self.dummy_gradio_update_trigger = gr.Number(value=0, visible=False) + self.openpose_editor = OpenposeEditor() + + with gr.Group(visible=not self.is_img2img) as self.image_upload_panel: + self.save_detected_map = gr.Checkbox(value=True, visible=False) + with gr.Tabs(): + with gr.Tab(label="Single Image") as self.upload_tab: + with gr.Row(elem_classes=["cnet-image-row"], equal_height=True): + with gr.Group(elem_classes=["cnet-input-image-group"]): + self.image = gr.Image( + source="upload", + brush_radius=20, + mirror_webcam=False, + type="numpy", + tool="sketch", + elem_id=f"{elem_id_tabname}_{tabname}_input_image", + elem_classes=["cnet-image"], + brush_color=shared.opts.img2img_inpaint_mask_brush_color + if hasattr( + shared.opts, "img2img_inpaint_mask_brush_color" + ) + else None, + ) + self.image.preprocess = functools.partial( + svg_preprocess, preprocess=self.image.preprocess + ) + self.openpose_editor.render_upload() + + with gr.Group( + visible=False, elem_classes=["cnet-generated-image-group"] + ) as self.generated_image_group: + self.generated_image = gr.Image( + value=None, + label="Preprocessor Preview", + elem_id=f"{elem_id_tabname}_{tabname}_generated_image", + elem_classes=["cnet-image"], + interactive=True, + height=242, + ) # Gradio's magic number. Only 242 works. 
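+ # Controls rendered alongside the preprocessor preview: the optional Photopea trigger,
+ # the OpenPose editor link, and a button that closes the preview.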
+ + with gr.Group( + elem_classes=["cnet-generated-image-control-group"] + ): + if self.photopea: + self.photopea.render_child_trigger() + self.openpose_editor.render_edit() + preview_check_elem_id = f"{elem_id_tabname}_{tabname}_controlnet_preprocessor_preview_checkbox" + preview_close_button_js = f"document.querySelector('#{preview_check_elem_id} input[type=\\'checkbox\\']').click();" + gr.HTML( + value=f"""Close""", + visible=True, + elem_classes=["cnet-close-preview"], + ) + + with gr.Group( + visible=False, elem_classes=["cnet-mask-image-group"] + ) as self.mask_image_group: + self.mask_image = gr.Image( + value=None, + label="Mask", + elem_id=f"{elem_id_tabname}_{tabname}_mask_image", + elem_classes=["cnet-mask-image"], + interactive=True, + brush_radius=20, + type="numpy", + tool="sketch", + brush_color=shared.opts.img2img_inpaint_mask_brush_color + if hasattr( + shared.opts, "img2img_inpaint_mask_brush_color" + ) + else None, + ) + + with gr.Tab(label="Batch Folder") as self.batch_tab: + with gr.Row(): + self.batch_image_dir = gr.Textbox( + label="Input Directory", + placeholder="Input directory path to the control images.", + elem_id=f"{elem_id_tabname}_{tabname}_batch_image_dir", + ) + self.batch_mask_dir = gr.Textbox( + label="Mask Directory", + placeholder="Mask directory path to the control images.", + elem_id=f"{elem_id_tabname}_{tabname}_batch_mask_dir", + visible=False, + ) + + with gr.Tab(label="Batch Upload") as self.merge_tab: + with gr.Row(): + with gr.Column(): + self.batch_input_gallery = gr.Gallery( + columns=[4], rows=[2], object_fit="contain", height="auto", label="Images" + ) + with gr.Row(): + self.merge_upload_button = gr.UploadButton( + "Upload Images", + file_types=["image"], + file_count="multiple", + ) + self.merge_clear_button = gr.Button("Clear Images") + with gr.Group(visible=False, elem_classes=["cnet-mask-gallery-group"]) as self.batch_mask_gallery_group: + with gr.Column(): + self.batch_mask_gallery = gr.Gallery( + columns=[4], rows=[2], object_fit="contain", height="auto", label="Masks" + ) + with gr.Row(): + self.mask_merge_upload_button = gr.UploadButton( + "Upload Masks", + file_types=["image"], + file_count="multiple", + ) + self.mask_merge_clear_button = gr.Button("Clear Masks") + + if self.photopea: + self.photopea.attach_photopea_output(self.generated_image) + + with gr.Accordion( + label="Open New Canvas", visible=False + ) as self.create_canvas: + self.canvas_width = gr.Slider( + label="New Canvas Width", + minimum=256, + maximum=1024, + value=512, + step=64, + elem_id=f"{elem_id_tabname}_{tabname}_controlnet_canvas_width", + ) + self.canvas_height = gr.Slider( + label="New Canvas Height", + minimum=256, + maximum=1024, + value=512, + step=64, + elem_id=f"{elem_id_tabname}_{tabname}_controlnet_canvas_height", + ) + with gr.Row(): + self.canvas_create_button = gr.Button( + value="Create New Canvas", + elem_id=f"{elem_id_tabname}_{tabname}_controlnet_canvas_create_button", + ) + self.canvas_cancel_button = gr.Button( + value="Cancel", + elem_id=f"{elem_id_tabname}_{tabname}_controlnet_canvas_cancel_button", + ) + + with gr.Row(elem_classes="controlnet_image_controls"): + gr.HTML( + value="

<p>Set the preprocessor to [invert] if your image has white background and black lines.</p>

", + elem_classes="controlnet_invert_warning", + ) + self.open_new_canvas_button = ToolButton( + value=ControlNetUiGroup.open_symbol, + elem_id=f"{elem_id_tabname}_{tabname}_controlnet_open_new_canvas_button", + tooltip=ControlNetUiGroup.tooltips[ControlNetUiGroup.open_symbol], + ) + self.webcam_enable = ToolButton( + value=ControlNetUiGroup.camera_symbol, + elem_id=f"{elem_id_tabname}_{tabname}_controlnet_webcam_enable", + tooltip=ControlNetUiGroup.tooltips[ControlNetUiGroup.camera_symbol], + ) + self.webcam_mirror = ToolButton( + value=ControlNetUiGroup.reverse_symbol, + elem_id=f"{elem_id_tabname}_{tabname}_controlnet_webcam_mirror", + tooltip=ControlNetUiGroup.tooltips[ + ControlNetUiGroup.reverse_symbol + ], + ) + self.send_dimen_button = ToolButton( + value=ControlNetUiGroup.tossup_symbol, + elem_id=f"{elem_id_tabname}_{tabname}_controlnet_send_dimen_button", + tooltip=ControlNetUiGroup.tooltips[ControlNetUiGroup.tossup_symbol], + ) + + with FormRow(elem_classes=["controlnet_main_options"]): + self.enabled = gr.Checkbox( + label="Enable", + value=self.default_unit.enabled, + elem_id=f"{elem_id_tabname}_{tabname}_controlnet_enable_checkbox", + elem_classes=["cnet-unit-enabled"], + ) + self.pixel_perfect = gr.Checkbox( + label="Pixel Perfect", + value=self.default_unit.pixel_perfect, + elem_id=f"{elem_id_tabname}_{tabname}_controlnet_pixel_perfect_checkbox", + ) + self.preprocessor_preview = gr.Checkbox( + label="Allow Preview", + value=False, + elem_classes=["cnet-allow-preview"], + elem_id=preview_check_elem_id, + visible=not self.is_img2img, + ) + self.mask_upload = gr.Checkbox( + label="Use Mask", + value=False, + elem_classes=["cnet-mask-upload"], + elem_id=f"{elem_id_tabname}_{tabname}_controlnet_mask_upload_checkbox", + visible=not self.is_img2img, + ) + self.use_preview_as_input = gr.Checkbox( + label="Preview as Input", + value=False, + elem_classes=["cnet-preview-as-input"], + visible=False, + ) + + with gr.Row(elem_classes="controlnet_img2img_options"): + if self.is_img2img: + self.upload_independent_img_in_img2img = gr.Checkbox( + label="Upload independent control image", + value=False, + elem_id=f"{elem_id_tabname}_{tabname}_controlnet_same_img2img_checkbox", + elem_classes=["cnet-unit-same_img2img"], + ) + else: + self.upload_independent_img_in_img2img = None + + with gr.Row(elem_classes=["controlnet_control_type", "controlnet_row"]): + self.type_filter = gr.Radio( + global_state.get_all_preprocessor_tags(), + label=f"Control Type", + value="All", + elem_id=f"{elem_id_tabname}_{tabname}_controlnet_type_filter_radio", + elem_classes="controlnet_control_type_filter_group", + ) + + with gr.Row(elem_classes=["controlnet_preprocessor_model", "controlnet_row"]): + self.module = gr.Dropdown( + global_state.get_all_preprocessor_names(), + label=f"Preprocessor", + value=self.default_unit.module, + elem_id=f"{elem_id_tabname}_{tabname}_controlnet_preprocessor_dropdown", + ) + self.trigger_preprocessor = ToolButton( + value=ControlNetUiGroup.trigger_symbol, + visible=not self.is_img2img, + elem_id=f"{elem_id_tabname}_{tabname}_controlnet_trigger_preprocessor", + elem_classes=["cnet-run-preprocessor"], + tooltip=ControlNetUiGroup.tooltips[ControlNetUiGroup.trigger_symbol], + ) + self.model = gr.Dropdown( + global_state.get_all_controlnet_names(), + label=f"Model", + value=self.default_unit.model, + elem_id=f"{elem_id_tabname}_{tabname}_controlnet_model_dropdown", + ) + self.refresh_models = ToolButton( + value=ControlNetUiGroup.refresh_symbol, + 
elem_id=f"{elem_id_tabname}_{tabname}_controlnet_refresh_models", + tooltip=ControlNetUiGroup.tooltips[ControlNetUiGroup.refresh_symbol], + ) + + with gr.Row(elem_classes=["controlnet_weight_steps", "controlnet_row"]): + self.weight = gr.Slider( + label=f"Control Weight", + value=self.default_unit.weight, + minimum=0.0, + maximum=2.0, + step=0.05, + elem_id=f"{elem_id_tabname}_{tabname}_controlnet_control_weight_slider", + elem_classes="controlnet_control_weight_slider", + ) + self.guidance_start = gr.Slider( + label="Starting Control Step", + value=self.default_unit.guidance_start, + minimum=0.0, + maximum=1.0, + interactive=True, + elem_id=f"{elem_id_tabname}_{tabname}_controlnet_start_control_step_slider", + elem_classes="controlnet_start_control_step_slider", + ) + self.guidance_end = gr.Slider( + label="Ending Control Step", + value=self.default_unit.guidance_end, + minimum=0.0, + maximum=1.0, + interactive=True, + elem_id=f"{elem_id_tabname}_{tabname}_controlnet_ending_control_step_slider", + elem_classes="controlnet_ending_control_step_slider", + ) + + # advanced options + with gr.Column(visible=False) as self.advanced: + self.processor_res = gr.Slider( + label="Preprocessor resolution", + value=self.default_unit.processor_res, + minimum=64, + maximum=2048, + visible=False, + interactive=True, + elem_id=f"{elem_id_tabname}_{tabname}_controlnet_preprocessor_resolution_slider", + ) + self.threshold_a = gr.Slider( + label="Threshold A", + value=self.default_unit.threshold_a, + minimum=64, + maximum=1024, + visible=False, + interactive=True, + elem_id=f"{elem_id_tabname}_{tabname}_controlnet_threshold_A_slider", + ) + self.threshold_b = gr.Slider( + label="Threshold B", + value=self.default_unit.threshold_b, + minimum=64, + maximum=1024, + visible=False, + interactive=True, + elem_id=f"{elem_id_tabname}_{tabname}_controlnet_threshold_B_slider", + ) + + self.control_mode = gr.Radio( + choices=[e.value for e in external_code.ControlMode], + value=self.default_unit.control_mode.value, + label="Control Mode", + elem_id=f"{elem_id_tabname}_{tabname}_controlnet_control_mode_radio", + elem_classes="controlnet_control_mode_radio", + ) + + self.resize_mode = gr.Radio( + choices=[e.value for e in external_code.ResizeMode], + value=self.default_unit.resize_mode.value, + label="Resize Mode", + elem_id=f"{elem_id_tabname}_{tabname}_controlnet_resize_mode_radio", + elem_classes="controlnet_resize_mode_radio", + visible=not self.is_img2img, + ) + + self.hr_option = gr.Radio( + choices=[e.value for e in HiResFixOption], + value=self.default_unit.hr_option.value, + label="Hires-Fix Option", + elem_id=f"{elem_id_tabname}_{tabname}_controlnet_hr_option_radio", + elem_classes="controlnet_hr_option_radio", + visible=False, + ) + + # self.loopback = gr.Checkbox( + # label="[Batch Loopback] Automatically send generated images to this ControlNet unit in batch generation", + # value=self.default_unit.loopback, + # elem_id=f"{elem_id_tabname}_{tabname}_controlnet_automatically_send_generated_images_checkbox", + # elem_classes="controlnet_loopback_checkbox", + # visible=False, + # ) + + self.preset_panel = ControlNetPresetUI( + id_prefix=f"{elem_id_tabname}_{tabname}_" + ) + + self.batch_image_dir_state = gr.State("") + self.output_dir_state = gr.State("") + unit_args = ( + self.input_mode, + self.use_preview_as_input, + self.batch_image_dir, + self.batch_mask_dir, + self.batch_input_gallery, + self.batch_mask_gallery, + self.generated_image, + self.mask_image, + self.hr_option, + self.enabled, + self.module, + 
self.model, + self.weight, + self.image, + self.resize_mode, + self.processor_res, + self.threshold_a, + self.threshold_b, + self.guidance_start, + self.guidance_end, + self.pixel_perfect, + self.control_mode, + ) + + unit = gr.State(self.default_unit) + for comp in unit_args + (self.dummy_gradio_update_trigger,): + event_subscribers = [] + if hasattr(comp, "edit"): + event_subscribers.append(comp.edit) + elif hasattr(comp, "click"): + event_subscribers.append(comp.click) + elif isinstance(comp, gr.Slider) and hasattr(comp, "release"): + event_subscribers.append(comp.release) + elif hasattr(comp, "change"): + event_subscribers.append(comp.change) + + if hasattr(comp, "clear"): + event_subscribers.append(comp.clear) + + for event_subscriber in event_subscribers: + event_subscriber( + fn=UiControlNetUnit, inputs=list(unit_args), outputs=unit + ) + + ( + ControlNetUiGroup.a1111_context.img2img_submit_button + if self.is_img2img + else ControlNetUiGroup.a1111_context.txt2img_submit_button + ).click( + fn=UiControlNetUnit, + inputs=list(unit_args), + outputs=unit, + queue=False, + ) + self.register_core_callbacks() + self.ui_initialized = True + return unit + + def register_send_dimensions(self): + """Register event handler for send dimension button.""" + + def send_dimensions(image): + def closesteight(num): + rem = num % 8 + if rem <= 4: + return round(num - rem) + else: + return round(num + (8 - rem)) + + if image: + interm = np.asarray(image.get("image")) + return closesteight(interm.shape[1]), closesteight(interm.shape[0]) + else: + return gr.Slider.update(), gr.Slider.update() + + self.send_dimen_button.click( + fn=send_dimensions, + inputs=[self.image], + outputs=[self.width_slider, self.height_slider], + show_progress=False, + ) + + def register_webcam_toggle(self): + def webcam_toggle(): + self.webcam_enabled = not self.webcam_enabled + return { + "value": None, + "source": "webcam" if self.webcam_enabled else "upload", + "__type__": "update", + } + + self.webcam_enable.click( + webcam_toggle, inputs=None, outputs=self.image, show_progress=False + ) + + def register_webcam_mirror_toggle(self): + def webcam_mirror_toggle(): + self.webcam_mirrored = not self.webcam_mirrored + return {"mirror_webcam": self.webcam_mirrored, "__type__": "update"} + + self.webcam_mirror.click( + webcam_mirror_toggle, inputs=None, outputs=self.image, show_progress=False + ) + + def register_refresh_all_models(self): + def refresh_all_models(): + global_state.update_controlnet_filenames() + return gr.Dropdown.update( + choices=global_state.get_all_controlnet_names(), + ) + + self.refresh_models.click( + refresh_all_models, + outputs=[self.model], + show_progress=False, + ) + + def register_build_sliders(self): + def build_sliders(module: str, pp: bool): + + logger.debug( + f"Prevent update slider value: {self.prevent_next_n_slider_value_update}" + ) + logger.debug(f"Build slider for module: {module} - {pp}") + + preprocessor = global_state.get_preprocessor(module) + + slider_resolution_kwargs = preprocessor.slider_resolution.gradio_update_kwargs.copy() + + if pp: + slider_resolution_kwargs['visible'] = False + + grs = [ + gr.update(**slider_resolution_kwargs), + gr.update(**preprocessor.slider_1.gradio_update_kwargs.copy()), + gr.update(**preprocessor.slider_2.gradio_update_kwargs.copy()), + gr.update(visible=True), + gr.update(visible=not preprocessor.do_not_need_model), + gr.update(visible=not preprocessor.do_not_need_model), + gr.update(visible=preprocessor.show_control_mode), + ] + + return grs + + inputs 
= [ + self.module, + self.pixel_perfect, + ] + outputs = [ + self.processor_res, + self.threshold_a, + self.threshold_b, + self.advanced, + self.model, + self.refresh_models, + self.control_mode, + ] + self.module.change( + build_sliders, inputs=inputs, outputs=outputs, show_progress=False + ) + self.pixel_perfect.change( + build_sliders, inputs=inputs, outputs=outputs, show_progress=False + ) + + def filter_selected(k: str): + logger.debug(f"Prevent update {self.prevent_next_n_module_update}") + logger.debug(f"Switch to control type {k}") + + filtered_preprocessor_list = global_state.get_filtered_preprocessor_names(k) + filtered_controlnet_names = global_state.get_filtered_controlnet_names(k) + default_preprocessor = filtered_preprocessor_list[0] + default_controlnet_name = filtered_controlnet_names[0] + + if k != 'All': + if len(filtered_preprocessor_list) > 1: + default_preprocessor = filtered_preprocessor_list[1] + if len(filtered_controlnet_names) > 1: + default_controlnet_name = filtered_controlnet_names[1] + + if self.prevent_next_n_module_update > 0: + self.prevent_next_n_module_update -= 1 + return [ + gr.Dropdown.update(choices=filtered_preprocessor_list), + gr.Dropdown.update(choices=filtered_controlnet_names), + ] + else: + return [ + gr.Dropdown.update( + value=default_preprocessor, choices=filtered_preprocessor_list + ), + gr.Dropdown.update( + value=default_controlnet_name, choices=filtered_controlnet_names + ), + ] + + self.type_filter.change( + fn=filter_selected, + inputs=[self.type_filter], + outputs=[self.module, self.model], + show_progress=False, + ) + + def register_run_annotator(self): + def run_annotator(image, module, pres, pthr_a, pthr_b, t2i_w, t2i_h, pp, rm): + if image is None: + return ( + gr.update(value=None, visible=True), + gr.update(), + *self.openpose_editor.update(""), + ) + + img = HWC3(image["image"]) + mask = HWC3(image["mask"]) + + if not (mask > 5).any(): + mask = None + + preprocessor = global_state.get_preprocessor(module) + + if pp: + pres = external_code.pixel_perfect_resolution( + img, + target_H=t2i_h, + target_W=t2i_w, + resize_mode=external_code.resize_mode_from_value(rm), + ) + + class JsonAcceptor: + def __init__(self) -> None: + self.value = "" + + def accept(self, json_dict: dict) -> None: + self.value = json.dumps(json_dict) + + json_acceptor = JsonAcceptor() + + logger.info(f"Preview Resolution = {pres}") + + def is_openpose(module: str): + return "openpose" in module + + # Only openpose preprocessor returns a JSON output, pass json_acceptor + # only when a JSON output is expected. This will make preprocessor cache + # work for all other preprocessors other than openpose ones. JSON acceptor + # instance are different every call, which means cache will never take + # effect. + # TODO: Maybe we should let `preprocessor` return a Dict to alleviate this issue? + # This requires changing all callsites though. 
+ result = preprocessor( + input_image=img, + resolution=pres, + slider_1=pthr_a, + slider_2=pthr_b, + input_mask=mask, + json_pose_callback=json_acceptor.accept + if is_openpose(module) + else None, + ) + + is_image = judge_image_type(result) + + if not is_image: + result = img + + result = external_code.visualize_inpaint_mask(result) + return ( + # Update to `generated_image` + gr.update(value=result, visible=True, interactive=False), + # preprocessor_preview + gr.update(value=True), + # openpose editor + *self.openpose_editor.update(json_acceptor.value), + ) + + self.trigger_preprocessor.click( + fn=run_annotator, + inputs=[ + self.image, + self.module, + self.processor_res, + self.threshold_a, + self.threshold_b, + self.width_slider, + self.height_slider, + self.pixel_perfect, + self.resize_mode, + ], + outputs=[ + self.generated_image, + self.preprocessor_preview, + *self.openpose_editor.outputs(), + ], + ) + + def register_shift_preview(self): + def shift_preview(is_on): + return ( + # generated_image + gr.update() if is_on else gr.update(value=None), + # generated_image_group + gr.update(visible=is_on), + # use_preview_as_input, + gr.update(visible=False), # Now this is automatically managed + # download_pose_link + gr.update() if is_on else gr.update(value=None), + # modal edit button + gr.update() if is_on else gr.update(visible=False), + ) + + self.preprocessor_preview.change( + fn=shift_preview, + inputs=[self.preprocessor_preview], + outputs=[ + self.generated_image, + self.generated_image_group, + self.use_preview_as_input, + self.openpose_editor.download_link, + self.openpose_editor.modal, + ], + show_progress=False, + ) + + def register_create_canvas(self): + self.open_new_canvas_button.click( + lambda: gr.Accordion.update(visible=True), + inputs=None, + outputs=self.create_canvas, + show_progress=False, + ) + self.canvas_cancel_button.click( + lambda: gr.Accordion.update(visible=False), + inputs=None, + outputs=self.create_canvas, + show_progress=False, + ) + + def fn_canvas(h, w): + return np.zeros(shape=(h, w, 3), dtype=np.uint8), gr.Accordion.update( + visible=False + ) + + self.canvas_create_button.click( + fn=fn_canvas, + inputs=[self.canvas_height, self.canvas_width], + outputs=[self.image, self.create_canvas], + show_progress=False, + ) + + def register_img2img_same_input(self): + def fn_same_checked(x): + return [ + gr.update(value=None), + gr.update(value=None), + gr.update(value=False, visible=x), + ] + [gr.update(visible=x)] * 3 + + self.upload_independent_img_in_img2img.change( + fn_same_checked, + inputs=self.upload_independent_img_in_img2img, + outputs=[ + self.image, + self.batch_image_dir, + self.preprocessor_preview, + self.image_upload_panel, + self.trigger_preprocessor, + self.resize_mode, + ], + show_progress=False, + ) + + def register_shift_crop_input_image(self): + return + + def register_shift_hr_options(self): + ControlNetUiGroup.a1111_context.txt2img_enable_hr.change( + fn=lambda checked: gr.update(visible=checked), + inputs=[ControlNetUiGroup.a1111_context.txt2img_enable_hr], + outputs=[self.hr_option], + show_progress=False, + ) + + def register_shift_upload_mask(self): + """Controls whether the upload mask input should be visible.""" + def on_checkbox_click(checked: bool, canvas_height: int, canvas_width: int): + if not checked: + # Clear mask_image if unchecked. 
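+ # Outputs (in order): mask_image_group visibility, mask_image value, batch_mask_dir,
+ # batch_mask_gallery_group visibility, batch_mask_gallery value.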
+ return gr.update(visible=False), gr.update(value=None), gr.update(value=None, visible=False), \ + gr.update(visible=False), gr.update(value=None) + else: + # Init an empty canvas the same size as the generation target. + empty_canvas = np.zeros(shape=(canvas_height, canvas_width, 3), dtype=np.uint8) + return gr.update(visible=True), gr.update(value=empty_canvas), gr.update(visible=True), \ + gr.update(visible=True), gr.update() + + self.mask_upload.change( + fn=on_checkbox_click, + inputs=[self.mask_upload, self.height_slider, self.width_slider], + outputs=[self.mask_image_group, self.mask_image, self.batch_mask_dir, + self.batch_mask_gallery_group, self.batch_mask_gallery], + show_progress=False, + ) + + if self.upload_independent_img_in_img2img is not None: + self.upload_independent_img_in_img2img.change( + fn=lambda checked: ( + # Uncheck `upload_mask` when not using independent input. + gr.update(visible=False, value=False) + if not checked + else gr.update(visible=True) + ), + inputs=[self.upload_independent_img_in_img2img], + outputs=[self.mask_upload], + show_progress=False, + ) + + def register_sync_batch_dir(self): + def determine_batch_dir(batch_dir, fallback_dir, fallback_fallback_dir): + if batch_dir: + return batch_dir + elif fallback_dir: + return fallback_dir + else: + return fallback_fallback_dir + + batch_dirs = [ + self.batch_image_dir, + ControlNetUiGroup.global_batch_input_dir, + ControlNetUiGroup.a1111_context.img2img_batch_input_dir, + ] + for batch_dir_comp in batch_dirs: + subscriber = getattr(batch_dir_comp, "blur", None) + if subscriber is None: + continue + subscriber( + fn=determine_batch_dir, + inputs=batch_dirs, + outputs=[self.batch_image_dir_state], + queue=False, + ) + + ControlNetUiGroup.a1111_context.img2img_batch_output_dir.blur( + fn=lambda a: a, + inputs=[ControlNetUiGroup.a1111_context.img2img_batch_output_dir], + outputs=[self.output_dir_state], + queue=False, + ) + + def register_clear_preview(self): + def clear_preview(x): + if x: + logger.info("Preview as input is cancelled.") + return gr.update(value=False), gr.update(value=None) + + for comp in ( + self.pixel_perfect, + self.module, + self.image, + self.processor_res, + self.threshold_a, + self.threshold_b, + self.upload_independent_img_in_img2img, + ): + event_subscribers = [] + if hasattr(comp, "edit"): + event_subscribers.append(comp.edit) + elif hasattr(comp, "click"): + event_subscribers.append(comp.click) + elif isinstance(comp, gr.Slider) and hasattr(comp, "release"): + event_subscribers.append(comp.release) + elif hasattr(comp, "change"): + event_subscribers.append(comp.change) + if hasattr(comp, "clear"): + event_subscribers.append(comp.clear) + for event_subscriber in event_subscribers: + event_subscriber( + fn=clear_preview, + inputs=self.use_preview_as_input, + outputs=[self.use_preview_as_input, self.generated_image], + show_progress=False + ) + + def register_multi_images_upload(self): + """Register callbacks on merge tab multiple images upload.""" + self.merge_clear_button.click( + fn=lambda: [], + inputs=[], + outputs=[self.batch_input_gallery], + ).then( + fn=lambda x: gr.update(value=x + 1), + inputs=[self.dummy_gradio_update_trigger], + outputs=[self.dummy_gradio_update_trigger], + ) + self.mask_merge_clear_button.click( + fn=lambda: [], + inputs=[], + outputs=[self.batch_mask_gallery], + ).then( + fn=lambda x: gr.update(value=x + 1), + inputs=[self.dummy_gradio_update_trigger], + outputs=[self.dummy_gradio_update_trigger], + ) + + def upload_file(files, current_files): + 
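+ # Merge file paths already shown in the gallery with the newly uploaded temp files; the set union drops duplicates.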
return {file_d["name"] for file_d in current_files} | { + file.name for file in files + } + + self.merge_upload_button.upload( + upload_file, + inputs=[self.merge_upload_button, self.batch_input_gallery], + outputs=[self.batch_input_gallery], + queue=False, + ).then( + fn=lambda x: gr.update(value=x + 1), + inputs=[self.dummy_gradio_update_trigger], + outputs=[self.dummy_gradio_update_trigger], + ) + self.mask_merge_upload_button.upload( + upload_file, + inputs=[self.mask_merge_upload_button, self.batch_mask_gallery], + outputs=[self.batch_mask_gallery], + queue=False, + ).then( + fn=lambda x: gr.update(value=x + 1), + inputs=[self.dummy_gradio_update_trigger], + outputs=[self.dummy_gradio_update_trigger], + ) + return + + def register_core_callbacks(self): + """Register core callbacks that only involves gradio components defined + within this ui group.""" + self.register_webcam_toggle() + self.register_webcam_mirror_toggle() + self.register_refresh_all_models() + self.register_build_sliders() + self.register_shift_preview() + self.register_create_canvas() + self.register_clear_preview() + self.register_multi_images_upload() + self.openpose_editor.register_callbacks( + self.generated_image, + self.use_preview_as_input, + self.model, + ) + assert self.type_filter is not None + self.preset_panel.register_callbacks( + self, + self.type_filter, + *[ + getattr(self, key) + for key in external_code.ControlNetUnit.infotext_fields() + ], + ) + if self.is_img2img: + self.register_img2img_same_input() + + def register_sd_model_changed(self): + def sd_version_changed(type_filter: str, current_model: str, setting_value: str, setting_name: str): + """When SD version changes, update model dropdown choices.""" + if setting_name != "sd_model_checkpoint": + return gr.update() + + filtered_model_list = global_state.get_filtered_controlnet_names(type_filter) + assert len(filtered_model_list) > 0 + default_model = filtered_model_list[1] if len(filtered_model_list) > 1 else filtered_model_list[0] + return gr.Dropdown.update( + choices=filtered_model_list, + value=current_model if current_model in filtered_model_list else default_model + ) + + script_callbacks.on_setting_updated_subscriber(dict( + fn=sd_version_changed, + inputs=[self.type_filter, self.model], + outputs=[self.model], + )) + + def register_callbacks(self): + """Register callbacks that involves A1111 context gradio components.""" + # Prevent infinite recursion. + if self.callbacks_registered: + return + + self.callbacks_registered = True + self.register_send_dimensions() + self.register_run_annotator() + self.register_sync_batch_dir() + self.register_shift_upload_mask() + self.register_sd_model_changed() + if self.is_img2img: + self.register_shift_crop_input_image() + else: + self.register_shift_hr_options() + + @staticmethod + def register_input_mode_sync(ui_groups: List["ControlNetUiGroup"]): + """ + - ui_group.input_mode should be updated when user switch tabs. + - Loopback checkbox should only be visible if at least one ControlNet unit + is set to batch mode. + + Argument: + ui_groups: All ControlNetUiGroup instances defined in current Script context. + + Returns: + None + """ + if not ui_groups: + return + + for ui_group in ui_groups: + batch_fn = lambda: InputMode.BATCH + simple_fn = lambda: InputMode.SIMPLE + merge_fn = lambda: InputMode.MERGE + for input_tab, fn in ( + (ui_group.upload_tab, simple_fn), + (ui_group.batch_tab, batch_fn), + (ui_group.merge_tab, merge_fn), + ): + # Sync input_mode. 
+ input_tab.select( + fn=fn, + inputs=[], + outputs=[ui_group.input_mode], + show_progress=False, + ) + + @staticmethod + def reset(): + ControlNetUiGroup.a1111_context = A1111Context() + ControlNetUiGroup.all_ui_groups = [] + + @staticmethod + def try_register_all_callbacks(): + unit_count = shared.opts.data.get("control_net_unit_count", 3) + all_unit_count = unit_count * 2 # txt2img + img2img. + if ( + # All A1111 components ControlNet units care about are all registered. + ControlNetUiGroup.a1111_context.ui_initialized + and all_unit_count == len(ControlNetUiGroup.all_ui_groups) + and all( + g.ui_initialized and (not g.callbacks_registered) + for g in ControlNetUiGroup.all_ui_groups + ) + ): + for ui_group in ControlNetUiGroup.all_ui_groups: + ui_group.register_callbacks() + + ControlNetUiGroup.register_input_mode_sync( + [g for g in ControlNetUiGroup.all_ui_groups if g.is_img2img] + ) + ControlNetUiGroup.register_input_mode_sync( + [g for g in ControlNetUiGroup.all_ui_groups if not g.is_img2img] + ) + logger.info("ControlNet UI callback registered.") + + @staticmethod + def on_after_component(component, **_kwargs): + """Register the A1111 component.""" + if getattr(component, "elem_id", None) == "img2img_batch_inpaint_mask_dir": + ControlNetUiGroup.global_batch_input_dir.render() + return + + ControlNetUiGroup.a1111_context.set_component(component) + ControlNetUiGroup.try_register_all_callbacks() diff --git a/extensions-builtin/sd_forge_controlnet/lib_controlnet/controlnet_ui/modal.py b/extensions-builtin/sd_forge_controlnet/lib_controlnet/controlnet_ui/modal.py new file mode 100644 index 0000000000000000000000000000000000000000..17ea4d6751708a2ff038f749dcc24ff78e670f21 --- /dev/null +++ b/extensions-builtin/sd_forge_controlnet/lib_controlnet/controlnet_ui/modal.py @@ -0,0 +1,38 @@ +import gradio as gr +from typing import List + + +class ModalInterface(gr.Interface): + modal_id_counter = 0 + + def __init__( + self, + html_content: str, + open_button_text: str, + open_button_classes: List[str] = [], + open_button_extra_attrs: str = '' + ): + self.html_content = html_content + self.open_button_text = open_button_text + self.open_button_classes = open_button_classes + self.open_button_extra_attrs = open_button_extra_attrs + self.modal_id = ModalInterface.modal_id_counter + ModalInterface.modal_id_counter += 1 + + def __call__(self): + return self.create_modal() + + def create_modal(self, visible=True): + html_code = f""" +
+ <div id="cnet-modal-{self.modal_id}" class="cnet-modal">
+ <span class="cnet-modal-close">&times;</span>
+ <div class="cnet-modal-content">
+ {self.html_content}
+ </div>
+ </div>
+ <div id="cnet-modal-open-{self.modal_id}" class="cnet-modal-open {' '.join(self.open_button_classes)}" {self.open_button_extra_attrs}>
+ {self.open_button_text}
+ </div>
+ """ + return gr.HTML(value=html_code, visible=visible) diff --git a/extensions-builtin/sd_forge_controlnet/lib_controlnet/controlnet_ui/openpose_editor.py b/extensions-builtin/sd_forge_controlnet/lib_controlnet/controlnet_ui/openpose_editor.py new file mode 100644 index 0000000000000000000000000000000000000000..4146018a1ca4cb0b449d7e5703c92dc2c74b43d8 --- /dev/null +++ b/extensions-builtin/sd_forge_controlnet/lib_controlnet/controlnet_ui/openpose_editor.py @@ -0,0 +1,154 @@ +import base64 +import gradio as gr +import json +from typing import List, Dict, Any, Tuple + +from annotator.openpose import decode_json_as_poses, draw_poses +from annotator.openpose.animalpose import draw_animalposes +from lib_controlnet.controlnet_ui.modal import ModalInterface +from modules import shared +from lib_controlnet.logging import logger + + +def parse_data_url(data_url: str): + # Split the URL at the comma + media_type, data = data_url.split(",", 1) + + # Check if the data is base64-encoded + assert ";base64" in media_type + + # Decode the base64 data + return base64.b64decode(data) + + +def encode_data_url(json_string: str) -> str: + base64_encoded_json = base64.b64encode(json_string.encode("utf-8")).decode("utf-8") + return f"data:application/json;base64,{base64_encoded_json}" + + +class OpenposeEditor(object): + # Filename used when user click the download link. + download_file = "pose.json" + # URL the openpose editor is mounted on. + editor_url = "/openpose_editor_index" + + def __init__(self) -> None: + self.render_button = None + self.pose_input = None + self.download_link = None + self.upload_link = None + self.modal = None + + def render_edit(self): + """Renders the buttons in preview image control button group.""" + # The hidden button to trigger a re-render of generated image. + self.render_button = gr.Button(visible=False, elem_classes=["cnet-render-pose"]) + # The hidden element that stores the pose json for backend retrieval. + # The front-end javascript will write the edited JSON data to the element. + self.pose_input = gr.Textbox(visible=False, elem_classes=["cnet-pose-json"]) + + self.modal = ModalInterface( + # Use about:blank here as placeholder so that the iframe does not + # immediately navigate. Most of controlnet units do not need + # openpose editor active. Only navigate when the user first click + # 'Edit'. The navigation logic is in `openpose_editor.js`. + f'', + open_button_text="Edit", + open_button_classes=["cnet-edit-pose"], + open_button_extra_attrs=f'title="Send pose to {OpenposeEditor.editor_url} for edit."', + ).create_modal(visible=False) + self.download_link = gr.HTML( + value=f"""JSON""", + visible=False, + elem_classes=["cnet-download-pose"], + ) + + def render_upload(self): + """Renders the button in input image control button group.""" + self.upload_link = gr.HTML( + value=""" + + + """, + visible=False, + elem_classes=["cnet-upload-pose"], + ) + + def register_callbacks( + self, + generated_image: gr.Image, + use_preview_as_input: gr.Checkbox, + model: gr.Dropdown, + ): + def render_pose(pose_url: str) -> Tuple[Dict, Dict]: + json_string = parse_data_url(pose_url).decode("utf-8") + poses, animals, height, width = decode_json_as_poses( + json.loads(json_string) + ) + logger.info("Preview as input is enabled.") + return ( + # Generated image. 
+ gr.update( + value=( + draw_poses( + poses, + height, + width, + draw_body=True, + draw_hand=True, + draw_face=True, + ) + if poses + else draw_animalposes(animals, height, width) + ), + visible=True, + ), + # Use preview as input. + gr.update(value=True), + # Self content. + *self.update(json_string), + ) + + self.render_button.click( + fn=render_pose, + inputs=[self.pose_input], + outputs=[generated_image, use_preview_as_input, *self.outputs()], + ) + + def update_upload_link(model: str) -> Dict: + return gr.update(visible="openpose" in model.lower()) + + model.change(fn=update_upload_link, inputs=[model], outputs=[self.upload_link]) + + def outputs(self) -> List[Any]: + return [ + self.download_link, + self.modal, + ] + + def update(self, json_string: str) -> List[Dict]: + """ + Called when there is a new JSON pose value generated by running + preprocessor. + + Args: + json_string: The new JSON string generated by preprocessor. + + Returns: + An gr.update event. + """ + hint = "Download the pose as .json file" + html = f""" + JSON""" + + visible = json_string != "" + return [ + # Download link update. + gr.update(value=html, visible=visible), + # Modal update. + gr.update( + visible=visible + and not shared.opts.data.get("controlnet_disable_openpose_edit", False) + ), + ] diff --git a/extensions-builtin/sd_forge_controlnet/lib_controlnet/controlnet_ui/photopea.py b/extensions-builtin/sd_forge_controlnet/lib_controlnet/controlnet_ui/photopea.py new file mode 100644 index 0000000000000000000000000000000000000000..5bea02e891e22e020dfb28113a68416754765e38 --- /dev/null +++ b/extensions-builtin/sd_forge_controlnet/lib_controlnet/controlnet_ui/photopea.py @@ -0,0 +1,182 @@ +import gradio as gr + +from lib_controlnet.controlnet_ui.modal import ModalInterface + +PHOTOPEA_LOGO = """ + + + + + + + +""" + + +class Photopea(object): + def __init__(self) -> None: + self.modal = None + self.triggers = [] + self.render_editor() + + def render_editor(self): + """Render the editor modal.""" + with gr.Group(elem_classes=["cnet-photopea-edit"]): + self.modal = ModalInterface( + # Use about:blank here as placeholder so that the iframe does not + # immediately navigate. Only navigate when the user first click + # 'Edit'. The navigation logic is in `photopea.js`. + f""" +
+ <iframe class="photopea-iframe" src="about:blank"></iframe>
+ + """, + open_button_text="Edit", + open_button_classes=["cnet-photopea-main-trigger"], + open_button_extra_attrs="hidden", + ).create_modal(visible=True) + + def render_child_trigger(self): + self.triggers.append( + gr.HTML( + f"""
+ <div class="cnet-photopea-child-trigger">Edit {PHOTOPEA_LOGO}</div>
""" + ) + ) + + def attach_photopea_output(self, generated_image: gr.Image): + """Called in ControlNetUiGroup to attach preprocessor preview image Gradio element + as the photopea output. If the front-end directly change the img HTML element's src + to reflect the edited image result from photopea, the backend won't be notified. + + In this method we let the front-end upload the result image an invisible gr.Image + instance and mirrors the value to preprocessor preview gr.Image. This is because + the generated image gr.Image instance is inferred to be an output image by Gradio + and has no ability to accept image upload directly. + + Arguments: + generated_image: preprocessor result Gradio Image output element. + + Returns: + None + """ + output = gr.Image( + visible=False, + source="upload", + type="numpy", + elem_classes=[f"cnet-photopea-output"], + ) + + output.upload( + fn=lambda img: img, + inputs=[output], + outputs=[generated_image], + ) diff --git a/extensions-builtin/sd_forge_controlnet/lib_controlnet/controlnet_ui/preset.py b/extensions-builtin/sd_forge_controlnet/lib_controlnet/controlnet_ui/preset.py new file mode 100644 index 0000000000000000000000000000000000000000..15a9f24ca501a4a0b256db44d20f6b09d5c399cb --- /dev/null +++ b/extensions-builtin/sd_forge_controlnet/lib_controlnet/controlnet_ui/preset.py @@ -0,0 +1,313 @@ +import os +import gradio as gr + +from typing import Dict, List + +from modules import scripts +from lib_controlnet.infotext import parse_unit, serialize_unit +from lib_controlnet.controlnet_ui.tool_button import ToolButton +from lib_controlnet.logging import logger +from lib_controlnet.external_code import ControlNetUnit, UiControlNetUnit +from lib_controlnet.global_state import get_preprocessor +from modules_forge.supported_preprocessor import Preprocessor + +save_symbol = "\U0001f4be" # 💾 +delete_symbol = "\U0001f5d1\ufe0f" # 🗑️ +refresh_symbol = "\U0001f504" # 🔄 +reset_symbol = "\U000021A9" # ↩ + +NEW_PRESET = "New Preset" + + +def load_presets(preset_dir: str) -> Dict[str, str]: + if not os.path.exists(preset_dir): + os.makedirs(preset_dir) + return {} + + presets = {} + for filename in os.listdir(preset_dir): + if filename.endswith(".txt"): + with open(os.path.join(preset_dir, filename), "r") as f: + name = filename.replace(".txt", "") + if name == NEW_PRESET: + continue + presets[name] = f.read() + return presets + + +def infer_control_type(module: str) -> str: + preprocessor: Preprocessor = get_preprocessor(module) + assert preprocessor is not None + return preprocessor.tags[0] if preprocessor.tags else "All" + + +class ControlNetPresetUI(object): + preset_directory = os.path.join(scripts.basedir(), "presets") + presets = load_presets(preset_directory) + + def __init__(self, id_prefix: str): + with gr.Row(): + self.dropdown = gr.Dropdown( + label="Presets", + show_label=True, + elem_classes=["cnet-preset-dropdown"], + choices=ControlNetPresetUI.dropdown_choices(), + value=NEW_PRESET, + ) + self.reset_button = ToolButton( + value=reset_symbol, + elem_classes=["cnet-preset-reset"], + tooltip="Reset preset", + visible=False, + ) + self.save_button = ToolButton( + value=save_symbol, + elem_classes=["cnet-preset-save"], + tooltip="Save preset", + ) + self.delete_button = ToolButton( + value=delete_symbol, + elem_classes=["cnet-preset-delete"], + tooltip="Delete preset", + ) + self.refresh_button = ToolButton( + value=refresh_symbol, + elem_classes=["cnet-preset-refresh"], + tooltip="Refresh preset", + ) + + with gr.Box( + elem_classes=["popup-dialog", 
"cnet-preset-enter-name"], + elem_id=f"{id_prefix}_cnet_preset_enter_name", + ) as self.name_dialog: + with gr.Row(): + self.preset_name = gr.Textbox( + label="Preset name", + show_label=True, + lines=1, + elem_classes=["cnet-preset-name"], + ) + self.confirm_preset_name = ToolButton( + value=save_symbol, + elem_classes=["cnet-preset-confirm-name"], + tooltip="Save preset", + ) + + def register_callbacks( + self, + uigroup, + control_type: gr.Radio, + *ui_states, + ): + def init_with_ui_states(*ui_states) -> ControlNetUnit: + return ControlNetUnit(**{ + field: value + for field, value in zip(ControlNetUnit.infotext_fields(), ui_states) + }) + + def apply_preset(name: str, control_type: str, *ui_states): + if name == NEW_PRESET: + return ( + gr.update(visible=False), + *( + (gr.skip(),) + * (len(ControlNetUnit.infotext_fields()) + 1) + ), + ) + + assert name in ControlNetPresetUI.presets + + infotext = ControlNetPresetUI.presets[name] + preset_unit = parse_unit(infotext) + current_unit = init_with_ui_states(*ui_states) + preset_unit.image = None + current_unit.image = None + + # Do not compare module param that are not used in preset. + for module_param in ("processor_res", "threshold_a", "threshold_b"): + if getattr(preset_unit, module_param) == -1: + setattr(current_unit, module_param, -1) + + # No update necessary. + if vars(current_unit) == vars(preset_unit): + return ( + gr.update(visible=False), + *( + (gr.skip(),) + * (len(ControlNetUnit.infotext_fields()) + 1) + ), + ) + + unit = preset_unit + + try: + new_control_type = infer_control_type(unit.module) + except ValueError as e: + logger.error(e) + new_control_type = control_type + + if new_control_type != control_type: + uigroup.prevent_next_n_module_update += 1 + + if preset_unit.module != current_unit.module: + uigroup.prevent_next_n_slider_value_update += 1 + + if preset_unit.pixel_perfect != current_unit.pixel_perfect: + uigroup.prevent_next_n_slider_value_update += 1 + + return ( + gr.update(visible=True), + gr.update(value=new_control_type), + *[ + gr.update(value=value) if value is not None else gr.update() + for field in ControlNetUnit.infotext_fields() + for value in (getattr(unit, field),) + ], + ) + + for element, action in ( + (self.dropdown, "change"), + (self.reset_button, "click"), + ): + getattr(element, action)( + fn=apply_preset, + inputs=[self.dropdown, control_type, *ui_states], + outputs=[self.delete_button, control_type, *ui_states], + show_progress="hidden", + ).then( + fn=lambda: gr.update(visible=False), + inputs=None, + outputs=[self.reset_button], + ) + + def save_preset(name: str, *ui_states): + if name == NEW_PRESET: + return gr.update(visible=True), gr.update(), gr.update() + + ControlNetPresetUI.save_preset( + name, init_with_ui_states(*ui_states) + ) + return ( + gr.update(), # name dialog + gr.update(choices=ControlNetPresetUI.dropdown_choices(), value=name), + gr.update(visible=False), # Reset button + ) + + self.save_button.click( + fn=save_preset, + inputs=[self.dropdown, *ui_states], + outputs=[self.name_dialog, self.dropdown, self.reset_button], + show_progress="hidden", + ).then( + fn=None, + _js=f""" + (name) => {{ + if (name === "{NEW_PRESET}") + popup(gradioApp().getElementById('{self.name_dialog.elem_id}')); + }}""", + inputs=[self.dropdown], + ) + + def delete_preset(name: str): + ControlNetPresetUI.delete_preset(name) + return gr.Dropdown.update( + choices=ControlNetPresetUI.dropdown_choices(), + value=NEW_PRESET, + ), gr.update(visible=False) + + self.delete_button.click( + 
fn=delete_preset, + inputs=[self.dropdown], + outputs=[self.dropdown, self.reset_button], + show_progress="hidden", + ) + + self.name_dialog.visible = False + + def save_new_preset(new_name: str, *ui_states): + if new_name == NEW_PRESET: + logger.warn(f"Cannot save preset with reserved name '{NEW_PRESET}'") + return gr.update(visible=False), gr.update() + + ControlNetPresetUI.save_preset( + new_name, init_with_ui_states(*ui_states) + ) + return gr.update(visible=False), gr.update( + choices=ControlNetPresetUI.dropdown_choices(), value=new_name + ) + + self.confirm_preset_name.click( + fn=save_new_preset, + inputs=[self.preset_name, *ui_states], + outputs=[self.name_dialog, self.dropdown], + show_progress="hidden", + ).then(fn=None, _js="closePopup") + + self.refresh_button.click( + fn=ControlNetPresetUI.refresh_preset, + inputs=None, + outputs=[self.dropdown], + show_progress="hidden", + ) + + def update_reset_button(preset_name: str, *ui_states): + if preset_name == NEW_PRESET: + return gr.update(visible=False) + + infotext = ControlNetPresetUI.presets[preset_name] + preset_unit = parse_unit(infotext) + current_unit = init_with_ui_states(*ui_states) + preset_unit.image = None + current_unit.image = None + + # Do not compare module param that are not used in preset. + for module_param in ("processor_res", "threshold_a", "threshold_b"): + if getattr(preset_unit, module_param) == -1: + setattr(current_unit, module_param, -1) + + return gr.update(visible=vars(current_unit) != vars(preset_unit)) + + for ui_state in ui_states: + if isinstance(ui_state, gr.Image): + continue + + for action in ("edit", "click", "change", "clear", "release"): + if action == "release" and not isinstance(ui_state, gr.Slider): + continue + + if hasattr(ui_state, action): + getattr(ui_state, action)( + fn=update_reset_button, + inputs=[self.dropdown, *ui_states], + outputs=[self.reset_button], + ) + + @staticmethod + def dropdown_choices() -> List[str]: + return list(ControlNetPresetUI.presets.keys()) + [NEW_PRESET] + + @staticmethod + def save_preset(name: str, unit: ControlNetUnit): + infotext = serialize_unit(unit) + with open( + os.path.join(ControlNetPresetUI.preset_directory, f"{name}.txt"), "w" + ) as f: + f.write(infotext) + + ControlNetPresetUI.presets[name] = infotext + + @staticmethod + def delete_preset(name: str): + if name not in ControlNetPresetUI.presets: + return + + del ControlNetPresetUI.presets[name] + + file = os.path.join(ControlNetPresetUI.preset_directory, f"{name}.txt") + if os.path.exists(file): + os.unlink(file) + + @staticmethod + def refresh_preset(): + ControlNetPresetUI.presets = load_presets(ControlNetPresetUI.preset_directory) + return gr.update(choices=ControlNetPresetUI.dropdown_choices()) diff --git a/extensions-builtin/sd_forge_controlnet/lib_controlnet/controlnet_ui/tool_button.py b/extensions-builtin/sd_forge_controlnet/lib_controlnet/controlnet_ui/tool_button.py new file mode 100644 index 0000000000000000000000000000000000000000..8a38df8f43ec27ecf08ca7c2ed80fa7d493e921f --- /dev/null +++ b/extensions-builtin/sd_forge_controlnet/lib_controlnet/controlnet_ui/tool_button.py @@ -0,0 +1,12 @@ +import gradio as gr + +class ToolButton(gr.Button, gr.components.FormComponent): + """Small button with single emoji as text, fits inside gradio forms""" + + def __init__(self, **kwargs): + super().__init__(variant="tool", + elem_classes=kwargs.pop('elem_classes', []) + ["cnet-toolbutton"], + **kwargs) + + def get_block_name(self): + return "button" diff --git 
a/extensions-builtin/sd_forge_controlnet/lib_controlnet/enums.py b/extensions-builtin/sd_forge_controlnet/lib_controlnet/enums.py new file mode 100644 index 0000000000000000000000000000000000000000..ad64c3213e0a75ea276a40d5757544e02f181034 --- /dev/null +++ b/extensions-builtin/sd_forge_controlnet/lib_controlnet/enums.py @@ -0,0 +1,80 @@ +from enum import Enum + + +class HiResFixOption(Enum): + BOTH = "Both" + LOW_RES_ONLY = "Low res only" + HIGH_RES_ONLY = "High res only" + + @staticmethod + def from_value(value) -> "HiResFixOption": + if isinstance(value, str) and value.startswith("HiResFixOption."): + _, field = value.split(".") + return getattr(HiResFixOption, field) + if isinstance(value, str): + return HiResFixOption(value) + elif isinstance(value, int): + return [x for x in HiResFixOption][value] + else: + assert isinstance(value, HiResFixOption) + return value + + @property + def low_res_enabled(self) -> bool: + return self in (HiResFixOption.BOTH, HiResFixOption.LOW_RES_ONLY) + + @property + def high_res_enabled(self) -> bool: + return self in (HiResFixOption.BOTH, HiResFixOption.HIGH_RES_ONLY) + + +class StableDiffusionVersion(Enum): + """The version family of stable diffusion model.""" + + UNKNOWN = 0 + SD1x = 1 + SD2x = 2 + SDXL = 3 + + @staticmethod + def detect_from_model_name(model_name: str) -> "StableDiffusionVersion": + """Based on the model name provided, guess what stable diffusion version it is. + This might not be accurate without actually inspect the file content. + """ + if any(f"sd{v}" in model_name.lower() for v in ("14", "15", "16")): + return StableDiffusionVersion.SD1x + + if "sd21" in model_name or "2.1" in model_name: + return StableDiffusionVersion.SD2x + + if "xl" in model_name.lower(): + return StableDiffusionVersion.SDXL + + return StableDiffusionVersion.UNKNOWN + + def encoder_block_num(self) -> int: + if self in (StableDiffusionVersion.SD1x, StableDiffusionVersion.SD2x, StableDiffusionVersion.UNKNOWN): + return 12 + else: + return 9 # SDXL + + def controlnet_layer_num(self) -> int: + return self.encoder_block_num() + 1 + + def is_compatible_with(self, other: "StableDiffusionVersion") -> bool: + """ Incompatible only when one of version is SDXL and other is not. """ + return ( + any(v == StableDiffusionVersion.UNKNOWN for v in [self, other]) or + sum(v == StableDiffusionVersion.SDXL for v in [self, other]) != 1 + ) + + +class InputMode(Enum): + # Single image to a single ControlNet unit. + SIMPLE = "simple" + # Input is a directory. N generations. Each generation takes 1 input image + # from the directory. + BATCH = "batch" + # Input is a directory. 1 generation. Each generation takes N input image + # from the directory. + MERGE = "merge" diff --git a/extensions-builtin/sd_forge_controlnet/lib_controlnet/external_code.py b/extensions-builtin/sd_forge_controlnet/lib_controlnet/external_code.py new file mode 100644 index 0000000000000000000000000000000000000000..4954478ac8c9f949ecdc752657f6c4480f12d4ff --- /dev/null +++ b/extensions-builtin/sd_forge_controlnet/lib_controlnet/external_code.py @@ -0,0 +1,299 @@ +from dataclasses import dataclass +from enum import Enum +from typing import List, Optional, Union, Dict, TypedDict +import numpy as np +from modules import shared +from lib_controlnet.logging import logger +from lib_controlnet.enums import InputMode, HiResFixOption +from modules.api import api + + +def get_api_version() -> int: + return 2 + + +class ControlMode(Enum): + """ + The improved guess mode. 
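+ Determines how strongly the ControlNet guidance is weighted against the text prompt
+ (balanced, prompt takes priority, or ControlNet takes priority).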
+ """ + + BALANCED = "Balanced" + PROMPT = "My prompt is more important" + CONTROL = "ControlNet is more important" + + +class BatchOption(Enum): + DEFAULT = "All ControlNet units for all images in a batch" + SEPARATE = "Each ControlNet unit for each image in a batch" + + +class ResizeMode(Enum): + """ + Resize modes for ControlNet input images. + """ + + RESIZE = "Just Resize" + INNER_FIT = "Crop and Resize" + OUTER_FIT = "Resize and Fill" + + def int_value(self): + if self == ResizeMode.RESIZE: + return 0 + elif self == ResizeMode.INNER_FIT: + return 1 + elif self == ResizeMode.OUTER_FIT: + return 2 + assert False, "NOTREACHED" + + +resize_mode_aliases = { + 'Inner Fit (Scale to Fit)': 'Crop and Resize', + 'Outer Fit (Shrink to Fit)': 'Resize and Fill', + 'Scale to Fit (Inner Fit)': 'Crop and Resize', + 'Envelope (Outer Fit)': 'Resize and Fill', +} + + +def resize_mode_from_value(value: Union[str, int, ResizeMode]) -> ResizeMode: + if isinstance(value, str): + return ResizeMode(resize_mode_aliases.get(value, value)) + elif isinstance(value, int): + assert value >= 0 + if value == 3: # 'Just Resize (Latent upscale)' + return ResizeMode.RESIZE + + if value >= len(ResizeMode): + logger.warning(f'Unrecognized ResizeMode int value {value}. Fall back to RESIZE.') + return ResizeMode.RESIZE + + return [e for e in ResizeMode][value] + else: + return value + + +def control_mode_from_value(value: Union[str, int, ControlMode]) -> ControlMode: + if isinstance(value, str): + return ControlMode(value) + elif isinstance(value, int): + return [e for e in ControlMode][value] + else: + return value + + +def visualize_inpaint_mask(img): + if img.ndim == 3 and img.shape[2] == 4: + result = img.copy() + mask = result[:, :, 3] + mask = 255 - mask // 2 + result[:, :, 3] = mask + return np.ascontiguousarray(result.copy()) + return img + + +def pixel_perfect_resolution( + image: np.ndarray, + target_H: int, + target_W: int, + resize_mode: ResizeMode, +) -> int: + """ + Calculate the estimated resolution for resizing an image while preserving aspect ratio. + + The function first calculates scaling factors for height and width of the image based on the target + height and width. Then, based on the chosen resize mode, it either takes the smaller or the larger + scaling factor to estimate the new resolution. + + If the resize mode is OUTER_FIT, the function uses the smaller scaling factor, ensuring the whole image + fits within the target dimensions, potentially leaving some empty space. + + If the resize mode is not OUTER_FIT, the function uses the larger scaling factor, ensuring the target + dimensions are fully filled, potentially cropping the image. + + After calculating the estimated resolution, the function prints some debugging information. + + Args: + image (np.ndarray): A 3D numpy array representing an image. The dimensions represent [height, width, channels]. + target_H (int): The target height for the image. + target_W (int): The target width for the image. + resize_mode (ResizeMode): The mode for resizing. + + Returns: + int: The estimated resolution after resizing. 
+ """ + raw_H, raw_W, _ = image.shape + + k0 = float(target_H) / float(raw_H) + k1 = float(target_W) / float(raw_W) + + if resize_mode == ResizeMode.OUTER_FIT: + estimation = min(k0, k1) * float(min(raw_H, raw_W)) + else: + estimation = max(k0, k1) * float(min(raw_H, raw_W)) + + logger.debug(f"Pixel Perfect Computation:") + logger.debug(f"resize_mode = {resize_mode}") + logger.debug(f"raw_H = {raw_H}") + logger.debug(f"raw_W = {raw_W}") + logger.debug(f"target_H = {target_H}") + logger.debug(f"target_W = {target_W}") + logger.debug(f"estimation = {estimation}") + + return int(np.round(estimation)) + + +class GradioImageMaskPair(TypedDict): + """Represents the dict object from Gradio's image component if `tool="sketch"` + is specified. + { + "image": np.ndarray, + "mask": np.ndarray, + } + """ + image: np.ndarray + mask: np.ndarray + + +@dataclass +class ControlNetUnit: + """Represents an entire ControlNet processing unit. + + To add a new field to this class + ## If the new field can be specified on UI, you need to + - Add a new field of the same name in constructor of `ControlNetUiGroup` + - Initialize the new `ControlNetUiGroup` field in `ControlNetUiGroup.render` + as a Gradio `IOComponent`. + - Add the new `ControlNetUiGroup` field to `unit_args` in + `ControlNetUiGroup.render`. The order of parameters matters. + + ## If the new field needs to appear in infotext, you need to + - Add a new item in `ControlNetUnit.infotext_fields`. + API-only fields cannot appear in infotext. + """ + # Following fields should only be used in the UI. + # ====== Start of UI only fields ====== + # Specifies the input mode for the unit, defaulting to a simple mode. + input_mode: InputMode = InputMode.SIMPLE + # Determines whether to use the preview image as input; defaults to False. + use_preview_as_input: bool = False + # Directory path for batch processing of images. + batch_image_dir: str = '' + # Directory path for batch processing of masks. + batch_mask_dir: str = '' + # Optional list of gallery images for batch input; defaults to None. + batch_input_gallery: Optional[List[str]] = None + # Optional list of gallery masks for batch processing; defaults to None. + batch_mask_gallery: Optional[List[str]] = None + # Holds the preview image as a NumPy array; defaults to None. + generated_image: Optional[np.ndarray] = None + # ====== End of UI only fields ====== + + # Following fields are used in both the API and the UI. + # Holds the mask image; defaults to None. + mask_image: Optional[GradioImageMaskPair] = None + # Specifies how this unit should be applied in each pass of high-resolution fix. + # Ignored if high-resolution fix is not enabled. + hr_option: Union[HiResFixOption, int, str] = HiResFixOption.BOTH + # Indicates whether the unit is enabled; defaults to True. + enabled: bool = True + # Name of the module being used; defaults to "None". + module: str = "None" + # Name of the model being used; defaults to "None". + model: str = "None" + # Weight of the unit in the overall processing; defaults to 1.0. + weight: float = 1.0 + # Optional image for input; defaults to None. + image: Optional[GradioImageMaskPair] = None + # Specifies the mode of image resizing; defaults to inner fit. + resize_mode: Union[ResizeMode, int, str] = ResizeMode.INNER_FIT + # Resolution for processing by the unit; defaults to -1 (unspecified). + processor_res: int = -1 + # Threshold A for processing; defaults to -1 (unspecified). + threshold_a: float = -1 + # Threshold B for processing; defaults to -1 (unspecified). 
+ threshold_b: float = -1 + # Start value for guidance; defaults to 0.0. + guidance_start: float = 0.0 + # End value for guidance; defaults to 1.0. + guidance_end: float = 1.0 + # Enables pixel-perfect processing; defaults to False. + pixel_perfect: bool = False + # Control mode for the unit; defaults to balanced. + control_mode: Union[ControlMode, int, str] = ControlMode.BALANCED + + # Following fields should only be used in the API. + # ====== Start of API only fields ====== + # Whether to save the detected map for this unit; defaults to True. + save_detected_map: bool = True + # ====== End of API only fields ====== + + @staticmethod + def infotext_fields(): + """Fields that should be included in infotext. + You should define a Gradio element with exact same name in ControlNetUiGroup + as well, so that infotext can wire the value to correct field when pasting + infotext. + """ + return ( + "module", + "model", + "weight", + "resize_mode", + "processor_res", + "threshold_a", + "threshold_b", + "guidance_start", + "guidance_end", + "pixel_perfect", + "control_mode", + "hr_option", + ) + + @staticmethod + def from_dict(d: Dict) -> "ControlNetUnit": + """Create ControlNetUnit from dict. This is primarily used to convert + API json dict to ControlNetUnit.""" + unit = ControlNetUnit( + **{k: v for k, v in d.items() if k in vars(ControlNetUnit)} + ) + if isinstance(unit.image, str): + img = np.array(api.decode_base64_to_image(unit.image)).astype('uint8') + unit.image = { + "image": img, + "mask": np.zeros_like(img), + } + if isinstance(unit.mask_image, str): + mask = np.array(api.decode_base64_to_image(unit.mask_image)).astype('uint8') + if unit.image is not None: + # Attach mask on image if ControlNet has input image. + assert isinstance(unit.image, dict) + unit.image["mask"] = mask + unit.mask_image = None + else: + # Otherwise, wire to standalone mask. + # This happens in img2img when using A1111 img2img input. + unit.mask_image = { + "image": mask, + "mask": np.zeros_like(mask), + } + return unit + + +# Backward Compatible +UiControlNetUnit = ControlNetUnit + + +def to_base64_nparray(encoding: str): + """ + Convert a base64 image into the image type the extension uses + """ + + return np.array(api.decode_base64_to_image(encoding)).astype('uint8') + + +def get_max_models_num(): + """ + Fetch the maximum number of allowed ControlNet models. 
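# Illustrative standalone sketch (not part of the patch): from_dict above keeps only the
# keys that exist on the dataclass, so unrecognized API fields are dropped rather than
# raising. The toy _Unit below filters with dataclasses.fields(), whereas the extension
# filters against vars(ControlNetUnit); the effect on known fields is the same.
from dataclasses import dataclass, fields

@dataclass
class _Unit:
    enabled: bool = True
    module: str = "None"
    weight: float = 1.0

def _from_dict(d: dict) -> "_Unit":
    allowed = {f.name for f in fields(_Unit)}
    return _Unit(**{k: v for k, v in d.items() if k in allowed})

u = _from_dict({"module": "canny", "weight": 0.8, "unknown_api_field": 123})
assert u.module == "canny" and u.weight == 0.8 and u.enabled is True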
+ """ + + max_models_num = shared.opts.data.get("control_net_unit_count", 3) + return max_models_num diff --git a/extensions-builtin/sd_forge_controlnet/lib_controlnet/global_state.py b/extensions-builtin/sd_forge_controlnet/lib_controlnet/global_state.py new file mode 100644 index 0000000000000000000000000000000000000000..dc87dda8d378653b21e601dc52cee3adf5eaf273 --- /dev/null +++ b/extensions-builtin/sd_forge_controlnet/lib_controlnet/global_state.py @@ -0,0 +1,146 @@ +import os.path +import stat +from collections import OrderedDict + +from modules import shared, sd_models +from lib_controlnet.enums import StableDiffusionVersion +from modules_forge.shared import controlnet_dir, supported_preprocessors + + +CN_MODEL_EXTS = [".pt", ".pth", ".ckpt", ".safetensors", ".bin", ".patch"] + + +def traverse_all_files(curr_path, model_list): + f_list = [ + (os.path.join(curr_path, entry.name), entry.stat()) + for entry in os.scandir(curr_path) + if os.path.isdir(curr_path) + ] + for f_info in f_list: + fname, fstat = f_info + if os.path.splitext(fname)[1] in CN_MODEL_EXTS: + model_list.append(f_info) + elif stat.S_ISDIR(fstat.st_mode): + model_list = traverse_all_files(fname, model_list) + return model_list + + +def get_all_models(sort_by, filter_by, path): + res = OrderedDict() + fileinfos = traverse_all_files(path, []) + filter_by = filter_by.strip(" ") + if len(filter_by) != 0: + fileinfos = [x for x in fileinfos if filter_by.lower() + in os.path.basename(x[0]).lower()] + if sort_by == "name": + fileinfos = sorted(fileinfos, key=lambda x: os.path.basename(x[0])) + elif sort_by == "date": + fileinfos = sorted(fileinfos, key=lambda x: -x[1].st_mtime) + elif sort_by == "path name": + fileinfos = sorted(fileinfos) + + for finfo in fileinfos: + filename = finfo[0] + name = os.path.splitext(os.path.basename(filename))[0] + # Prevent a hypothetical "None.pt" from being listed. 
+ if name != "None": + res[name + f" [{sd_models.model_hash(filename)}]"] = filename + + return res + + +controlnet_filename_dict = {'None': 'model.safetensors'} +controlnet_names = ['None'] + + +def get_preprocessor(name): + return supported_preprocessors.get(name, None) + + +def get_sorted_preprocessors(): + preprocessors = [p for k, p in supported_preprocessors.items() if k != 'None'] + preprocessors = sorted(preprocessors, key=lambda x: str(x.sorting_priority).zfill(8) + x.name)[::-1] + results = OrderedDict() + results['None'] = supported_preprocessors['None'] + for p in preprocessors: + results[p.name] = p + return results + + +def get_all_controlnet_names(): + return controlnet_names + + +def get_controlnet_filename(controlnet_name): + return controlnet_filename_dict[controlnet_name] + + +def get_all_preprocessor_names(): + return list(get_sorted_preprocessors().keys()) + + +def get_all_preprocessor_tags(): + tags = [] + for k, p in supported_preprocessors.items(): + tags += p.tags + tags = list(set(tags)) + tags = sorted(tags) + return ['All'] + tags + + +def get_filtered_preprocessors(tag): + if tag == 'All': + return supported_preprocessors + return {k: v for k, v in get_sorted_preprocessors().items() if tag in v.tags or k == 'None'} + + +def get_filtered_preprocessor_names(tag): + return list(get_filtered_preprocessors(tag).keys()) + + +def get_filtered_controlnet_names(tag): + filtered_preprocessors = get_filtered_preprocessors(tag) + model_filename_filters = [] + for p in filtered_preprocessors.values(): + model_filename_filters += p.model_filename_filters + return [ + x for x in controlnet_names + if x == 'None' or ( + any(f.lower() in x.lower() for f in model_filename_filters) and + get_sd_version().is_compatible_with(StableDiffusionVersion.detect_from_model_name(x)) + ) + ] + + +def update_controlnet_filenames(): + global controlnet_filename_dict, controlnet_names + + controlnet_filename_dict = {'None': 'model.safetensors'} + controlnet_names = ['None'] + + ext_dirs = (shared.opts.data.get("control_net_models_path", None), getattr(shared.cmd_opts, 'controlnet_dir', None)) + extra_lora_paths = (extra_lora_path for extra_lora_path in ext_dirs + if extra_lora_path is not None and os.path.exists(extra_lora_path)) + paths = [controlnet_dir, *extra_lora_paths] + + for path in paths: + sort_by = shared.opts.data.get("control_net_models_sort_models_by", "name") + filter_by = shared.opts.data.get("control_net_models_name_filter", "") + found = get_all_models(sort_by, filter_by, path) + controlnet_filename_dict.update(found) + + controlnet_names = list(controlnet_filename_dict.keys()) + return + + +def get_sd_version() -> StableDiffusionVersion: + if not shared.sd_model: + return StableDiffusionVersion.UNKNOWN + if shared.sd_model.is_sdxl: + return StableDiffusionVersion.SDXL + elif shared.sd_model.is_sd2: + return StableDiffusionVersion.SD2x + elif shared.sd_model.is_sd1: + return StableDiffusionVersion.SD1x + else: + return StableDiffusionVersion.UNKNOWN diff --git a/extensions-builtin/sd_forge_controlnet/lib_controlnet/infotext.py b/extensions-builtin/sd_forge_controlnet/lib_controlnet/infotext.py new file mode 100644 index 0000000000000000000000000000000000000000..78c1daafec12b6533a80229304831e45bc7e1ce3 --- /dev/null +++ b/extensions-builtin/sd_forge_controlnet/lib_controlnet/infotext.py @@ -0,0 +1,121 @@ +from typing import List, Tuple, Union + +import gradio as gr + +from modules.processing import StableDiffusionProcessing + +from lib_controlnet import external_code +from 
lib_controlnet.logging import logger + + +def field_to_displaytext(fieldname: str) -> str: + return " ".join([word.capitalize() for word in fieldname.split("_")]) + + +def displaytext_to_field(text: str) -> str: + return "_".join([word.lower() for word in text.split(" ")]) + + +def parse_value(value: str) -> Union[str, float, int, bool]: + if value in ("True", "False"): + return value == "True" + try: + return int(value) + except ValueError: + try: + return float(value) + except ValueError: + return value # Plain string. + + +def serialize_unit(unit: external_code.ControlNetUnit) -> str: + log_value = { + field_to_displaytext(field): getattr(unit, field) + for field in external_code.ControlNetUnit.infotext_fields() + if getattr(unit, field) != -1 + # Note: exclude hidden slider values. + } + if not all("," not in str(v) and ":" not in str(v) for v in log_value.values()): + logger.error(f"Unexpected tokens encountered:\n{log_value}") + return "" + + return ", ".join(f"{field}: {value}" for field, value in log_value.items()) + + +def parse_unit(text: str) -> external_code.ControlNetUnit: + return external_code.ControlNetUnit( + enabled=True, + **{ + displaytext_to_field(key): parse_value(value) + for item in text.split(",") + for (key, value) in (item.strip().split(": "),) + }, + ) + + +class Infotext(object): + def __init__(self) -> None: + self.infotext_fields: List[Tuple[gr.components.IOComponent, str]] = [] + self.paste_field_names: List[str] = [] + + @staticmethod + def unit_prefix(unit_index: int) -> str: + return f"ControlNet {unit_index}" + + def register_unit(self, unit_index: int, uigroup) -> None: + """Register the unit's UI group. By regsitering the unit, A1111 will be + able to paste values from infotext to IOComponents. + + Args: + unit_index: The index of the ControlNet unit + uigroup: The ControlNetUiGroup instance that contains all gradio + iocomponents. + """ + unit_prefix = Infotext.unit_prefix(unit_index) + for field in external_code.ControlNetUnit.infotext_fields(): + # Every field in ControlNetUnit should have a corresponding + # IOComponent in ControlNetUiGroup. + io_component = getattr(uigroup, field) + component_locator = f"{unit_prefix} {field}" + self.infotext_fields.append((io_component, component_locator)) + self.paste_field_names.append(component_locator) + + @staticmethod + def write_infotext( + units: List[external_code.ControlNetUnit], p: StableDiffusionProcessing + ): + """Write infotext to `p`.""" + p.extra_generation_params.update( + { + Infotext.unit_prefix(i): serialize_unit(unit) + for i, unit in enumerate(units) + if unit.enabled + } + ) + + @staticmethod + def on_infotext_pasted(infotext: str, results: dict) -> None: + """Parse ControlNet infotext string and write result to `results` dict.""" + updates = {} + for k, v in results.items(): + if not k.startswith("ControlNet"): + continue + + assert isinstance(v, str), f"Expect string but got {v}." 
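# Illustrative standalone sketch (not part of the patch): serialize_unit/parse_unit above
# round-trip units through "Field Name: value" pairs joined by ", ". The helpers below
# re-state that format with local names; the sample string is made up.
def _field_to_displaytext(fieldname: str) -> str:
    return " ".join(word.capitalize() for word in fieldname.split("_"))

def _displaytext_to_field(text: str) -> str:
    return "_".join(word.lower() for word in text.split(" "))

def _parse_value(value: str):
    if value in ("True", "False"):
        return value == "True"
    for cast in (int, float):
        try:
            return cast(value)
        except ValueError:
            continue
    return value  # plain string

serialized = "Module: canny, Weight: 0.8, Pixel Perfect: True"
parsed = {
    _displaytext_to_field(key): _parse_value(value)
    for item in serialized.split(",")
    for key, value in (item.strip().split(": "),)
}
assert parsed == {"module": "canny", "weight": 0.8, "pixel_perfect": True}
assert _field_to_displaytext("pixel_perfect") == "Pixel Perfect"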
+ try: + for field, value in vars(parse_unit(v)).items(): + if field == "image": + continue + if value is None: + logger.debug(f"InfoText: Skipping {field} because value is None.") + continue + + component_locator = f"{k} {field}" + updates[component_locator] = value + logger.debug(f"InfoText: Setting {component_locator} = {value}") + except Exception as e: + logger.warn( + f"Failed to parse infotext, legacy format infotext is no longer supported:\n{v}\n{e}" + ) + + results.update(updates) diff --git a/extensions-builtin/sd_forge_controlnet/lib_controlnet/logging.py b/extensions-builtin/sd_forge_controlnet/lib_controlnet/logging.py new file mode 100644 index 0000000000000000000000000000000000000000..f30d5eecc2a3f48498a1c6957ee6a8ee7f807433 --- /dev/null +++ b/extensions-builtin/sd_forge_controlnet/lib_controlnet/logging.py @@ -0,0 +1,41 @@ +import logging +import copy +import sys + +from modules import shared + + +class ColoredFormatter(logging.Formatter): + COLORS = { + "DEBUG": "\033[0;36m", # CYAN + "INFO": "\033[0;32m", # GREEN + "WARNING": "\033[0;33m", # YELLOW + "ERROR": "\033[0;31m", # RED + "CRITICAL": "\033[0;37;41m", # WHITE ON RED + "RESET": "\033[0m", # RESET COLOR + } + + def format(self, record): + colored_record = copy.copy(record) + levelname = colored_record.levelname + seq = self.COLORS.get(levelname, self.COLORS["RESET"]) + colored_record.levelname = f"{seq}{levelname}{self.COLORS['RESET']}" + return super().format(colored_record) + + +# Create a new logger +logger = logging.getLogger("ControlNet") +logger.propagate = False + +# Add handler if we don't have one. +if not logger.handlers: + handler = logging.StreamHandler(sys.stdout) + handler.setFormatter( + ColoredFormatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") + ) + logger.addHandler(handler) + +# Configure logger +loglevel_string = getattr(shared.cmd_opts, "controlnet_loglevel", "INFO") +loglevel = getattr(logging, loglevel_string.upper(), None) +logger.setLevel(loglevel) diff --git a/extensions-builtin/sd_forge_controlnet/lib_controlnet/lvminthin.py b/extensions-builtin/sd_forge_controlnet/lib_controlnet/lvminthin.py new file mode 100644 index 0000000000000000000000000000000000000000..641227aaa8f736b18409e6f70f7798a6e20d00a2 --- /dev/null +++ b/extensions-builtin/sd_forge_controlnet/lib_controlnet/lvminthin.py @@ -0,0 +1,88 @@ +# High Quality Edge Thinning using Pure Python +# Written by Lvmin Zhang +# 2023 April +# Stanford University +# If you use this, please Cite "High Quality Edge Thinning using Pure Python", Lvmin Zhang, In Mikubill/sd-webui-controlnet. 
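# Illustrative standalone sketch (not part of the patch): the ControlNet logger configured
# above adds exactly one stdout handler and disables propagation so records are not printed
# twice by the root logger. The same setup without the ANSI coloring, on a throwaway logger:
import logging
import sys

demo = logging.getLogger("ControlNet.demo")
demo.propagate = False
if not demo.handlers:
    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s"))
    demo.addHandler(handler)
demo.setLevel(logging.DEBUG)
demo.debug("visible because the level is DEBUG")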
+ + +import cv2 +import numpy as np + + +lvmin_kernels_raw = [ + np.array([ + [-1, -1, -1], + [0, 1, 0], + [1, 1, 1] + ], dtype=np.int32), + np.array([ + [0, -1, -1], + [1, 1, -1], + [0, 1, 0] + ], dtype=np.int32) +] + +lvmin_kernels = [] +lvmin_kernels += [np.rot90(x, k=0, axes=(0, 1)) for x in lvmin_kernels_raw] +lvmin_kernels += [np.rot90(x, k=1, axes=(0, 1)) for x in lvmin_kernels_raw] +lvmin_kernels += [np.rot90(x, k=2, axes=(0, 1)) for x in lvmin_kernels_raw] +lvmin_kernels += [np.rot90(x, k=3, axes=(0, 1)) for x in lvmin_kernels_raw] + +lvmin_prunings_raw = [ + np.array([ + [-1, -1, -1], + [-1, 1, -1], + [0, 0, -1] + ], dtype=np.int32), + np.array([ + [-1, -1, -1], + [-1, 1, -1], + [-1, 0, 0] + ], dtype=np.int32) +] + +lvmin_prunings = [] +lvmin_prunings += [np.rot90(x, k=0, axes=(0, 1)) for x in lvmin_prunings_raw] +lvmin_prunings += [np.rot90(x, k=1, axes=(0, 1)) for x in lvmin_prunings_raw] +lvmin_prunings += [np.rot90(x, k=2, axes=(0, 1)) for x in lvmin_prunings_raw] +lvmin_prunings += [np.rot90(x, k=3, axes=(0, 1)) for x in lvmin_prunings_raw] + + +def remove_pattern(x, kernel): + objects = cv2.morphologyEx(x, cv2.MORPH_HITMISS, kernel) + objects = np.where(objects > 127) + x[objects] = 0 + return x, objects[0].shape[0] > 0 + + +def thin_one_time(x, kernels): + y = x + is_done = True + for k in kernels: + y, has_update = remove_pattern(y, k) + if has_update: + is_done = False + return y, is_done + + +def lvmin_thin(x, prunings=True): + y = x + for i in range(32): + y, is_done = thin_one_time(y, lvmin_kernels) + if is_done: + break + if prunings: + y, _ = thin_one_time(y, lvmin_prunings) + return y + + +def nake_nms(x): + f1 = np.array([[0, 0, 0], [1, 1, 1], [0, 0, 0]], dtype=np.uint8) + f2 = np.array([[0, 1, 0], [0, 1, 0], [0, 1, 0]], dtype=np.uint8) + f3 = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.uint8) + f4 = np.array([[0, 0, 1], [0, 1, 0], [1, 0, 0]], dtype=np.uint8) + y = np.zeros_like(x) + for f in [f1, f2, f3, f4]: + np.putmask(y, cv2.dilate(x, kernel=f) == x, x) + return y + diff --git a/extensions-builtin/sd_forge_controlnet/lib_controlnet/utils.py b/extensions-builtin/sd_forge_controlnet/lib_controlnet/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..c911d1c4b539ca3f76f34f40ba938319928ead88 --- /dev/null +++ b/extensions-builtin/sd_forge_controlnet/lib_controlnet/utils.py @@ -0,0 +1,363 @@ +from typing import Optional +from modules import processing + +from lib_controlnet import external_code + +from modules_forge.forge_util import HWC3 + +from PIL import Image, ImageFilter, ImageOps +from lib_controlnet.lvminthin import lvmin_thin, nake_nms + +import torch +import os +import functools +import time +import base64 +import numpy as np +import safetensors.torch +import cv2 +import logging + +from typing import Any, Callable, Dict, List +from modules.safe import unsafe_torch_load +from lib_controlnet.logging import logger + + +def load_state_dict(ckpt_path, location="cpu"): + _, extension = os.path.splitext(ckpt_path) + if extension.lower() == ".safetensors": + state_dict = safetensors.torch.load_file(ckpt_path, device=location) + else: + state_dict = unsafe_torch_load(ckpt_path, map_location=torch.device(location)) + state_dict = get_state_dict(state_dict) + logger.info(f"Loaded state_dict from [{ckpt_path}]") + return state_dict + + +def get_state_dict(d): + return d.get("state_dict", d) + + +def ndarray_lru_cache(max_size: int = 128, typed: bool = False): + """ + Decorator to enable caching for functions with numpy array 
arguments. + Numpy arrays are mutable, and thus not directly usable as hash keys. + + The idea here is to wrap the incoming arguments with type `np.ndarray` + as `HashableNpArray` so that `lru_cache` can correctly handles `np.ndarray` + arguments. + + `HashableNpArray` functions exactly the same way as `np.ndarray` except + having `__hash__` and `__eq__` overriden. + """ + + def decorator(func: Callable): + """The actual decorator that accept function as input.""" + + class HashableNpArray(np.ndarray): + def __new__(cls, input_array): + # Input array is an instance of ndarray. + # The view makes the input array and returned array share the same data. + obj = np.asarray(input_array).view(cls) + return obj + + def __eq__(self, other) -> bool: + return np.array_equal(self, other) + + def __hash__(self): + # Hash the bytes representing the data of the array. + return hash(self.tobytes()) + + @functools.lru_cache(maxsize=max_size, typed=typed) + def cached_func(*args, **kwargs): + """This function only accepts `HashableNpArray` as input params.""" + return func(*args, **kwargs) + + # Preserves original function.__name__ and __doc__. + @functools.wraps(func) + def decorated_func(*args, **kwargs): + """The decorated function that delegates the original function.""" + + def convert_item(item: Any): + if isinstance(item, np.ndarray): + return HashableNpArray(item) + if isinstance(item, tuple): + return tuple(convert_item(i) for i in item) + return item + + args = [convert_item(arg) for arg in args] + kwargs = {k: convert_item(arg) for k, arg in kwargs.items()} + return cached_func(*args, **kwargs) + + return decorated_func + + return decorator + + +def timer_decorator(func): + """Time the decorated function and output the result to debug logger.""" + if logger.level != logging.DEBUG: + return func + + @functools.wraps(func) + def wrapper(*args, **kwargs): + start_time = time.time() + result = func(*args, **kwargs) + end_time = time.time() + duration = end_time - start_time + # Only report function that are significant enough. + if duration > 1e-3: + logger.debug(f"{func.__name__} ran in: {duration:.3f} sec") + return result + + return wrapper + + +class TimeMeta(type): + """ Metaclass to record execution time on all methods of the + child class. 
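# Illustrative standalone sketch (not part of the patch): the caching decorator above works
# because the ndarray view subclass hashes its raw bytes, which makes arrays usable as
# functools.lru_cache keys. _HashableArray and _cached_sum below are local stand-ins.
import functools
import numpy as np

class _HashableArray(np.ndarray):
    def __new__(cls, input_array):
        return np.asarray(input_array).view(cls)  # shares data with the input array

    def __eq__(self, other):
        return np.array_equal(self, other)

    def __hash__(self):
        return hash(self.tobytes())

@functools.lru_cache(maxsize=8)
def _cached_sum(a):
    return float(np.sum(a))

x = np.ones((4, 4), dtype=np.float32)
assert _cached_sum(_HashableArray(x)) == 16.0
assert _cached_sum(_HashableArray(x)) == 16.0  # second call is served from the cache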
""" + def __new__(cls, name, bases, attrs): + for attr_name, attr_value in attrs.items(): + if callable(attr_value): + attrs[attr_name] = timer_decorator(attr_value) + return super().__new__(cls, name, bases, attrs) + + +# svgsupports +svgsupport = False +try: + import io + from svglib.svglib import svg2rlg + from reportlab.graphics import renderPM + + svgsupport = True +except ImportError: + pass + + +def svg_preprocess(inputs: Dict, preprocess: Callable): + if not inputs: + return None + + if inputs["image"].startswith("data:image/svg+xml;base64,") and svgsupport: + svg_data = base64.b64decode( + inputs["image"].replace("data:image/svg+xml;base64,", "") + ) + drawing = svg2rlg(io.BytesIO(svg_data)) + png_data = renderPM.drawToString(drawing, fmt="PNG") + encoded_string = base64.b64encode(png_data) + base64_str = str(encoded_string, "utf-8") + base64_str = "data:image/png;base64," + base64_str + inputs["image"] = base64_str + return preprocess(inputs) + + +def get_unique_axis0(data): + arr = np.asanyarray(data) + idxs = np.lexsort(arr.T) + arr = arr[idxs] + unique_idxs = np.empty(len(arr), dtype=np.bool_) + unique_idxs[:1] = True + unique_idxs[1:] = np.any(arr[:-1, :] != arr[1:, :], axis=-1) + return arr[unique_idxs] + + +def read_image(img_path: str) -> str: + """Read image from specified path and return a base64 string.""" + img = cv2.imread(img_path) + _, bytes = cv2.imencode(".png", img) + encoded_image = base64.b64encode(bytes).decode("utf-8") + return encoded_image + + +def read_image_dir(img_dir: str, suffixes=('.png', '.jpg', '.jpeg', '.webp')) -> List[str]: + """Try read all images in given img_dir.""" + images = [] + for filename in os.listdir(img_dir): + if filename.endswith(suffixes): + img_path = os.path.join(img_dir, filename) + try: + images.append(read_image(img_path)) + except IOError: + logger.error(f"Error opening {img_path}") + return images + + +def align_dim_latent(x: int) -> int: + """ Align the pixel dimension (w/h) to latent dimension. + Stable diffusion 1:8 ratio for latent/pixel, i.e., + 1 latent unit == 8 pixel unit.""" + return (x // 8) * 8 + + +def prepare_mask( + mask: Image.Image, p: processing.StableDiffusionProcessing +) -> Image.Image: + """ + Prepare an image mask for the inpainting process. + + This function takes as input a PIL Image object and an instance of the + StableDiffusionProcessing class, and performs the following steps to prepare the mask: + + 1. Convert the mask to grayscale (mode "L"). + 2. If the 'inpainting_mask_invert' attribute of the processing instance is True, + invert the mask colors. + 3. If the 'mask_blur' attribute of the processing instance is greater than 0, + apply a Gaussian blur to the mask with a radius equal to 'mask_blur'. + + Args: + mask (Image.Image): The input mask as a PIL Image object. + p (processing.StableDiffusionProcessing): An instance of the StableDiffusionProcessing class + containing the processing parameters. + + Returns: + mask (Image.Image): The prepared mask as a PIL Image object. 
+ """ + mask = mask.convert("L") + if getattr(p, "inpainting_mask_invert", False): + mask = ImageOps.invert(mask) + + if hasattr(p, 'mask_blur_x'): + if getattr(p, "mask_blur_x", 0) > 0: + np_mask = np.array(mask) + kernel_size = 2 * int(2.5 * p.mask_blur_x + 0.5) + 1 + np_mask = cv2.GaussianBlur(np_mask, (kernel_size, 1), p.mask_blur_x) + mask = Image.fromarray(np_mask) + if getattr(p, "mask_blur_y", 0) > 0: + np_mask = np.array(mask) + kernel_size = 2 * int(2.5 * p.mask_blur_y + 0.5) + 1 + np_mask = cv2.GaussianBlur(np_mask, (1, kernel_size), p.mask_blur_y) + mask = Image.fromarray(np_mask) + else: + if getattr(p, "mask_blur", 0) > 0: + mask = mask.filter(ImageFilter.GaussianBlur(p.mask_blur)) + + return mask + + +def set_numpy_seed(p: processing.StableDiffusionProcessing) -> Optional[int]: + """ + Set the random seed for NumPy based on the provided parameters. + + Args: + p (processing.StableDiffusionProcessing): The instance of the StableDiffusionProcessing class. + + Returns: + Optional[int]: The computed random seed if successful, or None if an exception occurs. + + This function sets the random seed for NumPy using the seed and subseed values from the given instance of + StableDiffusionProcessing. If either seed or subseed is -1, it uses the first value from `all_seeds`. + Otherwise, it takes the maximum of the provided seed value and 0. + + The final random seed is computed by adding the seed and subseed values, applying a bitwise AND operation + with 0xFFFFFFFF to ensure it fits within a 32-bit integer. + """ + try: + tmp_seed = int(p.all_seeds[0] if p.seed == -1 else max(int(p.seed), 0)) + tmp_subseed = int(p.all_seeds[0] if p.subseed == -1 else max(int(p.subseed), 0)) + seed = (tmp_seed + tmp_subseed) & 0xFFFFFFFF + np.random.seed(seed) + return seed + except Exception as e: + logger.warning(e) + logger.warning('Warning: Failed to use consistent random seed.') + return None + + +def safe_numpy(x): + # A very safe method to make sure that Apple/Mac works + y = x + + # below is very boring but do not change these. If you change these Apple or Mac may fail. + y = y.copy() + y = np.ascontiguousarray(y) + y = y.copy() + return y + + +def high_quality_resize(x, size): + # Written by lvmin + # Super high-quality control map up-scaling, considering binary, seg, and one-pixel edges + + if x.shape[0] != size[1] or x.shape[1] != size[0]: + new_size_is_smaller = (size[0] * size[1]) < (x.shape[0] * x.shape[1]) + new_size_is_bigger = (size[0] * size[1]) > (x.shape[0] * x.shape[1]) + unique_color_count = len(get_unique_axis0(x.reshape(-1, x.shape[2]))) + is_one_pixel_edge = False + is_binary = False + if unique_color_count == 2: + is_binary = np.min(x) < 16 and np.max(x) > 240 + if is_binary: + xc = x + xc = cv2.erode(xc, np.ones(shape=(3, 3), dtype=np.uint8), iterations=1) + xc = cv2.dilate(xc, np.ones(shape=(3, 3), dtype=np.uint8), iterations=1) + one_pixel_edge_count = np.where(xc < x)[0].shape[0] + all_edge_count = np.where(x > 127)[0].shape[0] + is_one_pixel_edge = one_pixel_edge_count * 2 > all_edge_count + + if 2 < unique_color_count < 200: + interpolation = cv2.INTER_NEAREST + elif new_size_is_smaller: + interpolation = cv2.INTER_AREA + else: + interpolation = cv2.INTER_CUBIC # Must be CUBIC because we now use nms. 
NEVER CHANGE THIS + + y = cv2.resize(x, size, interpolation=interpolation) + + if is_binary: + y = np.mean(y.astype(np.float32), axis=2).clip(0, 255).astype(np.uint8) + if is_one_pixel_edge: + y = nake_nms(y) + _, y = cv2.threshold(y, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU) + y = lvmin_thin(y, prunings=new_size_is_bigger) + else: + _, y = cv2.threshold(y, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU) + y = np.stack([y] * 3, axis=2) + else: + y = x + + return y + + +def crop_and_resize_image(detected_map, resize_mode, h, w, fill_border_with_255=False): + if resize_mode == external_code.ResizeMode.RESIZE: + detected_map = high_quality_resize(detected_map, (w, h)) + detected_map = safe_numpy(detected_map) + return detected_map + + old_h, old_w, _ = detected_map.shape + old_w = float(old_w) + old_h = float(old_h) + k0 = float(h) / old_h + k1 = float(w) / old_w + + safeint = lambda x: int(np.round(x)) + + if resize_mode == external_code.ResizeMode.OUTER_FIT: + k = min(k0, k1) + borders = np.concatenate([detected_map[0, :, :], detected_map[-1, :, :], detected_map[:, 0, :], detected_map[:, -1, :]], axis=0) + high_quality_border_color = np.median(borders, axis=0).astype(detected_map.dtype) + if fill_border_with_255: + high_quality_border_color = np.zeros_like(high_quality_border_color) + 255 + high_quality_background = np.tile(high_quality_border_color[None, None], [h, w, 1]) + detected_map = high_quality_resize(detected_map, (safeint(old_w * k), safeint(old_h * k))) + new_h, new_w, _ = detected_map.shape + pad_h = max(0, (h - new_h) // 2) + pad_w = max(0, (w - new_w) // 2) + high_quality_background[pad_h:pad_h + new_h, pad_w:pad_w + new_w] = detected_map + detected_map = high_quality_background + detected_map = safe_numpy(detected_map) + return detected_map + else: + k = max(k0, k1) + detected_map = high_quality_resize(detected_map, (safeint(old_w * k), safeint(old_h * k))) + new_h, new_w, _ = detected_map.shape + pad_h = max(0, (new_h - h) // 2) + pad_w = max(0, (new_w - w) // 2) + detected_map = detected_map[pad_h:pad_h+h, pad_w:pad_w+w] + detected_map = safe_numpy(detected_map) + return detected_map + + +def judge_image_type(img): + return isinstance(img, np.ndarray) and img.ndim == 3 and int(img.shape[2]) in [3, 4] diff --git a/extensions-builtin/sd_forge_controlnet/preload.py b/extensions-builtin/sd_forge_controlnet/preload.py new file mode 100644 index 0000000000000000000000000000000000000000..965884e72825bbe37fbd74e313ca10e9191fb654 --- /dev/null +++ b/extensions-builtin/sd_forge_controlnet/preload.py @@ -0,0 +1,13 @@ +def preload(parser): + parser.add_argument( + "--controlnet-loglevel", + default="INFO", + choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], + help="Set the log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)", + ) + parser.add_argument( + "--controlnet-tracemalloc", + action="store_true", + help="Enable memory tracing.", + default=None, + ) diff --git a/extensions-builtin/sd_forge_controlnet/requirements.txt b/extensions-builtin/sd_forge_controlnet/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..d12e85b09d7b0d997bbb1a8f5d56bb2a2ec1ef01 --- /dev/null +++ b/extensions-builtin/sd_forge_controlnet/requirements.txt @@ -0,0 +1,5 @@ +fvcore +mediapipe +onnxruntime +opencv-python>=4.8.0 +svglib diff --git a/extensions-builtin/sd_forge_controlnet/scripts/controlnet.py b/extensions-builtin/sd_forge_controlnet/scripts/controlnet.py new file mode 100644 index 
0000000000000000000000000000000000000000..08aa5ff2a4f349ee4f3542eedb9b7091fb68fded --- /dev/null +++ b/extensions-builtin/sd_forge_controlnet/scripts/controlnet.py @@ -0,0 +1,611 @@ +import os +from typing import Dict, Optional, Tuple, List, Union + +import cv2 +import torch + +import modules.scripts as scripts +from modules import shared, script_callbacks, masking, images +from modules.ui_components import InputAccordion +from modules.api.api import decode_base64_to_image +import gradio as gr + +from lib_controlnet import global_state, external_code +from lib_controlnet.external_code import ControlNetUnit +from lib_controlnet.utils import align_dim_latent, set_numpy_seed, crop_and_resize_image, \ + prepare_mask, judge_image_type +from lib_controlnet.controlnet_ui.controlnet_ui_group import ControlNetUiGroup +from lib_controlnet.controlnet_ui.photopea import Photopea +from lib_controlnet.logging import logger +from modules.processing import StableDiffusionProcessingImg2Img, StableDiffusionProcessingTxt2Img, \ + StableDiffusionProcessing +from lib_controlnet.infotext import Infotext +from modules_forge.forge_util import HWC3, numpy_to_pytorch +from lib_controlnet.enums import HiResFixOption +from lib_controlnet.api import controlnet_api + +import numpy as np +import functools + +from PIL import Image +from modules_forge.shared import try_load_supported_control_model +from modules_forge.supported_controlnet import ControlModelPatcher + +# Gradio 3.32 bug fix +import tempfile + +gradio_tempfile_path = os.path.join(tempfile.gettempdir(), 'gradio') +os.makedirs(gradio_tempfile_path, exist_ok=True) + +global_state.update_controlnet_filenames() + + +@functools.lru_cache(maxsize=shared.opts.data.get("control_net_model_cache_size", 5)) +def cached_controlnet_loader(filename): + return try_load_supported_control_model(filename) + + +class ControlNetCachedParameters: + def __init__(self): + self.preprocessor = None + self.model = None + self.control_cond = None + self.control_cond_for_hr_fix = None + self.control_mask = None + self.control_mask_for_hr_fix = None + + +class ControlNetForForgeOfficial(scripts.Script): + sorting_priority = 10 + + def title(self): + return "ControlNet" + + def show(self, is_img2img): + return scripts.AlwaysVisible + + def ui(self, is_img2img): + infotext = Infotext() + ui_groups = [] + controls = [] + max_models = shared.opts.data.get("control_net_unit_count", 3) + gen_type = "img2img" if is_img2img else "txt2img" + elem_id_tabname = gen_type + "_controlnet" + default_unit = ControlNetUnit(enabled=False, module="None", model="None") + with gr.Group(elem_id=elem_id_tabname): + with gr.Accordion(f"ControlNet Integrated", open=False, elem_id="controlnet", + elem_classes=["controlnet"]): + photopea = ( + Photopea() + if not shared.opts.data.get("controlnet_disable_photopea_edit", False) + else None + ) + with gr.Row(elem_id=elem_id_tabname + "_accordions", elem_classes="accordions"): + for i in range(max_models): + with InputAccordion( + value=False, + label=f"ControlNet Unit {i}", + elem_classes=["cnet-unit-enabled-accordion"], # Class on accordion + ): + group = ControlNetUiGroup(is_img2img, default_unit, photopea) + ui_groups.append(group) + controls.append(group.render(f"ControlNet-{i}", elem_id_tabname)) + + for i, ui_group in enumerate(ui_groups): + infotext.register_unit(i, ui_group) + if shared.opts.data.get("control_net_sync_field_args", True): + self.infotext_fields = infotext.infotext_fields + self.paste_field_names = infotext.paste_field_names + return 
tuple(controls) + + def get_enabled_units(self, units): + # Parse dict from API calls. + units = [ + ControlNetUnit.from_dict(unit) if isinstance(unit, dict) else unit + for unit in units + ] + assert all(isinstance(unit, ControlNetUnit) for unit in units) + enabled_units = [x for x in units if x.enabled] + return enabled_units + + @staticmethod + def try_crop_image_with_a1111_mask( + p: StableDiffusionProcessing, + unit: ControlNetUnit, + input_image: np.ndarray, + resize_mode: external_code.ResizeMode, + preprocessor + ) -> np.ndarray: + a1111_mask_image: Optional[Image.Image] = getattr(p, "image_mask", None) + is_only_masked_inpaint = ( + issubclass(type(p), StableDiffusionProcessingImg2Img) and + p.inpaint_full_res and + a1111_mask_image is not None + ) + if ( + preprocessor.corp_image_with_a1111_mask_when_in_img2img_inpaint_tab + and is_only_masked_inpaint + ): + logger.info("Crop input image based on A1111 mask.") + input_image = [input_image[:, :, i] for i in range(input_image.shape[2])] + input_image = [Image.fromarray(x) for x in input_image] + + mask = prepare_mask(a1111_mask_image, p) + + crop_region = masking.get_crop_region(np.array(mask), p.inpaint_full_res_padding) + crop_region = masking.expand_crop_region(crop_region, p.width, p.height, mask.width, mask.height) + + input_image = [ + images.resize_image(resize_mode.int_value(), i, mask.width, mask.height) + for i in input_image + ] + + input_image = [x.crop(crop_region) for x in input_image] + input_image = [ + images.resize_image(external_code.ResizeMode.OUTER_FIT.int_value(), x, p.width, p.height) + for x in input_image + ] + + input_image = [np.asarray(x)[:, :, 0] for x in input_image] + input_image = np.stack(input_image, axis=2) + return input_image + + def get_input_data(self, p, unit, preprocessor, h, w): + logger.info(f'ControlNet Input Mode: {unit.input_mode}') + image_list = [] + resize_mode = external_code.resize_mode_from_value(unit.resize_mode) + + if unit.input_mode == external_code.InputMode.MERGE: + for idx, item in enumerate(unit.batch_input_gallery): + img_path = item['name'] + logger.info(f'Try to read image: {img_path}') + img = np.ascontiguousarray(cv2.imread(img_path)[:, :, ::-1]).copy() + mask = None + if len(unit.batch_mask_gallery) > 0: + if len(unit.batch_mask_gallery) >= len(unit.batch_input_gallery): + mask_path = unit.batch_mask_gallery[idx]['name'] + else: + mask_path = unit.batch_mask_gallery[0]['name'] + mask = np.ascontiguousarray(cv2.imread(mask_path)[:, :, ::-1]).copy() + if img is not None: + image_list.append([img, mask]) + elif unit.input_mode == external_code.InputMode.BATCH: + image_list = [] + image_extensions = ['.jpg', '.jpeg', '.png', '.bmp'] + batch_image_files = shared.listfiles(unit.batch_image_dir) + for batch_modifier in getattr(unit, 'batch_modifiers', []): + batch_image_files = batch_modifier(batch_image_files, p) + for idx, filename in enumerate(batch_image_files): + if any(filename.lower().endswith(ext) for ext in image_extensions): + img_path = os.path.join(unit.batch_image_dir, filename) + logger.info(f'Try to read image: {img_path}') + img = np.ascontiguousarray(cv2.imread(img_path)[:, :, ::-1]).copy() + mask = None + if unit.batch_mask_dir: + batch_mask_files = shared.listfiles(unit.batch_mask_dir) + if len(batch_mask_files) >= len(batch_image_files): + mask_path = batch_mask_files[idx] + else: + mask_path = batch_mask_files[0] + mask_path = os.path.join(unit.batch_mask_dir, mask_path) + mask = np.ascontiguousarray(cv2.imread(mask_path)[:, :, ::-1]).copy() + if img is 
not None: + image_list.append([img, mask]) + else: + a1111_i2i_image = getattr(p, "init_images", [None])[0] + a1111_i2i_mask = getattr(p, "image_mask", None) + + using_a1111_data = False + + if unit.use_preview_as_input and unit.generated_image is not None: + image = unit.generated_image + elif unit.image is None: + resize_mode = external_code.resize_mode_from_value(p.resize_mode) + image = HWC3(np.asarray(a1111_i2i_image)) + using_a1111_data = True + elif (unit.image['image'] < 5).all() and (unit.image['mask'] > 5).any(): + image = unit.image['mask'] + else: + image = unit.image['image'] + + if not isinstance(image, np.ndarray): + raise ValueError("controlnet is enabled but no input image is given") + + image = HWC3(image) + + if using_a1111_data: + mask = HWC3(np.asarray(a1111_i2i_mask)) if a1111_i2i_mask is not None else None + elif unit.mask_image is not None and (unit.mask_image['image'] > 5).any(): + mask = unit.mask_image['image'] + elif unit.mask_image is not None and (unit.mask_image['mask'] > 5).any(): + mask = unit.mask_image['mask'] + elif unit.image is not None and (unit.image['mask'] > 5).any(): + mask = unit.image['mask'] + else: + mask = None + + image = self.try_crop_image_with_a1111_mask(p, unit, image, resize_mode, preprocessor) + + if mask is not None: + mask = cv2.resize(HWC3(mask), (image.shape[1], image.shape[0]), interpolation=cv2.INTER_NEAREST) + mask = self.try_crop_image_with_a1111_mask(p, unit, mask, resize_mode, preprocessor) + + image_list = [[image, mask]] + + if resize_mode == external_code.ResizeMode.OUTER_FIT and preprocessor.expand_mask_when_resize_and_fill: + new_image_list = [] + for input_image, input_mask in image_list: + if input_mask is None: + input_mask = np.zeros_like(input_image) + input_mask = crop_and_resize_image( + input_mask, + external_code.ResizeMode.OUTER_FIT, h, w, + fill_border_with_255=True, + ) + input_image = crop_and_resize_image( + input_image, + external_code.ResizeMode.OUTER_FIT, h, w, + fill_border_with_255=False, + ) + new_image_list.append((input_image, input_mask)) + image_list = new_image_list + + return image_list, resize_mode + + @staticmethod + def get_target_dimensions(p: StableDiffusionProcessing) -> Tuple[int, int, int, int]: + """Returns (h, w, hr_h, hr_w).""" + h = align_dim_latent(p.height) + w = align_dim_latent(p.width) + + high_res_fix = ( + isinstance(p, StableDiffusionProcessingTxt2Img) + and getattr(p, 'enable_hr', False) + ) + if high_res_fix: + if p.hr_resize_x == 0 and p.hr_resize_y == 0: + hr_y = int(p.height * p.hr_scale) + hr_x = int(p.width * p.hr_scale) + else: + hr_y, hr_x = p.hr_resize_y, p.hr_resize_x + hr_y = align_dim_latent(hr_y) + hr_x = align_dim_latent(hr_x) + else: + hr_y = h + hr_x = w + + return h, w, hr_y, hr_x + + @torch.no_grad() + def process_unit_after_click_generate(self, + p: StableDiffusionProcessing, + unit: ControlNetUnit, + params: ControlNetCachedParameters, + *args, **kwargs): + + h, w, hr_y, hr_x = self.get_target_dimensions(p) + + has_high_res_fix = ( + isinstance(p, StableDiffusionProcessingTxt2Img) + and getattr(p, 'enable_hr', False) + ) + + if unit.use_preview_as_input: + unit.module = 'None' + + preprocessor = global_state.get_preprocessor(unit.module) + + input_list, resize_mode = self.get_input_data(p, unit, preprocessor, h, w) + preprocessor_outputs = [] + control_masks = [] + preprocessor_output_is_image = False + preprocessor_output = None + + def optional_tqdm(iterable, use_tqdm): + from tqdm import tqdm + return tqdm(iterable) if use_tqdm else iterable + + for 
input_image, input_mask in optional_tqdm(input_list, len(input_list) > 1): + if unit.pixel_perfect: + unit.processor_res = external_code.pixel_perfect_resolution( + input_image, + target_H=h, + target_W=w, + resize_mode=resize_mode, + ) + + seed = set_numpy_seed(p) + logger.debug(f"Use numpy seed {seed}.") + logger.info(f"Using preprocessor: {unit.module}") + logger.info(f'preprocessor resolution = {unit.processor_res}') + + preprocessor_output = preprocessor( + input_image=input_image, + input_mask=input_mask, + resolution=unit.processor_res, + slider_1=unit.threshold_a, + slider_2=unit.threshold_b, + ) + + preprocessor_outputs.append(preprocessor_output) + + preprocessor_output_is_image = judge_image_type(preprocessor_output) + + if input_mask is not None: + control_masks.append(input_mask) + + if len(input_list) > 1 and not preprocessor_output_is_image: + logger.info('Batch wise input only support controlnet, control-lora, and t2i adapters!') + break + + if has_high_res_fix: + hr_option = HiResFixOption.from_value(unit.hr_option) + else: + hr_option = HiResFixOption.BOTH + + alignment_indices = [i % len(preprocessor_outputs) for i in range(p.batch_size)] + def attach_extra_result_image(img: np.ndarray, is_high_res: bool = False): + if ( + (is_high_res and hr_option.high_res_enabled) or + (not is_high_res and hr_option.low_res_enabled) + ) and unit.save_detected_map: + p.extra_result_images.append(img) + + if preprocessor_output_is_image: + params.control_cond = [] + params.control_cond_for_hr_fix = [] + + for preprocessor_output in preprocessor_outputs: + control_cond = crop_and_resize_image(preprocessor_output, resize_mode, h, w) + attach_extra_result_image(external_code.visualize_inpaint_mask(control_cond)) + params.control_cond.append(numpy_to_pytorch(control_cond).movedim(-1, 1)) + + params.control_cond = torch.cat(params.control_cond, dim=0)[alignment_indices].contiguous() + + if has_high_res_fix: + for preprocessor_output in preprocessor_outputs: + control_cond_for_hr_fix = crop_and_resize_image(preprocessor_output, resize_mode, hr_y, hr_x) + attach_extra_result_image(external_code.visualize_inpaint_mask(control_cond_for_hr_fix), is_high_res=True) + params.control_cond_for_hr_fix.append(numpy_to_pytorch(control_cond_for_hr_fix).movedim(-1, 1)) + params.control_cond_for_hr_fix = torch.cat(params.control_cond_for_hr_fix, dim=0)[alignment_indices].contiguous() + else: + params.control_cond_for_hr_fix = params.control_cond + else: + params.control_cond = preprocessor_output + params.control_cond_for_hr_fix = preprocessor_output + attach_extra_result_image(input_image) + + if len(control_masks) > 0: + params.control_mask = [] + params.control_mask_for_hr_fix = [] + + for input_mask in control_masks: + fill_border = preprocessor.fill_mask_with_one_when_resize_and_fill + control_mask = crop_and_resize_image(input_mask, resize_mode, h, w, fill_border) + attach_extra_result_image(control_mask) + control_mask = numpy_to_pytorch(control_mask).movedim(-1, 1)[:, :1] + params.control_mask.append(control_mask) + + if has_high_res_fix: + control_mask_for_hr_fix = crop_and_resize_image(input_mask, resize_mode, hr_y, hr_x, fill_border) + attach_extra_result_image(control_mask_for_hr_fix, is_high_res=True) + control_mask_for_hr_fix = numpy_to_pytorch(control_mask_for_hr_fix).movedim(-1, 1)[:, :1] + params.control_mask_for_hr_fix.append(control_mask_for_hr_fix) + + params.control_mask = torch.cat(params.control_mask, dim=0)[alignment_indices].contiguous() + if has_high_res_fix: + 
params.control_mask_for_hr_fix = torch.cat(params.control_mask_for_hr_fix, dim=0)[alignment_indices].contiguous() + else: + params.control_mask_for_hr_fix = params.control_mask + + if preprocessor.do_not_need_model: + model_filename = 'Not Needed' + params.model = ControlModelPatcher() + else: + assert unit.model != 'None', 'You have not selected any control model!' + model_filename = global_state.get_controlnet_filename(unit.model) + params.model = cached_controlnet_loader(model_filename) + assert params.model is not None, logger.error(f"Recognizing Control Model failed: {model_filename}") + + params.preprocessor = preprocessor + + params.preprocessor.process_after_running_preprocessors(process=p, params=params, **kwargs) + params.model.process_after_running_preprocessors(process=p, params=params, **kwargs) + + logger.info(f"Current ControlNet {type(params.model).__name__}: {model_filename}") + return + + @torch.no_grad() + def process_unit_before_every_sampling(self, + p: StableDiffusionProcessing, + unit: ControlNetUnit, + params: ControlNetCachedParameters, + *args, **kwargs): + + is_hr_pass = getattr(p, 'is_hr_pass', False) + + has_high_res_fix = ( + isinstance(p, StableDiffusionProcessingTxt2Img) + and getattr(p, 'enable_hr', False) + ) + + if has_high_res_fix: + hr_option = HiResFixOption.from_value(unit.hr_option) + else: + hr_option = HiResFixOption.BOTH + + if has_high_res_fix and is_hr_pass and (not hr_option.high_res_enabled): + logger.info(f"ControlNet Skipped High-res pass.") + return + + if has_high_res_fix and (not is_hr_pass) and (not hr_option.low_res_enabled): + logger.info(f"ControlNet Skipped Low-res pass.") + return + + if is_hr_pass: + cond = params.control_cond_for_hr_fix + mask = params.control_mask_for_hr_fix + else: + cond = params.control_cond + mask = params.control_mask + + kwargs.update(dict( + unit=unit, + params=params, + cond_original=cond.clone() if isinstance(cond, torch.Tensor) else cond, + mask_original=mask.clone() if isinstance(mask, torch.Tensor) else mask, + )) + + params.model.strength = float(unit.weight) + params.model.start_percent = float(unit.guidance_start) + params.model.end_percent = float(unit.guidance_end) + params.model.positive_advanced_weighting = None + params.model.negative_advanced_weighting = None + params.model.advanced_frame_weighting = None + params.model.advanced_sigma_weighting = None + + soft_weighting = { + 'input': [0.09941396206337118, 0.12050177219802567, 0.14606275417942507, 0.17704576264172736, + 0.214600924414215, + 0.26012233262329093, 0.3152997971191405, 0.3821815722656249, 0.4632503906249999, 0.561515625, + 0.6806249999999999, 0.825], + 'middle': [0.561515625] if p.sd_model.is_sdxl else [1.0], + 'output': [0.09941396206337118, 0.12050177219802567, 0.14606275417942507, 0.17704576264172736, + 0.214600924414215, + 0.26012233262329093, 0.3152997971191405, 0.3821815722656249, 0.4632503906249999, 0.561515625, + 0.6806249999999999, 0.825] + } + + zero_weighting = { + 'input': [0.0] * 12, + 'middle': [0.0], + 'output': [0.0] * 12 + } + + if unit.control_mode == external_code.ControlMode.CONTROL.value: + params.model.positive_advanced_weighting = soft_weighting.copy() + params.model.negative_advanced_weighting = zero_weighting.copy() + + if unit.control_mode == external_code.ControlMode.PROMPT.value: + params.model.positive_advanced_weighting = soft_weighting.copy() + params.model.negative_advanced_weighting = soft_weighting.copy() + + if is_hr_pass and params.preprocessor.use_soft_projection_in_hr_fix: + 
params.model.positive_advanced_weighting = soft_weighting.copy() + params.model.negative_advanced_weighting = soft_weighting.copy() + + cond, mask = params.preprocessor.process_before_every_sampling(p, cond, mask, *args, **kwargs) + + params.model.advanced_mask_weighting = mask + + params.model.process_before_every_sampling(p, cond, mask, *args, **kwargs) + + logger.info(f"ControlNet Method {params.preprocessor.name} patched.") + return + + @staticmethod + def bound_check_params(unit: ControlNetUnit) -> None: + """ + Checks and corrects negative parameters in ControlNetUnit 'unit'. + Parameters 'processor_res', 'threshold_a', 'threshold_b' are reset to + their default values if negative. + + Args: + unit (ControlNetUnit): The ControlNetUnit instance to check. + """ + preprocessor = global_state.get_preprocessor(unit.module) + + if unit.processor_res < 0: + unit.processor_res = int(preprocessor.slider_resolution.gradio_update_kwargs.get('value', 512)) + + if unit.threshold_a < 0: + unit.threshold_a = int(preprocessor.slider_1.gradio_update_kwargs.get('value', 1.0)) + + if unit.threshold_b < 0: + unit.threshold_b = int(preprocessor.slider_2.gradio_update_kwargs.get('value', 1.0)) + + return + + @torch.no_grad() + def process_unit_after_every_sampling(self, + p: StableDiffusionProcessing, + unit: ControlNetUnit, + params: ControlNetCachedParameters, + *args, **kwargs): + + params.preprocessor.process_after_every_sampling(p, params, *args, **kwargs) + params.model.process_after_every_sampling(p, params, *args, **kwargs) + return + + @torch.no_grad() + def process(self, p, *args, **kwargs): + self.current_params = {} + enabled_units = self.get_enabled_units(args) + Infotext.write_infotext(enabled_units, p) + for i, unit in enumerate(enabled_units): + self.bound_check_params(unit) + params = ControlNetCachedParameters() + self.process_unit_after_click_generate(p, unit, params, *args, **kwargs) + self.current_params[i] = params + return + + @torch.no_grad() + def process_before_every_sampling(self, p, *args, **kwargs): + for i, unit in enumerate(self.get_enabled_units(args)): + self.process_unit_before_every_sampling(p, unit, self.current_params[i], *args, **kwargs) + return + + @torch.no_grad() + def postprocess_batch_list(self, p, pp, *args, **kwargs): + for i, unit in enumerate(self.get_enabled_units(args)): + self.process_unit_after_every_sampling(p, unit, self.current_params[i], pp, *args, **kwargs) + return + + def postprocess(self, p, processed, *args): + self.current_params = {} + return + + +def on_ui_settings(): + section = ('control_net', "ControlNet") + shared.opts.add_option("control_net_detectedmap_dir", shared.OptionInfo( + "detected_maps", "Directory for detected maps auto saving", section=section)) + shared.opts.add_option("control_net_models_path", shared.OptionInfo( + "", "Extra path to scan for ControlNet models (e.g. 
training output directory)", section=section)) + shared.opts.add_option("control_net_modules_path", shared.OptionInfo( + "", + "Path to directory containing annotator model directories (requires restart, overrides corresponding command line flag)", + section=section)) + shared.opts.add_option("control_net_unit_count", shared.OptionInfo( + 3, "Multi-ControlNet: ControlNet unit number (requires restart)", gr.Slider, + {"minimum": 1, "maximum": 10, "step": 1}, section=section)) + shared.opts.add_option("control_net_model_cache_size", shared.OptionInfo( + 5, "Model cache size (requires restart)", gr.Slider, {"minimum": 1, "maximum": 10, "step": 1}, section=section)) + shared.opts.add_option("control_net_no_detectmap", shared.OptionInfo( + False, "Do not append detectmap to output", gr.Checkbox, {"interactive": True}, section=section)) + shared.opts.add_option("control_net_detectmap_autosaving", shared.OptionInfo( + False, "Allow detectmap auto saving", gr.Checkbox, {"interactive": True}, section=section)) + shared.opts.add_option("control_net_allow_script_control", shared.OptionInfo( + False, "Allow other script to control this extension", gr.Checkbox, {"interactive": True}, section=section)) + shared.opts.add_option("control_net_sync_field_args", shared.OptionInfo( + True, "Paste ControlNet parameters in infotext", gr.Checkbox, {"interactive": True}, section=section)) + shared.opts.add_option("controlnet_show_batch_images_in_ui", shared.OptionInfo( + False, "Show batch images in gradio gallery output", gr.Checkbox, {"interactive": True}, section=section)) + shared.opts.add_option("controlnet_increment_seed_during_batch", shared.OptionInfo( + False, "Increment seed after each controlnet batch iteration", gr.Checkbox, {"interactive": True}, + section=section)) + shared.opts.add_option("controlnet_disable_openpose_edit", shared.OptionInfo( + False, "Disable openpose edit", gr.Checkbox, {"interactive": True}, section=section)) + shared.opts.add_option("controlnet_disable_photopea_edit", shared.OptionInfo( + False, "Disable photopea edit", gr.Checkbox, {"interactive": True}, section=section)) + shared.opts.add_option("controlnet_photopea_warning", shared.OptionInfo( + True, "Photopea popup warning", gr.Checkbox, {"interactive": True}, section=section)) + shared.opts.add_option("controlnet_input_thumbnail", shared.OptionInfo( + True, "Input image thumbnail on unit header", gr.Checkbox, {"interactive": True}, section=section)) + + +script_callbacks.on_ui_settings(on_ui_settings) +script_callbacks.on_infotext_pasted(Infotext.on_infotext_pasted) +script_callbacks.on_after_component(ControlNetUiGroup.on_after_component) +script_callbacks.on_before_reload(ControlNetUiGroup.reset) +script_callbacks.on_app_started(controlnet_api) diff --git a/extensions-builtin/sd_forge_controlnet/scripts/xyz_grid_support.py b/extensions-builtin/sd_forge_controlnet/scripts/xyz_grid_support.py new file mode 100644 index 0000000000000000000000000000000000000000..950bfe9b4a6b0cd6c3db7f87a20001dc6f2a8ac1 --- /dev/null +++ b/extensions-builtin/sd_forge_controlnet/scripts/xyz_grid_support.py @@ -0,0 +1,449 @@ +import re +import numpy as np + +from modules import scripts, shared + +try: + from lib_controlnet.global_state import update_controlnet_filenames, cn_models_names, get_preprocessor_names + from lib_controlnet.external_code import ResizeMode, ControlMode + +except (ImportError, NameError): + import_error = True +else: + import_error = False + +DEBUG_MODE = False + + +def debug_info(func): + def debug_info_(*args, 
**kwargs): + if DEBUG_MODE: + print(f"Debug info: {func.__name__}, {args}") + return func(*args, **kwargs) + return debug_info_ + + +def find_dict(dict_list, keyword, search_key="name", stop=False): + result = next((d for d in dict_list if d[search_key] == keyword), None) + if result or not stop: + return result + else: + raise ValueError(f"Dictionary with value '{keyword}' in key '{search_key}' not found.") + + +def flatten(lst): + result = [] + for element in lst: + if isinstance(element, list): + result.extend(flatten(element)) + else: + result.append(element) + return result + + +def is_all_included(target_list, check_list, allow_blank=False, stop=False): + for element in flatten(target_list): + if allow_blank and str(element) in ["None", ""]: + continue + elif element not in check_list: + if not stop: + return False + else: + raise ValueError(f"'{element}' is not included in check list.") + return True + + +class ListParser(): + """This class restores a broken list caused by the following process + in the xyz_grid module. + -> valslist = [x.strip() for x in chain.from_iterable( + csv.reader(StringIO(vals)))] + It also performs type conversion, + adjusts the number of elements in the list, and other operations. + + This class directly modifies the received list. + """ + numeric_pattern = { + int: { + "range": r"\s*([+-]?\s*\d+)\s*-\s*([+-]?\s*\d+)(?:\s*\(([+-]\d+)\s*\))?\s*", + "count": r"\s*([+-]?\s*\d+)\s*-\s*([+-]?\s*\d+)(?:\s*\[(\d+)\s*\])?\s*" + }, + float: { + "range": r"\s*([+-]?\s*\d+(?:\.\d*)?)\s*-\s*([+-]?\s*\d+(?:\.\d*)?)(?:\s*\(([+-]\d+(?:\.\d*)?)\s*\))?\s*", + "count": r"\s*([+-]?\s*\d+(?:\.\d*)?)\s*-\s*([+-]?\s*\d+(?:\.\d*)?)(?:\s*\[(\d+(?:\.\d*)?)\s*\])?\s*" + } + } + + ################################################ + # + # Initialization method from here. + # + ################################################ + + def __init__(self, my_list, converter=None, allow_blank=True, exclude_list=None, run=True): + self.my_list = my_list + self.converter = converter + self.allow_blank = allow_blank + self.exclude_list = exclude_list + self.re_bracket_start = None + self.re_bracket_start_precheck = None + self.re_bracket_end = None + self.re_bracket_end_precheck = None + self.re_range = None + self.re_count = None + self.compile_regex() + if run: + self.auto_normalize() + + def compile_regex(self): + exclude_pattern = "|".join(self.exclude_list) if self.exclude_list else None + if exclude_pattern is None: + self.re_bracket_start = re.compile(r"^\[") + self.re_bracket_end = re.compile(r"\]$") + else: + self.re_bracket_start = re.compile(fr"^\[(?!(?:{exclude_pattern})\])") + self.re_bracket_end = re.compile(fr"(? valslist = [opt.type(x) for x in valslist] + # Perform type conversion using the function + # set to the confirm attribute instead. + # + def identity(x): + return x + + def enable_script_control(): + shared.opts.data["control_net_allow_script_control"] = True + + def apply_field(field): + @debug_info + def apply_field_(p, x, xs): + enable_script_control() + setattr(p, field, x) + + return apply_field_ + + ################################################ + # The confirm function defined in this module + # enables list notation and performs type conversion. + # + # Example: + # any = [any, any, any, ...] + # [any] = [any, None, None, ...] + # [None, None, any] = [None, None, any] + # [,,any] = [None, None, any] + # any, [,any,] = [any, any, any, ...], [None, any, None] + # + # Enabled Only: + # any = [any] = [any, None, None, ...] 
+ # (any and [any] are considered equivalent) + # + def confirm(func_or_str): + @debug_info + def confirm_(p, xs): + if callable(func_or_str): # func_or_str is converter + ListParser(xs, func_or_str, allow_blank=True) + return + + elif isinstance(func_or_str, str): # func_or_str is keyword + valid_data = find_dict(validation_data, func_or_str, stop=True) + converter = valid_data["type"] + exclude_list = valid_data["exclude"]() if valid_data["exclude"] else None + check_list = valid_data["check"]() + + ListParser(xs, converter, allow_blank=True, exclude_list=exclude_list) + is_all_included(xs, check_list, allow_blank=True, stop=True) + return + + else: + raise TypeError(f"Argument must be callable or str, not {type(func_or_str).__name__}.") + + return confirm_ + + def bool_(string): + string = str(string) + if string in ["None", ""]: + return None + elif string.lower() in ["true", "1"]: + return True + elif string.lower() in ["false", "0"]: + return False + else: + raise ValueError(f"Could not convert string to boolean: {string}") + + def choices_bool(): + return ["False", "True"] + + def choices_model(): + update_controlnet_filenames() + return list(cn_models_names.values()) + + def choices_control_mode(): + return [e.value for e in ControlMode] + + def choices_resize_mode(): + return [e.value for e in ResizeMode] + + def choices_preprocessor(): + return list(get_preprocessor_names()) + + def make_excluded_list(): + pattern = re.compile(r"\[(\w+)\]") + return [match.group(1) for s in choices_model() + for match in pattern.finditer(s)] + + validation_data = [ + {"name": "model", "type": str, "check": choices_model, "exclude": make_excluded_list}, + {"name": "control_mode", "type": str, "check": choices_control_mode, "exclude": None}, + {"name": "resize_mode", "type": str, "check": choices_resize_mode, "exclude": None}, + {"name": "preprocessor", "type": str, "check": choices_preprocessor, "exclude": None}, + ] + + extra_axis_options = [ + xyz_grid.AxisOption("[ControlNet] Enabled", identity, apply_field("control_net_enabled"), confirm=confirm(bool_), choices=choices_bool), + xyz_grid.AxisOption("[ControlNet] Model", identity, apply_field("control_net_model"), confirm=confirm("model"), choices=choices_model, cost=0.9), + xyz_grid.AxisOption("[ControlNet] Weight", identity, apply_field("control_net_weight"), confirm=confirm(float)), + xyz_grid.AxisOption("[ControlNet] Guidance Start", identity, apply_field("control_net_guidance_start"), confirm=confirm(float)), + xyz_grid.AxisOption("[ControlNet] Guidance End", identity, apply_field("control_net_guidance_end"), confirm=confirm(float)), + xyz_grid.AxisOption("[ControlNet] Control Mode", identity, apply_field("control_net_control_mode"), confirm=confirm("control_mode"), choices=choices_control_mode), + xyz_grid.AxisOption("[ControlNet] Resize Mode", identity, apply_field("control_net_resize_mode"), confirm=confirm("resize_mode"), choices=choices_resize_mode), + xyz_grid.AxisOption("[ControlNet] Preprocessor", identity, apply_field("control_net_module"), confirm=confirm("preprocessor"), choices=choices_preprocessor), + xyz_grid.AxisOption("[ControlNet] Pre Resolution", identity, apply_field("control_net_pres"), confirm=confirm(int)), + xyz_grid.AxisOption("[ControlNet] Pre Threshold A", identity, apply_field("control_net_pthr_a"), confirm=confirm(float)), + xyz_grid.AxisOption("[ControlNet] Pre Threshold B", identity, apply_field("control_net_pthr_b"), confirm=confirm(float)), + ] + + xyz_grid.axis_options.extend(extra_axis_options) + + +def 
run(): + xyz_grid = find_module("xyz_grid.py, xy_grid.py") + if xyz_grid: + add_axis_options(xyz_grid) + + +if not import_error: + run() diff --git a/extensions-builtin/sd_forge_controlnet/style.css b/extensions-builtin/sd_forge_controlnet/style.css new file mode 100644 index 0000000000000000000000000000000000000000..a27207411c5c4b0e9b3988240d0a9c422b24fd39 --- /dev/null +++ b/extensions-builtin/sd_forge_controlnet/style.css @@ -0,0 +1,228 @@ +/* InputAccordion alignment */ +/* Flex container */ +.controlnet .svelte-vt1mxs { + display: flex; + flex-wrap: wrap; + flex-direction: row; + gap: 10px; + /* Adjusts the space between items */ +} + +.controlnet .input-accordion { + flex: 1 1 calc(50% - 10px); + /* Adjusts for the gap, default 2 columns */ + display: flex; + align-items: center; +} + +/* Media query for screens smaller than a specific width */ +@media (max-width: 600px) { + .controlnet .input-accordion { + flex: 1 1 100%; + /* Changes to 1 column when window width is ≤ 600px */ + } +} +/* Input image thumbnail */ +.cnet-thumbnail { + height: 3rem !important; + border: 1px solid var(--button-secondary-border-color); +} + +.controlnet .input-accordion .label-wrap>span:nth-child(1) { + display: flex; + flex-direction: row; + align-items: center; + gap: 5px; +} + +.controlnet .input-accordion .icon { + height: 1rem; + width: 1rem; +} + +.controlnet .input-accordion .label-wrap { + align-items: center; +} + +.cnet-modal { + display: none; + /* Hidden by default */ + position: fixed; + /* Stay in place */ + z-index: 2147483647; + /* Sit on top */ + left: 0; + top: 0; + width: 100%; + /* Full width */ + height: 100%; + /* Full height */ + overflow: auto; + /* Enable scroll if needed */ + background-color: rgba(0, 0, 0, 0.4); + /* Black with opacity */ + max-width: none !important; + /* Fix sizing with SD.Next (vladmandic/automatic#2594) */ +} + +.cnet-modal-content { + position: relative; + background-color: var(--background-fill-primary); + margin: 5vh auto; + /* 15% from the top and centered */ + padding: 20px; + border: 1px solid #888; + width: 95%; + height: 90vh; + /* Could be more or less, depending on screen size */ + box-shadow: 0 4px 8px 0 rgba(0, 0, 0, 0.2), 0 6px 20px 0 rgba(0, 0, 0, 0.19); + animation-name: animatetop; + animation-duration: 0.4s; + max-width: none !important; + /* Fix sizing with SD.Next (vladmandic/automatic#2594) */ +} + +.cnet-modal-content iframe { + position: absolute; + top: 0; + left: 0; + width: 100%; + height: 100%; + border: none; +} + +.cnet-modal-content.alert { + padding: var(--size-5); +} + +.cnet-modal-content.alert ul { + list-style-type: none; +} + +.cnet-modal-close { + color: white !important; + right: 0.25em; + top: 0; + cursor: pointer; + position: absolute; + font-size: 56px; + font-weight: bold; +} + +@keyframes animatetop { + from { + top: -300px; + opacity: 0 + } + + to { + top: 0; + opacity: 1 + } +} + +.cnet-generated-image-control-group, +.cnet-upload-pose { + display: flex; + flex-direction: column; + align-items: flex-end; + + position: absolute; + right: var(--size-2); + bottom: var(--size-2); +} + +/* Gradio button style */ +.cnet-download-pose a, +.cnet-close-preview, +.cnet-edit-pose, +.cnet-upload-pose, +.cnet-photopea-child-trigger { + font-size: x-small !important; + font-weight: bold !important; + padding: 2px !important; + box-shadow: var(--shadow-drop); + border: 1px solid var(--button-secondary-border-color); + border-radius: var(--radius-sm); + background: var(--background-fill-primary); + height: var(--size-5); + 
color: var(--block-label-text-color) !important; + display: flex; + justify-content: center; + cursor: pointer; +} + +.cnet-download-pose:hover a, +.cnet-close-preview:hover a, +.cnet-edit-pose:hover, +.cnet-upload-pose:hover, +.cnet-photopea-child-trigger:hover { + color: var(--block-label-text-color) !important; +} + +.cnet-unit-active { + color: green !important; + font-weight: bold !important; +} + +.dark .cnet-unit-active { + color: greenyellow !important; +} + +.cnet-badge { + display: inline-block; + padding: 0.25em 0.75em; + font-size: 0.75em; + font-weight: bold; + color: white; + border-radius: 0.5em; + text-align: center; + vertical-align: middle; + margin-left: var(--size-2); +} + +.cnet-badge.primary { + background-color: green; +} + +.cnet-a1111-badge { + position: absolute; + bottom: 0px; + right: 0px; +} + +.cnet-disabled-radio { + opacity: 50%; +} + +.controlnet_row { + margin-top: 10px !important; +} + +/* JSON pose upload button styling */ +.cnet-upload-pose input[type=file] { + position: absolute; + left: 0; + top: 0; + opacity: 0; + width: 100%; + height: 100%; +} + +/* Photopea integration styles */ +.photopea-button-group { + position: absolute; + top: -30px; /* 20px modal padding + 10px margin */ +} + +.photopea-button { + font-size: 3rem; + font-weight: bold; + padding: 2px !important; + margin: 2px !important; + box-shadow: var(--shadow-drop); + border: 1px solid var(--button-secondary-border-color); + border-radius: var(--radius-sm); + background: var(--background-fill-primary); + color: var(--block-label-text-color); +} \ No newline at end of file diff --git a/extensions-builtin/sd_forge_controlnet/tests/conftest.py b/extensions-builtin/sd_forge_controlnet/tests/conftest.py new file mode 100644 index 0000000000000000000000000000000000000000..c8792cd97cdb257d7cb8bd710a403b5d90099b10 --- /dev/null +++ b/extensions-builtin/sd_forge_controlnet/tests/conftest.py @@ -0,0 +1,7 @@ +import os + + +def pytest_configure(config): + # We don't want to fail on Py.test command line arguments being + # parsed by webui: + os.environ.setdefault("IGNORE_CMD_ARGS_ERRORS", "1") diff --git a/extensions-builtin/sd_forge_controlnet/tests/images/1girl.png b/extensions-builtin/sd_forge_controlnet/tests/images/1girl.png new file mode 100644 index 0000000000000000000000000000000000000000..d825e716be9b40ddcce6987fdd8aab7d1e6871a0 Binary files /dev/null and b/extensions-builtin/sd_forge_controlnet/tests/images/1girl.png differ diff --git a/extensions-builtin/sd_forge_controlnet/tests/images/mask.png b/extensions-builtin/sd_forge_controlnet/tests/images/mask.png new file mode 100644 index 0000000000000000000000000000000000000000..166203af05d94bec271dc3ea34b4ce41135b998f Binary files /dev/null and b/extensions-builtin/sd_forge_controlnet/tests/images/mask.png differ diff --git a/extensions-builtin/sd_forge_controlnet/tests/images/mask_small.png b/extensions-builtin/sd_forge_controlnet/tests/images/mask_small.png new file mode 100644 index 0000000000000000000000000000000000000000..c48d77e47f4b3cdebf196bb427932be7d03b8dad Binary files /dev/null and b/extensions-builtin/sd_forge_controlnet/tests/images/mask_small.png differ diff --git a/extensions-builtin/sd_forge_controlnet/tests/images/portrait/1.webp b/extensions-builtin/sd_forge_controlnet/tests/images/portrait/1.webp new file mode 100644 index 0000000000000000000000000000000000000000..5b9eccf01a16b051296f59b41281f9e31e0dbc74 Binary files /dev/null and b/extensions-builtin/sd_forge_controlnet/tests/images/portrait/1.webp differ diff --git 
a/extensions-builtin/sd_forge_controlnet/tests/images/portrait/2.jpg b/extensions-builtin/sd_forge_controlnet/tests/images/portrait/2.jpg new file mode 100644 index 0000000000000000000000000000000000000000..c16127c298190d43f2f3e63a5c25a1c592eb51d3 Binary files /dev/null and b/extensions-builtin/sd_forge_controlnet/tests/images/portrait/2.jpg differ diff --git a/extensions-builtin/sd_forge_controlnet/tests/images/portrait/3.jpeg b/extensions-builtin/sd_forge_controlnet/tests/images/portrait/3.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..1936d9fc21cee9ef6bdead5368c8e4b14eea6927 Binary files /dev/null and b/extensions-builtin/sd_forge_controlnet/tests/images/portrait/3.jpeg differ diff --git a/extensions-builtin/sd_forge_controlnet/tests/images/portrait/4.jpg b/extensions-builtin/sd_forge_controlnet/tests/images/portrait/4.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f0a0a8bf73953ab8da2df14d39ad1f757ed28fef Binary files /dev/null and b/extensions-builtin/sd_forge_controlnet/tests/images/portrait/4.jpg differ diff --git a/extensions-builtin/sd_forge_controlnet/tests/images/portrait/5.jpg b/extensions-builtin/sd_forge_controlnet/tests/images/portrait/5.jpg new file mode 100644 index 0000000000000000000000000000000000000000..605a914aaf0dba16ac8937b0f5b1bb72c3f04aa8 Binary files /dev/null and b/extensions-builtin/sd_forge_controlnet/tests/images/portrait/5.jpg differ diff --git a/extensions-builtin/sd_forge_controlnet/tests/images/portrait/6.jpg b/extensions-builtin/sd_forge_controlnet/tests/images/portrait/6.jpg new file mode 100644 index 0000000000000000000000000000000000000000..c081789ad32b5e810bc48cd89dd82a0212b26ddb Binary files /dev/null and b/extensions-builtin/sd_forge_controlnet/tests/images/portrait/6.jpg differ diff --git a/extensions-builtin/sd_forge_controlnet/tests/web_api/__init__.py b/extensions-builtin/sd_forge_controlnet/tests/web_api/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/extensions-builtin/sd_forge_controlnet/tests/web_api/detect_test.py b/extensions-builtin/sd_forge_controlnet/tests/web_api/detect_test.py new file mode 100644 index 0000000000000000000000000000000000000000..81f26b75620aa4c31d8f2cad2e2c6b12e0443101 --- /dev/null +++ b/extensions-builtin/sd_forge_controlnet/tests/web_api/detect_test.py @@ -0,0 +1,63 @@ +import pytest +import requests +from typing import List + +from .template import ( + APITestTemplate, + realistic_girl_face_img, + save_base64, + get_dest_dir, + disable_in_cq, +) + + +def get_modules() -> List[str]: + return requests.get(APITestTemplate.BASE_URL + "controlnet/module_list").json()[ + "module_list" + ] + + +def detect_template(payload, output_name: str): + url = APITestTemplate.BASE_URL + "controlnet/detect" + resp = requests.post(url, json=payload) + assert resp.status_code == 200 + resp_json = resp.json() + assert "images" in resp_json + assert len(resp_json["images"]) == len(payload["controlnet_input_images"]) + if not APITestTemplate.is_cq_run: + for i, img in enumerate(resp_json["images"]): + if img == "Detect result is not image": + continue + dest = get_dest_dir() / f"{output_name}_{i}.png" + save_base64(img, dest) + return resp_json + + +@disable_in_cq +@pytest.mark.parametrize("module", get_modules()) +def test_detect_all_modules(module: str): + payload = dict( + controlnet_input_images=[realistic_girl_face_img], + controlnet_module=module, + ) + detect_template(payload, 
f"detect_{module}") + + +def test_detect_simple(): + detect_template( + dict( + controlnet_input_images=[realistic_girl_face_img], + controlnet_module="canny", # Canny does not require model download. + ), + "simple_detect", + ) + + +def test_detect_multiple_inputs(): + detect_template( + dict( + controlnet_input_images=[realistic_girl_face_img, realistic_girl_face_img], + controlnet_module="canny", # Canny does not require model download. + ), + "multiple_inputs_detect", + ) diff --git a/extensions-builtin/sd_forge_controlnet/tests/web_api/generation_test.py b/extensions-builtin/sd_forge_controlnet/tests/web_api/generation_test.py new file mode 100644 index 0000000000000000000000000000000000000000..433819d15c1cfa6c20c923de50bce403354ba136 --- /dev/null +++ b/extensions-builtin/sd_forge_controlnet/tests/web_api/generation_test.py @@ -0,0 +1,171 @@ +import pytest + +from .template import ( + APITestTemplate, + girl_img, + mask_img, + disable_in_cq, + get_model, +) + + +@pytest.mark.parametrize("gen_type", ["img2img", "txt2img"]) +def test_no_unit(gen_type): + assert APITestTemplate( + f"test_no_unit{gen_type}", + gen_type, + payload_overrides={}, + unit_overrides=[], + input_image=girl_img, + ).exec() + + +@pytest.mark.parametrize("gen_type", ["img2img", "txt2img"]) +def test_multiple_iter(gen_type): + assert APITestTemplate( + f"test_multiple_iter{gen_type}", + gen_type, + payload_overrides={"n_iter": 2}, + unit_overrides={}, + input_image=girl_img, + ).exec() + + +@pytest.mark.parametrize("gen_type", ["img2img", "txt2img"]) +def test_batch_size(gen_type): + assert APITestTemplate( + f"test_batch_size{gen_type}", + gen_type, + payload_overrides={"batch_size": 2}, + unit_overrides={}, + input_image=girl_img, + ).exec() + + +@pytest.mark.parametrize("gen_type", ["img2img", "txt2img"]) +def test_2_units(gen_type): + assert APITestTemplate( + f"test_2_units{gen_type}", + gen_type, + payload_overrides={}, + unit_overrides=[{}, {}], + input_image=girl_img, + ).exec() + + +@pytest.mark.parametrize("gen_type", ["img2img", "txt2img"]) +def test_preprocessor(gen_type): + assert APITestTemplate( + f"test_preprocessor{gen_type}", + gen_type, + payload_overrides={}, + unit_overrides={"module": "canny"}, + input_image=girl_img, + ).exec() + + +@pytest.mark.parametrize("param_name", ("processor_res", "threshold_a", "threshold_b")) +@pytest.mark.parametrize("gen_type", ["img2img", "txt2img"]) +def test_invalid_param(gen_type, param_name): + assert APITestTemplate( + f"test_invalid_param{(gen_type, param_name)}", + gen_type, + payload_overrides={}, + unit_overrides={param_name: -1}, + input_image=girl_img, + ).exec() + + +@pytest.mark.parametrize("save_map", [True, False]) +@pytest.mark.parametrize("gen_type", ["img2img", "txt2img"]) +def test_save_map(gen_type, save_map): + assert APITestTemplate( + f"test_save_map{(gen_type, save_map)}", + gen_type, + payload_overrides={}, + unit_overrides={"save_detected_map": save_map}, + input_image=girl_img, + ).exec(expected_output_num=2 if save_map else 1) + + +@disable_in_cq +def test_masked_controlnet_txt2img(): + assert APITestTemplate( + f"test_masked_controlnet_txt2img", + "txt2img", + payload_overrides={}, + unit_overrides={ + "image": girl_img, + "mask_image": mask_img, + }, + ).exec() + + +@disable_in_cq +def test_masked_controlnet_img2img(): + assert APITestTemplate( + f"test_masked_controlnet_img2img", + "img2img", + payload_overrides={ + "init_images": [girl_img], + }, + # Note: Currently you must give ControlNet unit input image to specify + # mask. 
+ # TODO: Fix this for img2img. + unit_overrides={ + "image": girl_img, + "mask_image": mask_img, + }, + ).exec() + + +@disable_in_cq +def test_txt2img_inpaint(): + assert APITestTemplate( + "txt2img_inpaint", + "txt2img", + payload_overrides={}, + unit_overrides={ + "image": girl_img, + "mask_image": mask_img, + "model": get_model("v11p_sd15_inpaint"), + "module": "inpaint_only", + }, + ).exec() + + +@disable_in_cq +def test_img2img_inpaint(): + assert APITestTemplate( + "img2img_inpaint", + "img2img", + payload_overrides={ + "init_images": [girl_img], + "mask": mask_img, + }, + unit_overrides={ + "model": get_model("v11p_sd15_inpaint"), + "module": "inpaint_only", + }, + ).exec() + + +# Currently failing. +# TODO Fix lama outpaint. +@disable_in_cq +def test_lama_outpaint(): + assert APITestTemplate( + "txt2img_lama_outpaint", + "txt2img", + payload_overrides={ + "width": 768, + "height": 768, + }, + # Outpaint should not need a mask. + unit_overrides={ + "image": girl_img, + "model": get_model("v11p_sd15_inpaint"), + "module": "inpaint_only+lama", + "resize_mode": "Resize and Fill", # OUTER_FIT + }, + ).exec() diff --git a/extensions-builtin/sd_forge_controlnet/tests/web_api/template.py b/extensions-builtin/sd_forge_controlnet/tests/web_api/template.py new file mode 100644 index 0000000000000000000000000000000000000000..5129e541fdd22fd7425b7ba32f9dde2b0266523e --- /dev/null +++ b/extensions-builtin/sd_forge_controlnet/tests/web_api/template.py @@ -0,0 +1,347 @@ +import io +import os +import cv2 +import base64 +import functools +from typing import Dict, Any, List, Union, Literal, Optional +from pathlib import Path +import datetime +from enum import Enum +import numpy as np +import pytest + +import requests +from PIL import Image + + +def disable_in_cq(func): + """Skips the decorated test func in CQ run.""" + @functools.wraps(func) + def wrapped_func(*args, **kwargs): + if APITestTemplate.is_cq_run: + pytest.skip() + return func(*args, **kwargs) + return wrapped_func + + +PayloadOverrideType = Dict[str, Any] + +timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") +test_result_dir = Path(__file__).parent / "results" / f"test_result_{timestamp}" +test_expectation_dir = Path(__file__).parent / "expectations" +os.makedirs(test_expectation_dir, exist_ok=True) +resource_dir = Path(__file__).parents[1] / "images" + + +def get_dest_dir(): + if APITestTemplate.is_set_expectation_run: + return test_expectation_dir + else: + return test_result_dir + + +def save_base64(base64img: str, dest: Path): + Image.open(io.BytesIO(base64.b64decode(base64img.split(",", 1)[0]))).save(dest) + + +def read_image(img_path: Path) -> str: + img = cv2.imread(str(img_path)) + _, bytes = cv2.imencode(".png", img) + encoded_image = base64.b64encode(bytes).decode("utf-8") + return encoded_image + + +def read_image_dir(img_dir: Path, suffixes=('.png', '.jpg', '.jpeg', '.webp')) -> List[str]: + """Try read all images in given img_dir.""" + img_dir = str(img_dir) + images = [] + for filename in os.listdir(img_dir): + if filename.endswith(suffixes): + img_path = os.path.join(img_dir, filename) + try: + images.append(read_image(img_path)) + except IOError: + print(f"Error opening {img_path}") + return images + + +girl_img = read_image(resource_dir / "1girl.png") +mask_img = read_image(resource_dir / "mask.png") +mask_small_img = read_image(resource_dir / "mask_small.png") +portrait_imgs = read_image_dir(resource_dir / "portrait") +realistic_girl_face_img = portrait_imgs[0] + + +general_negative_prompt = """ +(worst 
quality:2), (low quality:2), (normal quality:2), lowres, normal quality, +((monochrome)), ((grayscale)), skin spots, acnes, skin blemishes, age spot, +backlight,(ugly:1.331), (duplicate:1.331), (morbid:1.21), (mutilated:1.21), +(tranny:1.331), mutated hands, (poorly drawn hands:1.331), blurry, (bad anatomy:1.21), +(bad proportions:1.331), extra limbs, (missing arms:1.331), (extra legs:1.331), +(fused fingers:1.61051), (too many fingers:1.61051), (unclear eyes:1.331), bad hands, +missing fingers, extra digit, bad body, easynegative, nsfw""" + +class StableDiffusionVersion(Enum): + """The version family of stable diffusion model.""" + + UNKNOWN = 0 + SD1x = 1 + SD2x = 2 + SDXL = 3 + + +sd_version = StableDiffusionVersion( + int(os.environ.get("CONTROLNET_TEST_SD_VERSION", StableDiffusionVersion.SD1x.value)) +) + +is_full_coverage = os.environ.get("CONTROLNET_TEST_FULL_COVERAGE", None) is not None + + +class APITestTemplate: + is_set_expectation_run = os.environ.get("CONTROLNET_SET_EXP", "True") == "True" + is_cq_run = os.environ.get("FORGE_CQ_TEST", "False") == "True" + BASE_URL = "http://localhost:7860/" + + def __init__( + self, + name: str, + gen_type: Union[Literal["img2img"], Literal["txt2img"]], + payload_overrides: PayloadOverrideType, + unit_overrides: Union[PayloadOverrideType, List[PayloadOverrideType]], + input_image: Optional[str] = None, + ): + self.name = name + self.url = APITestTemplate.BASE_URL + "sdapi/v1/" + gen_type + self.payload = { + **(txt2img_payload if gen_type == "txt2img" else img2img_payload), + **payload_overrides, + } + if gen_type == "img2img" and input_image is not None: + self.payload["init_images"] = [input_image] + + # CQ runs on CPU. Reduce steps to increase test speed. + if "steps" not in payload_overrides and APITestTemplate.is_cq_run: + self.payload["steps"] = 3 + + unit_overrides = ( + unit_overrides + if isinstance(unit_overrides, (list, tuple)) + else [unit_overrides] + ) + self.payload["alwayson_scripts"]["ControlNet"]["args"] = [ + { + **default_unit, + **unit_override, + **({"image": input_image} if gen_type == "txt2img" and input_image is not None else {}), + } + for unit_override in unit_overrides + ] + self.active_unit_count = len(unit_overrides) + + def exec(self, *args, **kwargs) -> bool: + if APITestTemplate.is_cq_run: + return self.exec_cq(*args, **kwargs) + else: + return self.exec_local(*args, **kwargs) + + def exec_cq(self, expected_output_num: Optional[int] = None, *args, **kwargs) -> bool: + """Execute test in CQ environment.""" + res = requests.post(url=self.url, json=self.payload) + if res.status_code != 200: + print(f"Unexpected status code {res.status_code}") + return False + + response = res.json() + if "images" not in response: + print(response.keys()) + return False + + if expected_output_num is None: + expected_output_num = self.payload["n_iter"] * self.payload["batch_size"] + self.active_unit_count + + if len(response["images"]) != expected_output_num: + print(f"{len(response['images'])} != {expected_output_num}") + return False + + return True + + def exec_local(self, result_only: bool = True, *args, **kwargs) -> bool: + """Execute test in local environment.""" + if not APITestTemplate.is_set_expectation_run: + os.makedirs(test_result_dir, exist_ok=True) + + failed = False + + response = requests.post(url=self.url, json=self.payload).json() + if "images" not in response: + print(response.keys()) + return False + + dest_dir = get_dest_dir() + results = response["images"][:1] if result_only else response["images"] + for i, 
base64image in enumerate(results): + img_file_name = f"{self.name}_{i}.png" + save_base64(base64image, dest_dir / img_file_name) + + if not APITestTemplate.is_set_expectation_run: + try: + img1 = cv2.imread(os.path.join(test_expectation_dir, img_file_name)) + img2 = cv2.imread(os.path.join(test_result_dir, img_file_name)) + except Exception as e: + print(f"Get exception reading imgs: {e}") + failed = True + continue + + if img1 is None: + print(f"Warn: No expectation file found {img_file_name}.") + continue + + if not expect_same_image( + img1, + img2, + diff_img_path=str(test_result_dir + / img_file_name.replace(".png", "_diff.png")), + ): + failed = True + return not failed + + +def expect_same_image(img1, img2, diff_img_path: str) -> bool: + # Calculate the difference between the two images + diff = cv2.absdiff(img1, img2) + + # Set a threshold to highlight the different pixels + threshold = 30 + diff_highlighted = np.where(diff > threshold, 255, 0).astype(np.uint8) + + # Assert that the two images are similar within a tolerance + similar = np.allclose(img1, img2, rtol=0.5, atol=1) + if not similar: + # Save the diff_highlighted image to inspect the differences + cv2.imwrite(diff_img_path, diff_highlighted) + + matching_pixels = np.isclose(img1, img2, rtol=0.5, atol=1) + similar_in_general = (matching_pixels.sum() / matching_pixels.size) >= 0.95 + return similar_in_general + + +def get_model(model_name: str) -> str: + """ Find an available model with specified model name.""" + if model_name.lower() == "none": + return "None" + + r = requests.get(APITestTemplate.BASE_URL + "controlnet/model_list") + result = r.json() + if "model_list" not in result: + raise ValueError("No model available") + + candidates = [ + model + for model in result["model_list"] + if model_name.lower() in model.lower() + ] + + if not candidates: + raise ValueError("No suitable model available") + + return candidates[0] + + +default_unit = { + "control_mode": 0, + "enabled": True, + "guidance_end": 1, + "guidance_start": 0, + "pixel_perfect": True, + "processor_res": 512, + "resize_mode": 1, + "threshold_a": 64, + "threshold_b": 64, + "weight": 1, + "module": "canny", + "model": get_model("sd15_canny"), +} + +img2img_payload = { + "batch_size": 1, + "cfg_scale": 7, + "height": 768, + "width": 512, + "n_iter": 1, + "steps": 10, + "sampler_name": "Euler a", + "prompt": "(masterpiece: 1.3), (highres: 1.3), best quality,", + "negative_prompt": "", + "seed": 42, + "seed_enable_extras": False, + "seed_resize_from_h": 0, + "seed_resize_from_w": 0, + "subseed": -1, + "subseed_strength": 0, + "override_settings": {}, + "override_settings_restore_afterwards": False, + "do_not_save_grid": False, + "do_not_save_samples": False, + "s_churn": 0, + "s_min_uncond": 0, + "s_noise": 1, + "s_tmax": None, + "s_tmin": 0, + "script_args": [], + "script_name": None, + "styles": [], + "alwayson_scripts": {"ControlNet": {"args": [default_unit]}}, + "denoising_strength": 0.75, + "initial_noise_multiplier": 1, + "inpaint_full_res": 0, + "inpaint_full_res_padding": 32, + "inpainting_fill": 1, + "inpainting_mask_invert": 0, + "mask_blur_x": 4, + "mask_blur_y": 4, + "mask_blur": 4, + "resize_mode": 0, +} + +txt2img_payload = { + "alwayson_scripts": {"ControlNet": {"args": [default_unit]}}, + "batch_size": 1, + "cfg_scale": 7, + "comments": {}, + "disable_extra_networks": False, + "do_not_save_grid": False, + "do_not_save_samples": False, + "enable_hr": False, + "height": 768, + "hr_negative_prompt": "", + "hr_prompt": "", + "hr_resize_x": 0, + 
"hr_resize_y": 0, + "hr_scale": 2, + "hr_second_pass_steps": 0, + "hr_upscaler": "Latent", + "n_iter": 1, + "negative_prompt": "", + "override_settings": {}, + "override_settings_restore_afterwards": True, + "prompt": "(masterpiece: 1.3), (highres: 1.3), best quality,", + "restore_faces": False, + "s_churn": 0.0, + "s_min_uncond": 0, + "s_noise": 1.0, + "s_tmax": None, + "s_tmin": 0.0, + "sampler_name": "Euler a", + "script_args": [], + "script_name": None, + "seed": 42, + "seed_enable_extras": True, + "seed_resize_from_h": -1, + "seed_resize_from_w": -1, + "steps": 10, + "styles": [], + "subseed": -1, + "subseed_strength": 0, + "tiling": False, + "width": 512, +} diff --git a/extensions-builtin/sd_forge_controlnet_example/preload.py b/extensions-builtin/sd_forge_controlnet_example/preload.py new file mode 100644 index 0000000000000000000000000000000000000000..ddc29489a7ba22318807c75897ef4fbb78c8cd78 --- /dev/null +++ b/extensions-builtin/sd_forge_controlnet_example/preload.py @@ -0,0 +1,6 @@ +def preload(parser): + parser.add_argument( + "--show-controlnet-example", + action="store_true", + help="Show development example extension for ControlNet.", + ) diff --git a/extensions-builtin/sd_forge_controlnet_example/scripts/sd_forge_controlnet_example.py b/extensions-builtin/sd_forge_controlnet_example/scripts/sd_forge_controlnet_example.py new file mode 100644 index 0000000000000000000000000000000000000000..9c10cb23bfdb636cde376fd5abdbb6446fecd8c0 --- /dev/null +++ b/extensions-builtin/sd_forge_controlnet_example/scripts/sd_forge_controlnet_example.py @@ -0,0 +1,160 @@ +# Use --show-controlnet-example to see this extension. + +import cv2 +import gradio as gr +import torch + +from modules import scripts +from modules.shared_cmd_options import cmd_opts +from modules_forge.shared import supported_preprocessors +from modules.modelloader import load_file_from_url +from ldm_patched.modules.controlnet import load_controlnet +from modules_forge.controlnet import apply_controlnet_advanced +from modules_forge.forge_util import numpy_to_pytorch +from modules_forge.shared import controlnet_dir + + +class ControlNetExampleForge(scripts.Script): + model = None + + def title(self): + return "ControlNet Example for Developers" + + def show(self, is_img2img): + # make this extension visible in both txt2img and img2img tab. + return scripts.AlwaysVisible + + def ui(self, *args, **kwargs): + with gr.Accordion(open=False, label=self.title()): + gr.HTML('This is an example controlnet extension for developers.') + gr.HTML('You see this extension because you used --show-controlnet-example') + input_image = gr.Image(source='upload', type='numpy') + funny_slider = gr.Slider(label='This slider does nothing. It just shows you how to transfer parameters.', + minimum=0.0, maximum=1.0, value=0.5) + + return input_image, funny_slider + + def process(self, p, *script_args, **kwargs): + input_image, funny_slider = script_args + + # This slider does nothing. It just shows you how to transfer parameters. 
+ del funny_slider + + if input_image is None: + return + + # controlnet_canny_path = load_file_from_url( + # url='https://huggingface.co/lllyasviel/sd_control_collection/resolve/main/sai_xl_canny_256lora.safetensors', + # model_dir=model_dir, + # file_name='sai_xl_canny_256lora.safetensors' + # ) + controlnet_canny_path = load_file_from_url( + url='https://huggingface.co/lllyasviel/fav_models/resolve/main/fav/control_v11p_sd15_canny_fp16.safetensors', + model_dir=controlnet_dir, + file_name='control_v11p_sd15_canny_fp16.safetensors' + ) + print('The model [control_v11p_sd15_canny_fp16.safetensors] download finished.') + + self.model = load_controlnet(controlnet_canny_path) + print('Controlnet loaded.') + + return + + def process_before_every_sampling(self, p, *script_args, **kwargs): + # This will be called before every sampling. + # If you use highres fix, this will be called twice. + + input_image, funny_slider = script_args + + if input_image is None or self.model is None: + return + + B, C, H, W = kwargs['noise'].shape # latent_shape + height = H * 8 + width = W * 8 + batch_size = p.batch_size + + preprocessor = supported_preprocessors['canny'] + + # detect control at certain resolution + control_image = preprocessor( + input_image, resolution=512, slider_1=100, slider_2=200, slider_3=None) + + # here we just use nearest neighbour to align input shape. + # You may want crop and resize, or crop and fill, or others. + control_image = cv2.resize( + control_image, (width, height), interpolation=cv2.INTER_NEAREST) + + # Output preprocessor result. Now called every sampling. Cache in your own way. + p.extra_result_images.append(control_image) + + print('Preprocessor Canny finished.') + + control_image_bchw = numpy_to_pytorch(control_image).movedim(-1, 1) + + unet = p.sd_model.forge_objects.unet + + # Unet has input, middle, output blocks, and we can give different weights + # to each layers in all blocks. + # Below is an example for stronger control in middle block. + # This is helpful for some high-res fix passes. (p.is_hr_pass) + positive_advanced_weighting = { + 'input': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2], + 'middle': [1.0], + 'output': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2] + } + negative_advanced_weighting = { + 'input': [0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 1.05, 1.15, 1.25], + 'middle': [1.05], + 'output': [0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 1.05, 1.15, 1.25] + } + + # The advanced_frame_weighting is a weight applied to each image in a batch. + # The length of this list must be same with batch size + # For example, if batch size is 5, the below list is [0.2, 0.4, 0.6, 0.8, 1.0] + # If you view the 5 images as 5 frames in a video, this will lead to + # progressively stronger control over time. + advanced_frame_weighting = [float(i + 1) / float(batch_size) for i in range(batch_size)] + + # The advanced_sigma_weighting allows you to dynamically compute control + # weights given diffusion timestep (sigma). + # For example below code can softly make beginning steps stronger than ending steps. + sigma_max = unet.model.model_sampling.sigma_max + sigma_min = unet.model.model_sampling.sigma_min + advanced_sigma_weighting = lambda s: (s - sigma_min) / (sigma_max - sigma_min) + + # You can even input a tensor to mask all control injections + # The mask will be automatically resized during inference in UNet. 
+        # The size should be (B, 1, H, W); the exact H and W are not important
+        # because the mask will be resized automatically
+        advanced_mask_weighting = torch.ones(size=(1, 1, 512, 512))
+
+        # But in this simple example we do not use them
+        positive_advanced_weighting = None
+        negative_advanced_weighting = None
+        advanced_frame_weighting = None
+        advanced_sigma_weighting = None
+        advanced_mask_weighting = None
+
+        unet = apply_controlnet_advanced(unet=unet, controlnet=self.model, image_bchw=control_image_bchw,
+                                         strength=0.6, start_percent=0.0, end_percent=0.8,
+                                         positive_advanced_weighting=positive_advanced_weighting,
+                                         negative_advanced_weighting=negative_advanced_weighting,
+                                         advanced_frame_weighting=advanced_frame_weighting,
+                                         advanced_sigma_weighting=advanced_sigma_weighting,
+                                         advanced_mask_weighting=advanced_mask_weighting)
+
+        p.sd_model.forge_objects.unet = unet
+
+        # The code below adds some log text under the image outputs in the UI.
+        # The extra_generation_params does not influence the results.
+        p.extra_generation_params.update(dict(
+            controlnet_info='You should see these texts below output images!',
+        ))
+
+        return
+
+
+# Use --show-controlnet-example to see this extension.
+if not cmd_opts.show_controlnet_example:
+    del ControlNetExampleForge
diff --git a/extensions-builtin/sd_forge_dynamic_thresholding/LICENSE.txt b/extensions-builtin/sd_forge_dynamic_thresholding/LICENSE.txt
new file mode 100644
index 0000000000000000000000000000000000000000..560a9bf3eafa56c30f36a6f8b70304eadbd66cc2
--- /dev/null
+++ b/extensions-builtin/sd_forge_dynamic_thresholding/LICENSE.txt
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2023 Alex "mcmonkey" Goodwin
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
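A note on the advanced weighting API demonstrated in sd_forge_controlnet_example.py above: the example builds an all-ones advanced_mask_weighting and then replaces every advanced argument with None. As a minimal, hypothetical sketch, assuming only what the comments in that script state (any (B, 1, H, W) tensor is accepted and resized automatically during inference), a non-trivial spatial mask could look like the following; the helper name and sizes are illustrative and not part of this patch.

    import torch

    def make_centre_weighted_mask(h: int = 64, w: int = 64) -> torch.Tensor:
        # Soft mask: full ControlNet injection strength in the centre of the
        # image, fading smoothly towards the borders.
        yy = torch.linspace(-1.0, 1.0, h).view(-1, 1)
        xx = torch.linspace(-1.0, 1.0, w).view(1, -1)
        falloff = (yy ** 2 + xx ** 2).clamp(max=1.0)
        return (1.0 - falloff).view(1, 1, h, w)  # shape (B=1, 1, H, W)

    # e.g. pass advanced_mask_weighting=make_centre_weighted_mask() to apply_controlnet_advanced above.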
diff --git a/extensions-builtin/sd_forge_dynamic_thresholding/lib_dynamic_thresholding/dynthres.py b/extensions-builtin/sd_forge_dynamic_thresholding/lib_dynamic_thresholding/dynthres.py new file mode 100644 index 0000000000000000000000000000000000000000..9c55240f73ccaa0c9ef1112534e651b1b5059b11 --- /dev/null +++ b/extensions-builtin/sd_forge_dynamic_thresholding/lib_dynamic_thresholding/dynthres.py @@ -0,0 +1,49 @@ +# https://github.com/mcmonkeyprojects/sd-dynamic-thresholding + + +from lib_dynamic_thresholding.dynthres_core import DynThresh + + +class DynamicThresholdingNode: + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "model": ("MODEL",), + "mimic_scale": ("FLOAT", {"default": 7.0, "min": 0.0, "max": 100.0, "step": 0.5}), + "threshold_percentile": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}), + "mimic_mode": (DynThresh.Modes, ), + "mimic_scale_min": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 100.0, "step": 0.5}), + "cfg_mode": (DynThresh.Modes, ), + "cfg_scale_min": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 100.0, "step": 0.5}), + "sched_val": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0, "step": 0.01}), + "separate_feature_channels": (["enable", "disable"], ), + "scaling_startpoint": (DynThresh.Startpoints, ), + "variability_measure": (DynThresh.Variabilities, ), + "interpolate_phi": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}), + } + } + + RETURN_TYPES = ("MODEL",) + FUNCTION = "patch" + CATEGORY = "advanced/mcmonkey" + + def patch(self, model, mimic_scale, threshold_percentile, mimic_mode, mimic_scale_min, cfg_mode, cfg_scale_min, sched_val, separate_feature_channels, scaling_startpoint, variability_measure, interpolate_phi): + + dynamic_thresh = DynThresh(mimic_scale, threshold_percentile, mimic_mode, mimic_scale_min, cfg_mode, cfg_scale_min, sched_val, 0, 999, separate_feature_channels == "enable", scaling_startpoint, variability_measure, interpolate_phi) + + def sampler_dyn_thresh(args): + input = args["input"] + cond = input - args["cond"] + uncond = input - args["uncond"] + cond_scale = args["cond_scale"] + time_step = model.model.model_sampling.timestep(args["sigma"]) + time_step = time_step[0].item() + dynamic_thresh.step = 999 - time_step + + return input - dynamic_thresh.dynthresh(cond, uncond, cond_scale, None) + + m = model.clone() + m.set_model_sampler_cfg_function(sampler_dyn_thresh) + return (m, ) diff --git a/extensions-builtin/sd_forge_dynamic_thresholding/lib_dynamic_thresholding/dynthres_core.py b/extensions-builtin/sd_forge_dynamic_thresholding/lib_dynamic_thresholding/dynthres_core.py new file mode 100644 index 0000000000000000000000000000000000000000..f697441ef068e66cc2fb7aeea75e078b6f832ff1 --- /dev/null +++ b/extensions-builtin/sd_forge_dynamic_thresholding/lib_dynamic_thresholding/dynthres_core.py @@ -0,0 +1,170 @@ +# https://github.com/mcmonkeyprojects/sd-dynamic-thresholding + + +import torch, math + +######################### DynThresh Core ######################### + +class DynThresh: + + Modes = ["Constant", "Linear Down", "Cosine Down", "Half Cosine Down", "Linear Up", "Cosine Up", "Half Cosine Up", "Power Up", "Power Down", "Linear Repeating", "Cosine Repeating", "Sawtooth"] + Startpoints = ["MEAN", "ZERO"] + Variabilities = ["AD", "STD"] + + def __init__(self, mimic_scale, threshold_percentile, mimic_mode, mimic_scale_min, cfg_mode, cfg_scale_min, sched_val, experiment_mode, max_steps, separate_feature_channels, scaling_startpoint, variability_measure, interpolate_phi): + 
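+        # mimic_scale: the (usually lower) CFG scale whose output value range the real cfg_scale result is rescaled to mimic.
+        # threshold_percentile: percentile of the absolute centred latent values used as the clamping reference in 'AD' mode.
+        # The *_mode, *_scale_min and sched_val arguments schedule how both scales vary across sampling steps (see interpret_scale).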
self.mimic_scale = mimic_scale + self.threshold_percentile = threshold_percentile + self.mimic_mode = mimic_mode + self.cfg_mode = cfg_mode + self.max_steps = max_steps + self.cfg_scale_min = cfg_scale_min + self.mimic_scale_min = mimic_scale_min + self.experiment_mode = experiment_mode + self.sched_val = sched_val + self.sep_feat_channels = separate_feature_channels + self.scaling_startpoint = scaling_startpoint + self.variability_measure = variability_measure + self.interpolate_phi = interpolate_phi + + def interpret_scale(self, scale, mode, min): + scale -= min + max = self.max_steps - 1 + frac = self.step / max + if mode == "Constant": + pass + elif mode == "Linear Down": + scale *= 1.0 - frac + elif mode == "Half Cosine Down": + scale *= math.cos(frac) + elif mode == "Cosine Down": + scale *= math.cos(frac * 1.5707) + elif mode == "Linear Up": + scale *= frac + elif mode == "Half Cosine Up": + scale *= 1.0 - math.cos(frac) + elif mode == "Cosine Up": + scale *= 1.0 - math.cos(frac * 1.5707) + elif mode == "Power Up": + scale *= math.pow(frac, self.sched_val) + elif mode == "Power Down": + scale *= 1.0 - math.pow(frac, self.sched_val) + elif mode == "Linear Repeating": + portion = (frac * self.sched_val) % 1.0 + scale *= (0.5 - portion) * 2 if portion < 0.5 else (portion - 0.5) * 2 + elif mode == "Cosine Repeating": + scale *= math.cos(frac * 6.28318 * self.sched_val) * 0.5 + 0.5 + elif mode == "Sawtooth": + scale *= (frac * self.sched_val) % 1.0 + scale += min + return scale + + def dynthresh(self, cond, uncond, cfg_scale, weights): + mimic_scale = self.interpret_scale(self.mimic_scale, self.mimic_mode, self.mimic_scale_min) + cfg_scale = self.interpret_scale(cfg_scale, self.cfg_mode, self.cfg_scale_min) + # uncond shape is (batch, 4, height, width) + conds_per_batch = cond.shape[0] / uncond.shape[0] + assert conds_per_batch == int(conds_per_batch), "Expected # of conds per batch to be constant across batches" + cond_stacked = cond.reshape((-1, int(conds_per_batch)) + uncond.shape[1:]) + + ### Normal first part of the CFG Scale logic, basically + diff = cond_stacked - uncond.unsqueeze(1) + if weights is not None: + diff = diff * weights + relative = diff.sum(1) + + ### Get the normal result for both mimic and normal scale + mim_target = uncond + relative * mimic_scale + cfg_target = uncond + relative * cfg_scale + ### If we weren't doing mimic scale, we'd just return cfg_target here + + ### Now recenter the values relative to their average rather than absolute, to allow scaling from average + mim_flattened = mim_target.flatten(2) + cfg_flattened = cfg_target.flatten(2) + mim_means = mim_flattened.mean(dim=2).unsqueeze(2) + cfg_means = cfg_flattened.mean(dim=2).unsqueeze(2) + mim_centered = mim_flattened - mim_means + cfg_centered = cfg_flattened - cfg_means + + if self.sep_feat_channels: + if self.variability_measure == 'STD': + mim_scaleref = mim_centered.std(dim=2).unsqueeze(2) + cfg_scaleref = cfg_centered.std(dim=2).unsqueeze(2) + else: # 'AD' + mim_scaleref = mim_centered.abs().max(dim=2).values.unsqueeze(2) + cfg_scaleref = torch.quantile(cfg_centered.abs(), self.threshold_percentile, dim=2).unsqueeze(2) + + else: + if self.variability_measure == 'STD': + mim_scaleref = mim_centered.std() + cfg_scaleref = cfg_centered.std() + else: # 'AD' + mim_scaleref = mim_centered.abs().max() + cfg_scaleref = torch.quantile(cfg_centered.abs(), self.threshold_percentile) + + if self.scaling_startpoint == 'ZERO': + scaling_factor = mim_scaleref / cfg_scaleref + result = cfg_flattened * 
scaling_factor + + else: # 'MEAN' + if self.variability_measure == 'STD': + cfg_renormalized = (cfg_centered / cfg_scaleref) * mim_scaleref + else: # 'AD' + ### Get the maximum value of all datapoints (with an optional threshold percentile on the uncond) + max_scaleref = torch.maximum(mim_scaleref, cfg_scaleref) + ### Clamp to the max + cfg_clamped = cfg_centered.clamp(-max_scaleref, max_scaleref) + ### Now shrink from the max to normalize and grow to the mimic scale (instead of the CFG scale) + cfg_renormalized = (cfg_clamped / max_scaleref) * mim_scaleref + + ### Now add it back onto the averages to get into real scale again and return + result = cfg_renormalized + cfg_means + + actual_res = result.unflatten(2, mim_target.shape[2:]) + + if self.interpolate_phi != 1.0: + actual_res = actual_res * self.interpolate_phi + cfg_target * (1.0 - self.interpolate_phi) + + if self.experiment_mode == 1: + num = actual_res.cpu().numpy() + for y in range(0, 64): + for x in range (0, 64): + if num[0][0][y][x] > 1.0: + num[0][1][y][x] *= 0.5 + if num[0][1][y][x] > 1.0: + num[0][1][y][x] *= 0.5 + if num[0][2][y][x] > 1.5: + num[0][2][y][x] *= 0.5 + actual_res = torch.from_numpy(num).to(device=uncond.device) + elif self.experiment_mode == 2: + num = actual_res.cpu().numpy() + for y in range(0, 64): + for x in range (0, 64): + over_scale = False + for z in range(0, 4): + if abs(num[0][z][y][x]) > 1.5: + over_scale = True + if over_scale: + for z in range(0, 4): + num[0][z][y][x] *= 0.7 + actual_res = torch.from_numpy(num).to(device=uncond.device) + elif self.experiment_mode == 3: + coefs = torch.tensor([ + # R G B W + [0.298, 0.207, 0.208, 0.0], # L1 + [0.187, 0.286, 0.173, 0.0], # L2 + [-0.158, 0.189, 0.264, 0.0], # L3 + [-0.184, -0.271, -0.473, 1.0], # L4 + ], device=uncond.device) + res_rgb = torch.einsum("laxy,ab -> lbxy", actual_res, coefs) + max_r, max_g, max_b, max_w = res_rgb[0][0].max(), res_rgb[0][1].max(), res_rgb[0][2].max(), res_rgb[0][3].max() + max_rgb = max(max_r, max_g, max_b) + print(f"test max = r={max_r}, g={max_g}, b={max_b}, w={max_w}, rgb={max_rgb}") + if self.step / (self.max_steps - 1) > 0.2: + if max_rgb < 2.0 and max_w < 3.0: + res_rgb /= max_rgb / 2.4 + else: + if max_rgb > 2.4 and max_w > 3.0: + res_rgb /= max_rgb / 2.4 + actual_res = torch.einsum("laxy,ab -> lbxy", res_rgb, coefs.inverse()) + + return actual_res diff --git a/extensions-builtin/sd_forge_dynamic_thresholding/scripts/forge_dynamic_thresholding.py b/extensions-builtin/sd_forge_dynamic_thresholding/scripts/forge_dynamic_thresholding.py new file mode 100644 index 0000000000000000000000000000000000000000..d1ae209af979d8a8fd96b749a6e371a418cec2bf --- /dev/null +++ b/extensions-builtin/sd_forge_dynamic_thresholding/scripts/forge_dynamic_thresholding.py @@ -0,0 +1,81 @@ +import gradio as gr + +from modules import scripts +from lib_dynamic_thresholding.dynthres import DynamicThresholdingNode + +opDynamicThresholdingNode = DynamicThresholdingNode().patch + + +class DynamicThresholdingForForge(scripts.Script): + sorting_priority = 11 + + def title(self): + return "DynamicThresholding (CFG-Fix) Integrated" + + def show(self, is_img2img): + # make this extension visible in both txt2img and img2img tab. 
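+        # (AlwaysVisible scripts are not selected from the script dropdown; they run for every generation, and the 'Enabled' checkbox in ui() gates whether the patch is applied.)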
+ return scripts.AlwaysVisible + + def ui(self, *args, **kwargs): + with gr.Accordion(open=False, label=self.title()): + enabled = gr.Checkbox(label='Enabled', value=False) + mimic_scale = gr.Slider(label='Mimic Scale', minimum=0.0, maximum=100.0, step=0.5, value=7.0) + threshold_percentile = gr.Slider(label='Threshold Percentile', minimum=0.0, maximum=1.0, step=0.01, + value=1.0) + mimic_mode = gr.Radio(label='Mimic Mode', + choices=['Constant', 'Linear Down', 'Cosine Down', 'Half Cosine Down', 'Linear Up', + 'Cosine Up', 'Half Cosine Up', 'Power Up', 'Power Down', 'Linear Repeating', + 'Cosine Repeating', 'Sawtooth'], value='Constant') + mimic_scale_min = gr.Slider(label='Mimic Scale Min', minimum=0.0, maximum=100.0, step=0.5, value=0.0) + cfg_mode = gr.Radio(label='Cfg Mode', + choices=['Constant', 'Linear Down', 'Cosine Down', 'Half Cosine Down', 'Linear Up', + 'Cosine Up', 'Half Cosine Up', 'Power Up', 'Power Down', 'Linear Repeating', + 'Cosine Repeating', 'Sawtooth'], value='Constant') + cfg_scale_min = gr.Slider(label='Cfg Scale Min', minimum=0.0, maximum=100.0, step=0.5, value=0.0) + sched_val = gr.Slider(label='Sched Val', minimum=0.0, maximum=100.0, step=0.01, value=1.0) + separate_feature_channels = gr.Radio(label='Separate Feature Channels', choices=['enable', 'disable'], + value='enable') + scaling_startpoint = gr.Radio(label='Scaling Startpoint', choices=['MEAN', 'ZERO'], value='MEAN') + variability_measure = gr.Radio(label='Variability Measure', choices=['AD', 'STD'], value='AD') + interpolate_phi = gr.Slider(label='Interpolate Phi', minimum=0.0, maximum=1.0, step=0.01, value=1.0) + + return enabled, mimic_scale, threshold_percentile, mimic_mode, mimic_scale_min, cfg_mode, cfg_scale_min, \ + sched_val, separate_feature_channels, scaling_startpoint, variability_measure, interpolate_phi + + def process_before_every_sampling(self, p, *script_args, **kwargs): + # This will be called before every sampling. + # If you use highres fix, this will be called twice. + + enabled, mimic_scale, threshold_percentile, mimic_mode, mimic_scale_min, cfg_mode, cfg_scale_min, \ + sched_val, separate_feature_channels, scaling_startpoint, variability_measure, \ + interpolate_phi = script_args + + if not enabled: + return + + unet = p.sd_model.forge_objects.unet + + unet = opDynamicThresholdingNode(unet, mimic_scale, threshold_percentile, mimic_mode, mimic_scale_min, + cfg_mode, cfg_scale_min, sched_val, separate_feature_channels, + scaling_startpoint, variability_measure, interpolate_phi)[0] + + p.sd_model.forge_objects.unet = unet + + # Below codes will add some logs to the texts below the image outputs on UI. + # The extra_generation_params does not influence results. 
+ p.extra_generation_params.update(dict( + dynthres_enabled=enabled, + dynthres_mimic_scale=mimic_scale, + dynthres_threshold_percentile=threshold_percentile, + dynthres_mimic_mode=mimic_mode, + dynthres_mimic_scale_min=mimic_scale_min, + dynthres_cfg_mode=cfg_mode, + dynthres_cfg_scale_min=cfg_scale_min, + dynthres_sched_val=sched_val, + dynthres_separate_feature_channels=separate_feature_channels, + dynthres_scaling_startpoint=scaling_startpoint, + dynthres_variability_measure=variability_measure, + dynthres_interpolate_phi=interpolate_phi, + )) + + return diff --git a/extensions-builtin/sd_forge_fooocus_inpaint/scripts/fooocus_inpaint_head b/extensions-builtin/sd_forge_fooocus_inpaint/scripts/fooocus_inpaint_head new file mode 100644 index 0000000000000000000000000000000000000000..a5a3030a0521c9c08abad0a89923f226a365dfe1 Binary files /dev/null and b/extensions-builtin/sd_forge_fooocus_inpaint/scripts/fooocus_inpaint_head differ diff --git a/extensions-builtin/sd_forge_fooocus_inpaint/scripts/forge_fooocus_inpaint.py b/extensions-builtin/sd_forge_fooocus_inpaint/scripts/forge_fooocus_inpaint.py new file mode 100644 index 0000000000000000000000000000000000000000..15d8f8099e1373ea5153b1d6d8e6ca874a8f49dc --- /dev/null +++ b/extensions-builtin/sd_forge_fooocus_inpaint/scripts/forge_fooocus_inpaint.py @@ -0,0 +1,131 @@ +import os +import torch +import copy + +from modules_forge.shared import add_supported_control_model +from modules_forge.supported_controlnet import ControlModelPatcher +from modules_forge.forge_sampler import sampling_prepare +from ldm_patched.modules.utils import load_torch_file +from ldm_patched.modules import model_patcher +from ldm_patched.modules.model_management import cast_to_device, current_loaded_models +from ldm_patched.modules.lora import model_lora_keys_unet + + +def is_model_loaded(model): + return any(model == m.model for m in current_loaded_models) + + +class InpaintHead(torch.nn.Module): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.head = torch.nn.Parameter(torch.empty(size=(320, 5, 3, 3), device="cpu")) + + def __call__(self, x): + x = torch.nn.functional.pad(x, (1, 1, 1, 1), "replicate") + return torch.nn.functional.conv2d(input=x, weight=self.head) + + +def load_fooocus_patch(lora: dict, to_load: dict): + patch_dict = {} + loaded_keys = set() + for key in to_load.values(): + if value := lora.get(key, None): + patch_dict[key] = ("fooocus", value) + loaded_keys.add(key) + + not_loaded = sum(1 for x in lora if x not in loaded_keys) + print(f"[Fooocus Patch Loader] {len(loaded_keys)} keys loaded, {not_loaded} remaining keys not found in model.") + return patch_dict + + +def calculate_weight_fooocus(weight, alpha, v): + w1 = cast_to_device(v[0], weight.device, torch.float32) + if w1.shape == weight.shape: + w_min = cast_to_device(v[1], weight.device, torch.float32) + w_max = cast_to_device(v[2], weight.device, torch.float32) + w1 = (w1 / 255.0) * (w_max - w_min) + w_min + weight += alpha * cast_to_device(w1, weight.device, weight.dtype) + else: + print(f"[Fooocus Patch Loader] weight not merged ({w1.shape} != {weight.shape})") + return weight + + +class FooocusInpaintPatcher(ControlModelPatcher): + @staticmethod + def try_build_from_state_dict(state_dict, ckpt_path): + if 'diffusion_model.time_embed.0.weight' in state_dict: + if len(state_dict['diffusion_model.time_embed.0.weight']) == 3: + return FooocusInpaintPatcher(state_dict) + + return None + + def __init__(self, state_dict): + super().__init__() + self.state_dict = 
state_dict + self.inpaint_head = InpaintHead().to(device=torch.device('cpu'), dtype=torch.float32) + self.inpaint_head.load_state_dict(load_torch_file(os.path.join(os.path.dirname(__file__), 'fooocus_inpaint_head'))) + + return + + def process_before_every_sampling(self, process, cond, mask, *args, **kwargs): + cond_original = kwargs['cond_original'] + mask_original = kwargs['mask_original'] + + unet_original = process.sd_model.forge_objects.unet.clone() + unet = process.sd_model.forge_objects.unet.clone() + vae = process.sd_model.forge_objects.vae + + latent_image = vae.encode(cond_original.movedim(1, -1)) + latent_image = process.sd_model.forge_objects.unet.model.latent_format.process_in(latent_image) + latent_mask = torch.nn.functional.max_pool2d(mask_original, (8, 8)).round().to(cond) + feed = torch.cat([ + latent_mask.to(device=torch.device('cpu'), dtype=torch.float32), + latent_image.to(device=torch.device('cpu'), dtype=torch.float32) + ], dim=1) + inpaint_head_feature = self.inpaint_head(feed) + + def input_block_patch(h, transformer_options): + if transformer_options["block"][1] == 0: + h = h + inpaint_head_feature.to(h) + return h + + unet.set_model_input_block_patch(input_block_patch) + + lora_keys = model_lora_keys_unet(unet.model, {}) + lora_keys.update({x: x for x in unet.model.state_dict().keys()}) + loaded_lora = load_fooocus_patch(self.state_dict, lora_keys) + + patched = unet.add_patches(loaded_lora, 1.0) + + not_patched_count = sum(1 for x in loaded_lora if x not in patched) + + if not_patched_count > 0: + print(f"[Fooocus Patch Loader] Failed to load {not_patched_count} keys") + + sigma_start = unet.model.model_sampling.percent_to_sigma(self.start_percent) + sigma_end = unet.model.model_sampling.percent_to_sigma(self.end_percent) + + def conditioning_modifier(model, x, timestep, uncond, cond, cond_scale, model_options, seed): + if timestep > sigma_start or timestep < sigma_end: + target_model = unet_original + model_options = copy.deepcopy(model_options) + if 'transformer_options' in model_options: + if 'patches' in model_options['transformer_options']: + if 'input_block_patch' in model_options['transformer_options']['patches']: + del model_options['transformer_options']['patches']['input_block_patch'] + else: + target_model = unet + + if not is_model_loaded(target_model): + sampling_prepare(target_model, x) + + return target_model.model, x, timestep, uncond, cond, cond_scale, model_options, seed + + unet.add_conditioning_modifier(conditioning_modifier) + + process.sd_model.forge_objects.unet = unet + return + + +model_patcher.extra_weight_calculators['fooocus'] = calculate_weight_fooocus +add_supported_control_model(FooocusInpaintPatcher) diff --git a/extensions-builtin/sd_forge_freeu/scripts/forge_freeu.py b/extensions-builtin/sd_forge_freeu/scripts/forge_freeu.py new file mode 100644 index 0000000000000000000000000000000000000000..07864802d8cf3fb3a17cda740cad0c193ec516e2 --- /dev/null +++ b/extensions-builtin/sd_forge_freeu/scripts/forge_freeu.py @@ -0,0 +1,91 @@ +import gradio as gr + +from modules import scripts +from ldm_patched.contrib.external_freelunch import FreeU_V2 + + +opFreeU_V2 = FreeU_V2() + + +# def Fourier_filter(x, threshold, scale): +# x_freq = torch.fft.fftn(x.float(), dim=(-2, -1)) +# x_freq = torch.fft.fftshift(x_freq, dim=(-2, -1)) +# B, C, H, W = x_freq.shape +# mask = torch.ones((B, C, H, W), device=x.device) +# crow, ccol = H // 2, W //2 +# mask[..., crow - threshold:crow + threshold, ccol - threshold:ccol + threshold] = scale +# x_freq = 
x_freq * mask +# x_freq = torch.fft.ifftshift(x_freq, dim=(-2, -1)) +# x_filtered = torch.fft.ifftn(x_freq, dim=(-2, -1)).real +# return x_filtered.to(x.dtype) +# +# +# def set_freeu_v2_patch(model, b1, b2, s1, s2): +# model_channels = model.model.model_config.unet_config["model_channels"] +# scale_dict = {model_channels * 4: (b1, s1), model_channels * 2: (b2, s2)} +# +# def output_block_patch(h, hsp, *args, **kwargs): +# scale = scale_dict.get(h.shape[1], None) +# if scale is not None: +# hidden_mean = h.mean(1).unsqueeze(1) +# B = hidden_mean.shape[0] +# hidden_max, _ = torch.max(hidden_mean.view(B, -1), dim=-1, keepdim=True) +# hidden_min, _ = torch.min(hidden_mean.view(B, -1), dim=-1, keepdim=True) +# hidden_mean = (hidden_mean - hidden_min.unsqueeze(2).unsqueeze(3)) / \ +# (hidden_max - hidden_min).unsqueeze(2).unsqueeze(3) +# h[:, :h.shape[1] // 2] = h[:, :h.shape[1] // 2] * ((scale[0] - 1) * hidden_mean + 1) +# hsp = Fourier_filter(hsp, threshold=1, scale=scale[1]) +# return h, hsp +# +# m = model.clone() +# m.set_model_output_block_patch(output_block_patch) +# return m + + +class FreeUForForge(scripts.Script): + sorting_priority = 12 + + def title(self): + return "FreeU Integrated" + + def show(self, is_img2img): + # make this extension visible in both txt2img and img2img tab. + return scripts.AlwaysVisible + + def ui(self, *args, **kwargs): + with gr.Accordion(open=False, label=self.title()): + freeu_enabled = gr.Checkbox(label='Enabled', value=False) + freeu_b1 = gr.Slider(label='B1', minimum=0, maximum=2, step=0.01, value=1.01) + freeu_b2 = gr.Slider(label='B2', minimum=0, maximum=2, step=0.01, value=1.02) + freeu_s1 = gr.Slider(label='S1', minimum=0, maximum=4, step=0.01, value=0.99) + freeu_s2 = gr.Slider(label='S2', minimum=0, maximum=4, step=0.01, value=0.95) + + return freeu_enabled, freeu_b1, freeu_b2, freeu_s1, freeu_s2 + + def process_before_every_sampling(self, p, *script_args, **kwargs): + # This will be called before every sampling. + # If you use highres fix, this will be called twice. + + freeu_enabled, freeu_b1, freeu_b2, freeu_s1, freeu_s2 = script_args + + if not freeu_enabled: + return + + unet = p.sd_model.forge_objects.unet + + # unet = set_freeu_v2_patch(unet, freeu_b1, freeu_b2, freeu_s1, freeu_s2) + unet = opFreeU_V2.patch(unet, freeu_b1, freeu_b2, freeu_s1, freeu_s2)[0] + + p.sd_model.forge_objects.unet = unet + + # Below codes will add some logs to the texts below the image outputs on UI. + # The extra_generation_params does not influence results. 
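+ # For example, after a FreeU-enabled run the generation parameters text shown under the
+ # result (and saved in the image's infotext) would typically gain entries such as
+ # "freeu_enabled: True, freeu_b1: 1.01, freeu_b2: 1.02, freeu_s1: 0.99, freeu_s2: 0.95";
+ # the exact serialization of extra_generation_params is handled by the WebUI's infotext code.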
+ p.extra_generation_params.update(dict( + freeu_enabled=freeu_enabled, + freeu_b1=freeu_b1, + freeu_b2=freeu_b2, + freeu_s1=freeu_s1, + freeu_s2=freeu_s2, + )) + + return diff --git a/extensions-builtin/sd_forge_hypertile/scripts/forge_hypertile.py b/extensions-builtin/sd_forge_hypertile/scripts/forge_hypertile.py new file mode 100644 index 0000000000000000000000000000000000000000..05ea101fa9a0e35ba61c3bf063c7c3a2ed9081b2 --- /dev/null +++ b/extensions-builtin/sd_forge_hypertile/scripts/forge_hypertile.py @@ -0,0 +1,50 @@ +import gradio as gr + +from modules import scripts +from ldm_patched.contrib.external_hypertile import HyperTile + + +opHyperTile = HyperTile() + + +class HyperTileForForge(scripts.Script): + sorting_priority = 13 + + def title(self): + return "HyperTile Integrated" + + def show(self, is_img2img): + return scripts.AlwaysVisible + + def ui(self, *args, **kwargs): + with gr.Accordion(open=False, label=self.title()): + enabled = gr.Checkbox(label='Enabled', value=False) + tile_size = gr.Slider(label='Tile Size', minimum=1, maximum=2048, step=1, value=256) + swap_size = gr.Slider(label='Swap Size', minimum=1, maximum=128, step=1, value=2) + max_depth = gr.Slider(label='Max Depth', minimum=0, maximum=10, step=1, value=0) + scale_depth = gr.Checkbox(label='Scale Depth', value=False) + + return enabled, tile_size, swap_size, max_depth, scale_depth + + def process_before_every_sampling(self, p, *script_args, **kwargs): + enabled, tile_size, swap_size, max_depth, scale_depth = script_args + tile_size, swap_size, max_depth = int(tile_size), int(swap_size), int(max_depth) + + if not enabled: + return + + unet = p.sd_model.forge_objects.unet + + unet = opHyperTile.patch(unet, tile_size, swap_size, max_depth, scale_depth)[0] + + p.sd_model.forge_objects.unet = unet + + p.extra_generation_params.update(dict( + HyperTile_enabled=enabled, + HyperTile_tile_size=tile_size, + HyperTile_swap_size=swap_size, + HyperTile_max_depth=max_depth, + HyperTile_scale_depth=scale_depth, + )) + + return diff --git a/extensions-builtin/sd_forge_ipadapter/LICENSE b/extensions-builtin/sd_forge_ipadapter/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..f288702d2fa16d3cdf0035b15a9fcbc552cd88e7 --- /dev/null +++ b/extensions-builtin/sd_forge_ipadapter/LICENSE @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. 
Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. 
+ + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. 
This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. 
This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. 
+ + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. 
+ + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. 
+ + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. 
+ + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. 
If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. 
Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + Copyright (C) + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +. 
diff --git a/extensions-builtin/sd_forge_ipadapter/lib_ipadapter/IPAdapterPlus.py b/extensions-builtin/sd_forge_ipadapter/lib_ipadapter/IPAdapterPlus.py new file mode 100644 index 0000000000000000000000000000000000000000..e35b8d600e55c0a1f785b33f8b34eeaa2ef9d863 --- /dev/null +++ b/extensions-builtin/sd_forge_ipadapter/lib_ipadapter/IPAdapterPlus.py @@ -0,0 +1,1102 @@ +# https://github.com/cubiq/ComfyUI_IPAdapter_plus/blob/main/IPAdapterPlus.py + +import torch +import contextlib +import os +import math + +import ldm_patched.modules.utils +import ldm_patched.modules.model_management +from ldm_patched.modules.clip_vision import clip_preprocess +from ldm_patched.ldm.modules.attention import optimized_attention +from ldm_patched.utils import path_utils as folder_paths + +from torch import nn +from PIL import Image +import torch.nn.functional as F +import torchvision.transforms as TT + +from lib_ipadapter.resampler import PerceiverAttention, FeedForward, Resampler + +# set the models directory backward compatible +GLOBAL_MODELS_DIR = os.path.join(folder_paths.models_dir, "ipadapter") +MODELS_DIR = GLOBAL_MODELS_DIR if os.path.isdir(GLOBAL_MODELS_DIR) else os.path.join(os.path.dirname(os.path.realpath(__file__)), "models") +if "ipadapter" not in folder_paths.folder_names_and_paths: + current_paths = [MODELS_DIR] +else: + current_paths, _ = folder_paths.folder_names_and_paths["ipadapter"] +folder_paths.folder_names_and_paths["ipadapter"] = (current_paths, folder_paths.supported_pt_extensions) + +INSIGHTFACE_DIR = os.path.join(folder_paths.models_dir, "insightface") + +class FacePerceiverResampler(torch.nn.Module): + def __init__( + self, + *, + dim=768, + depth=4, + dim_head=64, + heads=16, + embedding_dim=1280, + output_dim=768, + ff_mult=4, + ): + super().__init__() + + self.proj_in = torch.nn.Linear(embedding_dim, dim) + self.proj_out = torch.nn.Linear(dim, output_dim) + self.norm_out = torch.nn.LayerNorm(output_dim) + self.layers = torch.nn.ModuleList([]) + for _ in range(depth): + self.layers.append( + torch.nn.ModuleList( + [ + PerceiverAttention(dim=dim, dim_head=dim_head, heads=heads), + FeedForward(dim=dim, mult=ff_mult), + ] + ) + ) + + def forward(self, latents, x): + x = self.proj_in(x) + for attn, ff in self.layers: + latents = attn(x, latents) + latents + latents = ff(latents) + latents + latents = self.proj_out(latents) + return self.norm_out(latents) + +class MLPProjModel(torch.nn.Module): + def __init__(self, cross_attention_dim=1024, clip_embeddings_dim=1024): + super().__init__() + + self.proj = torch.nn.Sequential( + torch.nn.Linear(clip_embeddings_dim, clip_embeddings_dim), + torch.nn.GELU(), + torch.nn.Linear(clip_embeddings_dim, cross_attention_dim), + torch.nn.LayerNorm(cross_attention_dim) + ) + + def forward(self, image_embeds): + clip_extra_context_tokens = self.proj(image_embeds) + return clip_extra_context_tokens + +class MLPProjModelFaceId(torch.nn.Module): + def __init__(self, cross_attention_dim=768, id_embeddings_dim=512, num_tokens=4): + super().__init__() + + self.cross_attention_dim = cross_attention_dim + self.num_tokens = num_tokens + + self.proj = torch.nn.Sequential( + torch.nn.Linear(id_embeddings_dim, id_embeddings_dim*2), + torch.nn.GELU(), + torch.nn.Linear(id_embeddings_dim*2, cross_attention_dim*num_tokens), + ) + self.norm = torch.nn.LayerNorm(cross_attention_dim) + + def forward(self, id_embeds): + clip_extra_context_tokens = self.proj(id_embeds) + clip_extra_context_tokens = clip_extra_context_tokens.reshape(-1, self.num_tokens, 
self.cross_attention_dim) + clip_extra_context_tokens = self.norm(clip_extra_context_tokens) + return clip_extra_context_tokens + +class ProjModelFaceIdPlus(torch.nn.Module): + def __init__(self, cross_attention_dim=768, id_embeddings_dim=512, clip_embeddings_dim=1280, num_tokens=4): + super().__init__() + + self.cross_attention_dim = cross_attention_dim + self.num_tokens = num_tokens + + self.proj = torch.nn.Sequential( + torch.nn.Linear(id_embeddings_dim, id_embeddings_dim*2), + torch.nn.GELU(), + torch.nn.Linear(id_embeddings_dim*2, cross_attention_dim*num_tokens), + ) + self.norm = torch.nn.LayerNorm(cross_attention_dim) + + self.perceiver_resampler = FacePerceiverResampler( + dim=cross_attention_dim, + depth=4, + dim_head=64, + heads=cross_attention_dim // 64, + embedding_dim=clip_embeddings_dim, + output_dim=cross_attention_dim, + ff_mult=4, + ) + + def forward(self, id_embeds, clip_embeds, scale=1.0, shortcut=False): + x = self.proj(id_embeds) + x = x.reshape(-1, self.num_tokens, self.cross_attention_dim) + x = self.norm(x) + out = self.perceiver_resampler(x, clip_embeds) + if shortcut: + out = x + scale * out + return out + +class ImageProjModel(nn.Module): + def __init__(self, cross_attention_dim=1024, clip_embeddings_dim=1024, clip_extra_context_tokens=4): + super().__init__() + + self.cross_attention_dim = cross_attention_dim + self.clip_extra_context_tokens = clip_extra_context_tokens + self.proj = nn.Linear(clip_embeddings_dim, self.clip_extra_context_tokens * cross_attention_dim) + self.norm = nn.LayerNorm(cross_attention_dim) + + def forward(self, image_embeds): + embeds = image_embeds + clip_extra_context_tokens = self.proj(embeds).reshape(-1, self.clip_extra_context_tokens, self.cross_attention_dim) + clip_extra_context_tokens = self.norm(clip_extra_context_tokens) + return clip_extra_context_tokens + +class To_KV(nn.Module): + def __init__(self, state_dict): + super().__init__() + + self.to_kvs = nn.ModuleDict() + for key, value in state_dict.items(): + self.to_kvs[key.replace(".weight", "").replace(".", "_")] = nn.Linear(value.shape[1], value.shape[0], bias=False) + self.to_kvs[key.replace(".weight", "").replace(".", "_")].weight.data = value + +def set_model_patch_replace(model, patch_kwargs, key): + to = model.model_options["transformer_options"] + if "patches_replace" not in to: + to["patches_replace"] = {} + if "attn2" not in to["patches_replace"]: + to["patches_replace"]["attn2"] = {} + if key not in to["patches_replace"]["attn2"]: + patch = CrossAttentionPatch(**patch_kwargs) + to["patches_replace"]["attn2"][key] = patch + else: + to["patches_replace"]["attn2"][key].set_new_condition(**patch_kwargs) + +def image_add_noise(image, noise): + image = image.permute([0,3,1,2]) + torch.manual_seed(0) # use a fixed random for reproducible results + transforms = TT.Compose([ + TT.CenterCrop(min(image.shape[2], image.shape[3])), + TT.Resize((224, 224), interpolation=TT.InterpolationMode.BICUBIC, antialias=True), + TT.ElasticTransform(alpha=75.0, sigma=noise*3.5), # shuffle the image + TT.RandomVerticalFlip(p=1.0), # flip the image to change the geometry even more + TT.RandomHorizontalFlip(p=1.0), + ]) + image = transforms(image.cpu()) + image = image.permute([0,2,3,1]) + image = image + ((0.25*(1-noise)+0.05) * torch.randn_like(image) ) # add further random noise + return image + +def zeroed_hidden_states(clip_vision, batch_size): + image = torch.zeros([batch_size, 224, 224, 3]) + ldm_patched.modules.model_management.load_model_gpu(clip_vision.patcher) + pixel_values = 
clip_preprocess(image.to(clip_vision.load_device)).float() + outputs = clip_vision.model(pixel_values=pixel_values, output_hidden_states=True) + outputs = outputs.hidden_states[-2].to(ldm_patched.modules.model_management.intermediate_device()) + return outputs + +def min_(tensor_list): + # return the element-wise min of the tensor list. + x = torch.stack(tensor_list) + mn = x.min(axis=0)[0] + return torch.clamp(mn, min=0) + +def max_(tensor_list): + # return the element-wise max of the tensor list. + x = torch.stack(tensor_list) + mx = x.max(axis=0)[0] + return torch.clamp(mx, max=1) + +# From https://github.com/Jamy-L/Pytorch-Contrast-Adaptive-Sharpening/ +def contrast_adaptive_sharpening(image, amount): + img = F.pad(image, pad=(1, 1, 1, 1)).cpu() + + a = img[..., :-2, :-2] + b = img[..., :-2, 1:-1] + c = img[..., :-2, 2:] + d = img[..., 1:-1, :-2] + e = img[..., 1:-1, 1:-1] + f = img[..., 1:-1, 2:] + g = img[..., 2:, :-2] + h = img[..., 2:, 1:-1] + i = img[..., 2:, 2:] + + # Computing contrast + cross = (b, d, e, f, h) + mn = min_(cross) + mx = max_(cross) + + diag = (a, c, g, i) + mn2 = min_(diag) + mx2 = max_(diag) + mx = mx + mx2 + mn = mn + mn2 + + # Computing local weight + inv_mx = torch.reciprocal(mx) + amp = inv_mx * torch.minimum(mn, (2 - mx)) + + # scaling + amp = torch.sqrt(amp) + w = - amp * (amount * (1/5 - 1/8) + 1/8) + div = torch.reciprocal(1 + 4*w) + + output = ((b + d + f + h)*w + e) * div + output = output.clamp(0, 1) + output = torch.nan_to_num(output) + + return (output) + +def tensorToNP(image): + out = torch.clamp(255. * image.detach().cpu(), 0, 255).to(torch.uint8) + out = out[..., [2, 1, 0]] + out = out.numpy() + + return out + +def NPToTensor(image): + out = torch.from_numpy(image) + out = torch.clamp(out.to(torch.float)/255., 0.0, 1.0) + out = out[..., [2, 1, 0]] + + return out + +class IPAdapter(nn.Module): + def __init__(self, ipadapter_model, cross_attention_dim=1024, output_cross_attention_dim=1024, + clip_embeddings_dim=1024, clip_extra_context_tokens=4, + is_sdxl=False, is_plus=False, is_full=False, + is_faceid=False, is_instant_id=False): + super().__init__() + + self.clip_embeddings_dim = clip_embeddings_dim + self.cross_attention_dim = cross_attention_dim + self.output_cross_attention_dim = output_cross_attention_dim + self.clip_extra_context_tokens = clip_extra_context_tokens + self.is_sdxl = is_sdxl + self.is_full = is_full + self.is_plus = is_plus + self.is_instant_id = is_instant_id + + if is_instant_id: + self.image_proj_model = self.init_proj_instantid() + elif is_faceid: + self.image_proj_model = self.init_proj_faceid() + elif is_plus: + self.image_proj_model = self.init_proj_plus() + else: + self.image_proj_model = self.init_proj() + + self.image_proj_model.load_state_dict(ipadapter_model["image_proj"]) + self.ip_layers = To_KV(ipadapter_model["ip_adapter"]) + + def init_proj(self): + image_proj_model = ImageProjModel( + cross_attention_dim=self.cross_attention_dim, + clip_embeddings_dim=self.clip_embeddings_dim, + clip_extra_context_tokens=self.clip_extra_context_tokens + ) + return image_proj_model + + def init_proj_plus(self): + if self.is_full: + image_proj_model = MLPProjModel( + cross_attention_dim=self.cross_attention_dim, + clip_embeddings_dim=self.clip_embeddings_dim + ) + else: + image_proj_model = Resampler( + dim=self.cross_attention_dim, + depth=4, + dim_head=64, + heads=20 if self.is_sdxl else 12, + num_queries=self.clip_extra_context_tokens, + embedding_dim=self.clip_embeddings_dim, + output_dim=self.output_cross_attention_dim, 
+ ff_mult=4 + ) + return image_proj_model + + def init_proj_faceid(self): + if self.is_plus: + image_proj_model = ProjModelFaceIdPlus( + cross_attention_dim=self.cross_attention_dim, + id_embeddings_dim=512, + clip_embeddings_dim=1280, + num_tokens=4, + ) + else: + image_proj_model = MLPProjModelFaceId( + cross_attention_dim=self.cross_attention_dim, + id_embeddings_dim=512, + num_tokens=self.clip_extra_context_tokens, + ) + return image_proj_model + + def init_proj_instantid(self, image_emb_dim=512, num_tokens=16): + image_proj_model = Resampler( + dim=1280, + depth=4, + dim_head=64, + heads=20, + num_queries=num_tokens, + embedding_dim=image_emb_dim, + output_dim=self.cross_attention_dim, + ff_mult=4, + ) + return image_proj_model + + def get_image_embeds(self, clip_embed, clip_embed_zeroed): + image_prompt_embeds = self.image_proj_model(clip_embed) + uncond_image_prompt_embeds = self.image_proj_model(clip_embed_zeroed) + return image_prompt_embeds, uncond_image_prompt_embeds + + def get_image_embeds_faceid_plus(self, face_embed, clip_embed, s_scale, shortcut): + embeds = self.image_proj_model(face_embed, clip_embed, scale=s_scale, shortcut=shortcut) + return embeds + + def get_image_embeds_instantid(self, prompt_image_emb): + c = self.image_proj_model(prompt_image_emb) + uc = self.image_proj_model(torch.zeros_like(prompt_image_emb)) + return c, uc + +class CrossAttentionPatch: + # forward for patching + def __init__(self, weight, ipadapter, number, cond, uncond, weight_type, mask=None, sigma_start=0.0, sigma_end=1.0, unfold_batch=False): + self.weights = [weight] + self.ipadapters = [ipadapter] + self.conds = [cond] + self.unconds = [uncond] + self.number = number + self.weight_type = [weight_type] + self.masks = [mask] + self.sigma_start = [sigma_start] + self.sigma_end = [sigma_end] + self.unfold_batch = [unfold_batch] + + self.k_key = str(self.number*2+1) + "_to_k_ip" + self.v_key = str(self.number*2+1) + "_to_v_ip" + + def set_new_condition(self, weight, ipadapter, number, cond, uncond, weight_type, mask=None, sigma_start=0.0, sigma_end=1.0, unfold_batch=False): + self.weights.append(weight) + self.ipadapters.append(ipadapter) + self.conds.append(cond) + self.unconds.append(uncond) + self.masks.append(mask) + self.weight_type.append(weight_type) + self.sigma_start.append(sigma_start) + self.sigma_end.append(sigma_end) + self.unfold_batch.append(unfold_batch) + + def __call__(self, n, context_attn2, value_attn2, extra_options): + org_dtype = n.dtype + cond_or_uncond = extra_options["cond_or_uncond"] + + sigma = extra_options["sigmas"][0] if 'sigmas' in extra_options else None + sigma = sigma.item() if sigma is not None else 999999999.9 + + # extra options for AnimateDiff + ad_params = extra_options['ad_params'] if "ad_params" in extra_options else None + + q = n + k = context_attn2 + v = value_attn2 + b = q.shape[0] + qs = q.shape[1] + batch_prompt = b // len(cond_or_uncond) + out = optimized_attention(q, k, v, extra_options["n_heads"]) + _, _, lh, lw = extra_options["original_shape"] + + for weight, cond, uncond, ipadapter, mask, weight_type, sigma_start, sigma_end, unfold_batch in zip(self.weights, self.conds, self.unconds, self.ipadapters, self.masks, self.weight_type, self.sigma_start, self.sigma_end, self.unfold_batch): + if sigma > sigma_start or sigma < sigma_end: + continue + + if unfold_batch and cond.shape[0] > 1: + # Check AnimateDiff context window + if ad_params is not None and ad_params["sub_idxs"] is not None: + # if images length matches or exceeds full_length get 
sub_idx images + if cond.shape[0] >= ad_params["full_length"]: + cond = torch.Tensor(cond[ad_params["sub_idxs"]]) + uncond = torch.Tensor(uncond[ad_params["sub_idxs"]]) + # otherwise, need to do more to get proper sub_idxs masks + else: + # check if images length matches full_length - if not, make it match + if cond.shape[0] < ad_params["full_length"]: + cond = torch.cat((cond, cond[-1:].repeat((ad_params["full_length"]-cond.shape[0], 1, 1))), dim=0) + uncond = torch.cat((uncond, uncond[-1:].repeat((ad_params["full_length"]-uncond.shape[0], 1, 1))), dim=0) + # if we have too many remove the excess (should not happen, but just in case) + if cond.shape[0] > ad_params["full_length"]: + cond = cond[:ad_params["full_length"]] + uncond = uncond[:ad_params["full_length"]] + cond = cond[ad_params["sub_idxs"]] + uncond = uncond[ad_params["sub_idxs"]] + + # if we don't have enough reference images repeat the last one until we reach the right size + if cond.shape[0] < batch_prompt: + cond = torch.cat((cond, cond[-1:].repeat((batch_prompt-cond.shape[0], 1, 1))), dim=0) + uncond = torch.cat((uncond, uncond[-1:].repeat((batch_prompt-uncond.shape[0], 1, 1))), dim=0) + # if we have too many remove the exceeding + elif cond.shape[0] > batch_prompt: + cond = cond[:batch_prompt] + uncond = uncond[:batch_prompt] + + k_cond = ipadapter.ip_layers.to_kvs[self.k_key](cond) + k_uncond = ipadapter.ip_layers.to_kvs[self.k_key](uncond) + v_cond = ipadapter.ip_layers.to_kvs[self.v_key](cond) + v_uncond = ipadapter.ip_layers.to_kvs[self.v_key](uncond) + else: + k_cond = ipadapter.ip_layers.to_kvs[self.k_key](cond).repeat(batch_prompt, 1, 1) + k_uncond = ipadapter.ip_layers.to_kvs[self.k_key](uncond).repeat(batch_prompt, 1, 1) + v_cond = ipadapter.ip_layers.to_kvs[self.v_key](cond).repeat(batch_prompt, 1, 1) + v_uncond = ipadapter.ip_layers.to_kvs[self.v_key](uncond).repeat(batch_prompt, 1, 1) + + if weight_type.startswith("linear"): + ip_k = torch.cat([(k_cond, k_uncond)[i] for i in cond_or_uncond], dim=0) * weight + ip_v = torch.cat([(v_cond, v_uncond)[i] for i in cond_or_uncond], dim=0) * weight + else: + ip_k = torch.cat([(k_cond, k_uncond)[i] for i in cond_or_uncond], dim=0) + ip_v = torch.cat([(v_cond, v_uncond)[i] for i in cond_or_uncond], dim=0) + + if weight_type.startswith("channel"): + # code by Lvmin Zhang at Stanford University as also seen on Fooocus IPAdapter implementation + # please read licensing notes https://github.com/lllyasviel/Fooocus/blob/69a23c4d60c9e627409d0cb0f8862cdb015488eb/extras/ip_adapter.py#L234 + ip_v_mean = torch.mean(ip_v, dim=1, keepdim=True) + ip_v_offset = ip_v - ip_v_mean + _, _, C = ip_k.shape + channel_penalty = float(C) / 1280.0 + W = weight * channel_penalty + ip_k = ip_k * W + ip_v = ip_v_offset + ip_v_mean * W + + out_ip = optimized_attention(q, ip_k.to(org_dtype), ip_v.to(org_dtype), extra_options["n_heads"]) + if weight_type.startswith("original"): + out_ip = out_ip * weight + + if mask is not None: + # TODO: needs checking + mask_h = lh / math.sqrt(lh * lw / qs) + mask_h = int(mask_h) + int((qs % int(mask_h)) != 0) + mask_w = qs // mask_h + + # check if using AnimateDiff and sliding context window + if (mask.shape[0] > 1 and ad_params is not None and ad_params["sub_idxs"] is not None): + # if mask length matches or exceeds full_length, just get sub_idx masks, resize, and continue + if mask.shape[0] >= ad_params["full_length"]: + mask_downsample = torch.Tensor(mask[ad_params["sub_idxs"]]) + mask_downsample = F.interpolate(mask_downsample.unsqueeze(1), size=(mask_h, 
mask_w), mode="bicubic").squeeze(1) + # otherwise, need to do more to get proper sub_idxs masks + else: + # resize to needed attention size (to save on memory) + mask_downsample = F.interpolate(mask.unsqueeze(1), size=(mask_h, mask_w), mode="bicubic").squeeze(1) + # check if mask length matches full_length - if not, make it match + if mask_downsample.shape[0] < ad_params["full_length"]: + mask_downsample = torch.cat((mask_downsample, mask_downsample[-1:].repeat((ad_params["full_length"]-mask_downsample.shape[0], 1, 1))), dim=0) + # if we have too many remove the excess (should not happen, but just in case) + if mask_downsample.shape[0] > ad_params["full_length"]: + mask_downsample = mask_downsample[:ad_params["full_length"]] + # now, select sub_idxs masks + mask_downsample = mask_downsample[ad_params["sub_idxs"]] + # otherwise, perform usual mask interpolation + else: + mask_downsample = F.interpolate(mask.unsqueeze(1), size=(mask_h, mask_w), mode="bicubic").squeeze(1) + + # if we don't have enough masks repeat the last one until we reach the right size + if mask_downsample.shape[0] < batch_prompt: + mask_downsample = torch.cat((mask_downsample, mask_downsample[-1:, :, :].repeat((batch_prompt-mask_downsample.shape[0], 1, 1))), dim=0) + # if we have too many remove the exceeding + elif mask_downsample.shape[0] > batch_prompt: + mask_downsample = mask_downsample[:batch_prompt, :, :] + + # repeat the masks + mask_downsample = mask_downsample.repeat(len(cond_or_uncond), 1, 1) + mask_downsample = mask_downsample.view(mask_downsample.shape[0], -1, 1).repeat(1, 1, out.shape[2]) + + out_ip = out_ip * mask_downsample + + out = out + out_ip + + return out.to(dtype=org_dtype) + +class IPAdapterModelLoader: + @classmethod + def INPUT_TYPES(s): + return {"required": { "ipadapter_file": (folder_paths.get_filename_list("ipadapter"), )}} + + RETURN_TYPES = ("IPADAPTER",) + FUNCTION = "load_ipadapter_model" + CATEGORY = "ipadapter" + + def load_ipadapter_model(self, ipadapter_file): + ckpt_path = folder_paths.get_full_path("ipadapter", ipadapter_file) + + model = ldm_patched.modules.utils.load_torch_file(ckpt_path, safe_load=True) + + if ckpt_path.lower().endswith(".safetensors"): + st_model = {"image_proj": {}, "ip_adapter": {}} + for key in model.keys(): + if key.startswith("image_proj."): + st_model["image_proj"][key.replace("image_proj.", "")] = model[key] + elif key.startswith("ip_adapter."): + st_model["ip_adapter"][key.replace("ip_adapter.", "")] = model[key] + model = st_model + + if not "ip_adapter" in model.keys() or not model["ip_adapter"]: + raise Exception("invalid IPAdapter model {}".format(ckpt_path)) + + return (model,) + +insightface_face_align = None +class InsightFaceLoader: + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "provider": (["CPU", "CUDA", "ROCM"], ), + }, + } + + RETURN_TYPES = ("INSIGHTFACE",) + FUNCTION = "load_insight_face" + CATEGORY = "ipadapter" + + def load_insight_face(self, name="buffalo_l", provider="CPU"): + try: + from insightface.app import FaceAnalysis + except ImportError as e: + raise Exception(e) + + if name == 'antelopev2': + from modules.modelloader import load_file_from_url + model_root = os.path.join(INSIGHTFACE_DIR, 'models', "antelopev2") + if not model_root: + os.makedirs(model_root, exist_ok=True) + for local_file, url in ( + ("1k3d68.onnx", "https://huggingface.co/DIAMONIK7777/antelopev2/resolve/main/1k3d68.onnx"), + ("2d106det.onnx", "https://huggingface.co/DIAMONIK7777/antelopev2/resolve/main/2d106det.onnx"), + ("genderage.onnx", 
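The mask handling above targets the attention sequence length qs: the latent is lh x lw, and the mask is interpolated to (mask_h, mask_w) with mask_h * mask_w approximately equal to qs. A standalone sketch of that resize, assuming qs corresponds to a uniformly downscaled latent:

import math
import torch
import torch.nn.functional as F

def downsample_mask(mask, lh, lw, qs):
    mask_h = lh / math.sqrt(lh * lw / qs)
    mask_h = int(mask_h) + int((qs % int(mask_h)) != 0)
    mask_w = qs // mask_h
    return F.interpolate(mask.unsqueeze(1), size=(mask_h, mask_w), mode="bicubic").squeeze(1)

print(downsample_mask(torch.rand(1, 512, 512), lh=64, lw=64, qs=1024).shape)  # torch.Size([1, 32, 32])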
"https://huggingface.co/DIAMONIK7777/antelopev2/resolve/main/genderage.onnx"), + ("glintr100.onnx", "https://huggingface.co/DIAMONIK7777/antelopev2/resolve/main/glintr100.onnx"), + ("scrfd_10g_bnkps.onnx", + "https://huggingface.co/DIAMONIK7777/antelopev2/resolve/main/scrfd_10g_bnkps.onnx"), + ): + local_path = os.path.join(model_root, local_file) + if not os.path.exists(local_path): + load_file_from_url(url, model_dir=model_root) + + from insightface.utils import face_align + global insightface_face_align + insightface_face_align = face_align + + model = FaceAnalysis(name=name, root=INSIGHTFACE_DIR, providers=[provider + 'ExecutionProvider',]) + model.prepare(ctx_id=0, det_size=(640, 640)) + + return (model,) + +class IPAdapterApply: + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "ipadapter": ("IPADAPTER", ), + "clip_vision": ("CLIP_VISION",), + "image": ("IMAGE",), + "model": ("MODEL", ), + "weight": ("FLOAT", { "default": 1.0, "min": -1, "max": 3, "step": 0.05 }), + "noise": ("FLOAT", { "default": 0.0, "min": 0.0, "max": 1.0, "step": 0.01 }), + "weight_type": (["original", "linear", "channel penalty"], ), + "start_at": ("FLOAT", { "default": 0.0, "min": 0.0, "max": 1.0, "step": 0.001 }), + "end_at": ("FLOAT", { "default": 1.0, "min": 0.0, "max": 1.0, "step": 0.001 }), + "unfold_batch": ("BOOLEAN", { "default": False }), + }, + "optional": { + "attn_mask": ("MASK",), + } + } + + RETURN_TYPES = ("MODEL", ) + FUNCTION = "apply_ipadapter" + CATEGORY = "ipadapter" + + def apply_ipadapter(self, ipadapter, model, weight, clip_vision=None, image=None, weight_type="original", + noise=None, embeds=None, attn_mask=None, start_at=0.0, end_at=1.0, unfold_batch=False, + insightface=None, faceid_v2=False, weight_v2=False, instant_id=False): + + self.dtype = torch.float16 if ldm_patched.modules.model_management.should_use_fp16() else torch.float32 + self.device = ldm_patched.modules.model_management.get_torch_device() + self.weight = weight + self.is_full = "proj.3.weight" in ipadapter["image_proj"] + self.is_portrait = "proj.2.weight" in ipadapter["image_proj"] and not "proj.3.weight" in ipadapter["image_proj"] and not "0.to_q_lora.down.weight" in ipadapter["ip_adapter"] + self.is_faceid = self.is_portrait or "0.to_q_lora.down.weight" in ipadapter["ip_adapter"] + self.is_plus = (self.is_full or "latents" in ipadapter["image_proj"] or "perceiver_resampler.proj_in.weight" in ipadapter["image_proj"]) + self.is_instant_id = instant_id + + if self.is_faceid and not insightface: + raise Exception('InsightFace must be provided for FaceID models.') + + output_cross_attention_dim = ipadapter["ip_adapter"]["1.to_k_ip.weight"].shape[1] + self.is_sdxl = output_cross_attention_dim == 2048 + cross_attention_dim = 1280 if self.is_plus and self.is_sdxl and not self.is_faceid else output_cross_attention_dim + clip_extra_context_tokens = 16 if self.is_plus or self.is_portrait else 4 + + if self.is_instant_id: + cross_attention_dim = output_cross_attention_dim + + if embeds is not None: + embeds = torch.unbind(embeds) + clip_embed = embeds[0].cpu() + clip_embed_zeroed = embeds[1].cpu() + else: + if self.is_instant_id: + insightface.det_model.input_size = (640, 640) # reset the detection size + face_img = tensorToNP(image) + face_embed = [] + + for i in range(face_img.shape[0]): + for size in [(size, size) for size in range(640, 128, -64)]: + insightface.det_model.input_size = size # TODO: hacky but seems to be working + face = insightface.get(face_img[i]) + if face: + 
face_embed.append(torch.from_numpy(face[0].embedding).unsqueeze(0)) + + if 640 not in size: + print(f"\033[33mINFO: InsightFace detection resolution lowered to {size}.\033[0m") + break + else: + raise Exception('InsightFace: No face detected.') + + face_embed = torch.stack(face_embed, dim=0) + clip_embed = face_embed + elif self.is_faceid: + insightface.det_model.input_size = (640,640) # reset the detection size + face_img = tensorToNP(image) + face_embed = [] + face_clipvision = [] + + for i in range(face_img.shape[0]): + for size in [(size, size) for size in range(640, 128, -64)]: + insightface.det_model.input_size = size # TODO: hacky but seems to be working + face = insightface.get(face_img[i]) + if face: + face_embed.append(torch.from_numpy(face[0].normed_embedding).unsqueeze(0)) + face_clipvision.append(NPToTensor(insightface_face_align.norm_crop(face_img[i], landmark=face[0].kps, image_size=224))) + + if 640 not in size: + print(f"\033[33mINFO: InsightFace detection resolution lowered to {size}.\033[0m") + break + else: + raise Exception('InsightFace: No face detected.') + + face_embed = torch.stack(face_embed, dim=0) + image = torch.stack(face_clipvision, dim=0) + + neg_image = image_add_noise(image, noise) if noise > 0 else None + + if self.is_plus: + clip_embed = clip_vision.encode_image(image).penultimate_hidden_states + if noise > 0: + clip_embed_zeroed = clip_vision.encode_image(neg_image).penultimate_hidden_states + else: + clip_embed_zeroed = zeroed_hidden_states(clip_vision, image.shape[0]) + + # TODO: check noise to the unconds too + face_embed_zeroed = torch.zeros_like(face_embed) + else: + clip_embed = face_embed + clip_embed_zeroed = torch.zeros_like(clip_embed) + else: + if image.shape[1] != image.shape[2]: + print("\033[33mINFO: the IPAdapter reference image is not a square, CLIPImageProcessor will resize and crop it at the center.
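The detection-size fallback above simply walks down from 640x640 in 64-pixel steps until InsightFace returns a face; a one-liner showing the sizes that get tried:

sizes = [(s, s) for s in range(640, 128, -64)]
print(sizes[0], sizes[-1], len(sizes))  # (640, 640) (192, 192) 8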
If the main focus of the picture is not in the middle the result might not be what you are expecting.\033[0m") + + clip_embed = clip_vision.encode_image(image) + neg_image = image_add_noise(image, noise) if noise > 0 else None + + if self.is_plus: + clip_embed = clip_embed.penultimate_hidden_states + if noise > 0: + clip_embed_zeroed = clip_vision.encode_image(neg_image).penultimate_hidden_states + else: + clip_embed_zeroed = zeroed_hidden_states(clip_vision, image.shape[0]) + else: + clip_embed = clip_embed.image_embeds + if noise > 0: + clip_embed_zeroed = clip_vision.encode_image(neg_image).image_embeds + else: + clip_embed_zeroed = torch.zeros_like(clip_embed) + + clip_embeddings_dim = clip_embed.shape[-1] + + self.ipadapter = IPAdapter( + ipadapter, + cross_attention_dim=cross_attention_dim, + output_cross_attention_dim=output_cross_attention_dim, + clip_embeddings_dim=clip_embeddings_dim, + clip_extra_context_tokens=clip_extra_context_tokens, + is_sdxl=self.is_sdxl, + is_plus=self.is_plus, + is_full=self.is_full, + is_faceid=self.is_faceid, + is_instant_id=self.is_instant_id + ) + + self.ipadapter.to(self.device, dtype=self.dtype) + + if self.is_instant_id: + image_prompt_embeds, uncond_image_prompt_embeds = self.ipadapter.get_image_embeds_instantid(face_embed.to(self.device, dtype=self.dtype)) + elif self.is_faceid and self.is_plus: + image_prompt_embeds = self.ipadapter.get_image_embeds_faceid_plus(face_embed.to(self.device, dtype=self.dtype), clip_embed.to(self.device, dtype=self.dtype), weight_v2, faceid_v2) + uncond_image_prompt_embeds = self.ipadapter.get_image_embeds_faceid_plus(face_embed_zeroed.to(self.device, dtype=self.dtype), clip_embed_zeroed.to(self.device, dtype=self.dtype), weight_v2, faceid_v2) + else: + image_prompt_embeds, uncond_image_prompt_embeds = self.ipadapter.get_image_embeds(clip_embed.to(self.device, dtype=self.dtype), clip_embed_zeroed.to(self.device, dtype=self.dtype)) + + image_prompt_embeds = image_prompt_embeds.to(self.device, dtype=self.dtype) + uncond_image_prompt_embeds = uncond_image_prompt_embeds.to(self.device, dtype=self.dtype) + + work_model = model.clone() + + if self.is_instant_id: + def modifier(cnet, x_noisy, t, cond, batched_number): + cond_mark = cond['transformer_options']['cond_mark'][:, None, None].to(cond['c_crossattn']) # cond is 0 + c_crossattn = image_prompt_embeds * (1.0 - cond_mark) + uncond_image_prompt_embeds * cond_mark + cond['c_crossattn'] = c_crossattn + return x_noisy, t, cond, batched_number + + work_model.add_controlnet_conditioning_modifier(modifier) + + if attn_mask is not None: + attn_mask = attn_mask.to(self.device) + + sigma_start = model.model.model_sampling.percent_to_sigma(start_at) + sigma_end = model.model.model_sampling.percent_to_sigma(end_at) + + patch_kwargs = { + "number": 0, + "weight": self.weight, + "ipadapter": self.ipadapter, + "cond": image_prompt_embeds, + "uncond": uncond_image_prompt_embeds, + "weight_type": weight_type, + "mask": attn_mask, + "sigma_start": sigma_start, + "sigma_end": sigma_end, + "unfold_batch": unfold_batch, + } + + if not self.is_sdxl: + for id in [1,2,4,5,7,8]: # id of input_blocks that have cross attention + set_model_patch_replace(work_model, patch_kwargs, ("input", id)) + patch_kwargs["number"] += 1 + for id in [3,4,5,6,7,8,9,10,11]: # id of output_blocks that have cross attention + set_model_patch_replace(work_model, patch_kwargs, ("output", id)) + patch_kwargs["number"] += 1 + set_model_patch_replace(work_model, patch_kwargs, ("middle", 0)) + else: + for id in 
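A toy version (not the extension's API) of the InstantID conditioning modifier above: cond_mark is 0 for conditional entries and 1 for unconditional ones, so the blend routes the image-prompt embeds to cond and the zeroed embeds to uncond.

import torch

cond_mark = torch.tensor([0.0, 1.0])[:, None, None]
image_embeds, uncond_embeds = torch.ones(2, 16, 2048), torch.zeros(2, 16, 2048)
c_crossattn = image_embeds * (1.0 - cond_mark) + uncond_embeds * cond_mark
print(c_crossattn[0, 0, 0].item(), c_crossattn[1, 0, 0].item())  # 1.0 0.0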
[4,5,7,8]: # id of input_blocks that have cross attention + block_indices = range(2) if id in [4, 5] else range(10) # transformer_depth + for index in block_indices: + set_model_patch_replace(work_model, patch_kwargs, ("input", id, index)) + patch_kwargs["number"] += 1 + for id in range(6): # id of output_blocks that have cross attention + block_indices = range(2) if id in [3, 4, 5] else range(10) # transformer_depth + for index in block_indices: + set_model_patch_replace(work_model, patch_kwargs, ("output", id, index)) + patch_kwargs["number"] += 1 + for index in range(10): + set_model_patch_replace(work_model, patch_kwargs, ("middle", 0, index)) + patch_kwargs["number"] += 1 + + return (work_model, ) + +class IPAdapterApplyFaceID(IPAdapterApply): + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "ipadapter": ("IPADAPTER", ), + "clip_vision": ("CLIP_VISION",), + "insightface": ("INSIGHTFACE",), + "image": ("IMAGE",), + "model": ("MODEL", ), + "weight": ("FLOAT", { "default": 1.0, "min": -1, "max": 3, "step": 0.05 }), + "noise": ("FLOAT", { "default": 0.0, "min": 0.0, "max": 1.0, "step": 0.01 }), + "weight_type": (["original", "linear", "channel penalty"], ), + "start_at": ("FLOAT", { "default": 0.0, "min": 0.0, "max": 1.0, "step": 0.001 }), + "end_at": ("FLOAT", { "default": 1.0, "min": 0.0, "max": 1.0, "step": 0.001 }), + "faceid_v2": ("BOOLEAN", { "default": False }), + "weight_v2": ("FLOAT", { "default": 1.0, "min": -1, "max": 3, "step": 0.05 }), + "unfold_batch": ("BOOLEAN", { "default": False }), + }, + "optional": { + "attn_mask": ("MASK",), + } + } + +def prepImage(image, interpolation="LANCZOS", crop_position="center", size=(224,224), sharpening=0.0, padding=0): + _, oh, ow, _ = image.shape + output = image.permute([0,3,1,2]) + + if "pad" in crop_position: + target_length = max(oh, ow) + pad_l = (target_length - ow) // 2 + pad_r = (target_length - ow) - pad_l + pad_t = (target_length - oh) // 2 + pad_b = (target_length - oh) - pad_t + output = F.pad(output, (pad_l, pad_r, pad_t, pad_b), value=0, mode="constant") + else: + crop_size = min(oh, ow) + x = (ow-crop_size) // 2 + y = (oh-crop_size) // 2 + if "top" in crop_position: + y = 0 + elif "bottom" in crop_position: + y = oh-crop_size + elif "left" in crop_position: + x = 0 + elif "right" in crop_position: + x = ow-crop_size + + x2 = x+crop_size + y2 = y+crop_size + + # crop + output = output[:, :, y:y2, x:x2] + + # resize (apparently PIL resize is better than torchvision interpolate) + imgs = [] + for i in range(output.shape[0]): + img = TT.ToPILImage()(output[i]) + img = img.resize(size, resample=Image.Resampling[interpolation]) + imgs.append(TT.ToTensor()(img)) + output = torch.stack(imgs, dim=0) + imgs = None # zealous GC + + if sharpening > 0: + output = contrast_adaptive_sharpening(output, sharpening) + + if padding > 0: + output = F.pad(output, (padding, padding, padding, padding), value=255, mode="constant") + + output = output.permute([0,2,3,1]) + + return output + +class PrepImageForInsightFace: + @classmethod + def INPUT_TYPES(s): + return {"required": { + "image": ("IMAGE",), + "crop_position": (["center", "top", "bottom", "left", "right"],), + "sharpening": ("FLOAT", {"default": 0.0, "min": 0, "max": 1, "step": 0.05}), + "pad_around": ("BOOLEAN", { "default": True }), + }, + } + + RETURN_TYPES = ("IMAGE",) + FUNCTION = "prep_image" + + CATEGORY = "ipadapter" + + def prep_image(self, image, crop_position, sharpening=0.0, pad_around=True): + if pad_around: + padding = 30 + size = (580, 580) + else: +
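For orientation, a sketch (helper name is mine, not the patch's) of the patch keys generated by the SD1.5 branch above; the SDXL branch instead enumerates (block, id, transformer_index) triples and ends up with 70 patched attention layers.

def sd15_patch_keys():
    keys = [("input", i) for i in (1, 2, 4, 5, 7, 8)]
    keys += [("output", i) for i in range(3, 12)]
    keys += [("middle", 0)]
    return keys

print(len(sd15_patch_keys()))  # 16 patched cross-attention layers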
padding = 0 + size = (640, 640) + output = prepImage(image, "LANCZOS", crop_position, size, sharpening, padding) + + return (output, ) + +class PrepImageForClipVision: + @classmethod + def INPUT_TYPES(s): + return {"required": { + "image": ("IMAGE",), + "interpolation": (["LANCZOS", "BICUBIC", "HAMMING", "BILINEAR", "BOX", "NEAREST"],), + "crop_position": (["top", "bottom", "left", "right", "center", "pad"],), + "sharpening": ("FLOAT", {"default": 0.0, "min": 0, "max": 1, "step": 0.05}), + }, + } + + RETURN_TYPES = ("IMAGE",) + FUNCTION = "prep_image" + + CATEGORY = "ipadapter" + + def prep_image(self, image, interpolation="LANCZOS", crop_position="center", sharpening=0.0): + size = (224, 224) + output = prepImage(image, interpolation, crop_position, size, sharpening, 0) + return (output, ) + +class IPAdapterEncoder: + @classmethod + def INPUT_TYPES(s): + return {"required": { + "clip_vision": ("CLIP_VISION",), + "image_1": ("IMAGE",), + "ipadapter_plus": ("BOOLEAN", { "default": False }), + "noise": ("FLOAT", { "default": 0.0, "min": 0.0, "max": 1.0, "step": 0.01 }), + "weight_1": ("FLOAT", { "default": 1.0, "min": 0, "max": 1.0, "step": 0.01 }), + }, + "optional": { + "image_2": ("IMAGE",), + "image_3": ("IMAGE",), + "image_4": ("IMAGE",), + "weight_2": ("FLOAT", { "default": 1.0, "min": 0, "max": 1.0, "step": 0.01 }), + "weight_3": ("FLOAT", { "default": 1.0, "min": 0, "max": 1.0, "step": 0.01 }), + "weight_4": ("FLOAT", { "default": 1.0, "min": 0, "max": 1.0, "step": 0.01 }), + } + } + + RETURN_TYPES = ("EMBEDS",) + FUNCTION = "preprocess" + CATEGORY = "ipadapter" + + def preprocess(self, clip_vision, image_1, ipadapter_plus, noise, weight_1, image_2=None, image_3=None, image_4=None, weight_2=1.0, weight_3=1.0, weight_4=1.0): + weight_1 *= (0.1 + (weight_1 - 0.1)) + weight_2 *= (0.1 + (weight_2 - 0.1)) + weight_3 *= (0.1 + (weight_3 - 0.1)) + weight_4 *= (0.1 + (weight_4 - 0.1)) + + image = image_1 + weight = [weight_1]*image_1.shape[0] + + if image_2 is not None: + if image_1.shape[1:] != image_2.shape[1:]: + image_2 = ldm_patched.modules.utils.common_upscale(image_2.movedim(-1,1), image.shape[2], image.shape[1], "bilinear", "center").movedim(1,-1) + image = torch.cat((image, image_2), dim=0) + weight += [weight_2]*image_2.shape[0] + if image_3 is not None: + if image.shape[1:] != image_3.shape[1:]: + image_3 = ldm_patched.modules.utils.common_upscale(image_3.movedim(-1,1), image.shape[2], image.shape[1], "bilinear", "center").movedim(1,-1) + image = torch.cat((image, image_3), dim=0) + weight += [weight_3]*image_3.shape[0] + if image_4 is not None: + if image.shape[1:] != image_4.shape[1:]: + image_4 = ldm_patched.modules.utils.common_upscale(image_4.movedim(-1,1), image.shape[2], image.shape[1], "bilinear", "center").movedim(1,-1) + image = torch.cat((image, image_4), dim=0) + weight += [weight_4]*image_4.shape[0] + + clip_embed = clip_vision.encode_image(image) + neg_image = image_add_noise(image, noise) if noise > 0 else None + + if ipadapter_plus: + clip_embed = clip_embed.penultimate_hidden_states + if noise > 0: + clip_embed_zeroed = clip_vision.encode_image(neg_image).penultimate_hidden_states + else: + clip_embed_zeroed = zeroed_hidden_states(clip_vision, image.shape[0]) + else: + clip_embed = clip_embed.image_embeds + if noise > 0: + clip_embed_zeroed = clip_vision.encode_image(neg_image).image_embeds + else: + clip_embed_zeroed = torch.zeros_like(clip_embed) + + if any(e != 1.0 for e in weight): + weight = torch.tensor(weight).unsqueeze(-1) if not ipadapter_plus else 
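prepImage's "pad" branch above squares the image by padding the shorter side before resizing; a minimal standalone equivalent, assuming a channels-first tensor:

import torch
import torch.nn.functional as F

def pad_to_square(img):  # img: (B, C, H, W)
    _, _, h, w = img.shape
    target = max(h, w)
    pad_l, pad_t = (target - w) // 2, (target - h) // 2
    return F.pad(img, (pad_l, target - w - pad_l, pad_t, target - h - pad_t), value=0)

print(pad_to_square(torch.zeros(1, 3, 480, 640)).shape)  # torch.Size([1, 3, 640, 640])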
torch.tensor(weight).unsqueeze(-1).unsqueeze(-1) + clip_embed = clip_embed * weight + + output = torch.stack((clip_embed, clip_embed_zeroed)) + + return( output, ) + +class IPAdapterApplyEncoded(IPAdapterApply): + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "ipadapter": ("IPADAPTER", ), + "embeds": ("EMBEDS",), + "model": ("MODEL", ), + "weight": ("FLOAT", { "default": 1.0, "min": -1, "max": 3, "step": 0.05 }), + "weight_type": (["original", "linear", "channel penalty"], ), + "start_at": ("FLOAT", { "default": 0.0, "min": 0.0, "max": 1.0, "step": 0.001 }), + "end_at": ("FLOAT", { "default": 1.0, "min": 0.0, "max": 1.0, "step": 0.001 }), + "unfold_batch": ("BOOLEAN", { "default": False }), + }, + "optional": { + "attn_mask": ("MASK",), + } + } + +class IPAdapterSaveEmbeds: + def __init__(self): + self.output_dir = folder_paths.get_output_directory() + + @classmethod + def INPUT_TYPES(s): + return {"required": { + "embeds": ("EMBEDS",), + "filename_prefix": ("STRING", {"default": "embeds/IPAdapter"}) + }, + } + + RETURN_TYPES = () + FUNCTION = "save" + OUTPUT_NODE = True + CATEGORY = "ipadapter" + + def save(self, embeds, filename_prefix): + full_output_folder, filename, counter, subfolder, filename_prefix = folder_paths.get_save_image_path(filename_prefix, self.output_dir) + file = f"{filename}_{counter:05}_.ipadpt" + file = os.path.join(full_output_folder, file) + + torch.save(embeds, file) + return (None, ) + + +class IPAdapterLoadEmbeds: + @classmethod + def INPUT_TYPES(s): + input_dir = folder_paths.get_input_directory() + files = [os.path.relpath(os.path.join(root, file), input_dir) for root, dirs, files in os.walk(input_dir) for file in files if file.endswith('.ipadpt')] + return {"required": {"embeds": [sorted(files), ]}, } + + RETURN_TYPES = ("EMBEDS", ) + FUNCTION = "load" + CATEGORY = "ipadapter" + + def load(self, embeds): + path = folder_paths.get_annotated_filepath(embeds) + output = torch.load(path).cpu() + + return (output, ) + + +class IPAdapterBatchEmbeds: + @classmethod + def INPUT_TYPES(s): + return {"required": { + "embed1": ("EMBEDS",), + "embed2": ("EMBEDS",), + }} + + RETURN_TYPES = ("EMBEDS",) + FUNCTION = "batch" + CATEGORY = "ipadapter" + + def batch(self, embed1, embed2): + return (torch.cat((embed1, embed2), dim=1), ) + +NODE_CLASS_MAPPINGS = { + "IPAdapterModelLoader": IPAdapterModelLoader, + "IPAdapterApply": IPAdapterApply, + "IPAdapterApplyFaceID": IPAdapterApplyFaceID, + "IPAdapterApplyEncoded": IPAdapterApplyEncoded, + "PrepImageForClipVision": PrepImageForClipVision, + "IPAdapterEncoder": IPAdapterEncoder, + "IPAdapterSaveEmbeds": IPAdapterSaveEmbeds, + "IPAdapterLoadEmbeds": IPAdapterLoadEmbeds, + "IPAdapterBatchEmbeds": IPAdapterBatchEmbeds, + "InsightFaceLoader": InsightFaceLoader, + "PrepImageForInsightFace": PrepImageForInsightFace, +} + +NODE_DISPLAY_NAME_MAPPINGS = { + "IPAdapterModelLoader": "Load IPAdapter Model", + "IPAdapterApply": "Apply IPAdapter", + "IPAdapterApplyFaceID": "Apply IPAdapter FaceID", + "IPAdapterApplyEncoded": "Apply IPAdapter from Encoded", + "PrepImageForClipVision": "Prepare Image For Clip Vision", + "IPAdapterEncoder": "Encode IPAdapter Image", + "IPAdapterSaveEmbeds": "Save IPAdapter Embeds", + "IPAdapterLoadEmbeds": "Load IPAdapter Embeds", + "IPAdapterBatchEmbeds": "IPAdapter Batch Embeds", + "InsightFaceLoader": "Load InsightFace", + "PrepImageForInsightFace": "Prepare Image For InsightFace", +} \ No newline at end of file diff --git a/extensions-builtin/sd_forge_ipadapter/lib_ipadapter/resampler.py 
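The EMBEDS objects passed between these nodes stack the conditional and zeroed embeds along a new leading axis; apply_ipadapter unbinds them again, and IPAdapterBatchEmbeds concatenates two stacks along dim=1 (the image axis). A shape sketch:

import torch

cond, uncond = torch.randn(1, 4, 768), torch.zeros(1, 4, 768)
embeds = torch.stack((cond, uncond))           # (2, 1, 4, 768), as saved/loaded above
c, u = torch.unbind(embeds)                    # what apply_ipadapter does with embeds
batched = torch.cat((embeds, embeds), dim=1)   # IPAdapterBatchEmbeds.batch
print(embeds.shape, batched.shape)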
b/extensions-builtin/sd_forge_ipadapter/lib_ipadapter/resampler.py new file mode 100644 index 0000000000000000000000000000000000000000..24266671d02092438ae6576336a59659fef9c054 --- /dev/null +++ b/extensions-builtin/sd_forge_ipadapter/lib_ipadapter/resampler.py @@ -0,0 +1,158 @@ +# modified from https://github.com/mlfoundations/open_flamingo/blob/main/open_flamingo/src/helpers.py +# and https://github.com/lucidrains/imagen-pytorch/blob/main/imagen_pytorch/imagen_pytorch.py + +import math + +import torch +import torch.nn as nn +from einops import rearrange +from einops.layers.torch import Rearrange + + +# FFN +def FeedForward(dim, mult=4): + inner_dim = int(dim * mult) + return nn.Sequential( + nn.LayerNorm(dim), + nn.Linear(dim, inner_dim, bias=False), + nn.GELU(), + nn.Linear(inner_dim, dim, bias=False), + ) + + +def reshape_tensor(x, heads): + bs, length, width = x.shape + # (bs, length, width) --> (bs, length, n_heads, dim_per_head) + x = x.view(bs, length, heads, -1) + # (bs, length, n_heads, dim_per_head) --> (bs, n_heads, length, dim_per_head) + x = x.transpose(1, 2) + # (bs, n_heads, length, dim_per_head) --> (bs*n_heads, length, dim_per_head) + x = x.reshape(bs, heads, length, -1) + return x + + +class PerceiverAttention(nn.Module): + def __init__(self, *, dim, dim_head=64, heads=8): + super().__init__() + self.scale = dim_head**-0.5 + self.dim_head = dim_head + self.heads = heads + inner_dim = dim_head * heads + + self.norm1 = nn.LayerNorm(dim) + self.norm2 = nn.LayerNorm(dim) + + self.to_q = nn.Linear(dim, inner_dim, bias=False) + self.to_kv = nn.Linear(dim, inner_dim * 2, bias=False) + self.to_out = nn.Linear(inner_dim, dim, bias=False) + + def forward(self, x, latents): + """ + Args: + x (torch.Tensor): image features + shape (b, n1, D) + latent (torch.Tensor): latent features + shape (b, n2, D) + """ + x = self.norm1(x) + latents = self.norm2(latents) + + b, l, _ = latents.shape + + q = self.to_q(latents) + kv_input = torch.cat((x, latents), dim=-2) + k, v = self.to_kv(kv_input).chunk(2, dim=-1) + + q = reshape_tensor(q, self.heads) + k = reshape_tensor(k, self.heads) + v = reshape_tensor(v, self.heads) + + # attention + scale = 1 / math.sqrt(math.sqrt(self.dim_head)) + weight = (q * scale) @ (k * scale).transpose(-2, -1) # More stable with f16 than dividing afterwards + weight = torch.softmax(weight.float(), dim=-1).type(weight.dtype) + out = weight @ v + + out = out.permute(0, 2, 1, 3).reshape(b, l, -1) + + return self.to_out(out) + + +class Resampler(nn.Module): + def __init__( + self, + dim=1024, + depth=8, + dim_head=64, + heads=16, + num_queries=8, + embedding_dim=768, + output_dim=1024, + ff_mult=4, + max_seq_len: int = 257, # CLIP tokens + CLS token + apply_pos_emb: bool = False, + num_latents_mean_pooled: int = 0, # number of latents derived from mean pooled representation of the sequence + ): + super().__init__() + self.pos_emb = nn.Embedding(max_seq_len, embedding_dim) if apply_pos_emb else None + + self.latents = nn.Parameter(torch.randn(1, num_queries, dim) / dim**0.5) + + self.proj_in = nn.Linear(embedding_dim, dim) + + self.proj_out = nn.Linear(dim, output_dim) + self.norm_out = nn.LayerNorm(output_dim) + + self.to_latents_from_mean_pooled_seq = ( + nn.Sequential( + nn.LayerNorm(dim), + nn.Linear(dim, dim * num_latents_mean_pooled), + Rearrange("b (n d) -> b n d", n=num_latents_mean_pooled), + ) + if num_latents_mean_pooled > 0 + else None + ) + + self.layers = nn.ModuleList([]) + for _ in range(depth): + self.layers.append( + nn.ModuleList( + [ + 
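A side note on the scaling in PerceiverAttention above: multiplying both q and k by 1/sqrt(sqrt(dim_head)) gives the same result as the usual division by sqrt(dim_head) after the matmul, while keeping intermediates smaller in fp16.

import math
import torch

q, k = torch.randn(1, 8, 4, 64), torch.randn(1, 8, 4, 64)
s = 1 / math.sqrt(math.sqrt(64))
print(torch.allclose((q * s) @ (k * s).transpose(-2, -1), (q @ k.transpose(-2, -1)) / math.sqrt(64), atol=1e-5))  # True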
PerceiverAttention(dim=dim, dim_head=dim_head, heads=heads), + FeedForward(dim=dim, mult=ff_mult), + ] + ) + ) + + def forward(self, x): + if self.pos_emb is not None: + n, device = x.shape[1], x.device + pos_emb = self.pos_emb(torch.arange(n, device=device)) + x = x + pos_emb + + latents = self.latents.repeat(x.size(0), 1, 1) + + x = self.proj_in(x) + + if self.to_latents_from_mean_pooled_seq: + meanpooled_seq = masked_mean(x, dim=1, mask=torch.ones(x.shape[:2], device=x.device, dtype=torch.bool)) + meanpooled_latents = self.to_latents_from_mean_pooled_seq(meanpooled_seq) + latents = torch.cat((meanpooled_latents, latents), dim=-2) + + for attn, ff in self.layers: + latents = attn(x, latents) + latents + latents = ff(latents) + latents + + latents = self.proj_out(latents) + return self.norm_out(latents) + + +def masked_mean(t, *, dim, mask=None): + if mask is None: + return t.mean(dim=dim) + + denom = mask.sum(dim=dim, keepdim=True) + mask = rearrange(mask, "b n -> b n 1") + masked_t = t.masked_fill(~mask, 0.0) + + return masked_t.sum(dim=dim) / denom.clamp(min=1e-5) diff --git a/extensions-builtin/sd_forge_ipadapter/scripts/forge_ipadapter.py b/extensions-builtin/sd_forge_ipadapter/scripts/forge_ipadapter.py new file mode 100644 index 0000000000000000000000000000000000000000..47b720442f9a84d38a4afc81ee2c18d5f8b97119 --- /dev/null +++ b/extensions-builtin/sd_forge_ipadapter/scripts/forge_ipadapter.py @@ -0,0 +1,163 @@ +from modules_forge.supported_preprocessor import PreprocessorClipVision, Preprocessor, PreprocessorParameter +from modules_forge.shared import add_supported_preprocessor +from modules_forge.forge_util import numpy_to_pytorch +from modules_forge.shared import add_supported_control_model +from modules_forge.supported_controlnet import ControlModelPatcher +from lib_ipadapter.IPAdapterPlus import IPAdapterApply, InsightFaceLoader +from pathlib import Path + + +opIPAdapterApply = IPAdapterApply().apply_ipadapter +opInsightFaceLoader = InsightFaceLoader().load_insight_face + + +class PreprocessorClipVisionForIPAdapter(PreprocessorClipVision): + def __init__(self, name, url, filename): + super().__init__(name, url, filename) + self.tags = ['IP-Adapter'] + self.model_filename_filters = ['IP-Adapter', 'IP_Adapter'] + self.sorting_priority = 20 + + def __call__(self, input_image, resolution, slider_1=None, slider_2=None, slider_3=None, **kwargs): + cond = dict( + clip_vision=self.load_clipvision(), + image=numpy_to_pytorch(input_image), + weight_type="original", + noise=0.0, + embeds=None, + unfold_batch=False, + ) + return cond + + +class PreprocessorClipVisionWithInsightFaceForIPAdapter(PreprocessorClipVisionForIPAdapter): + def __init__(self, name, url, filename): + super().__init__(name, url, filename) + self.cached_insightface = None + + def load_insightface(self): + if self.cached_insightface is None: + self.cached_insightface = opInsightFaceLoader()[0] + return self.cached_insightface + + def __call__(self, input_image, resolution, slider_1=None, slider_2=None, slider_3=None, **kwargs): + cond = dict( + clip_vision=self.load_clipvision(), + insightface=self.load_insightface(), + image=numpy_to_pytorch(input_image), + weight_type="original", + noise=0.0, + embeds=None, + unfold_batch=False, + ) + return cond + + +class PreprocessorInsightFaceForInstantID(Preprocessor): + def __init__(self, name): + super().__init__() + self.name = name + self.tags = ['Instant-ID'] + self.model_filename_filters = ['Instant-ID', 'Instant_ID'] + self.sorting_priority = 20 + self.slider_resolution 
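A quick shape check for the Resampler defined above, using the InstantID settings seen earlier (the import path is hypothetical and only works if this file is on the Python path as lib_ipadapter.resampler):

import torch
from lib_ipadapter.resampler import Resampler  # hypothetical import path

model = Resampler(dim=1280, depth=4, dim_head=64, heads=20, num_queries=16, embedding_dim=512, output_dim=2048)
print(model(torch.randn(1, 1, 512)).shape)  # torch.Size([1, 16, 2048]): 16 learned queries in the SDXL context dim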
= PreprocessorParameter(visible=False) + self.corp_image_with_a1111_mask_when_in_img2img_inpaint_tab = False + self.show_control_mode = False + self.sorting_priority = 10 + self.cached_insightface = None + + def load_insightface(self): + if self.cached_insightface is None: + self.cached_insightface = opInsightFaceLoader(name='antelopev2')[0] + return self.cached_insightface + + def __call__(self, input_image, resolution, slider_1=None, slider_2=None, slider_3=None, **kwargs): + cond = dict( + clip_vision=None, + insightface=self.load_insightface(), + image=numpy_to_pytorch(input_image), + weight_type="original", + noise=0.0, + embeds=None, + unfold_batch=False, + instant_id=True + ) + return cond + + +add_supported_preprocessor(PreprocessorClipVisionForIPAdapter( + name='CLIP-ViT-H (IPAdapter)', + url='https://huggingface.co/h94/IP-Adapter/resolve/main/models/image_encoder/model.safetensors', + filename='CLIP-ViT-H-14.safetensors' +)) + +add_supported_preprocessor(PreprocessorClipVisionForIPAdapter( + name='CLIP-ViT-bigG (IPAdapter)', + url='https://huggingface.co/h94/IP-Adapter/resolve/main/sdxl_models/image_encoder/model.safetensors', + filename='CLIP-ViT-bigG.safetensors' +)) + +add_supported_preprocessor(PreprocessorClipVisionWithInsightFaceForIPAdapter( + name='InsightFace+CLIP-H (IPAdapter)', + url='https://huggingface.co/h94/IP-Adapter/resolve/main/models/image_encoder/model.safetensors', + filename='CLIP-ViT-H-14.safetensors' +)) + +add_supported_preprocessor(PreprocessorInsightFaceForInstantID( + name='InsightFace (InstantID)', +)) + + +class IPAdapterPatcher(ControlModelPatcher): + @staticmethod + def try_build_from_state_dict(state_dict, ckpt_path): + model = state_dict + + if ckpt_path.lower().endswith(".safetensors"): + st_model = {"image_proj": {}, "ip_adapter": {}} + for key in model.keys(): + if key.startswith("image_proj."): + st_model["image_proj"][key.replace("image_proj.", "")] = model[key] + elif key.startswith("ip_adapter."): + st_model["ip_adapter"][key.replace("ip_adapter.", "")] = model[key] + model = st_model + + if "ip_adapter" not in model.keys() or len(model["ip_adapter"]) == 0: + return None + + o = IPAdapterPatcher(model) + + model_filename = Path(ckpt_path).name.lower() + if 'v2' in model_filename: + o.faceid_v2 = True + o.weight_v2 = True + + return o + + def __init__(self, state_dict): + super().__init__() + self.ip_adapter = state_dict + self.faceid_v2 = False + self.weight_v2 = False + return + + def process_before_every_sampling(self, process, cond, mask, *args, **kwargs): + unet = process.sd_model.forge_objects.unet + + unet = opIPAdapterApply( + ipadapter=self.ip_adapter, + model=unet, + weight=self.strength, + start_at=self.start_percent, + end_at=self.end_percent, + faceid_v2=self.faceid_v2, + weight_v2=self.weight_v2, + attn_mask=mask.squeeze(1) if mask is not None else None, + **cond, + )[0] + + process.sd_model.forge_objects.unet = unet + return + + +add_supported_control_model(IPAdapterPatcher) diff --git a/extensions-builtin/sd_forge_ipadapter/thanks b/extensions-builtin/sd_forge_ipadapter/thanks new file mode 100644 index 0000000000000000000000000000000000000000..e1c7bcb46feba1acac42fec61e382cd547b974a7 --- /dev/null +++ b/extensions-builtin/sd_forge_ipadapter/thanks @@ -0,0 +1 @@ +This repo is modified from https://github.com/cubiq/ComfyUI_IPAdapter_plus diff --git a/extensions-builtin/sd_forge_kohya_hrfix/scripts/kohya_hrfix.py b/extensions-builtin/sd_forge_kohya_hrfix/scripts/kohya_hrfix.py new file mode 100644 index 
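IPAdapterPatcher.try_build_from_state_dict repeats the same .safetensors key regrouping as IPAdapterModelLoader and keys the FaceID-v2 behaviour off the checkpoint filename; a compact sketch of both rules (function names are illustrative, not part of the patch):

from pathlib import Path

def split_ipadapter_state_dict(flat):
    nested = {"image_proj": {}, "ip_adapter": {}}
    for key, value in flat.items():
        for prefix in ("image_proj.", "ip_adapter."):
            if key.startswith(prefix):
                nested[prefix[:-1]][key[len(prefix):]] = value
    return nested

def wants_v2(ckpt_path):
    return 'v2' in Path(ckpt_path).name.lower()

print(split_ipadapter_state_dict({"ip_adapter.1.to_k_ip.weight": 0}))
print(wants_v2("ip-adapter-faceid-plusv2_sd15.bin"))  # True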
0000000000000000000000000000000000000000..de21d566c68e6b054d15b18f6a5ac83c8da24e83 --- /dev/null +++ b/extensions-builtin/sd_forge_kohya_hrfix/scripts/kohya_hrfix.py @@ -0,0 +1,57 @@ +import gradio as gr + +from modules import scripts +from ldm_patched.contrib.external_model_downscale import PatchModelAddDownscale + + +opPatchModelAddDownscale = PatchModelAddDownscale() + + +class KohyaHRFixForForge(scripts.Script): + sorting_priority = 14 + + def title(self): + return "Kohya HRFix Integrated" + + def show(self, is_img2img): + return scripts.AlwaysVisible + + def ui(self, *args, **kwargs): + upscale_methods = ["bicubic", "nearest-exact", "bilinear", "area", "bislerp"] + with gr.Accordion(open=False, label=self.title()): + enabled = gr.Checkbox(label='Enabled', value=False) + block_number = gr.Slider(label='Block Number', value=3, minimum=1, maximum=32, step=1) + downscale_factor = gr.Slider(label='Downscale Factor', value=2.0, minimum=0.1, maximum=9.0, step=0.001) + start_percent = gr.Slider(label='Start Percent', value=0.0, minimum=0.0, maximum=1.0, step=0.001) + end_percent = gr.Slider(label='End Percent', value=0.35, minimum=0.0, maximum=1.0, step=0.001) + downscale_after_skip = gr.Checkbox(label='Downscale After Skip', value=True) + downscale_method = gr.Radio(label='Downscale Method', choices=upscale_methods, value=upscale_methods[0]) + upscale_method = gr.Radio(label='Upscale Method', choices=upscale_methods, value=upscale_methods[0]) + + return enabled, block_number, downscale_factor, start_percent, end_percent, downscale_after_skip, downscale_method, upscale_method + + def process_before_every_sampling(self, p, *script_args, **kwargs): + enabled, block_number, downscale_factor, start_percent, end_percent, downscale_after_skip, downscale_method, upscale_method = script_args + block_number = int(block_number) + + if not enabled: + return + + unet = p.sd_model.forge_objects.unet + + unet = opPatchModelAddDownscale.patch(unet, block_number, downscale_factor, start_percent, end_percent, downscale_after_skip, downscale_method, upscale_method)[0] + + p.sd_model.forge_objects.unet = unet + + p.extra_generation_params.update(dict( + kohya_hrfix_enabled=enabled, + kohya_hrfix_block_number=block_number, + kohya_hrfix_downscale_factor=downscale_factor, + kohya_hrfix_start_percent=start_percent, + kohya_hrfix_end_percent=end_percent, + kohya_hrfix_downscale_after_skip=downscale_after_skip, + kohya_hrfix_downscale_method=downscale_method, + kohya_hrfix_upscale_method=upscale_method, + )) + + return diff --git a/extensions-builtin/sd_forge_latent_modifier/LICENSE b/extensions-builtin/sd_forge_latent_modifier/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..f288702d2fa16d3cdf0035b15a9fcbc552cd88e7 --- /dev/null +++ b/extensions-builtin/sd_forge_latent_modifier/LICENSE @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. 
We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. 
The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. 
+ + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. 
This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. 
+ + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. 
If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). 
+ + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". 
+ + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. 
+ + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. 
+ + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + Copyright (C) + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +. 
diff --git a/extensions-builtin/sd_forge_latent_modifier/README.md b/extensions-builtin/sd_forge_latent_modifier/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d1b85da876c61b2ba61e4dfd3af3220b74c6a114 --- /dev/null +++ b/extensions-builtin/sd_forge_latent_modifier/README.md @@ -0,0 +1,70 @@ +This extension is compiled from https://github.com/Clybius +Original license: GPL v3 + + +## Latent Diffusion Mega Modifier (sampler_mega_modifier.py) +### Adds multiple parameters to control the diffusion process towards a quality the user expects. +* Sharpness: Utilizes code from Fooocus's sampling process to sharpen the noise in the middle of the diffusion process. +This can lead to more perceptual detail, especially at higher strengths. + +* Tonemap: Clamps conditioning noise (CFG) using a user-chosen method, which can allow for the use of higher CFG values. + +* Rescale: Rescales the CFG output by comparing its standard deviation to that of the conditioning, dynamically lowering the effective CFG. + +* Extra Noise: Adds extra noise to the conditioning in the middle of the diffusion process and, if chosen, applies the inverse operation to the unconditioning. + +* Contrast: Adjusts the contrast of the conditioning, which can lead to more pop-style results. Essentially functions as a secondary CFG slider for stylization, without changing subject pose and location much, if at all. + +* Combat CFG Drift: As CFG increases, the mean of the latent slightly drifts away from 0; this option subtracts the mean or median of the latent. It can lead to sharper and higher-frequency results, but may cause discoloration. + +* Divisive Norm: Normalizes the latent using avg_pool2d, which can reduce noisy artifacts introduced in part by features such as Sharpness. + +* Spectral Modulation: Converts the latent to the frequency domain, clamps higher frequencies while boosting lower ones, then converts it back to an image latent. This can effectively be treated as a remedy for oversaturation or burning caused by higher CFG values, while leaving values around the median untouched. + +### Tonemapping Methods Explanation: +* Reinhard:

Uses the Reinhard method of tonemapping (from comfyanonymous' ComfyUI Experiments) to clamp the CFG if the difference is too strong. + + A lower `tonemap_multiplier` clamps more noise, and a lower `tonemap_percentile` increases the standard deviation calculated from the original noise. Play with it!

+* Arctan:

Clamps the values dynamically using a simple arctan curve. [Link to interactive Desmos visualization](https://www.desmos.com/calculator/e4nrcdpqbl). + + Recommended values for testing: tonemap_multiplier of 5, tonemap_percentile of 90.

+* Quantile:

Uses torch.quantile to find the highest magnitudes and clamps the values based on the result (see the sketch after this list). + + + `Closer to 100 percentile == stronger clamping`. Recommended values for testing: tonemap_multiplier of 1, tonemap_percentile of 99.

+* Gated:

Clamps the values using torch.quantile, but only when they exceed a gate threshold scaled by `tonemap_multiplier`; the noise prediction latent is then clamped based on the percentile. + + + `Closer to 100 percentile == stronger clamping, lower tonemap_multiplier == stronger clamping`. Recommended values for testing: tonemap_multiplier of 0.8-1, tonemap_percentile of 99.995.

+* CFG-Mimic:

Attempts to mimic a lower or higher CFG based on `tonemap_multiplier`, and clamps it using `tonemap_percentile` with torch.quantile. + + + `Closer to 100 percentile == stronger clamping, lower tonemap_multiplier == stronger clamping`. Recommended values for testing: tonemap_multiplier of 0.33-1.0, tonemap_percentile of 100.

+* Spatial-Norm:

Clamps the values according to the noise prediction's absolute mean in the spatial domain. `tonemap_multiplier` adjusts the strength of the clamping. + + + `Lower tonemap_multiplier == stronger clamping`. Recommended value for testing: tonemap_multiplier of 0.5-2.0.
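As a concrete illustration of the quantile-style methods above, here is a minimal, self-contained sketch of clamping a noise prediction by a per-sample `torch.quantile` threshold. It is not the extension's exact code; the function name, tensor shapes, and default arguments are illustrative assumptions.

```python
import torch

def quantile_tonemap(noise_pred: torch.Tensor, percentile: float = 99.0, multiplier: float = 1.0) -> torch.Tensor:
    """Sketch of quantile-style tonemapping on a (B, C, H, W) noise prediction (cond - uncond)."""
    # Per-sample threshold: the chosen percentile of absolute magnitudes, scaled by the multiplier.
    s = torch.quantile(noise_pred.flatten(start_dim=1).abs(), percentile / 100.0, dim=-1) * multiplier
    s = s.clamp(min=1.0).reshape(-1, 1, 1, 1)
    # Clamp outliers to the threshold, then normalize by it.
    return noise_pred.clamp(-s, s) / s

# Hypothetical usage with a random latent-shaped tensor:
noise_pred = torch.randn(1, 4, 64, 64)
clamped = quantile_tonemap(noise_pred, percentile=99.0, multiplier=1.0)
```

The gated and CFG-mimic variants build on the same idea, only clamping when magnitudes pass a gate, or deriving the threshold from a rescaled copy of the prediction.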

+ +### Contrast Explanation: +

Scales the pixel values by their standard deviation, achieving a more contrasty look. In practice, this can effectively act as a secondary CFG slider for stylization. It doesn't modify subject poses much, if at all, which can be great for those looking to get more oomph out of their low-CFG setups. + +Using a negative value will apply the inverse of the operation to the latent.
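For clarity, here is a rough, self-contained sketch of the operation. The function name and the exact blending constants are illustrative, loosely following the sampler code later in this diff.

```python
import torch

def apply_contrast(noise_pred: torch.Tensor, contrast_multiplier: float, timestep: float) -> torch.Tensor:
    """Sketch: scale a (B, C, H, W) noise prediction by its per-sample std-dev, then blend with the original."""
    scaled = noise_pred / noise_pred.std(dim=(1, 2, 3), keepdim=True)
    # Weaker effect early in sampling (high timestep / high noise), scaled by the user multiplier.
    alpha = (1.0 - timestep / 999.0) * 0.001 * contrast_multiplier
    return scaled * alpha + noise_pred * (1.0 - alpha)
```

A negative `contrast_multiplier` makes `alpha` negative, which pushes the result away from the contrast-scaled version, that is, the inverse of the operation described above.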

+ +### Spectral Modification Explanation: +

We boost the low frequencies (low rate of change in the noise) and lower the high frequencies (high rate of change in the noise). + +Change the low/high frequency range using `spectral_mod_percentile` (default of 5.0, which covers the upper and lower 5th percentiles). + +Increase or decrease the strength of the adjustment with `spectral_mod_multiplier`. + +Beware of percentile values higher than 15 and multiplier values higher than 5, especially for hard clamping. Here be dragons, as large values may cause the output to "noise-out", or become full of nonsensical noise, especially earlier in the diffusion process.
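Below is a minimal sketch of the hard-clamp flavour of this idea: transform to the frequency domain, lift the lowest-amplitude bins, damp the highest-amplitude ones, and transform back. The function name and the fixed 1.5x/0.5x factors are assumptions for illustration, loosely following the description above rather than reproducing the extension's exact code.

```python
import torch

def spectral_mod_sketch(noise_pred: torch.Tensor, multiplier: float = 1.0, percentile: float = 5.0) -> torch.Tensor:
    """Sketch: boost low-amplitude frequencies and damp high-amplitude ones of a (B, C, H, W) tensor."""
    fourier = torch.fft.fft2(noise_pred, dim=(-2, -1))
    log_amp = torch.log(fourier.abs() + 1e-12)            # log magnitude per frequency bin
    flat = log_amp.flatten(2)
    q_low = torch.quantile(flat, percentile / 100.0, dim=2).unsqueeze(-1).unsqueeze(-1)
    q_high = torch.quantile(flat, 1.0 - percentile / 100.0, dim=2).unsqueeze(-1).unsqueeze(-1)
    boost = 1.0 + 0.5 * (log_amp < q_low).float()          # 1.5x on the lowest-amplitude bins
    damp = 1.0 - 0.5 * (log_amp > q_high).float()          # 0.5x on the highest-amplitude bins
    filtered = fourier * (boost * damp) ** multiplier
    return torch.fft.ifft2(filtered, dim=(-2, -1)).real
```

Raising `multiplier` strengthens both adjustments at once, and raising `percentile` widens the band of bins that get touched, which is why large values can tip the latent into pure noise.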

+ + +#### Current Pipeline: +>##### Add extra noise to conditioning -> Sharpen conditioning -> Convert to Noise Prediction -> Tonemap Noise Prediction -> Spectral Modification -> Modify contrast of noise prediction -> Rescale CFG -> Divisive Normalization -> Combat CFG Drift + +#### Why use this over `x` node? +Since the `set_model_sampler_cfg_function` hijack in ComfyUI can only utilize a single function, we bundle many latent modification methods into one large function for processing. This is simpler than taking an existing hijack and modifying it, which may be possible, but my (Clybius') lack of Python/PyTorch knowledge leads to this being the optimal method for simplicity. If you know how to do this, feel free to reach out through any means! + +#### Can you implement `x` function? +Depends. Is there existing code for such a function, with an open license for possible use in this repository? I could likely attempt adding it! Feel free to start an issue or to reach out for ideas you'd want implemented. diff --git a/extensions-builtin/sd_forge_latent_modifier/lib_latent_modifier/sampler_mega_modifier.py b/extensions-builtin/sd_forge_latent_modifier/lib_latent_modifier/sampler_mega_modifier.py new file mode 100644 index 0000000000000000000000000000000000000000..f8b494e2c2964cec90881b76bf5102338a52da21 --- /dev/null +++ b/extensions-builtin/sd_forge_latent_modifier/lib_latent_modifier/sampler_mega_modifier.py @@ -0,0 +1,1177 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np +import random + +# Set manual seeds for noise +# rand(n)_like. but with generator support +def gen_like(f, input, generator=None): + return f(input.size(), generator=generator).to(input) + +''' + The following snippet is utilized from https://github.com/Jamy-L/Pytorch-Contrast-Adaptive-Sharpening/ +''' +def min_(tensor_list): + # return the element-wise min of the tensor list. + x = torch.stack(tensor_list) + mn = x.min(axis=0)[0] + return mn#torch.clamp(mn, min=-1) + +def max_(tensor_list): + # return the element-wise max of the tensor list. + x = torch.stack(tensor_list) + mx = x.max(axis=0)[0] + return mx#torch.clamp(mx, max=1) +def contrast_adaptive_sharpening(image, amount): + img = F.pad(image, pad=(1, 1, 1, 1)) + absmean = torch.abs(image.mean()) + + a = img[..., :-2, :-2] + b = img[..., :-2, 1:-1] + c = img[..., :-2, 2:] + d = img[..., 1:-1, :-2] + e = img[..., 1:-1, 1:-1] + f = img[..., 1:-1, 2:] + g = img[..., 2:, :-2] + h = img[..., 2:, 1:-1] + i = img[..., 2:, 2:] + + # Computing contrast + cross = (b, d, e, f, h) + mn = min_(cross) + mx = max_(cross) + + diag = (a, c, g, i) + mn2 = min_(diag) + mx2 = max_(diag) + mx = mx + mx2 + mn = mn + mn2 + + # Computing local weight + inv_mx = torch.reciprocal(mx) + amp = inv_mx * torch.minimum(mn, (2 - mx)) + + # scaling + amp = torch.copysign(torch.sqrt(torch.abs(amp)), amp) + w = - amp * (amount * (1/5 - 1/8) + 1/8) + div = torch.reciprocal(1 + 4*w).clamp(-10, 10) + + output = ((b + d + f + h)*w + e) * div + output = torch.nan_to_num(output) + + return (output.to(image.device)) + +''' + The following gaussian functions were utilized from the Fooocus UI, many thanks to github.com/Illyasviel ! 
+''' +def gaussian_kernel(kernel_size, sigma): + kernel = np.fromfunction( + lambda x, y: (1 / (2 * np.pi * sigma ** 2)) * + np.exp(-((x - (kernel_size - 1) / 2) ** 2 + (y - (kernel_size - 1) / 2) ** 2) / (2 * sigma ** 2)), + (kernel_size, kernel_size) + ) + return kernel / np.sum(kernel) + + +class GaussianBlur(nn.Module): + def __init__(self, channels, kernel_size, sigma): + super(GaussianBlur, self).__init__() + self.channels = channels + self.kernel_size = kernel_size + self.sigma = sigma + self.padding = kernel_size // 2 # Ensure output size matches input size + self.register_buffer('kernel', torch.tensor(gaussian_kernel(kernel_size, sigma), dtype=torch.float32)) + self.kernel = self.kernel.view(1, 1, kernel_size, kernel_size) + self.kernel = self.kernel.expand(self.channels, -1, -1, -1) # Repeat the kernel for each input channel + + def forward(self, x): + x = F.conv2d(x, self.kernel.to(x), padding=self.padding, groups=self.channels) + return x + +gaussian_filter_2d = GaussianBlur(4, 7, 0.8) + +''' + As of August 18th (on Fooocus' GitHub), the gaussian functions were replaced by an anisotropic function for better stability. +''' +Tensor = torch.Tensor +Device = torch.DeviceObjType +Dtype = torch.Type +pad = torch.nn.functional.pad + + +def _compute_zero_padding(kernel_size: tuple[int, int] | int) -> tuple[int, int]: + ky, kx = _unpack_2d_ks(kernel_size) + return (ky - 1) // 2, (kx - 1) // 2 + + +def _unpack_2d_ks(kernel_size: tuple[int, int] | int) -> tuple[int, int]: + if isinstance(kernel_size, int): + ky = kx = kernel_size + else: + assert len(kernel_size) == 2, '2D Kernel size should have a length of 2.' + ky, kx = kernel_size + + ky = int(ky) + kx = int(kx) + return ky, kx + + +def gaussian( + window_size: int, sigma: Tensor | float, *, device: Device | None = None, dtype: Dtype | None = None +) -> Tensor: + + batch_size = sigma.shape[0] + + x = (torch.arange(window_size, device=sigma.device, dtype=sigma.dtype) - window_size // 2).expand(batch_size, -1) + + if window_size % 2 == 0: + x = x + 0.5 + + gauss = torch.exp(-x.pow(2.0) / (2 * sigma.pow(2.0))) + + return gauss / gauss.sum(-1, keepdim=True) + + +def get_gaussian_kernel1d( + kernel_size: int, + sigma: float | Tensor, + force_even: bool = False, + *, + device: Device | None = None, + dtype: Dtype | None = None, +) -> Tensor: + + return gaussian(kernel_size, sigma, device=device, dtype=dtype) + + +def get_gaussian_kernel2d( + kernel_size: tuple[int, int] | int, + sigma: tuple[float, float] | Tensor, + force_even: bool = False, + *, + device: Device | None = None, + dtype: Dtype | None = None, +) -> Tensor: + + sigma = torch.Tensor([[sigma, sigma]]).to(device=device, dtype=dtype) + + ksize_y, ksize_x = _unpack_2d_ks(kernel_size) + sigma_y, sigma_x = sigma[:, 0, None], sigma[:, 1, None] + + kernel_y = get_gaussian_kernel1d(ksize_y, sigma_y, force_even, device=device, dtype=dtype)[..., None] + kernel_x = get_gaussian_kernel1d(ksize_x, sigma_x, force_even, device=device, dtype=dtype)[..., None] + + return kernel_y * kernel_x.view(-1, 1, ksize_x) + + +def _bilateral_blur( + input: Tensor, + guidance: Tensor | None, + kernel_size: tuple[int, int] | int, + sigma_color: float | Tensor, + sigma_space: tuple[float, float] | Tensor, + border_type: str = 'reflect', + color_distance_type: str = 'l1', +) -> Tensor: + + if isinstance(sigma_color, Tensor): + sigma_color = sigma_color.to(device=input.device, dtype=input.dtype).view(-1, 1, 1, 1, 1) + + ky, kx = _unpack_2d_ks(kernel_size) + pad_y, pad_x = _compute_zero_padding(kernel_size) + 
+ padded_input = pad(input, (pad_x, pad_x, pad_y, pad_y), mode=border_type) + unfolded_input = padded_input.unfold(2, ky, 1).unfold(3, kx, 1).flatten(-2) # (B, C, H, W, Ky x Kx) + + if guidance is None: + guidance = input + unfolded_guidance = unfolded_input + else: + padded_guidance = pad(guidance, (pad_x, pad_x, pad_y, pad_y), mode=border_type) + unfolded_guidance = padded_guidance.unfold(2, ky, 1).unfold(3, kx, 1).flatten(-2) # (B, C, H, W, Ky x Kx) + + diff = unfolded_guidance - guidance.unsqueeze(-1) + if color_distance_type == "l1": + color_distance_sq = diff.abs().sum(1, keepdim=True).square() + elif color_distance_type == "l2": + color_distance_sq = diff.square().sum(1, keepdim=True) + else: + raise ValueError("color_distance_type only acceps l1 or l2") + color_kernel = (-0.5 / sigma_color**2 * color_distance_sq).exp() # (B, 1, H, W, Ky x Kx) + + space_kernel = get_gaussian_kernel2d(kernel_size, sigma_space, device=input.device, dtype=input.dtype) + space_kernel = space_kernel.view(-1, 1, 1, 1, kx * ky) + + kernel = space_kernel * color_kernel + out = (unfolded_input * kernel).sum(-1) / kernel.sum(-1) + return out + + +def bilateral_blur( + input: Tensor, + kernel_size: tuple[int, int] | int = (13, 13), + sigma_color: float | Tensor = 3.0, + sigma_space: tuple[float, float] | Tensor = 3.0, + border_type: str = 'reflect', + color_distance_type: str = 'l1', +) -> Tensor: + return _bilateral_blur(input, None, kernel_size, sigma_color, sigma_space, border_type, color_distance_type) + + +def joint_bilateral_blur( + input: Tensor, + guidance: Tensor, + kernel_size: tuple[int, int] | int, + sigma_color: float | Tensor, + sigma_space: tuple[float, float] | Tensor, + border_type: str = 'reflect', + color_distance_type: str = 'l1', +) -> Tensor: + return _bilateral_blur(input, guidance, kernel_size, sigma_color, sigma_space, border_type, color_distance_type) + + +class _BilateralBlur(torch.nn.Module): + def __init__( + self, + kernel_size: tuple[int, int] | int, + sigma_color: float | Tensor, + sigma_space: tuple[float, float] | Tensor, + border_type: str = 'reflect', + color_distance_type: str = "l1", + ) -> None: + super().__init__() + self.kernel_size = kernel_size + self.sigma_color = sigma_color + self.sigma_space = sigma_space + self.border_type = border_type + self.color_distance_type = color_distance_type + + def __repr__(self) -> str: + return ( + f"{self.__class__.__name__}" + f"(kernel_size={self.kernel_size}, " + f"sigma_color={self.sigma_color}, " + f"sigma_space={self.sigma_space}, " + f"border_type={self.border_type}, " + f"color_distance_type={self.color_distance_type})" + ) + + +class BilateralBlur(_BilateralBlur): + def forward(self, input: Tensor) -> Tensor: + return bilateral_blur( + input, self.kernel_size, self.sigma_color, self.sigma_space, self.border_type, self.color_distance_type + ) + + +class JointBilateralBlur(_BilateralBlur): + def forward(self, input: Tensor, guidance: Tensor) -> Tensor: + return joint_bilateral_blur( + input, + guidance, + self.kernel_size, + self.sigma_color, + self.sigma_space, + self.border_type, + self.color_distance_type, + ) + + +# Below is perlin noise from https://github.com/tasptz/pytorch-perlin-noise/blob/main/perlin_noise/perlin_noise.py +from torch import Generator, Tensor, lerp +from torch.nn.functional import unfold +from typing import Callable, Tuple +from math import pi + +def get_positions(block_shape: Tuple[int, int]) -> Tensor: + """ + Generate position tensor. 
+ + Arguments: + block_shape -- (height, width) of position tensor + + Returns: + position vector shaped (1, height, width, 1, 1, 2) + """ + bh, bw = block_shape + positions = torch.stack( + torch.meshgrid( + [(torch.arange(b) + 0.5) / b for b in (bw, bh)], + indexing="xy", + ), + -1, + ).view(1, bh, bw, 1, 1, 2) + return positions + + +def unfold_grid(vectors: Tensor) -> Tensor: + """ + Unfold vector grid to batched vectors. + + Arguments: + vectors -- grid vectors + + Returns: + batched grid vectors + """ + batch_size, _, gpy, gpx = vectors.shape + return ( + unfold(vectors, (2, 2)) + .view(batch_size, 2, 4, -1) + .permute(0, 2, 3, 1) + .view(batch_size, 4, gpy - 1, gpx - 1, 2) + ) + + +def smooth_step(t: Tensor) -> Tensor: + """ + Smooth step function [0, 1] -> [0, 1]. + + Arguments: + t -- input values (any shape) + + Returns: + output values (same shape as input values) + """ + return t * t * (3.0 - 2.0 * t) + + +def perlin_noise_tensor( + vectors: Tensor, positions: Tensor, step: Callable = None +) -> Tensor: + """ + Generate perlin noise from batched vectors and positions. + + Arguments: + vectors -- batched grid vectors shaped (batch_size, 4, grid_height, grid_width, 2) + positions -- batched grid positions shaped (batch_size or 1, block_height, block_width, grid_height or 1, grid_width or 1, 2) + + Keyword Arguments: + step -- smooth step function [0, 1] -> [0, 1] (default: `smooth_step`) + + Raises: + Exception: if position and vector shapes do not match + + Returns: + (batch_size, block_height * grid_height, block_width * grid_width) + """ + if step is None: + step = smooth_step + + batch_size = vectors.shape[0] + # grid height, grid width + gh, gw = vectors.shape[2:4] + # block height, block width + bh, bw = positions.shape[1:3] + + for i in range(2): + if positions.shape[i + 3] not in (1, vectors.shape[i + 2]): + raise Exception( + f"Blocks shapes do not match: vectors ({vectors.shape[1]}, {vectors.shape[2]}), positions {gh}, {gw})" + ) + + if positions.shape[0] not in (1, batch_size): + raise Exception( + f"Batch sizes do not match: vectors ({vectors.shape[0]}), positions ({positions.shape[0]})" + ) + + vectors = vectors.view(batch_size, 4, 1, gh * gw, 2) + positions = positions.view(positions.shape[0], bh * bw, -1, 2) + + step_x = step(positions[..., 0]) + step_y = step(positions[..., 1]) + + row0 = lerp( + (vectors[:, 0] * positions).sum(dim=-1), + (vectors[:, 1] * (positions - positions.new_tensor((1, 0)))).sum(dim=-1), + step_x, + ) + row1 = lerp( + (vectors[:, 2] * (positions - positions.new_tensor((0, 1)))).sum(dim=-1), + (vectors[:, 3] * (positions - positions.new_tensor((1, 1)))).sum(dim=-1), + step_x, + ) + noise = lerp(row0, row1, step_y) + return ( + noise.view( + batch_size, + bh, + bw, + gh, + gw, + ) + .permute(0, 3, 1, 4, 2) + .reshape(batch_size, gh * bh, gw * bw) + ) + + +def perlin_noise( + grid_shape: Tuple[int, int], + out_shape: Tuple[int, int], + batch_size: int = 1, + generator: Generator = None, + *args, + **kwargs, +) -> Tensor: + """ + Generate perlin noise with given shape. `*args` and `**kwargs` are forwarded to `Tensor` creation. + + Arguments: + grid_shape -- Shape of grid (height, width). + out_shape -- Shape of output noise image (height, width). 
+ + Keyword Arguments: + batch_size -- (default: {1}) + generator -- random generator used for grid vectors (default: {None}) + + Raises: + Exception: if grid and out shapes do not match + + Returns: + Noise image shaped (batch_size, height, width) + """ + # grid height and width + gh, gw = grid_shape + # output height and width + oh, ow = out_shape + # block height and width + bh, bw = oh // gh, ow // gw + + if oh != bh * gh: + raise Exception(f"Output height {oh} must be divisible by grid height {gh}") + if ow != bw * gw != 0: + raise Exception(f"Output width {ow} must be divisible by grid width {gw}") + + angle = torch.empty( + [batch_size] + [s + 1 for s in grid_shape], *args, **kwargs + ).uniform_(to=2.0 * pi, generator=generator) + # random vectors on grid points + vectors = unfold_grid(torch.stack((torch.cos(angle), torch.sin(angle)), dim=1)) + # positions inside grid cells [0, 1) + positions = get_positions((bh, bw)).to(vectors) + return perlin_noise_tensor(vectors, positions).squeeze(0) + +def generate_1f_noise(tensor, alpha, k, generator=None): + """Generate 1/f noise for a given tensor. + + Args: + tensor: The tensor to add noise to. + alpha: The parameter that determines the slope of the spectrum. + k: A constant. + + Returns: + A tensor with the same shape as `tensor` containing 1/f noise. + """ + fft = torch.fft.fft2(tensor) + freq = torch.arange(1, len(fft) + 1, dtype=torch.float) + spectral_density = k / freq**alpha + noise = torch.randn(tensor.shape, generator=generator) * spectral_density + return noise + +def green_noise(width, height, generator=None): + noise = torch.randn(width, height, generator=generator) + scale = 1.0 / (width * height) + fy = torch.fft.fftfreq(width)[:, None] ** 2 + fx = torch.fft.fftfreq(height) ** 2 + f = fy + fx + power = torch.sqrt(f) + power[0, 0] = 1 + noise = torch.fft.ifft2(torch.fft.fft2(noise) / torch.sqrt(power)) + noise *= scale / noise.std() + return torch.real(noise) + +# Algorithm from https://github.com/v0xie/sd-webui-cads/ +def add_cads_noise(y, timestep, cads_schedule_start, cads_schedule_end, cads_noise_scale, cads_rescale_factor, cads_rescale=False): + timestep_as_float = (timestep / 999.0)[:, None, None, None].clone()[0].item() + gamma = 0.0 + if timestep_as_float < cads_schedule_start: + gamma = 1.0 + elif timestep_as_float > cads_schedule_end: + gamma = 0.0 + else: + gamma = (cads_schedule_end - timestep_as_float) / (cads_schedule_end - cads_schedule_start) + + y_mean, y_std = torch.mean(y), torch.std(y) + y = np.sqrt(gamma) * y + cads_noise_scale * np.sqrt(1 - gamma) * torch.randn_like(y) + + if cads_rescale: + y_scaled = (y - torch.mean(y)) / torch.std(y) * y_std + y_mean + if not torch.isnan(y_scaled).any(): + y = cads_rescale_factor * y_scaled + (1 - cads_rescale_factor) * y + else: + print("Encountered NaN in cads rescaling. 
Skipping rescaling.") + return y + +# Algorithm from https://github.com/v0xie/sd-webui-cads/ +def add_cads_custom_noise(y, noise, timestep, cads_schedule_start, cads_schedule_end, cads_noise_scale, cads_rescale_factor, cads_rescale=False): + timestep_as_float = (timestep / 999.0)[:, None, None, None].clone()[0].item() + gamma = 0.0 + if timestep_as_float < cads_schedule_start: + gamma = 1.0 + elif timestep_as_float > cads_schedule_end: + gamma = 0.0 + else: + gamma = (cads_schedule_end - timestep_as_float) / (cads_schedule_end - cads_schedule_start) + + y_mean, y_std = torch.mean(y), torch.std(y) + y = np.sqrt(gamma) * y + cads_noise_scale * np.sqrt(1 - gamma) * noise#.sub_(noise.mean()).div_(noise.std()) + + if cads_rescale: + y_scaled = (y - torch.mean(y)) / torch.std(y) * y_std + y_mean + if not torch.isnan(y_scaled).any(): + y = cads_rescale_factor * y_scaled + (1 - cads_rescale_factor) * y + else: + print("Encountered NaN in cads rescaling. Skipping rescaling.") + return y + +# Tonemapping functions + +def train_difference(a: Tensor, b: Tensor, c: Tensor) -> Tensor: + diff_AB = a.float() - b.float() + distance_A0 = torch.abs(b.float() - c.float()) + distance_A1 = torch.abs(b.float() - a.float()) + + sum_distances = distance_A0 + distance_A1 + + scale = torch.where( + sum_distances != 0, distance_A1 / sum_distances, torch.tensor(0.0).float() + ) + sign_scale = torch.sign(b.float() - c.float()) + scale = sign_scale * torch.abs(scale) + new_diff = scale * torch.abs(diff_AB) + return new_diff + +def gated_thresholding(percentile: float, floor: float, t: Tensor) -> Tensor: + """ + Args: + percentile: float between 0.0 and 1.0. for example 0.995 would subject only the top 0.5%ile to clamping. + t: [b, c, v] tensor in pixel or latent space (where v is the result of flattening w and h) + """ + a = t.abs() # Magnitudes + q = torch.quantile(a, percentile, dim=2) # Get clamp value via top % of magnitudes + q.clamp_(min=floor) + q = q.unsqueeze(2).expand(*t.shape) + t = t.clamp(-q, q) # Clamp latent with magnitude value + t = t / q + return t + +def dyn_thresh_gate(latent: Tensor, centered_magnitudes: Tensor, tonemap_percentile: float, floor: float, ceil: float): + if centered_magnitudes.lt(torch.tensor(ceil, device=centered_magnitudes.device)).all().item(): # If the magnitudes are less than the ceiling + return latent # Return the unmodified centered latent + else: + latent = gated_thresholding(tonemap_percentile, floor, latent) # If the magnitudes are higher than the ceiling + return latent # Gated-dynamic thresholding by Birchlabs + +def spatial_norm_thresholding(x0, value): + # b c h w + pow_x0 = torch.pow(torch.abs(x0), 2) + s = pow_x0.mean(1, keepdim=True).sqrt().clamp(min=value) + return x0 * (value / s) + +def spatial_norm_chw_thresholding(x0, value): + # b c h w + pow_x0 = torch.pow(torch.abs(x0), 2) + s = pow_x0.mean(dim=(1, 2, 3), keepdim=True).sqrt().clamp(min=value) + return x0 * (value / s) + +# Contrast function + +def contrast(x: Tensor): + # Calculate the mean and standard deviation of the pixel values + #mean = x.mean(dim=(1,2,3), keepdim=True) + stddev = x.std(dim=(1,2,3), keepdim=True) + # Scale the pixel values by the standard deviation + scaled_pixels = (x) / stddev + return scaled_pixels + +def contrast_with_mean(x: Tensor): + # Calculate the mean and standard deviation of the pixel values + #mean = x.mean(dim=(2,3), keepdim=True) + stddev = x.std(dim=(1,2,3), keepdim=True) + diff_mean = ((x / stddev) - x).mean(dim=(1,2,3), keepdim=True) + # Scale the pixel values by the 
standard deviation + scaled_pixels = x / stddev + return scaled_pixels - diff_mean + +def center_latent(tensor): #https://birchlabs.co.uk/machine-learning#combating-mean-drift-in-cfg + """Centers on 0 to combat CFG drift.""" + tensor = tensor - tensor.mean(dim=(-2, -1)).unsqueeze(-1).unsqueeze(-1).expand(tensor.shape) + return tensor + +def center_0channel(tensor): #https://birchlabs.co.uk/machine-learning#combating-mean-drift-in-cfg + """Centers on 0 to combat CFG drift.""" + std_dev_0 = tensor[:, [0]].std() + mean_0 = tensor[:, [0]].mean() + mean_12 = tensor[:, [1,2]].mean() + mean_3 = tensor[:, [3]].mean() + + #tensor[:, [0]] /= std_dev_0 + tensor[:, [0]] -= mean_0 + tensor[:, [0]] += torch.copysign(torch.pow(torch.abs(mean_0), 1.5), mean_0) + #tensor[:, [1, 2]] -= tensor[:, [1, 2]].mean() + tensor[:, [1, 2]] -= mean_12 * 0.5 + tensor[:, [3]] -= mean_3 + tensor[:, [3]] += torch.copysign(torch.pow(torch.abs(mean_3), 1.5), mean_3) + return tensor# - tensor.mean(dim=(2,3), keepdim=True) + +def channel_sharpen(tensor): + """Centers on 0 to combat CFG drift.""" + flattened = tensor.flatten(2) + flat_std = flattened.std(dim=(2)).unsqueeze(2).expand(flattened.shape) + flattened *= flat_std + flattened -= flattened.mean(dim=(2)).unsqueeze(2).expand(flattened.shape) + flattened /= flat_std + tensor = flattened.unflatten(2, tensor.shape[2:]) + return tensor + + +def center_012channel(tensor): #https://birchlabs.co.uk/machine-learning#combating-mean-drift-in-cfg + """Centers on 0 to combat CFG drift.""" + curr_tens = tensor[:, [0,1,2]] + tensor[:, [0,1,2]] -= curr_tens.mean() + return tensor + +def center_latent_perchannel(tensor): # Does nothing different than above + """Centers on 0 to combat CFG drift.""" + flattened = tensor.flatten(2) + flattened = flattened - flattened.mean(dim=(2)).unsqueeze(2).expand(flattened.shape) + tensor = flattened.unflatten(2, tensor.shape[2:]) + return tensor + +def center_latent_perchannel_with_magnitudes(tensor): # Does nothing different than above + """Centers on 0 to combat CFG drift.""" + flattened = tensor.flatten(2) + flattened_magnitude = (torch.linalg.vector_norm(flattened, dim=(2), keepdim=True) + 0.0000000001) + flattened /= flattened_magnitude + flattened = flattened - flattened.mean(dim=(2)).unsqueeze(2).expand(flattened.shape) + flattened *= flattened_magnitude + tensor = flattened.unflatten(2, tensor.shape[2:]) + return tensor + +def center_latent_perchannel_with_decorrelate(tensor): # Decorrelates data, slight change, test and play with it. 
+ """Centers on 0 to combat CFG drift, preprocesses the latent with decorrelation""" + tensor = decorrelate_data(tensor) + flattened = tensor.flatten(2) + flattened_magnitude = (torch.linalg.vector_norm(flattened, dim=(2), keepdim=True) + 0.0000000001) + flattened /= flattened_magnitude + flattened = flattened - flattened.mean(dim=(2)).unsqueeze(2).expand(flattened.shape) + flattened *= flattened_magnitude + tensor = flattened.unflatten(2, tensor.shape[2:]) + return tensor + +def center_latent_median(tensor): + flattened = tensor.flatten(2) + median = flattened.median() + scaled_data = (flattened - median) + scaled_data = scaled_data.unflatten(2, tensor.shape[2:]) + return scaled_data + +def divisive_normalization(image_tensor, neighborhood_size, threshold=1e-6): + # Compute the local mean and local variance + local_mean = F.avg_pool2d(image_tensor, neighborhood_size, stride=1, padding=neighborhood_size // 2, count_include_pad=False) + local_mean_squared = local_mean**2 + + local_variance = F.avg_pool2d(image_tensor**2, neighborhood_size, stride=1, padding=neighborhood_size // 2, count_include_pad=False) - local_mean_squared + + # Add a small value to prevent division by zero + local_variance = local_variance + threshold + + # Apply divisive normalization + normalized_tensor = image_tensor / torch.sqrt(local_variance) + + return normalized_tensor + +def decorrelate_data(data): + """flattened = tensor.flatten(2).squeeze(0) # this code aint shit, yo + cov_matrix = torch.cov(flattened) + sqrt_inv_cov_matrix = torch.linalg.inv(torch.sqrt(cov_matrix)) + decorrelated_tensor = torch.dot(flattened, sqrt_inv_cov_matrix.T) + decorrelated_tensor = decorrelated_tensor.unflatten(2, tensor.shape[2:]).unsqueeze(0)""" + + # Reshape the 4D tensor to a 2D tensor for covariance calculation + num_samples, num_channels, height, width = data.size() + data_reshaped = data.view(num_samples, num_channels, -1) + data_reshaped = data_reshaped - torch.mean(data_reshaped, dim=2, keepdim=True) + + # Compute covariance matrix + cov_matrix = torch.matmul(data_reshaped, data_reshaped.transpose(1, 2)) / (height * width - 1) + + # Compute the inverse square root of the covariance matrix + u, s, v = torch.svd(cov_matrix) + sqrt_inv_cov_matrix = torch.matmul(u, torch.matmul(torch.diag_embed(1.0 / torch.sqrt(s)), v.transpose(1, 2))) + + # Reshape sqrt_inv_cov_matrix to match the dimensions of data_reshaped + sqrt_inv_cov_matrix = sqrt_inv_cov_matrix.unsqueeze(0).expand(num_samples, -1, -1, -1) + + # Decorrelate the data + decorrelated_data = torch.matmul(data_reshaped.transpose(1, 2), sqrt_inv_cov_matrix.transpose(2, 3)) + decorrelated_data = decorrelated_data.transpose(2, 3) + + # Reshape back to the original shape + decorrelated_data = decorrelated_data.view(num_samples, num_channels, height, width) + + return decorrelated_data.to(data.device) + +def get_low_frequency_noise(image: Tensor, threshold: float): + # Convert image to Fourier domain + fourier = torch.fft.fft2(image, dim=(-2, -1)) # Apply FFT along Height and Width dimensions + + # Compute the power spectrum + power_spectrum = torch.abs(fourier) ** 2 + + threshold = threshold ** 2 + + # Drop low-frequency components + mask = (power_spectrum < threshold).float() + filtered_fourier = fourier * mask + + # Inverse transform back to spatial domain + inverse_transformed = torch.fft.ifft2(filtered_fourier, dim=(-2, -1)) # Apply IFFT along Height and Width dimensions + + return inverse_transformed.real.to(image.device) + +def spectral_modulation(image: Tensor, 
modulation_multiplier: float, spectral_mod_percentile: float): # Reference implementation by Clybius, 2023 :tm::c::r: (jk idc who uses it :3) + # Convert image to Fourier domain + fourier = torch.fft.fft2(image, dim=(-2, -1)) # Apply FFT along Height and Width dimensions + + log_amp = torch.log(torch.sqrt(fourier.real ** 2 + fourier.imag ** 2)) + + quantile_low = torch.quantile( + log_amp.abs().flatten(2), + spectral_mod_percentile * 0.01, + dim = 2 + ).unsqueeze(-1).unsqueeze(-1).expand(log_amp.shape) + + quantile_high = torch.quantile( + log_amp.abs().flatten(2), + 1 - (spectral_mod_percentile * 0.01), + dim = 2 + ).unsqueeze(-1).unsqueeze(-1).expand(log_amp.shape) + + # Increase low-frequency components + mask_low = ((log_amp < quantile_low).float() + 1).clamp_(max=1.5) # If lower than low 5% quantile, set to 1.5, otherwise 1 + # Decrease high-frequency components + mask_high = ((log_amp < quantile_high).float()).clamp_(min=0.5) # If lower than high 5% quantile, set to 1, otherwise 0.5 + filtered_fourier = fourier * ((mask_low * mask_high) ** modulation_multiplier) # Effectively + + # Inverse transform back to spatial domain + inverse_transformed = torch.fft.ifft2(filtered_fourier, dim=(-2, -1)) # Apply IFFT along Height and Width dimensions + + return inverse_transformed.real.to(image.device) + +def spectral_modulation_soft(image: Tensor, modulation_multiplier: float, spectral_mod_percentile: float): # Modified for soft quantile adjustment using a novel:tm::c::r: method titled linalg. + # Convert image to Fourier domain + fourier = torch.fft.fft2(image, dim=(-2, -1)) # Apply FFT along Height and Width dimensions + + log_amp = torch.log(torch.sqrt(fourier.real ** 2 + fourier.imag ** 2)) + + quantile_low = torch.quantile( + log_amp.abs().flatten(2), + spectral_mod_percentile * 0.01, + dim = 2 + ).unsqueeze(-1).unsqueeze(-1).expand(log_amp.shape) + + quantile_high = torch.quantile( + log_amp.abs().flatten(2), + 1 - (spectral_mod_percentile * 0.01), + dim = 2 + ).unsqueeze(-1).unsqueeze(-1).expand(log_amp.shape) + + quantile_max = torch.quantile( + log_amp.abs().flatten(2), + 1, + dim = 2 + ).unsqueeze(-1).unsqueeze(-1).expand(log_amp.shape) + + # Decrease high-frequency components + mask_high = log_amp > quantile_high # If we're larger than 95th percentile + + additive_mult_high = torch.where( + mask_high, + 1 - ((log_amp - quantile_high) / (quantile_max - quantile_high)).clamp_(max=0.5), # (1) - (0-1), where 0 is 95th %ile and 1 is 100%ile + torch.tensor(1.0) + ) + + + # Increase low-frequency components + mask_low = log_amp < quantile_low + additive_mult_low = torch.where( + mask_low, + 1 + (1 - (log_amp / quantile_low)).clamp_(max=0.5), # (1) + (0-1), where 0 is 5th %ile and 1 is 0%ile + torch.tensor(1.0) + ) + + mask_mult = ((additive_mult_low * additive_mult_high) ** modulation_multiplier).clamp_(min=0.05, max=20) + #print(mask_mult) + filtered_fourier = fourier * mask_mult + + # Inverse transform back to spatial domain + inverse_transformed = torch.fft.ifft2(filtered_fourier, dim=(-2, -1)) # Apply IFFT along Height and Width dimensions + + return inverse_transformed.real.to(image.device) + +def pyramid_noise_like(x, discount=0.9, generator=None, rand_source=random): + b, c, w, h = x.shape # EDIT: w and h get over-written, rename for a different variant! 
+ u = torch.nn.Upsample(size=(w, h), mode='nearest-exact') + noise = gen_like(torch.randn, x, generator=generator) + for i in range(10): + r = rand_source.random()*2+2 # Rather than always going 2x, + w, h = max(1, int(w/(r**i))), max(1, int(h/(r**i))) + noise += u(torch.randn(b, c, w, h, generator=generator).to(x)) * discount**i + if w==1 or h==1: break # Lowest resolution is 1x1 + return noise/noise.std() # Scaled back to roughly unit variance + +import math +def dyn_cfg_modifier(conditioning, unconditioning, method, cond_scale, time_mult): + match method: + case "dyncfg-halfcosine": + noise_pred = conditioning - unconditioning + + noise_pred_magnitude = (torch.linalg.vector_norm(noise_pred, dim=(1)) + 0.0000000001)[:,None] + + time = time_mult.item() + time_factor = -(math.cos(0.5 * time * math.pi) / 2) + 1 + noise_pred_timescaled_magnitude = (torch.linalg.vector_norm(noise_pred * time_factor, dim=(1)) + 0.0000000001)[:,None] + + noise_pred /= noise_pred_magnitude + noise_pred *= noise_pred_timescaled_magnitude + return noise_pred + case "dyncfg-halfcosine-mimic": + noise_pred = conditioning - unconditioning + + noise_pred_magnitude = (torch.linalg.vector_norm(noise_pred, dim=(1)) + 0.0000000001)[:,None] + + time = time_mult.item() + time_factor = -(math.cos(0.5 * time * math.pi) / 2) + 1 + + latent = noise_pred + + mimic_latent = noise_pred * time_factor + mimic_flattened = mimic_latent.flatten(2) + mimic_means = mimic_flattened.mean(dim=2).unsqueeze(2) + mimic_recentered = mimic_flattened - mimic_means + mimic_abs = mimic_recentered.abs() + mimic_max = mimic_abs.max(dim=2).values.unsqueeze(2) + + latent_flattened = latent.flatten(2) + latent_means = latent_flattened.mean(dim=2).unsqueeze(2) + latent_recentered = latent_flattened - latent_means + latent_abs = latent_recentered.abs() + latent_q = torch.quantile(latent_abs, 0.995, dim=2).unsqueeze(2) + s = torch.maximum(latent_q, mimic_max) + pred_clamped = noise_pred.flatten(2).clamp(-s, s) + pred_normalized = pred_clamped / s + pred_renorm = pred_normalized * mimic_max + pred_uncentered = pred_renorm + latent_means + noise_pred_degraded = pred_uncentered.unflatten(2, noise_pred.shape[2:]) + + noise_pred /= noise_pred_magnitude + + noise_pred_timescaled_magnitude = (torch.linalg.vector_norm(noise_pred_degraded, dim=(1)) + 0.0000000001)[:,None] + noise_pred *= noise_pred_timescaled_magnitude + return noise_pred + + +class ModelSamplerLatentMegaModifier: + @classmethod + def INPUT_TYPES(s): + return {"required": { "model": ("MODEL",), + "sharpness_multiplier": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step": 0.1}), + "sharpness_method": (["anisotropic", "joint-anisotropic", "gaussian", "cas"], ), + "tonemap_multiplier": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 100.0, "step": 0.01}), + "tonemap_method": (["reinhard", "reinhard_perchannel", "arctan", "quantile", "gated", "cfg-mimic", "spatial-norm"], ), + "tonemap_percentile": ("FLOAT", {"default": 100.0, "min": 0.0, "max": 100.0, "step": 0.005}), + "contrast_multiplier": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step": 0.1}), + "combat_method": (["subtract", "subtract_channels", "subtract_median", "sharpen"], ), + "combat_cfg_drift": ("FLOAT", {"default": 0.0, "min": -10.0, "max": 10.0, "step": 0.01}), + "rescale_cfg_phi": ("FLOAT", {"default": 0.0, "min": -10.0, "max": 10.0, "step": 0.01}), + "extra_noise_type": (["gaussian", "uniform", "perlin", "pink", "green", "pyramid"], ), + "extra_noise_method": (["add", "add_scaled", "speckle", "cads", 
"cads_rescaled", "cads_speckle", "cads_speckle_rescaled"], ), + "extra_noise_multiplier": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 100.0, "step": 0.1}), + "extra_noise_lowpass": ("INT", {"default": 100, "min": 0, "max": 1000, "step": 1}), + "divisive_norm_size": ("INT", {"default": 127, "min": 1, "max": 255, "step": 1}), + "divisive_norm_multiplier": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.01}), + "spectral_mod_mode": (["hard_clamp", "soft_clamp"], ), + "spectral_mod_percentile": ("FLOAT", {"default": 5.0, "min": 0.0, "max": 50.0, "step": 0.01}), + "spectral_mod_multiplier": ("FLOAT", {"default": 0.0, "min": -15.0, "max": 15.0, "step": 0.01}), + "affect_uncond": (["None", "Sharpness"], ), + "dyn_cfg_augmentation": (["None", "dyncfg-halfcosine", "dyncfg-halfcosine-mimic"], ), + }, + "optional": { "seed": ("INT", {"min": 0, "max": 0xffffffffffffffff}) + }} + RETURN_TYPES = ("MODEL",) + FUNCTION = "mega_modify" + + CATEGORY = "clybNodes" + + def mega_modify(self, model, sharpness_multiplier, sharpness_method, tonemap_multiplier, tonemap_method, tonemap_percentile, contrast_multiplier, combat_method, combat_cfg_drift, rescale_cfg_phi, extra_noise_type, extra_noise_method, extra_noise_multiplier, extra_noise_lowpass, divisive_norm_size, divisive_norm_multiplier, spectral_mod_mode, spectral_mod_percentile, spectral_mod_multiplier, affect_uncond, dyn_cfg_augmentation, seed=None): + gen = None + rand = random + if seed is not None: + gen = torch.Generator(device='cpu') + rand = random.Random() + gen.manual_seed(seed) + rand.seed(seed) + + def modify_latent(args): + x_input = args["input"] + cond = args["cond"] + uncond = args["uncond"] + cond_scale = args["cond_scale"] + timestep = model.model.model_sampling.timestep(args["timestep"]) + sigma = args["sigma"] + sigma = sigma.view(sigma.shape[:1] + (1,) * (cond.ndim - 1)) + #print(model.model.model_sampling.timestep(timestep)) + + x = x_input / (sigma * sigma + 1.0) + cond = ((x - (x_input - cond)) * (sigma ** 2 + 1.0) ** 0.5) / (sigma) + uncond = ((x - (x_input - uncond)) * (sigma ** 2 + 1.0) ** 0.5) / (sigma) + + noise_pred = (cond - uncond) + + # Extra noise + if extra_noise_multiplier > 0: + match extra_noise_type: + case "gaussian": + extra_noise = gen_like(torch.randn, cond, generator=gen) + case "uniform": + extra_noise = (gen_like(torch.rand, cond, generator=gen) - 0.5) * 2 * 1.73 + case "perlin": + cond_size_0 = cond.size(dim=2) + cond_size_1 = cond.size(dim=3) + extra_noise = perlin_noise(grid_shape=(cond_size_0, cond_size_1), out_shape=(cond_size_0, cond_size_1), batch_size=4, generator=gen).to(cond.device).unsqueeze(0) + mean = torch.mean(extra_noise) + std = torch.std(extra_noise) + + extra_noise.sub_(mean).div_(std) + case "pink": + extra_noise = generate_1f_noise(cond, 2, extra_noise_multiplier, generator=gen).to(cond.device) + mean = torch.mean(extra_noise) + std = torch.std(extra_noise) + + extra_noise.sub_(mean).div_(std) + case "green": + cond_size_0 = cond.size(dim=2) + cond_size_1 = cond.size(dim=3) + extra_noise = green_noise(cond_size_0, cond_size_1, generator=gen).to(cond.device) + mean = torch.mean(extra_noise) + std = torch.std(extra_noise) + + extra_noise.sub_(mean).div_(std) + case "pyramid": + extra_noise = pyramid_noise_like(cond) + + if extra_noise_lowpass > 0: + extra_noise = get_low_frequency_noise(extra_noise, extra_noise_lowpass) + + alpha_noise = 1.0 - (timestep / 999.0)[:, None, None, None].clone() # Get alpha multiplier, lower alpha at high sigmas/high noise + alpha_noise *= 0.001 * 
extra_noise_multiplier # User-input and weaken the strength so we don't annihilate the latent. + match extra_noise_method: + case "add": + cond = cond + extra_noise * alpha_noise + uncond = uncond - extra_noise * alpha_noise + case "add_scaled": + cond = cond + train_difference(cond, extra_noise, cond) * alpha_noise + uncond = uncond - train_difference(uncond, extra_noise, uncond) * alpha_noise + case "speckle": + cond = cond + cond * extra_noise * alpha_noise + uncond = uncond - uncond * extra_noise * alpha_noise + case "cads": + cond = add_cads_custom_noise(cond, extra_noise, timestep, 0.6, 0.9, extra_noise_multiplier / 100., 1, False) + uncond = add_cads_custom_noise(uncond, extra_noise, timestep, 0.6, 0.9, extra_noise_multiplier / 100., 1, False) + case "cads_rescaled": + cond = add_cads_custom_noise(cond, extra_noise, timestep, 0.6, 0.9, extra_noise_multiplier / 100., 1, True) + uncond = add_cads_custom_noise(uncond, extra_noise, timestep, 0.6, 0.9, extra_noise_multiplier / 100., 1, True) + case "cads_speckle": + cond = add_cads_custom_noise(cond, extra_noise * cond, timestep, 0.6, 0.9, extra_noise_multiplier / 100., 1, False) + uncond = add_cads_custom_noise(uncond, extra_noise * uncond, timestep, 0.6, 0.9, extra_noise_multiplier / 100., 1, False) + case "cads_speckle_rescaled": + cond = add_cads_custom_noise(cond, extra_noise * cond, timestep, 0.6, 0.9, extra_noise_multiplier / 100., 1, True) + uncond = add_cads_custom_noise(uncond, extra_noise * uncond, timestep, 0.6, 0.9, extra_noise_multiplier / 100., 1, True) + case _: + print("Haven't heard of a noise method named like that before... (Couldn't find method)") + + if sharpness_multiplier > 0.0 or sharpness_multiplier < 0.0: + match sharpness_method: + case "anisotropic": + degrade_func = bilateral_blur + case "joint-anisotropic": + degrade_func = lambda img: joint_bilateral_blur(img, (img - torch.mean(img, dim=(1, 2, 3), keepdim=True)) / torch.std(img, dim=(1, 2, 3), keepdim=True), 13, 3.0, 3.0, "reflect", "l1") + case "gaussian": + degrade_func = gaussian_filter_2d + case "cas": + degrade_func = lambda image: contrast_adaptive_sharpening(image, amount=sigma.clamp(max=1.00).item()) + case _: + print("For some reason, the sharpness filter could not be found.") + # Sharpness + alpha = 1.0 - (timestep / 999.0)[:, None, None, None].clone() # Get alpha multiplier, lower alpha at high sigmas/high noise + alpha *= 0.001 * sharpness_multiplier # User-input and weaken the strength so we don't annihilate the latent. 
+ cond = degrade_func(cond) * alpha + cond * (1.0 - alpha) # Mix the modified latent with the existing latent by the alpha + if affect_uncond == "Sharpness": + uncond = degrade_func(uncond) * alpha + uncond * (1.0 - alpha) + + time_mult = 1.0 - (timestep / 999.0)[:, None, None, None].clone() + noise_pred_degraded = (cond - uncond) if dyn_cfg_augmentation == "None" else dyn_cfg_modifier(cond, uncond, dyn_cfg_augmentation, cond_scale, time_mult) # New noise pred + + # After this point, we use `noise_pred_degraded` instead of just `cond` for the final set of calculations + + # Tonemap noise + if tonemap_multiplier == 0: + new_magnitude = 1.0 + else: + match tonemap_method: + case "reinhard": + noise_pred_vector_magnitude = (torch.linalg.vector_norm(noise_pred_degraded, dim=(1)) + 0.0000000001)[:,None] + noise_pred_degraded /= noise_pred_vector_magnitude + + mean = torch.mean(noise_pred_vector_magnitude, dim=(1,2,3), keepdim=True) + std = torch.std(noise_pred_vector_magnitude, dim=(1,2,3), keepdim=True) + + top = (std * 3 * (100 / tonemap_percentile) + mean) * tonemap_multiplier + + noise_pred_vector_magnitude *= (1.0 / top) + new_magnitude = noise_pred_vector_magnitude / (noise_pred_vector_magnitude + 1.0) + new_magnitude *= top + + noise_pred_degraded *= new_magnitude + case "reinhard_perchannel": # Testing the flatten strategy + flattened = noise_pred_degraded.flatten(2) + noise_pred_vector_magnitude = (torch.linalg.vector_norm(flattened, dim=(2), keepdim=True) + 0.0000000001) + flattened /= noise_pred_vector_magnitude + + mean = torch.mean(noise_pred_vector_magnitude, dim=(2), keepdim=True) + + top = (3 * (100 / tonemap_percentile) + mean) * tonemap_multiplier + + noise_pred_vector_magnitude *= (1.0 / top) + + new_magnitude = noise_pred_vector_magnitude / (noise_pred_vector_magnitude + 1.0) + new_magnitude *= top + + flattened *= new_magnitude + noise_pred_degraded = flattened.unflatten(2, noise_pred_degraded.shape[2:]) + case "arctan": + noise_pred_vector_magnitude = (torch.linalg.vector_norm(noise_pred_degraded, dim=(1)) + 0.0000000001)[:,None] + noise_pred_degraded /= noise_pred_vector_magnitude + + noise_pred_degraded = (torch.arctan(noise_pred_degraded * tonemap_multiplier) * (1 / tonemap_multiplier)) + (noise_pred_degraded * (100 - tonemap_percentile) / 100) + + noise_pred_degraded *= noise_pred_vector_magnitude + case "quantile": + s: FloatTensor = torch.quantile( + (uncond + noise_pred_degraded * cond_scale).flatten(start_dim=1).abs(), + tonemap_percentile / 100, + dim = -1 + ) * tonemap_multiplier + s.clamp_(min = 1.) + s = s.reshape(*s.shape, 1, 1, 1) + noise_pred_degraded = noise_pred_degraded.clamp(-s, s) / s + case "gated": # https://birchlabs.co.uk/machine-learning#dynamic-thresholding-latents so based,.,.,...., + latent_scale = model.model.latent_format.scale_factor + + latent = uncond + noise_pred_degraded * cond_scale # Get full latent from CFG formula + latent /= latent_scale # Divide full CFG by latent scale (~0.13 for sdxl) + flattened = latent.flatten(2) + means = flattened.mean(dim=2).unsqueeze(2) + centered_magnitudes = (flattened - means).abs().max() # Get highest magnitude of full CFG + + flattened_pred = (noise_pred_degraded / latent_scale).flatten(2) + + floor = 3.0560 + ceil = 42. 
* tonemap_multiplier # as is the answer to life, unless you modify the multiplier cuz u aint a believer in life + + + thresholded_latent = dyn_thresh_gate(flattened_pred, centered_magnitudes, tonemap_percentile / 100., floor, ceil) # Threshold if passes ceil + thresholded_latent = thresholded_latent.unflatten(2, noise_pred_degraded.shape[2:]) + noise_pred_degraded = thresholded_latent * latent_scale # Rescale by latent + case "cfg-mimic": + latent = noise_pred_degraded + + mimic_latent = noise_pred_degraded * tonemap_multiplier + mimic_flattened = mimic_latent.flatten(2) + mimic_means = mimic_flattened.mean(dim=2).unsqueeze(2) + mimic_recentered = mimic_flattened - mimic_means + mimic_abs = mimic_recentered.abs() + mimic_max = mimic_abs.max(dim=2).values.unsqueeze(2) + + latent_flattened = latent.flatten(2) + latent_means = latent_flattened.mean(dim=2).unsqueeze(2) + latent_recentered = latent_flattened - latent_means + latent_abs = latent_recentered.abs() + latent_q = torch.quantile(latent_abs, tonemap_percentile / 100., dim=2).unsqueeze(2) + s = torch.maximum(latent_q, mimic_max) + pred_clamped = noise_pred_degraded.flatten(2).clamp(-s, s) + pred_normalized = pred_clamped / s + pred_renorm = pred_normalized * mimic_max + pred_uncentered = pred_renorm + mimic_means # Personal choice to re-mean from the mimic here... should be latent_means. + noise_pred_degraded = pred_uncentered.unflatten(2, noise_pred_degraded.shape[2:]) + case "spatial-norm": + #time = (1.0 - (timestep / 999.0)[:, None, None, None].clone().item()) + #time = -(math.cos(time * math.pi) / (3)) + (2/3) # 0.33333 to 1.0, half cosine + noise_pred_degraded = spatial_norm_chw_thresholding(noise_pred_degraded, tonemap_multiplier / 2 / cond_scale) + case _: + print("Could not tonemap, for the method was not found.") + + # Spectral Modification + if spectral_mod_multiplier > 0 or spectral_mod_multiplier < 0: + #alpha = 1. - (timestep / 999.0)[:, None, None, None].clone() # Get alpha multiplier, lower alpha at high sigmas/high noise + #alpha = spectral_mod_multiplier# User-input and weaken the strength so we don't annihilate the latent. + match spectral_mod_mode: + case "hard_clamp": + modulation_func = spectral_modulation + case "soft_clamp": + modulation_func = spectral_modulation_soft + modulation_diff = modulation_func(noise_pred_degraded, spectral_mod_multiplier, spectral_mod_percentile) - noise_pred_degraded + noise_pred_degraded += modulation_diff + + if contrast_multiplier > 0 or contrast_multiplier < 0: + contrast_func = contrast + # Contrast, after tonemapping, to ensure user-set contrast is expected to behave similarly across tonemapping settings + alpha = 1.0 - (timestep / 999.0)[:, None, None, None].clone() + alpha *= 0.001 * contrast_multiplier + noise_pred_degraded = contrast_func(noise_pred_degraded) * alpha + (noise_pred_degraded) * (1.0 - alpha) # Temporary fix for contrast is to add the input? Maybe? It just doesn't work like before... + + # Rescale CFG + if rescale_cfg_phi == 0: + x_final = uncond + noise_pred_degraded * cond_scale + else: + x_cfg = uncond + noise_pred_degraded * cond_scale + ro_pos = torch.std(cond, dim=(1,2,3), keepdim=True) + ro_cfg = torch.std(x_cfg, dim=(1,2,3), keepdim=True) + + x_rescaled = x_cfg * (ro_pos / ro_cfg) + x_final = rescale_cfg_phi * x_rescaled + (1.0 - rescale_cfg_phi) * x_cfg + + if combat_cfg_drift > 0 or combat_cfg_drift < 0: + alpha = (1. 
- (timestep / 999.0)[:, None, None, None].clone()) + alpha ** 0.025 # Alpha might as well be 1, but we want to protect the first steps (?). + alpha = alpha.clamp_(max=1) + match combat_method: + case "subtract": + combat_drift_func = center_latent_perchannel + alpha *= combat_cfg_drift + case "subtract_channels": + combat_drift_func = center_0channel + alpha *= combat_cfg_drift + case "subtract_median": + combat_drift_func = center_latent_median + alpha *= combat_cfg_drift + case "sharpen": + combat_drift_func = channel_sharpen + alpha *= combat_cfg_drift + x_final = combat_drift_func(x_final) * alpha + x_final * (1.0 - alpha) # Mix the modified latent with the existing latent by the alpha + + if divisive_norm_multiplier > 0: + alpha = 1. - (timestep / 999.0)[:, None, None, None].clone() + alpha ** 0.025 # Alpha might as well be 1, but we want to protect the beginning steps (?). + alpha *= divisive_norm_multiplier + high_noise = divisive_normalization(x_final, (divisive_norm_size * 2) + 1) + x_final = high_noise * alpha + x_final * (1.0 - alpha) + + + return x_input - (x - x_final * sigma / (sigma * sigma + 1.0) ** 0.5) # General formula for CFG. uncond + (cond - uncond) * cond_scale + + m = model.clone() + m.set_model_sampler_cfg_function(modify_latent) + return (m, ) \ No newline at end of file diff --git a/extensions-builtin/sd_forge_latent_modifier/scripts/forge_latent_modifier.py b/extensions-builtin/sd_forge_latent_modifier/scripts/forge_latent_modifier.py new file mode 100644 index 0000000000000000000000000000000000000000..172efb222d546b4cd1ba7891cd9df2aa46ebbb5d --- /dev/null +++ b/extensions-builtin/sd_forge_latent_modifier/scripts/forge_latent_modifier.py @@ -0,0 +1,106 @@ +import gradio as gr +from modules import scripts + +from lib_latent_modifier.sampler_mega_modifier import ModelSamplerLatentMegaModifier + +opModelSamplerLatentMegaModifier = ModelSamplerLatentMegaModifier().mega_modify + + +class LatentModifierForForge(scripts.Script): + sorting_priority = 15 + + def title(self): + return "LatentModifier Integrated" + + def show(self, is_img2img): + # make this extension visible in both txt2img and img2img tab. 
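The node above hands its modify_latent closure to the sampler via model.clone() and set_model_sampler_cfg_function, and the closure reads its inputs from the args dictionary ("input", "cond", "uncond", "cond_scale", "sigma", "timestep") and returns the combined prediction. A minimal pass-through sketch of that hook, based only on the argument names used above; whether it exactly reproduces the backend's default CFG depends on the domain conventions hinted at by the sigma arithmetic:

def plain_cfg(args):
    # No modification at all: standard classifier-free guidance,
    # uncond + (cond - uncond) * cond_scale, in the domain the sampler passes in.
    cond = args["cond"]
    uncond = args["uncond"]
    cond_scale = args["cond_scale"]
    return uncond + (cond - uncond) * cond_scale

# Given a ModelPatcher `model`, register the hook on a clone so the original stays untouched:
m = model.clone()
m.set_model_sampler_cfg_function(plain_cfg)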
+ return scripts.AlwaysVisible + + def ui(self, *args, **kwargs): + with gr.Accordion(open=False, label=self.title()): + enabled = gr.Checkbox(label='Enabled', value=False) + sharpness_multiplier = gr.Slider(label='Sharpness Multiplier', minimum=-100.0, maximum=100.0, step=0.1, + value=0.0) + sharpness_method = gr.Radio(label='Sharpness Method', + choices=['anisotropic', 'joint-anisotropic', 'gaussian', 'cas'], + value='anisotropic') + tonemap_multiplier = gr.Slider(label='Tonemap Multiplier', minimum=0.0, maximum=100.0, step=0.01, value=0.0) + tonemap_method = gr.Radio(label='Tonemap Method', + choices=['reinhard', 'reinhard_perchannel', 'arctan', 'quantile', 'gated', + 'cfg-mimic', 'spatial-norm'], value='reinhard') + tonemap_percentile = gr.Slider(label='Tonemap Percentile', minimum=0.0, maximum=100.0, step=0.005, + value=100.0) + contrast_multiplier = gr.Slider(label='Contrast Multiplier', minimum=-100.0, maximum=100.0, step=0.1, + value=0.0) + combat_method = gr.Radio(label='Combat Method', + choices=['subtract', 'subtract_channels', 'subtract_median', 'sharpen'], + value='subtract') + combat_cfg_drift = gr.Slider(label='Combat Cfg Drift', minimum=-10.0, maximum=10.0, step=0.01, value=0.0) + rescale_cfg_phi = gr.Slider(label='Rescale Cfg Phi', minimum=-10.0, maximum=10.0, step=0.01, value=0.0) + extra_noise_type = gr.Radio(label='Extra Noise Type', + choices=['gaussian', 'uniform', 'perlin', 'pink', 'green', 'pyramid'], + value='gaussian') + extra_noise_method = gr.Radio(label='Extra Noise Method', + choices=['add', 'add_scaled', 'speckle', 'cads', 'cads_rescaled', + 'cads_speckle', 'cads_speckle_rescaled'], value='add') + extra_noise_multiplier = gr.Slider(label='Extra Noise Multiplier', minimum=0.0, maximum=100.0, step=0.1, + value=0.0) + extra_noise_lowpass = gr.Slider(label='Extra Noise Lowpass', minimum=0, maximum=1000, step=1, value=100) + divisive_norm_size = gr.Slider(label='Divisive Norm Size', minimum=1, maximum=255, step=1, value=127) + divisive_norm_multiplier = gr.Slider(label='Divisive Norm Multiplier', minimum=0.0, maximum=1.0, step=0.01, + value=0.0) + spectral_mod_mode = gr.Radio(label='Spectral Mod Mode', choices=['hard_clamp', 'soft_clamp'], + value='hard_clamp') + spectral_mod_percentile = gr.Slider(label='Spectral Mod Percentile', minimum=0.0, maximum=50.0, step=0.01, + value=5.0) + spectral_mod_multiplier = gr.Slider(label='Spectral Mod Multiplier', minimum=-15.0, maximum=15.0, step=0.01, + value=0.0) + affect_uncond = gr.Radio(label='Affect Uncond', choices=['None', 'Sharpness'], value='None') + dyn_cfg_augmentation = gr.Radio(label='Dyn Cfg Augmentation', + choices=['None', 'dyncfg-halfcosine', 'dyncfg-halfcosine-mimic'], + value='None') + + return enabled, sharpness_multiplier, sharpness_method, tonemap_multiplier, tonemap_method, tonemap_percentile, contrast_multiplier, combat_method, combat_cfg_drift, rescale_cfg_phi, extra_noise_type, extra_noise_method, extra_noise_multiplier, extra_noise_lowpass, divisive_norm_size, divisive_norm_multiplier, spectral_mod_mode, spectral_mod_percentile, spectral_mod_multiplier, affect_uncond, dyn_cfg_augmentation + + def process_before_every_sampling(self, p, *script_args, **kwargs): + # This will be called before every sampling. + # If you use highres fix, this will be called twice. 
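The components returned by ui() above come back to process_before_every_sampling() positionally through *script_args, so the unpacking below has to mirror the return order exactly. A reduced two-control sketch of that contract (the methods would live on the script class; the names are illustrative):

import gradio as gr

def ui(self, *args, **kwargs):
    enabled = gr.Checkbox(label='Enabled', value=False)
    strength = gr.Slider(label='Strength', minimum=0.0, maximum=1.0, step=0.01, value=0.5)
    return enabled, strength

def process_before_every_sampling(self, p, *script_args, **kwargs):
    enabled, strength = script_args  # same order as the tuple returned from ui()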
+ + enabled, sharpness_multiplier, sharpness_method, tonemap_multiplier, tonemap_method, tonemap_percentile, contrast_multiplier, combat_method, combat_cfg_drift, rescale_cfg_phi, extra_noise_type, extra_noise_method, extra_noise_multiplier, extra_noise_lowpass, divisive_norm_size, divisive_norm_multiplier, spectral_mod_mode, spectral_mod_percentile, spectral_mod_multiplier, affect_uncond, dyn_cfg_augmentation = script_args + + if not enabled: + return + + unet = p.sd_model.forge_objects.unet + + unet = opModelSamplerLatentMegaModifier(unet, sharpness_multiplier, sharpness_method, tonemap_multiplier, tonemap_method, tonemap_percentile, contrast_multiplier, combat_method, combat_cfg_drift, rescale_cfg_phi, extra_noise_type, extra_noise_method, extra_noise_multiplier, extra_noise_lowpass, divisive_norm_size, divisive_norm_multiplier, spectral_mod_mode, spectral_mod_percentile, spectral_mod_multiplier, affect_uncond, dyn_cfg_augmentation, seed=p.seeds[0])[0] + + p.sd_model.forge_objects.unet = unet + + # Below codes will add some logs to the texts below the image outputs on UI. + # The extra_generation_params does not influence results. + p.extra_generation_params.update(dict( + latent_modifier_enabled=enabled, + latent_modifier_sharpness_multiplier=sharpness_multiplier, + latent_modifier_sharpness_method=sharpness_method, + latent_modifier_tonemap_multiplier=tonemap_multiplier, + latent_modifier_tonemap_method=tonemap_method, + latent_modifier_tonemap_percentile=tonemap_percentile, + latent_modifier_contrast_multiplier=contrast_multiplier, + latent_modifier_combat_method=combat_method, + latent_modifier_combat_cfg_drift=combat_cfg_drift, + latent_modifier_rescale_cfg_phi=rescale_cfg_phi, + latent_modifier_extra_noise_type=extra_noise_type, + latent_modifier_extra_noise_method=extra_noise_method, + latent_modifier_extra_noise_multiplier=extra_noise_multiplier, + latent_modifier_extra_noise_lowpass=extra_noise_lowpass, + latent_modifier_divisive_norm_size=divisive_norm_size, + latent_modifier_divisive_norm_multiplier=divisive_norm_multiplier, + latent_modifier_spectral_mod_mode=spectral_mod_mode, + latent_modifier_spectral_mod_percentile=spectral_mod_percentile, + latent_modifier_spectral_mod_multiplier=spectral_mod_multiplier, + latent_modifier_affect_uncond=affect_uncond, + latent_modifier_dyn_cfg_augmentation=dyn_cfg_augmentation, + )) + + return diff --git a/extensions-builtin/sd_forge_multidiffusion/lib_multidiffusion/tiled_diffusion.py b/extensions-builtin/sd_forge_multidiffusion/lib_multidiffusion/tiled_diffusion.py new file mode 100644 index 0000000000000000000000000000000000000000..398904043aede759f9d14928d929d85c540f2cdb --- /dev/null +++ b/extensions-builtin/sd_forge_multidiffusion/lib_multidiffusion/tiled_diffusion.py @@ -0,0 +1,616 @@ +# Tiled Diffusion +# 1st edit by https://github.com/pkuliyi2015/multidiffusion-upscaler-for-automatic1111 +# 2nd edit by https://github.com/shiimizu/ComfyUI-TiledDiffusion +# 3rd edit by Forge Official + + +from __future__ import division +import torch +from torch import Tensor +import ldm_patched.modules.model_management +from ldm_patched.modules.model_patcher import ModelPatcher +import ldm_patched.modules.model_patcher +from ldm_patched.modules.model_base import BaseModel +from typing import List, Union, Tuple, Dict +from ldm_patched.contrib.external import ImageScale +import ldm_patched.modules.utils +from ldm_patched.modules.controlnet import ControlNet, T2IAdapter + +opt_C = 4 +opt_f = 8 + +def ceildiv(big, small): + # Correct ceiling division 
that avoids floating-point errors and importing math.ceil. + return -(big // -small) + +from enum import Enum +class BlendMode(Enum): # i.e. LayerType + FOREGROUND = 'Foreground' + BACKGROUND = 'Background' + +class Processing: ... +class Device: ... +devices = Device() +devices.device = ldm_patched.modules.model_management.get_torch_device() + +def null_decorator(fn): + def wrapper(*args, **kwargs): + return fn(*args, **kwargs) + return wrapper + +keep_signature = null_decorator +controlnet = null_decorator +stablesr = null_decorator +grid_bbox = null_decorator +custom_bbox = null_decorator +noise_inverse = null_decorator + +class BBox: + ''' grid bbox ''' + + def __init__(self, x:int, y:int, w:int, h:int): + self.x = x + self.y = y + self.w = w + self.h = h + self.box = [x, y, x+w, y+h] + self.slicer = slice(None), slice(None), slice(y, y+h), slice(x, x+w) + + def __getitem__(self, idx:int) -> int: + return self.box[idx] + +def split_bboxes(w:int, h:int, tile_w:int, tile_h:int, overlap:int=16, init_weight:Union[Tensor, float]=1.0) -> Tuple[List[BBox], Tensor]: + cols = ceildiv((w - overlap) , (tile_w - overlap)) + rows = ceildiv((h - overlap) , (tile_h - overlap)) + dx = (w - tile_w) / (cols - 1) if cols > 1 else 0 + dy = (h - tile_h) / (rows - 1) if rows > 1 else 0 + + bbox_list: List[BBox] = [] + weight = torch.zeros((1, 1, h, w), device=devices.device, dtype=torch.float32) + for row in range(rows): + y = min(int(row * dy), h - tile_h) + for col in range(cols): + x = min(int(col * dx), w - tile_w) + + bbox = BBox(x, y, tile_w, tile_h) + bbox_list.append(bbox) + weight[bbox.slicer] += init_weight + + return bbox_list, weight + +class CustomBBox(BBox): + ''' region control bbox ''' + pass + +class AbstractDiffusion: + def __init__(self): + self.method = self.__class__.__name__ + self.pbar = None + + + self.w: int = 0 + self.h: int = 0 + self.tile_width: int = None + self.tile_height: int = None + self.tile_overlap: int = None + self.tile_batch_size: int = None + + # cache. final result of current sampling step, [B, C=4, H//8, W//8] + # avoiding overhead of creating new tensors and weight summing + self.x_buffer: Tensor = None + # self.w: int = int(self.p.width // opt_f) # latent size + # self.h: int = int(self.p.height // opt_f) + # weights for background & grid bboxes + self._weights: Tensor = None + # self.weights: Tensor = torch.zeros((1, 1, self.h, self.w), device=devices.device, dtype=torch.float32) + self._init_grid_bbox = None + self._init_done = None + + # count the step correctly + self.step_count = 0 + self.inner_loop_count = 0 + self.kdiff_step = -1 + + # ext. Grid tiling painting (grid bbox) + self.enable_grid_bbox: bool = False + self.tile_w: int = None + self.tile_h: int = None + self.tile_bs: int = None + self.num_tiles: int = None + self.num_batches: int = None + self.batched_bboxes: List[List[BBox]] = [] + + # ext. Region Prompt Control (custom bbox) + self.enable_custom_bbox: bool = False + self.custom_bboxes: List[CustomBBox] = [] + # self.cond_basis: Cond = None + # self.uncond_basis: Uncond = None + # self.draw_background: bool = True # by default we draw major prompts in grid tiles + # self.causal_layers: bool = None + + # ext. 
ControlNet + self.enable_controlnet: bool = False + # self.controlnet_script: ModuleType = None + self.control_tensor_batch_dict = {} + self.control_tensor_batch: List[List[Tensor]] = [[]] + # self.control_params: Dict[str, Tensor] = None # {} + self.control_params: Dict[Tuple, List[List[Tensor]]] = {} + self.control_tensor_cpu: bool = None + self.control_tensor_custom: List[List[Tensor]] = [] + + self.draw_background: bool = True # by default we draw major prompts in grid tiles + self.control_tensor_cpu = False + self.weights = None + self.imagescale = ImageScale() + + def reset(self): + tile_width = self.tile_width + tile_height = self.tile_height + tile_overlap = self.tile_overlap + tile_batch_size = self.tile_batch_size + self.__init__() + self.tile_width = tile_width + self.tile_height = tile_height + self.tile_overlap = tile_overlap + self.tile_batch_size = tile_batch_size + + def repeat_tensor(self, x:Tensor, n:int, concat=False, concat_to=0) -> Tensor: + ''' repeat the tensor on it's first dim ''' + if n == 1: return x + B = x.shape[0] + r_dims = len(x.shape) - 1 + if B == 1: # batch_size = 1 (not `tile_batch_size`) + shape = [n] + [-1] * r_dims # [N, -1, ...] + return x.expand(shape) # `expand` is much lighter than `tile` + else: + if concat: + return torch.cat([x for _ in range(n)], dim=0)[:concat_to] + shape = [n] + [1] * r_dims # [N, 1, ...] + return x.repeat(shape) + def update_pbar(self): + if self.pbar.n >= self.pbar.total: + self.pbar.close() + else: + # self.pbar.update() + sampling_step = 20 + if self.step_count == sampling_step: + self.inner_loop_count += 1 + if self.inner_loop_count < self.total_bboxes: + self.pbar.update() + else: + self.step_count = sampling_step + self.inner_loop_count = 0 + def reset_buffer(self, x_in:Tensor): + # Judge if the shape of x_in is the same as the shape of x_buffer + if self.x_buffer is None or self.x_buffer.shape != x_in.shape: + self.x_buffer = torch.zeros_like(x_in, device=x_in.device, dtype=x_in.dtype) + else: + self.x_buffer.zero_() + + @grid_bbox + def init_grid_bbox(self, tile_w:int, tile_h:int, overlap:int, tile_bs:int): + # if self._init_grid_bbox is not None: return + # self._init_grid_bbox = True + self.weights = torch.zeros((1, 1, self.h, self.w), device=devices.device, dtype=torch.float32) + self.enable_grid_bbox = True + + self.tile_w = min(tile_w, self.w) + self.tile_h = min(tile_h, self.h) + overlap = max(0, min(overlap, min(tile_w, tile_h) - 4)) + # split the latent into overlapped tiles, then batching + # weights basically indicate how many times a pixel is painted + bboxes, weights = split_bboxes(self.w, self.h, self.tile_w, self.tile_h, overlap, self.get_tile_weights()) + self.weights += weights + self.num_tiles = len(bboxes) + self.num_batches = ceildiv(self.num_tiles , tile_bs) + self.tile_bs = ceildiv(len(bboxes) , self.num_batches) # optimal_batch_size + self.batched_bboxes = [bboxes[i*self.tile_bs:(i+1)*self.tile_bs] for i in range(self.num_batches)] + + @grid_bbox + def get_tile_weights(self) -> Union[Tensor, float]: + return 1.0 + + @noise_inverse + def init_noise_inverse(self, steps:int, retouch:float, get_cache_callback, set_cache_callback, renoise_strength:float, renoise_kernel:int): + self.noise_inverse_enabled = True + self.noise_inverse_steps = steps + self.noise_inverse_retouch = float(retouch) + self.noise_inverse_renoise_strength = float(renoise_strength) + self.noise_inverse_renoise_kernel = int(renoise_kernel) + self.noise_inverse_set_cache = set_cache_callback + self.noise_inverse_get_cache = 
get_cache_callback + + def init_done(self): + ''' + Call this after all `init_*`, settings are done, now perform: + - settings sanity check + - pre-computations, cache init + - anything thing needed before denoising starts + ''' + + # if self._init_done is not None: return + # self._init_done = True + self.total_bboxes = 0 + if self.enable_grid_bbox: self.total_bboxes += self.num_batches + if self.enable_custom_bbox: self.total_bboxes += len(self.custom_bboxes) + assert self.total_bboxes > 0, "Nothing to paint! No background to draw and no custom bboxes were provided." + + # sampling_steps = _steps + # self.pbar = tqdm(total=(self.total_bboxes) * sampling_steps, desc=f"{self.method} Sampling: ") + + @controlnet + def prepare_controlnet_tensors(self, refresh:bool=False, tensor=None): + ''' Crop the control tensor into tiles and cache them ''' + if not refresh: + if self.control_tensor_batch is not None or self.control_params is not None: return + tensors = [tensor] + self.org_control_tensor_batch = tensors + self.control_tensor_batch = [] + for i in range(len(tensors)): + control_tile_list = [] + control_tensor = tensors[i] + for bboxes in self.batched_bboxes: + single_batch_tensors = [] + for bbox in bboxes: + if len(control_tensor.shape) == 3: + control_tensor.unsqueeze_(0) + control_tile = control_tensor[:, :, bbox[1]*opt_f:bbox[3]*opt_f, bbox[0]*opt_f:bbox[2]*opt_f] + single_batch_tensors.append(control_tile) + control_tile = torch.cat(single_batch_tensors, dim=0) + if self.control_tensor_cpu: + control_tile = control_tile.cpu() + control_tile_list.append(control_tile) + self.control_tensor_batch.append(control_tile_list) + + if len(self.custom_bboxes) > 0: + custom_control_tile_list = [] + for bbox in self.custom_bboxes: + if len(control_tensor.shape) == 3: + control_tensor.unsqueeze_(0) + control_tile = control_tensor[:, :, bbox[1]*opt_f:bbox[3]*opt_f, bbox[0]*opt_f:bbox[2]*opt_f] + if self.control_tensor_cpu: + control_tile = control_tile.cpu() + custom_control_tile_list.append(control_tile) + self.control_tensor_custom.append(custom_control_tile_list) + + @controlnet + def switch_controlnet_tensors(self, batch_id:int, x_batch_size:int, tile_batch_size:int, is_denoise=False): + # if not self.enable_controlnet: return + if self.control_tensor_batch is None: return + # self.control_params = [0] + + # for param_id in range(len(self.control_params)): + for param_id in range(len(self.control_tensor_batch)): + # tensor that was concatenated in `prepare_controlnet_tensors` + control_tile = self.control_tensor_batch[param_id][batch_id] + # broadcast to latent batch size + if x_batch_size > 1: # self.is_kdiff: + all_control_tile = [] + for i in range(tile_batch_size): + this_control_tile = [control_tile[i].unsqueeze(0)] * x_batch_size + all_control_tile.append(torch.cat(this_control_tile, dim=0)) + control_tile = torch.cat(all_control_tile, dim=0) # [:x_tile.shape[0]] + self.control_tensor_batch[param_id][batch_id] = control_tile + # else: + # control_tile = control_tile.repeat([x_batch_size if is_denoise else x_batch_size * 2, 1, 1, 1]) + # self.control_params[param_id].hint_cond = control_tile.to(devices.device) + + def process_controlnet(self, x_shape, x_dtype, c_in: dict, cond_or_uncond: List, bboxes, batch_size: int, batch_id: int): + control: ControlNet = c_in['control_model'] + param_id = -1 # current controlnet & previous_controlnets + tuple_key = tuple(cond_or_uncond) + tuple(x_shape) + while control is not None: + param_id += 1 + PH, PW = self.h*8, self.w*8 + + if 
self.control_params.get(tuple_key, None) is None: + self.control_params[tuple_key] = [[None]] + val = self.control_params[tuple_key] + if param_id+1 >= len(val): + val.extend([[None] for _ in range(param_id+1)]) + if len(self.batched_bboxes) >= len(val[param_id]): + val[param_id].extend([[None] for _ in range(len(self.batched_bboxes))]) + + # Below is taken from ldm_patched.modules.controlnet.py, but we need to additionally tile the cnets. + # if statement: eager eval. first time when cond_hint is None. + if self.refresh or control.cond_hint is None or not isinstance(self.control_params[tuple_key][param_id][batch_id], Tensor): + dtype = getattr(control, 'manual_cast_dtype', None) + if dtype is None: dtype = getattr(getattr(control, 'control_model', None), 'dtype', None) + if dtype is None: dtype = x_dtype + if isinstance(control, T2IAdapter): + width, height = control.scale_image_to(PW, PH) + control.cond_hint = ldm_patched.modules.utils.common_upscale(control.cond_hint_original, width, height, 'nearest-exact', "center").float().to(control.device) + if control.channels_in == 1 and control.cond_hint.shape[1] > 1: + control.cond_hint = torch.mean(control.cond_hint, 1, keepdim=True) + elif control.__class__.__name__ == 'ControlLLLiteAdvanced': + if control.sub_idxs is not None and control.cond_hint_original.shape[0] >= control.full_latent_length: + control.cond_hint = ldm_patched.modules.utils.common_upscale(control.cond_hint_original[control.sub_idxs], PW, PH, 'nearest-exact', "center").to(dtype=dtype, device=control.device) + else: + if (PH, PW) == (control.cond_hint_original.shape[-2], control.cond_hint_original.shape[-1]): + control.cond_hint = control.cond_hint_original.clone().to(dtype=dtype, device=control.device) + else: + control.cond_hint = ldm_patched.modules.utils.common_upscale(control.cond_hint_original, PW, PH, 'nearest-exact', "center").to(dtype=dtype, device=control.device) + else: + if (PH, PW) == (control.cond_hint_original.shape[-2], control.cond_hint_original.shape[-1]): + control.cond_hint = control.cond_hint_original.clone().to(dtype=dtype, device=control.device) + else: + control.cond_hint = ldm_patched.modules.utils.common_upscale(control.cond_hint_original, PW, PH, 'nearest-exact', 'center').to(dtype=dtype, device=control.device) + + # Broadcast then tile + # + # Below can be in the parent's if clause because self.refresh will trigger on resolution change, e.g. cause of ConditioningSetArea + # so that particular case isn't cached atm. + cond_hint_pre_tile = control.cond_hint + if control.cond_hint.shape[0] < batch_size : + cond_hint_pre_tile = self.repeat_tensor(control.cond_hint, ceildiv(batch_size, control.cond_hint.shape[0]))[:batch_size] + cns = [cond_hint_pre_tile[:, :, bbox[1]*opt_f:bbox[3]*opt_f, bbox[0]*opt_f:bbox[2]*opt_f] for bbox in bboxes] + control.cond_hint = torch.cat(cns, dim=0) + self.control_params[tuple_key][param_id][batch_id]=control.cond_hint + else: + control.cond_hint = self.control_params[tuple_key][param_id][batch_id] + control = control.previous_controlnet + +import numpy as np +from numpy import pi, exp, sqrt +def gaussian_weights(tile_w:int, tile_h:int) -> Tensor: + ''' + Copy from the original implementation of Mixture of Diffusers + https://github.com/albarji/mixture-of-diffusers/blob/master/mixdiff/tiling.py + This generates gaussian weights to smooth the noise of each tile. + This is critical for this method to work. 
+ ''' + f = lambda x, midpoint, var=0.01: exp(-(x-midpoint)*(x-midpoint) / (tile_w*tile_w) / (2*var)) / sqrt(2*pi*var) + x_probs = [f(x, (tile_w - 1) / 2) for x in range(tile_w)] # -1 because index goes from 0 to latent_width - 1 + y_probs = [f(y, tile_h / 2) for y in range(tile_h)] + + w = np.outer(y_probs, x_probs) + return torch.from_numpy(w).to(devices.device, dtype=torch.float32) + +class CondDict: ... + +class MultiDiffusion(AbstractDiffusion): + + @torch.no_grad() + def __call__(self, model_function: BaseModel.apply_model, args: dict): + x_in: Tensor = args["input"] + t_in: Tensor = args["timestep"] + c_in: dict = args["c"] + cond_or_uncond: List = args["cond_or_uncond"] + c_crossattn: Tensor = c_in['c_crossattn'] + + N, C, H, W = x_in.shape + + # ldm_patched.modulesui can feed in a latent that's a different size cause of SetArea, so we'll refresh in that case. + self.refresh = False + if self.weights is None or self.h != H or self.w != W: + self.h, self.w = H, W + self.refresh = True + self.init_grid_bbox(self.tile_width, self.tile_height, self.tile_overlap, self.tile_batch_size) + # init everything done, perform sanity check & pre-computations + self.init_done() + self.h, self.w = H, W + # clear buffer canvas + self.reset_buffer(x_in) + + # Background sampling (grid bbox) + if self.draw_background: + for batch_id, bboxes in enumerate(self.batched_bboxes): + if ldm_patched.modules.model_management.processing_interrupted(): + # self.pbar.close() + return x_in + + # batching & compute tiles + x_tile = torch.cat([x_in[bbox.slicer] for bbox in bboxes], dim=0) # [TB, C, TH, TW] + n_rep = len(bboxes) + ts_tile = self.repeat_tensor(t_in, n_rep) + cond_tile = self.repeat_tensor(c_crossattn, n_rep) + c_tile = c_in.copy() + c_tile['c_crossattn'] = cond_tile + if 'time_context' in c_in: + c_tile['time_context'] = self.repeat_tensor(c_in['time_context'], n_rep) + for key in c_tile: + if key in ['y', 'c_concat']: + icond = c_tile[key] + if icond.shape[2:] == (self.h, self.w): + c_tile[key] = torch.cat([icond[bbox.slicer] for bbox in bboxes]) + else: + c_tile[key] = self.repeat_tensor(icond, n_rep) + + # controlnet tiling + # self.switch_controlnet_tensors(batch_id, N, len(bboxes)) + if 'control' in c_in: + self.process_controlnet(x_tile.shape, x_tile.dtype, c_in, cond_or_uncond, bboxes, N, batch_id) + c_tile['control'] = c_in['control_model'].get_control(x_tile, ts_tile, c_tile, len(cond_or_uncond)) + + # stablesr tiling + # self.switch_stablesr_tensors(batch_id) + + x_tile_out = model_function(x_tile, ts_tile, **c_tile) + + for i, bbox in enumerate(bboxes): + self.x_buffer[bbox.slicer] += x_tile_out[i*N:(i+1)*N, :, :, :] + del x_tile_out, x_tile, ts_tile, c_tile + + # update progress bar + # self.update_pbar() + + # Averaging background buffer + x_out = torch.where(self.weights > 1, self.x_buffer / self.weights, self.x_buffer) + + return x_out + +class MixtureOfDiffusers(AbstractDiffusion): + """ + Mixture-of-Diffusers Implementation + https://github.com/albarji/mixture-of-diffusers + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + # weights for custom bboxes + self.custom_weights: List[Tensor] = [] + self.get_weight = gaussian_weights + + def init_done(self): + super().init_done() + # The original gaussian weights can be extremely small, so we rescale them for numerical stability + self.rescale_factor = 1 / self.weights + # Meanwhile, we rescale the custom weights in advance to save time of slicing + for bbox_id, bbox in enumerate(self.custom_bboxes): + if 
bbox.blend_mode == BlendMode.BACKGROUND: + self.custom_weights[bbox_id] *= self.rescale_factor[bbox.slicer] + + @grid_bbox + def get_tile_weights(self) -> Tensor: + # weights for grid bboxes + # if not hasattr(self, 'tile_weights'): + # x_in can change sizes cause of ConditioningSetArea, so we have to recalcualte each time + self.tile_weights = self.get_weight(self.tile_w, self.tile_h) + return self.tile_weights + + @torch.no_grad() + def __call__(self, model_function: BaseModel.apply_model, args: dict): + x_in: Tensor = args["input"] + t_in: Tensor = args["timestep"] + c_in: dict = args["c"] + cond_or_uncond: List= args["cond_or_uncond"] + c_crossattn: Tensor = c_in['c_crossattn'] + + N, C, H, W = x_in.shape + + self.refresh = False + # self.refresh = True + if self.weights is None or self.h != H or self.w != W: + self.h, self.w = H, W + self.refresh = True + self.init_grid_bbox(self.tile_width, self.tile_height, self.tile_overlap, self.tile_batch_size) + # init everything done, perform sanity check & pre-computations + self.init_done() + self.h, self.w = H, W + # clear buffer canvas + self.reset_buffer(x_in) + + # self.pbar = tqdm(total=(self.total_bboxes) * sampling_steps, desc=f"{self.method} Sampling: ") + # self.pbar = tqdm(total=len(self.batched_bboxes), desc=f"{self.method} Sampling: ") + + # Global sampling + if self.draw_background: + for batch_id, bboxes in enumerate(self.batched_bboxes): # batch_id is the `Latent tile batch size` + if ldm_patched.modules.model_management.processing_interrupted(): + # self.pbar.close() + return x_in + + # batching + x_tile_list = [] + t_tile_list = [] + icond_map = {} + # tcond_tile_list = [] + # icond_tile_list = [] + # vcond_tile_list = [] + # control_list = [] + for bbox in bboxes: + x_tile_list.append(x_in[bbox.slicer]) + t_tile_list.append(t_in) + if isinstance(c_in, dict): + # tcond + # tcond_tile = c_crossattn #self.get_tcond(c_in) # cond, [1, 77, 768] + # tcond_tile_list.append(tcond_tile) + # present in sdxl + for key in ['y', 'c_concat']: + if key in c_in: + icond=c_in[key] # self.get_icond(c_in) + if icond.shape[2:] == (self.h, self.w): + icond = icond[bbox.slicer] + if icond_map.get(key, None) is None: + icond_map[key] = [] + icond_map[key].append(icond) + # # vcond: + # vcond = self.get_vcond(c_in) + # vcond_tile_list.append(vcond) + else: + print('>> [WARN] not supported, make an issue on github!!') + n_rep = len(bboxes) + x_tile = torch.cat(x_tile_list, dim=0) # differs each + t_tile = self.repeat_tensor(t_in, n_rep) # just repeat + tcond_tile = self.repeat_tensor(c_crossattn, n_rep) # just repeat + c_tile = c_in.copy() + c_tile['c_crossattn'] = tcond_tile + if 'time_context' in c_in: + c_tile['time_context'] = self.repeat_tensor(c_in['time_context'], n_rep) # just repeat + for key in c_tile: + if key in ['y', 'c_concat']: + icond_tile = torch.cat(icond_map[key], dim=0) # differs each + c_tile[key] = icond_tile + # vcond_tile = torch.cat(vcond_tile_list, dim=0) if None not in vcond_tile_list else None # just repeat + + # controlnet + # self.switch_controlnet_tensors(batch_id, N, len(bboxes), is_denoise=True) + if 'control' in c_in: + control=c_in['control'] + self.process_controlnet(x_tile.shape, x_tile.dtype, c_in, cond_or_uncond, bboxes, N, batch_id) + c_tile['control'] = control.get_control(x_tile, t_tile, c_tile, len(cond_or_uncond)) + + # stablesr + # self.switch_stablesr_tensors(batch_id) + + # denoising: here the x is the noise + x_tile_out = model_function(x_tile, t_tile, **c_tile) + + # de-batching + for i, bbox in 
enumerate(bboxes): + # These weights can be calcluated in advance, but will cost a lot of vram + # when you have many tiles. So we calculate it here. + w = self.tile_weights * self.rescale_factor[bbox.slicer] + self.x_buffer[bbox.slicer] += x_tile_out[i*N:(i+1)*N, :, :, :] * w + del x_tile_out, x_tile, t_tile, c_tile + + # self.update_pbar() + # self.pbar.update() + # self.pbar.close() + x_out = self.x_buffer + + return x_out + + +MAX_RESOLUTION=8192 +class TiledDiffusion(): + @classmethod + def INPUT_TYPES(s): + return {"required": {"model": ("MODEL", ), + "method": (["MultiDiffusion", "Mixture of Diffusers"], {"default": "Mixture of Diffusers"}), + # "tile_width": ("INT", {"default": 96, "min": 16, "max": 256, "step": 16}), + "tile_width": ("INT", {"default": 96*opt_f, "min": 16, "max": MAX_RESOLUTION, "step": 16}), + # "tile_height": ("INT", {"default": 96, "min": 16, "max": 256, "step": 16}), + "tile_height": ("INT", {"default": 96*opt_f, "min": 16, "max": MAX_RESOLUTION, "step": 16}), + "tile_overlap": ("INT", {"default": 8*opt_f, "min": 0, "max": 256*opt_f, "step": 4*opt_f}), + "tile_batch_size": ("INT", {"default": 4, "min": 1, "max": MAX_RESOLUTION, "step": 1}), + }} + RETURN_TYPES = ("MODEL",) + FUNCTION = "apply" + CATEGORY = "_for_testing" + + def apply(self, model: ModelPatcher, method, tile_width, tile_height, tile_overlap, tile_batch_size): + if method == "Mixture of Diffusers": + implement = MixtureOfDiffusers() + else: + implement = MultiDiffusion() + + # if noise_inversion: + # get_cache_callback = self.noise_inverse_get_cache + # set_cache_callback = None # lambda x0, xt, prompts: self.noise_inverse_set_cache(p, x0, xt, prompts, steps, retouch) + # implement.init_noise_inverse(steps, retouch, get_cache_callback, set_cache_callback, renoise_strength, renoise_kernel_size) + + implement.tile_width = tile_width // opt_f + implement.tile_height = tile_height // opt_f + implement.tile_overlap = tile_overlap // opt_f + implement.tile_batch_size = tile_batch_size + # implement.init_grid_bbox(tile_width, tile_height, tile_overlap, tile_batch_size) + # # init everything done, perform sanity check & pre-computations + # implement.init_done() + # hijack the behaviours + # implement.hook() + model = model.clone() + model.set_model_unet_function_wrapper(implement) + model.model_options['tiled_diffusion'] = True + return (model,) diff --git a/extensions-builtin/sd_forge_multidiffusion/scripts/forge_multidiffusion.py b/extensions-builtin/sd_forge_multidiffusion/scripts/forge_multidiffusion.py new file mode 100644 index 0000000000000000000000000000000000000000..cbc9225e92ecdc1f348657edd32a033e8c72805b --- /dev/null +++ b/extensions-builtin/sd_forge_multidiffusion/scripts/forge_multidiffusion.py @@ -0,0 +1,58 @@ +import gradio as gr +from modules import scripts + +from lib_multidiffusion.tiled_diffusion import TiledDiffusion + + +opTiledDiffusion = TiledDiffusion().apply + + +class MultiDiffusionForForge(scripts.Script): + sorting_priority = 16 + + def title(self): + return "MultiDiffusion Integrated" + + def show(self, is_img2img): + # make this extension visible in both txt2img and img2img tab. 
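Both samplers above recombine overlapping tiles the same way: every tile's output is accumulated into a shared buffer under a per-tile weight window (uniform for MultiDiffusion, the window from gaussian_weights for Mixture of Diffusers), and each pixel is then normalized by the total weight it received. A 1-D sketch of that recombination with two overlapping tiles (sizes reduced for clarity; torch assumed):

import torch

weights = torch.zeros(16)
buffer = torch.zeros(16)
tile_slices = [slice(0, 10), slice(6, 16)]     # two overlapping tiles covering a 16-wide row
tile_weight = torch.ones(10)                   # uniform window; a gaussian window works the same way
tile_outputs = [torch.ones(10), 2 * torch.ones(10)]

for s, tile_out in zip(tile_slices, tile_outputs):
    weights[s] += tile_weight
    buffer[s] += tile_out * tile_weight

blended = buffer / weights                     # weighted average wherever tiles overlap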
+ return scripts.AlwaysVisible + + def ui(self, *args, **kwargs): + with gr.Accordion(open=False, label=self.title()): + enabled = gr.Checkbox(label='Enabled', value=False) + method = gr.Radio(label='Method', choices=['MultiDiffusion', 'Mixture of Diffusers'], + value='MultiDiffusion') + tile_width = gr.Slider(label='Tile Width', minimum=16, maximum=8192, step=16, value=768) + tile_height = gr.Slider(label='Tile Height', minimum=16, maximum=8192, step=16, value=768) + tile_overlap = gr.Slider(label='Tile Overlap', minimum=0, maximum=2048, step=32, value=64) + tile_batch_size = gr.Slider(label='Tile Batch Size', minimum=1, maximum=8192, step=1, value=4) + + return enabled, method, tile_width, tile_height, tile_overlap, tile_batch_size + + def process_before_every_sampling(self, p, *script_args, **kwargs): + # This will be called before every sampling. + # If you use highres fix, this will be called twice. + + enabled, method, tile_width, tile_height, tile_overlap, tile_batch_size = script_args + + if not enabled: + return + + unet = p.sd_model.forge_objects.unet + + unet = opTiledDiffusion(unet, method, tile_width, tile_height, tile_overlap, tile_batch_size)[0] + + p.sd_model.forge_objects.unet = unet + + # Below codes will add some logs to the texts below the image outputs on UI. + # The extra_generation_params does not influence results. + p.extra_generation_params.update(dict( + multidiffusion_enabled=enabled, + multidiffusion_method=method, + multidiffusion_tile_width=tile_width, + multidiffusion_tile_height=tile_height, + multidiffusion_tile_overlap=tile_overlap, + multidiffusion_tile_batch_size=tile_batch_size, + )) + + return diff --git a/extensions-builtin/sd_forge_neveroom/scripts/forge_never_oom.py b/extensions-builtin/sd_forge_neveroom/scripts/forge_never_oom.py new file mode 100644 index 0000000000000000000000000000000000000000..374e81233fe4c4e4a3e09f8fdc7340780d33d4fb --- /dev/null +++ b/extensions-builtin/sd_forge_neveroom/scripts/forge_never_oom.py @@ -0,0 +1,47 @@ +import gradio as gr + +from modules import scripts +from ldm_patched.modules import model_management + + +class NeverOOMForForge(scripts.Script): + sorting_priority = 18 + + def __init__(self): + self.previous_unet_enabled = False + self.original_vram_state = model_management.vram_state + + def title(self): + return "Never OOM Integrated" + + def show(self, is_img2img): + return scripts.AlwaysVisible + + def ui(self, *args, **kwargs): + with gr.Accordion(open=False, label=self.title()): + unet_enabled = gr.Checkbox(label='Enabled for UNet (always maximize offload)', value=False) + vae_enabled = gr.Checkbox(label='Enabled for VAE (always tiled)', value=False) + return unet_enabled, vae_enabled + + def process(self, p, *script_args, **kwargs): + unet_enabled, vae_enabled = script_args + + if unet_enabled: + print('NeverOOM Enabled for UNet (always maximize offload)') + + if vae_enabled: + print('NeverOOM Enabled for VAE (always tiled)') + + model_management.VAE_ALWAYS_TILED = vae_enabled + + if self.previous_unet_enabled != unet_enabled: + model_management.unload_all_models() + if unet_enabled: + self.original_vram_state = model_management.vram_state + model_management.vram_state = model_management.VRAMState.NO_VRAM + else: + model_management.vram_state = self.original_vram_state + print(f'VARM State Changed To {model_management.vram_state.name}') + self.previous_unet_enabled = unet_enabled + + return diff --git a/extensions-builtin/sd_forge_photomaker/scripts/forge_photomaker.py 
b/extensions-builtin/sd_forge_photomaker/scripts/forge_photomaker.py new file mode 100644 index 0000000000000000000000000000000000000000..016fbaeefa33b6b9b9415cd157196c6f1089dfdf --- /dev/null +++ b/extensions-builtin/sd_forge_photomaker/scripts/forge_photomaker.py @@ -0,0 +1,67 @@ +from modules_forge.supported_preprocessor import Preprocessor, PreprocessorParameter +from modules_forge.shared import add_supported_preprocessor +from modules_forge.shared import add_supported_control_model +from modules_forge.supported_controlnet import ControlModelPatcher +from ldm_patched.contrib.external_photomaker import PhotoMakerEncode, PhotoMakerIDEncoder + + +opPhotoMakerEncode = PhotoMakerEncode().apply_photomaker + + +class PreprocessorClipvisionForPhotomaker(Preprocessor): + def __init__(self, name): + super().__init__() + self.name = name + self.tags = ['PhotoMaker'] + self.model_filename_filters = ['PhotoMaker', 'Photo_Maker', 'Photo-Maker'] + self.sorting_priority = 20 + self.slider_resolution = PreprocessorParameter(visible=False) + self.corp_image_with_a1111_mask_when_in_img2img_inpaint_tab = False + self.show_control_mode = False + + +add_supported_preprocessor(PreprocessorClipvisionForPhotomaker( + name='ClipVision (Photomaker)', +)) + + +class PhotomakerPatcher(ControlModelPatcher): + @staticmethod + def try_build_from_state_dict(state_dict, ckpt_path): + if "id_encoder" not in state_dict: + return None + + state_dict = state_dict["id_encoder"] + + photomaker_model = PhotoMakerIDEncoder() + photomaker_model.load_state_dict(state_dict) + + return PhotomakerPatcher(photomaker_model) + + def __init__(self, model): + super().__init__() + self.model = model + return + + def process_before_every_sampling(self, process, cond, mask, *args, **kwargs): + unet = process.sd_model.forge_objects.unet.clone() + clip = process.sd_model.forge_objects.clip + text = process.prompts[0] + + cond_modified = opPhotoMakerEncode(photomaker=self.model, image=cond.movedim(1, -1), clip=clip, text=text)[0] + cond_modified = unet.encode_conds_after_clip(conds=cond_modified, noise=kwargs['x'])[0] + + def conditioning_modifier(model, x, timestep, uncond, cond, cond_scale, model_options, seed): + cond = cond.copy() + for c in cond: + c['pooled_output'] = cond_modified['pooled_output'] + c['cross_attn'] = cond_modified['cross_attn'] + c['model_conds'].update(cond_modified['model_conds']) + return model, x, timestep, uncond, cond, cond_scale, model_options, seed + + unet.add_conditioning_modifier(conditioning_modifier) + process.sd_model.forge_objects.unet = unet + return + + +add_supported_control_model(PhotomakerPatcher) diff --git a/extensions-builtin/sd_forge_sag/scripts/forge_sag.py b/extensions-builtin/sd_forge_sag/scripts/forge_sag.py new file mode 100644 index 0000000000000000000000000000000000000000..764fe4635ce1cdd029ad114cd45348ba3e38f9d4 --- /dev/null +++ b/extensions-builtin/sd_forge_sag/scripts/forge_sag.py @@ -0,0 +1,45 @@ +import gradio as gr + +from modules import scripts +from ldm_patched.contrib.external_sag import SelfAttentionGuidance + + +opSelfAttentionGuidance = SelfAttentionGuidance() + + +class SAGForForge(scripts.Script): + sorting_priority = 12.5 + + def title(self): + return "SelfAttentionGuidance Integrated" + + def show(self, is_img2img): + return scripts.AlwaysVisible + + def ui(self, *args, **kwargs): + with gr.Accordion(open=False, label=self.title()): + enabled = gr.Checkbox(label='Enabled', value=False) + scale = gr.Slider(label='Scale', minimum=-2.0, maximum=5.0, step=0.01, value=0.5) + 
blur_sigma = gr.Slider(label='Blur Sigma', minimum=0.0, maximum=10.0, step=0.01, value=2.0) + + return enabled, scale, blur_sigma + + def process_before_every_sampling(self, p, *script_args, **kwargs): + enabled, scale, blur_sigma = script_args + + if not enabled: + return + + unet = p.sd_model.forge_objects.unet + + unet = opSelfAttentionGuidance.patch(unet, scale, blur_sigma)[0] + + p.sd_model.forge_objects.unet = unet + + p.extra_generation_params.update(dict( + sag_enabled=enabled, + sag_scale=scale, + sag_blur_sigma=blur_sigma + )) + + return diff --git a/extensions-builtin/sd_forge_stylealign/scripts/forge_stylealign.py b/extensions-builtin/sd_forge_stylealign/scripts/forge_stylealign.py new file mode 100644 index 0000000000000000000000000000000000000000..541e2be501918c9d4e2dac15833e3f9efd1ab22f --- /dev/null +++ b/extensions-builtin/sd_forge_stylealign/scripts/forge_stylealign.py @@ -0,0 +1,80 @@ +import torch +import gradio as gr + +from modules import scripts +import ldm_patched.ldm.modules.attention as attention + + +def sdp(q, k, v, transformer_options): + return attention.optimized_attention(q, k, v, heads=transformer_options["n_heads"], mask=None) + + +class StyleAlignForForge(scripts.Script): + sorting_priority = 17 + + def title(self): + return "StyleAlign Integrated" + + def show(self, is_img2img): + # make this extension visible in both txt2img and img2img tab. + return scripts.AlwaysVisible + + def ui(self, *args, **kwargs): + with gr.Accordion(open=False, label=self.title()): + shared_attention = gr.Checkbox(label='Share attention in batch', value=False) + + return [shared_attention] + + def process_before_every_sampling(self, p, *script_args, **kwargs): + # This will be called before every sampling. + # If you use highres fix, this will be called twice. + + shared_attention = script_args[0] + + if not shared_attention: + return + + unet = p.sd_model.forge_objects.unet.clone() + + def join(x): + b, f, c = x.shape + return x.reshape(1, b * f, c) + + def aligned_attention(q, k, v, transformer_options): + b, f, c = q.shape + o = sdp(join(q), join(k), join(v), transformer_options) + b2, f2, c2 = o.shape + o = o.reshape(b, b2 * f2 // b, c2) + return o + + def attn1_proc(q, k, v, transformer_options): + cond_indices = transformer_options['cond_indices'] + uncond_indices = transformer_options['uncond_indices'] + cond_or_uncond = transformer_options['cond_or_uncond'] + results = [] + + for cx in cond_or_uncond: + if cx == 0: + indices = cond_indices + else: + indices = uncond_indices + + if len(indices) > 0: + bq, bk, bv = q[indices], k[indices], v[indices] + bo = aligned_attention(bq, bk, bv, transformer_options) + results.append(bo) + + results = torch.cat(results, dim=0) + return results + + unet.set_model_replace_all(attn1_proc, 'attn1') + + p.sd_model.forge_objects.unet = unet + + # Below codes will add some logs to the texts below the image outputs on UI. + # The extra_generation_params does not influence results. 
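The shared attention above works by folding the batch dimension into the token dimension before self-attention runs, so every image in the batch attends over the concatenated tokens of all images, which is what aligns styles across the batch. A shape-only sketch of the reshape round-trip performed by join() and aligned_attention() (example sizes; torch assumed):

import torch

b, f, c = 4, 64, 320                    # batch, tokens, channels
q = torch.randn(b, f, c)

joined = q.reshape(1, b * f, c)         # one attention context shared by the whole batch
# ... optimized_attention runs on the joined q/k/v here ...
restored = joined.reshape(b, f, c)      # back to per-image token sequences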
+ p.extra_generation_params.update(dict( + stylealign_enabled=shared_attention, + )) + + return diff --git a/extensions-builtin/sd_forge_svd/scripts/forge_svd.py b/extensions-builtin/sd_forge_svd/scripts/forge_svd.py new file mode 100644 index 0000000000000000000000000000000000000000..73486a6a77a5ff8ea5b6bf8734edcc035977ff8f --- /dev/null +++ b/extensions-builtin/sd_forge_svd/scripts/forge_svd.py @@ -0,0 +1,121 @@ +import torch +import gradio as gr +import os +import pathlib + +import modules.infotext_utils as parameters_copypaste +from modules import script_callbacks +from modules.paths import models_path +from modules.ui_common import ToolButton, refresh_symbol +from modules.ui_components import ResizeHandleRow +from modules import shared + +from modules_forge.forge_util import numpy_to_pytorch, pytorch_to_numpy, write_images_to_mp4 +from ldm_patched.modules.sd import load_checkpoint_guess_config +from ldm_patched.contrib.external_video_model import VideoLinearCFGGuidance, SVD_img2vid_Conditioning +from ldm_patched.contrib.external import KSampler, VAEDecode + + +opVideoLinearCFGGuidance = VideoLinearCFGGuidance() +opSVD_img2vid_Conditioning = SVD_img2vid_Conditioning() +opKSampler = KSampler() +opVAEDecode = VAEDecode() + +svd_root = os.path.join(models_path, 'svd') +os.makedirs(svd_root, exist_ok=True) +svd_filenames = [] + + +def update_svd_filenames(): + global svd_filenames + svd_filenames = [ + pathlib.Path(x).name for x in + shared.walk_files(svd_root, allowed_extensions=[".pt", ".ckpt", ".safetensors"]) + ] + return svd_filenames + + +@torch.inference_mode() +@torch.no_grad() +def predict(filename, width, height, video_frames, motion_bucket_id, fps, augmentation_level, + sampling_seed, sampling_steps, sampling_cfg, sampling_sampler_name, sampling_scheduler, + sampling_denoise, guidance_min_cfg, input_image): + filename = os.path.join(svd_root, filename) + model_raw, _, vae, clip_vision = \ + load_checkpoint_guess_config(filename, output_vae=True, output_clip=False, output_clipvision=True) + model = opVideoLinearCFGGuidance.patch(model_raw, guidance_min_cfg)[0] + init_image = numpy_to_pytorch(input_image) + positive, negative, latent_image = opSVD_img2vid_Conditioning.encode( + clip_vision, init_image, vae, width, height, video_frames, motion_bucket_id, fps, augmentation_level) + output_latent = opKSampler.sample(model, sampling_seed, sampling_steps, sampling_cfg, + sampling_sampler_name, sampling_scheduler, positive, + negative, latent_image, sampling_denoise)[0] + output_pixels = opVAEDecode.decode(vae, output_latent)[0] + outputs = pytorch_to_numpy(output_pixels) + + video_filename = write_images_to_mp4(outputs, fps=fps) + + return outputs, video_filename + + +def on_ui_tabs(): + with gr.Blocks() as svd_block: + with ResizeHandleRow(): + with gr.Column(): + input_image = gr.Image(label='Input Image', source='upload', type='numpy', height=400) + + with gr.Row(): + filename = gr.Dropdown(label="SVD Checkpoint Filename", + choices=svd_filenames, + value=svd_filenames[0] if len(svd_filenames) > 0 else None) + refresh_button = ToolButton(value=refresh_symbol, tooltip="Refresh") + refresh_button.click( + fn=lambda: gr.update(choices=update_svd_filenames()), + inputs=[], outputs=filename) + + width = gr.Slider(label='Width', minimum=16, maximum=8192, step=8, value=1024) + height = gr.Slider(label='Height', minimum=16, maximum=8192, step=8, value=576) + video_frames = gr.Slider(label='Video Frames', minimum=1, maximum=4096, step=1, value=14) + motion_bucket_id = gr.Slider(label='Motion 
Bucket Id', minimum=1, maximum=1023, step=1, value=127) + fps = gr.Slider(label='Fps', minimum=1, maximum=1024, step=1, value=6) + augmentation_level = gr.Slider(label='Augmentation Level', minimum=0.0, maximum=10.0, step=0.01, + value=0.0) + sampling_steps = gr.Slider(label='Sampling Steps', minimum=1, maximum=200, step=1, value=20) + sampling_cfg = gr.Slider(label='CFG Scale', minimum=0.0, maximum=50.0, step=0.1, value=2.5) + sampling_denoise = gr.Slider(label='Sampling Denoise', minimum=0.0, maximum=1.0, step=0.01, value=1.0) + guidance_min_cfg = gr.Slider(label='Guidance Min Cfg', minimum=0.0, maximum=100.0, step=0.5, value=1.0) + sampling_sampler_name = gr.Radio(label='Sampler Name', + choices=['euler', 'euler_ancestral', 'heun', 'heunpp2', 'dpm_2', + 'dpm_2_ancestral', 'lms', 'dpm_fast', 'dpm_adaptive', + 'dpmpp_2s_ancestral', 'dpmpp_sde', 'dpmpp_sde_gpu', + 'dpmpp_2m', 'dpmpp_2m_sde', 'dpmpp_2m_sde_gpu', + 'dpmpp_3m_sde', 'dpmpp_3m_sde_gpu', 'ddpm', 'lcm', 'ddim', + 'uni_pc', 'uni_pc_bh2'], value='euler') + sampling_scheduler = gr.Radio(label='Scheduler', + choices=['normal', 'karras', 'exponential', 'sgm_uniform', 'simple', + 'ddim_uniform'], value='karras') + sampling_seed = gr.Number(label='Seed', value=12345, precision=0) + + generate_button = gr.Button(value="Generate") + + ctrls = [filename, width, height, video_frames, motion_bucket_id, fps, augmentation_level, + sampling_seed, sampling_steps, sampling_cfg, sampling_sampler_name, sampling_scheduler, + sampling_denoise, guidance_min_cfg, input_image] + + with gr.Column(): + output_video = gr.Video(autoplay=True) + output_gallery = gr.Gallery(label='Gallery', show_label=False, object_fit='contain', + visible=True, height=1024, columns=4) + + generate_button.click(predict, inputs=ctrls, outputs=[output_gallery, output_video]) + PasteField = parameters_copypaste.PasteField + paste_fields = [ + PasteField(width, "Size-1", api="width"), + PasteField(height, "Size-2", api="height"), + ] + parameters_copypaste.add_paste_fields("svd", init_img=input_image, fields=paste_fields) + return [(svd_block, "SVD", "svd")] + + +update_svd_filenames() +script_callbacks.on_ui_tabs(on_ui_tabs) diff --git a/extensions-builtin/sd_forge_z123/scripts/forge_z123.py b/extensions-builtin/sd_forge_z123/scripts/forge_z123.py new file mode 100644 index 0000000000000000000000000000000000000000..4ed320076ee752ce6aab3932a3df8de761fd1231 --- /dev/null +++ b/extensions-builtin/sd_forge_z123/scripts/forge_z123.py @@ -0,0 +1,101 @@ +import torch +import gradio as gr +import os +import pathlib + +from modules import script_callbacks +from modules.paths import models_path +from modules.ui_common import ToolButton, refresh_symbol +from modules.ui_components import ResizeHandleRow +from modules import shared + +from modules_forge.forge_util import numpy_to_pytorch, pytorch_to_numpy +from ldm_patched.modules.sd import load_checkpoint_guess_config +from ldm_patched.contrib.external_stable3d import StableZero123_Conditioning +from ldm_patched.contrib.external import KSampler, VAEDecode + + +opStableZero123_Conditioning = StableZero123_Conditioning() +opKSampler = KSampler() +opVAEDecode = VAEDecode() + +model_root = os.path.join(models_path, 'z123') +os.makedirs(model_root, exist_ok=True) +model_filenames = [] + + +def update_model_filenames(): + global model_filenames + model_filenames = [ + pathlib.Path(x).name for x in + shared.walk_files(model_root, allowed_extensions=[".pt", ".ckpt", ".safetensors"]) + ] + return model_filenames + + +@torch.inference_mode() 
+@torch.no_grad() +def predict(filename, width, height, batch_size, elevation, azimuth, + sampling_seed, sampling_steps, sampling_cfg, sampling_sampler_name, sampling_scheduler, sampling_denoise, input_image): + filename = os.path.join(model_root, filename) + model, _, vae, clip_vision = \ + load_checkpoint_guess_config(filename, output_vae=True, output_clip=False, output_clipvision=True) + init_image = numpy_to_pytorch(input_image) + positive, negative, latent_image = opStableZero123_Conditioning.encode( + clip_vision, init_image, vae, width, height, batch_size, elevation, azimuth) + output_latent = opKSampler.sample(model, sampling_seed, sampling_steps, sampling_cfg, + sampling_sampler_name, sampling_scheduler, positive, + negative, latent_image, sampling_denoise)[0] + output_pixels = opVAEDecode.decode(vae, output_latent)[0] + outputs = pytorch_to_numpy(output_pixels) + return outputs + + +def on_ui_tabs(): + with gr.Blocks() as model_block: + with ResizeHandleRow(): + with gr.Column(): + input_image = gr.Image(label='Input Image', source='upload', type='numpy', height=400) + + with gr.Row(): + filename = gr.Dropdown(label="Zero123 Checkpoint Filename", + choices=model_filenames, + value=model_filenames[0] if len(model_filenames) > 0 else None) + refresh_button = ToolButton(value=refresh_symbol, tooltip="Refresh") + refresh_button.click( + fn=lambda: gr.update(choices=update_model_filenames), + inputs=[], outputs=filename) + + width = gr.Slider(label='Width', minimum=16, maximum=8192, step=8, value=256) + height = gr.Slider(label='Height', minimum=16, maximum=8192, step=8, value=256) + batch_size = gr.Slider(label='Batch Size', minimum=1, maximum=4096, step=1, value=4) + elevation = gr.Slider(label='Elevation', minimum=-180.0, maximum=180.0, step=0.001, value=10.0) + azimuth = gr.Slider(label='Azimuth', minimum=-180.0, maximum=180.0, step=0.001, value=142.0) + sampling_denoise = gr.Slider(label='Sampling Denoise', minimum=0.0, maximum=1.0, step=0.01, value=1.0) + sampling_steps = gr.Slider(label='Sampling Steps', minimum=1, maximum=10000, step=1, value=20) + sampling_cfg = gr.Slider(label='CFG Scale', minimum=0.0, maximum=100.0, step=0.1, value=5.0) + sampling_sampler_name = gr.Radio(label='Sampler Name', + choices=['euler', 'euler_ancestral', 'heun', 'heunpp2', 'dpm_2', + 'dpm_2_ancestral', 'lms', 'dpm_fast', 'dpm_adaptive', + 'dpmpp_2s_ancestral', 'dpmpp_sde', 'dpmpp_sde_gpu', + 'dpmpp_2m', 'dpmpp_2m_sde', 'dpmpp_2m_sde_gpu', + 'dpmpp_3m_sde', 'dpmpp_3m_sde_gpu', 'ddpm', 'lcm', 'ddim', + 'uni_pc', 'uni_pc_bh2'], value='euler') + sampling_scheduler = gr.Radio(label='Sampling Scheduler', + choices=['normal', 'karras', 'exponential', 'sgm_uniform', 'simple', + 'ddim_uniform'], value='sgm_uniform') + sampling_seed = gr.Number(label='Seed', value=12345, precision=0) + generate_button = gr.Button(value="Generate") + + ctrls = [filename, width, height, batch_size, elevation, azimuth, sampling_seed, sampling_steps, sampling_cfg, sampling_sampler_name, sampling_scheduler, sampling_denoise, input_image] + + with gr.Column(): + output_gallery = gr.Gallery(label='Gallery', show_label=False, object_fit='contain', + visible=True, height=1024, columns=4) + + generate_button.click(predict, inputs=ctrls, outputs=[output_gallery]) + return [(model_block, "Z123", "z123")] + + +update_model_filenames() +script_callbacks.on_ui_tabs(on_ui_tabs) diff --git a/extensions-builtin/soft-inpainting/scripts/soft_inpainting.py b/extensions-builtin/soft-inpainting/scripts/soft_inpainting.py new file mode 100644 
index 0000000000000000000000000000000000000000..d90243442515bb34f482213526d3fc8250b390ae --- /dev/null +++ b/extensions-builtin/soft-inpainting/scripts/soft_inpainting.py @@ -0,0 +1,747 @@ +import numpy as np +import gradio as gr +import math +from modules.ui_components import InputAccordion +import modules.scripts as scripts + + +class SoftInpaintingSettings: + def __init__(self, + mask_blend_power, + mask_blend_scale, + inpaint_detail_preservation, + composite_mask_influence, + composite_difference_threshold, + composite_difference_contrast): + self.mask_blend_power = mask_blend_power + self.mask_blend_scale = mask_blend_scale + self.inpaint_detail_preservation = inpaint_detail_preservation + self.composite_mask_influence = composite_mask_influence + self.composite_difference_threshold = composite_difference_threshold + self.composite_difference_contrast = composite_difference_contrast + + def add_generation_params(self, dest): + dest[enabled_gen_param_label] = True + dest[gen_param_labels.mask_blend_power] = self.mask_blend_power + dest[gen_param_labels.mask_blend_scale] = self.mask_blend_scale + dest[gen_param_labels.inpaint_detail_preservation] = self.inpaint_detail_preservation + dest[gen_param_labels.composite_mask_influence] = self.composite_mask_influence + dest[gen_param_labels.composite_difference_threshold] = self.composite_difference_threshold + dest[gen_param_labels.composite_difference_contrast] = self.composite_difference_contrast + + +# ------------------- Methods ------------------- + +def processing_uses_inpainting(p): + # TODO: Figure out a better way to determine if inpainting is being used by p + if getattr(p, "image_mask", None) is not None: + return True + + if getattr(p, "mask", None) is not None: + return True + + if getattr(p, "nmask", None) is not None: + return True + + return False + + +def latent_blend(settings, a, b, t): + """ + Interpolates two latent image representations according to the parameter t, + where the interpolated vectors' magnitudes are also interpolated separately. + The "detail_preservation" factor biases the magnitude interpolation towards + the larger of the two magnitudes. + """ + import torch + + # NOTE: We use inplace operations wherever possible. + + # [4][w][h] to [1][4][w][h] + t2 = t.unsqueeze(0) + # [4][w][h] to [1][1][w][h] - the [4] seem redundant. + t3 = t[0].unsqueeze(0).unsqueeze(0) + + one_minus_t2 = 1 - t2 + one_minus_t3 = 1 - t3 + + # Linearly interpolate the image vectors. + a_scaled = a * one_minus_t2 + b_scaled = b * t2 + image_interp = a_scaled + image_interp.add_(b_scaled) + result_type = image_interp.dtype + del a_scaled, b_scaled, t2, one_minus_t2 + + # Calculate the magnitude of the interpolated vectors. (We will remove this magnitude.) + # 64-bit operations are used here to allow large exponents. + current_magnitude = torch.norm(image_interp, p=2, dim=1, keepdim=True).to(torch.float64).add_(0.00001) + + # Interpolate the powered magnitudes, then un-power them (bring them back to a power of 1). + a_magnitude = torch.norm(a, p=2, dim=1, keepdim=True).to(torch.float64).pow_( + settings.inpaint_detail_preservation) * one_minus_t3 + b_magnitude = torch.norm(b, p=2, dim=1, keepdim=True).to(torch.float64).pow_( + settings.inpaint_detail_preservation) * t3 + desired_magnitude = a_magnitude + desired_magnitude.add_(b_magnitude).pow_(1 / settings.inpaint_detail_preservation) + del a_magnitude, b_magnitude, t3, one_minus_t3 + + # Change the linearly interpolated image vectors' magnitudes to the value we want. 
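+ # (i.e. rescale image_interp by desired_magnitude / current_magnitude so each latent vector ends up with the power-interpolated norm rather than the typically smaller norm of the plain linear blend.)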
+ # This is the last 64-bit operation. + image_interp_scaling_factor = desired_magnitude + image_interp_scaling_factor.div_(current_magnitude) + image_interp_scaling_factor = image_interp_scaling_factor.to(result_type) + image_interp_scaled = image_interp + image_interp_scaled.mul_(image_interp_scaling_factor) + del current_magnitude + del desired_magnitude + del image_interp + del image_interp_scaling_factor + del result_type + + return image_interp_scaled + + +def get_modified_nmask(settings, nmask, sigma): + """ + Converts a negative mask representing the transparency of the original latent vectors being overlayed + to a mask that is scaled according to the denoising strength for this step. + + Where: + 0 = fully opaque, infinite density, fully masked + 1 = fully transparent, zero density, fully unmasked + + We bring this transparency to a power, as this allows one to simulate N number of blending operations + where N can be any positive real value. Using this one can control the balance of influence between + the denoiser and the original latents according to the sigma value. + + NOTE: "mask" is not used + """ + import torch + return torch.pow(nmask, (sigma ** settings.mask_blend_power) * settings.mask_blend_scale) + + +def apply_adaptive_masks( + settings: SoftInpaintingSettings, + nmask, + latent_orig, + latent_processed, + overlay_images, + width, height, + paste_to): + import torch + import modules.processing as proc + import modules.images as images + from PIL import Image, ImageOps, ImageFilter + + # TODO: Bias the blending according to the latent mask, add adjustable parameter for bias control. + latent_mask = nmask[0].float() + # convert the original mask into a form we use to scale distances for thresholding + mask_scalar = 1 - (torch.clamp(latent_mask, min=0, max=1) ** (settings.mask_blend_scale / 2)) + mask_scalar = (0.5 * (1 - settings.composite_mask_influence) + + mask_scalar * settings.composite_mask_influence) + mask_scalar = mask_scalar / (1.00001 - mask_scalar) + mask_scalar = mask_scalar.cpu().numpy() + + latent_distance = torch.norm(latent_processed - latent_orig, p=2, dim=1) + + kernel, kernel_center = get_gaussian_kernel(stddev_radius=1.5, max_radius=2) + + masks_for_overlay = [] + + for i, (distance_map, overlay_image) in enumerate(zip(latent_distance, overlay_images)): + converted_mask = distance_map.float().cpu().numpy() + converted_mask = weighted_histogram_filter(converted_mask, kernel, kernel_center, + percentile_min=0.9, percentile_max=1, min_width=1) + converted_mask = weighted_histogram_filter(converted_mask, kernel, kernel_center, + percentile_min=0.25, percentile_max=0.75, min_width=1) + + # The distance at which opacity of original decreases to 50% + half_weighted_distance = settings.composite_difference_threshold * mask_scalar + converted_mask = converted_mask / half_weighted_distance + + converted_mask = 1 / (1 + converted_mask ** settings.composite_difference_contrast) + converted_mask = smootherstep(converted_mask) + converted_mask = 1 - converted_mask + converted_mask = 255. * converted_mask + converted_mask = converted_mask.astype(np.uint8) + converted_mask = Image.fromarray(converted_mask) + converted_mask = images.resize_image(2, converted_mask, width, height) + converted_mask = proc.create_binary_mask(converted_mask, round=False) + + # Remove aliasing artifacts using a gaussian blur. + converted_mask = converted_mask.filter(ImageFilter.GaussianBlur(radius=4)) + + # Expand the mask to fit the whole image if needed. 
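+ # paste_to is presumably only set when a cropped region was processed ("Inpaint only masked"); uncrop places the mask back at that crop location inside the full-size overlay image.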
+ if paste_to is not None: + converted_mask = proc.uncrop(converted_mask, + (overlay_image.width, overlay_image.height), + paste_to) + + masks_for_overlay.append(converted_mask) + + image_masked = Image.new('RGBa', (overlay_image.width, overlay_image.height)) + image_masked.paste(overlay_image.convert("RGBA").convert("RGBa"), + mask=ImageOps.invert(converted_mask.convert('L'))) + + overlay_images[i] = image_masked.convert('RGBA') + + return masks_for_overlay + + +def apply_masks( + settings, + nmask, + overlay_images, + width, height, + paste_to): + import torch + import modules.processing as proc + import modules.images as images + from PIL import Image, ImageOps, ImageFilter + + converted_mask = nmask[0].float() + converted_mask = torch.clamp(converted_mask, min=0, max=1).pow_(settings.mask_blend_scale / 2) + converted_mask = 255. * converted_mask + converted_mask = converted_mask.cpu().numpy().astype(np.uint8) + converted_mask = Image.fromarray(converted_mask) + converted_mask = images.resize_image(2, converted_mask, width, height) + converted_mask = proc.create_binary_mask(converted_mask, round=False) + + # Remove aliasing artifacts using a gaussian blur. + converted_mask = converted_mask.filter(ImageFilter.GaussianBlur(radius=4)) + + # Expand the mask to fit the whole image if needed. + if paste_to is not None: + converted_mask = proc.uncrop(converted_mask, + (width, height), + paste_to) + + masks_for_overlay = [] + + for i, overlay_image in enumerate(overlay_images): + masks_for_overlay[i] = converted_mask + + image_masked = Image.new('RGBa', (overlay_image.width, overlay_image.height)) + image_masked.paste(overlay_image.convert("RGBA").convert("RGBa"), + mask=ImageOps.invert(converted_mask.convert('L'))) + + overlay_images[i] = image_masked.convert('RGBA') + + return masks_for_overlay + + +def weighted_histogram_filter(img, kernel, kernel_center, percentile_min=0.0, percentile_max=1.0, min_width=1.0): + """ + Generalization convolution filter capable of applying + weighted mean, median, maximum, and minimum filters + parametrically using an arbitrary kernel. + + Args: + img (nparray): + The image, a 2-D array of floats, to which the filter is being applied. + kernel (nparray): + The kernel, a 2-D array of floats. + kernel_center (nparray): + The kernel center coordinate, a 1-D array with two elements. + percentile_min (float): + The lower bound of the histogram window used by the filter, + from 0 to 1. + percentile_max (float): + The upper bound of the histogram window used by the filter, + from 0 to 1. + min_width (float): + The minimum size of the histogram window bounds, in weight units. + Must be greater than 0. + + Returns: + (nparray): A filtered copy of the input image "img", a 2-D array of floats. + """ + + # Converts an index tuple into a vector. + def vec(x): + return np.array(x) + + kernel_min = -kernel_center + kernel_max = vec(kernel.shape) - kernel_center + + def weighted_histogram_filter_single(idx): + idx = vec(idx) + min_index = np.maximum(0, idx + kernel_min) + max_index = np.minimum(vec(img.shape), idx + kernel_max) + window_shape = max_index - min_index + + class WeightedElement: + """ + An element of the histogram, its weight + and bounds. + """ + + def __init__(self, value, weight): + self.value: float = value + self.weight: float = weight + self.window_min: float = 0.0 + self.window_max: float = 1.0 + + # Collect the values in the image as WeightedElements, + # weighted by their corresponding kernel values. 
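+ # The elements are later sorted by value and stacked by cumulative weight; the [percentile_min, percentile_max] window then selects the slice of that stack to average.
+ # Worked example: samples (value, weight) = (0.1, 0.2), (0.5, 0.6), (0.9, 0.2) with a 0.25-0.75 window -> the window falls entirely inside the middle sample, so the result is 0.5.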
+ values = [] + for window_tup in np.ndindex(tuple(window_shape)): + window_index = vec(window_tup) + image_index = window_index + min_index + centered_kernel_index = image_index - idx + kernel_index = centered_kernel_index + kernel_center + element = WeightedElement(img[tuple(image_index)], kernel[tuple(kernel_index)]) + values.append(element) + + def sort_key(x: WeightedElement): + return x.value + + values.sort(key=sort_key) + + # Calculate the height of the stack (sum) + # and each sample's range they occupy in the stack + sum = 0 + for i in range(len(values)): + values[i].window_min = sum + sum += values[i].weight + values[i].window_max = sum + + # Calculate what range of this stack ("window") + # we want to get the weighted average across. + window_min = sum * percentile_min + window_max = sum * percentile_max + window_width = window_max - window_min + + # Ensure the window is within the stack and at least a certain size. + if window_width < min_width: + window_center = (window_min + window_max) / 2 + window_min = window_center - min_width / 2 + window_max = window_center + min_width / 2 + + if window_max > sum: + window_max = sum + window_min = sum - min_width + + if window_min < 0: + window_min = 0 + window_max = min_width + + value = 0 + value_weight = 0 + + # Get the weighted average of all the samples + # that overlap with the window, weighted + # by the size of their overlap. + for i in range(len(values)): + if window_min >= values[i].window_max: + continue + if window_max <= values[i].window_min: + break + + s = max(window_min, values[i].window_min) + e = min(window_max, values[i].window_max) + w = e - s + + value += values[i].value * w + value_weight += w + + return value / value_weight if value_weight != 0 else 0 + + img_out = img.copy() + + # Apply the kernel operation over each pixel. + for index in np.ndindex(img.shape): + img_out[index] = weighted_histogram_filter_single(index) + + return img_out + + +def smoothstep(x): + """ + The smoothstep function, input should be clamped to 0-1 range. + Turns a diagonal line (f(x) = x) into a sigmoid-like curve. + """ + return x * x * (3 - 2 * x) + + +def smootherstep(x): + """ + The smootherstep function, input should be clamped to 0-1 range. + Turns a diagonal line (f(x) = x) into a sigmoid-like curve. + """ + return x * x * x * (x * (6 * x - 15) + 10) + + +def get_gaussian_kernel(stddev_radius=1.0, max_radius=2): + """ + Creates a Gaussian kernel with thresholded edges. + + Args: + stddev_radius (float): + Standard deviation of the gaussian kernel, in pixels. + max_radius (int): + The size of the filter kernel. The number of pixels is (max_radius*2+1) ** 2. + The kernel is thresholded so that any values one pixel beyond this radius + is weighted at 0. + + Returns: + (nparray, nparray): A kernel array (shape: (N, N)), its center coordinate (shape: (2)) + """ + + # Evaluates a 0-1 normalized gaussian function for a given square distance from the mean. + def gaussian(sqr_mag): + return math.exp(-sqr_mag / (stddev_radius * stddev_radius)) + + # Helper function for converting a tuple to an array. + def vec(x): + return np.array(x) + + """ + Since a gaussian is unbounded, we need to limit ourselves + to a finite range. + We taper the ends off at the end of that range so they equal zero + while preserving the maximum value of 1 at the mean. 
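+ For example, with max_radius=2 the kernel is 5x5: weights are shifted so a sample at squared distance (max_radius + 1)^2 would be exactly zero, anything farther is clamped to zero, and the result is rescaled so the centre weight remains 1.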
+ """ + zero_radius = max_radius + 1.0 + gauss_zero = gaussian(zero_radius * zero_radius) + gauss_kernel_scale = 1 / (1 - gauss_zero) + + def gaussian_kernel_func(coordinate): + x = coordinate[0] ** 2.0 + coordinate[1] ** 2.0 + x = gaussian(x) + x -= gauss_zero + x *= gauss_kernel_scale + x = max(0.0, x) + return x + + size = max_radius * 2 + 1 + kernel_center = max_radius + kernel = np.zeros((size, size)) + + for index in np.ndindex(kernel.shape): + kernel[index] = gaussian_kernel_func(vec(index) - kernel_center) + + return kernel, kernel_center + + +# ------------------- Constants ------------------- + + +default = SoftInpaintingSettings(1, 0.5, 4, 0, 0.5, 2) + +enabled_ui_label = "Soft inpainting" +enabled_gen_param_label = "Soft inpainting enabled" +enabled_el_id = "soft_inpainting_enabled" + +ui_labels = SoftInpaintingSettings( + "Schedule bias", + "Preservation strength", + "Transition contrast boost", + "Mask influence", + "Difference threshold", + "Difference contrast") + +ui_info = SoftInpaintingSettings( + "Shifts when preservation of original content occurs during denoising.", + "How strongly partially masked content should be preserved.", + "Amplifies the contrast that may be lost in partially masked regions.", + "How strongly the original mask should bias the difference threshold.", + "How much an image region can change before the original pixels are not blended in anymore.", + "How sharp the transition should be between blended and not blended.") + +gen_param_labels = SoftInpaintingSettings( + "Soft inpainting schedule bias", + "Soft inpainting preservation strength", + "Soft inpainting transition contrast boost", + "Soft inpainting mask influence", + "Soft inpainting difference threshold", + "Soft inpainting difference contrast") + +el_ids = SoftInpaintingSettings( + "mask_blend_power", + "mask_blend_scale", + "inpaint_detail_preservation", + "composite_mask_influence", + "composite_difference_threshold", + "composite_difference_contrast") + + +# ------------------- Script ------------------- + + +class Script(scripts.Script): + def __init__(self): + self.section = "inpaint" + self.masks_for_overlay = None + self.overlay_images = None + + def title(self): + return "Soft Inpainting" + + def show(self, is_img2img): + return scripts.AlwaysVisible if is_img2img else False + + def ui(self, is_img2img): + if not is_img2img: + return + + with InputAccordion(False, label=enabled_ui_label, elem_id=enabled_el_id) as soft_inpainting_enabled: + with gr.Group(): + gr.Markdown( + """ + Soft inpainting allows you to **seamlessly blend original content with inpainted content** according to the mask opacity. + **High _Mask blur_** values are recommended! 
+ """) + + power = \ + gr.Slider(label=ui_labels.mask_blend_power, + info=ui_info.mask_blend_power, + minimum=0, + maximum=8, + step=0.1, + value=default.mask_blend_power, + elem_id=el_ids.mask_blend_power) + scale = \ + gr.Slider(label=ui_labels.mask_blend_scale, + info=ui_info.mask_blend_scale, + minimum=0, + maximum=8, + step=0.05, + value=default.mask_blend_scale, + elem_id=el_ids.mask_blend_scale) + detail = \ + gr.Slider(label=ui_labels.inpaint_detail_preservation, + info=ui_info.inpaint_detail_preservation, + minimum=1, + maximum=32, + step=0.5, + value=default.inpaint_detail_preservation, + elem_id=el_ids.inpaint_detail_preservation) + + gr.Markdown( + """ + ### Pixel Composite Settings + """) + + mask_inf = \ + gr.Slider(label=ui_labels.composite_mask_influence, + info=ui_info.composite_mask_influence, + minimum=0, + maximum=1, + step=0.05, + value=default.composite_mask_influence, + elem_id=el_ids.composite_mask_influence) + + dif_thresh = \ + gr.Slider(label=ui_labels.composite_difference_threshold, + info=ui_info.composite_difference_threshold, + minimum=0, + maximum=8, + step=0.25, + value=default.composite_difference_threshold, + elem_id=el_ids.composite_difference_threshold) + + dif_contr = \ + gr.Slider(label=ui_labels.composite_difference_contrast, + info=ui_info.composite_difference_contrast, + minimum=0, + maximum=8, + step=0.25, + value=default.composite_difference_contrast, + elem_id=el_ids.composite_difference_contrast) + + with gr.Accordion("Help", open=False): + gr.Markdown( + f""" + ### {ui_labels.mask_blend_power} + + The blending strength of original content is scaled proportionally with the decreasing noise level values at each step (sigmas). + This ensures that the influence of the denoiser and original content preservation is roughly balanced at each step. + This balance can be shifted using this parameter, controlling whether earlier or later steps have stronger preservation. + + - **Below 1**: Stronger preservation near the end (with low sigma) + - **1**: Balanced (proportional to sigma) + - **Above 1**: Stronger preservation in the beginning (with high sigma) + """) + gr.Markdown( + f""" + ### {ui_labels.mask_blend_scale} + + Skews whether partially masked image regions should be more likely to preserve the original content or favor inpainted content. + This may need to be adjusted depending on the {ui_labels.mask_blend_power}, CFG Scale, prompt and Denoising strength. + + - **Low values**: Favors generated content. + - **High values**: Favors original content. + """) + gr.Markdown( + f""" + ### {ui_labels.inpaint_detail_preservation} + + This parameter controls how the original latent vectors and denoised latent vectors are interpolated. + With higher values, the magnitude of the resulting blended vector will be closer to the maximum of the two interpolated vectors. + This can prevent the loss of contrast that occurs with linear interpolation. + + - **Low values**: Softer blending, details may fade. + - **High values**: Stronger contrast, may over-saturate colors. + """) + + gr.Markdown( + """ + ## Pixel Composite Settings + + Masks are generated based on how much a part of the image changed after denoising. + These masks are used to blend the original and final images together. + If the difference is low, the original pixels are used instead of the pixels returned by the inpainting process. + """) + + gr.Markdown( + f""" + ### {ui_labels.composite_mask_influence} + + This parameter controls how much the mask should bias this sensitivity to difference. 
+ + - **0**: Ignore the mask, only consider differences in image content. + - **1**: Follow the mask closely despite image content changes. + """) + + gr.Markdown( + f""" + ### {ui_labels.composite_difference_threshold} + + This value represents the difference at which the original pixels will have less than 50% opacity. + + - **Low values**: Two images patches must be almost the same in order to retain original pixels. + - **High values**: Two images patches can be very different and still retain original pixels. + """) + + gr.Markdown( + f""" + ### {ui_labels.composite_difference_contrast} + + This value represents the contrast between the opacity of the original and inpainted content. + + - **Low values**: The blend will be more gradual and have longer transitions, but may cause ghosting. + - **High values**: Ghosting will be less common, but transitions may be very sudden. + """) + + self.infotext_fields = [(soft_inpainting_enabled, enabled_gen_param_label), + (power, gen_param_labels.mask_blend_power), + (scale, gen_param_labels.mask_blend_scale), + (detail, gen_param_labels.inpaint_detail_preservation), + (mask_inf, gen_param_labels.composite_mask_influence), + (dif_thresh, gen_param_labels.composite_difference_threshold), + (dif_contr, gen_param_labels.composite_difference_contrast)] + + self.paste_field_names = [] + for _, field_name in self.infotext_fields: + self.paste_field_names.append(field_name) + + return [soft_inpainting_enabled, + power, + scale, + detail, + mask_inf, + dif_thresh, + dif_contr] + + def process(self, p, enabled, power, scale, detail_preservation, mask_inf, dif_thresh, dif_contr): + if not enabled: + return + + if not processing_uses_inpainting(p): + return + + # Shut off the rounding it normally does. + p.mask_round = False + + settings = SoftInpaintingSettings(power, scale, detail_preservation, mask_inf, dif_thresh, dif_contr) + + # p.extra_generation_params["Mask rounding"] = False + settings.add_generation_params(p.extra_generation_params) + + def on_mask_blend(self, p, mba: scripts.MaskBlendArgs, enabled, power, scale, detail_preservation, mask_inf, + dif_thresh, dif_contr): + if not enabled: + return + + if not processing_uses_inpainting(p): + return + + if mba.is_final_blend: + mba.blended_latent = mba.current_latent + return + + settings = SoftInpaintingSettings(power, scale, detail_preservation, mask_inf, dif_thresh, dif_contr) + + # todo: Why is sigma 2D? Both values are the same. + mba.blended_latent = latent_blend(settings, + mba.init_latent, + mba.current_latent, + get_modified_nmask(settings, mba.nmask, mba.sigma[0])) + + def post_sample(self, p, ps: scripts.PostSampleArgs, enabled, power, scale, detail_preservation, mask_inf, + dif_thresh, dif_contr): + if not enabled: + return + + if not processing_uses_inpainting(p): + return + + nmask = getattr(p, "nmask", None) + if nmask is None: + return + + from modules import images + from modules.shared import opts + + settings = SoftInpaintingSettings(power, scale, detail_preservation, mask_inf, dif_thresh, dif_contr) + + # since the original code puts holes in the existing overlay images, + # we have to rebuild them. 
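+ # Rebuild: flatten each init image onto the configured img2img background colour, resize it the same way the pipeline did (skipped when paste_to is set or resize mode 3 is used), and keep an RGBA copy; a single init image is repeated below to match the batch size.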
+ self.overlay_images = [] + for img in p.init_images: + + image = images.flatten(img, opts.img2img_background_color) + + if p.paste_to is None and p.resize_mode != 3: + image = images.resize_image(p.resize_mode, image, p.width, p.height) + + self.overlay_images.append(image.convert('RGBA')) + + if len(p.init_images) == 1: + self.overlay_images = self.overlay_images * p.batch_size + + if getattr(ps.samples, 'already_decoded', False): + self.masks_for_overlay = apply_masks(settings=settings, + nmask=nmask, + overlay_images=self.overlay_images, + width=p.width, + height=p.height, + paste_to=p.paste_to) + else: + self.masks_for_overlay = apply_adaptive_masks(settings=settings, + nmask=nmask, + latent_orig=p.init_latent, + latent_processed=ps.samples, + overlay_images=self.overlay_images, + width=p.width, + height=p.height, + paste_to=p.paste_to) + + def postprocess_maskoverlay(self, p, ppmo: scripts.PostProcessMaskOverlayArgs, enabled, power, scale, + detail_preservation, mask_inf, dif_thresh, dif_contr): + if not enabled: + return + + if not processing_uses_inpainting(p): + return + + if self.masks_for_overlay is None: + return + + if self.overlay_images is None: + return + + ppmo.mask_for_overlay = self.masks_for_overlay[ppmo.index] + ppmo.overlay_image = self.overlay_images[ppmo.index] diff --git a/html/card-no-preview.png b/html/card-no-preview.png new file mode 100644 index 0000000000000000000000000000000000000000..e2beb2692067db56ac5f7bd5bfc3d895d9063371 Binary files /dev/null and b/html/card-no-preview.png differ diff --git a/html/extra-networks-card.html b/html/extra-networks-card.html new file mode 100644 index 0000000000000000000000000000000000000000..f1d959a6733b3c714dbadedba46c36329b759700 --- /dev/null +++ b/html/extra-networks-card.html @@ -0,0 +1,9 @@ +
+ {background_image} +
{copy_path_button}{metadata_button}{edit_button}
+
+
{search_terms}
+ {name} + {description} +
+
diff --git a/html/extra-networks-copy-path-button.html b/html/extra-networks-copy-path-button.html new file mode 100644 index 0000000000000000000000000000000000000000..8083bb0335789543eb6f3d2ddb394e05644677ad --- /dev/null +++ b/html/extra-networks-copy-path-button.html @@ -0,0 +1,5 @@ +
+
\ No newline at end of file diff --git a/html/extra-networks-edit-item-button.html b/html/extra-networks-edit-item-button.html new file mode 100644 index 0000000000000000000000000000000000000000..0fe43082ad1f4255af5122bffc4252eaa6ea385a --- /dev/null +++ b/html/extra-networks-edit-item-button.html @@ -0,0 +1,4 @@ +
+
\ No newline at end of file diff --git a/html/extra-networks-metadata-button.html b/html/extra-networks-metadata-button.html new file mode 100644 index 0000000000000000000000000000000000000000..285b5b3b65869cb5ee97e2cff19992bee3e56625 --- /dev/null +++ b/html/extra-networks-metadata-button.html @@ -0,0 +1,4 @@ + \ No newline at end of file diff --git a/html/extra-networks-no-cards.html b/html/extra-networks-no-cards.html new file mode 100644 index 0000000000000000000000000000000000000000..389358d6c4b383fdc3c5686e029e7b3b1ae9a493 --- /dev/null +++ b/html/extra-networks-no-cards.html @@ -0,0 +1,8 @@ +
+

Nothing here. Add some content to the following directories:

+ +
    +{dirs} +
+
+ diff --git a/html/extra-networks-pane.html b/html/extra-networks-pane.html new file mode 100644 index 0000000000000000000000000000000000000000..0c763f710584855760140055998d22940f5451e2 --- /dev/null +++ b/html/extra-networks-pane.html @@ -0,0 +1,55 @@ +
+ +
+
+ {tree_html} +
+
+ {items_html} +
+
+
\ No newline at end of file diff --git a/html/extra-networks-tree-button.html b/html/extra-networks-tree-button.html new file mode 100644 index 0000000000000000000000000000000000000000..9dc2e2a40c8f0f14cb14dbc04f566d9254e659f5 --- /dev/null +++ b/html/extra-networks-tree-button.html @@ -0,0 +1,23 @@ + +
+ + {action_list_item_action_leading} + + + {action_list_item_visual_leading} + + + {action_list_item_label} + + + {action_list_item_visual_trailing} + + + {action_list_item_action_trailing} + +
\ No newline at end of file diff --git a/html/footer.html b/html/footer.html new file mode 100644 index 0000000000000000000000000000000000000000..8739a0f4752fd00b941d888d9a676158a3ba31a2 --- /dev/null +++ b/html/footer.html @@ -0,0 +1,15 @@ +
+ API +  •  + Github +  •  + Gradio +  •  + Startup profile +  •  + Reload UI +
+
+
+{versions} +
diff --git a/html/licenses.html b/html/licenses.html new file mode 100644 index 0000000000000000000000000000000000000000..e14bf3cceed0bacd75431442bfe6825de2cbff71 --- /dev/null +++ b/html/licenses.html @@ -0,0 +1,382 @@ + + +

InvokeAI

+Some code for compatibility with OSX is taken from lstein's repository. +
+MIT License
+
+Copyright (c) 2022 InvokeAI Team
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+ +

LDSR

+Code added by contributors, most likely copied from this repository. +
+MIT License
+
+Copyright (c) 2022 Machine Vision and Learning Group, LMU Munich
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+ +

CLIP Interrogator

+Some small amounts of code borrowed and reworked. +
+MIT License
+
+Copyright (c) 2022 pharmapsychotic
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+ +

Memory Efficient Attention

+The sub-quadratic cross attention optimization uses modified code from the Memory Efficient Attention package that Alex Birch optimized for 3D tensors. This license is updated to reflect that. +
+MIT License
+
+Copyright (c) 2023 Alex Birch
+Copyright (c) 2023 Amin Rezaei
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+ +

Scaled Dot Product Attention

+Some small amounts of code borrowed and reworked. +
+   Copyright 2023 The HuggingFace Team. All rights reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+ +

Curated transformers

+The MPS workaround for nn.Linear on macOS 13.2.X is based on the workaround created by danieldk for Curated transformers. +
+The MIT License (MIT)
+
+Copyright (C) 2021 ExplosionAI GmbH
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+ +

TAESD

+Tiny AutoEncoder for Stable Diffusion, used as an option for live previews. +
+MIT License
+
+Copyright (c) 2023 Ollin Boer Bohan
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
diff --git a/javascript/aspectRatioOverlay.js b/javascript/aspectRatioOverlay.js new file mode 100644 index 0000000000000000000000000000000000000000..2cf2d571fc02a026b6cdedcf589a217ef0d65d27 --- /dev/null +++ b/javascript/aspectRatioOverlay.js @@ -0,0 +1,113 @@ + +let currentWidth = null; +let currentHeight = null; +let arFrameTimeout = setTimeout(function() {}, 0); + +function dimensionChange(e, is_width, is_height) { + + if (is_width) { + currentWidth = e.target.value * 1.0; + } + if (is_height) { + currentHeight = e.target.value * 1.0; + } + + var inImg2img = gradioApp().querySelector("#tab_img2img").style.display == "block"; + + if (!inImg2img) { + return; + } + + var targetElement = null; + + var tabIndex = get_tab_index('mode_img2img'); + if (tabIndex == 0) { // img2img + targetElement = gradioApp().querySelector('#img2img_image div[data-testid=image] img'); + } else if (tabIndex == 1) { //Sketch + targetElement = gradioApp().querySelector('#img2img_sketch div[data-testid=image] img'); + } else if (tabIndex == 2) { // Inpaint + targetElement = gradioApp().querySelector('#img2maskimg div[data-testid=image] img'); + } else if (tabIndex == 3) { // Inpaint sketch + targetElement = gradioApp().querySelector('#inpaint_sketch div[data-testid=image] img'); + } + + + if (targetElement) { + + var arPreviewRect = gradioApp().querySelector('#imageARPreview'); + if (!arPreviewRect) { + arPreviewRect = document.createElement('div'); + arPreviewRect.id = "imageARPreview"; + gradioApp().appendChild(arPreviewRect); + } + + + + var viewportOffset = targetElement.getBoundingClientRect(); + + var viewportscale = Math.min(targetElement.clientWidth / targetElement.naturalWidth, targetElement.clientHeight / targetElement.naturalHeight); + + var scaledx = targetElement.naturalWidth * viewportscale; + var scaledy = targetElement.naturalHeight * viewportscale; + + var cleintRectTop = (viewportOffset.top + window.scrollY); + var cleintRectLeft = (viewportOffset.left + window.scrollX); + var cleintRectCentreY = cleintRectTop + (targetElement.clientHeight / 2); + var cleintRectCentreX = cleintRectLeft + (targetElement.clientWidth / 2); + + var arscale = Math.min(scaledx / currentWidth, scaledy / currentHeight); + var arscaledx = currentWidth * arscale; + var arscaledy = currentHeight * arscale; + + var arRectTop = cleintRectCentreY - (arscaledy / 2); + var arRectLeft = cleintRectCentreX - (arscaledx / 2); + var arRectWidth = arscaledx; + var arRectHeight = arscaledy; + + arPreviewRect.style.top = arRectTop + 'px'; + arPreviewRect.style.left = arRectLeft + 'px'; + arPreviewRect.style.width = arRectWidth + 'px'; + arPreviewRect.style.height = arRectHeight + 'px'; + + clearTimeout(arFrameTimeout); + arFrameTimeout = setTimeout(function() { + arPreviewRect.style.display = 'none'; + }, 2000); + + arPreviewRect.style.display = 'block'; + + } + +} + + +onAfterUiUpdate(function() { + var arPreviewRect = gradioApp().querySelector('#imageARPreview'); + if (arPreviewRect) { + arPreviewRect.style.display = 'none'; + } + var tabImg2img = gradioApp().querySelector("#tab_img2img"); + if (tabImg2img) { + var inImg2img = tabImg2img.style.display == "block"; + if (inImg2img) { + let inputs = gradioApp().querySelectorAll('input'); + inputs.forEach(function(e) { + var is_width = e.parentElement.id == "img2img_width"; + var is_height = e.parentElement.id == "img2img_height"; + + if ((is_width || is_height) && !e.classList.contains('scrollwatch')) { + e.addEventListener('input', function(e) { + dimensionChange(e, is_width, 
is_height); + }); + e.classList.add('scrollwatch'); + } + if (is_width) { + currentWidth = e.value * 1.0; + } + if (is_height) { + currentHeight = e.value * 1.0; + } + }); + } + } +}); diff --git a/javascript/contextMenus.js b/javascript/contextMenus.js new file mode 100644 index 0000000000000000000000000000000000000000..ccae242f2b6a731e89d8752814aae6b78e143482 --- /dev/null +++ b/javascript/contextMenus.js @@ -0,0 +1,176 @@ + +var contextMenuInit = function() { + let eventListenerApplied = false; + let menuSpecs = new Map(); + + const uid = function() { + return Date.now().toString(36) + Math.random().toString(36).substring(2); + }; + + function showContextMenu(event, element, menuEntries) { + let posx = event.clientX + document.body.scrollLeft + document.documentElement.scrollLeft; + let posy = event.clientY + document.body.scrollTop + document.documentElement.scrollTop; + + let oldMenu = gradioApp().querySelector('#context-menu'); + if (oldMenu) { + oldMenu.remove(); + } + + let baseStyle = window.getComputedStyle(uiCurrentTab); + + const contextMenu = document.createElement('nav'); + contextMenu.id = "context-menu"; + contextMenu.style.background = baseStyle.background; + contextMenu.style.color = baseStyle.color; + contextMenu.style.fontFamily = baseStyle.fontFamily; + contextMenu.style.top = posy + 'px'; + contextMenu.style.left = posx + 'px'; + + + + const contextMenuList = document.createElement('ul'); + contextMenuList.className = 'context-menu-items'; + contextMenu.append(contextMenuList); + + menuEntries.forEach(function(entry) { + let contextMenuEntry = document.createElement('a'); + contextMenuEntry.innerHTML = entry['name']; + contextMenuEntry.addEventListener("click", function() { + entry['func'](); + }); + contextMenuList.append(contextMenuEntry); + + }); + + gradioApp().appendChild(contextMenu); + + let menuWidth = contextMenu.offsetWidth + 4; + let menuHeight = contextMenu.offsetHeight + 4; + + let windowWidth = window.innerWidth; + let windowHeight = window.innerHeight; + + if ((windowWidth - posx) < menuWidth) { + contextMenu.style.left = windowWidth - menuWidth + "px"; + } + + if ((windowHeight - posy) < menuHeight) { + contextMenu.style.top = windowHeight - menuHeight + "px"; + } + + } + + function appendContextMenuOption(targetElementSelector, entryName, entryFunction) { + + var currentItems = menuSpecs.get(targetElementSelector); + + if (!currentItems) { + currentItems = []; + menuSpecs.set(targetElementSelector, currentItems); + } + let newItem = { + id: targetElementSelector + '_' + uid(), + name: entryName, + func: entryFunction, + isNew: true + }; + + currentItems.push(newItem); + return newItem['id']; + } + + function removeContextMenuOption(uid) { + menuSpecs.forEach(function(v) { + let index = -1; + v.forEach(function(e, ei) { + if (e['id'] == uid) { + index = ei; + } + }); + if (index >= 0) { + v.splice(index, 1); + } + }); + } + + function addContextMenuEventListener() { + if (eventListenerApplied) { + return; + } + gradioApp().addEventListener("click", function(e) { + if (!e.isTrusted) { + return; + } + + let oldMenu = gradioApp().querySelector('#context-menu'); + if (oldMenu) { + oldMenu.remove(); + } + }); + gradioApp().addEventListener("contextmenu", function(e) { + let oldMenu = gradioApp().querySelector('#context-menu'); + if (oldMenu) { + oldMenu.remove(); + } + menuSpecs.forEach(function(v, k) { + if (e.composedPath()[0].matches(k)) { + showContextMenu(e, e.composedPath()[0], v); + e.preventDefault(); + } + }); + }); + eventListenerApplied = true; + 
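+ // The eventListenerApplied flag above makes repeated calls a no-op (this function is re-run on every onAfterUiUpdate), so the click/contextmenu handlers are attached to the gradio root only once.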
+ } + + return [appendContextMenuOption, removeContextMenuOption, addContextMenuEventListener]; +}; + +var initResponse = contextMenuInit(); +var appendContextMenuOption = initResponse[0]; +var removeContextMenuOption = initResponse[1]; +var addContextMenuEventListener = initResponse[2]; + +(function() { + //Start example Context Menu Items + let generateOnRepeat = function(genbuttonid, interruptbuttonid) { + let genbutton = gradioApp().querySelector(genbuttonid); + let interruptbutton = gradioApp().querySelector(interruptbuttonid); + if (!interruptbutton.offsetParent) { + genbutton.click(); + } + clearInterval(window.generateOnRepeatInterval); + window.generateOnRepeatInterval = setInterval(function() { + if (!interruptbutton.offsetParent) { + genbutton.click(); + } + }, + 500); + }; + + let generateOnRepeat_txt2img = function() { + generateOnRepeat('#txt2img_generate', '#txt2img_interrupt'); + }; + + let generateOnRepeat_img2img = function() { + generateOnRepeat('#img2img_generate', '#img2img_interrupt'); + }; + + appendContextMenuOption('#txt2img_generate', 'Generate forever', generateOnRepeat_txt2img); + appendContextMenuOption('#txt2img_interrupt', 'Generate forever', generateOnRepeat_txt2img); + appendContextMenuOption('#img2img_generate', 'Generate forever', generateOnRepeat_img2img); + appendContextMenuOption('#img2img_interrupt', 'Generate forever', generateOnRepeat_img2img); + + let cancelGenerateForever = function() { + clearInterval(window.generateOnRepeatInterval); + }; + + appendContextMenuOption('#txt2img_interrupt', 'Cancel generate forever', cancelGenerateForever); + appendContextMenuOption('#txt2img_generate', 'Cancel generate forever', cancelGenerateForever); + appendContextMenuOption('#img2img_interrupt', 'Cancel generate forever', cancelGenerateForever); + appendContextMenuOption('#img2img_generate', 'Cancel generate forever', cancelGenerateForever); + +})(); +//End example Context Menu Items + +onAfterUiUpdate(addContextMenuEventListener); diff --git a/javascript/dragdrop.js b/javascript/dragdrop.js new file mode 100644 index 0000000000000000000000000000000000000000..d680daf52f28c8ace0a99706d60e5ea756fb258e --- /dev/null +++ b/javascript/dragdrop.js @@ -0,0 +1,130 @@ +// allows drag-dropping files into gradio image elements, and also pasting images from clipboard + +function isValidImageList(files) { + return files && files?.length === 1 && ['image/png', 'image/gif', 'image/jpeg'].includes(files[0].type); +} + +function dropReplaceImage(imgWrap, files) { + if (!isValidImageList(files)) { + return; + } + + const tmpFile = files[0]; + + imgWrap.querySelector('.modify-upload button + button, .touch-none + div button + button')?.click(); + const callback = () => { + const fileInput = imgWrap.querySelector('input[type="file"]'); + if (fileInput) { + if (files.length === 0) { + files = new DataTransfer(); + files.items.add(tmpFile); + fileInput.files = files.files; + } else { + fileInput.files = files; + } + fileInput.dispatchEvent(new Event('change')); + } + }; + + if (imgWrap.closest('#pnginfo_image')) { + // special treatment for PNG Info tab, wait for fetch request to finish + const oldFetch = window.fetch; + window.fetch = async(input, options) => { + const response = await oldFetch(input, options); + if ('api/predict/' === input) { + const content = await response.text(); + window.fetch = oldFetch; + window.requestAnimationFrame(() => callback()); + return new Response(content, { + status: response.status, + statusText: response.statusText, + headers: response.headers 
+ }); + } + return response; + }; + } else { + window.requestAnimationFrame(() => callback()); + } +} + +function eventHasFiles(e) { + if (!e.dataTransfer || !e.dataTransfer.files) return false; + if (e.dataTransfer.files.length > 0) return true; + if (e.dataTransfer.items.length > 0 && e.dataTransfer.items[0].kind == "file") return true; + + return false; +} + +function dragDropTargetIsPrompt(target) { + if (target?.placeholder && target?.placeholder.indexOf("Prompt") >= 0) return true; + if (target?.parentNode?.parentNode?.className?.indexOf("prompt") > 0) return true; + return false; +} + +window.document.addEventListener('dragover', e => { + const target = e.composedPath()[0]; + if (!eventHasFiles(e)) return; + + var targetImage = target.closest('[data-testid="image"]'); + if (!dragDropTargetIsPrompt(target) && !targetImage) return; + + e.stopPropagation(); + e.preventDefault(); + e.dataTransfer.dropEffect = 'copy'; +}); + +window.document.addEventListener('drop', e => { + const target = e.composedPath()[0]; + if (!eventHasFiles(e)) return; + + if (dragDropTargetIsPrompt(target)) { + e.stopPropagation(); + e.preventDefault(); + + let prompt_target = get_tab_index('tabs') == 1 ? "img2img_prompt_image" : "txt2img_prompt_image"; + + const imgParent = gradioApp().getElementById(prompt_target); + const files = e.dataTransfer.files; + const fileInput = imgParent.querySelector('input[type="file"]'); + if (fileInput) { + fileInput.files = files; + fileInput.dispatchEvent(new Event('change')); + } + } + + var targetImage = target.closest('[data-testid="image"]'); + if (targetImage) { + e.stopPropagation(); + e.preventDefault(); + const files = e.dataTransfer.files; + dropReplaceImage(targetImage, files); + return; + } +}); + +window.addEventListener('paste', e => { + const files = e.clipboardData.files; + if (!isValidImageList(files)) { + return; + } + + const visibleImageFields = [...gradioApp().querySelectorAll('[data-testid="image"]')] + .filter(el => uiElementIsVisible(el)) + .sort((a, b) => uiElementInSight(b) - uiElementInSight(a)); + + + if (!visibleImageFields.length) { + return; + } + + const firstFreeImageField = visibleImageFields + .filter(el => !el.querySelector('img'))?.[0]; + + dropReplaceImage( + firstFreeImageField ? 
+ firstFreeImageField : + visibleImageFields[visibleImageFields.length - 1] + , files + ); +}); diff --git a/javascript/edit-attention.js b/javascript/edit-attention.js new file mode 100644 index 0000000000000000000000000000000000000000..688c2f112d6161877c947d8d17428fac77aa1df6 --- /dev/null +++ b/javascript/edit-attention.js @@ -0,0 +1,148 @@ +function keyupEditAttention(event) { + let target = event.originalTarget || event.composedPath()[0]; + if (!target.matches("*:is([id*='_toprow'] [id*='_prompt'], .prompt) textarea")) return; + if (!(event.metaKey || event.ctrlKey)) return; + + let isPlus = event.key == "ArrowUp"; + let isMinus = event.key == "ArrowDown"; + if (!isPlus && !isMinus) return; + + let selectionStart = target.selectionStart; + let selectionEnd = target.selectionEnd; + let text = target.value; + + function selectCurrentParenthesisBlock(OPEN, CLOSE) { + if (selectionStart !== selectionEnd) return false; + + // Find opening parenthesis around current cursor + const before = text.substring(0, selectionStart); + let beforeParen = before.lastIndexOf(OPEN); + if (beforeParen == -1) return false; + + let beforeClosingParen = before.lastIndexOf(CLOSE); + if (beforeClosingParen != -1 && beforeClosingParen > beforeParen) return false; + + // Find closing parenthesis around current cursor + const after = text.substring(selectionStart); + let afterParen = after.indexOf(CLOSE); + if (afterParen == -1) return false; + + let afterOpeningParen = after.indexOf(OPEN); + if (afterOpeningParen != -1 && afterOpeningParen < afterParen) return false; + + // Set the selection to the text between the parenthesis + const parenContent = text.substring(beforeParen + 1, selectionStart + afterParen); + if (/.*:-?[\d.]+/s.test(parenContent)) { + const lastColon = parenContent.lastIndexOf(":"); + selectionStart = beforeParen + 1; + selectionEnd = selectionStart + lastColon; + } else { + selectionStart = beforeParen + 1; + selectionEnd = selectionStart + parenContent.length; + } + + target.setSelectionRange(selectionStart, selectionEnd); + return true; + } + + function selectCurrentWord() { + if (selectionStart !== selectionEnd) return false; + const whitespace_delimiters = {"Tab": "\t", "Carriage Return": "\r", "Line Feed": "\n"}; + let delimiters = opts.keyedit_delimiters; + + for (let i of opts.keyedit_delimiters_whitespace) { + delimiters += whitespace_delimiters[i]; + } + + // seek backward to find beginning + while (!delimiters.includes(text[selectionStart - 1]) && selectionStart > 0) { + selectionStart--; + } + + // seek forward to find end + while (!delimiters.includes(text[selectionEnd]) && selectionEnd < text.length) { + selectionEnd++; + } + + target.setSelectionRange(selectionStart, selectionEnd); + return true; + } + + // If the user hasn't selected anything, let's select their current parenthesis block or word + if (!selectCurrentParenthesisBlock('<', '>') && !selectCurrentParenthesisBlock('(', ')') && !selectCurrentParenthesisBlock('[', ']')) { + selectCurrentWord(); + } + + event.preventDefault(); + + var closeCharacter = ')'; + var delta = opts.keyedit_precision_attention; + var start = selectionStart > 0 ? 
text[selectionStart - 1] : ""; + var end = text[selectionEnd]; + + if (start == '<') { + closeCharacter = '>'; + delta = opts.keyedit_precision_extra; + } else if (start == '(' && end == ')' || start == '[' && end == ']') { // convert old-style (((emphasis))) + let numParen = 0; + + while (text[selectionStart - numParen - 1] == start && text[selectionEnd + numParen] == end) { + numParen++; + } + + if (start == "[") { + weight = (1 / 1.1) ** numParen; + } else { + weight = 1.1 ** numParen; + } + + weight = Math.round(weight / opts.keyedit_precision_attention) * opts.keyedit_precision_attention; + + text = text.slice(0, selectionStart - numParen) + "(" + text.slice(selectionStart, selectionEnd) + ":" + weight + ")" + text.slice(selectionEnd + numParen); + selectionStart -= numParen - 1; + selectionEnd -= numParen - 1; + } else if (start != '(') { + // do not include spaces at the end + while (selectionEnd > selectionStart && text[selectionEnd - 1] == ' ') { + selectionEnd--; + } + + if (selectionStart == selectionEnd) { + return; + } + + text = text.slice(0, selectionStart) + "(" + text.slice(selectionStart, selectionEnd) + ":1.0)" + text.slice(selectionEnd); + + selectionStart++; + selectionEnd++; + } + + if (text[selectionEnd] != ':') return; + var weightLength = text.slice(selectionEnd + 1).indexOf(closeCharacter) + 1; + var weight = parseFloat(text.slice(selectionEnd + 1, selectionEnd + weightLength)); + if (isNaN(weight)) return; + + weight += isPlus ? delta : -delta; + weight = parseFloat(weight.toPrecision(12)); + if (Number.isInteger(weight)) weight += ".0"; + + if (closeCharacter == ')' && weight == 1) { + var endParenPos = text.substring(selectionEnd).indexOf(')'); + text = text.slice(0, selectionStart - 1) + text.slice(selectionStart, selectionEnd) + text.slice(selectionEnd + endParenPos + 1); + selectionStart--; + selectionEnd--; + } else { + text = text.slice(0, selectionEnd + 1) + weight + text.slice(selectionEnd + weightLength); + } + + target.focus(); + target.value = text; + target.selectionStart = selectionStart; + target.selectionEnd = selectionEnd; + + updateInput(target); +} + +addEventListener('keydown', (event) => { + keyupEditAttention(event); +}); diff --git a/javascript/edit-order.js b/javascript/edit-order.js new file mode 100644 index 0000000000000000000000000000000000000000..ed4ef9ac399a6d0bd83435958dc4d46837760c6a --- /dev/null +++ b/javascript/edit-order.js @@ -0,0 +1,41 @@ +/* alt+left/right moves text in prompt */ + +function keyupEditOrder(event) { + if (!opts.keyedit_move) return; + + let target = event.originalTarget || event.composedPath()[0]; + if (!target.matches("*:is([id*='_toprow'] [id*='_prompt'], .prompt) textarea")) return; + if (!event.altKey) return; + + let isLeft = event.key == "ArrowLeft"; + let isRight = event.key == "ArrowRight"; + if (!isLeft && !isRight) return; + event.preventDefault(); + + let selectionStart = target.selectionStart; + let selectionEnd = target.selectionEnd; + let text = target.value; + let items = text.split(","); + let indexStart = (text.slice(0, selectionStart).match(/,/g) || []).length; + let indexEnd = (text.slice(0, selectionEnd).match(/,/g) || []).length; + let range = indexEnd - indexStart + 1; + + if (isLeft && indexStart > 0) { + items.splice(indexStart - 1, 0, ...items.splice(indexStart, range)); + target.value = items.join(); + target.selectionStart = items.slice(0, indexStart - 1).join().length + (indexStart == 1 ? 
0 : 1); + target.selectionEnd = items.slice(0, indexEnd).join().length; + } else if (isRight && indexEnd < items.length - 1) { + items.splice(indexStart + 1, 0, ...items.splice(indexStart, range)); + target.value = items.join(); + target.selectionStart = items.slice(0, indexStart + 1).join().length + 1; + target.selectionEnd = items.slice(0, indexEnd + 2).join().length; + } + + event.preventDefault(); + updateInput(target); +} + +addEventListener('keydown', (event) => { + keyupEditOrder(event); +}); diff --git a/javascript/extensions.js b/javascript/extensions.js new file mode 100644 index 0000000000000000000000000000000000000000..cc8ee220b170f2df39d5267a1181f856c53d2c83 --- /dev/null +++ b/javascript/extensions.js @@ -0,0 +1,95 @@ + +function extensions_apply(_disabled_list, _update_list, disable_all) { + var disable = []; + var update = []; + const extensions_input = gradioApp().querySelectorAll('#extensions input[type="checkbox"]'); + if (extensions_input.length == 0) { + throw Error("Extensions page not yet loaded."); + } + extensions_input.forEach(function(x) { + if (x.name.startsWith("enable_") && !x.checked) { + disable.push(x.name.substring(7)); + } + + if (x.name.startsWith("update_") && x.checked) { + update.push(x.name.substring(7)); + } + }); + + restart_reload(); + + return [JSON.stringify(disable), JSON.stringify(update), disable_all]; +} + +function extensions_check() { + var disable = []; + + gradioApp().querySelectorAll('#extensions input[type="checkbox"]').forEach(function(x) { + if (x.name.startsWith("enable_") && !x.checked) { + disable.push(x.name.substring(7)); + } + }); + + gradioApp().querySelectorAll('#extensions .extension_status').forEach(function(x) { + x.innerHTML = "Loading..."; + }); + + + var id = randomId(); + requestProgress(id, gradioApp().getElementById('extensions_installed_html'), null, function() { + + }); + + return [id, JSON.stringify(disable)]; +} + +function install_extension_from_index(button, url) { + button.disabled = "disabled"; + button.value = "Installing..."; + + var textarea = gradioApp().querySelector('#extension_to_install textarea'); + textarea.value = url; + updateInput(textarea); + + gradioApp().querySelector('#install_extension_button').click(); +} + +function config_state_confirm_restore(_, config_state_name, config_restore_type) { + if (config_state_name == "Current") { + return [false, config_state_name, config_restore_type]; + } + let restored = ""; + if (config_restore_type == "extensions") { + restored = "all saved extension versions"; + } else if (config_restore_type == "webui") { + restored = "the webui version"; + } else { + restored = "the webui version and all saved extension versions"; + } + let confirmed = confirm("Are you sure you want to restore from this state?\nThis will reset " + restored + "."); + if (confirmed) { + restart_reload(); + gradioApp().querySelectorAll('#extensions .extension_status').forEach(function(x) { + x.innerHTML = "Loading..."; + }); + } + return [confirmed, config_state_name, config_restore_type]; +} + +function toggle_all_extensions(event) { + gradioApp().querySelectorAll('#extensions .extension_toggle').forEach(function(checkbox_el) { + checkbox_el.checked = event.target.checked; + }); +} + +function toggle_extension() { + let all_extensions_toggled = true; + for (const checkbox_el of gradioApp().querySelectorAll('#extensions .extension_toggle')) { + if (!checkbox_el.checked) { + all_extensions_toggled = false; + break; + } + } + + gradioApp().querySelector('#extensions 
.all_extensions_toggle').checked = all_extensions_toggled; +} diff --git a/javascript/extraNetworks.js b/javascript/extraNetworks.js new file mode 100644 index 0000000000000000000000000000000000000000..d5855fe96a4dd3ae9307013c016047b8b94108ad --- /dev/null +++ b/javascript/extraNetworks.js @@ -0,0 +1,647 @@ +function toggleCss(key, css, enable) { + var style = document.getElementById(key); + if (enable && !style) { + style = document.createElement('style'); + style.id = key; + style.type = 'text/css'; + document.head.appendChild(style); + } + if (style && !enable) { + document.head.removeChild(style); + } + if (style) { + style.innerHTML == ''; + style.appendChild(document.createTextNode(css)); + } +} + +function setupExtraNetworksForTab(tabname) { + function registerPrompt(tabname, id) { + var textarea = gradioApp().querySelector("#" + id + " > label > textarea"); + + if (!activePromptTextarea[tabname]) { + activePromptTextarea[tabname] = textarea; + } + + textarea.addEventListener("focus", function() { + activePromptTextarea[tabname] = textarea; + }); + } + + var tabnav = gradioApp().querySelector('#' + tabname + '_extra_tabs > div.tab-nav'); + var controlsDiv = document.createElement('DIV'); + controlsDiv.classList.add('extra-networks-controls-div'); + tabnav.appendChild(controlsDiv); + tabnav.insertBefore(controlsDiv, null); + + var this_tab = gradioApp().querySelector('#' + tabname + '_extra_tabs'); + this_tab.querySelectorAll(":scope > [id^='" + tabname + "_']").forEach(function(elem) { + // tabname_full = {tabname}_{extra_networks_tabname} + var tabname_full = elem.id; + var search = gradioApp().querySelector("#" + tabname_full + "_extra_search"); + var sort_mode = gradioApp().querySelector("#" + tabname_full + "_extra_sort"); + var sort_dir = gradioApp().querySelector("#" + tabname_full + "_extra_sort_dir"); + var refresh = gradioApp().querySelector("#" + tabname_full + "_extra_refresh"); + + // If any of the buttons above don't exist, we want to skip this iteration of the loop. + if (!search || !sort_mode || !sort_dir || !refresh) { + return; // `return` is equivalent of `continue` but for forEach loops. + } + + var applyFilter = function(force) { + var searchTerm = search.value.toLowerCase(); + gradioApp().querySelectorAll('#' + tabname + '_extra_tabs div.card').forEach(function(elem) { + var searchOnly = elem.querySelector('.search_only'); + var text = Array.prototype.map.call(elem.querySelectorAll('.search_terms'), function(t) { + return t.textContent.toLowerCase(); + }).join(" "); + + var visible = text.indexOf(searchTerm) != -1; + if (searchOnly && searchTerm.length < 4) { + visible = false; + } + if (visible) { + elem.classList.remove("hidden"); + } else { + elem.classList.add("hidden"); + } + }); + + applySort(force); + }; + + var applySort = function(force) { + var cards = gradioApp().querySelectorAll('#' + tabname + '_extra_tabs div.card'); + var reverse = sort_dir.dataset.sortdir == "Descending"; + var sortKey = sort_mode.dataset.sortmode.toLowerCase().replace("sort", "").replaceAll(" ", "_").replace(/_+$/, "").trim() || "name"; + sortKey = "sort" + sortKey.charAt(0).toUpperCase() + sortKey.slice(1); + var sortKeyStore = sortKey + "-" + (reverse ? 
"Descending" : "Ascending") + "-" + cards.length; + + if (sortKeyStore == sort_mode.dataset.sortkey && !force) { + return; + } + sort_mode.dataset.sortkey = sortKeyStore; + + cards.forEach(function(card) { + card.originalParentElement = card.parentElement; + }); + var sortedCards = Array.from(cards); + sortedCards.sort(function(cardA, cardB) { + var a = cardA.dataset[sortKey]; + var b = cardB.dataset[sortKey]; + if (!isNaN(a) && !isNaN(b)) { + return parseInt(a) - parseInt(b); + } + + return (a < b ? -1 : (a > b ? 1 : 0)); + }); + if (reverse) { + sortedCards.reverse(); + } + cards.forEach(function(card) { + card.remove(); + }); + sortedCards.forEach(function(card) { + card.originalParentElement.appendChild(card); + }); + }; + + search.addEventListener("input", applyFilter); + applySort(); + applyFilter(); + extraNetworksApplySort[tabname_full] = applySort; + extraNetworksApplyFilter[tabname_full] = applyFilter; + + var controls = gradioApp().querySelector("#" + tabname_full + "_controls"); + controlsDiv.insertBefore(controls, null); + + if (elem.style.display != "none") { + extraNetworksShowControlsForPage(tabname, tabname_full); + } + }); + + registerPrompt(tabname, tabname + "_prompt"); + registerPrompt(tabname, tabname + "_neg_prompt"); +} + +function extraNetworksMovePromptToTab(tabname, id, showPrompt, showNegativePrompt) { + if (!gradioApp().querySelector('.toprow-compact-tools')) return; // only applicable for compact prompt layout + + var promptContainer = gradioApp().getElementById(tabname + '_prompt_container'); + var prompt = gradioApp().getElementById(tabname + '_prompt_row'); + var negPrompt = gradioApp().getElementById(tabname + '_neg_prompt_row'); + var elem = id ? gradioApp().getElementById(id) : null; + + if (showNegativePrompt && elem) { + elem.insertBefore(negPrompt, elem.firstChild); + } else { + promptContainer.insertBefore(negPrompt, promptContainer.firstChild); + } + + if (showPrompt && elem) { + elem.insertBefore(prompt, elem.firstChild); + } else { + promptContainer.insertBefore(prompt, promptContainer.firstChild); + } + + if (elem) { + elem.classList.toggle('extra-page-prompts-active', showNegativePrompt || showPrompt); + } +} + + +function extraNetworksShowControlsForPage(tabname, tabname_full) { + gradioApp().querySelectorAll('#' + tabname + '_extra_tabs .extra-networks-controls-div > div').forEach(function(elem) { + var targetId = tabname_full + "_controls"; + elem.style.display = elem.id == targetId ? 
"" : "none"; + }); +} + + +function extraNetworksUnrelatedTabSelected(tabname) { // called from python when user selects an unrelated tab (generate) + extraNetworksMovePromptToTab(tabname, '', false, false); + + extraNetworksShowControlsForPage(tabname, null); +} + +function extraNetworksTabSelected(tabname, id, showPrompt, showNegativePrompt, tabname_full) { // called from python when user selects an extra networks tab + extraNetworksMovePromptToTab(tabname, id, showPrompt, showNegativePrompt); + + extraNetworksShowControlsForPage(tabname, tabname_full); +} + +function applyExtraNetworkFilter(tabname_full) { + var doFilter = function() { + var applyFunction = extraNetworksApplyFilter[tabname_full]; + + if (applyFunction) { + applyFunction(true); + } + }; + setTimeout(doFilter, 1); +} + +function applyExtraNetworkSort(tabname_full) { + var doSort = function() { + extraNetworksApplySort[tabname_full](true); + }; + setTimeout(doSort, 1); +} + +var extraNetworksApplyFilter = {}; +var extraNetworksApplySort = {}; +var activePromptTextarea = {}; + +function setupExtraNetworks() { + setupExtraNetworksForTab('txt2img'); + setupExtraNetworksForTab('img2img'); +} + +var re_extranet = /<([^:^>]+:[^:]+):[\d.]+>(.*)/; +var re_extranet_g = /<([^:^>]+:[^:]+):[\d.]+>/g; + +var re_extranet_neg = /\(([^:^>]+:[\d.]+)\)/; +var re_extranet_g_neg = /\(([^:^>]+:[\d.]+)\)/g; +function tryToRemoveExtraNetworkFromPrompt(textarea, text, isNeg) { + var m = text.match(isNeg ? re_extranet_neg : re_extranet); + var replaced = false; + var newTextareaText; + var extraTextBeforeNet = opts.extra_networks_add_text_separator; + if (m) { + var extraTextAfterNet = m[2]; + var partToSearch = m[1]; + var foundAtPosition = -1; + newTextareaText = textarea.value.replaceAll(isNeg ? re_extranet_g_neg : re_extranet_g, function(found, net, pos) { + m = found.match(isNeg ? re_extranet_neg : re_extranet); + if (m[1] == partToSearch) { + replaced = true; + foundAtPosition = pos; + return ""; + } + return found; + }); + if (foundAtPosition >= 0) { + if (extraTextAfterNet && newTextareaText.substr(foundAtPosition, extraTextAfterNet.length) == extraTextAfterNet) { + newTextareaText = newTextareaText.substr(0, foundAtPosition) + newTextareaText.substr(foundAtPosition + extraTextAfterNet.length); + } + if (newTextareaText.substr(foundAtPosition - extraTextBeforeNet.length, extraTextBeforeNet.length) == extraTextBeforeNet) { + newTextareaText = newTextareaText.substr(0, foundAtPosition - extraTextBeforeNet.length) + newTextareaText.substr(foundAtPosition); + } + } + } else { + newTextareaText = textarea.value.replaceAll(new RegExp(`((?:${extraTextBeforeNet})?${text})`, "g"), ""); + replaced = (newTextareaText != textarea.value); + } + + if (replaced) { + textarea.value = newTextareaText; + return true; + } + + return false; +} + +function updatePromptArea(text, textArea, isNeg) { + if (!tryToRemoveExtraNetworkFromPrompt(textArea, text, isNeg)) { + textArea.value = textArea.value + opts.extra_networks_add_text_separator + text; + } + + updateInput(textArea); +} + +function cardClicked(tabname, textToAdd, textToAddNegative, allowNegativePrompt) { + if (textToAddNegative.length > 0) { + updatePromptArea(textToAdd, gradioApp().querySelector("#" + tabname + "_prompt > label > textarea")); + updatePromptArea(textToAddNegative, gradioApp().querySelector("#" + tabname + "_neg_prompt > label > textarea"), true); + } else { + var textarea = allowNegativePrompt ? 
activePromptTextarea[tabname] : gradioApp().querySelector("#" + tabname + "_prompt > label > textarea"); + updatePromptArea(textToAdd, textarea); + } +} + +function saveCardPreview(event, tabname, filename) { + var textarea = gradioApp().querySelector("#" + tabname + '_preview_filename > label > textarea'); + var button = gradioApp().getElementById(tabname + '_save_preview'); + + textarea.value = filename; + updateInput(textarea); + + button.click(); + + event.stopPropagation(); + event.preventDefault(); +} + +function extraNetworksTreeProcessFileClick(event, btn, tabname, extra_networks_tabname) { + /** + * Processes `onclick` events when user clicks on files in tree. + * + * @param event The generated event. + * @param btn The clicked `tree-list-item` button. + * @param tabname The name of the active tab in the sd webui. Ex: txt2img, img2img, etc. + * @param extra_networks_tabname The id of the active extraNetworks tab. Ex: lora, checkpoints, etc. + */ + // NOTE: Currently unused. + return; +} + +function extraNetworksTreeProcessDirectoryClick(event, btn, tabname, extra_networks_tabname) { + /** + * Processes `onclick` events when user clicks on directories in tree. + * + * Here is how the tree reacts to clicks for various states: + * unselected unopened directory: Diretory is selected and expanded. + * unselected opened directory: Directory is selected. + * selected opened directory: Directory is collapsed and deselected. + * chevron is clicked: Directory is expanded or collapsed. Selected state unchanged. + * + * @param event The generated event. + * @param btn The clicked `tree-list-item` button. + * @param tabname The name of the active tab in the sd webui. Ex: txt2img, img2img, etc. + * @param extra_networks_tabname The id of the active extraNetworks tab. Ex: lora, checkpoints, etc. + */ + var ul = btn.nextElementSibling; + // This is the actual target that the user clicked on within the target button. + // We use this to detect if the chevron was clicked. + var true_targ = event.target; + + function _expand_or_collapse(_ul, _btn) { + // Expands
    if it is collapsed, collapses otherwise. Updates button attributes. + if (_ul.hasAttribute("hidden")) { + _ul.removeAttribute("hidden"); + _btn.dataset.expanded = ""; + } else { + _ul.setAttribute("hidden", ""); + delete _btn.dataset.expanded; + } + } + + function _remove_selected_from_all() { + // Removes the `selected` attribute from all buttons. + var sels = document.querySelectorAll("div.tree-list-content"); + [...sels].forEach(el => { + delete el.dataset.selected; + }); + } + + function _select_button(_btn) { + // Removes `data-selected` attribute from all buttons then adds to passed button. + _remove_selected_from_all(); + _btn.dataset.selected = ""; + } + + function _update_search(_tabname, _extra_networks_tabname, _search_text) { + // Update search input with select button's path. + var search_input_elem = gradioApp().querySelector("#" + tabname + "_" + extra_networks_tabname + "_extra_search"); + search_input_elem.value = _search_text; + updateInput(search_input_elem); + } + + + // If user clicks on the chevron, then we do not select the folder. + if (true_targ.matches(".tree-list-item-action--leading, .tree-list-item-action-chevron")) { + _expand_or_collapse(ul, btn); + } else { + // User clicked anywhere else on the button. + if ("selected" in btn.dataset && !(ul.hasAttribute("hidden"))) { + // If folder is select and open, collapse and deselect button. + _expand_or_collapse(ul, btn); + delete btn.dataset.selected; + _update_search(tabname, extra_networks_tabname, ""); + } else if (!(!("selected" in btn.dataset) && !(ul.hasAttribute("hidden")))) { + // If folder is open and not selected, then we don't collapse; just select. + // NOTE: Double inversion sucks but it is the clearest way to show the branching here. + _expand_or_collapse(ul, btn); + _select_button(btn, tabname, extra_networks_tabname); + _update_search(tabname, extra_networks_tabname, btn.dataset.path); + } else { + // All other cases, just select the button. + _select_button(btn, tabname, extra_networks_tabname); + _update_search(tabname, extra_networks_tabname, btn.dataset.path); + } + } +} + +function extraNetworksTreeOnClick(event, tabname, extra_networks_tabname) { + /** + * Handles `onclick` events for buttons within an `extra-network-tree .tree-list--tree`. + * + * Determines whether the clicked button in the tree is for a file entry or a directory + * then calls the appropriate function. + * + * @param event The generated event. + * @param tabname The name of the active tab in the sd webui. Ex: txt2img, img2img, etc. + * @param extra_networks_tabname The id of the active extraNetworks tab. Ex: lora, checkpoints, etc. + */ + var btn = event.currentTarget; + var par = btn.parentElement; + if (par.dataset.treeEntryType === "file") { + extraNetworksTreeProcessFileClick(event, btn, tabname, extra_networks_tabname); + } else { + extraNetworksTreeProcessDirectoryClick(event, btn, tabname, extra_networks_tabname); + } +} + +function extraNetworksControlSortOnClick(event, tabname, extra_networks_tabname) { + /** + * Handles `onclick` events for the Sort Mode button. + * + * Modifies the data attributes of the Sort Mode button to cycle between + * various sorting modes. + * + * @param event The generated event. + * @param tabname The name of the active tab in the sd webui. Ex: txt2img, img2img, etc. + * @param extra_networks_tabname The id of the active extraNetworks tab. Ex: lora, checkpoints, etc. 
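+     * Note: repeated clicks cycle the mode in the order path -> name -> date_created -> date_modified -> path.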
+ */ + var curr_mode = event.currentTarget.dataset.sortmode; + var el_sort_dir = gradioApp().querySelector("#" + tabname + "_" + extra_networks_tabname + "_extra_sort_dir"); + var sort_dir = el_sort_dir.dataset.sortdir; + if (curr_mode == "path") { + event.currentTarget.dataset.sortmode = "name"; + event.currentTarget.dataset.sortkey = "sortName-" + sort_dir + "-640"; + event.currentTarget.setAttribute("title", "Sort by filename"); + } else if (curr_mode == "name") { + event.currentTarget.dataset.sortmode = "date_created"; + event.currentTarget.dataset.sortkey = "sortDate_created-" + sort_dir + "-640"; + event.currentTarget.setAttribute("title", "Sort by date created"); + } else if (curr_mode == "date_created") { + event.currentTarget.dataset.sortmode = "date_modified"; + event.currentTarget.dataset.sortkey = "sortDate_modified-" + sort_dir + "-640"; + event.currentTarget.setAttribute("title", "Sort by date modified"); + } else { + event.currentTarget.dataset.sortmode = "path"; + event.currentTarget.dataset.sortkey = "sortPath-" + sort_dir + "-640"; + event.currentTarget.setAttribute("title", "Sort by path"); + } + applyExtraNetworkSort(tabname + "_" + extra_networks_tabname); +} + +function extraNetworksControlSortDirOnClick(event, tabname, extra_networks_tabname) { + /** + * Handles `onclick` events for the Sort Direction button. + * + * Modifies the data attributes of the Sort Direction button to cycle between + * ascending and descending sort directions. + * + * @param event The generated event. + * @param tabname The name of the active tab in the sd webui. Ex: txt2img, img2img, etc. + * @param extra_networks_tabname The id of the active extraNetworks tab. Ex: lora, checkpoints, etc. + */ + if (event.currentTarget.dataset.sortdir == "Ascending") { + event.currentTarget.dataset.sortdir = "Descending"; + event.currentTarget.setAttribute("title", "Sort descending"); + } else { + event.currentTarget.dataset.sortdir = "Ascending"; + event.currentTarget.setAttribute("title", "Sort ascending"); + } + applyExtraNetworkSort(tabname + "_" + extra_networks_tabname); +} + +function extraNetworksControlTreeViewOnClick(event, tabname, extra_networks_tabname) { + /** + * Handles `onclick` events for the Tree View button. + * + * Toggles the tree view in the extra networks pane. + * + * @param event The generated event. + * @param tabname The name of the active tab in the sd webui. Ex: txt2img, img2img, etc. + * @param extra_networks_tabname The id of the active extraNetworks tab. Ex: lora, checkpoints, etc. + */ + gradioApp().getElementById(tabname + "_" + extra_networks_tabname + "_tree").classList.toggle("hidden"); + event.currentTarget.classList.toggle("extra-network-control--enabled"); +} + +function extraNetworksControlRefreshOnClick(event, tabname, extra_networks_tabname) { + /** + * Handles `onclick` events for the Refresh Page button. + * + * In order to actually call the python functions in `ui_extra_networks.py` + * to refresh the page, we created an empty gradio button in that file with an + * event handler that refreshes the page. So what this function here does + * is it manually raises a `click` event on that button. + * + * @param event The generated event. + * @param tabname The name of the active tab in the sd webui. Ex: txt2img, img2img, etc. + * @param extra_networks_tabname The id of the active extraNetworks tab. Ex: lora, checkpoints, etc. 
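+     * Note: the hidden refresh button is looked up by the element id {tabname}_{extra_networks_tabname}_extra_refresh_internal.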
+ */ + var btn_refresh_internal = gradioApp().getElementById(tabname + "_" + extra_networks_tabname + "_extra_refresh_internal"); + btn_refresh_internal.dispatchEvent(new Event("click")); +} + +var globalPopup = null; +var globalPopupInner = null; + +function closePopup() { + if (!globalPopup) return; + globalPopup.style.display = "none"; +} + +function popup(contents) { + if (!globalPopup) { + globalPopup = document.createElement('div'); + globalPopup.classList.add('global-popup'); + + var close = document.createElement('div'); + close.classList.add('global-popup-close'); + close.addEventListener("click", closePopup); + close.title = "Close"; + globalPopup.appendChild(close); + + globalPopupInner = document.createElement('div'); + globalPopupInner.classList.add('global-popup-inner'); + globalPopup.appendChild(globalPopupInner); + + gradioApp().querySelector('.main').appendChild(globalPopup); + } + + globalPopupInner.innerHTML = ''; + globalPopupInner.appendChild(contents); + + globalPopup.style.display = "flex"; +} + +var storedPopupIds = {}; +function popupId(id) { + if (!storedPopupIds[id]) { + storedPopupIds[id] = gradioApp().getElementById(id); + } + + popup(storedPopupIds[id]); +} + +function extraNetworksShowMetadata(text) { + var elem = document.createElement('pre'); + elem.classList.add('popup-metadata'); + elem.textContent = text; + + popup(elem); +} + +function requestGet(url, data, handler, errorHandler) { + var xhr = new XMLHttpRequest(); + var args = Object.keys(data).map(function(k) { + return encodeURIComponent(k) + '=' + encodeURIComponent(data[k]); + }).join('&'); + xhr.open("GET", url + "?" + args, true); + + xhr.onreadystatechange = function() { + if (xhr.readyState === 4) { + if (xhr.status === 200) { + try { + var js = JSON.parse(xhr.responseText); + handler(js); + } catch (error) { + console.error(error); + errorHandler(); + } + } else { + errorHandler(); + } + } + }; + var js = JSON.stringify(data); + xhr.send(js); +} + +function extraNetworksCopyCardPath(event, path) { + navigator.clipboard.writeText(path); + event.stopPropagation(); +} + +function extraNetworksRequestMetadata(event, extraPage, cardName) { + var showError = function() { + extraNetworksShowMetadata("there was an error getting metadata"); + }; + + requestGet("./sd_extra_networks/metadata", {page: extraPage, item: cardName}, function(data) { + if (data && data.metadata) { + extraNetworksShowMetadata(data.metadata); + } else { + showError(); + } + }, showError); + + event.stopPropagation(); +} + +var extraPageUserMetadataEditors = {}; + +function extraNetworksEditUserMetadata(event, tabname, extraPage, cardName) { + var id = tabname + '_' + extraPage + '_edit_user_metadata'; + + var editor = extraPageUserMetadataEditors[id]; + if (!editor) { + editor = {}; + editor.page = gradioApp().getElementById(id); + editor.nameTextarea = gradioApp().querySelector("#" + id + "_name" + ' textarea'); + editor.button = gradioApp().querySelector("#" + id + "_button"); + extraPageUserMetadataEditors[id] = editor; + } + + editor.nameTextarea.value = cardName; + updateInput(editor.nameTextarea); + + editor.button.click(); + + popup(editor.page); + + event.stopPropagation(); +} + +function extraNetworksRefreshSingleCard(page, tabname, name) { + requestGet("./sd_extra_networks/get-single-card", {page: page, tabname: tabname, name: name}, function(data) { + if (data && data.html) { + var card = gradioApp().querySelector(`#${tabname}_${page.replace(" ", "_")}_cards > .card[data-name="${name}"]`); + + var newDiv = 
document.createElement('DIV'); + newDiv.innerHTML = data.html; + var newCard = newDiv.firstElementChild; + + newCard.style.display = ''; + card.parentElement.insertBefore(newCard, card); + card.parentElement.removeChild(card); + } + }); +} + +window.addEventListener("keydown", function(event) { + if (event.key == "Escape") { + closePopup(); + } +}); + +/** + * Setup custom loading for this script. + * We need to wait for all of our HTML to be generated in the extra networks tabs + * before we can actually run the `setupExtraNetworks` function. + * The `onUiLoaded` function actually runs before all of our extra network tabs are + * finished generating. Thus we needed this new method. + * + */ + +var uiAfterScriptsCallbacks = []; +var uiAfterScriptsTimeout = null; +var executedAfterScripts = false; + +function scheduleAfterScriptsCallbacks() { + clearTimeout(uiAfterScriptsTimeout); + uiAfterScriptsTimeout = setTimeout(function() { + executeCallbacks(uiAfterScriptsCallbacks); + }, 200); +} + +onUiLoaded(function() { + var mutationObserver = new MutationObserver(function(m) { + let existingSearchfields = gradioApp().querySelectorAll("[id$='_extra_search']").length; + let neededSearchfields = gradioApp().querySelectorAll("[id$='_extra_tabs'] > .tab-nav > button").length - 2; + + if (!executedAfterScripts && existingSearchfields >= neededSearchfields) { + mutationObserver.disconnect(); + executedAfterScripts = true; + scheduleAfterScriptsCallbacks(); + } + }); + mutationObserver.observe(gradioApp(), {childList: true, subtree: true}); +}); + +uiAfterScriptsCallbacks.push(setupExtraNetworks); diff --git a/javascript/generationParams.js b/javascript/generationParams.js new file mode 100644 index 0000000000000000000000000000000000000000..7c0fd221d63313ab063f545570eb0da780b9da3a --- /dev/null +++ b/javascript/generationParams.js @@ -0,0 +1,35 @@ +// attaches listeners to the txt2img and img2img galleries to update displayed generation param text when the image changes + +let txt2img_gallery, img2img_gallery, modal = undefined; +onAfterUiUpdate(function() { + if (!txt2img_gallery) { + txt2img_gallery = attachGalleryListeners("txt2img"); + } + if (!img2img_gallery) { + img2img_gallery = attachGalleryListeners("img2img"); + } + if (!modal) { + modal = gradioApp().getElementById('lightboxModal'); + modalObserver.observe(modal, {attributes: true, attributeFilter: ['style']}); + } +}); + +let modalObserver = new MutationObserver(function(mutations) { + mutations.forEach(function(mutationRecord) { + let selectedTab = gradioApp().querySelector('#tabs div button.selected')?.innerText; + if (mutationRecord.target.style.display === 'none' && (selectedTab === 'txt2img' || selectedTab === 'img2img')) { + gradioApp().getElementById(selectedTab + "_generation_info_button")?.click(); + } + }); +}); + +function attachGalleryListeners(tab_name) { + var gallery = gradioApp().querySelector('#' + tab_name + '_gallery'); + gallery?.addEventListener('click', () => gradioApp().getElementById(tab_name + "_generation_info_button").click()); + gallery?.addEventListener('keydown', (e) => { + if (e.keyCode == 37 || e.keyCode == 39) { // left or right arrow + gradioApp().getElementById(tab_name + "_generation_info_button").click(); + } + }); + return gallery; +} diff --git a/javascript/hints.js b/javascript/hints.js new file mode 100644 index 0000000000000000000000000000000000000000..6de9372e8ea8c9fb032351e241d0f9c265995290 --- /dev/null +++ b/javascript/hints.js @@ -0,0 +1,203 @@ +// mouseover tooltips for various UI elements + 
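+// The `titles` map below keys the built-in tooltip strings by an element's visible text, value,
+// data-value, or CSS class; updateTooltip() further down resolves an element's hint as
+// localization[titles[key]] || titles[key], so localized strings take precedence over these English defaults.
+// As a rough, hypothetical sketch (the key below is made up), another script could register an extra hint
+// simply by adding an entry, and the onUiUpdate handler at the bottom of this file would apply it to
+// matching elements as they appear:
+//
+//     titles["My Custom Button"] = "Explains what this hypothetical button does";
+//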
+var titles = { + "Sampling steps": "How many times to improve the generated image iteratively; higher values take longer; very low values can produce bad results", + "Sampling method": "Which algorithm to use to produce the image", + "GFPGAN": "Restore low quality faces using GFPGAN neural network", + "Euler a": "Euler Ancestral - very creative, each can get a completely different picture depending on step count, setting steps higher than 30-40 does not help", + "DDIM": "Denoising Diffusion Implicit Models - best at inpainting", + "UniPC": "Unified Predictor-Corrector Framework for Fast Sampling of Diffusion Models", + "DPM adaptive": "Ignores step count - uses a number of steps determined by the CFG and resolution", + + "\u{1F4D0}": "Auto detect size from img2img", + "Batch count": "How many batches of images to create (has no impact on generation performance or VRAM usage)", + "Batch size": "How many image to create in a single batch (increases generation performance at cost of higher VRAM usage)", + "CFG Scale": "Classifier Free Guidance Scale - how strongly the image should conform to prompt - lower values produce more creative results", + "Seed": "A value that determines the output of random number generator - if you create an image with same parameters and seed as another image, you'll get the same result", + "\u{1f3b2}\ufe0f": "Set seed to -1, which will cause a new random number to be used every time", + "\u267b\ufe0f": "Reuse seed from last generation, mostly useful if it was randomized", + "\u2199\ufe0f": "Read generation parameters from prompt or last generation if prompt is empty into user interface.", + "\u{1f4c2}": "Open images output directory", + "\u{1f4be}": "Save style", + "\u{1f5d1}\ufe0f": "Clear prompt", + "\u{1f4cb}": "Apply selected styles to current prompt", + "\u{1f4d2}": "Paste available values into the field", + "\u{1f3b4}": "Show/hide extra networks", + "\u{1f300}": "Restore progress", + + "Inpaint a part of image": "Draw a mask over an image, and the script will regenerate the masked area with content according to prompt", + "SD upscale": "Upscale image normally, split result into tiles, improve each tile using img2img, merge whole image back", + + "Just resize": "Resize image to target resolution. Unless height and width match, you will get incorrect aspect ratio.", + "Crop and resize": "Resize the image so that entirety of target resolution is filled with the image. Crop parts that stick out.", + "Resize and fill": "Resize the image so that entirety of image is inside target resolution. Fill empty space with image's colors.", + + "Mask blur": "How much to blur the mask before processing, in pixels.", + "Masked content": "What to put inside the masked area before processing it with Stable Diffusion.", + "fill": "fill it with colors of the image", + "original": "keep whatever was there originally", + "latent noise": "fill it with latent space noise", + "latent nothing": "fill it with latent space zeroes", + "Inpaint at full resolution": "Upscale masked region to target resolution, do inpainting, downscale back and paste into original image", + + "Denoising strength": "Determines how little respect the algorithm should have for image's content. At 0, nothing will change, and at 1 you'll get an unrelated image. 
With values below 1.0, processing will take less steps than the Sampling Steps slider specifies.", + + "Skip": "Stop processing current image and continue processing.", + "Interrupt": "Stop processing images and return any results accumulated so far.", + "Save": "Write image to a directory (default - log/images) and generation parameters into csv file.", + + "X values": "Separate values for X axis using commas.", + "Y values": "Separate values for Y axis using commas.", + + "None": "Do not do anything special", + "Prompt matrix": "Separate prompts into parts using vertical pipe character (|) and the script will create a picture for every combination of them (except for the first part, which will be present in all combinations)", + "X/Y/Z plot": "Create grid(s) where images will have different parameters. Use inputs below to specify which parameters will be shared by columns and rows", + "Custom code": "Run Python code. Advanced user only. Must run program with --allow-code for this to work", + + "Prompt S/R": "Separate a list of words with commas, and the first word will be used as a keyword: script will search for this word in the prompt, and replace it with others", + "Prompt order": "Separate a list of words with commas, and the script will make a variation of prompt with those words for their every possible order", + + "Tiling": "Produce an image that can be tiled.", + "Tile overlap": "For SD upscale, how much overlap in pixels should there be between tiles. Tiles overlap so that when they are merged back into one picture, there is no clearly visible seam.", + + "Variation seed": "Seed of a different picture to be mixed into the generation.", + "Variation strength": "How strong of a variation to produce. At 0, there will be no effect. At 1, you will get the complete picture with variation seed (except for ancestral samplers, where you will just get something).", + "Resize seed from height": "Make an attempt to produce a picture similar to what would have been produced with same seed at specified resolution", + "Resize seed from width": "Make an attempt to produce a picture similar to what would have been produced with same seed at specified resolution", + + "Interrogate": "Reconstruct prompt from existing image and put it into the prompt field.", + + "Images filename pattern": "Use tags like [seed] and [date] to define how filenames for images are chosen. Leave empty for default.", + "Directory name pattern": "Use tags like [seed] and [date] to define how subdirectories for images and grids are chosen. Leave empty for default.", + "Max prompt words": "Set the maximum number of words to be used in the [prompt_words] option; ATTENTION: If the words are too long, they may exceed the maximum length of the file path that the system can handle", + + "Loopback": "Performs img2img processing multiple times. Output images are used as input for the next loop.", + "Loops": "How many times to process an image. Each output is used as the input of the next loop. If set to 1, behavior will be as if this script were not used.", + "Final denoising strength": "The denoising strength for the final loop of each image in the batch.", + "Denoising strength curve": "The denoising curve controls the rate of denoising strength change each loop. Aggressive: Most of the change will happen towards the start of the loops. Linear: Change will be constant through all loops. 
Lazy: Most of the change will happen towards the end of the loops.", + + "Style 1": "Style to apply; styles have components for both positive and negative prompts and apply to both", + "Style 2": "Style to apply; styles have components for both positive and negative prompts and apply to both", + "Apply style": "Insert selected styles into prompt fields", + "Create style": "Save current prompts as a style. If you add the token {prompt} to the text, the style uses that as a placeholder for your prompt when you use the style in the future.", + + "Checkpoint name": "Loads weights from checkpoint before making images. You can either use hash or a part of filename (as seen in settings) for checkpoint name. Recommended to use with Y axis for less switching.", + "Inpainting conditioning mask strength": "Only applies to inpainting models. Determines how strongly to mask off the original image for inpainting and img2img. 1.0 means fully masked, which is the default behaviour. 0.0 means a fully unmasked conditioning. Lower values will help preserve the overall composition of the image, but will struggle with large changes.", + + "Eta noise seed delta": "If this values is non-zero, it will be added to seed and used to initialize RNG for noises when using samplers with Eta. You can use this to produce even more variation of images, or you can use this to match images of other software if you know what you are doing.", + + "Filename word regex": "This regular expression will be used extract words from filename, and they will be joined using the option below into label text used for training. Leave empty to keep filename text as it is.", + "Filename join string": "This string will be used to join split words into a single line if the option above is enabled.", + + "Quicksettings list": "List of setting names, separated by commas, for settings that should go to the quick access bar at the top, rather than the usual setting tab. See modules/shared.py for setting names. Requires restarting to apply.", + + "Weighted sum": "Result = A * (1 - M) + B * M", + "Add difference": "Result = A + (B - C) * M", + "No interpolation": "Result = A", + + "Initialization text": "If the number of tokens is more than the number of vectors, some may be skipped.\nLeave the textbox empty to start with zeroed out vectors", + "Learning rate": "How fast should training go. Low values will take longer to train, high values may fail to converge (not generate accurate results) and/or may break the embedding (This has happened if you see Loss: nan in the training info textbox. If this happens, you need to manually restore your embedding from an older not-broken backup).\n\nYou can set a single numeric value, or multiple learning rates using the syntax:\n\n rate_1:max_steps_1, rate_2:max_steps_2, ...\n\nEG: 0.005:100, 1e-3:1000, 1e-5\n\nWill train with rate of 0.005 for first 100 steps, then 1e-3 until 1000 steps, then 1e-5 for all remaining steps.", + + "Clip skip": "Early stopping parameter for CLIP model; 1 is stop at last layer as usual, 2 is stop at penultimate layer, etc.", + + "Approx NN": "Cheap neural network approximation. Very fast compared to VAE, but produces pictures with 4 times smaller horizontal/vertical resolution and lower quality.", + "Approx cheap": "Very cheap approximation. Very fast compared to VAE, but produces pictures with 8 times smaller horizontal/vertical resolution and extremely low quality.", + + "Hires. 
fix": "Use a two step process to partially create an image at smaller resolution, upscale, and then improve details in it without changing composition", + "Hires steps": "Number of sampling steps for upscaled picture. If 0, uses same as for original.", + "Upscale by": "Adjusts the size of the image by multiplying the original width and height by the selected value. Ignored if either Resize width to or Resize height to are non-zero.", + "Resize width to": "Resizes image to this width. If 0, width is inferred from either of two nearby sliders.", + "Resize height to": "Resizes image to this height. If 0, height is inferred from either of two nearby sliders.", + "Discard weights with matching name": "Regular expression; if weights's name matches it, the weights is not written to the resulting checkpoint. Use ^model_ema to discard EMA weights.", + "Extra networks tab order": "Comma-separated list of tab names; tabs listed here will appear in the extra networks UI first and in order listed.", + "Negative Guidance minimum sigma": "Skip negative prompt for steps where image is already mostly denoised; the higher this value, the more skips there will be; provides increased performance in exchange for minor quality reduction." +}; + +function updateTooltip(element) { + if (element.title) return; // already has a title + + let text = element.textContent; + let tooltip = localization[titles[text]] || titles[text]; + + if (!tooltip) { + let value = element.value; + if (value) tooltip = localization[titles[value]] || titles[value]; + } + + if (!tooltip) { + // Gradio dropdown options have `data-value`. + let dataValue = element.dataset.value; + if (dataValue) tooltip = localization[titles[dataValue]] || titles[dataValue]; + } + + if (!tooltip) { + for (const c of element.classList) { + if (c in titles) { + tooltip = localization[titles[c]] || titles[c]; + break; + } + } + } + + if (tooltip) { + element.title = tooltip; + } +} + +// Nodes to check for adding tooltips. +const tooltipCheckNodes = new Set(); +// Timer for debouncing tooltip check. +let tooltipCheckTimer = null; + +function processTooltipCheckNodes() { + for (const node of tooltipCheckNodes) { + updateTooltip(node); + } + tooltipCheckNodes.clear(); +} + +onUiUpdate(function(mutationRecords) { + for (const record of mutationRecords) { + if (record.type === "childList" && record.target.classList.contains("options")) { + // This smells like a Gradio dropdown menu having changed, + // so let's enqueue an update for the input element that shows the current value. + let wrap = record.target.parentNode; + let input = wrap?.querySelector("input"); + if (input) { + input.title = ""; // So we'll even have a chance to update it. 
+ tooltipCheckNodes.add(input); + } + } + for (const node of record.addedNodes) { + if (node.nodeType === Node.ELEMENT_NODE && !node.classList.contains("hide")) { + if (!node.title) { + if ( + node.tagName === "SPAN" || + node.tagName === "BUTTON" || + node.tagName === "P" || + node.tagName === "INPUT" || + (node.tagName === "LI" && node.classList.contains("item")) // Gradio dropdown item + ) { + tooltipCheckNodes.add(node); + } + } + node.querySelectorAll('span, button, p').forEach(n => tooltipCheckNodes.add(n)); + } + } + } + if (tooltipCheckNodes.size) { + clearTimeout(tooltipCheckTimer); + tooltipCheckTimer = setTimeout(processTooltipCheckNodes, 1000); + } +}); + +onUiLoaded(function() { + for (var comp of window.gradio_config.components) { + if (comp.props.webui_tooltip && comp.props.elem_id) { + var elem = gradioApp().getElementById(comp.props.elem_id); + if (elem) { + elem.title = comp.props.webui_tooltip; + } + } + } +}); diff --git a/javascript/hires_fix.js b/javascript/hires_fix.js new file mode 100644 index 0000000000000000000000000000000000000000..0d04ab3b424338634af3e71a2f9d8796a5f00224 --- /dev/null +++ b/javascript/hires_fix.js @@ -0,0 +1,18 @@ + +function onCalcResolutionHires(enable, width, height, hr_scale, hr_resize_x, hr_resize_y) { + function setInactive(elem, inactive) { + elem.classList.toggle('inactive', !!inactive); + } + + var hrUpscaleBy = gradioApp().getElementById('txt2img_hr_scale'); + var hrResizeX = gradioApp().getElementById('txt2img_hr_resize_x'); + var hrResizeY = gradioApp().getElementById('txt2img_hr_resize_y'); + + gradioApp().getElementById('txt2img_hires_fix_row2').style.display = opts.use_old_hires_fix_width_height ? "none" : ""; + + setInactive(hrUpscaleBy, opts.use_old_hires_fix_width_height || hr_resize_x > 0 || hr_resize_y > 0); + setInactive(hrResizeX, opts.use_old_hires_fix_width_height || hr_resize_x == 0); + setInactive(hrResizeY, opts.use_old_hires_fix_width_height || hr_resize_y == 0); + + return [enable, width, height, hr_scale, hr_resize_x, hr_resize_y]; +} diff --git a/javascript/imageMaskFix.js b/javascript/imageMaskFix.js new file mode 100644 index 0000000000000000000000000000000000000000..900c56f32fdf7128f0433621df25a0fbd14c4e42 --- /dev/null +++ b/javascript/imageMaskFix.js @@ -0,0 +1,43 @@ +/** + * temporary fix for https://github.com/AUTOMATIC1111/stable-diffusion-webui/issues/668 + * @see https://github.com/gradio-app/gradio/issues/1721 + */ +function imageMaskResize() { + const canvases = gradioApp().querySelectorAll('#img2maskimg .touch-none canvas'); + if (!canvases.length) { + window.removeEventListener('resize', imageMaskResize); + return; + } + + const wrapper = canvases[0].closest('.touch-none'); + const previewImage = wrapper.previousElementSibling; + + if (!previewImage.complete) { + previewImage.addEventListener('load', imageMaskResize); + return; + } + + const w = previewImage.width; + const h = previewImage.height; + const nw = previewImage.naturalWidth; + const nh = previewImage.naturalHeight; + const portrait = nh > nw; + + const wW = Math.min(w, portrait ? h / nh * nw : w / nw * nw); + const wH = Math.min(h, portrait ? 
h / nh * nh : w / nw * nh); + + wrapper.style.width = `${wW}px`; + wrapper.style.height = `${wH}px`; + wrapper.style.left = `0px`; + wrapper.style.top = `0px`; + + canvases.forEach(c => { + c.style.width = c.style.height = ''; + c.style.maxWidth = '100%'; + c.style.maxHeight = '100%'; + c.style.objectFit = 'contain'; + }); +} + +onAfterUiUpdate(imageMaskResize); +window.addEventListener('resize', imageMaskResize); diff --git a/javascript/imageviewer.js b/javascript/imageviewer.js new file mode 100644 index 0000000000000000000000000000000000000000..625c5d148df27f33de315b9db0a446176b7ab8cb --- /dev/null +++ b/javascript/imageviewer.js @@ -0,0 +1,262 @@ +// A full size 'lightbox' preview modal shown when left clicking on gallery previews +function closeModal() { + gradioApp().getElementById("lightboxModal").style.display = "none"; +} + +function showModal(event) { + const source = event.target || event.srcElement; + const modalImage = gradioApp().getElementById("modalImage"); + const lb = gradioApp().getElementById("lightboxModal"); + modalImage.src = source.src; + if (modalImage.style.display === 'none') { + lb.style.setProperty('background-image', 'url(' + source.src + ')'); + } + lb.style.display = "flex"; + lb.focus(); + + const tabTxt2Img = gradioApp().getElementById("tab_txt2img"); + const tabImg2Img = gradioApp().getElementById("tab_img2img"); + // show the save button in modal only on txt2img or img2img tabs + if (tabTxt2Img.style.display != "none" || tabImg2Img.style.display != "none") { + gradioApp().getElementById("modal_save").style.display = "inline"; + } else { + gradioApp().getElementById("modal_save").style.display = "none"; + } + event.stopPropagation(); +} + +function negmod(n, m) { + return ((n % m) + m) % m; +} + +function updateOnBackgroundChange() { + const modalImage = gradioApp().getElementById("modalImage"); + if (modalImage && modalImage.offsetParent) { + let currentButton = selected_gallery_button(); + let preview = gradioApp().querySelectorAll('.livePreview > img'); + if (opts.js_live_preview_in_modal_lightbox && preview.length > 0) { + // show preview image if available + modalImage.src = preview[preview.length - 1].src; + } else if (currentButton?.children?.length > 0 && modalImage.src != currentButton.children[0].src) { + modalImage.src = currentButton.children[0].src; + if (modalImage.style.display === 'none') { + const modal = gradioApp().getElementById("lightboxModal"); + modal.style.setProperty('background-image', `url(${modalImage.src})`); + } + } + } +} + +function modalImageSwitch(offset) { + var galleryButtons = all_gallery_buttons(); + + if (galleryButtons.length > 1) { + var currentButton = selected_gallery_button(); + + var result = -1; + galleryButtons.forEach(function(v, i) { + if (v == currentButton) { + result = i; + } + }); + + if (result != -1) { + var nextButton = galleryButtons[negmod((result + offset), galleryButtons.length)]; + nextButton.click(); + const modalImage = gradioApp().getElementById("modalImage"); + const modal = gradioApp().getElementById("lightboxModal"); + modalImage.src = nextButton.children[0].src; + if (modalImage.style.display === 'none') { + modal.style.setProperty('background-image', `url(${modalImage.src})`); + } + setTimeout(function() { + modal.focus(); + }, 10); + } + } +} + +function saveImage() { + const tabTxt2Img = gradioApp().getElementById("tab_txt2img"); + const tabImg2Img = gradioApp().getElementById("tab_img2img"); + const saveTxt2Img = "save_txt2img"; + const saveImg2Img = "save_img2img"; + if 
(tabTxt2Img.style.display != "none") { + gradioApp().getElementById(saveTxt2Img).click(); + } else if (tabImg2Img.style.display != "none") { + gradioApp().getElementById(saveImg2Img).click(); + } else { + console.error("missing implementation for saving modal of this type"); + } +} + +function modalSaveImage(event) { + saveImage(); + event.stopPropagation(); +} + +function modalNextImage(event) { + modalImageSwitch(1); + event.stopPropagation(); +} + +function modalPrevImage(event) { + modalImageSwitch(-1); + event.stopPropagation(); +} + +function modalKeyHandler(event) { + switch (event.key) { + case "s": + saveImage(); + break; + case "ArrowLeft": + modalPrevImage(event); + break; + case "ArrowRight": + modalNextImage(event); + break; + case "Escape": + closeModal(); + break; + } +} + +function setupImageForLightbox(e) { + if (e.dataset.modded) { + return; + } + + e.dataset.modded = true; + e.style.cursor = 'pointer'; + e.style.userSelect = 'none'; + + var isFirefox = navigator.userAgent.toLowerCase().indexOf('firefox') > -1; + + // For Firefox, listening on click first switched to next image then shows the lightbox. + // If you know how to fix this without switching to mousedown event, please. + // For other browsers the event is click to make it possiblr to drag picture. + var event = isFirefox ? 'mousedown' : 'click'; + + e.addEventListener(event, function(evt) { + if (evt.button == 1) { + open(evt.target.src); + evt.preventDefault(); + return; + } + if (!opts.js_modal_lightbox || evt.button != 0) return; + + modalZoomSet(gradioApp().getElementById('modalImage'), opts.js_modal_lightbox_initially_zoomed); + evt.preventDefault(); + showModal(evt); + }, true); + +} + +function modalZoomSet(modalImage, enable) { + if (modalImage) modalImage.classList.toggle('modalImageFullscreen', !!enable); +} + +function modalZoomToggle(event) { + var modalImage = gradioApp().getElementById("modalImage"); + modalZoomSet(modalImage, !modalImage.classList.contains('modalImageFullscreen')); + event.stopPropagation(); +} + +function modalTileImageToggle(event) { + const modalImage = gradioApp().getElementById("modalImage"); + const modal = gradioApp().getElementById("lightboxModal"); + const isTiling = modalImage.style.display === 'none'; + if (isTiling) { + modalImage.style.display = 'block'; + modal.style.setProperty('background-image', 'none'); + } else { + modalImage.style.display = 'none'; + modal.style.setProperty('background-image', `url(${modalImage.src})`); + } + + event.stopPropagation(); +} + +onAfterUiUpdate(function() { + var fullImg_preview = gradioApp().querySelectorAll('.gradio-gallery > div > img'); + if (fullImg_preview != null) { + fullImg_preview.forEach(setupImageForLightbox); + } + updateOnBackgroundChange(); +}); + +document.addEventListener("DOMContentLoaded", function() { + //const modalFragment = document.createDocumentFragment(); + const modal = document.createElement('div'); + modal.onclick = closeModal; + modal.id = "lightboxModal"; + modal.tabIndex = 0; + modal.addEventListener('keydown', modalKeyHandler, true); + + const modalControls = document.createElement('div'); + modalControls.className = 'modalControls gradio-container'; + modal.append(modalControls); + + const modalZoom = document.createElement('span'); + modalZoom.className = 'modalZoom cursor'; + modalZoom.innerHTML = '⤡'; + modalZoom.addEventListener('click', modalZoomToggle, true); + modalZoom.title = "Toggle zoomed view"; + modalControls.appendChild(modalZoom); + + const modalTileImage = 
document.createElement('span'); + modalTileImage.className = 'modalTileImage cursor'; + modalTileImage.innerHTML = '⊞'; + modalTileImage.addEventListener('click', modalTileImageToggle, true); + modalTileImage.title = "Preview tiling"; + modalControls.appendChild(modalTileImage); + + const modalSave = document.createElement("span"); + modalSave.className = "modalSave cursor"; + modalSave.id = "modal_save"; + modalSave.innerHTML = "🖫"; + modalSave.addEventListener("click", modalSaveImage, true); + modalSave.title = "Save Image(s)"; + modalControls.appendChild(modalSave); + + const modalClose = document.createElement('span'); + modalClose.className = 'modalClose cursor'; + modalClose.innerHTML = '×'; + modalClose.onclick = closeModal; + modalClose.title = "Close image viewer"; + modalControls.appendChild(modalClose); + + const modalImage = document.createElement('img'); + modalImage.id = 'modalImage'; + modalImage.onclick = closeModal; + modalImage.tabIndex = 0; + modalImage.addEventListener('keydown', modalKeyHandler, true); + modal.appendChild(modalImage); + + const modalPrev = document.createElement('a'); + modalPrev.className = 'modalPrev'; + modalPrev.innerHTML = '❮'; + modalPrev.tabIndex = 0; + modalPrev.addEventListener('click', modalPrevImage, true); + modalPrev.addEventListener('keydown', modalKeyHandler, true); + modal.appendChild(modalPrev); + + const modalNext = document.createElement('a'); + modalNext.className = 'modalNext'; + modalNext.innerHTML = '❯'; + modalNext.tabIndex = 0; + modalNext.addEventListener('click', modalNextImage, true); + modalNext.addEventListener('keydown', modalKeyHandler, true); + + modal.appendChild(modalNext); + + try { + gradioApp().appendChild(modal); + } catch (e) { + gradioApp().body.appendChild(modal); + } + + document.body.appendChild(modal); + +}); diff --git a/javascript/imageviewerGamepad.js b/javascript/imageviewerGamepad.js new file mode 100644 index 0000000000000000000000000000000000000000..a22c7e6e6435f677c7a86dbbae5da86af8fdc9eb --- /dev/null +++ b/javascript/imageviewerGamepad.js @@ -0,0 +1,63 @@ +let gamepads = []; + +window.addEventListener('gamepadconnected', (e) => { + const index = e.gamepad.index; + let isWaiting = false; + gamepads[index] = setInterval(async() => { + if (!opts.js_modal_lightbox_gamepad || isWaiting) return; + const gamepad = navigator.getGamepads()[index]; + const xValue = gamepad.axes[0]; + if (xValue <= -0.3) { + modalPrevImage(e); + isWaiting = true; + } else if (xValue >= 0.3) { + modalNextImage(e); + isWaiting = true; + } + if (isWaiting) { + await sleepUntil(() => { + const xValue = navigator.getGamepads()[index].axes[0]; + if (xValue < 0.3 && xValue > -0.3) { + return true; + } + }, opts.js_modal_lightbox_gamepad_repeat); + isWaiting = false; + } + }, 10); +}); + +window.addEventListener('gamepaddisconnected', (e) => { + clearInterval(gamepads[e.gamepad.index]); +}); + +/* +Primarily for vr controller type pointer devices. +I use the wheel event because there's currently no way to do it properly with web xr. 
+ */ +let isScrolling = false; +window.addEventListener('wheel', (e) => { + if (!opts.js_modal_lightbox_gamepad || isScrolling) return; + isScrolling = true; + + if (e.deltaX <= -0.6) { + modalPrevImage(e); + } else if (e.deltaX >= 0.6) { + modalNextImage(e); + } + + setTimeout(() => { + isScrolling = false; + }, opts.js_modal_lightbox_gamepad_repeat); +}); + +function sleepUntil(f, timeout) { + return new Promise((resolve) => { + const timeStart = new Date(); + const wait = setInterval(function() { + if (f() || new Date() - timeStart > timeout) { + clearInterval(wait); + resolve(); + } + }, 20); + }); +} diff --git a/javascript/inputAccordion.js b/javascript/inputAccordion.js new file mode 100644 index 0000000000000000000000000000000000000000..7570309aa73fe051b41481db0da46dca94e57ab9 --- /dev/null +++ b/javascript/inputAccordion.js @@ -0,0 +1,68 @@ +function inputAccordionChecked(id, checked) { + var accordion = gradioApp().getElementById(id); + accordion.visibleCheckbox.checked = checked; + accordion.onVisibleCheckboxChange(); +} + +function setupAccordion(accordion) { + var labelWrap = accordion.querySelector('.label-wrap'); + var gradioCheckbox = gradioApp().querySelector('#' + accordion.id + "-checkbox input"); + var extra = gradioApp().querySelector('#' + accordion.id + "-extra"); + var span = labelWrap.querySelector('span'); + var linked = true; + + var isOpen = function() { + return labelWrap.classList.contains('open'); + }; + + var observerAccordionOpen = new MutationObserver(function(mutations) { + mutations.forEach(function(mutationRecord) { + accordion.classList.toggle('input-accordion-open', isOpen()); + + if (linked) { + accordion.visibleCheckbox.checked = isOpen(); + accordion.onVisibleCheckboxChange(); + } + }); + }); + observerAccordionOpen.observe(labelWrap, {attributes: true, attributeFilter: ['class']}); + + if (extra) { + labelWrap.insertBefore(extra, labelWrap.lastElementChild); + } + + accordion.onChecked = function(checked) { + if (isOpen() != checked) { + labelWrap.click(); + } + }; + + var visibleCheckbox = document.createElement('INPUT'); + visibleCheckbox.type = 'checkbox'; + visibleCheckbox.checked = isOpen(); + visibleCheckbox.id = accordion.id + "-visible-checkbox"; + visibleCheckbox.className = gradioCheckbox.className + " input-accordion-checkbox"; + span.insertBefore(visibleCheckbox, span.firstChild); + + accordion.visibleCheckbox = visibleCheckbox; + accordion.onVisibleCheckboxChange = function() { + if (linked && isOpen() != visibleCheckbox.checked) { + labelWrap.click(); + } + + gradioCheckbox.checked = visibleCheckbox.checked; + updateInput(gradioCheckbox); + }; + + visibleCheckbox.addEventListener('click', function(event) { + linked = false; + event.stopPropagation(); + }); + visibleCheckbox.addEventListener('input', accordion.onVisibleCheckboxChange); +} + +onUiLoaded(function() { + for (var accordion of gradioApp().querySelectorAll('.input-accordion')) { + setupAccordion(accordion); + } +}); diff --git a/javascript/localStorage.js b/javascript/localStorage.js new file mode 100644 index 0000000000000000000000000000000000000000..dc1a36c328799ea3df1843001d397aa638935952 --- /dev/null +++ b/javascript/localStorage.js @@ -0,0 +1,26 @@ + +function localSet(k, v) { + try { + localStorage.setItem(k, v); + } catch (e) { + console.warn(`Failed to save ${k} to localStorage: ${e}`); + } +} + +function localGet(k, def) { + try { + return localStorage.getItem(k); + } catch (e) { + console.warn(`Failed to load ${k} from localStorage: ${e}`); + } + + return def; +} 
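+// Note: for a missing key, getItem returns null rather than def; def is only returned when localStorage itself throws (e.g. storage access blocked).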
+ +function localRemove(k) { + try { + return localStorage.removeItem(k); + } catch (e) { + console.warn(`Failed to remove ${k} from localStorage: ${e}`); + } +} diff --git a/javascript/localization.js b/javascript/localization.js new file mode 100644 index 0000000000000000000000000000000000000000..8f00c18686057e3e12154f657170b014b13320a5 --- /dev/null +++ b/javascript/localization.js @@ -0,0 +1,205 @@ + +// localization = {} -- the dict with translations is created by the backend + +var ignore_ids_for_localization = { + setting_sd_hypernetwork: 'OPTION', + setting_sd_model_checkpoint: 'OPTION', + modelmerger_primary_model_name: 'OPTION', + modelmerger_secondary_model_name: 'OPTION', + modelmerger_tertiary_model_name: 'OPTION', + train_embedding: 'OPTION', + train_hypernetwork: 'OPTION', + txt2img_styles: 'OPTION', + img2img_styles: 'OPTION', + setting_random_artist_categories: 'OPTION', + setting_face_restoration_model: 'OPTION', + setting_realesrgan_enabled_models: 'OPTION', + extras_upscaler_1: 'OPTION', + extras_upscaler_2: 'OPTION', +}; + +var re_num = /^[.\d]+$/; +var re_emoji = /[\p{Extended_Pictographic}\u{1F3FB}-\u{1F3FF}\u{1F9B0}-\u{1F9B3}]/u; + +var original_lines = {}; +var translated_lines = {}; + +function hasLocalization() { + return window.localization && Object.keys(window.localization).length > 0; +} + +function textNodesUnder(el) { + var n, a = [], walk = document.createTreeWalker(el, NodeFilter.SHOW_TEXT, null, false); + while ((n = walk.nextNode())) a.push(n); + return a; +} + +function canBeTranslated(node, text) { + if (!text) return false; + if (!node.parentElement) return false; + + var parentType = node.parentElement.nodeName; + if (parentType == 'SCRIPT' || parentType == 'STYLE' || parentType == 'TEXTAREA') return false; + + if (parentType == 'OPTION' || parentType == 'SPAN') { + var pnode = node; + for (var level = 0; level < 4; level++) { + pnode = pnode.parentElement; + if (!pnode) break; + + if (ignore_ids_for_localization[pnode.id] == parentType) return false; + } + } + + if (re_num.test(text)) return false; + if (re_emoji.test(text)) return false; + return true; +} + +function getTranslation(text) { + if (!text) return undefined; + + if (translated_lines[text] === undefined) { + original_lines[text] = 1; + } + + var tl = localization[text]; + if (tl !== undefined) { + translated_lines[tl] = 1; + } + + return tl; +} + +function processTextNode(node) { + var text = node.textContent.trim(); + + if (!canBeTranslated(node, text)) return; + + var tl = getTranslation(text); + if (tl !== undefined) { + node.textContent = tl; + } +} + +function processNode(node) { + if (node.nodeType == 3) { + processTextNode(node); + return; + } + + if (node.title) { + let tl = getTranslation(node.title); + if (tl !== undefined) { + node.title = tl; + } + } + + if (node.placeholder) { + let tl = getTranslation(node.placeholder); + if (tl !== undefined) { + node.placeholder = tl; + } + } + + textNodesUnder(node).forEach(function(node) { + processTextNode(node); + }); +} + +function localizeWholePage() { + processNode(gradioApp()); + + function elem(comp) { + var elem_id = comp.props.elem_id ? comp.props.elem_id : "component-" + comp.id; + return gradioApp().getElementById(elem_id); + } + + for (var comp of window.gradio_config.components) { + if (comp.props.webui_tooltip) { + let e = elem(comp); + + let tl = e ? getTranslation(e.title) : undefined; + if (tl !== undefined) { + e.title = tl; + } + } + if (comp.props.placeholder) { + let e = elem(comp); + let textbox = e ? 
e.querySelector('[placeholder]') : null; + + let tl = textbox ? getTranslation(textbox.placeholder) : undefined; + if (tl !== undefined) { + textbox.placeholder = tl; + } + } + } +} + +function dumpTranslations() { + if (!hasLocalization()) { + // If we don't have any localization, + // we will not have traversed the app to find + // original_lines, so do that now. + localizeWholePage(); + } + var dumped = {}; + if (localization.rtl) { + dumped.rtl = true; + } + + for (const text in original_lines) { + if (dumped[text] !== undefined) continue; + dumped[text] = localization[text] || text; + } + + return dumped; +} + +function download_localization() { + var text = JSON.stringify(dumpTranslations(), null, 4); + + var element = document.createElement('a'); + element.setAttribute('href', 'data:text/plain;charset=utf-8,' + encodeURIComponent(text)); + element.setAttribute('download', "localization.json"); + element.style.display = 'none'; + document.body.appendChild(element); + + element.click(); + + document.body.removeChild(element); +} + +document.addEventListener("DOMContentLoaded", function() { + if (!hasLocalization()) { + return; + } + + onUiUpdate(function(m) { + m.forEach(function(mutation) { + mutation.addedNodes.forEach(function(node) { + processNode(node); + }); + }); + }); + + localizeWholePage(); + + if (localization.rtl) { // if the language is from right to left, + (new MutationObserver((mutations, observer) => { // wait for the style to load + mutations.forEach(mutation => { + mutation.addedNodes.forEach(node => { + if (node.tagName === 'STYLE') { + observer.disconnect(); + + for (const x of node.sheet.rules) { // find all rtl media rules + if (Array.from(x.media || []).includes('rtl')) { + x.media.appendMedium('all'); // enable them + } + } + } + }); + }); + })).observe(gradioApp(), {childList: true}); + } +}); diff --git a/javascript/notification.js b/javascript/notification.js new file mode 100644 index 0000000000000000000000000000000000000000..3ee972ae1661b62066171e01af69b84b854aeef7 --- /dev/null +++ b/javascript/notification.js @@ -0,0 +1,53 @@ +// Monitors the gallery and sends a browser notification when the leading image is new. + +let lastHeadImg = null; + +let notificationButton = null; + +onAfterUiUpdate(function() { + if (notificationButton == null) { + notificationButton = gradioApp().getElementById('request_notifications'); + + if (notificationButton != null) { + notificationButton.addEventListener('click', () => { + void Notification.requestPermission(); + }, true); + } + } + + const galleryPreviews = gradioApp().querySelectorAll('div[id^="tab_"] div[id$="_results"] .thumbnail-item > img'); + + if (galleryPreviews == null) return; + + const headImg = galleryPreviews[0]?.src; + + if (headImg == null || headImg == lastHeadImg) return; + + lastHeadImg = headImg; + + // play notification sound if available + const notificationAudio = gradioApp().querySelector('#audio_notification audio'); + if (notificationAudio) { + notificationAudio.volume = opts.notification_volume / 100.0 || 1.0; + notificationAudio.play(); + } + + if (document.hasFocus()) return; + + // Multiple copies of the images are in the DOM when one is selected. Dedup with a Set to get the real number generated. + const imgs = new Set(Array.from(galleryPreviews).map(img => img.src)); + + const notification = new Notification( + 'Stable Diffusion', + { + body: `Generated ${imgs.size > 1 ? imgs.size - opts.return_grid : 1} image${imgs.size > 1 ? 
's' : ''}`, + icon: headImg, + image: headImg, + } + ); + + notification.onclick = function(_) { + parent.focus(); + this.close(); + }; +}); diff --git a/javascript/profilerVisualization.js b/javascript/profilerVisualization.js new file mode 100644 index 0000000000000000000000000000000000000000..9d8e5f42f327f93db42773ebf0b97ee1e9671806 --- /dev/null +++ b/javascript/profilerVisualization.js @@ -0,0 +1,153 @@ + +function createRow(table, cellName, items) { + var tr = document.createElement('tr'); + var res = []; + + items.forEach(function(x, i) { + if (x === undefined) { + res.push(null); + return; + } + + var td = document.createElement(cellName); + td.textContent = x; + tr.appendChild(td); + res.push(td); + + var colspan = 1; + for (var n = i + 1; n < items.length; n++) { + if (items[n] !== undefined) { + break; + } + + colspan += 1; + } + + if (colspan > 1) { + td.colSpan = colspan; + } + }); + + table.appendChild(tr); + + return res; +} + +function showProfile(path, cutoff = 0.05) { + requestGet(path, {}, function(data) { + var table = document.createElement('table'); + table.className = 'popup-table'; + + data.records['total'] = data.total; + var keys = Object.keys(data.records).sort(function(a, b) { + return data.records[b] - data.records[a]; + }); + var items = keys.map(function(x) { + return {key: x, parts: x.split('/'), time: data.records[x]}; + }); + var maxLength = items.reduce(function(a, b) { + return Math.max(a, b.parts.length); + }, 0); + + var cols = createRow(table, 'th', ['record', 'seconds']); + cols[0].colSpan = maxLength; + + function arraysEqual(a, b) { + return !(a < b || b < a); + } + + var addLevel = function(level, parent, hide) { + var matching = items.filter(function(x) { + return x.parts[level] && !x.parts[level + 1] && arraysEqual(x.parts.slice(0, level), parent); + }); + var sorted = matching.sort(function(a, b) { + return b.time - a.time; + }); + var othersTime = 0; + var othersList = []; + var othersRows = []; + var childrenRows = []; + sorted.forEach(function(x) { + var visible = x.time >= cutoff && !hide; + + var cells = []; + for (var i = 0; i < maxLength; i++) { + cells.push(x.parts[i]); + } + cells.push(x.time.toFixed(3)); + var cols = createRow(table, 'td', cells); + for (i = 0; i < level; i++) { + cols[i].className = 'muted'; + } + + var tr = cols[0].parentNode; + if (!visible) { + tr.classList.add("hidden"); + } + + if (x.time >= cutoff) { + childrenRows.push(tr); + } else { + othersTime += x.time; + othersList.push(x.parts[level]); + othersRows.push(tr); + } + + var children = addLevel(level + 1, parent.concat([x.parts[level]]), true); + if (children.length > 0) { + var cell = cols[level]; + var onclick = function() { + cell.classList.remove("link"); + cell.removeEventListener("click", onclick); + children.forEach(function(x) { + x.classList.remove("hidden"); + }); + }; + cell.classList.add("link"); + cell.addEventListener("click", onclick); + } + }); + + if (othersTime > 0) { + var cells = []; + for (var i = 0; i < maxLength; i++) { + cells.push(parent[i]); + } + cells.push(othersTime.toFixed(3)); + cells[level] = 'others'; + var cols = createRow(table, 'td', cells); + for (i = 0; i < level; i++) { + cols[i].className = 'muted'; + } + + var cell = cols[level]; + var tr = cell.parentNode; + var onclick = function() { + tr.classList.add("hidden"); + cell.classList.remove("link"); + cell.removeEventListener("click", onclick); + othersRows.forEach(function(x) { + x.classList.remove("hidden"); + }); + }; + + cell.title = othersList.join(", "); + 
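+ // clicking this aggregated "others" cell hides it and reveals the individual low-cost rows it collapsed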
cell.classList.add("link"); + cell.addEventListener("click", onclick); + + if (hide) { + tr.classList.add("hidden"); + } + + childrenRows.push(tr); + } + + return childrenRows; + }; + + addLevel(0, []); + + popup(table); + }); +} + diff --git a/javascript/progressbar.js b/javascript/progressbar.js new file mode 100644 index 0000000000000000000000000000000000000000..f068bac6abaa4e3dbbaf6e1340fc36f87d34c7d9 --- /dev/null +++ b/javascript/progressbar.js @@ -0,0 +1,193 @@ +// code related to showing and updating progressbar shown as the image is being made + +function rememberGallerySelection() { + +} + +function getGallerySelectedIndex() { + +} + +function request(url, data, handler, errorHandler) { + var xhr = new XMLHttpRequest(); + xhr.open("POST", url, true); + xhr.setRequestHeader("Content-Type", "application/json"); + xhr.onreadystatechange = function() { + if (xhr.readyState === 4) { + if (xhr.status === 200) { + try { + var js = JSON.parse(xhr.responseText); + handler(js); + } catch (error) { + console.error(error); + errorHandler(); + } + } else { + errorHandler(); + } + } + }; + var js = JSON.stringify(data); + xhr.send(js); +} + +function pad2(x) { + return x < 10 ? '0' + x : x; +} + +function formatTime(secs) { + if (secs > 3600) { + return pad2(Math.floor(secs / 60 / 60)) + ":" + pad2(Math.floor(secs / 60) % 60) + ":" + pad2(Math.floor(secs) % 60); + } else if (secs > 60) { + return pad2(Math.floor(secs / 60)) + ":" + pad2(Math.floor(secs) % 60); + } else { + return Math.floor(secs) + "s"; + } +} + + +var originalAppTitle = undefined; + +onUiLoaded(function() { + originalAppTitle = document.title; +}); + +function setTitle(progress) { + var title = originalAppTitle; + + if (opts.show_progress_in_title && progress) { + title = '[' + progress.trim() + '] ' + title; + } + + if (document.title != title) { + document.title = title; + } +} + + +function randomId() { + return "task(" + Math.random().toString(36).slice(2, 7) + Math.random().toString(36).slice(2, 7) + Math.random().toString(36).slice(2, 7) + ")"; +} + +// starts sending progress requests to "/internal/progress" uri, creating progressbar above progressbarContainer element and +// preview inside gallery element. Cleans up all created stuff when the task is over and calls atEnd. +// calls onProgress every time there is a progress update +function requestProgress(id_task, progressbarContainer, gallery, atEnd, onProgress, inactivityTimeout = 40) { + var dateStart = new Date(); + var wasEverActive = false; + var parentProgressbar = progressbarContainer.parentNode; + + var divProgress = document.createElement('div'); + divProgress.className = 'progressDiv'; + divProgress.style.display = opts.show_progressbar ? "block" : "none"; + var divInner = document.createElement('div'); + divInner.className = 'progress'; + + divProgress.appendChild(divInner); + parentProgressbar.insertBefore(divProgress, progressbarContainer); + + var livePreview = null; + + var removeProgressBar = function() { + if (!divProgress) return; + + setTitle(""); + parentProgressbar.removeChild(divProgress); + if (gallery && livePreview) gallery.removeChild(livePreview); + atEnd(); + + divProgress = null; + }; + + var funProgress = function(id_task) { + request("./internal/progress", {id_task: id_task, live_preview: false}, function(res) { + if (res.completed) { + removeProgressBar(); + return; + } + + let progressText = ""; + + divInner.style.width = ((res.progress || 0) * 100.0) + '%'; + divInner.style.background = res.progress ? 
"" : "transparent"; + + if (res.progress > 0) { + progressText = ((res.progress || 0) * 100.0).toFixed(0) + '%'; + } + + if (res.eta) { + progressText += " ETA: " + formatTime(res.eta); + } + + setTitle(progressText); + + if (res.textinfo && res.textinfo.indexOf("\n") == -1) { + progressText = res.textinfo + " " + progressText; + } + + divInner.textContent = progressText; + + var elapsedFromStart = (new Date() - dateStart) / 1000; + + if (res.active) wasEverActive = true; + + if (!res.active && wasEverActive) { + removeProgressBar(); + return; + } + + if (elapsedFromStart > inactivityTimeout && !res.queued && !res.active) { + removeProgressBar(); + return; + } + + if (onProgress) { + onProgress(res); + } + + setTimeout(() => { + funProgress(id_task, res.id_live_preview); + }, opts.live_preview_refresh_period || 500); + }, function() { + removeProgressBar(); + }); + }; + + var funLivePreview = function(id_task, id_live_preview) { + request("./internal/progress", {id_task: id_task, id_live_preview: id_live_preview}, function(res) { + if (!divProgress) { + return; + } + + if (res.live_preview && gallery) { + var img = new Image(); + img.onload = function() { + if (!livePreview) { + livePreview = document.createElement('div'); + livePreview.className = 'livePreview'; + gallery.insertBefore(livePreview, gallery.firstElementChild); + } + + livePreview.appendChild(img); + if (livePreview.childElementCount > 2) { + livePreview.removeChild(livePreview.firstElementChild); + } + }; + img.src = res.live_preview; + } + + setTimeout(() => { + funLivePreview(id_task, res.id_live_preview); + }, opts.live_preview_refresh_period || 500); + }, function() { + removeProgressBar(); + }); + }; + + funProgress(id_task, 0); + + if (gallery) { + funLivePreview(id_task, 0); + } + +} diff --git a/javascript/resizeHandle.js b/javascript/resizeHandle.js new file mode 100644 index 0000000000000000000000000000000000000000..6560372ccdebffdcaa701b9de09e4a0871e89d5a --- /dev/null +++ b/javascript/resizeHandle.js @@ -0,0 +1,167 @@ +(function() { + const GRADIO_MIN_WIDTH = 320; + const PAD = 16; + const DEBOUNCE_TIME = 100; + + const R = { + tracking: false, + parent: null, + parentWidth: null, + leftCol: null, + leftColStartWidth: null, + screenX: null, + }; + + let resizeTimer; + let parents = []; + + function setLeftColGridTemplate(el, width) { + el.style.gridTemplateColumns = `${width}px 16px 1fr`; + } + + function displayResizeHandle(parent) { + if (window.innerWidth < GRADIO_MIN_WIDTH * 2 + PAD * 4) { + parent.style.display = 'flex'; + parent.resizeHandle.style.display = "none"; + return false; + } else { + parent.style.display = 'grid'; + parent.resizeHandle.style.display = "block"; + return true; + } + } + + function afterResize(parent) { + if (displayResizeHandle(parent) && parent.style.gridTemplateColumns != parent.style.originalGridTemplateColumns) { + const oldParentWidth = R.parentWidth; + const newParentWidth = parent.offsetWidth; + const widthL = parseInt(parent.style.gridTemplateColumns.split(' ')[0]); + + const ratio = newParentWidth / oldParentWidth; + + const newWidthL = Math.max(Math.floor(ratio * widthL), GRADIO_MIN_WIDTH); + setLeftColGridTemplate(parent, newWidthL); + + R.parentWidth = newParentWidth; + } + } + + function setup(parent) { + const leftCol = parent.firstElementChild; + const rightCol = parent.lastElementChild; + + parents.push(parent); + + parent.style.display = 'grid'; + parent.style.gap = '0'; + const gridTemplateColumns = `${parent.children[0].style.flexGrow}fr ${PAD}px 
${parent.children[1].style.flexGrow}fr`; + parent.style.gridTemplateColumns = gridTemplateColumns; + parent.style.originalGridTemplateColumns = gridTemplateColumns; + + const resizeHandle = document.createElement('div'); + resizeHandle.classList.add('resize-handle'); + parent.insertBefore(resizeHandle, rightCol); + parent.resizeHandle = resizeHandle; + + ['mousedown', 'touchstart'].forEach((eventType) => { + resizeHandle.addEventListener(eventType, (evt) => { + if (eventType.startsWith('mouse')) { + if (evt.button !== 0) return; + } else { + if (evt.changedTouches.length !== 1) return; + } + + evt.preventDefault(); + evt.stopPropagation(); + + document.body.classList.add('resizing'); + + R.tracking = true; + R.parent = parent; + R.parentWidth = parent.offsetWidth; + R.leftCol = leftCol; + R.leftColStartWidth = leftCol.offsetWidth; + if (eventType.startsWith('mouse')) { + R.screenX = evt.screenX; + } else { + R.screenX = evt.changedTouches[0].screenX; + } + }); + }); + + resizeHandle.addEventListener('dblclick', (evt) => { + evt.preventDefault(); + evt.stopPropagation(); + + parent.style.gridTemplateColumns = parent.style.originalGridTemplateColumns; + }); + + afterResize(parent); + } + + ['mousemove', 'touchmove'].forEach((eventType) => { + window.addEventListener(eventType, (evt) => { + if (eventType.startsWith('mouse')) { + if (evt.button !== 0) return; + } else { + if (evt.changedTouches.length !== 1) return; + } + + if (R.tracking) { + if (eventType.startsWith('mouse')) { + evt.preventDefault(); + } + evt.stopPropagation(); + + let delta = 0; + if (eventType.startsWith('mouse')) { + delta = R.screenX - evt.screenX; + } else { + delta = R.screenX - evt.changedTouches[0].screenX; + } + const leftColWidth = Math.max(Math.min(R.leftColStartWidth - delta, R.parent.offsetWidth - GRADIO_MIN_WIDTH - PAD), GRADIO_MIN_WIDTH); + setLeftColGridTemplate(R.parent, leftColWidth); + } + }); + }); + + ['mouseup', 'touchend'].forEach((eventType) => { + window.addEventListener(eventType, (evt) => { + if (eventType.startsWith('mouse')) { + if (evt.button !== 0) return; + } else { + if (evt.changedTouches.length !== 1) return; + } + + if (R.tracking) { + evt.preventDefault(); + evt.stopPropagation(); + + R.tracking = false; + + document.body.classList.remove('resizing'); + } + }); + }); + + + window.addEventListener('resize', () => { + clearTimeout(resizeTimer); + + resizeTimer = setTimeout(function() { + for (const parent of parents) { + afterResize(parent); + } + }, DEBOUNCE_TIME); + }); + + setupResizeHandle = setup; +})(); + +onUiLoaded(function() { + for (var elem of gradioApp().querySelectorAll('.resize-handle-row')) { + if (!elem.querySelector('.resize-handle')) { + setupResizeHandle(elem); + } + } +}); diff --git a/javascript/settings.js b/javascript/settings.js new file mode 100644 index 0000000000000000000000000000000000000000..b2d981c2144703fd9e3d89faaf5c788fd7b3356a --- /dev/null +++ b/javascript/settings.js @@ -0,0 +1,71 @@ +let settingsExcludeTabsFromShowAll = { + settings_tab_defaults: 1, + settings_tab_sysinfo: 1, + settings_tab_actions: 1, + settings_tab_licenses: 1, +}; + +function settingsShowAllTabs() { + gradioApp().querySelectorAll('#settings > div').forEach(function(elem) { + if (settingsExcludeTabsFromShowAll[elem.id]) return; + + elem.style.display = "block"; + }); +} + +function settingsShowOneTab() { + gradioApp().querySelector('#settings_show_one_page').click(); +} + +onUiLoaded(function() { + var edit = gradioApp().querySelector('#settings_search'); + var editTextarea = 
gradioApp().querySelector('#settings_search > label > input'); + var buttonShowAllPages = gradioApp().getElementById('settings_show_all_pages'); + var settings_tabs = gradioApp().querySelector('#settings div'); + + onEdit('settingsSearch', editTextarea, 250, function() { + var searchText = (editTextarea.value || "").trim().toLowerCase(); + + gradioApp().querySelectorAll('#settings > div[id^=settings_] div[id^=column_settings_] > *').forEach(function(elem) { + var visible = elem.textContent.trim().toLowerCase().indexOf(searchText) != -1; + elem.style.display = visible ? "" : "none"; + }); + + if (searchText != "") { + settingsShowAllTabs(); + } else { + settingsShowOneTab(); + } + }); + + settings_tabs.insertBefore(edit, settings_tabs.firstChild); + settings_tabs.appendChild(buttonShowAllPages); + + + buttonShowAllPages.addEventListener("click", settingsShowAllTabs); +}); + + +onOptionsChanged(function() { + if (gradioApp().querySelector('#settings .settings-category')) return; + + var sectionMap = {}; + gradioApp().querySelectorAll('#settings > div > button').forEach(function(x) { + sectionMap[x.textContent.trim()] = x; + }); + + opts._categories.forEach(function(x) { + var section = localization[x[0]] ?? x[0]; + var category = localization[x[1]] ?? x[1]; + + var span = document.createElement('SPAN'); + span.textContent = category; + span.className = 'settings-category'; + + var sectionElem = sectionMap[section]; + if (!sectionElem) return; + + sectionElem.parentElement.insertBefore(span, sectionElem); + }); +}); + diff --git a/javascript/textualInversion.js b/javascript/textualInversion.js new file mode 100644 index 0000000000000000000000000000000000000000..20443fcca01bbba6712e40136c57dbcdb78ca945 --- /dev/null +++ b/javascript/textualInversion.js @@ -0,0 +1,17 @@ + + + +function start_training_textual_inversion() { + gradioApp().querySelector('#ti_error').innerHTML = ''; + + var id = randomId(); + requestProgress(id, gradioApp().getElementById('ti_output'), gradioApp().getElementById('ti_gallery'), function() {}, function(progress) { + gradioApp().getElementById('ti_progress').innerHTML = progress.textinfo; + }); + + var res = Array.from(arguments); + + res[0] = id; + + return res; +} diff --git a/javascript/token-counters.js b/javascript/token-counters.js new file mode 100644 index 0000000000000000000000000000000000000000..eeea7a5d26c7832836ae982b02b6f0924048c2d1 --- /dev/null +++ b/javascript/token-counters.js @@ -0,0 +1,87 @@ +let promptTokenCountUpdateFunctions = {}; + +function update_txt2img_tokens(...args) { + // Called from Gradio + update_token_counter("txt2img_token_button"); + update_token_counter("txt2img_negative_token_button"); + if (args.length == 2) { + return args[0]; + } + return args; +} + +function update_img2img_tokens(...args) { + // Called from Gradio + update_token_counter("img2img_token_button"); + update_token_counter("img2img_negative_token_button"); + if (args.length == 2) { + return args[0]; + } + return args; +} + +function update_token_counter(button_id) { + promptTokenCountUpdateFunctions[button_id]?.(); +} + + +function recalculatePromptTokens(name) { + promptTokenCountUpdateFunctions[name]?.(); +} + +function recalculate_prompts_txt2img() { + // Called from Gradio + recalculatePromptTokens('txt2img_prompt'); + recalculatePromptTokens('txt2img_neg_prompt'); + return Array.from(arguments); +} + +function recalculate_prompts_img2img() { + // Called from Gradio + recalculatePromptTokens('img2img_prompt'); + recalculatePromptTokens('img2img_neg_prompt'); + 
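+ // hand Gradio's inputs back unchanged; the calls above only trigger the token recount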
return Array.from(arguments); +} + +function setupTokenCounting(id, id_counter, id_button) { + var prompt = gradioApp().getElementById(id); + var counter = gradioApp().getElementById(id_counter); + var textarea = gradioApp().querySelector(`#${id} > label > textarea`); + + if (counter.parentElement == prompt.parentElement) { + return; + } + + prompt.parentElement.insertBefore(counter, prompt); + prompt.parentElement.style.position = "relative"; + + var func = onEdit(id, textarea, 800, function() { + if (counter.classList.contains("token-counter-visible")) { + gradioApp().getElementById(id_button)?.click(); + } + }); + promptTokenCountUpdateFunctions[id] = func; + promptTokenCountUpdateFunctions[id_button] = func; +} + +function toggleTokenCountingVisibility(id, id_counter, id_button) { + var counter = gradioApp().getElementById(id_counter); + + counter.style.display = opts.disable_token_counters ? "none" : "block"; + counter.classList.toggle("token-counter-visible", !opts.disable_token_counters); +} + +function runCodeForTokenCounters(fun) { + fun('txt2img_prompt', 'txt2img_token_counter', 'txt2img_token_button'); + fun('txt2img_neg_prompt', 'txt2img_negative_token_counter', 'txt2img_negative_token_button'); + fun('img2img_prompt', 'img2img_token_counter', 'img2img_token_button'); + fun('img2img_neg_prompt', 'img2img_negative_token_counter', 'img2img_negative_token_button'); +} + +onUiLoaded(function() { + runCodeForTokenCounters(setupTokenCounting); +}); + +onOptionsChanged(function() { + runCodeForTokenCounters(toggleTokenCountingVisibility); +}); diff --git a/javascript/ui.js b/javascript/ui.js new file mode 100644 index 0000000000000000000000000000000000000000..f2adc7dd8f64415ad533a13e49109609993e2649 --- /dev/null +++ b/javascript/ui.js @@ -0,0 +1,431 @@ +// various functions for interaction with ui.py not large enough to warrant putting them in separate files + +function set_theme(theme) { + var gradioURL = window.location.href; + if (!gradioURL.includes('?__theme=')) { + window.location.replace(gradioURL + '?__theme=' + theme); + } +} + +function all_gallery_buttons() { + var allGalleryButtons = gradioApp().querySelectorAll('[style="display: block;"].tabitem div[id$=_gallery].gradio-gallery .thumbnails > .thumbnail-item.thumbnail-small'); + var visibleGalleryButtons = []; + allGalleryButtons.forEach(function(elem) { + if (elem.parentElement.offsetParent) { + visibleGalleryButtons.push(elem); + } + }); + return visibleGalleryButtons; +} + +function selected_gallery_button() { + return all_gallery_buttons().find(elem => elem.classList.contains('selected')) ?? null; +} + +function selected_gallery_index() { + return all_gallery_buttons().findIndex(elem => elem.classList.contains('selected')); +} + +function extract_image_from_gallery(gallery) { + if (gallery.length == 0) { + return [null]; + } + if (gallery.length == 1) { + return [gallery[0]]; + } + + var index = selected_gallery_index(); + + if (index < 0 || index >= gallery.length) { + // Use the first image in the gallery as the default + index = 0; + } + + return [gallery[index]]; +} + +window.args_to_array = Array.from; // Compatibility with e.g. 
extensions that may expect this to be around + +function switch_to_txt2img() { + gradioApp().querySelector('#tabs').querySelectorAll('button')[0].click(); + + return Array.from(arguments); +} + +function switch_to_img2img_tab(no) { + gradioApp().querySelector('#tabs').querySelectorAll('button')[1].click(); + gradioApp().getElementById('mode_img2img').querySelectorAll('button')[no].click(); +} +function switch_to_img2img() { + switch_to_img2img_tab(0); + return Array.from(arguments); +} + +function switch_to_sketch() { + switch_to_img2img_tab(1); + return Array.from(arguments); +} + +function switch_to_inpaint() { + switch_to_img2img_tab(2); + return Array.from(arguments); +} + +function switch_to_inpaint_sketch() { + switch_to_img2img_tab(3); + return Array.from(arguments); +} + +function switch_to_extras() { + gradioApp().querySelector('#tabs').querySelectorAll('button')[2].click(); + + return Array.from(arguments); +} + +function switch_to_svd() { + gradioApp().querySelector('#tabs').querySelectorAll('button')[6].click(); + return Array.from(arguments); +} + +function get_tab_index(tabId) { + let buttons = gradioApp().getElementById(tabId).querySelector('div').querySelectorAll('button'); + for (let i = 0; i < buttons.length; i++) { + if (buttons[i].classList.contains('selected')) { + return i; + } + } + return 0; +} + +function create_tab_index_args(tabId, args) { + var res = Array.from(args); + res[0] = get_tab_index(tabId); + return res; +} + +function get_img2img_tab_index() { + let res = Array.from(arguments); + res.splice(-2); + res[0] = get_tab_index('mode_img2img'); + return res; +} + +function create_submit_args(args) { + var res = Array.from(args); + + // As it is currently, txt2img and img2img send back the previous output args (txt2img_gallery, generation_info, html_info) whenever you generate a new image. + // This can lead to uploading a huge gallery of previously generated images, which leads to an unnecessary delay between submitting and beginning to generate. + // I don't know why gradio is sending outputs along with inputs, but we can prevent sending the image gallery here, which seems to be an issue for some. + // If gradio at some point stops sending outputs, this may break something + if (Array.isArray(res[res.length - 3])) { + res[res.length - 3] = null; + } + + return res; +} + +function setSubmitButtonsVisibility(tabname, showInterrupt, showSkip, showInterrupting) { + gradioApp().getElementById(tabname + '_interrupt').style.display = showInterrupt ? "block" : "none"; + gradioApp().getElementById(tabname + '_skip').style.display = showSkip ? "block" : "none"; + gradioApp().getElementById(tabname + '_interrupting').style.display = showInterrupting ? "block" : "none"; +} + +function showSubmitButtons(tabname, show) { + setSubmitButtonsVisibility(tabname, !show, !show, false); +} + +function showSubmitInterruptingPlaceholder(tabname) { + setSubmitButtonsVisibility(tabname, false, true, true); +} + +function showRestoreProgressButton(tabname, show) { + var button = gradioApp().getElementById(tabname + "_restore_progress"); + if (!button) return; + + button.style.display = show ? 
"flex" : "none"; +} + +function submit() { + showSubmitButtons('txt2img', false); + + var id = randomId(); + localSet("txt2img_task_id", id); + + requestProgress(id, gradioApp().getElementById('txt2img_gallery_container'), gradioApp().getElementById('txt2img_gallery'), function() { + showSubmitButtons('txt2img', true); + localRemove("txt2img_task_id"); + showRestoreProgressButton('txt2img', false); + }); + + var res = create_submit_args(arguments); + + res[0] = id; + + return res; +} + +function submit_txt2img_upscale() { + var res = submit(...arguments); + + res[2] = selected_gallery_index(); + + return res; +} + +function submit_img2img() { + showSubmitButtons('img2img', false); + + var id = randomId(); + localSet("img2img_task_id", id); + + requestProgress(id, gradioApp().getElementById('img2img_gallery_container'), gradioApp().getElementById('img2img_gallery'), function() { + showSubmitButtons('img2img', true); + localRemove("img2img_task_id"); + showRestoreProgressButton('img2img', false); + }); + + var res = create_submit_args(arguments); + + res[0] = id; + res[1] = get_tab_index('mode_img2img'); + + return res; +} + +function submit_extras() { + showSubmitButtons('extras', false); + + var id = randomId(); + + requestProgress(id, gradioApp().getElementById('extras_gallery_container'), gradioApp().getElementById('extras_gallery'), function() { + showSubmitButtons('extras', true); + }); + + var res = create_submit_args(arguments); + + res[0] = id; + + console.log(res); + return res; +} + +function restoreProgressTxt2img() { + showRestoreProgressButton("txt2img", false); + var id = localGet("txt2img_task_id"); + + if (id) { + requestProgress(id, gradioApp().getElementById('txt2img_gallery_container'), gradioApp().getElementById('txt2img_gallery'), function() { + showSubmitButtons('txt2img', true); + }, null, 0); + } + + return id; +} + +function restoreProgressImg2img() { + showRestoreProgressButton("img2img", false); + + var id = localGet("img2img_task_id"); + + if (id) { + requestProgress(id, gradioApp().getElementById('img2img_gallery_container'), gradioApp().getElementById('img2img_gallery'), function() { + showSubmitButtons('img2img', true); + }, null, 0); + } + + return id; +} + + +/** + * Configure the width and height elements on `tabname` to accept + * pasting of resolutions in the form of "width x height". 
+ */ +function setupResolutionPasting(tabname) { + var width = gradioApp().querySelector(`#${tabname}_width input[type=number]`); + var height = gradioApp().querySelector(`#${tabname}_height input[type=number]`); + for (const el of [width, height]) { + el.addEventListener('paste', function(event) { + var pasteData = event.clipboardData.getData('text/plain'); + var parsed = pasteData.match(/^\s*(\d+)\D+(\d+)\s*$/); + if (parsed) { + width.value = parsed[1]; + height.value = parsed[2]; + updateInput(width); + updateInput(height); + event.preventDefault(); + } + }); + } +} + +onUiLoaded(function() { + showRestoreProgressButton('txt2img', localGet("txt2img_task_id")); + showRestoreProgressButton('img2img', localGet("img2img_task_id")); + setupResolutionPasting('txt2img'); + setupResolutionPasting('img2img'); +}); + + +function modelmerger() { + var id = randomId(); + requestProgress(id, gradioApp().getElementById('modelmerger_results_panel'), null, function() {}); + + var res = create_submit_args(arguments); + res[0] = id; + return res; +} + + +function ask_for_style_name(_, prompt_text, negative_prompt_text) { + var name_ = prompt('Style name:'); + return [name_, prompt_text, negative_prompt_text]; +} + +function confirm_clear_prompt(prompt, negative_prompt) { + if (confirm("Delete prompt?")) { + prompt = ""; + negative_prompt = ""; + } + + return [prompt, negative_prompt]; +} + + +var opts = {}; +onAfterUiUpdate(function() { + if (Object.keys(opts).length != 0) return; + + var json_elem = gradioApp().getElementById('settings_json'); + if (json_elem == null) return; + + var textarea = json_elem.querySelector('textarea'); + var jsdata = textarea.value; + opts = JSON.parse(jsdata); + + executeCallbacks(optionsChangedCallbacks); /*global optionsChangedCallbacks*/ + + Object.defineProperty(textarea, 'value', { + set: function(newValue) { + var valueProp = Object.getOwnPropertyDescriptor(HTMLTextAreaElement.prototype, 'value'); + var oldValue = valueProp.get.call(textarea); + valueProp.set.call(textarea, newValue); + + if (oldValue != newValue) { + opts = JSON.parse(textarea.value); + } + + executeCallbacks(optionsChangedCallbacks); + }, + get: function() { + var valueProp = Object.getOwnPropertyDescriptor(HTMLTextAreaElement.prototype, 'value'); + return valueProp.get.call(textarea); + } + }); + + json_elem.parentElement.style.display = "none"; +}); + +onOptionsChanged(function() { + var elem = gradioApp().getElementById('sd_checkpoint_hash'); + var sd_checkpoint_hash = opts.sd_checkpoint_hash || ""; + var shorthash = sd_checkpoint_hash.substring(0, 10); + + if (elem && elem.textContent != shorthash) { + elem.textContent = shorthash; + elem.title = sd_checkpoint_hash; + elem.href = "https://google.com/search?q=" + sd_checkpoint_hash; + } +}); + +let txt2img_textarea, img2img_textarea = undefined; + +function restart_reload() { + document.body.innerHTML = '

    <h1 style="font-family:monospace;margin-top:20%;color:lightgray;text-align:center;">Reloading...</h1>
    '; + + var requestPing = function() { + requestGet("./internal/ping", {}, function(data) { + location.reload(); + }, function() { + setTimeout(requestPing, 500); + }); + }; + + setTimeout(requestPing, 2000); + + return []; +} + +// Simulate an `input` DOM event for Gradio Textbox component. Needed after you edit its contents in javascript, otherwise your edits +// will only visible on web page and not sent to python. +function updateInput(target) { + let e = new Event("input", {bubbles: true}); + Object.defineProperty(e, "target", {value: target}); + target.dispatchEvent(e); +} + + +var desiredCheckpointName = null; +function selectCheckpoint(name) { + desiredCheckpointName = name; + gradioApp().getElementById('change_checkpoint').click(); +} + +function currentImg2imgSourceResolution(w, h, scaleBy) { + var img = gradioApp().querySelector('#mode_img2img > div[style="display: block;"] img'); + return img ? [img.naturalWidth, img.naturalHeight, scaleBy] : [0, 0, scaleBy]; +} + +function updateImg2imgResizeToTextAfterChangingImage() { + // At the time this is called from gradio, the image has no yet been replaced. + // There may be a better solution, but this is simple and straightforward so I'm going with it. + + setTimeout(function() { + gradioApp().getElementById('img2img_update_resize_to').click(); + }, 500); + + return []; + +} + + + +function setRandomSeed(elem_id) { + var input = gradioApp().querySelector("#" + elem_id + " input"); + if (!input) return []; + + input.value = "-1"; + updateInput(input); + return []; +} + +function switchWidthHeight(tabname) { + var width = gradioApp().querySelector("#" + tabname + "_width input[type=number]"); + var height = gradioApp().querySelector("#" + tabname + "_height input[type=number]"); + if (!width || !height) return []; + + var tmp = width.value; + width.value = height.value; + height.value = tmp; + + updateInput(width); + updateInput(height); + return []; +} + + +var onEditTimers = {}; + +// calls func after afterMs milliseconds has passed since the input elem has beed enited by user +function onEdit(editId, elem, afterMs, func) { + var edited = function() { + var existingTimer = onEditTimers[editId]; + if (existingTimer) clearTimeout(existingTimer); + + onEditTimers[editId] = setTimeout(func, afterMs); + }; + + elem.addEventListener("input", edited); + + return edited; +} diff --git a/javascript/ui_settings_hints.js b/javascript/ui_settings_hints.js new file mode 100644 index 0000000000000000000000000000000000000000..d088f9494f826d9534dc105ac2f99bda702d22c0 --- /dev/null +++ b/javascript/ui_settings_hints.js @@ -0,0 +1,62 @@ +// various hints and extra info for the settings tab + +var settingsHintsSetup = false; + +onOptionsChanged(function() { + if (settingsHintsSetup) return; + settingsHintsSetup = true; + + gradioApp().querySelectorAll('#settings [id^=setting_]').forEach(function(div) { + var name = div.id.substr(8); + var commentBefore = opts._comments_before[name]; + var commentAfter = opts._comments_after[name]; + + if (!commentBefore && !commentAfter) return; + + var span = null; + if (div.classList.contains('gradio-checkbox')) span = div.querySelector('label span'); + else if (div.classList.contains('gradio-checkboxgroup')) span = div.querySelector('span').firstChild; + else if (div.classList.contains('gradio-radio')) span = div.querySelector('span').firstChild; + else span = div.querySelector('label span').firstChild; + + if (!span) return; + + if (commentBefore) { + var comment = document.createElement('DIV'); + 
comment.className = 'settings-comment'; + comment.innerHTML = commentBefore; + span.parentElement.insertBefore(document.createTextNode('\xa0'), span); + span.parentElement.insertBefore(comment, span); + span.parentElement.insertBefore(document.createTextNode('\xa0'), span); + } + if (commentAfter) { + comment = document.createElement('DIV'); + comment.className = 'settings-comment'; + comment.innerHTML = commentAfter; + span.parentElement.insertBefore(comment, span.nextSibling); + span.parentElement.insertBefore(document.createTextNode('\xa0'), span.nextSibling); + } + }); +}); + +function settingsHintsShowQuicksettings() { + requestGet("./internal/quicksettings-hint", {}, function(data) { + var table = document.createElement('table'); + table.className = 'popup-table'; + + data.forEach(function(obj) { + var tr = document.createElement('tr'); + var td = document.createElement('td'); + td.textContent = obj.name; + tr.appendChild(td); + + td = document.createElement('td'); + td.textContent = obj.label; + tr.appendChild(td); + + table.appendChild(tr); + }); + + popup(table); + }); +} diff --git a/launch.py b/launch.py new file mode 100644 index 0000000000000000000000000000000000000000..55eafe256bf1f9a347d5ed1c49b07ed4129d9cc7 --- /dev/null +++ b/launch.py @@ -0,0 +1,51 @@ +from modules import launch_utils + +args = launch_utils.args +python = launch_utils.python +git = launch_utils.git +index_url = launch_utils.index_url +dir_repos = launch_utils.dir_repos + +commit_hash = launch_utils.commit_hash +git_tag = launch_utils.git_tag + +run = launch_utils.run +is_installed = launch_utils.is_installed +repo_dir = launch_utils.repo_dir + +run_pip = launch_utils.run_pip +check_run_python = launch_utils.check_run_python +git_clone = launch_utils.git_clone +git_pull_recursive = launch_utils.git_pull_recursive +list_extensions = launch_utils.list_extensions +run_extension_installer = launch_utils.run_extension_installer +prepare_environment = launch_utils.prepare_environment +configure_for_tests = launch_utils.configure_for_tests +start = launch_utils.start + + +def main(): + if args.dump_sysinfo: + filename = launch_utils.dump_sysinfo() + + print(f"Sysinfo saved as {filename}. Exiting...") + + exit(0) + + launch_utils.startup_timer.record("initial startup") + + with launch_utils.startup_timer.subcategory("prepare environment"): + if not args.skip_prepare_environment: + prepare_environment() + + if args.test_server: + configure_for_tests() + + if args.forge_ref_a1111_home: + launch_utils.configure_forge_reference_checkout(args.forge_ref_a1111_home) + + start() + + +if __name__ == "__main__": + main() diff --git a/ldm_patched/contrib/external.py b/ldm_patched/contrib/external.py new file mode 100644 index 0000000000000000000000000000000000000000..86f533a6aa135ef9d620ee106628da38838f5c47 --- /dev/null +++ b/ldm_patched/contrib/external.py @@ -0,0 +1,1977 @@ +# Taken from https://github.com/comfyanonymous/ComfyUI +# This file is only for reference, and not used in the backend or runtime. 
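+# The node classes below (CLIPTextEncode, conditioning utilities, VAE encode/decode, latent saving, ...) mirror ComfyUI's built-in nodes.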
+ + +import torch + +import os +import sys +import json +import hashlib +import traceback +import math +import time +import random + +from PIL import Image, ImageOps, ImageSequence +from PIL.PngImagePlugin import PngInfo +import numpy as np +import safetensors.torch + +pass # sys.path.insert(0, os.path.join(os.path.dirname(os.path.realpath(__file__)), "ldm_patched")) + + +import ldm_patched.modules.diffusers_load +import ldm_patched.modules.samplers +import ldm_patched.modules.sample +import ldm_patched.modules.sd +import ldm_patched.modules.utils +import ldm_patched.modules.controlnet + +import ldm_patched.modules.clip_vision + +import ldm_patched.modules.model_management +from ldm_patched.modules.args_parser import args + +import importlib + +import ldm_patched.utils.path_utils +import ldm_patched.utils.latent_visualization + +def before_node_execution(): + ldm_patched.modules.model_management.throw_exception_if_processing_interrupted() + +def interrupt_processing(value=True): + ldm_patched.modules.model_management.interrupt_current_processing(value) + +MAX_RESOLUTION=8192 + +class CLIPTextEncode: + @classmethod + def INPUT_TYPES(s): + return {"required": {"text": ("STRING", {"multiline": True}), "clip": ("CLIP", )}} + RETURN_TYPES = ("CONDITIONING",) + FUNCTION = "encode" + + CATEGORY = "conditioning" + + def encode(self, clip, text): + tokens = clip.tokenize(text) + cond, pooled = clip.encode_from_tokens(tokens, return_pooled=True) + return ([[cond, {"pooled_output": pooled}]], ) + +class ConditioningCombine: + @classmethod + def INPUT_TYPES(s): + return {"required": {"conditioning_1": ("CONDITIONING", ), "conditioning_2": ("CONDITIONING", )}} + RETURN_TYPES = ("CONDITIONING",) + FUNCTION = "combine" + + CATEGORY = "conditioning" + + def combine(self, conditioning_1, conditioning_2): + return (conditioning_1 + conditioning_2, ) + +class ConditioningAverage : + @classmethod + def INPUT_TYPES(s): + return {"required": {"conditioning_to": ("CONDITIONING", ), "conditioning_from": ("CONDITIONING", ), + "conditioning_to_strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}) + }} + RETURN_TYPES = ("CONDITIONING",) + FUNCTION = "addWeighted" + + CATEGORY = "conditioning" + + def addWeighted(self, conditioning_to, conditioning_from, conditioning_to_strength): + out = [] + + if len(conditioning_from) > 1: + print("Warning: ConditioningAverage conditioning_from contains more than 1 cond, only the first one will actually be applied to conditioning_to.") + + cond_from = conditioning_from[0][0] + pooled_output_from = conditioning_from[0][1].get("pooled_output", None) + + for i in range(len(conditioning_to)): + t1 = conditioning_to[i][0] + pooled_output_to = conditioning_to[i][1].get("pooled_output", pooled_output_from) + t0 = cond_from[:,:t1.shape[1]] + if t0.shape[1] < t1.shape[1]: + t0 = torch.cat([t0] + [torch.zeros((1, (t1.shape[1] - t0.shape[1]), t1.shape[2]))], dim=1) + + tw = torch.mul(t1, conditioning_to_strength) + torch.mul(t0, (1.0 - conditioning_to_strength)) + t_to = conditioning_to[i][1].copy() + if pooled_output_from is not None and pooled_output_to is not None: + t_to["pooled_output"] = torch.mul(pooled_output_to, conditioning_to_strength) + torch.mul(pooled_output_from, (1.0 - conditioning_to_strength)) + elif pooled_output_from is not None: + t_to["pooled_output"] = pooled_output_from + + n = [tw, t_to] + out.append(n) + return (out, ) + +class ConditioningConcat: + @classmethod + def INPUT_TYPES(s): + return {"required": { + "conditioning_to": 
("CONDITIONING",), + "conditioning_from": ("CONDITIONING",), + }} + RETURN_TYPES = ("CONDITIONING",) + FUNCTION = "concat" + + CATEGORY = "conditioning" + + def concat(self, conditioning_to, conditioning_from): + out = [] + + if len(conditioning_from) > 1: + print("Warning: ConditioningConcat conditioning_from contains more than 1 cond, only the first one will actually be applied to conditioning_to.") + + cond_from = conditioning_from[0][0] + + for i in range(len(conditioning_to)): + t1 = conditioning_to[i][0] + tw = torch.cat((t1, cond_from),1) + n = [tw, conditioning_to[i][1].copy()] + out.append(n) + + return (out, ) + +class ConditioningSetArea: + @classmethod + def INPUT_TYPES(s): + return {"required": {"conditioning": ("CONDITIONING", ), + "width": ("INT", {"default": 64, "min": 64, "max": MAX_RESOLUTION, "step": 8}), + "height": ("INT", {"default": 64, "min": 64, "max": MAX_RESOLUTION, "step": 8}), + "x": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 8}), + "y": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 8}), + "strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}), + }} + RETURN_TYPES = ("CONDITIONING",) + FUNCTION = "append" + + CATEGORY = "conditioning" + + def append(self, conditioning, width, height, x, y, strength): + c = [] + for t in conditioning: + n = [t[0], t[1].copy()] + n[1]['area'] = (height // 8, width // 8, y // 8, x // 8) + n[1]['strength'] = strength + n[1]['set_area_to_bounds'] = False + c.append(n) + return (c, ) + +class ConditioningSetAreaPercentage: + @classmethod + def INPUT_TYPES(s): + return {"required": {"conditioning": ("CONDITIONING", ), + "width": ("FLOAT", {"default": 1.0, "min": 0, "max": 1.0, "step": 0.01}), + "height": ("FLOAT", {"default": 1.0, "min": 0, "max": 1.0, "step": 0.01}), + "x": ("FLOAT", {"default": 0, "min": 0, "max": 1.0, "step": 0.01}), + "y": ("FLOAT", {"default": 0, "min": 0, "max": 1.0, "step": 0.01}), + "strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}), + }} + RETURN_TYPES = ("CONDITIONING",) + FUNCTION = "append" + + CATEGORY = "conditioning" + + def append(self, conditioning, width, height, x, y, strength): + c = [] + for t in conditioning: + n = [t[0], t[1].copy()] + n[1]['area'] = ("percentage", height, width, y, x) + n[1]['strength'] = strength + n[1]['set_area_to_bounds'] = False + c.append(n) + return (c, ) + +class ConditioningSetAreaStrength: + @classmethod + def INPUT_TYPES(s): + return {"required": {"conditioning": ("CONDITIONING", ), + "strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}), + }} + RETURN_TYPES = ("CONDITIONING",) + FUNCTION = "append" + + CATEGORY = "conditioning" + + def append(self, conditioning, strength): + c = [] + for t in conditioning: + n = [t[0], t[1].copy()] + n[1]['strength'] = strength + c.append(n) + return (c, ) + + +class ConditioningSetMask: + @classmethod + def INPUT_TYPES(s): + return {"required": {"conditioning": ("CONDITIONING", ), + "mask": ("MASK", ), + "strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}), + "set_cond_area": (["default", "mask bounds"],), + }} + RETURN_TYPES = ("CONDITIONING",) + FUNCTION = "append" + + CATEGORY = "conditioning" + + def append(self, conditioning, mask, set_cond_area, strength): + c = [] + set_area_to_bounds = False + if set_cond_area != "default": + set_area_to_bounds = True + if len(mask.shape) < 3: + mask = mask.unsqueeze(0) + for t in conditioning: + n = [t[0], t[1].copy()] + _, h, w = mask.shape + 
n[1]['mask'] = mask + n[1]['set_area_to_bounds'] = set_area_to_bounds + n[1]['mask_strength'] = strength + c.append(n) + return (c, ) + +class ConditioningZeroOut: + @classmethod + def INPUT_TYPES(s): + return {"required": {"conditioning": ("CONDITIONING", )}} + RETURN_TYPES = ("CONDITIONING",) + FUNCTION = "zero_out" + + CATEGORY = "advanced/conditioning" + + def zero_out(self, conditioning): + c = [] + for t in conditioning: + d = t[1].copy() + if "pooled_output" in d: + d["pooled_output"] = torch.zeros_like(d["pooled_output"]) + n = [torch.zeros_like(t[0]), d] + c.append(n) + return (c, ) + +class ConditioningSetTimestepRange: + @classmethod + def INPUT_TYPES(s): + return {"required": {"conditioning": ("CONDITIONING", ), + "start": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.001}), + "end": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.001}) + }} + RETURN_TYPES = ("CONDITIONING",) + FUNCTION = "set_range" + + CATEGORY = "advanced/conditioning" + + def set_range(self, conditioning, start, end): + c = [] + for t in conditioning: + d = t[1].copy() + d['start_percent'] = start + d['end_percent'] = end + n = [t[0], d] + c.append(n) + return (c, ) + +class VAEDecode: + @classmethod + def INPUT_TYPES(s): + return {"required": { "samples": ("LATENT", ), "vae": ("VAE", )}} + RETURN_TYPES = ("IMAGE",) + FUNCTION = "decode" + + CATEGORY = "latent" + + def decode(self, vae, samples): + return (vae.decode(samples["samples"]), ) + +class VAEDecodeTiled: + @classmethod + def INPUT_TYPES(s): + return {"required": {"samples": ("LATENT", ), "vae": ("VAE", ), + "tile_size": ("INT", {"default": 512, "min": 320, "max": 4096, "step": 64}) + }} + RETURN_TYPES = ("IMAGE",) + FUNCTION = "decode" + + CATEGORY = "_for_testing" + + def decode(self, vae, samples, tile_size): + return (vae.decode_tiled(samples["samples"], tile_x=tile_size // 8, tile_y=tile_size // 8, ), ) + +class VAEEncode: + @classmethod + def INPUT_TYPES(s): + return {"required": { "pixels": ("IMAGE", ), "vae": ("VAE", )}} + RETURN_TYPES = ("LATENT",) + FUNCTION = "encode" + + CATEGORY = "latent" + + @staticmethod + def vae_encode_crop_pixels(pixels): + x = (pixels.shape[1] // 8) * 8 + y = (pixels.shape[2] // 8) * 8 + if pixels.shape[1] != x or pixels.shape[2] != y: + x_offset = (pixels.shape[1] % 8) // 2 + y_offset = (pixels.shape[2] % 8) // 2 + pixels = pixels[:, x_offset:x + x_offset, y_offset:y + y_offset, :] + return pixels + + def encode(self, vae, pixels): + pixels = self.vae_encode_crop_pixels(pixels) + t = vae.encode(pixels[:,:,:,:3]) + return ({"samples":t}, ) + +class VAEEncodeTiled: + @classmethod + def INPUT_TYPES(s): + return {"required": {"pixels": ("IMAGE", ), "vae": ("VAE", ), + "tile_size": ("INT", {"default": 512, "min": 320, "max": 4096, "step": 64}) + }} + RETURN_TYPES = ("LATENT",) + FUNCTION = "encode" + + CATEGORY = "_for_testing" + + def encode(self, vae, pixels, tile_size): + pixels = VAEEncode.vae_encode_crop_pixels(pixels) + t = vae.encode_tiled(pixels[:,:,:,:3], tile_x=tile_size, tile_y=tile_size, ) + return ({"samples":t}, ) + +class VAEEncodeForInpaint: + @classmethod + def INPUT_TYPES(s): + return {"required": { "pixels": ("IMAGE", ), "vae": ("VAE", ), "mask": ("MASK", ), "grow_mask_by": ("INT", {"default": 6, "min": 0, "max": 64, "step": 1}),}} + RETURN_TYPES = ("LATENT",) + FUNCTION = "encode" + + CATEGORY = "latent/inpaint" + + def encode(self, vae, pixels, mask, grow_mask_by=6): + x = (pixels.shape[1] // 8) * 8 + y = (pixels.shape[2] // 8) * 8 + mask = 
torch.nn.functional.interpolate(mask.reshape((-1, 1, mask.shape[-2], mask.shape[-1])), size=(pixels.shape[1], pixels.shape[2]), mode="bilinear") + + pixels = pixels.clone() + if pixels.shape[1] != x or pixels.shape[2] != y: + x_offset = (pixels.shape[1] % 8) // 2 + y_offset = (pixels.shape[2] % 8) // 2 + pixels = pixels[:,x_offset:x + x_offset, y_offset:y + y_offset,:] + mask = mask[:,:,x_offset:x + x_offset, y_offset:y + y_offset] + + #grow mask by a few pixels to keep things seamless in latent space + if grow_mask_by == 0: + mask_erosion = mask + else: + kernel_tensor = torch.ones((1, 1, grow_mask_by, grow_mask_by)) + padding = math.ceil((grow_mask_by - 1) / 2) + + mask_erosion = torch.clamp(torch.nn.functional.conv2d(mask.round(), kernel_tensor, padding=padding), 0, 1) + + m = (1.0 - mask.round()).squeeze(1) + for i in range(3): + pixels[:,:,:,i] -= 0.5 + pixels[:,:,:,i] *= m + pixels[:,:,:,i] += 0.5 + t = vae.encode(pixels) + + return ({"samples":t, "noise_mask": (mask_erosion[:,:,:x,:y].round())}, ) + + +class InpaintModelConditioning: + @classmethod + def INPUT_TYPES(s): + return {"required": {"positive": ("CONDITIONING", ), + "negative": ("CONDITIONING", ), + "vae": ("VAE", ), + "pixels": ("IMAGE", ), + "mask": ("MASK", ), + }} + + RETURN_TYPES = ("CONDITIONING","CONDITIONING","LATENT") + RETURN_NAMES = ("positive", "negative", "latent") + FUNCTION = "encode" + + CATEGORY = "conditioning/inpaint" + + def encode(self, positive, negative, pixels, vae, mask): + x = (pixels.shape[1] // 8) * 8 + y = (pixels.shape[2] // 8) * 8 + mask = torch.nn.functional.interpolate(mask.reshape((-1, 1, mask.shape[-2], mask.shape[-1])), size=(pixels.shape[1], pixels.shape[2]), mode="bilinear") + + orig_pixels = pixels + pixels = orig_pixels.clone() + if pixels.shape[1] != x or pixels.shape[2] != y: + x_offset = (pixels.shape[1] % 8) // 2 + y_offset = (pixels.shape[2] % 8) // 2 + pixels = pixels[:,x_offset:x + x_offset, y_offset:y + y_offset,:] + mask = mask[:,:,x_offset:x + x_offset, y_offset:y + y_offset] + + m = (1.0 - mask.round()).squeeze(1) + for i in range(3): + pixels[:,:,:,i] -= 0.5 + pixels[:,:,:,i] *= m + pixels[:,:,:,i] += 0.5 + concat_latent = vae.encode(pixels) + orig_latent = vae.encode(orig_pixels) + + out_latent = {} + + out_latent["samples"] = orig_latent + out_latent["noise_mask"] = mask + + out = [] + for conditioning in [positive, negative]: + c = [] + for t in conditioning: + d = t[1].copy() + d["concat_latent_image"] = concat_latent + d["concat_mask"] = mask + n = [t[0], d] + c.append(n) + out.append(c) + return (out[0], out[1], out_latent) + + +class SaveLatent: + def __init__(self): + self.output_dir = ldm_patched.utils.path_utils.get_output_directory() + + @classmethod + def INPUT_TYPES(s): + return {"required": { "samples": ("LATENT", ), + "filename_prefix": ("STRING", {"default": "latents/ldm_patched"})}, + "hidden": {"prompt": "PROMPT", "extra_pnginfo": "EXTRA_PNGINFO"}, + } + RETURN_TYPES = () + FUNCTION = "save" + + OUTPUT_NODE = True + + CATEGORY = "_for_testing" + + def save(self, samples, filename_prefix="ldm_patched", prompt=None, extra_pnginfo=None): + full_output_folder, filename, counter, subfolder, filename_prefix = ldm_patched.utils.path_utils.get_save_image_path(filename_prefix, self.output_dir) + + # support save metadata for latent sharing + prompt_info = "" + if prompt is not None: + prompt_info = json.dumps(prompt) + + metadata = None + if not args.disable_server_info: + metadata = {"prompt": prompt_info} + if extra_pnginfo is not None: + for x in 
extra_pnginfo: + metadata[x] = json.dumps(extra_pnginfo[x]) + + file = f"{filename}_{counter:05}_.latent" + + results = list() + results.append({ + "filename": file, + "subfolder": subfolder, + "type": "output" + }) + + file = os.path.join(full_output_folder, file) + + output = {} + output["latent_tensor"] = samples["samples"] + output["latent_format_version_0"] = torch.tensor([]) + + ldm_patched.modules.utils.save_torch_file(output, file, metadata=metadata) + return { "ui": { "latents": results } } + + +class LoadLatent: + @classmethod + def INPUT_TYPES(s): + input_dir = ldm_patched.utils.path_utils.get_input_directory() + files = [f for f in os.listdir(input_dir) if os.path.isfile(os.path.join(input_dir, f)) and f.endswith(".latent")] + return {"required": {"latent": [sorted(files), ]}, } + + CATEGORY = "_for_testing" + + RETURN_TYPES = ("LATENT", ) + FUNCTION = "load" + + def load(self, latent): + latent_path = ldm_patched.utils.path_utils.get_annotated_filepath(latent) + latent = safetensors.torch.load_file(latent_path, device="cpu") + multiplier = 1.0 + if "latent_format_version_0" not in latent: + multiplier = 1.0 / 0.18215 + samples = {"samples": latent["latent_tensor"].float() * multiplier} + return (samples, ) + + @classmethod + def IS_CHANGED(s, latent): + image_path = ldm_patched.utils.path_utils.get_annotated_filepath(latent) + m = hashlib.sha256() + with open(image_path, 'rb') as f: + m.update(f.read()) + return m.digest().hex() + + @classmethod + def VALIDATE_INPUTS(s, latent): + if not ldm_patched.utils.path_utils.exists_annotated_filepath(latent): + return "Invalid latent file: {}".format(latent) + return True + + +class CheckpointLoader: + @classmethod + def INPUT_TYPES(s): + return {"required": { "config_name": (ldm_patched.utils.path_utils.get_filename_list("configs"), ), + "ckpt_name": (ldm_patched.utils.path_utils.get_filename_list("checkpoints"), )}} + RETURN_TYPES = ("MODEL", "CLIP", "VAE") + FUNCTION = "load_checkpoint" + + CATEGORY = "advanced/loaders" + + def load_checkpoint(self, config_name, ckpt_name, output_vae=True, output_clip=True): + config_path = ldm_patched.utils.path_utils.get_full_path("configs", config_name) + ckpt_path = ldm_patched.utils.path_utils.get_full_path("checkpoints", ckpt_name) + return ldm_patched.modules.sd.load_checkpoint(config_path, ckpt_path, output_vae=True, output_clip=True, embedding_directory=ldm_patched.utils.path_utils.get_folder_paths("embeddings")) + +class CheckpointLoaderSimple: + @classmethod + def INPUT_TYPES(s): + return {"required": { "ckpt_name": (ldm_patched.utils.path_utils.get_filename_list("checkpoints"), ), + }} + RETURN_TYPES = ("MODEL", "CLIP", "VAE") + FUNCTION = "load_checkpoint" + + CATEGORY = "loaders" + + def load_checkpoint(self, ckpt_name, output_vae=True, output_clip=True): + ckpt_path = ldm_patched.utils.path_utils.get_full_path("checkpoints", ckpt_name) + out = ldm_patched.modules.sd.load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, embedding_directory=ldm_patched.utils.path_utils.get_folder_paths("embeddings")) + return out[:3] + +class DiffusersLoader: + @classmethod + def INPUT_TYPES(cls): + paths = [] + for search_path in ldm_patched.utils.path_utils.get_folder_paths("diffusers"): + if os.path.exists(search_path): + for root, subdir, files in os.walk(search_path, followlinks=True): + if "model_index.json" in files: + paths.append(os.path.relpath(root, start=search_path)) + + return {"required": {"model_path": (paths,), }} + RETURN_TYPES = ("MODEL", "CLIP", "VAE") + FUNCTION 
= "load_checkpoint" + + CATEGORY = "advanced/loaders/deprecated" + + def load_checkpoint(self, model_path, output_vae=True, output_clip=True): + for search_path in ldm_patched.utils.path_utils.get_folder_paths("diffusers"): + if os.path.exists(search_path): + path = os.path.join(search_path, model_path) + if os.path.exists(path): + model_path = path + break + + return ldm_patched.modules.diffusers_load.load_diffusers(model_path, output_vae=output_vae, output_clip=output_clip, embedding_directory=ldm_patched.utils.path_utils.get_folder_paths("embeddings")) + + +class unCLIPCheckpointLoader: + @classmethod + def INPUT_TYPES(s): + return {"required": { "ckpt_name": (ldm_patched.utils.path_utils.get_filename_list("checkpoints"), ), + }} + RETURN_TYPES = ("MODEL", "CLIP", "VAE", "CLIP_VISION") + FUNCTION = "load_checkpoint" + + CATEGORY = "loaders" + + def load_checkpoint(self, ckpt_name, output_vae=True, output_clip=True): + ckpt_path = ldm_patched.utils.path_utils.get_full_path("checkpoints", ckpt_name) + out = ldm_patched.modules.sd.load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, output_clipvision=True, embedding_directory=ldm_patched.utils.path_utils.get_folder_paths("embeddings")) + return out + +class CLIPSetLastLayer: + @classmethod + def INPUT_TYPES(s): + return {"required": { "clip": ("CLIP", ), + "stop_at_clip_layer": ("INT", {"default": -1, "min": -24, "max": -1, "step": 1}), + }} + RETURN_TYPES = ("CLIP",) + FUNCTION = "set_last_layer" + + CATEGORY = "conditioning" + + def set_last_layer(self, clip, stop_at_clip_layer): + clip = clip.clone() + clip.clip_layer(stop_at_clip_layer) + return (clip,) + +class LoraLoader: + def __init__(self): + self.loaded_lora = None + + @classmethod + def INPUT_TYPES(s): + return {"required": { "model": ("MODEL",), + "clip": ("CLIP", ), + "lora_name": (ldm_patched.utils.path_utils.get_filename_list("loras"), ), + "strength_model": ("FLOAT", {"default": 1.0, "min": -20.0, "max": 20.0, "step": 0.01}), + "strength_clip": ("FLOAT", {"default": 1.0, "min": -20.0, "max": 20.0, "step": 0.01}), + }} + RETURN_TYPES = ("MODEL", "CLIP") + FUNCTION = "load_lora" + + CATEGORY = "loaders" + + def load_lora(self, model, clip, lora_name, strength_model, strength_clip): + if strength_model == 0 and strength_clip == 0: + return (model, clip) + + lora_path = ldm_patched.utils.path_utils.get_full_path("loras", lora_name) + lora = None + if self.loaded_lora is not None: + if self.loaded_lora[0] == lora_path: + lora = self.loaded_lora[1] + else: + temp = self.loaded_lora + self.loaded_lora = None + del temp + + if lora is None: + lora = ldm_patched.modules.utils.load_torch_file(lora_path, safe_load=True) + self.loaded_lora = (lora_path, lora) + + model_lora, clip_lora = ldm_patched.modules.sd.load_lora_for_models(model, clip, lora, strength_model, strength_clip) + return (model_lora, clip_lora) + +class LoraLoaderModelOnly(LoraLoader): + @classmethod + def INPUT_TYPES(s): + return {"required": { "model": ("MODEL",), + "lora_name": (ldm_patched.utils.path_utils.get_filename_list("loras"), ), + "strength_model": ("FLOAT", {"default": 1.0, "min": -20.0, "max": 20.0, "step": 0.01}), + }} + RETURN_TYPES = ("MODEL",) + FUNCTION = "load_lora_model_only" + + def load_lora_model_only(self, model, lora_name, strength_model): + return (self.load_lora(model, None, lora_name, strength_model, 0)[0],) + +class VAELoader: + @staticmethod + def vae_list(): + vaes = ldm_patched.utils.path_utils.get_filename_list("vae") + approx_vaes = 
ldm_patched.utils.path_utils.get_filename_list("vae_approx") + sdxl_taesd_enc = False + sdxl_taesd_dec = False + sd1_taesd_enc = False + sd1_taesd_dec = False + + for v in approx_vaes: + if v.startswith("taesd_decoder."): + sd1_taesd_dec = True + elif v.startswith("taesd_encoder."): + sd1_taesd_enc = True + elif v.startswith("taesdxl_decoder."): + sdxl_taesd_dec = True + elif v.startswith("taesdxl_encoder."): + sdxl_taesd_enc = True + if sd1_taesd_dec and sd1_taesd_enc: + vaes.append("taesd") + if sdxl_taesd_dec and sdxl_taesd_enc: + vaes.append("taesdxl") + return vaes + + @staticmethod + def load_taesd(name): + sd = {} + approx_vaes = ldm_patched.utils.path_utils.get_filename_list("vae_approx") + + encoder = next(filter(lambda a: a.startswith("{}_encoder.".format(name)), approx_vaes)) + decoder = next(filter(lambda a: a.startswith("{}_decoder.".format(name)), approx_vaes)) + + enc = ldm_patched.modules.utils.load_torch_file(ldm_patched.utils.path_utils.get_full_path("vae_approx", encoder)) + for k in enc: + sd["taesd_encoder.{}".format(k)] = enc[k] + + dec = ldm_patched.modules.utils.load_torch_file(ldm_patched.utils.path_utils.get_full_path("vae_approx", decoder)) + for k in dec: + sd["taesd_decoder.{}".format(k)] = dec[k] + + if name == "taesd": + sd["vae_scale"] = torch.tensor(0.18215) + elif name == "taesdxl": + sd["vae_scale"] = torch.tensor(0.13025) + return sd + + @classmethod + def INPUT_TYPES(s): + return {"required": { "vae_name": (s.vae_list(), )}} + RETURN_TYPES = ("VAE",) + FUNCTION = "load_vae" + + CATEGORY = "loaders" + + #TODO: scale factor? + def load_vae(self, vae_name): + if vae_name in ["taesd", "taesdxl"]: + sd = self.load_taesd(vae_name) + else: + vae_path = ldm_patched.utils.path_utils.get_full_path("vae", vae_name) + sd = ldm_patched.modules.utils.load_torch_file(vae_path) + vae = ldm_patched.modules.sd.VAE(sd=sd) + return (vae,) + +class ControlNetLoader: + @classmethod + def INPUT_TYPES(s): + return {"required": { "control_net_name": (ldm_patched.utils.path_utils.get_filename_list("controlnet"), )}} + + RETURN_TYPES = ("CONTROL_NET",) + FUNCTION = "load_controlnet" + + CATEGORY = "loaders" + + def load_controlnet(self, control_net_name): + controlnet_path = ldm_patched.utils.path_utils.get_full_path("controlnet", control_net_name) + controlnet = ldm_patched.modules.controlnet.load_controlnet(controlnet_path) + return (controlnet,) + +class DiffControlNetLoader: + @classmethod + def INPUT_TYPES(s): + return {"required": { "model": ("MODEL",), + "control_net_name": (ldm_patched.utils.path_utils.get_filename_list("controlnet"), )}} + + RETURN_TYPES = ("CONTROL_NET",) + FUNCTION = "load_controlnet" + + CATEGORY = "loaders" + + def load_controlnet(self, model, control_net_name): + controlnet_path = ldm_patched.utils.path_utils.get_full_path("controlnet", control_net_name) + controlnet = ldm_patched.modules.controlnet.load_controlnet(controlnet_path, model) + return (controlnet,) + + +class ControlNetApply: + @classmethod + def INPUT_TYPES(s): + return {"required": {"conditioning": ("CONDITIONING", ), + "control_net": ("CONTROL_NET", ), + "image": ("IMAGE", ), + "strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}) + }} + RETURN_TYPES = ("CONDITIONING",) + FUNCTION = "apply_controlnet" + + CATEGORY = "conditioning" + + def apply_controlnet(self, conditioning, control_net, image, strength): + if strength == 0: + return (conditioning, ) + + c = [] + control_hint = image.movedim(-1,1) + for t in conditioning: + n = [t[0], t[1].copy()] + c_net = 
control_net.copy().set_cond_hint(control_hint, strength) + if 'control' in t[1]: + c_net.set_previous_controlnet(t[1]['control']) + n[1]['control'] = c_net + n[1]['control_apply_to_uncond'] = True + c.append(n) + return (c, ) + + +class ControlNetApplyAdvanced: + @classmethod + def INPUT_TYPES(s): + return {"required": {"positive": ("CONDITIONING", ), + "negative": ("CONDITIONING", ), + "control_net": ("CONTROL_NET", ), + "image": ("IMAGE", ), + "strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}), + "start_percent": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.001}), + "end_percent": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.001}) + }} + + RETURN_TYPES = ("CONDITIONING","CONDITIONING") + RETURN_NAMES = ("positive", "negative") + FUNCTION = "apply_controlnet" + + CATEGORY = "conditioning" + + def apply_controlnet(self, positive, negative, control_net, image, strength, start_percent, end_percent): + if strength == 0: + return (positive, negative) + + control_hint = image.movedim(-1,1) + cnets = {} + + out = [] + for conditioning in [positive, negative]: + c = [] + for t in conditioning: + d = t[1].copy() + + prev_cnet = d.get('control', None) + if prev_cnet in cnets: + c_net = cnets[prev_cnet] + else: + c_net = control_net.copy().set_cond_hint(control_hint, strength, (start_percent, end_percent)) + c_net.set_previous_controlnet(prev_cnet) + cnets[prev_cnet] = c_net + + d['control'] = c_net + d['control_apply_to_uncond'] = False + n = [t[0], d] + c.append(n) + out.append(c) + return (out[0], out[1]) + + +class UNETLoader: + @classmethod + def INPUT_TYPES(s): + return {"required": { "unet_name": (ldm_patched.utils.path_utils.get_filename_list("unet"), ), + }} + RETURN_TYPES = ("MODEL",) + FUNCTION = "load_unet" + + CATEGORY = "advanced/loaders" + + def load_unet(self, unet_name): + unet_path = ldm_patched.utils.path_utils.get_full_path("unet", unet_name) + model = ldm_patched.modules.sd.load_unet(unet_path) + return (model,) + +class CLIPLoader: + @classmethod + def INPUT_TYPES(s): + return {"required": { "clip_name": (ldm_patched.utils.path_utils.get_filename_list("clip"), ), + }} + RETURN_TYPES = ("CLIP",) + FUNCTION = "load_clip" + + CATEGORY = "advanced/loaders" + + def load_clip(self, clip_name): + clip_path = ldm_patched.utils.path_utils.get_full_path("clip", clip_name) + clip = ldm_patched.modules.sd.load_clip(ckpt_paths=[clip_path], embedding_directory=ldm_patched.utils.path_utils.get_folder_paths("embeddings")) + return (clip,) + +class DualCLIPLoader: + @classmethod + def INPUT_TYPES(s): + return {"required": { "clip_name1": (ldm_patched.utils.path_utils.get_filename_list("clip"), ), "clip_name2": (ldm_patched.utils.path_utils.get_filename_list("clip"), ), + }} + RETURN_TYPES = ("CLIP",) + FUNCTION = "load_clip" + + CATEGORY = "advanced/loaders" + + def load_clip(self, clip_name1, clip_name2): + clip_path1 = ldm_patched.utils.path_utils.get_full_path("clip", clip_name1) + clip_path2 = ldm_patched.utils.path_utils.get_full_path("clip", clip_name2) + clip = ldm_patched.modules.sd.load_clip(ckpt_paths=[clip_path1, clip_path2], embedding_directory=ldm_patched.utils.path_utils.get_folder_paths("embeddings")) + return (clip,) + +class CLIPVisionLoader: + @classmethod + def INPUT_TYPES(s): + return {"required": { "clip_name": (ldm_patched.utils.path_utils.get_filename_list("clip_vision"), ), + }} + RETURN_TYPES = ("CLIP_VISION",) + FUNCTION = "load_clip" + + CATEGORY = "loaders" + + def load_clip(self, clip_name): + clip_path = 
ldm_patched.utils.path_utils.get_full_path("clip_vision", clip_name) + clip_vision = ldm_patched.modules.clip_vision.load(clip_path) + return (clip_vision,) + +class CLIPVisionEncode: + @classmethod + def INPUT_TYPES(s): + return {"required": { "clip_vision": ("CLIP_VISION",), + "image": ("IMAGE",) + }} + RETURN_TYPES = ("CLIP_VISION_OUTPUT",) + FUNCTION = "encode" + + CATEGORY = "conditioning" + + def encode(self, clip_vision, image): + output = clip_vision.encode_image(image) + return (output,) + +class StyleModelLoader: + @classmethod + def INPUT_TYPES(s): + return {"required": { "style_model_name": (ldm_patched.utils.path_utils.get_filename_list("style_models"), )}} + + RETURN_TYPES = ("STYLE_MODEL",) + FUNCTION = "load_style_model" + + CATEGORY = "loaders" + + def load_style_model(self, style_model_name): + style_model_path = ldm_patched.utils.path_utils.get_full_path("style_models", style_model_name) + style_model = ldm_patched.modules.sd.load_style_model(style_model_path) + return (style_model,) + + +class StyleModelApply: + @classmethod + def INPUT_TYPES(s): + return {"required": {"conditioning": ("CONDITIONING", ), + "style_model": ("STYLE_MODEL", ), + "clip_vision_output": ("CLIP_VISION_OUTPUT", ), + }} + RETURN_TYPES = ("CONDITIONING",) + FUNCTION = "apply_stylemodel" + + CATEGORY = "conditioning/style_model" + + def apply_stylemodel(self, clip_vision_output, style_model, conditioning): + cond = style_model.get_cond(clip_vision_output).flatten(start_dim=0, end_dim=1).unsqueeze(dim=0) + c = [] + for t in conditioning: + n = [torch.cat((t[0], cond), dim=1), t[1].copy()] + c.append(n) + return (c, ) + +class unCLIPConditioning: + @classmethod + def INPUT_TYPES(s): + return {"required": {"conditioning": ("CONDITIONING", ), + "clip_vision_output": ("CLIP_VISION_OUTPUT", ), + "strength": ("FLOAT", {"default": 1.0, "min": -10.0, "max": 10.0, "step": 0.01}), + "noise_augmentation": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.01}), + }} + RETURN_TYPES = ("CONDITIONING",) + FUNCTION = "apply_adm" + + CATEGORY = "conditioning" + + def apply_adm(self, conditioning, clip_vision_output, strength, noise_augmentation): + if strength == 0: + return (conditioning, ) + + c = [] + for t in conditioning: + o = t[1].copy() + x = {"clip_vision_output": clip_vision_output, "strength": strength, "noise_augmentation": noise_augmentation} + if "unclip_conditioning" in o: + o["unclip_conditioning"] = o["unclip_conditioning"][:] + [x] + else: + o["unclip_conditioning"] = [x] + n = [t[0], o] + c.append(n) + return (c, ) + +class GLIGENLoader: + @classmethod + def INPUT_TYPES(s): + return {"required": { "gligen_name": (ldm_patched.utils.path_utils.get_filename_list("gligen"), )}} + + RETURN_TYPES = ("GLIGEN",) + FUNCTION = "load_gligen" + + CATEGORY = "loaders" + + def load_gligen(self, gligen_name): + gligen_path = ldm_patched.utils.path_utils.get_full_path("gligen", gligen_name) + gligen = ldm_patched.modules.sd.load_gligen(gligen_path) + return (gligen,) + +class GLIGENTextBoxApply: + @classmethod + def INPUT_TYPES(s): + return {"required": {"conditioning_to": ("CONDITIONING", ), + "clip": ("CLIP", ), + "gligen_textbox_model": ("GLIGEN", ), + "text": ("STRING", {"multiline": True}), + "width": ("INT", {"default": 64, "min": 8, "max": MAX_RESOLUTION, "step": 8}), + "height": ("INT", {"default": 64, "min": 8, "max": MAX_RESOLUTION, "step": 8}), + "x": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 8}), + "y": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 
8}), + }} + RETURN_TYPES = ("CONDITIONING",) + FUNCTION = "append" + + CATEGORY = "conditioning/gligen" + + def append(self, conditioning_to, clip, gligen_textbox_model, text, width, height, x, y): + c = [] + cond, cond_pooled = clip.encode_from_tokens(clip.tokenize(text), return_pooled=True) + for t in conditioning_to: + n = [t[0], t[1].copy()] + position_params = [(cond_pooled, height // 8, width // 8, y // 8, x // 8)] + prev = [] + if "gligen" in n[1]: + prev = n[1]['gligen'][2] + + n[1]['gligen'] = ("position", gligen_textbox_model, prev + position_params) + c.append(n) + return (c, ) + +class EmptyLatentImage: + def __init__(self): + self.device = ldm_patched.modules.model_management.intermediate_device() + + @classmethod + def INPUT_TYPES(s): + return {"required": { "width": ("INT", {"default": 512, "min": 16, "max": MAX_RESOLUTION, "step": 8}), + "height": ("INT", {"default": 512, "min": 16, "max": MAX_RESOLUTION, "step": 8}), + "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096})}} + RETURN_TYPES = ("LATENT",) + FUNCTION = "generate" + + CATEGORY = "latent" + + def generate(self, width, height, batch_size=1): + latent = torch.zeros([batch_size, 4, height // 8, width // 8], device=self.device) + return ({"samples":latent}, ) + + +class LatentFromBatch: + @classmethod + def INPUT_TYPES(s): + return {"required": { "samples": ("LATENT",), + "batch_index": ("INT", {"default": 0, "min": 0, "max": 63}), + "length": ("INT", {"default": 1, "min": 1, "max": 64}), + }} + RETURN_TYPES = ("LATENT",) + FUNCTION = "frombatch" + + CATEGORY = "latent/batch" + + def frombatch(self, samples, batch_index, length): + s = samples.copy() + s_in = samples["samples"] + batch_index = min(s_in.shape[0] - 1, batch_index) + length = min(s_in.shape[0] - batch_index, length) + s["samples"] = s_in[batch_index:batch_index + length].clone() + if "noise_mask" in samples: + masks = samples["noise_mask"] + if masks.shape[0] == 1: + s["noise_mask"] = masks.clone() + else: + if masks.shape[0] < s_in.shape[0]: + masks = masks.repeat(math.ceil(s_in.shape[0] / masks.shape[0]), 1, 1, 1)[:s_in.shape[0]] + s["noise_mask"] = masks[batch_index:batch_index + length].clone() + if "batch_index" not in s: + s["batch_index"] = [x for x in range(batch_index, batch_index+length)] + else: + s["batch_index"] = samples["batch_index"][batch_index:batch_index + length] + return (s,) + +class RepeatLatentBatch: + @classmethod + def INPUT_TYPES(s): + return {"required": { "samples": ("LATENT",), + "amount": ("INT", {"default": 1, "min": 1, "max": 64}), + }} + RETURN_TYPES = ("LATENT",) + FUNCTION = "repeat" + + CATEGORY = "latent/batch" + + def repeat(self, samples, amount): + s = samples.copy() + s_in = samples["samples"] + + s["samples"] = s_in.repeat((amount, 1,1,1)) + if "noise_mask" in samples and samples["noise_mask"].shape[0] > 1: + masks = samples["noise_mask"] + if masks.shape[0] < s_in.shape[0]: + masks = masks.repeat(math.ceil(s_in.shape[0] / masks.shape[0]), 1, 1, 1)[:s_in.shape[0]] + s["noise_mask"] = samples["noise_mask"].repeat((amount, 1,1,1)) + if "batch_index" in s: + offset = max(s["batch_index"]) - min(s["batch_index"]) + 1 + s["batch_index"] = s["batch_index"] + [x + (i * offset) for i in range(1, amount) for x in s["batch_index"]] + return (s,) + +class LatentUpscale: + upscale_methods = ["nearest-exact", "bilinear", "area", "bicubic", "bislerp"] + crop_methods = ["disabled", "center"] + + @classmethod + def INPUT_TYPES(s): + return {"required": { "samples": ("LATENT",), "upscale_method": (s.upscale_methods,), + 
"width": ("INT", {"default": 512, "min": 0, "max": MAX_RESOLUTION, "step": 8}), + "height": ("INT", {"default": 512, "min": 0, "max": MAX_RESOLUTION, "step": 8}), + "crop": (s.crop_methods,)}} + RETURN_TYPES = ("LATENT",) + FUNCTION = "upscale" + + CATEGORY = "latent" + + def upscale(self, samples, upscale_method, width, height, crop): + if width == 0 and height == 0: + s = samples + else: + s = samples.copy() + + if width == 0: + height = max(64, height) + width = max(64, round(samples["samples"].shape[3] * height / samples["samples"].shape[2])) + elif height == 0: + width = max(64, width) + height = max(64, round(samples["samples"].shape[2] * width / samples["samples"].shape[3])) + else: + width = max(64, width) + height = max(64, height) + + s["samples"] = ldm_patched.modules.utils.common_upscale(samples["samples"], width // 8, height // 8, upscale_method, crop) + return (s,) + +class LatentUpscaleBy: + upscale_methods = ["nearest-exact", "bilinear", "area", "bicubic", "bislerp"] + + @classmethod + def INPUT_TYPES(s): + return {"required": { "samples": ("LATENT",), "upscale_method": (s.upscale_methods,), + "scale_by": ("FLOAT", {"default": 1.5, "min": 0.01, "max": 8.0, "step": 0.01}),}} + RETURN_TYPES = ("LATENT",) + FUNCTION = "upscale" + + CATEGORY = "latent" + + def upscale(self, samples, upscale_method, scale_by): + s = samples.copy() + width = round(samples["samples"].shape[3] * scale_by) + height = round(samples["samples"].shape[2] * scale_by) + s["samples"] = ldm_patched.modules.utils.common_upscale(samples["samples"], width, height, upscale_method, "disabled") + return (s,) + +class LatentRotate: + @classmethod + def INPUT_TYPES(s): + return {"required": { "samples": ("LATENT",), + "rotation": (["none", "90 degrees", "180 degrees", "270 degrees"],), + }} + RETURN_TYPES = ("LATENT",) + FUNCTION = "rotate" + + CATEGORY = "latent/transform" + + def rotate(self, samples, rotation): + s = samples.copy() + rotate_by = 0 + if rotation.startswith("90"): + rotate_by = 1 + elif rotation.startswith("180"): + rotate_by = 2 + elif rotation.startswith("270"): + rotate_by = 3 + + s["samples"] = torch.rot90(samples["samples"], k=rotate_by, dims=[3, 2]) + return (s,) + +class LatentFlip: + @classmethod + def INPUT_TYPES(s): + return {"required": { "samples": ("LATENT",), + "flip_method": (["x-axis: vertically", "y-axis: horizontally"],), + }} + RETURN_TYPES = ("LATENT",) + FUNCTION = "flip" + + CATEGORY = "latent/transform" + + def flip(self, samples, flip_method): + s = samples.copy() + if flip_method.startswith("x"): + s["samples"] = torch.flip(samples["samples"], dims=[2]) + elif flip_method.startswith("y"): + s["samples"] = torch.flip(samples["samples"], dims=[3]) + + return (s,) + +class LatentComposite: + @classmethod + def INPUT_TYPES(s): + return {"required": { "samples_to": ("LATENT",), + "samples_from": ("LATENT",), + "x": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 8}), + "y": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 8}), + "feather": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 8}), + }} + RETURN_TYPES = ("LATENT",) + FUNCTION = "composite" + + CATEGORY = "latent" + + def composite(self, samples_to, samples_from, x, y, composite_method="normal", feather=0): + x = x // 8 + y = y // 8 + feather = feather // 8 + samples_out = samples_to.copy() + s = samples_to["samples"].clone() + samples_to = samples_to["samples"] + samples_from = samples_from["samples"] + if feather == 0: + 
s[:,:,y:y+samples_from.shape[2],x:x+samples_from.shape[3]] = samples_from[:,:,:samples_to.shape[2] - y, :samples_to.shape[3] - x] + else: + samples_from = samples_from[:,:,:samples_to.shape[2] - y, :samples_to.shape[3] - x] + mask = torch.ones_like(samples_from) + for t in range(feather): + if y != 0: + mask[:,:,t:1+t,:] *= ((1.0/feather) * (t + 1)) + + if y + samples_from.shape[2] < samples_to.shape[2]: + mask[:,:,mask.shape[2] -1 -t: mask.shape[2]-t,:] *= ((1.0/feather) * (t + 1)) + if x != 0: + mask[:,:,:,t:1+t] *= ((1.0/feather) * (t + 1)) + if x + samples_from.shape[3] < samples_to.shape[3]: + mask[:,:,:,mask.shape[3]- 1 - t: mask.shape[3]- t] *= ((1.0/feather) * (t + 1)) + rev_mask = torch.ones_like(mask) - mask + s[:,:,y:y+samples_from.shape[2],x:x+samples_from.shape[3]] = samples_from[:,:,:samples_to.shape[2] - y, :samples_to.shape[3] - x] * mask + s[:,:,y:y+samples_from.shape[2],x:x+samples_from.shape[3]] * rev_mask + samples_out["samples"] = s + return (samples_out,) + +class LatentBlend: + @classmethod + def INPUT_TYPES(s): + return {"required": { + "samples1": ("LATENT",), + "samples2": ("LATENT",), + "blend_factor": ("FLOAT", { + "default": 0.5, + "min": 0, + "max": 1, + "step": 0.01 + }), + }} + + RETURN_TYPES = ("LATENT",) + FUNCTION = "blend" + + CATEGORY = "_for_testing" + + def blend(self, samples1, samples2, blend_factor:float, blend_mode: str="normal"): + + samples_out = samples1.copy() + samples1 = samples1["samples"] + samples2 = samples2["samples"] + + if samples1.shape != samples2.shape: + samples2.permute(0, 3, 1, 2) + samples2 = ldm_patched.modules.utils.common_upscale(samples2, samples1.shape[3], samples1.shape[2], 'bicubic', crop='center') + samples2.permute(0, 2, 3, 1) + + samples_blended = self.blend_mode(samples1, samples2, blend_mode) + samples_blended = samples1 * blend_factor + samples_blended * (1 - blend_factor) + samples_out["samples"] = samples_blended + return (samples_out,) + + def blend_mode(self, img1, img2, mode): + if mode == "normal": + return img2 + else: + raise ValueError(f"Unsupported blend mode: {mode}") + +class LatentCrop: + @classmethod + def INPUT_TYPES(s): + return {"required": { "samples": ("LATENT",), + "width": ("INT", {"default": 512, "min": 64, "max": MAX_RESOLUTION, "step": 8}), + "height": ("INT", {"default": 512, "min": 64, "max": MAX_RESOLUTION, "step": 8}), + "x": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 8}), + "y": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 8}), + }} + RETURN_TYPES = ("LATENT",) + FUNCTION = "crop" + + CATEGORY = "latent/transform" + + def crop(self, samples, width, height, x, y): + s = samples.copy() + samples = samples['samples'] + x = x // 8 + y = y // 8 + + #enfonce minimum size of 64 + if x > (samples.shape[3] - 8): + x = samples.shape[3] - 8 + if y > (samples.shape[2] - 8): + y = samples.shape[2] - 8 + + new_height = height // 8 + new_width = width // 8 + to_x = new_width + x + to_y = new_height + y + s['samples'] = samples[:,:,y:to_y, x:to_x] + return (s,) + +class SetLatentNoiseMask: + @classmethod + def INPUT_TYPES(s): + return {"required": { "samples": ("LATENT",), + "mask": ("MASK",), + }} + RETURN_TYPES = ("LATENT",) + FUNCTION = "set_mask" + + CATEGORY = "latent/inpaint" + + def set_mask(self, samples, mask): + s = samples.copy() + s["noise_mask"] = mask.reshape((-1, 1, mask.shape[-2], mask.shape[-1])) + return (s,) + +def common_ksampler(model, seed, steps, cfg, sampler_name, scheduler, positive, negative, latent, denoise=1.0, disable_noise=False, 
start_step=None, last_step=None, force_full_denoise=False): + latent_image = latent["samples"] + if disable_noise: + noise = torch.zeros(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, device="cpu") + else: + batch_inds = latent["batch_index"] if "batch_index" in latent else None + noise = ldm_patched.modules.sample.prepare_noise(latent_image, seed, batch_inds) + + noise_mask = None + if "noise_mask" in latent: + noise_mask = latent["noise_mask"] + + callback = ldm_patched.utils.latent_visualization.prepare_callback(model, steps) + disable_pbar = not ldm_patched.modules.utils.PROGRESS_BAR_ENABLED + samples = ldm_patched.modules.sample.sample(model, noise, steps, cfg, sampler_name, scheduler, positive, negative, latent_image, + denoise=denoise, disable_noise=disable_noise, start_step=start_step, last_step=last_step, + force_full_denoise=force_full_denoise, noise_mask=noise_mask, callback=callback, disable_pbar=disable_pbar, seed=seed) + out = latent.copy() + out["samples"] = samples + return (out, ) + +class KSampler: + @classmethod + def INPUT_TYPES(s): + return {"required": + {"model": ("MODEL",), + "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}), + "steps": ("INT", {"default": 20, "min": 1, "max": 10000}), + "cfg": ("FLOAT", {"default": 8.0, "min": 0.0, "max": 100.0, "step":0.1, "round": 0.01}), + "sampler_name": (ldm_patched.modules.samplers.KSampler.SAMPLERS, ), + "scheduler": (ldm_patched.modules.samplers.KSampler.SCHEDULERS, ), + "positive": ("CONDITIONING", ), + "negative": ("CONDITIONING", ), + "latent_image": ("LATENT", ), + "denoise": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}), + } + } + + RETURN_TYPES = ("LATENT",) + FUNCTION = "sample" + + CATEGORY = "sampling" + + def sample(self, model, seed, steps, cfg, sampler_name, scheduler, positive, negative, latent_image, denoise=1.0): + return common_ksampler(model, seed, steps, cfg, sampler_name, scheduler, positive, negative, latent_image, denoise=denoise) + +class KSamplerAdvanced: + @classmethod + def INPUT_TYPES(s): + return {"required": + {"model": ("MODEL",), + "add_noise": (["enable", "disable"], ), + "noise_seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}), + "steps": ("INT", {"default": 20, "min": 1, "max": 10000}), + "cfg": ("FLOAT", {"default": 8.0, "min": 0.0, "max": 100.0, "step":0.1, "round": 0.01}), + "sampler_name": (ldm_patched.modules.samplers.KSampler.SAMPLERS, ), + "scheduler": (ldm_patched.modules.samplers.KSampler.SCHEDULERS, ), + "positive": ("CONDITIONING", ), + "negative": ("CONDITIONING", ), + "latent_image": ("LATENT", ), + "start_at_step": ("INT", {"default": 0, "min": 0, "max": 10000}), + "end_at_step": ("INT", {"default": 10000, "min": 0, "max": 10000}), + "return_with_leftover_noise": (["disable", "enable"], ), + } + } + + RETURN_TYPES = ("LATENT",) + FUNCTION = "sample" + + CATEGORY = "sampling" + + def sample(self, model, add_noise, noise_seed, steps, cfg, sampler_name, scheduler, positive, negative, latent_image, start_at_step, end_at_step, return_with_leftover_noise, denoise=1.0): + force_full_denoise = True + if return_with_leftover_noise == "enable": + force_full_denoise = False + disable_noise = False + if add_noise == "disable": + disable_noise = True + return common_ksampler(model, noise_seed, steps, cfg, sampler_name, scheduler, positive, negative, latent_image, denoise=denoise, disable_noise=disable_noise, start_step=start_at_step, last_step=end_at_step, force_full_denoise=force_full_denoise) + +class 
SaveImage: + def __init__(self): + self.output_dir = ldm_patched.utils.path_utils.get_output_directory() + self.type = "output" + self.prefix_append = "" + self.compress_level = 4 + + @classmethod + def INPUT_TYPES(s): + return {"required": + {"images": ("IMAGE", ), + "filename_prefix": ("STRING", {"default": "ldm_patched"})}, + "hidden": {"prompt": "PROMPT", "extra_pnginfo": "EXTRA_PNGINFO"}, + } + + RETURN_TYPES = () + FUNCTION = "save_images" + + OUTPUT_NODE = True + + CATEGORY = "image" + + def save_images(self, images, filename_prefix="ldm_patched", prompt=None, extra_pnginfo=None): + filename_prefix += self.prefix_append + full_output_folder, filename, counter, subfolder, filename_prefix = ldm_patched.utils.path_utils.get_save_image_path(filename_prefix, self.output_dir, images[0].shape[1], images[0].shape[0]) + results = list() + for image in images: + i = 255. * image.cpu().numpy() + img = Image.fromarray(np.clip(i, 0, 255).astype(np.uint8)) + metadata = None + if not args.disable_server_info: + metadata = PngInfo() + if prompt is not None: + metadata.add_text("prompt", json.dumps(prompt)) + if extra_pnginfo is not None: + for x in extra_pnginfo: + metadata.add_text(x, json.dumps(extra_pnginfo[x])) + + file = f"{filename}_{counter:05}_.png" + img.save(os.path.join(full_output_folder, file), pnginfo=metadata, compress_level=self.compress_level) + results.append({ + "filename": file, + "subfolder": subfolder, + "type": self.type + }) + counter += 1 + + return { "ui": { "images": results } } + +class PreviewImage(SaveImage): + def __init__(self): + self.output_dir = ldm_patched.utils.path_utils.get_temp_directory() + self.type = "temp" + self.prefix_append = "_temp_" + ''.join(random.choice("abcdefghijklmnopqrstupvxyz") for x in range(5)) + self.compress_level = 1 + + @classmethod + def INPUT_TYPES(s): + return {"required": + {"images": ("IMAGE", ), }, + "hidden": {"prompt": "PROMPT", "extra_pnginfo": "EXTRA_PNGINFO"}, + } + +class LoadImage: + @classmethod + def INPUT_TYPES(s): + input_dir = ldm_patched.utils.path_utils.get_input_directory() + files = [f for f in os.listdir(input_dir) if os.path.isfile(os.path.join(input_dir, f))] + return {"required": + {"image": (sorted(files), {"image_upload": True})}, + } + + CATEGORY = "image" + + RETURN_TYPES = ("IMAGE", "MASK") + FUNCTION = "load_image" + def load_image(self, image): + image_path = ldm_patched.utils.path_utils.get_annotated_filepath(image) + img = Image.open(image_path) + output_images = [] + output_masks = [] + for i in ImageSequence.Iterator(img): + i = ImageOps.exif_transpose(i) + if i.mode == 'I': + i = i.point(lambda i: i * (1 / 255)) + image = i.convert("RGB") + image = np.array(image).astype(np.float32) / 255.0 + image = torch.from_numpy(image)[None,] + if 'A' in i.getbands(): + mask = np.array(i.getchannel('A')).astype(np.float32) / 255.0 + mask = 1. 
- torch.from_numpy(mask) + else: + mask = torch.zeros((64,64), dtype=torch.float32, device="cpu") + output_images.append(image) + output_masks.append(mask.unsqueeze(0)) + + if len(output_images) > 1: + output_image = torch.cat(output_images, dim=0) + output_mask = torch.cat(output_masks, dim=0) + else: + output_image = output_images[0] + output_mask = output_masks[0] + + return (output_image, output_mask) + + @classmethod + def IS_CHANGED(s, image): + image_path = ldm_patched.utils.path_utils.get_annotated_filepath(image) + m = hashlib.sha256() + with open(image_path, 'rb') as f: + m.update(f.read()) + return m.digest().hex() + + @classmethod + def VALIDATE_INPUTS(s, image): + if not ldm_patched.utils.path_utils.exists_annotated_filepath(image): + return "Invalid image file: {}".format(image) + + return True + +class LoadImageMask: + _color_channels = ["alpha", "red", "green", "blue"] + @classmethod + def INPUT_TYPES(s): + input_dir = ldm_patched.utils.path_utils.get_input_directory() + files = [f for f in os.listdir(input_dir) if os.path.isfile(os.path.join(input_dir, f))] + return {"required": + {"image": (sorted(files), {"image_upload": True}), + "channel": (s._color_channels, ), } + } + + CATEGORY = "mask" + + RETURN_TYPES = ("MASK",) + FUNCTION = "load_image" + def load_image(self, image, channel): + image_path = ldm_patched.utils.path_utils.get_annotated_filepath(image) + i = Image.open(image_path) + i = ImageOps.exif_transpose(i) + if i.getbands() != ("R", "G", "B", "A"): + if i.mode == 'I': + i = i.point(lambda i: i * (1 / 255)) + i = i.convert("RGBA") + mask = None + c = channel[0].upper() + if c in i.getbands(): + mask = np.array(i.getchannel(c)).astype(np.float32) / 255.0 + mask = torch.from_numpy(mask) + if c == 'A': + mask = 1. - mask + else: + mask = torch.zeros((64,64), dtype=torch.float32, device="cpu") + return (mask.unsqueeze(0),) + + @classmethod + def IS_CHANGED(s, image, channel): + image_path = ldm_patched.utils.path_utils.get_annotated_filepath(image) + m = hashlib.sha256() + with open(image_path, 'rb') as f: + m.update(f.read()) + return m.digest().hex() + + @classmethod + def VALIDATE_INPUTS(s, image): + if not ldm_patched.utils.path_utils.exists_annotated_filepath(image): + return "Invalid image file: {}".format(image) + + return True + +class ImageScale: + upscale_methods = ["nearest-exact", "bilinear", "area", "bicubic", "lanczos"] + crop_methods = ["disabled", "center"] + + @classmethod + def INPUT_TYPES(s): + return {"required": { "image": ("IMAGE",), "upscale_method": (s.upscale_methods,), + "width": ("INT", {"default": 512, "min": 0, "max": MAX_RESOLUTION, "step": 1}), + "height": ("INT", {"default": 512, "min": 0, "max": MAX_RESOLUTION, "step": 1}), + "crop": (s.crop_methods,)}} + RETURN_TYPES = ("IMAGE",) + FUNCTION = "upscale" + + CATEGORY = "image/upscaling" + + def upscale(self, image, upscale_method, width, height, crop): + if width == 0 and height == 0: + s = image + else: + samples = image.movedim(-1,1) + + if width == 0: + width = max(1, round(samples.shape[3] * height / samples.shape[2])) + elif height == 0: + height = max(1, round(samples.shape[2] * width / samples.shape[3])) + + s = ldm_patched.modules.utils.common_upscale(samples, width, height, upscale_method, crop) + s = s.movedim(1,-1) + return (s,) + +class ImageScaleBy: + upscale_methods = ["nearest-exact", "bilinear", "area", "bicubic", "lanczos"] + + @classmethod + def INPUT_TYPES(s): + return {"required": { "image": ("IMAGE",), "upscale_method": (s.upscale_methods,), + "scale_by": 
("FLOAT", {"default": 1.0, "min": 0.01, "max": 8.0, "step": 0.01}),}} + RETURN_TYPES = ("IMAGE",) + FUNCTION = "upscale" + + CATEGORY = "image/upscaling" + + def upscale(self, image, upscale_method, scale_by): + samples = image.movedim(-1,1) + width = round(samples.shape[3] * scale_by) + height = round(samples.shape[2] * scale_by) + s = ldm_patched.modules.utils.common_upscale(samples, width, height, upscale_method, "disabled") + s = s.movedim(1,-1) + return (s,) + +class ImageInvert: + + @classmethod + def INPUT_TYPES(s): + return {"required": { "image": ("IMAGE",)}} + + RETURN_TYPES = ("IMAGE",) + FUNCTION = "invert" + + CATEGORY = "image" + + def invert(self, image): + s = 1.0 - image + return (s,) + +class ImageBatch: + + @classmethod + def INPUT_TYPES(s): + return {"required": { "image1": ("IMAGE",), "image2": ("IMAGE",)}} + + RETURN_TYPES = ("IMAGE",) + FUNCTION = "batch" + + CATEGORY = "image" + + def batch(self, image1, image2): + if image1.shape[1:] != image2.shape[1:]: + image2 = ldm_patched.modules.utils.common_upscale(image2.movedim(-1,1), image1.shape[2], image1.shape[1], "bilinear", "center").movedim(1,-1) + s = torch.cat((image1, image2), dim=0) + return (s,) + +class EmptyImage: + def __init__(self, device="cpu"): + self.device = device + + @classmethod + def INPUT_TYPES(s): + return {"required": { "width": ("INT", {"default": 512, "min": 1, "max": MAX_RESOLUTION, "step": 1}), + "height": ("INT", {"default": 512, "min": 1, "max": MAX_RESOLUTION, "step": 1}), + "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}), + "color": ("INT", {"default": 0, "min": 0, "max": 0xFFFFFF, "step": 1, "display": "color"}), + }} + RETURN_TYPES = ("IMAGE",) + FUNCTION = "generate" + + CATEGORY = "image" + + def generate(self, width, height, batch_size=1, color=0): + r = torch.full([batch_size, height, width, 1], ((color >> 16) & 0xFF) / 0xFF) + g = torch.full([batch_size, height, width, 1], ((color >> 8) & 0xFF) / 0xFF) + b = torch.full([batch_size, height, width, 1], ((color) & 0xFF) / 0xFF) + return (torch.cat((r, g, b), dim=-1), ) + +class ImagePadForOutpaint: + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "image": ("IMAGE",), + "left": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 8}), + "top": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 8}), + "right": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 8}), + "bottom": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 8}), + "feathering": ("INT", {"default": 40, "min": 0, "max": MAX_RESOLUTION, "step": 1}), + } + } + + RETURN_TYPES = ("IMAGE", "MASK") + FUNCTION = "expand_image" + + CATEGORY = "image" + + def expand_image(self, image, left, top, right, bottom, feathering): + d1, d2, d3, d4 = image.size() + + new_image = torch.ones( + (d1, d2 + top + bottom, d3 + left + right, d4), + dtype=torch.float32, + ) * 0.5 + + new_image[:, top:top + d2, left:left + d3, :] = image + + mask = torch.ones( + (d2 + top + bottom, d3 + left + right), + dtype=torch.float32, + ) + + t = torch.zeros( + (d2, d3), + dtype=torch.float32 + ) + + if feathering > 0 and feathering * 2 < d2 and feathering * 2 < d3: + + for i in range(d2): + for j in range(d3): + dt = i if top != 0 else d2 + db = d2 - i if bottom != 0 else d2 + + dl = j if left != 0 else d3 + dr = d3 - j if right != 0 else d3 + + d = min(dt, db, dl, dr) + + if d >= feathering: + continue + + v = (feathering - d) / feathering + + t[i, j] = v * v + + mask[top:top + d2, left:left + d3] = t + + return 
(new_image, mask) + + +NODE_CLASS_MAPPINGS = { + "KSampler": KSampler, + "CheckpointLoaderSimple": CheckpointLoaderSimple, + "CLIPTextEncode": CLIPTextEncode, + "CLIPSetLastLayer": CLIPSetLastLayer, + "VAEDecode": VAEDecode, + "VAEEncode": VAEEncode, + "VAEEncodeForInpaint": VAEEncodeForInpaint, + "VAELoader": VAELoader, + "EmptyLatentImage": EmptyLatentImage, + "LatentUpscale": LatentUpscale, + "LatentUpscaleBy": LatentUpscaleBy, + "LatentFromBatch": LatentFromBatch, + "RepeatLatentBatch": RepeatLatentBatch, + "SaveImage": SaveImage, + "PreviewImage": PreviewImage, + "LoadImage": LoadImage, + "LoadImageMask": LoadImageMask, + "ImageScale": ImageScale, + "ImageScaleBy": ImageScaleBy, + "ImageInvert": ImageInvert, + "ImageBatch": ImageBatch, + "ImagePadForOutpaint": ImagePadForOutpaint, + "EmptyImage": EmptyImage, + "ConditioningAverage": ConditioningAverage , + "ConditioningCombine": ConditioningCombine, + "ConditioningConcat": ConditioningConcat, + "ConditioningSetArea": ConditioningSetArea, + "ConditioningSetAreaPercentage": ConditioningSetAreaPercentage, + "ConditioningSetAreaStrength": ConditioningSetAreaStrength, + "ConditioningSetMask": ConditioningSetMask, + "KSamplerAdvanced": KSamplerAdvanced, + "SetLatentNoiseMask": SetLatentNoiseMask, + "LatentComposite": LatentComposite, + "LatentBlend": LatentBlend, + "LatentRotate": LatentRotate, + "LatentFlip": LatentFlip, + "LatentCrop": LatentCrop, + "LoraLoader": LoraLoader, + "CLIPLoader": CLIPLoader, + "UNETLoader": UNETLoader, + "DualCLIPLoader": DualCLIPLoader, + "CLIPVisionEncode": CLIPVisionEncode, + "StyleModelApply": StyleModelApply, + "unCLIPConditioning": unCLIPConditioning, + "ControlNetApply": ControlNetApply, + "ControlNetApplyAdvanced": ControlNetApplyAdvanced, + "ControlNetLoader": ControlNetLoader, + "DiffControlNetLoader": DiffControlNetLoader, + "StyleModelLoader": StyleModelLoader, + "CLIPVisionLoader": CLIPVisionLoader, + "VAEDecodeTiled": VAEDecodeTiled, + "VAEEncodeTiled": VAEEncodeTiled, + "unCLIPCheckpointLoader": unCLIPCheckpointLoader, + "GLIGENLoader": GLIGENLoader, + "GLIGENTextBoxApply": GLIGENTextBoxApply, + "InpaintModelConditioning": InpaintModelConditioning, + + "CheckpointLoader": CheckpointLoader, + "DiffusersLoader": DiffusersLoader, + + "LoadLatent": LoadLatent, + "SaveLatent": SaveLatent, + + "ConditioningZeroOut": ConditioningZeroOut, + "ConditioningSetTimestepRange": ConditioningSetTimestepRange, + "LoraLoaderModelOnly": LoraLoaderModelOnly, +} + +NODE_DISPLAY_NAME_MAPPINGS = { + # Sampling + "KSampler": "KSampler", + "KSamplerAdvanced": "KSampler (Advanced)", + # Loaders + "CheckpointLoader": "Load Checkpoint With Config (DEPRECATED)", + "CheckpointLoaderSimple": "Load Checkpoint", + "VAELoader": "Load VAE", + "LoraLoader": "Load LoRA", + "CLIPLoader": "Load CLIP", + "ControlNetLoader": "Load ControlNet Model", + "DiffControlNetLoader": "Load ControlNet Model (diff)", + "StyleModelLoader": "Load Style Model", + "CLIPVisionLoader": "Load CLIP Vision", + "UpscaleModelLoader": "Load Upscale Model", + # Conditioning + "CLIPVisionEncode": "CLIP Vision Encode", + "StyleModelApply": "Apply Style Model", + "CLIPTextEncode": "CLIP Text Encode (Prompt)", + "CLIPSetLastLayer": "CLIP Set Last Layer", + "ConditioningCombine": "Conditioning (Combine)", + "ConditioningAverage ": "Conditioning (Average)", + "ConditioningConcat": "Conditioning (Concat)", + "ConditioningSetArea": "Conditioning (Set Area)", + "ConditioningSetAreaPercentage": "Conditioning (Set Area with Percentage)", + "ConditioningSetMask": 
"Conditioning (Set Mask)", + "ControlNetApply": "Apply ControlNet", + "ControlNetApplyAdvanced": "Apply ControlNet (Advanced)", + # Latent + "VAEEncodeForInpaint": "VAE Encode (for Inpainting)", + "SetLatentNoiseMask": "Set Latent Noise Mask", + "VAEDecode": "VAE Decode", + "VAEEncode": "VAE Encode", + "LatentRotate": "Rotate Latent", + "LatentFlip": "Flip Latent", + "LatentCrop": "Crop Latent", + "EmptyLatentImage": "Empty Latent Image", + "LatentUpscale": "Upscale Latent", + "LatentUpscaleBy": "Upscale Latent By", + "LatentComposite": "Latent Composite", + "LatentBlend": "Latent Blend", + "LatentFromBatch" : "Latent From Batch", + "RepeatLatentBatch": "Repeat Latent Batch", + # Image + "SaveImage": "Save Image", + "PreviewImage": "Preview Image", + "LoadImage": "Load Image", + "LoadImageMask": "Load Image (as Mask)", + "ImageScale": "Upscale Image", + "ImageScaleBy": "Upscale Image By", + "ImageUpscaleWithModel": "Upscale Image (using Model)", + "ImageInvert": "Invert Image", + "ImagePadForOutpaint": "Pad Image for Outpainting", + "ImageBatch": "Batch Images", + # _for_testing + "VAEDecodeTiled": "VAE Decode (Tiled)", + "VAEEncodeTiled": "VAE Encode (Tiled)", +} + +EXTENSION_WEB_DIRS = {} + +def load_custom_node(module_path, ignore=set()): + module_name = os.path.basename(module_path) + if os.path.isfile(module_path): + sp = os.path.splitext(module_path) + module_name = sp[0] + try: + if os.path.isfile(module_path): + module_spec = importlib.util.spec_from_file_location(module_name, module_path) + module_dir = os.path.split(module_path)[0] + else: + module_spec = importlib.util.spec_from_file_location(module_name, os.path.join(module_path, "__init__.py")) + module_dir = module_path + + module = importlib.util.module_from_spec(module_spec) + sys.modules[module_name] = module + module_spec.loader.exec_module(module) + + if hasattr(module, "WEB_DIRECTORY") and getattr(module, "WEB_DIRECTORY") is not None: + web_dir = os.path.abspath(os.path.join(module_dir, getattr(module, "WEB_DIRECTORY"))) + if os.path.isdir(web_dir): + EXTENSION_WEB_DIRS[module_name] = web_dir + + if hasattr(module, "NODE_CLASS_MAPPINGS") and getattr(module, "NODE_CLASS_MAPPINGS") is not None: + for name in module.NODE_CLASS_MAPPINGS: + if name not in ignore: + NODE_CLASS_MAPPINGS[name] = module.NODE_CLASS_MAPPINGS[name] + if hasattr(module, "NODE_DISPLAY_NAME_MAPPINGS") and getattr(module, "NODE_DISPLAY_NAME_MAPPINGS") is not None: + NODE_DISPLAY_NAME_MAPPINGS.update(module.NODE_DISPLAY_NAME_MAPPINGS) + return True + else: + print(f"Skip {module_path} module for custom nodes due to the lack of NODE_CLASS_MAPPINGS.") + return False + except Exception as e: + print(traceback.format_exc()) + print(f"Cannot import {module_path} module for custom nodes:", e) + return False + +def load_custom_nodes(): + base_node_names = set(NODE_CLASS_MAPPINGS.keys()) + node_paths = ldm_patched.utils.path_utils.get_folder_paths("custom_nodes") + node_import_times = [] + for custom_node_path in node_paths: + possible_modules = os.listdir(os.path.realpath(custom_node_path)) + if "__pycache__" in possible_modules: + possible_modules.remove("__pycache__") + + for possible_module in possible_modules: + module_path = os.path.join(custom_node_path, possible_module) + if os.path.isfile(module_path) and os.path.splitext(module_path)[1] != ".py": continue + if module_path.endswith(".disabled"): continue + time_before = time.perf_counter() + success = load_custom_node(module_path, base_node_names) + node_import_times.append((time.perf_counter() - 
time_before, module_path, success)) + + if len(node_import_times) > 0: + print("\nImport times for custom nodes:") + for n in sorted(node_import_times): + if n[2]: + import_message = "" + else: + import_message = " (IMPORT FAILED)" + print("{:6.1f} seconds{}:".format(n[0], import_message), n[1]) + print() + +def init_custom_nodes(): + extras_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "ldm_patched_extras") + extras_files = [ + "nodes_latent.py", + "nodes_hypernetwork.py", + "nodes_upscale_model.py", + "nodes_post_processing.py", + "nodes_mask.py", + "nodes_compositing.py", + "nodes_rebatch.py", + "nodes_model_merging.py", + "nodes_tomesd.py", + "nodes_clip_sdxl.py", + "nodes_canny.py", + "nodes_freelunch.py", + "nodes_custom_sampler.py", + "nodes_hypertile.py", + "nodes_model_advanced.py", + "nodes_model_downscale.py", + "nodes_images.py", + "nodes_video_model.py", + "nodes_sag.py", + "nodes_perpneg.py", + "nodes_stable3d.py", + "nodes_sdupscale.py", + "nodes_photomaker.py", + ] + + for node_file in extras_files: + load_custom_node(os.path.join(extras_dir, node_file)) + + load_custom_nodes() diff --git a/ldm_patched/contrib/external_canny.py b/ldm_patched/contrib/external_canny.py new file mode 100644 index 0000000000000000000000000000000000000000..df7179c053644da6071365c14ee1416c8e3596d3 --- /dev/null +++ b/ldm_patched/contrib/external_canny.py @@ -0,0 +1,303 @@ +# Taken from https://github.com/comfyanonymous/ComfyUI +# This file is only for reference, and not used in the backend or runtime. + + +#From https://github.com/kornia/kornia +import math + +import torch +import torch.nn.functional as F +import ldm_patched.modules.model_management + +def get_canny_nms_kernel(device=None, dtype=None): + """Utility function that returns 3x3 kernels for the Canny Non-maximal suppression.""" + return torch.tensor( + [ + [[[0.0, 0.0, 0.0], [0.0, 1.0, -1.0], [0.0, 0.0, 0.0]]], + [[[0.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, -1.0]]], + [[[0.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, -1.0, 0.0]]], + [[[0.0, 0.0, 0.0], [0.0, 1.0, 0.0], [-1.0, 0.0, 0.0]]], + [[[0.0, 0.0, 0.0], [-1.0, 1.0, 0.0], [0.0, 0.0, 0.0]]], + [[[-1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 0.0]]], + [[[0.0, -1.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 0.0]]], + [[[0.0, 0.0, -1.0], [0.0, 1.0, 0.0], [0.0, 0.0, 0.0]]], + ], + device=device, + dtype=dtype, + ) + + +def get_hysteresis_kernel(device=None, dtype=None): + """Utility function that returns the 3x3 kernels for the Canny hysteresis.""" + return torch.tensor( + [ + [[[0.0, 0.0, 0.0], [0.0, 0.0, 1.0], [0.0, 0.0, 0.0]]], + [[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 1.0]]], + [[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 1.0, 0.0]]], + [[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [1.0, 0.0, 0.0]]], + [[[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 0.0, 0.0]]], + [[[1.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]], + [[[0.0, 1.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]], + [[[0.0, 0.0, 1.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]], + ], + device=device, + dtype=dtype, + ) + +def gaussian_blur_2d(img, kernel_size, sigma): + ksize_half = (kernel_size - 1) * 0.5 + + x = torch.linspace(-ksize_half, ksize_half, steps=kernel_size) + + pdf = torch.exp(-0.5 * (x / sigma).pow(2)) + + x_kernel = pdf / pdf.sum() + x_kernel = x_kernel.to(device=img.device, dtype=img.dtype) + + kernel2d = torch.mm(x_kernel[:, None], x_kernel[None, :]) + kernel2d = kernel2d.expand(img.shape[-3], 1, kernel2d.shape[0], kernel2d.shape[1]) + + padding = [kernel_size // 2, kernel_size // 2, kernel_size // 2, kernel_size // 2] 
+ + img = torch.nn.functional.pad(img, padding, mode="reflect") + img = torch.nn.functional.conv2d(img, kernel2d, groups=img.shape[-3]) + + return img + +def get_sobel_kernel2d(device=None, dtype=None): + kernel_x = torch.tensor([[-1.0, 0.0, 1.0], [-2.0, 0.0, 2.0], [-1.0, 0.0, 1.0]], device=device, dtype=dtype) + kernel_y = kernel_x.transpose(0, 1) + return torch.stack([kernel_x, kernel_y]) + +def spatial_gradient(input, normalized: bool = True): + r"""Compute the first order image derivative in both x and y using a Sobel operator. + .. image:: _static/img/spatial_gradient.png + Args: + input: input image tensor with shape :math:`(B, C, H, W)`. + mode: derivatives modality, can be: `sobel` or `diff`. + order: the order of the derivatives. + normalized: whether the output is normalized. + Return: + the derivatives of the input feature map. with shape :math:`(B, C, 2, H, W)`. + .. note:: + See a working example `here `__. + Examples: + >>> input = torch.rand(1, 3, 4, 4) + >>> output = spatial_gradient(input) # 1x3x2x4x4 + >>> output.shape + torch.Size([1, 3, 2, 4, 4]) + """ + # KORNIA_CHECK_IS_TENSOR(input) + # KORNIA_CHECK_SHAPE(input, ['B', 'C', 'H', 'W']) + + # allocate kernel + kernel = get_sobel_kernel2d(device=input.device, dtype=input.dtype) + if normalized: + kernel = normalize_kernel2d(kernel) + + # prepare kernel + b, c, h, w = input.shape + tmp_kernel = kernel[:, None, ...] + + # Pad with "replicate for spatial dims, but with zeros for channel + spatial_pad = [kernel.size(1) // 2, kernel.size(1) // 2, kernel.size(2) // 2, kernel.size(2) // 2] + out_channels: int = 2 + padded_inp = torch.nn.functional.pad(input.reshape(b * c, 1, h, w), spatial_pad, 'replicate') + out = F.conv2d(padded_inp, tmp_kernel, groups=1, padding=0, stride=1) + return out.reshape(b, c, out_channels, h, w) + +def rgb_to_grayscale(image, rgb_weights = None): + r"""Convert a RGB image to grayscale version of image. + + .. image:: _static/img/rgb_to_grayscale.png + + The image data is assumed to be in the range of (0, 1). + + Args: + image: RGB image to be converted to grayscale with shape :math:`(*,3,H,W)`. + rgb_weights: Weights that will be applied on each channel (RGB). + The sum of the weights should add up to one. + Returns: + grayscale version of the image with shape :math:`(*,1,H,W)`. + + .. note:: + See a working example `here `__. + + Example: + >>> input = torch.rand(2, 3, 4, 5) + >>> gray = rgb_to_grayscale(input) # 2x1x4x5 + """ + + if len(image.shape) < 3 or image.shape[-3] != 3: + raise ValueError(f"Input size must have a shape of (*, 3, H, W). 
Got {image.shape}") + + if rgb_weights is None: + # 8 bit images + if image.dtype == torch.uint8: + rgb_weights = torch.tensor([76, 150, 29], device=image.device, dtype=torch.uint8) + # floating point images + elif image.dtype in (torch.float16, torch.float32, torch.float64): + rgb_weights = torch.tensor([0.299, 0.587, 0.114], device=image.device, dtype=image.dtype) + else: + raise TypeError(f"Unknown data type: {image.dtype}") + else: + # is tensor that we make sure is in the same device/dtype + rgb_weights = rgb_weights.to(image) + + # unpack the color image channels with RGB order + r: Tensor = image[..., 0:1, :, :] + g: Tensor = image[..., 1:2, :, :] + b: Tensor = image[..., 2:3, :, :] + + w_r, w_g, w_b = rgb_weights.unbind() + return w_r * r + w_g * g + w_b * b + +def canny( + input, + low_threshold = 0.1, + high_threshold = 0.2, + kernel_size = 5, + sigma = 1, + hysteresis = True, + eps = 1e-6, +): + r"""Find edges of the input image and filters them using the Canny algorithm. + .. image:: _static/img/canny.png + Args: + input: input image tensor with shape :math:`(B,C,H,W)`. + low_threshold: lower threshold for the hysteresis procedure. + high_threshold: upper threshold for the hysteresis procedure. + kernel_size: the size of the kernel for the gaussian blur. + sigma: the standard deviation of the kernel for the gaussian blur. + hysteresis: if True, applies the hysteresis edge tracking. + Otherwise, the edges are divided between weak (0.5) and strong (1) edges. + eps: regularization number to avoid NaN during backprop. + Returns: + - the canny edge magnitudes map, shape of :math:`(B,1,H,W)`. + - the canny edge detection filtered by thresholds and hysteresis, shape of :math:`(B,1,H,W)`. + .. note:: + See a working example `here `__. + Example: + >>> input = torch.rand(5, 3, 4, 4) + >>> magnitude, edges = canny(input) # 5x3x4x4 + >>> magnitude.shape + torch.Size([5, 1, 4, 4]) + >>> edges.shape + torch.Size([5, 1, 4, 4]) + """ + # KORNIA_CHECK_IS_TENSOR(input) + # KORNIA_CHECK_SHAPE(input, ['B', 'C', 'H', 'W']) + # KORNIA_CHECK( + # low_threshold <= high_threshold, + # "Invalid input thresholds. low_threshold should be smaller than the high_threshold. Got: " + # f"{low_threshold}>{high_threshold}", + # ) + # KORNIA_CHECK(0 < low_threshold < 1, f'Invalid low threshold. Should be in range (0, 1). Got: {low_threshold}') + # KORNIA_CHECK(0 < high_threshold < 1, f'Invalid high threshold. Should be in range (0, 1). 
Got: {high_threshold}') + + device = input.device + dtype = input.dtype + + # To Grayscale + if input.shape[1] == 3: + input = rgb_to_grayscale(input) + + # Gaussian filter + blurred: Tensor = gaussian_blur_2d(input, kernel_size, sigma) + + # Compute the gradients + gradients: Tensor = spatial_gradient(blurred, normalized=False) + + # Unpack the edges + gx: Tensor = gradients[:, :, 0] + gy: Tensor = gradients[:, :, 1] + + # Compute gradient magnitude and angle + magnitude: Tensor = torch.sqrt(gx * gx + gy * gy + eps) + angle: Tensor = torch.atan2(gy, gx) + + # Radians to Degrees + angle = 180.0 * angle / math.pi + + # Round angle to the nearest 45 degree + angle = torch.round(angle / 45) * 45 + + # Non-maximal suppression + nms_kernels: Tensor = get_canny_nms_kernel(device, dtype) + nms_magnitude: Tensor = F.conv2d(magnitude, nms_kernels, padding=nms_kernels.shape[-1] // 2) + + # Get the indices for both directions + positive_idx: Tensor = (angle / 45) % 8 + positive_idx = positive_idx.long() + + negative_idx: Tensor = ((angle / 45) + 4) % 8 + negative_idx = negative_idx.long() + + # Apply the non-maximum suppression to the different directions + channel_select_filtered_positive: Tensor = torch.gather(nms_magnitude, 1, positive_idx) + channel_select_filtered_negative: Tensor = torch.gather(nms_magnitude, 1, negative_idx) + + channel_select_filtered: Tensor = torch.stack( + [channel_select_filtered_positive, channel_select_filtered_negative], 1 + ) + + is_max: Tensor = channel_select_filtered.min(dim=1)[0] > 0.0 + + magnitude = magnitude * is_max + + # Threshold + edges: Tensor = F.threshold(magnitude, low_threshold, 0.0) + + low: Tensor = magnitude > low_threshold + high: Tensor = magnitude > high_threshold + + edges = low * 0.5 + high * 0.5 + edges = edges.to(dtype) + + # Hysteresis + if hysteresis: + edges_old: Tensor = -torch.ones(edges.shape, device=edges.device, dtype=dtype) + hysteresis_kernels: Tensor = get_hysteresis_kernel(device, dtype) + + while ((edges_old - edges).abs() != 0).any(): + weak: Tensor = (edges == 0.5).float() + strong: Tensor = (edges == 1).float() + + hysteresis_magnitude: Tensor = F.conv2d( + edges, hysteresis_kernels, padding=hysteresis_kernels.shape[-1] // 2 + ) + hysteresis_magnitude = (hysteresis_magnitude == 1).any(1, keepdim=True).to(dtype) + hysteresis_magnitude = hysteresis_magnitude * weak + strong + + edges_old = edges.clone() + edges = hysteresis_magnitude + (hysteresis_magnitude == 0) * weak * 0.5 + + edges = hysteresis_magnitude + + return magnitude, edges + + +class Canny: + @classmethod + def INPUT_TYPES(s): + return {"required": {"image": ("IMAGE",), + "low_threshold": ("FLOAT", {"default": 0.4, "min": 0.01, "max": 0.99, "step": 0.01}), + "high_threshold": ("FLOAT", {"default": 0.8, "min": 0.01, "max": 0.99, "step": 0.01}) + }} + + RETURN_TYPES = ("IMAGE",) + FUNCTION = "detect_edge" + + CATEGORY = "image/preprocessors" + + def detect_edge(self, image, low_threshold, high_threshold): + output = canny(image.to(ldm_patched.modules.model_management.get_torch_device()).movedim(-1, 1), low_threshold, high_threshold) + img_out = output[1].to(ldm_patched.modules.model_management.intermediate_device()).repeat(1, 3, 1, 1).movedim(1, -1) + return (img_out,) + +NODE_CLASS_MAPPINGS = { + "Canny": Canny, +} diff --git a/ldm_patched/contrib/external_clip_sdxl.py b/ldm_patched/contrib/external_clip_sdxl.py new file mode 100644 index 0000000000000000000000000000000000000000..ddd96433f125edecb2f88491cc6fa8618ac6eac3 --- /dev/null +++ 
b/ldm_patched/contrib/external_clip_sdxl.py @@ -0,0 +1,60 @@ +# Taken from https://github.com/comfyanonymous/ComfyUI +# This file is only for reference, and not used in the backend or runtime. + + +import torch +from ldm_patched.contrib.external import MAX_RESOLUTION + +class CLIPTextEncodeSDXLRefiner: + @classmethod + def INPUT_TYPES(s): + return {"required": { + "ascore": ("FLOAT", {"default": 6.0, "min": 0.0, "max": 1000.0, "step": 0.01}), + "width": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), + "height": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), + "text": ("STRING", {"multiline": True}), "clip": ("CLIP", ), + }} + RETURN_TYPES = ("CONDITIONING",) + FUNCTION = "encode" + + CATEGORY = "advanced/conditioning" + + def encode(self, clip, ascore, width, height, text): + tokens = clip.tokenize(text) + cond, pooled = clip.encode_from_tokens(tokens, return_pooled=True) + return ([[cond, {"pooled_output": pooled, "aesthetic_score": ascore, "width": width,"height": height}]], ) + +class CLIPTextEncodeSDXL: + @classmethod + def INPUT_TYPES(s): + return {"required": { + "width": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), + "height": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), + "crop_w": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION}), + "crop_h": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION}), + "target_width": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), + "target_height": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), + "text_g": ("STRING", {"multiline": True, "default": "CLIP_G"}), "clip": ("CLIP", ), + "text_l": ("STRING", {"multiline": True, "default": "CLIP_L"}), "clip": ("CLIP", ), + }} + RETURN_TYPES = ("CONDITIONING",) + FUNCTION = "encode" + + CATEGORY = "advanced/conditioning" + + def encode(self, clip, width, height, crop_w, crop_h, target_width, target_height, text_g, text_l): + tokens = clip.tokenize(text_g) + tokens["l"] = clip.tokenize(text_l)["l"] + if len(tokens["l"]) != len(tokens["g"]): + empty = clip.tokenize("") + while len(tokens["l"]) < len(tokens["g"]): + tokens["l"] += empty["l"] + while len(tokens["l"]) > len(tokens["g"]): + tokens["g"] += empty["g"] + cond, pooled = clip.encode_from_tokens(tokens, return_pooled=True) + return ([[cond, {"pooled_output": pooled, "width": width, "height": height, "crop_w": crop_w, "crop_h": crop_h, "target_width": target_width, "target_height": target_height}]], ) + +NODE_CLASS_MAPPINGS = { + "CLIPTextEncodeSDXLRefiner": CLIPTextEncodeSDXLRefiner, + "CLIPTextEncodeSDXL": CLIPTextEncodeSDXL, +} diff --git a/ldm_patched/contrib/external_compositing.py b/ldm_patched/contrib/external_compositing.py new file mode 100644 index 0000000000000000000000000000000000000000..7241b16751ddc7f2a309b48f915da19485e1c89b --- /dev/null +++ b/ldm_patched/contrib/external_compositing.py @@ -0,0 +1,206 @@ +# Taken from https://github.com/comfyanonymous/ComfyUI +# This file is only for reference, and not used in the backend or runtime. 
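# Illustrative note: the helpers below implement the standard Porter-Duff compositing
# modes on per-pixel image/alpha tensors. For example, the SRC_OVER branch
# ("source over destination", premultiplied alpha) computes
#   out_alpha = src_alpha + (1 - src_alpha) * dst_alpha
#   out_image = src_image + (1 - src_alpha) * dst_image
# and every other PorterDuffMode branch follows the same pattern with different weights.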
+ + +import numpy as np +import torch +import ldm_patched.modules.utils +from enum import Enum + +def resize_mask(mask, shape): + return torch.nn.functional.interpolate(mask.reshape((-1, 1, mask.shape[-2], mask.shape[-1])), size=(shape[0], shape[1]), mode="bilinear").squeeze(1) + +class PorterDuffMode(Enum): + ADD = 0 + CLEAR = 1 + DARKEN = 2 + DST = 3 + DST_ATOP = 4 + DST_IN = 5 + DST_OUT = 6 + DST_OVER = 7 + LIGHTEN = 8 + MULTIPLY = 9 + OVERLAY = 10 + SCREEN = 11 + SRC = 12 + SRC_ATOP = 13 + SRC_IN = 14 + SRC_OUT = 15 + SRC_OVER = 16 + XOR = 17 + + +def porter_duff_composite(src_image: torch.Tensor, src_alpha: torch.Tensor, dst_image: torch.Tensor, dst_alpha: torch.Tensor, mode: PorterDuffMode): + if mode == PorterDuffMode.ADD: + out_alpha = torch.clamp(src_alpha + dst_alpha, 0, 1) + out_image = torch.clamp(src_image + dst_image, 0, 1) + elif mode == PorterDuffMode.CLEAR: + out_alpha = torch.zeros_like(dst_alpha) + out_image = torch.zeros_like(dst_image) + elif mode == PorterDuffMode.DARKEN: + out_alpha = src_alpha + dst_alpha - src_alpha * dst_alpha + out_image = (1 - dst_alpha) * src_image + (1 - src_alpha) * dst_image + torch.min(src_image, dst_image) + elif mode == PorterDuffMode.DST: + out_alpha = dst_alpha + out_image = dst_image + elif mode == PorterDuffMode.DST_ATOP: + out_alpha = src_alpha + out_image = src_alpha * dst_image + (1 - dst_alpha) * src_image + elif mode == PorterDuffMode.DST_IN: + out_alpha = src_alpha * dst_alpha + out_image = dst_image * src_alpha + elif mode == PorterDuffMode.DST_OUT: + out_alpha = (1 - src_alpha) * dst_alpha + out_image = (1 - src_alpha) * dst_image + elif mode == PorterDuffMode.DST_OVER: + out_alpha = dst_alpha + (1 - dst_alpha) * src_alpha + out_image = dst_image + (1 - dst_alpha) * src_image + elif mode == PorterDuffMode.LIGHTEN: + out_alpha = src_alpha + dst_alpha - src_alpha * dst_alpha + out_image = (1 - dst_alpha) * src_image + (1 - src_alpha) * dst_image + torch.max(src_image, dst_image) + elif mode == PorterDuffMode.MULTIPLY: + out_alpha = src_alpha * dst_alpha + out_image = src_image * dst_image + elif mode == PorterDuffMode.OVERLAY: + out_alpha = src_alpha + dst_alpha - src_alpha * dst_alpha + out_image = torch.where(2 * dst_image < dst_alpha, 2 * src_image * dst_image, + src_alpha * dst_alpha - 2 * (dst_alpha - src_image) * (src_alpha - dst_image)) + elif mode == PorterDuffMode.SCREEN: + out_alpha = src_alpha + dst_alpha - src_alpha * dst_alpha + out_image = src_image + dst_image - src_image * dst_image + elif mode == PorterDuffMode.SRC: + out_alpha = src_alpha + out_image = src_image + elif mode == PorterDuffMode.SRC_ATOP: + out_alpha = dst_alpha + out_image = dst_alpha * src_image + (1 - src_alpha) * dst_image + elif mode == PorterDuffMode.SRC_IN: + out_alpha = src_alpha * dst_alpha + out_image = src_image * dst_alpha + elif mode == PorterDuffMode.SRC_OUT: + out_alpha = (1 - dst_alpha) * src_alpha + out_image = (1 - dst_alpha) * src_image + elif mode == PorterDuffMode.SRC_OVER: + out_alpha = src_alpha + (1 - src_alpha) * dst_alpha + out_image = src_image + (1 - src_alpha) * dst_image + elif mode == PorterDuffMode.XOR: + out_alpha = (1 - dst_alpha) * src_alpha + (1 - src_alpha) * dst_alpha + out_image = (1 - dst_alpha) * src_image + (1 - src_alpha) * dst_image + else: + out_alpha = None + out_image = None + return out_image, out_alpha + + +class PorterDuffImageComposite: + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "source": ("IMAGE",), + "source_alpha": ("MASK",), + "destination": ("IMAGE",), + 
"destination_alpha": ("MASK",), + "mode": ([mode.name for mode in PorterDuffMode], {"default": PorterDuffMode.DST.name}), + }, + } + + RETURN_TYPES = ("IMAGE", "MASK") + FUNCTION = "composite" + CATEGORY = "mask/compositing" + + def composite(self, source: torch.Tensor, source_alpha: torch.Tensor, destination: torch.Tensor, destination_alpha: torch.Tensor, mode): + batch_size = min(len(source), len(source_alpha), len(destination), len(destination_alpha)) + out_images = [] + out_alphas = [] + + for i in range(batch_size): + src_image = source[i] + dst_image = destination[i] + + assert src_image.shape[2] == dst_image.shape[2] # inputs need to have same number of channels + + src_alpha = source_alpha[i].unsqueeze(2) + dst_alpha = destination_alpha[i].unsqueeze(2) + + if dst_alpha.shape[:2] != dst_image.shape[:2]: + upscale_input = dst_alpha.unsqueeze(0).permute(0, 3, 1, 2) + upscale_output = ldm_patched.modules.utils.common_upscale(upscale_input, dst_image.shape[1], dst_image.shape[0], upscale_method='bicubic', crop='center') + dst_alpha = upscale_output.permute(0, 2, 3, 1).squeeze(0) + if src_image.shape != dst_image.shape: + upscale_input = src_image.unsqueeze(0).permute(0, 3, 1, 2) + upscale_output = ldm_patched.modules.utils.common_upscale(upscale_input, dst_image.shape[1], dst_image.shape[0], upscale_method='bicubic', crop='center') + src_image = upscale_output.permute(0, 2, 3, 1).squeeze(0) + if src_alpha.shape != dst_alpha.shape: + upscale_input = src_alpha.unsqueeze(0).permute(0, 3, 1, 2) + upscale_output = ldm_patched.modules.utils.common_upscale(upscale_input, dst_alpha.shape[1], dst_alpha.shape[0], upscale_method='bicubic', crop='center') + src_alpha = upscale_output.permute(0, 2, 3, 1).squeeze(0) + + out_image, out_alpha = porter_duff_composite(src_image, src_alpha, dst_image, dst_alpha, PorterDuffMode[mode]) + + out_images.append(out_image) + out_alphas.append(out_alpha.squeeze(2)) + + result = (torch.stack(out_images), torch.stack(out_alphas)) + return result + + +class SplitImageWithAlpha: + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "image": ("IMAGE",), + } + } + + CATEGORY = "mask/compositing" + RETURN_TYPES = ("IMAGE", "MASK") + FUNCTION = "split_image_with_alpha" + + def split_image_with_alpha(self, image: torch.Tensor): + out_images = [i[:,:,:3] for i in image] + out_alphas = [i[:,:,3] if i.shape[2] > 3 else torch.ones_like(i[:,:,0]) for i in image] + result = (torch.stack(out_images), 1.0 - torch.stack(out_alphas)) + return result + + +class JoinImageWithAlpha: + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "image": ("IMAGE",), + "alpha": ("MASK",), + } + } + + CATEGORY = "mask/compositing" + RETURN_TYPES = ("IMAGE",) + FUNCTION = "join_image_with_alpha" + + def join_image_with_alpha(self, image: torch.Tensor, alpha: torch.Tensor): + batch_size = min(len(image), len(alpha)) + out_images = [] + + alpha = 1.0 - resize_mask(alpha, image.shape[1:]) + for i in range(batch_size): + out_images.append(torch.cat((image[i][:,:,:3], alpha[i].unsqueeze(2)), dim=2)) + + result = (torch.stack(out_images),) + return result + + +NODE_CLASS_MAPPINGS = { + "PorterDuffImageComposite": PorterDuffImageComposite, + "SplitImageWithAlpha": SplitImageWithAlpha, + "JoinImageWithAlpha": JoinImageWithAlpha, +} + + +NODE_DISPLAY_NAME_MAPPINGS = { + "PorterDuffImageComposite": "Porter-Duff Image Composite", + "SplitImageWithAlpha": "Split Image with Alpha", + "JoinImageWithAlpha": "Join Image with Alpha", +} diff --git 
a/ldm_patched/contrib/external_custom_sampler.py b/ldm_patched/contrib/external_custom_sampler.py new file mode 100644 index 0000000000000000000000000000000000000000..4a1d53ef9d140a5b6776a9e8074b7d4c24c4d003 --- /dev/null +++ b/ldm_patched/contrib/external_custom_sampler.py @@ -0,0 +1,299 @@ +# Taken from https://github.com/comfyanonymous/ComfyUI +# This file is only for reference, and not used in the backend or runtime. + + +import ldm_patched.modules.samplers +import ldm_patched.modules.sample +from ldm_patched.k_diffusion import sampling as k_diffusion_sampling +import ldm_patched.utils.latent_visualization +import torch +import ldm_patched.modules.utils + + +class BasicScheduler: + @classmethod + def INPUT_TYPES(s): + return {"required": + {"model": ("MODEL",), + "scheduler": (ldm_patched.modules.samplers.SCHEDULER_NAMES, ), + "steps": ("INT", {"default": 20, "min": 1, "max": 10000}), + "denoise": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}), + } + } + RETURN_TYPES = ("SIGMAS",) + CATEGORY = "sampling/custom_sampling/schedulers" + + FUNCTION = "get_sigmas" + + def get_sigmas(self, model, scheduler, steps, denoise): + total_steps = steps + if denoise < 1.0: + total_steps = int(steps/denoise) + + ldm_patched.modules.model_management.load_models_gpu([model]) + sigmas = ldm_patched.modules.samplers.calculate_sigmas_scheduler(model.model, scheduler, total_steps).cpu() + sigmas = sigmas[-(steps + 1):] + return (sigmas, ) + + +class KarrasScheduler: + @classmethod + def INPUT_TYPES(s): + return {"required": + {"steps": ("INT", {"default": 20, "min": 1, "max": 10000}), + "sigma_max": ("FLOAT", {"default": 14.614642, "min": 0.0, "max": 1000.0, "step":0.01, "round": False}), + "sigma_min": ("FLOAT", {"default": 0.0291675, "min": 0.0, "max": 1000.0, "step":0.01, "round": False}), + "rho": ("FLOAT", {"default": 7.0, "min": 0.0, "max": 100.0, "step":0.01, "round": False}), + } + } + RETURN_TYPES = ("SIGMAS",) + CATEGORY = "sampling/custom_sampling/schedulers" + + FUNCTION = "get_sigmas" + + def get_sigmas(self, steps, sigma_max, sigma_min, rho): + sigmas = k_diffusion_sampling.get_sigmas_karras(n=steps, sigma_min=sigma_min, sigma_max=sigma_max, rho=rho) + return (sigmas, ) + +class ExponentialScheduler: + @classmethod + def INPUT_TYPES(s): + return {"required": + {"steps": ("INT", {"default": 20, "min": 1, "max": 10000}), + "sigma_max": ("FLOAT", {"default": 14.614642, "min": 0.0, "max": 1000.0, "step":0.01, "round": False}), + "sigma_min": ("FLOAT", {"default": 0.0291675, "min": 0.0, "max": 1000.0, "step":0.01, "round": False}), + } + } + RETURN_TYPES = ("SIGMAS",) + CATEGORY = "sampling/custom_sampling/schedulers" + + FUNCTION = "get_sigmas" + + def get_sigmas(self, steps, sigma_max, sigma_min): + sigmas = k_diffusion_sampling.get_sigmas_exponential(n=steps, sigma_min=sigma_min, sigma_max=sigma_max) + return (sigmas, ) + +class PolyexponentialScheduler: + @classmethod + def INPUT_TYPES(s): + return {"required": + {"steps": ("INT", {"default": 20, "min": 1, "max": 10000}), + "sigma_max": ("FLOAT", {"default": 14.614642, "min": 0.0, "max": 1000.0, "step":0.01, "round": False}), + "sigma_min": ("FLOAT", {"default": 0.0291675, "min": 0.0, "max": 1000.0, "step":0.01, "round": False}), + "rho": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0, "step":0.01, "round": False}), + } + } + RETURN_TYPES = ("SIGMAS",) + CATEGORY = "sampling/custom_sampling/schedulers" + + FUNCTION = "get_sigmas" + + def get_sigmas(self, steps, sigma_max, sigma_min, rho): + sigmas = 
k_diffusion_sampling.get_sigmas_polyexponential(n=steps, sigma_min=sigma_min, sigma_max=sigma_max, rho=rho) + return (sigmas, ) + +class SDTurboScheduler: + @classmethod + def INPUT_TYPES(s): + return {"required": + {"model": ("MODEL",), + "steps": ("INT", {"default": 1, "min": 1, "max": 10}), + "denoise": ("FLOAT", {"default": 1.0, "min": 0, "max": 1.0, "step": 0.01}), + } + } + RETURN_TYPES = ("SIGMAS",) + CATEGORY = "sampling/custom_sampling/schedulers" + + FUNCTION = "get_sigmas" + + def get_sigmas(self, model, steps, denoise): + start_step = 10 - int(10 * denoise) + timesteps = torch.flip(torch.arange(1, 11) * 100 - 1, (0,))[start_step:start_step + steps] + ldm_patched.modules.model_management.load_models_gpu([model]) + sigmas = model.model.model_sampling.sigma(timesteps) + sigmas = torch.cat([sigmas, sigmas.new_zeros([1])]) + return (sigmas, ) + +class VPScheduler: + @classmethod + def INPUT_TYPES(s): + return {"required": + {"steps": ("INT", {"default": 20, "min": 1, "max": 10000}), + "beta_d": ("FLOAT", {"default": 19.9, "min": 0.0, "max": 1000.0, "step":0.01, "round": False}), #TODO: fix default values + "beta_min": ("FLOAT", {"default": 0.1, "min": 0.0, "max": 1000.0, "step":0.01, "round": False}), + "eps_s": ("FLOAT", {"default": 0.001, "min": 0.0, "max": 1.0, "step":0.0001, "round": False}), + } + } + RETURN_TYPES = ("SIGMAS",) + CATEGORY = "sampling/custom_sampling/schedulers" + + FUNCTION = "get_sigmas" + + def get_sigmas(self, steps, beta_d, beta_min, eps_s): + sigmas = k_diffusion_sampling.get_sigmas_vp(n=steps, beta_d=beta_d, beta_min=beta_min, eps_s=eps_s) + return (sigmas, ) + +class SplitSigmas: + @classmethod + def INPUT_TYPES(s): + return {"required": + {"sigmas": ("SIGMAS", ), + "step": ("INT", {"default": 0, "min": 0, "max": 10000}), + } + } + RETURN_TYPES = ("SIGMAS","SIGMAS") + CATEGORY = "sampling/custom_sampling/sigmas" + + FUNCTION = "get_sigmas" + + def get_sigmas(self, sigmas, step): + sigmas1 = sigmas[:step + 1] + sigmas2 = sigmas[step:] + return (sigmas1, sigmas2) + +class FlipSigmas: + @classmethod + def INPUT_TYPES(s): + return {"required": + {"sigmas": ("SIGMAS", ), + } + } + RETURN_TYPES = ("SIGMAS",) + CATEGORY = "sampling/custom_sampling/sigmas" + + FUNCTION = "get_sigmas" + + def get_sigmas(self, sigmas): + sigmas = sigmas.flip(0) + if sigmas[0] == 0: + sigmas[0] = 0.0001 + return (sigmas,) + +class KSamplerSelect: + @classmethod + def INPUT_TYPES(s): + return {"required": + {"sampler_name": (ldm_patched.modules.samplers.SAMPLER_NAMES, ), + } + } + RETURN_TYPES = ("SAMPLER",) + CATEGORY = "sampling/custom_sampling/samplers" + + FUNCTION = "get_sampler" + + def get_sampler(self, sampler_name): + sampler = ldm_patched.modules.samplers.sampler_object(sampler_name) + return (sampler, ) + +class SamplerDPMPP_2M_SDE: + @classmethod + def INPUT_TYPES(s): + return {"required": + {"solver_type": (['midpoint', 'heun'], ), + "eta": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0, "step":0.01, "round": False}), + "s_noise": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0, "step":0.01, "round": False}), + "noise_device": (['gpu', 'cpu'], ), + } + } + RETURN_TYPES = ("SAMPLER",) + CATEGORY = "sampling/custom_sampling/samplers" + + FUNCTION = "get_sampler" + + def get_sampler(self, solver_type, eta, s_noise, noise_device): + if noise_device == 'cpu': + sampler_name = "dpmpp_2m_sde" + else: + sampler_name = "dpmpp_2m_sde_gpu" + sampler = ldm_patched.modules.samplers.ksampler(sampler_name, {"eta": eta, "s_noise": s_noise, "solver_type": solver_type}) + return 
(sampler, ) + + +class SamplerDPMPP_SDE: + @classmethod + def INPUT_TYPES(s): + return {"required": + {"eta": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0, "step":0.01, "round": False}), + "s_noise": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0, "step":0.01, "round": False}), + "r": ("FLOAT", {"default": 0.5, "min": 0.0, "max": 100.0, "step":0.01, "round": False}), + "noise_device": (['gpu', 'cpu'], ), + } + } + RETURN_TYPES = ("SAMPLER",) + CATEGORY = "sampling/custom_sampling/samplers" + + FUNCTION = "get_sampler" + + def get_sampler(self, eta, s_noise, r, noise_device): + if noise_device == 'cpu': + sampler_name = "dpmpp_sde" + else: + sampler_name = "dpmpp_sde_gpu" + sampler = ldm_patched.modules.samplers.ksampler(sampler_name, {"eta": eta, "s_noise": s_noise, "r": r}) + return (sampler, ) + +class SamplerCustom: + @classmethod + def INPUT_TYPES(s): + return {"required": + {"model": ("MODEL",), + "add_noise": ("BOOLEAN", {"default": True}), + "noise_seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}), + "cfg": ("FLOAT", {"default": 8.0, "min": 0.0, "max": 100.0, "step":0.1, "round": 0.01}), + "positive": ("CONDITIONING", ), + "negative": ("CONDITIONING", ), + "sampler": ("SAMPLER", ), + "sigmas": ("SIGMAS", ), + "latent_image": ("LATENT", ), + } + } + + RETURN_TYPES = ("LATENT","LATENT") + RETURN_NAMES = ("output", "denoised_output") + + FUNCTION = "sample" + + CATEGORY = "sampling/custom_sampling" + + def sample(self, model, add_noise, noise_seed, cfg, positive, negative, sampler, sigmas, latent_image): + latent = latent_image + latent_image = latent["samples"] + if not add_noise: + noise = torch.zeros(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, device="cpu") + else: + batch_inds = latent["batch_index"] if "batch_index" in latent else None + noise = ldm_patched.modules.sample.prepare_noise(latent_image, noise_seed, batch_inds) + + noise_mask = None + if "noise_mask" in latent: + noise_mask = latent["noise_mask"] + + x0_output = {} + callback = ldm_patched.utils.latent_visualization.prepare_callback(model, sigmas.shape[-1] - 1, x0_output) + + disable_pbar = not ldm_patched.modules.utils.PROGRESS_BAR_ENABLED + samples = ldm_patched.modules.sample.sample_custom(model, noise, cfg, sampler, sigmas, positive, negative, latent_image, noise_mask=noise_mask, callback=callback, disable_pbar=disable_pbar, seed=noise_seed) + + out = latent.copy() + out["samples"] = samples + if "x0" in x0_output: + out_denoised = latent.copy() + out_denoised["samples"] = model.model.process_latent_out(x0_output["x0"].cpu()) + else: + out_denoised = out + return (out, out_denoised) + +NODE_CLASS_MAPPINGS = { + "SamplerCustom": SamplerCustom, + "BasicScheduler": BasicScheduler, + "KarrasScheduler": KarrasScheduler, + "ExponentialScheduler": ExponentialScheduler, + "PolyexponentialScheduler": PolyexponentialScheduler, + "VPScheduler": VPScheduler, + "SDTurboScheduler": SDTurboScheduler, + "KSamplerSelect": KSamplerSelect, + "SamplerDPMPP_2M_SDE": SamplerDPMPP_2M_SDE, + "SamplerDPMPP_SDE": SamplerDPMPP_SDE, + "SplitSigmas": SplitSigmas, + "FlipSigmas": FlipSigmas, +} diff --git a/ldm_patched/contrib/external_freelunch.py b/ldm_patched/contrib/external_freelunch.py new file mode 100644 index 0000000000000000000000000000000000000000..59ec5babd632d7804b9c34442729910f0cb338d2 --- /dev/null +++ b/ldm_patched/contrib/external_freelunch.py @@ -0,0 +1,115 @@ +# https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py + +#code originally taken from: 
https://github.com/ChenyangSi/FreeU (under MIT License) + +import torch + + +def Fourier_filter(x, threshold, scale): + # FFT + x_freq = torch.fft.fftn(x.float(), dim=(-2, -1)) + x_freq = torch.fft.fftshift(x_freq, dim=(-2, -1)) + + B, C, H, W = x_freq.shape + mask = torch.ones((B, C, H, W), device=x.device) + + crow, ccol = H // 2, W //2 + mask[..., crow - threshold:crow + threshold, ccol - threshold:ccol + threshold] = scale + x_freq = x_freq * mask + + # IFFT + x_freq = torch.fft.ifftshift(x_freq, dim=(-2, -1)) + x_filtered = torch.fft.ifftn(x_freq, dim=(-2, -1)).real + + return x_filtered.to(x.dtype) + + +class FreeU: + @classmethod + def INPUT_TYPES(s): + return {"required": { "model": ("MODEL",), + "b1": ("FLOAT", {"default": 1.1, "min": 0.0, "max": 10.0, "step": 0.01}), + "b2": ("FLOAT", {"default": 1.2, "min": 0.0, "max": 10.0, "step": 0.01}), + "s1": ("FLOAT", {"default": 0.9, "min": 0.0, "max": 10.0, "step": 0.01}), + "s2": ("FLOAT", {"default": 0.2, "min": 0.0, "max": 10.0, "step": 0.01}), + }} + RETURN_TYPES = ("MODEL",) + FUNCTION = "patch" + + CATEGORY = "model_patches" + + def patch(self, model, b1, b2, s1, s2): + model_channels = model.model.model_config.unet_config["model_channels"] + scale_dict = {model_channels * 4: (b1, s1), model_channels * 2: (b2, s2)} + on_cpu_devices = {} + + def output_block_patch(h, hsp, transformer_options): + scale = scale_dict.get(h.shape[1], None) + if scale is not None: + h[:,:h.shape[1] // 2] = h[:,:h.shape[1] // 2] * scale[0] + if hsp.device not in on_cpu_devices: + try: + hsp = Fourier_filter(hsp, threshold=1, scale=scale[1]) + except: + print("Device", hsp.device, "does not support the torch.fft functions used in the FreeU node, switching to CPU.") + on_cpu_devices[hsp.device] = True + hsp = Fourier_filter(hsp.cpu(), threshold=1, scale=scale[1]).to(hsp.device) + else: + hsp = Fourier_filter(hsp.cpu(), threshold=1, scale=scale[1]).to(hsp.device) + + return h, hsp + + m = model.clone() + m.set_model_output_block_patch(output_block_patch) + return (m, ) + +class FreeU_V2: + @classmethod + def INPUT_TYPES(s): + return {"required": { "model": ("MODEL",), + "b1": ("FLOAT", {"default": 1.3, "min": 0.0, "max": 10.0, "step": 0.01}), + "b2": ("FLOAT", {"default": 1.4, "min": 0.0, "max": 10.0, "step": 0.01}), + "s1": ("FLOAT", {"default": 0.9, "min": 0.0, "max": 10.0, "step": 0.01}), + "s2": ("FLOAT", {"default": 0.2, "min": 0.0, "max": 10.0, "step": 0.01}), + }} + RETURN_TYPES = ("MODEL",) + FUNCTION = "patch" + + CATEGORY = "model_patches" + + def patch(self, model, b1, b2, s1, s2): + model_channels = model.model.model_config.unet_config["model_channels"] + scale_dict = {model_channels * 4: (b1, s1), model_channels * 2: (b2, s2)} + on_cpu_devices = {} + + def output_block_patch(h, hsp, transformer_options): + scale = scale_dict.get(h.shape[1], None) + if scale is not None: + hidden_mean = h.mean(1).unsqueeze(1) + B = hidden_mean.shape[0] + hidden_max, _ = torch.max(hidden_mean.view(B, -1), dim=-1, keepdim=True) + hidden_min, _ = torch.min(hidden_mean.view(B, -1), dim=-1, keepdim=True) + hidden_mean = (hidden_mean - hidden_min.unsqueeze(2).unsqueeze(3)) / (hidden_max - hidden_min).unsqueeze(2).unsqueeze(3) + + h[:,:h.shape[1] // 2] = h[:,:h.shape[1] // 2] * ((scale[0] - 1 ) * hidden_mean + 1) + + if hsp.device not in on_cpu_devices: + try: + hsp = Fourier_filter(hsp, threshold=1, scale=scale[1]) + except: + print("Device", hsp.device, "does not support the torch.fft functions used in the FreeU node, switching to CPU.") + on_cpu_devices[hsp.device] 
= True + hsp = Fourier_filter(hsp.cpu(), threshold=1, scale=scale[1]).to(hsp.device) + else: + hsp = Fourier_filter(hsp.cpu(), threshold=1, scale=scale[1]).to(hsp.device) + + return h, hsp + + m = model.clone() + m.set_model_output_block_patch(output_block_patch) + return (m, ) + +NODE_CLASS_MAPPINGS = { + "FreeU": FreeU, + "FreeU_V2": FreeU_V2, +} diff --git a/ldm_patched/contrib/external_hypernetwork.py b/ldm_patched/contrib/external_hypernetwork.py new file mode 100644 index 0000000000000000000000000000000000000000..012f2ec279b46a72e554e0732d42b8cea16b1b8f --- /dev/null +++ b/ldm_patched/contrib/external_hypernetwork.py @@ -0,0 +1,123 @@ +# Taken from https://github.com/comfyanonymous/ComfyUI +# This file is only for reference, and not used in the backend or runtime. + + +import ldm_patched.modules.utils +import ldm_patched.utils.path_utils +import torch + +def load_hypernetwork_patch(path, strength): + sd = ldm_patched.modules.utils.load_torch_file(path, safe_load=True) + activation_func = sd.get('activation_func', 'linear') + is_layer_norm = sd.get('is_layer_norm', False) + use_dropout = sd.get('use_dropout', False) + activate_output = sd.get('activate_output', False) + last_layer_dropout = sd.get('last_layer_dropout', False) + + valid_activation = { + "linear": torch.nn.Identity, + "relu": torch.nn.ReLU, + "leakyrelu": torch.nn.LeakyReLU, + "elu": torch.nn.ELU, + "swish": torch.nn.Hardswish, + "tanh": torch.nn.Tanh, + "sigmoid": torch.nn.Sigmoid, + "softsign": torch.nn.Softsign, + "mish": torch.nn.Mish, + } + + if activation_func not in valid_activation: + print("Unsupported Hypernetwork format, if you report it I might implement it.", path, " ", activation_func, is_layer_norm, use_dropout, activate_output, last_layer_dropout) + return None + + out = {} + + for d in sd: + try: + dim = int(d) + except: + continue + + output = [] + for index in [0, 1]: + attn_weights = sd[dim][index] + keys = attn_weights.keys() + + linears = filter(lambda a: a.endswith(".weight"), keys) + linears = list(map(lambda a: a[:-len(".weight")], linears)) + layers = [] + + i = 0 + while i < len(linears): + lin_name = linears[i] + last_layer = (i == (len(linears) - 1)) + penultimate_layer = (i == (len(linears) - 2)) + + lin_weight = attn_weights['{}.weight'.format(lin_name)] + lin_bias = attn_weights['{}.bias'.format(lin_name)] + layer = torch.nn.Linear(lin_weight.shape[1], lin_weight.shape[0]) + layer.load_state_dict({"weight": lin_weight, "bias": lin_bias}) + layers.append(layer) + if activation_func != "linear": + if (not last_layer) or (activate_output): + layers.append(valid_activation[activation_func]()) + if is_layer_norm: + i += 1 + ln_name = linears[i] + ln_weight = attn_weights['{}.weight'.format(ln_name)] + ln_bias = attn_weights['{}.bias'.format(ln_name)] + ln = torch.nn.LayerNorm(ln_weight.shape[0]) + ln.load_state_dict({"weight": ln_weight, "bias": ln_bias}) + layers.append(ln) + if use_dropout: + if (not last_layer) and (not penultimate_layer or last_layer_dropout): + layers.append(torch.nn.Dropout(p=0.3)) + i += 1 + + output.append(torch.nn.Sequential(*layers)) + out[dim] = torch.nn.ModuleList(output) + + class hypernetwork_patch: + def __init__(self, hypernet, strength): + self.hypernet = hypernet + self.strength = strength + def __call__(self, q, k, v, extra_options): + dim = k.shape[-1] + if dim in self.hypernet: + hn = self.hypernet[dim] + k = k + hn[0](k) * self.strength + v = v + hn[1](v) * self.strength + + return q, k, v + + def to(self, device): + for d in self.hypernet.keys(): + 
self.hypernet[d] = self.hypernet[d].to(device) + return self + + return hypernetwork_patch(out, strength) + +class HypernetworkLoader: + @classmethod + def INPUT_TYPES(s): + return {"required": { "model": ("MODEL",), + "hypernetwork_name": (ldm_patched.utils.path_utils.get_filename_list("hypernetworks"), ), + "strength": ("FLOAT", {"default": 1.0, "min": -10.0, "max": 10.0, "step": 0.01}), + }} + RETURN_TYPES = ("MODEL",) + FUNCTION = "load_hypernetwork" + + CATEGORY = "loaders" + + def load_hypernetwork(self, model, hypernetwork_name, strength): + hypernetwork_path = ldm_patched.utils.path_utils.get_full_path("hypernetworks", hypernetwork_name) + model_hypernetwork = model.clone() + patch = load_hypernetwork_patch(hypernetwork_path, strength) + if patch is not None: + model_hypernetwork.set_model_attn1_patch(patch) + model_hypernetwork.set_model_attn2_patch(patch) + return (model_hypernetwork,) + +NODE_CLASS_MAPPINGS = { + "HypernetworkLoader": HypernetworkLoader +} diff --git a/ldm_patched/contrib/external_hypertile.py b/ldm_patched/contrib/external_hypertile.py new file mode 100644 index 0000000000000000000000000000000000000000..5cf7d9d6d1609b6a02a89a8a74aca9fdccaeda58 --- /dev/null +++ b/ldm_patched/contrib/external_hypertile.py @@ -0,0 +1,85 @@ +# https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py + +#Taken from: https://github.com/tfernd/HyperTile/ + +import math +from einops import rearrange +# Use torch rng for consistency across generations +from torch import randint + +def random_divisor(value: int, min_value: int, /, max_options: int = 1) -> int: + min_value = min(min_value, value) + + # All big divisors of value (inclusive) + divisors = [i for i in range(min_value, value + 1) if value % i == 0] + + ns = [value // i for i in divisors[:max_options]] # has at least 1 element + + if len(ns) - 1 > 0: + idx = randint(low=0, high=len(ns) - 1, size=(1,)).item() + else: + idx = 0 + + return ns[idx] + +class HyperTile: + @classmethod + def INPUT_TYPES(s): + return {"required": { "model": ("MODEL",), + "tile_size": ("INT", {"default": 256, "min": 1, "max": 2048}), + "swap_size": ("INT", {"default": 2, "min": 1, "max": 128}), + "max_depth": ("INT", {"default": 0, "min": 0, "max": 10}), + "scale_depth": ("BOOLEAN", {"default": False}), + }} + RETURN_TYPES = ("MODEL",) + FUNCTION = "patch" + + CATEGORY = "model_patches" + + def patch(self, model, tile_size, swap_size, max_depth, scale_depth): + model_channels = model.model.model_config.unet_config["model_channels"] + + latent_tile_size = max(32, tile_size) // 8 + self.temp = None + + def hypertile_in(q, k, v, extra_options): + model_chans = q.shape[-2] + orig_shape = extra_options['original_shape'] + apply_to = [] + for i in range(max_depth + 1): + apply_to.append((orig_shape[-2] / (2 ** i)) * (orig_shape[-1] / (2 ** i))) + + if model_chans in apply_to: + shape = extra_options["original_shape"] + aspect_ratio = shape[-1] / shape[-2] + + hw = q.size(1) + h, w = round(math.sqrt(hw * aspect_ratio)), round(math.sqrt(hw / aspect_ratio)) + + factor = (2 ** apply_to.index(model_chans)) if scale_depth else 1 + nh = random_divisor(h, latent_tile_size * factor, swap_size) + nw = random_divisor(w, latent_tile_size * factor, swap_size) + + if nh * nw > 1: + q = rearrange(q, "b (nh h nw w) c -> (b nh nw) (h w) c", h=h // nh, w=w // nw, nh=nh, nw=nw) + self.temp = (nh, nw, h, w) + return q, k, v + + return q, k, v + def hypertile_out(out, extra_options): + if self.temp is not None: + nh, nw, h, w = self.temp + self.temp = None + out = 
rearrange(out, "(b nh nw) hw c -> b nh nw hw c", nh=nh, nw=nw) + out = rearrange(out, "b nh nw (h w) c -> b (nh h nw w) c", h=h // nh, w=w // nw) + return out + + + m = model.clone() + m.set_model_attn1_patch(hypertile_in) + m.set_model_attn1_output_patch(hypertile_out) + return (m, ) + +NODE_CLASS_MAPPINGS = { + "HyperTile": HyperTile, +} diff --git a/ldm_patched/contrib/external_images.py b/ldm_patched/contrib/external_images.py new file mode 100644 index 0000000000000000000000000000000000000000..47921003ff15845935cb21a4a580d3f82dd7bb11 --- /dev/null +++ b/ldm_patched/contrib/external_images.py @@ -0,0 +1,179 @@ +# Taken from https://github.com/comfyanonymous/ComfyUI +# This file is only for reference, and not used in the backend or runtime. + + +import ldm_patched.contrib.external +import ldm_patched.utils.path_utils +from ldm_patched.modules.args_parser import args + +from PIL import Image +from PIL.PngImagePlugin import PngInfo + +import numpy as np +import json +import os + +MAX_RESOLUTION = ldm_patched.contrib.external.MAX_RESOLUTION + +class ImageCrop: + @classmethod + def INPUT_TYPES(s): + return {"required": { "image": ("IMAGE",), + "width": ("INT", {"default": 512, "min": 1, "max": MAX_RESOLUTION, "step": 1}), + "height": ("INT", {"default": 512, "min": 1, "max": MAX_RESOLUTION, "step": 1}), + "x": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 1}), + "y": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 1}), + }} + RETURN_TYPES = ("IMAGE",) + FUNCTION = "crop" + + CATEGORY = "image/transform" + + def crop(self, image, width, height, x, y): + x = min(x, image.shape[2] - 1) + y = min(y, image.shape[1] - 1) + to_x = width + x + to_y = height + y + img = image[:,y:to_y, x:to_x, :] + return (img,) + +class RepeatImageBatch: + @classmethod + def INPUT_TYPES(s): + return {"required": { "image": ("IMAGE",), + "amount": ("INT", {"default": 1, "min": 1, "max": 64}), + }} + RETURN_TYPES = ("IMAGE",) + FUNCTION = "repeat" + + CATEGORY = "image/batch" + + def repeat(self, image, amount): + s = image.repeat((amount, 1,1,1)) + return (s,) + +class SaveAnimatedWEBP: + def __init__(self): + self.output_dir = ldm_patched.utils.path_utils.get_output_directory() + self.type = "output" + self.prefix_append = "" + + methods = {"default": 4, "fastest": 0, "slowest": 6} + @classmethod + def INPUT_TYPES(s): + return {"required": + {"images": ("IMAGE", ), + "filename_prefix": ("STRING", {"default": "ldm_patched"}), + "fps": ("FLOAT", {"default": 6.0, "min": 0.01, "max": 1000.0, "step": 0.01}), + "lossless": ("BOOLEAN", {"default": True}), + "quality": ("INT", {"default": 80, "min": 0, "max": 100}), + "method": (list(s.methods.keys()),), + # "num_frames": ("INT", {"default": 0, "min": 0, "max": 8192}), + }, + "hidden": {"prompt": "PROMPT", "extra_pnginfo": "EXTRA_PNGINFO"}, + } + + RETURN_TYPES = () + FUNCTION = "save_images" + + OUTPUT_NODE = True + + CATEGORY = "image/animation" + + def save_images(self, images, fps, filename_prefix, lossless, quality, method, num_frames=0, prompt=None, extra_pnginfo=None): + method = self.methods.get(method) + filename_prefix += self.prefix_append + full_output_folder, filename, counter, subfolder, filename_prefix = ldm_patched.utils.path_utils.get_save_image_path(filename_prefix, self.output_dir, images[0].shape[1], images[0].shape[0]) + results = list() + pil_images = [] + for image in images: + i = 255. 
* image.cpu().numpy() + img = Image.fromarray(np.clip(i, 0, 255).astype(np.uint8)) + pil_images.append(img) + + metadata = pil_images[0].getexif() + if not args.disable_server_info: + if prompt is not None: + metadata[0x0110] = "prompt:{}".format(json.dumps(prompt)) + if extra_pnginfo is not None: + inital_exif = 0x010f + for x in extra_pnginfo: + metadata[inital_exif] = "{}:{}".format(x, json.dumps(extra_pnginfo[x])) + inital_exif -= 1 + + if num_frames == 0: + num_frames = len(pil_images) + + c = len(pil_images) + for i in range(0, c, num_frames): + file = f"{filename}_{counter:05}_.webp" + pil_images[i].save(os.path.join(full_output_folder, file), save_all=True, duration=int(1000.0/fps), append_images=pil_images[i + 1:i + num_frames], exif=metadata, lossless=lossless, quality=quality, method=method) + results.append({ + "filename": file, + "subfolder": subfolder, + "type": self.type + }) + counter += 1 + + animated = num_frames != 1 + return { "ui": { "images": results, "animated": (animated,) } } + +class SaveAnimatedPNG: + def __init__(self): + self.output_dir = ldm_patched.utils.path_utils.get_output_directory() + self.type = "output" + self.prefix_append = "" + + @classmethod + def INPUT_TYPES(s): + return {"required": + {"images": ("IMAGE", ), + "filename_prefix": ("STRING", {"default": "ldm_patched"}), + "fps": ("FLOAT", {"default": 6.0, "min": 0.01, "max": 1000.0, "step": 0.01}), + "compress_level": ("INT", {"default": 4, "min": 0, "max": 9}) + }, + "hidden": {"prompt": "PROMPT", "extra_pnginfo": "EXTRA_PNGINFO"}, + } + + RETURN_TYPES = () + FUNCTION = "save_images" + + OUTPUT_NODE = True + + CATEGORY = "image/animation" + + def save_images(self, images, fps, compress_level, filename_prefix="ldm_patched", prompt=None, extra_pnginfo=None): + filename_prefix += self.prefix_append + full_output_folder, filename, counter, subfolder, filename_prefix = ldm_patched.utils.path_utils.get_save_image_path(filename_prefix, self.output_dir, images[0].shape[1], images[0].shape[0]) + results = list() + pil_images = [] + for image in images: + i = 255. 
* image.cpu().numpy() + img = Image.fromarray(np.clip(i, 0, 255).astype(np.uint8)) + pil_images.append(img) + + metadata = None + if not args.disable_server_info: + metadata = PngInfo() + if prompt is not None: + metadata.add(b"ldm_patched", "prompt".encode("latin-1", "strict") + b"\0" + json.dumps(prompt).encode("latin-1", "strict"), after_idat=True) + if extra_pnginfo is not None: + for x in extra_pnginfo: + metadata.add(b"ldm_patched", x.encode("latin-1", "strict") + b"\0" + json.dumps(extra_pnginfo[x]).encode("latin-1", "strict"), after_idat=True) + + file = f"{filename}_{counter:05}_.png" + pil_images[0].save(os.path.join(full_output_folder, file), pnginfo=metadata, compress_level=compress_level, save_all=True, duration=int(1000.0/fps), append_images=pil_images[1:]) + results.append({ + "filename": file, + "subfolder": subfolder, + "type": self.type + }) + + return { "ui": { "images": results, "animated": (True,)} } + +NODE_CLASS_MAPPINGS = { + "ImageCrop": ImageCrop, + "RepeatImageBatch": RepeatImageBatch, + "SaveAnimatedWEBP": SaveAnimatedWEBP, + "SaveAnimatedPNG": SaveAnimatedPNG, +} diff --git a/ldm_patched/contrib/external_latent.py b/ldm_patched/contrib/external_latent.py new file mode 100644 index 0000000000000000000000000000000000000000..522c9bc6b8f7a03078b3d85c248b1529b6db1bb5 --- /dev/null +++ b/ldm_patched/contrib/external_latent.py @@ -0,0 +1,159 @@ +# Taken from https://github.com/comfyanonymous/ComfyUI +# This file is only for reference, and not used in the backend or runtime. + + +import ldm_patched.modules.utils +import torch + +def reshape_latent_to(target_shape, latent): + if latent.shape[1:] != target_shape[1:]: + latent = ldm_patched.modules.utils.common_upscale(latent, target_shape[3], target_shape[2], "bilinear", "center") + return ldm_patched.modules.utils.repeat_to_batch_size(latent, target_shape[0]) + + +class LatentAdd: + @classmethod + def INPUT_TYPES(s): + return {"required": { "samples1": ("LATENT",), "samples2": ("LATENT",)}} + + RETURN_TYPES = ("LATENT",) + FUNCTION = "op" + + CATEGORY = "latent/advanced" + + def op(self, samples1, samples2): + samples_out = samples1.copy() + + s1 = samples1["samples"] + s2 = samples2["samples"] + + s2 = reshape_latent_to(s1.shape, s2) + samples_out["samples"] = s1 + s2 + return (samples_out,) + +class LatentSubtract: + @classmethod + def INPUT_TYPES(s): + return {"required": { "samples1": ("LATENT",), "samples2": ("LATENT",)}} + + RETURN_TYPES = ("LATENT",) + FUNCTION = "op" + + CATEGORY = "latent/advanced" + + def op(self, samples1, samples2): + samples_out = samples1.copy() + + s1 = samples1["samples"] + s2 = samples2["samples"] + + s2 = reshape_latent_to(s1.shape, s2) + samples_out["samples"] = s1 - s2 + return (samples_out,) + +class LatentMultiply: + @classmethod + def INPUT_TYPES(s): + return {"required": { "samples": ("LATENT",), + "multiplier": ("FLOAT", {"default": 1.0, "min": -10.0, "max": 10.0, "step": 0.01}), + }} + + RETURN_TYPES = ("LATENT",) + FUNCTION = "op" + + CATEGORY = "latent/advanced" + + def op(self, samples, multiplier): + samples_out = samples.copy() + + s1 = samples["samples"] + samples_out["samples"] = s1 * multiplier + return (samples_out,) + +class LatentInterpolate: + @classmethod + def INPUT_TYPES(s): + return {"required": { "samples1": ("LATENT",), + "samples2": ("LATENT",), + "ratio": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}), + }} + + RETURN_TYPES = ("LATENT",) + FUNCTION = "op" + + CATEGORY = "latent/advanced" + + def op(self, samples1, samples2, ratio): + 
samples_out = samples1.copy() + + s1 = samples1["samples"] + s2 = samples2["samples"] + + s2 = reshape_latent_to(s1.shape, s2) + + m1 = torch.linalg.vector_norm(s1, dim=(1)) + m2 = torch.linalg.vector_norm(s2, dim=(1)) + + s1 = torch.nan_to_num(s1 / m1) + s2 = torch.nan_to_num(s2 / m2) + + t = (s1 * ratio + s2 * (1.0 - ratio)) + mt = torch.linalg.vector_norm(t, dim=(1)) + st = torch.nan_to_num(t / mt) + + samples_out["samples"] = st * (m1 * ratio + m2 * (1.0 - ratio)) + return (samples_out,) + +class LatentBatch: + @classmethod + def INPUT_TYPES(s): + return {"required": { "samples1": ("LATENT",), "samples2": ("LATENT",)}} + + RETURN_TYPES = ("LATENT",) + FUNCTION = "batch" + + CATEGORY = "latent/batch" + + def batch(self, samples1, samples2): + samples_out = samples1.copy() + s1 = samples1["samples"] + s2 = samples2["samples"] + + if s1.shape[1:] != s2.shape[1:]: + s2 = ldm_patched.modules.utils.common_upscale(s2, s1.shape[3], s1.shape[2], "bilinear", "center") + s = torch.cat((s1, s2), dim=0) + samples_out["samples"] = s + samples_out["batch_index"] = samples1.get("batch_index", [x for x in range(0, s1.shape[0])]) + samples2.get("batch_index", [x for x in range(0, s2.shape[0])]) + return (samples_out,) + +class LatentBatchSeedBehavior: + @classmethod + def INPUT_TYPES(s): + return {"required": { "samples": ("LATENT",), + "seed_behavior": (["random", "fixed"],{"default": "fixed"}),}} + + RETURN_TYPES = ("LATENT",) + FUNCTION = "op" + + CATEGORY = "latent/advanced" + + def op(self, samples, seed_behavior): + samples_out = samples.copy() + latent = samples["samples"] + if seed_behavior == "random": + if 'batch_index' in samples_out: + samples_out.pop('batch_index') + elif seed_behavior == "fixed": + batch_number = samples_out.get("batch_index", [0])[0] + samples_out["batch_index"] = [batch_number] * latent.shape[0] + + return (samples_out,) + +NODE_CLASS_MAPPINGS = { + "LatentAdd": LatentAdd, + "LatentSubtract": LatentSubtract, + "LatentMultiply": LatentMultiply, + "LatentInterpolate": LatentInterpolate, + "LatentBatch": LatentBatch, + "LatentBatchSeedBehavior": LatentBatchSeedBehavior, +} diff --git a/ldm_patched/contrib/external_mask.py b/ldm_patched/contrib/external_mask.py new file mode 100644 index 0000000000000000000000000000000000000000..b6b4eb565f0dd4f3e3c97f6e87354ec4ffa9aa91 --- /dev/null +++ b/ldm_patched/contrib/external_mask.py @@ -0,0 +1,367 @@ +# Taken from https://github.com/comfyanonymous/ComfyUI +# This file is only for reference, and not used in the backend or runtime. 
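# Illustrative summary of the composite() helper defined below (destination and source
# are NCHW tensors, mask is optional and gets resized to the source):
#   1. clamp (x, y) so the paste stays in range, then divide by `multiplier` to get latent units
#   2. repeat/resize the source and mask to match the destination batch, keeping only the
#      region that actually overlaps the destination
#   3. blend in place over that region: dst = mask * src + (1 - mask) * dst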
+ + +import numpy as np +import scipy.ndimage +import torch +import ldm_patched.modules.utils + +from ldm_patched.contrib.external import MAX_RESOLUTION + +def composite(destination, source, x, y, mask = None, multiplier = 8, resize_source = False): + source = source.to(destination.device) + if resize_source: + source = torch.nn.functional.interpolate(source, size=(destination.shape[2], destination.shape[3]), mode="bilinear") + + source = ldm_patched.modules.utils.repeat_to_batch_size(source, destination.shape[0]) + + x = max(-source.shape[3] * multiplier, min(x, destination.shape[3] * multiplier)) + y = max(-source.shape[2] * multiplier, min(y, destination.shape[2] * multiplier)) + + left, top = (x // multiplier, y // multiplier) + right, bottom = (left + source.shape[3], top + source.shape[2],) + + if mask is None: + mask = torch.ones_like(source) + else: + mask = mask.to(destination.device, copy=True) + mask = torch.nn.functional.interpolate(mask.reshape((-1, 1, mask.shape[-2], mask.shape[-1])), size=(source.shape[2], source.shape[3]), mode="bilinear") + mask = ldm_patched.modules.utils.repeat_to_batch_size(mask, source.shape[0]) + + # calculate the bounds of the source that will be overlapping the destination + # this prevents the source trying to overwrite latent pixels that are out of bounds + # of the destination + visible_width, visible_height = (destination.shape[3] - left + min(0, x), destination.shape[2] - top + min(0, y),) + + mask = mask[:, :, :visible_height, :visible_width] + inverse_mask = torch.ones_like(mask) - mask + + source_portion = mask * source[:, :, :visible_height, :visible_width] + destination_portion = inverse_mask * destination[:, :, top:bottom, left:right] + + destination[:, :, top:bottom, left:right] = source_portion + destination_portion + return destination + +class LatentCompositeMasked: + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "destination": ("LATENT",), + "source": ("LATENT",), + "x": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 8}), + "y": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 8}), + "resize_source": ("BOOLEAN", {"default": False}), + }, + "optional": { + "mask": ("MASK",), + } + } + RETURN_TYPES = ("LATENT",) + FUNCTION = "composite" + + CATEGORY = "latent" + + def composite(self, destination, source, x, y, resize_source, mask = None): + output = destination.copy() + destination = destination["samples"].clone() + source = source["samples"] + output["samples"] = composite(destination, source, x, y, mask, 8, resize_source) + return (output,) + +class ImageCompositeMasked: + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "destination": ("IMAGE",), + "source": ("IMAGE",), + "x": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 1}), + "y": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 1}), + "resize_source": ("BOOLEAN", {"default": False}), + }, + "optional": { + "mask": ("MASK",), + } + } + RETURN_TYPES = ("IMAGE",) + FUNCTION = "composite" + + CATEGORY = "image" + + def composite(self, destination, source, x, y, resize_source, mask = None): + destination = destination.clone().movedim(-1, 1) + output = composite(destination, source.movedim(-1, 1), x, y, mask, 1, resize_source).movedim(1, -1) + return (output,) + +class MaskToImage: + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "mask": ("MASK",), + } + } + + CATEGORY = "mask" + + RETURN_TYPES = ("IMAGE",) + FUNCTION = "mask_to_image" + + def mask_to_image(self, mask): + 
result = mask.reshape((-1, 1, mask.shape[-2], mask.shape[-1])).movedim(1, -1).expand(-1, -1, -1, 3) + return (result,) + +class ImageToMask: + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "image": ("IMAGE",), + "channel": (["red", "green", "blue", "alpha"],), + } + } + + CATEGORY = "mask" + + RETURN_TYPES = ("MASK",) + FUNCTION = "image_to_mask" + + def image_to_mask(self, image, channel): + channels = ["red", "green", "blue", "alpha"] + mask = image[:, :, :, channels.index(channel)] + return (mask,) + +class ImageColorToMask: + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "image": ("IMAGE",), + "color": ("INT", {"default": 0, "min": 0, "max": 0xFFFFFF, "step": 1, "display": "color"}), + } + } + + CATEGORY = "mask" + + RETURN_TYPES = ("MASK",) + FUNCTION = "image_to_mask" + + def image_to_mask(self, image, color): + temp = (torch.clamp(image, 0, 1.0) * 255.0).round().to(torch.int) + temp = torch.bitwise_left_shift(temp[:,:,:,0], 16) + torch.bitwise_left_shift(temp[:,:,:,1], 8) + temp[:,:,:,2] + mask = torch.where(temp == color, 255, 0).float() + return (mask,) + +class SolidMask: + @classmethod + def INPUT_TYPES(cls): + return { + "required": { + "value": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}), + "width": ("INT", {"default": 512, "min": 1, "max": MAX_RESOLUTION, "step": 1}), + "height": ("INT", {"default": 512, "min": 1, "max": MAX_RESOLUTION, "step": 1}), + } + } + + CATEGORY = "mask" + + RETURN_TYPES = ("MASK",) + + FUNCTION = "solid" + + def solid(self, value, width, height): + out = torch.full((1, height, width), value, dtype=torch.float32, device="cpu") + return (out,) + +class InvertMask: + @classmethod + def INPUT_TYPES(cls): + return { + "required": { + "mask": ("MASK",), + } + } + + CATEGORY = "mask" + + RETURN_TYPES = ("MASK",) + + FUNCTION = "invert" + + def invert(self, mask): + out = 1.0 - mask + return (out,) + +class CropMask: + @classmethod + def INPUT_TYPES(cls): + return { + "required": { + "mask": ("MASK",), + "x": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 1}), + "y": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 1}), + "width": ("INT", {"default": 512, "min": 1, "max": MAX_RESOLUTION, "step": 1}), + "height": ("INT", {"default": 512, "min": 1, "max": MAX_RESOLUTION, "step": 1}), + } + } + + CATEGORY = "mask" + + RETURN_TYPES = ("MASK",) + + FUNCTION = "crop" + + def crop(self, mask, x, y, width, height): + mask = mask.reshape((-1, mask.shape[-2], mask.shape[-1])) + out = mask[:, y:y + height, x:x + width] + return (out,) + +class MaskComposite: + @classmethod + def INPUT_TYPES(cls): + return { + "required": { + "destination": ("MASK",), + "source": ("MASK",), + "x": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 1}), + "y": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 1}), + "operation": (["multiply", "add", "subtract", "and", "or", "xor"],), + } + } + + CATEGORY = "mask" + + RETURN_TYPES = ("MASK",) + + FUNCTION = "combine" + + def combine(self, destination, source, x, y, operation): + output = destination.reshape((-1, destination.shape[-2], destination.shape[-1])).clone() + source = source.reshape((-1, source.shape[-2], source.shape[-1])) + + left, top = (x, y,) + right, bottom = (min(left + source.shape[-1], destination.shape[-1]), min(top + source.shape[-2], destination.shape[-2])) + visible_width, visible_height = (right - left, bottom - top,) + + source_portion = source[:, :visible_height, :visible_width] + 
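# In the branches below, "multiply"/"add"/"subtract" operate on the continuous mask
# values directly, while "and"/"or"/"xor" first binarize both portions with
# round().bool(), apply the bitwise op, and cast back to float; the final clamp keeps
# every result inside [0, 1].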
destination_portion = destination[:, top:bottom, left:right] + + if operation == "multiply": + output[:, top:bottom, left:right] = destination_portion * source_portion + elif operation == "add": + output[:, top:bottom, left:right] = destination_portion + source_portion + elif operation == "subtract": + output[:, top:bottom, left:right] = destination_portion - source_portion + elif operation == "and": + output[:, top:bottom, left:right] = torch.bitwise_and(destination_portion.round().bool(), source_portion.round().bool()).float() + elif operation == "or": + output[:, top:bottom, left:right] = torch.bitwise_or(destination_portion.round().bool(), source_portion.round().bool()).float() + elif operation == "xor": + output[:, top:bottom, left:right] = torch.bitwise_xor(destination_portion.round().bool(), source_portion.round().bool()).float() + + output = torch.clamp(output, 0.0, 1.0) + + return (output,) + +class FeatherMask: + @classmethod + def INPUT_TYPES(cls): + return { + "required": { + "mask": ("MASK",), + "left": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 1}), + "top": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 1}), + "right": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 1}), + "bottom": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 1}), + } + } + + CATEGORY = "mask" + + RETURN_TYPES = ("MASK",) + + FUNCTION = "feather" + + def feather(self, mask, left, top, right, bottom): + output = mask.reshape((-1, mask.shape[-2], mask.shape[-1])).clone() + + left = min(left, output.shape[-1]) + right = min(right, output.shape[-1]) + top = min(top, output.shape[-2]) + bottom = min(bottom, output.shape[-2]) + + for x in range(left): + feather_rate = (x + 1.0) / left + output[:, :, x] *= feather_rate + + for x in range(right): + feather_rate = (x + 1) / right + output[:, :, -x] *= feather_rate + + for y in range(top): + feather_rate = (y + 1) / top + output[:, y, :] *= feather_rate + + for y in range(bottom): + feather_rate = (y + 1) / bottom + output[:, -y, :] *= feather_rate + + return (output,) + +class GrowMask: + @classmethod + def INPUT_TYPES(cls): + return { + "required": { + "mask": ("MASK",), + "expand": ("INT", {"default": 0, "min": -MAX_RESOLUTION, "max": MAX_RESOLUTION, "step": 1}), + "tapered_corners": ("BOOLEAN", {"default": True}), + }, + } + + CATEGORY = "mask" + + RETURN_TYPES = ("MASK",) + + FUNCTION = "expand_mask" + + def expand_mask(self, mask, expand, tapered_corners): + c = 0 if tapered_corners else 1 + kernel = np.array([[c, 1, c], + [1, 1, 1], + [c, 1, c]]) + mask = mask.reshape((-1, mask.shape[-2], mask.shape[-1])) + out = [] + for m in mask: + output = m.numpy() + for _ in range(abs(expand)): + if expand < 0: + output = scipy.ndimage.grey_erosion(output, footprint=kernel) + else: + output = scipy.ndimage.grey_dilation(output, footprint=kernel) + output = torch.from_numpy(output) + out.append(output) + return (torch.stack(out, dim=0),) + + + +NODE_CLASS_MAPPINGS = { + "LatentCompositeMasked": LatentCompositeMasked, + "ImageCompositeMasked": ImageCompositeMasked, + "MaskToImage": MaskToImage, + "ImageToMask": ImageToMask, + "ImageColorToMask": ImageColorToMask, + "SolidMask": SolidMask, + "InvertMask": InvertMask, + "CropMask": CropMask, + "MaskComposite": MaskComposite, + "FeatherMask": FeatherMask, + "GrowMask": GrowMask, +} + +NODE_DISPLAY_NAME_MAPPINGS = { + "ImageToMask": "Convert Image to Mask", + "MaskToImage": "Convert Mask to Image", +} diff --git 
a/ldm_patched/contrib/external_model_advanced.py b/ldm_patched/contrib/external_model_advanced.py new file mode 100644 index 0000000000000000000000000000000000000000..5758c03f5aa304eeb9780fe3a2daec5d6ca3e310 --- /dev/null +++ b/ldm_patched/contrib/external_model_advanced.py @@ -0,0 +1,179 @@ +# Taken from https://github.com/comfyanonymous/ComfyUI +# This file is only for reference, and not used in the backend or runtime. + + +import ldm_patched.utils.path_utils +import ldm_patched.modules.sd +import ldm_patched.modules.model_sampling +import torch + +class LCM(ldm_patched.modules.model_sampling.EPS): + def calculate_denoised(self, sigma, model_output, model_input): + timestep = self.timestep(sigma).view(sigma.shape[:1] + (1,) * (model_output.ndim - 1)) + sigma = sigma.view(sigma.shape[:1] + (1,) * (model_output.ndim - 1)) + x0 = model_input - model_output * sigma + + sigma_data = 0.5 + scaled_timestep = timestep * 10.0 #timestep_scaling + + c_skip = sigma_data**2 / (scaled_timestep**2 + sigma_data**2) + c_out = scaled_timestep / (scaled_timestep**2 + sigma_data**2) ** 0.5 + + return c_out * x0 + c_skip * model_input + +class ModelSamplingDiscreteDistilled(ldm_patched.modules.model_sampling.ModelSamplingDiscrete): + original_timesteps = 50 + + def __init__(self, model_config=None): + super().__init__(model_config) + + self.skip_steps = self.num_timesteps // self.original_timesteps + + sigmas_valid = torch.zeros((self.original_timesteps), dtype=torch.float32) + for x in range(self.original_timesteps): + sigmas_valid[self.original_timesteps - 1 - x] = self.sigmas[self.num_timesteps - 1 - x * self.skip_steps] + + self.set_sigmas(sigmas_valid) + + def timestep(self, sigma): + log_sigma = sigma.log() + dists = log_sigma.to(self.log_sigmas.device) - self.log_sigmas[:, None] + return (dists.abs().argmin(dim=0).view(sigma.shape) * self.skip_steps + (self.skip_steps - 1)).to(sigma.device) + + def sigma(self, timestep): + t = torch.clamp(((timestep.float().to(self.log_sigmas.device) - (self.skip_steps - 1)) / self.skip_steps).float(), min=0, max=(len(self.sigmas) - 1)) + low_idx = t.floor().long() + high_idx = t.ceil().long() + w = t.frac() + log_sigma = (1 - w) * self.log_sigmas[low_idx] + w * self.log_sigmas[high_idx] + return log_sigma.exp().to(timestep.device) + + +def rescale_zero_terminal_snr_sigmas(sigmas): + alphas_cumprod = 1 / ((sigmas * sigmas) + 1) + alphas_bar_sqrt = alphas_cumprod.sqrt() + + # Store old values. + alphas_bar_sqrt_0 = alphas_bar_sqrt[0].clone() + alphas_bar_sqrt_T = alphas_bar_sqrt[-1].clone() + + # Shift so the last timestep is zero. + alphas_bar_sqrt -= (alphas_bar_sqrt_T) + + # Scale so the first timestep is back to the old value. 
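+    # (i.e. after shifting the terminal value to zero, rescale the whole
+    # alphas_bar_sqrt schedule so its first entry is unchanged -- the
+    # "zero terminal SNR" fix described in Lin et al.,
+    # "Common Diffusion Noise Schedules and Sampler Design Are Flawed".)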
+ alphas_bar_sqrt *= alphas_bar_sqrt_0 / (alphas_bar_sqrt_0 - alphas_bar_sqrt_T) + + # Convert alphas_bar_sqrt to betas + alphas_bar = alphas_bar_sqrt**2 # Revert sqrt + alphas_bar[-1] = 4.8973451890853435e-08 + return ((1 - alphas_bar) / alphas_bar) ** 0.5 + +class ModelSamplingDiscrete: + @classmethod + def INPUT_TYPES(s): + return {"required": { "model": ("MODEL",), + "sampling": (["eps", "v_prediction", "lcm"],), + "zsnr": ("BOOLEAN", {"default": False}), + }} + + RETURN_TYPES = ("MODEL",) + FUNCTION = "patch" + + CATEGORY = "advanced/model" + + def patch(self, model, sampling, zsnr): + m = model.clone() + + sampling_base = ldm_patched.modules.model_sampling.ModelSamplingDiscrete + if sampling == "eps": + sampling_type = ldm_patched.modules.model_sampling.EPS + elif sampling == "v_prediction": + sampling_type = ldm_patched.modules.model_sampling.V_PREDICTION + elif sampling == "lcm": + sampling_type = LCM + sampling_base = ModelSamplingDiscreteDistilled + + class ModelSamplingAdvanced(sampling_base, sampling_type): + pass + + model_sampling = ModelSamplingAdvanced(model.model.model_config) + if zsnr: + model_sampling.set_sigmas(rescale_zero_terminal_snr_sigmas(model_sampling.sigmas)) + + m.add_object_patch("model_sampling", model_sampling) + return (m, ) + +class ModelSamplingContinuousEDM: + @classmethod + def INPUT_TYPES(s): + return {"required": { "model": ("MODEL",), + "sampling": (["v_prediction", "eps"],), + "sigma_max": ("FLOAT", {"default": 120.0, "min": 0.0, "max": 1000.0, "step":0.001, "round": False}), + "sigma_min": ("FLOAT", {"default": 0.002, "min": 0.0, "max": 1000.0, "step":0.001, "round": False}), + }} + + RETURN_TYPES = ("MODEL",) + FUNCTION = "patch" + + CATEGORY = "advanced/model" + + def patch(self, model, sampling, sigma_max, sigma_min): + m = model.clone() + + if sampling == "eps": + sampling_type = ldm_patched.modules.model_sampling.EPS + elif sampling == "v_prediction": + sampling_type = ldm_patched.modules.model_sampling.V_PREDICTION + + class ModelSamplingAdvanced(ldm_patched.modules.model_sampling.ModelSamplingContinuousEDM, sampling_type): + pass + + model_sampling = ModelSamplingAdvanced(model.model.model_config) + model_sampling.set_sigma_range(sigma_min, sigma_max) + m.add_object_patch("model_sampling", model_sampling) + return (m, ) + +class RescaleCFG: + @classmethod + def INPUT_TYPES(s): + return {"required": { "model": ("MODEL",), + "multiplier": ("FLOAT", {"default": 0.7, "min": 0.0, "max": 1.0, "step": 0.01}), + }} + RETURN_TYPES = ("MODEL",) + FUNCTION = "patch" + + CATEGORY = "advanced/model" + + def patch(self, model, multiplier): + def rescale_cfg(args): + cond = args["cond"] + uncond = args["uncond"] + cond_scale = args["cond_scale"] + sigma = args["sigma"] + sigma = sigma.view(sigma.shape[:1] + (1,) * (cond.ndim - 1)) + x_orig = args["input"] + + #rescale cfg has to be done on v-pred model output + x = x_orig / (sigma * sigma + 1.0) + cond = ((x - (x_orig - cond)) * (sigma ** 2 + 1.0) ** 0.5) / (sigma) + uncond = ((x - (x_orig - uncond)) * (sigma ** 2 + 1.0) ** 0.5) / (sigma) + + #rescalecfg + x_cfg = uncond + cond_scale * (cond - uncond) + ro_pos = torch.std(cond, dim=(1,2,3), keepdim=True) + ro_cfg = torch.std(x_cfg, dim=(1,2,3), keepdim=True) + + x_rescaled = x_cfg * (ro_pos / ro_cfg) + x_final = multiplier * x_rescaled + (1.0 - multiplier) * x_cfg + + return x_orig - (x - x_final * sigma / (sigma * sigma + 1.0) ** 0.5) + + m = model.clone() + m.set_model_sampler_cfg_function(rescale_cfg) + return (m, ) + +NODE_CLASS_MAPPINGS = { + 
"ModelSamplingDiscrete": ModelSamplingDiscrete, + "ModelSamplingContinuousEDM": ModelSamplingContinuousEDM, + "RescaleCFG": RescaleCFG, +} diff --git a/ldm_patched/contrib/external_model_downscale.py b/ldm_patched/contrib/external_model_downscale.py new file mode 100644 index 0000000000000000000000000000000000000000..58a01d55106b98d1930fc87bc1391af57bdc48d3 --- /dev/null +++ b/ldm_patched/contrib/external_model_downscale.py @@ -0,0 +1,57 @@ +# Taken from https://github.com/comfyanonymous/ComfyUI +# This file is only for reference, and not used in the backend or runtime. + + +import torch +import ldm_patched.modules.utils + +class PatchModelAddDownscale: + upscale_methods = ["bicubic", "nearest-exact", "bilinear", "area", "bislerp"] + @classmethod + def INPUT_TYPES(s): + return {"required": { "model": ("MODEL",), + "block_number": ("INT", {"default": 3, "min": 1, "max": 32, "step": 1}), + "downscale_factor": ("FLOAT", {"default": 2.0, "min": 0.1, "max": 9.0, "step": 0.001}), + "start_percent": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.001}), + "end_percent": ("FLOAT", {"default": 0.35, "min": 0.0, "max": 1.0, "step": 0.001}), + "downscale_after_skip": ("BOOLEAN", {"default": True}), + "downscale_method": (s.upscale_methods,), + "upscale_method": (s.upscale_methods,), + }} + RETURN_TYPES = ("MODEL",) + FUNCTION = "patch" + + CATEGORY = "_for_testing" + + def patch(self, model, block_number, downscale_factor, start_percent, end_percent, downscale_after_skip, downscale_method, upscale_method): + sigma_start = model.model.model_sampling.percent_to_sigma(start_percent) + sigma_end = model.model.model_sampling.percent_to_sigma(end_percent) + + def input_block_patch(h, transformer_options): + if transformer_options["block"][1] == block_number: + sigma = transformer_options["sigmas"][0].item() + if sigma <= sigma_start and sigma >= sigma_end: + h = ldm_patched.modules.utils.common_upscale(h, round(h.shape[-1] * (1.0 / downscale_factor)), round(h.shape[-2] * (1.0 / downscale_factor)), downscale_method, "disabled") + return h + + def output_block_patch(h, hsp, transformer_options): + if h.shape[2] != hsp.shape[2]: + h = ldm_patched.modules.utils.common_upscale(h, hsp.shape[-1], hsp.shape[-2], upscale_method, "disabled") + return h, hsp + + m = model.clone() + if downscale_after_skip: + m.set_model_input_block_patch_after_skip(input_block_patch) + else: + m.set_model_input_block_patch(input_block_patch) + m.set_model_output_block_patch(output_block_patch) + return (m, ) + +NODE_CLASS_MAPPINGS = { + "PatchModelAddDownscale": PatchModelAddDownscale, +} + +NODE_DISPLAY_NAME_MAPPINGS = { + # Sampling + "PatchModelAddDownscale": "PatchModelAddDownscale (Kohya Deep Shrink)", +} diff --git a/ldm_patched/contrib/external_model_merging.py b/ldm_patched/contrib/external_model_merging.py new file mode 100644 index 0000000000000000000000000000000000000000..e99596bf4fb30cb1b9a98ffa51e8d7a2085fd269 --- /dev/null +++ b/ldm_patched/contrib/external_model_merging.py @@ -0,0 +1,288 @@ +# Taken from https://github.com/comfyanonymous/ComfyUI +# This file is only for reference, and not used in the backend or runtime. 
+ + +import ldm_patched.modules.sd +import ldm_patched.modules.utils +import ldm_patched.modules.model_base +import ldm_patched.modules.model_management + +import ldm_patched.utils.path_utils +import json +import os + +from ldm_patched.modules.args_parser import args + +class ModelMergeSimple: + @classmethod + def INPUT_TYPES(s): + return {"required": { "model1": ("MODEL",), + "model2": ("MODEL",), + "ratio": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}), + }} + RETURN_TYPES = ("MODEL",) + FUNCTION = "merge" + + CATEGORY = "advanced/model_merging" + + def merge(self, model1, model2, ratio): + m = model1.clone() + kp = model2.get_key_patches("diffusion_model.") + for k in kp: + m.add_patches({k: kp[k]}, 1.0 - ratio, ratio) + return (m, ) + +class ModelSubtract: + @classmethod + def INPUT_TYPES(s): + return {"required": { "model1": ("MODEL",), + "model2": ("MODEL",), + "multiplier": ("FLOAT", {"default": 1.0, "min": -10.0, "max": 10.0, "step": 0.01}), + }} + RETURN_TYPES = ("MODEL",) + FUNCTION = "merge" + + CATEGORY = "advanced/model_merging" + + def merge(self, model1, model2, multiplier): + m = model1.clone() + kp = model2.get_key_patches("diffusion_model.") + for k in kp: + m.add_patches({k: kp[k]}, - multiplier, multiplier) + return (m, ) + +class ModelAdd: + @classmethod + def INPUT_TYPES(s): + return {"required": { "model1": ("MODEL",), + "model2": ("MODEL",), + }} + RETURN_TYPES = ("MODEL",) + FUNCTION = "merge" + + CATEGORY = "advanced/model_merging" + + def merge(self, model1, model2): + m = model1.clone() + kp = model2.get_key_patches("diffusion_model.") + for k in kp: + m.add_patches({k: kp[k]}, 1.0, 1.0) + return (m, ) + + +class CLIPMergeSimple: + @classmethod + def INPUT_TYPES(s): + return {"required": { "clip1": ("CLIP",), + "clip2": ("CLIP",), + "ratio": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}), + }} + RETURN_TYPES = ("CLIP",) + FUNCTION = "merge" + + CATEGORY = "advanced/model_merging" + + def merge(self, clip1, clip2, ratio): + m = clip1.clone() + kp = clip2.get_key_patches() + for k in kp: + if k.endswith(".position_ids") or k.endswith(".logit_scale"): + continue + m.add_patches({k: kp[k]}, 1.0 - ratio, ratio) + return (m, ) + +class ModelMergeBlocks: + @classmethod + def INPUT_TYPES(s): + return {"required": { "model1": ("MODEL",), + "model2": ("MODEL",), + "input": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}), + "middle": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}), + "out": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}) + }} + RETURN_TYPES = ("MODEL",) + FUNCTION = "merge" + + CATEGORY = "advanced/model_merging" + + def merge(self, model1, model2, **kwargs): + m = model1.clone() + kp = model2.get_key_patches("diffusion_model.") + default_ratio = next(iter(kwargs.values())) + + for k in kp: + ratio = default_ratio + k_unet = k[len("diffusion_model."):] + + last_arg_size = 0 + for arg in kwargs: + if k_unet.startswith(arg) and last_arg_size < len(arg): + ratio = kwargs[arg] + last_arg_size = len(arg) + + m.add_patches({k: kp[k]}, 1.0 - ratio, ratio) + return (m, ) + +def save_checkpoint(model, clip=None, vae=None, clip_vision=None, filename_prefix=None, output_dir=None, prompt=None, extra_pnginfo=None): + full_output_folder, filename, counter, subfolder, filename_prefix = ldm_patched.utils.path_utils.get_save_image_path(filename_prefix, output_dir) + prompt_info = "" + if prompt is not None: + prompt_info = json.dumps(prompt) + + metadata = {} + + enable_modelspec = 
True + if isinstance(model.model, ldm_patched.modules.model_base.SDXL): + metadata["modelspec.architecture"] = "stable-diffusion-xl-v1-base" + elif isinstance(model.model, ldm_patched.modules.model_base.SDXLRefiner): + metadata["modelspec.architecture"] = "stable-diffusion-xl-v1-refiner" + else: + enable_modelspec = False + + if enable_modelspec: + metadata["modelspec.sai_model_spec"] = "1.0.0" + metadata["modelspec.implementation"] = "sgm" + metadata["modelspec.title"] = "{} {}".format(filename, counter) + + #TODO: + # "stable-diffusion-v1", "stable-diffusion-v1-inpainting", "stable-diffusion-v2-512", + # "stable-diffusion-v2-768-v", "stable-diffusion-v2-unclip-l", "stable-diffusion-v2-unclip-h", + # "v2-inpainting" + + if model.model.model_type == ldm_patched.modules.model_base.ModelType.EPS: + metadata["modelspec.predict_key"] = "epsilon" + elif model.model.model_type == ldm_patched.modules.model_base.ModelType.V_PREDICTION: + metadata["modelspec.predict_key"] = "v" + + if not args.disable_server_info: + metadata["prompt"] = prompt_info + if extra_pnginfo is not None: + for x in extra_pnginfo: + metadata[x] = json.dumps(extra_pnginfo[x]) + + output_checkpoint = f"{filename}_{counter:05}_.safetensors" + output_checkpoint = os.path.join(full_output_folder, output_checkpoint) + + ldm_patched.modules.sd.save_checkpoint(output_checkpoint, model, clip, vae, clip_vision, metadata=metadata) + +class CheckpointSave: + def __init__(self): + self.output_dir = ldm_patched.utils.path_utils.get_output_directory() + + @classmethod + def INPUT_TYPES(s): + return {"required": { "model": ("MODEL",), + "clip": ("CLIP",), + "vae": ("VAE",), + "filename_prefix": ("STRING", {"default": "checkpoints/ldm_patched"}),}, + "hidden": {"prompt": "PROMPT", "extra_pnginfo": "EXTRA_PNGINFO"},} + RETURN_TYPES = () + FUNCTION = "save" + OUTPUT_NODE = True + + CATEGORY = "advanced/model_merging" + + def save(self, model, clip, vae, filename_prefix, prompt=None, extra_pnginfo=None): + save_checkpoint(model, clip=clip, vae=vae, filename_prefix=filename_prefix, output_dir=self.output_dir, prompt=prompt, extra_pnginfo=extra_pnginfo) + return {} + +class CLIPSave: + def __init__(self): + self.output_dir = ldm_patched.utils.path_utils.get_output_directory() + + @classmethod + def INPUT_TYPES(s): + return {"required": { "clip": ("CLIP",), + "filename_prefix": ("STRING", {"default": "clip/ldm_patched"}),}, + "hidden": {"prompt": "PROMPT", "extra_pnginfo": "EXTRA_PNGINFO"},} + RETURN_TYPES = () + FUNCTION = "save" + OUTPUT_NODE = True + + CATEGORY = "advanced/model_merging" + + def save(self, clip, filename_prefix, prompt=None, extra_pnginfo=None): + prompt_info = "" + if prompt is not None: + prompt_info = json.dumps(prompt) + + metadata = {} + if not args.disable_server_info: + metadata["prompt"] = prompt_info + if extra_pnginfo is not None: + for x in extra_pnginfo: + metadata[x] = json.dumps(extra_pnginfo[x]) + + ldm_patched.modules.model_management.load_models_gpu([clip.load_model()]) + clip_sd = clip.get_sd() + + for prefix in ["clip_l.", "clip_g.", ""]: + k = list(filter(lambda a: a.startswith(prefix), clip_sd.keys())) + current_clip_sd = {} + for x in k: + current_clip_sd[x] = clip_sd.pop(x) + if len(current_clip_sd) == 0: + continue + + p = prefix[:-1] + replace_prefix = {} + filename_prefix_ = filename_prefix + if len(p) > 0: + filename_prefix_ = "{}_{}".format(filename_prefix_, p) + replace_prefix[prefix] = "" + replace_prefix["transformer."] = "" + + full_output_folder, filename, counter, subfolder, filename_prefix_ = 
ldm_patched.utils.path_utils.get_save_image_path(filename_prefix_, self.output_dir) + + output_checkpoint = f"{filename}_{counter:05}_.safetensors" + output_checkpoint = os.path.join(full_output_folder, output_checkpoint) + + current_clip_sd = ldm_patched.modules.utils.state_dict_prefix_replace(current_clip_sd, replace_prefix) + + ldm_patched.modules.utils.save_torch_file(current_clip_sd, output_checkpoint, metadata=metadata) + return {} + +class VAESave: + def __init__(self): + self.output_dir = ldm_patched.utils.path_utils.get_output_directory() + + @classmethod + def INPUT_TYPES(s): + return {"required": { "vae": ("VAE",), + "filename_prefix": ("STRING", {"default": "vae/ldm_patched_vae"}),}, + "hidden": {"prompt": "PROMPT", "extra_pnginfo": "EXTRA_PNGINFO"},} + RETURN_TYPES = () + FUNCTION = "save" + OUTPUT_NODE = True + + CATEGORY = "advanced/model_merging" + + def save(self, vae, filename_prefix, prompt=None, extra_pnginfo=None): + full_output_folder, filename, counter, subfolder, filename_prefix = ldm_patched.utils.path_utils.get_save_image_path(filename_prefix, self.output_dir) + prompt_info = "" + if prompt is not None: + prompt_info = json.dumps(prompt) + + metadata = {} + if not args.disable_server_info: + metadata["prompt"] = prompt_info + if extra_pnginfo is not None: + for x in extra_pnginfo: + metadata[x] = json.dumps(extra_pnginfo[x]) + + output_checkpoint = f"{filename}_{counter:05}_.safetensors" + output_checkpoint = os.path.join(full_output_folder, output_checkpoint) + + ldm_patched.modules.utils.save_torch_file(vae.get_sd(), output_checkpoint, metadata=metadata) + return {} + +NODE_CLASS_MAPPINGS = { + "ModelMergeSimple": ModelMergeSimple, + "ModelMergeBlocks": ModelMergeBlocks, + "ModelMergeSubtract": ModelSubtract, + "ModelMergeAdd": ModelAdd, + "CheckpointSave": CheckpointSave, + "CLIPMergeSimple": CLIPMergeSimple, + "CLIPSave": CLIPSave, + "VAESave": VAESave, +} diff --git a/ldm_patched/contrib/external_perpneg.py b/ldm_patched/contrib/external_perpneg.py new file mode 100644 index 0000000000000000000000000000000000000000..f38eb0963f83bc9dec08cceb6b85e229f7a1cf16 --- /dev/null +++ b/ldm_patched/contrib/external_perpneg.py @@ -0,0 +1,59 @@ +# Taken from https://github.com/comfyanonymous/ComfyUI +# This file is only for reference, and not used in the backend or runtime. 
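# Editor's note: the PerpNeg node below replaces plain CFG with a "Perp-Neg"
# style update: both the cond and uncond predictions are first taken relative to
# an empty prompt, and the projection of the positive direction onto the negative
# direction is removed (scaled by neg_scale) before the CFG scale is applied.
# A small sketch of the vector math with toy tensors (hypothetical helper name):
def _perp_neg_sketch():
    import torch
    pos = torch.tensor([1.0, 1.0])   # cond prediction minus empty-prompt prediction
    neg = torch.tensor([1.0, 0.0])   # uncond prediction minus empty-prompt prediction
    neg_scale = 1.0
    proj = (pos * neg).sum() / (neg.norm() ** 2) * neg   # projection of pos onto neg
    guided = pos - neg_scale * proj                      # neg-aligned component removed
    return guided                                        # -> tensor([0., 1.])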
+ + +import torch +import ldm_patched.modules.model_management +import ldm_patched.modules.sample +import ldm_patched.modules.samplers +import ldm_patched.modules.utils + + +class PerpNeg: + @classmethod + def INPUT_TYPES(s): + return {"required": {"model": ("MODEL", ), + "empty_conditioning": ("CONDITIONING", ), + "neg_scale": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0}), + }} + RETURN_TYPES = ("MODEL",) + FUNCTION = "patch" + + CATEGORY = "_for_testing" + + def patch(self, model, empty_conditioning, neg_scale): + m = model.clone() + nocond = ldm_patched.modules.sample.convert_cond(empty_conditioning) + + def cfg_function(args): + model = args["model"] + noise_pred_pos = args["cond_denoised"] + noise_pred_neg = args["uncond_denoised"] + cond_scale = args["cond_scale"] + x = args["input"] + sigma = args["sigma"] + model_options = args["model_options"] + nocond_processed = ldm_patched.modules.samplers.encode_model_conds(model.extra_conds, nocond, x, x.device, "negative") + + (noise_pred_nocond, _) = ldm_patched.modules.samplers.calc_cond_uncond_batch(model, nocond_processed, None, x, sigma, model_options) + + pos = noise_pred_pos - noise_pred_nocond + neg = noise_pred_neg - noise_pred_nocond + perp = ((torch.mul(pos, neg).sum())/(torch.norm(neg)**2)) * neg + perp_neg = perp * neg_scale + cfg_result = noise_pred_nocond + cond_scale*(pos - perp_neg) + cfg_result = x - cfg_result + return cfg_result + + m.set_model_sampler_cfg_function(cfg_function) + + return (m, ) + + +NODE_CLASS_MAPPINGS = { + "PerpNeg": PerpNeg, +} + +NODE_DISPLAY_NAME_MAPPINGS = { + "PerpNeg": "Perp-Neg", +} diff --git a/ldm_patched/contrib/external_photomaker.py b/ldm_patched/contrib/external_photomaker.py new file mode 100644 index 0000000000000000000000000000000000000000..cc7f67100670d6407531d56f232517a9216e9dcf --- /dev/null +++ b/ldm_patched/contrib/external_photomaker.py @@ -0,0 +1,189 @@ +# https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py + +import torch +import torch.nn as nn +import ldm_patched.utils.path_utils +import ldm_patched.modules.clip_model +import ldm_patched.modules.clip_vision +import ldm_patched.modules.ops + +# code for model from: https://github.com/TencentARC/PhotoMaker/blob/main/photomaker/model.py under Apache License Version 2.0 +VISION_CONFIG_DICT = { + "hidden_size": 1024, + "image_size": 224, + "intermediate_size": 4096, + "num_attention_heads": 16, + "num_channels": 3, + "num_hidden_layers": 24, + "patch_size": 14, + "projection_dim": 768, + "hidden_act": "quick_gelu", +} + +class MLP(nn.Module): + def __init__(self, in_dim, out_dim, hidden_dim, use_residual=True, operations=ldm_patched.modules.ops): + super().__init__() + if use_residual: + assert in_dim == out_dim + self.layernorm = operations.LayerNorm(in_dim) + self.fc1 = operations.Linear(in_dim, hidden_dim) + self.fc2 = operations.Linear(hidden_dim, out_dim) + self.use_residual = use_residual + self.act_fn = nn.GELU() + + def forward(self, x): + residual = x + x = self.layernorm(x) + x = self.fc1(x) + x = self.act_fn(x) + x = self.fc2(x) + if self.use_residual: + x = x + residual + return x + + +class FuseModule(nn.Module): + def __init__(self, embed_dim, operations): + super().__init__() + self.mlp1 = MLP(embed_dim * 2, embed_dim, embed_dim, use_residual=False, operations=operations) + self.mlp2 = MLP(embed_dim, embed_dim, embed_dim, use_residual=True, operations=operations) + self.layer_norm = operations.LayerNorm(embed_dim) + + def fuse_fn(self, prompt_embeds, id_embeds): + stacked_id_embeds = 
torch.cat([prompt_embeds, id_embeds], dim=-1) + stacked_id_embeds = self.mlp1(stacked_id_embeds) + prompt_embeds + stacked_id_embeds = self.mlp2(stacked_id_embeds) + stacked_id_embeds = self.layer_norm(stacked_id_embeds) + return stacked_id_embeds + + def forward( + self, + prompt_embeds, + id_embeds, + class_tokens_mask, + ) -> torch.Tensor: + # id_embeds shape: [b, max_num_inputs, 1, 2048] + id_embeds = id_embeds.to(prompt_embeds.dtype) + num_inputs = class_tokens_mask.sum().unsqueeze(0) # TODO: check for training case + batch_size, max_num_inputs = id_embeds.shape[:2] + # seq_length: 77 + seq_length = prompt_embeds.shape[1] + # flat_id_embeds shape: [b*max_num_inputs, 1, 2048] + flat_id_embeds = id_embeds.view( + -1, id_embeds.shape[-2], id_embeds.shape[-1] + ) + # valid_id_mask [b*max_num_inputs] + valid_id_mask = ( + torch.arange(max_num_inputs, device=flat_id_embeds.device)[None, :] + < num_inputs[:, None] + ) + valid_id_embeds = flat_id_embeds[valid_id_mask.flatten()] + + prompt_embeds = prompt_embeds.view(-1, prompt_embeds.shape[-1]) + class_tokens_mask = class_tokens_mask.view(-1) + valid_id_embeds = valid_id_embeds.view(-1, valid_id_embeds.shape[-1]) + # slice out the image token embeddings + image_token_embeds = prompt_embeds[class_tokens_mask] + stacked_id_embeds = self.fuse_fn(image_token_embeds, valid_id_embeds) + assert class_tokens_mask.sum() == stacked_id_embeds.shape[0], f"{class_tokens_mask.sum()} != {stacked_id_embeds.shape[0]}" + prompt_embeds.masked_scatter_(class_tokens_mask[:, None], stacked_id_embeds.to(prompt_embeds.dtype)) + updated_prompt_embeds = prompt_embeds.view(batch_size, seq_length, -1) + return updated_prompt_embeds + +class PhotoMakerIDEncoder(ldm_patched.modules.clip_model.CLIPVisionModelProjection): + def __init__(self): + self.load_device = ldm_patched.modules.model_management.text_encoder_device() + offload_device = ldm_patched.modules.model_management.text_encoder_offload_device() + dtype = ldm_patched.modules.model_management.text_encoder_dtype(self.load_device) + + super().__init__(VISION_CONFIG_DICT, dtype, offload_device, ldm_patched.modules.ops.manual_cast) + self.visual_projection_2 = ldm_patched.modules.ops.manual_cast.Linear(1024, 1280, bias=False) + self.fuse_module = FuseModule(2048, ldm_patched.modules.ops.manual_cast) + + def forward(self, id_pixel_values, prompt_embeds, class_tokens_mask): + b, num_inputs, c, h, w = id_pixel_values.shape + id_pixel_values = id_pixel_values.view(b * num_inputs, c, h, w) + + shared_id_embeds = self.vision_model(id_pixel_values)[2] + id_embeds = self.visual_projection(shared_id_embeds) + id_embeds_2 = self.visual_projection_2(shared_id_embeds) + + id_embeds = id_embeds.view(b, num_inputs, 1, -1) + id_embeds_2 = id_embeds_2.view(b, num_inputs, 1, -1) + + id_embeds = torch.cat((id_embeds, id_embeds_2), dim=-1) + updated_prompt_embeds = self.fuse_module(prompt_embeds, id_embeds, class_tokens_mask) + + return updated_prompt_embeds + + +class PhotoMakerLoader: + @classmethod + def INPUT_TYPES(s): + return {"required": { "photomaker_model_name": (ldm_patched.utils.path_utils.get_filename_list("photomaker"), )}} + + RETURN_TYPES = ("PHOTOMAKER",) + FUNCTION = "load_photomaker_model" + + CATEGORY = "_for_testing/photomaker" + + def load_photomaker_model(self, photomaker_model_name): + photomaker_model_path = ldm_patched.utils.path_utils.get_full_path("photomaker", photomaker_model_name) + photomaker_model = PhotoMakerIDEncoder() + data = ldm_patched.modules.utils.load_torch_file(photomaker_model_path, 
safe_load=True) + if "id_encoder" in data: + data = data["id_encoder"] + photomaker_model.load_state_dict(data) + return (photomaker_model,) + + +class PhotoMakerEncode: + @classmethod + def INPUT_TYPES(s): + return {"required": { "photomaker": ("PHOTOMAKER",), + "image": ("IMAGE",), + "clip": ("CLIP", ), + "text": ("STRING", {"multiline": True, "default": "photograph of photomaker"}), + }} + + RETURN_TYPES = ("CONDITIONING",) + FUNCTION = "apply_photomaker" + + CATEGORY = "_for_testing/photomaker" + + def apply_photomaker(self, photomaker, image, clip, text): + special_token = "photomaker" + pixel_values = ldm_patched.modules.clip_vision.clip_preprocess(image.to(photomaker.load_device)).float() + try: + index = text.split(" ").index(special_token) + 1 + except ValueError: + index = -1 + tokens = clip.tokenize(text, return_word_ids=True) + out_tokens = {} + for k in tokens: + out_tokens[k] = [] + for t in tokens[k]: + f = list(filter(lambda x: x[2] != index, t)) + while len(f) < len(t): + f.append(t[-1]) + out_tokens[k].append(f) + + cond, pooled = clip.encode_from_tokens(out_tokens, return_pooled=True) + + if index > 0: + token_index = index - 1 + num_id_images = 1 + class_tokens_mask = [True if token_index <= i < token_index+num_id_images else False for i in range(77)] + out = photomaker(id_pixel_values=pixel_values.unsqueeze(0), prompt_embeds=cond.to(photomaker.load_device), + class_tokens_mask=torch.tensor(class_tokens_mask, dtype=torch.bool, device=photomaker.load_device).unsqueeze(0)) + else: + out = cond + + return ([[out, {"pooled_output": pooled}]], ) + + +NODE_CLASS_MAPPINGS = { + "PhotoMakerLoader": PhotoMakerLoader, + "PhotoMakerEncode": PhotoMakerEncode, +} + diff --git a/ldm_patched/contrib/external_post_processing.py b/ldm_patched/contrib/external_post_processing.py new file mode 100644 index 0000000000000000000000000000000000000000..8e936d5df334754448e25e4b6cf606525792bcb3 --- /dev/null +++ b/ldm_patched/contrib/external_post_processing.py @@ -0,0 +1,280 @@ +# Taken from https://github.com/comfyanonymous/ComfyUI +# This file is only for reference, and not used in the backend or runtime. 
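# Editor's note: the post-processing nodes below (Blur / Sharpen) build a
# normalized 2-D Gaussian kernel and apply it as a depthwise (grouped)
# convolution on a channels-first view of the image. A compact sketch of that
# pattern, assuming a channels-last IMAGE tensor as used throughout this file
# (helper name is illustrative only, not part of this repository):
def _gaussian_blur_sketch(image, radius=1, sigma=1.0):
    import torch
    import torch.nn.functional as F
    ksize = radius * 2 + 1
    x, y = torch.meshgrid(torch.linspace(-1, 1, ksize), torch.linspace(-1, 1, ksize), indexing="ij")
    g = torch.exp(-(x * x + y * y) / (2.0 * sigma * sigma))
    kernel = (g / g.sum()).repeat(image.shape[-1], 1, 1).unsqueeze(1)   # [C, 1, k, k]
    img = image.permute(0, 3, 1, 2)                                     # [B, H, W, C] -> [B, C, H, W]
    img = F.pad(img, (radius, radius, radius, radius), mode="reflect")
    out = F.conv2d(img, kernel, groups=img.shape[1])                    # depthwise convolution
    return out.permute(0, 2, 3, 1)                                      # back to [B, H, W, C]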
+ + +import numpy as np +import torch +import torch.nn.functional as F +from PIL import Image +import math + +import ldm_patched.modules.utils + + +class Blend: + def __init__(self): + pass + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "image1": ("IMAGE",), + "image2": ("IMAGE",), + "blend_factor": ("FLOAT", { + "default": 0.5, + "min": 0.0, + "max": 1.0, + "step": 0.01 + }), + "blend_mode": (["normal", "multiply", "screen", "overlay", "soft_light", "difference"],), + }, + } + + RETURN_TYPES = ("IMAGE",) + FUNCTION = "blend_images" + + CATEGORY = "image/postprocessing" + + def blend_images(self, image1: torch.Tensor, image2: torch.Tensor, blend_factor: float, blend_mode: str): + image2 = image2.to(image1.device) + if image1.shape != image2.shape: + image2 = image2.permute(0, 3, 1, 2) + image2 = ldm_patched.modules.utils.common_upscale(image2, image1.shape[2], image1.shape[1], upscale_method='bicubic', crop='center') + image2 = image2.permute(0, 2, 3, 1) + + blended_image = self.blend_mode(image1, image2, blend_mode) + blended_image = image1 * (1 - blend_factor) + blended_image * blend_factor + blended_image = torch.clamp(blended_image, 0, 1) + return (blended_image,) + + def blend_mode(self, img1, img2, mode): + if mode == "normal": + return img2 + elif mode == "multiply": + return img1 * img2 + elif mode == "screen": + return 1 - (1 - img1) * (1 - img2) + elif mode == "overlay": + return torch.where(img1 <= 0.5, 2 * img1 * img2, 1 - 2 * (1 - img1) * (1 - img2)) + elif mode == "soft_light": + return torch.where(img2 <= 0.5, img1 - (1 - 2 * img2) * img1 * (1 - img1), img1 + (2 * img2 - 1) * (self.g(img1) - img1)) + elif mode == "difference": + return img1 - img2 + else: + raise ValueError(f"Unsupported blend mode: {mode}") + + def g(self, x): + return torch.where(x <= 0.25, ((16 * x - 12) * x + 4) * x, torch.sqrt(x)) + +def gaussian_kernel(kernel_size: int, sigma: float, device=None): + x, y = torch.meshgrid(torch.linspace(-1, 1, kernel_size, device=device), torch.linspace(-1, 1, kernel_size, device=device), indexing="ij") + d = torch.sqrt(x * x + y * y) + g = torch.exp(-(d * d) / (2.0 * sigma * sigma)) + return g / g.sum() + +class Blur: + def __init__(self): + pass + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "image": ("IMAGE",), + "blur_radius": ("INT", { + "default": 1, + "min": 1, + "max": 31, + "step": 1 + }), + "sigma": ("FLOAT", { + "default": 1.0, + "min": 0.1, + "max": 10.0, + "step": 0.1 + }), + }, + } + + RETURN_TYPES = ("IMAGE",) + FUNCTION = "blur" + + CATEGORY = "image/postprocessing" + + def blur(self, image: torch.Tensor, blur_radius: int, sigma: float): + if blur_radius == 0: + return (image,) + + batch_size, height, width, channels = image.shape + + kernel_size = blur_radius * 2 + 1 + kernel = gaussian_kernel(kernel_size, sigma, device=image.device).repeat(channels, 1, 1).unsqueeze(1) + + image = image.permute(0, 3, 1, 2) # Torch wants (B, C, H, W) we use (B, H, W, C) + padded_image = F.pad(image, (blur_radius,blur_radius,blur_radius,blur_radius), 'reflect') + blurred = F.conv2d(padded_image, kernel, padding=kernel_size // 2, groups=channels)[:,:,blur_radius:-blur_radius, blur_radius:-blur_radius] + blurred = blurred.permute(0, 2, 3, 1) + + return (blurred,) + +class Quantize: + def __init__(self): + pass + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "image": ("IMAGE",), + "colors": ("INT", { + "default": 256, + "min": 1, + "max": 256, + "step": 1 + }), + "dither": (["none", "floyd-steinberg", "bayer-2", 
"bayer-4", "bayer-8", "bayer-16"],), + }, + } + + RETURN_TYPES = ("IMAGE",) + FUNCTION = "quantize" + + CATEGORY = "image/postprocessing" + + def bayer(im, pal_im, order): + def normalized_bayer_matrix(n): + if n == 0: + return np.zeros((1,1), "float32") + else: + q = 4 ** n + m = q * normalized_bayer_matrix(n - 1) + return np.bmat(((m-1.5, m+0.5), (m+1.5, m-0.5))) / q + + num_colors = len(pal_im.getpalette()) // 3 + spread = 2 * 256 / num_colors + bayer_n = int(math.log2(order)) + bayer_matrix = torch.from_numpy(spread * normalized_bayer_matrix(bayer_n) + 0.5) + + result = torch.from_numpy(np.array(im).astype(np.float32)) + tw = math.ceil(result.shape[0] / bayer_matrix.shape[0]) + th = math.ceil(result.shape[1] / bayer_matrix.shape[1]) + tiled_matrix = bayer_matrix.tile(tw, th).unsqueeze(-1) + result.add_(tiled_matrix[:result.shape[0],:result.shape[1]]).clamp_(0, 255) + result = result.to(dtype=torch.uint8) + + im = Image.fromarray(result.cpu().numpy()) + im = im.quantize(palette=pal_im, dither=Image.Dither.NONE) + return im + + def quantize(self, image: torch.Tensor, colors: int, dither: str): + batch_size, height, width, _ = image.shape + result = torch.zeros_like(image) + + for b in range(batch_size): + im = Image.fromarray((image[b] * 255).to(torch.uint8).numpy(), mode='RGB') + + pal_im = im.quantize(colors=colors) # Required as described in https://github.com/python-pillow/Pillow/issues/5836 + + if dither == "none": + quantized_image = im.quantize(palette=pal_im, dither=Image.Dither.NONE) + elif dither == "floyd-steinberg": + quantized_image = im.quantize(palette=pal_im, dither=Image.Dither.FLOYDSTEINBERG) + elif dither.startswith("bayer"): + order = int(dither.split('-')[-1]) + quantized_image = Quantize.bayer(im, pal_im, order) + + quantized_array = torch.tensor(np.array(quantized_image.convert("RGB"))).float() / 255 + result[b] = quantized_array + + return (result,) + +class Sharpen: + def __init__(self): + pass + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "image": ("IMAGE",), + "sharpen_radius": ("INT", { + "default": 1, + "min": 1, + "max": 31, + "step": 1 + }), + "sigma": ("FLOAT", { + "default": 1.0, + "min": 0.1, + "max": 10.0, + "step": 0.1 + }), + "alpha": ("FLOAT", { + "default": 1.0, + "min": 0.0, + "max": 5.0, + "step": 0.1 + }), + }, + } + + RETURN_TYPES = ("IMAGE",) + FUNCTION = "sharpen" + + CATEGORY = "image/postprocessing" + + def sharpen(self, image: torch.Tensor, sharpen_radius: int, sigma:float, alpha: float): + if sharpen_radius == 0: + return (image,) + + batch_size, height, width, channels = image.shape + + kernel_size = sharpen_radius * 2 + 1 + kernel = gaussian_kernel(kernel_size, sigma, device=image.device) * -(alpha*10) + center = kernel_size // 2 + kernel[center, center] = kernel[center, center] - kernel.sum() + 1.0 + kernel = kernel.repeat(channels, 1, 1).unsqueeze(1) + + tensor_image = image.permute(0, 3, 1, 2) # Torch wants (B, C, H, W) we use (B, H, W, C) + tensor_image = F.pad(tensor_image, (sharpen_radius,sharpen_radius,sharpen_radius,sharpen_radius), 'reflect') + sharpened = F.conv2d(tensor_image, kernel, padding=center, groups=channels)[:,:,sharpen_radius:-sharpen_radius, sharpen_radius:-sharpen_radius] + sharpened = sharpened.permute(0, 2, 3, 1) + + result = torch.clamp(sharpened, 0, 1) + + return (result,) + +class ImageScaleToTotalPixels: + upscale_methods = ["nearest-exact", "bilinear", "area", "bicubic", "lanczos"] + crop_methods = ["disabled", "center"] + + @classmethod + def INPUT_TYPES(s): + return {"required": { 
"image": ("IMAGE",), "upscale_method": (s.upscale_methods,), + "megapixels": ("FLOAT", {"default": 1.0, "min": 0.01, "max": 16.0, "step": 0.01}), + }} + RETURN_TYPES = ("IMAGE",) + FUNCTION = "upscale" + + CATEGORY = "image/upscaling" + + def upscale(self, image, upscale_method, megapixels): + samples = image.movedim(-1,1) + total = int(megapixels * 1024 * 1024) + + scale_by = math.sqrt(total / (samples.shape[3] * samples.shape[2])) + width = round(samples.shape[3] * scale_by) + height = round(samples.shape[2] * scale_by) + + s = ldm_patched.modules.utils.common_upscale(samples, width, height, upscale_method, "disabled") + s = s.movedim(1,-1) + return (s,) + +NODE_CLASS_MAPPINGS = { + "ImageBlend": Blend, + "ImageBlur": Blur, + "ImageQuantize": Quantize, + "ImageSharpen": Sharpen, + "ImageScaleToTotalPixels": ImageScaleToTotalPixels, +} diff --git a/ldm_patched/contrib/external_rebatch.py b/ldm_patched/contrib/external_rebatch.py new file mode 100644 index 0000000000000000000000000000000000000000..e3be702eb7021d648dd3af42189550e6b64461be --- /dev/null +++ b/ldm_patched/contrib/external_rebatch.py @@ -0,0 +1,142 @@ +# Taken from https://github.com/comfyanonymous/ComfyUI +# This file is only for reference, and not used in the backend or runtime. + + +import torch + +class LatentRebatch: + @classmethod + def INPUT_TYPES(s): + return {"required": { "latents": ("LATENT",), + "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}), + }} + RETURN_TYPES = ("LATENT",) + INPUT_IS_LIST = True + OUTPUT_IS_LIST = (True, ) + + FUNCTION = "rebatch" + + CATEGORY = "latent/batch" + + @staticmethod + def get_batch(latents, list_ind, offset): + '''prepare a batch out of the list of latents''' + samples = latents[list_ind]['samples'] + shape = samples.shape + mask = latents[list_ind]['noise_mask'] if 'noise_mask' in latents[list_ind] else torch.ones((shape[0], 1, shape[2]*8, shape[3]*8), device='cpu') + if mask.shape[-1] != shape[-1] * 8 or mask.shape[-2] != shape[-2]: + torch.nn.functional.interpolate(mask.reshape((-1, 1, mask.shape[-2], mask.shape[-1])), size=(shape[-2]*8, shape[-1]*8), mode="bilinear") + if mask.shape[0] < samples.shape[0]: + mask = mask.repeat((shape[0] - 1) // mask.shape[0] + 1, 1, 1, 1)[:shape[0]] + if 'batch_index' in latents[list_ind]: + batch_inds = latents[list_ind]['batch_index'] + else: + batch_inds = [x+offset for x in range(shape[0])] + return samples, mask, batch_inds + + @staticmethod + def get_slices(indexable, num, batch_size): + '''divides an indexable object into num slices of length batch_size, and a remainder''' + slices = [] + for i in range(num): + slices.append(indexable[i*batch_size:(i+1)*batch_size]) + if num * batch_size < len(indexable): + return slices, indexable[num * batch_size:] + else: + return slices, None + + @staticmethod + def slice_batch(batch, num, batch_size): + result = [LatentRebatch.get_slices(x, num, batch_size) for x in batch] + return list(zip(*result)) + + @staticmethod + def cat_batch(batch1, batch2): + if batch1[0] is None: + return batch2 + result = [torch.cat((b1, b2)) if torch.is_tensor(b1) else b1 + b2 for b1, b2 in zip(batch1, batch2)] + return result + + def rebatch(self, latents, batch_size): + batch_size = batch_size[0] + + output_list = [] + current_batch = (None, None, None) + processed = 0 + + for i in range(len(latents)): + # fetch new entry of list + #samples, masks, indices = self.get_batch(latents, i) + next_batch = self.get_batch(latents, i, processed) + processed += len(next_batch[2]) + # set to current if current is None 
+ if current_batch[0] is None: + current_batch = next_batch + # add previous to list if dimensions do not match + elif next_batch[0].shape[-1] != current_batch[0].shape[-1] or next_batch[0].shape[-2] != current_batch[0].shape[-2]: + sliced, _ = self.slice_batch(current_batch, 1, batch_size) + output_list.append({'samples': sliced[0][0], 'noise_mask': sliced[1][0], 'batch_index': sliced[2][0]}) + current_batch = next_batch + # cat if everything checks out + else: + current_batch = self.cat_batch(current_batch, next_batch) + + # add to list if dimensions gone above target batch size + if current_batch[0].shape[0] > batch_size: + num = current_batch[0].shape[0] // batch_size + sliced, remainder = self.slice_batch(current_batch, num, batch_size) + + for i in range(num): + output_list.append({'samples': sliced[0][i], 'noise_mask': sliced[1][i], 'batch_index': sliced[2][i]}) + + current_batch = remainder + + #add remainder + if current_batch[0] is not None: + sliced, _ = self.slice_batch(current_batch, 1, batch_size) + output_list.append({'samples': sliced[0][0], 'noise_mask': sliced[1][0], 'batch_index': sliced[2][0]}) + + #get rid of empty masks + for s in output_list: + if s['noise_mask'].mean() == 1.0: + del s['noise_mask'] + + return (output_list,) + +class ImageRebatch: + @classmethod + def INPUT_TYPES(s): + return {"required": { "images": ("IMAGE",), + "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}), + }} + RETURN_TYPES = ("IMAGE",) + INPUT_IS_LIST = True + OUTPUT_IS_LIST = (True, ) + + FUNCTION = "rebatch" + + CATEGORY = "image/batch" + + def rebatch(self, images, batch_size): + batch_size = batch_size[0] + + output_list = [] + all_images = [] + for img in images: + for i in range(img.shape[0]): + all_images.append(img[i:i+1]) + + for i in range(0, len(all_images), batch_size): + output_list.append(torch.cat(all_images[i:i+batch_size], dim=0)) + + return (output_list,) + +NODE_CLASS_MAPPINGS = { + "RebatchLatents": LatentRebatch, + "RebatchImages": ImageRebatch, +} + +NODE_DISPLAY_NAME_MAPPINGS = { + "RebatchLatents": "Rebatch Latents", + "RebatchImages": "Rebatch Images", +} diff --git a/ldm_patched/contrib/external_sag.py b/ldm_patched/contrib/external_sag.py new file mode 100644 index 0000000000000000000000000000000000000000..804d56113b281e573a27859fdb4bae82e125882d --- /dev/null +++ b/ldm_patched/contrib/external_sag.py @@ -0,0 +1,172 @@ +# https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py + +import torch +from torch import einsum +import torch.nn.functional as F +import math + +from einops import rearrange, repeat +import os +from ldm_patched.ldm.modules.attention import optimized_attention, _ATTN_PRECISION +import ldm_patched.modules.samplers + +# from ldm_patched.modules/ldm/modules/attention.py +# but modified to return attention scores as well as output +def attention_basic_with_sim(q, k, v, heads, mask=None): + b, _, dim_head = q.shape + dim_head //= heads + scale = dim_head ** -0.5 + + h = heads + q, k, v = map( + lambda t: t.unsqueeze(3) + .reshape(b, -1, heads, dim_head) + .permute(0, 2, 1, 3) + .reshape(b * heads, -1, dim_head) + .contiguous(), + (q, k, v), + ) + + # force cast to fp32 to avoid overflowing + if _ATTN_PRECISION =="fp32": + sim = einsum('b i d, b j d -> b i j', q.float(), k.float()) * scale + else: + sim = einsum('b i d, b j d -> b i j', q, k) * scale + + del q, k + + if mask is not None: + mask = rearrange(mask, 'b ... 
-> b (...)') + max_neg_value = -torch.finfo(sim.dtype).max + mask = repeat(mask, 'b j -> (b h) () j', h=h) + sim.masked_fill_(~mask, max_neg_value) + + # attention, what we cannot get enough of + sim = sim.softmax(dim=-1) + + out = einsum('b i j, b j d -> b i d', sim.to(v.dtype), v) + out = ( + out.unsqueeze(0) + .reshape(b, heads, -1, dim_head) + .permute(0, 2, 1, 3) + .reshape(b, -1, heads * dim_head) + ) + return (out, sim) + +def create_blur_map(x0, attn, sigma=3.0, threshold=1.0): + # reshape and GAP the attention map + _, hw1, hw2 = attn.shape + b, _, lh, lw = x0.shape + attn = attn.reshape(b, -1, hw1, hw2) + # Global Average Pool + mask = attn.mean(1, keepdim=False).sum(1, keepdim=False) > threshold + ratio = 2**(math.ceil(math.sqrt(lh * lw / hw1)) - 1).bit_length() + mid_shape = [math.ceil(lh / ratio), math.ceil(lw / ratio)] + + # Reshape + mask = ( + mask.reshape(b, *mid_shape) + .unsqueeze(1) + .type(attn.dtype) + ) + # Upsample + mask = F.interpolate(mask, (lh, lw)) + + blurred = gaussian_blur_2d(x0, kernel_size=9, sigma=sigma) + blurred = blurred * mask + x0 * (1 - mask) + return blurred + +def gaussian_blur_2d(img, kernel_size, sigma): + ksize_half = (kernel_size - 1) * 0.5 + + x = torch.linspace(-ksize_half, ksize_half, steps=kernel_size) + + pdf = torch.exp(-0.5 * (x / sigma).pow(2)) + + x_kernel = pdf / pdf.sum() + x_kernel = x_kernel.to(device=img.device, dtype=img.dtype) + + kernel2d = torch.mm(x_kernel[:, None], x_kernel[None, :]) + kernel2d = kernel2d.expand(img.shape[-3], 1, kernel2d.shape[0], kernel2d.shape[1]) + + padding = [kernel_size // 2, kernel_size // 2, kernel_size // 2, kernel_size // 2] + + img = F.pad(img, padding, mode="reflect") + img = F.conv2d(img, kernel2d, groups=img.shape[-3]) + return img + +class SelfAttentionGuidance: + @classmethod + def INPUT_TYPES(s): + return {"required": { "model": ("MODEL",), + "scale": ("FLOAT", {"default": 0.5, "min": -2.0, "max": 5.0, "step": 0.1}), + "blur_sigma": ("FLOAT", {"default": 2.0, "min": 0.0, "max": 10.0, "step": 0.1}), + }} + RETURN_TYPES = ("MODEL",) + FUNCTION = "patch" + + CATEGORY = "_for_testing" + + def patch(self, model, scale, blur_sigma): + m = model.clone() + + attn_scores = None + + # TODO: make this work properly with chunked batches + # currently, we can only save the attn from one UNet call + def attn_and_record(q, k, v, extra_options): + nonlocal attn_scores + # if uncond, save the attention scores + heads = extra_options["n_heads"] + cond_or_uncond = extra_options["cond_or_uncond"] + b = q.shape[0] // len(cond_or_uncond) + if 1 in cond_or_uncond: + uncond_index = cond_or_uncond.index(1) + # do the entire attention operation, but save the attention scores to attn_scores + (out, sim) = attention_basic_with_sim(q, k, v, heads=heads) + # when using a higher batch size, I BELIEVE the result batch dimension is [uc1, ... ucn, c1, ... 
cn] + n_slices = heads * b + attn_scores = sim[n_slices * uncond_index:n_slices * (uncond_index+1)] + return out + else: + return optimized_attention(q, k, v, heads=heads) + + def post_cfg_function(args): + nonlocal attn_scores + uncond_attn = attn_scores + + sag_scale = scale + sag_sigma = blur_sigma + sag_threshold = 1.0 + model = args["model"] + uncond_pred = args["uncond_denoised"] + uncond = args["uncond"] + cfg_result = args["denoised"] + sigma = args["sigma"] + model_options = args["model_options"] + x = args["input"] + if min(cfg_result.shape[2:]) <= 4: #skip when too small to add padding + return cfg_result + + # create the adversarially blurred image + degraded = create_blur_map(uncond_pred, uncond_attn, sag_sigma, sag_threshold) + degraded_noised = degraded + x - uncond_pred + # call into the UNet + (sag, _) = ldm_patched.modules.samplers.calc_cond_uncond_batch(model, uncond, None, degraded_noised, sigma, model_options) + return cfg_result + (degraded - sag) * sag_scale + + m.set_model_sampler_post_cfg_function(post_cfg_function, disable_cfg1_optimization=True) + + # from diffusers: + # unet.mid_block.attentions[0].transformer_blocks[0].attn1.patch + m.set_model_attn1_replace(attn_and_record, "middle", 0, 0) + + return (m, ) + +NODE_CLASS_MAPPINGS = { + "SelfAttentionGuidance": SelfAttentionGuidance, +} + +NODE_DISPLAY_NAME_MAPPINGS = { + "SelfAttentionGuidance": "Self-Attention Guidance", +} diff --git a/ldm_patched/contrib/external_sdupscale.py b/ldm_patched/contrib/external_sdupscale.py new file mode 100644 index 0000000000000000000000000000000000000000..0917fa1d69a562cc776b545d80da419eff83f933 --- /dev/null +++ b/ldm_patched/contrib/external_sdupscale.py @@ -0,0 +1,51 @@ +# Taken from https://github.com/comfyanonymous/ComfyUI +# This file is only for reference, and not used in the backend or runtime. 
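# Editor's note: SD_4XUpscale_Conditioning below prepares inputs for the SD
# x4-upscaler: the source image (rescaled to [-1, 1]) is resized to a quarter of
# the requested output size and attached to both prompts as 'concat_image', and
# an empty latent of a quarter of the output size is returned for sampling.
# A quick sketch of the size bookkeeping (illustrative values only):
def _upscale_sizes_sketch(h=128, w=128, scale_ratio=4.0):
    out_w = max(1, round(w * scale_ratio))         # target pixel width  (512)
    out_h = max(1, round(h * scale_ratio))         # target pixel height (512)
    concat_hw = (out_h // 4, out_w // 4)           # size of the conditioning image (128, 128)
    latent_shape = (1, 4, out_h // 4, out_w // 4)  # empty latent handed to the sampler
    return concat_hw, latent_shape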
+ + +import torch +import ldm_patched.contrib.external +import ldm_patched.modules.utils + +class SD_4XUpscale_Conditioning: + @classmethod + def INPUT_TYPES(s): + return {"required": { "images": ("IMAGE",), + "positive": ("CONDITIONING",), + "negative": ("CONDITIONING",), + "scale_ratio": ("FLOAT", {"default": 4.0, "min": 0.0, "max": 10.0, "step": 0.01}), + "noise_augmentation": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.001}), + }} + RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT") + RETURN_NAMES = ("positive", "negative", "latent") + + FUNCTION = "encode" + + CATEGORY = "conditioning/upscale_diffusion" + + def encode(self, images, positive, negative, scale_ratio, noise_augmentation): + width = max(1, round(images.shape[-2] * scale_ratio)) + height = max(1, round(images.shape[-3] * scale_ratio)) + + pixels = ldm_patched.modules.utils.common_upscale((images.movedim(-1,1) * 2.0) - 1.0, width // 4, height // 4, "bilinear", "center") + + out_cp = [] + out_cn = [] + + for t in positive: + n = [t[0], t[1].copy()] + n[1]['concat_image'] = pixels + n[1]['noise_augmentation'] = noise_augmentation + out_cp.append(n) + + for t in negative: + n = [t[0], t[1].copy()] + n[1]['concat_image'] = pixels + n[1]['noise_augmentation'] = noise_augmentation + out_cn.append(n) + + latent = torch.zeros([images.shape[0], 4, height // 4, width // 4]) + return (out_cp, out_cn, {"samples":latent}) + +NODE_CLASS_MAPPINGS = { + "SD_4XUpscale_Conditioning": SD_4XUpscale_Conditioning, +} diff --git a/ldm_patched/contrib/external_stable3d.py b/ldm_patched/contrib/external_stable3d.py new file mode 100644 index 0000000000000000000000000000000000000000..bae2623fada0efa2d70f93af9bf475e8a619ebb4 --- /dev/null +++ b/ldm_patched/contrib/external_stable3d.py @@ -0,0 +1,104 @@ +# https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py + +import torch +import ldm_patched.contrib.external +import ldm_patched.modules.utils + +def camera_embeddings(elevation, azimuth): + elevation = torch.as_tensor([elevation]) + azimuth = torch.as_tensor([azimuth]) + embeddings = torch.stack( + [ + torch.deg2rad( + (90 - elevation) - (90) + ), # Zero123 polar is 90-elevation + torch.sin(torch.deg2rad(azimuth)), + torch.cos(torch.deg2rad(azimuth)), + torch.deg2rad( + 90 - torch.full_like(elevation, 0) + ), + ], dim=-1).unsqueeze(1) + + return embeddings + + +class StableZero123_Conditioning: + @classmethod + def INPUT_TYPES(s): + return {"required": { "clip_vision": ("CLIP_VISION",), + "init_image": ("IMAGE",), + "vae": ("VAE",), + "width": ("INT", {"default": 256, "min": 16, "max": ldm_patched.contrib.external.MAX_RESOLUTION, "step": 8}), + "height": ("INT", {"default": 256, "min": 16, "max": ldm_patched.contrib.external.MAX_RESOLUTION, "step": 8}), + "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}), + "elevation": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0}), + "azimuth": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0}), + }} + RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT") + RETURN_NAMES = ("positive", "negative", "latent") + + FUNCTION = "encode" + + CATEGORY = "conditioning/3d_models" + + def encode(self, clip_vision, init_image, vae, width, height, batch_size, elevation, azimuth): + output = clip_vision.encode_image(init_image) + pooled = output.image_embeds.unsqueeze(0) + pixels = ldm_patched.modules.utils.common_upscale(init_image.movedim(-1,1), width, height, "bilinear", "center").movedim(1,-1) + encode_pixels = pixels[:,:,:,:3] + t = vae.encode(encode_pixels) + 
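+        # Conditioning for Zero123: the pooled CLIP-vision embedding of the input
+        # view is concatenated (along the feature axis) with a 4-value camera
+        # embedding built from elevation/azimuth, and the VAE latent of the input
+        # image is attached as 'concat_latent_image'.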
cam_embeds = camera_embeddings(elevation, azimuth) + cond = torch.cat([pooled, cam_embeds.to(pooled.device).repeat((pooled.shape[0], 1, 1))], dim=-1) + + positive = [[cond, {"concat_latent_image": t}]] + negative = [[torch.zeros_like(pooled), {"concat_latent_image": torch.zeros_like(t)}]] + latent = torch.zeros([batch_size, 4, height // 8, width // 8]) + return (positive, negative, {"samples":latent}) + +class StableZero123_Conditioning_Batched: + @classmethod + def INPUT_TYPES(s): + return {"required": { "clip_vision": ("CLIP_VISION",), + "init_image": ("IMAGE",), + "vae": ("VAE",), + "width": ("INT", {"default": 256, "min": 16, "max": ldm_patched.contrib.external.MAX_RESOLUTION, "step": 8}), + "height": ("INT", {"default": 256, "min": 16, "max": ldm_patched.contrib.external.MAX_RESOLUTION, "step": 8}), + "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}), + "elevation": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0}), + "azimuth": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0}), + "elevation_batch_increment": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0}), + "azimuth_batch_increment": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0}), + }} + RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT") + RETURN_NAMES = ("positive", "negative", "latent") + + FUNCTION = "encode" + + CATEGORY = "conditioning/3d_models" + + def encode(self, clip_vision, init_image, vae, width, height, batch_size, elevation, azimuth, elevation_batch_increment, azimuth_batch_increment): + output = clip_vision.encode_image(init_image) + pooled = output.image_embeds.unsqueeze(0) + pixels = ldm_patched.modules.utils.common_upscale(init_image.movedim(-1,1), width, height, "bilinear", "center").movedim(1,-1) + encode_pixels = pixels[:,:,:,:3] + t = vae.encode(encode_pixels) + + cam_embeds = [] + for i in range(batch_size): + cam_embeds.append(camera_embeddings(elevation, azimuth)) + elevation += elevation_batch_increment + azimuth += azimuth_batch_increment + + cam_embeds = torch.cat(cam_embeds, dim=0) + cond = torch.cat([ldm_patched.modules.utils.repeat_to_batch_size(pooled, batch_size), cam_embeds], dim=-1) + + positive = [[cond, {"concat_latent_image": t}]] + negative = [[torch.zeros_like(pooled), {"concat_latent_image": torch.zeros_like(t)}]] + latent = torch.zeros([batch_size, 4, height // 8, width // 8]) + return (positive, negative, {"samples":latent, "batch_index": [0] * batch_size}) + + +NODE_CLASS_MAPPINGS = { + "StableZero123_Conditioning": StableZero123_Conditioning, + "StableZero123_Conditioning_Batched": StableZero123_Conditioning_Batched, +} diff --git a/ldm_patched/contrib/external_tomesd.py b/ldm_patched/contrib/external_tomesd.py new file mode 100644 index 0000000000000000000000000000000000000000..1fceef8ac0746a3fc8d5bff223a2247d0a3ec1a1 --- /dev/null +++ b/ldm_patched/contrib/external_tomesd.py @@ -0,0 +1,164 @@ +# 1st edit: https://github.com/dbolya/tomesd +# 2nd edit: https://github.com/comfyanonymous/ComfyUI +# 3rd edit: Forge official + +import torch +from typing import Tuple, Callable +import math + +def do_nothing(x: torch.Tensor, mode:str=None): + return x + + +def mps_gather_workaround(input, dim, index): + if input.shape[-1] == 1: + return torch.gather( + input.unsqueeze(-1), + dim - 1 if dim < 0 else dim, + index.unsqueeze(-1) + ).squeeze(-1) + else: + return torch.gather(input, dim, index) + + +def bipartite_soft_matching_random2d(metric: torch.Tensor, + w: int, h: int, sx: int, sy: int, r: int, + no_rand: bool = False) -> Tuple[Callable, 
Callable]: + """ + Partitions the tokens into src and dst and merges r tokens from src to dst. + Dst tokens are partitioned by choosing one randomy in each (sx, sy) region. + Args: + - metric [B, N, C]: metric to use for similarity + - w: image width in tokens + - h: image height in tokens + - sx: stride in the x dimension for dst, must divide w + - sy: stride in the y dimension for dst, must divide h + - r: number of tokens to remove (by merging) + - no_rand: if true, disable randomness (use top left corner only) + """ + B, N, _ = metric.shape + + if r <= 0 or w == 1 or h == 1: + return do_nothing, do_nothing + + gather = mps_gather_workaround if metric.device.type == "mps" else torch.gather + + with torch.no_grad(): + + hsy, wsx = h // sy, w // sx + + # For each sy by sx kernel, randomly assign one token to be dst and the rest src + if no_rand: + rand_idx = torch.zeros(hsy, wsx, 1, device=metric.device, dtype=torch.int64) + else: + rand_idx = torch.randint(sy*sx, size=(hsy, wsx, 1), device=metric.device) + + # The image might not divide sx and sy, so we need to work on a view of the top left if the idx buffer instead + idx_buffer_view = torch.zeros(hsy, wsx, sy*sx, device=metric.device, dtype=torch.int64) + idx_buffer_view.scatter_(dim=2, index=rand_idx, src=-torch.ones_like(rand_idx, dtype=rand_idx.dtype)) + idx_buffer_view = idx_buffer_view.view(hsy, wsx, sy, sx).transpose(1, 2).reshape(hsy * sy, wsx * sx) + + # Image is not divisible by sx or sy so we need to move it into a new buffer + if (hsy * sy) < h or (wsx * sx) < w: + idx_buffer = torch.zeros(h, w, device=metric.device, dtype=torch.int64) + idx_buffer[:(hsy * sy), :(wsx * sx)] = idx_buffer_view + else: + idx_buffer = idx_buffer_view + + # We set dst tokens to be -1 and src to be 0, so an argsort gives us dst|src indices + rand_idx = idx_buffer.reshape(1, -1, 1).argsort(dim=1) + + # We're finished with these + del idx_buffer, idx_buffer_view + + # rand_idx is currently dst|src, so split them + num_dst = hsy * wsx + a_idx = rand_idx[:, num_dst:, :] # src + b_idx = rand_idx[:, :num_dst, :] # dst + + def split(x): + C = x.shape[-1] + src = gather(x, dim=1, index=a_idx.expand(B, N - num_dst, C)) + dst = gather(x, dim=1, index=b_idx.expand(B, num_dst, C)) + return src, dst + + # Cosine similarity between A and B + metric = metric / metric.norm(dim=-1, keepdim=True) + a, b = split(metric) + scores = a @ b.transpose(-1, -2) + + # Can't reduce more than the # tokens in src + r = min(a.shape[1], r) + + # Find the most similar greedily + node_max, node_idx = scores.max(dim=-1) + edge_idx = node_max.argsort(dim=-1, descending=True)[..., None] + + unm_idx = edge_idx[..., r:, :] # Unmerged Tokens + src_idx = edge_idx[..., :r, :] # Merged Tokens + dst_idx = gather(node_idx[..., None], dim=-2, index=src_idx) + + def merge(x: torch.Tensor, mode="mean") -> torch.Tensor: + src, dst = split(x) + n, t1, c = src.shape + + unm = gather(src, dim=-2, index=unm_idx.expand(n, t1 - r, c)) + src = gather(src, dim=-2, index=src_idx.expand(n, r, c)) + dst = dst.scatter_reduce(-2, dst_idx.expand(n, r, c), src, reduce=mode) + + return torch.cat([unm, dst], dim=1) + + def unmerge(x: torch.Tensor) -> torch.Tensor: + unm_len = unm_idx.shape[1] + unm, dst = x[..., :unm_len, :], x[..., unm_len:, :] + _, _, c = unm.shape + + src = gather(dst, dim=-2, index=dst_idx.expand(B, r, c)) + + # Combine back to the original shape + out = torch.zeros(B, N, c, device=x.device, dtype=x.dtype) + out.scatter_(dim=-2, index=b_idx.expand(B, num_dst, c), src=dst) + 
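+            # Restore the unmerged src tokens to their original positions, then
+            # approximate each merged src token by copying back the value of the
+            # dst token it was merged into.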
out.scatter_(dim=-2, index=gather(a_idx.expand(B, a_idx.shape[1], 1), dim=1, index=unm_idx).expand(B, unm_len, c), src=unm) + out.scatter_(dim=-2, index=gather(a_idx.expand(B, a_idx.shape[1], 1), dim=1, index=src_idx).expand(B, r, c), src=src) + + return out + + return merge, unmerge + + +def get_functions(x, ratio, original_shape): + b, c, original_h, original_w = original_shape + original_tokens = original_h * original_w + downsample = int(math.ceil(math.sqrt(original_tokens // x.shape[1]))) + stride_x = 2 + stride_y = 2 + max_downsample = 1 + + if downsample <= max_downsample: + w = int(math.ceil(original_w / downsample)) + h = int(math.ceil(original_h / downsample)) + r = int(x.shape[1] * ratio) + no_rand = False + m, u = bipartite_soft_matching_random2d(x, w, h, stride_x, stride_y, r, no_rand) + return m, u + + nothing = lambda y: y + return nothing, nothing + + +class TomePatcher: + def __init__(self): + self.u = None + + def patch(self, model, ratio): + def tomesd_m(q, k, v, extra_options): + m, self.u = get_functions(q, ratio, extra_options["original_shape"]) + return m(q), k, v + + def tomesd_u(n, extra_options): + return self.u(n) + + m = model.clone() + m.set_model_attn1_patch(tomesd_m) + m.set_model_attn1_output_patch(tomesd_u) + return m diff --git a/ldm_patched/contrib/external_upscale_model.py b/ldm_patched/contrib/external_upscale_model.py new file mode 100644 index 0000000000000000000000000000000000000000..674d8561b1e578c1c80d61a9818f4478bb272323 --- /dev/null +++ b/ldm_patched/contrib/external_upscale_model.py @@ -0,0 +1,70 @@ +# Taken from https://github.com/comfyanonymous/ComfyUI +# This file is only for reference, and not used in the backend or runtime. + + +import os +from ldm_patched.pfn import model_loading +from ldm_patched.modules import model_management +import torch +import ldm_patched.modules.utils +import ldm_patched.utils.path_utils + +class UpscaleModelLoader: + @classmethod + def INPUT_TYPES(s): + return {"required": { "model_name": (ldm_patched.utils.path_utils.get_filename_list("upscale_models"), ), + }} + RETURN_TYPES = ("UPSCALE_MODEL",) + FUNCTION = "load_model" + + CATEGORY = "loaders" + + def load_model(self, model_name): + model_path = ldm_patched.utils.path_utils.get_full_path("upscale_models", model_name) + sd = ldm_patched.modules.utils.load_torch_file(model_path, safe_load=True) + if "module.layers.0.residual_group.blocks.0.norm1.weight" in sd: + sd = ldm_patched.modules.utils.state_dict_prefix_replace(sd, {"module.":""}) + out = model_loading.load_state_dict(sd).eval() + return (out, ) + + +class ImageUpscaleWithModel: + @classmethod + def INPUT_TYPES(s): + return {"required": { "upscale_model": ("UPSCALE_MODEL",), + "image": ("IMAGE",), + }} + RETURN_TYPES = ("IMAGE",) + FUNCTION = "upscale" + + CATEGORY = "image/upscaling" + + def upscale(self, upscale_model, image): + device = model_management.get_torch_device() + upscale_model.to(device) + in_img = image.movedim(-1,-3).to(device) + free_memory = model_management.get_free_memory(device) + + tile = 512 + overlap = 32 + + oom = True + while oom: + try: + steps = in_img.shape[0] * ldm_patched.modules.utils.get_tiled_scale_steps(in_img.shape[3], in_img.shape[2], tile_x=tile, tile_y=tile, overlap=overlap) + pbar = ldm_patched.modules.utils.ProgressBar(steps) + s = ldm_patched.modules.utils.tiled_scale(in_img, lambda a: upscale_model(a), tile_x=tile, tile_y=tile, overlap=overlap, upscale_amount=upscale_model.scale, pbar=pbar) + oom = False + except model_management.OOM_EXCEPTION as e: + tile //= 2 
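+                # Halve the tile size and retry the tiled upscale (e.g. 512 -> 256 -> 128);
+                # once tiles would drop below 128px, give up and re-raise the OOM error.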
+ if tile < 128: + raise e + + upscale_model.cpu() + s = torch.clamp(s.movedim(-3,-1), min=0, max=1.0) + return (s,) + +NODE_CLASS_MAPPINGS = { + "UpscaleModelLoader": UpscaleModelLoader, + "ImageUpscaleWithModel": ImageUpscaleWithModel +} diff --git a/ldm_patched/contrib/external_video_model.py b/ldm_patched/contrib/external_video_model.py new file mode 100644 index 0000000000000000000000000000000000000000..35c741e00be1fcd2686b9001a7d706150804ea23 --- /dev/null +++ b/ldm_patched/contrib/external_video_model.py @@ -0,0 +1,110 @@ +# Taken from https://github.com/comfyanonymous/ComfyUI +# This file is only for reference, and not used in the backend or runtime. + + +import ldm_patched.contrib.external +import torch +import ldm_patched.modules.utils +import ldm_patched.modules.sd +import ldm_patched.utils.path_utils +import ldm_patched.contrib.external_model_merging + + +class ImageOnlyCheckpointLoader: + @classmethod + def INPUT_TYPES(s): + return {"required": { "ckpt_name": (ldm_patched.utils.path_utils.get_filename_list("checkpoints"), ), + }} + RETURN_TYPES = ("MODEL", "CLIP_VISION", "VAE") + FUNCTION = "load_checkpoint" + + CATEGORY = "loaders/video_models" + + def load_checkpoint(self, ckpt_name, output_vae=True, output_clip=True): + ckpt_path = ldm_patched.utils.path_utils.get_full_path("checkpoints", ckpt_name) + out = ldm_patched.modules.sd.load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=False, output_clipvision=True, embedding_directory=ldm_patched.utils.path_utils.get_folder_paths("embeddings")) + return (out[0], out[3], out[2]) + + +class SVD_img2vid_Conditioning: + @classmethod + def INPUT_TYPES(s): + return {"required": { "clip_vision": ("CLIP_VISION",), + "init_image": ("IMAGE",), + "vae": ("VAE",), + "width": ("INT", {"default": 1024, "min": 16, "max": ldm_patched.contrib.external.MAX_RESOLUTION, "step": 8}), + "height": ("INT", {"default": 576, "min": 16, "max": ldm_patched.contrib.external.MAX_RESOLUTION, "step": 8}), + "video_frames": ("INT", {"default": 14, "min": 1, "max": 4096}), + "motion_bucket_id": ("INT", {"default": 127, "min": 1, "max": 1023}), + "fps": ("INT", {"default": 6, "min": 1, "max": 1024}), + "augmentation_level": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 10.0, "step": 0.01}) + }} + RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT") + RETURN_NAMES = ("positive", "negative", "latent") + + FUNCTION = "encode" + + CATEGORY = "conditioning/video_models" + + def encode(self, clip_vision, init_image, vae, width, height, video_frames, motion_bucket_id, fps, augmentation_level): + output = clip_vision.encode_image(init_image) + pooled = output.image_embeds.unsqueeze(0) + pixels = ldm_patched.modules.utils.common_upscale(init_image.movedim(-1,1), width, height, "bilinear", "center").movedim(1,-1) + encode_pixels = pixels[:,:,:,:3] + if augmentation_level > 0: + encode_pixels += torch.randn_like(pixels) * augmentation_level + t = vae.encode(encode_pixels) + positive = [[pooled, {"motion_bucket_id": motion_bucket_id, "fps": fps, "augmentation_level": augmentation_level, "concat_latent_image": t}]] + negative = [[torch.zeros_like(pooled), {"motion_bucket_id": motion_bucket_id, "fps": fps, "augmentation_level": augmentation_level, "concat_latent_image": torch.zeros_like(t)}]] + latent = torch.zeros([video_frames, 4, height // 8, width // 8]) + return (positive, negative, {"samples":latent}) + +class VideoLinearCFGGuidance: + @classmethod + def INPUT_TYPES(s): + return {"required": { "model": ("MODEL",), + "min_cfg": ("FLOAT", {"default": 
1.0, "min": 0.0, "max": 100.0, "step":0.5, "round": 0.01}), + }} + RETURN_TYPES = ("MODEL",) + FUNCTION = "patch" + + CATEGORY = "sampling/video_models" + + def patch(self, model, min_cfg): + def linear_cfg(args): + cond = args["cond"] + uncond = args["uncond"] + cond_scale = args["cond_scale"] + + scale = torch.linspace(min_cfg, cond_scale, cond.shape[0], device=cond.device).reshape((cond.shape[0], 1, 1, 1)) + return uncond + scale * (cond - uncond) + + m = model.clone() + m.set_model_sampler_cfg_function(linear_cfg) + return (m, ) + +class ImageOnlyCheckpointSave(ldm_patched.contrib.external_model_merging.CheckpointSave): + CATEGORY = "_for_testing" + + @classmethod + def INPUT_TYPES(s): + return {"required": { "model": ("MODEL",), + "clip_vision": ("CLIP_VISION",), + "vae": ("VAE",), + "filename_prefix": ("STRING", {"default": "checkpoints/ldm_patched"}),}, + "hidden": {"prompt": "PROMPT", "extra_pnginfo": "EXTRA_PNGINFO"},} + + def save(self, model, clip_vision, vae, filename_prefix, prompt=None, extra_pnginfo=None): + ldm_patched.contrib.external_model_merging.save_checkpoint(model, clip_vision=clip_vision, vae=vae, filename_prefix=filename_prefix, output_dir=self.output_dir, prompt=prompt, extra_pnginfo=extra_pnginfo) + return {} + +NODE_CLASS_MAPPINGS = { + "ImageOnlyCheckpointLoader": ImageOnlyCheckpointLoader, + "SVD_img2vid_Conditioning": SVD_img2vid_Conditioning, + "VideoLinearCFGGuidance": VideoLinearCFGGuidance, + "ImageOnlyCheckpointSave": ImageOnlyCheckpointSave, +} + +NODE_DISPLAY_NAME_MAPPINGS = { + "ImageOnlyCheckpointLoader": "Image Only Checkpoint Loader (img2vid model)", +} diff --git a/ldm_patched/controlnet/cldm.py b/ldm_patched/controlnet/cldm.py new file mode 100644 index 0000000000000000000000000000000000000000..82265ef955f6384895f0b530870195da7866fdbc --- /dev/null +++ b/ldm_patched/controlnet/cldm.py @@ -0,0 +1,312 @@ +#taken from: https://github.com/lllyasviel/ControlNet +#and modified + +import torch +import torch as th +import torch.nn as nn + +from ldm_patched.ldm.modules.diffusionmodules.util import ( + zero_module, + timestep_embedding, +) + +from ldm_patched.ldm.modules.attention import SpatialTransformer +from ldm_patched.ldm.modules.diffusionmodules.openaimodel import UNetModel, TimestepEmbedSequential, ResBlock, Downsample +from ldm_patched.ldm.util import exists +import ldm_patched.modules.ops + +class ControlledUnetModel(UNetModel): + #implemented in the ldm unet + pass + +class ControlNet(nn.Module): + def __init__( + self, + image_size, + in_channels, + model_channels, + hint_channels, + num_res_blocks, + dropout=0, + channel_mult=(1, 2, 4, 8), + conv_resample=True, + dims=2, + num_classes=None, + use_checkpoint=False, + dtype=torch.float32, + num_heads=-1, + num_head_channels=-1, + num_heads_upsample=-1, + use_scale_shift_norm=False, + resblock_updown=False, + use_new_attention_order=False, + use_spatial_transformer=False, # custom transformer support + transformer_depth=1, # custom transformer support + context_dim=None, # custom transformer support + n_embed=None, # custom support for prediction of discrete ids into codebook of first stage vq model + legacy=True, + disable_self_attentions=None, + num_attention_blocks=None, + disable_middle_self_attn=False, + use_linear_in_transformer=False, + adm_in_channels=None, + transformer_depth_middle=None, + transformer_depth_output=None, + device=None, + operations=ldm_patched.modules.ops.disable_weight_init, + **kwargs, + ): + super().__init__() + assert use_spatial_transformer == True, 
"use_spatial_transformer has to be true" + if use_spatial_transformer: + assert context_dim is not None, 'Fool!! You forgot to include the dimension of your cross-attention conditioning...' + + if context_dim is not None: + assert use_spatial_transformer, 'Fool!! You forgot to use the spatial transformer for your cross-attention conditioning...' + # from omegaconf.listconfig import ListConfig + # if type(context_dim) == ListConfig: + # context_dim = list(context_dim) + + if num_heads_upsample == -1: + num_heads_upsample = num_heads + + if num_heads == -1: + assert num_head_channels != -1, 'Either num_heads or num_head_channels has to be set' + + if num_head_channels == -1: + assert num_heads != -1, 'Either num_heads or num_head_channels has to be set' + + self.dims = dims + self.image_size = image_size + self.in_channels = in_channels + self.model_channels = model_channels + + if isinstance(num_res_blocks, int): + self.num_res_blocks = len(channel_mult) * [num_res_blocks] + else: + if len(num_res_blocks) != len(channel_mult): + raise ValueError("provide num_res_blocks either as an int (globally constant) or " + "as a list/tuple (per-level) with the same length as channel_mult") + self.num_res_blocks = num_res_blocks + + if disable_self_attentions is not None: + # should be a list of booleans, indicating whether to disable self-attention in TransformerBlocks or not + assert len(disable_self_attentions) == len(channel_mult) + if num_attention_blocks is not None: + assert len(num_attention_blocks) == len(self.num_res_blocks) + assert all(map(lambda i: self.num_res_blocks[i] >= num_attention_blocks[i], range(len(num_attention_blocks)))) + + transformer_depth = transformer_depth[:] + + self.dropout = dropout + self.channel_mult = channel_mult + self.conv_resample = conv_resample + self.num_classes = num_classes + self.use_checkpoint = use_checkpoint + self.dtype = dtype + self.num_heads = num_heads + self.num_head_channels = num_head_channels + self.num_heads_upsample = num_heads_upsample + self.predict_codebook_ids = n_embed is not None + + time_embed_dim = model_channels * 4 + self.time_embed = nn.Sequential( + operations.Linear(model_channels, time_embed_dim, dtype=self.dtype, device=device), + nn.SiLU(), + operations.Linear(time_embed_dim, time_embed_dim, dtype=self.dtype, device=device), + ) + + if self.num_classes is not None: + if isinstance(self.num_classes, int): + self.label_emb = nn.Embedding(num_classes, time_embed_dim) + elif self.num_classes == "continuous": + print("setting up linear c_adm embedding layer") + self.label_emb = nn.Linear(1, time_embed_dim) + elif self.num_classes == "sequential": + assert adm_in_channels is not None + self.label_emb = nn.Sequential( + nn.Sequential( + operations.Linear(adm_in_channels, time_embed_dim, dtype=self.dtype, device=device), + nn.SiLU(), + operations.Linear(time_embed_dim, time_embed_dim, dtype=self.dtype, device=device), + ) + ) + else: + raise ValueError() + + self.input_blocks = nn.ModuleList( + [ + TimestepEmbedSequential( + operations.conv_nd(dims, in_channels, model_channels, 3, padding=1, dtype=self.dtype, device=device) + ) + ] + ) + self.zero_convs = nn.ModuleList([self.make_zero_conv(model_channels, operations=operations, dtype=self.dtype, device=device)]) + + self.input_hint_block = TimestepEmbedSequential( + operations.conv_nd(dims, hint_channels, 16, 3, padding=1, dtype=self.dtype, device=device), + nn.SiLU(), + operations.conv_nd(dims, 16, 16, 3, padding=1, dtype=self.dtype, device=device), + nn.SiLU(), + 
operations.conv_nd(dims, 16, 32, 3, padding=1, stride=2, dtype=self.dtype, device=device), + nn.SiLU(), + operations.conv_nd(dims, 32, 32, 3, padding=1, dtype=self.dtype, device=device), + nn.SiLU(), + operations.conv_nd(dims, 32, 96, 3, padding=1, stride=2, dtype=self.dtype, device=device), + nn.SiLU(), + operations.conv_nd(dims, 96, 96, 3, padding=1, dtype=self.dtype, device=device), + nn.SiLU(), + operations.conv_nd(dims, 96, 256, 3, padding=1, stride=2, dtype=self.dtype, device=device), + nn.SiLU(), + operations.conv_nd(dims, 256, model_channels, 3, padding=1, dtype=self.dtype, device=device) + ) + + self._feature_size = model_channels + input_block_chans = [model_channels] + ch = model_channels + ds = 1 + for level, mult in enumerate(channel_mult): + for nr in range(self.num_res_blocks[level]): + layers = [ + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=mult * model_channels, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + dtype=self.dtype, + device=device, + operations=operations, + ) + ] + ch = mult * model_channels + num_transformers = transformer_depth.pop(0) + if num_transformers > 0: + if num_head_channels == -1: + dim_head = ch // num_heads + else: + num_heads = ch // num_head_channels + dim_head = num_head_channels + if legacy: + #num_heads = 1 + dim_head = ch // num_heads if use_spatial_transformer else num_head_channels + if exists(disable_self_attentions): + disabled_sa = disable_self_attentions[level] + else: + disabled_sa = False + + if not exists(num_attention_blocks) or nr < num_attention_blocks[level]: + layers.append( + SpatialTransformer( + ch, num_heads, dim_head, depth=num_transformers, context_dim=context_dim, + disable_self_attn=disabled_sa, use_linear=use_linear_in_transformer, + use_checkpoint=use_checkpoint, dtype=self.dtype, device=device, operations=operations + ) + ) + self.input_blocks.append(TimestepEmbedSequential(*layers)) + self.zero_convs.append(self.make_zero_conv(ch, operations=operations, dtype=self.dtype, device=device)) + self._feature_size += ch + input_block_chans.append(ch) + if level != len(channel_mult) - 1: + out_ch = ch + self.input_blocks.append( + TimestepEmbedSequential( + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=out_ch, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + down=True, + dtype=self.dtype, + device=device, + operations=operations + ) + if resblock_updown + else Downsample( + ch, conv_resample, dims=dims, out_channels=out_ch, dtype=self.dtype, device=device, operations=operations + ) + ) + ) + ch = out_ch + input_block_chans.append(ch) + self.zero_convs.append(self.make_zero_conv(ch, operations=operations, dtype=self.dtype, device=device)) + ds *= 2 + self._feature_size += ch + + if num_head_channels == -1: + dim_head = ch // num_heads + else: + num_heads = ch // num_head_channels + dim_head = num_head_channels + if legacy: + #num_heads = 1 + dim_head = ch // num_heads if use_spatial_transformer else num_head_channels + mid_block = [ + ResBlock( + ch, + time_embed_dim, + dropout, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + dtype=self.dtype, + device=device, + operations=operations + )] + if transformer_depth_middle >= 0: + mid_block += [SpatialTransformer( # always uses a self-attn + ch, num_heads, dim_head, depth=transformer_depth_middle, context_dim=context_dim, + disable_self_attn=disable_middle_self_attn, use_linear=use_linear_in_transformer, + 
use_checkpoint=use_checkpoint, dtype=self.dtype, device=device, operations=operations + ), + ResBlock( + ch, + time_embed_dim, + dropout, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + dtype=self.dtype, + device=device, + operations=operations + )] + self.middle_block = TimestepEmbedSequential(*mid_block) + self.middle_block_out = self.make_zero_conv(ch, operations=operations, dtype=self.dtype, device=device) + self._feature_size += ch + + def make_zero_conv(self, channels, operations=None, dtype=None, device=None): + return TimestepEmbedSequential(operations.conv_nd(self.dims, channels, channels, 1, padding=0, dtype=dtype, device=device)) + + def forward(self, x, hint, timesteps, context, y=None, **kwargs): + t_emb = timestep_embedding(timesteps, self.model_channels, repeat_only=False).to(x.dtype) + emb = self.time_embed(t_emb) + + guided_hint = self.input_hint_block(hint, emb, context) + + outs = [] + + hs = [] + if self.num_classes is not None: + assert y.shape[0] == x.shape[0] + emb = emb + self.label_emb(y) + + h = x + for module, zero_conv in zip(self.input_blocks, self.zero_convs): + if guided_hint is not None: + h = module(h, emb, context) + h += guided_hint + guided_hint = None + else: + h = module(h, emb, context) + outs.append(zero_conv(h, emb, context)) + + h = self.middle_block(h, emb, context) + outs.append(self.middle_block_out(h, emb, context)) + + return outs + diff --git a/ldm_patched/k_diffusion/sampling.py b/ldm_patched/k_diffusion/sampling.py new file mode 100644 index 0000000000000000000000000000000000000000..6f2fbea75803868fbba485e766fcca5e6e0ea6af --- /dev/null +++ b/ldm_patched/k_diffusion/sampling.py @@ -0,0 +1,814 @@ +# Taken from https://github.com/comfyanonymous/ComfyUI +# This file is only for reference, and not used in the backend or runtime. + + +import math + +from scipy import integrate +import torch +from torch import nn +import torchsde +from tqdm.auto import trange, tqdm + +from . import utils + + +def append_zero(x): + return torch.cat([x, x.new_zeros([1])]) + + +def get_sigmas_karras(n, sigma_min, sigma_max, rho=7., device='cpu'): + """Constructs the noise schedule of Karras et al. 
(2022).""" + ramp = torch.linspace(0, 1, n, device=device) + min_inv_rho = sigma_min ** (1 / rho) + max_inv_rho = sigma_max ** (1 / rho) + sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho + return append_zero(sigmas).to(device) + + +def get_sigmas_exponential(n, sigma_min, sigma_max, device='cpu'): + """Constructs an exponential noise schedule.""" + sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), n, device=device).exp() + return append_zero(sigmas) + + +def get_sigmas_polyexponential(n, sigma_min, sigma_max, rho=1., device='cpu'): + """Constructs an polynomial in log sigma noise schedule.""" + ramp = torch.linspace(1, 0, n, device=device) ** rho + sigmas = torch.exp(ramp * (math.log(sigma_max) - math.log(sigma_min)) + math.log(sigma_min)) + return append_zero(sigmas) + + +def get_sigmas_vp(n, beta_d=19.9, beta_min=0.1, eps_s=1e-3, device='cpu'): + """Constructs a continuous VP noise schedule.""" + t = torch.linspace(1, eps_s, n, device=device) + sigmas = torch.sqrt(torch.exp(beta_d * t ** 2 / 2 + beta_min * t) - 1) + return append_zero(sigmas) + + +def to_d(x, sigma, denoised): + """Converts a denoiser output to a Karras ODE derivative.""" + return (x - denoised) / utils.append_dims(sigma, x.ndim) + + +def get_ancestral_step(sigma_from, sigma_to, eta=1.): + """Calculates the noise level (sigma_down) to step down to and the amount + of noise to add (sigma_up) when doing an ancestral sampling step.""" + if not eta: + return sigma_to, 0. + sigma_up = min(sigma_to, eta * (sigma_to ** 2 * (sigma_from ** 2 - sigma_to ** 2) / sigma_from ** 2) ** 0.5) + sigma_down = (sigma_to ** 2 - sigma_up ** 2) ** 0.5 + return sigma_down, sigma_up + + +def default_noise_sampler(x): + return lambda sigma, sigma_next: torch.randn_like(x) + + +class BatchedBrownianTree: + """A wrapper around torchsde.BrownianTree that enables batches of entropy.""" + + def __init__(self, x, t0, t1, seed=None, **kwargs): + self.cpu_tree = True + if "cpu" in kwargs: + self.cpu_tree = kwargs.pop("cpu") + t0, t1, self.sign = self.sort(t0, t1) + w0 = kwargs.get('w0', torch.zeros_like(x)) + if seed is None: + seed = torch.randint(0, 2 ** 63 - 1, []).item() + self.batched = True + try: + assert len(seed) == x.shape[0] + w0 = w0[0] + except TypeError: + seed = [seed] + self.batched = False + if self.cpu_tree: + self.trees = [torchsde.BrownianTree(t0.cpu(), w0.cpu(), t1.cpu(), entropy=s, **kwargs) for s in seed] + else: + self.trees = [torchsde.BrownianTree(t0, w0, t1, entropy=s, **kwargs) for s in seed] + + @staticmethod + def sort(a, b): + return (a, b, 1) if a < b else (b, a, -1) + + def __call__(self, t0, t1): + t0, t1, sign = self.sort(t0, t1) + if self.cpu_tree: + w = torch.stack([tree(t0.cpu().float(), t1.cpu().float()).to(t0.dtype).to(t0.device) for tree in self.trees]) * (self.sign * sign) + else: + w = torch.stack([tree(t0, t1) for tree in self.trees]) * (self.sign * sign) + + return w if self.batched else w[0] + + +class BrownianTreeNoiseSampler: + """A noise sampler backed by a torchsde.BrownianTree. + + Args: + x (Tensor): The tensor whose shape, device and dtype to use to generate + random samples. + sigma_min (float): The low end of the valid interval. + sigma_max (float): The high end of the valid interval. + seed (int or List[int]): The random seed. If a list of seeds is + supplied instead of a single integer, then the noise sampler will + use one BrownianTree per batch item, each with its own seed. + transform (callable): A function that maps sigma to the sampler's + internal timestep. 
+ """ + + def __init__(self, x, sigma_min, sigma_max, seed=None, transform=lambda x: x, cpu=False): + self.transform = transform + t0, t1 = self.transform(torch.as_tensor(sigma_min)), self.transform(torch.as_tensor(sigma_max)) + self.tree = BatchedBrownianTree(x, t0, t1, seed, cpu=cpu) + + def __call__(self, sigma, sigma_next): + t0, t1 = self.transform(torch.as_tensor(sigma)), self.transform(torch.as_tensor(sigma_next)) + return self.tree(t0, t1) / (t1 - t0).abs().sqrt() + + +@torch.no_grad() +def sample_euler(model, x, sigmas, extra_args=None, callback=None, disable=None, s_churn=0., s_tmin=0., s_tmax=float('inf'), s_noise=1.): + """Implements Algorithm 2 (Euler steps) from Karras et al. (2022).""" + extra_args = {} if extra_args is None else extra_args + s_in = x.new_ones([x.shape[0]]) + for i in trange(len(sigmas) - 1, disable=disable): + gamma = min(s_churn / (len(sigmas) - 1), 2 ** 0.5 - 1) if s_tmin <= sigmas[i] <= s_tmax else 0. + sigma_hat = sigmas[i] * (gamma + 1) + if gamma > 0: + eps = torch.randn_like(x) * s_noise + x = x + eps * (sigma_hat ** 2 - sigmas[i] ** 2) ** 0.5 + denoised = model(x, sigma_hat * s_in, **extra_args) + d = to_d(x, sigma_hat, denoised) + if callback is not None: + callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigma_hat, 'denoised': denoised}) + dt = sigmas[i + 1] - sigma_hat + # Euler method + x = x + d * dt + return x + + +@torch.no_grad() +def sample_euler_ancestral(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None): + """Ancestral sampling with Euler method steps.""" + extra_args = {} if extra_args is None else extra_args + noise_sampler = default_noise_sampler(x) if noise_sampler is None else noise_sampler + s_in = x.new_ones([x.shape[0]]) + for i in trange(len(sigmas) - 1, disable=disable): + denoised = model(x, sigmas[i] * s_in, **extra_args) + sigma_down, sigma_up = get_ancestral_step(sigmas[i], sigmas[i + 1], eta=eta) + if callback is not None: + callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised}) + d = to_d(x, sigmas[i], denoised) + # Euler method + dt = sigma_down - sigmas[i] + x = x + d * dt + if sigmas[i + 1] > 0: + x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * s_noise * sigma_up + return x + + +@torch.no_grad() +def sample_heun(model, x, sigmas, extra_args=None, callback=None, disable=None, s_churn=0., s_tmin=0., s_tmax=float('inf'), s_noise=1.): + """Implements Algorithm 2 (Heun steps) from Karras et al. (2022).""" + extra_args = {} if extra_args is None else extra_args + s_in = x.new_ones([x.shape[0]]) + for i in trange(len(sigmas) - 1, disable=disable): + gamma = min(s_churn / (len(sigmas) - 1), 2 ** 0.5 - 1) if s_tmin <= sigmas[i] <= s_tmax else 0. 
+ sigma_hat = sigmas[i] * (gamma + 1) + if gamma > 0: + eps = torch.randn_like(x) * s_noise + x = x + eps * (sigma_hat ** 2 - sigmas[i] ** 2) ** 0.5 + denoised = model(x, sigma_hat * s_in, **extra_args) + d = to_d(x, sigma_hat, denoised) + if callback is not None: + callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigma_hat, 'denoised': denoised}) + dt = sigmas[i + 1] - sigma_hat + if sigmas[i + 1] == 0: + # Euler method + x = x + d * dt + else: + # Heun's method + x_2 = x + d * dt + denoised_2 = model(x_2, sigmas[i + 1] * s_in, **extra_args) + d_2 = to_d(x_2, sigmas[i + 1], denoised_2) + d_prime = (d + d_2) / 2 + x = x + d_prime * dt + return x + + +@torch.no_grad() +def sample_dpm_2(model, x, sigmas, extra_args=None, callback=None, disable=None, s_churn=0., s_tmin=0., s_tmax=float('inf'), s_noise=1.): + """A sampler inspired by DPM-Solver-2 and Algorithm 2 from Karras et al. (2022).""" + extra_args = {} if extra_args is None else extra_args + s_in = x.new_ones([x.shape[0]]) + for i in trange(len(sigmas) - 1, disable=disable): + gamma = min(s_churn / (len(sigmas) - 1), 2 ** 0.5 - 1) if s_tmin <= sigmas[i] <= s_tmax else 0. + sigma_hat = sigmas[i] * (gamma + 1) + if gamma > 0: + eps = torch.randn_like(x) * s_noise + x = x + eps * (sigma_hat ** 2 - sigmas[i] ** 2) ** 0.5 + denoised = model(x, sigma_hat * s_in, **extra_args) + d = to_d(x, sigma_hat, denoised) + if callback is not None: + callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigma_hat, 'denoised': denoised}) + if sigmas[i + 1] == 0: + # Euler method + dt = sigmas[i + 1] - sigma_hat + x = x + d * dt + else: + # DPM-Solver-2 + sigma_mid = sigma_hat.log().lerp(sigmas[i + 1].log(), 0.5).exp() + dt_1 = sigma_mid - sigma_hat + dt_2 = sigmas[i + 1] - sigma_hat + x_2 = x + d * dt_1 + denoised_2 = model(x_2, sigma_mid * s_in, **extra_args) + d_2 = to_d(x_2, sigma_mid, denoised_2) + x = x + d_2 * dt_2 + return x + + +@torch.no_grad() +def sample_dpm_2_ancestral(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None): + """Ancestral sampling with DPM-Solver second-order steps.""" + extra_args = {} if extra_args is None else extra_args + noise_sampler = default_noise_sampler(x) if noise_sampler is None else noise_sampler + s_in = x.new_ones([x.shape[0]]) + for i in trange(len(sigmas) - 1, disable=disable): + denoised = model(x, sigmas[i] * s_in, **extra_args) + sigma_down, sigma_up = get_ancestral_step(sigmas[i], sigmas[i + 1], eta=eta) + if callback is not None: + callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised}) + d = to_d(x, sigmas[i], denoised) + if sigma_down == 0: + # Euler method + dt = sigma_down - sigmas[i] + x = x + d * dt + else: + # DPM-Solver-2 + sigma_mid = sigmas[i].log().lerp(sigma_down.log(), 0.5).exp() + dt_1 = sigma_mid - sigmas[i] + dt_2 = sigma_down - sigmas[i] + x_2 = x + d * dt_1 + denoised_2 = model(x_2, sigma_mid * s_in, **extra_args) + d_2 = to_d(x_2, sigma_mid, denoised_2) + x = x + d_2 * dt_2 + x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * s_noise * sigma_up + return x + + +def linear_multistep_coeff(order, t, i, j): + if order - 1 > i: + raise ValueError(f'Order {order} too high for step {i}') + def fn(tau): + prod = 1. 
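+        # Lagrange basis polynomial for node j evaluated at tau; integrating it over
+        # [t[i], t[i+1]] (below) yields the linear multistep (Adams-Bashforth) weight.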
+ for k in range(order): + if j == k: + continue + prod *= (tau - t[i - k]) / (t[i - j] - t[i - k]) + return prod + return integrate.quad(fn, t[i], t[i + 1], epsrel=1e-4)[0] + + +@torch.no_grad() +def sample_lms(model, x, sigmas, extra_args=None, callback=None, disable=None, order=4): + extra_args = {} if extra_args is None else extra_args + s_in = x.new_ones([x.shape[0]]) + sigmas_cpu = sigmas.detach().cpu().numpy() + ds = [] + for i in trange(len(sigmas) - 1, disable=disable): + denoised = model(x, sigmas[i] * s_in, **extra_args) + d = to_d(x, sigmas[i], denoised) + ds.append(d) + if len(ds) > order: + ds.pop(0) + if callback is not None: + callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised}) + cur_order = min(i + 1, order) + coeffs = [linear_multistep_coeff(cur_order, sigmas_cpu, i, j) for j in range(cur_order)] + x = x + sum(coeff * d for coeff, d in zip(coeffs, reversed(ds))) + return x + + +class PIDStepSizeController: + """A PID controller for ODE adaptive step size control.""" + def __init__(self, h, pcoeff, icoeff, dcoeff, order=1, accept_safety=0.81, eps=1e-8): + self.h = h + self.b1 = (pcoeff + icoeff + dcoeff) / order + self.b2 = -(pcoeff + 2 * dcoeff) / order + self.b3 = dcoeff / order + self.accept_safety = accept_safety + self.eps = eps + self.errs = [] + + def limiter(self, x): + return 1 + math.atan(x - 1) + + def propose_step(self, error): + inv_error = 1 / (float(error) + self.eps) + if not self.errs: + self.errs = [inv_error, inv_error, inv_error] + self.errs[0] = inv_error + factor = self.errs[0] ** self.b1 * self.errs[1] ** self.b2 * self.errs[2] ** self.b3 + factor = self.limiter(factor) + accept = factor >= self.accept_safety + if accept: + self.errs[2] = self.errs[1] + self.errs[1] = self.errs[0] + self.h *= factor + return accept + + +class DPMSolver(nn.Module): + """DPM-Solver. 
See https://arxiv.org/abs/2206.00927.""" + + def __init__(self, model, extra_args=None, eps_callback=None, info_callback=None): + super().__init__() + self.model = model + self.extra_args = {} if extra_args is None else extra_args + self.eps_callback = eps_callback + self.info_callback = info_callback + + def t(self, sigma): + return -sigma.log() + + def sigma(self, t): + return t.neg().exp() + + def eps(self, eps_cache, key, x, t, *args, **kwargs): + if key in eps_cache: + return eps_cache[key], eps_cache + sigma = self.sigma(t) * x.new_ones([x.shape[0]]) + eps = (x - self.model(x, sigma, *args, **self.extra_args, **kwargs)) / self.sigma(t) + if self.eps_callback is not None: + self.eps_callback() + return eps, {key: eps, **eps_cache} + + def dpm_solver_1_step(self, x, t, t_next, eps_cache=None): + eps_cache = {} if eps_cache is None else eps_cache + h = t_next - t + eps, eps_cache = self.eps(eps_cache, 'eps', x, t) + x_1 = x - self.sigma(t_next) * h.expm1() * eps + return x_1, eps_cache + + def dpm_solver_2_step(self, x, t, t_next, r1=1 / 2, eps_cache=None): + eps_cache = {} if eps_cache is None else eps_cache + h = t_next - t + eps, eps_cache = self.eps(eps_cache, 'eps', x, t) + s1 = t + r1 * h + u1 = x - self.sigma(s1) * (r1 * h).expm1() * eps + eps_r1, eps_cache = self.eps(eps_cache, 'eps_r1', u1, s1) + x_2 = x - self.sigma(t_next) * h.expm1() * eps - self.sigma(t_next) / (2 * r1) * h.expm1() * (eps_r1 - eps) + return x_2, eps_cache + + def dpm_solver_3_step(self, x, t, t_next, r1=1 / 3, r2=2 / 3, eps_cache=None): + eps_cache = {} if eps_cache is None else eps_cache + h = t_next - t + eps, eps_cache = self.eps(eps_cache, 'eps', x, t) + s1 = t + r1 * h + s2 = t + r2 * h + u1 = x - self.sigma(s1) * (r1 * h).expm1() * eps + eps_r1, eps_cache = self.eps(eps_cache, 'eps_r1', u1, s1) + u2 = x - self.sigma(s2) * (r2 * h).expm1() * eps - self.sigma(s2) * (r2 / r1) * ((r2 * h).expm1() / (r2 * h) - 1) * (eps_r1 - eps) + eps_r2, eps_cache = self.eps(eps_cache, 'eps_r2', u2, s2) + x_3 = x - self.sigma(t_next) * h.expm1() * eps - self.sigma(t_next) / r2 * (h.expm1() / h - 1) * (eps_r2 - eps) + return x_3, eps_cache + + def dpm_solver_fast(self, x, t_start, t_end, nfe, eta=0., s_noise=1., noise_sampler=None): + noise_sampler = default_noise_sampler(x) if noise_sampler is None else noise_sampler + if not t_end > t_start and eta: + raise ValueError('eta must be 0 for reverse sampling') + + m = math.floor(nfe / 3) + 1 + ts = torch.linspace(t_start, t_end, m + 1, device=x.device) + + if nfe % 3 == 0: + orders = [3] * (m - 2) + [2, 1] + else: + orders = [3] * (m - 1) + [nfe % 3] + + for i in range(len(orders)): + eps_cache = {} + t, t_next = ts[i], ts[i + 1] + if eta: + sd, su = get_ancestral_step(self.sigma(t), self.sigma(t_next), eta) + t_next_ = torch.minimum(t_end, self.t(sd)) + su = (self.sigma(t_next) ** 2 - self.sigma(t_next_) ** 2) ** 0.5 + else: + t_next_, su = t_next, 0. 
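+            # Evaluate eps at the current point, take a single DPM-Solver step of the
+            # precomputed order, then add back the ancestral noise (su is 0 when eta == 0).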
+ + eps, eps_cache = self.eps(eps_cache, 'eps', x, t) + denoised = x - self.sigma(t) * eps + if self.info_callback is not None: + self.info_callback({'x': x, 'i': i, 't': ts[i], 't_up': t, 'denoised': denoised}) + + if orders[i] == 1: + x, eps_cache = self.dpm_solver_1_step(x, t, t_next_, eps_cache=eps_cache) + elif orders[i] == 2: + x, eps_cache = self.dpm_solver_2_step(x, t, t_next_, eps_cache=eps_cache) + else: + x, eps_cache = self.dpm_solver_3_step(x, t, t_next_, eps_cache=eps_cache) + + x = x + su * s_noise * noise_sampler(self.sigma(t), self.sigma(t_next)) + + return x + + def dpm_solver_adaptive(self, x, t_start, t_end, order=3, rtol=0.05, atol=0.0078, h_init=0.05, pcoeff=0., icoeff=1., dcoeff=0., accept_safety=0.81, eta=0., s_noise=1., noise_sampler=None): + noise_sampler = default_noise_sampler(x) if noise_sampler is None else noise_sampler + if order not in {2, 3}: + raise ValueError('order should be 2 or 3') + forward = t_end > t_start + if not forward and eta: + raise ValueError('eta must be 0 for reverse sampling') + h_init = abs(h_init) * (1 if forward else -1) + atol = torch.tensor(atol) + rtol = torch.tensor(rtol) + s = t_start + x_prev = x + accept = True + pid = PIDStepSizeController(h_init, pcoeff, icoeff, dcoeff, 1.5 if eta else order, accept_safety) + info = {'steps': 0, 'nfe': 0, 'n_accept': 0, 'n_reject': 0} + + while s < t_end - 1e-5 if forward else s > t_end + 1e-5: + eps_cache = {} + t = torch.minimum(t_end, s + pid.h) if forward else torch.maximum(t_end, s + pid.h) + if eta: + sd, su = get_ancestral_step(self.sigma(s), self.sigma(t), eta) + t_ = torch.minimum(t_end, self.t(sd)) + su = (self.sigma(t) ** 2 - self.sigma(t_) ** 2) ** 0.5 + else: + t_, su = t, 0. + + eps, eps_cache = self.eps(eps_cache, 'eps', x, s) + denoised = x - self.sigma(s) * eps + + if order == 2: + x_low, eps_cache = self.dpm_solver_1_step(x, s, t_, eps_cache=eps_cache) + x_high, eps_cache = self.dpm_solver_2_step(x, s, t_, eps_cache=eps_cache) + else: + x_low, eps_cache = self.dpm_solver_2_step(x, s, t_, r1=1 / 3, eps_cache=eps_cache) + x_high, eps_cache = self.dpm_solver_3_step(x, s, t_, eps_cache=eps_cache) + delta = torch.maximum(atol, rtol * torch.maximum(x_low.abs(), x_prev.abs())) + error = torch.linalg.norm((x_low - x_high) / delta) / x.numel() ** 0.5 + accept = pid.propose_step(error) + if accept: + x_prev = x_low + x = x_high + su * s_noise * noise_sampler(self.sigma(s), self.sigma(t)) + s = t + info['n_accept'] += 1 + else: + info['n_reject'] += 1 + info['nfe'] += order + info['steps'] += 1 + + if self.info_callback is not None: + self.info_callback({'x': x, 'i': info['steps'] - 1, 't': s, 't_up': s, 'denoised': denoised, 'error': error, 'h': pid.h, **info}) + + return x, info + + +@torch.no_grad() +def sample_dpm_fast(model, x, sigma_min, sigma_max, n, extra_args=None, callback=None, disable=None, eta=0., s_noise=1., noise_sampler=None): + """DPM-Solver-Fast (fixed step size). 
See https://arxiv.org/abs/2206.00927.""" + if sigma_min <= 0 or sigma_max <= 0: + raise ValueError('sigma_min and sigma_max must not be 0') + with tqdm(total=n, disable=disable) as pbar: + dpm_solver = DPMSolver(model, extra_args, eps_callback=pbar.update) + if callback is not None: + dpm_solver.info_callback = lambda info: callback({'sigma': dpm_solver.sigma(info['t']), 'sigma_hat': dpm_solver.sigma(info['t_up']), **info}) + return dpm_solver.dpm_solver_fast(x, dpm_solver.t(torch.tensor(sigma_max)), dpm_solver.t(torch.tensor(sigma_min)), n, eta, s_noise, noise_sampler) + + +@torch.no_grad() +def sample_dpm_adaptive(model, x, sigma_min, sigma_max, extra_args=None, callback=None, disable=None, order=3, rtol=0.05, atol=0.0078, h_init=0.05, pcoeff=0., icoeff=1., dcoeff=0., accept_safety=0.81, eta=0., s_noise=1., noise_sampler=None, return_info=False): + """DPM-Solver-12 and 23 (adaptive step size). See https://arxiv.org/abs/2206.00927.""" + if sigma_min <= 0 or sigma_max <= 0: + raise ValueError('sigma_min and sigma_max must not be 0') + with tqdm(disable=disable) as pbar: + dpm_solver = DPMSolver(model, extra_args, eps_callback=pbar.update) + if callback is not None: + dpm_solver.info_callback = lambda info: callback({'sigma': dpm_solver.sigma(info['t']), 'sigma_hat': dpm_solver.sigma(info['t_up']), **info}) + x, info = dpm_solver.dpm_solver_adaptive(x, dpm_solver.t(torch.tensor(sigma_max)), dpm_solver.t(torch.tensor(sigma_min)), order, rtol, atol, h_init, pcoeff, icoeff, dcoeff, accept_safety, eta, s_noise, noise_sampler) + if return_info: + return x, info + return x + + +@torch.no_grad() +def sample_dpmpp_2s_ancestral(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None): + """Ancestral sampling with DPM-Solver++(2S) second-order steps.""" + extra_args = {} if extra_args is None else extra_args + noise_sampler = default_noise_sampler(x) if noise_sampler is None else noise_sampler + s_in = x.new_ones([x.shape[0]]) + sigma_fn = lambda t: t.neg().exp() + t_fn = lambda sigma: sigma.log().neg() + + for i in trange(len(sigmas) - 1, disable=disable): + denoised = model(x, sigmas[i] * s_in, **extra_args) + sigma_down, sigma_up = get_ancestral_step(sigmas[i], sigmas[i + 1], eta=eta) + if callback is not None: + callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised}) + if sigma_down == 0: + # Euler method + d = to_d(x, sigmas[i], denoised) + dt = sigma_down - sigmas[i] + x = x + d * dt + else: + # DPM-Solver++(2S) + t, t_next = t_fn(sigmas[i]), t_fn(sigma_down) + r = 1 / 2 + h = t_next - t + s = t + r * h + x_2 = (sigma_fn(s) / sigma_fn(t)) * x - (-h * r).expm1() * denoised + denoised_2 = model(x_2, sigma_fn(s) * s_in, **extra_args) + x = (sigma_fn(t_next) / sigma_fn(t)) * x - (-h).expm1() * denoised_2 + # Noise addition + if sigmas[i + 1] > 0: + x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * s_noise * sigma_up + return x + + +@torch.no_grad() +def sample_dpmpp_sde(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, r=1 / 2): + """DPM-Solver++ (stochastic).""" + sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max() + seed = extra_args.get("seed", None) + noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=seed, cpu=True) if noise_sampler is None else noise_sampler + extra_args = {} if extra_args is None else extra_args + s_in = x.new_ones([x.shape[0]]) + sigma_fn = lambda t: t.neg().exp() + t_fn = lambda sigma: sigma.log().neg() + + 
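+    # Apart from the final step (plain Euler to sigma = 0), each iteration performs a
+    # two-stage DPM-Solver++ (SDE) update with an intermediate point at r = 1/2 by
+    # default, injecting ancestral-style noise from the Brownian tree sampler at both stages.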
for i in trange(len(sigmas) - 1, disable=disable): + denoised = model(x, sigmas[i] * s_in, **extra_args) + if callback is not None: + callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised}) + if sigmas[i + 1] == 0: + # Euler method + d = to_d(x, sigmas[i], denoised) + dt = sigmas[i + 1] - sigmas[i] + x = x + d * dt + else: + # DPM-Solver++ + t, t_next = t_fn(sigmas[i]), t_fn(sigmas[i + 1]) + h = t_next - t + s = t + h * r + fac = 1 / (2 * r) + + # Step 1 + sd, su = get_ancestral_step(sigma_fn(t), sigma_fn(s), eta) + s_ = t_fn(sd) + x_2 = (sigma_fn(s_) / sigma_fn(t)) * x - (t - s_).expm1() * denoised + x_2 = x_2 + noise_sampler(sigma_fn(t), sigma_fn(s)) * s_noise * su + denoised_2 = model(x_2, sigma_fn(s) * s_in, **extra_args) + + # Step 2 + sd, su = get_ancestral_step(sigma_fn(t), sigma_fn(t_next), eta) + t_next_ = t_fn(sd) + denoised_d = (1 - fac) * denoised + fac * denoised_2 + x = (sigma_fn(t_next_) / sigma_fn(t)) * x - (t - t_next_).expm1() * denoised_d + x = x + noise_sampler(sigma_fn(t), sigma_fn(t_next)) * s_noise * su + return x + + +@torch.no_grad() +def sample_dpmpp_2m(model, x, sigmas, extra_args=None, callback=None, disable=None): + """DPM-Solver++(2M).""" + extra_args = {} if extra_args is None else extra_args + s_in = x.new_ones([x.shape[0]]) + sigma_fn = lambda t: t.neg().exp() + t_fn = lambda sigma: sigma.log().neg() + old_denoised = None + + for i in trange(len(sigmas) - 1, disable=disable): + denoised = model(x, sigmas[i] * s_in, **extra_args) + if callback is not None: + callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised}) + t, t_next = t_fn(sigmas[i]), t_fn(sigmas[i + 1]) + h = t_next - t + if old_denoised is None or sigmas[i + 1] == 0: + x = (sigma_fn(t_next) / sigma_fn(t)) * x - (-h).expm1() * denoised + else: + h_last = t - t_fn(sigmas[i - 1]) + r = h_last / h + denoised_d = (1 + 1 / (2 * r)) * denoised - (1 / (2 * r)) * old_denoised + x = (sigma_fn(t_next) / sigma_fn(t)) * x - (-h).expm1() * denoised_d + old_denoised = denoised + return x + +@torch.no_grad() +def sample_dpmpp_2m_sde(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, solver_type='midpoint'): + """DPM-Solver++(2M) SDE.""" + + if solver_type not in {'heun', 'midpoint'}: + raise ValueError('solver_type must be \'heun\' or \'midpoint\'') + + seed = extra_args.get("seed", None) + sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max() + noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=seed, cpu=True) if noise_sampler is None else noise_sampler + extra_args = {} if extra_args is None else extra_args + s_in = x.new_ones([x.shape[0]]) + + old_denoised = None + h_last = None + h = None + + for i in trange(len(sigmas) - 1, disable=disable): + denoised = model(x, sigmas[i] * s_in, **extra_args) + if callback is not None: + callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised}) + if sigmas[i + 1] == 0: + # Denoising step + x = denoised + else: + # DPM-Solver++(2M) SDE + t, s = -sigmas[i].log(), -sigmas[i + 1].log() + h = s - t + eta_h = eta * h + + x = sigmas[i + 1] / sigmas[i] * (-eta_h).exp() * x + (-h - eta_h).expm1().neg() * denoised + + if old_denoised is not None: + r = h_last / h + if solver_type == 'heun': + x = x + ((-h - eta_h).expm1().neg() / (-h - eta_h) + 1) * (1 / r) * (denoised - old_denoised) + elif solver_type == 'midpoint': + x = x + 0.5 * (-h - eta_h).expm1().neg() * (1 / r) * (denoised - 
old_denoised) + + if eta: + x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * sigmas[i + 1] * (-2 * eta_h).expm1().neg().sqrt() * s_noise + + old_denoised = denoised + h_last = h + return x + +@torch.no_grad() +def sample_dpmpp_3m_sde(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None): + """DPM-Solver++(3M) SDE.""" + + seed = extra_args.get("seed", None) + sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max() + noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=seed, cpu=True) if noise_sampler is None else noise_sampler + extra_args = {} if extra_args is None else extra_args + s_in = x.new_ones([x.shape[0]]) + + denoised_1, denoised_2 = None, None + h, h_1, h_2 = None, None, None + + for i in trange(len(sigmas) - 1, disable=disable): + denoised = model(x, sigmas[i] * s_in, **extra_args) + if callback is not None: + callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised}) + if sigmas[i + 1] == 0: + # Denoising step + x = denoised + else: + t, s = -sigmas[i].log(), -sigmas[i + 1].log() + h = s - t + h_eta = h * (eta + 1) + + x = torch.exp(-h_eta) * x + (-h_eta).expm1().neg() * denoised + + if h_2 is not None: + r0 = h_1 / h + r1 = h_2 / h + d1_0 = (denoised - denoised_1) / r0 + d1_1 = (denoised_1 - denoised_2) / r1 + d1 = d1_0 + (d1_0 - d1_1) * r0 / (r0 + r1) + d2 = (d1_0 - d1_1) / (r0 + r1) + phi_2 = h_eta.neg().expm1() / h_eta + 1 + phi_3 = phi_2 / h_eta - 0.5 + x = x + phi_2 * d1 - phi_3 * d2 + elif h_1 is not None: + r = h_1 / h + d = (denoised - denoised_1) / r + phi_2 = h_eta.neg().expm1() / h_eta + 1 + x = x + phi_2 * d + + if eta: + x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * sigmas[i + 1] * (-2 * h * eta).expm1().neg().sqrt() * s_noise + + denoised_1, denoised_2 = denoised, denoised_1 + h_1, h_2 = h, h_1 + return x + +@torch.no_grad() +def sample_dpmpp_3m_sde_gpu(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None): + sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max() + noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=extra_args.get("seed", None), cpu=False) if noise_sampler is None else noise_sampler + return sample_dpmpp_3m_sde(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, eta=eta, s_noise=s_noise, noise_sampler=noise_sampler) + +@torch.no_grad() +def sample_dpmpp_2m_sde_gpu(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, solver_type='midpoint'): + sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max() + noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=extra_args.get("seed", None), cpu=False) if noise_sampler is None else noise_sampler + return sample_dpmpp_2m_sde(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, eta=eta, s_noise=s_noise, noise_sampler=noise_sampler, solver_type=solver_type) + +@torch.no_grad() +def sample_dpmpp_sde_gpu(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, r=1 / 2): + sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max() + noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=extra_args.get("seed", None), cpu=False) if noise_sampler is None else noise_sampler + return sample_dpmpp_sde(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, eta=eta, s_noise=s_noise, noise_sampler=noise_sampler, r=r) + + +def 
DDPMSampler_step(x, sigma, sigma_prev, noise, noise_sampler): + alpha_cumprod = 1 / ((sigma * sigma) + 1) + alpha_cumprod_prev = 1 / ((sigma_prev * sigma_prev) + 1) + alpha = (alpha_cumprod / alpha_cumprod_prev) + + mu = (1.0 / alpha).sqrt() * (x - (1 - alpha) * noise / (1 - alpha_cumprod).sqrt()) + if sigma_prev > 0: + mu += ((1 - alpha) * (1. - alpha_cumprod_prev) / (1. - alpha_cumprod)).sqrt() * noise_sampler(sigma, sigma_prev) + return mu + +def generic_step_sampler(model, x, sigmas, extra_args=None, callback=None, disable=None, noise_sampler=None, step_function=None): + extra_args = {} if extra_args is None else extra_args + noise_sampler = default_noise_sampler(x) if noise_sampler is None else noise_sampler + s_in = x.new_ones([x.shape[0]]) + + for i in trange(len(sigmas) - 1, disable=disable): + denoised = model(x, sigmas[i] * s_in, **extra_args) + if callback is not None: + callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised}) + x = step_function(x / torch.sqrt(1.0 + sigmas[i] ** 2.0), sigmas[i], sigmas[i + 1], (x - denoised) / sigmas[i], noise_sampler) + if sigmas[i + 1] != 0: + x *= torch.sqrt(1.0 + sigmas[i + 1] ** 2.0) + return x + + +@torch.no_grad() +def sample_ddpm(model, x, sigmas, extra_args=None, callback=None, disable=None, noise_sampler=None): + return generic_step_sampler(model, x, sigmas, extra_args, callback, disable, noise_sampler, DDPMSampler_step) + +@torch.no_grad() +def sample_lcm(model, x, sigmas, extra_args=None, callback=None, disable=None, noise_sampler=None): + extra_args = {} if extra_args is None else extra_args + noise_sampler = default_noise_sampler(x) if noise_sampler is None else noise_sampler + s_in = x.new_ones([x.shape[0]]) + for i in trange(len(sigmas) - 1, disable=disable): + denoised = model(x, sigmas[i] * s_in, **extra_args) + if callback is not None: + callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised}) + + x = denoised + if sigmas[i + 1] > 0: + x += sigmas[i + 1] * noise_sampler(sigmas[i], sigmas[i + 1]) + return x + + + +@torch.no_grad() +def sample_heunpp2(model, x, sigmas, extra_args=None, callback=None, disable=None, s_churn=0., s_tmin=0., s_tmax=float('inf'), s_noise=1.): + # From MIT licensed: https://github.com/Carzit/sd-webui-samplers-scheduler/ + extra_args = {} if extra_args is None else extra_args + s_in = x.new_ones([x.shape[0]]) + s_end = sigmas[-1] + for i in trange(len(sigmas) - 1, disable=disable): + gamma = min(s_churn / (len(sigmas) - 1), 2 ** 0.5 - 1) if s_tmin <= sigmas[i] <= s_tmax else 0. 
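+        # The final step falls back to Euler and the second-to-last to a sigma-weighted
+        # Heun step; all earlier steps use the Heun++ branch, blending three slope
+        # estimates with sigma-dependent weights.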
+ eps = torch.randn_like(x) * s_noise + sigma_hat = sigmas[i] * (gamma + 1) + if gamma > 0: + x = x + eps * (sigma_hat ** 2 - sigmas[i] ** 2) ** 0.5 + denoised = model(x, sigma_hat * s_in, **extra_args) + d = to_d(x, sigma_hat, denoised) + if callback is not None: + callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigma_hat, 'denoised': denoised}) + dt = sigmas[i + 1] - sigma_hat + if sigmas[i + 1] == s_end: + # Euler method + x = x + d * dt + elif sigmas[i + 2] == s_end: + + # Heun's method + x_2 = x + d * dt + denoised_2 = model(x_2, sigmas[i + 1] * s_in, **extra_args) + d_2 = to_d(x_2, sigmas[i + 1], denoised_2) + + w = 2 * sigmas[0] + w2 = sigmas[i+1]/w + w1 = 1 - w2 + + d_prime = d * w1 + d_2 * w2 + + + x = x + d_prime * dt + + else: + # Heun++ + x_2 = x + d * dt + denoised_2 = model(x_2, sigmas[i + 1] * s_in, **extra_args) + d_2 = to_d(x_2, sigmas[i + 1], denoised_2) + dt_2 = sigmas[i + 2] - sigmas[i + 1] + + x_3 = x_2 + d_2 * dt_2 + denoised_3 = model(x_3, sigmas[i + 2] * s_in, **extra_args) + d_3 = to_d(x_3, sigmas[i + 2], denoised_3) + + w = 3 * sigmas[0] + w2 = sigmas[i + 1] / w + w3 = sigmas[i + 2] / w + w1 = 1 - w2 - w3 + + d_prime = w1 * d + w2 * d_2 + w3 * d_3 + x = x + d_prime * dt + return x diff --git a/ldm_patched/k_diffusion/utils.py b/ldm_patched/k_diffusion/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..ed31768378995c9dbb0495a4348e19d783229800 --- /dev/null +++ b/ldm_patched/k_diffusion/utils.py @@ -0,0 +1,317 @@ +# Taken from https://github.com/comfyanonymous/ComfyUI +# This file is only for reference, and not used in the backend or runtime. + + +from contextlib import contextmanager +import hashlib +import math +from pathlib import Path +import shutil +import urllib +import warnings + +from PIL import Image +import torch +from torch import nn, optim +from torch.utils import data + + +def hf_datasets_augs_helper(examples, transform, image_key, mode='RGB'): + """Apply passed in transforms for HuggingFace Datasets.""" + images = [transform(image.convert(mode)) for image in examples[image_key]] + return {image_key: images} + + +def append_dims(x, target_dims): + """Appends dimensions to the end of a tensor until it has target_dims dimensions.""" + dims_to_append = target_dims - x.ndim + if dims_to_append < 0: + raise ValueError(f'input has {x.ndim} dims but target_dims is {target_dims}, which is less') + expanded = x[(...,) + (None,) * dims_to_append] + # MPS will get inf values if it tries to index into the new axes, but detaching fixes this. 
+ # https://github.com/pytorch/pytorch/issues/84364 + return expanded.detach().clone() if expanded.device.type == 'mps' else expanded + + +def n_params(module): + """Returns the number of trainable parameters in a module.""" + return sum(p.numel() for p in module.parameters()) + + +def download_file(path, url, digest=None): + """Downloads a file if it does not exist, optionally checking its SHA-256 hash.""" + path = Path(path) + path.parent.mkdir(parents=True, exist_ok=True) + if not path.exists(): + with urllib.request.urlopen(url) as response, open(path, 'wb') as f: + shutil.copyfileobj(response, f) + if digest is not None: + file_digest = hashlib.sha256(open(path, 'rb').read()).hexdigest() + if digest != file_digest: + raise OSError(f'hash of {path} (url: {url}) failed to validate') + return path + + +@contextmanager +def train_mode(model, mode=True): + """A context manager that places a model into training mode and restores + the previous mode on exit.""" + modes = [module.training for module in model.modules()] + try: + yield model.train(mode) + finally: + for i, module in enumerate(model.modules()): + module.training = modes[i] + + +def eval_mode(model): + """A context manager that places a model into evaluation mode and restores + the previous mode on exit.""" + return train_mode(model, False) + + +@torch.no_grad() +def ema_update(model, averaged_model, decay): + """Incorporates updated model parameters into an exponential moving averaged + version of a model. It should be called after each optimizer step.""" + model_params = dict(model.named_parameters()) + averaged_params = dict(averaged_model.named_parameters()) + assert model_params.keys() == averaged_params.keys() + + for name, param in model_params.items(): + averaged_params[name].mul_(decay).add_(param, alpha=1 - decay) + + model_buffers = dict(model.named_buffers()) + averaged_buffers = dict(averaged_model.named_buffers()) + assert model_buffers.keys() == averaged_buffers.keys() + + for name, buf in model_buffers.items(): + averaged_buffers[name].copy_(buf) + + +class EMAWarmup: + """Implements an EMA warmup using an inverse decay schedule. + If inv_gamma=1 and power=1, implements a simple average. inv_gamma=1, power=2/3 are + good values for models you plan to train for a million or more steps (reaches decay + factor 0.999 at 31.6K steps, 0.9999 at 1M steps), inv_gamma=1, power=3/4 for models + you plan to train for less (reaches decay factor 0.999 at 10K steps, 0.9999 at + 215.4k steps). + Args: + inv_gamma (float): Inverse multiplicative factor of EMA warmup. Default: 1. + power (float): Exponential factor of EMA warmup. Default: 1. + min_value (float): The minimum EMA decay rate. Default: 0. + max_value (float): The maximum EMA decay rate. Default: 1. + start_at (int): The epoch to start averaging at. Default: 0. + last_epoch (int): The index of last epoch. Default: 0. + """ + + def __init__(self, inv_gamma=1., power=1., min_value=0., max_value=1., start_at=0, + last_epoch=0): + self.inv_gamma = inv_gamma + self.power = power + self.min_value = min_value + self.max_value = max_value + self.start_at = start_at + self.last_epoch = last_epoch + + def state_dict(self): + """Returns the state of the class as a :class:`dict`.""" + return dict(self.__dict__.items()) + + def load_state_dict(self, state_dict): + """Loads the class's state. + Args: + state_dict (dict): scaler state. Should be an object returned + from a call to :meth:`state_dict`. 
+ """ + self.__dict__.update(state_dict) + + def get_value(self): + """Gets the current EMA decay rate.""" + epoch = max(0, self.last_epoch - self.start_at) + value = 1 - (1 + epoch / self.inv_gamma) ** -self.power + return 0. if epoch < 0 else min(self.max_value, max(self.min_value, value)) + + def step(self): + """Updates the step count.""" + self.last_epoch += 1 + + +class InverseLR(optim.lr_scheduler._LRScheduler): + """Implements an inverse decay learning rate schedule with an optional exponential + warmup. When last_epoch=-1, sets initial lr as lr. + inv_gamma is the number of steps/epochs required for the learning rate to decay to + (1 / 2)**power of its original value. + Args: + optimizer (Optimizer): Wrapped optimizer. + inv_gamma (float): Inverse multiplicative factor of learning rate decay. Default: 1. + power (float): Exponential factor of learning rate decay. Default: 1. + warmup (float): Exponential warmup factor (0 <= warmup < 1, 0 to disable) + Default: 0. + min_lr (float): The minimum learning rate. Default: 0. + last_epoch (int): The index of last epoch. Default: -1. + verbose (bool): If ``True``, prints a message to stdout for + each update. Default: ``False``. + """ + + def __init__(self, optimizer, inv_gamma=1., power=1., warmup=0., min_lr=0., + last_epoch=-1, verbose=False): + self.inv_gamma = inv_gamma + self.power = power + if not 0. <= warmup < 1: + raise ValueError('Invalid value for warmup') + self.warmup = warmup + self.min_lr = min_lr + super().__init__(optimizer, last_epoch, verbose) + + def get_lr(self): + if not self._get_lr_called_within_step: + warnings.warn("To get the last learning rate computed by the scheduler, " + "please use `get_last_lr()`.") + + return self._get_closed_form_lr() + + def _get_closed_form_lr(self): + warmup = 1 - self.warmup ** (self.last_epoch + 1) + lr_mult = (1 + self.last_epoch / self.inv_gamma) ** -self.power + return [warmup * max(self.min_lr, base_lr * lr_mult) + for base_lr in self.base_lrs] + + +class ExponentialLR(optim.lr_scheduler._LRScheduler): + """Implements an exponential learning rate schedule with an optional exponential + warmup. When last_epoch=-1, sets initial lr as lr. Decays the learning rate + continuously by decay (default 0.5) every num_steps steps. + Args: + optimizer (Optimizer): Wrapped optimizer. + num_steps (float): The number of steps to decay the learning rate by decay in. + decay (float): The factor by which to decay the learning rate every num_steps + steps. Default: 0.5. + warmup (float): Exponential warmup factor (0 <= warmup < 1, 0 to disable) + Default: 0. + min_lr (float): The minimum learning rate. Default: 0. + last_epoch (int): The index of last epoch. Default: -1. + verbose (bool): If ``True``, prints a message to stdout for + each update. Default: ``False``. + """ + + def __init__(self, optimizer, num_steps, decay=0.5, warmup=0., min_lr=0., + last_epoch=-1, verbose=False): + self.num_steps = num_steps + self.decay = decay + if not 0. 
<= warmup < 1: + raise ValueError('Invalid value for warmup') + self.warmup = warmup + self.min_lr = min_lr + super().__init__(optimizer, last_epoch, verbose) + + def get_lr(self): + if not self._get_lr_called_within_step: + warnings.warn("To get the last learning rate computed by the scheduler, " + "please use `get_last_lr()`.") + + return self._get_closed_form_lr() + + def _get_closed_form_lr(self): + warmup = 1 - self.warmup ** (self.last_epoch + 1) + lr_mult = (self.decay ** (1 / self.num_steps)) ** self.last_epoch + return [warmup * max(self.min_lr, base_lr * lr_mult) + for base_lr in self.base_lrs] + + +def rand_log_normal(shape, loc=0., scale=1., device='cpu', dtype=torch.float32): + """Draws samples from an lognormal distribution.""" + return (torch.randn(shape, device=device, dtype=dtype) * scale + loc).exp() + + +def rand_log_logistic(shape, loc=0., scale=1., min_value=0., max_value=float('inf'), device='cpu', dtype=torch.float32): + """Draws samples from an optionally truncated log-logistic distribution.""" + min_value = torch.as_tensor(min_value, device=device, dtype=torch.float64) + max_value = torch.as_tensor(max_value, device=device, dtype=torch.float64) + min_cdf = min_value.log().sub(loc).div(scale).sigmoid() + max_cdf = max_value.log().sub(loc).div(scale).sigmoid() + u = torch.rand(shape, device=device, dtype=torch.float64) * (max_cdf - min_cdf) + min_cdf + return u.logit().mul(scale).add(loc).exp().to(dtype) + + +def rand_log_uniform(shape, min_value, max_value, device='cpu', dtype=torch.float32): + """Draws samples from an log-uniform distribution.""" + min_value = math.log(min_value) + max_value = math.log(max_value) + return (torch.rand(shape, device=device, dtype=dtype) * (max_value - min_value) + min_value).exp() + + +def rand_v_diffusion(shape, sigma_data=1., min_value=0., max_value=float('inf'), device='cpu', dtype=torch.float32): + """Draws samples from a truncated v-diffusion training timestep distribution.""" + min_cdf = math.atan(min_value / sigma_data) * 2 / math.pi + max_cdf = math.atan(max_value / sigma_data) * 2 / math.pi + u = torch.rand(shape, device=device, dtype=dtype) * (max_cdf - min_cdf) + min_cdf + return torch.tan(u * math.pi / 2) * sigma_data + + +def rand_split_log_normal(shape, loc, scale_1, scale_2, device='cpu', dtype=torch.float32): + """Draws samples from a split lognormal distribution.""" + n = torch.randn(shape, device=device, dtype=dtype).abs() + u = torch.rand(shape, device=device, dtype=dtype) + n_left = n * -scale_1 + loc + n_right = n * scale_2 + loc + ratio = scale_1 / (scale_1 + scale_2) + return torch.where(u < ratio, n_left, n_right).exp() + + +class FolderOfImages(data.Dataset): + """Recursively finds all images in a directory. 
It does not support + classes/targets.""" + + IMG_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif', '.tiff', '.webp'} + + def __init__(self, root, transform=None): + super().__init__() + self.root = Path(root) + self.transform = nn.Identity() if transform is None else transform + self.paths = sorted(path for path in self.root.rglob('*') if path.suffix.lower() in self.IMG_EXTENSIONS) + + def __repr__(self): + return f'FolderOfImages(root="{self.root}", len: {len(self)})' + + def __len__(self): + return len(self.paths) + + def __getitem__(self, key): + path = self.paths[key] + with open(path, 'rb') as f: + image = Image.open(f).convert('RGB') + image = self.transform(image) + return image, + + +class CSVLogger: + def __init__(self, filename, columns): + self.filename = Path(filename) + self.columns = columns + if self.filename.exists(): + self.file = open(self.filename, 'a') + else: + self.file = open(self.filename, 'w') + self.write(*self.columns) + + def write(self, *args): + print(*args, sep=',', file=self.file, flush=True) + + +@contextmanager +def tf32_mode(cudnn=None, matmul=None): + """A context manager that sets whether TF32 is allowed on cuDNN or matmul.""" + cudnn_old = torch.backends.cudnn.allow_tf32 + matmul_old = torch.backends.cuda.matmul.allow_tf32 + try: + if cudnn is not None: + torch.backends.cudnn.allow_tf32 = cudnn + if matmul is not None: + torch.backends.cuda.matmul.allow_tf32 = matmul + yield + finally: + if cudnn is not None: + torch.backends.cudnn.allow_tf32 = cudnn_old + if matmul is not None: + torch.backends.cuda.matmul.allow_tf32 = matmul_old diff --git a/ldm_patched/ldm/models/autoencoder.py b/ldm_patched/ldm/models/autoencoder.py new file mode 100644 index 0000000000000000000000000000000000000000..fadefee82298753064800faa1a8cfaecb33fac79 --- /dev/null +++ b/ldm_patched/ldm/models/autoencoder.py @@ -0,0 +1,235 @@ +# 1st edit by https://github.com/CompVis/latent-diffusion +# 2nd edit by https://github.com/Stability-AI/stablediffusion +# 3rd edit by https://github.com/Stability-AI/generative-models +# 4th edit by https://github.com/comfyanonymous/ComfyUI +# 5th edit by Forge + + +import torch +# import pytorch_lightning as pl +import torch.nn.functional as F +from contextlib import contextmanager +from typing import Any, Dict, List, Optional, Tuple, Union + +from ldm_patched.ldm.modules.distributions.distributions import DiagonalGaussianDistribution + +from ldm_patched.ldm.util import instantiate_from_config +from ldm_patched.ldm.modules.ema import LitEma +import ldm_patched.modules.ops + +class DiagonalGaussianRegularizer(torch.nn.Module): + def __init__(self, sample: bool = True): + super().__init__() + self.sample = sample + + def get_trainable_parameters(self) -> Any: + yield from () + + def forward(self, z: torch.Tensor) -> Tuple[torch.Tensor, dict]: + log = dict() + posterior = DiagonalGaussianDistribution(z) + if self.sample: + z = posterior.sample() + else: + z = posterior.mode() + kl_loss = posterior.kl() + kl_loss = torch.sum(kl_loss) / kl_loss.shape[0] + log["kl_loss"] = kl_loss + return z, log + + +class AbstractAutoencoder(torch.nn.Module): + """ + This is the base class for all autoencoders, including image autoencoders, image autoencoders with discriminators, + unCLIP models, etc. Hence, it is fairly general, and specific features + (e.g. discriminator training, encoding, decoding) must be implemented in subclasses. 
+ """ + + def __init__( + self, + ema_decay: Union[None, float] = None, + monitor: Union[None, str] = None, + input_key: str = "jpg", + **kwargs, + ): + super().__init__() + + self.input_key = input_key + self.use_ema = ema_decay is not None + if monitor is not None: + self.monitor = monitor + + if self.use_ema: + self.model_ema = LitEma(self, decay=ema_decay) + logpy.info(f"Keeping EMAs of {len(list(self.model_ema.buffers()))}.") + + def get_input(self, batch) -> Any: + raise NotImplementedError() + + def on_train_batch_end(self, *args, **kwargs): + # for EMA computation + if self.use_ema: + self.model_ema(self) + + @contextmanager + def ema_scope(self, context=None): + if self.use_ema: + self.model_ema.store(self.parameters()) + self.model_ema.copy_to(self) + if context is not None: + logpy.info(f"{context}: Switched to EMA weights") + try: + yield None + finally: + if self.use_ema: + self.model_ema.restore(self.parameters()) + if context is not None: + logpy.info(f"{context}: Restored training weights") + + def encode(self, *args, **kwargs) -> torch.Tensor: + raise NotImplementedError("encode()-method of abstract base class called") + + def decode(self, *args, **kwargs) -> torch.Tensor: + raise NotImplementedError("decode()-method of abstract base class called") + + def instantiate_optimizer_from_config(self, params, lr, cfg): + logpy.info(f"loading >>> {cfg['target']} <<< optimizer from config") + return get_obj_from_str(cfg["target"])( + params, lr=lr, **cfg.get("params", dict()) + ) + + def configure_optimizers(self) -> Any: + raise NotImplementedError() + + +class AutoencodingEngine(AbstractAutoencoder): + """ + Base class for all image autoencoders that we train, like VQGAN or AutoencoderKL + (we also restore them explicitly as special cases for legacy reasons). + Regularizations such as KL or VQ are moved to the regularizer class. 
+ """ + + def __init__( + self, + *args, + encoder_config: Dict, + decoder_config: Dict, + regularizer_config: Dict, + **kwargs, + ): + super().__init__(*args, **kwargs) + + self.encoder: torch.nn.Module = instantiate_from_config(encoder_config) + self.decoder: torch.nn.Module = instantiate_from_config(decoder_config) + self.regularization: AbstractRegularizer = instantiate_from_config( + regularizer_config + ) + + def get_last_layer(self): + return self.decoder.get_last_layer() + + def encode( + self, + x: torch.Tensor, + return_reg_log: bool = False, + unregularized: bool = False, + ) -> Union[torch.Tensor, Tuple[torch.Tensor, dict]]: + z = self.encoder(x) + if unregularized: + return z, dict() + z, reg_log = self.regularization(z) + if return_reg_log: + return z, reg_log + return z + + def decode(self, z: torch.Tensor, **kwargs) -> torch.Tensor: + x = self.decoder(z, **kwargs) + return x + + def forward( + self, x: torch.Tensor, **additional_decode_kwargs + ) -> Tuple[torch.Tensor, torch.Tensor, dict]: + z, reg_log = self.encode(x, return_reg_log=True) + dec = self.decode(z, **additional_decode_kwargs) + return z, dec, reg_log + + +class AutoencodingEngineLegacy(AutoencodingEngine): + def __init__(self, embed_dim: int, **kwargs): + self.max_batch_size = kwargs.pop("max_batch_size", None) + ddconfig = kwargs.pop("ddconfig") + super().__init__( + encoder_config={ + "target": "ldm_patched.ldm.modules.diffusionmodules.model.Encoder", + "params": ddconfig, + }, + decoder_config={ + "target": "ldm_patched.ldm.modules.diffusionmodules.model.Decoder", + "params": ddconfig, + }, + **kwargs, + ) + self.quant_conv = ldm_patched.modules.ops.disable_weight_init.Conv2d( + (1 + ddconfig["double_z"]) * ddconfig["z_channels"], + (1 + ddconfig["double_z"]) * embed_dim, + 1, + ) + self.post_quant_conv = ldm_patched.modules.ops.disable_weight_init.Conv2d(embed_dim, ddconfig["z_channels"], 1) + self.embed_dim = embed_dim + + def get_autoencoder_params(self) -> list: + params = super().get_autoencoder_params() + return params + + def encode( + self, x: torch.Tensor, return_reg_log: bool = False + ) -> Union[torch.Tensor, Tuple[torch.Tensor, dict]]: + if self.max_batch_size is None: + z = self.encoder(x) + z = self.quant_conv(z) + else: + N = x.shape[0] + bs = self.max_batch_size + n_batches = int(math.ceil(N / bs)) + z = list() + for i_batch in range(n_batches): + z_batch = self.encoder(x[i_batch * bs : (i_batch + 1) * bs]) + z_batch = self.quant_conv(z_batch) + z.append(z_batch) + z = torch.cat(z, 0) + + z, reg_log = self.regularization(z) + if return_reg_log: + return z, reg_log + return z + + def decode(self, z: torch.Tensor, **decoder_kwargs) -> torch.Tensor: + if self.max_batch_size is None: + dec = self.post_quant_conv(z) + dec = self.decoder(dec, **decoder_kwargs) + else: + N = z.shape[0] + bs = self.max_batch_size + n_batches = int(math.ceil(N / bs)) + dec = list() + for i_batch in range(n_batches): + dec_batch = self.post_quant_conv(z[i_batch * bs : (i_batch + 1) * bs]) + dec_batch = self.decoder(dec_batch, **decoder_kwargs) + dec.append(dec_batch) + dec = torch.cat(dec, 0) + + return dec + + +class AutoencoderKL(AutoencodingEngineLegacy): + def __init__(self, **kwargs): + if "lossconfig" in kwargs: + kwargs["loss_config"] = kwargs.pop("lossconfig") + super().__init__( + regularizer_config={ + "target": ( + "ldm_patched.ldm.models.autoencoder.DiagonalGaussianRegularizer" + ) + }, + **kwargs, + ) diff --git a/ldm_patched/ldm/modules/attention.py b/ldm_patched/ldm/modules/attention.py new file mode 
100644 index 0000000000000000000000000000000000000000..13183e5f50cfbef6379e54dee814bd8cf19d6772 --- /dev/null +++ b/ldm_patched/ldm/modules/attention.py @@ -0,0 +1,788 @@ +# 1st edit by https://github.com/CompVis/latent-diffusion +# 2nd edit by https://github.com/Stability-AI/stablediffusion +# 3rd edit by https://github.com/Stability-AI/generative-models +# 4th edit by https://github.com/comfyanonymous/ComfyUI +# 5th edit by Forge + + +import math +import torch +import torch.nn.functional as F +from torch import nn, einsum +from einops import rearrange, repeat +from typing import Optional, Any + +from .diffusionmodules.util import checkpoint, AlphaBlender, timestep_embedding +from .sub_quadratic_attention import efficient_dot_product_attention + +from ldm_patched.modules import model_management + +if model_management.xformers_enabled(): + import xformers + import xformers.ops + +from ldm_patched.modules.args_parser import args +import ldm_patched.modules.ops +ops = ldm_patched.modules.ops.disable_weight_init + +# CrossAttn precision handling +if args.disable_attention_upcast: + print("disabling upcasting of attention") + _ATTN_PRECISION = "fp16" +else: + _ATTN_PRECISION = "fp32" + + +def exists(val): + return val is not None + + +def uniq(arr): + return{el: True for el in arr}.keys() + + +def default(val, d): + if exists(val): + return val + return d + + +def max_neg_value(t): + return -torch.finfo(t.dtype).max + + +def init_(tensor): + dim = tensor.shape[-1] + std = 1 / math.sqrt(dim) + tensor.uniform_(-std, std) + return tensor + + +# feedforward +class GEGLU(nn.Module): + def __init__(self, dim_in, dim_out, dtype=None, device=None, operations=ops): + super().__init__() + self.proj = operations.Linear(dim_in, dim_out * 2, dtype=dtype, device=device) + + def forward(self, x): + x, gate = self.proj(x).chunk(2, dim=-1) + return x * F.gelu(gate) + + +class FeedForward(nn.Module): + def __init__(self, dim, dim_out=None, mult=4, glu=False, dropout=0., dtype=None, device=None, operations=ops): + super().__init__() + inner_dim = int(dim * mult) + dim_out = default(dim_out, dim) + project_in = nn.Sequential( + operations.Linear(dim, inner_dim, dtype=dtype, device=device), + nn.GELU() + ) if not glu else GEGLU(dim, inner_dim, dtype=dtype, device=device, operations=operations) + + self.net = nn.Sequential( + project_in, + nn.Dropout(dropout), + operations.Linear(inner_dim, dim_out, dtype=dtype, device=device) + ) + + def forward(self, x): + return self.net(x) + +def Normalize(in_channels, dtype=None, device=None): + return torch.nn.GroupNorm(num_groups=32, num_channels=in_channels, eps=1e-6, affine=True, dtype=dtype, device=device) + +def attention_basic(q, k, v, heads, mask=None): + b, _, dim_head = q.shape + dim_head //= heads + scale = dim_head ** -0.5 + + h = heads + q, k, v = map( + lambda t: t.unsqueeze(3) + .reshape(b, -1, heads, dim_head) + .permute(0, 2, 1, 3) + .reshape(b * heads, -1, dim_head) + .contiguous(), + (q, k, v), + ) + + # force cast to fp32 to avoid overflowing + if _ATTN_PRECISION =="fp32": + sim = einsum('b i d, b j d -> b i j', q.float(), k.float()) * scale + else: + sim = einsum('b i d, b j d -> b i j', q, k) * scale + + del q, k + + if exists(mask): + if mask.dtype == torch.bool: + mask = rearrange(mask, 'b ... 
-> b (...)') #TODO: check if this bool part matches pytorch attention + max_neg_value = -torch.finfo(sim.dtype).max + mask = repeat(mask, 'b j -> (b h) () j', h=h) + sim.masked_fill_(~mask, max_neg_value) + else: + sim += mask + + # attention, what we cannot get enough of + sim = sim.softmax(dim=-1) + + out = einsum('b i j, b j d -> b i d', sim.to(v.dtype), v) + out = ( + out.unsqueeze(0) + .reshape(b, heads, -1, dim_head) + .permute(0, 2, 1, 3) + .reshape(b, -1, heads * dim_head) + ) + return out + + +def attention_sub_quad(query, key, value, heads, mask=None): + b, _, dim_head = query.shape + dim_head //= heads + + scale = dim_head ** -0.5 + query = query.unsqueeze(3).reshape(b, -1, heads, dim_head).permute(0, 2, 1, 3).reshape(b * heads, -1, dim_head) + value = value.unsqueeze(3).reshape(b, -1, heads, dim_head).permute(0, 2, 1, 3).reshape(b * heads, -1, dim_head) + + key = key.unsqueeze(3).reshape(b, -1, heads, dim_head).permute(0, 2, 3, 1).reshape(b * heads, dim_head, -1) + + dtype = query.dtype + upcast_attention = _ATTN_PRECISION =="fp32" and query.dtype != torch.float32 + if upcast_attention: + bytes_per_token = torch.finfo(torch.float32).bits//8 + else: + bytes_per_token = torch.finfo(query.dtype).bits//8 + batch_x_heads, q_tokens, _ = query.shape + _, _, k_tokens = key.shape + qk_matmul_size_bytes = batch_x_heads * bytes_per_token * q_tokens * k_tokens + + mem_free_total, mem_free_torch = model_management.get_free_memory(query.device, True) + + kv_chunk_size_min = None + kv_chunk_size = None + query_chunk_size = None + + for x in [4096, 2048, 1024, 512, 256]: + count = mem_free_total / (batch_x_heads * bytes_per_token * x * 4.0) + if count >= k_tokens: + kv_chunk_size = k_tokens + query_chunk_size = x + break + + if query_chunk_size is None: + query_chunk_size = 512 + + hidden_states = efficient_dot_product_attention( + query, + key, + value, + query_chunk_size=query_chunk_size, + kv_chunk_size=kv_chunk_size, + kv_chunk_size_min=kv_chunk_size_min, + use_checkpoint=False, + upcast_attention=upcast_attention, + mask=mask, + ) + + hidden_states = hidden_states.to(dtype) + + hidden_states = hidden_states.unflatten(0, (-1, heads)).transpose(1,2).flatten(start_dim=2) + return hidden_states + +def attention_split(q, k, v, heads, mask=None): + b, _, dim_head = q.shape + dim_head //= heads + scale = dim_head ** -0.5 + + h = heads + q, k, v = map( + lambda t: t.unsqueeze(3) + .reshape(b, -1, heads, dim_head) + .permute(0, 2, 1, 3) + .reshape(b * heads, -1, dim_head) + .contiguous(), + (q, k, v), + ) + + r1 = torch.zeros(q.shape[0], q.shape[1], v.shape[2], device=q.device, dtype=q.dtype) + + mem_free_total = model_management.get_free_memory(q.device) + + if _ATTN_PRECISION =="fp32": + element_size = 4 + else: + element_size = q.element_size() + + gb = 1024 ** 3 + tensor_size = q.shape[0] * q.shape[1] * k.shape[1] * element_size + modifier = 3 + mem_required = tensor_size * modifier + steps = 1 + + + if mem_required > mem_free_total: + steps = 2**(math.ceil(math.log(mem_required / mem_free_total, 2))) + # print(f"Expected tensor size:{tensor_size/gb:0.1f}GB, cuda free:{mem_free_cuda/gb:0.1f}GB " + # f"torch free:{mem_free_torch/gb:0.1f} total:{mem_free_total/gb:0.1f} steps:{steps}") + + if steps > 64: + max_res = math.floor(math.sqrt(math.sqrt(mem_free_total / 2.5)) / 8) * 64 + raise RuntimeError(f'Not enough memory, use lower resolution (max approx. {max_res}x{max_res}). 
' + f'Need: {mem_required/64/gb:0.1f}GB free, Have:{mem_free_total/gb:0.1f}GB free') + + # print("steps", steps, mem_required, mem_free_total, modifier, q.element_size(), tensor_size) + first_op_done = False + cleared_cache = False + while True: + try: + slice_size = q.shape[1] // steps if (q.shape[1] % steps) == 0 else q.shape[1] + for i in range(0, q.shape[1], slice_size): + end = i + slice_size + if _ATTN_PRECISION =="fp32": + with torch.autocast(enabled=False, device_type = 'cuda'): + s1 = einsum('b i d, b j d -> b i j', q[:, i:end].float(), k.float()) * scale + else: + s1 = einsum('b i d, b j d -> b i j', q[:, i:end], k) * scale + + if mask is not None: + if len(mask.shape) == 2: + s1 += mask[i:end] + else: + s1 += mask[:, i:end] + + s2 = s1.softmax(dim=-1).to(v.dtype) + del s1 + first_op_done = True + + r1[:, i:end] = einsum('b i j, b j d -> b i d', s2, v) + del s2 + break + except model_management.OOM_EXCEPTION as e: + if first_op_done == False: + model_management.soft_empty_cache(True) + if cleared_cache == False: + cleared_cache = True + print("out of memory error, emptying cache and trying again") + continue + steps *= 2 + if steps > 64: + raise e + print("out of memory error, increasing steps and trying again", steps) + else: + raise e + + del q, k, v + + r1 = ( + r1.unsqueeze(0) + .reshape(b, heads, -1, dim_head) + .permute(0, 2, 1, 3) + .reshape(b, -1, heads * dim_head) + ) + return r1 + +BROKEN_XFORMERS = False +try: + x_vers = xformers.__version__ + #I think 0.0.23 is also broken (q with bs bigger than 65535 gives CUDA error) + BROKEN_XFORMERS = x_vers.startswith("0.0.21") or x_vers.startswith("0.0.22") or x_vers.startswith("0.0.23") +except: + pass + +def attention_xformers(q, k, v, heads, mask=None): + b, _, dim_head = q.shape + dim_head //= heads + if BROKEN_XFORMERS: + if b * heads > 65535: + return attention_pytorch(q, k, v, heads, mask) + + q, k, v = map( + lambda t: t.unsqueeze(3) + .reshape(b, -1, heads, dim_head) + .permute(0, 2, 1, 3) + .reshape(b * heads, -1, dim_head) + .contiguous(), + (q, k, v), + ) + + if mask is not None: + pad = 8 - q.shape[1] % 8 + mask_out = torch.empty([q.shape[0], q.shape[1], q.shape[1] + pad], dtype=q.dtype, device=q.device) + mask_out[:, :, :mask.shape[-1]] = mask + mask = mask_out[:, :, :mask.shape[-1]] + + out = xformers.ops.memory_efficient_attention(q, k, v, attn_bias=mask) + + out = ( + out.unsqueeze(0) + .reshape(b, heads, -1, dim_head) + .permute(0, 2, 1, 3) + .reshape(b, -1, heads * dim_head) + ) + return out + +def attention_pytorch(q, k, v, heads, mask=None): + b, _, dim_head = q.shape + dim_head //= heads + q, k, v = map( + lambda t: t.view(b, -1, heads, dim_head).transpose(1, 2), + (q, k, v), + ) + + out = torch.nn.functional.scaled_dot_product_attention(q, k, v, attn_mask=mask, dropout_p=0.0, is_causal=False) + out = ( + out.transpose(1, 2).reshape(b, -1, heads * dim_head) + ) + return out + + +optimized_attention = attention_basic + +if model_management.xformers_enabled(): + print("Using xformers cross attention") + optimized_attention = attention_xformers +elif model_management.pytorch_attention_enabled(): + print("Using pytorch cross attention") + optimized_attention = attention_pytorch +else: + if args.attention_split: + print("Using split optimization for cross attention") + optimized_attention = attention_split + else: + print("Using sub quadratic optimization for cross attention, if you have memory or speed issues try using: --attention-split") + optimized_attention = attention_sub_quad + +optimized_attention_masked 
= optimized_attention + +def optimized_attention_for_device(device, mask=False, small_input=False): + if small_input: + if model_management.pytorch_attention_enabled(): + return attention_pytorch #TODO: need to confirm but this is probably slightly faster for small inputs in all cases + else: + return attention_basic + + if device == torch.device("cpu"): + return attention_sub_quad + + if mask: + return optimized_attention_masked + + return optimized_attention + + +class CrossAttention(nn.Module): + def __init__(self, query_dim, context_dim=None, heads=8, dim_head=64, dropout=0., dtype=None, device=None, operations=ops): + super().__init__() + inner_dim = dim_head * heads + context_dim = default(context_dim, query_dim) + + self.heads = heads + self.dim_head = dim_head + + self.to_q = operations.Linear(query_dim, inner_dim, bias=False, dtype=dtype, device=device) + self.to_k = operations.Linear(context_dim, inner_dim, bias=False, dtype=dtype, device=device) + self.to_v = operations.Linear(context_dim, inner_dim, bias=False, dtype=dtype, device=device) + + self.to_out = nn.Sequential(operations.Linear(inner_dim, query_dim, dtype=dtype, device=device), nn.Dropout(dropout)) + + def forward(self, x, context=None, value=None, mask=None, transformer_options=None): + q = self.to_q(x) + context = default(context, x) + k = self.to_k(context) + if value is not None: + v = self.to_v(value) + del value + else: + v = self.to_v(context) + + if mask is None: + out = optimized_attention(q, k, v, self.heads) + else: + out = optimized_attention_masked(q, k, v, self.heads, mask) + return self.to_out(out) + + +class BasicTransformerBlock(nn.Module): + def __init__(self, dim, n_heads, d_head, dropout=0., context_dim=None, gated_ff=True, checkpoint=True, ff_in=False, inner_dim=None, + disable_self_attn=False, disable_temporal_crossattention=False, switch_temporal_ca_to_sa=False, dtype=None, device=None, operations=ops): + super().__init__() + + self.ff_in = ff_in or inner_dim is not None + if inner_dim is None: + inner_dim = dim + + self.is_res = inner_dim == dim + + if self.ff_in: + self.norm_in = operations.LayerNorm(dim, dtype=dtype, device=device) + self.ff_in = FeedForward(dim, dim_out=inner_dim, dropout=dropout, glu=gated_ff, dtype=dtype, device=device, operations=operations) + + self.disable_self_attn = disable_self_attn + self.attn1 = CrossAttention(query_dim=inner_dim, heads=n_heads, dim_head=d_head, dropout=dropout, + context_dim=context_dim if self.disable_self_attn else None, dtype=dtype, device=device, operations=operations) # is a self-attention if not self.disable_self_attn + self.ff = FeedForward(inner_dim, dim_out=dim, dropout=dropout, glu=gated_ff, dtype=dtype, device=device, operations=operations) + + if disable_temporal_crossattention: + if switch_temporal_ca_to_sa: + raise ValueError + else: + self.attn2 = None + else: + context_dim_attn2 = None + if not switch_temporal_ca_to_sa: + context_dim_attn2 = context_dim + + self.attn2 = CrossAttention(query_dim=inner_dim, context_dim=context_dim_attn2, + heads=n_heads, dim_head=d_head, dropout=dropout, dtype=dtype, device=device, operations=operations) # is self-attn if context is none + self.norm2 = operations.LayerNorm(inner_dim, dtype=dtype, device=device) + + self.norm1 = operations.LayerNorm(inner_dim, dtype=dtype, device=device) + self.norm3 = operations.LayerNorm(inner_dim, dtype=dtype, device=device) + self.checkpoint = checkpoint + self.n_heads = n_heads + self.d_head = d_head + self.switch_temporal_ca_to_sa = switch_temporal_ca_to_sa + + 
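+    # forward() below is driven entirely by transformer_options: "patches" hooks
+    # (attn1_patch, attn1_output_patch, middle_patch, attn2_patch, attn2_output_patch)
+    # may rewrite the attention inputs/outputs, while "patches_replace" entries keyed by
+    # block or (block, block_index) can replace the whole attention computation yet still
+    # reuse this block's to_q/to_k/to_v/to_out projections.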
def forward(self, x, context=None, transformer_options={}): + return checkpoint(self._forward, (x, context, transformer_options), self.parameters(), self.checkpoint) + + def _forward(self, x, context=None, transformer_options={}): + extra_options = {} + block = transformer_options.get("block", None) + block_index = transformer_options.get("block_index", 0) + transformer_patches = {} + transformer_patches_replace = {} + + for k in transformer_options: + if k == "patches": + transformer_patches = transformer_options[k] + elif k == "patches_replace": + transformer_patches_replace = transformer_options[k] + else: + extra_options[k] = transformer_options[k] + + extra_options["n_heads"] = self.n_heads + extra_options["dim_head"] = self.d_head + + if self.ff_in: + x_skip = x + x = self.ff_in(self.norm_in(x)) + if self.is_res: + x += x_skip + + n = self.norm1(x) + if self.disable_self_attn: + context_attn1 = context + else: + context_attn1 = None + value_attn1 = None + + if "attn1_patch" in transformer_patches: + patch = transformer_patches["attn1_patch"] + if context_attn1 is None: + context_attn1 = n + value_attn1 = context_attn1 + for p in patch: + n, context_attn1, value_attn1 = p(n, context_attn1, value_attn1, extra_options) + + if block is not None: + transformer_block = (block[0], block[1], block_index) + else: + transformer_block = None + attn1_replace_patch = transformer_patches_replace.get("attn1", {}) + block_attn1 = transformer_block + if block_attn1 not in attn1_replace_patch: + block_attn1 = block + + if block_attn1 in attn1_replace_patch: + if context_attn1 is None: + context_attn1 = n + value_attn1 = n + n = self.attn1.to_q(n) + context_attn1 = self.attn1.to_k(context_attn1) + value_attn1 = self.attn1.to_v(value_attn1) + n = attn1_replace_patch[block_attn1](n, context_attn1, value_attn1, extra_options) + n = self.attn1.to_out(n) + else: + n = self.attn1(n, context=context_attn1, value=value_attn1, transformer_options=extra_options) + + if "attn1_output_patch" in transformer_patches: + patch = transformer_patches["attn1_output_patch"] + for p in patch: + n = p(n, extra_options) + + x += n + if "middle_patch" in transformer_patches: + patch = transformer_patches["middle_patch"] + for p in patch: + x = p(x, extra_options) + + if self.attn2 is not None: + n = self.norm2(x) + if self.switch_temporal_ca_to_sa: + context_attn2 = n + else: + context_attn2 = context + value_attn2 = None + if "attn2_patch" in transformer_patches: + patch = transformer_patches["attn2_patch"] + value_attn2 = context_attn2 + for p in patch: + n, context_attn2, value_attn2 = p(n, context_attn2, value_attn2, extra_options) + + attn2_replace_patch = transformer_patches_replace.get("attn2", {}) + block_attn2 = transformer_block + if block_attn2 not in attn2_replace_patch: + block_attn2 = block + + if block_attn2 in attn2_replace_patch: + if value_attn2 is None: + value_attn2 = context_attn2 + n = self.attn2.to_q(n) + context_attn2 = self.attn2.to_k(context_attn2) + value_attn2 = self.attn2.to_v(value_attn2) + n = attn2_replace_patch[block_attn2](n, context_attn2, value_attn2, extra_options) + n = self.attn2.to_out(n) + else: + n = self.attn2(n, context=context_attn2, value=value_attn2, transformer_options=extra_options) + + if "attn2_output_patch" in transformer_patches: + patch = transformer_patches["attn2_output_patch"] + for p in patch: + n = p(n, extra_options) + + x += n + if self.is_res: + x_skip = x + x = self.ff(self.norm3(x)) + if self.is_res: + x += x_skip + + return x + + +class 
SpatialTransformer(nn.Module): + """ + Transformer block for image-like data. + First, project the input (aka embedding) + and reshape to b, t, d. + Then apply standard transformer action. + Finally, reshape to image + NEW: use_linear for more efficiency instead of the 1x1 convs + """ + def __init__(self, in_channels, n_heads, d_head, + depth=1, dropout=0., context_dim=None, + disable_self_attn=False, use_linear=False, + use_checkpoint=True, dtype=None, device=None, operations=ops): + super().__init__() + if exists(context_dim) and not isinstance(context_dim, list): + context_dim = [context_dim] * depth + self.in_channels = in_channels + inner_dim = n_heads * d_head + self.norm = operations.GroupNorm(num_groups=32, num_channels=in_channels, eps=1e-6, affine=True, dtype=dtype, device=device) + if not use_linear: + self.proj_in = operations.Conv2d(in_channels, + inner_dim, + kernel_size=1, + stride=1, + padding=0, dtype=dtype, device=device) + else: + self.proj_in = operations.Linear(in_channels, inner_dim, dtype=dtype, device=device) + + self.transformer_blocks = nn.ModuleList( + [BasicTransformerBlock(inner_dim, n_heads, d_head, dropout=dropout, context_dim=context_dim[d], + disable_self_attn=disable_self_attn, checkpoint=use_checkpoint, dtype=dtype, device=device, operations=operations) + for d in range(depth)] + ) + if not use_linear: + self.proj_out = operations.Conv2d(inner_dim,in_channels, + kernel_size=1, + stride=1, + padding=0, dtype=dtype, device=device) + else: + self.proj_out = operations.Linear(in_channels, inner_dim, dtype=dtype, device=device) + self.use_linear = use_linear + + def forward(self, x, context=None, transformer_options={}): + # note: if no context is given, cross-attention defaults to self-attention + if not isinstance(context, list): + context = [context] * len(self.transformer_blocks) + b, c, h, w = x.shape + x_in = x + x = self.norm(x) + if not self.use_linear: + x = self.proj_in(x) + x = rearrange(x, 'b c h w -> b (h w) c').contiguous() + if self.use_linear: + x = self.proj_in(x) + for i, block in enumerate(self.transformer_blocks): + transformer_options["block_index"] = i + x = block(x, context=context[i], transformer_options=transformer_options) + if self.use_linear: + x = self.proj_out(x) + x = rearrange(x, 'b (h w) c -> b c h w', h=h, w=w).contiguous() + if not self.use_linear: + x = self.proj_out(x) + return x + x_in + + +class SpatialVideoTransformer(SpatialTransformer): + def __init__( + self, + in_channels, + n_heads, + d_head, + depth=1, + dropout=0.0, + use_linear=False, + context_dim=None, + use_spatial_context=False, + timesteps=None, + merge_strategy: str = "fixed", + merge_factor: float = 0.5, + time_context_dim=None, + ff_in=False, + checkpoint=False, + time_depth=1, + disable_self_attn=False, + disable_temporal_crossattention=False, + max_time_embed_period: int = 10000, + dtype=None, device=None, operations=ops + ): + super().__init__( + in_channels, + n_heads, + d_head, + depth=depth, + dropout=dropout, + use_checkpoint=checkpoint, + context_dim=context_dim, + use_linear=use_linear, + disable_self_attn=disable_self_attn, + dtype=dtype, device=device, operations=operations + ) + self.time_depth = time_depth + self.depth = depth + self.max_time_embed_period = max_time_embed_period + + time_mix_d_head = d_head + n_time_mix_heads = n_heads + + time_mix_inner_dim = int(time_mix_d_head * n_time_mix_heads) + + inner_dim = n_heads * d_head + if use_spatial_context: + time_context_dim = context_dim + + self.time_stack = nn.ModuleList( + [ + 
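+                # one temporal mixing block per spatial transformer block; these attend
+                # over the frame axis and are blended back into the spatial path by
+                # AlphaBlender (self.time_mixer) in forward()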
BasicTransformerBlock( + inner_dim, + n_time_mix_heads, + time_mix_d_head, + dropout=dropout, + context_dim=time_context_dim, + # timesteps=timesteps, + checkpoint=checkpoint, + ff_in=ff_in, + inner_dim=time_mix_inner_dim, + disable_self_attn=disable_self_attn, + disable_temporal_crossattention=disable_temporal_crossattention, + dtype=dtype, device=device, operations=operations + ) + for _ in range(self.depth) + ] + ) + + assert len(self.time_stack) == len(self.transformer_blocks) + + self.use_spatial_context = use_spatial_context + self.in_channels = in_channels + + time_embed_dim = self.in_channels * 4 + self.time_pos_embed = nn.Sequential( + operations.Linear(self.in_channels, time_embed_dim, dtype=dtype, device=device), + nn.SiLU(), + operations.Linear(time_embed_dim, self.in_channels, dtype=dtype, device=device), + ) + + self.time_mixer = AlphaBlender( + alpha=merge_factor, merge_strategy=merge_strategy + ) + + def forward( + self, + x: torch.Tensor, + context: Optional[torch.Tensor] = None, + time_context: Optional[torch.Tensor] = None, + timesteps: Optional[int] = None, + image_only_indicator: Optional[torch.Tensor] = None, + transformer_options={} + ) -> torch.Tensor: + _, _, h, w = x.shape + x_in = x + spatial_context = None + if exists(context): + spatial_context = context + + if self.use_spatial_context: + assert ( + context.ndim == 3 + ), f"n dims of spatial context should be 3 but are {context.ndim}" + + if time_context is None: + time_context = context + time_context_first_timestep = time_context[::timesteps] + time_context = repeat( + time_context_first_timestep, "b ... -> (b n) ...", n=h * w + ) + elif time_context is not None and not self.use_spatial_context: + time_context = repeat(time_context, "b ... -> (b n) ...", n=h * w) + if time_context.ndim == 2: + time_context = rearrange(time_context, "b c -> b 1 c") + + x = self.norm(x) + if not self.use_linear: + x = self.proj_in(x) + x = rearrange(x, "b c h w -> b (h w) c") + if self.use_linear: + x = self.proj_in(x) + + num_frames = torch.arange(timesteps, device=x.device) + num_frames = repeat(num_frames, "t -> b t", b=x.shape[0] // timesteps) + num_frames = rearrange(num_frames, "b t -> (b t)") + t_emb = timestep_embedding(num_frames, self.in_channels, repeat_only=False, max_period=self.max_time_embed_period).to(x.dtype) + emb = self.time_pos_embed(t_emb) + emb = emb[:, None, :] + + for it_, (block, mix_block) in enumerate( + zip(self.transformer_blocks, self.time_stack) + ): + transformer_options["block_index"] = it_ + x = block( + x, + context=spatial_context, + transformer_options=transformer_options, + ) + + x_mix = x + x_mix = x_mix + emb + + B, S, C = x_mix.shape + x_mix = rearrange(x_mix, "(b t) s c -> (b s) t c", t=timesteps) + x_mix = mix_block(x_mix, context=time_context) #TODO: transformer_options + x_mix = rearrange( + x_mix, "(b s) t c -> (b t) s c", s=S, b=B // timesteps, c=C, t=timesteps + ) + + x = self.time_mixer(x_spatial=x, x_temporal=x_mix, image_only_indicator=image_only_indicator) + + if self.use_linear: + x = self.proj_out(x) + x = rearrange(x, "b (h w) c -> b c h w", h=h, w=w) + if not self.use_linear: + x = self.proj_out(x) + out = x + x_in + return out + + diff --git a/ldm_patched/ldm/modules/diffusionmodules/__init__.py b/ldm_patched/ldm/modules/diffusionmodules/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/ldm_patched/ldm/modules/diffusionmodules/model.py b/ldm_patched/ldm/modules/diffusionmodules/model.py 
new file mode 100644 index 0000000000000000000000000000000000000000..95b5052d8458332675ec2f92c537750133b0ff95 --- /dev/null +++ b/ldm_patched/ldm/modules/diffusionmodules/model.py @@ -0,0 +1,657 @@ +# 1st edit by https://github.com/CompVis/latent-diffusion +# 2nd edit by https://github.com/Stability-AI/stablediffusion +# 3rd edit by https://github.com/Stability-AI/generative-models +# 4th edit by https://github.com/comfyanonymous/ComfyUI +# 5th edit by Forge + + +# pytorch_diffusion + derived encoder decoder +import math +import torch +import torch.nn as nn +import numpy as np +from einops import rearrange +from typing import Optional, Any + +from ldm_patched.modules import model_management +import ldm_patched.modules.ops +ops = ldm_patched.modules.ops.disable_weight_init + +if model_management.xformers_enabled_vae(): + import xformers + import xformers.ops + +def get_timestep_embedding(timesteps, embedding_dim): + """ + This matches the implementation in Denoising Diffusion Probabilistic Models: + From Fairseq. + Build sinusoidal embeddings. + This matches the implementation in tensor2tensor, but differs slightly + from the description in Section 3.5 of "Attention Is All You Need". + """ + assert len(timesteps.shape) == 1 + + half_dim = embedding_dim // 2 + emb = math.log(10000) / (half_dim - 1) + emb = torch.exp(torch.arange(half_dim, dtype=torch.float32) * -emb) + emb = emb.to(device=timesteps.device) + emb = timesteps.float()[:, None] * emb[None, :] + emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1) + if embedding_dim % 2 == 1: # zero pad + emb = torch.nn.functional.pad(emb, (0,1,0,0)) + return emb + + +def nonlinearity(x): + # swish + return x*torch.sigmoid(x) + + +def Normalize(in_channels, num_groups=32): + return ops.GroupNorm(num_groups=num_groups, num_channels=in_channels, eps=1e-6, affine=True) + + +class Upsample(nn.Module): + def __init__(self, in_channels, with_conv): + super().__init__() + self.with_conv = with_conv + if self.with_conv: + self.conv = ops.Conv2d(in_channels, + in_channels, + kernel_size=3, + stride=1, + padding=1) + + def forward(self, x): + try: + x = torch.nn.functional.interpolate(x, scale_factor=2.0, mode="nearest") + except: #operation not implemented for bf16 + b, c, h, w = x.shape + out = torch.empty((b, c, h*2, w*2), dtype=x.dtype, layout=x.layout, device=x.device) + split = 8 + l = out.shape[1] // split + for i in range(0, out.shape[1], l): + out[:,i:i+l] = torch.nn.functional.interpolate(x[:,i:i+l].to(torch.float32), scale_factor=2.0, mode="nearest").to(x.dtype) + del x + x = out + + if self.with_conv: + x = self.conv(x) + return x + + +class Downsample(nn.Module): + def __init__(self, in_channels, with_conv): + super().__init__() + self.with_conv = with_conv + if self.with_conv: + # no asymmetric padding in torch conv, must do it ourselves + self.conv = ops.Conv2d(in_channels, + in_channels, + kernel_size=3, + stride=2, + padding=0) + + def forward(self, x): + if self.with_conv: + pad = (0,1,0,1) + x = torch.nn.functional.pad(x, pad, mode="constant", value=0) + x = self.conv(x) + else: + x = torch.nn.functional.avg_pool2d(x, kernel_size=2, stride=2) + return x + + +class ResnetBlock(nn.Module): + def __init__(self, *, in_channels, out_channels=None, conv_shortcut=False, + dropout, temb_channels=512): + super().__init__() + self.in_channels = in_channels + out_channels = in_channels if out_channels is None else out_channels + self.out_channels = out_channels + self.use_conv_shortcut = conv_shortcut + + self.swish = 
torch.nn.SiLU(inplace=True) + self.norm1 = Normalize(in_channels) + self.conv1 = ops.Conv2d(in_channels, + out_channels, + kernel_size=3, + stride=1, + padding=1) + if temb_channels > 0: + self.temb_proj = ops.Linear(temb_channels, + out_channels) + self.norm2 = Normalize(out_channels) + self.dropout = torch.nn.Dropout(dropout, inplace=True) + self.conv2 = ops.Conv2d(out_channels, + out_channels, + kernel_size=3, + stride=1, + padding=1) + if self.in_channels != self.out_channels: + if self.use_conv_shortcut: + self.conv_shortcut = ops.Conv2d(in_channels, + out_channels, + kernel_size=3, + stride=1, + padding=1) + else: + self.nin_shortcut = ops.Conv2d(in_channels, + out_channels, + kernel_size=1, + stride=1, + padding=0) + + def forward(self, x, temb): + h = x + h = self.norm1(h) + h = self.swish(h) + h = self.conv1(h) + + if temb is not None: + h = h + self.temb_proj(self.swish(temb))[:,:,None,None] + + h = self.norm2(h) + h = self.swish(h) + h = self.dropout(h) + h = self.conv2(h) + + if self.in_channels != self.out_channels: + if self.use_conv_shortcut: + x = self.conv_shortcut(x) + else: + x = self.nin_shortcut(x) + + return x+h + +def slice_attention(q, k, v): + r1 = torch.zeros_like(k, device=q.device) + scale = (int(q.shape[-1])**(-0.5)) + + mem_free_total = model_management.get_free_memory(q.device) + + gb = 1024 ** 3 + tensor_size = q.shape[0] * q.shape[1] * k.shape[2] * q.element_size() + modifier = 3 if q.element_size() == 2 else 2.5 + mem_required = tensor_size * modifier + steps = 1 + + if mem_required > mem_free_total: + steps = 2**(math.ceil(math.log(mem_required / mem_free_total, 2))) + + while True: + try: + slice_size = q.shape[1] // steps if (q.shape[1] % steps) == 0 else q.shape[1] + for i in range(0, q.shape[1], slice_size): + end = i + slice_size + s1 = torch.bmm(q[:, i:end], k) * scale + + s2 = torch.nn.functional.softmax(s1, dim=2).permute(0,2,1) + del s1 + + r1[:, :, i:end] = torch.bmm(v, s2) + del s2 + break + except model_management.OOM_EXCEPTION as e: + model_management.soft_empty_cache(True) + steps *= 2 + if steps > 128: + raise e + print("out of memory error, increasing steps and trying again", steps) + + return r1 + +def normal_attention(q, k, v): + # compute attention + b,c,h,w = q.shape + + q = q.reshape(b,c,h*w) + q = q.permute(0,2,1) # b,hw,c + k = k.reshape(b,c,h*w) # b,c,hw + v = v.reshape(b,c,h*w) + + r1 = slice_attention(q, k, v) + h_ = r1.reshape(b,c,h,w) + del r1 + return h_ + +def xformers_attention(q, k, v): + # compute attention + B, C, H, W = q.shape + q, k, v = map( + lambda t: t.view(B, C, -1).transpose(1, 2).contiguous(), + (q, k, v), + ) + + try: + out = xformers.ops.memory_efficient_attention(q, k, v, attn_bias=None) + out = out.transpose(1, 2).reshape(B, C, H, W) + except NotImplementedError as e: + out = slice_attention(q.view(B, -1, C), k.view(B, -1, C).transpose(1, 2), v.view(B, -1, C).transpose(1, 2)).reshape(B, C, H, W) + return out + +def pytorch_attention(q, k, v): + # compute attention + B, C, H, W = q.shape + q, k, v = map( + lambda t: t.view(B, 1, C, -1).transpose(2, 3).contiguous(), + (q, k, v), + ) + + try: + out = torch.nn.functional.scaled_dot_product_attention(q, k, v, attn_mask=None, dropout_p=0.0, is_causal=False) + out = out.transpose(2, 3).reshape(B, C, H, W) + except model_management.OOM_EXCEPTION as e: + print("scaled_dot_product_attention OOMed: switched to slice attention") + out = slice_attention(q.view(B, -1, C), k.view(B, -1, C).transpose(1, 2), v.view(B, -1, C).transpose(1, 2)).reshape(B, C, H, W) + return out 
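For reference, the VAE attention helpers above (slice_attention, normal_attention, xformers_attention, pytorch_attention) all compute the same single-head self-attention over the H*W spatial positions of a (B, C, H, W) feature map; they differ only in backend and memory strategy. Below is a minimal standalone sketch of that flatten-attend-reshape pattern, not part of this patch; the helper names are hypothetical and it assumes PyTorch >= 2.0 for scaled_dot_product_attention, with a naive bmm reference used as a cross-check.

import torch
import torch.nn.functional as F

def sdpa_spatial_attention(q, k, v):
    # (B, C, H, W) -> (B, 1, H*W, C): one head, H*W tokens of width C
    B, C, H, W = q.shape
    q_, k_, v_ = (t.view(B, 1, C, -1).transpose(2, 3) for t in (q, k, v))
    out = F.scaled_dot_product_attention(q_, k_, v_)  # softmax(QK^T / sqrt(C)) V
    return out.transpose(2, 3).reshape(B, C, H, W)

def reference_spatial_attention(q, k, v):
    # same computation with explicit bmm, mirroring normal_attention above
    B, C, H, W = q.shape
    q_ = q.reshape(B, C, H * W).permute(0, 2, 1)   # B, HW, C
    k_ = k.reshape(B, C, H * W)                    # B, C, HW
    v_ = v.reshape(B, C, H * W).permute(0, 2, 1)   # B, HW, C
    attn = torch.softmax(torch.bmm(q_, k_) * C ** -0.5, dim=-1)
    return torch.bmm(attn, v_).permute(0, 2, 1).reshape(B, C, H, W)

q, k, v = (torch.randn(2, 16, 8, 8) for _ in range(3))
assert torch.allclose(sdpa_spatial_attention(q, k, v),
                      reference_spatial_attention(q, k, v), atol=1e-4)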
+ + +class AttnBlock(nn.Module): + def __init__(self, in_channels): + super().__init__() + self.in_channels = in_channels + + self.norm = Normalize(in_channels) + self.q = ops.Conv2d(in_channels, + in_channels, + kernel_size=1, + stride=1, + padding=0) + self.k = ops.Conv2d(in_channels, + in_channels, + kernel_size=1, + stride=1, + padding=0) + self.v = ops.Conv2d(in_channels, + in_channels, + kernel_size=1, + stride=1, + padding=0) + self.proj_out = ops.Conv2d(in_channels, + in_channels, + kernel_size=1, + stride=1, + padding=0) + + if model_management.xformers_enabled_vae(): + print("Using xformers attention in VAE") + self.optimized_attention = xformers_attention + elif model_management.pytorch_attention_enabled(): + print("Using pytorch attention in VAE") + self.optimized_attention = pytorch_attention + else: + print("Using split attention in VAE") + self.optimized_attention = normal_attention + + def forward(self, x): + h_ = x + h_ = self.norm(h_) + q = self.q(h_) + k = self.k(h_) + v = self.v(h_) + + h_ = self.optimized_attention(q, k, v) + + h_ = self.proj_out(h_) + + return x+h_ + + +def make_attn(in_channels, attn_type="vanilla", attn_kwargs=None): + return AttnBlock(in_channels) + + +class Model(nn.Module): + def __init__(self, *, ch, out_ch, ch_mult=(1,2,4,8), num_res_blocks, + attn_resolutions, dropout=0.0, resamp_with_conv=True, in_channels, + resolution, use_timestep=True, use_linear_attn=False, attn_type="vanilla"): + super().__init__() + if use_linear_attn: attn_type = "linear" + self.ch = ch + self.temb_ch = self.ch*4 + self.num_resolutions = len(ch_mult) + self.num_res_blocks = num_res_blocks + self.resolution = resolution + self.in_channels = in_channels + + self.use_timestep = use_timestep + if self.use_timestep: + # timestep embedding + self.temb = nn.Module() + self.temb.dense = nn.ModuleList([ + ops.Linear(self.ch, + self.temb_ch), + ops.Linear(self.temb_ch, + self.temb_ch), + ]) + + # downsampling + self.conv_in = ops.Conv2d(in_channels, + self.ch, + kernel_size=3, + stride=1, + padding=1) + + curr_res = resolution + in_ch_mult = (1,)+tuple(ch_mult) + self.down = nn.ModuleList() + for i_level in range(self.num_resolutions): + block = nn.ModuleList() + attn = nn.ModuleList() + block_in = ch*in_ch_mult[i_level] + block_out = ch*ch_mult[i_level] + for i_block in range(self.num_res_blocks): + block.append(ResnetBlock(in_channels=block_in, + out_channels=block_out, + temb_channels=self.temb_ch, + dropout=dropout)) + block_in = block_out + if curr_res in attn_resolutions: + attn.append(make_attn(block_in, attn_type=attn_type)) + down = nn.Module() + down.block = block + down.attn = attn + if i_level != self.num_resolutions-1: + down.downsample = Downsample(block_in, resamp_with_conv) + curr_res = curr_res // 2 + self.down.append(down) + + # middle + self.mid = nn.Module() + self.mid.block_1 = ResnetBlock(in_channels=block_in, + out_channels=block_in, + temb_channels=self.temb_ch, + dropout=dropout) + self.mid.attn_1 = make_attn(block_in, attn_type=attn_type) + self.mid.block_2 = ResnetBlock(in_channels=block_in, + out_channels=block_in, + temb_channels=self.temb_ch, + dropout=dropout) + + # upsampling + self.up = nn.ModuleList() + for i_level in reversed(range(self.num_resolutions)): + block = nn.ModuleList() + attn = nn.ModuleList() + block_out = ch*ch_mult[i_level] + skip_in = ch*ch_mult[i_level] + for i_block in range(self.num_res_blocks+1): + if i_block == self.num_res_blocks: + skip_in = ch*in_ch_mult[i_level] + block.append(ResnetBlock(in_channels=block_in+skip_in, 
+ out_channels=block_out, + temb_channels=self.temb_ch, + dropout=dropout)) + block_in = block_out + if curr_res in attn_resolutions: + attn.append(make_attn(block_in, attn_type=attn_type)) + up = nn.Module() + up.block = block + up.attn = attn + if i_level != 0: + up.upsample = Upsample(block_in, resamp_with_conv) + curr_res = curr_res * 2 + self.up.insert(0, up) # prepend to get consistent order + + # end + self.norm_out = Normalize(block_in) + self.conv_out = ops.Conv2d(block_in, + out_ch, + kernel_size=3, + stride=1, + padding=1) + + def forward(self, x, t=None, context=None): + #assert x.shape[2] == x.shape[3] == self.resolution + if context is not None: + # assume aligned context, cat along channel axis + x = torch.cat((x, context), dim=1) + if self.use_timestep: + # timestep embedding + assert t is not None + temb = get_timestep_embedding(t, self.ch) + temb = self.temb.dense[0](temb) + temb = nonlinearity(temb) + temb = self.temb.dense[1](temb) + else: + temb = None + + # downsampling + hs = [self.conv_in(x)] + for i_level in range(self.num_resolutions): + for i_block in range(self.num_res_blocks): + h = self.down[i_level].block[i_block](hs[-1], temb) + if len(self.down[i_level].attn) > 0: + h = self.down[i_level].attn[i_block](h) + hs.append(h) + if i_level != self.num_resolutions-1: + hs.append(self.down[i_level].downsample(hs[-1])) + + # middle + h = hs[-1] + h = self.mid.block_1(h, temb) + h = self.mid.attn_1(h) + h = self.mid.block_2(h, temb) + + # upsampling + for i_level in reversed(range(self.num_resolutions)): + for i_block in range(self.num_res_blocks+1): + h = self.up[i_level].block[i_block]( + torch.cat([h, hs.pop()], dim=1), temb) + if len(self.up[i_level].attn) > 0: + h = self.up[i_level].attn[i_block](h) + if i_level != 0: + h = self.up[i_level].upsample(h) + + # end + h = self.norm_out(h) + h = nonlinearity(h) + h = self.conv_out(h) + return h + + def get_last_layer(self): + return self.conv_out.weight + + +class Encoder(nn.Module): + def __init__(self, *, ch, out_ch, ch_mult=(1,2,4,8), num_res_blocks, + attn_resolutions, dropout=0.0, resamp_with_conv=True, in_channels, + resolution, z_channels, double_z=True, use_linear_attn=False, attn_type="vanilla", + **ignore_kwargs): + super().__init__() + if use_linear_attn: attn_type = "linear" + self.ch = ch + self.temb_ch = 0 + self.num_resolutions = len(ch_mult) + self.num_res_blocks = num_res_blocks + self.resolution = resolution + self.in_channels = in_channels + + # downsampling + self.conv_in = ops.Conv2d(in_channels, + self.ch, + kernel_size=3, + stride=1, + padding=1) + + curr_res = resolution + in_ch_mult = (1,)+tuple(ch_mult) + self.in_ch_mult = in_ch_mult + self.down = nn.ModuleList() + for i_level in range(self.num_resolutions): + block = nn.ModuleList() + attn = nn.ModuleList() + block_in = ch*in_ch_mult[i_level] + block_out = ch*ch_mult[i_level] + for i_block in range(self.num_res_blocks): + block.append(ResnetBlock(in_channels=block_in, + out_channels=block_out, + temb_channels=self.temb_ch, + dropout=dropout)) + block_in = block_out + if curr_res in attn_resolutions: + attn.append(make_attn(block_in, attn_type=attn_type)) + down = nn.Module() + down.block = block + down.attn = attn + if i_level != self.num_resolutions-1: + down.downsample = Downsample(block_in, resamp_with_conv) + curr_res = curr_res // 2 + self.down.append(down) + + # middle + self.mid = nn.Module() + self.mid.block_1 = ResnetBlock(in_channels=block_in, + out_channels=block_in, + temb_channels=self.temb_ch, + dropout=dropout) + 
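+        # bottleneck at the lowest resolution: a single attention block between two ResnetBlocks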
self.mid.attn_1 = make_attn(block_in, attn_type=attn_type) + self.mid.block_2 = ResnetBlock(in_channels=block_in, + out_channels=block_in, + temb_channels=self.temb_ch, + dropout=dropout) + + # end + self.norm_out = Normalize(block_in) + self.conv_out = ops.Conv2d(block_in, + 2*z_channels if double_z else z_channels, + kernel_size=3, + stride=1, + padding=1) + + def forward(self, x): + # timestep embedding + temb = None + # downsampling + h = self.conv_in(x) + for i_level in range(self.num_resolutions): + for i_block in range(self.num_res_blocks): + h = self.down[i_level].block[i_block](h, temb) + if len(self.down[i_level].attn) > 0: + h = self.down[i_level].attn[i_block](h) + if i_level != self.num_resolutions-1: + h = self.down[i_level].downsample(h) + + # middle + h = self.mid.block_1(h, temb) + h = self.mid.attn_1(h) + h = self.mid.block_2(h, temb) + + # end + h = self.norm_out(h) + h = nonlinearity(h) + h = self.conv_out(h) + return h + + +class Decoder(nn.Module): + def __init__(self, *, ch, out_ch, ch_mult=(1,2,4,8), num_res_blocks, + attn_resolutions, dropout=0.0, resamp_with_conv=True, in_channels, + resolution, z_channels, give_pre_end=False, tanh_out=False, use_linear_attn=False, + conv_out_op=ops.Conv2d, + resnet_op=ResnetBlock, + attn_op=AttnBlock, + **ignorekwargs): + super().__init__() + if use_linear_attn: attn_type = "linear" + self.ch = ch + self.temb_ch = 0 + self.num_resolutions = len(ch_mult) + self.num_res_blocks = num_res_blocks + self.resolution = resolution + self.in_channels = in_channels + self.give_pre_end = give_pre_end + self.tanh_out = tanh_out + + # compute in_ch_mult, block_in and curr_res at lowest res + in_ch_mult = (1,)+tuple(ch_mult) + block_in = ch*ch_mult[self.num_resolutions-1] + curr_res = resolution // 2**(self.num_resolutions-1) + self.z_shape = (1,z_channels,curr_res,curr_res) + print("Working with z of shape {} = {} dimensions.".format( + self.z_shape, np.prod(self.z_shape))) + + # z to block_in + self.conv_in = ops.Conv2d(z_channels, + block_in, + kernel_size=3, + stride=1, + padding=1) + + # middle + self.mid = nn.Module() + self.mid.block_1 = resnet_op(in_channels=block_in, + out_channels=block_in, + temb_channels=self.temb_ch, + dropout=dropout) + self.mid.attn_1 = attn_op(block_in) + self.mid.block_2 = resnet_op(in_channels=block_in, + out_channels=block_in, + temb_channels=self.temb_ch, + dropout=dropout) + + # upsampling + self.up = nn.ModuleList() + for i_level in reversed(range(self.num_resolutions)): + block = nn.ModuleList() + attn = nn.ModuleList() + block_out = ch*ch_mult[i_level] + for i_block in range(self.num_res_blocks+1): + block.append(resnet_op(in_channels=block_in, + out_channels=block_out, + temb_channels=self.temb_ch, + dropout=dropout)) + block_in = block_out + if curr_res in attn_resolutions: + attn.append(attn_op(block_in)) + up = nn.Module() + up.block = block + up.attn = attn + if i_level != 0: + up.upsample = Upsample(block_in, resamp_with_conv) + curr_res = curr_res * 2 + self.up.insert(0, up) # prepend to get consistent order + + # end + self.norm_out = Normalize(block_in) + self.conv_out = conv_out_op(block_in, + out_ch, + kernel_size=3, + stride=1, + padding=1) + + def forward(self, z, **kwargs): + #assert z.shape[1:] == self.z_shape[1:] + self.last_z_shape = z.shape + + # timestep embedding + temb = None + + # z to block_in + h = self.conv_in(z) + + # middle + h = self.mid.block_1(h, temb, **kwargs) + h = self.mid.attn_1(h, **kwargs) + h = self.mid.block_2(h, temb, **kwargs) + + # upsampling + for i_level in 
reversed(range(self.num_resolutions)): + for i_block in range(self.num_res_blocks+1): + h = self.up[i_level].block[i_block](h, temb, **kwargs) + if len(self.up[i_level].attn) > 0: + h = self.up[i_level].attn[i_block](h, **kwargs) + if i_level != 0: + h = self.up[i_level].upsample(h) + + # end + if self.give_pre_end: + return h + + h = self.norm_out(h) + h = nonlinearity(h) + h = self.conv_out(h, **kwargs) + if self.tanh_out: + h = torch.tanh(h) + return h diff --git a/ldm_patched/ldm/modules/diffusionmodules/openaimodel.py b/ldm_patched/ldm/modules/diffusionmodules/openaimodel.py new file mode 100644 index 0000000000000000000000000000000000000000..9904e74411de4bd8ac387ff30d11e3669fcdebdf --- /dev/null +++ b/ldm_patched/ldm/modules/diffusionmodules/openaimodel.py @@ -0,0 +1,933 @@ +# 1st edit by https://github.com/CompVis/latent-diffusion +# 2nd edit by https://github.com/Stability-AI/stablediffusion +# 3rd edit by https://github.com/Stability-AI/generative-models +# 4th edit by https://github.com/comfyanonymous/ComfyUI +# 5th edit by Forge + + +from abc import abstractmethod + +import torch as th +import torch.nn as nn +import torch.nn.functional as F +from einops import rearrange + +from .util import ( + checkpoint, + avg_pool_nd, + zero_module, + timestep_embedding, + AlphaBlender, +) +from ..attention import SpatialTransformer, SpatialVideoTransformer, default +from ldm_patched.ldm.util import exists +import ldm_patched.modules.ops +ops = ldm_patched.modules.ops.disable_weight_init + +class TimestepBlock(nn.Module): + """ + Any module where forward() takes timestep embeddings as a second argument. + """ + + @abstractmethod + def forward(self, x, emb): + """ + Apply the module to `x` given `emb` timestep embeddings. + """ + +#This is needed because accelerate makes a copy of transformer_options which breaks "transformer_index" +def forward_timestep_embed(ts, x, emb, context=None, transformer_options={}, output_shape=None, time_context=None, num_video_frames=None, image_only_indicator=None): + block_inner_modifiers = transformer_options.get("block_inner_modifiers", []) + + for layer_index, layer in enumerate(ts): + for modifier in block_inner_modifiers: + x = modifier(x, 'before', layer, layer_index, ts, transformer_options) + + if isinstance(layer, VideoResBlock): + x = layer(x, emb, num_video_frames, image_only_indicator) + elif isinstance(layer, TimestepBlock): + x = layer(x, emb) + elif isinstance(layer, SpatialVideoTransformer): + x = layer(x, context, time_context, num_video_frames, image_only_indicator, transformer_options) + if "transformer_index" in transformer_options: + transformer_options["transformer_index"] += 1 + elif isinstance(layer, SpatialTransformer): + x = layer(x, context, transformer_options) + if "transformer_index" in transformer_options: + transformer_options["transformer_index"] += 1 + elif isinstance(layer, Upsample): + x = layer(x, output_shape=output_shape) + else: + x = layer(x) + + for modifier in block_inner_modifiers: + x = modifier(x, 'after', layer, layer_index, ts, transformer_options) + return x + +class TimestepEmbedSequential(nn.Sequential, TimestepBlock): + """ + A sequential module that passes timestep embeddings to the children that + support it as an extra input. + """ + + def forward(self, *args, **kwargs): + return forward_timestep_embed(self, *args, **kwargs) + +class Upsample(nn.Module): + """ + An upsampling layer with an optional convolution. + :param channels: channels in the inputs and outputs. 
+ :param use_conv: a bool determining if a convolution is applied. + :param dims: determines if the signal is 1D, 2D, or 3D. If 3D, then + upsampling occurs in the inner-two dimensions. + """ + + def __init__(self, channels, use_conv, dims=2, out_channels=None, padding=1, dtype=None, device=None, operations=ops): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.dims = dims + if use_conv: + self.conv = operations.conv_nd(dims, self.channels, self.out_channels, 3, padding=padding, dtype=dtype, device=device) + + def forward(self, x, output_shape=None): + assert x.shape[1] == self.channels + if self.dims == 3: + shape = [x.shape[2], x.shape[3] * 2, x.shape[4] * 2] + if output_shape is not None: + shape[1] = output_shape[3] + shape[2] = output_shape[4] + else: + shape = [x.shape[2] * 2, x.shape[3] * 2] + if output_shape is not None: + shape[0] = output_shape[2] + shape[1] = output_shape[3] + + x = F.interpolate(x, size=shape, mode="nearest") + if self.use_conv: + x = self.conv(x) + return x + +class Downsample(nn.Module): + """ + A downsampling layer with an optional convolution. + :param channels: channels in the inputs and outputs. + :param use_conv: a bool determining if a convolution is applied. + :param dims: determines if the signal is 1D, 2D, or 3D. If 3D, then + downsampling occurs in the inner-two dimensions. + """ + + def __init__(self, channels, use_conv, dims=2, out_channels=None, padding=1, dtype=None, device=None, operations=ops): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.dims = dims + stride = 2 if dims != 3 else (1, 2, 2) + if use_conv: + self.op = operations.conv_nd( + dims, self.channels, self.out_channels, 3, stride=stride, padding=padding, dtype=dtype, device=device + ) + else: + assert self.channels == self.out_channels + self.op = avg_pool_nd(dims, kernel_size=stride, stride=stride) + + def forward(self, x): + assert x.shape[1] == self.channels + return self.op(x) + + +class ResBlock(TimestepBlock): + """ + A residual block that can optionally change the number of channels. + :param channels: the number of input channels. + :param emb_channels: the number of timestep embedding channels. + :param dropout: the rate of dropout. + :param out_channels: if specified, the number of out channels. + :param use_conv: if True and out_channels is specified, use a spatial + convolution instead of a smaller 1x1 convolution to change the + channels in the skip connection. + :param dims: determines if the signal is 1D, 2D, or 3D. + :param use_checkpoint: if True, use gradient checkpointing on this module. + :param up: if True, use this block for upsampling. + :param down: if True, use this block for downsampling. 
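A quick sketch of what the Upsample and Downsample modules above do to the spatial dimensions in the 2D case (shapes below are assumed, chosen only for illustration):

import torch
import torch.nn.functional as F

x = torch.randn(1, 8, 16, 16)
up = F.interpolate(x, size=(x.shape[2] * 2, x.shape[3] * 2), mode="nearest")  # 16x16 -> 32x32
down = F.avg_pool2d(x, kernel_size=2, stride=2)                               # 16x16 -> 8x8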
+ """ + + def __init__( + self, + channels, + emb_channels, + dropout, + out_channels=None, + use_conv=False, + use_scale_shift_norm=False, + dims=2, + use_checkpoint=False, + up=False, + down=False, + kernel_size=3, + exchange_temb_dims=False, + skip_t_emb=False, + dtype=None, + device=None, + operations=ops + ): + super().__init__() + self.channels = channels + self.emb_channels = emb_channels + self.dropout = dropout + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.use_checkpoint = use_checkpoint + self.use_scale_shift_norm = use_scale_shift_norm + self.exchange_temb_dims = exchange_temb_dims + + if isinstance(kernel_size, list): + padding = [k // 2 for k in kernel_size] + else: + padding = kernel_size // 2 + + self.in_layers = nn.Sequential( + operations.GroupNorm(32, channels, dtype=dtype, device=device), + nn.SiLU(), + operations.conv_nd(dims, channels, self.out_channels, kernel_size, padding=padding, dtype=dtype, device=device), + ) + + self.updown = up or down + + if up: + self.h_upd = Upsample(channels, False, dims, dtype=dtype, device=device) + self.x_upd = Upsample(channels, False, dims, dtype=dtype, device=device) + elif down: + self.h_upd = Downsample(channels, False, dims, dtype=dtype, device=device) + self.x_upd = Downsample(channels, False, dims, dtype=dtype, device=device) + else: + self.h_upd = self.x_upd = nn.Identity() + + self.skip_t_emb = skip_t_emb + if self.skip_t_emb: + self.emb_layers = None + self.exchange_temb_dims = False + else: + self.emb_layers = nn.Sequential( + nn.SiLU(), + operations.Linear( + emb_channels, + 2 * self.out_channels if use_scale_shift_norm else self.out_channels, dtype=dtype, device=device + ), + ) + self.out_layers = nn.Sequential( + operations.GroupNorm(32, self.out_channels, dtype=dtype, device=device), + nn.SiLU(), + nn.Dropout(p=dropout), + operations.conv_nd(dims, self.out_channels, self.out_channels, kernel_size, padding=padding, dtype=dtype, device=device) + , + ) + + if self.out_channels == channels: + self.skip_connection = nn.Identity() + elif use_conv: + self.skip_connection = operations.conv_nd( + dims, channels, self.out_channels, kernel_size, padding=padding, dtype=dtype, device=device + ) + else: + self.skip_connection = operations.conv_nd(dims, channels, self.out_channels, 1, dtype=dtype, device=device) + + def forward(self, x, emb): + """ + Apply the block to a Tensor, conditioned on a timestep embedding. + :param x: an [N x C x ...] Tensor of features. + :param emb: an [N x emb_channels] Tensor of timestep embeddings. + :return: an [N x C x ...] Tensor of outputs. + """ + return checkpoint( + self._forward, (x, emb), self.parameters(), self.use_checkpoint + ) + + + def _forward(self, x, emb): + if self.updown: + in_rest, in_conv = self.in_layers[:-1], self.in_layers[-1] + h = in_rest(x) + h = self.h_upd(h) + x = self.x_upd(x) + h = in_conv(h) + else: + h = self.in_layers(x) + + emb_out = None + if not self.skip_t_emb: + emb_out = self.emb_layers(emb).type(h.dtype) + while len(emb_out.shape) < len(h.shape): + emb_out = emb_out[..., None] + if self.use_scale_shift_norm: + out_norm, out_rest = self.out_layers[0], self.out_layers[1:] + h = out_norm(h) + if emb_out is not None: + scale, shift = th.chunk(emb_out, 2, dim=1) + h *= (1 + scale) + h += shift + h = out_rest(h) + else: + if emb_out is not None: + if self.exchange_temb_dims: + emb_out = rearrange(emb_out, "b t c ... 
-> b c t ...") + h = h + emb_out + h = self.out_layers(h) + return self.skip_connection(x) + h + + +class VideoResBlock(ResBlock): + def __init__( + self, + channels: int, + emb_channels: int, + dropout: float, + video_kernel_size=3, + merge_strategy: str = "fixed", + merge_factor: float = 0.5, + out_channels=None, + use_conv: bool = False, + use_scale_shift_norm: bool = False, + dims: int = 2, + use_checkpoint: bool = False, + up: bool = False, + down: bool = False, + dtype=None, + device=None, + operations=ops + ): + super().__init__( + channels, + emb_channels, + dropout, + out_channels=out_channels, + use_conv=use_conv, + use_scale_shift_norm=use_scale_shift_norm, + dims=dims, + use_checkpoint=use_checkpoint, + up=up, + down=down, + dtype=dtype, + device=device, + operations=operations + ) + + self.time_stack = ResBlock( + default(out_channels, channels), + emb_channels, + dropout=dropout, + dims=3, + out_channels=default(out_channels, channels), + use_scale_shift_norm=False, + use_conv=False, + up=False, + down=False, + kernel_size=video_kernel_size, + use_checkpoint=use_checkpoint, + exchange_temb_dims=True, + dtype=dtype, + device=device, + operations=operations + ) + self.time_mixer = AlphaBlender( + alpha=merge_factor, + merge_strategy=merge_strategy, + rearrange_pattern="b t -> b 1 t 1 1", + ) + + def forward( + self, + x: th.Tensor, + emb: th.Tensor, + num_video_frames: int, + image_only_indicator = None, + ) -> th.Tensor: + x = super().forward(x, emb) + + x_mix = rearrange(x, "(b t) c h w -> b c t h w", t=num_video_frames) + x = rearrange(x, "(b t) c h w -> b c t h w", t=num_video_frames) + + x = self.time_stack( + x, rearrange(emb, "(b t) ... -> b t ...", t=num_video_frames) + ) + x = self.time_mixer( + x_spatial=x_mix, x_temporal=x, image_only_indicator=image_only_indicator + ) + x = rearrange(x, "b c t h w -> (b t) c h w") + return x + + +class Timestep(nn.Module): + def __init__(self, dim): + super().__init__() + self.dim = dim + + def forward(self, t): + return timestep_embedding(t, self.dim) + +def apply_control(h, control, name): + if control is not None and name in control and len(control[name]) > 0: + ctrl = control[name].pop() + if ctrl is not None: + try: + h += ctrl + except: + print("warning control could not be applied", h.shape, ctrl.shape) + return h + +class UNetModel(nn.Module): + """ + The full UNet model with attention and timestep embedding. + :param in_channels: channels in the input Tensor. + :param model_channels: base channel count for the model. + :param out_channels: channels in the output Tensor. + :param num_res_blocks: number of residual blocks per downsample. + :param dropout: the dropout probability. + :param channel_mult: channel multiplier for each level of the UNet. + :param conv_resample: if True, use learned convolutions for upsampling and + downsampling. + :param dims: determines if the signal is 1D, 2D, or 3D. + :param num_classes: if specified (as an int), then this model will be + class-conditional with `num_classes` classes. + :param use_checkpoint: use gradient checkpointing to reduce memory usage. + :param num_heads: the number of attention heads in each attention layer. + :param num_heads_channels: if specified, ignore num_heads and instead use + a fixed channel width per attention head. + :param num_heads_upsample: works with num_heads to set a different number + of heads for upsampling. Deprecated. + :param use_scale_shift_norm: use a FiLM-like conditioning mechanism. 
+ :param resblock_updown: use residual blocks for up/downsampling. + :param use_new_attention_order: use a different attention pattern for potentially + increased efficiency. + """ + + def __init__( + self, + image_size, + in_channels, + model_channels, + out_channels, + num_res_blocks, + dropout=0, + channel_mult=(1, 2, 4, 8), + conv_resample=True, + dims=2, + num_classes=None, + use_checkpoint=False, + dtype=th.float32, + num_heads=-1, + num_head_channels=-1, + num_heads_upsample=-1, + use_scale_shift_norm=False, + resblock_updown=False, + use_new_attention_order=False, + use_spatial_transformer=False, # custom transformer support + transformer_depth=1, # custom transformer support + context_dim=None, # custom transformer support + n_embed=None, # custom support for prediction of discrete ids into codebook of first stage vq model + legacy=True, + disable_self_attentions=None, + num_attention_blocks=None, + disable_middle_self_attn=False, + use_linear_in_transformer=False, + adm_in_channels=None, + transformer_depth_middle=None, + transformer_depth_output=None, + use_temporal_resblock=False, + use_temporal_attention=False, + time_context_dim=None, + extra_ff_mix_layer=False, + use_spatial_context=False, + merge_strategy=None, + merge_factor=0.0, + video_kernel_size=None, + disable_temporal_crossattention=False, + max_ddpm_temb_period=10000, + device=None, + operations=ops, + ): + super().__init__() + + if context_dim is not None: + assert use_spatial_transformer, 'Fool!! You forgot to use the spatial transformer for your cross-attention conditioning...' + # from omegaconf.listconfig import ListConfig + # if type(context_dim) == ListConfig: + # context_dim = list(context_dim) + + if num_heads_upsample == -1: + num_heads_upsample = num_heads + + if num_heads == -1: + assert num_head_channels != -1, 'Either num_heads or num_head_channels has to be set' + + if num_head_channels == -1: + assert num_heads != -1, 'Either num_heads or num_head_channels has to be set' + + self.in_channels = in_channels + self.model_channels = model_channels + self.out_channels = out_channels + + if isinstance(num_res_blocks, int): + self.num_res_blocks = len(channel_mult) * [num_res_blocks] + else: + if len(num_res_blocks) != len(channel_mult): + raise ValueError("provide num_res_blocks either as an int (globally constant) or " + "as a list/tuple (per-level) with the same length as channel_mult") + self.num_res_blocks = num_res_blocks + + if disable_self_attentions is not None: + # should be a list of booleans, indicating whether to disable self-attention in TransformerBlocks or not + assert len(disable_self_attentions) == len(channel_mult) + if num_attention_blocks is not None: + assert len(num_attention_blocks) == len(self.num_res_blocks) + + transformer_depth = transformer_depth[:] + transformer_depth_output = transformer_depth_output[:] + + self.dropout = dropout + self.channel_mult = channel_mult + self.conv_resample = conv_resample + self.num_classes = num_classes + self.use_checkpoint = use_checkpoint + self.dtype = dtype + self.num_heads = num_heads + self.num_head_channels = num_head_channels + self.num_heads_upsample = num_heads_upsample + self.use_temporal_resblocks = use_temporal_resblock + self.predict_codebook_ids = n_embed is not None + + self.default_num_video_frames = None + self.default_image_only_indicator = None + + time_embed_dim = model_channels * 4 + self.time_embed = nn.Sequential( + operations.Linear(model_channels, time_embed_dim, dtype=self.dtype, device=device), + nn.SiLU(), + 
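The time_embed MLP being constructed here maps a model_channels-wide sinusoidal embedding to a vector four times as wide. A sketch with an assumed SD1.x-style width (plain nn.Linear stands in for operations.Linear):

import torch
import torch.nn as nn

model_channels = 320
time_embed_dim = model_channels * 4
time_embed = nn.Sequential(
    nn.Linear(model_channels, time_embed_dim),
    nn.SiLU(),
    nn.Linear(time_embed_dim, time_embed_dim),
)
t_emb = torch.randn(2, model_channels)   # stand-in for timestep_embedding(t, model_channels)
emb = time_embed(t_emb)                  # shape (2, 1280)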
operations.Linear(time_embed_dim, time_embed_dim, dtype=self.dtype, device=device), + ) + + if self.num_classes is not None: + if isinstance(self.num_classes, int): + self.label_emb = nn.Embedding(num_classes, time_embed_dim, dtype=self.dtype, device=device) + elif self.num_classes == "continuous": + print("setting up linear c_adm embedding layer") + self.label_emb = nn.Linear(1, time_embed_dim) + elif self.num_classes == "sequential": + assert adm_in_channels is not None + self.label_emb = nn.Sequential( + nn.Sequential( + operations.Linear(adm_in_channels, time_embed_dim, dtype=self.dtype, device=device), + nn.SiLU(), + operations.Linear(time_embed_dim, time_embed_dim, dtype=self.dtype, device=device), + ) + ) + else: + raise ValueError() + + self.input_blocks = nn.ModuleList( + [ + TimestepEmbedSequential( + operations.conv_nd(dims, in_channels, model_channels, 3, padding=1, dtype=self.dtype, device=device) + ) + ] + ) + self._feature_size = model_channels + input_block_chans = [model_channels] + ch = model_channels + ds = 1 + + def get_attention_layer( + ch, + num_heads, + dim_head, + depth=1, + context_dim=None, + use_checkpoint=False, + disable_self_attn=False, + ): + if use_temporal_attention: + return SpatialVideoTransformer( + ch, + num_heads, + dim_head, + depth=depth, + context_dim=context_dim, + time_context_dim=time_context_dim, + dropout=dropout, + ff_in=extra_ff_mix_layer, + use_spatial_context=use_spatial_context, + merge_strategy=merge_strategy, + merge_factor=merge_factor, + checkpoint=use_checkpoint, + use_linear=use_linear_in_transformer, + disable_self_attn=disable_self_attn, + disable_temporal_crossattention=disable_temporal_crossattention, + max_time_embed_period=max_ddpm_temb_period, + dtype=self.dtype, device=device, operations=operations + ) + else: + return SpatialTransformer( + ch, num_heads, dim_head, depth=depth, context_dim=context_dim, + disable_self_attn=disable_self_attn, use_linear=use_linear_in_transformer, + use_checkpoint=use_checkpoint, dtype=self.dtype, device=device, operations=operations + ) + + def get_resblock( + merge_factor, + merge_strategy, + video_kernel_size, + ch, + time_embed_dim, + dropout, + out_channels, + dims, + use_checkpoint, + use_scale_shift_norm, + down=False, + up=False, + dtype=None, + device=None, + operations=ops + ): + if self.use_temporal_resblocks: + return VideoResBlock( + merge_factor=merge_factor, + merge_strategy=merge_strategy, + video_kernel_size=video_kernel_size, + channels=ch, + emb_channels=time_embed_dim, + dropout=dropout, + out_channels=out_channels, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + down=down, + up=up, + dtype=dtype, + device=device, + operations=operations + ) + else: + return ResBlock( + channels=ch, + emb_channels=time_embed_dim, + dropout=dropout, + out_channels=out_channels, + use_checkpoint=use_checkpoint, + dims=dims, + use_scale_shift_norm=use_scale_shift_norm, + down=down, + up=up, + dtype=dtype, + device=device, + operations=operations + ) + + for level, mult in enumerate(channel_mult): + for nr in range(self.num_res_blocks[level]): + layers = [ + get_resblock( + merge_factor=merge_factor, + merge_strategy=merge_strategy, + video_kernel_size=video_kernel_size, + ch=ch, + time_embed_dim=time_embed_dim, + dropout=dropout, + out_channels=mult * model_channels, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + dtype=self.dtype, + device=device, + operations=operations, + ) + ] + ch = mult * 
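For the num_classes == "sequential" case above, a pooled conditioning vector of width adm_in_channels is projected to time_embed_dim and later added to the timestep embedding. A sketch with assumed SDXL-style sizes (and plain nn.Linear in place of operations.Linear):

import torch
import torch.nn as nn

adm_in_channels, time_embed_dim = 2816, 1280   # assumed sizes
label_emb = nn.Sequential(
    nn.Linear(adm_in_channels, time_embed_dim),
    nn.SiLU(),
    nn.Linear(time_embed_dim, time_embed_dim),
)
emb = torch.randn(2, time_embed_dim)                       # timestep embedding
emb = emb + label_emb(torch.randn(2, adm_in_channels))     # class/pooled conditioning added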
model_channels + num_transformers = transformer_depth.pop(0) + if num_transformers > 0: + if num_head_channels == -1: + dim_head = ch // num_heads + else: + num_heads = ch // num_head_channels + dim_head = num_head_channels + if legacy: + #num_heads = 1 + dim_head = ch // num_heads if use_spatial_transformer else num_head_channels + if exists(disable_self_attentions): + disabled_sa = disable_self_attentions[level] + else: + disabled_sa = False + + if not exists(num_attention_blocks) or nr < num_attention_blocks[level]: + layers.append(get_attention_layer( + ch, num_heads, dim_head, depth=num_transformers, context_dim=context_dim, + disable_self_attn=disabled_sa, use_checkpoint=use_checkpoint) + ) + self.input_blocks.append(TimestepEmbedSequential(*layers)) + self._feature_size += ch + input_block_chans.append(ch) + if level != len(channel_mult) - 1: + out_ch = ch + self.input_blocks.append( + TimestepEmbedSequential( + get_resblock( + merge_factor=merge_factor, + merge_strategy=merge_strategy, + video_kernel_size=video_kernel_size, + ch=ch, + time_embed_dim=time_embed_dim, + dropout=dropout, + out_channels=out_ch, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + down=True, + dtype=self.dtype, + device=device, + operations=operations + ) + if resblock_updown + else Downsample( + ch, conv_resample, dims=dims, out_channels=out_ch, dtype=self.dtype, device=device, operations=operations + ) + ) + ) + ch = out_ch + input_block_chans.append(ch) + ds *= 2 + self._feature_size += ch + + if num_head_channels == -1: + dim_head = ch // num_heads + else: + num_heads = ch // num_head_channels + dim_head = num_head_channels + if legacy: + #num_heads = 1 + dim_head = ch // num_heads if use_spatial_transformer else num_head_channels + mid_block = [ + get_resblock( + merge_factor=merge_factor, + merge_strategy=merge_strategy, + video_kernel_size=video_kernel_size, + ch=ch, + time_embed_dim=time_embed_dim, + dropout=dropout, + out_channels=None, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + dtype=self.dtype, + device=device, + operations=operations + )] + if transformer_depth_middle >= 0: + mid_block += [get_attention_layer( # always uses a self-attn + ch, num_heads, dim_head, depth=transformer_depth_middle, context_dim=context_dim, + disable_self_attn=disable_middle_self_attn, use_checkpoint=use_checkpoint + ), + get_resblock( + merge_factor=merge_factor, + merge_strategy=merge_strategy, + video_kernel_size=video_kernel_size, + ch=ch, + time_embed_dim=time_embed_dim, + dropout=dropout, + out_channels=None, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + dtype=self.dtype, + device=device, + operations=operations + )] + self.middle_block = TimestepEmbedSequential(*mid_block) + self._feature_size += ch + + self.output_blocks = nn.ModuleList([]) + for level, mult in list(enumerate(channel_mult))[::-1]: + for i in range(self.num_res_blocks[level] + 1): + ich = input_block_chans.pop() + layers = [ + get_resblock( + merge_factor=merge_factor, + merge_strategy=merge_strategy, + video_kernel_size=video_kernel_size, + ch=ch + ich, + time_embed_dim=time_embed_dim, + dropout=dropout, + out_channels=model_channels * mult, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + dtype=self.dtype, + device=device, + operations=operations + ) + ] + ch = model_channels * mult + num_transformers = transformer_depth_output.pop() + if num_transformers > 0: + 
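The level loop above records every block's output width in input_block_chans so the decoder can later pop matching skip connections. Here is the same bookkeeping in isolation, with assumed SD1.x-style settings:

model_channels, channel_mult, num_res_blocks = 320, (1, 2, 4), 2   # assumed settings
input_block_chans = [model_channels]
ch = model_channels
for level, mult in enumerate(channel_mult):
    for _ in range(num_res_blocks):
        ch = mult * model_channels
        input_block_chans.append(ch)
    if level != len(channel_mult) - 1:
        input_block_chans.append(ch)   # the Downsample block is recorded too
# input_block_chans == [320, 320, 320, 320, 640, 640, 640, 1280, 1280]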
if num_head_channels == -1: + dim_head = ch // num_heads + else: + num_heads = ch // num_head_channels + dim_head = num_head_channels + if legacy: + #num_heads = 1 + dim_head = ch // num_heads if use_spatial_transformer else num_head_channels + if exists(disable_self_attentions): + disabled_sa = disable_self_attentions[level] + else: + disabled_sa = False + + if not exists(num_attention_blocks) or i < num_attention_blocks[level]: + layers.append( + get_attention_layer( + ch, num_heads, dim_head, depth=num_transformers, context_dim=context_dim, + disable_self_attn=disabled_sa, use_checkpoint=use_checkpoint + ) + ) + if level and i == self.num_res_blocks[level]: + out_ch = ch + layers.append( + get_resblock( + merge_factor=merge_factor, + merge_strategy=merge_strategy, + video_kernel_size=video_kernel_size, + ch=ch, + time_embed_dim=time_embed_dim, + dropout=dropout, + out_channels=out_ch, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + up=True, + dtype=self.dtype, + device=device, + operations=operations + ) + if resblock_updown + else Upsample(ch, conv_resample, dims=dims, out_channels=out_ch, dtype=self.dtype, device=device, operations=operations) + ) + ds //= 2 + self.output_blocks.append(TimestepEmbedSequential(*layers)) + self._feature_size += ch + + self.out = nn.Sequential( + operations.GroupNorm(32, ch, dtype=self.dtype, device=device), + nn.SiLU(), + zero_module(operations.conv_nd(dims, model_channels, out_channels, 3, padding=1, dtype=self.dtype, device=device)), + ) + if self.predict_codebook_ids: + self.id_predictor = nn.Sequential( + operations.GroupNorm(32, ch, dtype=self.dtype, device=device), + operations.conv_nd(dims, model_channels, n_embed, 1, dtype=self.dtype, device=device), + #nn.LogSoftmax(dim=1) # change to cross_entropy and produce non-normalized logits + ) + + def forward(self, x, timesteps=None, context=None, y=None, control=None, transformer_options={}, **kwargs): + """ + Apply the model to an input batch. + :param x: an [N x C x ...] Tensor of inputs. + :param timesteps: a 1-D batch of timesteps. + :param context: conditioning plugged in via crossattn + :param y: an [N] Tensor of labels, if class-conditional. + :return: an [N x C x ...] Tensor of outputs. 
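Each output block built above takes ch + ich input channels because the forward pass (further below) concatenates the popped encoder feature with the current decoder feature along the channel axis before the block runs. A sketch of that concatenation with assumed shapes:

import torch

h = torch.randn(1, 1280, 16, 16)    # current decoder features (ch)
hsp = torch.randn(1, 640, 16, 16)   # popped encoder feature (ich)
h = torch.cat([h, hsp], dim=1)      # (1, 1920, 16, 16); the next ResBlock maps it back down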
+ """ + transformer_options["original_shape"] = list(x.shape) + transformer_options["transformer_index"] = 0 + transformer_patches = transformer_options.get("patches", {}) + block_modifiers = transformer_options.get("block_modifiers", []) + + num_video_frames = kwargs.get("num_video_frames", self.default_num_video_frames) + image_only_indicator = kwargs.get("image_only_indicator", self.default_image_only_indicator) + time_context = kwargs.get("time_context", None) + + assert (y is not None) == ( + self.num_classes is not None + ), "must specify y if and only if the model is class-conditional" + hs = [] + t_emb = timestep_embedding(timesteps, self.model_channels, repeat_only=False).to(x.dtype) + emb = self.time_embed(t_emb) + + if self.num_classes is not None: + assert y.shape[0] == x.shape[0] + emb = emb + self.label_emb(y) + + h = x + for id, module in enumerate(self.input_blocks): + transformer_options["block"] = ("input", id) + + for block_modifier in block_modifiers: + h = block_modifier(h, 'before', transformer_options) + + h = forward_timestep_embed(module, h, emb, context, transformer_options, time_context=time_context, num_video_frames=num_video_frames, image_only_indicator=image_only_indicator) + h = apply_control(h, control, 'input') + + for block_modifier in block_modifiers: + h = block_modifier(h, 'after', transformer_options) + + if "input_block_patch" in transformer_patches: + patch = transformer_patches["input_block_patch"] + for p in patch: + h = p(h, transformer_options) + + hs.append(h) + if "input_block_patch_after_skip" in transformer_patches: + patch = transformer_patches["input_block_patch_after_skip"] + for p in patch: + h = p(h, transformer_options) + + transformer_options["block"] = ("middle", 0) + + for block_modifier in block_modifiers: + h = block_modifier(h, 'before', transformer_options) + + h = forward_timestep_embed(self.middle_block, h, emb, context, transformer_options, time_context=time_context, num_video_frames=num_video_frames, image_only_indicator=image_only_indicator) + h = apply_control(h, control, 'middle') + + for block_modifier in block_modifiers: + h = block_modifier(h, 'after', transformer_options) + + for id, module in enumerate(self.output_blocks): + transformer_options["block"] = ("output", id) + hsp = hs.pop() + hsp = apply_control(hsp, control, 'output') + + if "output_block_patch" in transformer_patches: + patch = transformer_patches["output_block_patch"] + for p in patch: + h, hsp = p(h, hsp, transformer_options) + + h = th.cat([h, hsp], dim=1) + del hsp + if len(hs) > 0: + output_shape = hs[-1].shape + else: + output_shape = None + + for block_modifier in block_modifiers: + h = block_modifier(h, 'before', transformer_options) + + h = forward_timestep_embed(module, h, emb, context, transformer_options, output_shape, time_context=time_context, num_video_frames=num_video_frames, image_only_indicator=image_only_indicator) + + for block_modifier in block_modifiers: + h = block_modifier(h, 'after', transformer_options) + + transformer_options["block"] = ("last", 0) + + for block_modifier in block_modifiers: + h = block_modifier(h, 'before', transformer_options) + + if self.predict_codebook_ids: + h = self.id_predictor(h) + else: + h = self.out(h) + + for block_modifier in block_modifiers: + h = block_modifier(h, 'after', transformer_options) + + return h.type(x.dtype) diff --git a/ldm_patched/ldm/modules/diffusionmodules/upscaling.py b/ldm_patched/ldm/modules/diffusionmodules/upscaling.py new file mode 100644 index 
0000000000000000000000000000000000000000..91407bf1cf4751298b52367cffb1e2050009e027 --- /dev/null +++ b/ldm_patched/ldm/modules/diffusionmodules/upscaling.py @@ -0,0 +1,93 @@ +# 1st edit by https://github.com/CompVis/latent-diffusion +# 2nd edit by https://github.com/Stability-AI/stablediffusion +# 3rd edit by https://github.com/Stability-AI/generative-models +# 4th edit by https://github.com/comfyanonymous/ComfyUI + +# This file is only for reference, and not used in the backend or runtime. + + +import torch +import torch.nn as nn +import numpy as np +from functools import partial + +from .util import extract_into_tensor, make_beta_schedule +from ldm_patched.ldm.util import default + + +class AbstractLowScaleModel(nn.Module): + # for concatenating a downsampled image to the latent representation + def __init__(self, noise_schedule_config=None): + super(AbstractLowScaleModel, self).__init__() + if noise_schedule_config is not None: + self.register_schedule(**noise_schedule_config) + + def register_schedule(self, beta_schedule="linear", timesteps=1000, + linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): + betas = make_beta_schedule(beta_schedule, timesteps, linear_start=linear_start, linear_end=linear_end, + cosine_s=cosine_s) + alphas = 1. - betas + alphas_cumprod = np.cumprod(alphas, axis=0) + alphas_cumprod_prev = np.append(1., alphas_cumprod[:-1]) + + timesteps, = betas.shape + self.num_timesteps = int(timesteps) + self.linear_start = linear_start + self.linear_end = linear_end + assert alphas_cumprod.shape[0] == self.num_timesteps, 'alphas have to be defined for each timestep' + + to_torch = partial(torch.tensor, dtype=torch.float32) + + self.register_buffer('betas', to_torch(betas)) + self.register_buffer('alphas_cumprod', to_torch(alphas_cumprod)) + self.register_buffer('alphas_cumprod_prev', to_torch(alphas_cumprod_prev)) + + # calculations for diffusion q(x_t | x_{t-1}) and others + self.register_buffer('sqrt_alphas_cumprod', to_torch(np.sqrt(alphas_cumprod))) + self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1. - alphas_cumprod))) + self.register_buffer('log_one_minus_alphas_cumprod', to_torch(np.log(1. - alphas_cumprod))) + self.register_buffer('sqrt_recip_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod))) + self.register_buffer('sqrt_recipm1_alphas_cumprod', to_torch(np.sqrt(1. 
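The schedule registered above feeds q_sample (defined just below), which mixes the clean input with noise according to the cumulative alphas. A sketch of the coefficients for the "linear" schedule (which interpolates in sqrt space, as in make_beta_schedule):

import numpy as np

betas = np.linspace(1e-4 ** 0.5, 2e-2 ** 0.5, 1000) ** 2   # "linear" schedule
alphas_cumprod = np.cumprod(1.0 - betas)
t = 500
coef_x0 = np.sqrt(alphas_cumprod[t])            # multiplies x_start in q_sample
coef_noise = np.sqrt(1.0 - alphas_cumprod[t])   # multiplies the noise
# x_t = coef_x0 * x_start + coef_noise * noise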
/ alphas_cumprod - 1))) + + def q_sample(self, x_start, t, noise=None, seed=None): + if noise is None: + if seed is None: + noise = torch.randn_like(x_start) + else: + noise = torch.randn(x_start.size(), dtype=x_start.dtype, layout=x_start.layout, generator=torch.manual_seed(seed)).to(x_start.device) + return (extract_into_tensor(self.sqrt_alphas_cumprod.to(x_start.device), t, x_start.shape) * x_start + + extract_into_tensor(self.sqrt_one_minus_alphas_cumprod.to(x_start.device), t, x_start.shape) * noise) + + def forward(self, x): + return x, None + + def decode(self, x): + return x + + +class SimpleImageConcat(AbstractLowScaleModel): + # no noise level conditioning + def __init__(self): + super(SimpleImageConcat, self).__init__(noise_schedule_config=None) + self.max_noise_level = 0 + + def forward(self, x): + # fix to constant noise level + return x, torch.zeros(x.shape[0], device=x.device).long() + + +class ImageConcatWithNoiseAugmentation(AbstractLowScaleModel): + def __init__(self, noise_schedule_config, max_noise_level=1000, to_cuda=False): + super().__init__(noise_schedule_config=noise_schedule_config) + self.max_noise_level = max_noise_level + + def forward(self, x, noise_level=None, seed=None): + if noise_level is None: + noise_level = torch.randint(0, self.max_noise_level, (x.shape[0],), device=x.device).long() + else: + assert isinstance(noise_level, torch.Tensor) + z = self.q_sample(x, noise_level, seed=seed) + return z, noise_level + + + diff --git a/ldm_patched/ldm/modules/diffusionmodules/util.py b/ldm_patched/ldm/modules/diffusionmodules/util.py new file mode 100644 index 0000000000000000000000000000000000000000..dcab58e5afb1ee602346162f74ec956e11dd6d6a --- /dev/null +++ b/ldm_patched/ldm/modules/diffusionmodules/util.py @@ -0,0 +1,303 @@ +# 1st edit by https://github.com/CompVis/latent-diffusion +# 2nd edit by https://github.com/Stability-AI/stablediffusion +# 3rd edit by https://github.com/Stability-AI/generative-models + + +# adopted from +# https://github.com/openai/improved-diffusion/blob/main/improved_diffusion/gaussian_diffusion.py +# and +# https://github.com/lucidrains/denoising-diffusion-pytorch/blob/7706bdfc6f527f58d33f84b7b522e61e6e3164b3/denoising_diffusion_pytorch/denoising_diffusion_pytorch.py +# and +# https://github.com/openai/guided-diffusion/blob/0ba878e517b276c45d1195eb29f6f5f72659a05b/guided_diffusion/nn.py +# +# thanks! 
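The AlphaBlender defined just below mixes a spatial and a temporal branch with a (possibly learned) factor. A minimal sketch of that blend, assuming the "learned" strategy and made-up tensors:

import torch

mix_factor = torch.tensor([0.3])    # a learnable parameter in the real module
alpha = torch.sigmoid(mix_factor)
x_spatial = torch.randn(2, 4, 8, 8)
x_temporal = torch.randn(2, 4, 8, 8)
x = alpha * x_spatial + (1.0 - alpha) * x_temporal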
+ + +import os +import math +import torch +import torch.nn as nn +import numpy as np +from einops import repeat, rearrange + +from ldm_patched.ldm.util import instantiate_from_config + +class AlphaBlender(nn.Module): + strategies = ["learned", "fixed", "learned_with_images"] + + def __init__( + self, + alpha: float, + merge_strategy: str = "learned_with_images", + rearrange_pattern: str = "b t -> (b t) 1 1", + ): + super().__init__() + self.merge_strategy = merge_strategy + self.rearrange_pattern = rearrange_pattern + + assert ( + merge_strategy in self.strategies + ), f"merge_strategy needs to be in {self.strategies}" + + if self.merge_strategy == "fixed": + self.register_buffer("mix_factor", torch.Tensor([alpha])) + elif ( + self.merge_strategy == "learned" + or self.merge_strategy == "learned_with_images" + ): + self.register_parameter( + "mix_factor", torch.nn.Parameter(torch.Tensor([alpha])) + ) + else: + raise ValueError(f"unknown merge strategy {self.merge_strategy}") + + def get_alpha(self, image_only_indicator: torch.Tensor) -> torch.Tensor: + # skip_time_mix = rearrange(repeat(skip_time_mix, 'b -> (b t) () () ()', t=t), '(b t) 1 ... -> b 1 t ...', t=t) + if self.merge_strategy == "fixed": + # make shape compatible + # alpha = repeat(self.mix_factor, '1 -> b () t () ()', t=t, b=bs) + alpha = self.mix_factor.to(image_only_indicator.device) + elif self.merge_strategy == "learned": + alpha = torch.sigmoid(self.mix_factor.to(image_only_indicator.device)) + # make shape compatible + # alpha = repeat(alpha, '1 -> s () ()', s = t * bs) + elif self.merge_strategy == "learned_with_images": + assert image_only_indicator is not None, "need image_only_indicator ..." + alpha = torch.where( + image_only_indicator.bool(), + torch.ones(1, 1, device=image_only_indicator.device), + rearrange(torch.sigmoid(self.mix_factor.to(image_only_indicator.device)), "... -> ... 
1"), + ) + alpha = rearrange(alpha, self.rearrange_pattern) + # make shape compatible + # alpha = repeat(alpha, '1 -> s () ()', s = t * bs) + else: + raise NotImplementedError() + return alpha + + def forward( + self, + x_spatial, + x_temporal, + image_only_indicator=None, + ) -> torch.Tensor: + alpha = self.get_alpha(image_only_indicator) + x = ( + alpha.to(x_spatial.dtype) * x_spatial + + (1.0 - alpha).to(x_spatial.dtype) * x_temporal + ) + return x + + +def make_beta_schedule(schedule, n_timestep, linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): + if schedule == "linear": + betas = ( + torch.linspace(linear_start ** 0.5, linear_end ** 0.5, n_timestep, dtype=torch.float64) ** 2 + ) + + elif schedule == "cosine": + timesteps = ( + torch.arange(n_timestep + 1, dtype=torch.float64) / n_timestep + cosine_s + ) + alphas = timesteps / (1 + cosine_s) * np.pi / 2 + alphas = torch.cos(alphas).pow(2) + alphas = alphas / alphas[0] + betas = 1 - alphas[1:] / alphas[:-1] + betas = np.clip(betas, a_min=0, a_max=0.999) + + elif schedule == "squaredcos_cap_v2": # used for karlo prior + # return early + return betas_for_alpha_bar( + n_timestep, + lambda t: math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2, + ) + + elif schedule == "sqrt_linear": + betas = torch.linspace(linear_start, linear_end, n_timestep, dtype=torch.float64) + elif schedule == "sqrt": + betas = torch.linspace(linear_start, linear_end, n_timestep, dtype=torch.float64) ** 0.5 + else: + raise ValueError(f"schedule '{schedule}' unknown.") + return betas.numpy() + + +def make_ddim_timesteps(ddim_discr_method, num_ddim_timesteps, num_ddpm_timesteps, verbose=True): + if ddim_discr_method == 'uniform': + c = num_ddpm_timesteps // num_ddim_timesteps + ddim_timesteps = np.asarray(list(range(0, num_ddpm_timesteps, c))) + elif ddim_discr_method == 'quad': + ddim_timesteps = ((np.linspace(0, np.sqrt(num_ddpm_timesteps * .8), num_ddim_timesteps)) ** 2).astype(int) + else: + raise NotImplementedError(f'There is no ddim discretization method called "{ddim_discr_method}"') + + # assert ddim_timesteps.shape[0] == num_ddim_timesteps + # add one to get the final alpha values right (the ones from first scale to data during sampling) + steps_out = ddim_timesteps + 1 + if verbose: + print(f'Selected timesteps for ddim sampler: {steps_out}') + return steps_out + + +def make_ddim_sampling_parameters(alphacums, ddim_timesteps, eta, verbose=True): + # select alphas for computing the variance schedule + alphas = alphacums[ddim_timesteps] + alphas_prev = np.asarray([alphacums[0]] + alphacums[ddim_timesteps[:-1]].tolist()) + + # according the the formula provided in https://arxiv.org/abs/2010.02502 + sigmas = eta * np.sqrt((1 - alphas_prev) / (1 - alphas) * (1 - alphas / alphas_prev)) + if verbose: + print(f'Selected alphas for ddim sampler: a_t: {alphas}; a_(t-1): {alphas_prev}') + print(f'For the chosen value of eta, which is {eta}, ' + f'this results in the following sigma_t schedule for ddim sampler {sigmas}') + return sigmas, alphas, alphas_prev + + +def betas_for_alpha_bar(num_diffusion_timesteps, alpha_bar, max_beta=0.999): + """ + Create a beta schedule that discretizes the given alpha_t_bar function, + which defines the cumulative product of (1-beta) over time from t = [0,1]. + :param num_diffusion_timesteps: the number of betas to produce. + :param alpha_bar: a lambda that takes an argument t from 0 to 1 and + produces the cumulative product of (1-beta) up to that + part of the diffusion process. 
+ :param max_beta: the maximum beta to use; use values lower than 1 to + prevent singularities. + """ + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta)) + return np.array(betas) + + +def extract_into_tensor(a, t, x_shape): + b, *_ = t.shape + out = a.gather(-1, t) + return out.reshape(b, *((1,) * (len(x_shape) - 1))) + + +def checkpoint(func, inputs, params, flag): + """ + Evaluate a function without caching intermediate activations, allowing for + reduced memory at the expense of extra compute in the backward pass. + :param func: the function to evaluate. + :param inputs: the argument sequence to pass to `func`. + :param params: a sequence of parameters `func` depends on but does not + explicitly take as arguments. + :param flag: if False, disable gradient checkpointing. + """ + if flag: + args = tuple(inputs) + tuple(params) + return CheckpointFunction.apply(func, len(inputs), *args) + else: + return func(*inputs) + + +class CheckpointFunction(torch.autograd.Function): + @staticmethod + def forward(ctx, run_function, length, *args): + ctx.run_function = run_function + ctx.input_tensors = list(args[:length]) + ctx.input_params = list(args[length:]) + ctx.gpu_autocast_kwargs = {"enabled": torch.is_autocast_enabled(), + "dtype": torch.get_autocast_gpu_dtype(), + "cache_enabled": torch.is_autocast_cache_enabled()} + with torch.no_grad(): + output_tensors = ctx.run_function(*ctx.input_tensors) + return output_tensors + + @staticmethod + def backward(ctx, *output_grads): + ctx.input_tensors = [x.detach().requires_grad_(True) for x in ctx.input_tensors] + with torch.enable_grad(), \ + torch.cuda.amp.autocast(**ctx.gpu_autocast_kwargs): + # Fixes a bug where the first op in run_function modifies the + # Tensor storage in place, which is not allowed for detach()'d + # Tensors. + shallow_copies = [x.view_as(x) for x in ctx.input_tensors] + output_tensors = ctx.run_function(*shallow_copies) + input_grads = torch.autograd.grad( + output_tensors, + ctx.input_tensors + ctx.input_params, + output_grads, + allow_unused=True, + ) + del ctx.input_tensors + del ctx.input_params + del output_tensors + return (None, None) + input_grads + + +def timestep_embedding(timesteps, dim, max_period=10000, repeat_only=False): + # Consistent with Kohya to reduce differences between model training and inference. + + if not repeat_only: + half = dim // 2 + freqs = torch.exp( + -math.log(max_period) * torch.arange(start=0, end=half, dtype=torch.float32) / half + ).to(device=timesteps.device) + args = timesteps[:, None].float() * freqs[None] + embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1) + if dim % 2: + embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1) + else: + embedding = repeat(timesteps, 'b -> b d', d=dim) + return embedding + + +def zero_module(module): + """ + Zero out the parameters of a module and return it. + """ + for p in module.parameters(): + p.detach().zero_() + return module + + +def scale_module(module, scale): + """ + Scale the parameters of a module and return it. + """ + for p in module.parameters(): + p.detach().mul_(scale) + return module + + +def mean_flat(tensor): + """ + Take the mean over all non-batch dimensions. + """ + return tensor.mean(dim=list(range(1, len(tensor.shape)))) + + +def avg_pool_nd(dims, *args, **kwargs): + """ + Create a 1D, 2D, or 3D average pooling module. 
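timestep_embedding above builds the classic sinusoidal embedding: half the dimensions are cosines and half are sines over log-spaced frequencies. A condensed, self-contained version of the same computation (odd-dimension padding omitted):

import math
import torch

def sinusoidal_embedding(timesteps, dim, max_period=10000):
    half = dim // 2
    freqs = torch.exp(-math.log(max_period) * torch.arange(half, dtype=torch.float32) / half)
    args = timesteps[:, None].float() * freqs[None]
    return torch.cat([torch.cos(args), torch.sin(args)], dim=-1)

emb = sinusoidal_embedding(torch.tensor([0, 500, 999]), dim=320)   # shape (3, 320)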
+ """ + if dims == 1: + return nn.AvgPool1d(*args, **kwargs) + elif dims == 2: + return nn.AvgPool2d(*args, **kwargs) + elif dims == 3: + return nn.AvgPool3d(*args, **kwargs) + raise ValueError(f"unsupported dimensions: {dims}") + + +class HybridConditioner(nn.Module): + + def __init__(self, c_concat_config, c_crossattn_config): + super().__init__() + self.concat_conditioner = instantiate_from_config(c_concat_config) + self.crossattn_conditioner = instantiate_from_config(c_crossattn_config) + + def forward(self, c_concat, c_crossattn): + c_concat = self.concat_conditioner(c_concat) + c_crossattn = self.crossattn_conditioner(c_crossattn) + return {'c_concat': [c_concat], 'c_crossattn': [c_crossattn]} + + +def noise_like(shape, device, repeat=False): + repeat_noise = lambda: torch.randn((1, *shape[1:]), device=device).repeat(shape[0], *((1,) * (len(shape) - 1))) + noise = lambda: torch.randn(shape, device=device) + return repeat_noise() if repeat else noise() diff --git a/ldm_patched/ldm/modules/distributions/__init__.py b/ldm_patched/ldm/modules/distributions/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/ldm_patched/ldm/modules/distributions/distributions.py b/ldm_patched/ldm/modules/distributions/distributions.py new file mode 100644 index 0000000000000000000000000000000000000000..9f4b27f59cc3369d1c6240429ab692ab85e8a1fd --- /dev/null +++ b/ldm_patched/ldm/modules/distributions/distributions.py @@ -0,0 +1,96 @@ +# 1st edit by https://github.com/CompVis/latent-diffusion +# 2nd edit by https://github.com/Stability-AI/stablediffusion + + +import torch +import numpy as np + + +class AbstractDistribution: + def sample(self): + raise NotImplementedError() + + def mode(self): + raise NotImplementedError() + + +class DiracDistribution(AbstractDistribution): + def __init__(self, value): + self.value = value + + def sample(self): + return self.value + + def mode(self): + return self.value + + +class DiagonalGaussianDistribution(object): + def __init__(self, parameters, deterministic=False): + self.parameters = parameters + self.mean, self.logvar = torch.chunk(parameters, 2, dim=1) + self.logvar = torch.clamp(self.logvar, -30.0, 20.0) + self.deterministic = deterministic + self.std = torch.exp(0.5 * self.logvar) + self.var = torch.exp(self.logvar) + if self.deterministic: + self.var = self.std = torch.zeros_like(self.mean).to(device=self.parameters.device) + + def sample(self): + x = self.mean + self.std * torch.randn(self.mean.shape).to(device=self.parameters.device) + return x + + def kl(self, other=None): + if self.deterministic: + return torch.Tensor([0.]) + else: + if other is None: + return 0.5 * torch.sum(torch.pow(self.mean, 2) + + self.var - 1.0 - self.logvar, + dim=[1, 2, 3]) + else: + return 0.5 * torch.sum( + torch.pow(self.mean - other.mean, 2) / other.var + + self.var / other.var - 1.0 - self.logvar + other.logvar, + dim=[1, 2, 3]) + + def nll(self, sample, dims=[1,2,3]): + if self.deterministic: + return torch.Tensor([0.]) + logtwopi = np.log(2.0 * np.pi) + return 0.5 * torch.sum( + logtwopi + self.logvar + torch.pow(sample - self.mean, 2) / self.var, + dim=dims) + + def mode(self): + return self.mean + + +def normal_kl(mean1, logvar1, mean2, logvar2): + """ + source: https://github.com/openai/guided-diffusion/blob/27c20a8fab9cb472df5d6bdd6c8d11c8f430b924/guided_diffusion/losses.py#L12 + Compute the KL divergence between two gaussians. 
+ Shapes are automatically broadcasted, so batches can be compared to + scalars, among other use cases. + """ + tensor = None + for obj in (mean1, logvar1, mean2, logvar2): + if isinstance(obj, torch.Tensor): + tensor = obj + break + assert tensor is not None, "at least one argument must be a Tensor" + + # Force variances to be Tensors. Broadcasting helps convert scalars to + # Tensors, but it does not work for torch.exp(). + logvar1, logvar2 = [ + x if isinstance(x, torch.Tensor) else torch.tensor(x).to(tensor) + for x in (logvar1, logvar2) + ] + + return 0.5 * ( + -1.0 + + logvar2 + - logvar1 + + torch.exp(logvar1 - logvar2) + + ((mean1 - mean2) ** 2) * torch.exp(-logvar2) + ) diff --git a/ldm_patched/ldm/modules/ema.py b/ldm_patched/ldm/modules/ema.py new file mode 100644 index 0000000000000000000000000000000000000000..58726bf1c447a7d7d55f632a4f0270cf83efb502 --- /dev/null +++ b/ldm_patched/ldm/modules/ema.py @@ -0,0 +1,89 @@ +# 1st edit by https://github.com/CompVis/latent-diffusion +# 2nd edit by https://github.com/Stability-AI/stablediffusion +# 3rd edit by https://github.com/Stability-AI/generative-models +# 4th edit by https://github.com/comfyanonymous/ComfyUI + + +# This file is not used in image diffusion backend. + + +import torch +from torch import nn + + +class LitEma(nn.Module): + def __init__(self, model, decay=0.9999, use_num_upates=True): + super().__init__() + if decay < 0.0 or decay > 1.0: + raise ValueError('Decay must be between 0 and 1') + + self.m_name2s_name = {} + self.register_buffer('decay', torch.tensor(decay, dtype=torch.float32)) + self.register_buffer('num_updates', torch.tensor(0, dtype=torch.int) if use_num_upates + else torch.tensor(-1, dtype=torch.int)) + + for name, p in model.named_parameters(): + if p.requires_grad: + # remove as '.'-character is not allowed in buffers + s_name = name.replace('.', '') + self.m_name2s_name.update({name: s_name}) + self.register_buffer(s_name, p.clone().detach().data) + + self.collected_params = [] + + def reset_num_updates(self): + del self.num_updates + self.register_buffer('num_updates', torch.tensor(0, dtype=torch.int)) + + def forward(self, model): + decay = self.decay + + if self.num_updates >= 0: + self.num_updates += 1 + decay = min(self.decay, (1 + self.num_updates) / (10 + self.num_updates)) + + one_minus_decay = 1.0 - decay + + with torch.no_grad(): + m_param = dict(model.named_parameters()) + shadow_params = dict(self.named_buffers()) + + for key in m_param: + if m_param[key].requires_grad: + sname = self.m_name2s_name[key] + shadow_params[sname] = shadow_params[sname].type_as(m_param[key]) + shadow_params[sname].sub_(one_minus_decay * (shadow_params[sname] - m_param[key])) + else: + assert not key in self.m_name2s_name + + def copy_to(self, model): + m_param = dict(model.named_parameters()) + shadow_params = dict(self.named_buffers()) + for key in m_param: + if m_param[key].requires_grad: + m_param[key].data.copy_(shadow_params[self.m_name2s_name[key]].data) + else: + assert not key in self.m_name2s_name + + def store(self, parameters): + """ + Save the current parameters for restoring later. + Args: + parameters: Iterable of `torch.nn.Parameter`; the parameters to be + temporarily stored. + """ + self.collected_params = [param.clone() for param in parameters] + + def restore(self, parameters): + """ + Restore the parameters stored with the `store` method. + Useful to validate the model with EMA parameters without affecting the + original optimization process. 
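The LitEma update above nudges each shadow weight a small step toward the live weight on every call; the rule in isolation:

import torch

decay = 0.9999
shadow = torch.zeros(3)    # EMA copy
param = torch.ones(3)      # live model weight
shadow -= (1.0 - decay) * (shadow - param)   # i.e. decay * shadow + (1 - decay) * param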
Store the parameters before the + `copy_to` method. After validation (or model saving), use this to + restore the former parameters. + Args: + parameters: Iterable of `torch.nn.Parameter`; the parameters to be + updated with the stored parameters. + """ + for c_param, param in zip(self.collected_params, parameters): + param.data.copy_(c_param.data) diff --git a/ldm_patched/ldm/modules/encoders/__init__.py b/ldm_patched/ldm/modules/encoders/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/ldm_patched/ldm/modules/encoders/noise_aug_modules.py b/ldm_patched/ldm/modules/encoders/noise_aug_modules.py new file mode 100644 index 0000000000000000000000000000000000000000..4ab769f8e6b0e7c3bb8cc5ea93c95fc1e4914254 --- /dev/null +++ b/ldm_patched/ldm/modules/encoders/noise_aug_modules.py @@ -0,0 +1,39 @@ +# Taken from https://github.com/comfyanonymous/ComfyUI +# This file is only for reference, and not used in the backend or runtime. + + +from ..diffusionmodules.upscaling import ImageConcatWithNoiseAugmentation +from ..diffusionmodules.openaimodel import Timestep +import torch + +class CLIPEmbeddingNoiseAugmentation(ImageConcatWithNoiseAugmentation): + def __init__(self, *args, clip_stats_path=None, timestep_dim=256, **kwargs): + super().__init__(*args, **kwargs) + if clip_stats_path is None: + clip_mean, clip_std = torch.zeros(timestep_dim), torch.ones(timestep_dim) + else: + clip_mean, clip_std = torch.load(clip_stats_path, map_location="cpu") + self.register_buffer("data_mean", clip_mean[None, :], persistent=False) + self.register_buffer("data_std", clip_std[None, :], persistent=False) + self.time_embed = Timestep(timestep_dim) + + def scale(self, x): + # re-normalize to centered mean and unit variance + x = (x - self.data_mean.to(x.device)) * 1. 
/ self.data_std.to(x.device) + return x + + def unscale(self, x): + # back to original data stats + x = (x * self.data_std.to(x.device)) + self.data_mean.to(x.device) + return x + + def forward(self, x, noise_level=None, seed=None): + if noise_level is None: + noise_level = torch.randint(0, self.max_noise_level, (x.shape[0],), device=x.device).long() + else: + assert isinstance(noise_level, torch.Tensor) + x = self.scale(x) + z = self.q_sample(x, noise_level, seed=seed) + z = self.unscale(z) + noise_level = self.time_embed(noise_level) + return z, noise_level diff --git a/ldm_patched/ldm/modules/sub_quadratic_attention.py b/ldm_patched/ldm/modules/sub_quadratic_attention.py new file mode 100644 index 0000000000000000000000000000000000000000..9f4c23c7ecdac38ef4d7c200cb342a04f3fd0cfb --- /dev/null +++ b/ldm_patched/ldm/modules/sub_quadratic_attention.py @@ -0,0 +1,273 @@ +# original source: +# https://github.com/AminRezaei0x443/memory-efficient-attention/blob/1bc0d9e6ac5f82ea43a375135c4e1d3896ee1694/memory_efficient_attention/attention_torch.py +# license: +# MIT +# credit: +# Amin Rezaei (original author) +# Alex Birch (optimized algorithm for 3D tensors, at the expense of removing bias, masking and callbacks) +# implementation of: +# Self-attention Does Not Need O(n2) Memory": +# https://arxiv.org/abs/2112.05682v2 + +from functools import partial +import torch +from torch import Tensor +from torch.utils.checkpoint import checkpoint +import math + +try: + from typing import Optional, NamedTuple, List, Protocol +except ImportError: + from typing import Optional, NamedTuple, List + from typing_extensions import Protocol + +from torch import Tensor +from typing import List + +from ldm_patched.modules import model_management + +def dynamic_slice( + x: Tensor, + starts: List[int], + sizes: List[int], +) -> Tensor: + slicing = [slice(start, start + size) for start, size in zip(starts, sizes)] + return x[slicing] + +class AttnChunk(NamedTuple): + exp_values: Tensor + exp_weights_sum: Tensor + max_score: Tensor + +class SummarizeChunk(Protocol): + @staticmethod + def __call__( + query: Tensor, + key_t: Tensor, + value: Tensor, + ) -> AttnChunk: ... + +class ComputeQueryChunkAttn(Protocol): + @staticmethod + def __call__( + query: Tensor, + key_t: Tensor, + value: Tensor, + ) -> Tensor: ... 
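_summarize_chunk below keeps the per-chunk softmax numerically stable by subtracting the running maximum before exponentiating; the trick in isolation (assumed shapes):

import torch

scores = torch.randn(2, 16, 64)                          # [batch*heads, q_tokens, k_tokens]
max_score, _ = scores.max(dim=-1, keepdim=True)
weights = torch.exp(scores - max_score)                  # safe exp; the max is re-applied when merging
probs = weights / weights.sum(dim=-1, keepdim=True)
assert torch.allclose(probs, scores.softmax(dim=-1), atol=1e-5)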
+ +def _summarize_chunk( + query: Tensor, + key_t: Tensor, + value: Tensor, + scale: float, + upcast_attention: bool, + mask, +) -> AttnChunk: + if upcast_attention: + with torch.autocast(enabled=False, device_type = 'cuda'): + query = query.float() + key_t = key_t.float() + attn_weights = torch.baddbmm( + torch.empty(1, 1, 1, device=query.device, dtype=query.dtype), + query, + key_t, + alpha=scale, + beta=0, + ) + else: + attn_weights = torch.baddbmm( + torch.empty(1, 1, 1, device=query.device, dtype=query.dtype), + query, + key_t, + alpha=scale, + beta=0, + ) + max_score, _ = torch.max(attn_weights, -1, keepdim=True) + max_score = max_score.detach() + attn_weights -= max_score + if mask is not None: + attn_weights += mask + torch.exp(attn_weights, out=attn_weights) + exp_weights = attn_weights.to(value.dtype) + exp_values = torch.bmm(exp_weights, value) + max_score = max_score.squeeze(-1) + return AttnChunk(exp_values, exp_weights.sum(dim=-1), max_score) + +def _query_chunk_attention( + query: Tensor, + key_t: Tensor, + value: Tensor, + summarize_chunk: SummarizeChunk, + kv_chunk_size: int, + mask, +) -> Tensor: + batch_x_heads, k_channels_per_head, k_tokens = key_t.shape + _, _, v_channels_per_head = value.shape + + def chunk_scanner(chunk_idx: int, mask) -> AttnChunk: + key_chunk = dynamic_slice( + key_t, + (0, 0, chunk_idx), + (batch_x_heads, k_channels_per_head, kv_chunk_size) + ) + value_chunk = dynamic_slice( + value, + (0, chunk_idx, 0), + (batch_x_heads, kv_chunk_size, v_channels_per_head) + ) + if mask is not None: + mask = mask[:,:,chunk_idx:chunk_idx + kv_chunk_size] + + return summarize_chunk(query, key_chunk, value_chunk, mask=mask) + + chunks: List[AttnChunk] = [ + chunk_scanner(chunk, mask) for chunk in torch.arange(0, k_tokens, kv_chunk_size) + ] + acc_chunk = AttnChunk(*map(torch.stack, zip(*chunks))) + chunk_values, chunk_weights, chunk_max = acc_chunk + + global_max, _ = torch.max(chunk_max, 0, keepdim=True) + max_diffs = torch.exp(chunk_max - global_max) + chunk_values *= torch.unsqueeze(max_diffs, -1) + chunk_weights *= max_diffs + + all_values = chunk_values.sum(dim=0) + all_weights = torch.unsqueeze(chunk_weights, -1).sum(dim=0) + return all_values / all_weights + +# TODO: refactor CrossAttention#get_attention_scores to share code with this +def _get_attention_scores_no_kv_chunking( + query: Tensor, + key_t: Tensor, + value: Tensor, + scale: float, + upcast_attention: bool, + mask, +) -> Tensor: + if upcast_attention: + with torch.autocast(enabled=False, device_type = 'cuda'): + query = query.float() + key_t = key_t.float() + attn_scores = torch.baddbmm( + torch.empty(1, 1, 1, device=query.device, dtype=query.dtype), + query, + key_t, + alpha=scale, + beta=0, + ) + else: + attn_scores = torch.baddbmm( + torch.empty(1, 1, 1, device=query.device, dtype=query.dtype), + query, + key_t, + alpha=scale, + beta=0, + ) + + if mask is not None: + attn_scores += mask + try: + attn_probs = attn_scores.softmax(dim=-1) + del attn_scores + except model_management.OOM_EXCEPTION: + print("ran out of memory while running softmax in _get_attention_scores_no_kv_chunking, trying slower in place softmax instead") + attn_scores -= attn_scores.max(dim=-1, keepdim=True).values + torch.exp(attn_scores, out=attn_scores) + summed = torch.sum(attn_scores, dim=-1, keepdim=True) + attn_scores /= summed + attn_probs = attn_scores + + hidden_states_slice = torch.bmm(attn_probs.to(value.dtype), value) + return hidden_states_slice + +class ScannedChunk(NamedTuple): + chunk_idx: int + attn_chunk: 
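_query_chunk_attention above rescales each chunk's exp-weights by exp(chunk_max - global_max) so that the chunked result matches a full softmax over all keys. A small numerical check of that identity with assumed shapes (two key/value chunks of four tokens each):

import torch

scores = torch.randn(1, 4, 8)                        # 4 queries, 8 keys
values = torch.randn(1, 8, 5)
s_chunks = scores.split(4, dim=-1)
v_chunks = values.split(4, dim=1)
maxes = [s.max(dim=-1, keepdim=True).values for s in s_chunks]
exp_w = [torch.exp(s - m) for s, m in zip(s_chunks, maxes)]
global_max = torch.maximum(maxes[0], maxes[1])
scale = [torch.exp(m - global_max) for m in maxes]
num = sum(a * (w @ v) for a, w, v in zip(scale, exp_w, v_chunks))
den = sum(a * w.sum(dim=-1, keepdim=True) for a, w in zip(scale, exp_w))
assert torch.allclose(num / den, scores.softmax(dim=-1) @ values, atol=1e-5)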
AttnChunk + +def efficient_dot_product_attention( + query: Tensor, + key_t: Tensor, + value: Tensor, + query_chunk_size=1024, + kv_chunk_size: Optional[int] = None, + kv_chunk_size_min: Optional[int] = None, + use_checkpoint=True, + upcast_attention=False, + mask = None, +): + """Computes efficient dot-product attention given query, transposed key, and value. + This is efficient version of attention presented in + https://arxiv.org/abs/2112.05682v2 which comes with O(sqrt(n)) memory requirements. + Args: + query: queries for calculating attention with shape of + `[batch * num_heads, tokens, channels_per_head]`. + key_t: keys for calculating attention with shape of + `[batch * num_heads, channels_per_head, tokens]`. + value: values to be used in attention with shape of + `[batch * num_heads, tokens, channels_per_head]`. + query_chunk_size: int: query chunks size + kv_chunk_size: Optional[int]: key/value chunks size. if None: defaults to sqrt(key_tokens) + kv_chunk_size_min: Optional[int]: key/value minimum chunk size. only considered when kv_chunk_size is None. changes `sqrt(key_tokens)` into `max(sqrt(key_tokens), kv_chunk_size_min)`, to ensure our chunk sizes don't get too small (smaller chunks = more chunks = less concurrent work done). + use_checkpoint: bool: whether to use checkpointing (recommended True for training, False for inference) + Returns: + Output of shape `[batch * num_heads, query_tokens, channels_per_head]`. + """ + batch_x_heads, q_tokens, q_channels_per_head = query.shape + _, _, k_tokens = key_t.shape + scale = q_channels_per_head ** -0.5 + + kv_chunk_size = min(kv_chunk_size or int(math.sqrt(k_tokens)), k_tokens) + if kv_chunk_size_min is not None: + kv_chunk_size = max(kv_chunk_size, kv_chunk_size_min) + + if mask is not None and len(mask.shape) == 2: + mask = mask.unsqueeze(0) + + def get_query_chunk(chunk_idx: int) -> Tensor: + return dynamic_slice( + query, + (0, chunk_idx, 0), + (batch_x_heads, min(query_chunk_size, q_tokens), q_channels_per_head) + ) + + def get_mask_chunk(chunk_idx: int) -> Tensor: + if mask is None: + return None + chunk = min(query_chunk_size, q_tokens) + return mask[:,chunk_idx:chunk_idx + chunk] + + summarize_chunk: SummarizeChunk = partial(_summarize_chunk, scale=scale, upcast_attention=upcast_attention) + summarize_chunk: SummarizeChunk = partial(checkpoint, summarize_chunk) if use_checkpoint else summarize_chunk + compute_query_chunk_attn: ComputeQueryChunkAttn = partial( + _get_attention_scores_no_kv_chunking, + scale=scale, + upcast_attention=upcast_attention + ) if k_tokens <= kv_chunk_size else ( + # fast-path for when there's just 1 key-value chunk per query chunk (this is just sliced attention btw) + partial( + _query_chunk_attention, + kv_chunk_size=kv_chunk_size, + summarize_chunk=summarize_chunk, + ) + ) + + if q_tokens <= query_chunk_size: + # fast-path for when there's just 1 query chunk + return compute_query_chunk_attn( + query=query, + key_t=key_t, + value=value, + mask=mask, + ) + + # TODO: maybe we should use torch.empty_like(query) to allocate storage in-advance, + # and pass slices to be mutated, instead of torch.cat()ing the returned slices + res = torch.cat([ + compute_query_chunk_attn( + query=get_query_chunk(i * query_chunk_size), + key_t=key_t, + value=value, + mask=get_mask_chunk(i * query_chunk_size) + ) for i in range(math.ceil(q_tokens / query_chunk_size)) + ], dim=1) + return res diff --git a/ldm_patched/ldm/modules/temporal_ae.py b/ldm_patched/ldm/modules/temporal_ae.py new file mode 100644 index 
0000000000000000000000000000000000000000..24fae1d3dd4f3f6c29ff5e2dc3c50274d338b2da --- /dev/null +++ b/ldm_patched/ldm/modules/temporal_ae.py @@ -0,0 +1,252 @@ +# 1st edit by https://github.com/Stability-AI/generative-models +# 2nd edit by https://github.com/comfyanonymous/ComfyUI +# 3rd edit by Forge + +# This file is not used in image diffusion backend. (but used in SVD.) + + +import functools +from typing import Callable, Iterable, Union + +import torch +from einops import rearrange, repeat + +import ldm_patched.modules.ops +ops = ldm_patched.modules.ops.disable_weight_init + +from .diffusionmodules.model import ( + AttnBlock, + Decoder, + ResnetBlock, +) +from .diffusionmodules.openaimodel import ResBlock, timestep_embedding +from .attention import BasicTransformerBlock + +def partialclass(cls, *args, **kwargs): + class NewCls(cls): + __init__ = functools.partialmethod(cls.__init__, *args, **kwargs) + + return NewCls + + +class VideoResBlock(ResnetBlock): + def __init__( + self, + out_channels, + *args, + dropout=0.0, + video_kernel_size=3, + alpha=0.0, + merge_strategy="learned", + **kwargs, + ): + super().__init__(out_channels=out_channels, dropout=dropout, *args, **kwargs) + if video_kernel_size is None: + video_kernel_size = [3, 1, 1] + self.time_stack = ResBlock( + channels=out_channels, + emb_channels=0, + dropout=dropout, + dims=3, + use_scale_shift_norm=False, + use_conv=False, + up=False, + down=False, + kernel_size=video_kernel_size, + use_checkpoint=False, + skip_t_emb=True, + ) + + self.merge_strategy = merge_strategy + if self.merge_strategy == "fixed": + self.register_buffer("mix_factor", torch.Tensor([alpha])) + elif self.merge_strategy == "learned": + self.register_parameter( + "mix_factor", torch.nn.Parameter(torch.Tensor([alpha])) + ) + else: + raise ValueError(f"unknown merge strategy {self.merge_strategy}") + + def get_alpha(self, bs): + if self.merge_strategy == "fixed": + return self.mix_factor + elif self.merge_strategy == "learned": + return torch.sigmoid(self.mix_factor) + else: + raise NotImplementedError() + + def forward(self, x, temb, skip_video=False, timesteps=None): + b, c, h, w = x.shape + if timesteps is None: + timesteps = b + + x = super().forward(x, temb) + + if not skip_video: + x_mix = rearrange(x, "(b t) c h w -> b c t h w", t=timesteps) + + x = rearrange(x, "(b t) c h w -> b c t h w", t=timesteps) + + x = self.time_stack(x, temb) + + alpha = self.get_alpha(bs=b // timesteps).to(x.device) + x = alpha * x + (1.0 - alpha) * x_mix + + x = rearrange(x, "b c t h w -> (b t) c h w") + return x + + +class AE3DConv(ops.Conv2d): + def __init__(self, in_channels, out_channels, video_kernel_size=3, *args, **kwargs): + super().__init__(in_channels, out_channels, *args, **kwargs) + if isinstance(video_kernel_size, Iterable): + padding = [int(k // 2) for k in video_kernel_size] + else: + padding = int(video_kernel_size // 2) + + self.time_mix_conv = ops.Conv3d( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=video_kernel_size, + padding=padding, + ) + + def forward(self, input, timesteps=None, skip_video=False): + if timesteps is None: + timesteps = input.shape[0] + x = super().forward(input) + if skip_video: + return x + x = rearrange(x, "(b t) c h w -> b c t h w", t=timesteps) + x = self.time_mix_conv(x) + return rearrange(x, "b c t h w -> (b t) c h w") + + +class AttnVideoBlock(AttnBlock): + def __init__( + self, in_channels: int, alpha: float = 0, merge_strategy: str = "learned" + ): + super().__init__(in_channels) + # no context, 
single headed, as in base class + self.time_mix_block = BasicTransformerBlock( + dim=in_channels, + n_heads=1, + d_head=in_channels, + checkpoint=False, + ff_in=True, + ) + + time_embed_dim = self.in_channels * 4 + self.video_time_embed = torch.nn.Sequential( + ops.Linear(self.in_channels, time_embed_dim), + torch.nn.SiLU(), + ops.Linear(time_embed_dim, self.in_channels), + ) + + self.merge_strategy = merge_strategy + if self.merge_strategy == "fixed": + self.register_buffer("mix_factor", torch.Tensor([alpha])) + elif self.merge_strategy == "learned": + self.register_parameter( + "mix_factor", torch.nn.Parameter(torch.Tensor([alpha])) + ) + else: + raise ValueError(f"unknown merge strategy {self.merge_strategy}") + + def forward(self, x, timesteps=None, skip_time_block=False): + if skip_time_block: + return super().forward(x) + + if timesteps is None: + timesteps = x.shape[0] + + x_in = x + x = self.attention(x) + h, w = x.shape[2:] + x = rearrange(x, "b c h w -> b (h w) c") + + x_mix = x + num_frames = torch.arange(timesteps, device=x.device) + num_frames = repeat(num_frames, "t -> b t", b=x.shape[0] // timesteps) + num_frames = rearrange(num_frames, "b t -> (b t)") + t_emb = timestep_embedding(num_frames, self.in_channels, repeat_only=False) + emb = self.video_time_embed(t_emb) # b, n_channels + emb = emb[:, None, :] + x_mix = x_mix + emb + + alpha = self.get_alpha().to(x.device) + x_mix = self.time_mix_block(x_mix, timesteps=timesteps) + x = alpha * x + (1.0 - alpha) * x_mix # alpha merge + + x = rearrange(x, "b (h w) c -> b c h w", h=h, w=w) + x = self.proj_out(x) + + return x_in + x + + def get_alpha( + self, + ): + if self.merge_strategy == "fixed": + return self.mix_factor + elif self.merge_strategy == "learned": + return torch.sigmoid(self.mix_factor) + else: + raise NotImplementedError(f"unknown merge strategy {self.merge_strategy}") + + + +def make_time_attn( + in_channels, + attn_type="vanilla", + attn_kwargs=None, + alpha: float = 0, + merge_strategy: str = "learned", +): + return partialclass( + AttnVideoBlock, in_channels, alpha=alpha, merge_strategy=merge_strategy + ) + + +class Conv2DWrapper(torch.nn.Conv2d): + def forward(self, input: torch.Tensor, **kwargs) -> torch.Tensor: + return super().forward(input) + + +class VideoDecoder(Decoder): + available_time_modes = ["all", "conv-only", "attn-only"] + + def __init__( + self, + *args, + video_kernel_size: Union[int, list] = 3, + alpha: float = 0.0, + merge_strategy: str = "learned", + time_mode: str = "conv-only", + **kwargs, + ): + self.video_kernel_size = video_kernel_size + self.alpha = alpha + self.merge_strategy = merge_strategy + self.time_mode = time_mode + assert ( + self.time_mode in self.available_time_modes + ), f"time_mode parameter has to be in {self.available_time_modes}" + + if self.time_mode != "attn-only": + kwargs["conv_out_op"] = partialclass(AE3DConv, video_kernel_size=self.video_kernel_size) + if self.time_mode not in ["conv-only", "only-last-conv"]: + kwargs["attn_op"] = partialclass(make_time_attn, alpha=self.alpha, merge_strategy=self.merge_strategy) + if self.time_mode not in ["attn-only", "only-last-conv"]: + kwargs["resnet_op"] = partialclass(VideoResBlock, video_kernel_size=self.video_kernel_size, alpha=self.alpha, merge_strategy=self.merge_strategy) + + super().__init__(*args, **kwargs) + + def get_last_layer(self, skip_time_mix=False, **kwargs): + if self.time_mode == "attn-only": + raise NotImplementedError("TODO") + else: + return ( + self.conv_out.time_mix_conv.weight + if not skip_time_mix + 
else self.conv_out.weight + ) diff --git a/ldm_patched/ldm/util.py b/ldm_patched/ldm/util.py new file mode 100644 index 0000000000000000000000000000000000000000..27424d8f7f76879e3af80d1a768f7ff8c955a52b --- /dev/null +++ b/ldm_patched/ldm/util.py @@ -0,0 +1,201 @@ +# Taken from https://github.com/comfyanonymous/ComfyUI +# This file is only for reference, and not used in the backend or runtime. + + +import importlib + +import torch +from torch import optim +import numpy as np + +from inspect import isfunction +from PIL import Image, ImageDraw, ImageFont + + +def log_txt_as_img(wh, xc, size=10): + # wh a tuple of (width, height) + # xc a list of captions to plot + b = len(xc) + txts = list() + for bi in range(b): + txt = Image.new("RGB", wh, color="white") + draw = ImageDraw.Draw(txt) + font = ImageFont.truetype('data/DejaVuSans.ttf', size=size) + nc = int(40 * (wh[0] / 256)) + lines = "\n".join(xc[bi][start:start + nc] for start in range(0, len(xc[bi]), nc)) + + try: + draw.text((0, 0), lines, fill="black", font=font) + except UnicodeEncodeError: + print("Cant encode string for logging. Skipping.") + + txt = np.array(txt).transpose(2, 0, 1) / 127.5 - 1.0 + txts.append(txt) + txts = np.stack(txts) + txts = torch.tensor(txts) + return txts + + +def ismap(x): + if not isinstance(x, torch.Tensor): + return False + return (len(x.shape) == 4) and (x.shape[1] > 3) + + +def isimage(x): + if not isinstance(x,torch.Tensor): + return False + return (len(x.shape) == 4) and (x.shape[1] == 3 or x.shape[1] == 1) + + +def exists(x): + return x is not None + + +def default(val, d): + if exists(val): + return val + return d() if isfunction(d) else d + + +def mean_flat(tensor): + """ + https://github.com/openai/guided-diffusion/blob/27c20a8fab9cb472df5d6bdd6c8d11c8f430b924/guided_diffusion/nn.py#L86 + Take the mean over all non-batch dimensions. 
+ """ + return tensor.mean(dim=list(range(1, len(tensor.shape)))) + + +def count_params(model, verbose=False): + total_params = sum(p.numel() for p in model.parameters()) + if verbose: + print(f"{model.__class__.__name__} has {total_params*1.e-6:.2f} M params.") + return total_params + + +def instantiate_from_config(config): + if not "target" in config: + if config == '__is_first_stage__': + return None + elif config == "__is_unconditional__": + return None + raise KeyError("Expected key `target` to instantiate.") + return get_obj_from_str(config["target"])(**config.get("params", dict())) + + +def get_obj_from_str(string, reload=False): + module, cls = string.rsplit(".", 1) + if reload: + module_imp = importlib.import_module(module) + importlib.reload(module_imp) + return getattr(importlib.import_module(module, package=None), cls) + + +class AdamWwithEMAandWings(optim.Optimizer): + # credit to https://gist.github.com/crowsonkb/65f7265353f403714fce3b2595e0b298 + def __init__(self, params, lr=1.e-3, betas=(0.9, 0.999), eps=1.e-8, # TODO: check hyperparameters before using + weight_decay=1.e-2, amsgrad=False, ema_decay=0.9999, # ema decay to match previous code + ema_power=1., param_names=()): + """AdamW that saves EMA versions of the parameters.""" + if not 0.0 <= lr: + raise ValueError("Invalid learning rate: {}".format(lr)) + if not 0.0 <= eps: + raise ValueError("Invalid epsilon value: {}".format(eps)) + if not 0.0 <= betas[0] < 1.0: + raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) + if not 0.0 <= betas[1] < 1.0: + raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) + if not 0.0 <= weight_decay: + raise ValueError("Invalid weight_decay value: {}".format(weight_decay)) + if not 0.0 <= ema_decay <= 1.0: + raise ValueError("Invalid ema_decay value: {}".format(ema_decay)) + defaults = dict(lr=lr, betas=betas, eps=eps, + weight_decay=weight_decay, amsgrad=amsgrad, ema_decay=ema_decay, + ema_power=ema_power, param_names=param_names) + super().__init__(params, defaults) + + def __setstate__(self, state): + super().__setstate__(state) + for group in self.param_groups: + group.setdefault('amsgrad', False) + + @torch.no_grad() + def step(self, closure=None): + """Performs a single optimization step. + Args: + closure (callable, optional): A closure that reevaluates the model + and returns the loss. + """ + loss = None + if closure is not None: + with torch.enable_grad(): + loss = closure() + + for group in self.param_groups: + params_with_grad = [] + grads = [] + exp_avgs = [] + exp_avg_sqs = [] + ema_params_with_grad = [] + state_sums = [] + max_exp_avg_sqs = [] + state_steps = [] + amsgrad = group['amsgrad'] + beta1, beta2 = group['betas'] + ema_decay = group['ema_decay'] + ema_power = group['ema_power'] + + for p in group['params']: + if p.grad is None: + continue + params_with_grad.append(p) + if p.grad.is_sparse: + raise RuntimeError('AdamW does not support sparse gradients') + grads.append(p.grad) + + state = self.state[p] + + # State initialization + if len(state) == 0: + state['step'] = 0 + # Exponential moving average of gradient values + state['exp_avg'] = torch.zeros_like(p, memory_format=torch.preserve_format) + # Exponential moving average of squared gradient values + state['exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format) + if amsgrad: + # Maintains max of all exp. moving avg. of sq. grad. 
values + state['max_exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format) + # Exponential moving average of parameter values + state['param_exp_avg'] = p.detach().float().clone() + + exp_avgs.append(state['exp_avg']) + exp_avg_sqs.append(state['exp_avg_sq']) + ema_params_with_grad.append(state['param_exp_avg']) + + if amsgrad: + max_exp_avg_sqs.append(state['max_exp_avg_sq']) + + # update the steps for each param group update + state['step'] += 1 + # record the step after step update + state_steps.append(state['step']) + + optim._functional.adamw(params_with_grad, + grads, + exp_avgs, + exp_avg_sqs, + max_exp_avg_sqs, + state_steps, + amsgrad=amsgrad, + beta1=beta1, + beta2=beta2, + lr=group['lr'], + weight_decay=group['weight_decay'], + eps=group['eps'], + maximize=False) + + cur_ema_decay = min(ema_decay, 1 - state['step'] ** -ema_power) + for param, ema_param in zip(params_with_grad, ema_params_with_grad): + ema_param.mul_(cur_ema_decay).add_(param.float(), alpha=1 - cur_ema_decay) + + return loss \ No newline at end of file diff --git a/ldm_patched/licenses-3rd/chainer b/ldm_patched/licenses-3rd/chainer new file mode 100644 index 0000000000000000000000000000000000000000..db8ef9d966d9b2d89f1b060912806b45797d8c14 --- /dev/null +++ b/ldm_patched/licenses-3rd/chainer @@ -0,0 +1,20 @@ +Copyright (c) 2015 Preferred Infrastructure, Inc. +Copyright (c) 2015 Preferred Networks, Inc. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. \ No newline at end of file diff --git a/ldm_patched/licenses-3rd/comfyui b/ldm_patched/licenses-3rd/comfyui new file mode 100644 index 0000000000000000000000000000000000000000..e72bfddabc15be5718a7cc061ac10e47741d8219 --- /dev/null +++ b/ldm_patched/licenses-3rd/comfyui @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. 
We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. 
The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. 
+ + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. 
This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. 
+ + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. 
If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). 
+ + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". 
+ + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. 
+ + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. 
+ + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + Copyright (C) + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +. 
\ No newline at end of file diff --git a/ldm_patched/licenses-3rd/diffusers b/ldm_patched/licenses-3rd/diffusers new file mode 100644 index 0000000000000000000000000000000000000000..f49a4e16e68b128803cc2dcea614603632b04eac --- /dev/null +++ b/ldm_patched/licenses-3rd/diffusers @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. 
Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/ldm_patched/licenses-3rd/kdiffusion b/ldm_patched/licenses-3rd/kdiffusion new file mode 100644 index 0000000000000000000000000000000000000000..e20684e521b3f0ce86ed26c2fc95ed665f52e05f --- /dev/null +++ b/ldm_patched/licenses-3rd/kdiffusion @@ -0,0 +1,19 @@ +Copyright (c) 2022 Katherine Crowson + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. \ No newline at end of file diff --git a/ldm_patched/licenses-3rd/ldm b/ldm_patched/licenses-3rd/ldm new file mode 100644 index 0000000000000000000000000000000000000000..1a1c5058de8a8817b786ad440e9984c99e7df0ca --- /dev/null +++ b/ldm_patched/licenses-3rd/ldm @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022 Machine Vision and Learning Group, LMU Munich + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
\ No newline at end of file diff --git a/ldm_patched/licenses-3rd/taesd b/ldm_patched/licenses-3rd/taesd new file mode 100644 index 0000000000000000000000000000000000000000..62e6312e5e8ca8669aa15c250f1d40c001cd7f20 --- /dev/null +++ b/ldm_patched/licenses-3rd/taesd @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 Ollin Boer Bohan + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/ldm_patched/licenses-3rd/transformers b/ldm_patched/licenses-3rd/transformers new file mode 100644 index 0000000000000000000000000000000000000000..e44d8f5b79a0643c99977835611e1da9d08fc3cf --- /dev/null +++ b/ldm_patched/licenses-3rd/transformers @@ -0,0 +1,203 @@ +Copyright 2018- The Hugging Face team. All rights reserved. + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/ldm_patched/modules/args_parser.py b/ldm_patched/modules/args_parser.py new file mode 100644 index 0000000000000000000000000000000000000000..8d39f757bae06689eaa79683d01b1a506b998fc9 --- /dev/null +++ b/ldm_patched/modules/args_parser.py @@ -0,0 +1,131 @@ +# Taken from https://github.com/comfyanonymous/ComfyUI +# This file is only for reference, and not used in the backend or runtime. 
+ + +import argparse +import enum +import ldm_patched.modules.options + +class EnumAction(argparse.Action): + """ + Argparse action for handling Enums + """ + def __init__(self, **kwargs): + # Pop off the type value + enum_type = kwargs.pop("type", None) + + # Ensure an Enum subclass is provided + if enum_type is None: + raise ValueError("type must be assigned an Enum when using EnumAction") + if not issubclass(enum_type, enum.Enum): + raise TypeError("type must be an Enum when using EnumAction") + + # Generate choices from the Enum + choices = tuple(e.value for e in enum_type) + kwargs.setdefault("choices", choices) + kwargs.setdefault("metavar", f"[{','.join(list(choices))}]") + + super(EnumAction, self).__init__(**kwargs) + + self._enum = enum_type + + def __call__(self, parser, namespace, values, option_string=None): + # Convert value back into an Enum + value = self._enum(values) + setattr(namespace, self.dest, value) + + +parser = argparse.ArgumentParser() + +#parser.add_argument("--listen", type=str, default="127.0.0.1", metavar="IP", nargs="?", const="0.0.0.0") +#parser.add_argument("--port", type=int, default=8188) +parser.add_argument("--disable-header-check", type=str, default=None, metavar="ORIGIN", nargs="?", const="*") +parser.add_argument("--web-upload-size", type=float, default=100) + +parser.add_argument("--external-working-path", type=str, default=None, metavar="PATH", nargs='+', action='append') +parser.add_argument("--output-path", type=str, default=None) +parser.add_argument("--temp-path", type=str, default=None) +parser.add_argument("--cache-path", type=str, default=None) +parser.add_argument("--in-browser", action="store_true") +parser.add_argument("--disable-in-browser", action="store_true") +parser.add_argument("--gpu-device-id", type=int, default=None, metavar="DEVICE_ID") + +parser.add_argument("--disable-attention-upcast", action="store_true") + +fp_group = parser.add_mutually_exclusive_group() +fp_group.add_argument("--all-in-fp32", action="store_true") +fp_group.add_argument("--all-in-fp16", action="store_true") + +fpunet_group = parser.add_mutually_exclusive_group() +fpunet_group.add_argument("--unet-in-bf16", action="store_true") +fpunet_group.add_argument("--unet-in-fp16", action="store_true") +fpunet_group.add_argument("--unet-in-fp8-e4m3fn", action="store_true") +fpunet_group.add_argument("--unet-in-fp8-e5m2", action="store_true") + +fpvae_group = parser.add_mutually_exclusive_group() +fpvae_group.add_argument("--vae-in-fp16", action="store_true") +fpvae_group.add_argument("--vae-in-fp32", action="store_true") +fpvae_group.add_argument("--vae-in-bf16", action="store_true") + +parser.add_argument("--vae-in-cpu", action="store_true") + +fpte_group = parser.add_mutually_exclusive_group() +fpte_group.add_argument("--clip-in-fp8-e4m3fn", action="store_true") +fpte_group.add_argument("--clip-in-fp8-e5m2", action="store_true") +fpte_group.add_argument("--clip-in-fp16", action="store_true") +fpte_group.add_argument("--clip-in-fp32", action="store_true") + + +parser.add_argument("--directml", type=int, nargs="?", metavar="DIRECTML_DEVICE", const=-1) + +parser.add_argument("--disable-ipex-hijack", action="store_true") + +class LatentPreviewMethod(enum.Enum): + NoPreviews = "none" + Auto = "auto" + Latent2RGB = "fast" + TAESD = "taesd" + +parser.add_argument("--preview-option", type=LatentPreviewMethod, default=LatentPreviewMethod.NoPreviews, action=EnumAction) + +attn_group = parser.add_mutually_exclusive_group() +attn_group.add_argument("--attention-split", 
action="store_true") +attn_group.add_argument("--attention-quad", action="store_true") +attn_group.add_argument("--attention-pytorch", action="store_true") + +parser.add_argument("--disable-xformers", action="store_true") + +vram_group = parser.add_mutually_exclusive_group() +vram_group.add_argument("--always-gpu", action="store_true") +vram_group.add_argument("--always-high-vram", action="store_true") +vram_group.add_argument("--always-normal-vram", action="store_true") +vram_group.add_argument("--always-low-vram", action="store_true") +vram_group.add_argument("--always-no-vram", action="store_true") +vram_group.add_argument("--always-cpu", action="store_true") + + +parser.add_argument("--always-offload-from-vram", action="store_true") +parser.add_argument("--pytorch-deterministic", action="store_true") + +parser.add_argument("--disable-server-log", action="store_true") +parser.add_argument("--debug-mode", action="store_true") +parser.add_argument("--is-windows-embedded-python", action="store_true") + +parser.add_argument("--disable-server-info", action="store_true") + +parser.add_argument("--multi-user", action="store_true") + +parser.add_argument("--cuda-malloc", action="store_true") +parser.add_argument("--cuda-stream", action="store_true") +parser.add_argument("--pin-shared-memory", action="store_true") + +if ldm_patched.modules.options.args_parsing: + args = parser.parse_args([]) +else: + args = parser.parse_args([]) + +if args.is_windows_embedded_python: + args.in_browser = True + +if args.disable_in_browser: + args.in_browser = False diff --git a/ldm_patched/modules/checkpoint_pickle.py b/ldm_patched/modules/checkpoint_pickle.py new file mode 100644 index 0000000000000000000000000000000000000000..782cac22b06e5217a392831cc1c575d8dc257d9d --- /dev/null +++ b/ldm_patched/modules/checkpoint_pickle.py @@ -0,0 +1,17 @@ +# Taken from https://github.com/comfyanonymous/ComfyUI +# This file is only for reference, and not used in the backend or runtime. + + +import pickle + +load = pickle.load + +class Empty: + pass + +class Unpickler(pickle.Unpickler): + def find_class(self, module, name): + #TODO: safe unpickle + if module.startswith("pytorch_lightning"): + return Empty + return super().find_class(module, name) diff --git a/ldm_patched/modules/clip_config_bigg.json b/ldm_patched/modules/clip_config_bigg.json new file mode 100644 index 0000000000000000000000000000000000000000..32d82ff39ba66ba0be15ec101993e1c46cc3f7ab --- /dev/null +++ b/ldm_patched/modules/clip_config_bigg.json @@ -0,0 +1,23 @@ +{ + "architectures": [ + "CLIPTextModel" + ], + "attention_dropout": 0.0, + "bos_token_id": 0, + "dropout": 0.0, + "eos_token_id": 2, + "hidden_act": "gelu", + "hidden_size": 1280, + "initializer_factor": 1.0, + "initializer_range": 0.02, + "intermediate_size": 5120, + "layer_norm_eps": 1e-05, + "max_position_embeddings": 77, + "model_type": "clip_text_model", + "num_attention_heads": 20, + "num_hidden_layers": 32, + "pad_token_id": 1, + "projection_dim": 1280, + "torch_dtype": "float32", + "vocab_size": 49408 +} diff --git a/ldm_patched/modules/clip_model.py b/ldm_patched/modules/clip_model.py new file mode 100644 index 0000000000000000000000000000000000000000..1c3fd11e0c7e977e719faf98f4095c91b260737a --- /dev/null +++ b/ldm_patched/modules/clip_model.py @@ -0,0 +1,192 @@ +# Taken from https://github.com/comfyanonymous/ComfyUI +# This file is only for reference, and not used in the backend or runtime. 
+ + +import torch +from ldm_patched.ldm.modules.attention import optimized_attention_for_device + +class CLIPAttention(torch.nn.Module): + def __init__(self, embed_dim, heads, dtype, device, operations): + super().__init__() + + self.heads = heads + self.q_proj = operations.Linear(embed_dim, embed_dim, bias=True, dtype=dtype, device=device) + self.k_proj = operations.Linear(embed_dim, embed_dim, bias=True, dtype=dtype, device=device) + self.v_proj = operations.Linear(embed_dim, embed_dim, bias=True, dtype=dtype, device=device) + + self.out_proj = operations.Linear(embed_dim, embed_dim, bias=True, dtype=dtype, device=device) + + def forward(self, x, mask=None, optimized_attention=None): + q = self.q_proj(x) + k = self.k_proj(x) + v = self.v_proj(x) + + out = optimized_attention(q, k, v, self.heads, mask) + return self.out_proj(out) + +ACTIVATIONS = {"quick_gelu": lambda a: a * torch.sigmoid(1.702 * a), + "gelu": torch.nn.functional.gelu, +} + +class CLIPMLP(torch.nn.Module): + def __init__(self, embed_dim, intermediate_size, activation, dtype, device, operations): + super().__init__() + self.fc1 = operations.Linear(embed_dim, intermediate_size, bias=True, dtype=dtype, device=device) + self.activation = ACTIVATIONS[activation] + self.fc2 = operations.Linear(intermediate_size, embed_dim, bias=True, dtype=dtype, device=device) + + def forward(self, x): + x = self.fc1(x) + x = self.activation(x) + x = self.fc2(x) + return x + +class CLIPLayer(torch.nn.Module): + def __init__(self, embed_dim, heads, intermediate_size, intermediate_activation, dtype, device, operations): + super().__init__() + self.layer_norm1 = operations.LayerNorm(embed_dim, dtype=dtype, device=device) + self.self_attn = CLIPAttention(embed_dim, heads, dtype, device, operations) + self.layer_norm2 = operations.LayerNorm(embed_dim, dtype=dtype, device=device) + self.mlp = CLIPMLP(embed_dim, intermediate_size, intermediate_activation, dtype, device, operations) + + def forward(self, x, mask=None, optimized_attention=None): + x += self.self_attn(self.layer_norm1(x), mask, optimized_attention) + x += self.mlp(self.layer_norm2(x)) + return x + + +class CLIPEncoder(torch.nn.Module): + def __init__(self, num_layers, embed_dim, heads, intermediate_size, intermediate_activation, dtype, device, operations): + super().__init__() + self.layers = torch.nn.ModuleList([CLIPLayer(embed_dim, heads, intermediate_size, intermediate_activation, dtype, device, operations) for i in range(num_layers)]) + + def forward(self, x, mask=None, intermediate_output=None): + optimized_attention = optimized_attention_for_device(x.device, mask=mask is not None, small_input=True) + + if intermediate_output is not None: + if intermediate_output < 0: + intermediate_output = len(self.layers) + intermediate_output + + intermediate = None + for i, l in enumerate(self.layers): + x = l(x, mask, optimized_attention) + if i == intermediate_output: + intermediate = x.clone() + return x, intermediate + +class CLIPEmbeddings(torch.nn.Module): + def __init__(self, embed_dim, vocab_size=49408, num_positions=77, dtype=None, device=None): + super().__init__() + self.token_embedding = torch.nn.Embedding(vocab_size, embed_dim, dtype=dtype, device=device) + self.position_embedding = torch.nn.Embedding(num_positions, embed_dim, dtype=dtype, device=device) + + def forward(self, input_tokens): + return self.token_embedding(input_tokens) + self.position_embedding.weight + + +class CLIPTextModel_(torch.nn.Module): + def __init__(self, config_dict, dtype, device, operations): + 
num_layers = config_dict["num_hidden_layers"] + embed_dim = config_dict["hidden_size"] + heads = config_dict["num_attention_heads"] + intermediate_size = config_dict["intermediate_size"] + intermediate_activation = config_dict["hidden_act"] + + super().__init__() + self.embeddings = CLIPEmbeddings(embed_dim, dtype=torch.float32, device=device) + self.encoder = CLIPEncoder(num_layers, embed_dim, heads, intermediate_size, intermediate_activation, dtype, device, operations) + self.final_layer_norm = operations.LayerNorm(embed_dim, dtype=dtype, device=device) + + def forward(self, input_tokens, attention_mask=None, intermediate_output=None, final_layer_norm_intermediate=True): + x = self.embeddings(input_tokens) + mask = None + if attention_mask is not None: + mask = 1.0 - attention_mask.to(x.dtype).unsqueeze(1).unsqueeze(1).expand(attention_mask.shape[0], 1, attention_mask.shape[-1], attention_mask.shape[-1]) + mask = mask.masked_fill(mask.to(torch.bool), float("-inf")) + + causal_mask = torch.empty(x.shape[1], x.shape[1], dtype=x.dtype, device=x.device).fill_(float("-inf")).triu_(1) + if mask is not None: + mask += causal_mask + else: + mask = causal_mask + + x, i = self.encoder(x, mask=mask, intermediate_output=intermediate_output) + x = self.final_layer_norm(x) + if i is not None and final_layer_norm_intermediate: + i = self.final_layer_norm(i) + + pooled_output = x[torch.arange(x.shape[0], device=x.device), input_tokens.to(dtype=torch.int, device=x.device).argmax(dim=-1),] + return x, i, pooled_output + +class CLIPTextModel(torch.nn.Module): + def __init__(self, config_dict, dtype, device, operations): + super().__init__() + self.num_layers = config_dict["num_hidden_layers"] + self.text_model = CLIPTextModel_(config_dict, dtype, device, operations) + self.dtype = dtype + + def get_input_embeddings(self): + return self.text_model.embeddings.token_embedding + + def set_input_embeddings(self, embeddings): + self.text_model.embeddings.token_embedding = embeddings + + def forward(self, *args, **kwargs): + return self.text_model(*args, **kwargs) + +class CLIPVisionEmbeddings(torch.nn.Module): + def __init__(self, embed_dim, num_channels=3, patch_size=14, image_size=224, dtype=None, device=None, operations=None): + super().__init__() + self.class_embedding = torch.nn.Parameter(torch.empty(embed_dim, dtype=dtype, device=device)) + + self.patch_embedding = operations.Conv2d( + in_channels=num_channels, + out_channels=embed_dim, + kernel_size=patch_size, + stride=patch_size, + bias=False, + dtype=dtype, + device=device + ) + + num_patches = (image_size // patch_size) ** 2 + num_positions = num_patches + 1 + self.position_embedding = torch.nn.Embedding(num_positions, embed_dim, dtype=dtype, device=device) + + def forward(self, pixel_values): + embeds = self.patch_embedding(pixel_values).flatten(2).transpose(1, 2) + return torch.cat([self.class_embedding.to(embeds.device).expand(pixel_values.shape[0], 1, -1), embeds], dim=1) + self.position_embedding.weight.to(embeds.device) + + +class CLIPVision(torch.nn.Module): + def __init__(self, config_dict, dtype, device, operations): + super().__init__() + num_layers = config_dict["num_hidden_layers"] + embed_dim = config_dict["hidden_size"] + heads = config_dict["num_attention_heads"] + intermediate_size = config_dict["intermediate_size"] + intermediate_activation = config_dict["hidden_act"] + + self.embeddings = CLIPVisionEmbeddings(embed_dim, config_dict["num_channels"], config_dict["patch_size"], config_dict["image_size"], dtype=torch.float32, 
device=device, operations=operations) + self.pre_layrnorm = operations.LayerNorm(embed_dim) + self.encoder = CLIPEncoder(num_layers, embed_dim, heads, intermediate_size, intermediate_activation, dtype, device, operations) + self.post_layernorm = operations.LayerNorm(embed_dim) + + def forward(self, pixel_values, attention_mask=None, intermediate_output=None): + x = self.embeddings(pixel_values) + x = self.pre_layrnorm(x) + #TODO: attention_mask? + x, i = self.encoder(x, mask=None, intermediate_output=intermediate_output) + pooled_output = self.post_layernorm(x[:, 0, :]) + return x, i, pooled_output + +class CLIPVisionModelProjection(torch.nn.Module): + def __init__(self, config_dict, dtype, device, operations): + super().__init__() + self.vision_model = CLIPVision(config_dict, dtype, device, operations) + self.visual_projection = operations.Linear(config_dict["hidden_size"], config_dict["projection_dim"], bias=False) + + def forward(self, *args, **kwargs): + x = self.vision_model(*args, **kwargs) + out = self.visual_projection(x[2]) + return (x[0], x[1], out) diff --git a/ldm_patched/modules/clip_vision.py b/ldm_patched/modules/clip_vision.py new file mode 100644 index 0000000000000000000000000000000000000000..83c891730f7e5b10384f8df417fc7f24ee398aaf --- /dev/null +++ b/ldm_patched/modules/clip_vision.py @@ -0,0 +1,134 @@ +# 1st edit by https://github.com/comfyanonymous/ComfyUI +# 2nd edit by Forge + + +from .utils import load_torch_file, transformers_convert, state_dict_prefix_replace +import os +import torch + +import ldm_patched.modules.ops +import ldm_patched.modules.model_patcher +import ldm_patched.modules.model_management +import ldm_patched.modules.utils +import ldm_patched.modules.clip_model +import ldm_patched.modules.ops as ops + +from transformers import modeling_utils, CLIPVisionConfig, CLIPVisionModelWithProjection + + +class Output: + def __getitem__(self, key): + return getattr(self, key) + def __setitem__(self, key, item): + setattr(self, key, item) + +def clip_preprocess(image, size=224): + mean = torch.tensor([ 0.48145466,0.4578275,0.40821073], device=image.device, dtype=image.dtype) + std = torch.tensor([0.26862954,0.26130258,0.27577711], device=image.device, dtype=image.dtype) + image = image.movedim(-1, 1) + if not (image.shape[2] == size and image.shape[3] == size): + scale = (size / min(image.shape[2], image.shape[3])) + image = torch.nn.functional.interpolate(image, size=(round(scale * image.shape[2]), round(scale * image.shape[3])), mode="bicubic", antialias=True) + h = (image.shape[2] - size)//2 + w = (image.shape[3] - size)//2 + image = image[:,:,h:h+size,w:w+size] + image = torch.clip((255. 
* image), 0, 255).round() / 255.0 + return (image - mean.view([3,1,1])) / std.view([3,1,1]) + +class ClipVisionModel(): + def __init__(self, json_config): + config = CLIPVisionConfig.from_json_file(json_config) + + self.load_device = ldm_patched.modules.model_management.text_encoder_device() + self.offload_device = ldm_patched.modules.model_management.text_encoder_offload_device() + + if ldm_patched.modules.model_management.should_use_fp16(self.load_device, prioritize_performance=False): + self.dtype = torch.float16 + else: + self.dtype = torch.float32 + + with ops.use_patched_ops(ops.manual_cast): + with modeling_utils.no_init_weights(): + self.model = CLIPVisionModelWithProjection(config) + + self.model.to(self.dtype) + self.patcher = ldm_patched.modules.model_patcher.ModelPatcher( + self.model, + load_device=self.load_device, + offload_device=self.offload_device + ) + + def load_sd(self, sd): + return self.model.load_state_dict(sd, strict=False) + + def get_sd(self): + return self.model.state_dict() + + def encode_image(self, image): + ldm_patched.modules.model_management.load_model_gpu(self.patcher) + pixel_values = ldm_patched.modules.clip_vision.clip_preprocess(image.to(self.load_device)) + outputs = self.model(pixel_values=pixel_values, output_hidden_states=True) + + o = Output() + o["last_hidden_state"] = outputs.last_hidden_state.to(ldm_patched.modules.model_management.intermediate_device()) + o["penultimate_hidden_states"] = outputs.hidden_states[-2].to(ldm_patched.modules.model_management.intermediate_device()) + o["image_embeds"] = outputs.image_embeds.to(ldm_patched.modules.model_management.intermediate_device()) + + return o + +def convert_to_transformers(sd, prefix): + sd_k = sd.keys() + if "{}transformer.resblocks.0.attn.in_proj_weight".format(prefix) in sd_k: + keys_to_replace = { + "{}class_embedding".format(prefix): "vision_model.embeddings.class_embedding", + "{}conv1.weight".format(prefix): "vision_model.embeddings.patch_embedding.weight", + "{}positional_embedding".format(prefix): "vision_model.embeddings.position_embedding.weight", + "{}ln_post.bias".format(prefix): "vision_model.post_layernorm.bias", + "{}ln_post.weight".format(prefix): "vision_model.post_layernorm.weight", + "{}ln_pre.bias".format(prefix): "vision_model.pre_layrnorm.bias", + "{}ln_pre.weight".format(prefix): "vision_model.pre_layrnorm.weight", + } + + for x in keys_to_replace: + if x in sd_k: + sd[keys_to_replace[x]] = sd.pop(x) + + if "{}proj".format(prefix) in sd_k: + sd['visual_projection.weight'] = sd.pop("{}proj".format(prefix)).transpose(0, 1) + + sd = transformers_convert(sd, prefix, "vision_model.", 48) + else: + replace_prefix = {prefix: ""} + sd = state_dict_prefix_replace(sd, replace_prefix) + return sd + +def load_clipvision_from_sd(sd, prefix="", convert_keys=False): + if convert_keys: + sd = convert_to_transformers(sd, prefix) + if "vision_model.encoder.layers.47.layer_norm1.weight" in sd: + json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_g.json") + elif "vision_model.encoder.layers.30.layer_norm1.weight" in sd: + json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_h.json") + elif "vision_model.encoder.layers.22.layer_norm1.weight" in sd: + json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_vitl.json") + else: + return None + + clip = ClipVisionModel(json_config) + m, u = clip.load_sd(sd) + if len(m) > 0: + print("extra clip vision:", m) + u = set(u) + keys = 
list(sd.keys()) + for k in keys: + if k not in u: + t = sd.pop(k) + del t + return clip + +def load(ckpt_path): + sd = load_torch_file(ckpt_path) + if "visual.transformer.resblocks.0.attn.in_proj_weight" in sd: + return load_clipvision_from_sd(sd, prefix="visual.", convert_keys=True) + else: + return load_clipvision_from_sd(sd) diff --git a/ldm_patched/modules/clip_vision_config_g.json b/ldm_patched/modules/clip_vision_config_g.json new file mode 100644 index 0000000000000000000000000000000000000000..708e7e21ac3513a719d6a49e88e756f5ef7e2c8d --- /dev/null +++ b/ldm_patched/modules/clip_vision_config_g.json @@ -0,0 +1,18 @@ +{ + "attention_dropout": 0.0, + "dropout": 0.0, + "hidden_act": "gelu", + "hidden_size": 1664, + "image_size": 224, + "initializer_factor": 1.0, + "initializer_range": 0.02, + "intermediate_size": 8192, + "layer_norm_eps": 1e-05, + "model_type": "clip_vision_model", + "num_attention_heads": 16, + "num_channels": 3, + "num_hidden_layers": 48, + "patch_size": 14, + "projection_dim": 1280, + "torch_dtype": "float32" +} diff --git a/ldm_patched/modules/clip_vision_config_h.json b/ldm_patched/modules/clip_vision_config_h.json new file mode 100644 index 0000000000000000000000000000000000000000..bb71be419a4be0ad5c8c157850de032a65593cb9 --- /dev/null +++ b/ldm_patched/modules/clip_vision_config_h.json @@ -0,0 +1,18 @@ +{ + "attention_dropout": 0.0, + "dropout": 0.0, + "hidden_act": "gelu", + "hidden_size": 1280, + "image_size": 224, + "initializer_factor": 1.0, + "initializer_range": 0.02, + "intermediate_size": 5120, + "layer_norm_eps": 1e-05, + "model_type": "clip_vision_model", + "num_attention_heads": 16, + "num_channels": 3, + "num_hidden_layers": 32, + "patch_size": 14, + "projection_dim": 1024, + "torch_dtype": "float32" +} diff --git a/ldm_patched/modules/clip_vision_config_vitl.json b/ldm_patched/modules/clip_vision_config_vitl.json new file mode 100644 index 0000000000000000000000000000000000000000..c59b8ed5a4c1f41fbcc9e6811d2c7dfe44273de7 --- /dev/null +++ b/ldm_patched/modules/clip_vision_config_vitl.json @@ -0,0 +1,18 @@ +{ + "attention_dropout": 0.0, + "dropout": 0.0, + "hidden_act": "quick_gelu", + "hidden_size": 1024, + "image_size": 224, + "initializer_factor": 1.0, + "initializer_range": 0.02, + "intermediate_size": 4096, + "layer_norm_eps": 1e-05, + "model_type": "clip_vision_model", + "num_attention_heads": 16, + "num_channels": 3, + "num_hidden_layers": 24, + "patch_size": 14, + "projection_dim": 768, + "torch_dtype": "float32" +} diff --git a/ldm_patched/modules/conds.py b/ldm_patched/modules/conds.py new file mode 100644 index 0000000000000000000000000000000000000000..fc69131efd75463547fb29135b11c658e5db1ebf --- /dev/null +++ b/ldm_patched/modules/conds.py @@ -0,0 +1,82 @@ +# 1st edit by https://github.com/comfyanonymous/ComfyUI +# 2nd edit by Forge Official + + +import torch +import math +import ldm_patched.modules.utils + + +def lcm(a, b): #TODO: eventually replace by math.lcm (added in python3.9) + return abs(a*b) // math.gcd(a, b) + +class CONDRegular: + def __init__(self, cond): + self.cond = cond + + def _copy_with(self, cond): + return self.__class__(cond) + + def process_cond(self, batch_size, device, **kwargs): + return self._copy_with(ldm_patched.modules.utils.repeat_to_batch_size(self.cond, batch_size).to(device)) + + def can_concat(self, other): + if self.cond.shape != other.cond.shape: + return False + return True + + def concat(self, others): + conds = [self.cond] + for x in others: + conds.append(x.cond) + return torch.cat(conds) + +class 
CONDNoiseShape(CONDRegular): + def process_cond(self, batch_size, device, area, **kwargs): + data = self.cond[:,:,area[2]:area[0] + area[2],area[3]:area[1] + area[3]] + return self._copy_with(ldm_patched.modules.utils.repeat_to_batch_size(data, batch_size).to(device)) + + +class CONDCrossAttn(CONDRegular): + def can_concat(self, other): + s1 = self.cond.shape + s2 = other.cond.shape + if s1 != s2: + if s1[0] != s2[0] or s1[2] != s2[2]: #these 2 cases should not happen + return False + + mult_min = lcm(s1[1], s2[1]) + diff = mult_min // min(s1[1], s2[1]) + if diff > 4: #arbitrary limit on the padding because it's probably going to impact performance negatively if it's too much + return False + return True + + def concat(self, others): + conds = [self.cond] + crossattn_max_len = self.cond.shape[1] + for x in others: + c = x.cond + crossattn_max_len = lcm(crossattn_max_len, c.shape[1]) + conds.append(c) + + out = [] + for c in conds: + if c.shape[1] < crossattn_max_len: + c = c.repeat(1, crossattn_max_len // c.shape[1], 1) #padding with repeat doesn't change result + out.append(c) + return torch.cat(out) + +class CONDConstant(CONDRegular): + def __init__(self, cond): + self.cond = cond + + def process_cond(self, batch_size, device, **kwargs): + return self._copy_with(self.cond) + + def can_concat(self, other): + if self.cond != other.cond: + return False + return True + + def concat(self, others): + return self.cond diff --git a/ldm_patched/modules/controlnet.py b/ldm_patched/modules/controlnet.py new file mode 100644 index 0000000000000000000000000000000000000000..6192d7ae833a6fbd130ddd83dba413fbd370fd7d --- /dev/null +++ b/ldm_patched/modules/controlnet.py @@ -0,0 +1,631 @@ +# 1st edit by https://github.com/comfyanonymous/ComfyUI +# 2nd edit by Forge Official + + +import torch +import math +import os +import ldm_patched.modules.utils +import ldm_patched.modules.model_management +import ldm_patched.modules.model_detection +import ldm_patched.modules.model_patcher +import ldm_patched.modules.ops + +import ldm_patched.controlnet.cldm +import ldm_patched.t2ia.adapter + +from ldm_patched.modules.ops import main_stream_worker + + +def broadcast_image_to(tensor, target_batch_size, batched_number): + current_batch_size = tensor.shape[0] + #print(current_batch_size, target_batch_size) + if current_batch_size == 1: + return tensor + + per_batch = target_batch_size // batched_number + tensor = tensor[:per_batch] + + if per_batch > tensor.shape[0]: + tensor = torch.cat([tensor] * (per_batch // tensor.shape[0]) + [tensor[:(per_batch % tensor.shape[0])]], dim=0) + + current_batch_size = tensor.shape[0] + if current_batch_size == target_batch_size: + return tensor + else: + return torch.cat([tensor] * batched_number, dim=0) + + +def get_at(array, index, default=None): + return array[index] if 0 <= index < len(array) else default + + +def compute_controlnet_weighting(control, cnet): + + positive_advanced_weighting = getattr(cnet, 'positive_advanced_weighting', None) + negative_advanced_weighting = getattr(cnet, 'negative_advanced_weighting', None) + advanced_frame_weighting = getattr(cnet, 'advanced_frame_weighting', None) + advanced_sigma_weighting = getattr(cnet, 'advanced_sigma_weighting', None) + advanced_mask_weighting = getattr(cnet, 'advanced_mask_weighting', None) + + transformer_options = cnet.transformer_options + + if positive_advanced_weighting is None and negative_advanced_weighting is None \ + and advanced_frame_weighting is None and advanced_sigma_weighting is None \ + and 
advanced_mask_weighting is None: + return control + + cond_or_uncond = transformer_options['cond_or_uncond'] + sigmas = transformer_options['sigmas'] + cond_mark = transformer_options['cond_mark'] + + if advanced_frame_weighting is not None: + advanced_frame_weighting = torch.Tensor(advanced_frame_weighting * len(cond_or_uncond)).to(sigmas) + assert advanced_frame_weighting.shape[0] == cond_mark.shape[0], \ + 'Frame weighting list length is different from batch size!' + + if advanced_sigma_weighting is not None: + advanced_sigma_weighting = torch.cat([advanced_sigma_weighting(sigmas)] * len(cond_or_uncond)) + + for k, v in control.items(): + for i in range(len(v)): + control_signal = control[k][i] + + if not isinstance(control_signal, torch.Tensor): + continue + + B, C, H, W = control_signal.shape + + positive_weight = 1.0 + negative_weight = 1.0 + sigma_weight = 1.0 + frame_weight = 1.0 + + if positive_advanced_weighting is not None: + positive_weight = get_at(positive_advanced_weighting.get(k, []), i, 1.0) + + if negative_advanced_weighting is not None: + negative_weight = get_at(negative_advanced_weighting.get(k, []), i, 1.0) + + if advanced_sigma_weighting is not None: + sigma_weight = advanced_sigma_weighting + + if advanced_frame_weighting is not None: + frame_weight = advanced_frame_weighting + + final_weight = positive_weight * (1.0 - cond_mark) + negative_weight * cond_mark + final_weight = final_weight * sigma_weight * frame_weight + + if isinstance(advanced_mask_weighting, torch.Tensor): + if advanced_mask_weighting.shape[0] != 1: + k_ = int(control_signal.shape[0] // advanced_mask_weighting.shape[0]) + if control_signal.shape[0] == k_ * advanced_mask_weighting.shape[0]: + advanced_mask_weighting = advanced_mask_weighting.repeat(k_, 1, 1, 1) + control_signal = control_signal * torch.nn.functional.interpolate(advanced_mask_weighting.to(control_signal), size=(H, W), mode='bilinear') + + control[k][i] = control_signal * final_weight[:, None, None, None] + + return control + + +class ControlBase: + def __init__(self, device=None): + self.cond_hint_original = None + self.cond_hint = None + self.strength = 1.0 + self.timestep_percent_range = (0.0, 1.0) + self.global_average_pooling = False + self.timestep_range = None + self.transformer_options = {} + + if device is None: + device = ldm_patched.modules.model_management.get_torch_device() + self.device = device + self.previous_controlnet = None + + def set_cond_hint(self, cond_hint, strength=1.0, timestep_percent_range=(0.0, 1.0)): + self.cond_hint_original = cond_hint + self.strength = strength + self.timestep_percent_range = timestep_percent_range + return self + + def pre_run(self, model, percent_to_timestep_function): + self.timestep_range = (percent_to_timestep_function(self.timestep_percent_range[0]), percent_to_timestep_function(self.timestep_percent_range[1])) + if self.previous_controlnet is not None: + self.previous_controlnet.pre_run(model, percent_to_timestep_function) + + def set_previous_controlnet(self, controlnet): + self.previous_controlnet = controlnet + return self + + def cleanup(self): + if self.previous_controlnet is not None: + self.previous_controlnet.cleanup() + if self.cond_hint is not None: + del self.cond_hint + self.cond_hint = None + self.timestep_range = None + + def get_models(self): + out = [] + if self.previous_controlnet is not None: + out += self.previous_controlnet.get_models() + return out + + def copy_to(self, c): + c.cond_hint_original = self.cond_hint_original + c.strength = self.strength + 
c.timestep_percent_range = self.timestep_percent_range + c.global_average_pooling = self.global_average_pooling + + def inference_memory_requirements(self, dtype): + if self.previous_controlnet is not None: + return self.previous_controlnet.inference_memory_requirements(dtype) + return 0 + + def control_merge(self, control_input, control_output, control_prev, output_dtype): + out = {'input':[], 'middle':[], 'output': []} + + if control_input is not None: + for i in range(len(control_input)): + key = 'input' + x = control_input[i] + if x is not None: + x *= self.strength + if x.dtype != output_dtype: + x = x.to(output_dtype) + out[key].insert(0, x) + + if control_output is not None: + for i in range(len(control_output)): + if i == (len(control_output) - 1): + key = 'middle' + index = 0 + else: + key = 'output' + index = i + x = control_output[i] + if x is not None: + if self.global_average_pooling: + x = torch.mean(x, dim=(2, 3), keepdim=True).repeat(1, 1, x.shape[2], x.shape[3]) + + x *= self.strength + if x.dtype != output_dtype: + x = x.to(output_dtype) + + out[key].append(x) + + out = compute_controlnet_weighting(out, self) + + if control_prev is not None: + for x in ['input', 'middle', 'output']: + o = out[x] + for i in range(len(control_prev[x])): + prev_val = control_prev[x][i] + if i >= len(o): + o.append(prev_val) + elif prev_val is not None: + if o[i] is None: + o[i] = prev_val + else: + if o[i].shape[0] < prev_val.shape[0]: + o[i] = prev_val + o[i] + else: + o[i] += prev_val + return out + +class ControlNet(ControlBase): + def __init__(self, control_model, global_average_pooling=False, device=None, load_device=None, manual_cast_dtype=None): + super().__init__(device) + self.control_model = control_model + self.load_device = load_device + self.control_model_wrapped = ldm_patched.modules.model_patcher.ModelPatcher(self.control_model, load_device=load_device, offload_device=ldm_patched.modules.model_management.unet_offload_device()) + self.global_average_pooling = global_average_pooling + self.model_sampling_current = None + self.manual_cast_dtype = manual_cast_dtype + + def get_control(self, x_noisy, t, cond, batched_number): + to = self.transformer_options + + for conditioning_modifier in to.get('controlnet_conditioning_modifiers', []): + x_noisy, t, cond, batched_number = conditioning_modifier(self, x_noisy, t, cond, batched_number) + + control_prev = None + if self.previous_controlnet is not None: + control_prev = self.previous_controlnet.get_control(x_noisy, t, cond, batched_number) + + if self.timestep_range is not None: + if t[0] > self.timestep_range[0] or t[0] < self.timestep_range[1]: + if control_prev is not None: + return control_prev + else: + return None + + dtype = self.control_model.dtype + if self.manual_cast_dtype is not None: + dtype = self.manual_cast_dtype + + output_dtype = x_noisy.dtype + if self.cond_hint is None or x_noisy.shape[2] * 8 != self.cond_hint.shape[2] or x_noisy.shape[3] * 8 != self.cond_hint.shape[3]: + if self.cond_hint is not None: + del self.cond_hint + self.cond_hint = None + self.cond_hint = ldm_patched.modules.utils.common_upscale(self.cond_hint_original, x_noisy.shape[3] * 8, x_noisy.shape[2] * 8, 'nearest-exact', "center").to(dtype) + if x_noisy.shape[0] != self.cond_hint.shape[0]: + self.cond_hint = broadcast_image_to(self.cond_hint, x_noisy.shape[0], batched_number) + + context = cond['c_crossattn'] + y = cond.get('y', None) + if y is not None: + y = y.to(dtype) + timestep = self.model_sampling_current.timestep(t) + x_noisy = 
self.model_sampling_current.calculate_input(t, x_noisy) + + controlnet_model_function_wrapper = to.get('controlnet_model_function_wrapper', None) + + if controlnet_model_function_wrapper is not None: + wrapper_args = dict(x=x_noisy.to(dtype), hint=self.cond_hint, timesteps=timestep.float(), + context=context.to(dtype), y=y) + wrapper_args['model'] = self + wrapper_args['inner_model'] = self.control_model + control = controlnet_model_function_wrapper(**wrapper_args) + else: + control = self.control_model(x=x_noisy.to(dtype), hint=self.cond_hint.to(self.device), timesteps=timestep.float(), context=context.to(dtype), y=y) + return self.control_merge(None, control, control_prev, output_dtype) + + def copy(self): + c = ControlNet(self.control_model, global_average_pooling=self.global_average_pooling, load_device=self.load_device, manual_cast_dtype=self.manual_cast_dtype) + self.copy_to(c) + return c + + def get_models(self): + out = super().get_models() + out.append(self.control_model_wrapped) + return out + + def pre_run(self, model, percent_to_timestep_function): + super().pre_run(model, percent_to_timestep_function) + self.model_sampling_current = model.model_sampling + + def cleanup(self): + self.model_sampling_current = None + super().cleanup() + +class ControlLoraOps: + class Linear(torch.nn.Module): + def __init__(self, in_features: int, out_features: int, bias: bool = True, + device=None, dtype=None) -> None: + factory_kwargs = {'device': device, 'dtype': dtype} + super().__init__() + self.in_features = in_features + self.out_features = out_features + self.weight = None + self.up = None + self.down = None + self.bias = None + + def forward(self, input): + weight, bias, signal = ldm_patched.modules.ops.cast_bias_weight(self, input) + with main_stream_worker(weight, bias, signal): + if self.up is not None: + return torch.nn.functional.linear(input, weight + (torch.mm(self.up.flatten(start_dim=1), self.down.flatten(start_dim=1))).reshape(self.weight.shape).type(input.dtype), bias) + else: + return torch.nn.functional.linear(input, weight, bias) + + class Conv2d(torch.nn.Module): + def __init__( + self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + bias=True, + padding_mode='zeros', + device=None, + dtype=None + ): + super().__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = kernel_size + self.stride = stride + self.padding = padding + self.dilation = dilation + self.transposed = False + self.output_padding = 0 + self.groups = groups + self.padding_mode = padding_mode + + self.weight = None + self.bias = None + self.up = None + self.down = None + + + def forward(self, input): + weight, bias, signal = ldm_patched.modules.ops.cast_bias_weight(self, input) + with main_stream_worker(weight, bias, signal): + if self.up is not None: + return torch.nn.functional.conv2d(input, weight + (torch.mm(self.up.flatten(start_dim=1), self.down.flatten(start_dim=1))).reshape(self.weight.shape).type(input.dtype), bias, self.stride, self.padding, self.dilation, self.groups) + else: + return torch.nn.functional.conv2d(input, weight, bias, self.stride, self.padding, self.dilation, self.groups) + + +class ControlLora(ControlNet): + def __init__(self, control_weights, global_average_pooling=False, device=None): + ControlBase.__init__(self, device) + self.control_weights = control_weights + self.global_average_pooling = global_average_pooling + + def pre_run(self, model, percent_to_timestep_function): + 
super().pre_run(model, percent_to_timestep_function) + controlnet_config = model.model_config.unet_config.copy() + controlnet_config.pop("out_channels") + controlnet_config["hint_channels"] = self.control_weights["input_hint_block.0.weight"].shape[1] + self.manual_cast_dtype = model.manual_cast_dtype + dtype = model.get_dtype() + if self.manual_cast_dtype is None: + class control_lora_ops(ControlLoraOps, ldm_patched.modules.ops.disable_weight_init): + pass + else: + class control_lora_ops(ControlLoraOps, ldm_patched.modules.ops.manual_cast): + pass + dtype = self.manual_cast_dtype + + controlnet_config["operations"] = control_lora_ops + controlnet_config["dtype"] = dtype + self.control_model = ldm_patched.controlnet.cldm.ControlNet(**controlnet_config) + self.control_model.to(ldm_patched.modules.model_management.get_torch_device()) + diffusion_model = model.diffusion_model + sd = diffusion_model.state_dict() + cm = self.control_model.state_dict() + + for k in sd: + weight = sd[k] + try: + ldm_patched.modules.utils.set_attr(self.control_model, k, weight) + except: + pass + + for k in self.control_weights: + if k not in {"lora_controlnet"}: + ldm_patched.modules.utils.set_attr(self.control_model, k, self.control_weights[k].to(dtype).to(ldm_patched.modules.model_management.get_torch_device())) + + def copy(self): + c = ControlLora(self.control_weights, global_average_pooling=self.global_average_pooling) + self.copy_to(c) + return c + + def cleanup(self): + del self.control_model + self.control_model = None + super().cleanup() + + def get_models(self): + out = ControlBase.get_models(self) + return out + + def inference_memory_requirements(self, dtype): + return ldm_patched.modules.utils.calculate_parameters(self.control_weights) * ldm_patched.modules.model_management.dtype_size(dtype) + ControlBase.inference_memory_requirements(self, dtype) + +def load_controlnet(ckpt_path, model=None): + controlnet_data = ldm_patched.modules.utils.load_torch_file(ckpt_path, safe_load=True) + if "lora_controlnet" in controlnet_data: + return ControlLora(controlnet_data) + + controlnet_config = None + if "controlnet_cond_embedding.conv_in.weight" in controlnet_data: #diffusers format + unet_dtype = ldm_patched.modules.model_management.unet_dtype() + controlnet_config = ldm_patched.modules.model_detection.unet_config_from_diffusers_unet(controlnet_data, unet_dtype) + diffusers_keys = ldm_patched.modules.utils.unet_to_diffusers(controlnet_config) + diffusers_keys["controlnet_mid_block.weight"] = "middle_block_out.0.weight" + diffusers_keys["controlnet_mid_block.bias"] = "middle_block_out.0.bias" + + count = 0 + loop = True + while loop: + suffix = [".weight", ".bias"] + for s in suffix: + k_in = "controlnet_down_blocks.{}{}".format(count, s) + k_out = "zero_convs.{}.0{}".format(count, s) + if k_in not in controlnet_data: + loop = False + break + diffusers_keys[k_in] = k_out + count += 1 + + count = 0 + loop = True + while loop: + suffix = [".weight", ".bias"] + for s in suffix: + if count == 0: + k_in = "controlnet_cond_embedding.conv_in{}".format(s) + else: + k_in = "controlnet_cond_embedding.blocks.{}{}".format(count - 1, s) + k_out = "input_hint_block.{}{}".format(count * 2, s) + if k_in not in controlnet_data: + k_in = "controlnet_cond_embedding.conv_out{}".format(s) + loop = False + diffusers_keys[k_in] = k_out + count += 1 + + new_sd = {} + for k in diffusers_keys: + if k in controlnet_data: + new_sd[diffusers_keys[k]] = controlnet_data.pop(k) + + leftover_keys = controlnet_data.keys() + if 
len(leftover_keys) > 0: + print("leftover keys:", leftover_keys) + controlnet_data = new_sd + + pth_key = 'control_model.zero_convs.0.0.weight' + pth = False + key = 'zero_convs.0.0.weight' + if pth_key in controlnet_data: + pth = True + key = pth_key + prefix = "control_model." + elif key in controlnet_data: + prefix = "" + else: + net = load_t2i_adapter(controlnet_data) + if net is None: + print("error checkpoint does not contain controlnet or t2i adapter data", ckpt_path) + return net + + if controlnet_config is None: + unet_dtype = ldm_patched.modules.model_management.unet_dtype() + controlnet_config = ldm_patched.modules.model_detection.model_config_from_unet(controlnet_data, prefix, unet_dtype, True).unet_config + load_device = ldm_patched.modules.model_management.get_torch_device() + manual_cast_dtype = ldm_patched.modules.model_management.unet_manual_cast(unet_dtype, load_device) + if manual_cast_dtype is not None: + controlnet_config["operations"] = ldm_patched.modules.ops.manual_cast + controlnet_config.pop("out_channels") + controlnet_config["hint_channels"] = controlnet_data["{}input_hint_block.0.weight".format(prefix)].shape[1] + control_model = ldm_patched.controlnet.cldm.ControlNet(**controlnet_config) + + if pth: + if 'difference' in controlnet_data: + if model is not None: + ldm_patched.modules.model_management.load_models_gpu([model]) + model_sd = model.model_state_dict() + for x in controlnet_data: + c_m = "control_model." + if x.startswith(c_m): + sd_key = "diffusion_model.{}".format(x[len(c_m):]) + if sd_key in model_sd: + cd = controlnet_data[x] + cd += model_sd[sd_key].type(cd.dtype).to(cd.device) + else: + print("WARNING: Loaded a diff controlnet without a model. It will very likely not work.") + + class WeightsLoader(torch.nn.Module): + pass + w = WeightsLoader() + w.control_model = control_model + missing, unexpected = w.load_state_dict(controlnet_data, strict=False) + else: + missing, unexpected = control_model.load_state_dict(controlnet_data, strict=False) + print(missing, unexpected) + + global_average_pooling = False + filename = os.path.splitext(ckpt_path)[0] + if filename.endswith("_shuffle") or filename.endswith("_shuffle_fp16"): #TODO: smarter way of enabling global_average_pooling + global_average_pooling = True + + control = ControlNet(control_model, global_average_pooling=global_average_pooling, load_device=load_device, manual_cast_dtype=manual_cast_dtype) + return control + +class T2IAdapter(ControlBase): + def __init__(self, t2i_model, channels_in, device=None): + super().__init__(device) + self.t2i_model = t2i_model + self.channels_in = channels_in + self.control_input = None + + def scale_image_to(self, width, height): + unshuffle_amount = self.t2i_model.unshuffle_amount + width = math.ceil(width / unshuffle_amount) * unshuffle_amount + height = math.ceil(height / unshuffle_amount) * unshuffle_amount + return width, height + + def get_control(self, x_noisy, t, cond, batched_number): + to = self.transformer_options + + for conditioning_modifier in to.get('controlnet_conditioning_modifiers', []): + x_noisy, t, cond, batched_number = conditioning_modifier(self, x_noisy, t, cond, batched_number) + + control_prev = None + if self.previous_controlnet is not None: + control_prev = self.previous_controlnet.get_control(x_noisy, t, cond, batched_number) + + if self.timestep_range is not None: + if t[0] > self.timestep_range[0] or t[0] < self.timestep_range[1]: + if control_prev is not None: + return control_prev + else: + return None + + if self.cond_hint is 
None or x_noisy.shape[2] * 8 != self.cond_hint.shape[2] or x_noisy.shape[3] * 8 != self.cond_hint.shape[3]: + if self.cond_hint is not None: + del self.cond_hint + self.control_input = None + self.cond_hint = None + width, height = self.scale_image_to(x_noisy.shape[3] * 8, x_noisy.shape[2] * 8) + self.cond_hint = ldm_patched.modules.utils.common_upscale(self.cond_hint_original, width, height, 'nearest-exact', "center").float() + if self.channels_in == 1 and self.cond_hint.shape[1] > 1: + self.cond_hint = torch.mean(self.cond_hint, 1, keepdim=True) + if x_noisy.shape[0] != self.cond_hint.shape[0]: + self.cond_hint = broadcast_image_to(self.cond_hint, x_noisy.shape[0], batched_number) + if self.control_input is None: + self.t2i_model.to(x_noisy.dtype) + self.t2i_model.to(self.device) + + controlnet_model_function_wrapper = to.get('controlnet_model_function_wrapper', None) + + if controlnet_model_function_wrapper is not None: + wrapper_args = dict(hint=self.cond_hint.to(x_noisy.dtype)) + wrapper_args['model'] = self + wrapper_args['inner_model'] = self.t2i_model + wrapper_args['inner_t2i_model'] = self.t2i_model + self.control_input = controlnet_model_function_wrapper(**wrapper_args) + else: + self.control_input = self.t2i_model(self.cond_hint.to(x_noisy)) + + self.t2i_model.cpu() + + control_input = list(map(lambda a: None if a is None else a.clone(), self.control_input)) + mid = None + if self.t2i_model.xl == True: + mid = control_input[-1:] + control_input = control_input[:-1] + return self.control_merge(control_input, mid, control_prev, x_noisy.dtype) + + def copy(self): + c = T2IAdapter(self.t2i_model, self.channels_in) + self.copy_to(c) + return c + +def load_t2i_adapter(t2i_data): + if 'adapter' in t2i_data: + t2i_data = t2i_data['adapter'] + if 'adapter.body.0.resnets.0.block1.weight' in t2i_data: #diffusers format + prefix_replace = {} + for i in range(4): + for j in range(2): + prefix_replace["adapter.body.{}.resnets.{}.".format(i, j)] = "body.{}.".format(i * 2 + j) + prefix_replace["adapter.body.{}.".format(i, j)] = "body.{}.".format(i * 2) + prefix_replace["adapter."] = "" + t2i_data = ldm_patched.modules.utils.state_dict_prefix_replace(t2i_data, prefix_replace) + keys = t2i_data.keys() + + if "body.0.in_conv.weight" in keys: + cin = t2i_data['body.0.in_conv.weight'].shape[1] + model_ad = ldm_patched.t2ia.adapter.Adapter_light(cin=cin, channels=[320, 640, 1280, 1280], nums_rb=4) + elif 'conv_in.weight' in keys: + cin = t2i_data['conv_in.weight'].shape[1] + channel = t2i_data['conv_in.weight'].shape[0] + ksize = t2i_data['body.0.block2.weight'].shape[2] + use_conv = False + down_opts = list(filter(lambda a: a.endswith("down_opt.op.weight"), keys)) + if len(down_opts) > 0: + use_conv = True + xl = False + if cin == 256 or cin == 768: + xl = True + model_ad = ldm_patched.t2ia.adapter.Adapter(cin=cin, channels=[channel, channel*2, channel*4, channel*4][:4], nums_rb=2, ksize=ksize, sk=True, use_conv=use_conv, xl=xl) + else: + return None + missing, unexpected = model_ad.load_state_dict(t2i_data) + if len(missing) > 0: + print("t2i missing", missing) + + if len(unexpected) > 0: + print("t2i unexpected", unexpected) + + return T2IAdapter(model_ad, model_ad.input_channels) diff --git a/ldm_patched/modules/diffusers_convert.py b/ldm_patched/modules/diffusers_convert.py new file mode 100644 index 0000000000000000000000000000000000000000..e3581bece2f65d54b0777ecf15d5dc6cd55dfcf5 --- /dev/null +++ b/ldm_patched/modules/diffusers_convert.py @@ -0,0 +1,264 @@ +# Taken from 
https://github.com/comfyanonymous/ComfyUI + + +import re +import torch + +# conversion code from https://github.com/huggingface/diffusers/blob/main/scripts/convert_diffusers_to_original_stable_diffusion.py + +# =================# +# UNet Conversion # +# =================# + +unet_conversion_map = [ + # (stable-diffusion, HF Diffusers) + ("time_embed.0.weight", "time_embedding.linear_1.weight"), + ("time_embed.0.bias", "time_embedding.linear_1.bias"), + ("time_embed.2.weight", "time_embedding.linear_2.weight"), + ("time_embed.2.bias", "time_embedding.linear_2.bias"), + ("input_blocks.0.0.weight", "conv_in.weight"), + ("input_blocks.0.0.bias", "conv_in.bias"), + ("out.0.weight", "conv_norm_out.weight"), + ("out.0.bias", "conv_norm_out.bias"), + ("out.2.weight", "conv_out.weight"), + ("out.2.bias", "conv_out.bias"), +] + +unet_conversion_map_resnet = [ + # (stable-diffusion, HF Diffusers) + ("in_layers.0", "norm1"), + ("in_layers.2", "conv1"), + ("out_layers.0", "norm2"), + ("out_layers.3", "conv2"), + ("emb_layers.1", "time_emb_proj"), + ("skip_connection", "conv_shortcut"), +] + +unet_conversion_map_layer = [] +# hardcoded number of downblocks and resnets/attentions... +# would need smarter logic for other networks. +for i in range(4): + # loop over downblocks/upblocks + + for j in range(2): + # loop over resnets/attentions for downblocks + hf_down_res_prefix = f"down_blocks.{i}.resnets.{j}." + sd_down_res_prefix = f"input_blocks.{3 * i + j + 1}.0." + unet_conversion_map_layer.append((sd_down_res_prefix, hf_down_res_prefix)) + + if i < 3: + # no attention layers in down_blocks.3 + hf_down_atn_prefix = f"down_blocks.{i}.attentions.{j}." + sd_down_atn_prefix = f"input_blocks.{3 * i + j + 1}.1." + unet_conversion_map_layer.append((sd_down_atn_prefix, hf_down_atn_prefix)) + + for j in range(3): + # loop over resnets/attentions for upblocks + hf_up_res_prefix = f"up_blocks.{i}.resnets.{j}." + sd_up_res_prefix = f"output_blocks.{3 * i + j}.0." + unet_conversion_map_layer.append((sd_up_res_prefix, hf_up_res_prefix)) + + if i > 0: + # no attention layers in up_blocks.0 + hf_up_atn_prefix = f"up_blocks.{i}.attentions.{j}." + sd_up_atn_prefix = f"output_blocks.{3 * i + j}.1." + unet_conversion_map_layer.append((sd_up_atn_prefix, hf_up_atn_prefix)) + + if i < 3: + # no downsample in down_blocks.3 + hf_downsample_prefix = f"down_blocks.{i}.downsamplers.0.conv." + sd_downsample_prefix = f"input_blocks.{3 * (i + 1)}.0.op." + unet_conversion_map_layer.append((sd_downsample_prefix, hf_downsample_prefix)) + + # no upsample in up_blocks.3 + hf_upsample_prefix = f"up_blocks.{i}.upsamplers.0." + sd_upsample_prefix = f"output_blocks.{3 * i + 2}.{1 if i == 0 else 2}." + unet_conversion_map_layer.append((sd_upsample_prefix, hf_upsample_prefix)) + +hf_mid_atn_prefix = "mid_block.attentions.0." +sd_mid_atn_prefix = "middle_block.1." +unet_conversion_map_layer.append((sd_mid_atn_prefix, hf_mid_atn_prefix)) + +for j in range(2): + hf_mid_res_prefix = f"mid_block.resnets.{j}." + sd_mid_res_prefix = f"middle_block.{2 * j}." + unet_conversion_map_layer.append((sd_mid_res_prefix, hf_mid_res_prefix)) + + +def convert_unet_state_dict(unet_state_dict): + # buyer beware: this is a *brittle* function, + # and correct output requires that all of these pieces interact in + # the exact order in which I have arranged them. 
+ mapping = {k: k for k in unet_state_dict.keys()} + for sd_name, hf_name in unet_conversion_map: + mapping[hf_name] = sd_name + for k, v in mapping.items(): + if "resnets" in k: + for sd_part, hf_part in unet_conversion_map_resnet: + v = v.replace(hf_part, sd_part) + mapping[k] = v + for k, v in mapping.items(): + for sd_part, hf_part in unet_conversion_map_layer: + v = v.replace(hf_part, sd_part) + mapping[k] = v + new_state_dict = {v: unet_state_dict[k] for k, v in mapping.items()} + return new_state_dict + + +# ================# +# VAE Conversion # +# ================# + +vae_conversion_map = [ + # (stable-diffusion, HF Diffusers) + ("nin_shortcut", "conv_shortcut"), + ("norm_out", "conv_norm_out"), + ("mid.attn_1.", "mid_block.attentions.0."), +] + +for i in range(4): + # down_blocks have two resnets + for j in range(2): + hf_down_prefix = f"encoder.down_blocks.{i}.resnets.{j}." + sd_down_prefix = f"encoder.down.{i}.block.{j}." + vae_conversion_map.append((sd_down_prefix, hf_down_prefix)) + + if i < 3: + hf_downsample_prefix = f"down_blocks.{i}.downsamplers.0." + sd_downsample_prefix = f"down.{i}.downsample." + vae_conversion_map.append((sd_downsample_prefix, hf_downsample_prefix)) + + hf_upsample_prefix = f"up_blocks.{i}.upsamplers.0." + sd_upsample_prefix = f"up.{3 - i}.upsample." + vae_conversion_map.append((sd_upsample_prefix, hf_upsample_prefix)) + + # up_blocks have three resnets + # also, up blocks in hf are numbered in reverse from sd + for j in range(3): + hf_up_prefix = f"decoder.up_blocks.{i}.resnets.{j}." + sd_up_prefix = f"decoder.up.{3 - i}.block.{j}." + vae_conversion_map.append((sd_up_prefix, hf_up_prefix)) + +# this part accounts for mid blocks in both the encoder and the decoder +for i in range(2): + hf_mid_res_prefix = f"mid_block.resnets.{i}." + sd_mid_res_prefix = f"mid.block_{i + 1}." 
+ vae_conversion_map.append((sd_mid_res_prefix, hf_mid_res_prefix)) + +vae_conversion_map_attn = [ + # (stable-diffusion, HF Diffusers) + ("norm.", "group_norm."), + ("q.", "query."), + ("k.", "key."), + ("v.", "value."), + ("q.", "to_q."), + ("k.", "to_k."), + ("v.", "to_v."), + ("proj_out.", "to_out.0."), + ("proj_out.", "proj_attn."), +] + + +def reshape_weight_for_sd(w): + # convert HF linear weights to SD conv2d weights + return w.reshape(*w.shape, 1, 1) + + +def convert_vae_state_dict(vae_state_dict): + mapping = {k: k for k in vae_state_dict.keys()} + for k, v in mapping.items(): + for sd_part, hf_part in vae_conversion_map: + v = v.replace(hf_part, sd_part) + mapping[k] = v + for k, v in mapping.items(): + if "attentions" in k: + for sd_part, hf_part in vae_conversion_map_attn: + v = v.replace(hf_part, sd_part) + mapping[k] = v + new_state_dict = {v: vae_state_dict[k] for k, v in mapping.items()} + weights_to_convert = ["q", "k", "v", "proj_out"] + for k, v in new_state_dict.items(): + for weight_name in weights_to_convert: + if f"mid.attn_1.{weight_name}.weight" in k: + print(f"Reshaping {k} for SD format") + new_state_dict[k] = reshape_weight_for_sd(v) + return new_state_dict + + +# =========================# +# Text Encoder Conversion # +# =========================# + + +textenc_conversion_lst = [ + # (stable-diffusion, HF Diffusers) + ("resblocks.", "text_model.encoder.layers."), + ("ln_1", "layer_norm1"), + ("ln_2", "layer_norm2"), + (".c_fc.", ".fc1."), + (".c_proj.", ".fc2."), + (".attn", ".self_attn"), + ("ln_final.", "transformer.text_model.final_layer_norm."), + ("token_embedding.weight", "transformer.text_model.embeddings.token_embedding.weight"), + ("positional_embedding", "transformer.text_model.embeddings.position_embedding.weight"), +] +protected = {re.escape(x[1]): x[0] for x in textenc_conversion_lst} +textenc_pattern = re.compile("|".join(protected.keys())) + +# Ordering is from https://github.com/pytorch/pytorch/blob/master/test/cpp/api/modules.cpp +code2idx = {"q": 0, "k": 1, "v": 2} + + +def convert_text_enc_state_dict_v20(text_enc_dict, prefix=""): + new_state_dict = {} + capture_qkv_weight = {} + capture_qkv_bias = {} + for k, v in text_enc_dict.items(): + if not k.startswith(prefix): + continue + if ( + k.endswith(".self_attn.q_proj.weight") + or k.endswith(".self_attn.k_proj.weight") + or k.endswith(".self_attn.v_proj.weight") + ): + k_pre = k[: -len(".q_proj.weight")] + k_code = k[-len("q_proj.weight")] + if k_pre not in capture_qkv_weight: + capture_qkv_weight[k_pre] = [None, None, None] + capture_qkv_weight[k_pre][code2idx[k_code]] = v + continue + + if ( + k.endswith(".self_attn.q_proj.bias") + or k.endswith(".self_attn.k_proj.bias") + or k.endswith(".self_attn.v_proj.bias") + ): + k_pre = k[: -len(".q_proj.bias")] + k_code = k[-len("q_proj.bias")] + if k_pre not in capture_qkv_bias: + capture_qkv_bias[k_pre] = [None, None, None] + capture_qkv_bias[k_pre][code2idx[k_code]] = v + continue + + relabelled_key = textenc_pattern.sub(lambda m: protected[re.escape(m.group(0))], k) + new_state_dict[relabelled_key] = v + + for k_pre, tensors in capture_qkv_weight.items(): + if None in tensors: + raise Exception("CORRUPTED MODEL: one of the q-k-v values for the text encoder was missing") + relabelled_key = textenc_pattern.sub(lambda m: protected[re.escape(m.group(0))], k_pre) + new_state_dict[relabelled_key + ".in_proj_weight"] = torch.cat(tensors) + + for k_pre, tensors in capture_qkv_bias.items(): + if None in tensors: + raise Exception("CORRUPTED MODEL: one of 
the q-k-v values for the text encoder was missing") + relabelled_key = textenc_pattern.sub(lambda m: protected[re.escape(m.group(0))], k_pre) + new_state_dict[relabelled_key + ".in_proj_bias"] = torch.cat(tensors) + + return new_state_dict + + +def convert_text_enc_state_dict(text_enc_dict): + return text_enc_dict + + diff --git a/ldm_patched/modules/diffusers_load.py b/ldm_patched/modules/diffusers_load.py new file mode 100644 index 0000000000000000000000000000000000000000..b80f33f88abb1699696336ea5b12202f79a676d6 --- /dev/null +++ b/ldm_patched/modules/diffusers_load.py @@ -0,0 +1,38 @@ +# Taken from https://github.com/comfyanonymous/ComfyUI + +import os + +import ldm_patched.modules.sd + +def first_file(path, filenames): + for f in filenames: + p = os.path.join(path, f) + if os.path.exists(p): + return p + return None + +def load_diffusers(model_path, output_vae=True, output_clip=True, embedding_directory=None): + diffusion_model_names = ["diffusion_pytorch_model.fp16.safetensors", "diffusion_pytorch_model.safetensors", "diffusion_pytorch_model.fp16.bin", "diffusion_pytorch_model.bin"] + unet_path = first_file(os.path.join(model_path, "unet"), diffusion_model_names) + vae_path = first_file(os.path.join(model_path, "vae"), diffusion_model_names) + + text_encoder_model_names = ["model.fp16.safetensors", "model.safetensors", "pytorch_model.fp16.bin", "pytorch_model.bin"] + text_encoder1_path = first_file(os.path.join(model_path, "text_encoder"), text_encoder_model_names) + text_encoder2_path = first_file(os.path.join(model_path, "text_encoder_2"), text_encoder_model_names) + + text_encoder_paths = [text_encoder1_path] + if text_encoder2_path is not None: + text_encoder_paths.append(text_encoder2_path) + + unet = ldm_patched.modules.sd.load_unet(unet_path) + + clip = None + if output_clip: + clip = ldm_patched.modules.sd.load_clip(text_encoder_paths, embedding_directory=embedding_directory) + + vae = None + if output_vae: + sd = ldm_patched.modules.utils.load_torch_file(vae_path) + vae = ldm_patched.modules.sd.VAE(sd=sd) + + return (unet, clip, vae) diff --git a/ldm_patched/modules/gligen.py b/ldm_patched/modules/gligen.py new file mode 100644 index 0000000000000000000000000000000000000000..8d2890449f3cf4cfef2e52dd63d424704e735b2e --- /dev/null +++ b/ldm_patched/modules/gligen.py @@ -0,0 +1,345 @@ +# Taken from https://github.com/comfyanonymous/ComfyUI +# This file is only for reference, and not used in the backend or runtime. 
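# --- Illustrative sketch (not part of the original file): the fuser modules defined
# below (GatedCrossAttentionDense, GatedSelfAttentionDense, ...) all follow the same
# zero-initialized gating pattern, x = x + scale * tanh(alpha) * f(norm(x)). Because
# alpha is created as a zero-valued parameter, tanh(alpha) == 0 at load time, so
# attaching such a module to a pretrained UNet leaves its outputs unchanged until
# alpha is trained. The class name, branch, and shapes here are hypothetical and only
# demonstrate that property; they are not used anywhere in this repository.
import torch
import torch.nn as nn

class _TinyGatedResidual(nn.Module):
    def __init__(self, dim):
        super().__init__()
        self.norm = nn.LayerNorm(dim)
        self.proj = nn.Linear(dim, dim)  # stand-in for the attention / feed-forward branch
        self.register_parameter('alpha', nn.Parameter(torch.tensor(0.)))
        self.scale = 1

    def forward(self, x):
        # gate starts closed: tanh(0) == 0, so the residual branch contributes nothing
        return x + self.scale * torch.tanh(self.alpha) * self.proj(self.norm(x))

if __name__ == "__main__":
    x = torch.randn(2, 16, 8)
    assert torch.allclose(_TinyGatedResidual(8)(x), x)  # identity at initialization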
+ + +import torch +from torch import nn +from ldm_patched.ldm.modules.attention import CrossAttention +from inspect import isfunction + + +def exists(val): + return val is not None + + +def uniq(arr): + return{el: True for el in arr}.keys() + + +def default(val, d): + if exists(val): + return val + return d() if isfunction(d) else d + + +# feedforward +class GEGLU(nn.Module): + def __init__(self, dim_in, dim_out): + super().__init__() + self.proj = nn.Linear(dim_in, dim_out * 2) + + def forward(self, x): + x, gate = self.proj(x).chunk(2, dim=-1) + return x * torch.nn.functional.gelu(gate) + + +class FeedForward(nn.Module): + def __init__(self, dim, dim_out=None, mult=4, glu=False, dropout=0.): + super().__init__() + inner_dim = int(dim * mult) + dim_out = default(dim_out, dim) + project_in = nn.Sequential( + nn.Linear(dim, inner_dim), + nn.GELU() + ) if not glu else GEGLU(dim, inner_dim) + + self.net = nn.Sequential( + project_in, + nn.Dropout(dropout), + nn.Linear(inner_dim, dim_out) + ) + + def forward(self, x): + return self.net(x) + + +class GatedCrossAttentionDense(nn.Module): + def __init__(self, query_dim, context_dim, n_heads, d_head): + super().__init__() + + self.attn = CrossAttention( + query_dim=query_dim, + context_dim=context_dim, + heads=n_heads, + dim_head=d_head) + self.ff = FeedForward(query_dim, glu=True) + + self.norm1 = nn.LayerNorm(query_dim) + self.norm2 = nn.LayerNorm(query_dim) + + self.register_parameter('alpha_attn', nn.Parameter(torch.tensor(0.))) + self.register_parameter('alpha_dense', nn.Parameter(torch.tensor(0.))) + + # this can be useful: we can externally change magnitude of tanh(alpha) + # for example, when it is set to 0, then the entire model is same as + # original one + self.scale = 1 + + def forward(self, x, objs): + + x = x + self.scale * \ + torch.tanh(self.alpha_attn) * self.attn(self.norm1(x), objs, objs) + x = x + self.scale * \ + torch.tanh(self.alpha_dense) * self.ff(self.norm2(x)) + + return x + + +class GatedSelfAttentionDense(nn.Module): + def __init__(self, query_dim, context_dim, n_heads, d_head): + super().__init__() + + # we need a linear projection since we need cat visual feature and obj + # feature + self.linear = nn.Linear(context_dim, query_dim) + + self.attn = CrossAttention( + query_dim=query_dim, + context_dim=query_dim, + heads=n_heads, + dim_head=d_head) + self.ff = FeedForward(query_dim, glu=True) + + self.norm1 = nn.LayerNorm(query_dim) + self.norm2 = nn.LayerNorm(query_dim) + + self.register_parameter('alpha_attn', nn.Parameter(torch.tensor(0.))) + self.register_parameter('alpha_dense', nn.Parameter(torch.tensor(0.))) + + # this can be useful: we can externally change magnitude of tanh(alpha) + # for example, when it is set to 0, then the entire model is same as + # original one + self.scale = 1 + + def forward(self, x, objs): + + N_visual = x.shape[1] + objs = self.linear(objs) + + x = x + self.scale * torch.tanh(self.alpha_attn) * self.attn( + self.norm1(torch.cat([x, objs], dim=1)))[:, 0:N_visual, :] + x = x + self.scale * \ + torch.tanh(self.alpha_dense) * self.ff(self.norm2(x)) + + return x + + +class GatedSelfAttentionDense2(nn.Module): + def __init__(self, query_dim, context_dim, n_heads, d_head): + super().__init__() + + # we need a linear projection since we need cat visual feature and obj + # feature + self.linear = nn.Linear(context_dim, query_dim) + + self.attn = CrossAttention( + query_dim=query_dim, context_dim=query_dim, dim_head=d_head) + self.ff = FeedForward(query_dim, glu=True) + + self.norm1 = 
nn.LayerNorm(query_dim) + self.norm2 = nn.LayerNorm(query_dim) + + self.register_parameter('alpha_attn', nn.Parameter(torch.tensor(0.))) + self.register_parameter('alpha_dense', nn.Parameter(torch.tensor(0.))) + + # this can be useful: we can externally change magnitude of tanh(alpha) + # for example, when it is set to 0, then the entire model is same as + # original one + self.scale = 1 + + def forward(self, x, objs): + + B, N_visual, _ = x.shape + B, N_ground, _ = objs.shape + + objs = self.linear(objs) + + # sanity check + size_v = math.sqrt(N_visual) + size_g = math.sqrt(N_ground) + assert int(size_v) == size_v, "Visual tokens must be square rootable" + assert int(size_g) == size_g, "Grounding tokens must be square rootable" + size_v = int(size_v) + size_g = int(size_g) + + # select grounding token and resize it to visual token size as residual + out = self.attn(self.norm1(torch.cat([x, objs], dim=1)))[ + :, N_visual:, :] + out = out.permute(0, 2, 1).reshape(B, -1, size_g, size_g) + out = torch.nn.functional.interpolate( + out, (size_v, size_v), mode='bicubic') + residual = out.reshape(B, -1, N_visual).permute(0, 2, 1) + + # add residual to visual feature + x = x + self.scale * torch.tanh(self.alpha_attn) * residual + x = x + self.scale * \ + torch.tanh(self.alpha_dense) * self.ff(self.norm2(x)) + + return x + + +class FourierEmbedder(): + def __init__(self, num_freqs=64, temperature=100): + + self.num_freqs = num_freqs + self.temperature = temperature + self.freq_bands = temperature ** (torch.arange(num_freqs) / num_freqs) + + @torch.no_grad() + def __call__(self, x, cat_dim=-1): + "x: arbitrary shape of tensor. dim: cat dim" + out = [] + for freq in self.freq_bands: + out.append(torch.sin(freq * x)) + out.append(torch.cos(freq * x)) + return torch.cat(out, cat_dim) + + +class PositionNet(nn.Module): + def __init__(self, in_dim, out_dim, fourier_freqs=8): + super().__init__() + self.in_dim = in_dim + self.out_dim = out_dim + + self.fourier_embedder = FourierEmbedder(num_freqs=fourier_freqs) + self.position_dim = fourier_freqs * 2 * 4 # 2 is sin&cos, 4 is xyxy + + self.linears = nn.Sequential( + nn.Linear(self.in_dim + self.position_dim, 512), + nn.SiLU(), + nn.Linear(512, 512), + nn.SiLU(), + nn.Linear(512, out_dim), + ) + + self.null_positive_feature = torch.nn.Parameter( + torch.zeros([self.in_dim])) + self.null_position_feature = torch.nn.Parameter( + torch.zeros([self.position_dim])) + + def forward(self, boxes, masks, positive_embeddings): + B, N, _ = boxes.shape + dtype = self.linears[0].weight.dtype + masks = masks.unsqueeze(-1).to(dtype) + positive_embeddings = positive_embeddings.to(dtype) + + # embedding position (it may includes padding as placeholder) + xyxy_embedding = self.fourier_embedder(boxes.to(dtype)) # B*N*4 --> B*N*C + + # learnable null embedding + positive_null = self.null_positive_feature.view(1, 1, -1) + xyxy_null = self.null_position_feature.view(1, 1, -1) + + # replace padding with learnable null embedding + positive_embeddings = positive_embeddings * \ + masks + (1 - masks) * positive_null + xyxy_embedding = xyxy_embedding * masks + (1 - masks) * xyxy_null + + objs = self.linears( + torch.cat([positive_embeddings, xyxy_embedding], dim=-1)) + assert objs.shape == torch.Size([B, N, self.out_dim]) + return objs + + +class Gligen(nn.Module): + def __init__(self, modules, position_net, key_dim): + super().__init__() + self.module_list = nn.ModuleList(modules) + self.position_net = position_net + self.key_dim = key_dim + self.max_objs = 30 + self.current_device = 
torch.device("cpu") + + def _set_position(self, boxes, masks, positive_embeddings): + objs = self.position_net(boxes, masks, positive_embeddings) + def func(x, extra_options): + key = extra_options["transformer_index"] + module = self.module_list[key] + return module(x, objs) + return func + + def set_position(self, latent_image_shape, position_params, device): + batch, c, h, w = latent_image_shape + masks = torch.zeros([self.max_objs], device="cpu") + boxes = [] + positive_embeddings = [] + for p in position_params: + x1 = (p[4]) / w + y1 = (p[3]) / h + x2 = (p[4] + p[2]) / w + y2 = (p[3] + p[1]) / h + masks[len(boxes)] = 1.0 + boxes += [torch.tensor((x1, y1, x2, y2)).unsqueeze(0)] + positive_embeddings += [p[0]] + append_boxes = [] + append_conds = [] + if len(boxes) < self.max_objs: + append_boxes = [torch.zeros( + [self.max_objs - len(boxes), 4], device="cpu")] + append_conds = [torch.zeros( + [self.max_objs - len(boxes), self.key_dim], device="cpu")] + + box_out = torch.cat( + boxes + append_boxes).unsqueeze(0).repeat(batch, 1, 1) + masks = masks.unsqueeze(0).repeat(batch, 1) + conds = torch.cat(positive_embeddings + + append_conds).unsqueeze(0).repeat(batch, 1, 1) + return self._set_position( + box_out.to(device), + masks.to(device), + conds.to(device)) + + def set_empty(self, latent_image_shape, device): + batch, c, h, w = latent_image_shape + masks = torch.zeros([self.max_objs], device="cpu").repeat(batch, 1) + box_out = torch.zeros([self.max_objs, 4], + device="cpu").repeat(batch, 1, 1) + conds = torch.zeros([self.max_objs, self.key_dim], + device="cpu").repeat(batch, 1, 1) + return self._set_position( + box_out.to(device), + masks.to(device), + conds.to(device)) + + +def load_gligen(sd): + sd_k = sd.keys() + output_list = [] + key_dim = 768 + for a in ["input_blocks", "middle_block", "output_blocks"]: + for b in range(20): + k_temp = filter(lambda k: "{}.{}.".format(a, b) + in k and ".fuser." in k, sd_k) + k_temp = map(lambda k: (k, k.split(".fuser.")[-1]), k_temp) + + n_sd = {} + for k in k_temp: + n_sd[k[1]] = sd[k[0]] + if len(n_sd) > 0: + query_dim = n_sd["linear.weight"].shape[0] + key_dim = n_sd["linear.weight"].shape[1] + + if key_dim == 768: # SD1.x + n_heads = 8 + d_head = query_dim // n_heads + else: + d_head = 64 + n_heads = query_dim // d_head + + gated = GatedSelfAttentionDense( + query_dim, key_dim, n_heads, d_head) + gated.load_state_dict(n_sd, strict=False) + output_list.append(gated) + + if "position_net.null_positive_feature" in sd_k: + in_dim = sd["position_net.null_positive_feature"].shape[0] + out_dim = sd["position_net.linears.4.weight"].shape[0] + + class WeightsLoader(torch.nn.Module): + pass + w = WeightsLoader() + w.position_net = PositionNet(in_dim, out_dim) + w.load_state_dict(sd, strict=False) + + gligen = Gligen(output_list, w.position_net, key_dim) + return gligen diff --git a/ldm_patched/modules/latent_formats.py b/ldm_patched/modules/latent_formats.py new file mode 100644 index 0000000000000000000000000000000000000000..fef90f2f5979990da0510ced56c25b6a542ffdc5 --- /dev/null +++ b/ldm_patched/modules/latent_formats.py @@ -0,0 +1,42 @@ +# Taken from https://github.com/comfyanonymous/ComfyUI +# This file is only for reference, and not used in the backend or runtime. 
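# --- Illustrative sketch (not part of the original file): the LatentFormat classes
# below simply rescale VAE latents by a model-specific constant on the way into the
# diffusion model (process_in) and undo that scaling on the way out (process_out).
# The helper class name and tensor shape here are hypothetical; 0.18215 is the SD1.x
# scale_factor defined below.
import torch

class _SketchLatentFormat:
    scale_factor = 0.18215

    def process_in(self, latent):
        return latent * self.scale_factor

    def process_out(self, latent):
        return latent / self.scale_factor

if __name__ == "__main__":
    fmt = _SketchLatentFormat()
    latent = torch.randn(1, 4, 64, 64)
    # round-tripping through process_in / process_out recovers the original latent
    assert torch.allclose(fmt.process_out(fmt.process_in(latent)), latent)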
+ + +class LatentFormat: + scale_factor = 1.0 + latent_rgb_factors = None + taesd_decoder_name = None + + def process_in(self, latent): + return latent * self.scale_factor + + def process_out(self, latent): + return latent / self.scale_factor + +class SD15(LatentFormat): + def __init__(self, scale_factor=0.18215): + self.scale_factor = scale_factor + self.latent_rgb_factors = [ + # R G B + [ 0.3512, 0.2297, 0.3227], + [ 0.3250, 0.4974, 0.2350], + [-0.2829, 0.1762, 0.2721], + [-0.2120, -0.2616, -0.7177] + ] + self.taesd_decoder_name = "taesd_decoder" + +class SDXL(LatentFormat): + def __init__(self): + self.scale_factor = 0.13025 + self.latent_rgb_factors = [ + # R G B + [ 0.3920, 0.4054, 0.4549], + [-0.2634, -0.0196, 0.0653], + [ 0.0568, 0.1687, -0.0755], + [-0.3112, -0.2359, -0.2076] + ] + self.taesd_decoder_name = "taesdxl_decoder" + +class SD_X4(LatentFormat): + def __init__(self): + self.scale_factor = 0.08333 diff --git a/ldm_patched/modules/lora.py b/ldm_patched/modules/lora.py new file mode 100644 index 0000000000000000000000000000000000000000..7f74f119d7a73c8d4d0de9ce4ea813f7b4d679bf --- /dev/null +++ b/ldm_patched/modules/lora.py @@ -0,0 +1,226 @@ +# 1st edit by https://github.com/comfyanonymous/ComfyUI +# 2nd edit by Forge Official + + +import ldm_patched.modules.utils + +LORA_CLIP_MAP = { + "mlp.fc1": "mlp_fc1", + "mlp.fc2": "mlp_fc2", + "self_attn.k_proj": "self_attn_k_proj", + "self_attn.q_proj": "self_attn_q_proj", + "self_attn.v_proj": "self_attn_v_proj", + "self_attn.out_proj": "self_attn_out_proj", +} + + +def load_lora(lora, to_load): + patch_dict = {} + loaded_keys = set() + for x in to_load: + alpha_name = "{}.alpha".format(x) + alpha = None + if alpha_name in lora.keys(): + alpha = lora[alpha_name].item() + loaded_keys.add(alpha_name) + + regular_lora = "{}.lora_up.weight".format(x) + diffusers_lora = "{}_lora.up.weight".format(x) + transformers_lora = "{}.lora_linear_layer.up.weight".format(x) + A_name = None + + if regular_lora in lora.keys(): + A_name = regular_lora + B_name = "{}.lora_down.weight".format(x) + mid_name = "{}.lora_mid.weight".format(x) + elif diffusers_lora in lora.keys(): + A_name = diffusers_lora + B_name = "{}_lora.down.weight".format(x) + mid_name = None + elif transformers_lora in lora.keys(): + A_name = transformers_lora + B_name ="{}.lora_linear_layer.down.weight".format(x) + mid_name = None + + if A_name is not None: + mid = None + if mid_name is not None and mid_name in lora.keys(): + mid = lora[mid_name] + loaded_keys.add(mid_name) + patch_dict[to_load[x]] = ("lora", (lora[A_name], lora[B_name], alpha, mid)) + loaded_keys.add(A_name) + loaded_keys.add(B_name) + + + ######## loha + hada_w1_a_name = "{}.hada_w1_a".format(x) + hada_w1_b_name = "{}.hada_w1_b".format(x) + hada_w2_a_name = "{}.hada_w2_a".format(x) + hada_w2_b_name = "{}.hada_w2_b".format(x) + hada_t1_name = "{}.hada_t1".format(x) + hada_t2_name = "{}.hada_t2".format(x) + if hada_w1_a_name in lora.keys(): + hada_t1 = None + hada_t2 = None + if hada_t1_name in lora.keys(): + hada_t1 = lora[hada_t1_name] + hada_t2 = lora[hada_t2_name] + loaded_keys.add(hada_t1_name) + loaded_keys.add(hada_t2_name) + + patch_dict[to_load[x]] = ("loha", (lora[hada_w1_a_name], lora[hada_w1_b_name], alpha, lora[hada_w2_a_name], lora[hada_w2_b_name], hada_t1, hada_t2)) + loaded_keys.add(hada_w1_a_name) + loaded_keys.add(hada_w1_b_name) + loaded_keys.add(hada_w2_a_name) + loaded_keys.add(hada_w2_b_name) + + + ######## lokr + lokr_w1_name = "{}.lokr_w1".format(x) + lokr_w2_name = "{}.lokr_w2".format(x) + 
lokr_w1_a_name = "{}.lokr_w1_a".format(x) + lokr_w1_b_name = "{}.lokr_w1_b".format(x) + lokr_t2_name = "{}.lokr_t2".format(x) + lokr_w2_a_name = "{}.lokr_w2_a".format(x) + lokr_w2_b_name = "{}.lokr_w2_b".format(x) + + lokr_w1 = None + if lokr_w1_name in lora.keys(): + lokr_w1 = lora[lokr_w1_name] + loaded_keys.add(lokr_w1_name) + + lokr_w2 = None + if lokr_w2_name in lora.keys(): + lokr_w2 = lora[lokr_w2_name] + loaded_keys.add(lokr_w2_name) + + lokr_w1_a = None + if lokr_w1_a_name in lora.keys(): + lokr_w1_a = lora[lokr_w1_a_name] + loaded_keys.add(lokr_w1_a_name) + + lokr_w1_b = None + if lokr_w1_b_name in lora.keys(): + lokr_w1_b = lora[lokr_w1_b_name] + loaded_keys.add(lokr_w1_b_name) + + lokr_w2_a = None + if lokr_w2_a_name in lora.keys(): + lokr_w2_a = lora[lokr_w2_a_name] + loaded_keys.add(lokr_w2_a_name) + + lokr_w2_b = None + if lokr_w2_b_name in lora.keys(): + lokr_w2_b = lora[lokr_w2_b_name] + loaded_keys.add(lokr_w2_b_name) + + lokr_t2 = None + if lokr_t2_name in lora.keys(): + lokr_t2 = lora[lokr_t2_name] + loaded_keys.add(lokr_t2_name) + + if (lokr_w1 is not None) or (lokr_w2 is not None) or (lokr_w1_a is not None) or (lokr_w2_a is not None): + patch_dict[to_load[x]] = ("lokr", (lokr_w1, lokr_w2, alpha, lokr_w1_a, lokr_w1_b, lokr_w2_a, lokr_w2_b, lokr_t2)) + + #glora + a1_name = "{}.a1.weight".format(x) + a2_name = "{}.a2.weight".format(x) + b1_name = "{}.b1.weight".format(x) + b2_name = "{}.b2.weight".format(x) + if a1_name in lora: + patch_dict[to_load[x]] = ("glora", (lora[a1_name], lora[a2_name], lora[b1_name], lora[b2_name], alpha)) + loaded_keys.add(a1_name) + loaded_keys.add(a2_name) + loaded_keys.add(b1_name) + loaded_keys.add(b2_name) + + w_norm_name = "{}.w_norm".format(x) + b_norm_name = "{}.b_norm".format(x) + w_norm = lora.get(w_norm_name, None) + b_norm = lora.get(b_norm_name, None) + + if w_norm is not None: + loaded_keys.add(w_norm_name) + patch_dict[to_load[x]] = ("diff", (w_norm,)) + if b_norm is not None: + loaded_keys.add(b_norm_name) + patch_dict["{}.bias".format(to_load[x][:-len(".weight")])] = ("diff", (b_norm,)) + + diff_name = "{}.diff".format(x) + diff_weight = lora.get(diff_name, None) + if diff_weight is not None: + patch_dict[to_load[x]] = ("diff", (diff_weight,)) + loaded_keys.add(diff_name) + + diff_bias_name = "{}.diff_b".format(x) + diff_bias = lora.get(diff_bias_name, None) + if diff_bias is not None: + patch_dict["{}.bias".format(to_load[x][:-len(".weight")])] = ("diff", (diff_bias,)) + loaded_keys.add(diff_bias_name) + + remaining_dict = {x: y for x, y in lora.items() if x not in loaded_keys} + return patch_dict, remaining_dict + +def model_lora_keys_clip(model, key_map={}): + sdk = model.state_dict().keys() + + text_model_lora_key = "lora_te_text_model_encoder_layers_{}_{}" + clip_l_present = False + for b in range(32): #TODO: clean up + for c in LORA_CLIP_MAP: + k = "clip_h.transformer.text_model.encoder.layers.{}.{}.weight".format(b, c) + if k in sdk: + lora_key = text_model_lora_key.format(b, LORA_CLIP_MAP[c]) + key_map[lora_key] = k + lora_key = "lora_te1_text_model_encoder_layers_{}_{}".format(b, LORA_CLIP_MAP[c]) + key_map[lora_key] = k + lora_key = "text_encoder.text_model.encoder.layers.{}.{}".format(b, c) #diffusers lora + key_map[lora_key] = k + + k = "clip_l.transformer.text_model.encoder.layers.{}.{}.weight".format(b, c) + if k in sdk: + lora_key = text_model_lora_key.format(b, LORA_CLIP_MAP[c]) + key_map[lora_key] = k + lora_key = "lora_te1_text_model_encoder_layers_{}_{}".format(b, LORA_CLIP_MAP[c]) #SDXL base + 
key_map[lora_key] = k + clip_l_present = True + lora_key = "text_encoder.text_model.encoder.layers.{}.{}".format(b, c) #diffusers lora + key_map[lora_key] = k + + k = "clip_g.transformer.text_model.encoder.layers.{}.{}.weight".format(b, c) + if k in sdk: + if clip_l_present: + lora_key = "lora_te2_text_model_encoder_layers_{}_{}".format(b, LORA_CLIP_MAP[c]) #SDXL base + key_map[lora_key] = k + lora_key = "text_encoder_2.text_model.encoder.layers.{}.{}".format(b, c) #diffusers lora + key_map[lora_key] = k + else: + lora_key = "lora_te_text_model_encoder_layers_{}_{}".format(b, LORA_CLIP_MAP[c]) #TODO: test if this is correct for SDXL-Refiner + key_map[lora_key] = k + lora_key = "text_encoder.text_model.encoder.layers.{}.{}".format(b, c) #diffusers lora + key_map[lora_key] = k + + return key_map + +def model_lora_keys_unet(model, key_map={}): + sdk = model.state_dict().keys() + + for k in sdk: + if k.startswith("diffusion_model.") and k.endswith(".weight"): + key_lora = k[len("diffusion_model."):-len(".weight")].replace(".", "_") + key_map["lora_unet_{}".format(key_lora)] = k + + diffusers_keys = ldm_patched.modules.utils.unet_to_diffusers(model.model_config.unet_config) + for k in diffusers_keys: + if k.endswith(".weight"): + unet_key = "diffusion_model.{}".format(diffusers_keys[k]) + key_lora = k[:-len(".weight")].replace(".", "_") + key_map["lora_unet_{}".format(key_lora)] = unet_key + + diffusers_lora_prefix = ["", "unet."] + for p in diffusers_lora_prefix: + diffusers_lora_key = "{}{}".format(p, k[:-len(".weight")].replace(".to_", ".processor.to_")) + if diffusers_lora_key.endswith(".to_out.0"): + diffusers_lora_key = diffusers_lora_key[:-2] + key_map[diffusers_lora_key] = unet_key + return key_map diff --git a/ldm_patched/modules/model_base.py b/ldm_patched/modules/model_base.py new file mode 100644 index 0000000000000000000000000000000000000000..e847648cf17d6a1cc206e83567dbbd4d862f03f4 --- /dev/null +++ b/ldm_patched/modules/model_base.py @@ -0,0 +1,431 @@ +# 1st edit by https://github.com/comfyanonymous/ComfyUI +# 2nd edit by Forge Official + + +import torch +from ldm_patched.ldm.modules.diffusionmodules.openaimodel import UNetModel, Timestep +from ldm_patched.ldm.modules.encoders.noise_aug_modules import CLIPEmbeddingNoiseAugmentation +from ldm_patched.ldm.modules.diffusionmodules.upscaling import ImageConcatWithNoiseAugmentation +import ldm_patched.ldm.modules.attention +import ldm_patched.modules.model_management +import ldm_patched.modules.conds +import ldm_patched.modules.ops +from enum import Enum +from . 
import utils + +class ModelType(Enum): + EPS = 1 + V_PREDICTION = 2 + V_PREDICTION_EDM = 3 + + +from ldm_patched.modules.model_sampling import EPS, V_PREDICTION, ModelSamplingDiscrete, ModelSamplingContinuousEDM + + +def model_sampling(model_config, model_type): + s = ModelSamplingDiscrete + + if model_type == ModelType.EPS: + c = EPS + elif model_type == ModelType.V_PREDICTION: + c = V_PREDICTION + elif model_type == ModelType.V_PREDICTION_EDM: + c = V_PREDICTION + s = ModelSamplingContinuousEDM + + class ModelSampling(s, c): + pass + + return ModelSampling(model_config) + + +class BaseModel(torch.nn.Module): + def __init__(self, model_config, model_type=ModelType.EPS, device=None): + super().__init__() + + unet_config = model_config.unet_config + self.latent_format = model_config.latent_format + self.model_config = model_config + self.manual_cast_dtype = model_config.manual_cast_dtype + + if not unet_config.get("disable_unet_model_creation", False): + if self.manual_cast_dtype is not None: + operations = ldm_patched.modules.ops.manual_cast + else: + operations = ldm_patched.modules.ops.disable_weight_init + self.diffusion_model = UNetModel(**unet_config, device=device, operations=operations) + self.model_type = model_type + self.model_sampling = model_sampling(model_config, model_type) + + self.adm_channels = unet_config.get("adm_in_channels", None) + if self.adm_channels is None: + self.adm_channels = 0 + self.inpaint_model = False + print("model_type", model_type.name) + print("UNet ADM Dimension", self.adm_channels) + + def apply_model(self, x, t, c_concat=None, c_crossattn=None, control=None, transformer_options={}, **kwargs): + sigma = t + xc = self.model_sampling.calculate_input(sigma, x) + if c_concat is not None: + xc = torch.cat([xc] + [c_concat], dim=1) + + context = c_crossattn + dtype = self.get_dtype() + + if self.manual_cast_dtype is not None: + dtype = self.manual_cast_dtype + + xc = xc.to(dtype) + t = self.model_sampling.timestep(t).float() + context = context.to(dtype) + extra_conds = {} + for o in kwargs: + extra = kwargs[o] + if hasattr(extra, "dtype"): + if extra.dtype != torch.int and extra.dtype != torch.long: + extra = extra.to(dtype) + extra_conds[o] = extra + + model_output = self.diffusion_model(xc, t, context=context, control=control, transformer_options=transformer_options, **extra_conds).float() + return self.model_sampling.calculate_denoised(sigma, model_output, x) + + def get_dtype(self): + return self.diffusion_model.dtype + + def is_adm(self): + return self.adm_channels > 0 + + def encode_adm(self, **kwargs): + return None + + def extra_conds(self, **kwargs): + out = {} + if self.inpaint_model: + concat_keys = ("mask", "masked_image") + cond_concat = [] + denoise_mask = kwargs.get("concat_mask", kwargs.get("denoise_mask", None)) + concat_latent_image = kwargs.get("concat_latent_image", None) + if concat_latent_image is None: + concat_latent_image = kwargs.get("latent_image", None) + else: + concat_latent_image = self.process_latent_in(concat_latent_image) + + noise = kwargs.get("noise", None) + device = kwargs["device"] + + if concat_latent_image.shape[1:] != noise.shape[1:]: + concat_latent_image = utils.common_upscale(concat_latent_image, noise.shape[-1], noise.shape[-2], "bilinear", "center") + + concat_latent_image = utils.resize_to_batch_size(concat_latent_image, noise.shape[0]) + + if len(denoise_mask.shape) == len(noise.shape): + denoise_mask = denoise_mask[:,:1] + + denoise_mask = denoise_mask.reshape((-1, 1, denoise_mask.shape[-2], 
denoise_mask.shape[-1])) + if denoise_mask.shape[-2:] != noise.shape[-2:]: + denoise_mask = utils.common_upscale(denoise_mask, noise.shape[-1], noise.shape[-2], "bilinear", "center") + denoise_mask = utils.resize_to_batch_size(denoise_mask.round(), noise.shape[0]) + + def blank_inpaint_image_like(latent_image): + blank_image = torch.ones_like(latent_image) + # these are the values for "zero" in pixel space translated to latent space + blank_image[:,0] *= 0.8223 + blank_image[:,1] *= -0.6876 + blank_image[:,2] *= 0.6364 + blank_image[:,3] *= 0.1380 + return blank_image + + for ck in concat_keys: + if denoise_mask is not None: + if ck == "mask": + cond_concat.append(denoise_mask.to(device)) + elif ck == "masked_image": + cond_concat.append(concat_latent_image.to(device)) #NOTE: the latent_image should be masked by the mask in pixel space + else: + if ck == "mask": + cond_concat.append(torch.ones_like(noise)[:,:1]) + elif ck == "masked_image": + cond_concat.append(blank_inpaint_image_like(noise)) + data = torch.cat(cond_concat, dim=1) + out['c_concat'] = ldm_patched.modules.conds.CONDNoiseShape(data) + + adm = self.encode_adm(**kwargs) + if adm is not None: + out['y'] = ldm_patched.modules.conds.CONDRegular(adm) + + cross_attn = kwargs.get("cross_attn", None) + if cross_attn is not None: + out['c_crossattn'] = ldm_patched.modules.conds.CONDCrossAttn(cross_attn) + + return out + + def load_model_weights(self, sd, unet_prefix=""): + to_load = {} + keys = list(sd.keys()) + for k in keys: + if k.startswith(unet_prefix): + to_load[k[len(unet_prefix):]] = sd.pop(k) + + to_load = self.model_config.process_unet_state_dict(to_load) + m, u = self.diffusion_model.load_state_dict(to_load, strict=False) + if len(m) > 0: + print("unet missing:", m) + + if len(u) > 0: + print("unet unexpected:", u) + del to_load + return self + + def process_latent_in(self, latent): + return self.latent_format.process_in(latent) + + def process_latent_out(self, latent): + return self.latent_format.process_out(latent) + + def state_dict_for_saving(self, clip_state_dict=None, vae_state_dict=None, clip_vision_state_dict=None): + extra_sds = [] + if clip_state_dict is not None: + extra_sds.append(self.model_config.process_clip_state_dict_for_saving(clip_state_dict)) + if vae_state_dict is not None: + extra_sds.append(self.model_config.process_vae_state_dict_for_saving(vae_state_dict)) + if clip_vision_state_dict is not None: + extra_sds.append(self.model_config.process_clip_vision_state_dict_for_saving(clip_vision_state_dict)) + + unet_state_dict = self.diffusion_model.state_dict() + unet_state_dict = self.model_config.process_unet_state_dict_for_saving(unet_state_dict) + + if self.get_dtype() == torch.float16: + extra_sds = map(lambda sd: utils.convert_sd_to(sd, torch.float16), extra_sds) + + if self.model_type == ModelType.V_PREDICTION: + unet_state_dict["v_pred"] = torch.tensor([]) + + for sd in extra_sds: + unet_state_dict.update(sd) + + return unet_state_dict + + def set_inpaint(self): + self.inpaint_model = True + + def memory_required(self, input_shape): + area = input_shape[0] * input_shape[2] * input_shape[3] + dtype = self.manual_cast_dtype if self.manual_cast_dtype is not None else self.get_dtype() + dtype_size = ldm_patched.modules.model_management.dtype_size(dtype) + + if ldm_patched.modules.model_management.xformers_enabled() or ldm_patched.modules.model_management.pytorch_attention_flash_attention(): + scaler = 1.28 + else: + scaler = 1.65 + if ldm_patched.ldm.modules.attention._ATTN_PRECISION == "fp32": + 
dtype_size = 4 + + return scaler * area * dtype_size * 16384 + + +def unclip_adm(unclip_conditioning, device, noise_augmentor, noise_augment_merge=0.0, seed=None): + adm_inputs = [] + weights = [] + noise_aug = [] + for unclip_cond in unclip_conditioning: + for adm_cond in unclip_cond["clip_vision_output"].image_embeds: + weight = unclip_cond["strength"] + noise_augment = unclip_cond["noise_augmentation"] + noise_level = round((noise_augmentor.max_noise_level - 1) * noise_augment) + c_adm, noise_level_emb = noise_augmentor(adm_cond.to(device), noise_level=torch.tensor([noise_level], device=device), seed=seed) + adm_out = torch.cat((c_adm, noise_level_emb), 1) * weight + weights.append(weight) + noise_aug.append(noise_augment) + adm_inputs.append(adm_out) + + if len(noise_aug) > 1: + adm_out = torch.stack(adm_inputs).sum(0) + noise_augment = noise_augment_merge + noise_level = round((noise_augmentor.max_noise_level - 1) * noise_augment) + c_adm, noise_level_emb = noise_augmentor(adm_out[:, :noise_augmentor.time_embed.dim], noise_level=torch.tensor([noise_level], device=device)) + adm_out = torch.cat((c_adm, noise_level_emb), 1) + + return adm_out + +class SD21UNCLIP(BaseModel): + def __init__(self, model_config, noise_aug_config, model_type=ModelType.V_PREDICTION, device=None): + super().__init__(model_config, model_type, device=device) + self.noise_augmentor = CLIPEmbeddingNoiseAugmentation(**noise_aug_config) + + def encode_adm(self, **kwargs): + unclip_conditioning = kwargs.get("unclip_conditioning", None) + device = kwargs["device"] + if unclip_conditioning is None: + return torch.zeros((1, self.adm_channels)) + else: + return unclip_adm(unclip_conditioning, device, self.noise_augmentor, kwargs.get("unclip_noise_augment_merge", 0.05), kwargs.get("seed", 0) - 10) + +def sdxl_pooled(args, noise_augmentor): + if "unclip_conditioning" in args: + return unclip_adm(args.get("unclip_conditioning", None), args["device"], noise_augmentor, seed=args.get("seed", 0) - 10)[:,:1280] + else: + return args["pooled_output"] + +class SDXLRefiner(BaseModel): + def __init__(self, model_config, model_type=ModelType.EPS, device=None): + super().__init__(model_config, model_type, device=device) + self.embedder = Timestep(256) + self.noise_augmentor = CLIPEmbeddingNoiseAugmentation(**{"noise_schedule_config": {"timesteps": 1000, "beta_schedule": "squaredcos_cap_v2"}, "timestep_dim": 1280}) + + def encode_adm(self, **kwargs): + clip_pooled = sdxl_pooled(kwargs, self.noise_augmentor) + width = kwargs.get("width", 768) + height = kwargs.get("height", 768) + crop_w = kwargs.get("crop_w", 0) + crop_h = kwargs.get("crop_h", 0) + + if kwargs.get("prompt_type", "") == "negative": + aesthetic_score = kwargs.get("aesthetic_score", 2.5) + else: + aesthetic_score = kwargs.get("aesthetic_score", 6) + + out = [] + out.append(self.embedder(torch.Tensor([height]))) + out.append(self.embedder(torch.Tensor([width]))) + out.append(self.embedder(torch.Tensor([crop_h]))) + out.append(self.embedder(torch.Tensor([crop_w]))) + out.append(self.embedder(torch.Tensor([aesthetic_score]))) + flat = torch.flatten(torch.cat(out)).unsqueeze(dim=0).repeat(clip_pooled.shape[0], 1) + return torch.cat((clip_pooled.to(flat.device), flat), dim=1) + +class SDXL(BaseModel): + def __init__(self, model_config, model_type=ModelType.EPS, device=None): + super().__init__(model_config, model_type, device=device) + self.embedder = Timestep(256) + self.noise_augmentor = CLIPEmbeddingNoiseAugmentation(**{"noise_schedule_config": {"timesteps": 1000, 
"beta_schedule": "squaredcos_cap_v2"}, "timestep_dim": 1280}) + + def encode_adm(self, **kwargs): + clip_pooled = sdxl_pooled(kwargs, self.noise_augmentor) + width = kwargs.get("width", 768) + height = kwargs.get("height", 768) + crop_w = kwargs.get("crop_w", 0) + crop_h = kwargs.get("crop_h", 0) + target_width = kwargs.get("target_width", width) + target_height = kwargs.get("target_height", height) + + out = [] + out.append(self.embedder(torch.Tensor([height]))) + out.append(self.embedder(torch.Tensor([width]))) + out.append(self.embedder(torch.Tensor([crop_h]))) + out.append(self.embedder(torch.Tensor([crop_w]))) + out.append(self.embedder(torch.Tensor([target_height]))) + out.append(self.embedder(torch.Tensor([target_width]))) + flat = torch.flatten(torch.cat(out)).unsqueeze(dim=0).repeat(clip_pooled.shape[0], 1) + return torch.cat((clip_pooled.to(flat.device), flat), dim=1) + +class SVD_img2vid(BaseModel): + def __init__(self, model_config, model_type=ModelType.V_PREDICTION_EDM, device=None): + super().__init__(model_config, model_type, device=device) + self.embedder = Timestep(256) + + def encode_adm(self, **kwargs): + fps_id = kwargs.get("fps", 6) - 1 + motion_bucket_id = kwargs.get("motion_bucket_id", 127) + augmentation = kwargs.get("augmentation_level", 0) + + out = [] + out.append(self.embedder(torch.Tensor([fps_id]))) + out.append(self.embedder(torch.Tensor([motion_bucket_id]))) + out.append(self.embedder(torch.Tensor([augmentation]))) + + flat = torch.flatten(torch.cat(out)).unsqueeze(dim=0) + return flat + + def extra_conds(self, **kwargs): + out = {} + adm = self.encode_adm(**kwargs) + if adm is not None: + out['y'] = ldm_patched.modules.conds.CONDRegular(adm) + + latent_image = kwargs.get("concat_latent_image", None) + noise = kwargs.get("noise", None) + device = kwargs["device"] + + if latent_image is None: + latent_image = torch.zeros_like(noise) + + if latent_image.shape[1:] != noise.shape[1:]: + latent_image = utils.common_upscale(latent_image, noise.shape[-1], noise.shape[-2], "bilinear", "center") + + latent_image = utils.resize_to_batch_size(latent_image, noise.shape[0]) + + out['c_concat'] = ldm_patched.modules.conds.CONDNoiseShape(latent_image) + + cross_attn = kwargs.get("cross_attn", None) + if cross_attn is not None: + out['c_crossattn'] = ldm_patched.modules.conds.CONDCrossAttn(cross_attn) + + if "time_conditioning" in kwargs: + out["time_context"] = ldm_patched.modules.conds.CONDCrossAttn(kwargs["time_conditioning"]) + + out['image_only_indicator'] = ldm_patched.modules.conds.CONDConstant(torch.zeros((1,), device=device)) + out['num_video_frames'] = ldm_patched.modules.conds.CONDConstant(noise.shape[0]) + return out + +class Stable_Zero123(BaseModel): + def __init__(self, model_config, model_type=ModelType.EPS, device=None, cc_projection_weight=None, cc_projection_bias=None): + super().__init__(model_config, model_type, device=device) + self.cc_projection = ldm_patched.modules.ops.manual_cast.Linear(cc_projection_weight.shape[1], cc_projection_weight.shape[0], dtype=self.get_dtype(), device=device) + self.cc_projection.weight.copy_(cc_projection_weight) + self.cc_projection.bias.copy_(cc_projection_bias) + + def extra_conds(self, **kwargs): + out = {} + + latent_image = kwargs.get("concat_latent_image", None) + noise = kwargs.get("noise", None) + + if latent_image is None: + latent_image = torch.zeros_like(noise) + + if latent_image.shape[1:] != noise.shape[1:]: + latent_image = utils.common_upscale(latent_image, noise.shape[-1], noise.shape[-2], "bilinear", 
"center") + + latent_image = utils.resize_to_batch_size(latent_image, noise.shape[0]) + + out['c_concat'] = ldm_patched.modules.conds.CONDNoiseShape(latent_image) + + cross_attn = kwargs.get("cross_attn", None) + if cross_attn is not None: + if cross_attn.shape[-1] != 768: + cross_attn = self.cc_projection(cross_attn) + out['c_crossattn'] = ldm_patched.modules.conds.CONDCrossAttn(cross_attn) + return out + +class SD_X4Upscaler(BaseModel): + def __init__(self, model_config, model_type=ModelType.V_PREDICTION, device=None): + super().__init__(model_config, model_type, device=device) + self.noise_augmentor = ImageConcatWithNoiseAugmentation(noise_schedule_config={"linear_start": 0.0001, "linear_end": 0.02}, max_noise_level=350) + + def extra_conds(self, **kwargs): + out = {} + + image = kwargs.get("concat_image", None) + noise = kwargs.get("noise", None) + noise_augment = kwargs.get("noise_augmentation", 0.0) + device = kwargs["device"] + seed = kwargs["seed"] - 10 + + noise_level = round((self.noise_augmentor.max_noise_level) * noise_augment) + + if image is None: + image = torch.zeros_like(noise)[:,:3] + + if image.shape[1:] != noise.shape[1:]: + image = utils.common_upscale(image.to(device), noise.shape[-1], noise.shape[-2], "bilinear", "center") + + noise_level = torch.tensor([noise_level], device=device) + if noise_augment > 0: + image, noise_level = self.noise_augmentor(image.to(device), noise_level=noise_level, seed=seed) + + image = utils.resize_to_batch_size(image, noise.shape[0]) + + out['c_concat'] = ldm_patched.modules.conds.CONDNoiseShape(image) + out['y'] = ldm_patched.modules.conds.CONDRegular(noise_level) + return out diff --git a/ldm_patched/modules/model_detection.py b/ldm_patched/modules/model_detection.py new file mode 100644 index 0000000000000000000000000000000000000000..3fa40cbb52403a07140ca1a4728879174c93369a --- /dev/null +++ b/ldm_patched/modules/model_detection.py @@ -0,0 +1,324 @@ +# 1st edit by https://github.com/comfyanonymous/ComfyUI +# 2nd edit by Forge Official + + +import ldm_patched.modules.supported_models +import ldm_patched.modules.supported_models_base + +def count_blocks(state_dict_keys, prefix_string): + count = 0 + while True: + c = False + for k in state_dict_keys: + if k.startswith(prefix_string.format(count)): + c = True + break + if c == False: + break + count += 1 + return count + +def calculate_transformer_depth(prefix, state_dict_keys, state_dict): + context_dim = None + use_linear_in_transformer = False + + transformer_prefix = prefix + "1.transformer_blocks." 
+ transformer_keys = sorted(list(filter(lambda a: a.startswith(transformer_prefix), state_dict_keys))) + if len(transformer_keys) > 0: + last_transformer_depth = count_blocks(state_dict_keys, transformer_prefix + '{}') + context_dim = state_dict['{}0.attn2.to_k.weight'.format(transformer_prefix)].shape[1] + use_linear_in_transformer = len(state_dict['{}1.proj_in.weight'.format(prefix)].shape) == 2 + time_stack = '{}1.time_stack.0.attn1.to_q.weight'.format(prefix) in state_dict or '{}1.time_mix_blocks.0.attn1.to_q.weight'.format(prefix) in state_dict + return last_transformer_depth, context_dim, use_linear_in_transformer, time_stack + return None + +def detect_unet_config(state_dict, key_prefix, dtype): + state_dict_keys = list(state_dict.keys()) + + unet_config = { + "use_checkpoint": False, + "image_size": 32, + "use_spatial_transformer": True, + "legacy": False + } + + y_input = '{}label_emb.0.0.weight'.format(key_prefix) + if y_input in state_dict_keys: + unet_config["num_classes"] = "sequential" + unet_config["adm_in_channels"] = state_dict[y_input].shape[1] + else: + unet_config["adm_in_channels"] = None + + unet_config["dtype"] = dtype + model_channels = state_dict['{}input_blocks.0.0.weight'.format(key_prefix)].shape[0] + in_channels = state_dict['{}input_blocks.0.0.weight'.format(key_prefix)].shape[1] + + out_key = '{}out.2.weight'.format(key_prefix) + if out_key in state_dict: + out_channels = state_dict[out_key].shape[0] + else: + out_channels = 4 + + num_res_blocks = [] + channel_mult = [] + attention_resolutions = [] + transformer_depth = [] + transformer_depth_output = [] + context_dim = None + use_linear_in_transformer = False + + video_model = False + + current_res = 1 + count = 0 + + last_res_blocks = 0 + last_channel_mult = 0 + + input_block_count = count_blocks(state_dict_keys, '{}input_blocks'.format(key_prefix) + '.{}.') + for count in range(input_block_count): + prefix = '{}input_blocks.{}.'.format(key_prefix, count) + prefix_output = '{}output_blocks.{}.'.format(key_prefix, input_block_count - count - 1) + + block_keys = sorted(list(filter(lambda a: a.startswith(prefix), state_dict_keys))) + if len(block_keys) == 0: + break + + block_keys_output = sorted(list(filter(lambda a: a.startswith(prefix_output), state_dict_keys))) + + if "{}0.op.weight".format(prefix) in block_keys: #new layer + num_res_blocks.append(last_res_blocks) + channel_mult.append(last_channel_mult) + + current_res *= 2 + last_res_blocks = 0 + last_channel_mult = 0 + out = calculate_transformer_depth(prefix_output, state_dict_keys, state_dict) + if out is not None: + transformer_depth_output.append(out[0]) + else: + transformer_depth_output.append(0) + else: + res_block_prefix = "{}0.in_layers.0.weight".format(prefix) + if res_block_prefix in block_keys: + last_res_blocks += 1 + last_channel_mult = state_dict["{}0.out_layers.3.weight".format(prefix)].shape[0] // model_channels + + out = calculate_transformer_depth(prefix, state_dict_keys, state_dict) + if out is not None: + transformer_depth.append(out[0]) + if context_dim is None: + context_dim = out[1] + use_linear_in_transformer = out[2] + video_model = out[3] + else: + transformer_depth.append(0) + + res_block_prefix = "{}0.in_layers.0.weight".format(prefix_output) + if res_block_prefix in block_keys_output: + out = calculate_transformer_depth(prefix_output, state_dict_keys, state_dict) + if out is not None: + transformer_depth_output.append(out[0]) + else: + transformer_depth_output.append(0) + + + num_res_blocks.append(last_res_blocks) + 
channel_mult.append(last_channel_mult) + if "{}middle_block.1.proj_in.weight".format(key_prefix) in state_dict_keys: + transformer_depth_middle = count_blocks(state_dict_keys, '{}middle_block.1.transformer_blocks.'.format(key_prefix) + '{}') + else: + transformer_depth_middle = -1 + + unet_config["in_channels"] = in_channels + unet_config["out_channels"] = out_channels + unet_config["model_channels"] = model_channels + unet_config["num_res_blocks"] = num_res_blocks + unet_config["transformer_depth"] = transformer_depth + unet_config["transformer_depth_output"] = transformer_depth_output + unet_config["channel_mult"] = channel_mult + unet_config["transformer_depth_middle"] = transformer_depth_middle + unet_config['use_linear_in_transformer'] = use_linear_in_transformer + unet_config["context_dim"] = context_dim + + if video_model: + unet_config["extra_ff_mix_layer"] = True + unet_config["use_spatial_context"] = True + unet_config["merge_strategy"] = "learned_with_images" + unet_config["merge_factor"] = 0.0 + unet_config["video_kernel_size"] = [3, 1, 1] + unet_config["use_temporal_resblock"] = True + unet_config["use_temporal_attention"] = True + else: + unet_config["use_temporal_resblock"] = False + unet_config["use_temporal_attention"] = False + + return unet_config + +def model_config_from_unet_config(unet_config): + for model_config in ldm_patched.modules.supported_models.models: + if model_config.matches(unet_config): + return model_config(unet_config) + + print("no match", unet_config) + return None + +def model_config_from_unet(state_dict, unet_key_prefix, dtype, use_base_if_no_match=False): + unet_config = detect_unet_config(state_dict, unet_key_prefix, dtype) + model_config = model_config_from_unet_config(unet_config) + if model_config is None and use_base_if_no_match: + return ldm_patched.modules.supported_models_base.BASE(unet_config) + else: + return model_config + +def convert_config(unet_config): + new_config = unet_config.copy() + num_res_blocks = new_config.get("num_res_blocks", None) + channel_mult = new_config.get("channel_mult", None) + + if isinstance(num_res_blocks, int): + num_res_blocks = len(channel_mult) * [num_res_blocks] + + if "attention_resolutions" in new_config: + attention_resolutions = new_config.pop("attention_resolutions") + transformer_depth = new_config.get("transformer_depth", None) + transformer_depth_middle = new_config.get("transformer_depth_middle", None) + + if isinstance(transformer_depth, int): + transformer_depth = len(channel_mult) * [transformer_depth] + if transformer_depth_middle is None: + transformer_depth_middle = transformer_depth[-1] + t_in = [] + t_out = [] + s = 1 + for i in range(len(num_res_blocks)): + res = num_res_blocks[i] + d = 0 + if s in attention_resolutions: + d = transformer_depth[i] + + t_in += [d] * res + t_out += [d] * (res + 1) + s *= 2 + transformer_depth = t_in + transformer_depth_output = t_out + new_config["transformer_depth"] = t_in + new_config["transformer_depth_output"] = t_out + new_config["transformer_depth_middle"] = transformer_depth_middle + + new_config["num_res_blocks"] = num_res_blocks + return new_config + + +def unet_config_from_diffusers_unet(state_dict, dtype): + match = {} + transformer_depth = [] + + attn_res = 1 + down_blocks = count_blocks(state_dict, "down_blocks.{}") + for i in range(down_blocks): + attn_blocks = count_blocks(state_dict, "down_blocks.{}.attentions.".format(i) + '{}') + for ab in range(attn_blocks): + transformer_count = count_blocks(state_dict, 
"down_blocks.{}.attentions.{}.transformer_blocks.".format(i, ab) + '{}') + transformer_depth.append(transformer_count) + if transformer_count > 0: + match["context_dim"] = state_dict["down_blocks.{}.attentions.{}.transformer_blocks.0.attn2.to_k.weight".format(i, ab)].shape[1] + + attn_res *= 2 + if attn_blocks == 0: + transformer_depth.append(0) + transformer_depth.append(0) + + match["transformer_depth"] = transformer_depth + + match["model_channels"] = state_dict["conv_in.weight"].shape[0] + match["in_channels"] = state_dict["conv_in.weight"].shape[1] + match["adm_in_channels"] = None + if "class_embedding.linear_1.weight" in state_dict: + match["adm_in_channels"] = state_dict["class_embedding.linear_1.weight"].shape[1] + elif "add_embedding.linear_1.weight" in state_dict: + match["adm_in_channels"] = state_dict["add_embedding.linear_1.weight"].shape[1] + + SDXL = {'use_checkpoint': False, 'image_size': 32, 'out_channels': 4, 'use_spatial_transformer': True, 'legacy': False, + 'num_classes': 'sequential', 'adm_in_channels': 2816, 'dtype': dtype, 'in_channels': 4, 'model_channels': 320, + 'num_res_blocks': [2, 2, 2], 'transformer_depth': [0, 0, 2, 2, 10, 10], 'channel_mult': [1, 2, 4], 'transformer_depth_middle': 10, + 'use_linear_in_transformer': True, 'context_dim': 2048, 'num_head_channels': 64, 'transformer_depth_output': [0, 0, 0, 2, 2, 2, 10, 10, 10], + 'use_temporal_attention': False, 'use_temporal_resblock': False} + + SDXL_refiner = {'use_checkpoint': False, 'image_size': 32, 'out_channels': 4, 'use_spatial_transformer': True, 'legacy': False, + 'num_classes': 'sequential', 'adm_in_channels': 2560, 'dtype': dtype, 'in_channels': 4, 'model_channels': 384, + 'num_res_blocks': [2, 2, 2, 2], 'transformer_depth': [0, 0, 4, 4, 4, 4, 0, 0], 'channel_mult': [1, 2, 4, 4], 'transformer_depth_middle': 4, + 'use_linear_in_transformer': True, 'context_dim': 1280, 'num_head_channels': 64, 'transformer_depth_output': [0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0], + 'use_temporal_attention': False, 'use_temporal_resblock': False} + + SD21 = {'use_checkpoint': False, 'image_size': 32, 'out_channels': 4, 'use_spatial_transformer': True, 'legacy': False, + 'adm_in_channels': None, 'dtype': dtype, 'in_channels': 4, 'model_channels': 320, 'num_res_blocks': [2, 2, 2, 2], + 'transformer_depth': [1, 1, 1, 1, 1, 1, 0, 0], 'channel_mult': [1, 2, 4, 4], 'transformer_depth_middle': 1, 'use_linear_in_transformer': True, + 'context_dim': 1024, 'num_head_channels': 64, 'transformer_depth_output': [1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0], + 'use_temporal_attention': False, 'use_temporal_resblock': False} + + SD21_uncliph = {'use_checkpoint': False, 'image_size': 32, 'out_channels': 4, 'use_spatial_transformer': True, 'legacy': False, + 'num_classes': 'sequential', 'adm_in_channels': 2048, 'dtype': dtype, 'in_channels': 4, 'model_channels': 320, + 'num_res_blocks': [2, 2, 2, 2], 'transformer_depth': [1, 1, 1, 1, 1, 1, 0, 0], 'channel_mult': [1, 2, 4, 4], 'transformer_depth_middle': 1, + 'use_linear_in_transformer': True, 'context_dim': 1024, 'num_head_channels': 64, 'transformer_depth_output': [1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0], + 'use_temporal_attention': False, 'use_temporal_resblock': False} + + SD21_unclipl = {'use_checkpoint': False, 'image_size': 32, 'out_channels': 4, 'use_spatial_transformer': True, 'legacy': False, + 'num_classes': 'sequential', 'adm_in_channels': 1536, 'dtype': dtype, 'in_channels': 4, 'model_channels': 320, + 'num_res_blocks': [2, 2, 2, 2], 'transformer_depth': [1, 1, 1, 1, 1, 1, 0, 0], 
'channel_mult': [1, 2, 4, 4], 'transformer_depth_middle': 1, + 'use_linear_in_transformer': True, 'context_dim': 1024, 'num_head_channels': 64, 'transformer_depth_output': [1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0], + 'use_temporal_attention': False, 'use_temporal_resblock': False} + + SD15 = {'use_checkpoint': False, 'image_size': 32, 'out_channels': 4, 'use_spatial_transformer': True, 'legacy': False, 'adm_in_channels': None, + 'dtype': dtype, 'in_channels': 4, 'model_channels': 320, 'num_res_blocks': [2, 2, 2, 2], 'transformer_depth': [1, 1, 1, 1, 1, 1, 0, 0], + 'channel_mult': [1, 2, 4, 4], 'transformer_depth_middle': 1, 'use_linear_in_transformer': False, 'context_dim': 768, 'num_heads': 8, + 'transformer_depth_output': [1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0], + 'use_temporal_attention': False, 'use_temporal_resblock': False} + + SDXL_mid_cnet = {'use_checkpoint': False, 'image_size': 32, 'out_channels': 4, 'use_spatial_transformer': True, 'legacy': False, + 'num_classes': 'sequential', 'adm_in_channels': 2816, 'dtype': dtype, 'in_channels': 4, 'model_channels': 320, + 'num_res_blocks': [2, 2, 2], 'transformer_depth': [0, 0, 0, 0, 1, 1], 'channel_mult': [1, 2, 4], 'transformer_depth_middle': 1, + 'use_linear_in_transformer': True, 'context_dim': 2048, 'num_head_channels': 64, 'transformer_depth_output': [0, 0, 0, 0, 0, 0, 1, 1, 1], + 'use_temporal_attention': False, 'use_temporal_resblock': False} + + SDXL_small_cnet = {'use_checkpoint': False, 'image_size': 32, 'out_channels': 4, 'use_spatial_transformer': True, 'legacy': False, + 'num_classes': 'sequential', 'adm_in_channels': 2816, 'dtype': dtype, 'in_channels': 4, 'model_channels': 320, + 'num_res_blocks': [2, 2, 2], 'transformer_depth': [0, 0, 0, 0, 0, 0], 'channel_mult': [1, 2, 4], 'transformer_depth_middle': 0, + 'use_linear_in_transformer': True, 'num_head_channels': 64, 'context_dim': 1, 'transformer_depth_output': [0, 0, 0, 0, 0, 0, 0, 0, 0], + 'use_temporal_attention': False, 'use_temporal_resblock': False} + + SDXL_diffusers_inpaint = {'use_checkpoint': False, 'image_size': 32, 'out_channels': 4, 'use_spatial_transformer': True, 'legacy': False, + 'num_classes': 'sequential', 'adm_in_channels': 2816, 'dtype': dtype, 'in_channels': 9, 'model_channels': 320, + 'num_res_blocks': [2, 2, 2], 'transformer_depth': [0, 0, 2, 2, 10, 10], 'channel_mult': [1, 2, 4], 'transformer_depth_middle': 10, + 'use_linear_in_transformer': True, 'context_dim': 2048, 'num_head_channels': 64, 'transformer_depth_output': [0, 0, 0, 2, 2, 2, 10, 10, 10], + 'use_temporal_attention': False, 'use_temporal_resblock': False} + + SSD_1B = {'use_checkpoint': False, 'image_size': 32, 'out_channels': 4, 'use_spatial_transformer': True, 'legacy': False, + 'num_classes': 'sequential', 'adm_in_channels': 2816, 'dtype': dtype, 'in_channels': 4, 'model_channels': 320, + 'num_res_blocks': [2, 2, 2], 'transformer_depth': [0, 0, 2, 2, 4, 4], 'transformer_depth_output': [0, 0, 0, 1, 1, 2, 10, 4, 4], + 'channel_mult': [1, 2, 4], 'transformer_depth_middle': -1, 'use_linear_in_transformer': True, 'context_dim': 2048, 'num_head_channels': 64, + 'use_temporal_attention': False, 'use_temporal_resblock': False} + + Segmind_Vega = {'use_checkpoint': False, 'image_size': 32, 'out_channels': 4, 'use_spatial_transformer': True, 'legacy': False, + 'num_classes': 'sequential', 'adm_in_channels': 2816, 'dtype': dtype, 'in_channels': 4, 'model_channels': 320, + 'num_res_blocks': [2, 2, 2], 'transformer_depth': [0, 0, 1, 1, 2, 2], 'transformer_depth_output': [0, 0, 0, 1, 1, 1, 2, 2, 2], + 
'channel_mult': [1, 2, 4], 'transformer_depth_middle': -1, 'use_linear_in_transformer': True, 'context_dim': 2048, 'num_head_channels': 64, + 'use_temporal_attention': False, 'use_temporal_resblock': False} + + supported_models = [SDXL, SDXL_refiner, SD21, SD15, SD21_uncliph, SD21_unclipl, SDXL_mid_cnet, SDXL_small_cnet, SDXL_diffusers_inpaint, SSD_1B, Segmind_Vega] + + for unet_config in supported_models: + matches = True + for k in match: + if match[k] != unet_config[k]: + matches = False + break + if matches: + return convert_config(unet_config) + return None + +def model_config_from_diffusers_unet(state_dict, dtype): + unet_config = unet_config_from_diffusers_unet(state_dict, dtype) + if unet_config is not None: + return model_config_from_unet_config(unet_config) + return None diff --git a/ldm_patched/modules/model_management.py b/ldm_patched/modules/model_management.py new file mode 100644 index 0000000000000000000000000000000000000000..0fb26eb9e8b9b1058b7542e574fef6a258bdeb60 --- /dev/null +++ b/ldm_patched/modules/model_management.py @@ -0,0 +1,869 @@ +# 1st edit by https://github.com/comfyanonymous/ComfyUI +# 2nd edit by Forge Official + + +import time +import psutil +from enum import Enum +from ldm_patched.modules.args_parser import args +from modules_forge import stream +import ldm_patched.modules.utils +import torch +import sys + +class VRAMState(Enum): + DISABLED = 0 #No vram present: no need to move models to vram + NO_VRAM = 1 #Very low vram: enable all the options to save vram + LOW_VRAM = 2 + NORMAL_VRAM = 3 + HIGH_VRAM = 4 + SHARED = 5 #No dedicated vram: memory shared between CPU and GPU but models still need to be moved between both. + +class CPUState(Enum): + GPU = 0 + CPU = 1 + MPS = 2 + +# Determine VRAM State +vram_state = VRAMState.NORMAL_VRAM +set_vram_to = VRAMState.NORMAL_VRAM +cpu_state = CPUState.GPU + +total_vram = 0 + +lowvram_available = True +xpu_available = False + +if args.pytorch_deterministic: + print("Using deterministic algorithms for pytorch") + torch.use_deterministic_algorithms(True, warn_only=True) + +directml_enabled = False +if args.directml is not None: + import torch_directml + directml_enabled = True + device_index = args.directml + if device_index < 0: + directml_device = torch_directml.device() + else: + directml_device = torch_directml.device(device_index) + print("Using directml with device:", torch_directml.device_name(device_index)) + +try: + import intel_extension_for_pytorch as ipex + if torch.xpu.is_available(): + xpu_available = True +except: + pass + +try: + if torch.backends.mps.is_available(): + cpu_state = CPUState.MPS + import torch.mps +except: + pass + +if args.always_cpu: + cpu_state = CPUState.CPU + +def is_intel_xpu(): + global cpu_state + global xpu_available + if cpu_state == CPUState.GPU: + if xpu_available: + return True + return False + +def get_torch_device(): + global directml_enabled + global cpu_state + if directml_enabled: + global directml_device + return directml_device + if cpu_state == CPUState.MPS: + return torch.device("mps") + if cpu_state == CPUState.CPU: + return torch.device("cpu") + else: + if is_intel_xpu(): + return torch.device("xpu") + else: + return torch.device(torch.cuda.current_device()) + +def get_total_memory(dev=None, torch_total_too=False): + global directml_enabled + if dev is None: + dev = get_torch_device() + + if hasattr(dev, 'type') and (dev.type == 'cpu' or dev.type == 'mps'): + mem_total = psutil.virtual_memory().total + mem_total_torch = mem_total + else: + if directml_enabled: + 
mem_total = 1024 * 1024 * 1024 #TODO + mem_total_torch = mem_total + elif is_intel_xpu(): + stats = torch.xpu.memory_stats(dev) + mem_reserved = stats['reserved_bytes.all.current'] + mem_total = torch.xpu.get_device_properties(dev).total_memory + mem_total_torch = mem_reserved + else: + stats = torch.cuda.memory_stats(dev) + mem_reserved = stats['reserved_bytes.all.current'] + _, mem_total_cuda = torch.cuda.mem_get_info(dev) + mem_total_torch = mem_reserved + mem_total = mem_total_cuda + + if torch_total_too: + return (mem_total, mem_total_torch) + else: + return mem_total + +total_vram = get_total_memory(get_torch_device()) / (1024 * 1024) +total_ram = psutil.virtual_memory().total / (1024 * 1024) +print("Total VRAM {:0.0f} MB, total RAM {:0.0f} MB".format(total_vram, total_ram)) +if not args.always_normal_vram and not args.always_cpu: + if lowvram_available and total_vram <= 4096: + print("Trying to enable lowvram mode because your GPU seems to have 4GB or less. If you don't want this use: --always-normal-vram") + set_vram_to = VRAMState.LOW_VRAM + +try: + OOM_EXCEPTION = torch.cuda.OutOfMemoryError +except: + OOM_EXCEPTION = Exception + +if directml_enabled: + OOM_EXCEPTION = Exception + +XFORMERS_VERSION = "" +XFORMERS_ENABLED_VAE = True +if args.disable_xformers: + XFORMERS_IS_AVAILABLE = False +else: + try: + import xformers + import xformers.ops + XFORMERS_IS_AVAILABLE = True + try: + XFORMERS_IS_AVAILABLE = xformers._has_cpp_library + except: + pass + try: + XFORMERS_VERSION = xformers.version.__version__ + print("xformers version:", XFORMERS_VERSION) + if XFORMERS_VERSION.startswith("0.0.18"): + print() + print("WARNING: This version of xformers has a major bug where you will get black images when generating high resolution images.") + print("Please downgrade or upgrade xformers to a different version.") + print() + XFORMERS_ENABLED_VAE = False + except: + pass + except: + XFORMERS_IS_AVAILABLE = False + +def is_nvidia(): + global cpu_state + if cpu_state == CPUState.GPU: + if torch.version.cuda: + return True + return False + +ENABLE_PYTORCH_ATTENTION = False +if args.attention_pytorch: + ENABLE_PYTORCH_ATTENTION = True + XFORMERS_IS_AVAILABLE = False + +VAE_DTYPE = torch.float32 + +try: + if is_nvidia(): + torch_version = torch.version.__version__ + if int(torch_version[0]) >= 2: + if ENABLE_PYTORCH_ATTENTION == False and args.attention_split == False and args.attention_quad == False: + ENABLE_PYTORCH_ATTENTION = True + if torch.cuda.is_bf16_supported() and torch.cuda.get_device_properties(torch.cuda.current_device()).major >= 8: + VAE_DTYPE = torch.bfloat16 + if is_intel_xpu(): + if args.attention_split == False and args.attention_quad == False: + ENABLE_PYTORCH_ATTENTION = True +except: + pass + +if is_intel_xpu(): + VAE_DTYPE = torch.bfloat16 + +if args.vae_in_cpu: + VAE_DTYPE = torch.float32 + +if args.vae_in_fp16: + VAE_DTYPE = torch.float16 +elif args.vae_in_bf16: + VAE_DTYPE = torch.bfloat16 +elif args.vae_in_fp32: + VAE_DTYPE = torch.float32 + + +VAE_ALWAYS_TILED = False + + +if ENABLE_PYTORCH_ATTENTION: + torch.backends.cuda.enable_math_sdp(True) + torch.backends.cuda.enable_flash_sdp(True) + torch.backends.cuda.enable_mem_efficient_sdp(True) + +if args.always_low_vram: + set_vram_to = VRAMState.LOW_VRAM + lowvram_available = True +elif args.always_no_vram: + set_vram_to = VRAMState.NO_VRAM +elif args.always_high_vram or args.always_gpu: + vram_state = VRAMState.HIGH_VRAM + +FORCE_FP32 = False +FORCE_FP16 = False +if args.all_in_fp32: + print("Forcing FP32, if this 
improves things please report it.") + FORCE_FP32 = True + +if args.all_in_fp16: + print("Forcing FP16.") + FORCE_FP16 = True + +if lowvram_available: + if set_vram_to in (VRAMState.LOW_VRAM, VRAMState.NO_VRAM): + vram_state = set_vram_to + + +if cpu_state != CPUState.GPU: + vram_state = VRAMState.DISABLED + +if cpu_state == CPUState.MPS: + vram_state = VRAMState.SHARED + +print(f"Set vram state to: {vram_state.name}") + +ALWAYS_VRAM_OFFLOAD = args.always_offload_from_vram + +if ALWAYS_VRAM_OFFLOAD: + print("Always offload VRAM") + +PIN_SHARED_MEMORY = args.pin_shared_memory + +if PIN_SHARED_MEMORY: + print("Always pin shared GPU memory") + + +def get_torch_device_name(device): + if hasattr(device, 'type'): + if device.type == "cuda": + try: + allocator_backend = torch.cuda.get_allocator_backend() + except: + allocator_backend = "" + return "{} {} : {}".format(device, torch.cuda.get_device_name(device), allocator_backend) + else: + return "{}".format(device.type) + elif is_intel_xpu(): + return "{} {}".format(device, torch.xpu.get_device_name(device)) + else: + return "CUDA {}: {}".format(device, torch.cuda.get_device_name(device)) + +try: + torch_device_name = get_torch_device_name(get_torch_device()) + print("Device:", torch_device_name) +except: + torch_device_name = '' + print("Could not pick default device.") + +if 'rtx' in torch_device_name.lower(): + if not args.pin_shared_memory: + print('Hint: your device supports --pin-shared-memory for potential speed improvements.') + if not args.cuda_malloc: + print('Hint: your device supports --cuda-malloc for potential speed improvements.') + if not args.cuda_stream: + print('Hint: your device supports --cuda-stream for potential speed improvements.') + +print("VAE dtype:", VAE_DTYPE) + +current_loaded_models = [] + +def module_size(module, exclude_device=None): + module_mem = 0 + sd = module.state_dict() + for k in sd: + t = sd[k] + + if exclude_device is not None: + if t.device == exclude_device: + continue + + module_mem += t.nelement() * t.element_size() + return module_mem + +class LoadedModel: + def __init__(self, model, memory_required): + self.model = model + self.memory_required = memory_required + self.model_accelerated = False + self.device = model.load_device + + def model_memory(self): + return self.model.model_size() + + def model_memory_required(self, device): + return module_size(self.model.model, exclude_device=device) + + def model_load(self, async_kept_memory=-1): + patch_model_to = None + disable_async_load = async_kept_memory < 0 + + if disable_async_load: + patch_model_to = self.device + + self.model.model_patches_to(self.device) + self.model.model_patches_to(self.model.model_dtype()) + + try: + self.real_model = self.model.patch_model(device_to=patch_model_to) #TODO: do something with loras and offloading to CPU + except Exception as e: + self.model.unpatch_model(self.model.offload_device) + self.model_unload() + raise e + + if not disable_async_load: + flag = 'ASYNC' if stream.using_stream else 'SYNC' + print(f"[Memory Management] Requested {flag} Preserved Memory (MB) = ", async_kept_memory / (1024 * 1024)) + real_async_memory = 0 + mem_counter = 0 + for m in self.real_model.modules(): + if hasattr(m, "ldm_patched_cast_weights"): + m.prev_ldm_patched_cast_weights = m.ldm_patched_cast_weights + m.ldm_patched_cast_weights = True + module_mem = module_size(m) + if mem_counter + module_mem < async_kept_memory: + m.to(self.device) + mem_counter += module_mem + else: + real_async_memory += module_mem + 
m.to(self.model.offload_device) + if PIN_SHARED_MEMORY and is_device_cpu(self.model.offload_device): + m._apply(lambda x: x.pin_memory()) + elif hasattr(m, "weight"): + m.to(self.device) + mem_counter += module_size(m) + print(f"[Memory Management] {flag} Loader Disabled for ", m) + print(f"[Memory Management] Parameters Loaded to {flag} Stream (MB) = ", real_async_memory / (1024 * 1024)) + print(f"[Memory Management] Parameters Loaded to GPU (MB) = ", mem_counter / (1024 * 1024)) + + self.model_accelerated = True + + if is_intel_xpu() and not args.disable_ipex_hijack: + self.real_model = torch.xpu.optimize(self.real_model.eval(), inplace=True, auto_kernel_selection=True, graph_mode=True) + + return self.real_model + + def model_unload(self, avoid_model_moving=False): + if self.model_accelerated: + for m in self.real_model.modules(): + if hasattr(m, "prev_ldm_patched_cast_weights"): + m.ldm_patched_cast_weights = m.prev_ldm_patched_cast_weights + del m.prev_ldm_patched_cast_weights + + self.model_accelerated = False + + if avoid_model_moving: + self.model.unpatch_model() + else: + self.model.unpatch_model(self.model.offload_device) + self.model.model_patches_to(self.model.offload_device) + + def __eq__(self, other): + return self.model is other.model # and self.memory_required == other.memory_required + +def minimum_inference_memory(): + return (1024 * 1024 * 1024) + +def unload_model_clones(model): + to_unload = [] + for i in range(len(current_loaded_models)): + if model.is_clone(current_loaded_models[i].model): + to_unload = [i] + to_unload + + if len(to_unload) > 0: + print(f"Reuse {len(to_unload)} loaded models") + + for i in to_unload: + current_loaded_models.pop(i).model_unload(avoid_model_moving=True) + +def free_memory(memory_required, device, keep_loaded=[]): + offload_everything = ALWAYS_VRAM_OFFLOAD or vram_state == VRAMState.NO_VRAM + unloaded_model = False + for i in range(len(current_loaded_models) -1, -1, -1): + if not offload_everything: + if get_free_memory(device) > memory_required: + break + shift_model = current_loaded_models[i] + if shift_model.device == device: + if shift_model not in keep_loaded: + m = current_loaded_models.pop(i) + m.model_unload() + del m + unloaded_model = True + + if unloaded_model: + soft_empty_cache() + else: + if vram_state != VRAMState.HIGH_VRAM: + mem_free_total, mem_free_torch = get_free_memory(device, torch_free_too=True) + if mem_free_torch > mem_free_total * 0.25: + soft_empty_cache() + +def load_models_gpu(models, memory_required=0): + global vram_state + + execution_start_time = time.perf_counter() + extra_mem = max(minimum_inference_memory(), memory_required) + + models_to_load = [] + models_already_loaded = [] + for x in models: + loaded_model = LoadedModel(x, memory_required=memory_required) + + if loaded_model in current_loaded_models: + index = current_loaded_models.index(loaded_model) + current_loaded_models.insert(0, current_loaded_models.pop(index)) + models_already_loaded.append(loaded_model) + else: + if hasattr(x, "model"): + print(f"To load target model {x.model.__class__.__name__}") + models_to_load.append(loaded_model) + + if len(models_to_load) == 0: + devs = set(map(lambda a: a.device, models_already_loaded)) + for d in devs: + if d != torch.device("cpu"): + free_memory(extra_mem, d, models_already_loaded) + + moving_time = time.perf_counter() - execution_start_time + if moving_time > 0.1: + print(f'Memory cleanup has taken {moving_time:.2f} seconds') + + return + + print(f"Begin to load {len(models_to_load)} model{'s' 
if len(models_to_load) > 1 else ''}") + + total_memory_required = {} + for loaded_model in models_to_load: + unload_model_clones(loaded_model.model) + total_memory_required[loaded_model.device] = total_memory_required.get(loaded_model.device, 0) + loaded_model.model_memory_required(loaded_model.device) + + for device in total_memory_required: + if device != torch.device("cpu"): + free_memory(total_memory_required[device] * 1.3 + extra_mem, device, models_already_loaded) + + for loaded_model in models_to_load: + model = loaded_model.model + torch_dev = model.load_device + if is_device_cpu(torch_dev): + vram_set_state = VRAMState.DISABLED + else: + vram_set_state = vram_state + + async_kept_memory = -1 + + if lowvram_available and (vram_set_state == VRAMState.LOW_VRAM or vram_set_state == VRAMState.NORMAL_VRAM): + model_memory = loaded_model.model_memory_required(torch_dev) + current_free_mem = get_free_memory(torch_dev) + minimal_inference_memory = minimum_inference_memory() + estimated_remaining_memory = current_free_mem - model_memory - minimal_inference_memory + + print("[Memory Management] Current Free GPU Memory (MB) = ", current_free_mem / (1024 * 1024)) + print("[Memory Management] Model Memory (MB) = ", model_memory / (1024 * 1024)) + print("[Memory Management] Minimal Inference Memory (MB) = ", minimal_inference_memory / (1024 * 1024)) + print("[Memory Management] Estimated Remaining GPU Memory (MB) = ", estimated_remaining_memory / (1024 * 1024)) + + if estimated_remaining_memory < 0: + vram_set_state = VRAMState.LOW_VRAM + async_kept_memory = (current_free_mem - minimal_inference_memory) / 1.3 + async_kept_memory = int(max(0, async_kept_memory)) + + if vram_set_state == VRAMState.NO_VRAM: + async_kept_memory = 0 + + loaded_model.model_load(async_kept_memory) + current_loaded_models.insert(0, loaded_model) + + moving_time = time.perf_counter() - execution_start_time + print(f'Moving model(s) has taken {moving_time:.2f} seconds') + + return + + +def load_model_gpu(model): + return load_models_gpu([model]) + +def cleanup_models(): + to_delete = [] + for i in range(len(current_loaded_models)): + if sys.getrefcount(current_loaded_models[i].model) <= 2: + to_delete = [i] + to_delete + + for i in to_delete: + x = current_loaded_models.pop(i) + x.model_unload() + del x + +def dtype_size(dtype): + dtype_size = 4 + if dtype == torch.float16 or dtype == torch.bfloat16: + dtype_size = 2 + elif dtype == torch.float32: + dtype_size = 4 + else: + try: + dtype_size = dtype.itemsize + except: #Old pytorch doesn't have .itemsize + pass + return dtype_size + +def unet_offload_device(): + if vram_state == VRAMState.HIGH_VRAM: + return get_torch_device() + else: + return torch.device("cpu") + +def unet_inital_load_device(parameters, dtype): + torch_dev = get_torch_device() + if vram_state == VRAMState.HIGH_VRAM: + return torch_dev + + cpu_dev = torch.device("cpu") + if ALWAYS_VRAM_OFFLOAD: + return cpu_dev + + model_size = dtype_size(dtype) * parameters + + mem_dev = get_free_memory(torch_dev) + mem_cpu = get_free_memory(cpu_dev) + if mem_dev > mem_cpu and model_size < mem_dev: + return torch_dev + else: + return cpu_dev + +def unet_dtype(device=None, model_params=0): + if args.unet_in_bf16: + return torch.bfloat16 + if args.unet_in_fp16: + return torch.float16 + if args.unet_in_fp8_e4m3fn: + return torch.float8_e4m3fn + if args.unet_in_fp8_e5m2: + return torch.float8_e5m2 + if should_use_fp16(device=device, model_params=model_params, manual_cast=True): + return torch.float16 + return torch.float32 + 
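+# Illustrative usage sketch (assumed call pattern, not taken from a real call site): a
+# checkpoint loader would typically combine unet_dtype() above with unet_manual_cast()
+# defined just below, e.g.
+#   load_device = get_torch_device()
+#   weight_dtype = unet_dtype(device=load_device, model_params=parameter_count)  # parameter_count is hypothetical
+#   manual_cast_dtype = unet_manual_cast(weight_dtype, load_device)  # None -> weights usable on the device as-is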
+# None means no manual cast +def unet_manual_cast(weight_dtype, inference_device): + if weight_dtype == torch.float32: + return None + + fp16_supported = ldm_patched.modules.model_management.should_use_fp16(inference_device, prioritize_performance=False) + if fp16_supported and weight_dtype == torch.float16: + return None + + if fp16_supported: + return torch.float16 + else: + return torch.float32 + +def text_encoder_offload_device(): + if args.always_gpu: + return get_torch_device() + else: + return torch.device("cpu") + +def text_encoder_device(): + if args.always_gpu: + return get_torch_device() + elif vram_state == VRAMState.HIGH_VRAM or vram_state == VRAMState.NORMAL_VRAM: + if is_intel_xpu(): + return torch.device("cpu") + if should_use_fp16(prioritize_performance=False): + return get_torch_device() + else: + return torch.device("cpu") + else: + return torch.device("cpu") + +def text_encoder_dtype(device=None): + if args.clip_in_fp8_e4m3fn: + return torch.float8_e4m3fn + elif args.clip_in_fp8_e5m2: + return torch.float8_e5m2 + elif args.clip_in_fp16: + return torch.float16 + elif args.clip_in_fp32: + return torch.float32 + + if is_device_cpu(device): + return torch.float16 + + return torch.float16 + + +def intermediate_device(): + if args.always_gpu: + return get_torch_device() + else: + return torch.device("cpu") + +def vae_device(): + if args.vae_in_cpu: + return torch.device("cpu") + return get_torch_device() + +def vae_offload_device(): + if args.always_gpu: + return get_torch_device() + else: + return torch.device("cpu") + +def vae_dtype(): + global VAE_DTYPE + return VAE_DTYPE + +def get_autocast_device(dev): + if hasattr(dev, 'type'): + return dev.type + return "cuda" + +def supports_dtype(device, dtype): #TODO + if dtype == torch.float32: + return True + if is_device_cpu(device): + return False + if dtype == torch.float16: + return True + if dtype == torch.bfloat16: + return True + return False + +def device_supports_non_blocking(device): + if is_device_mps(device): + return False #pytorch bug? 
mps doesn't support non blocking + return True + +def cast_to_device(tensor, device, dtype, copy=False): + device_supports_cast = False + if tensor.dtype == torch.float32 or tensor.dtype == torch.float16: + device_supports_cast = True + elif tensor.dtype == torch.bfloat16: + if hasattr(device, 'type') and device.type.startswith("cuda"): + device_supports_cast = True + elif is_intel_xpu(): + device_supports_cast = True + + non_blocking = device_supports_non_blocking(device) + + if device_supports_cast: + if copy: + if tensor.device == device: + return tensor.to(dtype, copy=copy, non_blocking=non_blocking) + return tensor.to(device, copy=copy, non_blocking=non_blocking).to(dtype, non_blocking=non_blocking) + else: + return tensor.to(device, non_blocking=non_blocking).to(dtype, non_blocking=non_blocking) + else: + return tensor.to(device, dtype, copy=copy, non_blocking=non_blocking) + +def xformers_enabled(): + global directml_enabled + global cpu_state + if cpu_state != CPUState.GPU: + return False + if is_intel_xpu(): + return False + if directml_enabled: + return False + return XFORMERS_IS_AVAILABLE + + +def xformers_enabled_vae(): + enabled = xformers_enabled() + if not enabled: + return False + + return XFORMERS_ENABLED_VAE + +def pytorch_attention_enabled(): + global ENABLE_PYTORCH_ATTENTION + return ENABLE_PYTORCH_ATTENTION + +def pytorch_attention_flash_attention(): + global ENABLE_PYTORCH_ATTENTION + if ENABLE_PYTORCH_ATTENTION: + #TODO: more reliable way of checking for flash attention? + if is_nvidia(): #pytorch flash attention only works on Nvidia + return True + return False + +def get_free_memory(dev=None, torch_free_too=False): + global directml_enabled + if dev is None: + dev = get_torch_device() + + if hasattr(dev, 'type') and (dev.type == 'cpu' or dev.type == 'mps'): + mem_free_total = psutil.virtual_memory().available + mem_free_torch = mem_free_total + else: + if directml_enabled: + mem_free_total = 1024 * 1024 * 1024 #TODO + mem_free_torch = mem_free_total + elif is_intel_xpu(): + stats = torch.xpu.memory_stats(dev) + mem_active = stats['active_bytes.all.current'] + mem_allocated = stats['allocated_bytes.all.current'] + mem_reserved = stats['reserved_bytes.all.current'] + mem_free_torch = mem_reserved - mem_active + mem_free_total = torch.xpu.get_device_properties(dev).total_memory - mem_allocated + else: + stats = torch.cuda.memory_stats(dev) + mem_active = stats['active_bytes.all.current'] + mem_reserved = stats['reserved_bytes.all.current'] + mem_free_cuda, _ = torch.cuda.mem_get_info(dev) + mem_free_torch = mem_reserved - mem_active + mem_free_total = mem_free_cuda + mem_free_torch + + if torch_free_too: + return (mem_free_total, mem_free_torch) + else: + return mem_free_total + +def cpu_mode(): + global cpu_state + return cpu_state == CPUState.CPU + +def mps_mode(): + global cpu_state + return cpu_state == CPUState.MPS + +def is_device_cpu(device): + if hasattr(device, 'type'): + if (device.type == 'cpu'): + return True + return False + +def is_device_mps(device): + if hasattr(device, 'type'): + if (device.type == 'mps'): + return True + return False + +def should_use_fp16(device=None, model_params=0, prioritize_performance=True, manual_cast=False): + global directml_enabled + + if device is not None: + if is_device_cpu(device): + return False + + if FORCE_FP16: + return True + + if device is not None: #TODO + if is_device_mps(device): + return False + + if FORCE_FP32: + return False + + if directml_enabled: + return False + + if cpu_mode() or mps_mode(): + return 
False #TODO ? + + if is_intel_xpu(): + return True + + if torch.version.hip: + return True + + props = torch.cuda.get_device_properties("cuda") + if props.major >= 8: + return True + + if props.major < 6: + return False + + fp16_works = False + #FP16 is confirmed working on a 1080 (GP104) but it's a bit slower than FP32 so it should only be enabled + #when the model doesn't actually fit on the card + #TODO: actually test if GP106 and others have the same type of behavior + nvidia_10_series = ["1080", "1070", "titan x", "p3000", "p3200", "p4000", "p4200", "p5000", "p5200", "p6000", "1060", "1050"] + for x in nvidia_10_series: + if x in props.name.lower(): + fp16_works = True + + if fp16_works or manual_cast: + free_model_memory = (get_free_memory() * 0.9 - minimum_inference_memory()) + if (not prioritize_performance) or model_params * 4 > free_model_memory: + return True + + if props.major < 7: + return False + + #FP16 is just broken on these cards + nvidia_16_series = ["1660", "1650", "1630", "T500", "T550", "T600", "MX550", "MX450", "CMP 30HX", "T2000", "T1000", "T1200"] + for x in nvidia_16_series: + if x in props.name: + return False + + return True + +def soft_empty_cache(force=False): + global cpu_state + if cpu_state == CPUState.MPS: + torch.mps.empty_cache() + elif is_intel_xpu(): + torch.xpu.empty_cache() + elif torch.cuda.is_available(): + if force or is_nvidia(): #This seems to make things worse on ROCm so I only do it for cuda + torch.cuda.empty_cache() + torch.cuda.ipc_collect() + +def unload_all_models(): + free_memory(1e30, get_torch_device()) + + +def resolve_lowvram_weight(weight, model, key): #TODO: remove + return weight + +#TODO: might be cleaner to put this somewhere else +import threading + +class InterruptProcessingException(Exception): + pass + +interrupt_processing_mutex = threading.RLock() + +interrupt_processing = False +def interrupt_current_processing(value=True): + global interrupt_processing + global interrupt_processing_mutex + with interrupt_processing_mutex: + interrupt_processing = value + +def processing_interrupted(): + global interrupt_processing + global interrupt_processing_mutex + with interrupt_processing_mutex: + return interrupt_processing + +def throw_exception_if_processing_interrupted(): + global interrupt_processing + global interrupt_processing_mutex + with interrupt_processing_mutex: + if interrupt_processing: + interrupt_processing = False + raise InterruptProcessingException() diff --git a/ldm_patched/modules/model_patcher.py b/ldm_patched/modules/model_patcher.py new file mode 100644 index 0000000000000000000000000000000000000000..d1093dc6d902559e716a0680cbd7bd4c412bc705 --- /dev/null +++ b/ldm_patched/modules/model_patcher.py @@ -0,0 +1,383 @@ +# 1st edit by https://github.com/comfyanonymous/ComfyUI +# 2nd edit by Forge Official + + +import torch +import copy +import inspect + +import ldm_patched.modules.utils +import ldm_patched.modules.model_management + + +extra_weight_calculators = {} + + +class ModelPatcher: + def __init__(self, model, load_device, offload_device, size=0, current_device=None, weight_inplace_update=False): + self.size = size + self.model = model + self.patches = {} + self.backup = {} + self.object_patches = {} + self.object_patches_backup = {} + self.model_options = {"transformer_options":{}} + self.model_size() + self.load_device = load_device + self.offload_device = offload_device + if current_device is None: + self.current_device = self.offload_device + else: + self.current_device = current_device + + 
self.weight_inplace_update = weight_inplace_update + + def model_size(self): + if self.size > 0: + return self.size + model_sd = self.model.state_dict() + self.size = ldm_patched.modules.model_management.module_size(self.model) + self.model_keys = set(model_sd.keys()) + return self.size + + def clone(self): + n = ModelPatcher(self.model, self.load_device, self.offload_device, self.size, self.current_device, weight_inplace_update=self.weight_inplace_update) + n.patches = {} + for k in self.patches: + n.patches[k] = self.patches[k][:] + + n.object_patches = self.object_patches.copy() + n.model_options = copy.deepcopy(self.model_options) + n.model_keys = self.model_keys + return n + + def is_clone(self, other): + if hasattr(other, 'model') and self.model is other.model: + return True + return False + + def memory_required(self, input_shape): + return self.model.memory_required(input_shape=input_shape) + + def set_model_sampler_cfg_function(self, sampler_cfg_function, disable_cfg1_optimization=False): + if len(inspect.signature(sampler_cfg_function).parameters) == 3: + self.model_options["sampler_cfg_function"] = lambda args: sampler_cfg_function(args["cond"], args["uncond"], args["cond_scale"]) #Old way + else: + self.model_options["sampler_cfg_function"] = sampler_cfg_function + if disable_cfg1_optimization: + self.model_options["disable_cfg1_optimization"] = True + + def set_model_sampler_post_cfg_function(self, post_cfg_function, disable_cfg1_optimization=False): + self.model_options["sampler_post_cfg_function"] = self.model_options.get("sampler_post_cfg_function", []) + [post_cfg_function] + if disable_cfg1_optimization: + self.model_options["disable_cfg1_optimization"] = True + + def set_model_unet_function_wrapper(self, unet_wrapper_function): + self.model_options["model_function_wrapper"] = unet_wrapper_function + + def set_model_vae_encode_wrapper(self, wrapper_function): + self.model_options["model_vae_encode_wrapper"] = wrapper_function + + def set_model_vae_decode_wrapper(self, wrapper_function): + self.model_options["model_vae_decode_wrapper"] = wrapper_function + + def set_model_patch(self, patch, name): + to = self.model_options["transformer_options"] + if "patches" not in to: + to["patches"] = {} + to["patches"][name] = to["patches"].get(name, []) + [patch] + + def set_model_patch_replace(self, patch, name, block_name, number, transformer_index=None): + to = self.model_options["transformer_options"] + if "patches_replace" not in to: + to["patches_replace"] = {} + if name not in to["patches_replace"]: + to["patches_replace"][name] = {} + if transformer_index is not None: + block = (block_name, number, transformer_index) + else: + block = (block_name, number) + to["patches_replace"][name][block] = patch + + def set_model_attn1_patch(self, patch): + self.set_model_patch(patch, "attn1_patch") + + def set_model_attn2_patch(self, patch): + self.set_model_patch(patch, "attn2_patch") + + def set_model_attn1_replace(self, patch, block_name, number, transformer_index=None): + self.set_model_patch_replace(patch, "attn1", block_name, number, transformer_index) + + def set_model_attn2_replace(self, patch, block_name, number, transformer_index=None): + self.set_model_patch_replace(patch, "attn2", block_name, number, transformer_index) + + def set_model_attn1_output_patch(self, patch): + self.set_model_patch(patch, "attn1_output_patch") + + def set_model_attn2_output_patch(self, patch): + self.set_model_patch(patch, "attn2_output_patch") + + def set_model_input_block_patch(self, patch): + 
self.set_model_patch(patch, "input_block_patch") + + def set_model_input_block_patch_after_skip(self, patch): + self.set_model_patch(patch, "input_block_patch_after_skip") + + def set_model_output_block_patch(self, patch): + self.set_model_patch(patch, "output_block_patch") + + def add_object_patch(self, name, obj): + self.object_patches[name] = obj + + def model_patches_to(self, device): + to = self.model_options["transformer_options"] + if "patches" in to: + patches = to["patches"] + for name in patches: + patch_list = patches[name] + for i in range(len(patch_list)): + if hasattr(patch_list[i], "to"): + patch_list[i] = patch_list[i].to(device) + if "patches_replace" in to: + patches = to["patches_replace"] + for name in patches: + patch_list = patches[name] + for k in patch_list: + if hasattr(patch_list[k], "to"): + patch_list[k] = patch_list[k].to(device) + if "model_function_wrapper" in self.model_options: + wrap_func = self.model_options["model_function_wrapper"] + if hasattr(wrap_func, "to"): + self.model_options["model_function_wrapper"] = wrap_func.to(device) + + def model_dtype(self): + if hasattr(self.model, "get_dtype"): + return self.model.get_dtype() + + def add_patches(self, patches, strength_patch=1.0, strength_model=1.0): + p = set() + for k in patches: + if k in self.model_keys: + p.add(k) + current_patches = self.patches.get(k, []) + current_patches.append((strength_patch, patches[k], strength_model)) + self.patches[k] = current_patches + + return list(p) + + def get_key_patches(self, filter_prefix=None): + ldm_patched.modules.model_management.unload_model_clones(self) + model_sd = self.model_state_dict() + p = {} + for k in model_sd: + if filter_prefix is not None: + if not k.startswith(filter_prefix): + continue + if k in self.patches: + p[k] = [model_sd[k]] + self.patches[k] + else: + p[k] = (model_sd[k],) + return p + + def model_state_dict(self, filter_prefix=None): + sd = self.model.state_dict() + keys = list(sd.keys()) + if filter_prefix is not None: + for k in keys: + if not k.startswith(filter_prefix): + sd.pop(k) + return sd + + def patch_model(self, device_to=None, patch_weights=True): + for k in self.object_patches: + old = ldm_patched.modules.utils.get_attr(self.model, k) + if k not in self.object_patches_backup: + self.object_patches_backup[k] = old + ldm_patched.modules.utils.set_attr_raw(self.model, k, self.object_patches[k]) + + if patch_weights: + model_sd = self.model_state_dict() + for key in self.patches: + if key not in model_sd: + print("could not patch. 
key doesn't exist in model:", key) + continue + + weight = model_sd[key] + + inplace_update = self.weight_inplace_update + + if key not in self.backup: + self.backup[key] = weight.to(device=self.offload_device, copy=inplace_update) + + if device_to is not None: + temp_weight = ldm_patched.modules.model_management.cast_to_device(weight, device_to, torch.float32, copy=True) + else: + temp_weight = weight.to(torch.float32, copy=True) + out_weight = self.calculate_weight(self.patches[key], temp_weight, key).to(weight.dtype) + if inplace_update: + ldm_patched.modules.utils.copy_to_param(self.model, key, out_weight) + else: + ldm_patched.modules.utils.set_attr(self.model, key, out_weight) + del temp_weight + + if device_to is not None: + self.model.to(device_to) + self.current_device = device_to + + return self.model + + def calculate_weight(self, patches, weight, key): + for p in patches: + alpha = p[0] + v = p[1] + strength_model = p[2] + + if strength_model != 1.0: + weight *= strength_model + + if isinstance(v, list): + v = (self.calculate_weight(v[1:], v[0].clone(), key), ) + + if len(v) == 1: + patch_type = "diff" + elif len(v) == 2: + patch_type = v[0] + v = v[1] + + if patch_type == "diff": + w1 = v[0] + if alpha != 0.0: + if w1.shape != weight.shape: + if w1.ndim == weight.ndim == 4: + new_shape = [max(n, m) for n, m in zip(weight.shape, w1.shape)] + print(f'Merged with {key} channel changed to {new_shape}') + new_diff = alpha * ldm_patched.modules.model_management.cast_to_device(w1, weight.device, weight.dtype) + new_weight = torch.zeros(size=new_shape).to(weight) + new_weight[:weight.shape[0], :weight.shape[1], :weight.shape[2], :weight.shape[3]] = weight + new_weight[:new_diff.shape[0], :new_diff.shape[1], :new_diff.shape[2], :new_diff.shape[3]] += new_diff + new_weight = new_weight.contiguous().clone() + weight = new_weight + else: + print("WARNING SHAPE MISMATCH {} WEIGHT NOT MERGED {} != {}".format(key, w1.shape, weight.shape)) + else: + weight += alpha * ldm_patched.modules.model_management.cast_to_device(w1, weight.device, weight.dtype) + elif patch_type == "lora": #lora/locon + mat1 = ldm_patched.modules.model_management.cast_to_device(v[0], weight.device, torch.float32) + mat2 = ldm_patched.modules.model_management.cast_to_device(v[1], weight.device, torch.float32) + if v[2] is not None: + alpha *= v[2] / mat2.shape[0] + if v[3] is not None: + #locon mid weights, hopefully the math is fine because I didn't properly test it + mat3 = ldm_patched.modules.model_management.cast_to_device(v[3], weight.device, torch.float32) + final_shape = [mat2.shape[1], mat2.shape[0], mat3.shape[2], mat3.shape[3]] + mat2 = torch.mm(mat2.transpose(0, 1).flatten(start_dim=1), mat3.transpose(0, 1).flatten(start_dim=1)).reshape(final_shape).transpose(0, 1) + try: + weight += (alpha * torch.mm(mat1.flatten(start_dim=1), mat2.flatten(start_dim=1))).reshape(weight.shape).type(weight.dtype) + except Exception as e: + print("ERROR", key, e) + elif patch_type == "lokr": + w1 = v[0] + w2 = v[1] + w1_a = v[3] + w1_b = v[4] + w2_a = v[5] + w2_b = v[6] + t2 = v[7] + dim = None + + if w1 is None: + dim = w1_b.shape[0] + w1 = torch.mm(ldm_patched.modules.model_management.cast_to_device(w1_a, weight.device, torch.float32), + ldm_patched.modules.model_management.cast_to_device(w1_b, weight.device, torch.float32)) + else: + w1 = ldm_patched.modules.model_management.cast_to_device(w1, weight.device, torch.float32) + + if w2 is None: + dim = w2_b.shape[0] + if t2 is None: + w2 = 
torch.mm(ldm_patched.modules.model_management.cast_to_device(w2_a, weight.device, torch.float32), + ldm_patched.modules.model_management.cast_to_device(w2_b, weight.device, torch.float32)) + else: + w2 = torch.einsum('i j k l, j r, i p -> p r k l', + ldm_patched.modules.model_management.cast_to_device(t2, weight.device, torch.float32), + ldm_patched.modules.model_management.cast_to_device(w2_b, weight.device, torch.float32), + ldm_patched.modules.model_management.cast_to_device(w2_a, weight.device, torch.float32)) + else: + w2 = ldm_patched.modules.model_management.cast_to_device(w2, weight.device, torch.float32) + + if len(w2.shape) == 4: + w1 = w1.unsqueeze(2).unsqueeze(2) + if v[2] is not None and dim is not None: + alpha *= v[2] / dim + + try: + weight += alpha * torch.kron(w1, w2).reshape(weight.shape).type(weight.dtype) + except Exception as e: + print("ERROR", key, e) + elif patch_type == "loha": + w1a = v[0] + w1b = v[1] + if v[2] is not None: + alpha *= v[2] / w1b.shape[0] + w2a = v[3] + w2b = v[4] + if v[5] is not None: #cp decomposition + t1 = v[5] + t2 = v[6] + m1 = torch.einsum('i j k l, j r, i p -> p r k l', + ldm_patched.modules.model_management.cast_to_device(t1, weight.device, torch.float32), + ldm_patched.modules.model_management.cast_to_device(w1b, weight.device, torch.float32), + ldm_patched.modules.model_management.cast_to_device(w1a, weight.device, torch.float32)) + + m2 = torch.einsum('i j k l, j r, i p -> p r k l', + ldm_patched.modules.model_management.cast_to_device(t2, weight.device, torch.float32), + ldm_patched.modules.model_management.cast_to_device(w2b, weight.device, torch.float32), + ldm_patched.modules.model_management.cast_to_device(w2a, weight.device, torch.float32)) + else: + m1 = torch.mm(ldm_patched.modules.model_management.cast_to_device(w1a, weight.device, torch.float32), + ldm_patched.modules.model_management.cast_to_device(w1b, weight.device, torch.float32)) + m2 = torch.mm(ldm_patched.modules.model_management.cast_to_device(w2a, weight.device, torch.float32), + ldm_patched.modules.model_management.cast_to_device(w2b, weight.device, torch.float32)) + + try: + weight += (alpha * m1 * m2).reshape(weight.shape).type(weight.dtype) + except Exception as e: + print("ERROR", key, e) + elif patch_type == "glora": + if v[4] is not None: + alpha *= v[4] / v[0].shape[0] + + a1 = ldm_patched.modules.model_management.cast_to_device(v[0].flatten(start_dim=1), weight.device, torch.float32) + a2 = ldm_patched.modules.model_management.cast_to_device(v[1].flatten(start_dim=1), weight.device, torch.float32) + b1 = ldm_patched.modules.model_management.cast_to_device(v[2].flatten(start_dim=1), weight.device, torch.float32) + b2 = ldm_patched.modules.model_management.cast_to_device(v[3].flatten(start_dim=1), weight.device, torch.float32) + + weight += ((torch.mm(b2, b1) + torch.mm(torch.mm(weight.flatten(start_dim=1), a2), a1)) * alpha).reshape(weight.shape).type(weight.dtype) + elif patch_type in extra_weight_calculators: + weight = extra_weight_calculators[patch_type](weight, alpha, v) + else: + print("patch type not recognized", patch_type, key) + + return weight + + def unpatch_model(self, device_to=None): + keys = list(self.backup.keys()) + + if self.weight_inplace_update: + for k in keys: + ldm_patched.modules.utils.copy_to_param(self.model, k, self.backup[k]) + else: + for k in keys: + ldm_patched.modules.utils.set_attr(self.model, k, self.backup[k]) + + self.backup = {} + + if device_to is not None: + self.model.to(device_to) + self.current_device = device_to + 
+ keys = list(self.object_patches_backup.keys()) + for k in keys: + ldm_patched.modules.utils.set_attr_raw(self.model, k, self.object_patches_backup[k]) + + self.object_patches_backup = {} diff --git a/ldm_patched/modules/model_sampling.py b/ldm_patched/modules/model_sampling.py new file mode 100644 index 0000000000000000000000000000000000000000..da5cc3a6e750be55e9f59fc6c8542c26b7f423a9 --- /dev/null +++ b/ldm_patched/modules/model_sampling.py @@ -0,0 +1,140 @@ +# 1st edit by https://github.com/comfyanonymous/ComfyUI +# 2nd edit by Forge Official + + +import torch +import numpy as np +from ldm_patched.ldm.modules.diffusionmodules.util import make_beta_schedule +import math + +class EPS: + def calculate_input(self, sigma, noise): + sigma = sigma.view(sigma.shape[:1] + (1,) * (noise.ndim - 1)) + return noise / (sigma ** 2 + self.sigma_data ** 2) ** 0.5 + + def calculate_denoised(self, sigma, model_output, model_input): + sigma = sigma.view(sigma.shape[:1] + (1,) * (model_output.ndim - 1)) + return model_input - model_output * sigma + + +class V_PREDICTION(EPS): + def calculate_denoised(self, sigma, model_output, model_input): + sigma = sigma.view(sigma.shape[:1] + (1,) * (model_output.ndim - 1)) + return model_input * self.sigma_data ** 2 / (sigma ** 2 + self.sigma_data ** 2) - model_output * sigma * self.sigma_data / (sigma ** 2 + self.sigma_data ** 2) ** 0.5 + + +class ModelSamplingDiscrete(torch.nn.Module): + def __init__(self, model_config=None): + super().__init__() + + if model_config is not None: + sampling_settings = model_config.sampling_settings + else: + sampling_settings = {} + + beta_schedule = sampling_settings.get("beta_schedule", "linear") + linear_start = sampling_settings.get("linear_start", 0.00085) + linear_end = sampling_settings.get("linear_end", 0.012) + + self._register_schedule(given_betas=None, beta_schedule=beta_schedule, timesteps=1000, linear_start=linear_start, linear_end=linear_end, cosine_s=8e-3) + self.sigma_data = 1.0 + + def _register_schedule(self, given_betas=None, beta_schedule="linear", timesteps=1000, + linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): + if given_betas is not None: + betas = given_betas + else: + betas = make_beta_schedule(beta_schedule, timesteps, linear_start=linear_start, linear_end=linear_end, cosine_s=cosine_s) + alphas = 1. 
- betas + alphas_cumprod = torch.tensor(np.cumprod(alphas, axis=0), dtype=torch.float32) + # alphas_cumprod_prev = np.append(1., alphas_cumprod[:-1]) + + timesteps, = betas.shape + self.num_timesteps = int(timesteps) + self.linear_start = linear_start + self.linear_end = linear_end + + # self.register_buffer('betas', torch.tensor(betas, dtype=torch.float32)) + # self.register_buffer('alphas_cumprod', torch.tensor(alphas_cumprod, dtype=torch.float32)) + # self.register_buffer('alphas_cumprod_prev', torch.tensor(alphas_cumprod_prev, dtype=torch.float32)) + + sigmas = ((1 - alphas_cumprod) / alphas_cumprod) ** 0.5 + self.set_sigmas(sigmas) + + def set_sigmas(self, sigmas): + self.register_buffer('sigmas', sigmas) + self.register_buffer('log_sigmas', sigmas.log()) + + @property + def sigma_min(self): + return self.sigmas[0] + + @property + def sigma_max(self): + return self.sigmas[-1] + + def timestep(self, sigma): + log_sigma = sigma.log() + dists = log_sigma.to(self.log_sigmas.device) - self.log_sigmas[:, None] + return dists.abs().argmin(dim=0).view(sigma.shape).to(sigma.device) + + def sigma(self, timestep): + t = torch.clamp(timestep.float().to(self.log_sigmas.device), min=0, max=(len(self.sigmas) - 1)) + low_idx = t.floor().long() + high_idx = t.ceil().long() + w = t.frac() + log_sigma = (1 - w) * self.log_sigmas[low_idx] + w * self.log_sigmas[high_idx] + return log_sigma.exp().to(timestep.device) + + def percent_to_sigma(self, percent): + if percent <= 0.0: + return 999999999.9 + if percent >= 1.0: + return 0.0 + percent = 1.0 - percent + return self.sigma(torch.tensor(percent * 999.0)).item() + + +class ModelSamplingContinuousEDM(torch.nn.Module): + def __init__(self, model_config=None): + super().__init__() + self.sigma_data = 1.0 + + if model_config is not None: + sampling_settings = model_config.sampling_settings + else: + sampling_settings = {} + + sigma_min = sampling_settings.get("sigma_min", 0.002) + sigma_max = sampling_settings.get("sigma_max", 120.0) + self.set_sigma_range(sigma_min, sigma_max) + + def set_sigma_range(self, sigma_min, sigma_max): + sigmas = torch.linspace(math.log(sigma_min), math.log(sigma_max), 1000).exp() + + self.register_buffer('sigmas', sigmas) #for compatibility with some schedulers + self.register_buffer('log_sigmas', sigmas.log()) + + @property + def sigma_min(self): + return self.sigmas[0] + + @property + def sigma_max(self): + return self.sigmas[-1] + + def timestep(self, sigma): + return 0.25 * sigma.log() + + def sigma(self, timestep): + return (timestep / 0.25).exp() + + def percent_to_sigma(self, percent): + if percent <= 0.0: + return 999999999.9 + if percent >= 1.0: + return 0.0 + percent = 1.0 - percent + + log_sigma_min = math.log(self.sigma_min) + return math.exp((math.log(self.sigma_max) - log_sigma_min) * percent + log_sigma_min) diff --git a/ldm_patched/modules/ops.py b/ldm_patched/modules/ops.py new file mode 100644 index 0000000000000000000000000000000000000000..beb8f2657d59bd625633efd87d29ad4cc0356e19 --- /dev/null +++ b/ldm_patched/modules/ops.py @@ -0,0 +1,189 @@ +# 1st edit by https://github.com/comfyanonymous/ComfyUI +# 2nd edit by Forge Official + + +import torch +import ldm_patched.modules.model_management +import contextlib + +from modules_forge import stream + + +# https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14855/files +stash = {} + + +@contextlib.contextmanager +def use_patched_ops(operations): + op_names = ['Linear', 'Conv2d', 'Conv3d', 'GroupNorm', 'LayerNorm'] + backups = {op_name: getattr(torch.nn, 
op_name) for op_name in op_names} + + try: + for op_name in op_names: + setattr(torch.nn, op_name, getattr(operations, op_name)) + + yield + + finally: + for op_name in op_names: + setattr(torch.nn, op_name, backups[op_name]) + return + + +def cast_bias_weight(s, input): + weight, bias, signal = None, None, None + non_blocking = ldm_patched.modules.model_management.device_supports_non_blocking(input.device) + + if stream.using_stream: + with stream.stream_context()(stream.mover_stream): + if s.bias is not None: + bias = s.bias.to(device=input.device, dtype=input.dtype, non_blocking=non_blocking) + weight = s.weight.to(device=input.device, dtype=input.dtype, non_blocking=non_blocking) + signal = stream.mover_stream.record_event() + else: + if s.bias is not None: + bias = s.bias.to(device=input.device, dtype=input.dtype, non_blocking=non_blocking) + weight = s.weight.to(device=input.device, dtype=input.dtype, non_blocking=non_blocking) + + return weight, bias, signal + + +@contextlib.contextmanager +def main_stream_worker(weight, bias, signal): + if not stream.using_stream or signal is None: + yield + return + + with stream.stream_context()(stream.current_stream): + stream.current_stream.wait_event(signal) + yield + finished_signal = stream.current_stream.record_event() + stash[id(finished_signal)] = (weight, bias, finished_signal) + + garbage = [] + for k, (w, b, s) in stash.items(): + if s.query(): + garbage.append(k) + + for k in garbage: + del stash[k] + return + + +def cleanup_cache(): + if not stream.using_stream: + return + + stream.current_stream.synchronize() + stream.mover_stream.synchronize() + stash.clear() + return + + +class disable_weight_init: + class Linear(torch.nn.Linear): + ldm_patched_cast_weights = False + def reset_parameters(self): + return None + + def forward_ldm_patched_cast_weights(self, input): + weight, bias, signal = cast_bias_weight(self, input) + with main_stream_worker(weight, bias, signal): + return torch.nn.functional.linear(input, weight, bias) + + def forward(self, *args, **kwargs): + if self.ldm_patched_cast_weights: + return self.forward_ldm_patched_cast_weights(*args, **kwargs) + else: + return super().forward(*args, **kwargs) + + class Conv2d(torch.nn.Conv2d): + ldm_patched_cast_weights = False + def reset_parameters(self): + return None + + def forward_ldm_patched_cast_weights(self, input): + weight, bias, signal = cast_bias_weight(self, input) + with main_stream_worker(weight, bias, signal): + return self._conv_forward(input, weight, bias) + + def forward(self, *args, **kwargs): + if self.ldm_patched_cast_weights: + return self.forward_ldm_patched_cast_weights(*args, **kwargs) + else: + return super().forward(*args, **kwargs) + + class Conv3d(torch.nn.Conv3d): + ldm_patched_cast_weights = False + def reset_parameters(self): + return None + + def forward_ldm_patched_cast_weights(self, input): + weight, bias, signal = cast_bias_weight(self, input) + with main_stream_worker(weight, bias, signal): + return self._conv_forward(input, weight, bias) + + def forward(self, *args, **kwargs): + if self.ldm_patched_cast_weights: + return self.forward_ldm_patched_cast_weights(*args, **kwargs) + else: + return super().forward(*args, **kwargs) + + class GroupNorm(torch.nn.GroupNorm): + ldm_patched_cast_weights = False + def reset_parameters(self): + return None + + def forward_ldm_patched_cast_weights(self, input): + weight, bias, signal = cast_bias_weight(self, input) + with main_stream_worker(weight, bias, signal): + return torch.nn.functional.group_norm(input, 
self.num_groups, weight, bias, self.eps) + + def forward(self, *args, **kwargs): + if self.ldm_patched_cast_weights: + return self.forward_ldm_patched_cast_weights(*args, **kwargs) + else: + return super().forward(*args, **kwargs) + + + class LayerNorm(torch.nn.LayerNorm): + ldm_patched_cast_weights = False + def reset_parameters(self): + return None + + def forward_ldm_patched_cast_weights(self, input): + weight, bias, signal = cast_bias_weight(self, input) + with main_stream_worker(weight, bias, signal): + return torch.nn.functional.layer_norm(input, self.normalized_shape, weight, bias, self.eps) + + def forward(self, *args, **kwargs): + if self.ldm_patched_cast_weights: + return self.forward_ldm_patched_cast_weights(*args, **kwargs) + else: + return super().forward(*args, **kwargs) + + @classmethod + def conv_nd(s, dims, *args, **kwargs): + if dims == 2: + return s.Conv2d(*args, **kwargs) + elif dims == 3: + return s.Conv3d(*args, **kwargs) + else: + raise ValueError(f"unsupported dimensions: {dims}") + + +class manual_cast(disable_weight_init): + class Linear(disable_weight_init.Linear): + ldm_patched_cast_weights = True + + class Conv2d(disable_weight_init.Conv2d): + ldm_patched_cast_weights = True + + class Conv3d(disable_weight_init.Conv3d): + ldm_patched_cast_weights = True + + class GroupNorm(disable_weight_init.GroupNorm): + ldm_patched_cast_weights = True + + class LayerNorm(disable_weight_init.LayerNorm): + ldm_patched_cast_weights = True diff --git a/ldm_patched/modules/options.py b/ldm_patched/modules/options.py new file mode 100644 index 0000000000000000000000000000000000000000..83fd1247f86c38073d1a7044e6276db2bb9daf11 --- /dev/null +++ b/ldm_patched/modules/options.py @@ -0,0 +1,9 @@ +# Taken from https://github.com/comfyanonymous/ComfyUI +# This file is only for reference, and not used in the backend or runtime. + + +args_parsing = False + +def enable_args_parsing(enable=True): + global args_parsing + args_parsing = enable diff --git a/ldm_patched/modules/sample.py b/ldm_patched/modules/sample.py new file mode 100644 index 0000000000000000000000000000000000000000..5d5ed694e9776b49703d5d3b80a345364b6af430 --- /dev/null +++ b/ldm_patched/modules/sample.py @@ -0,0 +1,122 @@ +# 1st edit by https://github.com/comfyanonymous/ComfyUI +# 2nd edit by Forge Official + + +import torch +import ldm_patched.modules.model_management +import ldm_patched.modules.samplers +import ldm_patched.modules.conds +import ldm_patched.modules.utils +import math +import numpy as np + +def prepare_noise(latent_image, seed, noise_inds=None): + """ + creates random noise given a latent image and a seed. 
+ optional arg skip can be used to skip and discard x number of noise generations for a given seed + """ + generator = torch.manual_seed(seed) + if noise_inds is None: + return torch.randn(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, generator=generator, device="cpu") + + unique_inds, inverse = np.unique(noise_inds, return_inverse=True) + noises = [] + for i in range(unique_inds[-1]+1): + noise = torch.randn([1] + list(latent_image.size())[1:], dtype=latent_image.dtype, layout=latent_image.layout, generator=generator, device="cpu") + if i in unique_inds: + noises.append(noise) + noises = [noises[i] for i in inverse] + noises = torch.cat(noises, axis=0) + return noises + +def prepare_mask(noise_mask, shape, device): + """ensures noise mask is of proper dimensions""" + noise_mask = torch.nn.functional.interpolate(noise_mask.reshape((-1, 1, noise_mask.shape[-2], noise_mask.shape[-1])), size=(shape[2], shape[3]), mode="bilinear") + noise_mask = torch.cat([noise_mask] * shape[1], dim=1) + noise_mask = ldm_patched.modules.utils.repeat_to_batch_size(noise_mask, shape[0]) + noise_mask = noise_mask.to(device) + return noise_mask + +def get_models_from_cond(cond, model_type): + models = [] + for c in cond: + if model_type in c: + models += [c[model_type]] + return models + +def convert_cond(cond): + out = [] + for c in cond: + temp = c[1].copy() + model_conds = temp.get("model_conds", {}) + if c[0] is not None: + model_conds["c_crossattn"] = ldm_patched.modules.conds.CONDCrossAttn(c[0]) #TODO: remove + temp["cross_attn"] = c[0] + temp["model_conds"] = model_conds + out.append(temp) + return out + +def get_additional_models(positive, negative, dtype): + """loads additional models in positive and negative conditioning""" + control_nets = set(get_models_from_cond(positive, "control") + get_models_from_cond(negative, "control")) + + inference_memory = 0 + control_models = [] + for m in control_nets: + control_models += m.get_models() + inference_memory += m.inference_memory_requirements(dtype) + + gligen = get_models_from_cond(positive, "gligen") + get_models_from_cond(negative, "gligen") + gligen = [x[1] for x in gligen] + models = control_models + gligen + return models, inference_memory + +def cleanup_additional_models(models): + """cleanup additional models that were loaded""" + for m in models: + if hasattr(m, 'cleanup'): + m.cleanup() + +def prepare_sampling(model, noise_shape, positive, negative, noise_mask): + device = model.load_device + positive = convert_cond(positive) + negative = convert_cond(negative) + + if noise_mask is not None: + noise_mask = prepare_mask(noise_mask, noise_shape, device) + + real_model = None + models, inference_memory = get_additional_models(positive, negative, model.model_dtype()) + ldm_patched.modules.model_management.load_models_gpu([model] + models, model.memory_required([noise_shape[0] * 2] + list(noise_shape[1:])) + inference_memory) + real_model = model.model + + return real_model, positive, negative, noise_mask, models + + +def sample(model, noise, steps, cfg, sampler_name, scheduler, positive, negative, latent_image, denoise=1.0, disable_noise=False, start_step=None, last_step=None, force_full_denoise=False, noise_mask=None, sigmas=None, callback=None, disable_pbar=False, seed=None): + real_model, positive_copy, negative_copy, noise_mask, models = prepare_sampling(model, noise.shape, positive, negative, noise_mask) + + noise = noise.to(model.load_device) + latent_image = latent_image.to(model.load_device) + + sampler = 
ldm_patched.modules.samplers.KSampler(real_model, steps=steps, device=model.load_device, sampler=sampler_name, scheduler=scheduler, denoise=denoise, model_options=model.model_options) + + samples = sampler.sample(noise, positive_copy, negative_copy, cfg=cfg, latent_image=latent_image, start_step=start_step, last_step=last_step, force_full_denoise=force_full_denoise, denoise_mask=noise_mask, sigmas=sigmas, callback=callback, disable_pbar=disable_pbar, seed=seed) + samples = samples.to(ldm_patched.modules.model_management.intermediate_device()) + + cleanup_additional_models(models) + cleanup_additional_models(set(get_models_from_cond(positive_copy, "control") + get_models_from_cond(negative_copy, "control"))) + return samples + +def sample_custom(model, noise, cfg, sampler, sigmas, positive, negative, latent_image, noise_mask=None, callback=None, disable_pbar=False, seed=None): + real_model, positive_copy, negative_copy, noise_mask, models = prepare_sampling(model, noise.shape, positive, negative, noise_mask) + noise = noise.to(model.load_device) + latent_image = latent_image.to(model.load_device) + sigmas = sigmas.to(model.load_device) + + samples = ldm_patched.modules.samplers.sample(real_model, noise, positive_copy, negative_copy, cfg, model.load_device, sampler, sigmas, model_options=model.model_options, latent_image=latent_image, denoise_mask=noise_mask, callback=callback, disable_pbar=disable_pbar, seed=seed) + samples = samples.to(ldm_patched.modules.model_management.intermediate_device()) + cleanup_additional_models(models) + cleanup_additional_models(set(get_models_from_cond(positive_copy, "control") + get_models_from_cond(negative_copy, "control"))) + return samples + diff --git a/ldm_patched/modules/samplers.py b/ldm_patched/modules/samplers.py new file mode 100644 index 0000000000000000000000000000000000000000..e8f53e13aa168c96d9e1042470960532ff8843bf --- /dev/null +++ b/ldm_patched/modules/samplers.py @@ -0,0 +1,756 @@ +# 1st edit by https://github.com/comfyanonymous/ComfyUI +# 2nd edit by Forge Official + + +from ldm_patched.k_diffusion import sampling as k_diffusion_sampling +from ldm_patched.unipc import uni_pc +import torch +import collections +from ldm_patched.modules import model_management +import math + +def get_area_and_mult(conds, x_in, timestep_in): + area = (x_in.shape[2], x_in.shape[3], 0, 0) + strength = 1.0 + + if 'timestep_start' in conds: + timestep_start = conds['timestep_start'] + if timestep_in[0] > timestep_start: + return None + if 'timestep_end' in conds: + timestep_end = conds['timestep_end'] + if timestep_in[0] < timestep_end: + return None + if 'area' in conds: + area = conds['area'] + if 'strength' in conds: + strength = conds['strength'] + + input_x = x_in[:,:,area[2]:area[0] + area[2],area[3]:area[1] + area[3]] + if 'mask' in conds: + # Scale the mask to the size of the input + # The mask should have been resized as we began the sampling process + mask_strength = 1.0 + if "mask_strength" in conds: + mask_strength = conds["mask_strength"] + mask = conds['mask'] + assert(mask.shape[1] == x_in.shape[2]) + assert(mask.shape[2] == x_in.shape[3]) + mask = mask[:,area[2]:area[0] + area[2],area[3]:area[1] + area[3]] * mask_strength + mask = mask.unsqueeze(1).repeat(input_x.shape[0] // mask.shape[0], input_x.shape[1], 1, 1) + else: + mask = torch.ones_like(input_x) + mult = mask * strength + + if 'mask' not in conds: + rr = 8 + if area[2] != 0: + for t in range(rr): + mult[:,:,t:1+t,:] *= ((1.0/rr) * (t + 1)) + if (area[0] + area[2]) < x_in.shape[2]: + for t 
in range(rr): + mult[:,:,area[0] - 1 - t:area[0] - t,:] *= ((1.0/rr) * (t + 1)) + if area[3] != 0: + for t in range(rr): + mult[:,:,:,t:1+t] *= ((1.0/rr) * (t + 1)) + if (area[1] + area[3]) < x_in.shape[3]: + for t in range(rr): + mult[:,:,:,area[1] - 1 - t:area[1] - t] *= ((1.0/rr) * (t + 1)) + + conditioning = {} + model_conds = conds["model_conds"] + for c in model_conds: + conditioning[c] = model_conds[c].process_cond(batch_size=x_in.shape[0], device=x_in.device, area=area) + + control = conds.get('control', None) + + patches = None + if 'gligen' in conds: + gligen = conds['gligen'] + patches = {} + gligen_type = gligen[0] + gligen_model = gligen[1] + if gligen_type == "position": + gligen_patch = gligen_model.model.set_position(input_x.shape, gligen[2], input_x.device) + else: + gligen_patch = gligen_model.model.set_empty(input_x.shape, input_x.device) + + patches['middle_patch'] = [gligen_patch] + + cond_obj = collections.namedtuple('cond_obj', ['input_x', 'mult', 'conditioning', 'area', 'control', 'patches']) + return cond_obj(input_x, mult, conditioning, area, control, patches) + +def cond_equal_size(c1, c2): + if c1 is c2: + return True + if c1.keys() != c2.keys(): + return False + for k in c1: + if not c1[k].can_concat(c2[k]): + return False + return True + +def can_concat_cond(c1, c2): + if c1.input_x.shape != c2.input_x.shape: + return False + + def objects_concatable(obj1, obj2): + if (obj1 is None) != (obj2 is None): + return False + if obj1 is not None: + if obj1 is not obj2: + return False + return True + + if not objects_concatable(c1.control, c2.control): + return False + + if not objects_concatable(c1.patches, c2.patches): + return False + + return cond_equal_size(c1.conditioning, c2.conditioning) + +def cond_cat(c_list): + c_crossattn = [] + c_concat = [] + c_adm = [] + crossattn_max_len = 0 + + temp = {} + for x in c_list: + for k in x: + cur = temp.get(k, []) + cur.append(x[k]) + temp[k] = cur + + out = {} + for k in temp: + conds = temp[k] + out[k] = conds[0].concat(conds[1:]) + + return out + +def compute_cond_mark(cond_or_uncond, sigmas): + cond_or_uncond_size = int(sigmas.shape[0]) + + cond_mark = [] + for cx in cond_or_uncond: + cond_mark += [cx] * cond_or_uncond_size + + cond_mark = torch.Tensor(cond_mark).to(sigmas) + return cond_mark + +def compute_cond_indices(cond_or_uncond, sigmas): + cl = int(sigmas.shape[0]) + + cond_indices = [] + uncond_indices = [] + for i, cx in enumerate(cond_or_uncond): + if cx == 0: + cond_indices += list(range(i * cl, (i + 1) * cl)) + else: + uncond_indices += list(range(i * cl, (i + 1) * cl)) + + return cond_indices, uncond_indices + +def calc_cond_uncond_batch(model, cond, uncond, x_in, timestep, model_options): + out_cond = torch.zeros_like(x_in) + out_count = torch.ones_like(x_in) * 1e-37 + + out_uncond = torch.zeros_like(x_in) + out_uncond_count = torch.ones_like(x_in) * 1e-37 + + COND = 0 + UNCOND = 1 + + to_run = [] + for x in cond: + p = get_area_and_mult(x, x_in, timestep) + if p is None: + continue + + to_run += [(p, COND)] + if uncond is not None: + for x in uncond: + p = get_area_and_mult(x, x_in, timestep) + if p is None: + continue + + to_run += [(p, UNCOND)] + + while len(to_run) > 0: + first = to_run[0] + first_shape = first[0][0].shape + to_batch_temp = [] + for x in range(len(to_run)): + if can_concat_cond(to_run[x][0], first[0]): + to_batch_temp += [x] + + to_batch_temp.reverse() + to_batch = to_batch_temp[:1] + + free_memory = model_management.get_free_memory(x_in.device) + for i in range(1, len(to_batch_temp) 
+ 1): + batch_amount = to_batch_temp[:len(to_batch_temp)//i] + input_shape = [len(batch_amount) * first_shape[0]] + list(first_shape)[1:] + if model.memory_required(input_shape) < free_memory: + to_batch = batch_amount + break + + input_x = [] + mult = [] + c = [] + cond_or_uncond = [] + area = [] + control = None + patches = None + for x in to_batch: + o = to_run.pop(x) + p = o[0] + input_x.append(p.input_x) + mult.append(p.mult) + c.append(p.conditioning) + area.append(p.area) + cond_or_uncond.append(o[1]) + control = p.control + patches = p.patches + + batch_chunks = len(cond_or_uncond) + input_x = torch.cat(input_x) + c = cond_cat(c) + timestep_ = torch.cat([timestep] * batch_chunks) + + transformer_options = {} + if 'transformer_options' in model_options: + transformer_options = model_options['transformer_options'].copy() + + if patches is not None: + if "patches" in transformer_options: + cur_patches = transformer_options["patches"].copy() + for p in patches: + if p in cur_patches: + cur_patches[p] = cur_patches[p] + patches[p] + else: + cur_patches[p] = patches[p] + else: + transformer_options["patches"] = patches + + transformer_options["cond_or_uncond"] = cond_or_uncond[:] + transformer_options["sigmas"] = timestep + + transformer_options["cond_mark"] = compute_cond_mark(cond_or_uncond=cond_or_uncond, sigmas=timestep) + transformer_options["cond_indices"], transformer_options["uncond_indices"] = compute_cond_indices(cond_or_uncond=cond_or_uncond, sigmas=timestep) + + c['transformer_options'] = transformer_options + + if control is not None: + p = control + while p is not None: + p.transformer_options = transformer_options + p = p.previous_controlnet + control_cond = c.copy() # get_control may change items in this dict, so we need to copy it + c['control'] = control.get_control(input_x, timestep_, control_cond, len(cond_or_uncond)) + c['control_model'] = control + + if 'model_function_wrapper' in model_options: + output = model_options['model_function_wrapper'](model.apply_model, {"input": input_x, "timestep": timestep_, "c": c, "cond_or_uncond": cond_or_uncond}).chunk(batch_chunks) + else: + output = model.apply_model(input_x, timestep_, **c).chunk(batch_chunks) + del input_x + + for o in range(batch_chunks): + if cond_or_uncond[o] == COND: + out_cond[:,:,area[o][2]:area[o][0] + area[o][2],area[o][3]:area[o][1] + area[o][3]] += output[o] * mult[o] + out_count[:,:,area[o][2]:area[o][0] + area[o][2],area[o][3]:area[o][1] + area[o][3]] += mult[o] + else: + out_uncond[:,:,area[o][2]:area[o][0] + area[o][2],area[o][3]:area[o][1] + area[o][3]] += output[o] * mult[o] + out_uncond_count[:,:,area[o][2]:area[o][0] + area[o][2],area[o][3]:area[o][1] + area[o][3]] += mult[o] + del mult + + out_cond /= out_count + del out_count + out_uncond /= out_uncond_count + del out_uncond_count + return out_cond, out_uncond + +#The main sampling function shared by all the samplers +#Returns denoised +def sampling_function(model, x, timestep, uncond, cond, cond_scale, model_options={}, seed=None): + edit_strength = sum((item['strength'] if 'strength' in item else 1) for item in cond) + + if math.isclose(cond_scale, 1.0) and model_options.get("disable_cfg1_optimization", False) == False: + uncond_ = None + else: + uncond_ = uncond + + for fn in model_options.get("sampler_pre_cfg_function", []): + model, cond, uncond_, x, timestep, model_options = fn(model, cond, uncond_, x, timestep, model_options) + + cond_pred, uncond_pred = calc_cond_uncond_batch(model, cond, uncond_, x, timestep, model_options) + + if 
"sampler_cfg_function" in model_options: + args = {"cond": x - cond_pred, "uncond": x - uncond_pred, "cond_scale": cond_scale, "timestep": timestep, "input": x, "sigma": timestep, + "cond_denoised": cond_pred, "uncond_denoised": uncond_pred, "model": model, "model_options": model_options} + cfg_result = x - model_options["sampler_cfg_function"](args) + elif not math.isclose(edit_strength, 1.0): + cfg_result = uncond_pred + (cond_pred - uncond_pred) * cond_scale * edit_strength + else: + cfg_result = uncond_pred + (cond_pred - uncond_pred) * cond_scale + + for fn in model_options.get("sampler_post_cfg_function", []): + args = {"denoised": cfg_result, "cond": cond, "uncond": uncond, "model": model, "uncond_denoised": uncond_pred, "cond_denoised": cond_pred, + "sigma": timestep, "model_options": model_options, "input": x} + cfg_result = fn(args) + + return cfg_result + +class CFGNoisePredictor(torch.nn.Module): + def __init__(self, model): + super().__init__() + self.inner_model = model + def apply_model(self, x, timestep, cond, uncond, cond_scale, model_options={}, seed=None): + out = sampling_function(self.inner_model, x, timestep, uncond, cond, cond_scale, model_options=model_options, seed=seed) + return out + def forward(self, *args, **kwargs): + return self.apply_model(*args, **kwargs) + +class KSamplerX0Inpaint(torch.nn.Module): + def __init__(self, model): + super().__init__() + self.inner_model = model + def forward(self, x, sigma, uncond, cond, cond_scale, denoise_mask, model_options={}, seed=None): + if denoise_mask is not None: + latent_mask = 1. - denoise_mask + x = x * denoise_mask + (self.latent_image + self.noise * sigma.reshape([sigma.shape[0]] + [1] * (len(self.noise.shape) - 1))) * latent_mask + out = self.inner_model(x, sigma, cond=cond, uncond=uncond, cond_scale=cond_scale, model_options=model_options, seed=seed) + if denoise_mask is not None: + out = out * denoise_mask + self.latent_image * latent_mask + return out + +def simple_scheduler(model, steps): + s = model.model_sampling + sigs = [] + ss = len(s.sigmas) / steps + for x in range(steps): + sigs += [float(s.sigmas[-(1 + int(x * ss))])] + sigs += [0.0] + return torch.FloatTensor(sigs) + +def ddim_scheduler(model, steps): + s = model.model_sampling + sigs = [] + ss = len(s.sigmas) // steps + x = 1 + while x < len(s.sigmas): + sigs += [float(s.sigmas[x])] + x += ss + sigs = sigs[::-1] + sigs += [0.0] + return torch.FloatTensor(sigs) + +def normal_scheduler(model, steps, sgm=False, floor=False): + s = model.model_sampling + start = s.timestep(s.sigma_max) + end = s.timestep(s.sigma_min) + + if sgm: + timesteps = torch.linspace(start, end, steps + 1)[:-1] + else: + timesteps = torch.linspace(start, end, steps) + + sigs = [] + for x in range(len(timesteps)): + ts = timesteps[x] + sigs.append(s.sigma(ts)) + sigs += [0.0] + return torch.FloatTensor(sigs) + +def get_mask_aabb(masks): + if masks.numel() == 0: + return torch.zeros((0, 4), device=masks.device, dtype=torch.int) + + b = masks.shape[0] + + bounding_boxes = torch.zeros((b, 4), device=masks.device, dtype=torch.int) + is_empty = torch.zeros((b), device=masks.device, dtype=torch.bool) + for i in range(b): + mask = masks[i] + if mask.numel() == 0: + continue + if torch.max(mask != 0) == False: + is_empty[i] = True + continue + y, x = torch.where(mask) + bounding_boxes[i, 0] = torch.min(x) + bounding_boxes[i, 1] = torch.min(y) + bounding_boxes[i, 2] = torch.max(x) + bounding_boxes[i, 3] = torch.max(y) + + return bounding_boxes, is_empty + +def 
resolve_areas_and_cond_masks(conditions, h, w, device): + # We need to decide on an area outside the sampling loop in order to properly generate opposite areas of equal sizes. + # While we're doing this, we can also resolve the mask device and scaling for performance reasons + for i in range(len(conditions)): + c = conditions[i] + if 'area' in c: + area = c['area'] + if area[0] == "percentage": + modified = c.copy() + area = (max(1, round(area[1] * h)), max(1, round(area[2] * w)), round(area[3] * h), round(area[4] * w)) + modified['area'] = area + c = modified + conditions[i] = c + + if 'mask' in c: + mask = c['mask'] + mask = mask.to(device=device) + modified = c.copy() + if len(mask.shape) == 2: + mask = mask.unsqueeze(0) + if mask.shape[1] != h or mask.shape[2] != w: + mask = torch.nn.functional.interpolate(mask.unsqueeze(1), size=(h, w), mode='bilinear', align_corners=False).squeeze(1) + + if modified.get("set_area_to_bounds", False): + bounds = torch.max(torch.abs(mask),dim=0).values.unsqueeze(0) + boxes, is_empty = get_mask_aabb(bounds) + if is_empty[0]: + # Use the minimum possible size for efficiency reasons. (Since the mask is all-0, this becomes a noop anyway) + modified['area'] = (8, 8, 0, 0) + else: + box = boxes[0] + H, W, Y, X = (box[3] - box[1] + 1, box[2] - box[0] + 1, box[1], box[0]) + H = max(8, H) + W = max(8, W) + area = (int(H), int(W), int(Y), int(X)) + modified['area'] = area + + modified['mask'] = mask + conditions[i] = modified + +def create_cond_with_same_area_if_none(conds, c): + if 'area' not in c: + return + + c_area = c['area'] + smallest = None + for x in conds: + if 'area' in x: + a = x['area'] + if c_area[2] >= a[2] and c_area[3] >= a[3]: + if a[0] + a[2] >= c_area[0] + c_area[2]: + if a[1] + a[3] >= c_area[1] + c_area[3]: + if smallest is None: + smallest = x + elif 'area' not in smallest: + smallest = x + else: + if smallest['area'][0] * smallest['area'][1] > a[0] * a[1]: + smallest = x + else: + if smallest is None: + smallest = x + if smallest is None: + return + if 'area' in smallest: + if smallest['area'] == c_area: + return + + out = c.copy() + out['model_conds'] = smallest['model_conds'].copy() #TODO: which fields should be copied? 
+ conds += [out] + +def calculate_start_end_timesteps(model, conds): + s = model.model_sampling + for t in range(len(conds)): + x = conds[t] + + timestep_start = None + timestep_end = None + if 'start_percent' in x: + timestep_start = s.percent_to_sigma(x['start_percent']) + if 'end_percent' in x: + timestep_end = s.percent_to_sigma(x['end_percent']) + + if (timestep_start is not None) or (timestep_end is not None): + n = x.copy() + if (timestep_start is not None): + n['timestep_start'] = timestep_start + if (timestep_end is not None): + n['timestep_end'] = timestep_end + conds[t] = n + +def pre_run_control(model, conds): + s = model.model_sampling + for t in range(len(conds)): + x = conds[t] + + timestep_start = None + timestep_end = None + percent_to_timestep_function = lambda a: s.percent_to_sigma(a) + if 'control' in x: + x['control'].pre_run(model, percent_to_timestep_function) + +def apply_empty_x_to_equal_area(conds, uncond, name, uncond_fill_func): + cond_cnets = [] + cond_other = [] + uncond_cnets = [] + uncond_other = [] + for t in range(len(conds)): + x = conds[t] + if 'area' not in x: + if name in x and x[name] is not None: + cond_cnets.append(x[name]) + else: + cond_other.append((x, t)) + for t in range(len(uncond)): + x = uncond[t] + if 'area' not in x: + if name in x and x[name] is not None: + uncond_cnets.append(x[name]) + else: + uncond_other.append((x, t)) + + if len(uncond_cnets) > 0: + return + + for x in range(len(cond_cnets)): + temp = uncond_other[x % len(uncond_other)] + o = temp[0] + if name in o and o[name] is not None: + n = o.copy() + n[name] = uncond_fill_func(cond_cnets, x) + uncond += [n] + else: + n = o.copy() + n[name] = uncond_fill_func(cond_cnets, x) + uncond[temp[1]] = n + +def encode_model_conds(model_function, conds, noise, device, prompt_type, **kwargs): + for t in range(len(conds)): + x = conds[t] + params = x.copy() + params["device"] = device + params["noise"] = noise + params["width"] = params.get("width", noise.shape[3] * 8) + params["height"] = params.get("height", noise.shape[2] * 8) + params["prompt_type"] = params.get("prompt_type", prompt_type) + for k in kwargs: + if k not in params: + params[k] = kwargs[k] + + out = model_function(**params) + x = x.copy() + model_conds = x['model_conds'].copy() + for k in out: + model_conds[k] = out[k] + x['model_conds'] = model_conds + conds[t] = x + return conds + +class Sampler: + def sample(self): + pass + + def max_denoise(self, model_wrap, sigmas): + max_sigma = float(model_wrap.inner_model.model_sampling.sigma_max) + sigma = float(sigmas[0]) + return math.isclose(max_sigma, sigma, rel_tol=1e-05) or sigma > max_sigma + +class UNIPC(Sampler): + def sample(self, model_wrap, sigmas, extra_args, callback, noise, latent_image=None, denoise_mask=None, disable_pbar=False): + return uni_pc.sample_unipc(model_wrap, noise, latent_image, sigmas, max_denoise=self.max_denoise(model_wrap, sigmas), extra_args=extra_args, noise_mask=denoise_mask, callback=callback, disable=disable_pbar) + +class UNIPCBH2(Sampler): + def sample(self, model_wrap, sigmas, extra_args, callback, noise, latent_image=None, denoise_mask=None, disable_pbar=False): + return uni_pc.sample_unipc(model_wrap, noise, latent_image, sigmas, max_denoise=self.max_denoise(model_wrap, sigmas), extra_args=extra_args, noise_mask=denoise_mask, callback=callback, variant='bh2', disable=disable_pbar) + +KSAMPLER_NAMES = ["euler", "euler_ancestral", "heun", "heunpp2","dpm_2", "dpm_2_ancestral", + "lms", "dpm_fast", "dpm_adaptive", "dpmpp_2s_ancestral", 
"dpmpp_sde", "dpmpp_sde_gpu", + "dpmpp_2m", "dpmpp_2m_sde", "dpmpp_2m_sde_gpu", "dpmpp_3m_sde", "dpmpp_3m_sde_gpu", "ddpm", "lcm"] + +class KSAMPLER(Sampler): + def __init__(self, sampler_function, extra_options={}, inpaint_options={}): + self.sampler_function = sampler_function + self.extra_options = extra_options + self.inpaint_options = inpaint_options + + def sample(self, model_wrap, sigmas, extra_args, callback, noise, latent_image=None, denoise_mask=None, disable_pbar=False): + extra_args["denoise_mask"] = denoise_mask + model_k = KSamplerX0Inpaint(model_wrap) + model_k.latent_image = latent_image + if self.inpaint_options.get("random", False): #TODO: Should this be the default? + generator = torch.manual_seed(extra_args.get("seed", 41) + 1) + model_k.noise = torch.randn(noise.shape, generator=generator, device="cpu").to(noise.dtype).to(noise.device) + else: + model_k.noise = noise + + if self.max_denoise(model_wrap, sigmas): + noise = noise * torch.sqrt(1.0 + sigmas[0] ** 2.0) + else: + noise = noise * sigmas[0] + + k_callback = None + total_steps = len(sigmas) - 1 + if callback is not None: + k_callback = lambda x: callback(x["i"], x["denoised"], x["x"], total_steps) + + if latent_image is not None: + noise += latent_image + + samples = self.sampler_function(model_k, noise, sigmas, extra_args=extra_args, callback=k_callback, disable=disable_pbar, **self.extra_options) + return samples + + +def ksampler(sampler_name, extra_options={}, inpaint_options={}): + if sampler_name == "dpm_fast": + def dpm_fast_function(model, noise, sigmas, extra_args, callback, disable): + sigma_min = sigmas[-1] + if sigma_min == 0: + sigma_min = sigmas[-2] + total_steps = len(sigmas) - 1 + return k_diffusion_sampling.sample_dpm_fast(model, noise, sigma_min, sigmas[0], total_steps, extra_args=extra_args, callback=callback, disable=disable) + sampler_function = dpm_fast_function + elif sampler_name == "dpm_adaptive": + def dpm_adaptive_function(model, noise, sigmas, extra_args, callback, disable): + sigma_min = sigmas[-1] + if sigma_min == 0: + sigma_min = sigmas[-2] + return k_diffusion_sampling.sample_dpm_adaptive(model, noise, sigma_min, sigmas[0], extra_args=extra_args, callback=callback, disable=disable) + sampler_function = dpm_adaptive_function + else: + sampler_function = getattr(k_diffusion_sampling, "sample_{}".format(sampler_name)) + + return KSAMPLER(sampler_function, extra_options, inpaint_options) + +def wrap_model(model): + model_denoise = CFGNoisePredictor(model) + return model_denoise + +def sample(model, noise, positive, negative, cfg, device, sampler, sigmas, model_options={}, latent_image=None, denoise_mask=None, callback=None, disable_pbar=False, seed=None): + positive = positive[:] + negative = negative[:] + + resolve_areas_and_cond_masks(positive, noise.shape[2], noise.shape[3], device) + resolve_areas_and_cond_masks(negative, noise.shape[2], noise.shape[3], device) + + model_wrap = wrap_model(model) + + calculate_start_end_timesteps(model, negative) + calculate_start_end_timesteps(model, positive) + + if latent_image is not None: + latent_image = model.process_latent_in(latent_image) + + if hasattr(model, 'extra_conds'): + positive = encode_model_conds(model.extra_conds, positive, noise, device, "positive", latent_image=latent_image, denoise_mask=denoise_mask, seed=seed) + negative = encode_model_conds(model.extra_conds, negative, noise, device, "negative", latent_image=latent_image, denoise_mask=denoise_mask, seed=seed) + + #make sure each cond area has an opposite one with the same 
area + for c in positive: + create_cond_with_same_area_if_none(negative, c) + for c in negative: + create_cond_with_same_area_if_none(positive, c) + + pre_run_control(model, negative + positive) + + apply_empty_x_to_equal_area(list(filter(lambda c: c.get('control_apply_to_uncond', False) == True, positive)), negative, 'control', lambda cond_cnets, x: cond_cnets[x]) + apply_empty_x_to_equal_area(positive, negative, 'gligen', lambda cond_cnets, x: cond_cnets[x]) + + extra_args = {"cond":positive, "uncond":negative, "cond_scale": cfg, "model_options": model_options, "seed":seed} + + samples = sampler.sample(model_wrap, sigmas, extra_args, callback, noise, latent_image, denoise_mask, disable_pbar) + return model.process_latent_out(samples.to(torch.float32)) + +SCHEDULER_NAMES = ["normal", "karras", "exponential", "sgm_uniform", "simple", "ddim_uniform"] +SAMPLER_NAMES = KSAMPLER_NAMES + ["ddim", "uni_pc", "uni_pc_bh2"] + +def calculate_sigmas_scheduler(model, scheduler_name, steps): + if scheduler_name == "karras": + sigmas = k_diffusion_sampling.get_sigmas_karras(n=steps, sigma_min=float(model.model_sampling.sigma_min), sigma_max=float(model.model_sampling.sigma_max)) + elif scheduler_name == "exponential": + sigmas = k_diffusion_sampling.get_sigmas_exponential(n=steps, sigma_min=float(model.model_sampling.sigma_min), sigma_max=float(model.model_sampling.sigma_max)) + elif scheduler_name == "normal": + sigmas = normal_scheduler(model, steps) + elif scheduler_name == "simple": + sigmas = simple_scheduler(model, steps) + elif scheduler_name == "ddim_uniform": + sigmas = ddim_scheduler(model, steps) + elif scheduler_name == "sgm_uniform": + sigmas = normal_scheduler(model, steps, sgm=True) + else: + print("error invalid scheduler", scheduler_name) + return sigmas + +def sampler_object(name): + if name == "uni_pc": + sampler = UNIPC() + elif name == "uni_pc_bh2": + sampler = UNIPCBH2() + elif name == "ddim": + sampler = ksampler("euler", inpaint_options={"random": True}) + else: + sampler = ksampler(name) + return sampler + +class KSampler: + SCHEDULERS = SCHEDULER_NAMES + SAMPLERS = SAMPLER_NAMES + + def __init__(self, model, steps, device, sampler=None, scheduler=None, denoise=None, model_options={}): + self.model = model + self.device = device + if scheduler not in self.SCHEDULERS: + scheduler = self.SCHEDULERS[0] + if sampler not in self.SAMPLERS: + sampler = self.SAMPLERS[0] + self.scheduler = scheduler + self.sampler = sampler + self.set_steps(steps, denoise) + self.denoise = denoise + self.model_options = model_options + + def calculate_sigmas(self, steps): + sigmas = None + + discard_penultimate_sigma = False + if self.sampler in ['dpm_2', 'dpm_2_ancestral', 'uni_pc', 'uni_pc_bh2']: + steps += 1 + discard_penultimate_sigma = True + + sigmas = calculate_sigmas_scheduler(self.model, self.scheduler, steps) + + if discard_penultimate_sigma: + sigmas = torch.cat([sigmas[:-2], sigmas[-1:]]) + return sigmas + + def set_steps(self, steps, denoise=None): + self.steps = steps + if denoise is None or denoise > 0.9999: + self.sigmas = self.calculate_sigmas(steps).to(self.device) + else: + new_steps = int(steps/denoise) + sigmas = self.calculate_sigmas(new_steps).to(self.device) + self.sigmas = sigmas[-(steps + 1):] + + def sample(self, noise, positive, negative, cfg, latent_image=None, start_step=None, last_step=None, force_full_denoise=False, denoise_mask=None, sigmas=None, callback=None, disable_pbar=False, seed=None): + if sigmas is None: + sigmas = self.sigmas + + if last_step is not None and 
last_step < (len(sigmas) - 1): + sigmas = sigmas[:last_step + 1] + if force_full_denoise: + sigmas[-1] = 0 + + if start_step is not None: + if start_step < (len(sigmas) - 1): + sigmas = sigmas[start_step:] + else: + if latent_image is not None: + return latent_image + else: + return torch.zeros_like(noise) + + sampler = sampler_object(self.sampler) + + return sample(self.model, noise, positive, negative, cfg, self.device, sampler, sigmas, self.model_options, latent_image=latent_image, denoise_mask=denoise_mask, callback=callback, disable_pbar=disable_pbar, seed=seed) diff --git a/ldm_patched/modules/sd.py b/ldm_patched/modules/sd.py new file mode 100644 index 0000000000000000000000000000000000000000..2830cc7214690dd52037fc1b5e70d6873deea139 --- /dev/null +++ b/ldm_patched/modules/sd.py @@ -0,0 +1,595 @@ +# Taken from https://github.com/comfyanonymous/ComfyUI +# This file is only for reference, and not used in the backend or runtime. + + +import torch + +from ldm_patched.modules import model_management +from ldm_patched.ldm.models.autoencoder import AutoencoderKL, AutoencodingEngine +import yaml + +import ldm_patched.modules.utils + +from . import clip_vision +from . import gligen +from . import diffusers_convert +from . import model_base +from . import model_detection + +from . import sd1_clip +from . import sd2_clip +from . import sdxl_clip + +import ldm_patched.modules.model_patcher +import ldm_patched.modules.lora +import ldm_patched.t2ia.adapter +import ldm_patched.modules.supported_models_base +import ldm_patched.taesd.taesd + +def load_model_weights(model, sd): + m, u = model.load_state_dict(sd, strict=False) + m = set(m) + unexpected_keys = set(u) + + k = list(sd.keys()) + for x in k: + if x not in unexpected_keys: + w = sd.pop(x) + del w + if len(m) > 0: + print("extra", m) + return model + +def load_clip_weights(model, sd): + k = list(sd.keys()) + for x in k: + if x.startswith("cond_stage_model.transformer.") and not x.startswith("cond_stage_model.transformer.text_model."): + y = x.replace("cond_stage_model.transformer.", "cond_stage_model.transformer.text_model.") + sd[y] = sd.pop(x) + + if 'cond_stage_model.transformer.text_model.embeddings.position_ids' in sd: + ids = sd['cond_stage_model.transformer.text_model.embeddings.position_ids'] + if ids.dtype == torch.float32: + sd['cond_stage_model.transformer.text_model.embeddings.position_ids'] = ids.round() + + sd = ldm_patched.modules.utils.transformers_convert(sd, "cond_stage_model.model.", "cond_stage_model.transformer.text_model.", 24) + return load_model_weights(model, sd) + + +def load_lora_for_models(model, clip, lora, strength_model, strength_clip, filename='default'): + model_flag = type(model.model).__name__ if model is not None else 'default' + + unet_keys = ldm_patched.modules.lora.model_lora_keys_unet(model.model) if model is not None else {} + clip_keys = ldm_patched.modules.lora.model_lora_keys_clip(clip.cond_stage_model) if clip is not None else {} + + lora_unmatch = lora + lora_unet, lora_unmatch = ldm_patched.modules.lora.load_lora(lora_unmatch, unet_keys) + lora_clip, lora_unmatch = ldm_patched.modules.lora.load_lora(lora_unmatch, clip_keys) + + if len(lora_unmatch) > 12: + print(f'[LORA] LoRA version mismatch for {model_flag}: {filename}') + return model, clip + + if len(lora_unmatch) > 0: + print(f'[LORA] Loading {filename} for {model_flag} with unmatched keys {list(lora_unmatch.keys())}') + + new_model = model.clone() if model is not None else None + new_clip = clip.clone() if clip is not None else None + + if 
new_model is not None and len(lora_unet) > 0: + loaded_keys = new_model.add_patches(lora_unet, strength_model) + skipped_keys = [item for item in lora_unet if item not in loaded_keys] + if len(skipped_keys) > 12: + print(f'[LORA] Mismatch {filename} for {model_flag}-UNet with {len(skipped_keys)} keys mismatched in {len(loaded_keys)} keys') + else: + print(f'[LORA] Loaded {filename} for {model_flag}-UNet with {len(loaded_keys)} keys at weight {strength_model} (skipped {len(skipped_keys)} keys)') + model = new_model + + if new_clip is not None and len(lora_clip) > 0: + loaded_keys = new_clip.add_patches(lora_clip, strength_clip) + skipped_keys = [item for item in lora_clip if item not in loaded_keys] + if len(skipped_keys) > 12: + print(f'[LORA] Mismatch {filename} for {model_flag}-CLIP with {len(skipped_keys)} keys mismatched in {len(loaded_keys)} keys') + else: + print(f'[LORA] Loaded {filename} for {model_flag}-CLIP with {len(loaded_keys)} keys at weight {strength_clip} (skipped {len(skipped_keys)} keys)') + clip = new_clip + + return model, clip + + +class CLIP: + def __init__(self, target=None, embedding_directory=None, no_init=False): + if no_init: + return + params = target.params.copy() + clip = target.clip + tokenizer = target.tokenizer + + load_device = model_management.text_encoder_device() + offload_device = model_management.text_encoder_offload_device() + params['device'] = offload_device + params['dtype'] = model_management.text_encoder_dtype(load_device) + + self.cond_stage_model = clip(**(params)) + + self.tokenizer = tokenizer(embedding_directory=embedding_directory) + self.patcher = ldm_patched.modules.model_patcher.ModelPatcher(self.cond_stage_model, load_device=load_device, offload_device=offload_device) + self.layer_idx = None + + def clone(self): + n = CLIP(no_init=True) + n.patcher = self.patcher.clone() + n.cond_stage_model = self.cond_stage_model + n.tokenizer = self.tokenizer + n.layer_idx = self.layer_idx + return n + + def add_patches(self, patches, strength_patch=1.0, strength_model=1.0): + return self.patcher.add_patches(patches, strength_patch, strength_model) + + def clip_layer(self, layer_idx): + self.layer_idx = layer_idx + + def tokenize(self, text, return_word_ids=False): + return self.tokenizer.tokenize_with_weights(text, return_word_ids) + + def encode_from_tokens(self, tokens, return_pooled=False): + if self.layer_idx is not None: + self.cond_stage_model.clip_layer(self.layer_idx) + else: + self.cond_stage_model.reset_clip_layer() + + self.load_model() + cond, pooled = self.cond_stage_model.encode_token_weights(tokens) + if return_pooled: + return cond, pooled + return cond + + def encode(self, text): + tokens = self.tokenize(text) + return self.encode_from_tokens(tokens) + + def load_sd(self, sd): + return self.cond_stage_model.load_sd(sd) + + def get_sd(self): + return self.cond_stage_model.state_dict() + + def load_model(self): + model_management.load_model_gpu(self.patcher) + return self.patcher + + def get_key_patches(self): + return self.patcher.get_key_patches() + +class VAE: + def __init__(self, sd=None, device=None, config=None, dtype=None, no_init=False): + if no_init: + return + + if 'decoder.up_blocks.0.resnets.0.norm1.weight' in sd.keys(): #diffusers format + sd = diffusers_convert.convert_vae_state_dict(sd) + + self.memory_used_encode = lambda shape, dtype: (1767 * shape[2] * shape[3]) * model_management.dtype_size(dtype) #These are for AutoencoderKL and need tweaking (should be lower) + self.memory_used_decode = lambda shape, dtype: (2178 
* shape[2] * shape[3] * 64) * model_management.dtype_size(dtype) + self.downscale_ratio = 8 + self.latent_channels = 4 + + if config is None: + if "decoder.mid.block_1.mix_factor" in sd: + encoder_config = {'double_z': True, 'z_channels': 4, 'resolution': 256, 'in_channels': 3, 'out_ch': 3, 'ch': 128, 'ch_mult': [1, 2, 4, 4], 'num_res_blocks': 2, 'attn_resolutions': [], 'dropout': 0.0} + decoder_config = encoder_config.copy() + decoder_config["video_kernel_size"] = [3, 1, 1] + decoder_config["alpha"] = 0.0 + self.first_stage_model = AutoencodingEngine(regularizer_config={'target': "ldm_patched.ldm.models.autoencoder.DiagonalGaussianRegularizer"}, + encoder_config={'target': "ldm_patched.ldm.modules.diffusionmodules.model.Encoder", 'params': encoder_config}, + decoder_config={'target': "ldm_patched.ldm.modules.temporal_ae.VideoDecoder", 'params': decoder_config}) + elif "taesd_decoder.1.weight" in sd: + self.first_stage_model = ldm_patched.taesd.taesd.TAESD() + else: + #default SD1.x/SD2.x VAE parameters + ddconfig = {'double_z': True, 'z_channels': 4, 'resolution': 256, 'in_channels': 3, 'out_ch': 3, 'ch': 128, 'ch_mult': [1, 2, 4, 4], 'num_res_blocks': 2, 'attn_resolutions': [], 'dropout': 0.0} + + if 'encoder.down.2.downsample.conv.weight' not in sd: #Stable diffusion x4 upscaler VAE + ddconfig['ch_mult'] = [1, 2, 4] + self.downscale_ratio = 4 + + self.first_stage_model = AutoencoderKL(ddconfig=ddconfig, embed_dim=4) + else: + self.first_stage_model = AutoencoderKL(**(config['params'])) + self.first_stage_model = self.first_stage_model.eval() + + m, u = self.first_stage_model.load_state_dict(sd, strict=False) + if len(m) > 0: + print("Missing VAE keys", m) + + if len(u) > 0: + print("Leftover VAE keys", u) + + if device is None: + device = model_management.vae_device() + self.device = device + offload_device = model_management.vae_offload_device() + if dtype is None: + dtype = model_management.vae_dtype() + self.vae_dtype = dtype + self.first_stage_model.to(self.vae_dtype) + self.output_device = model_management.intermediate_device() + + self.patcher = ldm_patched.modules.model_patcher.ModelPatcher(self.first_stage_model, load_device=self.device, offload_device=offload_device) + + def clone(self): + n = VAE(no_init=True) + n.patcher = self.patcher.clone() + n.memory_used_encode = self.memory_used_encode + n.memory_used_decode = self.memory_used_decode + n.downscale_ratio = self.downscale_ratio + n.latent_channels = self.latent_channels + n.first_stage_model = self.first_stage_model + n.device = self.device + n.vae_dtype = self.vae_dtype + n.output_device = self.output_device + return n + + def decode_tiled_(self, samples, tile_x=64, tile_y=64, overlap = 16): + steps = samples.shape[0] * ldm_patched.modules.utils.get_tiled_scale_steps(samples.shape[3], samples.shape[2], tile_x, tile_y, overlap) + steps += samples.shape[0] * ldm_patched.modules.utils.get_tiled_scale_steps(samples.shape[3], samples.shape[2], tile_x // 2, tile_y * 2, overlap) + steps += samples.shape[0] * ldm_patched.modules.utils.get_tiled_scale_steps(samples.shape[3], samples.shape[2], tile_x * 2, tile_y // 2, overlap) + pbar = ldm_patched.modules.utils.ProgressBar(steps, title='VAE tiled decode') + + decode_fn = lambda a: (self.first_stage_model.decode(a.to(self.vae_dtype).to(self.device)) + 1.0).float() + output = torch.clamp(( + (ldm_patched.modules.utils.tiled_scale(samples, decode_fn, tile_x // 2, tile_y * 2, overlap, upscale_amount = self.downscale_ratio, output_device=self.output_device, pbar = pbar) + + 
ldm_patched.modules.utils.tiled_scale(samples, decode_fn, tile_x * 2, tile_y // 2, overlap, upscale_amount = self.downscale_ratio, output_device=self.output_device, pbar = pbar) + + ldm_patched.modules.utils.tiled_scale(samples, decode_fn, tile_x, tile_y, overlap, upscale_amount = self.downscale_ratio, output_device=self.output_device, pbar = pbar)) + / 3.0) / 2.0, min=0.0, max=1.0) + return output + + def encode_tiled_(self, pixel_samples, tile_x=512, tile_y=512, overlap = 64): + steps = pixel_samples.shape[0] * ldm_patched.modules.utils.get_tiled_scale_steps(pixel_samples.shape[3], pixel_samples.shape[2], tile_x, tile_y, overlap) + steps += pixel_samples.shape[0] * ldm_patched.modules.utils.get_tiled_scale_steps(pixel_samples.shape[3], pixel_samples.shape[2], tile_x // 2, tile_y * 2, overlap) + steps += pixel_samples.shape[0] * ldm_patched.modules.utils.get_tiled_scale_steps(pixel_samples.shape[3], pixel_samples.shape[2], tile_x * 2, tile_y // 2, overlap) + pbar = ldm_patched.modules.utils.ProgressBar(steps, title='VAE tiled encode') + + encode_fn = lambda a: self.first_stage_model.encode((2. * a - 1.).to(self.vae_dtype).to(self.device)).float() + samples = ldm_patched.modules.utils.tiled_scale(pixel_samples, encode_fn, tile_x, tile_y, overlap, upscale_amount = (1/self.downscale_ratio), out_channels=self.latent_channels, output_device=self.output_device, pbar=pbar) + samples += ldm_patched.modules.utils.tiled_scale(pixel_samples, encode_fn, tile_x * 2, tile_y // 2, overlap, upscale_amount = (1/self.downscale_ratio), out_channels=self.latent_channels, output_device=self.output_device, pbar=pbar) + samples += ldm_patched.modules.utils.tiled_scale(pixel_samples, encode_fn, tile_x // 2, tile_y * 2, overlap, upscale_amount = (1/self.downscale_ratio), out_channels=self.latent_channels, output_device=self.output_device, pbar=pbar) + samples /= 3.0 + return samples + + def decode_inner(self, samples_in): + if model_management.VAE_ALWAYS_TILED: + return self.decode_tiled(samples_in).to(self.output_device) + + try: + memory_used = self.memory_used_decode(samples_in.shape, self.vae_dtype) + model_management.load_models_gpu([self.patcher], memory_required=memory_used) + free_memory = model_management.get_free_memory(self.device) + batch_number = int(free_memory / memory_used) + batch_number = max(1, batch_number) + + pixel_samples = torch.empty((samples_in.shape[0], 3, round(samples_in.shape[2] * self.downscale_ratio), round(samples_in.shape[3] * self.downscale_ratio)), device=self.output_device) + for x in range(0, samples_in.shape[0], batch_number): + samples = samples_in[x:x+batch_number].to(self.vae_dtype).to(self.device) + pixel_samples[x:x+batch_number] = torch.clamp((self.first_stage_model.decode(samples).to(self.output_device).float() + 1.0) / 2.0, min=0.0, max=1.0) + except model_management.OOM_EXCEPTION as e: + print("Warning: Ran out of memory when regular VAE decoding, retrying with tiled VAE decoding.") + pixel_samples = self.decode_tiled_(samples_in) + + pixel_samples = pixel_samples.to(self.output_device).movedim(1,-1) + return pixel_samples + + def decode(self, samples_in): + wrapper = self.patcher.model_options.get('model_vae_decode_wrapper', None) + if wrapper is None: + return self.decode_inner(samples_in) + else: + return wrapper(self.decode_inner, samples_in) + + def decode_tiled(self, samples, tile_x=64, tile_y=64, overlap = 16): + model_management.load_model_gpu(self.patcher) + output = self.decode_tiled_(samples, tile_x, tile_y, overlap) + return output.movedim(1,-1) + + def 
encode_inner(self, pixel_samples): + if model_management.VAE_ALWAYS_TILED: + return self.encode_tiled(pixel_samples) + + pixel_samples = pixel_samples.movedim(-1,1) + try: + memory_used = self.memory_used_encode(pixel_samples.shape, self.vae_dtype) + model_management.load_models_gpu([self.patcher], memory_required=memory_used) + free_memory = model_management.get_free_memory(self.device) + batch_number = int(free_memory / memory_used) + batch_number = max(1, batch_number) + samples = torch.empty((pixel_samples.shape[0], self.latent_channels, round(pixel_samples.shape[2] // self.downscale_ratio), round(pixel_samples.shape[3] // self.downscale_ratio)), device=self.output_device) + for x in range(0, pixel_samples.shape[0], batch_number): + pixels_in = (2. * pixel_samples[x:x+batch_number] - 1.).to(self.vae_dtype).to(self.device) + samples[x:x+batch_number] = self.first_stage_model.encode(pixels_in).to(self.output_device).float() + + except model_management.OOM_EXCEPTION as e: + print("Warning: Ran out of memory when regular VAE encoding, retrying with tiled VAE encoding.") + samples = self.encode_tiled_(pixel_samples) + + return samples + + def encode(self, pixel_samples): + wrapper = self.patcher.model_options.get('model_vae_encode_wrapper', None) + if wrapper is None: + return self.encode_inner(pixel_samples) + else: + return wrapper(self.encode_inner, pixel_samples) + + def encode_tiled(self, pixel_samples, tile_x=512, tile_y=512, overlap = 64): + model_management.load_model_gpu(self.patcher) + pixel_samples = pixel_samples.movedim(-1,1) + samples = self.encode_tiled_(pixel_samples, tile_x=tile_x, tile_y=tile_y, overlap=overlap) + return samples + + def get_sd(self): + return self.first_stage_model.state_dict() + +class StyleModel: + def __init__(self, model, device="cpu"): + self.model = model + + def get_cond(self, input): + return self.model(input.last_hidden_state) + + +def load_style_model(ckpt_path): + model_data = ldm_patched.modules.utils.load_torch_file(ckpt_path, safe_load=True) + keys = model_data.keys() + if "style_embedding" in keys: + model = ldm_patched.t2ia.adapter.StyleAdapter(width=1024, context_dim=768, num_head=8, n_layes=3, num_token=8) + else: + raise Exception("invalid style model {}".format(ckpt_path)) + model.load_state_dict(model_data) + return StyleModel(model) + + +def load_clip(ckpt_paths, embedding_directory=None): + clip_data = [] + for p in ckpt_paths: + clip_data.append(ldm_patched.modules.utils.load_torch_file(p, safe_load=True)) + + class EmptyClass: + pass + + for i in range(len(clip_data)): + if "transformer.resblocks.0.ln_1.weight" in clip_data[i]: + clip_data[i] = ldm_patched.modules.utils.transformers_convert(clip_data[i], "", "text_model.", 32) + + clip_target = EmptyClass() + clip_target.params = {} + if len(clip_data) == 1: + if "text_model.encoder.layers.30.mlp.fc1.weight" in clip_data[0]: + clip_target.clip = sdxl_clip.SDXLRefinerClipModel + clip_target.tokenizer = sdxl_clip.SDXLTokenizer + elif "text_model.encoder.layers.22.mlp.fc1.weight" in clip_data[0]: + clip_target.clip = sd2_clip.SD2ClipModel + clip_target.tokenizer = sd2_clip.SD2Tokenizer + else: + clip_target.clip = sd1_clip.SD1ClipModel + clip_target.tokenizer = sd1_clip.SD1Tokenizer + else: + clip_target.clip = sdxl_clip.SDXLClipModel + clip_target.tokenizer = sdxl_clip.SDXLTokenizer + + clip = CLIP(clip_target, embedding_directory=embedding_directory) + for c in clip_data: + m, u = clip.load_sd(c) + if len(m) > 0: + print("clip missing:", m) + + if len(u) > 0: + print("clip 
unexpected:", u) + return clip + +def load_gligen(ckpt_path): + data = ldm_patched.modules.utils.load_torch_file(ckpt_path, safe_load=True) + model = gligen.load_gligen(data) + if model_management.should_use_fp16(): + model = model.half() + return ldm_patched.modules.model_patcher.ModelPatcher(model, load_device=model_management.get_torch_device(), offload_device=model_management.unet_offload_device()) + +def load_checkpoint(config_path=None, ckpt_path=None, output_vae=True, output_clip=True, embedding_directory=None, state_dict=None, config=None): + #TODO: this function is a mess and should be removed eventually + if config is None: + with open(config_path, 'r') as stream: + config = yaml.safe_load(stream) + model_config_params = config['model']['params'] + clip_config = model_config_params['cond_stage_config'] + scale_factor = model_config_params['scale_factor'] + vae_config = model_config_params['first_stage_config'] + + fp16 = False + if "unet_config" in model_config_params: + if "params" in model_config_params["unet_config"]: + unet_config = model_config_params["unet_config"]["params"] + if "use_fp16" in unet_config: + fp16 = unet_config.pop("use_fp16") + if fp16: + unet_config["dtype"] = torch.float16 + + noise_aug_config = None + if "noise_aug_config" in model_config_params: + noise_aug_config = model_config_params["noise_aug_config"] + + model_type = model_base.ModelType.EPS + + if "parameterization" in model_config_params: + if model_config_params["parameterization"] == "v": + model_type = model_base.ModelType.V_PREDICTION + + clip = None + vae = None + + class WeightsLoader(torch.nn.Module): + pass + + if state_dict is None: + state_dict = ldm_patched.modules.utils.load_torch_file(ckpt_path) + + class EmptyClass: + pass + + model_config = ldm_patched.modules.supported_models_base.BASE({}) + + from . 
import latent_formats + model_config.latent_format = latent_formats.SD15(scale_factor=scale_factor) + model_config.unet_config = model_detection.convert_config(unet_config) + + if config['model']["target"].endswith("ImageEmbeddingConditionedLatentDiffusion"): + model = model_base.SD21UNCLIP(model_config, noise_aug_config["params"], model_type=model_type) + else: + model = model_base.BaseModel(model_config, model_type=model_type) + + if config['model']["target"].endswith("LatentInpaintDiffusion"): + model.set_inpaint() + + if fp16: + model = model.half() + + offload_device = model_management.unet_offload_device() + model = model.to(offload_device) + model.load_model_weights(state_dict, "model.diffusion_model.") + + if output_vae: + vae_sd = ldm_patched.modules.utils.state_dict_prefix_replace(state_dict, {"first_stage_model.": ""}, filter_keys=True) + vae = VAE(sd=vae_sd, config=vae_config) + + if output_clip: + w = WeightsLoader() + clip_target = EmptyClass() + clip_target.params = clip_config.get("params", {}) + if clip_config["target"].endswith("FrozenOpenCLIPEmbedder"): + clip_target.clip = sd2_clip.SD2ClipModel + clip_target.tokenizer = sd2_clip.SD2Tokenizer + clip = CLIP(clip_target, embedding_directory=embedding_directory) + w.cond_stage_model = clip.cond_stage_model.clip_h + elif clip_config["target"].endswith("FrozenCLIPEmbedder"): + clip_target.clip = sd1_clip.SD1ClipModel + clip_target.tokenizer = sd1_clip.SD1Tokenizer + clip = CLIP(clip_target, embedding_directory=embedding_directory) + w.cond_stage_model = clip.cond_stage_model.clip_l + load_clip_weights(w, state_dict) + + return (ldm_patched.modules.model_patcher.ModelPatcher(model, load_device=model_management.get_torch_device(), offload_device=offload_device), clip, vae) + +def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, output_clipvision=False, embedding_directory=None, output_model=True): + sd = ldm_patched.modules.utils.load_torch_file(ckpt_path) + sd_keys = sd.keys() + clip = None + clipvision = None + vae = None + model = None + model_patcher = None + clip_target = None + + parameters = ldm_patched.modules.utils.calculate_parameters(sd, "model.diffusion_model.") + unet_dtype = model_management.unet_dtype(model_params=parameters) + load_device = model_management.get_torch_device() + manual_cast_dtype = model_management.unet_manual_cast(unet_dtype, load_device) + + class WeightsLoader(torch.nn.Module): + pass + + model_config = model_detection.model_config_from_unet(sd, "model.diffusion_model.", unet_dtype) + model_config.set_manual_cast(manual_cast_dtype) + + if model_config is None: + raise RuntimeError("ERROR: Could not detect model type of: {}".format(ckpt_path)) + + if model_config.clip_vision_prefix is not None: + if output_clipvision: + clipvision = clip_vision.load_clipvision_from_sd(sd, model_config.clip_vision_prefix, True) + + if output_model: + inital_load_device = model_management.unet_inital_load_device(parameters, unet_dtype) + offload_device = model_management.unet_offload_device() + model = model_config.get_model(sd, "model.diffusion_model.", device=inital_load_device) + model.load_model_weights(sd, "model.diffusion_model.") + + if output_vae: + vae_sd = ldm_patched.modules.utils.state_dict_prefix_replace(sd, {k: "" for k in model_config.vae_key_prefix}, filter_keys=True) + vae_sd = model_config.process_vae_state_dict(vae_sd) + vae = VAE(sd=vae_sd) + + if output_clip: + w = WeightsLoader() + clip_target = model_config.clip_target() + if clip_target is not None: + clip = 
CLIP(clip_target, embedding_directory=embedding_directory) + w.cond_stage_model = clip.cond_stage_model + sd = model_config.process_clip_state_dict(sd) + load_model_weights(w, sd) + + left_over = sd.keys() + if len(left_over) > 0: + print("left over keys:", left_over) + + if output_model: + model_patcher = ldm_patched.modules.model_patcher.ModelPatcher(model, load_device=load_device, offload_device=model_management.unet_offload_device(), current_device=inital_load_device) + if inital_load_device != torch.device("cpu"): + print("loaded straight to GPU") + model_management.load_model_gpu(model_patcher) + + return (model_patcher, clip, vae, clipvision) + + +def load_unet_state_dict(sd): #load unet in diffusers format + parameters = ldm_patched.modules.utils.calculate_parameters(sd) + unet_dtype = model_management.unet_dtype(model_params=parameters) + load_device = model_management.get_torch_device() + manual_cast_dtype = model_management.unet_manual_cast(unet_dtype, load_device) + + if "input_blocks.0.0.weight" in sd: #ldm + model_config = model_detection.model_config_from_unet(sd, "", unet_dtype) + if model_config is None: + return None + new_sd = sd + + else: #diffusers + model_config = model_detection.model_config_from_diffusers_unet(sd, unet_dtype) + if model_config is None: + return None + + diffusers_keys = ldm_patched.modules.utils.unet_to_diffusers(model_config.unet_config) + + new_sd = {} + for k in diffusers_keys: + if k in sd: + new_sd[diffusers_keys[k]] = sd.pop(k) + else: + print(diffusers_keys[k], k) + offload_device = model_management.unet_offload_device() + model_config.set_manual_cast(manual_cast_dtype) + model = model_config.get_model(new_sd, "") + model = model.to(offload_device) + model.load_model_weights(new_sd, "") + left_over = sd.keys() + if len(left_over) > 0: + print("left over keys in unet:", left_over) + return ldm_patched.modules.model_patcher.ModelPatcher(model, load_device=load_device, offload_device=offload_device) + +def load_unet(unet_path): + sd = ldm_patched.modules.utils.load_torch_file(unet_path) + model = load_unet_state_dict(sd) + if model is None: + print("ERROR UNSUPPORTED UNET", unet_path) + raise RuntimeError("ERROR: Could not detect model type of: {}".format(unet_path)) + return model + +def save_checkpoint(output_path, model, clip=None, vae=None, clip_vision=None, metadata=None): + clip_sd = None + load_models = [model] + if clip is not None: + load_models.append(clip.load_model()) + clip_sd = clip.get_sd() + + model_management.load_models_gpu(load_models) + clip_vision_sd = clip_vision.get_sd() if clip_vision is not None else None + sd = model.model.state_dict_for_saving(clip_sd, vae.get_sd(), clip_vision_sd) + ldm_patched.modules.utils.save_torch_file(sd, output_path, metadata=metadata) diff --git a/ldm_patched/modules/sd1_clip.py b/ldm_patched/modules/sd1_clip.py new file mode 100644 index 0000000000000000000000000000000000000000..50c67d6def9dcb40e077ffe4de93aa333fb8baac --- /dev/null +++ b/ldm_patched/modules/sd1_clip.py @@ -0,0 +1,583 @@ +# Implementation of CLIPTextModel transformer + +# using https://github.com/comfyanonymous/ComfyUI/blob/master/comfy/sd1_clip.py as reference +# written by Forge + + +import os + +from transformers import CLIPTokenizer +import ldm_patched.modules.ops +import torch +import traceback +import zipfile +from . 
import model_management +import ldm_patched.modules.clip_model +import json +from transformers import CLIPTextModel, CLIPTextConfig, modeling_utils + + +# Taken from https://github.com/comfyanonymous/ComfyUI/blob/master/comfy/sd1_clip.py +# This function is only for reference, and not used in the backend or runtime. +def gen_empty_tokens(special_tokens, length): + start_token = special_tokens.get("start", None) + end_token = special_tokens.get("end", None) + pad_token = special_tokens.get("pad") + output = [] + if start_token is not None: + output.append(start_token) + if end_token is not None: + output.append(end_token) + output += [pad_token] * (length - len(output)) + return output + +class ClipTokenWeightEncoder: + + # Taken from https://github.com/comfyanonymous/ComfyUI/blob/master/comfy/sd1_clip.py + # This function is only for reference, and not used in the backend or runtime. + def encode_token_weights(self, token_weight_pairs): + to_encode = list() + max_token_len = 0 + has_weights = False + for x in token_weight_pairs: + tokens = list(map(lambda a: a[0], x)) + max_token_len = max(len(tokens), max_token_len) + has_weights = has_weights or not all(map(lambda a: a[1] == 1.0, x)) + to_encode.append(tokens) + + sections = len(to_encode) + if has_weights or sections == 0: + to_encode.append(gen_empty_tokens(self.special_tokens, max_token_len)) + + out, pooled = self.encode(to_encode) + if pooled is not None: + first_pooled = pooled[0:1].to(model_management.intermediate_device()) + else: + first_pooled = pooled + + output = [] + for k in range(0, sections): + z = out[k:k+1] + if has_weights: + z_empty = out[-1] + for i in range(len(z)): + for j in range(len(z[i])): + weight = token_weight_pairs[k][j][1] + if weight != 1.0: + z[i][j] = (z[i][j] - z_empty[j]) * weight + z_empty[j] + output.append(z) + + if (len(output) == 0): + return out[-1:].to(model_management.intermediate_device()), first_pooled + return torch.cat(output, dim=-2).to(model_management.intermediate_device()), first_pooled + +class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder): + """Uses the CLIP transformer encoder for text (from huggingface)""" + LAYERS = [ + "last", + "pooled", + "hidden" + ] + def __init__(self, version="openai/clip-vit-large-patch14", device="cpu", max_length=77, + freeze=True, layer="last", layer_idx=None, textmodel_json_config=None, dtype=None, model_class=ldm_patched.modules.clip_model.CLIPTextModel, + special_tokens={"start": 49406, "end": 49407, "pad": 49407}, layer_norm_hidden_state=True): # clip-vit-base-patch32 + super().__init__() + assert layer in self.LAYERS + + if textmodel_json_config is None: + textmodel_json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "sd1_clip_config.json") + + config = CLIPTextConfig.from_json_file(textmodel_json_config) + self.num_layers = config.num_hidden_layers + + with ldm_patched.modules.ops.use_patched_ops(ldm_patched.modules.ops.manual_cast): + with modeling_utils.no_init_weights(): + self.transformer = CLIPTextModel(config) + + if dtype is not None: + self.transformer.to(dtype) + + self.transformer.text_model.embeddings.to(torch.float32) + + self.max_length = max_length + if freeze: + self.freeze() + self.layer = layer + self.layer_idx = None + self.special_tokens = special_tokens + self.text_projection = torch.nn.Parameter(torch.eye(self.transformer.get_input_embeddings().weight.shape[1])) + self.logit_scale = torch.nn.Parameter(torch.tensor(4.6055)) + self.enable_attention_masks = False + + self.layer_norm_hidden_state = 
layer_norm_hidden_state + if layer == "hidden": + assert layer_idx is not None + assert abs(layer_idx) < self.num_layers + self.clip_layer(layer_idx) + self.layer_default = (self.layer, self.layer_idx) + + def freeze(self): + self.transformer = self.transformer.eval() + #self.train = disabled_train + for param in self.parameters(): + param.requires_grad = False + + def clip_layer(self, layer_idx): + if abs(layer_idx) > self.num_layers: + self.layer = "last" + else: + self.layer = "hidden" + self.layer_idx = layer_idx + + def reset_clip_layer(self): + self.layer = self.layer_default[0] + self.layer_idx = self.layer_default[1] + + # Taken from https://github.com/comfyanonymous/ComfyUI/blob/master/comfy/sd1_clip.py + # This function is only for reference, and not used in the backend or runtime. + def set_up_textual_embeddings(self, tokens, current_embeds): + out_tokens = [] + next_new_token = token_dict_size = current_embeds.weight.shape[0] - 1 + embedding_weights = [] + + for x in tokens: + tokens_temp = [] + for y in x: + if isinstance(y, int): + if y == token_dict_size: #EOS token + y = -1 + tokens_temp += [y] + else: + if y.shape[0] == current_embeds.weight.shape[1]: + embedding_weights += [y] + tokens_temp += [next_new_token] + next_new_token += 1 + else: + print("WARNING: shape mismatch when trying to apply embedding, embedding will be ignored", y.shape[0], current_embeds.weight.shape[1]) + while len(tokens_temp) < len(x): + tokens_temp += [self.special_tokens["pad"]] + out_tokens += [tokens_temp] + + n = token_dict_size + if len(embedding_weights) > 0: + new_embedding = torch.nn.Embedding(next_new_token + 1, current_embeds.weight.shape[1], device=current_embeds.weight.device, dtype=current_embeds.weight.dtype) + new_embedding.weight[:token_dict_size] = current_embeds.weight[:-1] + for x in embedding_weights: + new_embedding.weight[n] = x + n += 1 + new_embedding.weight[n] = current_embeds.weight[-1] #EOS embedding + self.transformer.set_input_embeddings(new_embedding) + + processed_tokens = [] + for x in out_tokens: + processed_tokens += [list(map(lambda a: n if a == -1 else a, x))] #The EOS token should always be the largest one + + return processed_tokens + + # Taken from https://github.com/comfyanonymous/ComfyUI/blob/master/comfy/sd1_clip.py + # This function is only for reference, and not used in the backend or runtime. 
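For orientation: set_up_textual_embeddings above is how textual-inversion prompts are handled. Pre-computed embedding tensors found among the token ids are appended as extra rows of the token-embedding table, the placeholder ids are rewritten to point at those rows, and the EOS row is kept as the last entry. Below is a stripped-down sketch of that idea, with hypothetical names and without the EOS bookkeeping; it is an illustration, not part of this patch.

    import torch

    def append_custom_embeddings(table, vectors):
        # Grow the embedding table by len(vectors) rows and report the new ids.
        old = table.weight.data
        vocab, dim = old.shape
        grown = torch.nn.Embedding(vocab + len(vectors), dim, device=old.device, dtype=old.dtype)
        grown.weight.data[:vocab] = old
        new_ids = []
        for i, v in enumerate(vectors):
            grown.weight.data[vocab + i] = v.to(old.device, old.dtype)
            new_ids.append(vocab + i)
        return grown, new_ids

    # Two learned 768-dim vectors become ids 49408 and 49409; the prompt's
    # placeholder tokens are then remapped to those ids before the forward pass.
    table = torch.nn.Embedding(49408, 768)
    table, ids = append_custom_embeddings(table, [torch.randn(768), torch.randn(768)])
    assert ids == [49408, 49409]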
+ def forward(self, tokens): + backup_embeds = self.transformer.get_input_embeddings() + device = backup_embeds.weight.device + tokens = self.set_up_textual_embeddings(tokens, backup_embeds) + tokens = torch.LongTensor(tokens).to(device) + + attention_mask = None + if self.enable_attention_masks: + attention_mask = torch.zeros_like(tokens) + max_token = self.transformer.get_input_embeddings().weight.shape[0] - 1 + for x in range(attention_mask.shape[0]): + for y in range(attention_mask.shape[1]): + attention_mask[x, y] = 1 + if tokens[x, y] == max_token: + break + + outputs = self.transformer(input_ids=tokens, attention_mask=attention_mask, + output_hidden_states=self.layer == "hidden") + self.transformer.set_input_embeddings(backup_embeds) + + if self.layer == "last": + z = outputs.last_hidden_state + elif self.layer == "pooled": + z = outputs.pooler_output[:, None, :] + else: + z = outputs.hidden_states[self.layer_idx] + if self.layer_norm_hidden_state: + z = self.transformer.text_model.final_layer_norm(z) + + if hasattr(outputs, "pooler_output"): + pooled_output = outputs.pooler_output.float() + else: + pooled_output = None + + if self.text_projection is not None and pooled_output is not None: + pooled_output = pooled_output.float().to(self.text_projection.device) @ self.text_projection.float() + return z.float(), pooled_output + + def encode(self, tokens): + return self(tokens) + + def load_sd(self, sd): + if "text_projection" in sd: + self.text_projection[:] = sd.pop("text_projection") + if "text_projection.weight" in sd: + self.text_projection[:] = sd.pop("text_projection.weight").transpose(0, 1) + return self.transformer.load_state_dict(sd, strict=False) + + +# Taken from https://github.com/comfyanonymous/ComfyUI/blob/master/comfy/sd1_clip.py +# This function is only for reference, and not used in the backend or runtime. +def parse_parentheses(string): + result = [] + current_item = "" + nesting_level = 0 + for char in string: + if char == "(": + if nesting_level == 0: + if current_item: + result.append(current_item) + current_item = "(" + else: + current_item = "(" + else: + current_item += char + nesting_level += 1 + elif char == ")": + nesting_level -= 1 + if nesting_level == 0: + result.append(current_item + ")") + current_item = "" + else: + current_item += char + else: + current_item += char + if current_item: + result.append(current_item) + return result + + +# Taken from https://github.com/comfyanonymous/ComfyUI/blob/master/comfy/sd1_clip.py +# This function is only for reference, and not used in the backend or runtime. +def token_weights(string, current_weight): + a = parse_parentheses(string) + out = [] + for x in a: + weight = current_weight + if len(x) >= 2 and x[-1] == ')' and x[0] == '(': + x = x[1:-1] + xx = x.rfind(":") + weight *= 1.1 + if xx > 0: + try: + weight = float(x[xx+1:]) + x = x[:xx] + except: + pass + out += token_weights(x, weight) + else: + out += [(x, current_weight)] + return out + + +# Taken from https://github.com/comfyanonymous/ComfyUI/blob/master/comfy/sd1_clip.py +# This function is only for reference, and not used in the backend or runtime. +def escape_important(text): + text = text.replace("\\)", "\0\1") + text = text.replace("\\(", "\0\2") + return text + + +# Taken from https://github.com/comfyanonymous/ComfyUI/blob/master/comfy/sd1_clip.py +# This function is only for reference, and not used in the backend or runtime. 
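As a quick worked example of the (text:weight) prompt syntax that parse_parentheses and token_weights above implement: the weights follow from the default 1.1 boost per parenthesis level and explicit :weight overrides. The import path assumes the module layout added by this patch, and this is illustration only.

    from ldm_patched.modules.sd1_clip import token_weights

    # Plain text keeps the incoming weight, an explicit ":1.4" overrides the
    # default 1.1 boost, and nested parentheses multiply (1.0 * 1.1 * 1.1).
    print(token_weights("a (red:1.4) ((cat)) on grass", 1.0))
    # -> [('a ', 1.0), ('red', 1.4), (' ', 1.0), ('cat', 1.2100000000000002), (' on grass', 1.0)]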
+def unescape_important(text): + text = text.replace("\0\1", ")") + text = text.replace("\0\2", "(") + return text + + +# Taken from https://github.com/comfyanonymous/ComfyUI/blob/master/comfy/sd1_clip.py +# This function is only for reference, and not used in the backend or runtime. +def safe_load_embed_zip(embed_path): + with zipfile.ZipFile(embed_path) as myzip: + names = list(filter(lambda a: "data/" in a, myzip.namelist())) + names.reverse() + for n in names: + with myzip.open(n) as myfile: + data = myfile.read() + number = len(data) // 4 + length_embed = 1024 #sd2.x + if number < 768: + continue + if number % 768 == 0: + length_embed = 768 #sd1.x + num_embeds = number // length_embed + embed = torch.frombuffer(data, dtype=torch.float) + out = embed.reshape((num_embeds, length_embed)).clone() + del embed + return out + + +# Taken from https://github.com/comfyanonymous/ComfyUI/blob/master/comfy/sd1_clip.py +# This function is only for reference, and not used in the backend or runtime. +def expand_directory_list(directories): + dirs = set() + for x in directories: + dirs.add(x) + for root, subdir, file in os.walk(x, followlinks=True): + dirs.add(root) + return list(dirs) + + +# Taken from https://github.com/comfyanonymous/ComfyUI/blob/master/comfy/sd1_clip.py +# This function is only for reference, and not used in the backend or runtime. +def load_embed(embedding_name, embedding_directory, embedding_size, embed_key=None): + if isinstance(embedding_directory, str): + embedding_directory = [embedding_directory] + + embedding_directory = expand_directory_list(embedding_directory) + + valid_file = None + for embed_dir in embedding_directory: + embed_path = os.path.abspath(os.path.join(embed_dir, embedding_name)) + embed_dir = os.path.abspath(embed_dir) + try: + if os.path.commonpath((embed_dir, embed_path)) != embed_dir: + continue + except: + continue + if not os.path.isfile(embed_path): + extensions = ['.safetensors', '.pt', '.bin'] + for x in extensions: + t = embed_path + x + if os.path.isfile(t): + valid_file = t + break + else: + valid_file = embed_path + if valid_file is not None: + break + + if valid_file is None: + return None + + embed_path = valid_file + + embed_out = None + + try: + if embed_path.lower().endswith(".safetensors"): + import safetensors.torch + embed = safetensors.torch.load_file(embed_path, device="cpu") + else: + if 'weights_only' in torch.load.__code__.co_varnames: + try: + embed = torch.load(embed_path, weights_only=True, map_location="cpu") + except: + embed_out = safe_load_embed_zip(embed_path) + else: + embed = torch.load(embed_path, map_location="cpu") + except Exception as e: + print(traceback.format_exc()) + print() + print("error loading embedding, skipping loading:", embedding_name) + return None + + if embed_out is None: + if 'string_to_param' in embed: + values = embed['string_to_param'].values() + embed_out = next(iter(values)) + elif isinstance(embed, list): + out_list = [] + for x in range(len(embed)): + for k in embed[x]: + t = embed[x][k] + if t.shape[-1] != embedding_size: + continue + out_list.append(t.reshape(-1, t.shape[-1])) + embed_out = torch.cat(out_list, dim=0) + elif embed_key is not None and embed_key in embed: + embed_out = embed[embed_key] + else: + values = embed.values() + embed_out = next(iter(values)) + return embed_out + +class SDTokenizer: + def __init__(self, tokenizer_path=None, max_length=77, pad_with_end=True, embedding_directory=None, embedding_size=768, embedding_key='clip_l', tokenizer_class=CLIPTokenizer, 
has_start_token=True, pad_to_max_length=True): + if tokenizer_path is None: + tokenizer_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "sd1_tokenizer") + self.tokenizer = tokenizer_class.from_pretrained(tokenizer_path) + self.max_length = max_length + + empty = self.tokenizer('')["input_ids"] + if has_start_token: + self.tokens_start = 1 + self.start_token = empty[0] + self.end_token = empty[1] + else: + self.tokens_start = 0 + self.start_token = None + self.end_token = empty[0] + self.pad_with_end = pad_with_end + self.pad_to_max_length = pad_to_max_length + + vocab = self.tokenizer.get_vocab() + self.inv_vocab = {v: k for k, v in vocab.items()} + self.embedding_directory = embedding_directory + self.max_word_length = 8 + self.embedding_identifier = "embedding:" + self.embedding_size = embedding_size + self.embedding_key = embedding_key + + # Taken from https://github.com/comfyanonymous/ComfyUI/blob/master/comfy/sd1_clip.py + # This function is only for reference, and not used in the backend or runtime. + def _try_get_embedding(self, embedding_name:str): + ''' + Takes a potential embedding name and tries to retrieve it. + Returns a Tuple consisting of the embedding and any leftover string, embedding can be None. + ''' + embed = load_embed(embedding_name, self.embedding_directory, self.embedding_size, self.embedding_key) + if embed is None: + stripped = embedding_name.strip(',') + if len(stripped) < len(embedding_name): + embed = load_embed(stripped, self.embedding_directory, self.embedding_size, self.embedding_key) + return (embed, embedding_name[len(stripped):]) + return (embed, "") + + # Taken from https://github.com/comfyanonymous/ComfyUI/blob/master/comfy/sd1_clip.py + # This function is only for reference, and not used in the backend or runtime. + def tokenize_with_weights(self, text:str, return_word_ids=False): + ''' + Takes a prompt and converts it to a list of (token, weight, word id) elements. + Tokens can both be integer tokens and pre computed CLIP tensors. + Word id values are unique per word and embedding, where the id 0 is reserved for non word tokens. 
+ Returned list has the dimensions NxM where M is the input size of CLIP + ''' + if self.pad_with_end: + pad_token = self.end_token + else: + pad_token = 0 + + text = escape_important(text) + parsed_weights = token_weights(text, 1.0) + + #tokenize words + tokens = [] + for weighted_segment, weight in parsed_weights: + to_tokenize = unescape_important(weighted_segment).replace("\n", " ").split(' ') + to_tokenize = [x for x in to_tokenize if x != ""] + for word in to_tokenize: + #if we find an embedding, deal with the embedding + if word.startswith(self.embedding_identifier) and self.embedding_directory is not None: + embedding_name = word[len(self.embedding_identifier):].strip('\n') + embed, leftover = self._try_get_embedding(embedding_name) + if embed is None: + print(f"warning, embedding:{embedding_name} does not exist, ignoring") + else: + if len(embed.shape) == 1: + tokens.append([(embed, weight)]) + else: + tokens.append([(embed[x], weight) for x in range(embed.shape[0])]) + #if we accidentally have leftover text, continue parsing using leftover, else move on to next word + if leftover != "": + word = leftover + else: + continue + #parse word + tokens.append([(t, weight) for t in self.tokenizer(word)["input_ids"][self.tokens_start:-1]]) + + #reshape token array to CLIP input size + batched_tokens = [] + batch = [] + if self.start_token is not None: + batch.append((self.start_token, 1.0, 0)) + batched_tokens.append(batch) + for i, t_group in enumerate(tokens): + #determine if we're going to try and keep the tokens in a single batch + is_large = len(t_group) >= self.max_word_length + + while len(t_group) > 0: + if len(t_group) + len(batch) > self.max_length - 1: + remaining_length = self.max_length - len(batch) - 1 + #break word in two and add end token + if is_large: + batch.extend([(t,w,i+1) for t,w in t_group[:remaining_length]]) + batch.append((self.end_token, 1.0, 0)) + t_group = t_group[remaining_length:] + #add end token and pad + else: + batch.append((self.end_token, 1.0, 0)) + if self.pad_to_max_length: + batch.extend([(pad_token, 1.0, 0)] * (remaining_length)) + #start new batch + batch = [] + if self.start_token is not None: + batch.append((self.start_token, 1.0, 0)) + batched_tokens.append(batch) + else: + batch.extend([(t,w,i+1) for t,w in t_group]) + t_group = [] + + #fill last batch + batch.append((self.end_token, 1.0, 0)) + if self.pad_to_max_length: + batch.extend([(pad_token, 1.0, 0)] * (self.max_length - len(batch))) + + if not return_word_ids: + batched_tokens = [[(t, w) for t, w,_ in x] for x in batched_tokens] + + return batched_tokens + + # Taken from https://github.com/comfyanonymous/ComfyUI/blob/master/comfy/sd1_clip.py + # This function is only for reference, and not used in the backend or runtime. + def untokenize(self, token_weight_pair): + return list(map(lambda a: (a, self.inv_vocab[a[0]]), token_weight_pair)) + + +class SD1Tokenizer: + def __init__(self, embedding_directory=None, clip_name="l", tokenizer=SDTokenizer): + self.clip_name = clip_name + self.clip = "clip_{}".format(self.clip_name) + setattr(self, self.clip, tokenizer(embedding_directory=embedding_directory)) + + # Taken from https://github.com/comfyanonymous/ComfyUI/blob/master/comfy/sd1_clip.py + # This function is only for reference, and not used in the backend or runtime. 
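To make the shape contract of SDTokenizer.tokenize_with_weights above concrete: a prompt is split into chunks of at most 75 word tokens, and each chunk becomes one batch of exactly max_length (77) (token, weight) pairs, opening with the start token and closing with the end token plus end-token padding. A small usage sketch, assuming the bundled sd1_tokenizer files are present and noting that, per the file's own comments, this path is reference-only:

    from ldm_patched.modules.sd1_clip import SDTokenizer

    tok = SDTokenizer()                        # loads ldm_patched/modules/sd1_tokenizer
    batches = tok.tokenize_with_weights("a (red:1.4) cat")
    assert len(batches) == 1 and len(batches[0]) == 77
    bos_id, bos_weight = batches[0][0]         # start token, weight 1.0
    # Prompts using "embedding:<name>" carry embedding tensors instead of int ids here.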
+ def tokenize_with_weights(self, text:str, return_word_ids=False): + out = {} + out[self.clip_name] = getattr(self, self.clip).tokenize_with_weights(text, return_word_ids) + return out + + # Taken from https://github.com/comfyanonymous/ComfyUI/blob/master/comfy/sd1_clip.py + # This function is only for reference, and not used in the backend or runtime. + def untokenize(self, token_weight_pair): + return getattr(self, self.clip).untokenize(token_weight_pair) + + +class SD1ClipModel(torch.nn.Module): + def __init__(self, device="cpu", dtype=None, clip_name="l", clip_model=SDClipModel, **kwargs): + super().__init__() + self.clip_name = clip_name + self.clip = "clip_{}".format(self.clip_name) + setattr(self, self.clip, clip_model(device=device, dtype=dtype, **kwargs)) + + # Taken from https://github.com/comfyanonymous/ComfyUI/blob/master/comfy/sd1_clip.py + # This function is only for reference, and not used in the backend or runtime. + def clip_layer(self, layer_idx): + getattr(self, self.clip).clip_layer(layer_idx) + + # Taken from https://github.com/comfyanonymous/ComfyUI/blob/master/comfy/sd1_clip.py + # This function is only for reference, and not used in the backend or runtime. + def reset_clip_layer(self): + getattr(self, self.clip).reset_clip_layer() + + # Taken from https://github.com/comfyanonymous/ComfyUI/blob/master/comfy/sd1_clip.py + # This function is only for reference, and not used in the backend or runtime. + def encode_token_weights(self, token_weight_pairs): + token_weight_pairs = token_weight_pairs[self.clip_name] + out, pooled = getattr(self, self.clip).encode_token_weights(token_weight_pairs) + return out, pooled + + # Taken from https://github.com/comfyanonymous/ComfyUI/blob/master/comfy/sd1_clip.py + # This function is only for reference, and not used in the backend or runtime. 
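Putting the pieces together, the reference path in this file goes SD1Tokenizer, then SD1ClipModel.encode_token_weights, then (per-token conditioning, pooled embedding). A shape-only sketch follows: it assumes the patch's module layout, runs on CPU, and since no checkpoint is loaded via load_sd the actual numbers come from uninitialized weights, so it illustrates shapes only.

    import torch
    from ldm_patched.modules.sd1_clip import SD1Tokenizer, SD1ClipModel

    tokenizer = SD1Tokenizer()
    clip = SD1ClipModel(device="cpu", dtype=torch.float32)   # meaningless weights until load_sd()

    tokens = tokenizer.tokenize_with_weights("a photo of a (red:1.3) cat")
    cond, pooled = clip.encode_token_weights(tokens)
    # cond:   [1, 77, 768] weighted per-token hidden states
    # pooled: [1, 768] pooled text embedding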
+ def load_sd(self, sd): + return getattr(self, self.clip).load_sd(sd) diff --git a/ldm_patched/modules/sd1_clip_config.json b/ldm_patched/modules/sd1_clip_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0158a1fd52727adf22359238285afafb150f66f2 --- /dev/null +++ b/ldm_patched/modules/sd1_clip_config.json @@ -0,0 +1,25 @@ +{ + "_name_or_path": "openai/clip-vit-large-patch14", + "architectures": [ + "CLIPTextModel" + ], + "attention_dropout": 0.0, + "bos_token_id": 0, + "dropout": 0.0, + "eos_token_id": 2, + "hidden_act": "quick_gelu", + "hidden_size": 768, + "initializer_factor": 1.0, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-05, + "max_position_embeddings": 77, + "model_type": "clip_text_model", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "pad_token_id": 1, + "projection_dim": 768, + "torch_dtype": "float32", + "transformers_version": "4.24.0", + "vocab_size": 49408 +} diff --git a/ldm_patched/modules/sd1_tokenizer/merges.txt b/ldm_patched/modules/sd1_tokenizer/merges.txt new file mode 100644 index 0000000000000000000000000000000000000000..76e821f1b6f0a9709293c3b6b51ed90980b3166b --- /dev/null +++ b/ldm_patched/modules/sd1_tokenizer/merges.txt @@ -0,0 +1,48895 @@ +#version: 0.2 +i n +t h +a n +r e +a r +e r +th e +in g +o u +o n +s t +o r +e n +o n +a l +a t +e r +i t +i n +t o +r o +i s +l e +i c +a t +an d +e d +o f +c h +o r +e s +i l +e l +s t +a c +o m +a m +l o +a n +a y +s h +r i +l i +t i +f or +n e +ð Ł +r a +h a +d e +o l +v e +s i +u r +a l +s e +' s +u n +d i +b e +l a +w h +o o +d ay +e n +m a +n o +l e +t o +ou r +i r +g h +w it +i t +y o +a s +s p +th is +t s +at i +yo u +wit h +a d +i s +a b +l y +w e +th e +t e +a s +a g +v i +p p +s u +h o +m y +. . +b u +c om +s e +er s +m e +m e +al l +c on +m o +k e +g e +ou t +en t +c o +f e +v er +a r +f ro +a u +p o +c e +gh t +ar e +s s +fro m +c h +t r +ou n +on e +b y +d o +t h +w or +er e +k e +p ro +f or +d s +b o +t a +w e +g o +h e +t er +in g +d e +b e +ati on +m or +a y +e x +il l +p e +k s +s c +l u +f u +q u +v er +ðŁ ĺ +j u +m u +at e +an d +v e +k ing +m ar +o p +h i +.. . +p re +a d +r u +th at +j o +o f +c e +ne w +a m +a p +g re +s s +d u +no w +y e +t ing +y our +it y +n i +c i +p ar +g u +f i +a f +p er +t er +u p +s o +g i +on s +g r +g e +b r +p l +' t +m i +in e +we e +b i +u s +sh o +ha ve +to day +a v +m an +en t +ac k +ur e +ou r +â Ģ +c u +l d +lo o +i m +ic e +s om +f in +re d +re n +oo d +w as +ti on +p i +i r +th er +t y +p h +ar d +e c +! ! +m on +mor e +w ill +t ra +c an +c ol +p u +t e +w n +m b +s o +it i +ju st +n ing +h ere +t u +p a +p r +bu t +wh at +al ly +f ir +m in +c a +an t +s a +t ed +e v +m ent +f a +ge t +am e +ab out +g ra +no t +ha pp +ay s +m an +h is +ti me +li ke +g h +ha s +th an +lo ve +ar t +st e +d ing +h e +c re +w s +w at +d er +it e +s er +ac e +ag e +en d +st r +a w +st or +r e +c ar +el l +al l +p s +f ri +p ho +p or +d o +a k +w i +f re +wh o +sh i +b oo +s on +el l +wh en +il l +ho w +gre at +w in +e l +b l +s si +al i +som e +ðŁ Ĵ +t on +d er +le s +p la +ï ¸ +e d +s ch +h u +on g +d on +k i +s h +an n +c or +. . 
+oun d +a z +in e +ar y +fu l +st u +ou ld +st i +g o +se e +ab le +ar s +l l +m is +b er +c k +w a +en ts +n o +si g +f e +fir st +e t +sp e +ac k +i f +ou s +' m +st er +a pp +an g +an ce +an s +g ood +b re +e ver +the y +t ic +com e +of f +b ack +as e +ing s +ol d +i ght +f o +h er +happ y +p ic +it s +v ing +u s +m at +h om +d y +e m +s k +y ing +the ir +le d +r y +u l +h ar +c k +t on +on al +h el +r ic +b ir +vi e +w ay +t ri +d a +p le +b ro +st o +oo l +ni ght +tr u +b a +re ad +re s +ye ar +f r +t or +al s +c oun +c la +t ure +v el +at ed +le c +en d +th ing +v o +ic i +be st +c an +wor k +la st +af ter +en ce +p ri +p e +e s +i l +âĢ ¦ +d re +y s +o ver +i es +ðŁ ij +com m +t w +in k +s un +c l +li fe +t t +a ch +l and +s y +t re +t al +p ol +s m +du c +s al +f t +' re +ch e +w ar +t ur +ati ons +ac h +m s +il e +p m +ou gh +at e +st ar +wee k +! !! +c lu +th ere +n er +t om +s el +ï¸ ı +wor ld +v es +c am +go t +in ter +of f +u m +ton ight +o ther +h ou +loo k +j e +i d +si on +be au +at t +el i +or t +re c +f f +st er +su pp +g en +be en +il y +te am +m m +i c +pe op +it t +at s +on ly +mb er +en g +b ri +m p +k now +b ur +b ar +in s +lo w +sh e +ro w +â Ŀ +t ro +peop le +vi a +lo w +ag a +be t +x t +f ac +ch ar +e ar +w al +s en +f am +b le +n ati +is h +n or +g ame +li ve +s co +le y +d on +ic k +b all +ver y +the se +p an +i a +at ing +c r +a re +g ir +ma ke +st re +sho w +. " +f l +u p +d r +than ks +il li +w om +st s +i g +s ur +ever y +c ur +vie w +le t +in to +mo st +n a +in di +g ar +ha d +s ou +v ed +an t +iti on +ma de +f ol +un i +it ed +ðŁ ı +ic al +th r +read y +ch ec +d ra +k es +boo k +e p +si c +mor ning +ne ws +c au +c t +w ell +an c +pho to +th an +or s +bir th +g g +ou t +ne xt +som e +en ing +stor y +ch ri +do wn +hom e +f fe +fre e +d a +b or +f il +ci al +than k +si de +le ar +qu e +l ine +t en +at es +ye ars +m y +pho to +beau ti +ri ght +n u +for m +shi p +b an +th er +d ays +g am +as on +g y +ðŁ İ +birth day +se t +ic k +e t +st ill +com ing +ta ke +ðŁ ĩ +b b +s ol +s on +d en +e p +mu sic +the m +de n +wh y +f oo +c ra +am az +w n +h ol +t ting +w r +u e +ma g +c ro +l an +c lo +b ra +a k +s ing +c al +re ad +' ve +jo h +b ab +d ri +b lo +bi g +er ic +in t +t or +tr y +l a +le g +hou se +m ic +v al +beauti ful +l itt +chec k +ne w +ver s +s w +ar i +pla y +h er +âĢ ĵ +w in +m a +con gr +sch ool +f un +. @ +he al +ic h +d el +wh ere +l on +ke t +tw o +mu ch +wat ch +v en +d ed +a st +k ed +b as +go ing +m p +e ver +w ays +ro o +de sig +l y +s ed +to p +l in +ch an +to o +it ing +d ent +gh ts +t y +sp o +ne ed +b lu +in st +be ing +âĿ ¤ +w el +l s +hi m +m ay +st ing +n a +el y +litt le +g a +n at +tom or +m c +h on +w ant +a ir +pi c +am eric +p er +le ss +wee k +ve l +a h +c ap +ch am +g er +ti m +tomor row +ne ss +st ate +h al +ser v +z e +o s +p at +v is +ex c +s in +f f +c ity +c en +an y +b el +su mm +t in +w ould +loo king +k o +ce le +fam ily +m er +po w +hel p +bu s +c o +c le +sel f +en s +ic s +th o +an i +ch o +le ad +b s +t wee +th ink +for e +ch il +vi de +di d +al e +ch i +v il +en ds +w ing +p as +' ll +v ol +s a +g s +man y +j ec +be fore +gra ph +n y +ur ing +w il +d d +bu il +f av +st ed +tr an +l ing +ou d +d ge +fi el +nati onal +st a +c er +w ere +in a +se ason +c ou +n ed +amaz ing +ti ons +cele br +n s +a th +he ad +s day +d ar +lo c +v in +an other +g oo +s at +n y +jo in +pre s +s es +s ing +an a +in ing +.. .. 
+c our +ï¸ ı +ac t +cau se +li ght +am s +t a +b al +f c +hi gh +off ici +t t +chri st +d ic +d ay +ra l +h or +: ) +vi si +n am +o b +ma s +gh t +re ally +t un +fin d +thr ough +por t +u t +ti ve +st y +n e +or e +ðŁĺ Ĥ +supp ort +ne ver +ev en +ðŁ Ķ +h a +y a +l d +u k +r an +j am +wi th +me di +d es +ne y +ch ing +al e +h y +k in +! ! +d y +pl ace +al so +b le +wh ich +bl ack +b li +s ay +par k +pl ay +ir e +vide o +week end +a il +ke y +p t +w ard +fri day +d in +ine ss +g ro +b en +al ways +t ball +ag o +m il +c y +pro duc +di sc +un der +ple ase +sp or +fu ll +e y +ðŁ Ļ +is e +iti es +c at +k no +u se +fo re +k er +ar t +hi gh +op en +s an +e f +our s +sh ed +st ri +d ro +aga in +i m +ðŁ ĵ +en jo +fu n +ge tting +p en +g er +c li +an y +ever y +e u +wom en +â ľ +e st +c ould +r y +" @ +th ou +sh a +comm un +b er +d ents +di s +wh ile +aw ay +di o +h am +g la +d ate +k a +mis s +un ch +w on +in f +roo m +g a +re al +ex per +di rec +sh ould +sp r +g ol +l ong +bet ter +or i +e y +i ence +il s +z z +h an +f ound +v s +â Ļ +po st +ti c +par t +m en +ren ce +ce ss +v ic +s il +sho p +ðŁĺ Ĥ +f ood +v al +sti c +y ou +s ays +e lec +st ar +o c +l and +i d +c tion +fiel d +s of +st art +wat er +fri ends +on es +ðŁ Į +f la +f ar +wh ite +par ty +in st +gr ou +t v +every one +m ent +j a +ch a +pr in +an ts +d uring +l at +l ar +we st +th en +k a +y oun +in sp +in te +we en +visi t +aga inst +re le +he ad +c es +to wn +loo ks +th re +re gi +ren t +pro jec +gir l +se ar +w o +m om +c ar +h un +pu bli +d i +p le +c all +c ri +u m +for d +per fe +fri end +h ard +ssi on +te st +pla ying +ar ound +be cause +ke ts +me et +sat ur +ar ti +wor k +j un +v en +r un +me mber +por t +su per +t wit +s am +el s +t ly +ad v +ati ve +at h +s ure +av ail +la r +s qu +ar ds +ev ent +m en +l l +o ver +lo gy +it al +tim es +m al +b ack +c oo +ma king +st ru +â ģ +it u +sh ar +g an +c as +s n +summ er +pic ture +f an +h in +christ mas +c y +pr oud +cham pi +desig n +pp ing +ho pe +c a +avail able +ma y +we d +photo graph +spe cial +sal e +sto p +er y +a we +al ity +hi story +am a +pre si +b ru +wor king +d one +d r +k en +fe at +w ood +ate st +sun day +mo vi +vel y +s le +f ace +sp ec +stu dents +b y +ha m +sp on +bus iness +d at +i e +i p +so ci +g lo +h and +re cor +r s +me e +ke ep +p ur +heal th +sh e +com ple +go d +da vi +col lec +li st +r a +clu b +t ers +in clu +th ings +pl an +â ĺ +joh n +sh ing +at ul +so on +blu e +g or +satur day +w on +congr atul +se e +âĿ¤ ï¸ı +tho se +ðŁĺ į +fin al +d ou +it h +o wn +ro ad +t our +a st +indi a +ti l +n d +f er +fav or +su l +lear n +fir e +ju st +grou p +a h +r ac +bo dy +u r +c are +à ¸ +p lo +o h +po s +gi ve +te ch +su b +c ent +er ing +y m +il ity +f ic +lon don +v ir +gu ys +b a +ðŁ ¤ +bab y +sc re +ðŁĺ į +tru mp +un der +chan ge +i an +col le +ss es +l er +ss ed +n ice +ann oun +pow er +s ar +a king +min i +s li +s wee +k ar +fu l +c ru +ac tion +a ther +) . 
+st and +de vel +a a +g an +le ft +lo l +re l +tran s +m ents +in t +e f +man ag +di g +gen er +do wn +p au +ti v +k u +th ur +k en +st on +f ans +tal k +twee t +t oo +sty le +pro te +se con +fr on +awe some +g l +p al +ne t +s or +la u +g on +sin ce +t ty +ser ies +me mor +b eli +fil m +di d +di es +o t +congratul ations +p ra +e ve +w oo +offici al +su c +in cre +b on +par t +pp ed +cla ss +si ve +bo y +cu l +perfe ct +t ou +d am +wel come +foo tball +h i +p ap +wa it +ad a +congr ats +youn g +exc ited +re ce +j an +v a +re d +st ra +medi a +' d +do es +le t +mu l +ill s +gre en +m el +to ge +fu ture +ye ster +vers ity +for m +ta in +i de +ch es +ki ds +qu i +ha ha +de ta +bi g +favor ite +gir ls +con tin +do m +sear ch +u al +a ir +d ers +mon th +c er +yester day +commun ity +ad e +do g +vil le +ic es +d eli +sy ste +ru n +is m +he art +c up +en ti +fe w +presi dent +e ds +un til +fe sti +o k +f lo +sa id +ol e +me d +tra vel + £ +ph one +toge ther +fa st +lo t +gam es +sh ir +bet ween +y es +th ers +do ing +m ac +at or +b and +fol low +projec t +devel op +di ffe +con fe +spe ci +ca st +y s +bo ard +r d +i al +sh oo +r am +ha ving +sh are +fol low +on e +n ame +m r +pu t +disc u +or y +c ame +ou s +s ite +twit ter +t b +t it +fin ally +z ed +su per +com pan +us ing +all s +li st +r is +sho t +g al +t ar +de l +joh n +âĢ Ķ +some thing +ra m +inte re +wh e +b it +ðŁ į +stre et +oun d +a i +tic kets +movi e +re al +k y +ta king +o pp +c c +l am +m oun +in ve +bl ack +us ed +on line +y or +loc al +gu e +c ks +o w +ge st +bo ys +illi on +con t +re ci +in ed +eu ro +no w +se en +p h +te ach +de f +sou th +su ch +aw ard +mu st +is su +ca re +fe el +p lu +l atest +spor ts +we b +te x +e ment +s k +fi c +w an +te ch +o t +bo x +n er +fre e +t al +a sh +c ase +ho t +won der +mee ting +er a +ch all +ðŁ IJ +jo b +il i +c ool +j our +th s +m o +f el +di e +mic ha +e le +te am +serv ice +st and +ma kes +p ing +ear ly +com es +e k +ho li +v ers +ag ue +s au +thre e +mon day +fa shi +some one +th ro +se a +b ad +supp or +tur n +ur y +m ing +photograph y +n ic +mar k +pre tty +ss ing +wat ching +me mb +ar ri +coun ty +be ach +fr an +cen ter +pol ice +b at +publi c +t an +pre ss +s af +s y +ge ts +ro y +n ers +y our +bu y +st ers +sho w +as ed +chil dre +af ric +in es +sp ace +sc ri +h all +pa in +ar ing +hom e +m ur +heal th +ch ed +s and +rece i +gu y +e a +americ an +re si +childre n +- - +i ri +ing ton +coun try +ro ss +le n +ann a +boo ks +b c +e ce +d om +lo vely +k h +pe t +g y +g ri +st age +off ice +ro ck +m on +b ay +t able +su n +m ed +th in +l or +f low +( @ +uni versity +stor e +fron t +goo d +z a +vo te +nor th +he y +an im +or der +mi d +with out +a de +re member +mar ket +? ? 
+mu s +tra ining +e duc +bu t +co ver +st an +sc en +b la +bre ak +l ou +s ame +g old +a in +o s +bo th +l it +ver n +a i +al bu +p a +enjo y +be g +ell ing +thur sday +inf o +s an +americ a +ha ir +te l +mar ch +con cer +colle ge +confe rence +ap p +h our +ch ang +â ļ +s our +ol s +we ather +w ar +p hi +festi val +secon d +cu te +pr ac +en er +str y +le a +pol it +s av +se n +o w +m i +ne ar +ou ght +z e +co ffe +w illi +d an +se y +davi d +e se +f an +de ci +the at +no v +ati on +tr ac +sc i +re view +c el +e m +u n +ju ly +or ig +ti on +d ru +form er +st ay +af ter +in v +too k +dat a +b al +tu es +d an +ev ening +ðŁĺĤ ðŁĺĤ +d ol +u res +pro vi +t s +e st +sig n +j ac +u k +s ong +ye t +bo w +in du +j ap +h oo +po int +any one +z y +i st +h ur +it al +buil ding +wom an +ch ur +j er +per for +co ach +le ague +ce ss +ne t +i mag +nati on +br it +qu e +aw ards +ag es +wor ks +c ed +man ce +l ate +ig n +mon ey +tru e +i i +t ell +pl ac +p ac +as y +wor ld +be hin +im port +read ing +gra m +gi ving +me t +h it +for ward +st om +pres ent +jun e +so cial +no on +mar t +hal f +s we +go vern +k er +deta ils +li sh +_ _ +ac y +si a +ber t +f all +! !!! +) , +th i +d iti +sp ort +k ing +f it +st af +c at +mu se +cen tr +y er +con tro +b loo +wal k +ac tu +did n +li m +lear ning +re search +wed ne +au th +h ours +k y +f ar +h en +.. .. +it ch +ri l +str ong +sk y +que sti +jam es +r on +d g +f ur +c in +do es +app ro +mar ke +tu res +ful ly +ch at +behin d +te m +fin i +mis sion +b att +fe el +he av +every thing +b ar +w ish +pre mi +i ma +exper ience +e ach +re port +swee t +tic s +spr ing +re spon +syste m +vic tor +l in +sa w +al ready +gh ter +f le +ã ĥ +br ing +albu m +- - +ell s +st an +to m +inter national +w ent +an ni +mat ch +pp er +st one +sm all +ra in +fashi on +are a +v an +ag ram +k o +thou ght +wor th +v an +m er +coffe e +it es +g n +arti st +c on +ar ch +c ir +se cre +gr ound +is o +h and +co m +bri dge +h s +x i +l ink +pu l +sp l +r ace +f li +ri ver +g as +di sco +d al +play er +f it +photo s +it y +o k +j or +tr a +ap ril +ad s +a di +sol u +beau ty +do or +me ss +up date +ali a +sch o +en ed +mom ent +sco t +sc ience +i or +ti es +ac ross +ous ly +sh es +does n +p age +wat er +m illion +cla ssi +l ic +ca st +form ation +micha el +ell o +s mo +in ts +vi sion +op ening +ld n +au str +tues day +win ner +po ssi +r ound +shir t +di t +b o +u es +il led +al ong +tri p +star ting +im pro +k an +per son +no t +re co +ne eds +c le +li e +re st +r ing +win ter +si mp +mo m +be er +fac e +tor s +us a +collec tion +ge or +se ssion +tr ying +la s +la ke +j en +orig in +stu dent +se cur +v in +pic s +ex pe +com p +gon na +e qu +b ad +le y +a u +memb ers +bre ak +w all +gi c +din ner +bu l +insp ir +r i +min d +ic a +win ning +tal king +t ren +s is +t en +wonder ful +s now +he ar +th om +no thing +gu i +st in +blo g +fe st +b un +le e +war ds +ch ance +dre ss +re n +pau l +p es +tech no +ru ssi +c ard +e ast +mar i +w ine +t i +la w +str ic +k i +ap e +au gu +pro fe +as h +cour se +ma il +ren tly +d un +m un +lo ve +is land +dri ve +s l +end ed +ma in +lo st +nat ure +âĿ¤ ï¸ı +ch ic +re por +p in +pr o +st ation +ce p +ta kes +compan y +go es +on d +ma ch +ra dio +d ad +ro ck +j a +p ay +champi on +e e +in de +tt a +ati c +t ab +beli eve +ener gy +z i +t at +wor d +on ce +re sul +y l +and re +an o +inst agram +clo se +t am +cu stom +w a +con om +sho ws +li fe +k in +ro b +t age +n ation +al most +list en +sa ve +re li +ac e +mar y +tre e +for get +j ack +wa iting +direc tor +h ill +bor n 
+te mp +f l +st e +on a +sing le +wedne sday +un ited +in o +@ _ +ne l +celebr ate +en ding +de al +j i +can ada +hu ge +tr ack +âĢ ¢ +f y +fan ta +an g +yor k +rele ase +p un +ep iso +wor ds +t our +p ack +i gh +classi c +perfor mance +ke t +after noon +recor d +win s +pro ble +âĿ ¤ +f our +b ed +ban k +d ance +s la +cal led +mi ght +a p +pa st +ðŁ ļ +diffe rent +it e +gi ft +ssi ve +chur ch +c us +pro gram +ho tel +ic e +ma d +secur ity +en ge +d c +en ough +st a +e ty +de ad +g un +he ar +m ir +hu man +gre ss +oun ds +pi ece +bre aking +gar den +fi ght +vie ws +f ish +star ted +run ning +gre en +ser i +s m +as k +d or +de ath +e conom +er i +ir d +s er +l unch +âģ ¦ +bo x +nat u +ba se +b an +f al +glo bal +wil d +wo w +out side +mo ve +le ad +an al +muse um +on g +ha w +pow er +than k +b ac +char ac +cam pa +dig ital +r o +op er +de v +w ol +p ati +f a +m ale +pap er +ill ing +c s +â ĥ +educ ation +ta ken +e ffe +m ou +s ad +" . +bas ed +staf f +inclu ding +li ving +a c +ch ina +mo b +stor m +lu ck +ph il +o o +y n +tra vel +k el +ti al +pr ice +boo k +import ant +bi o +p ool +ny c +f ab +lo ad +? ! +chall enge +cr y +ser ve +we ar +bu s +ta in +nu mber +ro r +k at +i z +th ough +ho sp +m m +fa ir +ut es +ho t +po p +fi ed +cam p +develop ment +li br +c ali +em s +âģ¦ @ +b ol +is ed +stand ing +mo del +it a +g le +bro wn +ima ge +ve red +for ce +o il +par tic +sh u +da ily +la w +se c +cla ss +cam p +holi day +cl in +k ers +pres ent +gam e +incre di +er ship +inter view +b ill +du e +and y +ab o +in nov +ke y +ac ade +p il +mo der +st ars +br and +f er +wee ks +con si +pr e +sa fe +wr it +di um +la unch +marke ting +ann ual +as si +cour t +la dy +c ted +and a +in side +chil d +opp or +sm ith +centr e +gu e +âģ © +f ren +st y +for t +ent ly +is n +ke ep +to ber +on y +bo y +al d +col la +de mo +le vel +com pet +ad o +b our +fanta stic +m ate +s u +sou th +oppor tun +vers ary +lat er +bu d +face book +la un +ster n +p it +! " +ma j +gr am +tb t +fi re +happ y +a ks +wh ole +actu ally +ill er +ell a +lo ts +al ex +an ge +lan ds +ðŁĺ Ń +en ter +r ou +episo de +p ed +in ten +sh ire +wh o +pl an +h o +ca ke +we st +mag az +fre sh +c c +n ar +ch ris +wr iting +w er +n om +l o +mi dd +dre am +o l +ti onal +de b +> > +be come +s i +gr and +all ing +hi stor +ri de +i red +saf e +que en +ci l +in tro +vi l +d ani +.. . +ar tic +st at +sh ort +or ing +sel fi +mis si +do c +b it +g all +b om +i re +se lec +d ition +ðŁĶ ¥ +fri end +be at +gh ting +ðŁĺ Ĭ +pe ace +ex hi +ant a +ab ility +il lu +j on +qu ality +tri bu +m es +play ers +fa ir +cu t +c ab +suc cess +b i +su s +pro mo +sch e +an ge +ic o +comm it +cat ch +ill a +kin d +feel ing +qu o +s ay +anni versary +spo t +mo ther +an e +p end +your self +op s +app le +min utes +p o +gr and +ri es +ha ha +care er +ed ition +de c +ric k +am i +concer t +iti ve +ge ous +d ly +t te +adv ent +i g +li ghts +ak er +sk y +âĥ £ +r ay +fini shed +w ay +s d +ac coun +ðŁĴ ķ +ck y +ch el +lit er +pain ting +lo s +st un +techno logy +n as +ma r +b il +afric a +ki e +ey es +gol f +plu s +ni a +it ec +serv ices +wed ding +kno wn +te le +.. ... 
+star ts +pa ren +w ants +ati onal +mon ths +win do +fav our +er t +magaz ine +ex clu +re ve +b c +origin al +e ss +n al +an ti +st ro +t ice +stu dy +à ¤ +v ac +nation al +fi ve +ra in +ve ment +u te +ver se +em er +ar my +possi ble +gue ss +val ley +ther n +cro w +m r +col or +on to +pic k +cle ar +dar k +t ac +wan ted +it ting +can cer +govern ment +di e +ri se +z ing +col d +f oun +stu dio +str ation +bro ther +a head +sh el +mic ro +ic ally +d au +sig ned +vi ol +a x +as se +i o +w re +spl ay +ch ick +augu st +pl at +ti ps +sp i +hu man +e asy +lo gi +mi ke +gro w +ag re +w w +sh ad +mo tiv +wi de +tur ns +om g +v ar +de fin +su g +j im +ðŁĶ ¥ +t d +campa ign +nam ed +re tweet +co p +t v +le av +k is +dou ble +s mar +issu e +vil la +in formation +li es +sto ck +n t +di stric +sh or +mi x +er o +se p +me x +see ing +li ve +re min +co de +g ur +s c +wil d +l un +h ood +spo t +fa ther +fore ver +up d +tra f +f ly +ne ed +gra du +tra in +ma ke +s ab +be y +si ze +lead er +tal ks +e u +lo g +fo x +gor geous +le ss +le ts +sur pri +my self +no te +li ves +f ru +lo ved +se ver +de m +j i +so c +h old +do gs +n i +â ŀ +lea ve +air port +ben ef +ex pl +shi ps +comple te +ach i +gre at +vin tage +j ack +ro c +woo d +pri v +off er +ey e +ver sion +te a +co ach +off ic +w ell +g en +s at +h h +you th +o x +? " +m t +mi x +g g +d le +natu ral +buil d +break fast +thin king +theat re +mo on +ber g +go als +geor ge +en e +exc ell +il ing +tun e +y ed +g ate +m it +net work +jo e +h ello +f b +tu be +we aring +ath le +stru c +har d +gla ss +g ers +thro w +g es +b t +indu stry +manag ement +ali st +go al +stre am +y el +a vi +ici ous +o thers +s ki +chri sti +bir d +e sc +m in +tr o +l t +j an +im p +ri ghts +sh a +or gan +cent ral +ar a +ro ll +favour ite +che ster +el se +p ay +car s +m ine +ste p +prac tice +maj or +h ang +ðŁĺ ĺ +n on +v ari +eng ine +vol un +di a +i led +arch itec +p ink +d s +th y +wa sh +web site +ba g +contro l +el li +f ra +an sw +d ence +y u +r on +ol a +g in +dr in +li c +cou ple +sp ar +g on +cre ate +c t +celebr ating +de ep +e at +te e +vo ice +dro p +vis it +at ors +sta dium +f t +w is +ro l +gra de +fam il +po ints +re pre +w as +traf fic +jap an +or g +hon or +tex as +man u +âĻ ¥ +safe ty +re r +b ag +em plo +rele ased +re gu +ak a +n av +ro le +sen ior +spec t +cro ss +lin es +be st +p ack +s in +ti e +mis sing +sun set +li ber +is ing +j ay +sk i +champion ship +ac tiv +la dies +play ed +y y +pu bl +al o +pri de +s r +pa ki +lu x +sur vi +ck ed +e ts +cho col +austr alia +par is +mi les +h at +ment al +al a +me an +mob ile +en a +in si +f ound +chi ef +t ag +incredi ble +re turn +à © +goo gle +fren ch +cre w +hal lo +ali an +j az +ch er +sil ver +nor th +eng lish +base ball +c af +lim ited +follow ing +app reci +ear th +k ir +ve mber +w ed +p tion +g ed +oc tober +fl ori +c r +en cy +ga ve +lor d +stu ff +ber ry +po st +sm ile +bro ad +st ate +gg er +me ans +ic y +gu n +y o +ma ster +bur g +han ds +ni e +/ / +uni on +brit ish +big gest +distric t +am ing +h il +o ce +per son +pas s +en vir +scho ols +arri ved +anc es +insp ired +ex pla +be n +libr ary +bo tt +am p +ste ph +cont act +b ang +m s +cali for +t old +batt le +b b +chic ago +âľ ¨ +str ate +sh i +de ce +- ) +ad d +la b +j ones +leg end +cast le +ing er +st ance +be l +ur a +re fu +lead ers +po t +se x +h ic +artic le +ki d +fr ance +x x +ex e +gui de +volun te +pr int +al i +ce o +twee ts +w x +scen e +vol u +ant i +h an +as soci +shar ing +ro se +mini ster +sh er +in ste +cle an +demo cr +po ster +sk in +p 
sy +pro per +cra zy +i am +o re +in i +any thing +po d +mo ving +cl ick +ex plo +com b +cra ft +f i +bloo d +is ra +publ ic +d ent +ol ym +eng land +a si +ch er +fac t +envir on +har ry +g one +me dic +enjo ying +just ice +j r +indi an +wi fe +s ound +t es +dra wing +p al +ide a +cr it +ju li +il er +war m +cl ar +thou ghts +def en +coun cil +intro duc +di ed +jan u +an i +s end +li er +m l +intere sting +tra de +win d +b ay +s ac +anc y +sour ce +b es +org ani +ar ly +lar ge +ff ici +ta g +u t +de sp +o es +tit le +sy m +pic tures +op en +wom en +sho wing +ri a +le ast +lead ership +cur rent +elec tr +val ent +list ening +c key +gener al +de ser +du ce +; ) +c ent +ðŁĺį ðŁĺį +sco tt +po or +selfi e +ev ents +i on +wr ong +de v +h ill +sep te +cul ture +l ine +sor ry +s ent +si ster +ce pt +k ri +no vember +ar i +announ ce +z ation +br an +g ent +d u +l en +per s +f m +mart in +o p +e mb +om e +midd le +suc cess +pe ter +janu ary +f lu +rac ing +d av +bi ke +ðŁı » +pe t +shoo t +profe ssi +feat uring +septe mber +now playing +sta ur +z a +on ic +qu ick +bas ke +spe aking +mil it +z er +chick en +b ell +s ad +co ast +lo ving +y ers +d j +pan el +ver age +s wit +ic ks +b ou +califor nia +s am +paren ts +er o +k illed +ph ys +jo bs +mi gr +an th +e mo +hallo ween +and er +c m +compet ition +e ag +s ket +sp ir +may be +exclu sive +app e +jour ney +scre en +for d +i o +h ate +u g +sou l +her o +soci ety +sy n +gu it +n h +d j +as es +im pre +ti me +sal es +d d +f ts +summ it +stun ning +om s +tur ned +cle an +sof t +be at +re staur +de red +en ces +ma gic +di o +sh ine +gu est +health y +exhi b +stor ies +po pu +n is +el a +bel ow +fun ny +resul ts +s ne +cur rently +ar d +down load +f light +m al +f ine +p ad +ch u +ent ed +h at +ðŁij ı +ste ve +j o +mar k +r at +b all +p c +p on +b by +o li +ar ts +as ure +bow l +att ack +mi c +de ar +ran ge +en ter +chocol ate +br illi +ac cess +, " +? ?? +ch ap +con st +t n +mat ter +blu e +gall ery +em p +work shop +lead ing +y ours +baske tball +w anna +th u +_ _ +mar ri +sle ep +bi a +ch e +ma d +imp act +o wn +si r +chan nel +euro pe +e sp +k itch +hosp ital +w ra +roy al +f s +ne u +qu ar +ne y +ac ks +ch ase +pp y +st al +at ely +ti m +dece mber +r are +per form +cre am +we ight +ch oo +ni ght +ha ven +fr anc +kh an +buil t +hel ping +tru st +ty pe +gol den +ta x +s now +s wi +di sa +questi ons +ve y +li ght +c n +cl oud +thom as +ag ed +sh ou +te ams +gr an +re ason +a a +you tube +v p +pi zz +manag er +bur y +cre dit +tre at +ma x +i k +ma in +g ing +de ad +pro bab +ye ah +ã Ĥ +br and +so li +pl ant +ta yl +gir l +ðŁĺ Ń +nam ent +au to +mess age +ko re +n ur +ter r +ag u +ma p +sen ting +lo ves +gi ves +g ab +z en +ro bert +con fir +w ars +o m +sta in +cam era +and er +won der +a b +ca p +s old +su it +wal king +contin ue +effe c +dau ghter +d anc +cha in +mul ti +ki d +y an +champi on +v o +ta ins +ho st +min i +mis sed +re sc +ly n +fin ish +del icious +s as +tayl or +i b +pro mis +produc ts +moun tain +flori da +regi ster +tre at +rec ent +fe male +boo th +mat t +ve hic +s op +mo tor +suppor ting +phi c +ex tre +dr ink +lan e +th ird +p s +con stru +ce re +far m +ðŁİ ī +tu red +ðŁij ī +c ats +a j +gi e +shoo ting +as ked +paki stan +am e +m b +g il +leg al +squ are +in vol +dra w +oo oo +!! !! 
+opportun ity +p y +e i +b ts +teach er +charac ter +john son +br on +ly wood +ch ine +c ing +c ine +d ge +gam ing +russi a +ci a +quo te +ric h +go v +flow ers +sp iri +st in +grow th +ðŁı ¼ +comm er +j uni +mu m +r an +s na +a ren +c b +ac tor +col or +si t +pa ir +ch i +bo w +acade my +hel d +r ang +me tal +y l +ac tive +probab ly +t ch +need ed +spe e +cho ice +ital y +ry an +ðŁĩ º +flow er +v it +m n +found ation +b ak +si ons +ne igh +f loo +he ard +re mo +fre sh +ing ing +re f +to wn +cl ou +je sus +spiri t +cou ldn +z es +ðŁĴ Ļ +willi ams +pro ce +moder n +pro cess +sho es +cre ated +tri c +issu es +ann e +att en +de but +h r +n it +sti g +a po +e ps +z u +ã Ģ +si x +car ds +lan gu +fam ous +tour nament +se l +e bay +y n +st on +k ick +announ ced +k am +vo c +brilli ant +hou se +che ese +war ri +mus ic +ho ckey +ðŁĺĤ ðŁĺĤ +sk ills +au tom +smar t +med ical +mon y +e x +gu ar +gi ve +pers onal +ven tion +al li +pre ss +flo or +m c +victor y +hi m +simp le +th or +ðŁĩº ðŁĩ +ta il +lu cky +ale x +qu ite +bo t +ssi ons +chall eng +c ann +amaz on +h ell +b ought +) : +ed y +secre t +produc tion +inde pend +de fe +ad ded +p r +p ag +be d +gre atest +with in +j ay +ðŁ ¥ +ire land +re ly +s d +te xt +dri ving +pro gram +spe ed +col um +str on +à © +fore st +â ĸ +mach ine +co in +sc ar +oun t +bi e +¡ ï¸ı +por tra +comm on +wre st +recei ved +kno w +inve st +pl ans +ac cor +ad op +ter y +re ali +p p +k al +art work +me an +go d +inste ad +an ci +motiv ation +as ing +inspir ation +up coming +polit ical +euro pe +m ers +heav y +ðŁij į +fe bru +scot land +ou gh +b t +bo ss +sche du +spe ak +n ick +u red +in o +e k +ri sk +tor y +pres ents +b on +ru g +st ates +exhib ition +il o +m ill +br ought +: -) +tou ri +com e +offici ally +champi ons +do ors +re p +po se +ex tra +k ings +soc cer +squ ad +app lic +at a +some times +t ari +excell ent +ðŁĺ ĺ +stra ight +car ol +ri p +âĢ į +gra phic +m ol +elec tion +febru ary +as ons +l i +di r +m t +n ick +u su +m rs +com ics +inst itu +cor por +v i +ðŁĻ ı +tu ral +di se +ac ci +we are +am ong +sho pping +t ill +wh at +cha ir +sp an +chine se +innov ation +jo y +k it +cent ury +ob ama +ph ili +f c +re ach +c iti +ul ous +n on +d ang +happ ening +bur n +p el +or ange +d v +k ick +cla im +ing ham +ph y +no v +pod cast +wh i +ni ghts +ear lier +be ar +la h +exc iting +or a +gi ven +s lo +memor ies +contin ues +produc t +gh o +c d +kno ws +ðŁİ ī +publi shed +discu ss +y ard +i phone +tri es +w all +fe b +are n +tru th +win ners +tu re +diti onal +milit ary +proble m +m and +do g +lo ss +c ric +can adi +ve ter +villa ge +" , +y r +un g +don ald +ag ing +bir ds +sci enti +le s +th is +regi on +tic al +itt en +il a +ðŁĺ İ +d ad +di am +abo ve +st ren +li t +p ir +la b +fo cus +bus y +d ur +app ly +s ma +auth or +ac i +exe cu +dom in +re la +jack son +at o +wash ington +ðŁĻ Į +k ill +popu lar +ce ment +ro ad +e ating +loc ation +v ent +ar re +n an +cu sto +advent ure +or din +spor t +ul t +lo ck +questi on +dri ver +land sc +on i +k ins +p d +jor dan +te red +k k +a f +chil d +s p +just in +en i +s elling +z o +wh it +bo ston +partic ip +sig ning +happ ened +he at +m am +dre ams +lo ws +gra ph +the day +head ing +br o +ble ssed +vi c +ve gas +h d +in ning +ro man +and ro +den ti +u se +c it +pro gress +writ er +bo b +ff s +gro wing +b ly +aw are +ex am +sp ent +be t +sc ore +bey ond +do cu +ad el +s f +cou ra +colla bor +in c +priv ate +bo at +* * +z one +p ha +b ill +to tal +plan ning +to wards +plac es +pre view +cre ative +dam n +ide as +se ems +po ten +say ing 
+di splay +s w +a qu +lou is +by e +li l +e mail +we stern +ger many +ell er +re s +f ant +ment ary +de als +ric hard +jer sey +stren g +ra d +pizz a +mon d +w are +l ac +g i +ar chi +c d +yel low +rec ently +re ach +à ¹ +kitch en +desig ned +tr y +g al +restaur ant +at ure +w w +j as +l ma +ðŁij Į +pa in +av o +min ute +sch ol +ther ap +tic ket +d ry +jap an +diti ons +ter ri +sel ves +happ en +t up +ma g +cop y +sh er +free dom +f ile +speci ally +tor onto +lo ad +g ary +re y +answ er +lo y +cau ght +pri ze +u ne +fic ation +ni ger +sy d +tou ch +feat ure +jaz z +recor ds +him self +di sh +ro ber +spot ted +ma ster +wa ve +fin als +bu ll +for um +al d +re comm +ch a +a e +d oo +inst ru +tru ly +l g +in k +bro thers +de st +j im +m it +clo sed +is on +tri ed +s anta +af fe +w an +hor se +g row +camp us +rel ation +nati ve +jour n +go v +o ct +k it +b ound +part ner +re ma +crow d +! ) +c alls +ra il +qu ali +solu tion +con test +con vers +sn ap +b ase +in iti +ta x +y e +ent repre +it or +constru ction +foo d +present ed +n ings +cli mate +k m +mo del +b j +blo ck +present ation +dre am +fi x +c alling +bus ine +con gress +under stand +we b +val ue +ï¸ı âĥ£ +mex ico +it ely +ki m +char ity +ref lec +bl an +fl ying +anal y +famil ies +b and +reci pe +celebr ation +ac cep +ar y +to t +g b +intere sted +cap tain +âĻ ¥ +ti p +ab sol +bra z +inve stig +o logy +de c +tru ck +ver ing +c lear +don t +go tta +ad vis +beg ins +ma ss +de scri +blo ck +k im +davi d +son gs +memor ial +feat ures +su stain +' . +gra b +jo se +v a +con serv +se ts +man chester +fi ghting +de gre +ag a +in d +sle ep +pos ition +ha ir +sig ns +pol icy +it o +al ert +st am +sp end +w y +absol ut +d m +anim al +my ster +success ful +proble ms +ro bo +k ay +gar den +p d +may or +d ale +t ol +off ers +vis iting +friend ly +tre es +offic er +accoun t +ke vin +ðŁij į +gi ant +contin u +con su +tr act +n fl +ðŁĺ Ĭ +h q +b ility +a ar +dis ney +te en +on ed +wh ite +tra iler +de dic +al one +absolut ely +dig ital +willi am +in ation +s wa +e e +enti re +ger man +ro ll +h its +co st +st ay +th a +ali ve +accor ding +co t +liter ally +her it +re ti +haha ha +exper i +li kes +g t +ste el +__ __ +ch air +christi an +to wer +diffe rence +m d +tre ss +mi d +prin ce +afric an +fe der +foo t +car ri +ser ved +r ice +sh all +feat ured +ck er +rec ru +po e +sen se +ni fic +com edy +cont ent +f at +po sted +con tribu +tim ate +li ver +mb le +inter net +ag e +europe an +cl ing +gla d +ff ic +sc o +ak es +el le +ter min +ton y +p ale +col our +seri ous +pat ri +movi es +b m +professi onal +ad o +al u +br inging +f alls +isra el +ter m +langu age +bro ok +man n +commun ic +can not +ac ti +p he +y an +entrepre ne +tur key +log ical +lon g +ar m +ur s +work ers +ing ly +gg s +ri c +tu al +recei ve +op ens +ge ar +soci al +fe et +c king +ad ver +fin an +fe els +sp la +h r +ea ster +bra in +ã ģ +fi g +le dge +ne arly +prote ct +ma ssive +e th +aw a +ðŁĺ ģ +y rs +aware ness +defin itely +k n +imag ine +k u +syste ms +ðŁij ı +f as +li k +provi de +am o +disco ver +inf lu +ma ker +g az +fit ness +stre et +er s +te d +w c +ys is +pos itive +hel ped +que st +andre w +bra d +b in +hang ing +l ing +bri ght +se ction +ma ss +ðŁĻ Į +follow ers +ho sting +tem por +fla g +a ve +let ter +k ur +re qui +of ten +cry p +su ff +âļ ½ +russi an +treat ment +al le +ha y +l an +keep ing +hol y +power ful +pre dic +fun d +e specially +windo w +je wel +il y +ðŁĴ ľ +gener ation +app a +seri ously +o d +ðŁĺĤðŁĺĤ ðŁĺĤ +cer ti +iri sh +ðŁij Į +mi ami +be th +v ity +se cu 
+che f +cri me +graph y +ma x +arti sts +re volu +gu ard +spee ch +u c +upd ates +fac es +st ant +chang ed +repor ts +low er +pe ar +n c +k il +loo ked +spe aker +s f +re spect +ok ay +oce an +s itting +architec ture +tra il +se at +i ra +le g +japan ese +d am +u lar +sw im +polit ics +finan cial +ol d +mou th +at temp +de stin +fi shing +atten tion +me m +chang es +deci ded +reli gi +g in +c av +z z +ad am +ma c +wr ite +beg in +sc ul +al ter +is s +ath on +imag es +m oo +jo ined +ðŁĺ ī +âŀ ¡ï¸ı +pas sed +mu sli +h ir +lar gest +cam er +com ic +gh ted +rug by +bur gh +gg ing +te sting +pre par +lau gh +al ed +impro ve +beli ev +adv ice +sha res +he art +tur ning +s b +t el +caf e +n es +dani el +pat ter +t z +se tt +par k +c and +st ick +happ ens +bri an +ne west +e pic +ad or +ki es +war ning +anim als +custo m +ar c +di an +gol d +cor e +t f +c ity +pan ts +re ality +con fi +in ju +fo x +gu il +k new +âĺ º +cor rec +itu de +d den +. # +re duc +pas s +f on +y a +ow ner +re turns +n c +e ast +ap ol +in sur +th o +si m +juni or +be e +ang el +att le +elec tric +hor ror +cra sh +e ye +pat h +sou thern +emplo ye +ge o +t an +ha z +r ally +ðŁı » +proper ty +was n +enjo yed +gre y +g as +bre w +nor thern +hol ding +g p +ta ke +ch art +ly n +dr ama +z o +pa id +throw back +cu p +discu ssion +down town +w ill +le w +b is +t ary +bre ad +up on +r ate +teach ers +it ation +anc ed +cy cle +choo se +d c +ir an +co w +da ve +ra ise +prin cess +fa ith +- > +indu stri +sp ain +guit ar +fac ts +m n +sp en +cour te +go tt +projec ts +au di +o sc +pe ter +s and +intere st +happ iness +ven ue +sol di +surpri se +poten tial +per io +custom er +i i +g ni +manu fac +e co +bro ken +sing er +vel s +wal es +hu s +in j +f our +tal ent +d ying +mat the +fil m +jo ining +s ell +j ar +lma o +sur ger +bb c +sour ces +au stin +ni k +char les +f am +prin ci +ange l +cas h +lo t +o red +pla ys +pl ate +don e +memor y +br ings +n ba +solu tions +teach ing +gr ace +cir cu +hel ps +foun der +mar y +expl ore +de cor +par ts +ch o +inte gr +ha u +is es +pu tting +in er +r it +v y +mic hel +blu es +every day +for ms +bi o +ye ar +p in +t ter +spr ing +) ) +po t +al ing +perform ing +sh an +plan et +mus ical +head s +it alian +stru gg +âĢį âĻ +w ings +pu mp +h h +tr ou +a id +pri me +ear th +pa int +mon t +am y +bb c +fab ulous +fru it +andro id +bour ne +cere mony +enti al +? ? +deb ate +on ing +dra ft +sol ar +t x +j am +cor n +!! !!! +bro o +mil k +po sed +o hi +mo vement +b ren +part ner +p g +et te +ar ies +sh out +n g +leav ing +t ells +sen s +ta ste +kel ly +wor l +gy m +ric h +e gy +pi d +ma s +â Ĥ +courte sy +fran k +incre ase +wr itten +pp ers +re l +ha i +s as +s ound +tt i +w ich +ri ver +.. ." +a g +fel low +ro me +sm all +gen cy +ic an +lux ury +pro of +me t +wild life +mom ents +ra ther +cor ner +com pe +canadi an +lik ely +therap y +li am +econom ic +indi e +rou te +fi ght +ho pe +se tting +ant ly +cro ss +fant asy +de e +sket ch +comp li +ym i +ru les +engine ering +fig ure +ro w +. 
, +f w +syd ney +w ou +t ation +dre w +us es +the re +sp read +struc ture +pat rick +appa rently +ro s +h ills +w we +ann y +com mission +di v +f ying +con sul +anal ysis +ex i +ten nis +vehic le +ðŁĺŃ ðŁĺŃ +as s +high ly +op ened +b ann +ðŁĴ Ļ +mp h +wi shing +v or +fi f +give away +r r +ra y +je ss +g at +ic ymi +x it +high est +yor k +pi e +invol ved +high er +ri e +mal ay +int elli +desp ite +che e +sar ah +be an +reco gni +ar sen +tal ented +pas sion +ic h +ab c +lead s +dise ase +v is +se c +pre senting +m illi +hol e +sho ts +de part +surger y +gov t +b in +du al +e vi +lon ger +ev ol +scre en +portra it +et c +lo se +ch at +p en +p i +om a +s ick +er c +compan ies +en try +plan e +gr y +ven e +liver pool +premi ere +sha red +a red +fil ms +ir a +holi days +cric ket +ici an +v ing +. ) +ul timate +di vision +con duc +se pt +for ces +mon t +s mart +disa pp +sun shine +in d +b less +ma de +col ors +fran k +ir on +bott le +s go +m ood +j ason +er ic +bir th +te en +respon se +tar get +state ment +fe ar +th el +al um +ar ab +bl in +direc tion +ste ps +er ial +wor ked +at l +ðŁĴ ķ +fel t +pol i +scen es +hom es +b ell +e at +ate ful +t in +l ace +fol ks +p se +an n +wis dom +fa v +but ter +s r +are as +sm oo +bi z +dg es +app o +mo re +the m +effe ct +windo ws +sun ny +cap ital +tot ally +c ities +gr ant +mb ers +s low +au tu +il ities +w ro +ri sing +st ics +viol ence +i gh +qu ot +h it +t c +herit age +bu ff +ne s +z ar +den tial +ex ac +ed ge +de ep +aren a +be came +benef its +mar ks +mb er +a z +am es +pre ci +dra gon +re g +d ings +do s +ðŁĴ ª +n el +s ity +me al +di st +leg end +pur chase +pic al +st ick +f at +du ba +profe ss +car to +pro f +coun tries +respon si +se qu +fa b +tribu te +hon ored +prac tic +pur ple +an ton +pa red +t ough +summ er +environ ment +s ons +ðŁĻ ı +m ps +gi es +her oes +t elling +hen ry +f en +know ledge +Ģ ï¸ı +f r +ne g +u re +ac king +hear ts +s oo +hol lywood +ju mp +sau ce +schedu le +tur n +yo ga +cre ating +c ket +cre ek +â Ń +custom ers +ma dri +gu l +asse mb +moun t +c ell +to p +st al +dav is +t wi +sig n +premi er +iti ons +he aring +un k +pati ents +app ear +heav en +al ty +doc tor +a e +plat form +je ff +ðŁĵ · +regi onal +bi d +box ing +ex ten +or ity +a w +w ise +il le +sever al +bi e +s itu +sy ria +âľ ħ +remin der +enter tain +li on +part ners +in n +ph ar +f au +pl s +expe cted +sug ar +deci sion +s b +ch ron +associ ation +leav es +vis ited +sh ap +ðŁĴ ĸ +fur ther +h ann +w i +run s +l er +fun ding +fil led +.. .... 
+tin y +han g +or g +co ol +se min +ðŁı Ĩ +spon s +nav y +sa int +dru g +d al +r oun +co vered +tra ditional +invest ment +de te +al ism +f low +n is +sun rise +fe at +f ted +we ird +je re +ve gan +medic ine +an o +ac cu +deli very +temp le +chang ing +wil son +phili pp +re fe +n d +is er +g ay +r and +ati ves +t ely +p and +intelli g +g are +am bas +de mon +commit tee +strate gy +refu ge +bud get +prote c +pi er +ex press +nom in +econom y +al low +ic on +gal ax +o h +indi vi +dem and +vir gin +lu ke +ali sts +man i +s mi +ju dge +ent y +mic hi +resul t +am ed +spe aks +' , +hou ston +sh in +b ing +fl y +ch em +au to +v as +ge t +ar m +thank s +d in +gan g +x x +si on +loc ated +p l +jo sh +in fo +jo ins +adver ti +ot d +el d +si e +re asons +v ent +ðŁĩºðŁĩ ¸ +â ł +convers ation +stu di +ðŁĶ¥ ðŁĶ¥ +go s +s ounds +un it +mu sc +ge l +ack ed +pac i +co s +de re +u u +a o +la m +inspir ing +ar ms +tw are +mat ters +ad dic +du de +ex t +cri sis +b ath +me et +sing h +expe ct +del hi +resc ue +wor st +au g +shi pping +ser ving +st o +dar k +ac es +histor ic +landsc ape +desig ner +b illion +gr ateful +wa ke +e ve +m iller +hou sing +dy nam +is co +be ha +sh op +pr ou +e as +a sia +e ding +k on +depart ment +aw ar +mar ine +in ci +photograph er +ta pe +lo go +r ings +d it +-- -- +vin yl +w c +vo ting +se ven +ambas sad +dal las +t u +com ment +k ra +b les +w ag +u d +au dio +stri ke +offici al +o ts +me tho +to ols +ra di +al an +hun t +wat ched +a ke +fa ke +drin king +mer ry +m l +b day +ri o +ni ke +c ant +re pe +co stu +mur der +ak ers +ch ers +ou ts +beg inning +so s +ad es +n in +not es +wro te +sol o +c i +li ghting +ur ban +bre xit +att end +shir ts +pla yo +ac tress +pl ic +stand ard +quot es +par ade +anci ent + © +tur ing +re e +pri mary +fla sh +citi z +mat es +ste in +z i +clin ton +sk in +gen e +hu m +g ar +t le +y i +fo cu +de an +pl ants +cy ber +b u +om e +ho p +ad dress +ti x +gi fts +relation ship +sub scri +fe ed +exac tly +haw ks +ex o +stre ss +s n +arre sted +an e +sof tware +z ero +the me +mu mb +im migr +mi a +make up +ple asure +uni vers +har b +eng ine +ap er +r in +br a +institu te +le ather +al th +sing ing +co s +gh ty +me as +st ic +si de +insur ance +co t +pit ch +moun tains +cri min +su pre +valent ine +at er +wou ldn +sc ale +rel ated +re gar +star tup +pack ed +mi ke +week ly +p ts +coun t +ha r +gott en +min d +ber lin +con ditions +swit ch +cor n +sa ve +g li +emer gency +tun ed +sto ck +discu ssing +every body +s day +whe ther +wrest ling +ec es +gen der +ch en +ðŁij Ģ +madri d +mar athon +e gg +i er +th x +as king +kore a +wol f +ay a +g m +g au +at ory +v r +gra ss +k illing +b ble +ur o +un i +e th +sh ore +th en +re ale +bot tom +ex erc +k ar +or ies +ad ri +san ds +se x +. ' +volunte ers +per form +par liam +inclu de +deli ghted +execu tive +fu el +kis s +ã ħ +char ge +h u +ca kes +ve t +g lu +agre e +pr ices +n au +h l +g ru +ra j +streng th +b ic +sp ending +al es +av en +b last +: ( +yo f +nor mal +si x +qu ick +se a +d aw +mee ts +lo vers +upd ated +po tat +comple ted +coo k +opportun ities +p ure +organ ic +tem per +c am +avo id +par king +duba i +and o +di stri +to y +comple tely +don ald +tri al +bas s +b oun +back ground +v as +mar vel +lu m +ru s +t ool +com missi +throw back +fin ding +is lam +! ? 
+st op +e vil +or al +resi dents +i denti +o ak +ðŁİ ¶ +l il +span ish +chap ter +sto pped +direc t +ho sted +pic ked +lab our +lew is +defen se +à ® +health care +wh is +mat h +pe ak +ra ised +fi x +bu ll +th ir +chel sea +fol k +tr e +can di +pau l +ei ther +ad am +poe try +jewel ry +ðŁ ¦ +pr ay +Ø § +g c +o z +wi shes +fore ign +sun g +lear ned +en e +n ing +micha el +illu stration +legend ary +w av +b au +ðŁļ ¨ +cal end +stre ets +â Ĩ +mon ster +bu ck +g r +scho ol +ba th +wa ste +ne ck +ha wa +be ach +re plac +jec t +on er +fac tory +coun t +ðŁĵ ¸ +mor gan +der ing +se an +steph en +de p +no vel +vide os +ic al +press ure +arsen al +ex pre +ir s +tren ding +ss a +fla sh +re sear +thr ough +profess or +scul p +to s +gg ed +mm a +be e +a pe +hun ter +am i +he i +pla stic +bu cks +uni verse +le gen +niger ia +ple ased +ri s +thin ks +autu mn +i ds +d is +anth ony +ðŁı ½ +ak ed +gla sses +fin ance +z er +k as +con tract +nu mbers +sh aw +partner ship +t il +laun ched +s al +victor ia +theat er +usu al +nam es +perio d +eli za +i th +bar cel +ro cks +bag s +mat e +distri bu +j on +di ffic +ali zed +cur ren +sco red +b ha +du blin +ro se +in ted +soli d +beha vi +wal ker +simp ly +garden s +head ed +in i +ohi o +we ap +f o +gl en +e state +ran dom +th under +thr u +k ill +jac ket +it i +entertain ment +thanks giving +ent al +en coura +el o +a ther +tan k +high lights +f ting +ru le +model s +bor der +bj p +hus band +in done +ken ya +be ars +al o +n inten +pi x +str o +or ders +sal ad +ro ads +n or +l ation +sop hi +ðŁı ¼ +pi eces +b one +min s +inclu des +nu tr +phi l +s ent +fun dra +ga in +bor ough +n ad +mon day +activ ity +it ems +be coming +ken ne +de tro +car di +gue sts +u x +world wide +sever e +new s +thank ful +fic tion +ve ge +m all +si an +er al +inj ury +le e +men u +danc ing +scot ti +exam ple +( # +na i +studi os +ba i +ðŁĴ Ľ +j av +diam ond +vin ce +ric k +prote ction +lin col +cham ps +appro ach +d ar +m ile +clou ds +je ff +in fin +l ers +p les +pe ace +go p +âĻ ¡ +tech n +str a +a verage +ef fort +introduc ing +di versity +austr alian +am p +boo st +s ke +pati ent +appreci ate +ici ans +pu r +f ell +woo ds +illu str +ðŁ ĸ +ag ency +ac tions +brit ain +under way +se attle +el and +ag o +f ill +stre aming +pro test +challeng es +ky o +et sy +coo king +exper t +ru ss +rain bow +commer cial +sp in +be ats +c ry +val u +el i +th row +gr ams +le vels +michi gan +c ad +ador able +const itu +w s +pu b +mid night +th at +net fli +braz il +die go +regu lar +jo y +âĤ ¬ +li qu +ea stern +k ni +fl at +n p +bro wn +w er +se y +tt ers +ac ting +v anc +cy cling +program me +ra w +comple x +tat too +throwback thursday +se ssions +ro oms +si ght +speci es +bom b +lau gh +ke eps +mo on +offic ers +con ver +t r +ha sh +t ack +ri ous +ad ap +a j +reco gn +ex po +sug ge +confir med +rol ling +dre ssing +ic t +fri day +ph ones +ri dge +con cept +ro y +ke ys +ef for +c ate +k ne +ev en +l ay +commun ities +mo d +n az +every where +al ab +bit coin +ban ks +out door +feder al +sto res +h p +c al +m ely +sig nific +be ar +re public +clo ser +al lah +pic k +x d +pal ace +ch ill +b am +er ous +un a +al len +out standing +olym pic +supp ly +fi gu +v au +l p +char lie +un es +> >> +legen ds +ici al +co ast +benef it +mul ti +f its +far mers +am ount +si sters +har ve +hon ey +que en +b ers +pl ann +âŃ IJ +m u +barcel ona +al ber +stat us +re main +ex tra +c andy +vi ous +âľ Į +o v +warri ors +-- > +ju mp +am ar +x mas +stu dies +i ors +k or +don ate +pre p +fi sh +im a +pain ted +ad mini +co splay 
+spor ts +dro ps +fi ghter +evi dence +ðŁĴ ª +la ke +ro b +cine ma +pro file +à ± +stan ds +leg acy +sh ape +ro of +ci vil +i ans +sy l +sh am +vo ted +re tail +ph illi +li sted +du ty +n b +th es +f are +au ction +ffici al +stor ms +d p +l oun +sh ops +al y +ani me +multi ple +ðŁĺį ðŁĺį +psy cho +je an +ap art +candi date +gg y +con f +jose ph +w ick +me at +fr ame +c l +for got +ph y +f ing +li ed +re p +se ed +f all +u fc +nu t +lin d +mo de +fiel ds +en ce +s ley +ðŁ¤ Ķ +ch ill +follow ed +announ ces +cor ru +tro phy +them selves +ac le +al du +k ong +l on +s v +bro ke +ander son +ta i +stor y +tempor ary +activ ities +k ati +ari z +cry stal +spo ke +extre mely +tra ding +ðŁĴ ļ +à ¼ +in ch +ed in +out fit +equ ip +ma di +form ed +be ef +po p +ti ger +this day +ti red +neigh b +re tro +is a +un t +t as +kan sas +de st +secon ds +ta y +hur ric +o u +galax y +dad dy +bro w +bur ger +en ced +de sk +ac cur +secre tary +el ite +k ab +ch in +touri sm +bud dy +ici de +dre ssed +u d +vac ation +che ers +com for +charac ters +j et +bu ying +l ins +n ap +reale state +li e +af c +i ii +f ame +n r +b at +ag ent +ma kers +âĢ ¼ +sec tor +op ti +le on +di et +pra yer +hi p +mi r +le x +br y +an a +pas sing +w en +reco very +ak i +po pul +res ort +mar ia +stu ck +read s +ti er +perfe c +netfli x +p oo +cham p +o c +re duce +we red +comm ents +cla im +acci dent +s ag +h ack +sal t +kin da +k iller +i os +z y +ex change +lec ture +eng er +ic king +t au +reve als +pri son +z om +gh an +u l +jour nal +i ot +tr in +jon a +govern or +cap e +quar ter +spec tive +impre ssive +bab ies +t x +m ill +o y +har ri +jo int +su e +collabor ation +tren d +revolu tion +re new +alum ni +ge tt +sh ell +sun day +ent u +ni c +donald trump +block chain +paci fic +expla ins +sp y +ad voc +par adi +to f +star ring +p av +fe ed +br ac +smo ke +ham p +y am +to kyo +si mon +d h +e ffici +phys ical +n j +ell i +s low +gradu ate +americ ans +ti fy +f red +ap ore +fin ds +rob in +we t +not ice +se mi +un ve +k om +pil ot +scre ening +da ily +ðŁĴ Ĺ +roy al +sp a +vo tes +n ag +wh ate +att ending +exper im +ad dition +k ate +sto l +m ali +foo t +chri st +ch an +de e +lic en +glo bal +mo ore +ti a +bri gh +myster y +y ay +âĿ¤ï¸ı âĿ¤ï¸ı +cre ati +me chan +clo ck +di c +âĢ Ķ +pp er +al ph +through out +al low +re sources +selec tion +ham il +bb q +aa aa +virgin ia +dis ney +en g +so red +drin ks +f ancy +consi der +end a +jan e +hand made +du l +on tari +i us +s ville +color ado +whate ver +whe el +promis e +ne ver +desig ns +ab ly +sex ual +vanc ou +at i +con vention +cul tural +sing apore +pro mo +load ed +gla sgo +pp l +n oo +ke e +ste m +men tion +i do +cru ise +ri ding +be comes +be y +âļ½ ï¸ı +tw in +dedic ated +na sh +de si +work out +jen ni +i v +grou ps +rela x +pho eni +li ft +mix ed +m ck +p c +mu st +me tro +ci es +y ar +a im +ang er +i e +rec y +marri ed +dro pped +eng ag +le st +ambassad or +op h +de s +w ick +assi stant +nat ur +fa il +l td +shor t +k ap +sha w +bi gger +rema ins +crit ical +sur vey +co verage +er son +win d +n b +bil ly +let es +ac ts +jim my +at lan +al and +t c +import ance +dam age +f g +stor age +tw t +bon d +bal ance +cr ying +pu ppy +vo te +pu sh +ðŁĴ ľ +pol y +me l +lon don +terr ori +effec tive +corpor ate +atl anta +jac o +nas a +gre ek +sen ate +i sh +ev a +intellig ence +effor ts +al co +k un +h all +di ag +claim s +fir st +h b +ba e +v ul +pu ll + ° +se par +spe ed +vic ti +on thisday +audi ence +r ates +te ach +fil ming +bu sh +son g +y um +br un +ra ine +aw a +par ks +ð Ŀ +ra bb +ra ch +ra 
id +reach ed +ra il +mo ves +selec ted +fr i +ra ising +om y +st ones +su k +franc isco +cas es +cap it +con fu +w tf +po ke +equip ment +gre g +ess ential +off ering +ne x +pi es +be c +cre ation +chair man +cro wn +w al +john ny +shi ft +ne ck +ban g +bir d +ðŁĺ ı +du ck +re serve +de pu +ma sters +over all +no tic +ju ice +sne ak +che er +cla sses +eag les +n ca +car pet +ci vil +coach es +har ris +u ps +b alls +dec or +mar tin +ro s +v ice +announ cement +who se +ti gers +ste red +c ts +dr am +ste el +youn g +inst all +supp o +recor ding +de ck +se ats +l der +ang le +bo t +sty les +elec tions +for tun +n ab +but ter +ari an +ka sh +in ner +ou red +be ast +we i +ic onic +exper ts +ne cess +b eng +jam es +li a +gre ece +ðŁĵ · +ðŁĺ ģ +good bye +m itch +tw ice +mumb ai +ste am +ru sh +med al +ne tt +fashi on +t ar +r s +sav ing +ric ul +l m +sleep ing +brook lyn +mis s +sen ding +disco vered +sp here +of theday +k icks +missi ons +w right +er n +ght ly +i ous +mel bourne +star tu +mo ved +car ry +d ak +ag ues +bel gi +e ma +way ne +do t +er ie +pe l +it unes +matthe w +no body +est ab +cal m +win ds +lu c +prep are +tren ds +exerc ise +adv ant +ðŁĴ ¯ +athle tics +app s +c tions +adv ance +laun ches +litt le +real donaldtrump +eliza beth +carol ina +hu b +hi dden +n w +us er +pol l +great er +mo st +f ed +p at +life style +s ati +sco res +marri age +l r +aven ue +de serve +ri f +ðŁ Ĺ +wat ch +champion ships +gr ay +en ni +cot ton +g om +whe re +pack age +su m +ab solu +new ly +foo ds +ty ler +assemb ly +musli m +ban k +re memb +op tions +produc er +land o +fun ds +u pper +shad ow +pro gre +co p +ing e +leg s +detro it +hill ary +jo se +gi ants +sou p +sustain able +t us +clo thes +roc king +n z +min ne +mat eri +bru ce +ear t +ca sting +independ ent +thou sands +ta h +de cl +veter ans +li ons +wra p +âĢ ¦ +de ss +bl ing +st ine +e ggs +o on +clo sing +z ay +at t +bac on +fa il +ariz ona +de pre +gho st +new sp +w ers +vi p +li ked +id ent +volunte er +ad ult +pu pp +cir cle +mat erial +degre e +gro wn +boo m +calend ar +su r +vie wing +ath letes +ch and +re ll +asi an +en tr +vol ley +victi ms +bo dy +m ama +trans fer +ge ek +in dic +sav ed +ma i +g ent +it s +loun ge +k ol +the ory +situ ation +is lands +ar th +z oo +floo d +vi ously +show ed +parliam ent +ch ev +el ine +at trac +ab ad +ta il +h rs +lu s +por tu +gor y +provi des +to ys +de ath +in fe +an ce +g le +li am +lo ver +hu d +dv d +reve aled +g w +re ment +ca the +l ying +ra dio +der by +stor s +che mi +hosp it +âľ ¨ +' : +ilo ve +le mon +re public +s ni +ne ss +do or +re action +pre gn +fla v +schol ar +spo tify +is ation +vis ual +aw are +spon sored +jo ke +less ons +leg is +lo ck +si mil +ðŁĺ ĭ +kin d +la y +ma h +ho ping +vancou ver +as er +clean ing +gal a +thre at +la p +ach e +ro mance +ex pen +re post +z am +e pi +mir ror +o ak +ad ul +bat man +s lu +l c +vie wed +re views +d ates +indone sia +acti vi +off en +lea f +i si +ag ricul +costu me +s ites +spir itu +appear ance +ir y +st air +applic ation +spec tac +ic ity +ski es +hand le +pun k +paradi se +t n +de al +provi ding +do c +recei ving +bre w +micro soft +à ¶ +fer r +me tro +th ail +y um +car ter +à ¡ +gent le +bre aks +coo per +show case +cu tting +egy pt +bab y +semin ar +gl ori +ss on +fa ve +re hear +lo tte +la dy +al as +pre p +deli vered +nu clear +ir o +engag ement +at ta +con ven +z an +gl ory +hol ds +busine sses +str ange +sch e +it self +gra d +mar kets +f alling +st ats +ge on +bu dd +li s +she et +thi si +co lo +deser t +regi stration +ig n +expla in 
+inter ior +la ws +writ ers +spr ings +k r +fri ed +blo om +inf ra +a o +cre d +pa st +line up +bo o +bre a +boo ts +celebr ity +att acks +bro ok +ev es +ex cu +cher ry +oo p +fas cin +boy friend +se as +n ine +effec ts +po wered +k ha +ðŁĺ Ģ +sh out +con dition +i j +her o +enter pri +win ter +applic ations +sho e +g el +batt le +pro grams +w art +ðŁĴ ¥ +ra p +ho l +dang erous +di a +coun ter +ric s +i or +k night +co at +emo tional +at ures +d as +whe el +fore cast +tran sport +glasgo w +king dom +prepar ing +im medi +ff in +awar ded +prin ting +ro man +fight ers +any more +bel t +p ine +win e +x i +employe es +logi es +al led +de mo +birth day +ange les +lo g +dri vers +neck lace +k ath +s it +athle te +ef s +s burg +pur pose +resi stance +rele ases +t is +vari ous +deli ver +ch al +s anc +opp o +cra w +neu ro +dr a +suppor ters +sna p +diffic ult +swe ar +logi st +pa th +attemp t +à ¥ +swim ming +ste ve +hur t +inclu ded +b ap +wa re +ðŁĴ ĭ +end ers +ja ke +le eds +cli mb +l b +im ple +li sa +clo thing +ðŁĺ İ +d t +com pla +sw ing +stra w +v als +k le +us ers +stor m +cu ts +ontari o +p an +hand some +i ow +ar gu +chec king +scotti sh +Ķ ï¸ı +si er +em ma +po d +patter n +de sh +en h +ed ward +t ing +k h +hal f +lincol n +mo ther +al leg +r c +volley ball +d n +g ay +all y +le ton +gro ve +l oud +adv anced +re spec +cli ent +supre me +thail and +ho w +gi g +to i +do t +dol lar +ðŁij ĩ +p it +r b +h n +produc ed +gg ers +âĨ Ĵ +ml b +can vas +fin eart +us d +in the +p son +actu al +s l +t b +ip ad +en sure +u mb +w d +sk a +mar s +k end +f eli +th ing +count down +absolu te +r out +dra l +p y +inju red +min t +hun ting +mm er +s age +li gh +ac ity +ex pan +mur ray +ar o +sec ure +four th +eag le +reli ef +st akes +industri al +clar k +under standing +see m +pl enty +sil ver +cla u +thre at +sa il +pro duce +ab str +is is +b r +eng ers +wor ry +bie ber +s j +just in +reali ze +ky le +esp n +fil ter +s ch +ty pes +game dev +d ing +twit ter +soldi ers +p om +car bon +y ards +child hood +ri ed +ke l +ele ph +t ons +key note +qui et +wi re +po sting +is sa +repre senting +bac ks +alex ander +celebr ates +ta ining +| | +ch or +esc ape +pe ek +ti ves +fiel d +ssi e +im pac +spons or +r c +we dd +cann ab +si des +trac ks +com par +con trac +techn ical +bi ble +expl oring +sh are +tra v +n ate +ill o +sc ru +m ingham +gun s +of the +sh ame +se es +ca tho +ac cess +ce l +repor ted + » +mari o +p ad +hope fully +ou se +y on +disapp o +ol o +p itt +pa c +ga p +cru sh +s g +k le +ge m +emp ire +dir ty +a is +avi ation +ze aland +fac ing +high way +d anny +spi der +ot ta +ðŁĺ Ħ +w y +col ours +in fl +co sts +olym pics +au s +h m +ho ward +pas ses +lau ren +mu sh +op in +r ho +disc ount +oper ation +em ily +mm m +cham ber +d il +to yo +shi p +sam u +pic tured +un ic +po l +keep er +carto on +st en +ig nor +n ations +n l +ta sting +deta il +offici als +mo tor +franc is +ed itor +ðŁij ĩ +pe ts +rang ers +t g +r n +w ri +nic hol +i se +spo ts +ani e +chec k +tri ple +ku mar +spe akers +ic ing +pre pared +ab use +friend ship +mon th +swi m +air e +sc ent +hamil ton +indi an +j es +yum my +te ars +da wn +i zed +worl ds +ðŁ ķ +b illi +st one +n hs +ba sic +p or +st le +ir on +ol der +cle vel +e ing +ðŁĺįðŁĺį ðŁĺį +prin ts +fir m +air craft +fin est +devel op +aar on +t z +gra ham +own ers +fo li +less on +qu es +bab e +cra ft +ph en +ju n +bir mingham +v ine +ll er +i an +fineart america +evol u +st ab +im per +war d +com ic +wi z +inv ited +du ke +mat ch +por ts +ro ger +diag no +ke pt +te st +vis u +r hy 
+so c +to x +b aker +sur face +co vers +man s +b its +x box +ff le +n an +gar d +h art +wat ers +v illa +re tro +light ning +catho lic +democr acy +neigh bor +pen n +cr an +jona than +la ura +vi bes +su b +coach ing +clear ly +uk raine +bra ve +commit ment +t all +mar t +ra p +mo di +sco tt +bro s +show er +ðŁı ¾ +âĺº ï¸ı +cou sin +appro ach +br e +com pos +hil ari +phil ly +g ad +quick ly +ri an +t m +vir tual +hou ses +k t +phoeni x +w ire +ff y +b unch +anc ing +tal e +snap chat +star ter +h t +k icking +ap art +th y +) ! +blo gger +it z +com fort +ang els +w ash +" : +ar gent +re quest +hon est +mi ghty +bo bby +k g +ro l +thou se +ex po +h c +tab les +mag ical +po sts +de m +n w +or lando +ab er +* ** +ðŁĺ ľ +environ mental +trans formation +mi le +w ic +hir ing +ma ine +bo ar +r ying +ti s +nit ure +twee ted +anton io +opin ion +fin ale +di y +f is +th in +trou ble +le go +fi les +qu art +sp a +curren cy +cli mate +fan art +rail way +sp ace +ban ds +dani el +mo tion +l eng +hol der +oc cu +mar ie +cathe dral +bu zz +bi es +nas car +bm w +bat tery +char lotte +doc tor +zz le +se ven +in san +d dy +st en +lab or +thr illed +se ren +docu mentary +wav es +cer tain +can did +allow ed +ninten do +star wars +ta p +home made +d les +ther ing +bre e +emp ty +pi ano +pos iti +coun try +por k +pu ts +per ry +m atic +spot light +ti st +or ities +we alth +c p +bar bar +commit ted +as sau +pro fit +e ight +hu l +fini shing +run ner +ss o +insp ec +char ged +christ op +lo sing +co al +ho o +ele v +de le +mo ham +don ation +c able +clin ic +j in +manag ed +ter ing +â ¬ +ur ban +depu ty +bb er +bur n +acade mic +o tt +sta ke +it er +sto wn +ack er +advent ures +ad ams +gre g +pro m +vo l +ac qu +con gre +pa int +citiz ens +c all +af ford +v c +as ks +the tic +independ ence +â Ľ +h itting +bl on +fu ture +â ı +in no +gen e +bo ards +di stance +se t +re mem +th al +pre vent +l ang +ob jec +su sp +mat t +in duc +bor o +pi one +re di +vir tu +prin ted +sco pe +shar k +suc ce +a stron +il legal +j ag +c ting +ine e +at o +rob in +nutr ition +b f +du tch +b n +fur niture +for gotten +at ar +ru p +hy per +bran ch +communic ation +degre es +on ia +un cle +promo te +or che +wi i +j s +but ton +ma jor +c bs +bri stol +premi um +ordin ary +e dit +m g +we ed +st even +: ' +gu s +te s +cap tured +dru gs +do w +wr ites +bi shop +whe els +ali zation +disco very +w r +rach el +ne il +hy dr +cu test +entreprene ur +kore an +ore gon +ul ty +perfec tly +suppor ted +histor ical +t wins +ell y +we l +de vil +in come +scienti sts +de leg +h en +on i +ic ed +gi o +cur ry +reve al +e g +buff alo +n ol +op era +camer on +haha haha +j ab +gradu ation +cra ig +r al +i f +organi zation +le ge +g ang +su d +edin burgh +l ack +fli es +g ate +thr ones +q b +the real +e leg +pp in +c les +jam ie +tn am +cryp to +ou l +p ages +a se +roo ts +stu pid +a did +boo t +prote in +s ap +si um +su s +end or +fun ction +don t +en na +ch y +squ e +wor ker +m tv +e a +k an +ðŁĴ ļ +mu s +professi on +t to +oper ations +al lo +c tor +inv ite +sc and +ou th +z im +lin ks +cli ents +sam sung +discu sses +n ell +ul tra +some where +ste wart +ine t +de z +b out +fac tor +ti an +tr ans +jere my +d b +ðŁĩ ¬ +or n +develop ing +spo l +coo per +ma u +rememb ering +tre k +famil y +sen iors +fo ster +att ended +w ing +trans form +ele mentary +hor iz +li sting +malay sia +it ch +warri or +philipp ines +russ ell +m end +initi ative +cre ep +to ps +br iti +a ur +shar p +adverti sing +ug ly +achi ev +materi als +bu g +dev ice +bon us +fac ility +col e +nh l +y 
as +plann ed +pol e +excell ence +tr ick +con fl +r p +achi eve +lo an +swa g +jess ica +ho we +p our +sc u +z oo +r ated +dre sses +re bel +mex ican +co ordin +me ss +atlan tic +t l +osc ar +wal ks +phar mac +investig ation +... # +cc i +eas ily +monday motivation +y ment +au ti +for ced +ar med +colle agues +pap ers +pro per +sha ke +bu c +le an +exhi bit +e vement +co tt +bi z +sp er +k ent +sw an +/ @ +girl friend +haw k +âĺ Ģï¸ı +mon o +ðŁĴ Ľ +stat ue +ðŁĺ ³ +ra s +te eth +preci ous +t ile +p am +swi ft +v ali +no se +dr unk +experi ences +come back +gen ius +wor se +sh ef +ra d +ed it +hon our +au spol +lar ry +h ire +gor don +achi evement +.... .... +su icide +alter native +su p +sur roun +sha ke +ke ith +pe pper +tur k +crimin al +be ck +su m +w alls +cn n +an tic +of fe +col li +win es +high light +hawa ii +emb ar +l fc +ðŁĩ ® +m v +> > +at mo +wor d +car l +shout out +bre wing +ì Ŀ +do f +s ic +hot test +col on +hh h +shu t +low ing +volu me +apart ment +agre ement +de stro +we e +religi ous +iow a +ro d +land ing +re present +ðŁĵ· : +la s +usu ally +h l +c ac +sal v +al ong +laugh ing +be ans +remin ds +pha se +some body +ma sk +ran ked +dest roy +sc i +âĢ¼ ï¸ı +gab ri +le o +ro a +fa iled +si l +refuge es +re vi +r ing +ber ries +coo kies +y y +conserv ation +sh ab +human s +de termin +a in +ni all +as su +mb a +fro m +extre me +vic es +commer ce +ght ful +or dered +suppor ts +re cap +v or +dro pping +correc t +pay ing +mean ing +n j +qui z +" # +busine ss +ðŁĩ® ðŁĩ +indi gen +du st +box es +bl ind +x xx +zz y +ðŁĩ¬ ðŁĩ +ss els +s ant +dd le +hilari ous +desig n +wonder ing +vehic les +k re +ju d +rece ption +par ker +Ã Ń +pri vi +hy dro +sof tball +pol lu +lo cked +ba h +e ar +scri pt +di vi +br ace +geor ge +the ast +bel o +j al +tion ary +dent al +roc ket +pur ch +sh ak +manufac turing +e z +it is +con cep +tb all +ch s +direc ted +pra yers +oo k +phil os +vari ety +che ss +ser ver +g and +bal ti +ðŁĵ ¸ +sel y +cru z +spectac ular +bur ning +re present +i z +t one +mer ce +h ell +bed room +estab li +bo l +com mon +ãĥ » +ab or +kit ty +hei ghts +re pair +willi am +qu ake +alab ama +popul ation +re v +re tt +i sts +n ite +le m +a ha +clevel and +r m +po ver +ob se +mon tre +man ia + ® +con ne +car ni +sh ah +f y +u a +sc or +strugg le +bo b +' ' +appro pri +deci de +ff ed +ca ster +s ort +hun gry +dra g +ا Ù +gr ounds +d w +sli ghtly +car din +dead line +bron ze +web in +bar ry +sil ence +e uro +op tion +ear n +ðŁĴ ĸ +howe ver +na ren +na ils +bath room +v ine +ph d +min ing +gar age +( ) +shou lder +defe at +di r +o v +liber ty +ple as +x on +com pre +a v +j in +ab les +sil ent +fam ili +vis its +di pl +ha bit +milli ons +regar ding +innov ative +sen ator +r ts +v on +k l +wh il +requi red +âĿ Ħ +lu v +presi dential +po cket +hun dre +sho wn +fro zen +to ward +fa st +confi dence +r ough +indivi dual +qu et +ðŁı ½ +dom e +fi fa +engine er +z en +re mix +ðŁĺ ĥ +pl ant +min or +robin son +as y +pul led +cer tain +potat o +( : +pre s +oc ca +w it +it em +si e +d ating +thom pson +own ed +an u +vi e +te dly +good night +ex cept +ðŁĮ Ł +ira q +ki e +ren ces +li p +simil ar +sau di +vi g +arth ur +pic ks +mil an +hon da +ma xi +o g +ste st +ar ch +analy tics +ba sti +pear l +ter ry +hor se +ast ro +ac ce +laun ching +inter national +s no +ta sty +den ver +ir l +pe te +tor n +advant age +var sity +" " +sol e +g c +lan g +demon str +ol ds +un ity +ne ts +insp ire +cre te +nash ville +nel son +e ter +wal k +hy un +m ack +tre as +see king +ra ge +bru sh +ab and +whil st +co con +h ong 
+shel ter +i p +possi bly +so o +it ed +â Ħ +rac es +war ming +qu in +tele vision +mat ches +ra pi +ment al +pal m +jenni fer +rol ls +indi ana +b ars +cat ching +resc u +candid ates +fa re +âł Ģ +se o +vie tnam +alph a +michel le +visi ble +re gre +wn ed +app le +li p +f fe +li z +york shire +ha il +se asons +be gan +m d +k c +la p +fascin ating +hel p +ur y +u ms +nu ts +se m +along side +bri dge +ori al +o ve +world cup +briti sh +comfor table +i ve +hot els +fair s +hor ri +so x +d ining +stre am +bar ri +ss y +w im +ter ms +v u +pe re +l ens +wal ked +r or +l ars +shi eld +dou bt +pro to +cro ssing +me ant +medi um +ad ding +e b +che ap +fun c +pap er +bran ds +ry an +feed back +col lins +un known +tro pical +sand wich +fal len +for mu +selec t +lo ads +answ ers +or i +mag a +d or +du o +ali e +dru m +ur i +de er +sou l +sh ut +âĺ º +sto len +don ated +bu zz +patri ots +ha l +na sty +nomin ated +mon te +ki a +th ri +ing u +te sts +pe tro +ðŁij ij +ho sts +ne st +to pic +pat ch +m my +hu gh +ab ilities +ma the +s miles +g b +ag enda +insi ghts +chi p +ph an +fail ure +dg ers +ha i +signific ant +sho ck +ru ral +gl am +figu res +pot us +o ta +mini stry +appe ars +fe ar +r h +americ an +h att +son y +fi res +e di +n ou +e qui +wh en +univers al +mad ness +i x +sculp ture +b ach +t to +swe den +et a +en to +develop ed +month ly +ma ps +ra h +le d +del ta +sa ints +is lam +ben ch +fif th +v ard +so cks +wel coming +j e +tur ner +v b +ad i +nor way +ad y +hurric ane +por sche +tra dition +ex am +newsp aper +lu ci +a ver +ide al +d na +madi son +ðŁ § +wit ness +ac ou +insi ght +si mon +robo t +sna ke +n bc +ac o +ro ss +sh ment +religi on +ch ann +in su +camp bell +inst alled +we ather +hor ses +ol i +rober t +k az +ðŁı Ģ +veter an +th read +quar ter +ea sier +cap ture +hi pho +law rence +roman tic +pas sion +cl ay +ox ford +th ai +stu dying +fi a +elec ted +most ly +c b +tu mb +âĢįâĻ Ĥ +x l +sh an +fa ster +ev ans +sli de +sh ri +see k +mi es +chemi stry +pump kin +tu m +, , +ro om +fi red +li ps +pres ence +af f +brew ery +arri ve +sw ag +photo graph +pen gu +chi ps +at tor +val ues +accur ate +con temporary +princi pal +cannab is +ari o +any where +gi a +democr ats +buil dings +li ved +ap s +neg ative +m are +bal lo +li on +diam on +loo k +re form +tom my +il la +tre ats +hundre ds +port land +wor thy +ex cep +ar ia +ido l +be er +cd n +y u +aw k +ðŁĩ ¨ +c ells +à ³ +ident ity +dra wn +de vil +f inger +th am +ðŁij Ĭ +ear ned +fin tech +dol ph +twee ting +evolu tion +ðŁĵ į +est im +m vp +n one +ðŁĩºðŁĩ ¸ +toyo ta +au x +mar in +b old +l bs +ste ak +mur phy +it able +lou is +sol ve +pi a +sk ir +ill ino +webin ar +ban ana +lo v +th on +vo ters +afford able +defe ated +lm fa +air lines +super b +any way +deb t +bo red +ver si +me tal +responsi ble +m k +s se +f ay +cau sed +f p +recomm end +pla za +spor ting +alli ance +au stri +n n +t ours +surpri sed +arti f +th under +sur ve +wor e +bri ef +necess ary +z ie +ash ley +dra ke +r t +kni fe +im mun +char ges +a the +bri de +rep ly +g av +broad cast +pu er +brace let +cap acity +harve st +id k +perfor man +d ding +il ers +par a +jam a +pro vince +ch in +id ers +har i +te aser +ch en +re stor +r at +fl at +col om +ðŁĴ ŀ +ðŁĩ¨ ðŁĩ +smoo th +r t +p itch +stay ing +isra eli +t cot +per spective +do ck +open er +lo vel +x o +class room +l ington +go al +kenne dy +sh am +sp aces +mitch ell +home coming +uk i +claim ed +recru it +ing o +mu fc +mon it +g roo +resi dent +per cent +per man +otta wa +int ment +an xi +stand ards +wor ship +sche me +f x 
+pot ter +bi an +athle tic +af gh +s se +sat ell +par ties +âĿ¤ âĿ¤ +infra structure +rela x +mo du +wor n +smo king +y ach +practic es +wc w +am b +dome stic +tay lor +k entu +provi ded +mo di +ve g +" ... +ob serv +ðŁĺ © +be ard +m our +an gry +ðŁĺ ± +startu ps +woo den +di ve +na il +anti que +ro ses +torn ado +m at +^ ^ +su spect +far m +de vices +me ga +tu l +scholar ship +ge e +disa ster +arri val +po in +mar c +kati e +bb ed +fal se +deser ves +ric hard +ju ana +fre y +tion ed +hy bri +r w +sar ah +ach i +c ure +o le +mor ris +ch ic +broad way +la bel +pa k +pover ty +gol f +e red +f u +er ies +be es +alo gue +st el +wire less +je wish +ti de +blo cked +life time +b har +sp lit +am ster +th i +jo shu +br unch +ha ps +s for +oo ps +ka poor +hi king +suppo sed +ro of +re as +tra in +ti ght +tru mp +bas ically +r r +ea red +see ds +entr ance +c p +wi e +son ic +vic tim +he re +e h +ear rings +sal mon +arc tic +an ne +dou gla +corru ption +hann ah +ha sn +vo ices +con ce +att a +fle et +clin ical +democr atic +ton y +st ood +le f +twit ch +a il +honest ly +incre ased +dro me +don na +accep ted +visit ors +ap ar +ad or +p ar +jer ry +ra i +brand on +ab u +!! !!!! +me me +in gh +glori ous +b hu +pu mp +j ol +li ke +fi sher +ma z +ag an +destin ation +play list +le tters +gen u +br ace +celebr ated +bann er +r he +dra gon +ðŁĺ ħ +sig nature +gre y +âľ Ķï¸ı +al ice +be red +ph er +ber n +ca th +ga thering +sc oring +influ ence +sm iling +de pt +lo cal +a x +ac u +reti rement +hon or +her self +chem ical +asse ss +y all +fre qu +appreci ation +ac a +cho ir +cu z +so il +c il +repor ting +u h +enterpri se +gr at +jaco b +ru m +fe e +j ak +sp in +bi kes +phi a +ste re +p is +bloo d +t att +ra ft +war ren +sh eri +back stage +mar sh +hash tag +ther ine +re in +game day +guar an +reci pes +min ds +stron ger +issu ed +bic y +n ak +ment ed +sc ary +u x +pre vious +tt le +th ats +ac tors +u ma +tin a +bun ny +promo tion +u ss +oli ver +montre al +what s +appreci ated +la kes +excu se +kno wing +pri zes +musc le +shad es +sco t +ing redi +electr onic +ju an +comb at +s ri +e h +turk ish +l om +stri kes +pri son +re e +po pe +vi d +ol dest +dol l +sw iss +certi fied +cli p +re turning +lat or +le igh +tt es +wat son +heal ing +el im +per haps +ha ss +k au +d der +mou se +new castle +indigen ous +wel comes +co le +tau ght +no ise +appe ar +jo e +can on +wedne sday +u tah +c tive +dri ven +i v +c ell +stri p +ac c +focu sed +ar rest +sto cks +wo o +â Ĺ +notic ed +shad o +di spla +ter ror +bor ne +secon d +que ens +wo ke +ja il +no tt +cam bridge +har t +se af +fa x +ac cept +âĺ ħ +goo ds +k at +t win +h s +thou sand +s ins +su ite +amp ton +ar n +rele v +ric har +hoo ps +n bc +class ic +p ab +soldi er +de plo +le ans +install ation +cla sh +le ban +ee e +ti re +belo ved +fu sion +travel ing +ne i +coo kie +glo be +phys ics +s q +co l +wol ves +d l +ex it +" - +foo tball +le af +ster ling +hi de +minne so +fresh man +natu re +indi e +supp lies +bri s +iri sh +ink tober +doo dle +ic op +mess ages +adul ts +recor ded +fix ed +ar do +offe red +under ground +dr one +p ine +ma inten +and re +ham mer +s x +r ound +hi ke +bra d +ro me +fu ll +on ey +ro ws +colum bia +archi ves +appro ved +bat ch +illino is +recogn ition +shou ldn +fo g +nca a +ke vin +human ity +al though +pow ers +p ou +s ar +pe st +alco hol +con sci +phil adel +en o +t m +ok la +cate gory +particip ate +accu sed +bri ef +po em +clu bs +consul t +ja b +big data +amster dam +ac ing +certi fic +n u +d at +impro ved +and y +campa ig +pale stin 
+p ace +mo bi +feel ings +wol f +bra in +pro pos +inter active +prin ce +inde x +c is +cha e +peace ful +co vering +ac o +cour ses +mon key +re place +b l +bloo dy +tal es +brigh ton +neighbor hood +g ates +spiritu al +af raid +bre ast +b ones +ðŁij ī +vide o +w au +tou ch +inju ries +car l +ri x +une x +âĢ ¢ +fre d +consi dered +thu si +an ch +on y +u sa +graph ics +ac re +ðŁĺ © +com memor +com mod +go ti +guar dian +star bucks +pre vention +haha haha +admini stration +portu gal +fac ulty +bet a +ul a +al bert +bre ath +er i +le tting +tr ic +ment ation +incredi bly +ten nes +v d +ðŁĻ Ī +ed die +br ick +gr ill +bt w +wat ches +resear chers +t ney +ni e +p as +a ster +vi br +poke mon +ch rome +go at +pitt s +il ly +festi ve +y d +can al +ðŁ Ĩ +fi es +car los +re que +partic i +tra ins +sam ple +temper ature +sym ph +pic king +in door +z ers +playo ffs +____ ____ +ap es +ly rics +islam ic +performan ces +d ick +spar k +se as +hom a +gr ound +disc i +employe e +com mu +alas ka +al an +fe ast +dg ing +ban king +manu el +slow ly +tru cks +mc car +oo o +sc rat +orche stra +indivi du +m x +bre ath +stair s +equ ality +bla ke +loc ations +cocon ut +balti more +aa a +l c +ðŁı Ĩ +har vey +resi st +immigr ation +adid as +fil i +re f +lg bt +mo s +pp i +ken ny +terr or +ban e +apol is +s g +social media +ka i +hon est +as sas +bol lywood +âĢįâĻ Ģï¸ı +ferr ari +hor n +cryp to +bo om +mainten ance +i di +s man +w l +ext ended +in sul +ve s +go sp +tr i +pi g +tar ge +cel er +st ati +sm h +ri dic +appe al +? ) +con clu +cos me +she ep +christop her +en thusi +po lish +me ts +oun ded +sustain ability +creati vity +con crete +ra i +ali en +ble ss +te es +clu b +ro t +bo s +ex ist +perfe ction +lu ck +rock y +expen sive +mean while +happy birthday +pre t +thr iller +ca ve +playo ff +som er +l u +le x +def ence +am writing +home less +pro phe +ch et +past or +ðŁ¤ £ +land er +ww w +Ģ ï¸ı +tic a +! # +o tic +rad ar +po sters +pow der +po li +ha un +tra p +bl in +assau lt +shor ts +re y +sh y +squ ir +rac ist +gar lic +fu r +remo te +sm ell +impre ssed +fing ers +âł Ģ +din o +le ment +s nu +promo ting +str ing +produc tive +b age +ma son +ra z +direc tly +j k +ev al +ðŁij Ĭ +doc tors +co w +ri der +st v +re move +w u +na than +ro d +n r += > +affe cted +inve st +mp tion +g inger +o d +agricul ture +s que +mu g +coun ting +ke e +mag nific +coo k +ani stan +roo t +plac ed +sym po +gh ana +un d +che er +thro wing +secre ts +f illing +opti mi +butter fly +bu bb +ðŁĺ ī +terri ble +d g +sil k +obse ssed +lo u +ai de +sal ute +mon u +philadel phia +scienti fic +i st +u ae +dess ert +bott les +can yon +ðŁĺ Ī +car ib +o ther +w ich +re source +guil ty +un d +le on +e ss +kan e +el e +tra iner +he im +an te +man age +roo kie +tre ated +po ses +rs vp +cau ses +aw ak +je well +le tt +on ics +tit les +cardi ff +g aga +bu mp +use ful +? ! 
+loo se +bb ing +: : +argent ina +de bu +cy cl +wh el +dis gu +j el +k ills +bio logy +ex ter +tra sh +bo dies +tr am +circu it +expe ct +la ds +w ells +sho t +ge e +naren dr +fa stest +b ent +b ills +mar shall +h ats +intro duce +citi zen +im possible +gi b +az z +net working +r ant +thin k +in dy +st ops +f theday +bri an +* * +amo di +dom e +coura ge +pac king +af fairs +g n +si zed +ent ary +pol and +swit zer +afgh anistan +w u +ten der +subscri be +mo sco +att end +republic an +hon ey +âĢ ĭ +si mul +we ster +foo die +or o +midd le +ab t +co pies +ma je +narendr amodi +ty pical +inspir ational +vit am +wis con +cu bs +tiv ity +h ali +e ars +k ay +d are +mari juana +cu rious +an ia +tom ato +re mind +ðŁĩ · +sc ared +cou p +po et +land ed +ri d +wra pped +mor ri +climb ing +e ws +fe eding +con tra +tho logy +gri d +ti vely +read er +la ser +di ving +di g +lat in +ti ed +shake spe +o ci +ad m +show ers +chu ck +mar cus +oo s +kne e +o live +ow l +dy lan +an no +g ym +deci sions +well ness +arri ves +sati s +chri s +thur s +ðŁ¤ £ +inter views +thank you +switzer land +over night +journ alist +ser ves +vol can +.... ... +plo t +nic ol +car rying +mag ne +tre asure +ex p +be ver +ðŁĺ ¢ +mar ty +mo le +don ations +recogni zed +b h +du s +sh ann +al do +success fully +ent e +ðŁĺĤðŁĺĤ ðŁĺĤðŁĺĤ +cab inet +cu is +tit led +d as +so l +strate gies +deli vering +ad ds +ani an +ne ther +ðŁĴ ĥ +con tain +su its +pa irs +to dd +rel la +ro pe +ci o +cro p +paint ings +su z +re jec +bu st +d h +fra ud +m h +contro l +je al +destroy ed +al lows +wo ol +minneso ta +om en +j u +sympo sium +d af +lim it +accoun ts +load ing +inter n +re solution +hol land +qu al +meet ings +gra ve +cam ping +v am +re nov +liber al +am ber +gre e +hu mb +fe ver +el ing +broo ks +à ² +be th +ad ed +al t +ro e +perform ed +jo sh +frank lin +nic ole +de ss +bb s +m g +net works +min im +al t +weap ons +gu y +jas on +g ha +harb our +at on +pra ise +kentu cky +bel fast +st icks +blo ss +ho pes +an thro +famili ar +wa it +ch ile +depre ssion +la x +je ts +le ice +recei ves +si er +an k +de x +inde ed +fle xi +fab ric +lam b +hel icop +am anda +âĢĶ âĢĶ +compe te +sn ack +techno logies +sy rian +mom s +mu ham +cho sen +an at +dev on +shar ks +re t +fundra iser +selfi es +st ations +communic ations +tennes see +tu tor +ro t +valu able +dynam ic +nur se +i ed +earth quake +deser ved +a ve +sar a +stre tch +dougla s +ne pal +à § +ob viously +d ame +ra pe +any body +k w +pat rol +hol ders +h anna +info graphic +ec o +be ating +stan ley +bo ats +ri bb +e z +wit ch +inv a +ac id +boar ding +- @ +gi l +da ve +care ers +opp os +l loy +in ter +do pe +re su +j agu +sh ade +in dy +on ist +rel ations +ag en +ab le +inci dent +me ter +shar ma +id r +pro ve +immedi ately +tro ops +am an +g low +gaz a +blo cks +person al +chron ic +all er +si d +sh r +whats app +lu cy +ar chae +ho u +journ alism +our selves +go t +the med +shap ed +we ak +cas ual +leng th +sla m +ab bey +e v +coun ter +est a +reci pi +cha pel +expan sion +sel f +suff ering +sp ice +n z +sp art +desp er +boo king +quart ers +y on +ðŁĴ Ĺ +p k +continu ed +- # +man hatt +tal ked +sh en +com bo +hybri d +je ans +liqu id +se al +re tweets +ac celer +collec tive +t as +: )) +profession als +ra w +o tt +su san +ir ing +okla homa +re ven +survi val +cre ator +tran sit +st ac +sur f +i k +ed iting +ch illing +bai ley +ste al +ra ble +pa rent +hun ger +sn app +collec t +philos oph +dedic ation +c f +c m +le ep +repe at +re ha +un fortun +a er +a ero +abstr act +mon itor +ag ents +bu l +sci 
ence +harb or +drag ons +floo ding +ac compli +d ash +juli a +the red +tues day +cy ber +b low +ta ined +le m +refe rence +pp o +ne goti +char le +con nor +au lt +access ories +commissi oner +rain y +re ar +advis ory +luc as +ma id +co al +k av +pol o +ðŁı ¾ +tran sport +mar gare +straw berry +bur ns +gre ens +ne v +partici pants +col in +belgi um +col our +in form +d ell +br on +cal y +kick off +strate gic +re union +hon ors +li b +egy p +âŃIJ ï¸ı +hy po +si zes +regi stered +bet es +relax ing +bloo m +inten se +valent ines +insan e +w wii +p x +tri o +bla de +wiscon sin +con e +plat in +ali ze +ra ven +incre asing +indi ans +il ian +bl u +rabb it +exten sion +je f +au di +fer ry +s ell +a day +us b +swe at +cham pag +metho d +mem ph +assi st +s by +ca pe +remo ved +mag n +v t +r ams +f bi +tack le +phe w +h on +motor cycle +su spec +eleph ant +sub ject +let te +da iry +whe at +awk ward +ac t +tro l +mit ted +zay n +sheri ff +ene my +con s +ke tt +bul ls +ev alu +bt c +satell ite +ho lo +por ter +dia betes +bet ter +rele asing +sur f +: - +se basti +collec ting +en cing +e thi +go ds +al ley +health y +m ills +sma sh +co pper +cr ack +read ers +sp ac +licen se +bas ket +bang la +en tic +om i +m ere +si vely +anim ation +lan es +dent ally +chill in +fi e +k aren +dep th +li pse +n g +ri p +mel o +sand y +ðŁijı ðŁijı +vin cent +nu t +hu g +who le +cre ates +? ??? +âĿ¤ï¸ı âĿ¤ï¸ı +bak ed +up grade +rober ts +har a +carib bean +auth entic +mb s +mosco w +attor ney +wi ki +ch lo +hu ll +cor k +" ! +sty lish +ðŁĵ¸ : +di ary +impro ving +ex pand +bri ght +pollu tion +k nights +person ality +chec ked +fac ilities +z el +bow ling +gu er +ðŁİ Ĥ +on going +un its +hoo k +be ck +confl ict +to dd +far ming +educ ational +k ak +cla y +stro ke +bel ly +explo re +mill enni +th m +loo p +sm s +consi st +cir ca +br yan +d ab +youn ger +soli dar +pp a +experi enced +b ella +bo ard +shef field +steph en +consu mer +sub mit +spon sor +t ang +ag gre +comb ined +trac king +sand ers +b az +survi ve +fer red +equ al +se p +re ed +str ong +priv acy +st ap +un g +ac ry +pa sta +pir ates +ag er +fair y +du p +introduc ed +wi p +let s +spr ay +ðŁĵ º +gre w +a sts +pitts burgh +new york +jo ey +lau ren +tra de +ch op +pi pe +cla ire +behavi or +v ap +cre ws +lap top +ðŁ¤ Ĺ +che ster +disci pl +d f +out doors +k s +go ver +super star +cas ino +far mer +; -) +re turned +ðŁı Ī +ma il +roa sted +co sta +v ill +pe z +gard ening +distribu tion +sh ining +inve stors +ra sp +dec ades +reali zed +bar n +p ti +st able +ut d +pan thers +m ens +b n +ca de +bu cket +yn n +when ever +wa ke +da is +ber nie +lo dge +ju lie +atmo sphere +ðŁĺĺ ðŁĺĺ +major ity +par ti +exc it +cu t +me h +musli ms +be gun +fli ghts +vene ss +ce me +po sing +so le +g ou +dark ness +pe ach +cel tic +auth ority +grand ma +ful ness +smi th +speci fic +gar cia +co ins +good ness +aldu b +recru iting +den nis +gar y +sle eve +weap on +pl z +disco ver +harri son +recruit ment +ja i +ch im +com pared +tom s +mo thers +am y +archi ve +t ask +ben jam +se g +law yer +al um +inve sting +mi e +che z +j p +a ke +fl am +wall paper +âĻ¥ ï¸ı +t ton +che st +favor ites +we igh +coo lest +r ating +relev ant +lo gan +ma ple +run ners +pri or +peop le +ma ur +terrori st +te sted +carni val +su spen +me asure +m v +cyber security +app ren +terror ism +o z +v ital +ni es +gon z +fun ded +twi st +assess ment +die sel +en for +colum n +ad dressing +ca sts +pay ment +x ton +fi er +, ' +la st +ne e +un less +clo se +sk ill +cuis ine +fun eral +ti les +a un +k ru +relation ships 
+ðŁĴ ¯ +ev ent +âĢįâĻĤ ï¸ı +kind ness +pro posed +acou stic +a es +defen der +dan ce +h tt +w at +vo y +ðŁ¤ ĺ +au s +cli ff +sear ching +beauti fully +in qu +at l +speci alist +ðŁIJ ¶ +da i +tra ils +class ics +inst ant +v ous +re venue +mar ch +kir k +fr inge +fire works +tri via +âĺ ħ +tr action +wal ter +mo to +l ily +att itude +cli mb +sc an +sav ings +c w +fa ith +cred its +ab led +gra ff +auto graph +he he +ran ch +ha d +ro gers +ðŁĮ ¹ +f in +re qu +fol k +ad ditional +lyn n +u ber +dol lars +lo gic +wor th +so m +the sis +p ound +bi c +st ur +cer am +spen cer +en tered +v amp +organi zed +âľ Ī +pp s +tr on +merce des +no ti +compet itive +do w +ous ness +vic tor +gr illed +na i +pu tin +ab ra +bl ame +alex and +anim al +dec ent +p ent +inter ior +:' ) +but ler +bal let +ðŁĴ Ķ +albu ms +down s +la d +si r +pla in +p ers +blon de +dis c +paki stan +se ment +ga a +w age +ch as +man i +co ps +terr it +lo l +lau ghter +ri vers +magnific ent +lam p +w b +new sle +char ts +ble ssing +p unch +lon gest +fl oral +cu tie +fare well +sto pping +mb b +bu d +chee se +de cla +si m +mc donald +de ter +you th +t ch +fre der +kin dle +fer n +at or +as leep +p ond +spr int +p ounds +la zy +gh e +fundra ising +dead ly +gran de +dou g +he y +lin da +consi dering +i um +gol den +vi k +auth ors +di ss +u ally +appropri ate +mor ning +y le +hon oring +foli o +be c +re bec +fin land +formu la +corn wall +sh ay +cau sing +bl end +sig nal +t ent +kash mir +nation als +har mony +sc out +acce ssi +he ight +medi eval +impro vement +ke es +prac tical +car d +de par +hu n +om ing +cal gary +ste l +bu bble +gur u +ma h +unex pe +n h +ed a +me at +i ge +si o +god dess +in ches +tun es +br itt +sti on +ra j +âĻ « +mer cy +ðŁĴ ĺ +sen ds +i est +pol ici +val e +reduc ed +as ap +vi jay +defen sive +celebr ations +ri ders +med itation +har mon +g ing + ¡ +program ming +in au +sud den +m h +replac ement +sk u +j ar +gra des +ta st +k itt +brand ing +k aw +boo t +f ought +p ays +g f +iz ation +ho p +k k +activi st +v end +coast al +cha os +ðŁĶ ´ +se me +bill board +li fting +cu mb +sc al +ðŁĸ ¤ +stru ck +l v +indie dev +beat en +jun gle +al right +destin y +m ing +k c +ch ances +om an +q atar +cra f +tra ined +pri x +char m +o tive +s mu +e c +and ers +hand ed +al ban +certain ly +arri ving +i ze +sa i +tr ack +pain ter +hu mble +appo intment +head line +manag ing +mo d +as pe +andre a +à ¤ +ethi op +un ited +exi st +bal i +k ad +n t +d red +re x +recogni ze +tam pa +be ers +ati a +he els +no te +transport ation +tur tle +re de +hipho p +sp icy +sp urs +⬠ĩ +cor p +ther n +to ast +hur ry +proper ties +ma ge +mar co +ele ments +bou ti +syn drome +ms g +develop er +gra ders +he im +re sil +off ices +del ay +di men +vin tag +barbar a +ðŁĺ ± +vene zu +cu lar +fac ed +bar n +ðŁĺ Ĩ +survi vor +wor m +confu sed +passion ate +Ø ± +identi fy +electr icity +sou ls +brad ley +repor tedly +lun ch +shel f +eli a +swee t +smoo th +emplo yment +am el +manhatt an +ste am +oun ts +ye p +li ving +un e +descri be +ca res +man ila +sha wn +ac ted +bas h +st even +re st +pet ition +div ine +wel sh +rac e +platin um +ðŁĮ ¸ +p b +extra ordinary +solidar ity +m all +on ion +schedu led +game of +fer gu +de ms +nor m +p k +tri als +polici es +publi shing +st ole +fron t +charac ter +van ia +ex ce +sti e +sc a +resi dential +sa iling +ðŁĶ¥ðŁĶ¥ ðŁĶ¥ +spons ors +th ick +champag ne +she pher +continu ing +ven ice +per th +na p +a ster +y ak +un limited +cho ices +ne o +hi v +repor ter +bru ssels +f old +dy s +se mi +la wn +it alia +wi fi +as k +em ed 
+fr ame +monit oring +ste ad +i da +gr in +is a +fli p +re stric +offen sive +atta ched +di sh +wh y +philli ps +gre et +p als +mix tape +v ou +fiel der +spar k +alber ta +g len +ca sh +s ri +u ri +ro dri +entreprene urs +climate change +p sy +d le +em ents +lin ked +nether lands +acci dentally +oppos ition +vel vet +ra ys +c w +om o +m f +lmfa o +newsle tter +: ) +toi let +liter ature +di sp +phili p +uni form +sudden ly +head er +cool er +-- - +prou d +bri g +nis san +scienti st +j ah +con centr +pac ks +appo inted +so ap +eng age +cho se +âĻ ¡ +se tup +jeal ous +har ry +g ation +tun nel +te mp +osc ars +dec ade +recomm ended +child ren +ab a +anxi ety +ve ments +sal on +pho too +organi z +mach ines +ab s +vil le +hy pe +ti ff +emer ging +av geek +[ # +contribu tion +bra dy +re sto +g mail +fit z +photo shoot +hel met +h t +eleg ant +ug anda +nur sing +or leans +pen n +na h +foo tage +em a +w o +w ad +concer ns +ve re +re mark +who ever +str ang +p t +qu it +sh ang +histor y +s ick +perman ent +ill ness +col d +visi on +he m +ar row +con vic +pin k +oc cup +bal d +ex hau +u of +am o +on t +ãĥ » +adop t +la id +smo ked +inter pre +ess enti +associ ated +b d +bb y +fi er +inst all +dipl om +con diti +c f +w ak +any a +gr aci +fi sher +s ss +ap r +il it +mus ician +symph ony +cor d +h ack +le gi +l v +bless ings +hum or +sc ra +e ti +min ster +trav elling +bu sh +jewell ery +li me +!! ! +pregn ant +pe e +lo b +cap ital +ip a +pen cil +la bor +duc ks +prou dly +wedd ing +dere k +m w +pe g +valent ine +an gu +re treat +pro spect +dang er +vul ner +up set +, # +sr k +x im +thur sday +n fl +kis ses +re ds +cr ack +re ward +c u +ko k +me te +aband oned +it t +me als +sp ell +stan bul +del ays +ru m +le op +gu m +no va +super man +ch ick +m is +dram atic +inno cent +r ounds +re c +auti sm +bangla desh +mor al +mo vie +sp oo +k la +âĥ £ +ou ting +mess i +ab road +loo kin +a im +q i +st ack +colla ge +à ¯ +hud son +sc an +ho e +ch au +oc cur +comm ander +ho les +ðŁİ Ħ +bi as +v on +stick er +ma k +responsi bility +colum bus +sa int +ed mon +rac ism +far ms +w en +gul f +may o +!!!! !!!! +corpor ation +ba chel +el a +inter nal +je ep +fol lows +di alogue +de rer +smart phone +he len +rich mond +equ ity +s land +b g +ne ar +av i +memph is +we ir +discu ssed +bad ge +p up +mi stake +phen omen +un ite +ðŁ Ľ +de pic +ri des +in augu +n at +sof twitter +comb ination +gosp el +âļ ¾ +ad mission +retro gaming +ðŁIJ ¾ +sch u +mb o +jun ction +al arm +à ¦ +gr ac +kh ali +k ul +m ale +cap tion +wi sh +te re +cor ps +ru bber +play station +er in +effici ent +l or +jo kes +in ary +nor man +lu is +inaugu ral +ch ed +âļ½ ï¸ı +di p +to e +str at +aa c +am u +pi er +co tt +comm and +tt en +sn oo +cu be +clo ses +class ical +s word +expre ssion +reach ing +n app +co st +affe ct +ric o +gi f +brea the +tri be +or tho +h ay +l g +fri es +n m +hi ding +richar ds +en de +mic ro +capit ol +cop y +ro m +regi me +mary land +tax i +di al +embar ra +un believ +ch t +v s +elim in +o dd +pen ny +sound track +l ings +trans ition +rema ining +a is +mali k +? !? 
+rand om +def end +ul tra +tru m +danc er +st ol +dri ve +a ver +ro ast +defin ition +se an +excit ement +partic ul +su rely +sh av +ber y +di shes +com m +is ol +i am +ob li +gho st +hugh es +chi efs +b as +conserv ative +speci al +fe min +sh ri +n ancy +inte l +tu ne +ðŁĩ ª +jo el +gg le +mo to +ðŁĺ Ķ +bu ck +d ag +antic ip +mont ana +gu id +fro g +ec raft +op e +dri ves +nu mer +x y +color ful +wednesday wisdom +illu min +bey on +inau gur +deep ly +pre fer +for tune +coo ked +ti ble +âĺ ķ +swe ater +it ter +tt y +u i +gi e +com plic +~ ~ +tax es +cu ps +di verse +sam anth +âłĢ âłĢ +ba king +sy mp +wa i +be half +mer cur +travel s +ðŁİī ðŁİ +or ia +eng aged +jump ing +reti red +n aked +p uni +speed way +sci ences +rehear sal +on ym +dy ou +pl ates +r ati +kri sh +jaz z +car ol +ra f +pen alty +tim eline +ru by +engine ers +ra f +bel le +do se +che on +esc ap +me g +ran k +or d +me gan +mer ch +ec lipse +âĺº ï¸ı +ple dge +kir k +per si +leice ster +sa k +w k +saf ely +yy y +je t +promis ed +j c +en ne +no ah +re no +re a +ðŁĺĤðŁĺĤ ðŁĺĤðŁĺĤ +tra il +ðŁij Ģ +f d +soo o +ri min +w k +ภ² +i al +x ox +bis cu +d ale +fan dom +particip ating +fla g +privi lege +pe ach +mach ine +bo ston +gro ss +o g +mir acle +adop tion +u ss +mon sters +be ij +clar ke +pu shing +pra ying +ar o +d n +ell is +apol lo +od ds +refuge e +to w +b p +ðŁĩ¬ðŁĩ § +h end +app eared +memb ership +pe an +du m +viol ent +v y +potat oes +aw w +greet ings +t ts +ac on +sh ane +photograph ed +cra b +temper atures +cu ba +c fc +wel com +he l +in nings +m k +co de +kno ck +gra ss +swe dish +p ta +ick y +v at +lin ing +s q +sa p +ar c +announ cing +sk ins +cit yof +br ing +co x +gam er +it arian +i da +h d +ros se +sad ly +ge o +âļ ¡ï¸ı +tag s +fa ther +chan ge +l ance +whis key +adel aide +te c +stick ers +marke t +class y +bad ass +flo rence +lin er +fro st +k ate +ac on +scand al +es sex +ðŁĺ ı +vi vi +dr ill +blo ggers +recomm end +d ha +ac res +ro ma +bu y +gro cer +er ia +ma har +ff er +patter ns +ver i +com pu +st ev +ang a +ment or +do o +it ali +cdn poli +on ly +conduc t +elec tro +de f +wh ale +prepar ation +bicy cle +vi ral +turn out +bra ss +qu ad +hospit ality +pack aging +den cy +ceme tery +abo ard +dre aming +pic ture +t all +inv ent +ad mi +o e +tem ps +qu an +fun dam +pro mp +resi dence +mu d +sour i +âĦ ¢ +graff iti +gi f +d nd +com p +s war +pe eps +pale stine +devil s +san g +assi stance +bi ke +missi ssi +inter viewed +ne phew +dru ms +v and +gentle men +n sw +inst a +leban on +ee ee +oli via +ver y +rou gh +industri es +m ation +ðŁĺ Ĵ +bar rel +n ay +po ps +moder n +ill y +are st +on ents +protec ting +v ans +e o +vi kings +restaur ants +re ck +jac kie +andre w +w illing +he ath +citiz en +disc rimin +๠Ī +stu art +m ys +hi p +tran sp +" ? 
+te x +su shi +ke d +cro ssed +dist ur +pe dia +f ate +some how +mo th +proce ssing +is s +r in +u ts +yy c +ver t +lg bt +re id +on to +arab ia +habit at += = +stre ak +simp son +addic tion +wim ble +deli vers +challeng ing +ðŁİ ¶ +fran ch +e du +s me +ai ds +hur st +th am +tari an +remem bered +palestin ian +fe es +tru m +sket ch +ur u +fit ting +jes se +ðŁĶ¥ ðŁĶ¥ +---- ---- +ba ch +ici a +colo red +da h +associ ate +int el +s eller +p u +stu ffed +ac s +b s +sh in +cooper ation +certific ate +ab u +ingredi ents +re v +in ge +el der +christi an +bun dle +th ic +dir t +beij ing +comm it +ted dy +ed u +to day +s field +w yn +confir ms +lo o +j v +ene ss +al pha +vir us +ari um +gr ind +bri dges +introduc tion +pol ls +bac ter +z ach +termin al +ra iders +fla vor +zom bie +vo d +sp reading +gameof thrones +effici ency +lat ely +ale m +twee t +cri mes +cl er +de y +dg ed +hy un +pay ments +cir cus +ðŁĺŃ ðŁĺŃ +mis souri +lu b +episo des +c age +po s +mat ching +tumb lr +lin ed +ge st +am bi +nar r +ing ton +regu l +blo wn +is le +co co +on don +joshu a +tour ing +sm a +sau sage +best friend +bo eing +desi re +sav age +ra pper +de vo +te ar +take over +cow boys +po ker +par ag +pp e +h int +we ars +se th +ro les +l anc +man ga +form at +fl yer +c ay +mo or +ba ke +spla sh +v ad +ker ala +proce eds +sil ly +reflec tion +di str +wi d +su it +ci vic +yan kees +by n +migr ation +di stin +or ch +fe mini +quali fying +tu ri +o be +hun dred +cra p +wan g +mathe mat +bu re +expo sure +fergu son +seme ster +re serv +pl ym +a hu +fac ial +wa x +wor ried +ca b +vi o +as a +co d +to pics +p cs +hal o +rescu ed +horiz on +ar k +âļ ª +hol ly +el f +ul ti +pu p +quali fied +attend ance +ati vely +destro y +y c +for th +photoo ftheday +c ents +ic eland +meas ures +de sk +port folio +artic les +direc tors +dat ab +e w +creep y +oun ding +hon oured +mi st +j it +men tioned +port able +iti c +d ann +friday feeling +am id +ti ger +scri p +helicop ter +hard ware +expl or +work place +austri a +beat les +ber nar +spi der +disc o +cul t +lim its +shor tly +fin al +nin ja +lu ke +le bron +wal mart +o il +van illa +shi re +ye g +ak y +c s +bl er +collec ted +t g +rol led +speci als +b ff +pier re +sh im +vi er +flash back +restor ation +individu als +pro d +fre aking +tu rer +o a +re fre +mor oc +gre et +re yn +care ful +our ing +u sh +is d +g ill +vie w +thunder storm +b led +pic nic +guar di +pi g +ar k +syl vania +bann ed +u cl +vi jay +ori um +av engers +believ es +eu r +monu ment +concer ned +la bs +ber g +a ap +vi sh +sing les +can cel +z el +ar ab +ru th +too th +ar ta +sh af +chair s +r ack +dise ases +crow d +cl y +fle x +christ ma +artif icial +tom at +fin e +dra ws +advoc ate +fran ce +Ù Ĭ +ðŁĺ ³ +heav y +s our +compre hen +no ble +aa p +hin du +cor al +g ars +ow en +n l +st all +yel low +mar ina +in ver +suppor t +tou gh +promis es +pi e +master piece +sco re +for ce +mor tg +crypto currency +o x +r ors +rock in +pro vin +ho g +no stal +oak land +pat rick +inclu sion +tra ffic +ah med +a ha +lux ury +con secu +de mon +âĸ º +b lowing +st ag +: " +encoura ge +ben e +sku ll +do dge +bu ster +kin son +wit ne +er ror +lo west +fel low +à ° +sh re +bl ur +vir gin +compos er +sli p +mor nings +ga ins +tab le +gra in +ari st +braz ilian +w we +tu es +ribb on +an ag +di st +sac rif +em brace +entreprene ur +af fili +de o +t ali +touri st +fat al +ì Ĭ +autom atic +ðŁĩ µ +we ak +wel fare +confir m +benjam in +fi ghts +alleg ed +me ad +strugg ling +pro secu +che f +à ¨ +propos al +er n +ðŁĺ Ħ +dy k +on gs +hon g +m 
ack +mel on +on ent +ru sh +d ap +tol er +pro pag +c ze +trans lation +wal let +cott age +sa il +constitu tion +ðŁĴ Ģ +mun ici +fav or +storm hour +i h +ðŁĺ Į +approach ing +pin ned +j ed +niger ian +n ach +sh at +particul arly +mc don +camer as +anni e +admini str +he at +electr ical +char ming +gib son +bouti que +ex posed +ac tor +pil low +beach es +genu ine +margare t +ben nett +lou isi +pos itions +el y +shin y +ten tion +architec t +ren tal +ac qui +goo gle +sub way +mom ent +ðŁļ ¨ +ri m +metho ds +cy cli +nor folk +Ù Ī +over whel +ra pid +we ar +happy birthday +progre ssive +ðŁĴ ¥ +co gn +pap a +f ool +philosoph y +pol ar +jim my +wi g +ðŁĴ ĭ +oper ating +reduc tion +ph i +fla gs +to the +o di +a res +k oo +k ang +ar kansas +ash ton +wimble don +sci fi +attrac tive +mississi ppi +logi sts +ral ph +la bel +gradu ates +ma ha +home town +âľĮ ï¸ı +foun ded +on the +li z +trans l +mini mum +pre sti +ta m +gener ations +re bel +journ alists +par am +mc m +acry lic +death s +tes la +w t +bry ant +jer us +i stanbul +muham mad +ri ley +k ris +work shops +is o +coun ts +stre t +prote cted +trin ity +man ual +r hin +r il +pleas ant +le mon +ner d +har der +dar ren +bur y +ra h +bas is +mi gu +occa sion +li sts +âĿ¤ï¸ıâĿ¤ï¸ı âĿ¤ï¸ı +e b +de cre +hamp ton +ìĿ ´ +tra vis +trans form +puer to +nh l +av oc +tri ps +unexpe cted +ve t +di dyou +bar ber +st ages +m son +re presented +for t +l al +pp le +nic ely +ignor e +qu il +qu inn +h k +carri er +remin ded +am ong +pass enger +el len +gue z +sc ape +mu ral +youn gest +ma sh +d ill +rout ine +stain less +jack son +gand hi +th al +on ers +edit orial +convers ations +sd ale +autom ation +i ke +า ภ+ðŁĩ ª +hau l +la ying +men tions +am en +abor tion +i bi +coun ties +ca therine +man ds +jam e +roll er +au t +n am +o logical +cep tion +ran king +tox ic +sn acks +victor ian +bang kok +psycho logy +re g +ang ela +respon d +sty le +sophi e +dak ota +achiev ed +mar ked +imper ial +in as +glo ves +sli m +confi dent +att acked +gg er +lon ely +valentine sday +re b +craft beer +orig in +zim bab +ce iling +te ens +other wise +w b +f ers +day sof +advis or +y ah +âĻ ª +en der +republic ans +av a +skir t +pi pel +chi e +jan e +ja x +ðŁĺ ĭ +âľ Ĭ +j ays +bre tt +bal o +cru cial +d har +as is +de au +lloy d +chat ting +âĿĦ ï¸ı +rel ay +remark able +n s +we t +bris bane +ðŁĶ ´ +tion ally +f k +la yer +house hold +consecu tive +es is +pend ant +st ir +crit ic +su gar +photo shop +pa res +arti stic +do dgers +c un +cra fted +am end +bo at +âŃIJ ï¸ı +egyp tian +sa w +tra ge +small er +ox y +pa ired +nex t +i res +tac o +o y +u c +st i +a erial +: // +dr o +dot com +gg ins +r pg +ay e +le an +stri ker +lo bby +prote sts +pri ority +congre ss +am ate +inv it +r ington +mom my +th us +allow ing +pione er +enfor cement +g ori +tal k +dra g +du mb +bul let +san ge +er y +tar gets +ðŁĩ ¦ +he ather +consi der +seaf ood +ve st +ris ks +% . +p g +sac red +he ating +kick ed +tto t +. 
- +chan di +co ven +po ol +pul se +i a +ro ster +shakespe are +es a +car go +pean ut +tro op +ac tion +tab let +home work +cast le +stru ction +mus icians +free zing +bu tt +justin bieber +j j +bah rain +an them +au dit +didyou know +na vig +guid ance +âĸ ¶ +tur f +n un +fic ations +ye men +char ging +x c +bron cos +su bur +p ale +bor ing +among st +for the +em per +om fg +p j +expe cting +ðŁĴ « +st l +ad min +expect ations +sw an +shoo t +oooo o +min ent +ãĢ IJ +wall ace +stan g +satur day +adop ted +dou bles +hom ie +ome z +d han +vent ure +surroun ding +fi le +mob ility +de es +w ski +broo ke +emb ro +re members +kar a +test im +bo tan +m tv +sacrif ice +jerus alem +d l + ´ +proper ly +ili on +as i +leg it +co pe +m cla +recy cling +lar ger +ðŁĴ ĵ +pat ric +gener ous +ja red +p f +mol ly +thom as +ju dges +h b +sor ts +bl vd +o ven +enter ing +plan es +be et +integr ation +boo ked +fre ed +ver n +ash es +to pped +de pot +welcom ed +ren a +m ick +d and +see ks +gam er +ran kings +ren e +mu t +whis ky +fire fighters +gu es +ga ther +tour ney +de men +y ang +new ton +autom otive +back yard +deta iled +mi st +to bac +fi ber +un usual +grat itude +sp are +ne ys +: * +per i +flo ating +fin alist +don ating +dre ss +bro ad +be the +econom ics +tai wan +ed wards +plu g +pra iri +val en +bab a +f ad +an as +har per +dis order +app lied +p att +bi kin +li ver +cu ri +carol ine +ann er +juli an +wal king +mal col +screen shot +co ding +skin care +activi sts +myster ious +ex act +blo cking +mercur y +bat ter +du mp +âľ Į +en se +li sh +ridic ulous +prote sters +ðŁĻ Ī +lu st +swe at +as s +ali ke +co dy +re ments +win ds +as pir +vi enna +pra y +.. .@ +bo i +cand le +assi sts +te e +der son +p ony +f ence +con spir +âĺħ âĺħ +oo th +e pic +ba rely +a unt +b am +diamon ds +end less +scre ens +can cer +gr o +p st +pro spec +mo sque +help ful +ou ri +bro ther +gu jar +cri sti +ine z +to wers +ad dresses +gra y +bur ton +re tweeted +ðŁ¤ Ķ +n ity +du ck +super vis +jo an +kin der +sanc tu +pi ed +âı ° +ł ï¸ı +m ati +reven ge +ce ster +eli fe +desig ners +back ed +bo li +wei ght +cou ch +su res +s its +shri mp +la gos +auth orities +os ity +hol ly +compu ting +fac tors +ab e +pan els +ram ad +sent ence +missi on +hol m +r b +d ads +shang hai +mon ey +she ets +sk ate +thre w +cup cakes +infin ite +l is +practic ing +ess ay +ka i +as ci +mo b +u gh +hol mes +re gg +ik h +mo ck +collec tions +pe p +o va +sal t +nan dez +co y +thre ats +tex ts +cin nam +pregn ancy +pen ding +stam p +flow er +g is +agre ed +pay ne +ro ver +ph ra +sof t +f fin +fa thers +pass engers +aw ays +al a +h es +li van +in s +samu el +ingu i +h of +j j +chen nai +cat al +om ic +he ath +ni ece +pump ed +integr ated +are l +no m +produc tivity +wan ting +vis a +di ana +tw il +it v +cam ps +ro wing +d ley +black and +gu ards +b ells +re verse +vi be +ric ky +mo ss +ny t +âĺ Ģï¸ı +el le +tro y +cu dd +ev an +women s +fo to +mi stakes +wick ed +mi l +c led +me mes +co smo +schol ar +ren o +ðŁĺ Ģ +v ents +# âĢ¦ +terrori sts +ca sey +cardin als +ðŁĺĬ ðŁĺĬ +venezu ela +bol a +liter acy +t w +en o +con tains +au stin +fin anci +ev an +har vard +origin ally +chev ro +her ald +nott ingham +manag ers +âŀ ¡ +accep ting +wal sh +tutor ial +entrepreneur ship +yach t +requi rements +glen n +pe de +unfortun ately +ach ing +dais y +gi an +night mare +âĿ Ĺ +r ina +b art +ema ils +oppo site +who m +sa ke +pu zzle +da shi +par ty +blan ket +bus es +lo re +beau ty +reas on +pun jab +winds or +func tional +exi sting +hel lo +gli mp +con vin +la k +scre aming 
+rebec ca +bli ss +north west +infin ity +cosme tics +pul ling +coffe e +pl ing +op ho +colom bia +interior design +( + +emo tions +sa c +sun glasses +sav es +d f +six th +al y +ðŁĺ » +de en +dev ast +polit icians +lac rosse +g u +pe i +jav a +comb ine +coal ition +er ts +survi v +ch ad +stri an +n n +de vi +coun c +concer n +contro ller +bre ast +j ury +tu m +introduc es +la di +mobi le +al z +ste ady +nur ses +h acking +on line +oce an +ðŁİ Ħ +a am +ju ven +ic c +louisi ana +ar te +street art +is on +wn s +fr m +p anda +no ir +main tain +del ay +symp toms +thor n +ge ome +ter n +carri ed +p ru +pan or +as sy +per u +clou d +sp ra +pe di +e ste +tag ged +ðŁĺ Ŀ +shado ws +naz i +ا٠Ħ +cor ri +âĻ¥ âĻ¥ +j ad +ðŁĩ « +form al +spo ken +ðŁĮ ŀ +enjo y +lo pez +out look +in ho +w ander +Ù ħ +ma ya +pe e +d ine +ãĢ ij +brief ing +suppor ter +ar ily +ght ers +natur ally +doctor who +j en +v ar +new year +re se +si mm +re x +con sequ +tomat oes +bur st +bra vo +bur gers +cr acking +nor theast +bi om +mush room +mar que +dou ble +ni er +v ag +tw enty +key board +win ni +jama ica +par ish +: - +mental health +ali zing +ren der +wa king +ðŁİ Ĥ +g ly +na than +wa shing +mel issa +jun g +loy al +chil i +song writer +guit arist +bo wie +neighb ors +onym ous +as set +ta i +head quarters +ðŁĮ Ī +i hear +ci gare +sur g +) " +re pl +dar ling +ðŁĻ Ħ +z ak +sa re +ãħ ĭ +mic key +ware house +mass age +ine es +did nt +i w +hur ts +eng aging +mag ic +women in +k itten +mor s +c art +tit ans +colle ague +compe ting +er an +k hal +mar ble +dem and +del ight +et ary +bli zz +lou ise +m ls +fini shes +experim ent +conduc ted +electr onics +itt ers +car ing +wh ats +sym bol +jun g +e cu +pi x +con text +char ger +ðŁĺ ĩ +re ig +fra g +ë ĭ +ch ad +tru e +ker ry +def ending +a int +au ton +check out +bar nes +less ly +d t +m me +clou dy +second ary +are z +_ : +app a +const ant +" ) +ve ts +jo b +i ent +ðŁĺŃðŁĺŃ ðŁĺŃ +m j +fren ch +di ver +davi es +hh hh +e book +๠ī +mar iti +bree ze +susp ended +mat o +vi et +ra hu +se i +bol t +en ary +le is +kar l +fr amed +expla ining +ab c +de aling +nat o +ja ke +exp and +leon ard +establi shed +du b +ar men +el led +voc al +nichol as +ori ent +k yo +illustr ated +ah h +danc ers +milli on +ge ta +po pp +as u +mur dered +gi ble +sto ked +gri ffin +maxi mum +adri an +en counter +ther o +david son +ðŁį » +holi day +ev o +asse ts +car son +memor able +âļ ½ +ob am +represent ative +cb d +tr icks +vo gue +vo ice +mm mm +sebasti an +cli f +ath y +par alle +ðŁ¤ · +pa k +ev acu +e ats +ا Ø +tou ched +organ ised +spir its +can ad +gui ded +frame work +ðŁĮ Ł +pe d +natur al +ag ar +replac ed +anch or +ti t +sha h +organ is +super ior +r n +ch ro +eric a +st ill +cor on +chu ck +loc ks +or gan +ro sen +sc am +ben ed +/ # +ke en +tre vor +vamp ire +sor ted +! 
' +af ford +in tro +gr ace +ðŁĺ ľ +sau r +kick starter +influ en +v u +y up +po c +ðŁİ ¥ +a ar +s ang +tre k +et sy +tb h +scre am +chevro let +pix el +shepher d +an or +gabri el +tw ood +sd cc +me ters +develop ers +clo sure +v w +twit ch +ì Ĺ +se oul +pr ice +ho g +n ish +hill ary +scrat ch +in cen +wag on +dis ability +pan ther +ch ats +g d +wit z +sus sex +l ate +den mark +ger ald +cancel led +net te +i x +nav al +bap tist +te t +y ad +ma th +ho y +r andy +po int +intel lec +fru its +w ool +gu in +pr on +the ft +con dem +mar ry +n ola +architec ts +cin cin +roc kets +gentle man +ex plan +t ate +do e +ra ises +wild life +w l +insi der +blan c +w p +for sale +ny c +po well +unbeliev able +pen s +goo dies +mu stang +p ens +st ays +squ ash +xox o +near by +ever ton +co co +le agu +k han +stu d +south west +con struc +s worth +cro atia +le a +su ms +aim s +e an +van ess +iti ous +pa thy +arc ade +b end +sugge sts +sac ram +roy als +ri er +em ir +in cl +an k +clar k +ri ght +vac c +ठ¾ +tan e +li b +u sc +sal es +hu h +s ally +ver a +p ga +gro ws +dru m +tre e +eth ics +sug gest +is ab +se aled +pre viously +anim ated +ab du +ri ses +glo b +pre dat +scar f +del ic +om ar +ll i +sx sw +py thon +ne bra +fun k +reflec t +pav ilion +tic ally +ch asing +bak ery +inva sion +ko h +believ ed +co hen +con qu +cra fts +nat i +cle ver +govern ance +sam ples +fa ils +â Ķ +ti mo +r itu +stri king +inclu sive +sho cking +can t +requi res +dra wings +à¸ Ń +purch ased +du m +z ach +war ner +con sole +man sion +foun tain +circu m +e sh +is land +mil k +pro fits +hali fax +ri val +âľĪ ï¸ı +jen ny +sand ra +ny e +k elly +y al +qu ad +no s +inste in +fin alists +mid fielder +cu e +excep tional +a an +sa pp +gett in +sa a +f ati +sl ice +vol k +s wal +la sting +sum mary +it as +sm o +s z +âĺ Ĩ +ip l +fl ames +ene ws +ha v +hoo die +pitch er +win dy +re vol +centr al +ton ite +ðŁİī ðŁİī +sol ved +mil wau +organiz ations +wee ts +re fin +s th +ãĥ ¼ +el in +ton a +cinnam on +ðŁİ ¨ +ðŁİ ģ +ron aldo +pen insu +ome ga +el ds +desig ning +e igh +blu et +ben z +nu g +ash a +robo ts +su dan +choo sing +en do +ser ge +clo sely +hand y +fing er +be ing +ar te +survi ved +fl ame +mile stone +gu t +d war +fu tures +é e +el o +fri dge +eli c +ou ch +u b +p v +tit an +col lar +st ation +nev ada +aur ora +r d +dun can +âģ ł +bri en +mar sh +Ð ¾ +to tal +ch ry +s ers +su ffe +ra chel +colle ge +to days +cour ts +ch it +re united +gym na +gen esis +be side +re presentation +ch ant +collec tor +ra k +ath ens +ni gh +mun ich +langu ages +fl u +particip ation +__ _ +c v +spec trum +so da +co ver +refe ren +ab bo +ap a +public ation +ed m +mon ica +ar my +ðŁļ Ģ +div or +dr y +stre ams +robo tics +ci der +bull ying +appro val +sto ke +plat forms +sier ra +ex tin +i b +ha yes +succe ed +suff er +at ically +da i +lyn ch +h ound +del ines +ack now +d ated +exclu sively +he res +fac ilit +dam aged +char ter +la kers +fal con +unve iled +wel ove +e ase +pati ence +l one +gent le +gene tic +produc ing +g our +shann on +bil ities +zimbab we +p int +dau ghters +liter ary +bel le +cl am +surroun ded +k any +ne il +pir ate +rang er +hb d +nat alie +bel ong +olym pi +emb assy +sc ol +en er +ak in +lo ren +b h +: / +di va +den im +hi pp +ðŁĩµ ðŁĩ +arn old +? 
' +we ren +em power +dis abled +man or +rasp berry +b af +aw ful +dru mmer +kar dashi +n ash +machine learning +ch u +rebel s +tim ing +mon roe +ton gue +ran ge +pup ils +re ss +amaz on +b z +har ley +pal mer +ballo on +s ings +ic ec +j b +c ers +g ps +whi st +ri se +l t +oo oo +c attle +shoo ter +vod ka +uc l +mt g +le sli +jon as +di spo +at ric +ste in +vintag e +fir ms +flo yd +cow boy +soo oo +is aac +war craft +disney land +beauti ful +be am +franch ise +bu n +k ag +an on +tur bo +swee p +made in +kar achi +dete ctive +penn sylvania +contro versi +vitam in +a side +chron ic +descri bes +remo val +ha h +ap er +ten ed +u to +bad ly +mir ac +f ry +ye a +in jec +ther mal +comp act +th or +te ed +ur gent +l ite +g illi +sop hom +ic o +che m +p m +for k +fre ak +ch ak +recipi ent +i y +ni k +model ing +c ans +ðŁı Ģ +del ux +se am +surviv ors +rad ical +investig ating +reli able +f m +tur t +ligh thouse +to ol +go wn +) ) +bo ts +auto graph +a id +bu ffe +h mm +horri ble +ssi onal +ann i +๠Ģ +k its +sch i +eter nal +hu ss +sens itive +r u +tast es +chec ks +im o +por tion +sk ate +e den +half time +fri ed +ri hanna +ti se +fl ick +ca in +s gt +âľ Ķ +sh au +sta ined +ra ffle +dro ve +sal man +princi ples +sh o +ar u +je ss +gu ine +gar bage +my an +jel ly +dis ru +z ia +q ld +ent ries +la v +fle w +ad mit +objec ts +comp are +ny times +cann es +p n +suff ol +ro c +d ana +e gg +hi st +coun sel +' ! +phy si +imag ination +ad just +explo sion +plym outh +hor ror +elli ott +bour ne +de x +bre ed +au dio +lob ster +disappo inted +nation wide +( ( +incre ases +austr ali +ce dar +star ing +rac ial +e is +g mt +visi ons +stay ed +discu ssions +de an +cur tis +mai den +stel lar +happ iest +h wy +pre season +car av +mon days +hospit als +glimp se +schol ars +ja i +ter race +ann a +goo se +gra ded +lot us +hun g +grocer y +stam ps +emper or +sc oop +in ser +c as +exist ence +he al +fal cons +mar vel +reduc ing +terri fic +magne tic +perfor ms +bar re +p us +tre ating +ic on +w h +decla red +tra uma +do d +come dian +nik on +bu gs +as m +mont gom +ibi za +comprehen sive +ha s +san ti +fellow ship +da sh +p sal +louis ville +sp y +fau lt +d the +fi led +vi sta +de sc +fe ars +you tu +sp s +es p +ri g +cri me +ber ger +wonder land +k ent +in formed +stev ens +my th +ast on +ir i +visit or +at ri +produc ers +al la +person ally +separ ate +agen cies +af ri +il an +spo ke +n ina +squ ad +di ves +de pend +li v +fier ce +enter taining +cha in +sc at +bor ders +pal ette +sp ro +os is +der by +tobac co +zi o +willi e +ju vent +zoo m +hol y +enti rely +af e +mart inez +be ds +pe a +bull dogs +ðŁĩª ðŁĩ +ib m +ne on +ethiop ia +team mates +plan ting +tw er +any time +for bes +ó n +run way +ner vous +ro ger +p ile +ch anc +apo caly +u w +o i +dr ought +territ ory +br ick +cre atures +go in +w aff +gre n +sou theast +je an +am bul +ed ited +stra p +c v +aar on +ãĥ» ãĥ» +t su +descri ption +kin dly +clu tch +im mer +en or +women sday +or ange +ra g +ob vious +hy der +chann els +man go +me yer +ra ining +ge tty +pil gri +coordin ator +up load +ninten do +don uts +san chez +app arel +j r +zz i +, @ +jeff erson +accessi ble +great ly +e id +initi al +budd ha +par is +ma scot +â¬ĩ ï¸ı +sch war +si ri +sp inning +mortg age +e cho +end ange +ge dly +chlo e +enh ance +kar nat +k ry +explo res +ðŁĴ ģ +af fair +ic als +all a +dar t +dolph ins +diffe rences +squir rel +au gh +dr ones +ell en +re store +pa w +un for +pi ke +hil ton +colla b +consu mers +co inci +out comes +pp p +a q +coup on +li est +si ms +k ho +av es +spo on 
+pu dding +cor byn +hat ers +ex ams +sla ve +. ! +p sa +app les +tam il +se d +co ke +zz o +lo sange +car bon +cla ir +... ) +k hu +cra ig +explor ation +sanctu ary +su e +al way +demen tia +won ders +super hero +pakistan i +brown s +bluet ooth +lo cker +mar c +ev entu +delux e +rodri guez +âĿ¤ âĿ¤ +ro bb +ðŁĴ ¦ +lin ux +ten s +intellig ent +se ed +vo ter +s ler +pe aks +inter n +teen age +peninsu la +hand ling +ti e +cou sins +wen dy +me e +à¹Ģ ภ+din o +ðŁĴ ° +ðŁĺ ĥ +ze e +s bury +trage dy +b k +bo re +z in +war ns +idi ot +tou ching +contin ental +tac os +saf ari +wa shed +po dium +morri son +fore sts +c bc +al on +partic ular +be ads +inv ented +lo ch +li ghter +where ver +i de +docu ments +a we +k r +no where +min er +st it +ro x +contribu te +har dy +cl an +ob ject +ca it +ðŁĴķ ðŁĴķ +happ ier +vege tables +t art +g ag +nom inee +heav ily +pan ic +j d +there sa +at m +u ph +s fc +su ri +drin k +n al +re vel +k l +avoc ado +nom ination +ma donna +shar on +malcol m +control led +sh ers +revi val +legis lation +shoo ts +n in +comm entary +pro s +human rights +str anger +mit ch +pipel ine +leg ally +th u +gil bert +tol l +gran ted +gh s +ir anian +refre shing +du k +ab i +pri me +jose ph +mo sa +stati stics +produc tions +mer ry +pat el +sa x +human itarian +struc tures +e missions +town s +fre el +ster ing +rat ings +alle gedly +cab in +st l +w ade +fl yers +tri m +promis ing +z u +bal lot +compar ison +free ze +ou ter +great ness +as sign +snow y +r ale +tor ies +med iter +kno ck +consult ant +cincin nati +analy st +sc oo +je ws +appro xim +pu re +portra its +cy rus +ation al +lo ans +acqu is +el u +accep table +uni on +water color +ru st +batt les +per fu +seas onal +ser ial +mind set +ri ot +fel d +enni al +clo set +pri est +tan ks +int l +scre w +bu m +ab dul +ou x +expla ined +ric a +imag ing +law yers +bu ried +ãĥ»ãĥ» ãĥ» +ear l +âĢ ķ +l ton +resto red +stri pes +fo ss +de mands +ste aling +alex is +mun d +ak er +ur us +war dro +hu gs +gen re +e go +Ù Ħ +particip ated +bab es +ban quet +ti ous +he mi +ds b +lo st +milwau kee +jen ner +ge m +ou tra +lo ses +id i +re ps +ðŁİ § +regu lation +fla w +f ang +vibr ant +ram p +ra ins +well being +so viet +vie wers +de po +libr aries +bi go +ser y +g ill +de struction +co z +c x +bri dal +al ds +plan ted +amate ur +lu d +che ering +show cas +pro file +i u +ver tical +pack ers +wiz ard +ski p +s light +be au +air ways +mu ch +re ra +ðŁĮ Ĭ +ab sor +pati o +pack ages +s ells +ment ally +ðŁĺ ¢ +reyn olds +k are +tri bun +wal t +kn it +ta ste +sur rey +boun ce +cre ature +b are +bet ting +su re +mi ley +laugh s +al ore +cy n +t l +arti st +ann ah +war mer +dynam ics +lunch time +mariti me +vulner able +ðŁĴ ĥ +wol ver +dur ham +const antly +am in +si bl +: @ +bul let +k ach +angel o +wil der +doo m +desk top +law suit +k ca +hen derson +inv iting +bet ty +ta wards +ra fa +le aked +and i +ge ms +af l +vel o +mediter ran +pro be +to tten +steph anie +sn ation +com be +q s +over come +assas sin +ra v +fil ip +winni peg +sh il +determin ed +k as +ou tre +regre t +gui des +aa a +ðŁĺ Ī +wi ves +mani fe +er ly +sm y +sh ima +x ing +pix el +jac ob +ac commod +to y +on o +po o +ti er +an swe +ðŁĴ ģ +ro sa +le ase +bel ongs +th ar +eventu ally +nei ther +go a +ski ing +at ra +ag h +broad casting +f ury +py ram +d ice +volk swag +wom ens +provi der +bom bs +miss ile +whi p +d ick +nor we +back up +el der +mat ure +concer ts +gi ous +sque e +good morning +bra ves +^ _ +au ssie +lun a +mal es +he ck +for tn +rome o +steel ers +p n +pe er +re presents + « +kat 
y +migu el +requ ire +cha ins +l ur +immedi ate +ti mber +âĸ¶ ï¸ı +advoc acy +ex port +an z +tiff any +auth or +ðŁİ Ī +du des +chil ly +hi d +har m +bu g +mon ster +terri er +tu c +story telling +ta k +in ti +immigr ants +b is +reach es +com passion +john ny +contribu tions +ðŁIJ ¶ +mechan ical +impre ssion +ran ks +ko be +men ting +bloss om +pab lo +buil der +bom bing +tw el +sul livan +om o +pe te +de mi +ku dos +w bb +t gif +mass ach +neighb or +che fs +eng ines +pun e +ga ined +phan tom +s days +ext end +gr an +cent ers +jac qu +dat asci +sleep y +el vis +answe red +s lot +con y +flexi ble +ti ally +le tics +% , +andre ws +si ble +mom ma +vin o +do x +invit ational +twil ight +j ade +ill ery +joh ns +f ou +p v +-- -> +break down +billi on +prin ter +mon d +c bc +mag gie +legi on +du b +kur t +po or +paren ting +regi ons +bikin i +be ware +si onal +au burn +kid ding +amp les +sp an +con tempor +c ic +ha bits +ak o +pre fe +bud dies +it z +em ily +person nel +moun tain +ver sus +ðŁĺ ¬ +ear ning +s ink +dar i +u u +s win +i ster +bru tal +n ac +kat a +clo th +am and +ðŁĶ Ĺ +ne o +alu min +week ends +nebra ska +co des +delay ed +brun o +pro ven +in c +i ght +fl an +or o +lam bert +regu lat +w f +massach use +kardashi an +bern ard +fi esta +volcan o +grand pa +anc a +d re +st itu +mean ing +fo am +au ck +at ed +r l +hot el +pers ons +dy nasty +ell or +ma i +am ne +sty ling +avi er +e g +vege tarian +, âĢ¦ +foun ders +sta in +g d +cy cles +sky line +trac tor +exi sts +tra l +kid ney +mar il +inst ag +se tte +addic t +tri angle +flash back +controversi al +z on +p ins +i as +tr ay +town ship +deleg ates +sp am +h ms +cr ane +peop les +o lo +fac tion +but es +on ica +deleg ation +new profile +eli er +mc a +w and +g ely +losange les +ber ke +ti ve +dis rup +zz a +cas a +jor dan +ford shire +ga thered +ic hi +atten dees +à¸Ń ภ+pe ppers +co in +bour bon +ern ity +ro tary +behavi our +jere my +team work +compli ance +tre mend +ðŁĩ § +bu hari +cam bo +bu yers +ha gen +bu ds +bay ern +mon te +sm ells +an za +ath lon +descri bed +work force +gi ving +ap i +invest ments +da il +sel ena +datab ase +th um +mor tal +stu dent +bu yer +do ver +gar ten +att le +loy alty +gen oci +holo cau +theat ers +ru ling +ven us +pat ent +ch un +ab by +awa ke +mass acre +bang alore +break ing +simm ons +ju sti +hal e +ed chat +gg les +haw k +mar king +head lines +stro m +co ve +breath taking +med als +hair cut +christ ine +tele graph +gujar at +ju ra +can e +sho re +propag anda +mu eller +.... .... 
+sa vi +stom ach +thro ws +ta b +war m +j ong +reno wned +hi r +ra is +mush rooms +guaran teed +bo a +m j +revolu tionary +certi fication +bru ins +jo in +w es +pas sport +c g +sex u +cap able +w v +ton es +jac kets +ac compan +spin ach +fore ver +bla ir +wat ts +g l +cou ples +prairi e +newprofile pic +logi stics +massachuse tts +jagu ar +o id +we al +under water +mo z +y i +ma ths +myan mar +pre ps +suffe red +tr ace +wal i +ah hh +bor g +st itch +cu lin +real ise +infe ction +discrimin ation +sh ame +an kle +hu mid +y t +brac ket +tru ck +tri u +ea ster +commun ity +post card +invol ving +ty ler +car amel +over view +ex amples +integr ity +base ment +instru ments +ani um +at us +gh er +laun dry +achi eve +gen eva +pr icing +hyder abad +beli ef +me ta +j aw +accoun ting +lead er +cristi ano +cou ture +cy p +vis ed +, ,, +k nu +h ick +break er +br am +ra b +mo or +ham as +gradu ating +pupp ies +ak h +ta h +ach es +ri e +op ini +g ta +re ign +tra gic +re ver +p ill +pine apple +tou ches +da re +le ys +il o +inter iors +sc outs +bar t +en zie +don o +bro ck +christi ans +ense mble + · +cine mas +new port +air line +win ston +le igh +cont ents +pre scri +ur ge +tr out +fic ally +il ia +sub si +are r +âļ¾ ï¸ı +w ounded +ðŁĻ Ĥ +pe pper +ðŁĴ ŀ +fit ted +af f +re sur +thursday thoughts +z ero +archae ology +di v +je e +i on +awa iting +co zy +beauti es +bal d +dat a +gri zz +stal k +kin ds +cle ared +jess ic +regu lar +ali ens +plac e +bo s +bi zar +thisi s +ðŁĴ Ģ +totten ham +ma fia +s lam +ari ana +car roll +back pack +care y +uni v +r g +pe p +dig it +tatt oos +ag on +volunte ering +diffe ren +consu mption +ka thr +head phones +t shirt +o b +ele ment +re tail +sh ru +al gori +contain er +consci ous +fi l +com ing +ra sh +u rope +def ine +gi or +femini st +flow ing +rout es +gl aci +fer t +somer set +ant es +twee ps +$ $ +h our +endange red +year sof +ro h +po pped +bac king +ba sil +bra ke +mon aco +lgbt q +pra gue +ut ility +cas si +gate way +haun ted +sch ul +ðŁİ µ +shou ld +walking dead +comple ting +dann y +montgom ery +pengu in +ss i +mer chandi +ðŁij ij +chur ch +h ates +cap tain +brea thing +ce t +fair ly +approach es +compan ion +surpri sing +kany e +pe y +hin di +targe ted +lor ds +de ut +di gging +ger man +ru t +ener gy +close st +y un +apo logi +ภ± +s ack +ru p +dd y +port al +d ough +b ats +ðŁĵ ° +at ur +graph er +pi res +mo tors +ðŁĮ ¹ +j c +dan g +tu k +clu e +us c +pag e +d less +bro ws +ju s +ad ing +re marks +oo m +car dio +ste fan +arm strong +âĢ¢ âĢ¢ +ni est +belgi an +bi op +so y +lo f +í ĥ +q t +flashback friday +ce e +ģ ภ+wre ck +mar ines +amend ment +wardro be +vo y +bur ned +guit ars +ra inf +li fel +ssi l +oun ce +exter nal +c key +me sh +she ikh +inv itation +sugge sti +pop corn +phenomen al +an onymous +tun a +chic ago +o val +del y +loc als +( & +pro f +no vel +fin der +spar ks +la ven +in fu +nic ks +qu ant +ra e +exe c +dist ingui +st ances +mu tual +sh al +unve ils +edmon ton +zan ia +a dio +vie wer +brad ford +audit orium +qu is +re act +htt p +l ero +chee ky +impac ts +ta k +ed t +desper ate +t ay +ì Ħ +sett le +bar gain +resu me +un ite +thro wn +ke st +se ys +mar ching +am it +decl ine +sch ar +me tr +stan ford +lin ke +ber ra +dol ls +rug by +jam i +b or +road trip +dino saur +mi k +sun der +re m +b k +over seas +nau ghty +imple mentation +iam srk +lun cheon +fir ing +mi ami +pere z +the e +z on +gi fted +con version +ceram ic +¡ ï¸ı +pe dro +ì Ĩ +v ick +! 
@ +he ed +si d +b w +docu ment +pl un +gr ants +fant asy +predic tions +vali d +car ved +gradu ated +ðŁijį ðŁı» +nation ally +ch y +af l +re sso +blan k +ri vals +j ig +e ties +om ics +une mp +b ound +sk o +inspec tion +par al +high s +cri sp +b ans +ob a +[ @ +co spla +costu mes +rec all +mou th +ni gel +b ts +ter a +ko v +do cs +west minster +dic t +gra vity +kar i +ro gue +t ted +war k +ida ho +w end +aw i +queen sland +proce sses +cli ffe +m ick +com pens +op ol +the y +cl ari +wiki pedia +salman khan +haz ard +pre ston +swee test +pd f +che es +tr ilo +south africa +bur nt +( $ +con tain +t p +sub mitted +sound cloud +at u +re z +word press +corru pt +n f +ma ker +í ķ +par as +adv ent +ri al +ca fe +fo ssil +!!!! !!! +co ws +c j +sp ur +institu tions +land mark +ent it +re ut +h is +alz heim +we mb +regg ae +mo squ +st at +identi fied +deal er +re am +re land +ten sion +ðŁĩ © +wra pping +deep er +fr at +red dit +ar is +moroc co +.. " +b low +ma pping +pri orities +ing a +swa p +re wards +conspir acy +creati ve +c j +congre ssional +vau lt +ple x +sophom ore +shad ow +ele ss +ðŁĺ ħ +dar ts +aldu b +anno ying +pro ps +n as +alumin um +h bo +offen se +j ill +oni ons +la ur +ta e +har dest +sh ro +ga ining +meas ure +ed tech +cyp rus +tar a +ang eli +car lo +go on +all i +im plic +ju pit +resil ience +ha il +bal anced +) ... +joy ce +gr a +th eli +defin ed +shi pped +main ly +min a +l m +sac ri +o ber +p im +claim ing +ent ers +co rey +bo k +cri ed +cool ing +dani elle +pharmac y +thor ough +ca ke +k lo +outre ach +z ens +digital marketing +val ent +sn p +her b +mr w +caf é +cap tures +no tre +triu mph +pan cakes +cu mber +spi ke +d ation +bi gg +sp er +crit ical +am al +too th +foun ding +a stro +' # +quan tum +th ames +un c +pri de +air bus +kno cked +un defeated +mediterran ean +cal cu +clo wn +sens or +ham mer +for give +cu shi +ber ry +maje stic +elec t +polit an +g ta +k ari +bur ke +sea hawks +volkswag en +re i +landsc apes +cas u +grand father +list ened +/ / +star trek +rainf all +fur ry +vi er +star k +rif le +ff a +leg es +hillary clinton +min us +correc tly +architec tural +pre ce +up side +box er +ðŁĻĮ ðŁı¼ +is ai +de t +pro vo +tis sue +spoo ky +ve led +re con +prospec ts +que bec +âļ « +ig no +anat omy +shap es +w p +p interest +hor e +an es +pick up +ti p +pra desh +hu gh +co e +po k +gram my +well ington +sti gate +ri gh +lea p +king ston +scen ic +go sh +v ani +au g +s ary +zi er +bure au +lin son +con te +fra gr +all an +g aw +lan a +colli sion +surve ill +ren ais +ar range +s ali +do in +br ance +bren dan +our se +in coming +suspen sion +à ´ +l la +educ ators +in tri +da e +bio graphy +bul gar +villa in +go thic +rw anda +e w +may or +meet up +democr at +mor gan +su dden +te sco +car rot +bom ber +mck in +re ne +fun day +agricul tural +haha h +show time +form ing +col a +scor pi +quo te +po ppy +s life +d az +tu b +ne n +mo t +ðŁĺ » +s ore +elder ly +o ve +skin ny +um i +anc o +man ship +we re +g v +k ah +fol ding +ne at +samanth a +dan ish +uk rain +humid ity +nu tri +jak arta +cand les +oooo oooo +at ile +streng th +i bra +bap ti +charle ston +fr ames +girl s +clear ing +glu ten +# # +super natural +ju bi +ph one +he in +dr un +le ak +invest or +y er +dom ain +ball room +mi sh +app li +off shore +bla ze +dor o +âĺķ ï¸ı +win ery +shar if +ad ore +n ir +saf er +si gh +as cri +strong ly +trac y +ck er +ol l +faith ful +ey ed +deli ghtful +vis m +karnat aka +tit an +wh ar +jer seys +re fur +heav en +gri p +pan ama +pre li +glu ten +o dd +cont ent +pon ti +tion ing +e 
commerce +feder ation +flaw less +ge ar +ti res +by r +pol ice +cu ban +tri butes +tic ul +chur ches +nur sery +di aries +muse ums +snapp ed +i van +wi ght +touri sts +ramad an +t rent +prophe t +won dered +focu sing +hi d +ic ons +i q +ambul ance +pi st +fun niest +time less +sr ilan +bu ys +ki ds +colour ful +a shi +ch ir +mu m +ðŁĵ ļ +let ter +x en +reut ers +pre serve +in ting +ste p +fu ji +uni ver +i u +show down +po ems +surveill ance +suspec ted +ta e +sol ving +tom b +mother sday +car pen +recru it +pil ots +bro c +mix ing +fri days +ty r +represent atives +tra pped +abdu l +free style +clu ster +âļ łï¸ı +k d +sk ill +pit t +ex o +commer ci +muse um +loc ally +g ina +no bel +immun e +fr ac +cap su +main ed +attemp ts +bull dog +be spoke +sing ers +sp elling +seg ment +nat ures +tic k +lip stick +clean er +gett able +preci sion +âĢ¼ ï¸ı +th ood +re ef +no pe +bill y +di gi +mu si +ri val +figu red +tal ity +sun ny +ber k +aw ww +awa its +un real +co pen +asy lum +ex otic +bu en +mo ck +en able +arch y +fr a +pla stic +al mond +amp li +displa ys +abbo tt +s me +x p +ðŁĻ ĥ +graph ic +i ved +mar a +cau tion +lea ks +en berg +ul u +unic orn +cann on +appren tic +ðŁĺĺ ðŁĺĺ +b ball +wil low +at ics +am as +manufac turer +campaig ns +port ers +flo ors +l su +ty pe +ke j +honor ary +it im +to le +min ecraft +d x +ma sh +ri o +consequ ences +ron ald +go ssi +suffol k +mu se +r bi +live music +i van +ðŁİ ¤ +le u +patri ot +man it +lan ca +home decor +de ar +sig ma +ti de +str ings +v ita +sequ el +try na +inve stigate +bor is +ve gan +barri er +mind fulness +web b +hu stle +in da +tan zania +str ay +tex as +c ag +diagno sis +wom an +g w +ob session +l ative +nu fc +fl ynn +moment um +sof a +wal d +vege table +tu cker +supp er +se ab +ar ro +se ag +ven ting +counc ill +sp lat +cal cul +.. # +com fy +odi sha +sto pp +war fare +ca es +à ¨ +co y +price less +in sec +ðŁĺ Ľ +contro ls +empower ment +datasci ence +per pe +gen ic +e res +tru deau +man o +sla very +expand ing +ma he +fa iling +s aga +photograph s +cre st +re on +surf ing +hi e +ðŁį Ģ +ja e +fel lows +south ampton +sol om +ce ster +tab ility +hor n +se ct +he e +cole man +at las +explo rer +consul tation +copy right +organi zing +den ied +mon keys +noo dles +br is +fl or +dou gh +bon ds +sho cked +eco system +care fully +w m +apart ments +cur ve +san diego +must ard +comm en +cere mon +e ch +ru th +ðŁĻĮ ðŁı» +hawa i +fil med +te ar +as ingly +ca ir +wat t +instru ment +ou tta +ye ol +river side +ë ° +. 
: +nor wich +alo g +migr ants +new man +ri de +spr ink +targe ting +beli eve +tor ch +reflec ts +per mission +ff man +ene mies +bas ics +se ized +sun days +le i +hass an +en do +h c +st ad +le ments +kk kk +nan o +shar k +man a +on ic +treat ments +ear ly +collabor ative +shu ttle +bran ches +mis ses +mained cm +ap ers +ky le +carri e +leis ure +sh et +bir ding +adv ances +ðŁĵ Ŀ +popu lar +di ane +a be +re war +neigh bour +k pop +remem brance +play ground +ru b +krish na +e bola +inqu iry +ep a +lu min +organ isation +abra ham +norm ally +pre ten +jan et +w t +ðŁĴ İ +encoura ging +a stic +bu mp +syd ney +s z +ss ss +gar rett +ðŁĵ » +consul ting +roman ia +spo tting +chanc ellor +ar ma +presti gious +ðĿ IJ +t ad +cry st +compe tit +rati o +cat aly +bro w +j ur +vi king +commu te +y day +la yers +du mb +esc al +genoci de +f ill +gu pta +ste pping +se i +fo to +wild cats +col i +projec t +ear nings +st r +ge ons +comple tion +b m +decor ated +craw ford +af ghan +sc are +visi bility +hi b +direc tion +stro ll +christ ina +alter nate +cl are +sty list +be hold +s ance +leop ard +acqui red +narr ative +ash i +the a +?? ?? +pe as +at ch +sli des +le en +renew able +eng lish +qu ir +co aster +r x +fo ols +match day +mis m +amaz ing +z ig +ke ting +won t +to wel +di ab +sta ke +n m +mel t +e than +gra pe +polit ician +sm en +í ĺ +re o +wedd ings +cat cher +or acle +me mo +ðŁĮ ´ +ec k +rob bie +norwe gian +oper ator +am or +se wing +ju l +x ie +u v +fif ty +me ga +tatt oo +liber als +u pri +traffic king +richard son +su v +ki p +mess y +tremend ous +gl ou +cour tney +la d +stere o +my ers +i dio +^_ ^ +man ning +dy e +w d +thr one +jun k +as u +provin cial +k ook +wr c +fine art +hamp shire +renais sance +b red +fall out +s j +sn l +al am +tor ture +fy i +sh ines +pa w +ch ar +hen ry +c row +aci ous +di an +pa ige +ba re +stock holm +scen ery +ðŁĩ · +jef frey +pu sh +decor ation +ne d +cu te +brig ade +laven der +inv ites +e sports +vo ir +dri ed +tran spl +sur geon +no vels +pul ls +son y +lun ar +man e +i vy +fru str +dor set +sa i +tor res +ssi on +shut down +suggesti ons +writ ing +e o +battle field +u ga +ðŁIJ ¾ +vac u +spl ac +g it +u g +high land +% ) +mer maid +sacram ento +ta ils +p w +ka h +t ell +enh anced +ì ķ +auck land +cru el +ðŁ¤ © +au dre +sail or +gram mar +g love +de on +infl am +fresh ly +k ell +zi p +christi e +mil d +di xon +instru ctor +g ence +ãħ ł +sub jec +constitu tional +crow ds +in visible +ru ins +da k +si p +pla que +p ouring +comple x +z ine +ste ad +f let +trans mission +lo way +ar un +incre asingly +au d +transp aren +cro wned +sc oun +blizz ard +lux u +fi ers +achieve ments +hun ters +rock ed +bas in +vio let +pro ves +achiev ing +pro sper +se ga +flo at +vi an +xi v +pol ic +tur a +approxim ately +wander lust +keep ers +geta way +co d +pol is +br yan +col ts +tal ents +yo gur +gluten free +wri st +gr y +cze ch +ðŁİ Ī +ev ille +ðŁı Ī +to x +dani els +am er +bi ds +weare one +me tab +g t +boy z +pd x +pos session +pu shed +shr ine +reali stic +tri gger +na vi +ru mors +n af +jen kins +tr un +comm uni +Ã Ĺ +gam ers +arm or +moham med +bal cony +y ah +stron gest +rhy thm +unfor gettable +k p +ho bb +custo dy +greg or +r ita +aes thetic +il ation +sponsor ing +n ay +kid napp +sh s +ra jas +me g +signific antly +butt ons +la c +ver sions +essenti als +opini ons +k ro +d printing +wi dely +d k +ur an +y al +reque sted +c n +cur ric +plu m +gr un +v m +dev on +m yo +rel ation +juvent us +rou ge +min ority +min es +jupit er +n ine +oxy gen +fran kie +une sco +fab ric 
+disgu sting +sal man +dete ction +lan ka +d ac +ðŁĩ« ðŁĩ· +argu ment +shel ves +cel tics +rober to +pi gs +he dge +fau l +pow ering +butter flies +fi r +re make +att i +com o +emp ha +kend all +poke mon +se ating +d ans +bald win +ðŁij » +lesli e +one direction +ti mber +im an +fon t +e der +di on +ste ph +for mat +gre gory +pro p +he x +ru in +sor y +inf er +n aw +bar ak +sd gs +kar ao +lu sh +v ander +end ent +g is +a fro +soc cer +ay an +t uni +lun g +da yof +alex a +mar ath +addic ted +ag ile +hy gi +light weight +ì § +mand ela +jo ey +anc y +hu m +bi r +memor ial +jim in +ging er +v ak +jav ascri +cro ps +orig ins +d ari +pi per +im port +aggre ssive +predic tion +re pairs +cr acker +voy age +ni ke +mu mmy +linke din +country side +bor der +gla ss +per t +s als +sho e +autograph ed +wal nut +colle gi +sal ary +pa iring +ðŁĮ ¸ +cath ol +swee the +defe ats +streng then +roof top +impro vements +barri ers +ur u +t ally +ru led +ðŁĨ ļ +nai ja +emo ji +per cent +gi o +pro bs +on ce +adm its +pa ths +li ar +day tona +pe ters +cal i +cal li +mu g +o sa +ap h +ab y +hy de +eth nic +pla ins +ol f +haha hahaha +holi c +?! ?! +su bli +bl acks +mo t +gh ton +lo vin +b rent +bar u +l ati +de w +ate au +q a +pain ful +bu sters +st atic +ðŁĩ¨ðŁĩ ¦ +note book +out fits +si es +r f +floo ds +Ñ Ģ +thro at +su ici +ro vers +beng al +pre pares +blo g +mini ature +Ø ¨ +am phi +com b +r sp +in timate +green e +Ì ĩ +al tar +surg ical +ves sel +... ? +gav in +g ator +threat ened +z ar +rob bery +di er +promo ted +y g +x s +su bs +inter viewing +threat ening +do zen +me ado +water fall +nintendo switch +cal um +mini sters +dro p +univers ities +war ned +tac tics +ðŁĩ ² +refu se +ad ju +v ast +ðŁĺ ´ +mc fc +lib ya +no filter +distribu ted +re ser +ron nie +de co +javascri pt +mon k +intere sts +fle x +mar tha +sti es +oo d +ðŁ¤£ ðŁ¤£ +e un +b ali +g omez +sti mul +moder ate +d ity +ir is +stra w +consist ent +direc tions +adop t +sal sa +cro o +reco vered +black friday +lan caster +accep t +weareone exo +buil ds +free man +air plane +diti on +bel ong +jam ie +pit ching +li f +om in +cri spy +pre pping +ve g +chan g +accompli shed +graci as +dolph in +elec tor +culin ary +super bowl +wal a +pur suit +black berry +be an +cardin al +pro ved +immigr ant +stric tly +holocau st +pass age +ha us +cou p +pur se +har ass +< < +le ed +ado be +st ad +legis lat +par ked +pri yan +sil va +kri st +s the +fun ky +ig a +sett lement +ph s +t mrw +stre ssed +hun t +ho ckey +treas ures +cham bers +ol u +hu t +mar ley +tex ture +wilder ness +mm ing +poten tially +om aha +ju dy +to es +spo iler +distingui shed +feli x +ah u +recommend ations +zom bies +hit ler +tri ple +colla pse +motiv ated +ulti mat +gg ling +so y +ci gar +fo ren +vine yard +gl itter +fin dings +colon ial +hun ter +eri k +den s +beet le +lot te +sub tle +s matter +tru sted +experim ental +nam ents +ðŁĺ Ĩ +regi on +acquis ition +bre eding +quarter back +am reading +oo td +ru de +initi atives +st out +hy ung +out come +al fred +mic s +exper tise +bacter ia +pengu ins +jump er +valen cia +bar k +ing day +sell ers +contrac ts +hou ston +commissi oned +adap tation +swan sea +santi ago +common wealth +ju dging +sub mission +sco rer +tom my +ñ o +ex quis +fil ing +explan ation +alli son +wemb ley +ri dge +chev y +san tos +own ership +cogn itive +favour ites +sh ed +phil anthro +dele ted +go dd +s nor +gui delines +ff ing +je ep +cli ps +sw amp +an or +guil d +bol ton +spring field +munici pal +goal keeper +ye on +ðŁĺįðŁĺį ðŁĺįðŁĺį +ãħĭ ãħĭ +water front +gra ve 
+contempor ary +ar ity +ÃŃ a +sle eps +sy rup +al am +pi re +co yo +moto gp +ty son +kej ri +cir cul +sing ly +cr unch +complic ated +nostal gia +k op +mo ve +k ale +mac ro +mid west +h ans +tri bal +nu de +௠į +bey once +congratul ate +cat er +leagu e +ðŁĻ Ĭ +la dder +cra shed +tech nic +karao ke +harass ment +ro ts +experi encing +kri sten +ðŁĩ ³ +ðŁ¤ Ĺ +reflec tions +guin ness +illustr ator +ðŁĻı ðŁı» +cen ter +nar row +comm ons +regul ations +Ù Ĩ +har m +cro ft +cu ssion +hong kong +st ical +intern ship +zo e +cho p +hoo ds +estim ated +batter ies +berke ley +smooth ie +shau n +cro s +~ ~ +cam pe +hu mp +b g +proto type +cl ick +shaw n +re viewed +tem pl +p f +jed i +blo gs +ray mond +as th +ba h +av ail +scot ch +leaf s +nik ki +to k +hol low +ur ges +of t +un like +lat in +u e +cat ering +mil i +alter nati +ma ver +Ð ¸ +ag le +pre order +lu x +cu cu +ðŁijı ðŁijı +t art +âĿ¤âĿ¤ âĿ¤ +arab ic +rapi dly +ar rang +all en +travel tuesday +pa ws +flo ws +st ability +flu id +ca pp +can berra +uu uu +sp ani +demon stration +m la +plac ement +m w +presi dents +awe som +bever ly +ani st +ne al +father sday +referen dum +la hore +o aks +deb bie +half way +gho sts +de bor +matthe ws +fi at +t fw +pre sen +rob i +de d +bro ck +laugh ed +am ounts +bam boo +kinder garten +eat en +mtv hottest +break out +u sic +fra ser +legis lative +p ang +modu le +sam my +go ver +ear ns +expe dition +gar h +concep ts +char lie +la va +bachel or +veg gies +deter mine +el lie +un locked +fru it +dal la +cou pe +wash ington +depo sit +iv ory +pau la +chic ag +gu cci +ðŁİ ĥ +cul tiv +pier ce +li fted +stu mb +re cover +musc les +conduc ting +cb s +mcla ren +sophi a +cel lu +oce ans +up loaded +game play +mal dives +kim ber +avo i +rac er +ca ine +cav s +h ana +li ga +ra ven +inter vention +inaugur ation +oo h +at traction +merchandi se +tune in +li king +juni ors +int ended +att acking +aqu arium +i wd +comp onents +sur ing +cent u +yogur t +ðŁı ĥ +show room +op tical +ty our +ju dge +yi eld +an to +pl c +transparen cy +recy cled +chi ef +ar om +ambassad ors +plan et +âĿĦ ï¸ı +om ed +vaness a +cour t +mar gar +hal ey +v r +reg ina +pd ates +hi span +live stream +âģ £ +ya hoo +gal la +secu red +w ir +bene ath +off l +n il +am b +ye g +out let +u te +pe ep +lind say +bent ley +... ! 
+he el +trilo gy +vo s +ty re +there fore +tor onto +ab i +simp li +ja e +exten sive +eleph ants +s or +orient ation +im peach +re play +constru cted +peter son +pa is +por ted +custom s +colla p +ad u +high lands +sal em +shel by +ko vic +stra in +ro sie +sen ators +snap s +bo bb +suz uki +bla des +k p +lo lo +gener ate +si ght +ma e +struc tural +predic t +jump ed +ah mad +sun g +just ice +gla m +vol vo +jubi lee +de tention +lo sses +pu ri +every time +Ð ° +ra o +ed ge +li mer +rese mb +har old +re tri +sacri fic +surpri ses +am c +srilan ka +bar bie +men s +fin n +ag s +ukrain ian +em brac +î IJ +flav ors +hom er +lau re +ou th +pr iced +ver de +fir m +ah s +cu b +tre y +par anor +pro fit +in dv +who a +har sh +al ot +crit ics +hu bby +fi gur +gi ra +ca stro +chan el +in put +origin als +ten ant +yy yy +ture rs +lincol n +co on +lear n +ch ou +ac are +o les +din er +hy p +bizar re +mc r +let sgo +decor ating +ðŁĮ İ +al ison +ar vin +f d +reha b +mccar thy +lot tery +da h +minne apolis +eli gible +diagno sed +emer ald +destin ations +s ans +or y +bla zers +n v +ba il +digital art +no c +mal ta +sol ar +pi pes +alleg ations +no ck +po pe +bri d +premi er +n x +present ations +ef a +bo ws +val ve +opp onent +Į ë +visu al +ing le +cate gor +e ter +po is +dan i +at tract +neu tral +th ene +cra shes +fred die +ut ili +c st +awak ening +slo ven +quali fy +pro of +fair y +le v +fre ight +enjo ys +cup cake +flav our +â ķ +protec tive +ðŁijı ðŁı» +is u +ad mir +h mmm +continu ous +ai res +rap tors +showcas ing +y uk +pa ste +follow er +instru ctions +sp ru +@ __ +the o +debu ts +ve tte +sto w +es of +ach ed +sul tan +sand wich +som alia +franc o +car ne +flu ffy +al pine +jas mine +he ated +viol in +ple ss +divor ce +per former +phi es +port sm +dar a +kir by +lo p +chill i +for th +sky pe +ðŁĩ®ðŁĩ ¹ +celebr ities +ed y +ve e +po ison +ey el +gra bs +ssi c +un o +wester n +rail road +am er +numer ous +s v +fo w +fi st +âĢ ĭ +reque sts +mar tial +em my +accept ance +lau ra +ภ´ +er up +hyun dai +out lander +u tt +wrest le +esp resso +demand ing +g dp +geo graphy +sas kat +tro ll +confe der +su es +se m +be ts +t ful +to sh +teach es +col oured +gal way +mac y +dis orders +bb cra +at em +fen der +lit ter +e sh +provi ders +renov ation +nomin ate +ps g +nomin ations +jen na +shar p +some day +z ur +bra ins +che shire +pre y +hu go + ¿ +to ken +r v +car r +tac tical +zel da +kay la +fern ando +photograph ers +j our +umb rella +woo dy +congress man +du mp +le vy +ju an +d azz +sign als +la in +an u +mic hel +por ch +al den +sibl ings +y ale +pe el +sw ick +gg in +ll c +k ale +s con +il d +pat reon +re el +qu in +wit t +mar ty +moo dy +ton i +der y +g ators +speci fically +dd in +ly on +tr ick +meado ws +p j +bor gh +vi k +tu r +bron x +pu ff +lan tern +ðŁ¤ ¦ +g ently +be stie +fac t +refu sed +fas ci +mp y +ðŁĶ µ +cross over +mead ow +indian apolis +duc ation +sle y +loo m +mix er +new music +film maker +prosper ity +li m +week end +cre amy +neu tr +lu ther +h v +nor thern +tw o +h ra +cat ches +appear ances +ha bit +kitt ens +n v +illa c +inf an +regar dless +liz ard +dun k +cur tain +ac om +in tu +ve z +e min +fl ats +calend ars +em power +ru ined +hun gary +vi d +we x +u lum +aber deen +o sa +k t +ma ssi +se emed +s den +' ? 
+tele phone +de fi +insp ires +me ow +z ones +bl ind +pl y +tuc son +advent ure +ge d +oy ster +ðŁijıðŁijı ðŁijı +out put +tt t +metal lic +sma sh +ucl a +sco ts +perfe ct +lu cy +regular ly +sp ic +rel ative +ath ers +mis e +batt ling +deci des +mat a +occu pied +random ly +cat softwitter +gi an +ball y +al ties +al lies +im men +sy rac +ðŁĴľ ðŁĴľ +l lan +au r +k ut +lam ar +affe cts +n ra +star war +ðŁ¤ ĺ +sc ram +en chan +pro cess +luxu rious +ar ray +sher lock +comp ati +dor f +stre ss +m su +s with +sal a +sof instagram +fo il +under stood +qu ay +r p +c ade +ja w +en ab +en coun +ðŁİī : +do ck +satur n +mu ll +lay out +ra rely +happ ily +fix ture +or ph +over looking +her bs +m itt +pil lar +nol an +pe tty +str y +u i +mu k +o res +o vers +á µ +re creation +we sley +ri t +kejri wal +sto cking +g v +subscri bers +moo se +ma e +ber t +opp re +assign ment +u ro +high lighting +cal vin +we igh +cambo dia +av on +ke m +dis abilities +read y +char gers +p ads +iz ing +illi an +tru ste +col leges +associ ates +alban y +mil ton +cr on +bu r +har dly +si ghts +anti ques +e cho +surpri singly +ha iti +cap t +ph p +op io +ine quality +equ al +ken y +sch mid +autograph s +ren t +qu er +cit rus +challeng ed +te c +epi de +fe st +z hou +li me +citizen ship +cry stal +convin ced +mess enger +copen hagen +âĿĹ ï¸ı +war ran +develop ments +ï¸ı âĥ£ +fore x +hi ro +sne akers +xi de +vi va +stere o +bat ting +ss el +ho st +beng al +critic ism +q c +cr un +attemp ted +ry e +determin ation +cre ations +d read +label s +pos se +anc er +joh an +si ster +partner ships +les bian +k st +guaran tee +bar o +fix ing +ma son +m ous +chem icals +t less +bio diversity +par o +bhar at +ac ol +refu ge +en te +t iti +dys sey +respon ds +lef to +in er +se vel +rahu l +ol ine +frank fur +cho reo +enjoy able +c to +strugg les +wood land +heavy weight +gen s +rece p +ac cred +ðŁĺ ¡ +trans formed +list en +at op +n k +sur ge +be re +gover nor +prison ers +clau de +t ill +mu lator +emo tion +water loo +star t +ðŁĩ º +clean ed +grand mother +fear less +afric an +astron omy +ðŁı ģ +ภĻ +the world +su itable +anth ony +k and +tt en +meaning ful +disc lo +jaco bs +à ¸ +tom linson +ghe tti +ty pho +sub stan +as co +te k +nag ar +mu d +am on +vacc ine +f ty +fle sh +no el +infl ation +portu gue +glam our +tra m +v re +te qu +roun dup +w yn +rejec ted +mosa ic +si ghting +cal f +o ta +com position +go pro +gonz ale +e ed +b ard +tu e +effec tively +we en +al to +ri bs +rel ate +thir sty +fu rious +di m +ch ard +perfu me +s ny +chur chill +k of +master class +wa ve +ðŁĶ µ +er in +own s +to be +sk illed +te m +go f +en i +tor i +cra zy +l ick +resi stant +ici al +ag ar +! 
: +g ali +del aware +bl itz +koh li +pu ck +avail ability +hi malay +influ ential +cro chet +victor i +read ing +ho bby +vie t +j as +en gra +sk ul +ðŁĩ² ðŁĩ +educ ate +tech no +distric ts +blu es +se tt +seven th +lear ns +ee ee +apocaly pse +hang out +cru el +mu tu +bru h +hel en +she er +c tion +kle in +tex ans +ce real +sh ine +ne red +gra s +am bro +f ella +hin du +matthe w +li ma +mir anda +je wel +so ho +euro vision +neighb ours +chand ler +be sides +ðŁ¥ ° +ast ros +thu mbs +ren ault +ra ve +hi red +ðŁĸ ¤ +it ary +z or +bla zer +k ine +ea u +kat y +dc comics +pe c +ro dgers +water proof +kill ers +super int +pre serv +as so +brew ers +promo tional +sc am +villa ges +sket ches +ju icy +for life +au dit +so lo +fundam ental +len e +philipp ine +t end +conserv atives +sponsor ship +dd le +a ine +h tc +os i +hul k +w af +ภĻ +evalu ation +ant ine +sle e +robert son +roo sevel +ag i +sophi stic +emplo yers +bubb les +ko wski +inter action +sh u +bou le +ic an +j are +han k +leg itim +k nicks +kar ma +recei ver +per ks +u h +sta ir +sun i +labor atory +gra ves +voc als +oo t +c ture +thri ve +tic o +ãĥ ³ +b w +carto ons +mcdon alds +dra w +y ung +pl er +li d +eth ical +groo ve +ent a +international womensday +pat ron +wor ries +ðŁİ ħ +ðŁij ĭ +ka therine +di az +tor i +bach chan +tru st +min eral +ic om +buil ders +bor n +col oring +lat te +ca se +revolu tion +tra der +ox id +chi pot +inst antly +sou thern +se hun +pro b +her nandez +lis bon +hu awe +p ong +me a +ro oney +wheel chair +ke en +be tt +cor in +regulat ory +di splac +ka ren +sch em +sun sets +wh ales +remin is +he p +hi de +mar cel +pand ora +do yle +th fc +ot to +no kia +trans gender +ko v +hawai ian +sha ve +so vere +exc er +nick i +pu g +st or +ro th +wee t +leg al +dig nity +po w +hom age +ðŁĩ³ ðŁĩ +s re +can on +la x +wo ah +quart z +ñ a +gree ting +flick r +nai robi +advoc ates +an c +vi i +eu gene +th ra +c re +el an +pen sion +th letics +ton i +re agan +x v +sto re +ben ch +har lem +todd ler +sent enced +âĻ¥ ï¸ı +glob ally +che aper +u f +ma m +nic o +ik u +tho u +ni st +dam i +th ala +rho des +sal e +bow ls +â Ī +las vegas +sanc tions +adm ire +mat ched +un able +travel er +ele ven +straw berries +âĢĶâĢĶ âĢĶâĢĶ +stu dio +jac ques +im s +valu ed +s no +cheese cake +n xt +e os +s x +f x +ton ic +hat ch +chic ks +gra ds +hand ic +r ory +as p +ri pped +denti st +n en +lu fc +âľ Ĭ +di ge +hop kins +sher man +f da +for all +ash ley +str and +h y +liqu or +buffe t +ess ence +phar ma +suri ya +ðŁĴĻ ðŁĴĻ +festi vals +z an +re fresh +pur ple +uni forms +kenne th += ) +as an +hel sin +transform ers +k ali +person alized +chal k +bo bby +â Į +the mes +depar ture +prin t +illustr ations +qui et +agre es +gri ff +Ø ³ +m iti +toge ther +conven ience +ab ar +car lo +turt les +info sec +some what +ar lington +scholar ships +emir ates +mu ms +st ella +auton om +fe ather +g ore +nom inees +fragr ance +Ñ Ĥ +w ong +thea stern +gr e +z illa +is i +bump er +go o +do zens +ab duc +âļª ï¸ı +o ils +don ors +sil icon +i pod +fortn ite +ðŁĴ ¨ +tor o +spark ling +consci ousness +pal a +nu m +moun ted +ffin s +thi eves +team mate +pra b +om er +ta pes +bo d +mit su +ste w +e re +p bs +tu sc +lo we +ra de +parliam entary +h m +ed gar +ðŁijĩ ðŁijĩ +to a +a gh +hon i +s late +ge ek +ap t +hard t +ta p +horiz on +grow th +make over +hi l +paper back +id an +reha bil +gi u +possi bilities +let tu +fran co +bo ss +ach er +does nt +mo e +ta ker +huss ain +ml k +di l +th ia +ham a +real ised +raven s +curric ulum +m ith +k night +ted x +r v +isai ah +cumb 
ria +birth days +f ing +pre z +mu barak +exquis ite +clear ance +y en +par i +ev o +à º +modi fied +app lying +imple ment +disco vering +chap man +indie game +dis k +crowd funding +mach in +li vel +sty led +âĿ Į +ma king +rehear sals +nutr iti +subscri ption +and ro +cre ators +car ries +ky lie +cam den +appren tice +tax pay +c ca +tuesday thoughts +pis sed +er man +dete c +freed om +mer i +.. ! +psal m +sun light +per spec +be ings +book store +rock star +fun ctions +p ence +fav es +z n +obam acare +sp ill +coven try +pi geon +pi vo +ba it +kol kata +av al +don or +wa h +privi leg +tra ditions +rajas than +ten ess +portugue se +yn es +tack les +de fic +tor n +pol ling +thor ne +in a +bened ict +bar ry +cal ories +ver dict +save the +nor ton +off ice +main stream +impro ves +fr on +respon ding +real tor +scotti sh +de clar +r l +shi v +supp lier +re sting +swee ts +qu i +. âĢ¦ +whit ney +startu p +thank you +teach er +h alls +ha ve +hand made +pro ving +quar tet +ro chester +li an +virtu al +mend es +of icial +mid lands +x box +meas uring +o vo +accommod ation +bri des +collegi ate +intellec tual +in car +ni ag +ðŁį · +sf w +coco a +co ats +civil ians +presi dency +mat rix +sweethe art +tri athlon +wag ner +ra dic +plann er +the o +execu tion +k um +the walkingdead +sc ar +ro tation +blo gging +bom b +re son +bb les +st are +assi sted +e do +brand ed +war nings +thor pe +acknow le +satis fied +sho res +ri d +dor a +phys ically +bi gh +appro ves +ha h +ric al +vers atile +pret end +lu m +ab hi +ye e +sp it +ãĢ Į +dj s +ash tra +j t +ven ues +gram mys +cy clo +tr acker +over watch +repl ica +el yn +nr l +lind sey +hom o +ballo ons +kitch en +si s +am os +ende av +ðŁĴ » +a rec +thu g +hoo ked +hr c +new york +bur gh +americ as +patric ia +ug u +ap athy +ha st +psy chi +cor k +petro l +ðŁİ ¬ +ak u +po pping +psycho logical +au x +g ma +cad illac +wa ste +auth ent +bri stol +nam e +que er +to ber +jer ry +com in +ch ant +privileg ed +op ar +lo ser +tex t +mar ker +stri es +equ ally +ak i +christ mas +gare th +ble w +em ma +imag in +se als +che at +conditi oning +j ana +ren s +dar ies +o asis +disc ounts +coun cil +i ka +shir ley +vou cher +al ps +w x +q r +dri ft +attemp ting +ut c +Ø ª +gonzale z +m f +jo ker +paralle l +pa re +aspe cts +proce du +n p +am a +rale igh +bright en +gu ire +radi ation +cre scent +ho b +il le +str and +v ore +n ard +che st +di wali +av atar +al der +d ling +pa thetic +ðŁĴ ĺ +spir it +jor ge +film making +ðŁĻı ðŁĻı +challeng er +b j +down town +ht ml +ade qu +twi sted +in ely +( ' +wra ps +oper ational +y ne +n us +mag net +market place +health ier +snap shot +dam on +inter ven +fe derer +ow ls +biscu its +j p +ro deo +blue berry +lec tion +fron tier +summ ers +re yes +pede strian +go l +caf fe +refur bi +bou lder +me ghan +speci alty +la ss +e i +suspec ts +appro x +rr r +ra th +st im +cru shed +he d +wh un +lo af +cr ore +river a +gene tics +so ck +wa sted +ny pd +answ ering +do ve +bel la +ol in +du n +fi ji +pre tty +spar kle +y un +j d +euro pa +li fts +am ber +mu r +te k +boy d +roy alty +in do +ri b +go tham +ti est +inst alling +ke mp +the photo +cos mic +) )) +whole sale +loy ment +eas y +su ing +sett led +af p +pro ver +suppor tive +re es +ne ath +deli ber +c é +wel come +pic oftheday +new born +pat ty +sun s +si est +fl int +diffe rently +spo ilers +troop er +g ins +cor y +look out +equi pped +ta pe +to by +resear cher +u sh +ke yes +al ma +induc tion +k w +k har +sl ick +bri de +e ur +cra ving +book ings +ch es +tr unk +vern on +sp her +cryst als +rel 
atively +pom pe +uni ons +val ley +par a +w ant +ok c +de af +ser gio +len non +sh ay +cr a +v at +he e +t we +liqu id +pol y +ðŁİ ģ +b ent +be aring +motor sport +bar be +te sti +han i +fin ancing +astron aut +water colour +ri sh +comic con +gar t +wr ong +ber n +it an +ste pped +fil ters +c low +me x +dem ons +all o +expand ed +comm and +et ers +go ats +si ri +y r +pot tery +mari on +i le +el an +san to +person a +du ke +hom eless +li ghted +wheel er +chang er +cab bage +sur real +ham burg +sma shed +str an +k not +i art +ob i +be dro +di al +th ick +b ingo +fu s +vacu um +con ve +ati ve +accur acy +accoun t +re fer +ri z +spider man +ban a +r ite +u b +ab s +medic al +lin k +si em +> >>> +be tra +g lowing +re actions +pupp et +spa ghetti +ang s +re medi +pray for +roy ce +char lotte +£ ï¸ı +gh et +affe cting +ro de +soci alist +mo ses +az i +o it +re porters +cd t +ap ing +s nat +minim al +wa ist +sie ge +>> >> +ri g +schmid t +h are +ec a +thor n +he mp +es the +cly de +th a +don ut +moham ed +ling erie +le gg +carpen ter +perform ers +de a +imag ined +cur se +la sh +ct r +agu a +ro ar +gr i +ro le +j fk +resur rec +roosevel t +maril yn +sm alle +will is +wa ited +char ities +the res +li k +origin al +car i +c ough +cru ci +la gun +contra st +k ou +arm our +re moving +t ent +maz da +bri ghter +thi ef +cor ner +tequ ila +buzz ing +al bi +p am +az ure +disc oun +pixel art +possi bility +ham ont +tra des +bu da +hi ve +vers y +fin ch +tran spa +em i +terri fying +in qui +g ba +sub stitu +collec ti +plac ing +cin dy +k ann +pa tho +diamon d +mour inho +guine a +anthro po +air s +pu mps +ì ļ +pas o +cur ling +an ita +resi dency +ne wh +jo on +cigare tte +que ue +ex trac +gam es +spl en +ex press +public ly +bon nie +tribun e +ba ek +reason able +c or +timo thy +she eran +Ä ± +f dn +su tton +concentr ation +carav an +x avier +al ger +cy lin +freder ick +ner ve +pe ak +lettu ce +j ail +pre game +kav an +up graded +eco logy +squad ron +gra pes +goo g +pa stry +ðŁĹ £ +ãĥ¼ ãĥ +mil ano +awa z +presen ter +ðŁĮ ¿ +her d +king s +tem plate +fl our +h v +k ley +i ya +spe c +at er +frankfur t +co ch +tex ting +del i +communi st +regi ment +ele anor +anticip ated +ðŁijĮ ðŁı» +thephoto hour +ran o +survi ving +simul ation +daw son +ar in +aqu a +m or +âĢ¦ . +cin o +ira qi +sh az +dun dee +we s +dra u +hann ah +s news +occup ation +ste en +x m +ang les +sett ings +gur u +kno x +or ca +shap ing +w ent +dr illing +zz ie +br i +kis sing +fin d +ma ine +âŃIJï¸ı âŃIJï¸ı +ðŁĮ į +lar ry +bu sted +ta vern +acti vely +- " +replac ing +no d +un lock +. 
" +âŀ ¤ +affili ate +to w +l n +happy newyear +di f +j m +green wich +contro versy +daw g +con dol +sav annah +compens ation +touch down +te o +amb itious +embro i +convic ted +iart g +bar ack +tr ance +testim ony +au dition +thum b +my ths +be x +que z +orch id +den y +entit led +hoo d +gr ant +in box +blue jays +r illa +smalle st +bur den +in famous +divi ded +boun daries +t ter +el t +wy oming +be verage +me sm +one ws +budd hist +y ana +as sad +is ms +bar rett +predic ted +back to +tw it +e there +cap tains +escap ed +ay o +lam borgh +gard ner +la ps +k al +adverti sement +insec ts +na po +am en +ac y +r and +g k +te h +k athle +tri dge +pan cake +at ro +pyram id +bu la +paral ym +gau ge +en cies +tom y +biscu it +but cher +quali fier +coun ty +ke i +po ols +dar ker +should ers +ðŁĩºðŁĩ¸ ðŁĩºðŁĩ¸ +sp re +( " +writ ers +g m +ðŁİ ĵ +k nit +hu ff +mt b +philli es +o st +den is +g art +licen sed +inter face +ex cel +d well +from the +co fficial +az zi +appear ing +fore st +n ana +ke ith +manufac turers +beck ham +) ? +e se +col ony +delic ate +ut ter +mc in +transpl ant +pre ferred +par d +ari e +hu b +po ds +perspec tives +pic t +del u +app er +be than +p mo +crimin als +femin ism +sh ack +circum stances +fel las +prote sting +wa x +sugge sted +t ator +dre w +om ni +fa ke +kath y +re b +del ine +ber ni +mi sty +ðŁij © +er able +break through +men swear +millenni als +chan yeol +la z +inser t +rep lies +phra se +n x +ihear tawards +audre y +gran ite +rac ec +ori e +ter ra +innov ations +britt any +at eral +pe ar +bio logical +sh ments +institu tion +m sn +frequ ency +d man +neg lec +t f +ste fan +fox news +ty po +comm s +sequ ence +car men +wh ites +econom ist +exe ter +se um +re sorts +cas ually +bun de +divi de +Ø ¹ +ga g +cre ed +reti re +cau cus +rapi ds +wrestle mania +tul sa +sunder land +fundam ent +o di +yam aha +v ary +intri gu +el se +be acon +an gie +tra ded +tran sm +g ents +kn itting +gal ac +ðĿ Ĺ +u to +sea side +hol t +re rs +far go +train ers +mon soon +b ale +sou ght +mad die +h w +co li +fr an +fav s +ðŁĴ Ķ +int ent +r ally +s bs +lemon ade +barack obama +bre ad +stick y +explo sive +chel ten +t j +as soc +ram en +hom ies +v log +mi ster +lor d +âĢįâĻ Ģï¸ı +aly ssa +sketch book +ru mble +cat ch +migr ant +discipl ine +un likely +chronic les +fl ora +sl ams +am id +s boro +coo p +ju mps +tran qu +mel is +sof ia +en ri +gab e +sy ri +nicol as +cha i +w v +be cky +foo ty +ta o +suppo se +ðŁĺįðŁĺį ðŁĺįðŁĺį +plu sh +ri sh +ðŁ¤ ĵ +k ha +satur days +ac cent +he c +lim it +carl ton +wi red +taylor swift +ðŁĺ ij +sq l +har ro +recipi ents +g at +go p +th of +amaz ed +gh an +ðŁıĨ ðŁıĨ +por to +cla re +di stant +na c +ohi o +ðŁĻı ðŁı¼ +mt n +anti bio +dino sa +me sa +par tial +b v +lear nt +lov ato +questi on +ex tract +gossi p +gi bb +niag ara +ðŁij ¨ +displa yed +so oner +ste vie +nug gets +ml n +bro m +tur b +give aways +stu pi +bl ink +c ili +conven ient +mo h +vi ve +f ric +cau se +cham ber +cu les +ne arest +is se +small biz +t j +canadi ans +smar ter +bra sil +ra re +que tte +w ha +cand le +at omic +ðŁijį ðŁijį +warri or +relax ed +stri ps +ne ur +k ka +r fc +jen sen +reco vering +respon ses +sal am +ortho dox +acti ve +ell ers +n it +âŃ IJ +metro politan +centu ries +vi da +gra ding +transpa rent +sim ple +do ts +superint endent +elev ator +autom ated +red skins +ima m +summer time +jona than +ge aring +michel le +confl ic +m ice +to te +publi sh +pa x +) - +na iled +á ´ +tele scope +ser bia +ba b +ape u +st ically +sen ti +r ats +isol ated +grou p +hat red +paranor mal 
+stan ley +ali on +safe ty +l s +ठ° +nex us +alexand ra +mas ks ++ + +tr on +au k +brother hood +brow se +mix es +sim one +mu sk +appro ve +lo la +ex p +per th +fu turi +un seen +d m +chel se +sc outing +o we +portsm outh +k ram +mi ze +di spen +su p +d lc +adver t +tere sa +is le +cy cle +met all +shi elds +marin ers +ra z +ing en +fun d +an go +jon es +o ka +mad den +broc coli +domin ic +situ ations +mer o +cric ke +puni shment +d b +sha king +ðŁĺ ļ +m q +ari ans +le h +cla w +we ds +d ure +ni el +j elly +gour met +tra ders +le vi +w ages +kne es +wi se +heaven ly +avi d +melo dy +z ack +ban anas +apprentic e +pro p +fun ny +o de +respec ted +me gan +fe wer +dra fted +med it +gra pe +us army +cru sad +vo cali +prepar ations +non sense +us age +th r +ro th +wiz ards +insi de +promo tions +mon a +red sox +si g +eleg ance +ch ia +univer sal +ãĢ į +ra ja +un ga +pol lin +filip ino +ak a +t sun +ik on +bi king +decor ations +z ac +cade ts +hum our +ag m +re ppin +vac cin +elo ve +u w +dia be +galla gher +az er +do l +a while +pro minent +wel sh +t ann +' ) +bi en +wa g +in al +c wc +wic ket +ur st +q anon +x e +out door +dun n +star r +co logy +ric ky +u efa +reb ounds +s music +inf ant +ðŁĻ ĭ +so p +u mber +hand ing +beg in +sor ting +ha sh +sp ati +re k +buda pest +black hawks +dele te +ro m +can did +auth ori +de bris +spe cul +inter section +marri ott +im ran +ðŁĺģ ðŁĺģ +cru ises +ram sey +rafa el +aware ness +vas cular +beyon cé +ru g +ðŁĺ Į +festi v +ar am +s able +bas il +p ill +flo oring +un beaten +implic ations +u f +w ound +for ge +poin ting +po ts +popular ity +ðŁijı ðŁı» +mani pul +s lots +deb ates +abs ence +ver mont +never forget +wri st +gl oria +ren ce +hu sk +mel ting +ðŁİ Ł +br aces +tim ely +transform ing +am ps +ma k +po e +ah an +gener ally +nd p +ale ppo +unic ef +pro fs +nor d +ma sk +jackson ville +v v +sh ells +bloom ing +oper ators +char coal +ne ville +ma gi +chi p +sam a +ir an +re forms +accu mul +ru e +æ ľ +web sites +ga on +devast ating +sto s +glaci er +ra pp +chipot le +pr a +or ous +rom ney +seas on +decor ative +c isco +dit ch +compla in +ll o +assu me +ðŁĺĤðŁĺĤ ðŁĺĤðŁĺĤðŁĺĤ +n els +cent ric +ft w +car rots +tat a +can ter +per ience +li ers +demo s +bl unt +oper ate +reserv ations +le ah +sub stance +di son +an te +elec tion +v ue +squ are +non profit +ca a +f su +y am +ãĤ ¤ +v ladi +comple tes +mar i +philli p +ne ill +er as +ka it +men do +mahar ashtra +g p +dan e +provi dence +ther apeu +juven ile +me mo +in corpor +aa aa +seven teen +teen ager +à £ +or ns +wi de +cu teness +tw d +ff les +bar a +com edy +over time +y az +bar on +unemp loyment +ðŁij ĭ +exter ior +den se +cent res +match up +history month +artif icial +qu it +e sk +war n +cr itic +j af +ðŁĵ ² +inform ative +fu els +recy cle +nam ing +stri pe +sol ic +mole cular +dee pi +con vo +s sel +na e +de scent +ti z +accoun tability +ter ry +r ito +sl ay +em o +dem ol +sens ation +co v +tor e +round table +y ol +excu ses +ॠį +tur quo +hh hh +pod casts +cele b +me ssi +li o +man n +contribu ted +u z +gener ator +ele ts +veg gie +indu l +en suring +detro it +pun jab +tran spor +instru ction +ad d +por cel +pan eli +cir cles +persi st +clay ton +sp n +dog softwitter +is nt +sp r +retail ers +p w +hun gar +el ena +mon aster +gu atem +je ssie +an z +ra shi +fle e +car ving +fau x +l al +hen ri +d jo +du ll +s ana +lar a +glo be +cri mson +com pass +pau se +na b +lion el +ba ths +u fo +invent ory +sin gh +sat an +ðŁĩ ¸ +ce ments +in form +gener ated +bi den +av g +tas ks +de er +sa u +ja iled +pa stel +sc 
c +na il +steel e +per is +lamborgh ini +pur sue +mar gin +u ch +bo sch +dra in +cl ara +bo m +lat ino +web ster +rose mary +r ha +s oun +billion aire +not ch +percent age +con or +' " +hom es +earth day +h ort +big gest +di sin +wal ton +edit ors +im ma +om ar +equi valent +pharmac eu +ah med +cam eo +han ni +under rated +ge ment +micro bi +v oo +honor able +obe sity +âļ ¡ï¸ı +limer ick +invol vement +st agram +boule vard +bur g +blackand white +liber ation +fi ve +inter im +sm m +rival ry +cap abilities +stat ements +thu mb +ve d +sw ans +bar ber +e que +seren a +hel m +noo dle +sam pling +n awaz +sing le +thunder storms +sh on +in ev +ë ¯ +to pp +orch ard +bi an +ðŁĺ Ķ +door step +salv ation +marke ting +r ons +cle mson +ra vi +in take +stand with +sin a +ha iku +ple y +elector al +ph illy +la ys +electr ic +cap turing +u pp +er gy +believ ing +cul tures +es day +inva sive +ed ed +spee ch +end ur +viet nam +boy cott +pe de +deli ver +ðŁĴĸ ðŁĴĸ +mer chant +st ir +den ies +poc kets +o ti +cu ddle +ro land +mm ed +den ed +lear ners +hoo p +sour cing +h acked +di m +environ ments +ben son +jud icial +wor cester +pear ls +govern ments +arri vals +cor ners +tun ing +la bour +y m +or dering +le wi +i fe +hygi ene +thou ghtful +indone sian +campaig ning +princi ple +assau l +ru bb +at v +wil ly +en tre +il i +ph on +du ties +âĻ¥ âĻ¥ +sn akes +lo op +am ar +conver tible +bon ding +ment oring +max well +ethere um +destro ying +ax is +ca iro +fin nish +sho ck +ðŁĺ IJ +cal eb +com a +pe dal +co re +contin ent +el son +temp o +helsin ki +ac p +tack ling +st ated +bl a +dou b +sma shing +a ja +camer on +disru ption +warm th +being salmankhan +bullet in +o de +syrac use +ar an +mc gregor +bul k +an ton +confir mation +sp ine +im ran +instru c +jac ks +chi o +pal m +str e +embarra ssing +un t +elimin ate +to ss +c ise +a ws +oni sts +sh inee +jo s +ho se +li vely +opp onents +mo vements +recogni zing +sandwich es +sh akes +exerc ises +se at +profe ssion +merry christmas +lu gg +adopt dont +mar vin +byr ne +un le +he t +ku wait +rah man +aspe ct +humb led +gen es +f and +long time +) ; +cam pu +an gus +ðŁijį ðŁı¼ +q uran +sle eves +s lic +¸ ë +twel ve +your e +i ke +go gh +b st +dic tionary +reflec ting +to on +yar n +em bed +ðŁı ´ +re serves +floo ded +ver iz +du sk +estab lish +pro li +au d +ritu al +or bit +declar ation +recor dings +cam o +cas sette +good luck +cu tter +bo p +b ho +che ating +paci fic +ma res +tim er +col t +tr ous +tomor row +han sen +ci e +w ang +ban i +circu lar +ac ute +far mer +co ys +p se +ir ving +w j +haw kins +b ison +ur day +cru ising +o te +k ath +whi stle +your selves +ant is +sla sh +thorough ly +ke sh +ser ie +ex em +en ig +guil d +sh red +ho gan +ap o +ä ¸ +pu zz +ne tball +au ssi +panor ama +ws j +av is +ar ming +hum ph +brow ser +cri es +fo ggy +mat te +ðŁĮ » +it er +tal lest +by ron +cap tiv +je su +any ways +flag ship +p ton +we y +fay ette +financi al +f oul +solom on +jenni fer +cucu mber +ar gue +tex tile +wrest ler +john ston +pa stor +ðŁĺŃðŁĺŃ ðŁĺŃðŁĺŃ +cac tus +edi ble +re served +ric hie +met res +ingredi ent +h ella +un to +ch ol +cele bs +po ets +gra ham +hay den +coinci dence +b aw +communic ate +flet cher +/ - +tole do +ecu ador +coun sel +s laughter +line ar +at p +os u +jo el +ev ed +conqu er +ru stic +plic ity +recogn ise +room mate +cr acked +jas per +ph er +ðŁĮ º +wo ven +mo ist +ff c +ste ering +ni sh +stand ings +frequ ent +ar di +haz el +as msg +bau m +d art +si dd +nat h +ch ero +card board +c ss +n sfw +pa ir +ðŁĺį ðŁĺĺ +occur red +homeless 
ness +mal one +ph e +xi a +pad dy +decl are +theat re +b f +per sian +ta d +ax e +susp icious +lam b +mu cho +sen ior +st as +k ite +st ing +gra d +k af +wat ering +Ø ¯ +spi ral +th ms +educ ator +jer ome +of c +clo ck +su l +pe mb +.... ..... +park way +de aux +restric tions +m ons +need le +e j +le agues +water melon +am an +pl enary +max im +w ab +coming soon +bry ce +vi gil +super market +fortun ate +turquo ise +presi dent +li v +inter ns +feel in +fix tures +stun t +st aged +premi eres +lo k +prac titi +shor tage +log ne +ve c +con cor +roc ke +li g +com posed +syn thetic +di p +cam ila +ch is +j ou +su san +eye brows +supp lement +satis faction +moham mad +ti bet +house of +pu n +as sam +shado whun +psy ched +se duc +mand atory +her bert +sc allo +stream ers +proto col +block buster +produc es +sch nei +lau rel +tri be +time hop +pl a +mod elling +tv time +mtv stars +wi dow +me tric +ch am +con do +flow ering +ale c +d ms +inten sity + ¨ +mccar tney +islam abad +k b +f fi +ph al +anal og +f ond +h acks +positi vity +treat y +sub marine +conne ct +sel en +categor ies +cu b +organi ze +si k +quote oftheday +remin ding +am or +loc king +ðŁijı ðŁı¼ +comp ound +et te +b out +rec ur +fe rence +mi zz +tren d +hip ster +for tress +forth coming +preli min +o dyssey +ang p +del ici +even ings +ðŁĶ ¹ +i q +d w +da ir +kathr yn +christian ity +moon light +ha b +wh oo +f bf +se th +genu inely +pa x +char ity +deplo yed +b nb +bu cs +ju dg +con ge +plant ation +im press +car a +sc lub +sco py +land ers +compla ints +b ama +re build +x y +real ism +sh our +le in +brac elets +mer a +assas sin +an chor +ðŁijĮ ðŁı¼ +lin en +con fron +chronic le +comm ent +cat alog +il les +gor ge +me try +jung kook +love my +sent in +se em +fit ness +alli ed +ts man +digital transformation +pr an +lo ft +min ton +alden richards +en vel +cher ish +certain ty +zz z +rhin o +per kins +en rich +cape town +ome ter +sec tions +ske leton +def enders +ðŁĺ Ŀ +pen c +bri t +ja h +capital ism +ðŁ¥ ĩ +baz aar +re me +ex t +kk k +conver t +stor my +b ye +kar an +chry sler +ad os +pre ssed +syn c +ation day +dang er +bad ges +refu ses +em powering +ly m +ex ports +adoptdont shop +ðŁĩ ¯ +th c +awa ited +focu ses +fin ed +o at +haha hah +âģ © +n family +fi ona +luck ily +thr illing +ty ping +out break +di es +he u +craw l +ne sses +o ath +scri pts +gee ks +ðŁIJ Ŀ +p b +mathemat ics +al is +________ ________ +gymna stics +acti vism +recommend ation +gre n +wa in +cour ty +n apol +cau li +hor nets +g als +jo ckey +dir ty +at ar +enor mous +pe st +greg ation +an os +ii ii +def ends +black historymonth +at x +mb c +lugg age +wit ch +co b +la sts +cu m +gg g +ba thing +n ar +ce bu +ðŁį ĥ +navig ation +min e +re jo +ðŁİ Ģ +gif tide +re ta +use less +pu ll +defic it +al lu +ati me +it v +tr illion +pu e +ac ies +proce dure +l ori +jen ny +c ad +ul ously +dr ac +promo tes +ing the +can u +woo hoo +na omi +zar dari +ts u +be ir +sd g +le ver +we ber +ab ud +lun d +crow ded +deplo yment +ter rain +ken ny +ho f +witne ssed +lo ch +j k +bul ly +w ren +poe try +do ff +ww i +mo red +din i +cul ture +promp t + ¥ +maur ice +to pps +r m +cor respon +ab out +jewel s +gi br +eag le +ðŁĺĺ ðŁĺĺðŁĺĺ +l ending +sou ven +ç Ķ +contemporary art +establi shment +j ong +âĢ¦ " +gat or +patri otic +mc coy +v ape +human e +feli z +coach ella +re posting +ste als +fu ller +n ering +at ra +( - +bla ke +he ather +wor ms +discipl inary +rede mption +y ard +am in +" @_ +d nc +t ds +k appa +ne wark +comm its +spe ars +j ams +t and +msn bc +inter medi +aim ed +at ic 
+teen th +observ ation +kash mir +kavan augh +ou l +san francisco +re u +bel ated +cho w +pass word +st ills +deta ined +sar i +day ton +dar ren +itali an +ar th +amu sic +ar bit +w m +v m +he m +dou g +my r +a sho +pre v +vin d +bra h +sta g +ภµ +pre views +gu k +con taining +leon ardo +sad dle +ru shing +st av +lon gh +gam bling +ve gas +reserv ation +end ale +bal a +fl a +vari ant +he dge +bulgar ia +nat ali +we aver +sol st +encoura ged +ap c +as parag +ne st +cycli sts +fe l +ìĬ ¤ +overwhel ming +pey ton +j it +a post +mb le +ble eding +neighbour hood +a very +expre ssions +mac donald +gi gs +mon ds +illu sion +n ct +cam ero +over head +my th +ol y +vi o +et v +lau rie +unve iling +pri or +con n +iron man +di ff +day in +crit ici +con go +re vision +wal e +direc tor +p ines +black pink +gar ner +cur ated +manit oba +h ac +common ly +bar ton +.... # +mor tality +live smatter +philos op +shor ter +con vince +fre ak +vend ors +insi ghtful +el ly +sens ors +e led +s berg +weight loss +u kip +sp ur +priv ate +qu a +ss c +, ... +supervis or +advis er +amaz ingly +less er +at es +mah on +oooo oo +sar as +pmo india +waff le +un ders +toler ance +sculp tures +her sh +kno cking +smo ke +cathol ic +gri m +tra veled +fli p +ge off +dinosa urs +sle pt +scar let +ok i +compla int +ob sc +nam i +la g +cross fit +u fc +mc cain +refe ree +sad ness +pen ny +li eu +mo de +ki er +vol s +w is +el on +she a +ba o +son ia +cla ire +em manuel +moist ure +di gest +vi ii +t eller +ch on +access ory +night club +foss il +aw an +hu sky +ab original +brand on +ffici ent +cou gars +ste d +ad mitted +igno red +content marketing +ag as +v ase +execu ted +negoti ations +she ad +n and +tab lets +go th +ts al +d fw +on ep +protec tor +sp ho +gaz ette +andre as +ss er +comp ilation +ha v +contain ers +bro ker +soc al +porcel ain +hy uk +air ing +ðŁĴ ° +publi sher +scen ario +spart ans +re viewing +itu des +ed el +pear son +ba sh +mau i +a ad +ðŁĮ Ĭ +li u +ul ate +program mes +fav our +web design +real ty +motiv ational +cro sses +' ... +bus ch +adjust able +ar jun +mist ak +dimen sion +pi stol +weigh s +en y +unve il +indy car +gor don +f ade +fran ken +qual ities +bet t +loc ate +ker r +sp c +confu sion +ne e +luck y +bas es +dep ends +fire fighter +ol a +re t +mar oon +ðŁĶ Ĭ +w am +defin ing +whe at +bi l +é s +b hai +psy ch +ta u +ic ans +thi k +ob ile +inspec tor +ìĨ Įë +ill on +go s +ev angel +fa i +si st +voc ation +bur ge +chi stan +renew ed +enthusi asm +en ting +ag ri +ike a +m sc +aero space +sens iti +memo ir +hosp ice +co caine +der ry +mechan ics +Ħ ภ+tin o +reduc es +collec tors +in justice +supp re +v ana +ab un +nap a +su sa +os lo +e ff +en core +lic ence +ched dar +z al +moun t +ðŁĴ IJ +threat ens +!! 
" +archi e +fu tsal +scu ba +jo s +gn on +se xi +s official +compar ing +domin ant +tof theday +fa it +propos als +gi ft +y as +cn c +l r +ha b +reser voir +beli efs +gener al +mar ti +t d +est e +ì ł +wi l +ðŁij ¯ +ðŁĶ « +sp x +et work +excer pt +e instein +hir o +sil hou +team ed +per ception +corri dor +mental health +hin ts +ben ny +induc ted +sw x +wi desp +spe ak +cher yl +dru g +ðŁĺ ķ +h f +asparag us +myster ies +fitz gerald +off er +therap ist +care er +dam aging +ts d +per u +wei bo +y ay +phoeni x +disc re +mac book +bar ker +stig ma +sp read +roc kies +kang ar +bri dg +pa i +bi shop +ta iled +capsu le +ðŁĴ ĵ +ge of +roy ale +short listed +o ste +ash amed +ch app +key e +cl a +screen shot +austri an +nati ve +en ight +juli et +michel e +ðŁĮ ´ +travel ers +pi l +football er +win chester +ðŁĻ Ħ +azer bai +gold eng +organis ations +interpre tation +predat or +ofthe week +lo gan +pok é +mari e +cal la +t nt +cin de +ge tic +fit fam +gra v +ow ens +ðŁĮ ± +shoot out +sal is +commissi ons +co he +p tic +ni xon +hi a +amb ition +mar ine +cruel ty +t k +cru de +sal ty +jim a +mon go +ir ony +on wards +arre sts +strang ers +ig er +cycli st +ra g +exten ds +tra dio +bour g +mo i +el la +e able +lex us +au l +der a +histor ian +mor ton +ti ff +man ner +ko t +d k +po inted +mar qu +a an +en ey +du blin +on poli +em ili +secre t +fl o +âļ ¡ +ba j +ste ep +accompan ied +rum ours +dev i +purch asing +fi g +pu b +sch oo +autonom ous +go alie +x ia +autom atically +re vers +ter o +fu ku +titan ic +shoo k +sand als +see kers +exc av +nor dic +bigo live +ba ke +r att +z ak +ne p +ðŁĺ ¤ +cand y +billi ons +book worm +pp et +à ³ +sur faces +sc ars +phil ip +do gg +ci gars +co te +transl ated +cur ator +sin dh +han gover +bre wer +on es +el ton +ðŁĴª ðŁı¼ +mar cu +elli ot +righ te +di oce +ru ss +rail ways +grand son +as cen +apo logy +awa it +mob ili +re spir +parti san +oli vi +stri ke +yo o +white house +expre ssed +pu ps +bed ford +cul tur +fro gs +fly ing +cav ali +c ds +fri ger +street photography +re solve +tali ban +kan g +cru shing +ju m +ðŁĺ Ĵ +william son +tan g +cur ly +t man +veter an +fa ire +artificial intelligence +un anim +pre n +back drop +fr ances +oc cer +doro thy +work ing +ar thr +conver ted +day light +serv ant +pad dle +compla ining +thir ty +nad al +ak u +ibra him +ad dressed +p iss +green house +batt alion +si mulator +out lets +embroi dery +ðŁĵ ± +fis cal +ger ard +sas sy +ðŁİī ðŁİīðŁİī +vent ures +mer it +public ity +ðŁij Ī +sophistic ated +c tu +conven tional +condol ences +isra el +tra dition +ar an +te ss +gla d +ðŁĺĬ ðŁĺĬ +correc tion +ge on +am d +or ship +be ast +ch ment +ì ŀ +nic o +wk nd +wel s +cushi on +beli e +vo c +idio ts +under neath +pu ma +corn ell +en ation +lu l +swa ch +ab ig +u rer +mi e +form erly +ca f +er nal +chor us +juli us +sen ator +âľ į +wh ir +salv ador +ph d +uni fied +boo ster +graph ical +w rec +son ny +mi z +dere rs +s all +ven s +tusc any +wi d +y ong +kur ds +w az +trol ls +mac ro +cat urday +pre ssing +sa sha +cent ennial +gu sts +em c +be fore +den ise +cu st +ðŁĵ ¢ +lo oo +base l +eng land +y olo +ar du +manife sto +do ha +ì ľ +kni ves +bourne mouth +bi bl +bar b +al icia +Ø © +com er +cycl one +g it +ane ws +character i +vent ura +in tra +sf giants +hu t +be a +dar win +ell er +al v +re ese +bl y +kar an +conclu sion +man ny +fla kes +unite blue +nad u +co pp +ed ges +lanca shire +i als +o tta +philipp e +l ent +che e +ment ors +festi val +an ism +compli mentary +r j +pu g +d ine +we i +cli ffs +sar my +ti veness +treas ury +il and 
+after math +rabb i +ou n +bou quet +herit age +zi on +sur render +shen an +in ks +kar l +gh ty +pol icing +exam ination +ce y +per su +measure ment +hydro gen +lu han +âłĢâłĢ âłĢâłĢ +war i +о Ð +j y +fow ler +mis h +al fre +âĺ ij +bb naija +cat alogue +recogn ised +sa ver +hu skies +col in +mun do +si va +p ng +discoun ted +man utd +fre sno +de vin +prelimin ary +tro phies +pla stics +du g +pro cu +indi go +g ard +dy lan +pit ches +ground breaking +in son +bl ac +an thology +f h +expl ic +r ard +admi ral +so chi +la shes +splen did +en vy +ad v +sex y +festiv ities +stic king +bi b +thr ill +op p +ari el +botan ical +endur ance +fe males +br icks +vat ican +black pool +ber mu +br ough +roll er +bi d +sue de +sloven ia +mm ing +ml b +med alist +di ans +rehabil itation +ne on +s go +li thu +ram os +z ed +pi anist +inten sive +broad band +stu dy +peter sburg +lu ca +ah hhh +phys ician +dill on +tele com +gri ef +mu n +ac ro +si ded +s ly +blo ws +classic cars +tri um +ar gy +? : +h ri +marsh mal +âĢ ĵ +to pping +war saw +tran sc +preserv ation +b av +re friger +experim ents +ä º +gl it +sli ga +g age +fac tor +flav ours +br ony +sp o +cook book +carri age +aw ay +ny fw +on ian +w g +simp sons +ro lex +ðŁı ¿ +cro sby +ãħ ¤ +cre di +syn dic +pu bs +ali fe +poor ly +mac ed +ðŁĺ ŀ +behin dthe +w enger +n ats +ðŁİ Ł +rubb ish +procedu res +typho on +opho bia +er do +fu el +vi era +bu mps +millenni um +new zealand +lec tures +it on +mil ky +respon ded +ê ° +landsc ape +.. @ +bo ther +âĸ ¶ +z hang +huawe i +tu ition +s worn +in u +y or +pa olo +au ditions +ab il +malay sian +ho ps +fe athers +mp le +au ts +ã o +boun ty +ic he +ì ĺ +sh q +pin ot +ge ars +disapp ear +video games +t na +alzheim er +ðŁĮ ŀ +a ji +under wear +swit ching +sign age +o scar +ec on +dro w +cl int +pl ated +gun dy +emb lem +ho es +ici st +nel ly +juni or +road show +miner als +at le +alexand ria +ac claimed +v ell +shi va +ad he +en ne +amne sty +h ounds +councill or +ðŁĴ ¦ +aes the +part nering +influ enced +mag no +fl are +extin ction +civil ian +maje sty +va il +law makers +rac ks +mc c +ori an +sp ices +er rors +may er +co ca +pa i +s ooooo +reti ring +ba thro +ðŁĻĮ ðŁĻĮ +âĸ ª +su f +endor sement +buil ding +broo ch +pal la +arvin d +ag ent +kar ate +r hi +c tv +ta ine +um m +ba x +reig ns +uni of +enterpri ses +adel e +fla ke +at tire +bru ce +ba hamas +gra vy +sa in +che ek +tri vi +lo v +e en +bb lo +lady gaga +itt a +. 
"- +du stin +observ atory +eigh th +bloom berg +kh s +f cc +gi st +commemor ate +ve er +sexu ality +ed c +nic ole +vac ancy +u ser +son a +:' ( +dipl oma +t end +up grades +Å Ł +jura ssic +cardi ac +dr s +widesp read +à ł +dail ies +vend or +sim plicity +wi der +len ses +supp lements +de pos +ob served +vin es +parti ally +renew al +collabor ate +ali g +fin ity +ph u +zz y +pe tit +ðŁĵ ħ +z in +i gu +sm ack +fall on +ðŁĵ £ +back wards +comp onent +o so +compati ble +bin ding +zur ich +thom e +w ounds +ly ric +fresh men +sne aky +fi bro +di et +emplo yer +in sect +h ated +sch er +raz or +n sw +boo ker +califor ni +av fc + ° +preten ding +pep si +al is +un titled +k art +grand parents +e the +o ck +lux emb +visu als +small business +abdul lah +min ho +su baru +h ra +reve aling +heart breaking +clar ity +am g +sl r +** ** +âŀ ĸ +recor d +ici ary +min ded +ye h +exce ssive +knu ck +icec ream +tru th +ev ic +ta stic +ant arc +ren dering +, , +mit t +loren zo +st patrick +bound ary +zi g +vo cab +osa ka +fur n +tu n +gu l +s ounding +blo gger +utter ly +g af +adv ancing +l cd +mar gin +lifel ong +solst ice +sh ra +wa its +ple ar +bre ach +en ligh +ad er +itt le +c ation +ho on +stu died +?? ??? +k ash +ev angeli +ps l +wei ghts +met als +ty res +tur no +wi e +car b +g ale +se al +sun ite +am ic +patter son +á n +eu ph +up stairs +quali fiers +khali fa +apple music +ìĨĮë ħ +vau ghan +al ter +cru iser +mu a +t ana +kat rina +id ols +spo iled +secre tly +fi bre +part nered +um es +gi ov +com et +screenshot saturday +k eller +fil tr +fe t +con way +pe u +bad minton +gi d +m ound +don key +bu ff +lea ther +lar gely +bro ch +int ments +am use +r k +sto ve +impac ted +con t +cr acks +prison er +bar i +contrac tor +ori oles +domin ate +pol ar +am elia +dr c +ðŁijĮ ðŁijĮ +vi st +su arez +injec tion +blo oms +ðŁļ¨ ðŁļ¨ +sti ff +pay pal +sno wing +thur sdays +goo se +we dge +educ ated +weak ness +de cker +abud ha +bree zy +Û Į +hope ful +o bi +rai der +gh am +de u +se ve +par tly +fu t +infu sed +mer ri +than e +some time +hu e +me in +cre dit +sli ding +ran de +cher ry +dead pool +sh ol +ar am +under wood +sky e +distur bing +m nt +poli shed +guardi ans +ha dn +pic asso +ari us +ak shay +ir ri +j h +happ en +la kh +dal ton +at the +s well +mar sha +re h +cour s +j kt +top us +serv ice +r ink +hack ers +dono van +hor o +tc m +may hem +cha se +dev ops +ken sing +sc up +sh ere +quali fication +c live +ton g +n ancy +mar is +der dale +ber man +cinde rella +jol ly +ci c +loo t +collecti bles +hom icide +g ge +epide mic +su ites +mu ddy +gi mme +e rec +- * +tal la +lis le +embro ide +ðŁĩ© ðŁĩª +veriz on +ve ctor +be anie +arti san +ga in +flo res +vi gil +u so +ðŁĻı ðŁı½ +grin ding +gh er +air ports +respon sive +shaf t +can cel +ceremon ies +e me +at ari +bru shes +eag er +bo hemi +children s +yan kee +ma a +suspen se +mor an +mac ar +sun flower +cre w +vo id +ke ar +fashi oned +jen nings +sunday funday +sub missions +me ad +her man +wa i +crit ically +le um +baek hyun +for cing +co bra +ãģ ® +acqu ire +al k +ge ology +pri mar +import antly +ire z +bunde sliga +curi osity +sen a +stric t +con soli +win ters +ven om +chelten ham +ðŁį º +cen a +t at +ba in +glo ver +under cover +as ses +car n +memorial day +am eli +i rene +ch on +syn thesis +spe edy +mitsu bi +sla yer +compos ite +under stands +pe w +inter rup +hen ri +mor row +an om +thof july +g lee +thre e +ðŁĺ ® +and hi +ch att +renew ables +ye s +trans fers +!!!! !!!! 
+bab u +du ter +lo ops +pe ers +o ilers +pau lo +ic ation +h mu +war a +mer cer +hom eland +fu ji +ale y +year book +re m +re en +ab sur +bo is +] : +caes ar +shot gun +kur dish +o ren +ra e +anci es +ty pic +f h +def ault +re plic +lu k +trans actions +r ys +infan try +ðŁį ¾ +cho w +chick ens +ba gh +wy att +ay e +gg i +bre ws +ed itions +mi ra +commen cement +pre su +peris cope +ic hi +guatem ala +zam bia +pain ts +wit ches +wan i +un dere +cro y +vo ws +us mc +hear ted +theat res +shu ffle +le vel +mul tic +squee ze +fer n +app et +post al +mal t +on board +ld nt +co o +s sc +k ac +ðŁĺ ĩ +sc rap +mar cos +deal ers +ann u +mill er +co ve +ul ary +vladi mir +be ef +th ur +pick led +se same +bengal uru +mo tt +kathle en +hi st +no tor +dr ank +du chess +snow fall +e ff +tin y +j n +sy our +speci alists +scot us +bay lor +eve rest +mali bu +pre m +harm ful +l ali +b ates +g ye +differen ti +and ra +geome try +el over +black out +== == +ko ta +inter act +asi an +la yo +samu rai +fi del +exhau sted +gla di +pd t +spher ic +anti qu +guit ar +stu ri +ho pper +ang le +f ills +sla p +mi th +rod ney +ong i +in som +pre venting +cassi dy +ap ho +ore gon +lo in +ham mond +contribu ting +f n +gar ri +ori on +comp elling +escap ing +aim ing +plu mb +bi stro +be asts +concer ning +bo e +do pp +shop local +stumb led +âĤ ¹ +naz is +âĢįâĻĤ ï¸ı +gest ure +war ts +us open +hi ggins +char li +hang s +bom bers +° : +fe eds +c ch +st il +nic ola +ðŁĵ º +clam ation +tro pic +af ro +ou k +expen ses +der rick +al ine +fa w +reg ard +im er +sat in +thi um +ry der +pear l +te ss +mm mmm +sen ses +ðŁĩ ¹ +positi ve +exhau st +occu r +nor ris +lil ly +is les +direc ting +yo fficial +count less +sam ar +on stage +flo ck +mir rors +arch er +mo i +k d +vi v +in os +si kh +le i +sen sory +br its +kno x +chest nut +op y +coli seum +z af +di vin +adap ter +:) )) +tem ple +ku n +hel mets +t df +gu ide +m old +o ids +lu ther +he is +monaster y +sp ree +k lu +brit ney +jagu ars +gre ats +c cc +ky rie +machin ery +cric ket +re ro +ab o +aspir ing +semi finals +ale ss +sig natures +var d +me th +her bal +hol den +king dom +ap or +reg gie +ore o +palestin ians +em mys +sec tional +ro i +ney mar +qu el +cu ll +l ka +haz el +estim ate +ul ties +go w +be a +purch ases +bel ts +protec ts +m é +gue ssing +bb o +clau dia +fr acking +jon ny +el k +cel tic +al mighty +ra je +courty ard +ig i +can es +ðŁĴª ðŁı» +bank rup +le thal +âľĮ ï¸ı +graphic design +vad er +penc ils +rough ly +dan te +m fg +const ell +cam el +j b +bloss oms +en to +balo chistan +cine mato +ill ard +jer sey +con sent +dent ed +con templ +sch er +hol i +lou gh +st our +a yo +begin ners +cur b +v hs +a jax +du ff +av eng +dom est +commit ting +ai red +cha p +hedge hog +disappo inting +freel ance +in land +char ms +ðŁĺį âĿ¤ï¸ı +ai sh +m x +buck le +ti dal +per mit +bo ating +ra cha +kend rick +b ello +b hi +ple a +estim ates +l b +apo logies +jay a +bb l +ast oni +inter state +main taining +el bow +mu p +ep it +ðŁĺ ¡ +viol ations +def end +be h +sl c +am ir +pur i +ti um +fi fa +blur ry +scri m +ðŁĻı ðŁı¾ +ma ple +rel atives +âĺ Ŀ +cho c +con nor +⾨ ⾨ +whi sp +list ings +ma ze +than king +ri dd +grass roots +shi fting +desper ately +gor illa +den i +ju les +stra th +g ley +ja in +bu ick +t anner +ðŁĴ Ŀ +ga e +pri m +it ors +n ano +separ ation +armen ia +bor deaux +ðŁ ħ +pj net +bu rial +e bon +glo ss +re new +gri er +spe eds +comic books +sym boli +pur poses +ãħł ãħł +spati al +no table +ci on +n ps +ho ffman +nor man +rt g +du sty +situ ated +tr an +k fc +em en +nic 
kel +hast ings +sett ling +gr it +l ena +w aw +art s +gu m +ca regi +le wis +sapp hire +rememb er +embed ded +t lc +bl at +serge ant +el sa +boot camp +bow man +photo graphic +pill ars +direction ers +classi fied +no is +ve er +barre ls +wh oop +ðŁĺ± ðŁĺ± +fe male +petro leum +medi a +e fc +poké mon +ठķ +enthusi astic +var un +pro files +pedi atric +acci dents +con rad +jan g +jo jo +ac or +ob server +l f +live stock +for gi +fo s +el m +an and +go e +c ere +avoi ding +gri t +om an +thank fully +scat tered +nick y +cylin der +chees y +di ver +mahe sh +cav es +ear liest +qu inte +subjec ts +b end +gul f +vocali st +glu e +pat ches +un stopp +sny der +demonstr ating +pi o +hor ns +wic kets +and the +r ama +yo on +stra ight +bed time +or ang +bul lets +sa urus +min ers +inci dents +! ... +ðŁİ ¸ +ag ers +hand les +stat es +in ity +d ons +incredi ble +emin em +avi v +ru dy +moz art +folk lore +appli ances +mt l +fre y +di as +hu a +page ant +stri ve +im prison +bul lish +r ana +al erts +bb mas +hy per +derby shire +re cre +re dd +debor ah +cosmo s +law son +mel anie +psy cho +ho or +doo dles +sni per +shad y +man tle +canadi an +new year +inter actions +separ ated +cor ds +spiritu ality +ap u +it o +p ct +pel osi +rebel lion +se iz +wor cester +sec tors +ul i +san ta +Ð µ +ðŁĩªðŁĩ ¸ +bi ased +class ical +gam ma +dee plear +emer ge +back er +sur ance +hand crafted +ðŁİ ¥ +franc is +mill an +ic i +cro wn +wo w +stri ped +un fair +relax ation +³ ï¸ı +embrac ing +she alth +pale o +martin i +dist illery +wr ink +or k +na th +hay ley +cour thouse +si ber +sa di +quiet ly +mel t +m sm +me h +smart phones +rel ent +pp ing +war wick +co logne +gli a +cot ton +pro g +lon e +ip sw +star ters +expan ds +u mp +su ed +ski pper +infe ctions +ing le +à ¡ +cler k +demonstr ate +ac ar +ðŁĺĤðŁĺĤ ðŁĺĤ +ti bet +bun s +alo m +demol ition +ssi a +g st +[ ] +so ar +âĺ Ģ +ðŁĺ ª +ðŁĵ Ĭ +dee pest +beyon d +are t +att ends +activ ated +di mit +âļª ï¸ı +high lighted +magaz ines +rum or +az za +steph ens +dol ph +sho ckey +mat s +we av +mel an +serv ers +tra um +ku sh +æ Ĺ +bab ys +pa z +a al +la use +break ers +canter bury +ul ture +mi ri +euro s +tane ous +impre ssions +du tch +il d +gh i +pur due +adequ ate +l p +sy ner +ang ler +du rable +gal ore +ro wn +mg mt +ðŁĵ Į +lu cia +âĺij ï¸ı +zay n +bor row +. 
( +north umber +cru sh +eng a +su sh +extra vag +t out +ma hal +ali stic +ther mo +gall eries +es se +chi bi +attrac tions +lex ington +legislat ure +docu mented +resi den +brow nies +w f +st ool +plan ets +sho ppers +conduc tor +ms p +tr icky +fru ity +end ra +feel the +whi pped +hair style +re fer +oo k +oc topus +audi ences +ku mar +after no +op tim +c fl +ni p +gen i +alpha bet +ann ab +lam in +accep ts +l ng +ðŁĺ « +t ine +ac om +cheer leaders +t k +gr on +v g +k ung +ja x +dha bi +r ss +mack enzie +beir ut +clean up +gy psy +st ell +bur ger +hurric anes +educ ation +st ina +âĻ¡ âĻ¡ +unfortun ate +jere mi +bad ger +at ers +: âĢ¦ +ter ra +subli me +stu d +y mca +mr u +duter te +bren nan +bul b +mel o +yl on +hack er +c red +gu d +as an +pad illa +embroide red +vietnam ese +pione ers +projec tion +re boot +id c +an ey +pri mer +suff ers +win ding +p on +sto day +mor n +u ch +all in +adid as +eliza beth +tu ck +o graphy +ðŁļ Ģ +be g +os borne +ghet to +r h +cn n +ir ma +ma kin +cab les +mur ders +oc ks +inst a +al as +si k +cu ff +la re +foo dies +o vic +at om +geome tric +em pathy +ภµ +cent enary +newsp apers +administr ative +ðŁİ Ĭ +sti ve +contrac tors +le tt +tas mania +awesom eness +den sity +ve en +prince ton +frequ ently +re ject +gh i +modu lar +ceram ics +sh ag +ki wi +can vas +sweat shirt +an j +ti mm +napol i +il er +appe als +hamil ton +ma yo +we ave +arrang ed +whar f +occu py +b vb +as aki +ot ter +nor m +vi es +de tox +tion al +dere k +id ad +ad missions +constitu ency +u pper +woo t +allo y +se ve +lu b +un comfortable +ed win +ab re +d wight +ar che +virtu ally +sp ol +pri e +ai i +er r +swit ch +bar ack +se ok +cou l +wn t +pou l +o live +caffe ine +cardi ff +notor ious +de mp +ex cess +bar r +t ford +a jay +bump ed +my thology +shel ley +fal con +shakespe are +must angs +no ted +bon e +civil ization +sy d +par sons +un official +hy ped +sp ends +oppo sed +v ings +space x +noti fication +deci ding +bio tech +out si +sal ah +! . +fe d +ss y +c ms +bad gers +cr o +ela ine +n ba +dy our +n ant +honey moon +climb ed +conom y +ath a +m ell +ne bula +nature photography +juli e +bm x +inve sted +mon o +lieu tenant +wat kins +techn ician +o se +ka e +ì Ľ +mc queen +pre ach +trav eller +flexi bility +ze bra +reta iler +p ant +ben der +brand t +squ id +war rant +veri fied +cas s +pier cing +hon ours +t ying +mor ris +kis sed +op rah +panor amic +me i +splat oon +wich ita +ari as +gal li +indy ref +good times +athe ist +confe ssion +ow ski +re pping +ad ditions +mechan ism +z im +j ans +su f +cho pped +beg innings +vitam ins +ãħ¤ ãħ¤ +or th +po les +ru b +antarc tica +indie film +web cam +ket ch +bre tt +cle ment +her on +defe ating +hydr o +buc ket +wand ering +sid ney +future of +b inge +on ies +knock out +administr ator +syn the +l ent +jan i +bar ley +premier league +ner ds +cr m +bra s +bot any +evol ved +rot ter +ro wed +tum or +weal thy +Â Ń +mon arch +li shed +da hl +ðŁİ ĥ +bu ch +ken yan +Ø § +red ness +assemb led +se mit +hud der +shro p +ran i +lear ning +mor y +iti a +geo graphic +worl dof +f b +pho sp +boo gie +am ped +? ... 
+che w +dwar f +ar us +s sen +ru sty +recru its +h k +gar de +app lause +vol umes +invol ves +ta c +hand bag +trans late +ffe l +se ym +aqu atic +trans fer +zo di +and r +acade mia +cr ater +te z +ar se +adap t +col oni +snow man +mal i +hang in +di schar +oy sters +pho e +colon el +w ba +hispan ic +thri ving +sh y +ag les +sales force +cre me +so les +la fayette +â ī +ter ia +ach a +sp erson +go go +car ly +the ore +am ore +vo x +af t +ãĤ ¹ +stap le +mu ffin +di agram +ino x +su stained +av ent +me ta +arbit r +dec ay +ado le +Ð ½ +ec ol +ph o +n k +o cu +gr anny +ç a +luxemb our +stad t +alber to +le vit +am as +d x +or phan +co bb +as c +lo gy +immen se +chan ts +off line +p ent +bre x +w inger +plan e +i el +nichol s +ca thy +nar uto +low ed +/ // +ignor ance +cat astro +you ts +sch en +buil d +haz i +s ine +critical role +du g +dete ct +lo gs +en amel +stpatrick sday +ed die +co pa +cigare ttes +ho ff +kay a +la goon +ra pha +air borne +choo se +puer tor +ke v +gui ding +fro sty +bor ough +mir a +ðŁİ Ĭ +cade t +anu sh +yo gi +e ger +fl ing +slo pe +nin th +we ston +foot wear +f n +may weather +a am +pla in +stair case +witne sses +work outs +ro bust +dex ter +co hort +ðŁļ Ĺ +sp ell +ha ze +o om +organ ising +wild fire +cont acts +av on +min o +upd ating +ðŁį » +li thium +ing ual +k is +au ga +lo com +de duc +u da +th ak +boy le +mp er +hot tie +eri k +re vised +is la +travel photography +oo za +en qui +confe rences +clo ver +g room +cur ves +live on +per f +displac ed +bo log +xx xx +ðŁĺ© ðŁĺ© +te al +ve ssels +rain forest +cal ci +pan ther +gira ffe +ta sted +imag ery +pad res +day time +bas s +ri pe +opio id +nu e +vin yl +invent or +sen s +process or +mu t +gad gets +bibl ical +shann on +jacqu eline +car y +the resistance +ali en +n vi +co sy +bi har +fo ley +ren d +mu gs +fa ken +cl one +ni allo +gra bbed +chi hu +power house +n tt +chero kee +spon ge +imple menting +rh ine +le one +ðŁį Ģ +pret tiest +infra red +impro v +swit ched +tu bes +con tr +bl k +projec ted +be aver +yo t +bbcra dio +thi gh +per secu +apologi ze +w ack +po ster +oli ver +az a +lou d +( ?) 
+f the +women shi +spar row +blu sh +us able +sc ales +it ative +peu ge +ne eding +legg ings +glam orous +mat ur +c z +wat t +da b +tam ar +et sym +bau er +heart felt +h n +else where +bir ch +alu mini +hu ck +e me +j l +traf ford +d z +por tions +ana sta +arthr itis +esp n +ber gen +viol ation +yo shi +c z +northumber land +clo sures +ðŁĩ¯ ðŁĩ +smi ley +r w +tel ugu +inten si +gre gg +ve ga +dun geon +south bound +ba il +domin ican +semi final +chap ters +h itch +van ity +trans iti +recomm ends +sati sf +bar ca +queen s +( ( +de struc +stra it +ra vi +dess erts +in tru +har am +k os +fo e +fat ty +pais ley +magn itude +dri dge +com ey +schem es +vision ary +our t +down loaded +ðŁĻĮ ðŁı½ +gd pr +lan i +p wc +gu ad +nic est +stake holders +re ferred +george town +arvind kejriwal +schnei der +in doors +all star +strand ed +gen der +ze pp +ma sses +ðŁIJ ± +pati ently +bl dg +z ab +we arab +vi vid +he ck +d ella +sy mb +je opar +la ger +à ª +comb ines +ne c +br ay +flo p +tx wx +jo ys +pon t +pro found +sur round +mad hu +ma ble +ay r +te as +n sa +open ly +er nest +ãĥ © +to po +g na +anti oxid +ti an +e tr +c ello +ma thi +gener osity +b iting +man ic +kel sey +chee ks +ten der +w th +pron oun +ultimat ely +gu sta +ari anag +ger ry +ble ed +red dy +mic h +mitsubi shi +oper ated +sex ually +ma u +cl lr +vi ds +co c +mel ted +ðŁĮ Ī +q ld +ite ch +instru mental +end game +ðŁĵ ĸ +ener gi +brow nie +tam il +at in +domin ated +pra ises +fire place +sens ational +men a +k arti +un prece +ru pt +ori ental +mc cor +tour naments +scen ter +re eves +prescri ption +sam e +fra u +tru ffle +em bo +roman s +bla sts +techno logical +pr at +b sb +y ar +tren dy +ac l +al ad +ðŁį ģ +o hh +bankrup t +tho ven +regar ds +is er +war wick +vine yards +real m +niallo fficial +do ta +ge mini +to do +v able +¨ ¨ +la u +wre ath +ju ve +nat asha +le ver +lor i +hor ser +cc tv +air bnb +es anders +sin clair +ema biggest +high school +con test +optimi stic +t te +ðŁĴķ ðŁĴķ +ss d +ye e +hel ena +con sen +ric ks +jes se +an ic +ðŁİ ¯ +re acts +ro be +independ ence +vol tage +m ington +s ant +à¸Ļ ภ+-------- -------- +sentin el +ke tt +rehear sing +aaaa aaaa +sof the +stir ling +sear ch +wi gan +stand out +sna il +pent agon +Ä ģ +ch lor +cru st +net any +chemi st +disapp eared +ric ardo +sp iders +bo se +war ren +me ssing +bann ers +gu el +par ach +ma id +coun ted +epi le +bon fire +speech less +se tter +meas ured +rejec ts +nik ki +le ster +foren sic +fab rics +alo ha +pre served +wat ford +deta iling +dar th +bo u +car ly +... 
' +tail gate +noti fications +å ¤ +pas sive +trous ers +balo ch +ro ther +typic ally +à ¥ +sp it +wi z +sic ily +technic ally +ex pose +st age +hu bb +cre am +cap s +po ke +sle ek +ju ne +tempor arily +de z +awak ens +l ame +_ - +ji ha +tues days +advis ed +advis ors +exi sted +dis agree +news room +lo sers +world tour +dr ying +al di +har ness +foot print +hobb it +p mln +i ro +que red +asse ss +gaz e +sa b +th ian +í Ĭ +ti f +ob serve +ev il +dra wer +swee p +cor y +co dy +kyo to +cal lum +n inj +lau rent +be i +sket ching +custom ized +du r +regre ts +knox ville +ìķ Ħ +mess aging +grac ie +abun dance +bi dding +bre wed +fl ouri +therapeu tic +alt itude +ho gs +bur ner +elec tro +wonder fully +he ater +post pon +li very +r all +ad as +a ac +sau l +brook lyn +play house +âĻ¥âĻ¥ âĻ¥ +char itable +in y +z ah +compet itions +be av +plu gged +o is +do om +astron om +speci alized +max i +ta ps +cellu lar +depre ssed +folklore thursday +cri b +e mul +ë° © +fi gh +ru z +car lisle +spe ar +side walk +de i +depend ent +lac es +nh s +ðŁĮ Ļ +reali zing +net work +ric he +re gin +re fresh +st ral +pa thology +pla id +psyched elic +hin d +u ka +algori thm +lin king +progre ssi +fe y +d ade +hydr ated +b ant +fam ed +cot sw +bo ise +as c +rac ing +ja vier +ww en +mar lins +poo p +swe pt +toni ghts +we f +ani me +slo vak +âŀĸ âŀĸ +cla us +lem me +cli ppers +re ls +arianag rande +r te +ko t +thal apathy +hungar ian +zu ma +y von +is u +jour neys +clin ics +be be +ww f +n ws +super heroes +er it +sle ague +identi fication +mo tto +ba i +sour ced +ill er +ap i +pri se +unprece dented +dam as +tuni sia +dra in +undere stim +e ther +quarter ly +rewar ding +al ham +wolver ine +cab ine +hyp no +nad ine +hav ana +da e +ðŁĵ Ī +dr on +read ings +b ati +pic o +mer ci +iti an +wal kers +el ope +mi key +god zilla +bur lington +abu ja +social ism +at ility +sh ell +harry potter +g no +ab ur +re leg +fel ici +ro gen +neuro science +inst in +ath am +vou chers +j arre +fu se +def ici +monte rey +de port +mid day +pp ard +fre ed +ame ter +wil t +n ingham +pr att +liber ty +slo gan +o to +pr i +co ated +c pd +ne tt +il las +mal awi +evol ve +accessi bility +ðŁĶ¥ðŁĶ¥ ðŁĶ¥ðŁĶ¥ +or nament +b p +el is +son line +chi ro +fl ick +ib m +ar ak +en ables +gar land +san e +cu ties +tri p +rotter dam +n ys +lam ps +lu cas +bo g +ra ils +travel led +hic ks +en u +sab ha +scru b +hi er +hart ford +fo o +fer nandez +tre vor +mat tress +appo intments +ale j +fe i +o logist +saf ar +oc ta +sr c +sha un +ambi ent +dri c +bi ker +she e +must ache +h ta +bo one +her ty +car dio +bra kes +rec ital +consi sts +overwhel med +cau l +robb ins +im it +al th +ur l +bi bli +on ne +black livesmatter +diffic ulties +tel ang +tall er +ðŁĵ Ĩ +deb ating +bur rito +mo vember +strength ening +bo e +te stam +mirac les +base ball +re nee +ðŁijī ðŁı» +al fa +âĺ ĺ +unstopp able +ec s +g mo +giftide as +path way +fen cing +ðŁİ ¤ +b ham +ra s +sk o +d led +thel ast +magn um +bin ary +wil de +wil der +wh ati +barbe cue +h ism +can oe +kur di +eli ve +advant ages +mad ame +bi er +mis sing +enter tain +air force +y ama +c is +hash tags +j is +ve il +dream y +ten se +may ward +ch ateau +hunt ington +âļ ĵ +v all +up on +bl ouse +dun es +ðŁĺ ´ +fert ility +m ole +curren cies +st u +ber lin +toa sted +div as +wal t +lar k +por a +hit ter +um er +chil led +bal ancing +fa is +y in +or tiz +east enders +h ate +ur al +ap ril +tim el +à ± +per o +sto cked +respec ts +th t +best friends +giving tuesday +be ad +inv ent +im i +nap les +comb ining +tok ens +thir st +ma sc +par 
rot +sp u +dent on +* -* +t res +subur ban +wid th +si ve +con tender +siri us +lo k +troop ers +outra ge +tur bo +frag ile +me ssed +do h +disc ord +netany ahu +re sign +forgi veness +mo han +mun ch +cam ou +identi fying +enab ling +hot ter +thorn ton +jai pur +ar ya +ðŁı» âĢįâĻĢï¸ı +mu staf +maj ors +o ke +du ffy +roh ing +til t +ðŁĩ®ðŁĩ ³ +rock star +she ep +hend rix +ra v +in vention +do u +lagun a +gru mpy +sw is +im pe +) ' +you ths +bun ker +st ache +oppo se +indi es +acceler ate +ml p +ed en +w ann +k ail +akshay kumar +su pt +pol ym +midd leton +extra ordin +wil son +australi an +alumini um +way ne +alum nus +mat ics +gri m +er nie +opp a +competit ors +rand all +h ence +decla res +pre aching +sha he +can e +sustain able +stap les +le dge +ad ena +doctor al +bur gundy +decor ate +ren dered +ri sen +pr ank +di or +bee thoven +flo or +ac com +to t +ho dg +touri sm +say in +objec tive +mar kers +premi ership +en abled +camou fla +gi ant +Ñ ģ +smo key +ric ket +pan g +de pending +s ation +evol ving +inter cep +cen sus +tof the +re en +mendo za +trum pet +marke ters +an it +ðŁĻ Ĭ +north western +v la +foto gra +blackand white +che wan +wi g +tro om +ginger bread +k n +ro mero +n fc +or chi +fun ko +sour ce +f s +ra ped +o st +tar ot +ann ually +ðŁĺ ¬ +r ill +del av +.. !! +se s +can n +medic are +ph el +ape x +guardi an +rema ined +r pm +a ñ +story month +instag ood +neighb our +p ing +sem ite +my stic +as cot +mat er +hand ful +dang ers +ti d +ana heim +opol y +sh allow +nami bia +tor ia +procu rement +big bang +announ cements +prosecu tor +beng als +sal le +en roll +ga stro +sugge stion +ba k +ha ul +budd hism +berni esanders +flu te +fati gue +cyn thia +cho i +ir win +gu a +str ous +h p +ba p +satisf ying +play a +ðŁİ ¼ +inst ap +al ice +t p +irri gation +ðŁĩ¬ðŁĩ § +in tric +clu es +ple x +sa x +he pat +dump ed +signific ance +by u +medic ation +pro v +tough est +corn ish +âŀ ľ +kel ley +u v +si zz +si bling +me st +di stor +diplom atic +aun tie +b hat +son ic +bren da +pump kins +ro ch +black burn +ur ged +shi a +arrange ments +floo d +sa unders +lec turer +nou ri +popul ations +diplom acy +consist ently +ðŁ¤ Ļ +t mund +cauli flower +l ily +vocab ulary +vari eties +coo ker +up town +qu ent +mo sa +re inde +velo city +spru ce +social medi +i ber +volun tary +proce ssed +bal tic +y ang +leban ese +d p +dol ly +arrange ment +y uri +cran berry +kal yan +elev ation +cli ff +pu shes +ìĬ ¤ +sil ic +co wx +eter nity +sla ves +vine gar +glou cester +con tained +breaking news +aga inst +renov ated +norm andy +hero in +ys m +mo ds +gre ek +un di +tren ch +v h +encoura ges +head ache +gr ange +: ' +ever green +Ù Ĭ +reck on +ab used +th ru +cho ice +ti dy +col der +scho ice +ha in +bru m +li ars +bre it +yor ker +sh ack +he idi +micha els +sco pic +fasci st +play ful +ca c +yas ss +sh ad +.. ? 
+qu en +ram irez +clif ton +pr s +best fan +âģ ł +gener ating +head set +disappo intment +abstr act +bo iled +paren thood +azerbai jan +exhib iting +bom bay +oli vier +ko so +un lea +mat ernity +iz er +si ves +r hu +col l +saskat chewan +fre akin +de k +na g +stab ili +ðŁį ķ +organi zer +bo sses +ar u +u va +at able +ta un +after wards +fert ili +ver ge +az i +mor ph +๠ģภ+jer k +cosme tic +ko w +stru st +ap ache +post cards +for mul +ì ĭ +spin al +jack pot +elec tri +Ã Ń +lo y +gra der +diab lo +ar di +he sit +f w +arch ery +pa sh +the ories +repe al +re live +per cy +âĺ Ĩ +im in +syn chron +sham poo +coup ons +o to +la i +thou ght +luxembour g +mo v +ðŁĺ ¥ +ge mma +se ated +m ga +strat ford +un certainty +shi fts +est o +fo ol +fire arms +cor rie +ki ki +appa rent +p ills +olym pia +fi d +elev ated +de cks +ignor ing +av alan +ro v +whist le +p tsd +milit ants +robo tic +pac ers +quil t +bankrupt cy +lic h +per cussion +celebr ity +al s +( ; +su t +pokemon go +h g +off s +gibr altar +scre ams +billi e +gen ome +mar in +be ams +arch bishop +em in +bedro oms +g ated +ol ly +warran ty +at own +cudd les +gun na +k ic +vi ve +cy mru +nar row +pro b +le o +refe rences +manufac tured +cho pper +brun swick +sem is +don ia +r ye +man o +hur ting +? # +hol li +investig ations +c els +ðŁĵ ŀ +le ster +temp les +sto rey +mc mahon +toi lets +wo of +ï¸ İ +le verage +at om +night mares +victor ious +haun ting +custom er +ag i +yo ongi +mon ty +ver onica +w ur +inti mid +blan kets +volu tion +j m +âĺ İ +am on +jud ith +ðŁĺİ ðŁĺİ +distr acted +dri p +hurric ane +and es +revel ation +tro op +ab leg +col lin +tibet an +wor rying +inter nationally +eat er +camero on +brad or +y uk +ðŁĴĹ ðŁĴĹ +tra k +slo pes +ci er +ne a +ol er +ta ka +albi on +volcan ic +am n +a fi +ob stac +face time +ger ing +n pr +metall ica +organ ic +ðŁĴ ¡ +ki dd +d ances +pemb ro +wash er +m its +om er +emo tionally +tan go +ip o +do cks +scan ning +spec s +tho m +the ology +emer gen +om i +g pa +selec tions +un necessary +ima ge +ter s +induc ed +gi gan +rent als +supp lied +m fa +shan kar +lat er +pa jam +cla ve +Ù ģ +ma hin +carl son +avi an +ano va +kati e +aj ith +design ated +chocol ates +investig ators +gla zed +prin cess +er ry +ra gn +ou rable +hr u +sun dance +peuge ot +steam punk +gh lin +gre ase +hi res +z ap +per ce +j ill +tom e +he hehe +joy ful +mae stro +ni shed +gene alo +v ich +p its +fox es +good man +emer son +lo bes +con verse +o ats +thom son +ra him +mal ware +ah i +man kind +re sin +im g +sw ood +kin der +sc roll +ar a +sak ura +ro bbed +xi on +ny a +c ism +ce dar +be in +mour ning +tor to +heath row +done gal +bar b +hydr ation +k or +elim ination +su pdates +hill s +appe ti +star red +ko m +gw en +dd d +cra y +sc anner +personal ised +seren ity +re design +meta ph +box ed +judg ment +no se +ë ¹ +er ad +ac ne +supp liers +ener getic +v om +as ap +ðŁĶ ¸ +ir vine +hat ch +la ss +ad ren +waff les +accur ately +ici o +itt le +se un +occup y +web cam +thene w +ent es +ga i +j w +accoun table +vis or +ir rit +licen sing +hudder sfield +gen ie +ðŁİ ¾ +atmo spheric +ten sions +spart an +clif ford +ol an +north bound +ame en +cen sor +u el +ster y +$ $ +far rell +hy ster +cl t +se dan +rep lied +descri bing +micro wave +sla b +pro sp +assi sting +ru bio +e than +hh hhh +gu ay +z man +ra ise +roll ing +o e +n ile +ambro se +scar borough +hero ic +coo ks +mor t +chop ra +ðŁĮ · +to b +shav ing +stac ey +dor m +motor sports +wi ki +fol ds +sp iced +stress ful +liter al +fu dge +pe ggy +wa ite +tre sses +se sh +pr ic +ðŁİ ħ 
+fri ght +r va +mumb ai +po m +tt v +cel lar +tom e +andro id +dor is +tsun ami +tin der +o ec +m wc +dor tmund +no thin +l iti +so u +believe in +at u +kno cks +mag ni +ss sss +ro hit +ine ws +ang i +m andy +ke ttle +intermedi ate +av ant +cur l +endor sed +ori o +ur t +consider ation +wi res +shel ters +b ino +vik ram +imple mented +ly dia +bu k +paro dy +c news +under graduate +canu cks +sam i +polit ically +ro tten +gh z +tex tiles +over load +moder ni +recre ational +fli r +bat on +typo graphy +ov ation +intrigu ing +pilgri mage +al ge +ad ays +tcm party +sp elled +cur ls +boo ze +ste m +ann es +ir ls +spon ge +sho pper +sig nation +bra ss +mi stress +le ah +beg inner +lau derdale +augu st +pre school +ta ping +tai pei +execu tives +b d +rhe tor +esc or +immun o +deeplear ning +stat ues +it us +manu script +ly ric +cor vette +mol ly +la ge +de p +cn bc +le st +je ssi +fi fe +griff ith +oppo sing +ran g +dr ills +respec tful +p ity +d ell +har ding +play boy +blo ke +shut out +k ili +o sp +se attle +bc poli +mis es +journ als +team ing +es ther +fre ddy +Ķ ï¸ı +metr ics +no tre +gar ry +for ty +navi gate +perio ds +bened ic +j id +da w +ance stors +restor ing +con g +aller gy +tit anium +c ence +lean ing +ab bas +v ast +uc f +roof ing +e man +seve rely +vo gue +ve au +in bound +d z +tane ously +stret ching +man chester +dr yer +dav is +kan th +the game +it ted +re tain +el les +conge stion +frat ernity +ol lie +lo ki +fre ely +cho o +pon y +sc ep +tab ly +bal t +rock n +di me +lo gging +ðŁį · +ad u +ha voc +water ford +char is +swee tie +run ning +ner d +erdo gan +z ara +weigh ing +fif ty +pre cise +low ell +kurdi stan +r yo +or th +syn th +lin ers +phenomen on +art illery +il legally +constru ct +nostal gic +gar th +al ta +shel ton +a sean +w ander +dur ban +di versi +bon o +cl on +le man +sh un +obstac les +appet ite +fe eder +respir atory +di xie +formu la +an to +so ber +extin ct +au c +ing les +legitim ate +; ; +min nie +ipsw ich +dram atically +ðŁijı ðŁı¼ +ingh am +milit ary +mon et +us navy +for k +dun no +play er +q otd +st oo +ex or +ethiop ian +film fest +pe red +c ate +sau di +in ner +sin cere +tion ality +ale e +de eds +cooper ative +ir onic +cro cod +br ary +post season +cam per +can ary +e in +exten sions +nb d +sher wood +spo kane +hu mp +jit su +ê ¹ +dar yl +p si +stab bed +offer ings +expe cts +cav al +body building +fr aming +f ca +ye arly +bom bed +sk il +resear ching +jud iciary +gree ted +tu dor +mil o +innov ate +ðŁĺ Ľ +r hs +ru by +contribu tor +fam er +soci ally +m lin +fi ery +ut ter +beau t +it os +de voted +rain bow +bar ney +pe ren +ar jun +r na +gab by +ut i +hann ity +pick le +ser v +qu akes +pp e +fe m +wh itec +j n +victor ies +ðŁ§ ¡ +gol fer +congratul ates +resul ting +mechan ic +ur ve +cen tered +kie v +an s +in cub +< < +c mo +bestfan army +dap h +en ham +on cology +ku sh +t xt +ori ented +fashion able +c sr +sa hara +r ack +pd p +han son +ภĩ +ti ers +ra r +pan am +in sky +sa hi +testam ent +asth ma +in her +fisher ies +or der +ho we +gall on +ep is +suz anne +drow ning +paneli sts +ðŁĺ ² +ë ¦ +al ach +commemor ative +at tribu +ðŁij » +mo o +visi onal +week sary +gu st +ak in +poin te +ee e +di spar +ni pp +dent al +st all +pi an +bor e +ul ster +tic k +ir r +tae hyung +micro phone +bermu da +ga ard +el er +plumb ing +hu gely +âļ« ï¸ı +race way +cam bridge +mar cel +burn ley +to ast +holly wood +fa sting +me red +hib ition +ca pped +benef icial +ow ning +cont amin +arab ian +to on +cap ac +hul u +sm ir +nutri ents +se in +graph s +con ditional +ðŁij 
ħ +or ac +play in +nor the +tor nad +mar ian +ju mbo +lex i +incredible india +road to +uk one +confu sing +sp h +shan k +pi ed +mq m +positi vely +sher ry +path ways +consi ders +tof u +argu ments +resil ient +che tt +with dra +ter o +ated ly +sw ana +he b +fli ght +har ley +decre ase +kind le +book shop +³ ï¸ı +marty rs +sm ur +mc cl +concer to +sti me +rejo ice +app lau +cle ment +mer kel +jai me +im mortal +isle of +mar co +youtu ber +stal king +me too +st ack +sp ouse +u st +lu v +âļ¾ ï¸ı +eque strian +ev ing +fl in +nick name +the big +as ar +st acks +wal ker +bor a +kidnapp ed +hur ling +humb old +rec alls +co pper +ann is +se o +mer ger +mu ir +ad dy +ðŁĴª ðŁĴª +be x +cr acy +con an +congratul ation +mid st +âĻ ¬ +for bi +op tic +cr ate +crocod ile +mad agas +secur ing +ast on +o gue +savi or +salis bury +love it +fuji film +cast les +as st +ar rows +sp acious +tr s +poly vore +progre ssion +m ri +nel son +bi m +indic ator +o da +pe pe +re signation +gu t +sne aker +log ically +az y +are lla +te aring +jo shi +ssion ism +q pr +mari ah +p x +ble ed +mi an +med ley +we iss +ker ry +gat ory +at al +madi son +av enger +nab y +pl and +gi les +fresh water +d ington +ta j +demonstr ates +n tv +bul bs +sunday morning +pe ake +souven ir +wa h +ton nes +m kt +complex ity +con den +ross i +b ing +y ds +su k +n go +mid land +ol y +life is +ri pple +mo reno +dd ers +tu s +á ĥ +bou l +x a +hol dings +wn y +shadowhun ters +ke i +asp ire +m ous +ow en +so ak +skir ts +moun taine +stor ming +ch rome +ri ots +sar ato +amaz e +less ness +nav ar +crit eria +ra fa +indul ge +ay er +por to +nam o +........ ........ +yi elds +val le +j h +mac ron +sa ins +dur ant +tra ilers +wo t +confeder ate +sh rin +id ol +form ally +ten e +motor cycles +than g +no de +bang er +dal y +p ats +enroll ment +au ctions +at al +ar bor +lo gos +de arest +trans action +dom ingo +fle a +ser mon +de ck +sin cere +questi oning +juli o +was p +pre tz +armen ian +k ham +inflam mation +picture sque +acci dental +film makers +ðŁĺ ļ +ðŁĴ į +ca sey +so b +yee zy +good will +parag ra +ss ly +fe ather +dy ed +assassin ation +na de +b cs +app lies +femin ine +fe u +ext ent +depu ties +l ack +psy chic +go i +kill ings +pse u +ðŁ¤ ª +un c +mar l +tan e +mck enna +sur fer +influ ences +free way +hack ney +mal aria +el and +te au +rema stered +Ø ± +raz or +gg y +cor ro +lak sh +fla ir +honest y +hoor ay +de pp +am c +wedne sdays +q a +ed its +- $ +se villa +dou bled +human ities +c cot +som os +r ine +af a +si oux +re construction +wel ding +th reads +am ish +encoura gement +po der +bo ck +bal m +p tions +stand up +accompli shments +guar ding +convic tion +ac ion +napo leon +depic ting +att ack +su i +wear able +âĸª ï¸ı +pot ter +esc ort +vis e +to ts +bo on +event profs +angu lar +womenshi storymonth +bar row +sch i +ac comp +ti k +l end +kensing ton +wol fe +st acked +cra shing +exhi bit +wing ed +sab rina +ma sa +k ms +alway s +et t +pla sma +counsel ing +pick les +nfl draft +mr s +inev itable +coura geous +staf ford +writers life +ho s +e j +gh yun +trade mark +adri an +influen cer +coron ation +ra ging +explo red +usa f +excep tion +eu x +tan ker +sw ami +pac ket +ðŁij¨ âĢį +f en +she en +a ero +j l +re gal +nw t +au ster +meh ta +char ge +a ste +b ate +inf eld +racec ourse +collap sed +fle ece +z il +al lie +alternati ves +geor ges +ðŁĵ į +quir ky +fc b +nat geo +philanthro py +bra i +every day +ðŁIJ ° +ach ers +ja an +fin es +q i +fisher man +distin ct +gri mes +nation alist +comm ence +ro wn +âĢ ³ +z ing +f ter +hr w +baro que +bl 
ender +kitt y +hoo ks +c ited +w anda +consen sus +reinde er +an and +supp ly +me ds +v n +ol ph +rat chet +shel don +secur ities +ë°© íĥ +cro m +mosqu ito +j eric +im mac +dimen sions +â ¤ +di ssi +sponge bob +dami en +steven son +jo anne +del ish +yi kes +than x +surve ys +postpon ed +alco holic +al ised +ðŁĻı ðŁı» +do ch +sen tim +mered ith +com pares +b ago +happy days +mo ss +ãħ ĭ +ne c +gn ment +frustr ated +comb in +ri v +ec lec +col lo +compli ment +actor slife +ct to +nic ar +op hon +apar the +man t +ja de +trol ley +optimi zation +eye on +eco logical +qui st +ep he +ॠĩ +cin co +appo ints +old school +c pr +behavi oral +min aj +:- ( +tag ging +ev al +jo aqu +ðŁĺ « +ha k +de me +jama ican +so s +hy att +hand book +libr arian +hanni bal +pump ing +ch om +f man +ga i +hu ll +respon ders +green ville +n us +vau gh +ðŁİī ðŁİī +ta xi +gold berg +man tra +te ase +forbi dden +metho dist +ati vity +* *** +ec t +mc gr +Ħ ëĭ +se b +amid st +disapp ear +thy ro +phili ps +er ina +v icious +stream er +million aire +ma p +str ick +hack athon +gh a +ed ic +mi ka +pe ck +ill i +anto ine +ar ca +op tic +ma ure +ðŁĩ¦ ðŁĩº +cla shes +man ly +âĺ ģ +al var +and res +me i +el m +ww ww +al tered +l te +ê¹ Ģ +mo jo +for rest +thal ai +non t +spee ches +acknow ledge +ign ite +x factor +ðŁ¥ Ĥ +mead ow +disru pt +debu ted +scrim mage +pharmaceu tical +fi dd +found ations +philosop her +et al +publi shers +bo ys +c ke +ru gged +opti mism +re be +phil harmon +nar cis +ral lies +lu is +go blue +fol ded +un acceptable +optim al +li sa +pol aro ++ . +en za +âĿ £ï¸ı +mon opoly +grace ful +dair y +du a +diffic ulty +judge ment +o si +mer sey +flu x +new found +ter ns +dimen sional +in vic +al ba +am it +abudha bi +alger ia +autom obile +the ad +lo tion +acceler ator +vac ant +iti on +lu f +al ic +pl l +bla zing +ba z +sen e +ðŁij ¼ +villa ins +direc tory +eis en +to ck +broch ure +ri pp +hb d +zayn malik +nic he +lo lol +certific ates +mor se +fac up +x ham +un wanted +im ports +carne gie +fan sign +mo u +r alph +destroy er +sw ing +trek king +cili ation +pit bull +g aps +ho well +defin itive +mc le +f ps +et z +bol ly +lyn n +gan o +at ure +fur suit +co il +na v +but ts +tro jans +eu re +en ko +sch umer +horri fic +install ment +br b +subur bs +a bel +vi r +de sh +cun ningham +ðŁIJ » +span n +sch we +ke mp +tr u +ste alth +qu es +le w +deli ghts +ko ch +hu mili +cr iti +il t +sp ells +mi ley +car ic +ðŁį ´ +lc fc +substitu te +oun g +? !! +af fir +predic table +class of +er r +cy press +chand ra +age ing +__ __ +ther land +don caster +el in +yo shi +sail ors +har ris +jo anna +niger ians +h ers +pla gue +pro cra +k no +can ton +busine s +un h +pra kash +c in +bow en +co ating +m als +be gging +smith son +ponti ac +sp ies +dam ian +pl ine +und ant +al ta +one ss +shame less +da q +bb m +wal es +stam pede +ser um +Ù Ĩ +cataly st +x n +ab sc +free zer +ch un +ari os +mc cre +fore head +he ars +damas cus +tac oma +ardu ino +encoun ters +stan ton +lg b +ab as +" .. 
+ke te +drac ula +ele m +g ne +zepp elin +la brador +pul p +op tional +or n +russi ans +san itation +hil ary +etsym ntt +pen alties +au st +ig ans +olympi an +medic aid +vers ace +va pe +re stra +pe ep +sexi est +st alls +di le +the a +punjab i +pupp y +tuesday motivation +ðŁĵ ļ +the flash +roc ket +mo dest +chihu ahu +on na +k sa +hur dles +ca ve +fail ures +sp lit +bo ho +gur l +disappo int +ho ward +nug get +fran z +stal ert +kaz akh +for getting +sch ri +ag ate +am at +eve rett +du et +veter inary +juli an +ch ills +bra ve +ghost busters +lan do +gre ets +profit able +d é +ti r +ze e +om en +pd x +gray son +har i +fix es +stab bing +swim mer +symb ols +compli ments +po se +func tioning +th nx +gi r +corpor ations +bar low +lo e +off season +distin ctive +marvel ous +nik on +enri que +ky u +ja ws +amo to +lom bar +travel blogger +fa h +ouri sm +tri stan +so e +ce ase +ðŁı ħ +z ac +mck enzie +taxpay ers +swim suit +bl o +les ley +kan sas +w ks +ki el +provo king +my les +str ing +kangar oo +galac tic +fif th +s ke +we ir +ll is +mat ory +ðŁĩ ¿ +un ci +re productive +roo ting +ti des +gad get +.... ...... +alex ander +bow ler +scre w +apo log +eri ka +wal ters +shet ty +lan e +ban ter +as ant +me so +v ain +" "" +us i +fer din +accomp lish +man sfield +bom bar +collabor ating +cla p +it ure +s da +smo ky +na k +im person +car la +com ra +bur gl +lo co +ti es +in hi +trac ey +se is +diss er +rr rr +dra y +prote ct +cor ona +hun ger +ck en +c eli +trou bled +predat ors +fic tional +shav ed +riche st +metab oli +ful ham +gro oming +mono chrome +wa sting +as co +ast e +ti sta +remedi es +ung soo +south end +perman ently +bu mble +procra stin +ident ical +practic ally +ma scul +su ke +assu red +val erie +devi ant +grizz lies +thi er +pur a +ne pal +not ts +bil ateral +spo il +car mel +cine matic +ph l +ni fty +ma o +hypo cri +la ser +pan try +mathemat ical +el isa +coordin ation +bel mont +a it +radi ant +bo iler +man g +f ag +cr c +h ams +br in +â¬ĩ ï¸ı +famil ia +âĿ £ +sab er +ru pert +gg an +rit z +mic h +sal ford +le vi +gra l +ðŁĴ ¤ +n ino +ce d +business man +ul tr +sim ply +compre ssion +pa ins +hal t +ë°©íĥ Ħ +landsc aping +n f +croo ked +er d +itt in +ddle ston +sur passed +ino a +da g +bl en +exten ding +at ing +al gae +ball er +u mar +snoo ker +col lu +flo wn +thu b +ridic ulously +ki sh +op le +di re +as ser +ari sto +sc iss +h ating +trou ble +syl via +suc cul +plo ts +sincere ly +al er +laure ate +br ack +att n +rif les +me to +collec tible +cu omo +conte stant +consist ency +ant z +rang es +abig ail +de b +mini ster +grow ers +an oo +hoo ver +dream er +nu cle +resear ch +mi y +sha hid +ma v +d honi +cin i +do j +hin dus +part ying +dal i +alon so +inform al +clark son +it ton +ki an +cit yo +mor i +la sted +as pen +libr ary +susp ici +qu at +den ial +fol der +ch ori +swee ping +eni x +ðŁį Ĥ +Ø Ń +nas car +handmade hour +mou l +heat wave +em er +exam ine +ib n +gr ind +po v +tion ist +m bo +she ila +integr ate +om es +take away +cer v +con nie +tic ket +ce led +bi en +visu ally +madagas car +sor ry +gu i +park run +tra its +la be +pois oning +ॠĢ +vi able +bohemi an +denti stry +bad os +spr outs +mask ed +te ddy +ðŁĺ · +sa f +sa as +ji ang +ti ght +spe aker +withdra wal +bc n +as signed +class rooms +fle ming +ðŁĴ « +super girl +tot als +table top +e books +horizon tal +cra z +flu sh +j ard +c dc +er son +ãħ ł +green wood +ni h +co x +ad a +lit re +go ing +v icky +cur ved +lou ie +gra ins +hy e +lon ge +reme dy +tra inee +san jay +super stars +ma ser +man u +s age +wh l +ðŁĺĤ ðŁĺŃ 
+ðŁijį ðŁı» +m sd +en z +rab hu +j oo +gh u +ac er +e po +resurrec tion +justice for +bl ended +mo da +avalan che +france sco +re spective +g s +ye ast +wel ch +devo tion +ge tin +athe ism +am ic +carol yn +lo c +ld nont +ave c +us da +le gged +bra very +b lower +cow boy +he h +sti ble +buff al +chann el +run chat +âĺķ ï¸ı +ide ology +best seller +y oo +pe anu +bon ne +fel ic +edi son +fr actu +naren dra +pp ets +seym our +ri viera +he ctor +necess arily +bi anca +soci eties +the best +w g +sent ences +win k +vacc ines +pal ooza +jam ming +as f +mp us +agre ements +ec k +ba c +hon ore +com pul +wild cat +im posed +yo ga +hud son +can celed +l ich +fu zzy +es que +ch uk +w vu +se k +fli pping +r hon +wi shed +wh a +cap ability +len ovo +ìĨĮëħ Ħëĭ +vi vo +tv d +nor a +sil k +pas adena +yo semite +valu ation +clo cks +u ber +mr c +dar kest +au bre +ss o +bell y +wrest lers +kill in +lou der +buck ley +ge el +ad on +un s +appe aling +ðŁij ¯ +semit ism +list ens +fit z +ãĥ³ ãĥ +ny lon +ar ty +seem ingly +hal a +su ited +et y +she ds +mu ffins +ap ric +um ents +u ta +jam mu +chelse afc +star z +yo ko +roo t +clean sing +di ar +pione ering +ihear tradio +dig iti +fin dyour +can o +ðŁĴ İ +z ol +spac ecraft +six ers +moi sturi +b ile +ti sts +hor ton +rang ing +colum bi +mete oro +senti ment +ep l +foo th +text book +drain age +r ly +sc ue +imran khan +ðŁĴ ¸ +margar ita +ed dy +predic ts +gamer gate +advis e +growth hacking +love you +ug and +v f +beng hazi +s later +ne wor +ch el +independence day +p np +cul len +hoo dies +num bered +brit t +t sa +kl tu +s ages +mom o +onep lus +col l +gu ts +w ta +mesm eri +enh ancing +chiro prac +j is +teen agers +m one +constell ation +sweep stakes +e ze +slovak ia +la ye +pear ce +wa ver +po gba +k ron +sur geons +mar x +ti d +gg a +desc end +p ours +upri sing +wal la +sab bath +bachel ore +mack in +k am +peter borough +hor a +ðŁĮŁ ðŁĮŁ +think big +r j +hy drau +sp al +univers it +ðŁı ī +mail online +league of +ten ants +w ally +lan ce +heav ens +dd r +bol ts +am ir +i phone +ci gar +en du +re i +el abor +r inging +john son +characteri stics +sal oon +algori thms +tal kin +m tn +di ve +region als +ff ice +hat i +deviant art +so tto +shir o +l ama +k we +f aded +por ting +tu mmy +est ates +buen os +ðŁ¦ ģ +beli ever +pen etr +dar n +sp ite +can opy +fashi oni +t illa +pet als +eli jah +bra wl +marty r +ë°©íĥĦ ìĨĮëħĦëĭ +mid town +eric h +d apper +sm town +me gam +ww w +le le +on s +cat fish +fir th +fossil friday +ball park +th aw +pot ent +illi e +cre ep +car p +so ap +gun dam +infe c +yy yyy +ठ¨ +z ag +rit t +calcu lator +bo ca +ok o +to ad +threat en +refin ed +olym pic +accompli shment +bacter ial +a ji +tat um +feli z +she ed +j at +th ic +jam al +ðĿ ĺ +lin a +ðŁIJ ¯ +jo king +yot po +pin ch +ak ron +her b +motiv ation +li a +ho stage +cre ek +gam ble +russ ell +patt i +fo tos +c pc +bro ken +back the +cla ys +u mm +stock ton +mat ernal +ü r +la kel +cent ury +be k +infe cted +ภ¡ +smack down +man ned +ta hoe +sm es +bas a +su la +augu sta +. 
* +rohing ya +gre ed +counsel or +silhou ette +gra vit +cla use +' - +bo bc +occa sions +now adays +dic tat +be ard +n ally +brigh test +kab ul +inc india +dhan ush +archae ological +che ape +mizz ou +d hi +ov ski +bax ter +asse mble +à ¢ +gi gi +ac am +wis ely +haz ard +north ampton +âľĪ ï¸ı +me th +bla sting +re unite +mu lus +ali zes +t read +mil a +ed ward +ko va +pe sto +ðŁij ¶ +vit z +hydrau lic +refurbi shed +mo tel +isab ella +hom me +sever ance +uph ol +mis erable +f ari +lat ter +ef er +crack ers +es l +ac io +yy j +in an +ec b +z ind +pan as +tru cking +re ed +sh aker +burge ss +em pire +ag nes +n ington +art works +fr s +ti le +bi ome +eu n +ch ong +americ ana +god father +go blin +i shi +! ). +temp ted +gen omics +mand ate +ck y +ðŁĴĻ ðŁĴĽ +som ali +br andy +in ven +spoke sperson +pc b +yu an +h g +fa z +starwar s +ro wan +blue grass +don g +d day +trin idad +er ton +ban ning +re tention +cu red +tober fest +re set +we is +deta ched +behindthe scenes +immun ity +ph a +bra y +ðŁij ½ +ran cho +ram say +est onia +nd tv +] . +cab aret +tar o +d v +show cases +plu m +ðŁij ¸ +son oma +pre pa +memor ab +e stu +drive way +u les +magn us +x r +nn n +much as +en ge +stre amed +fore stry +audio book +tro y +reck less +kil om +ru ler +ra k +proce ssion +i ons +po ole +noc tur +wh s +farm house +per a +par me +hypocri sy +s ics +v ant +cas k +holi stic +au st +Ð ¿ +in do +ðŁij© âĢį +di so +disp atch +ol sen +make it +en nis +cent re +ar range +ðŁĮ ¼ +sal ted +ea siest +f ate +reg atta +mo zz +ac an +sin i +g ically +ch ops +chick en +work in +ha gg +invol ve +wee ds +book day +wake up +ky r +michel in +fu ss +re juven +vac ancies +incar cer +m st +sc ents +sovere ign +kick er +à § +bo d +âĢĶ > +sa h +mob il +shrop shire +oph one +dress er +mis suni +hep burn +i mo +foli age +diagno stic +as san +cycl ing +guil t +c sa +puertor ico +win elover +wake field +do ggy +k he +pa pp +co g +al lot +cu ck +poe tic +mi o +re vit +mag ician +ç ¥ +ant enna +west wood +mber g +lux e +oat meal +Ø ¬ +te at +ffe e +sear ches +l ly +plu to +el on +let tering +inno cence +fa i +ann on +telang ana +ma it +neu ral +can ni +ar oma +a stor +fe x +co cac +mon etary +f ent +un sure +' @ +indi rec +teh ran +isol ation +li bs +make up +merce des +ff y +he tero +de o +sco m +cur sed +veteran sday +franken stein +shre ws +de co +ge ese +lefto ver +ha did +vari able +acade mics +carol in +under going +vari ation +na h +ssi er +gamer sunite +pur suing +emer ged +ll ers +control ling +ro aring +mete or +vol t +daw gs +be aver +is life +bathro oms +aci onal +pre vent +lake district +in als +y ani +gra bbing +sac ks +le z +sw ay +k ool +time s +klo pp +la de +con cord +resul ted +revi ve +recon ciliation +ol and +az z +gir o +mand arin +de en +nutriti onal +is coming +van i +aw www +der ived +love your +stop the +shou ting +nov ak +ðŁĻĮ ðŁı¾ +lo af +displa ying +sunday with +ma guire +ch eri +ðŁı Ł +re match +qu ic +Ú © +y in +ðŁĺ ¹ +ili ve +z ip +our ke +down loads +sw at +missi ss +care rs +t ment +proper ty +hahahaha haha +gi bbs +sur rey +ar ise +tic ism +sti a +ir ling +fro g +co se +bas sist +fore ig +lea u +pil lows +hol la +eli e +disclo sure +peanu ts +inte ch +ww c +plun ge +trium ph +cor i +sli ppers +ðŁĻı ðŁĻı +neutr ality +ma re +hair y +gang ster +hu mming +cust ard +mer lin +ale a +s by +dam p +mo han +ver bal +j st +gu tted +b jor +un finished +ðŁĩ¯ðŁĩ µ +un happy +âļ« ï¸ı +by pass +at su +fis cher +sa v +afric ans +re use +mid way +demo lished +ger rard +her cules +Ä Ł +medic ines +cl icking +sur round +jo 
ong +wav ing +tri bes +wet lands +offici el +argu ing +l le +do va +su zy +club house +ne gro +ob tain +ga o +gl ance +assi st +ch os +ãĤ ¢ +âĺ ķ +adri d +occur s +st ans +par don +livel i +emplo yed +re visit +ff xiv +bb le +ne aring +min er +ðŁĺ ¹ +giov anni +up to +mar vell +mar se +to wels +cb n +engine ered +y elling +spart an +si ans +ðŁĻĮ ðŁı¼ +se v +coyo te +sta di +t cm +app en +shenan igans +open access +so aked +ma squ +le vine +stro kes +l k +aparthe id +hipho p +char don +may may +ha asan +stri pped +fr o +scri ption +f ton +h f +pri sons +marsh al +ķ ãĤ +an cho +com promise +classi fication +buzz feed +bblo ggers +deser ving +) / +s way +ob o +camp ers +poder nfamily +p oured +bri e +squir rels +se ize +: # +le k +ti mb +st acy +nas daq +repe atedly +br at +mi ghty +competit or +mah one +de si +o ke +bm w +shi e +f cb +cheape st +minim alist +par amount +n ate +har as +insan ity +lat eral +ment ality +mo zam +ta pped +yad av +u sp +b way +the od +bil t +ra ids +em press +adap ted +pat ron +nut shell +ag ra +be aded +sundaywith marsha +vi king +proce ed +main tained +thinkbig sundaywithmarsha +sn es +mus ica +to wer +ch ab +bo k +sm t +insul t +harve sting +windo w +ru ther +be ige +dec al +indic ate +ma iling +ri ft +po le +ander son +ch oral +sp ride +l ili +ev elyn +imrankhan pti +.... " +ke red +un dp +water falls +se ars +le mans +world series +ri el +ani e +app ar +score rs +lam p +a than +phys icians +qu inoa +refu sing +vu itton +unle ash +s la +pat i +shou ts +inten tions +fo amed +europe an +neighbor hoods +me er +man son +du h +br at +con es +bow l +kazakh stan +ठ¿ +in appropriate +del hi +ketch up +ful ton +s ys +consul t +gar field +to go +f ml +f led +b ds +facilit ate +ree bok +selfi e +elev ate +activ ate +bi ble +ca wx +b ys +cam ille +sy ou +sk ool +her t +w bc +ple dges +recor der +po sh +ac re +so aking +mat il +v sco +shoot ings +pla r +e con +ðŁĻĮ ðŁı» +rashi d +u bi +ðŁ¤ ¤ +sw inging +wi pe +rap tor +m su +music video +dur ham +at tic +apar ty +fe tus +activ ation +aa z +motiv ate +ðŁĴķ ðŁĴķðŁĴķ +j al +ठ® +ag on +sche er +stal ker +fo ster +az zo +tele gram +vi gor +s laugh +screen shots +entrepre neu +kri stin +inten tion +ch illi +fr action +don a +ge a +tc u +s ite +la k +em il +d nt +bor o +wil kinson +re cu +ato day +t anya +bl anco +cd n +brilli antly +g cc +ac c +evacu ated +ther ine +den ny +cait lin +she pard +pou ch +hand held +sou theastern +ha a +à ´ +re solutions +led ger +sr in +r ar +shat tered +chim ney +im with +mete or +hand led +ra ke +town send +en han +shi py +duc t +tw x +inflam matory +war hammer +theat rical +gro s +sk ar +sco tty +ni el +tit o +tin i +conne ction +_ . 
+goldeng lobes +sha q +ðŁı ³ï¸ı +hall way +fron ts +effec tiveness +gla ston +d hs +ex pi +to h +c pl +sc s +re o +ha g +resemb lance +hor an +abu sive +qu er +virtu e +cho lester +a q +shan e +m ce +carri ers +di stress +re wind + ¡ +voo doo +int act +ann o +ðŁĺ ¤ +pi led +adi a +ãĥ ³ +en ow +di gs +light ly +goo fy +turb ine +governor s +con te +re open +pa h +i ve +cra fting +swee ps +jo di +an de +zu cker +kaw aii +o ko +v ai +out line +kri sti +ts n +insp o +qu int +fil thy +lyn ne +listen ers +depar ting +or d +t weed +, & +ale k +sel fish +nor ther +recogni zes +i ps +be s +a ed +w ills +pe at +surround ings +mon uments +ais le +be cker +la v +quant ity +v ah +helicop ters +tu cked +alv arez +sha pe +o bey +ad diti +road side +m ite +bl ers +ep age +j au +ignor ant +b ins +lu lu +x o +c fo +ee eee +apprentice ship +shef fiel +to i +ho k +faken ews +deplo y +aid an +husk ers +ãĢ İ +west brook +mi ster +confi gur +car r +fic a +proceed ings +ha w +ste ak +mur derer +pay day +a jo +p vc +don ates +bi af +nom nom +be it +k ali +x rp +ahmed abad +se mic +che y +x tra +an twer +head lining +squ ares +roun ded +flu ore +bol d +disa sters +am oo +gener ic +cran es +brief ly +gi g +auster ity +anticip ation +for ti +treas urer +cann y +ce cil +dete cted +check list +ภ§ +pam ela +bar bados +an field +hear ty +tx lege +peren ni +arro g +ing ram +âĹ ı +ty ne +spo on +r ation +am ba +m be +cam el +h hs +york shire +reflec tive +fre aks +to k +ju do +partic les +du bs +ban jo +accred itation +prover bs +over dose +inte gral +gu ang +mc s +super car +af b +al vin +ail s +x tre +st aging +tw ent +rabb its +mar o +inste m +dol l +cr ay +sant ana +ble ach +mini ons +che ap +man t +di vers +catal onia +lo is +mat ri +cou gar +kay ak +e gre +p so +a ia +å ® +char lton +tr acked +sc ari +pe tt +f wd +x in +gra vel +br ic +bigg boss +ar den +hu gging +pal ms +st v +li mb +the movie +handic ap +ri me +z ai +stu b +indi a +lithu ania +rhy th +p ita +maced onia +high ered +brid get +schwar z +ske let +hi kes +ant arctic +c ps +mash up +Ð ° +n ell +chand ra +he ir +an us +sher idan +mi mi +muse u +bec ca +an ir +bar rie +dioce se +compar able +ðŁı³ï¸ı âĢį +yuk on +me p +hor mon +mer ic +al f +con quered +christ church +ðŁĴĻ ðŁĴĻ +hazard ous +poo h +cont ing +retro spective +par ame +na ir +con sor +ho tra +astoni shing +cater pillar +u man +ti sm +t vs +serv ic +croy don +mor ales +c g +cu m +te ur +scan ada +s all +magno lia +el ise +th our +à® ¿ +ag omez +phel ps +ë°©íĥĦìĨĮëħĦëĭ ¨ +wh os +weav ing +si sd +pro poses +cro ws +pre sale +econom ies +bernar do +sha hid +air show +mc cann +hor ticul +nr l +du el +mongo lia +tou lou +requi rement +struc tured +ed i +o lives +he a +cu ter +Ð º +enthusi ast +harri et +domin ion +sub mer +ðŁį ĥ +sa ab +nes burg +mo ff +def ended +bur t +rewar ded +gold man +op tics +khali d +house holds +buc kets +ce cil +che ss +substan tial +ef l +oper ation +evalu ate +st n +rece ssion +l ll +tom as +tru ths +ak bar +s words +p act +embarra ss +ha o +ay urve +scrip ture +ny cc +op t +di ameter +sc ented +organi zers +re lat +ha e +dream ers +de se +ðŁĮ » +restric ted +n ale +r hp +dol an +mun ster +ha ired +consult ants +jo ints +hu mil +d ill +relent less +t é +af il +ut ilities +japan ese +condem n +pet ite +colli de +q f +peach es +cou rier +l ore +âĺİ ï¸ı +reli ability +ch uk +ðŁĻ ĥ +stu res +ge ther +ho stel +bi er +- _- +â ĩ +e ze +ta ilo +di ent +blu ff +chu ffed +pil ip +mon arch +e em +bu chan +b ick +op au +ku ps +ภ¢ +pist ons +sp ins +m and +ce st +bur ne +v ile +cher 
ries +bec kett +need les +pan ch +ë Ĥ +haha h +trou bles +insi sts +do you +g mc +mor tar +deleg ate +in n +g anda +sin atra +ठ¤ +spee ding +pu pil +pre mises +ali gnment +pi kach +as us +j alan +Ø µ +lime stone +fol kl +parme san +ce il +mo y +shawn mendes +ac up +hu st +ot es +med ina +ma di +gta v +censor ship +ar g +swe eney +sy kes +col o +foot steps +cann ed +adv ance +gta online +healthy living +ðŁį ¾ +a ig +p ality +oc s +he brew +im minent +berk shire +jeremi ah +out going +bak er +entr ata +ma ids +gro ves +bo c +a del +m fw +con science +arm ys +nut ella +conte stalert +novel ist +la h +ban ker +marque z +ðŁı ¡ +to ff +out age +gr p +ðŁĺŃðŁĺŃ ðŁĺŃðŁĺŃ +musc le +du dley +nvi dia +mi di +m uni +ess ays +dat ac +car ter +ภ£ +t ans +i ves +public ations +al er +ok wx +il u +cu tt +har p +out law +luther an +br ill +bo lic +do well +green land +be sties +path i +pay ton +gue st +har den +ðŁ¤ © +ann ed +evacu ation +po ised +mc der +b han +o i +envel ope +ci d +ca vi +ta pas +book review +grey hound +âĻ ª +fe ud +lun gs +for te +rai der +ff er +oni x +dep end +yn wa +rel ating +de vs +ðŁĴ IJ +acqui res +d ha +j yo +priv ati +can ine +k b +cra b +sar din +imag ining +k j +em por +down hill +ne z +ta eyeon +nick imin +gb p +à µ +w ap +sec co +ma shed +ðŁĴ¥ ðŁĴ¥ +augu stine +diss ol +dic tator +â ĵ +vi per +ed fringe +vau x +hard work +book let +no x +chi ff +ðŁĴ ¨ +observ ations +xbox one +u sher +ke er +lu p +dal las +cal gary +ma dra +di ous +k bs +wood ward +hero ine +lu mber +sea world +o ws +mc ke +maver ick +gu la +cross roads +fan g +s ade +nik ol +chee tah +me c +pp g +er ick +ðŁİ µ +tox ic +bj j +viol a +sp ire +ch ino +tra vis +institu tional +ha as +low ry +w ac +ea e +hu mid +mp ton +ru ck +je w +c ine +zim mer +se f +bhar at +fre es +aam ir +ðŁĴ ħ +z inc +wan e +multi player +royal wedding +e el +preci pit +qu ery +kimber ly +isa bel +ful fill +ig an +vau l +pan e +sc y +dig it +gun n +u tah +dog day +fi on +xia omi +da c +el ast +cha vez +ro blo +g ine +ten th +ab h +ke to +hur dle +na dia +memorab ilia +ha bs +qu an +h w +hv ac +pix ar +ec cle +kram er +accu ses +ðŁĴļ ðŁĴļ +per se +mean time +wa hl +atle tico +âĢ¢âĢ¢ âĢ¢âĢ¢ +ott oman +no vo +k us +conne cted +tru sts +d mv +spen cer +rahu lg +do ve +sto kes +bolog na +enthusi asts +à ª +rockstar games +ted cruz +du ras +s acked +late x +immer sive +cer t +lu cin +princi pals +fa res +sa ils +far n +am ent +saf fron +quent in +check point +fer ris +ex cur +ðŁijī ðŁı¼ +bai ley +se h +ter re +mad am +s band +wan derers +cumber batch +yy c +digit ally +blackandwhite photography +roll in +moroc can +ðŁĮ ħ +din ner +d well +to om +m ye +ez ra +cp fc +war hol +me er +jon ah +no aa +s gate +so on +secu lar +g ating +ti o +dri ver +si ssy +assan ge +ta th +ed mund +bobc ats +ra ji +po stage +stu ds +m gm +kat o +edin burgh +meet the +shir t +fa a +mens fashion +sp reads +wi m +car ts +phoe be +j ars +bot swana +Ù Ĥ +ed war +sk ar +ri ve +gu sty +c tv +ferdin and +su therland +nickimin aj +k v +si us +bee ch +re z +desi res +on ial +camp o +quar ry +lor raine +gil more +ig gy +µ ï¸ı +ho pping +avi z +ðŁĮ º +uni sex +dedic ate +att itudes +ste er +jun kie +rail way +y b +whi sper +key an +k us +ju g +di x +a ins +sum mon +ov ich +sy ed +her ald +ma ison +me ded +wild flower +main land +ri sky +ru kh +over looked +ki c +destro ys +nam an +ki p +z ano +champion sleague +ban dit +quin cy +smi le +cal vin +open ings +ta pp +ol ulu +spec tro +accred ited +ap k +pra ised +bar nett +pol len +premi ered +selen agomez +tou red +screen ings 
+uu u +mis o +en se +adam lambert +guel ph +har yana +hu tto +le ar +l tc +po ached +brex it +æ Ŀ +tt c +pa vement +mon gers +ro e +ad ers +ling ton +particip ant +ca red +ga il +y ates +lan tic +dash board +jo o +feli pe +ssi onist +bu m +s end +a eri +thu gs +luci fer +a he +dete ctor +fil ly +gas oline +ham per +hump day +the ta +the band +fore casts +o hhh +lo bb +hol l +cp u +az u +ad ar +hai ley +bu b +car t +quo ted +an archy +pan cre +twit art +al den +st ash +the less +or ni +belie bers +mor mon +partic le +avi ation +⬠Ĩ +webcam toy +sad dened +cru is +ham let +n ct +roll ins +marque e +saw yer +reli ance +a ura +di ec +soo thing +sig nings +ak is +à ³ +at kins +aer op +ðŁĮ ¿ +y ab +sh ari +con nol +du bbed +manufac ture +convin cing +feelthe bern +ra u +pu lit +on ec +gem stone +ur ging +bag u +ga h +aci ds +fi anc +zodi ac +sn oop +her rera +initi ated +ven ge +profess ors +pro di +stron ger +e mission +bb a +hal le +ta pp +haw an +wh im +compe ted +myr tle +ir port +cold play +ach e +ske p +m son +ss ic +calli graphy +swim mers +me y +pp c +thri ft +po c +re places +commu ter +âģ¦ âģ¦@ +go ers +lo gue +para dig +bas kets +sensiti vity +joh an +atl antis +& & +suit case +anxi ous +l h +str i +gal loway +stre ad +war den +gr ounded +ffici ency +li feat +reli c +disgu ise +island ers +f cofficial +classical music +b mc +en field +bi que +oak ley +bat man +sla ying +ner ves +mul tit +calci um +projec tor +scott sdale +ant ino +gri ps +kim mel +des mond +prote stors +hi atus +metaboli sm +conclu ded +press er +ti pping +sli de +e to +hun ting +aus open +ri k +pp ery +innov ators +pitch ers +ag ger +fun gi +z ad +proli fic +rockn roll +bl ames +ct ar +stam ford +q ad +mozz arella +insan ely +den ver +ph ouse +nom ad +ï ¿ +s ris +pro du +hen ley +pag an +am trak +ru bi +in cl +tu tor +sco tia +wo es +sing apo +fun nel +turn bull +know ledge +gri mm +real madrid +we are +missi les +con sol +emo jis +sne ak +smi ths +ru iz +br ou +i el +ha ver +ðŁĮ ļ +kin gof +basil ica +circul ation +prin ters +ta pping +ri dley +dra gged +ha j +writ er +fundament als +personal ities +me tre +stereo types +bur le +best of +n ffc +ha th +mini stries +a ali +trac ing +pav ed +ł ï¸ı +g ic +insp ire +tu g +ha re +repe ated +ex pon +lol li +rho de +pre cin +install ations +instag ram +az ar +i es +sole ly +du kes +mission ary +van guard +fursuit friday +on d +pol ari +ma st +har an +jos é +jack ed +ec oun +al ities +ne ph +ra vel +moder ated +sco w +s fb +uru guay +as o +ni g +au du +p ints +lat ina +ben z +m itting +char ted +mat ology +cit ro +biop ic +ðŁij Ń +djo kovic +fox y +agu il +so to +an ada +sin king +sc rap +hair s +bethan y +fact friday +ðŁIJ IJ +unlea shed +) ( +contra dic +ram on +coast line +y ong +sn sd +li gan +p ome +mit age +ge tt +wat i +ri sk +so aring +bru sh +f pl +av an +å Ĩ +lar son +sh ear +mul til +blu r +multi media +chun ky +par i +n ani +weir d +cholester ol +char les +dream ed +tan ning +puzz les +fr am +hand ball +ch ag +beli ze +al u +bang s +Ñ Ħ +detec tives +mc g +ish q +bo thered +saf c +mp ing +ten eri +g ays +sail or +an gi +mul ticul +gue ssed +ros é +high ways +bro om +chatt anoo +- ' +see ker +on ed +at f +lu c +> < +bar i +per cep +jewel ry +as ph +sor row +sl ing +mam moth +jac kie +ë § +wilt shire +sa o +can cell +im paired +tor ial +bre ed +guy en +jud ice +tit le +pro spective +applic ants +ðŁį Ĭ +epis cop +e id +b yo +stock ings +ðŁĴĥ ðŁĴĥ +ll p +sna g +keep it +l ough +ol son +matur ity +!! !" 
+cop ter +i sha +bl i +wil mington +tr youts +th ai +ðŁ¥ ³ +pe bble +kra ft +f p + º +ssi vely +li vin +contest ants +tex tures +jo an +h dr +film festival +prov ence +wi do +op end +c si +sto wn +cro ati +ad just +host ile +analy sts +il an +cu ppa +bru m +newfound land +good win +me tt +mall orca +plu gs +bu k +bb hutto +wrest le +sa ire +sho pped +for za +le head +vi vo +ba st +ro xy +reg is +hard working +hon olulu +desp air +young sters +ni g +impro mp +roll tide +de emed +tre ason +ru shed +for ged +ff f +pikach u +bri ggs +do it +ac cent +la us +gla ze +compet ent +a ho +photo g +mid field +le go +har vard +min orities +re illy +slic ed +once upon +initi ally +financi ally +landscape photography +har dro +qu o +mm ers +par kinson +smu gg +read iness +bru tally +glou cester +mp ed +bbhutto zardari +mur der +ye d +dat aviz +sr t +dow ning +bi ans +m ü +fle ck +fli pped +s ly +brilli ance +ri m +k um +bubb a +ko i +knit ted +sor g +ma is +ðŁĮ ² +ti ss +su stain +sen su +ak han +zi est +exam ines +chardon nay +user name +short list +re bs +on o +dar ing +hard wood +che que +righte ous +light ening +dir k +shra dd +du ra +down stairs +sh al +ami gos +ru ff +s law +ri es +red nation +man us +ðŁĩ§ ðŁĩ· +distin ction +u bun +dur an +mi gra +thi ans +la ver +domest ic +k x +jaz zy +justi fy +belong ing +insul ation +color stv +drun ken +chann eling +qu and +xi ii +enligh ten +kan o +fati ma +teen choice +terri fied +p ba +as ley +met museum +dun e +pack er +ki o +ðŁĴľ ðŁĴľ +bo iler +fas cism +ar mored +back grounds +in mates +embarra ssed +defin es +th d +we go +silic one +lo on +el ding +bor rowed +he mp +ak sh +kaw asaki +br y +de af +kill er +dispo sal +ðŁĩ ° +glaston bury +un covered +o xide +po ff +d ant +k j +ku ro +dri zzle +peop les +fe e +pro pri +dd lovato +pi ggy +ot is +aller gies +u bis +pengu in +ser a +vi z +prosp erous +ici des +tornad oes +sene gal +web cast +sto red +enchan ted +bb cone +bay area +entrepreneu rial +rednation rising +experim enting +ang an +lot to +they re +por e +er p +seren e +east wood +bro kers +bar ge +stal lion +timber lake +tailo red +dy stop +b ate +lat ors +di xit +bran son +dynam o +ky lie +shame ful +bt wn +spring time +mix ture +s ounded +lu ton +dad es +mal a +op ra +en ic +rahulg andhi +se wer +~~ ~~ +ky u +nor theastern +ca er +bc u +nir vana +kitch ens +ous y +al m +river dale +hid den +fl int +sp d +pat rons +katy perry +au gh +exhib itions +sm c +shu ts +at ore +da in +some thing +ber th +bo g +por ter +gen to +con cussion +ang lic +ro we +gr illing +scar lett +master ing +mor nin +comm ented +si me +si zing +christ y +ce os +st m +at ry +tari ffs +vac ation +pre judice +p su +paren tal +far age +can a +cap com +koso vo +you re +men stru +stal in +grape fruit +br an +che sa +dav en +exc el +!! 
) +๠Į +distribu tor +ce a +bride sma +millenni al +wa in +ob serving +mis ery +plan etary +expo sing +bra ised +comp ton +don gha +q l +spring steen +th ul +syl ve +cab o +pal ad +niel sen +gaz ing +ba ja +r oud +orchi ds +johan nesburg +se man +d ji +oper ative +affe ction +eclec tic +at c +mut ant +aw x +nic e +mel bourne +indu lg +tu lip +dias pora +wel p +big gie +mississ auga +retri ever +or an +tam my +c ta +hipp o +seas oned +ger mans +eng v +marvell ous +im f +rela ys +mon tan +maur iti +me ister +as surance +reig ning +su fficient +han e +no thing +pos se +nav y +in love +brigh ton +en qu +ch ung +sweat y +es c +cal ed +man s +nicar agua +sl ices +mo cha +washington post +bb n +dam ned +grow ing +en burg +lo an +me s +wh oops +believ ers +spi el +vo daf +l at +s led +cricke ter +brown e +golf ers +bar ra +wat chers +lu igi +sw amy +mom s +pit ched +san tor +cr s +si re +sc amp +bo de +ste war +jon ny +ent ity +pac qui +mind ful +min india +bear ded +temp t +scorpi on +eat on +authori zed +ar to +s vp +op athy +cch ini +house music +disney world +âĢĶ @ +pro pose +di y +expen se +ten g +pupp ets +sm el +d aca +per ry +fin n +boo sting +lefto vers +cou gs +satell ites +man y +az e +g ong +fi e +metho do +fer ries +ðŁ¤Ķ ðŁ¤Ķ +explore rs +load er +attrac ted +il ton +godd amn +pi azza +doc tr +sav ing +paragra ph +visu alization +may ors +work flow +ack les +ðŁĺĤðŁĺĤðŁĺĤðŁĺĤ ðŁĺĤðŁĺĤðŁĺĤðŁĺĤ +ठ¸ +twer k +clu t +lo ver +te ases +si an +o te +deter ior +accor d +l fw +swar ovski +nat al +tra ps +k ina +analy ze +laye red +bever ages +un it +ran som +pe shaw +dest ined +astro logy +si pping +miley cyrus +cam ino +marshmal low +bli ss +out back +fa q +int oler +humil ity +po ppin +hallo ween +mon tene +op hy +nu n +tattoo ed +a as +ðŁĮ ³ +dale y +qual ity +du sa +fisher men +swi f +ter rac +st au +le in +trol ling +ship ment +garden er +march madness +head band +gr t +bur nett +w and +!!!! !!!!! 
+gh e +du x +hu d +war ner +ðŁĩ ¦ +ex ile +rescu e +rat a +d han +duc ati +dro wn +bl ends +spi e +alli gator +simul taneously +broo ke +u ke +k har +comm union +ri ka +ford fc +chin atown +you rown +me y +can al +syste matic +de pri +ox ford +an il +w ut +equ ation +be z +fle ur +the good +lang ley +ad ity +ed ith +al fie +о ÑĤ +en cry +br ill +ex emp +ce sar +mb ling +ab ri +sc icom +j ing +school ing +mi ka +mechan isms +impromp tu +rhe a +moo re +crime a +be sto +wri ght +el ders +ro ds +kam al +folkl ore +be et +mini on +reli eve +thr o +team usa +pas cal +made with +boli via +itt i +free bies +desi red +best selling +l iness +la den +ke ane +mi sts +hipp ie +atta chment +@ / +se w +flan agan +âĿĹ ï¸ı +supre mac +stl cards +si as +q u +rh ys +ste ep +val leys +v w +pav ing +disp at +al ison +por te +id u +new sc +soc ket +mo s +co star +re vo +prote ins +stanley cup +m cal +ear ring +se cs +mc lean +cap ric +nick elo +ad en +v c +shou se +adap tive +maxi mize +entertain er +pro se +gri ffi +six teen +lam ar +mi rage +saudi arabia +awe ather +ru st +in filtr +fashion week +ðŁĺĬðŁĺĬ ðŁĺĬ +selec tive +bubb le +a den +fen nel +deci sive +m ta +mock ing +mb les +st amp +mu le +bernar do +gr in +po tt +j ingle +vet tel +colom bian +cam o +motivation monday +ba han +p ly +dh ary +k ami +x men +sleep er +gar a +my sti +confi dential +conflic ts +p neu +ce s +insur tech +clean se +me rely +va is +tu x +the great +shar on +ma j +hol a +eco systems +aj ay +aa j +hu sh +har mon +backto school +wiki leaks +reflec ted +ðŁĺ ĵ +commemor ating +ac et +buck ingham +messi ah +tu ous +hor net +to be +d q +he ine +mi g +pl ate +nichol son +sp ie +cumber land +nor mal +pho bia +happy halloween +city fc +mc el +gilli an +ke to +lu de +de mise +su ga +str ate +mcgr ath +visit scotland +foo led +cb r +gc se +col ori +po td +missuni verse +fin ances +ma poli +for ks +Ø ´ +cann on +medic inal +ðŁĹ ĵ +kh o +wre ck +pan to +bag el +gu ll +syndic ate +ic y +pr c +ki en +zi ka +ti sh +pe ta +c co +li za +ch ut +ex traction +el g +gl i +fu eled +pos it +respec tively +leice ster +br ink +vulner ability +im ported +e sha +ðŁ¦ ħ +r ural +re ll +gam ing +atlan tic +aband on +no ah +re solved +pro state +aller gic +ps d +âĺ ¹ +dun geon +fang irl +illumin ated +m hs +white sox +d ently +ck o +endor se +over ly +dazz ling +prior iti +night life +ut il +be have +flam en +east bound +ðŁĴ Ł +ilove you +gov uk +mozam bique +alle gi +dr i +testim onial +ath s +ì§ Ģ +mm y +shab by +pro secco +friend ships +cal am +dam ages +off set +jura ssic +jun o +arre ll +ðŁĴ © +interven tions +dare devil +car ver +run away +ran e +truste es +ha ute +dep ths +ðŁİ Ń +me in +sacrific es +con cier +ne sting +i zzy +me tam +ilove my +ur ine +du lu +mal hotra +ve ins +night ly +co at +an di +he witt +lon el +ci ble +wr ite +jen nie +sant ac +ĸ ï¸ı +str ato +singapo re +sop rano +kri sten +cheer ful +flee twood +fa iri +m eli +wa st +tur nt +sfor sale +sc rolling +angel ina +ren dition +jeric ho +nick y +or b +fla vo +patri ot +ash eville +sick ness +re fund +aggre ssion +b pl +ãĥ ĥ +elu sive +thi story +hang er +bu ffs +vil las +at kinson +sp h +ja it +decl ined +wo k +supre macy +oo tball +ey ang +ðŁİ ĵ +s ford +ath i +consu me +road ster +e so +u pro +reci pe +au f +uc i +ar on +oo oh +cs go +re ich +mc d +min ute +ladi es +pun k +rut gers +mee k +ariz on +ta j +land lord +de gra +autu mn +lyn x +us f +b hi +fairy tale +dongha e +bet sy +explo ded +chen nai +op a +pro tag +br ant +ðŁĵ °: +g f +pal li +ðŁı¼ âĢįâĻĢï¸ı +su t +ill ini +colum nist 
+shir tless +de centr +sear ched +ec or +bu ggy +s ack +ðŁĺĤ ðŁĺŃ +de t +ther i +or naments +bring back +to v +quarter finals +ic he +con stra +gi er +buchan an +vi x +kay aking +mu stread +swal low +mel b +sc af +op al +may oral +har at +ðŁ¦ ĭ +schedu les +id f +ha gue +ro z +a ah +d mc +du plic +ca che +orph an +frac ture +rec on +ch av +bun nies +al ain +mustaf a +ðŁİ Ļ +vac ations +dynam ite +tex ted +broad caster +ðŁĴ £ +ste amed +rock er +di etary +luxury travel +inaugur ated +sa wards +vaugh n +lincoln shire +click ed +kra ja +f anc +remo ves +layo ffs +mc far +bre eds +win nie +jon ghyun +incen tive +vari ations +pat ton +atur day +persist ent +pr un +pi ers +dal es +æ ĸ +breast feeding +r ance +ta wa +Ĥ âĸ +mur doch +cap tive +thi stle +nic a +commod ity +cou ldnt +board walk +graci ous +practiti oners +n gc +scru m +ner o +camoufla ge +col on +he i +phys icist +saturday morning +ten er +si won +colum ns +bru ne +y vr +ba ir +reti res +hal am +cab er +shaz am +min u +cas cade +milk shake +gri d +d ren +vin cent +so dium +plat ter +cheer leader +chen ko +y ak +elimin ated +ty po +y man +re think +âĿ Ĺ +ts ville +bernardo kath +ex tr +ðŁĺģ ðŁĺģðŁĺģ +ta o +re per +mo ths +em powered +c iting +transpor ted +mon ks +san at +cle ars +bachelore tte +camp bell +racha el +har le +hand ler +climb s +inter ference +rele ase +sh and +r bs +hr h +ãģ ª +val le +r é +sli me +w akes +chu bby +slo an +el ves +ath en +attor neys +micro scope +ston er +sc aling +o be +c out +se man +mid week +bal sam +ðŁĺį âĿ¤ +ti ful +v ish +lo tta +ri pping +re mn +ti re +le ap +ha vent +la by +hi mach +whisp ers +we in +ðŁİ ¸ +wild flowers +se le +u cc +li ability +az ine +sw ings +k ya +ta ir +re main +e do +flo ps +poc ket +grand ad +exam iner +gr is +ffe ct +ðŁijĬ ðŁı» +stud ded +heart beat +de acon +firm ly +infec tious +ste f +out lines +le asing +cla ws +sen se +tab s +hoo t +mo sul +spa wn +co a +hog warts +ve in +alban ia +manu el +b ino +vaux hall +scot land +go bucks +mat ty +phy sio +tor ino +const able +investig ated +s lower +mistak en +bay er +wild fires +vo ic +x on +time to +chas sis +bar ric +pi on +bald head +woo k +regi str +dra fts +b hs +li gue +l ick +staf fordshire +baf ta +dar ry +je anne +ven ding +cor p +⼠³ï¸ı +kid dos +fen way +ca o +west bound +ðŁĺ Ļ +dv r +quick er +bla h +goo die +ðŁĴĭ ðŁĴĭ +vo x +esp er +fac ade +cor relation +red bull +rou p +decl ining +chi ve +mc gee +tur o +in der +f eller +fu g +il ysm +mar di +peshaw ar +ki eran +ine ma +meat balls +pe ck +depre ssing +sen sing +gi z +dd ington +spring watch +ro aming +yellow stone +horse shoe +am man +week day +ol or +ðŁ¥ ° +boo sts +spr int +scar ves +je e +bee tro +cl an +all the +ìĦ ¸ë +enlighten ment +ado be +re generation +? @ +cont ag +yach ts +to u +mor a +en voy +r ani +go li +dhanush kraja +wood working +streng ths +se di +disc s +ar ina +sc on +lit e +ano ther +ðŁ¥ Ĭ +ye men +gu ern +sav vy +lo yed +biom ed +heart break +comra des +milli e +pat ch +un f +jar vis +bl aming +commemor ation +ge y +å ¥ +cardio vascular +alig ned +docu ment +. ? +aesthe tics +em u +the irs +le h +ps ic +si f +pl ateau +ex pend +domin ating +rob es +mauriti us +excep tionally +hom er +discover ies +bra un +ten nant +insul in +ðŁİ ® +car bs +te as +? !" +zi e +franco is +brow sing +th ol +cla rence +hel per +ob tained +cas sie +le es +! 
, +pome gran +hu bs +presti ge +] [ +mach er +bott led +pun ch +pi pe +o ch +gall ons +deliver ies +u ra +un day +mon de +depic ts +re gency +outra geous +khal ed +car o +he arti +za g +develop mental +over coming +stati stical +flavo red +for ds +cre atives +lau rence +di as +sun screen +in ked +pre acher +n ul +impac ting +auti stic +âļ Ķï¸ı +o ss +pel icans +cele ste +v b +ru mp +mc gra +fair fax +hu mor +bbc news +row ling +cal der +seam less +ag ne +p ti +mix ed +t shirts +mer ci +b tob +women instem +genealo gy +pre ven +l our +cra dle +gi use +Ð ¾ +chron o +fair ness +chocol ate +tor y +as da +pre scott +stret ched +al man +u il +re charge +in tre +ob st +hosp ital +hay ward +teneri fe +fried man +vap ing +confe ssions +ye ah +bal li +luck now +cor pse +sculp tor +amp ton +t pp +indic ates +sur plus +tru man +ðĿ Ļ +sin ha +in vo +sovere ign +ke v +establi shing +engra ved +assu ming +ðŁı ģ +sou za +fab i +ton ed +oun ge +del oit +dow ney +no ble +om or +car tridge +ðŁı IJ +u hur +hol loway +succe sses +r sa +âĦ ¢ +ma zz +tw d +disc ourse +. < +y at +satis fy +com pri +ठ¹ +graph ite +disser tation +ar ter +í Ķ +b ally +zom bi +ly ons +a ic +u bc +pra da +e il +da x +cla i +grand daughter +extravag anza +chall enge +ðŁ¤ ŀ +po ver +primar ily +dad dy +man a +bi kers +inqui ries +da un +fel ine +gener ative +he f +benef iting +lind sey +pol ka +demonstr ated +al le +rand y +o su +low key +weir dest +red bull +our y +n ous +wood stock +cre denti +nic er +g ado +aly ss +ap h +prepa redness +station ary +incorpor ated +dy er +sarato ga +cele sti +: " +antibio tics +or gs +inde fin +ap ron +и Ð +fif teen +no f +ðŁĶ Ŀ +ph x +te ga +m z +organiz ational +on air +band ung +pleas ures +mor i +secre tari +rac coon +ca shi +pil ates +k on +geof frey +la o +kam p +depart ments +back packing +an am +à « +crack down +aun ty +on do +li zzie +ph ers +cu n +ðŁĩ ± +k pop +pu t +inten tional +connol ly +bar clays +hs fb +swin don +u ku +s ally +a int +âľ ħ +pen ang +up lifting +epile psy +inter ro +bun gal +go ku +blue berries +ठ¦ +u ssia +sil ky +mou red +i stic +bri efs +me ats +go b +ch aser +state wide +pra sad +gl itch +ar in +ban ff +memb er +ðŁĺŃ âĿ¤ï¸ı +lo ving +hall a +ภ¡ +smo kers +yak u +scicom m +physi o +sw ol +lem ons +gel ato +ch ool +capit als +ki stan +ti ghts +spi kes +trav ellers +ik lan +commissi oning +ar ine +emabiggest fans +empha sis +front line +pad dock +destruc tive +ba ha +l inger +je wish +shet land +mc gin +mon key +ko z +s one +raj ini +te h +y en +c vs +masqu er +gir ly +we sle +was nt +bro dy +termin ator +gil le +mag gi +bir die +jeopar dy +cu bic +vm ware +intric ate +an up +to pia +east on +sab res +investig ates +bu sting +bil ingual +valent ino +in format +fer re +advent ur +hydr ate +for sy +az iz +san to +e de +whist ler +continu ously +d ham +un used +ji had +addic tive +vi dy +do b +i do +fi ed +ni versary +n one +fu er +ðŁĺį ðŁĺĺ +coven ant +prin table +immac ulate +o em +cl t +serv ants +consu med +un released +sc um +pack aged +me re +ìĦ¸ë ¸ +to by +ta f +spo ons +me al +f ball +fair field +jan et +silver stone +dart mouth +follow me +voy ager +kom bat +anni ver +ene w +mag dal +ho ve +sa th +grizz ly +car di +gart ner +sand y +kan ye +post ure +po ign +im pulse +radio logy +horiz ons +si am +aish war += => +no che +tr is +el yn +com me +du i +ce c +councill ors +cudd ling +creep ing +loc ke +manag es +trans ferred +ne cks +di er +dan o +v ick +lun ches +d he +en sures +cri ss +ul ster +bann on +cont enders +sp am +sweet ness +med al +hon duras +arc tic +ultra 
sound +in fr +disco vers +ei ffel +ca sters +ru ben +du st +awe ed +atri um +lest we +se ared +ðŁĵº : +ty ne +ex changes +little mix +l le +astron auts +hersh ey +work day +kno b +so v +re signs +today show +der man +an th +af c +ta ster +sw oo +sa eed +per ing +narrow ly +rn li +best buy +panas onic +obst acle +farmer s +ðŁİ Ļ +pa wan +ki est +ang ers +absur d +oh my +sin o +pist achi +sp ice +giu li +prime time +ko w +k ens +ex agger +! ?! +u ba +midd les +ju dd +e jec +slam med +pen sions +of a +re create +b hp +xx l +liver pool +thre sh +pur ity +ni eu +hol ics +wr ath +ra do +gli o +am ma +dile mma +cr u +lets go +.... @ +âĿ ĵ +sugge sting +tru mps +hor us +f v +ic om +refer ring +predic tive +tar ts +ge tte +so ck +glo ssy +pin ky +al ec +thy me +ou ra +thero ad +pe tr +cr am +p fi +dv n +me ier +incen tives +tun nels +mobi l +rec ap +extra s +upri ght +rev amp +per severance +, - +ot p +mir ror +ar wx +ger ry +ma her +g or +hom epage +am is +ag ra +made le +best friend +sirius xm +bun dles +admir ing +t dsb +ðŁį ģ +ch as +slow ing +ro h +wall papers +âĢ¦ / +tek ken +gang s +tal a +lind say +shou l +line backer +tool kit +ur anium +caly p +ab rams +mat thi +ðŁı ¿ +hon ourable +da yo +ver sail +tan k +st c +fr itz +spl end +pat ag +anno yed +on day +devast ated +chattanoo ga +national ism +mas sey +jen n +tail or +dev gn +org ans +zu cchini +on fox +sat ire +wex ford +dis grace +no to +vol ta +âĿ¤ï¸ıâĿ¤ï¸ı âĿ¤ï¸ıâĿ¤ï¸ı +à ¶ +home owners +poin ter +m cr +au sten +day sto +mo ons +pal ma +gra zing +e so +influen cers +shahid kapoor +compli ant +measure ments +develop s +y d +par l +p vt +rand olph +tor tured +ger ald +eli as +deepi kap +war mup +hick ory +g ap +co ffin +am our +re neg +moun ting +seven s +ig le +hi er +dec ad +tri ght +esc apes +wer ner +t fl +ful filled +ni ger +sour dough +re aper +choo ses +spin ner +week nd +fil tered +sh uk +kat i +old ham +open source +kh anna +at elier +conne c +opho bic +gla s +complic ations +ar son +counc ils +sm ol +as sy +lur king +ling ui +han ks +e in +Ù ħ +ru gs +n guyen +nou veau +men ace +le v +alad din +ru ining +round about +k m +con or +shoo ps +may day +traum atic +prab has +ka iser +k ita +rou ter +pe dro +re tar +stun ner +spani sh +distur bed +acade my +e learning +wit ty +sen g +fer al +av y +sta b +ke aton +ur du +ko to +hu i +coo ke +ari an +the personal +u ma +se ap +a sting +rhetor ic +hand writing +munici pality +consor tium +ðŁIJ Ł +glasgo w +ra ya +eli za +polym er +bro th +prac ti +correspon dent +addic ts +gay le +ail ing +o fe +p li +hear tw +st itch +sight ings +prie sts +sam o +slo th +good wood +roc co +sab c +summ it +l ace +pres ley +itt en +cin cy +thepersonal network +s week +pe gas +af con +regi stry +ci m +le th +dic ap +cand ice +flu ent +sm ack +pede stri +al oud +car ac +priyan kach +p gh +ir ons +dol ce +lat via +dece ased +thero ck +cla p +cen e +fo am +morris sey +gre t +essenti ally +com cast +be agle +argu es +ing ed +- âĢ¦ +sa g +ha san +ðŁĻ Ĩ +ðŁį ° +nh ra +kann ada +indic ators +on er +bri xton +at as +screen play +sor ority +sha heed +he em +class mates +tain ment +es i +breast cancer +zucker berg +aur or +en cia +ref ers +kae per +vor tex +com part +lym ph +photograph ing +ste ff +rest ling +par sley +mom ento +th man +lac king +du tt +ocu lus +fin o +fren zy +ra sc +der n +dis missed +noo k +met gala +sh ill +rapha el +maver icks +exhib its +eag erly +c pa +amen ities +. 
âłĢ +exo dus +ern st +lit a +deal t +womens march +i ain +score board +campe ones +c en +ti ki +garri son +fidel ity +bra g +road map +psy chop +lo e +ble u +ðŁijĬ ðŁı¼ +sau vi +spr inger +temp tation +ru dolph +ac ura +wic z +parach ute +stro l +len ny +zi k +dom s +nb af +al pac +vivi an +ro ve +pre et +perpe tu +sna ke +air soft +infl atable +prin ces +ati e +ffe y +pati ent +m ire +chel le +sl ack +groo vy +# : +up loading +!!!!!!!! !!!!!!!! +siem ens +provi sion +v fx +need y +f ats +to poli +bhu tto +sa thletics +alu ms +t winning +south western +adop ting +last night +man ne +la ga +tw ell +ac ia +-- -- +eye wear +hur ley +fle e +sa ch +pe cker +cost ly +is k +cr ates +polic y +ero sion +in go +wer k +ðŁIJ į +torto ise +therap ies +inter net +chihuahu a +ri ps +fre i +ed or +tai ji +t fc +do d +demp sey +christ in +chen g +hi ps +gra eme +com passionate +cavali ers +histor ic +soul ful +crimin al +ja c +vin ci +expi red +sur at +turi smo +k ona +se aweed +ber ts +le ica +expre ssing +a al +wor t +break fast +her ring +am used +rhu barb +mar tian +cospla yer +y ash +stri al +ra ul +refer ral +dw ts +j w +ad ler +cur tains +gu r +val ence +tyr one +sw fc +coach ed +re born +diabe tic +cho ke +nor folk +investig ative +ðŁĴ¯ ðŁĴ¯ +z id +v mas +phi e +objec tives +âľ ĭ +over due +di vers +mat su +ðŁİŁ ï¸ı +casu alties +ภ§ +al k +stand ardi +re alist +arti facts +pand or +ke x +in vin +( !) +ine y +par aly +mr t +fay e +the voice +on ga +de ed +skin ner +az wx +speci men +priyankach opra +nu evo +bar kley +toulou se +resu mes +football ers +cit i +fe tch +è re +lestwe forget +ðŁĻ ĭ +ch unk +dri fting +manipul ation +equ als +pu tt +ky ungsoo +âĿ¤ï¸ı # +ela stic +par ano +fo y +do ping +cin cy +ss ler +interrup ted +al ay +ado res +ame thy +con voy +ãĢ ı +Ĭ ãģ +black list +gener als +sa chin +bru shed +oun ces +non stop +illi ams +bt sarmy +u av +ru ff +bur ma +bi k +defen ce +schul tz +bo asts +lonel iness +go re +trans forms +alum na +@ @ +ra ppers +ne hru +car o +himalay an +wearab les +ge h +pepper mint +re development +flam ingo +cos by +big baldhead +ag ri +bare foot +sco pes +re gram +gh ana +ðŁİ « +i heart +sa die +carri e +microbi al +ku ala +sk ater +quer que +âĻ © +gen res +reas oning +ch ased +as o +sli pped +en can +vam os +ker s +ad verse +mo il +commod ities +with you +sil ent +hy pe +an de +am ination +whi spe +lit z +âļ½ï¸ı âļ½ï¸ı +ri ff +pp y +lam bs +gan esh +ab sent +regu lator +marse ille +en roll +par cel +wa p +by rd +ðŁĩ Ń +tu ber +country music +par l +contro llers +responsi bilities +we y +ch ate +montene gro +chic o +mil an +l ms +tra inees +appropri ately +un certain +popp ies +ed sheeran +nutr itious +gar o +deut sch +awe some +ãĥ ¼ +comfor tably +land marks +et i +re usable +daniel le +ro sal +co les +just ic +c cs +f anny +ni m +mc u +clin ch +at ene +mer ge +im db +ang lo +uc cino +pan ini +an not +bur berry +feat ure +predic ting +fashioni sta +s ask +imag inary +mm o +south sudan +spe ar +hu bble +jo inthe +coyo tes +sli go +ko dak +sit com +polaro id +roo ted +corru p +ðŁĻĮ ðŁĻĮ +bris ban +at z +ah l +re my +tal ent +aval on +ra da +pau line +locom otive +go ons +ne mo +maser ati +ic u +stu tt +histor ically +sm b +pres by +avo id +so oners +rhine stone +w ad +ri sing +tro t +mo des +reg ent +optimi ze +re ece +sm u +ver ti +newyork city +cor tez +ra c +in case +sin c +fiel ding +e tta +tiff any +al monds +sad dle +k rat +mat ter +g low +star ving +gl o +cra ppy +sl ur +st d +monit ors +recei pt +maymay entrata +mc il +un is +rain bows +cal dwell +pacqui 
ao +j op +a fe +hoo k +es sen +wiz ard +medi an +fla ws +com s +âĿ Ħ +ing h +ha ynes +anton io +tem plates +ou ter +na w +cardi gan +bel grade +ðŁĴ ī +hom o +a ise +ro pes +no ve +what you +tri gge +concep tion +ad ukone +na di +fri ars +sw er +adju sted +hot line +san ity +kau r +down loading +c gi +ten or +eth nic +app alach +ภ¸ +pa g +gol ds +on set +investig ator +car tel +peace fully +jarre tt +cat alan +poli o +n um +fru stration +dhar ma +my life +âľĮ ðŁı» +aber deen +mu sa +bin der +spark ly +fle eing +instin ct +co ping +domin ance +ill ers +er a +u conn +lo oms +living ston +gal i +he s +c ma +bel a +se ley +mon k +la ch +mar x + ´ +m erica +woman in +es sex +ra ina +jim i +nep tune +z ack +chine se +mart ins +chand elier +her n +with us +ear l +asph alt +modu les +st p +ul la +psychi atric +mile age +captiv ating +si der +men to +mor t +tran ce +tal bot +ab by +ì ĥ +âľĮ ðŁı¼ +j ak +daw n +turn up +scre wed +fe ds +blue print +ðŁĴĸ ðŁĴĸ +har sh +er os +insom nia +ban kers +ta emin +mis conduct +hu mber +gi di +edu ardo +con a +musc ular +consu ming +ra sh +don nie +di pped +col lie +samu el +melt down +ðŁĺįðŁĺį ðŁĺį +me z +exam ining +schwar tz +pri stine +ðŁIJ Ŀ +ve it +ful filling +an esthe +gue sses +dra ft +som me +soli d +pati onal +ho ped +evolu tionary +all er +enter tained +sli ps +lud wig +conclu des +sen sible +bon net +cra ze +tra s +haz ards +const antine +ed ics +star trek +to c +occu pational +in cheon +deepikap adukone +pizz as +new comer +de part +oppre ssion +ebon y +foss ils +tro jan +el en +ste aks +k hou +positi oning +ug by +red cross +ak h +dol ce +us mnt +pp en +dil ig +ma vs +call er +cost ello +⼠Ħ +dy n +thing s +rhin os +a xi +sar kar +con vocation +att ers +ss ss +fun gus +eu gen +russ o +squ at +w sb +eli on +william sburg +s off +defici ency +be arer +o kin +key stone +t wain +cal ming +break able +wa res +horser acing +com bs +bun ting +u it +t land +ðŁĴĻðŁĴĻ ðŁĴĻ +ga stron +sab ot +ick ers +commissi oners +sen ate +ii ot +ath ena +nit rogen +an tony +ero tic +di alo +mis sou +hypo cr +âľ Ī +kaeper nick +can v +d roo +clevel and +o sh +mon sta +stefan o +^ ) +sh ul +po ison +ha e +commerci als +ma ul +nit ro +co worker +alo e +vap or +t ents +russi an +qu id +question able +mid get +po ker +girl friends +sin the +erit rea +ten ure +depos its +buc keyes +spot ter +theod ore +trin ity +joaqu in +u cci +follow the +caf c +mp a +ðŁIJ » +plo tting +dom ino +ta ek +sion ally +dicap rio +pa p +car mel +ig er +bt cc +beth le +www bigbaldhead +foo die +bagh dad +mason ry +off ended +à · +ภģ +sc ro +vers es +ori ent +ar ches +pi yu +know your +gre e +ta kers +gu ard +dish on +bucket list +bha fc +war dly +ðŁİīðŁİ Ĭ +leigh ton +pe w +stra y +assaul ted +in hal +ly fe +amar keting +l x +kat z +ubun tu +me o +carto onist +turno ver +mi z +dis like +mul len +mo f +bl and +hi des +emer ges +chori zo +truste e +ma hog +lan sing +paralym pic +fa int +fa una +ch al +sn ar +cat h +bent on +cast illo +sli ppery +apric ot +oec d +bar o +l z +he ming +clow ns +co workers +peru vian +commu ters +y ell +ðŁļ ´ +under ing +v j +tt p +fli pk +w ana +soc ent +Ĥâĸ Ĥâĸ +ठĤ +oo sa +jag ger +di sm +e less +d ham +cali f +a official +ec lip +harro gate +gra pp +com rade +n tr +concentr ate +thi ghs +bit coin +bel arus +ë ĵ +end uring +now watching +industri al +pi p +ar on +ar at + ® +whit by +oooo ooo +sa ree +tic als +mis leading +yo on +year s +sle igh +roman ian +sciss ors +vam pires +ac up +ab ba +th weeksary +cent ri +fl ye +u o +c bi +bu ena +sin d +mar ino +bur r +re 
building +ठ² +anniver saire +ac ca +ðŁĴĢ ðŁĴĢ +gett ing +tu lips +wolf pack +âľį ï¸ı +more than +ta kin +ðŁ¤ĺ ðŁı» +u be +mon ic +dou bts +mo wer +co balt +don ne +specul ation +argu ably +kak u +htt ps +prosecu tion +din ah +stam atic +disclo sed +bever ly +fl wx +cra bs +extraordin aire +war mest +imper i +o logists +trac es +par c +lake side +am r +ter i +hour ly +domin ation +ar row +shrews bury +ance stry +wr angler +trigge red +pen sac +roo ster +survi ves +a on +bo ko +val or +love is +la g +pe y +fo cal +out laws +bl anc +artic ho +wit s +marsh all +die go +support small +u ca +sa h +je et +syn ago +gover ning +ðŁĴ ¬ +sal ads +cre ate +miri am +cen sored +ami de +no u +z eta +allegi ance +* ) +bl m +ric an +pa stors +oly mpus +blo c +whir l +star ry +pr one +y k +p ne +congratul ating +be v +so ber +love island +sa ir +an ing +tutor ials +q e +lun d +in ist +cle ver +taxpay er +ali z +wren ch +dd ling +cap ri +h pa +ðŁı» âĢįâĻĤï¸ı +na j +o j +futuri stic +jelly fish +ðŁĶ¥ðŁĶ¥ ðŁĶ¥ðŁĶ¥ +cel ery +plan k +fil a +ne me +un healthy +lec tions +ðŁ§ ¡ +rit chie +n ws +mi kha +wonder woman +âĢ İ +hip stamatic +ka g +ðŁĴľðŁĴľ ðŁĴľ +poul try +mo w +wor ds +lo ff +ðŁ¤£ ðŁ¤£ +relat able +re mixes +keny atta +ke m +re signed +fo d +stra igh +j lo +hu tch +box ers +colle en +mag s +instruc tional +ko l +attrac ts +pra g +account ant +go ggles +br u +th ole +mar row +leu ke +oc to +pon ds +bubb ly +he ist +ìĹ ij +im p +a har +ha unt +hall mark +psy ch +kkkk kkkk +col umb +jump suit +cost co +si delines +ag gies +over turned +ni b +key chain +fu k +f af +mi am +assist ants +cy cled +ri der +dam mit +red wings +mag es +kin s +ì Ĥ +ho d +son t +carol ine +" ' +cu le +bra id +fel ony +ar ities +ruther ford +depic tion +isab elle +ro ach +k day +fifth harmony +em y +li gam +bari sta +albu querque +gro ss +ðŁį º +oo ks +ðŁij ¼ +dun can +try in +jag s +g ould +li tho +âģ £ +а Ð +sam my +tun g +cas ser +apo lo +aaaa a +man g +as ics +sh en +p ye +tur bul +ss p +saint sfc +on lin +n anny +he ster +do z +ภĶ +th read +ren ts +kh and +ðŁĴª ðŁı½ +un conditional +rob son +car re +ph on +sacrific ed + £ +auto s +par ker +oc a +log in +kee gan +hard cover +dough nuts +ðŁĮ İ +spit fire +refresh ments +saskat oon +commod ore +j f +rub ber +halam adrid +child care +stra da +io m +ri k +dak ar +ther mom +cro pped +gar u +ali k +ven i +i ft +si ka +ritu als +z ul +e ch + © +su dan +l land +i me +do cker +ì ¤ +fe ared +fa o +wal ter +no g +mutu als +l h +ali gn +mon ia +concep tart +ðŁĻı ðŁı¼ +sco e +compet ence +sw ine +ly me +laun ch +green er +abstract art +inqu is +gran ada +ga elic +flu ff +d backs +grave yard +ba be +acade mic +adventur ous +joh ann +~ ! 
+bi bi +| # +pl ings +gett y +as b +âĿ¤ï¸ı @ +staf f +religi ons +bang or +world bookday +me gh +de vin +ash ore +meri dian +gi thub +qui z +all stars +be stest +ir resi +ack er +do te +war rington +pol ly +newor leans +cr ou +wi gs +che y +smithson ian +la sag +de tour +bor is +stra ps +mari ah +inten tionally +ko h +ðŁį ¸ +ssi an +mar issa +cor al +episcop al +casu alty +tom o +supply chain +sam p +on go +ro o +cavi ar +p fw +clau dio +buff alo +s ations +mat ty +snap back +l ds +al arms +mat te +âĺ Ķï¸ı +conditi oner +d ors +he x +fi zz +a stri +sus sex +secur ity +qa eda +all star +cocac ola +as one +cl icks +sc ans +mu te +he avier +ðŁİ § +âĺ ŀ +lv l +book boost +youtu be +fla shes +f jor +c su +explo de +do dge +cair n +gonz ales +th ill +pel le +hart ley +renew able +re tin +e stre +costar ica +shipy ard +nc fc +pri ya +a ghan +an ath +plu gin +co rey +re bound +or u +kat rin +hor mone +gi m +mahin dra +s sus +park land +har per +fanta stic +infer no +ep ilo +wrest ling +fe ct +c it +ac oun +to ssed +monu mental +char tered +bu st +pe tra +âĮ ļ +wildflower hour +sweat ers +* . +bl er +ate ch +go wan +demo graphic +bra l +suici de +renov ations +vu el +sin ister +ar mani +miso gy +ph arrell +nap s +un iting +crusad ers +cor gi +insu red +than i +no or +g q +d ada +bicy cles +snu ggle +sch an +ten berg +ss al +fe mme +bo il +½ ï¸ı +re ap +occur ring +hus sein +divi d +sto ke +sh alom +na ia +o lic +frustr ating +Ù ĩ +ig s +gro ver +scen arios +n ds +bru tality +med alli +bu on +sas s +skate boarding +ony x +lor ry +ny u +gau tam +mm ings +gu g +end i +lo thian +comm ando +chal k +ph ora +asse ssing +ti gh +crun chy +ad ay +is l +ci ara +pilgri ms +kam al +p to +brit anni +t ani +sm c +l ure +app store +ab y +golf ing +cl c +fa u +an as +shu tting +regul ated +carn age +scow boys +all enge +c ma +humbold t +rel le +ku mb +her i +refin ery +sound check +d wayne +bos nia +i sp +the alth +anni v +relev ance +my a +bag gage +dre ad +s bc +th ed +bu h +hi jab +lo id +ke w +c te +respec t +lovel ies +cu bes +celebr ate +dir t +sav ers +_ , +gar ment +pulit zer +mas jid +beat port +al arts +encry ption +s ner +ple ads +found ry +sym metry +ru mi +birth place +scallo ps +supp le +pivo tal +t ati +no de +so d +pro xim +tr ics +col dest +bren t +mand u +cla ir +e ach +and alu +hi ddleston +ðŁIJ º +mel ts +v ance +pin n +se ments +scre ened +sa chs +o bl +ic ha +âĺĺ ï¸ı +school ers +heal ed +lo gged +ðŁ¤ĺ ðŁı¼ +ic us +bore dom +b ish +b ffs +tal king +sure sh +hoo kem +de on +de fl +ei leen +ðŁį ķ +women intech +ri sotto +rang er +adverti se +ภģภ+tel ly +la go +dart moor +d ong +sk ates +lo go +un ner +mail box +ma sala +lo oooo +amethy st +che wing +c bb +australi ans +rc mp +game art +# ... +kor n +extre mism +fruit ful +anci ent +pu bg +pol ite +wh it +mur als +m gr +line man +dav ao +ste ms +ten nis +av age +tu pac +gigan tic +hs bc +auto biography +up the +ี à¹Ī +re gal +fig uring +ku l +mis sy +hoo p +gra s +for ums +back lash +abduc ted +p nw +min ic +bu tt +bott oms +at on +ven g +ðŁĮ ı +del aney +prab hu +fan club +over haul +health ye +sy no +aa f +ren amed +kim i +un cle +man city +se u +qu anti +este em +um in +en zo +mel vin +under go +j har +far ah +coast ers +humph rey +mh z +children s +^ . 
+d hi +disrup tive +integr ating +r nb +over sized +a ide +ne au +docu mentation +ðŁijĢ ðŁijĢ +pal o +hear th +ri yad +pun ctu +abc news +secu res +boy band +bir ch +ju co +tra ff +legislat ors +bay a +ãĤ ¯ +no ises +collec ts +s warm +k ner +bi shops +stur geon +snapp ing +mo l +fre aky +chair person +tro p +lyn ch +car cin +art sy +e sto +cha i +fl ur +inv ali +sau sages +im el +j or +fun fact +wit ter +puni shed +ac ons +h ya +re versi +em c +dif fu +z x +sp aw +cla d +d mit +hol land +fre sco +pay roll +ab undant +stu ffing +mor o +c ny +boy cott +wend y +ele ven +pro voc +pil ot +tr x +be ad +climate action +ri on +assi e +ì ĸ +o sm +islam ic +ho ar +good reads +al ici +afterno ons +spoke sman +jo lie +it as +masc ara +âĻ© âĻ« +pre vail +beetro ot +lu jah +k li +dod ger + » +ru le +l n +scre am +ho bart +col bert +r tc +er m +pat ro +quo ting +s live +que st +non fiction +semin ary +prosecu tors +ve st +express way +g ge +nau tical +et f +ðŁİīðŁİ Ĭ +dur ation +cha ired +the film +fab io +she h +can o +ðŁĴª ðŁı» +with draw +! :) +cor pus +phen om +yel p +la wn +ent om +snapp er +but te +pin ball +pro xy +libr e +alle vi +n ada +gabri el +fo wl +eure ka +daph ne +tu nes +pun ched +wh ore +jo g +ren tial +man ners +o pe +wh ufc +gu th +revol t +sne aker +philharmon ic +ho ste +sovereign ty +ðŁĻıðŁĻı ðŁĻı +fish ing +sci art +fe ta +i pp +dump ing +kel own +gir i +dig its +sal u +san jay +twee ters +sp as +col chester +sc ab +ma dd +๠Ħภ+Ä ĩ +ged don +march for +do p +maure en +un plugged +di do +fashion blogger +up a +mex ic +tar y +pol ye +jame son +v t +grin der +mad dy +consult ancy +¬ ë +leagueof legends +ac cents +um ni +jane iro +tu ss +h ens +ampli fier +to shi +pret tier +pre vents +new town +red wood +vant age +ball ard +ar tof +a she +a sion +lac ey +ap at +gro ve +ภĦ +rw and +real tors +tra itor +bed ding +ö r +zi on +fla shing +cam pan +boom er +secretari at +ab ol +liti gation +cont amination +se dly +shred ded +in for +do herty +bench mark +ro che +skate board +sho vel +i zz +to pper +o ster +laby rin +autu m +k ong +hum mus +vi z +tech news +kla us +am using +socialmedi amarketing +i des +cast ell +ste e +underestim ate +cal ab +pa ign +b illing +unanim ously +g mb +fly fishing +hath away +commerci al +colour ing +skul ls +pivo t +te p +tb c +motor way +x press +construc tive +pu k +under lying +kir sten +mani ac +cha o +se ma +chiff on +ðŁijĮ ðŁı» +ver ona +kom o +stan doff +wi ped +c ated +bla ir +wor kin +m sc +bethle hem +swi pe +unexpe c +pe es +pe tri +orig ami +ðŁij ħ +mex ico +flav or +ru dd +cannab is +mar u +ri ddle +wor shi +sil on +sch at +ap se +tang er +bi ous +e er +questi oned +o zar +dan k +angle sey +char an +bak u +compe ten +re pri +bat ter +sa xon +cal ves +leng ths +$ $$ +âŀ ¡ï¸ı +immer sion +ga unt +car ry +cy to +b anda +shu tt +experi ence +el gin +mous se +ta z +ê µ +in correct +en z +b ham +mor on +so ver +ar un +ti pped +la ble +de arly +bau tista +í Ļ +mor tal +woo p +dt la +sho cks +dav os +ðŁĵ Ŀ +swim wear +her man +ðŁijĩ ðŁijĩ +z ir +neglec ted +grac ed +campu ses +av s +ar ora +swach hb +live pd +ac cra +enqui ries +shoo ters +kur t +vancou ver +brad ley +gar da +g ü +ol la +attrac ting +up ton +ne win +lu mia +furn ace +ev ers +e on +sw a +roo kies +a oc +v ss +bris ket +tor ch +yo da +heart land +tac o +ph ony +food bank +ab bey +bab ylon +u y +gre ate +expre sses +d andy +sc apes +survi vor +ron d +e ci +ha vin +ab el +chil dish +tor que +wav y +ur self +kanye west +year of +ale stine +o brien +al fon +sk ag +kore an +anchor age +val eri 
+[tokenizer vocabulary data: several thousand added lines of BPE subword merge pairs, one pair per line (e.g. "land slide", "car ole", "christ en"), including byte-level encodings of emoji and other non-ASCII text]
+âľ ĭ +v ae +cur rie +ag garwal +acceler ation +the ses +lg m +u mass +pro portions +nat a +ani ans +ku ch +be acons +ap r +@ # +ðŁĴª ðŁı¾ +nu ke +sher aton +ki o +ma kati +polit ico +mor ale +ì Ļ +econom ically +gg ly +ss en +pa stries +intern ships +vic ente +fanta ken +aveng ers +accu se +slee pover +indic ated +the dream +ster one +ren ders +fro st +ou i +gre gg +d ore +⾨ ⾨⾨ +pu gs +sat y +nu mb +hems worth +tam i +la ssic +schi ff +igle sias +ag awa +] " +re shi +game stop +divor ced +theat er +clau di +un conventional +prophe ts +ac in +twel f +tow ering +t ml +sc lerosis +k wan +ge ts +distur b +na ira +ener g +pir acy +pru itt +noti fied +hen na +bra m +ground water +bl s +opti mis +$ ) +luci e +biz hour +fang irling +gr ills +or l +ver se +c ina +law less +artistson twitter +tele vised +marshmal lows +radio head +bar r +m fc +bre vi +mmor pg +g aya +âĸ « +sub titles +j t +disney land +to bago +nh m +groo ve +fi awec +" / +ba o +scra bble +om ni +ff l +um c +si mba +ali er +ter rell +plu me +mi di +dig nit +co c +bru t +ad ata +alche my +d sm +ðŁĺĨ ðŁĺĨ +win try +spa res +cu er +conclu sions +to ys +od or +fl ann +gar vey +scrip tions +inspec tions +cat ap +ang lo +st louis +heim er +at ay +tr ich +en yc +chil ds +vent il +mont p +guiller mo +circu lare +z ell +mode led +craf tsman +al ina +stimul ation +cashe w +ju das +best of +to ire +susp ends +scol lege +real ising +by tes +bloo ds +as si +ðŁĴ ¿ +o hs +ðŁį ĭ +scallo p +ठµ +gi fting +camo gie +wil kes +o zzy +ðŁ¤ ¤ +ver onic +sav oy +deme tri +baby girl +ðŁĺį ðŁĺŃ +so x +cly de +induc tee +count down +self care +ठľ +vi ka +tor re +phd chat +pe ars +aw h +suff rage +le sn +admir ation +mp p +shark week +schul z +santor ini +clo ver +( * +stras bourg +ex iting +so yu +finger print +che a +ãĢ ľ +vin dic +song writers +so a +prou der +nam a += )) +simple st +delici ously +gil les +u q +mn wx +ep p +sh un +ken nel +fall on +ðŁIJ £ +sin d +tra gically +out es +modern ism +co ke +gy n +spi on +âĺ¹ ï¸ı +le am +compress or +apolog ise +twent yon +fan atics +âĻ » +sco tsman +sa wa +ko u +as er +ภļ +welter weight +phen om +twick enham +stri a +p out +ka z +gi am +cd p +ho y +emplo y +red mond +ภĦภ+sm ere +trance family +proto cols +pie ce +lu iz +iter acy +carl s +united states +har med +phd life +ch aw +foot prints +l é +cho ker +z ana +sli pper +eric sson +insul ting +articho ke +advis ing +acquis itions +op or +mut ations +re ar +ॠģ +pod cast +wi ther +kun g +íĺ ¸ +win slow +di apers +ðŁĵ¸ @ +ec ker +col lar +hu ey +gi ro +mono gram +kas ich +si veness +malay si +arom atic +gre s +gali leo +u ji +rob b +dr m +none theless +as a +: > +lo a +l np +at work +ag t +laksh mi +pipel ines +id al +stre l +re all +chain z +stone wall +san sk +ðŁı ´ +pied mont +hoste ss +ci u +t é +analy ses +wil helm +scott y +rw by +mosqu it +use mb +qu ins +ðŁij İ +tu cker +s conf +speci fications +psychi atry +broo kes +s ils +ol af +de to +co di +cli p +fil th +womancrush wednesday +go to +ang erous +be ale +w tc +paneli st +ne x +lar sen +emili o +tab leau +h itters +conce ived +americ ani +or tega +mar di +Ñ ĥ +pain tball +thir sty +new yorker +etis ation +go ss +we aker +u gh +tro ll +har ga +du al +ght ning +at ine +ðŁĺİ ðŁĺİðŁĺİ +cook out +pyrene es +po ss +authent ication +sports wear +yun ho +kir o +archi pel +shen ko +ren der +nov ation +divin ity +ðŁij £ +su fi +humb ling +ge opol +devote es +wait ress +tr ough +py ro +i ba +bl ing +gra f +epilo ts +bt r +of tball +bas king +domin os +so om +r ath +sher yl +qu el +astronom ical +wel d +track list 
+sig nee +slee pless +com man +ch ron +summ on +pure michigan +cri spr +sli p +la gi +ra q +um u +thal ap +char med +scru mp +quad copter +ski p +peter sen +mun i +ðŁĮ ¾ +mon aghan +tra ys +ick ed +canad aday +te gr +ï¿ ½ +hot ness +heavy metal +ab ar +gop debate +az ul +spider man +sun flowers +ľ ë +web comics +bar d +Ð ² +nichol as +slu sh +ram an +mark ham +ffici al +ff ler +íĬ ¸ +ple ss +anush ka +to to +sk aters +pro wrestling +compet es +ay ala +myster y +thr ills +mp g +independ ently +y ul +imper ative +formid able +tire less +st acking +ton gues +mal tese +pot ts +mat ti +char ting +chill out +super nova +ome o +sky sports +nu tty +ðŁĹĵ ï¸ı +ro han +insp ired +concier ge +ser ra +ma kk +gal at +chi pp +ye v +ì £ +reim bur +op ul +kimber ley +i eee +bre men +ch itec +or in +nak u +bon kers +foo ty +emer gence +ðŁĨ ĺ +sti p +serge i +zo ey +ai me +wou ld +dy es +destin y +vinai grette +dri er +circulare conomy +an archi +ss r +sch el +cin er +gro om +determin ing +gar min +cal ais +incarcer ation +bu kit +no i +chelms ford +mckin ley +chi pped +belong ed +tu mors +str oud +mi i +influen za +wwen xt +tun dra +tele communications +cat sofinstagram +t ages +beat ty +o du +ml kday +oo per +dang le +ak ley +cru mb +anti gua +ti mbers +rou hani +ðŁĴª ðŁĴªðŁĴª +ha fi +... !! +w cs +coo p +sn c +lit res +ãĢ Ĭ +ha z +co z +k ant +green field +cur ti +y ale +flye agles +what soever +wor thing +rou lette +flyeagles fly +un da +a inted +stand ing +lusci ous +h pc +effic acy +ash land +me ghan +ky wx +n pr +bath tub +ac os +h ani +mar cor +man tis +da isi +bo ba +ab bie +mu til +vi al +spy der +po z +g ti +el fie +nigh tw +metro id +anton i +mad die +dh ry +dar lings +ten ds +taek wondo +atlan ta +me ow +chlo e +ãĥ İ +ym es +siber ia +k con +gu es +mar iner +fac il +azz le +[ ... +han nover +bav aria +vir go +te uk +u sps +) # +wall a +sam pson +need less +ver bally +hay ley +bow led +pi us +lam pard +ham string +vol vo +road safety +cho king +sor bet +a hem +healthy food +brai ded +horticul ture +cr ative +che ek +ad do +the force +ko ko +schiz oph +j ie +w ada +twentyon epilots +h bcu +pro ton +pau ls +lou isa +lat am +kyr gy +com pac +sd k +sap i +?? ? 
+liber alism +ep silon +ai den +w usa +spra yed +baske tball +kim ono +blue wave +ali as +ë§ Ī +mug shot +ce c +do gre +ad ora +ðŁĵ· @ +kra kow +intrigu ed +exhau sting +astron omer +ven ison +lady bug +ci v +bra e +us m +bri be +acup uncture +pembro ke +ke ating +chi e +y ad +t si +sm i +see ding +gate shead +lis boa +gy p +canv ass +ðŁĶ´ âļªï¸ı +op i +ni r +soci etal +ly te +ati es +c sm +ar tery +al in +aka poor +abstr acts +âĢ¦ âĢ¦ +teen wolf +ne we +travel gram +sentim ental +per ched +han del +ho ek +f ay +coordin ating +anim ate +man ian +effor t +jer ky +f ck +adri enne +ma bly +tra ding +my el +spi ro +sol a +stor ing +over drive +monday morning +dream team +pul se +bon di +ber nie +pgat our +tri poli +son am +plat t +âļ ¡ +ag roup +îIJ Ĵ +inv ading +v cu +k ell +ñ os +un dead +pod casting +mercede sam +mana fort +cor tex +que so +impecc able +pal mer +wil doz +sport sc +guacam ole +dispen ser +cate gori +stun ts +per il +invit ations +dune din +xi e +achi eves +saf er +pre ds +ph an +knuck les +k ak +igno res +lovemy job +aru ba +ound ation +datac enter +co vert +gr ing +cou ple +ا ر +vol i +mc cle +arti sans +lu do +kal am +arom a +under taker +hu la +wiz kid +gu mb +god frey +bakers field +ker n +engine er +car ve +pal in +guaran tees +pe bbles +b ays +zi eg +fin k +â¬ĩï¸ı â¬ĩï¸ı +down pours +ro chelle +rasp berry +ðŁĺ ® +gra phies +stom p +caf es +ari zed +utt ar +cal vary +dri e +crusad er +bus an +tux edo +si u +seam us +cul tured +blan chard +town house +ge red +butter milk +flu ctu +roger federer +hel i +ðŁ¦ ĥ +u ous +ram esh +mu ppets +email marketing +ye ss +br ice +ri zio +pel o +donnein arte +u rable +inve stin +bump ing +raji v +sav a +thro wer +fore x +o hhhh +th rust +pull man +r fid +sep sis +le ed +fri ght +roun ding +ne b +ph ins +ai sha +utili zing +squ ats +gold smith +j ic +bo ks +vau s +i po +exclu sion +tari ff +po kes +min al +land s +en force +washington dc +or char +g x +mar ys +ey our +aussi e +bak ers +un popular +latin os +lar ge +pu tnam +bol o +wa de +pel o +di zz +ob struction +fla ppy +weare the +depend ence +pajam a +e te +y ann +e wan +disc la +a ay +kar ina +e ic +an trim +w soc +neg atively +kai do +fotogra fia +dh ru +colo ssal +mcle od +k wang +mani pu +ex hilar +us atoday +summer slam +co les +tapro om +unbeat able +de ma +tic ks +k ling +fil s +campaig ners +ภķ +brew ster +audu bon +qu ay +ch s +ki gali +d ler +strength ens +som al +sign ingday +gol ds +pig ment +orche stral +g q +lin kin +ðŁı ĩ +ta w +algar ve +ho v +ear le +gold fish +am ig +ex er +ben in +dru id +ðŁIJ ¸ +she m +quat tro +mer cen +men te +incorpor ating +bon anza +state fair +en de +concep tions +e es +âĻ¥ï¸ı âĻ¥ï¸ı +d son +fire arm +orb ital +we h +multi p +fo b +requi em +p light +thou se +sa id +oc re +remem brance +n old +chi pping +be v +er t +ca thy +sy m +ri ggs +m ley +dialo gues +sl ender +how l +gau teng +wd w +to bi +smo kes +im plo +b pm +ad n +mom basa +cap sul +bloom field +artic ul +cle o +goog led +flu ffy +l ard +en zyme +ve sti +ibra hi +fl ame +e mea +out ages +dispro por +ble ak +an sel +ick er +st louis +stock market +good friday +sau lt +stal led +pro m +ep som +b é +the se +sau ces +me w +lit fest +pre d +re u +kar ak +si enna +ell in +bio technology +ï¸ıâĥ£ - +tac tic +sa in +por k +mon za +ka j +lu sh +compart ment +chang ing +shraddha kapoor +fo al +ar tem +cu ando +can ola +ori ente +me sse +d ited +br c +box er +bbc two +s st +ment day +em ing +de wey +kof i +âŀĸâŀĸ âŀĸâŀĸ +reali zation +smo l +tw ood +san je +flag staff +ber wick +cor set +can 
ary +whistle blower +et ched +com posing +squee zed +bow er +auto desk +ne h +mathi eu +ba ja +Å Ĥ +hy dra +da im +am eri +insi sted +mer lot +gar ros +heart news +gaine sville +cut ler +bo de +ðŁĺī ðŁĺī +lew es +scoun try +g sa +us u +cc m +god awgs +phara oh +cra e +mor ley +hyp noti +f ades +neur ons +fu zz +ing co +high landers +star k +vig ne +pac kets +amar illo +reu ben +insul ts +bas ic +vec tor +n me +ac ruz +tro s +transm itter +ðŁĺ ŀ +interpre t +ðŁĺ ² +pre quel +mc gowan +dis semin +ðŁĴĺ ðŁĴĺ +mascul inity +indie gamedev +ali ve +te t +pe tal +ema iled +ar med +ko o +he er +ba ird +super junior +metro polis +delav in +decl ines +stit utes +Û ģ +p tbo +g lan +cho res +e aling +chri ssy +ste mc +vi an +assassin ated +pron ounce +illeg als +discover y +cav ill +fri fotos +f al +so i +sabot age +t int +p dc +ðŁİīðŁİ Ī +ãĤ Ĭãģ +ji o +endeav or +in sig +commit tees +she arer +me tz +mar rying +h dd +g by +fre t +tri sh +pu l +scrip ted +sa ki +l w +ke ye +shim i +nan aimo +ca h +à « +tem pered +ici an +du gg +dish washer +air field +s rugby +gr inch +y st +r ms +mahat ma +lan kan +disc ar +dige stion +no des +l ls +om ic +gu tter +tis garh +feder ico +election day +bo he +master card +fire ball +âľ Ķï¸ı +oy ster +p ong +do k +en route +m vc +beat the +ali stair +shu b +sh aming +cherno byl +ghi bli +the s +pin ion +d bs +sal ts +ic tion +epi ph +nc pol +in convenience +whit ley +inspec ting +wood ley +wi ener +skil let +no les +m ca +h ina +a sha +willing ness +well ness +tam ed +show time +dis advantaged +ber nat +us n +mission aries +coun selling +arrog ant +quant itative +leg alization +ho dge +energye fficiency +cameron dallas +pos sessions +p bb +harris burg +v g +hindu ism +happy thanksgiving +fi b +re acting +tweeta picture +pol iti +mu ppet +hur rah +pac e +coast guard +guar ded +as am +par ry +fore very +x q +oom f +ke anu +j ind +ri st +customer service +sac red +ðŁĺ º +ton er +occur rence +mat u +val dez +red d +is ak +power rangers +pe asant +raj ini +abra ham +e mil +car do +tr il +hair styles +obsole te +sam pler +direc tive +delavin kisses +ver ton +glo s +sp ay +paler mo +com ets +man ziel +chicag of +ski pped +pic torial +h ant +b mi +a ol +re opens +pad dling +devo s +fra ud +bas eline +que ues +sp ired +sn are +eu ve +descri ptions +daisi es +ca ching +gall eria +tri mmed +stin o +recy cla +ic ular +bir ken +raw lings +fli x +chic as +b gt +lik eli +argy ll +thel ove +ga ston +bl anca +ha k +f one +sailor moon +h aci +ima c +fl yn +de can +bel les +ap ic +zo g +taun ton +con stance +lasag na +ker nel +in ka +har bor +collec tively +calcul ated +av ille +shil pa +pur du +gi mm +fun er +a est +pembroke shire +nighting ale +n unes +hyper tension +hu bert +sli ders +infer tility +comm ended +transat lantic +metr ical +!! 
@ +Å Ł +ss g +bac ca +inver ted +fun factfriday +it ans +albu m +acqu ainted +ri er +whel an +sar ab +mu e +snoo ze +pi ff +agre eing +sp itting +jer maine +n ye +âľı ï¸ı +am bush +ze ph +con greg +univers ity +s app +wann abe +pat rice +ib d +do glo +fri dges +sun d +king ston +ar gon +kam en +hardro ck +ds ley +do lores +ì ° +ota ku +pi ping +be having +âŃIJï¸ıâŃIJï¸ı âŃIJï¸ı +blue bird +an sari +teapo t +fire work +cro p +log ans +ty ped +thick ness +ig ers +c fp +dys functional +contra sting +et ty +aston martin +tx st +dra grace +at tributes +marath on +manu scripts +john stone +ðŁĺ± ðŁĺ± +bo er +ay u +aru gula +poo rest +con du +assu mption +anag h +no h +delav in +sit ter +g ö +mor ow +kick start +com i +gl acial +ghe ad +ba in +ker shaw +en dof +fre ud +om at +i af +hu g +sign up +each other +defin ite +tu bing +shak ira +ðŁijı ðŁı½ +uu uu +sw in +sham bles +ol as +sk ell +brit ain +kn w +clu tter +om y +j ens +hang ed +city scape +scra ps +un locking +dead liest +er no +breast cancer +a it +inspec t +fu ri +ðŁĴ Į +ku d +ju le +or ah +mi ds +m dt +bur gring +r attle +pu sa +stal k +cle ans +iss ance +z ek +worth it +nam eis +musko ka +council man +urban art +bar rac +un solved +tu l +g ita +white board +soy beans +em ent +cont i +saturday motivation +conveni ently +doc king +t ado +âı © +sp ino +puppy love +po f +fabric ated +robb ers +adop ts +ti fied +kk r +indulg ence +notic eable +macqu arie +chap el +sensu al +ki ko +melan oma +lore tta +li ance +ab en +sp lus +ga al +ac ele +lib dems +compar isons +ðŁĮ µ +rhy thms +mer y +en capsul +nap ier +ðŁijĮ ðŁijĮðŁijĮ +ðŁij IJ +plat z +fre sno +re formed +ran bir +el it +the best +bhu shan +vin nie +impro vised +s ittin +re created +e ba +ec ker +ac rob +pon te +cor d +gi ddy +eur usd +fe ver +intu ition +gar i +dum mies +bud weiser +amend ments +te tra +sch nit +ay as +mar ys +ci st +k ani +ker mit +ðŁĺ±ðŁĺ± ðŁĺ± +tin ker +strol ling +di visional +niger i +omin ous +menstru al +kar ab +k hy +bw fc +pan handle +l illi +well er +stra pped +son the +transfer ring +ethe real +sne aks +ru dol +gab les +jac king +cin code +for tune +canadi ens +con for +ab normal +frank lin +tit a +mu la +persi st +cu ties +ki el +ðŁĩ± ðŁĩ +her mann +aw k +fi asco +ko to +we ta +hi ker +budd y +preven tive +mcgra w +game boy +forsy th +top shop +si ob +sad h +in tram +follow art +so aps +dragon ball +ou x +morri son +๠ĥ +lu bric +adul thood +morri sons +âļ łï¸ı +her mo +ta ka +stall one +mis use +team gb +ra gha +con fined +at y +hom ophobic +nw o +sky news +ho ya +ac rosse +wi iu +pur ée +jed dah +ðŁ¤ § +advis ers +ph ine +an is +scrump tious +ë° ķ +c ke +vin y +ter m +s dc +o do +home school +vas c +leop ards +debor ah +illic it +cur ran +as roma +nau ght +mar ig +brand i +em p +ðŁĺį ðŁijĮ +î Į +su spend +lu z +initi ation +sch aft +jensen ackles +craw ler +post doc +des ks +trail blazer +den omin +tri x +no ise +po et +± ï¸ı +s mug +vol atile +proof s +pharmac ist +sardin ia +mash able +kim chi +co ed +schal ke +doo dled +c sw +sh ur +ro x +do k +chris brown +mathemat ician +ab ound +ang elic +rock ford +d ole +yor kers +ms n +g man +xavi er +bor rowing +mark ings +longh orn +k ja +diver ted +mm it +euph oria +ay yy +te a +pa h +ck i +un cut +li ven +ky ung +fan art +mer ing +red ding +amo vie +gri di +c thulhu +schol arly +ju dah +th bewithyou +eu calyp +ðŁIJ ķ +hert fordshire +cour troom +by u +auc tioned +ple ase +mar cia +ê° ĵ +succe eded +el as +arvin d +t lot +saig on +re tt +ra kesh +fd ny +as en +se bring +gladi ators +you know +v lad +gol a +par 
ap +ÑĢ и +sab cnews +one team +oh l +sun e +ri j +cd c +star gate +run down +plat o +ph c +chat ter +ra viol +mn f +mand ala +li et +ภķ +mari a +hun gover +consoli dation +fer rell +tradition al +ilove art +gal ap +ðŁı Į +que zon +espa ña +ðŁĩ¨ðŁĩ Ń +ho bby +steam boat +mali gn +guil lau +pro hi +its me +íĥ Ģ +in scription +al z +mari an +k ade +mm on +adju sting +ne sts +intern ally +ci r +vik ram +mal ala +k ph +fel icia +the real +cap tivity +at is +marcor ubio +kale ido +che v +mano j +le more +gent ri +vi ps +tro pe +" âĢĶ +pair ings +mal nutrition +fr ay +desig nation +brun omars +az e +tor rential +pan zer +ga il +under the +the ological +schizoph re +dazz le +freder ic +mo par +ad illa +so ggy +ra un +medi ocre +colo rec +i fe +p inst +blu ef + ² +world water +gir oud +clar inet +ad olf +tar antino +receip ts +assu mp +ðŁij Ł +coffe es +âľĬ ðŁı¾ +du plex +s of +r x +lin o +timber wolves +pan dit +mo tm +e ga +ay ama +ach s +outsi der +ll en +co er +til ly +cheese burger +ma ds +ple dis +emp ty +national parks +az iz +p mi +jun kies +f ener +sq n +è s +gener ation +cleop atra +bhuban es +mosqu es +ty free +popp ins +tw c +or well +n age +ka whi +hol low +dal ai +¨¨ ¨¨ +ou ro +m health +gi on +az o +vis as +reneg ade +re ic +w sop +ðŁĴļ ðŁĴĽ +e chel +tox icity +mü n +bun k +stimul ating +asth our +\ ' +ep h +ende mic +cn bc +shrin king +peabo dy +michel angelo +can yon +wal e +su mi +si ders +inu it +? . +profession alism +dr acing +plat oon +p ons +out bound +maple leafs +de sol +cen cy +a than +ver ma +ru bbing +ok an +ðŁij ł +mull ins +authent ic +Å į +alman ac +ga ia +bb q +on imo +ke h +ty a +tou ts +y av +re posit +, . +wi ght +se eyou +cal lof +done sia +bar gaining +gr anth +sd su +amphi theater +p su +re watching +wine tasting +peak district +dete cting +thur man +phe e +èª ķ +u mich +re r +sculp ted +go le +name sake +ðŁĶ ģ +serv icing +bau gh +pu gh +pen cil +dar th +munch kin +at orium +ten ers +sun y +rolling stones +mag ing +star rer +i dris +fe instein +ag ron +âĺºï¸ı âĺºï¸ı +supervis ed +chamele on +aggre gate +succe ssive +mo gul +inst yle +pol dark +custom e +ohio state +ha ya +ci des +broker age +angel ou +fifa wwc +de forestation +al ton +pam ph +hu gged +ho bo +change able +ku ber +bur roughs +demon etisation +cape cod +vers atility +or ice +le ila +womenin science +tu a +he dges +embarrass ment +ali fe +so ars +ni ghter +hy mn +gi pp +chas u +tech s +ni all +k illa +hi ka +cam els +valu e + ¢ +sc oops +mah moud +clu sive +adri ana +pac o +oz il +un as +transl ations +whispe rer +s bi +bu xton +bio tics +indi ffe +ken ney +k lar +et ching +barra best +inst ability +se ine +vo tel +blo gged +whis key +my space +t ant +lan dia +give back +illu s +aw ak +ac ab +f bloggers +cloud computing +blat ant +syri ans +band ra +sty n +an em +ke ted +kar thik +barun sob +pin ot +gu bernat +gay e +arti ste +i fied +conven tions +hu an +geni uses +eeee ee +fol ly +somer ville +pride month +ðŁĩºðŁĩ¸ ðŁĩºðŁĩ¸ +chemo therapy +paul s +bak ar +ìĦ¸ë¸ IJ +taiwan ese +fol lo +c ss +re ign +nn nn +fla un +catastro phe +iti es +frag ments +extre mists +ym oun +car men +eze kiel +conne cting +se h +man ta +remodel ing +we ymouth +at oms +ce m +ne well +lu mi +the open +mo c +mili band +g land +z shq +mag gie +mani acs +m sp +ad y +cre ams +le anne +e sta +py g +af finity +pray er +dun bar +ligh troom +ac adi +wyn onna +roman tic +state dept +sick le +wh os +lam o +et our +fin ity +shru b +shar pen +pun dit +ed on +af ore +mar s +jeff ery +ter ps +medal list +kath arine +accu sing +ta z 
+roy d +from home +confron tation +alle gh +ðŁijī ðŁijī +refresh er +ran veer +never land +jo jo +lu crative +en am +ca ver +pa edi +man jaro +flu ids +the ssal +oppre ssed +mu ss +joh anna +Ø ® +cn g +buil dthe +sett les +s ith +fu ego +cl amp +ar ag +pay er +ted x +mand y +inter stellar +fr c +ch and +b cc +mo lo +len til +johan sson +grims by +nature lovers +ðŁļ¨ ðŁļ¨ðŁļ¨ +shin de +x in +international dayof +transiti onal +sat a +cad dy +wo d +if u +ha ys +holl yo +j ang +ir c +co im +grad able +" " +ðŁį ´ +ঠ¾ +a el +n yo +west lake +time out +sof i +phenom ena +cultiv ation +ag no +un armed +so t +con j +gen o +royal navy +nutriti on +fair mont +ti relessly +sn g +re ty +mic a +lu cent +slo ane +droo l +riz al +od ell +critici zed +. '" +la ze +deser ted +co der +pra s +l illian +itiner ary +dav y +an ap +whi pping +hobo ken +kare ena +çľ Ł +vi us +ter n +nan tucket +mis understood +bu laga +st ant +chin ook +z am +reli es +d ss +ed mond +sket chy +m ell +fe x +rec tor +dist ill +day dream +wine maker +ri pley +billion aires +hel ene +ati f +cul prit +bertr and +wou ldnt +ma pped +v ak +gla dly +parliam ent +kidlit art +ware ness +goli ath +âĨ ĵ +view point +tat ted +fu ls +dor sey +ang lers +li ds +ki ya +bow les +be h +b ite +compati bility +ance stral +pro x +beha ved +gubernat orial +ch field +sab an +z h +teen y +shibu ya +holli day +pan cy +âĿĦï¸ı âĿĦï¸ı +seun gri +? , +ðŁĩ¦ ðŁĩ· +im itation +impac tful +any i +gene vie +añ os +bate man +gli der +af ar +ra sheed +effor tless +sh war +dach sh +er un +at os +kin i +ch d +kha ki +k lin +felici dades +bel o +as l +to ppers +fin ley +stac ey +rigor ous +kar ting +le ppard +car michael +be ret +c se +ak hi +mer ingue +ab an +ha ke +ger i +er jee +re sto +comm anders +pr it +fl or +ad ven +ex termin +remain der +å IJ +es g +martin o +lulla by +| @ +mi gn +in store +big bang +cor di +cau ley +ante bellum +dg ate +cro ck +span dex +scaf folding +ore os +ê°ĵ ìĦ¸ë¸IJ +pom ona +ma uro +uni versi +re mi +af ootball +t ant +sm alls +ne h +worl do +tropic al +mor ph +jav elin +gla r +arqu itec +reminis cent +tu bs +spide y +make u +syl la +progressi ves +blo t +shor ten +keep in +ch ak +ang st +super food +decad ent +ston y +neuro logical +ar boretum +ann ak +fe ma +per cu +dis respectful +small biz +lo x +co om +c sc +bs bi +pre valence +him ss +esp an +mo ga +fr ampton +sky map +mas se +levi athan +( ). +noctur nal +car ameli +ang or +amne sia +outsi ders +she alth +rhin o +ant ag +ag io +ðŁĴ° ðŁĴ° +take me +kab addi +c si +m sh +coch rane +thessal oni +sil a +ha us +du sting +obe se +mack lemore +mani sh +len in +m dc +gro wn +shef field +s rs +ke le +car son +ch um +dah lia +can tore +opp o +how ling +cyber crime +sur realism +sc ran +fa iz +thre n +rac ists +r out +pk not +se mana +sin i +mc cull +ma chi +alfon so +y b +sar dar +kend rick +den g +reci pro +on f +doom sday +bri bery +custom iz +art is +c pi +ðŁĻĪ ðŁĻĪ +sla va +let te +en s +âĿ¤ï¸ı ðŁĺĺ +cra yon +ad an +tr c +migr ate +simp son +row ers +king sley +farmers market +shee han +ne phe +bor non +car ton +mic key +all ure +u lu +sli pknot +heb do +gui do +dog celebration +online marketing +acceler ating +) .. 
+origin ated +macar oni +ed tech +out field +mit z +disc us +adverti ser +man or +ha shi +descri p +cap ita +ful bright +recep tor +con n +con ey +spion age +r attle +pre st +u li +blog post +acker ay +) âĢ¦ +red velvet +mat th +inspir ing +b sd +ker ri +po con +mil lar +re pur +accent ure +ä ¹ +ram bo +ragnar ok +dele ting +british museum +pat ory +leip zig +flori an +sci fi +in ers +br ate +yo y +melis sa +ab er +ma sa +po te +mosquit oes +transpl ant +r pa +; )) +bast ille +yl an +joye ux +melo dic +cap tions +atri st +roch dale +gott i +pew die +cuties aturday +who is +aqu aculture +tiv a +sp el +he ss +ha ji +fred die +co per +brand o +v k +photo book +* , +my dayin +micha ela +brune i +sr ini +in te +Ä ± +de ol +d fc +separ ately +bun d +ve sts +to c +me ck +rein forced +constra ints +car roll +sq ft +re ver +cam per +bird man +in action +gener ators +triumph ant +pe sts +o vo +gy pt +al amo +sc aled +suresh pp +sd n +is mo +gi os +) @ +justic eleague +restaur ant +gab i +den gue +next gen +exemp li +ap ex +inspir ational +down side +kid z +u pl +et na +alvar o +fel dman +bar net +m ha +es ch +bloo ded +>>>> >>>> +kan i +ho fficial +casablanc a +bir ds +ty ga +sw amp +o day +new castle +nb ap +ci sion +cho ols +af lo +ne p +mon ton +ak b +super model +down time +th os +sc wx +snoo py +ag greg +yo ke +nor cal +we tt +prolon ged +me tast +beat er +f ta +t lap +disgu sted +y h +voice over +itch y +ip c +ðŁİ ¾ +phe asant +stra its +ram pant +j g +fer til +assu res +fortun es +sal inas +liz ards +kett le +i bs +cyn thi +he g +mc cr +soccer oos +happen ings +cor den +ðŁĺĤ ðŁijĮ +t ches +egre t +wolver ines +congratul ated +ho gg +bott ling +wr i +fer ri +bo sch +af ire +og den +s jo +j dm +sv t +con tex +tol lywood +min k +me se +super sonic +op oulos +å ¸ +âĶ ģ +knuck le +gu ise +gam i +chu cky +z inger +radi al +compla ined +bo da +fe tal +discipl ines +cor ro +ðŁĩ®ðŁĩ ¹ +op ted +filtr ation +ad nan +em cee +mi stre +insom ni +fer gus +tra jec +on don +med tech +tanger ine +madra s +gru e +cab s +z hu +sureshpp rabhu +insul ated +day swild +pp m +band ai +v day +s ff +squ id +lo thing +not dead +expre ssive +cu ll +ala stair +x u +up front +fish ers +en es +um d +dis missal +sti er +sel s +lu st +re active +prote ster +eyel ashes +al im +goo de +gre eng +da ir +com pen +anush ka +proto typing +ma pu +bear ings +ðŁIJ Ł +for me +bsbi botany +timo thy +out skirts +am bed +are tha +wend ell +stre aks +ni m +k pk +sne e +fit ter +quo ta +p ate +win ning +ðŁį Ń +sho pping +ma inst +cul ver +ste vie +mcfad den +counter parts +gren fell +fol som +dor set +tech crunch +⬠ħï¸ı +tip tuesday +us l +tre x +geor gie +ranveer official +lic ks +se wn +k f +' âĢ¦ +jap s +p ate +orth op +fe sta +stra s +mon tal +hammer smith +fore most +wido ws +mad re +ite z +mito chondri +lig ans +z ona +cari bou +m ss +andre i +weather channel +gh c +: ... 
+ta ft +awe ather +al isation +bru tal +bliss ful +nik ola +mal icious +q m +mpg vip +bro die +bl itz +applau d +dri bb +v ague +dog go +transl ating +interpre ted +hat ched +ge tyour +benefici aries +spar ring +caes ars +aw illiams +la hat +bro ke +ti mp +virtu es +rel ying +pie tro +k tn +ici sts +pab lo +lou i +a ag +pn pp +cha st +pul ses +fini sh +usair force +type writer +thomp son +dog s +ut to +ãģ į +sand al +new ly +do ge +z w +wan kers +ne gr +mu cha +determin es +black fish +sk unk +mu ps +instru ment +phy to +daysto go +skin ned +hai der +con ten +ðŁIJ¾ ðŁIJ¾ +we iler +undoub tedly +chair ing +wall is +sh ard +zind abad +adul t +absor ption +pre sto +deplo ying +drum mond +battle front +seag ulls +how dy +juda ism +des de +part ition +âľ Ŀ +no logy +national bestfriend +lesn ar +film fare +co asts +christen sen +ac an +mb u +co pped +ru bble +sw c +fun nier +far ther +where as +nano technology +with stand +pil low +bow ers +to pe +it ly +con fit +ma kar +comfor ts +bo sh +cli pper +bal la +sti k +mil b +safe guard +musi que +eas port +ya z +pad ded +bad er +fore ign +chop in +archi ve +o ka +tran sporting +tml talk +aj it +consequ ence +sc roo +ff o +collabor ated +pug chat +ye mi +jav ed +au burn +o of +ma w +sau cer +miti gate +i les +evangeli st +ter ie +re cl +indic tment +cat a +bright ness +may the +whim sical +un lv +key word +cu min +med way +west world +tra w +im posing +form ity +coul ter +ab z +ny pd +grass i +kel sey +qld pol +clock work +f dr +di anne +âĺ ij +ad h +p ann +bra vely +ae ge +un lawful +ver di +pocaly pse +phar o +kar la +reson ance +ma stiff +la dak +bu u +ma iled +hi i +craw ley +tor rent +mach ado +liby an +effort lessly +fal sely +q vist +ke ef +craf thour +cheri shed +val kyrie +s ari +kal amaz +be he +ðŁĮ Ļ +th im +ro ddy +col trane +but chers +ach im +wk end +awk ward +cab rera +:) ))) +fran c +decl an +con dos +a ja +pandor amusic +char ter +ph ill +mon trose +hatch back +handic app +gre aves +eucalyp tus +ut most +t son +bur ton +mid wives +in cur +ðŁĺį # +moo d +compre ssed +tom a +must ang +mo g +as ana +te stic +sho tel +in sol +cor sair +nh q +ben ny +sm ma +kap ur +in con +jon as +ener gies +don al +as ad +se z +n pa +archi ved +stimul ate +do p +hy d +gri eving +ãĥ Ī +ron a +why te +tree house +ss ell +sand ro +ko bo +ther most +se clu +hi ya +ge ez +mam as +prisc illa +flav oured +fas s +w old +maker space +cospla y +p tv +happy valentinesday +sequo ia +love craft +gu an +d tm +ci i +yoko hama +pos thum +re q +ðŁĶµ âļªï¸ı +galat asar +dol by +hamp tons +disturb ance +stone henge +ok c +disrup ting +month sary +jun gle +head lights +du stin +micro sof +happy mothersday +ko ko +gra zi +te sto +na idu +mal ay +ari al +ru mb +ab oo +har man +tra pe +spo ils +je ho +go dly +lock screen +z un +pi ous +ma gento +l enders +prob able +corpor al +m our +aw al +su a +call me +ton ne +go vin +devast ation +x j +gear box +war lock +per me +it ate +gaza underattack +du val +paras ite +clement e +le th +i va +fro zen +tho les +to bin +cair n +s ill +luc kiest +conver ts +st ale +pan cra +euro pale +wis dom +sch ur +ì ¶ +verti go +bi j +u bc +nu re +righte ousness +mt c +factor y +ver st +revers ed +hur i +hee chul +fab er +ar r +ul ous +ven om +ph at +green ery +bra dy +à ¦ +: (( +never giveup +di sha +mo ta +health care +dun ham +dex po +den zel +bb ins +f ics +wh am +mc g +eli an +wat a +str alia +tel lu +pe sky +spin off +ar moured +re acted +do fficial +te du +sag ar +mor ally +paralle led +fi os +dow ner +dau gh +re do +world cup +tari q +bar ne 
+glaci ers +oc cult +barbar ian +her mosa +!! !) +y ur +inter nation +p ss +sit u +p int +american air +sw am +dopp ler +ðŁĴĻ ðŁĴľ +cincode mayo +le van +hell enic +mc ne +ju di +yu h +st x +qu are +ðŁĺĤ . +sti g +g els +mot ley +hard work +euro zone +e ad +ç¥ Ń +seab ir +ci us +la id +alpac a +presu mably +pewdie pie +boo ted +am ari +tam ine +sol ace +bar row +acade mies +x ian +om ination +dun geons +b ma +de ity +ai k +stab il +hir a +affection ate +ving ne +new port +ãħĭ ãħĭ +thir ds +re tains +aroma therapy +ski er +ni ma +do pe +cr inge +con domin +to or +anim ator +sar aj +seas cape +minim alism +lake shore +calla way +berg man +à¤ Ĺ +whisp ering +stupi d +ri ghtful +requ is +ir n +se va +ut pol +tuber culo +squ ish +de but +govern mental +christ ine +all man +weap on +s ito +bur i +lo lita +leaf y +fu ch +tin ted +mck en +a hahaha +ðŁĩµðŁĩ ¹ +repe al +ne gan +ðŁķ Ĭ +tail gating +game insight +ðŁıŁ ï¸ı +yaku za +z t +ti ring +pro posing +bow lers +tra itors +ak shi +cler gy +cit o +up sets +tu scal +symph onic +sil ently +shu ff +black well +ðŁĺĤ ) +ko be +rober to +ri dg +dc u +mer ino +ft p +east side +. ~ +nb l +mn leg +ts for +frau dul +ca pping +in my +gymna st +ston es +ss in +twe aks +shag gy +oak land +dem sin +sang ria +mm va +hen nessy +down ton +ri ghtly +in it +aga ve +ob last +northe ast +friend ship +dal a +tro phy +ðŁij ½ +mag in +margar itas +ê · +ww fc +fa sh +di ke +cu d +char t +ðŁij ® +refuge es +jop lin +n cs +imp y +firm ware +pas cu +flam in +health tech +bell letstalk +w aka +ol ls +la go +co wan +bombar dier +sh ome +ðŁĻ ħ +mc master +na ve +well s +u ta +tell ers +mis fits +kap il +face off +af firm +a pro +whit epaper +super yacht +speci mens +al located +... , +- __ +ka w +dachsh und +djo ker +s work +qui ere +or um +ðŁIJ ł +som m +c mt +ingh our +skin ny +lgb ti +gi ggles +break away +resear ched +par ity +my al +ms l +re tained +si vity +make inindia +sol ves +defam ation +wal tham +sri racha +road way +concep tu +al in +iw ant +å Ī +del ft +tender loin +ga ins +faul ts +sw ire +st ellen +pol lo +dy ne +bornon thisday +asdf ghj +sq l +sali m +advis es +vo ip +ìĹij ìĨ +un touched +she il +ontari o +uph ill +so bre +de shi +nov ella +du tton +craw fish +ا٠Ĩ +ma a +tw ine +kal in +ðŁĩµðŁĩ Ń +ye ss +brook s +hoo siers +ton ka +umbrel las +ay ers +ate am +acqu iring +su ction +ä n +wi es +tari ans +soci o +mat tb +shepher ds +o so +charity tuesday +s logans +ninj as +al bat +by te +bash ir +trampol ine +mydayin la +i ja +bas el +ror y +gol die +fi rec +un noticed +pecu liar +sch a +ker son +mour ns +liquid ity +qu ipment +hi bs +ar s +aeron au +slide show +sla bs +delici ousness +sk itchen +hta fc +full erton +cre ighton +aer ob +procrastin ation +az ores +white hall +uss occer +medi ation +djoker nole +and me +um en +noxi ous +jo ss +ili fe +anni vers +sudan ese +et res +under mine +whole foods +diso be +kor i +ade le +eli z +can ti +al on +gymna sium +sarko die +meteoro logist +yl de +ste en +stamp collecting +nas al +lo tt +fran ks +ex ol +ack i +good year +animal rights +y les +vio lets +mm es +s thel +ra pping +tu scan +wai ver +tur ner +eat local +northe asthour +anim ations +tom morow +t sh +ff ame +bra e +pe tron +glam our +br yn +d cs +bal es +ðŁĶ ¶ +bro v +bre v +b ons +physi que +car ne +x e +elix ir +vol ved +l oma +ìľ ł +æ ĺ +van u +ri gs +bal ance +va res +bon ita +sprink le +perfec to +di on +le ak +calcu tta +o ba +d ma +c mon +tun er +pneu monia +bo gus +apolo ge +cl ough +bor ne +)) )) +revi ved +o varian +ner f +c legg +fan fest +cho u 
+reali zes +mc n +li gu +leg alize +just saying +for ster +bo sni +k hi +in dom +hei del +en cryp +si ss +ed di +mar bles +brisban e +y ing +pre paid +wal sall +cooper ate +orche str +mar isa +ho wie +che wy +bren ner +andro meda +e gan +sto cki +cav endish +ag an +ban o +de ir +go g +bl k +re thinking +ch ig +rhe u +sni p +p eng +semin ole +m swx +an nex +lyn da +lewisham ilton +cu mul +tb l +dolph in +agu ero +........ .... +pre lude +at our +gr anger +too ting +ro tun +dis ar +home items +da res +**** **** +ðŁij Ĩ +compre h +jin x +as well +iri e +circul ating +ðŁIJ ¥ +over board +cultiv ate +rhe tt +oriente ering +ca k +bal kans +s itt +jas min +britney spears +ro tor +se aling +g bc +oc ci +f as +eman cip +com er +war time +tic kle +son ny +pac es +log g +at rix +sr p +g win +do bbs +uz be +the wanted +dru sh +ex tru +m icky +honore es +dar win +re dux +mm j +ram i +jalape ño +io c +do ver +ju ju +whit ney +s eng +en ly +au ch +archipel ago +vigil ant +man gal +wil dest +parano id +hal i +bb ly +sanc tioned +real ms +con co +u ddin +c sk +play time +libr a +sav ag +oc tane +rec tan +re turn +par rish +mor rha +cc p +c mu +sa iled +se vent +ro sie +pil ing +he w +boar ded +seg ments +neph ro +( . +cr ats +bak es +ðŁį ¸ +back tothe +sibl ing +kirk land +ke o +gu wa +bre ads +ðŁĺľ ðŁĺľ +t q +haras sed +ga u +wil bur +j isoo +ep er +li sam +tri ppin +sh ino +ru kh +beast mode +cho a +inst aweather +rich land +gar i +fe z +cowboy snation +fur suit +k run +a en +sycam ore +se gun +ent ennial +di h +o ax +demsin philly +ðŁĻ Ģ +sn hl +pen nies +pass words +ma kin +ty e +d eng +kni gh +jeep life +hel pline +a for +zz zz +ste amy +pic ker +iter ate +happen ingnow +ki b +bloom berg +martyr dom +bul ly +assor tment +a hora +zo e +no i +illu stri +agar wal +p sc +electr onica +recruit er +gar diner +rad ha +naf ta +dot net +pi ero +geor g +bel s +ðŁĺĤ ðŁĺį +tuberculo sis +run nin +mor is +haul ing +ev oc +bre thren +sha ir +frame works +a stu +ri gid +ku ma +kre me +jin nah +insu rers +ny u +f ere +nol lywood +good vibes +- ... +toi le +sk ril +instaweather pro +cze ch +pa vel +one piece +nike plus +fi let +cav ity +ðŁı½ âĢįâĻĤï¸ı +ðŁİ £ +dra stic +dail ys +siam ese +re bu +oste o +lar k +f re +sh elling +p é +glad ys +ðŁıĢ ðŁıĢ +gusta ve +submer ged +grand stand +att u +won t +f pv +b ley +jon i +ang ames +weigh ted +al ou +ठ¶ +les bians +f j +anni es +am l +dor ia +dav in +be ta +can c +madewith unity +ha j +bad lands +mu l +blu ec +pa wn +cov ington +neuro logy +htt weets +dysle xia +thel ove +ne at +fork lift +autom ate +une ven +monte ss +he in +ha g +rel ics +competiti veness +can elo +mar tens +bullet proof +sk ittles +g ya +pri mo +americ afirst +woo o +abor tions +?? !! 
+ma che +ld ers +rl ly +preli ms +direc t +cour se +swa in +super cell +ec centric +sting ray +ple ts +wil cox +west in +okan agan +kir an +car bo +bomb ings +ra rest +bo h +gaw d +di gg +mo ana +enti rety +en closed +dodge ball +par ton +milky way +at r +thorough bred +re ally +qant as +epiph any +ine e +aero smith +spi eth +ar thro +ell ini +du bu +bra ving +âļ½ âļ½ +re structuring +illumin ate +equ ili +mp i +ash ton +pony tail +ma scots +flat tering +cru m +ast a +à® ° +stranger things +bar nab +ر ÙĬ +make shift +got cha +will am +cho irs +kilom etres +gho sh +eu than +dol ly +un ning +the ar +cre we +w sw +j ace +dis miss +ke an +ho ta +kh at +~ > +thir u +ren dez +hart man +tee ssi +cas ca +z ah +hydr ange +fo d +aw p +mzan si +thick er +nago ya +ne va +sti que +cast el +dam ian +there by +ji ang +ale k +music islife +ra q +calla han +gou ache +somal iland +sean hannity +ra heem +lo se +elo ve +whar ton +rectan gular +illustr ating +har ne +auti sma +scra pped +ell and +decre e +nag pur +ki pp +so re +n md +ma as +gun a +gart ner +bel li +then ight +je on +gendere quality +gi ver +a el +gar ments +ne u +mardi gras +mar sden +ro wer +pollu ted +camer aman +vin od +be asley +cro c +ji u +hollyo aks +anesthe sia +al les +ste ward +lati mes +ðŁĩºðŁĩ¸ðŁĩºðŁĩ¸ ðŁĩºðŁĩ¸ +tic ian +gor ia +come dic +ðŁ¤Ķ ðŁ¤ĶðŁ¤Ķ +nai ve +sli ons +ł Ī +bur glar +ðŁĺŃðŁĺŃ ðŁĺŃðŁĺŃðŁĺŃ +york shi +se ñ +fan boy +lau rel +inci dence +potom ac +rober ta +presi den +pr yor +os bourne +w ku +te me +pal ae +ðŁ¥ º +re boun +itu de +red dish +k hand +coloni alism +north carolina +ðĿ Ĵ +manne quin +lady bird +ta sty +knowledge able +g shore +ðŁĮ Į +à® © +qu aker +salz burg +med alists +chy na +bridesma id +ma ori +ro p +outra ged +in adequate +truck ers +al ana +ìĿ ¼ +ri x +oooo oooo +command ments +lam beth +aa j +eco friendly +bla z +morecam be +boun cy +rou x +rai ded +mi zed +sh c +gaw x +labor atories +ru bs +rest room +consult ations +ca jun +virgin i +so ir +rev ue +ple in +wag er +ç ¹ +we do +growing up +! ðŁĺĬ +face ted +sin ners +ho vering +ti ene +seas oning +an ja +leg go +il is +fla x +dev o +ash ram +mati sse +ker i +go wer +bo tox +mar shes +unh cr +ts m +opti mus +dun i +stu ffs +so k +order ly +n bad +islam ophobia +raviol i +fab er +cre ds +won ka +in fusion +over weight +daily news +assi mil +acol lege +medalli on +kili manjaro +sti ff +tham es +sun ken +th ard +my dubai +hilari ously +han nel +plu mber +fair view +separ ating +rasc al +qui en +necess ities +confeder ation +ll ll +: ] +weak nesses +bron co +ra ffles +el ot +ãĤ¸ ãĥ +advent calendar +ðŁİ ¹ +stra vel +tun ic +k su +im peach +e spionage +! 
- +di ment +cur rant +bio de +commu ting +by ron +ðŁĴĵ ðŁĴĵ +shad ed +tr uro +cray ons +ar ne +h sc +fre aked +dram ati +fle ek +u cd +marl borough +^ - +cross ings +mal o +black ops +bin ance +cho ked +chen ey +pl o +ge stures +val edic +ryan air +rem ington +v cs +mc kee +ec z +be gs +nail art +mayor of +happy fathersday +war t +pet itions +n ingly +clean energy +bro x +sl alom +exist ent +ab ay +ug liest +tom p +stom a +sel by +goal scorer +ben ji +overwhel mingly +lan s +semiconduc tor +south korea +re scheduled +sk yl +en listed +dow ski +si del +rosen berg +nas ser +white head +pri us +har are +en n +ry der +í Ĥ +mon g +clas ico +transpor ter +po tty +is me +** *** +vic e +sk it +ode ssa +l mp +her n +raci ally +pin oy +paragu ay +obitu ary +go es +bu cha +side walks +angu lar +un constitutional +transiti oning +i bu +gu ys +un packing +oooo oo +black girl +ber gs + ¯ +wordof theday +trump train +thunder bolt +m si +fasci sts +ठ¬ +t sk +collap ses +raje sh +loveis love +migr ating +set back +ðŁĺĬ âĿ¤ï¸ı +t els +safety first +nar rated +jae joong +un answered +lique ur +en nes +dal go +bill ings +salt water +mer maids +lon gs +clap ham +we arec +pic collage +n ach +h ace +pois oned +lo th +ag na +adel rey +guar dia +poli shing +peace keeping +d all +p isa +la pland +process ors +de andre +so bs +p once +dra ins +c be +ðŁİ¥ : +spla sh +meat ball +fon tana +worcester shirehour +ne v +bri sk +b int +ac r +po x +cay enne +skril lex +j fc +hahahaha hahaha +gla s +en gul +tempor al +oni zed +con cre +com pose +vibr ations +plant ers +fer t +criticalrole fanart +t bli +sch allenge +huck abee +munici pal +iam bic +radi os +ne vis +dura bility +mc cla +horse back +inst itutes +ful fill +atta ch +ate ur +ak an +resi sting +illumin ation +hand le +hair care +om ent +macle od +ka iser +g no +bear down +ly f +gl omer +distor tion +z m +san k +roo sters +is now +as ports +ag en +wo ken +st george +ro mper +my le +econom ists +ru to +t will +health and +d ito +ws l +tair p +pra kash +mic heal +h ts +w rights +kat su +fioren tina +defen seman +d itch +var sity +texan scheer +ba ham +sc anned +we il +seduc tive +ðŁijį ðŁı½ +fu e +er win +dav ison +ter ran +moo ds +wool f +re source +@ . +cu sh +ðŁį ° +regre ssion +cur led +la zer +jo anne +ab bott +mo z +down ers +mm mmmm +valent ina +k hair +dream t +cro ok +che k +ste aming +nephe ws +cl eric +as ober +indefin itely +w ye +us news +joy ce +flu shing +wynonna earp +ron do +kis s +hot dog +bar ns +sax ophon +far ley +gas p +decre asing +al way +pe x +l sd +shi ft +p outine +ra zz +rescu ing +ni ko +ho ch +cc l +u aap +n ts +m car +il wx +conqu ering +ket tering +stur dy +delay ing +sto k +vani shed +cath ar +bin gham +in v +ic hiro +he mo +budge ting +[... ] +be ss +sebasti an +slow ed +ðĿ ij +musli m +stun s +acton climate +ve a +se ton +rose tta +oun t +hard in +flu id +ca w +ðŁ¥ Ĥ +yach t +un l +sp hy +provoc ative +or ic +is back +__ _ +nicol as +gy an +loo se +fl in +reb ate +: :: +! 
"@ +com icon +she ff +down stream +chic hester +beach life +mom life +diabe te +ar ra +van e +ok u +ye o +man go +try out +app ell +he irs +arjun a +dd u +na veen +movi c +soci alists +s back +criteri on +soyu z +k her +da z +yol anda +wine oclock +re ina +one w +leon ard +en dez +u bs +support local +facilit ated +carameli zed +b pa +vuel ta +my tho +m ami +spe are +nbap layoffs +fe vre +nick jonas +im print +c so +craig slist +la salle +gi deon +ha doop +dis regard +w ud +tu c +ma gee +acou stics +ta a +qui e +pol a +cr t +dw yer +dis sec +capit ol +men tion +kn oll +he igh +fin ders +plac ements +l se +indi ra +gur i +madhuri dixit +kingdom s +iambic pent +geor gina +je ky +conflic ting +bay an +aga tha +uph old +dr on +vic ar +ex pat +periph eral +pe ssi +fa f +ance stor +? .. +wid get +pun c +comm enced +beav s +air waves +ad dis +po a +de sses +co den +vu e +ru pee +kar in +spo ck +m sy +ภ° +pr ick +fill more +ti fication +thing sto +sar de +em ile +pere ira +n ad +bright ening +arre sting +wo king +usc g +sp ill +raspberry pi +hu go +ite c +is ma +cuff links +optimi zed +oc c +mi wx +en ka +el ited +afford able +sa kh +coron ado +ho h +at ul +ai oli +jim cantore +accoun ted +vin ay +her mit +groo ves +ran ch +r illa +we tter +ou tof +veter in +ni kov +ki an +fair banks +ram apho +n iti +k ko +ru sty +ne stle +tv xq +shahe er +âĿ¤âĿ¤ âĿ¤âĿ¤ +penn ant +gem stones +dem debate +ðŁIJ Ĭ +auton ews +support indiefilm +mach o +ve x +new sat +ne ti +conce ssions +can died +yof the +mac au +den ds +cricke ters +san iti +mari ano +gh at +ar toftheday +¡ ľ +e gos +gen oa +chat bots +bri er +al labout +mon ty +spi ed +r tr +comfor t +sni ppets +real time +gra in +exam ined +en lightening +tt u +god bless +release the +sing ular +ki ans +ha ka +sor ren +defe ct +mar g +equ ities +d orian +su ka +per l +aishwar ya +pul lover +preci sion +fair way +ne ve +rive ting +vill anova +en com +ak o +passion ately +europale ague +siem pre +x vi +enligh tened +c fr +âĺħâĺħ âĺħâĺħ +wast eland +is f +new comers +emergen cy +amphi theatre +- . 
+text books +figur ative +tre mb +pe sc +ab hin +ab bot +ac acia +har ds +por sche +kau ai +el isa +car rick +abo u +elli er +be ch +neu tron +galap agos +ru ben +in nis +how to +nun s +sab ine +i ac +clin ched +no tori +fi ves +cairn gor +per i +gr c +ðŁĴ¯ ðŁĴ¯ +mal m +twelf th +di ff +rout ines +marty n +lin den +synthesi zer +nu mber +game cube +fal kirk +byz antine +queu ing +gr ill +scal able +char red +rou ting +her bali +gri zz +ðŁĺŃðŁĺŃ ðŁĺŃ +tol l +termin als +l pc +ab d +war mups +remo vable +¯ \ +vi go +pap aya +ne ve +lov ingly +jo kers +ib les +sse tt +poten ti +pel e +gi gi +sadi q +leg acy +son o +ru pees +retar ded +ele e +par r +fi ance +ey re +say ers +pend ants +mak nae +al bans +adap ting +p ff +pu berty +ji u +ing rad +hypocr ite +diplom ats +phys ical +rob by +bon sai +ãģ · +f att +catal unya +âľ ĸï¸ı +ro ma +more land +so e +conver sions +stl blues +shol m +gra ssy +pra do +on u +assaul ting +> _ +sett es +dis graceful +aph ra +âļ½ï¸ı âļ½ï¸ı +ठª +kil n +goal tender +s ru +philanthro pist +b als +th n +stu den +sando val +dogre scue +eli ons +asse ssed +lar go +hec tares +sh rm +sa if +cle avage +no ches +n ene +fat alities +cur ing +clean ser +al es +p vp +south bank +pizz eria +marsh als +kni fe +an dover +tbli ghtning +sr sly +ou te +digi mon +timesof india +prome the +le bo +f su +wit z +rever e +man as +mam ba +ch ica +gu an +exhibit or +csr racing +d ere +xx xxx +gu sta +story time +ston ey +organ ics +and u +se am +min ogue +anushka sharma +ab a +ðŁİĻ ï¸ı +ugand an +chro matic +as sn +document aries +sh t +ru paul +loy d +k ats +e us +ite ch +me dusa +pan ty +kel logg +et to +talla de +sha a +do st +p ms +mari ana +je ster +croo ks +ðŁĶ ¬ +min danao +ind hoven +ðŁ¤ ª +le xi +tv n +jan is +co te +ãģ Ĩ +ser rano +iw m +ðŁIJ ¬ +k ke +distribu tors +cap u +counterfe it +camp site +ag gie +ðŁĺ ¼ +chhat tisgarh +~ @ +state u +san di +prevent able +cl s +can ne +mm c +i ver +sa haran +pal is +night out +do s +ap ia +absc bn +manag erial +aro se +mo wx +aro sa +ðŁĮ ³ +under dog +remo ver +astronom ers +lent ils +su scep +smoo ther +pend leton +fau cet +e mory +dal mati +af cb +tic us +exem pt +en rol +d heim +ðŁIJ º +restric tion +star fish +sto w +snor kel +thunder birds +she ad +homo sexual +dy n +as li +andre tti +dou che +dom o +tar mac +slu mber +pr onto +first dayof +mini ature +mari achi +argu s +recomm ending +mobi les +in ce +illustri ous +or c +adver ts +gr its +wea sel +pag oda +over pass +gre ys +maxi mus +arma gh +wood land +sun ni +ðŁĴ ī +ë Ŀ +ti one +soci o +ho s +ðŁ¤Ĺ ðŁ¤Ĺ +wind sor +subsequ ent +munch ies +id h +exclu ding +e mi +cu th +z ai +week days +law suits +barn ard +Ø ª +pe tting +net es +mul ligan +pharmac ists +ra quel +e ton +cran ston +gil ded +cle ary +ce ph +ra a +pam per +lombar di +as in +sher ry +pro d +for te +ari anism +buffalob ills +æľ ¬ +ðŁĶ¥ # +uu u +just ices +car ina +nat in +mas low +dro oling +cog nac +cam ber +el ong +r dr +in en +convic tions +am use +tro ck +harm less +visit ation +gen omic +bl and +beno it +chim p +tuscal oosa +gre asy +x po +gil t +se q +per mitted +christma seve +book s +mu e +old school +human right +be ati +ðŁĶ Ŀ +sh at +sculp ting +h wan +fern andes +sci utto +fu entes +endeav ors +maid stone +un paralleled +shou ted +queen of +mer c +band ic +ve da +sel angor +pi le +ja han +intimid ating +disapp ears +cl ich +za ha +w urst +hi v +fod ils +cor dless +aaaa aa +hy dra +bel inda +e els +bu f +su staining +rugby league +no c +brig itte +( ðŁĵ¸: +tromb one +soo the +smo g +ad p +stab le +ing ley +diagno 
se +ms g +we ss +tic keting +one e +nsw pol +e up +auto psy +adity anath +sun down +river front +si ya +p is +hier archy +dur ango +di jk +ren shaw +he aps +epide mi +david bowie +interne tof +dd i +nation ality +mb ar +air y +win der +w alia +elli ott +c x +bav arian +pl att +an tw +wi wx +sof ter +ne ha +h eller +th and +dani ela +bo ast +degra dation +ðŁĴ¦ ðŁĴ¦ +transform ing +man e +av ut +ðŁĺĪ ðŁĺĪ +vo ter +the e +t ate +pu ff +in door +sop roud +boy ce +boris johnson +wait in +immun ology +ðŁıĨðŁıĨ ðŁıĨ +âĿ Į +street food +liz asober +cavali er +c elia +need le +motor ing +g ato +, ) +ra de +harve st +t ms +jar pad +on ey +air men +v re +impair ment +abhi shek +snoo p +l ant +fam ously +bl ou +s ze +g ander +un touch +tu f +dee jay +col lateral +b ind +ðŁļ © +pin ning +ic n +' ; +the economist +ul tram +worldwater day +ti poff +the i +feed ers +campa ign +sc umb +day weekend +yo m +pe dic +h ough +ps v +pl in +on de +boston marathon +az zy +* _* +con ley +thi ago +hoo o +gal erie +luci d +je tt +gl itz +final fantasy +achiev ers +y ung +peregr ine +op hi +dam es +biom ar +âĺĢï¸ı âĺĢï¸ı +sk c +l ics +fl ank +ar rahman +ho of +uphol stery +t ats +wo z + ¿ +snor ing +ra er +l ju +ap d +pl ating +kan u +im ation +fragr ances +m ra +mor ay +mo tt +im muni +hearti es +bho pal +tim ers +g ata +color way +car nation +win get +si ghs +s ville +optimi st +chate au +olympi ans +ci o +singer songwriter +ny o +fi bers +bur ch +ag ro +mil ne +ig bo +cr amer +ation als +dan ube +pad ma +nor mani +en forced +bre ck +boeh ner +ar den +sur rendered +pros thetic +om a +ha iled +calcul ations +w fa +bi b +fcb live +fon da +west coast +que sts +friend ly +to wie +fit ch +bal ot +star dom +scrat ching +ho sa +thi ka +o ven +stro ke +out post +pharmaceu ticals +hi kari +mu y +af d +fallon tonight +squ at +or u +dra ined +chocol at +ë¯ ¼ +wor ths +ri b +mu j +that s +residen te +it el +boo st +mi gos +mul led +la a +etsy shop +don keys +me k +p tc +flin ders +e hs +ro hit +mu ir +g ad +compos itions +åĨ Ļ +combu stion +i kh +yemen i +wav ed +gar ci +ak os +oo ds +fu sion +se que +s lan +pl ur +kic chasu +shenan do +s ams +worl den +horo witz +with me +mic robes +k ki +ðŁĴĶ ðŁĴĶ +w su +patch work +fre er +y aki +the art +symboli sm +mil er +bt n +ma bu +side kick +motiv ates +sag itt +natur als +serv iced +ps ori +pa ola +qu ig +i badan +gi ggs +ë ³ +sciento logy +si oux +salam at +d res +cad bury +d hawan +ci ón +_ ' +swa pping +maris ka +james bond +explo sives +ay les +af er +s agu +cen sor +tom a +jeff erson +ring ed +par tist +ir responsible +aguil ar +vac ay +equ itable +altrin cham +ac ur +man ish +ger min +schoo led +pu tter +ed ad +nav al +toast y +sol areclipse +dish u +coy ne +ac co +mu ck +mar an +el os +len der +cro ix +worth less +ha ber +gun men +ðŁį ĵ +zen ith +t enders +hur st +hol tz +itali ans +car low +u cd +characteri stic +bun g +av l +u th +sa sia +rs l +red man +neighbor ing +green peace +sti ps +follow party +y gk +en os +omni bus +na issance +chri ssy +secu re +call back +ji hoon +memor y +block er +l anta +daf fodils +bil t +ffer ty +fau st +ie c +nipp les +so g +m nd +jagu ar +bol dly +ab poli +pro position +gun sense +evan sville +cu tters +we go +dou n +do x +stal lions +ka j +shi ppers +j awa +vol o +le ven +pap rika +kov ich +jor di +induc tees +app alling +dial ysis +allevi ate +âĢĶ âĢĶ +pie ter +mid wi +q tr +juli ette +inter mission +haw ks +act ment +one ill +k lin +vam ps +fam ous +cou ld +autom obi +da an +west end +elli p +nh c +mel anch +web series +ton gue +snat ched 
+smy th +tan gible +sl i +e asing +bar stool +over lay +afford ability +ting ed +ter as +ay ush +wanna one +rh ine +dan a +sh ana +kend al +fer tile +w ir +repl eni +lar vae +is ro +con vos +ab brevi +u cc +hun gry +bur rows +ag er +nav i +mat in +du per +cer n +ma don +ķ ï¸ı +é ģ +tu ps +hy att +sh ep +friday night +wis er +hei di +hat ton +p gh +foun tain +wrist bands +ahmadi yya +aeri al +subscri bed +so los +m ace +sla yed +for fe +dul ce +christ mass +arun jaitley +viol ate +ob stru +ni eces +w vu +idy l +fa ze +pre serves +infr inge +premi ers +inter vals +agen cy +( © +stand alone +di mes +bo er +param eters +ge tit +ðŁĺĺðŁĺĺ ðŁĺĺðŁĺĺ +tu lane +for given +scol l +mb ps +smash bros +rob bi +prima vera +ali st +ghost ly +ay at +ye ats +impre ssionist +ear phones +caul field +wai kiki +sal ute +sc ou +mu ay +louis vuitton +bak hta +ado g +inven tions +hur d +forec lo +stream line +thalai var +ch snews +will ard +t sn +euro parl +cru sher +my sore +gro wer +ra ping +pat ti +g den +sm w +muf ti +kid man +ab r +soun ders +skep tical +ðŁĶ İ +sun dar +i me +fer g +feather weight +ar lington +pas qu +ag azine +wearab le +nati c +mccl ure +inter mitt +hor de +six ties +car te +bha v +ze al +experi ential +ador ned +som mer +eno te +hypo thesis +stin ky +pro to +dead lines +vo gel +mus ings +monc ton +gu ter +f le +aci on +voice of +ta sha +inhabit ants +type face +s ba +bts x +ðŁĶ Ĵ +wor x +u hc +jo ko +cell ars +gor o +continu um +... & +weather cee +ha p +sr k +ris ers +lonely planet +un named +co eur +ðŁį Į +the world +ili ke +fa sten +ami go +ri ba +ramapho sa +staf fers +had ley +? ?" +fi ore +sal ut +hu ff +bez os +Ñ ĭ +ra der +kam ala +in line +fill ers +um atic +all in +shat ter +re in +o ku +ch ases +fla gged +baby metal +water stones +ts b +cut out +op hel +aam a +rockab illy +sto lic +jet blue +ich ick +down ton +uzbe kistan +pat na +la q +gr ange +) _/ +subsi di +sc p +newsc ast +it sa +twee tyour +e mor +archae ologists +uni fication +por ta +q x +protec tors +pro hib +charis ma +car tag +ren fre +scul pt +guwa hati +de ma +boo p +unf pa +dex ter +lay la +alleg es +sou ps +never again +l ys +cal c +bar oness +visu alize +ger ber +absor bed +i ers +a han +fon tein +detec tors +verst appen +sv c +formul ated +ac dc +li x +in competent +bh k +lour des +water house +snow ed +appreci ative +sig ma +lizasober ano +pen ned +pay check +tall inn +fanc afe +par isi +av alley +vi g +ru fc +hard ship +so cute +po ise +ì ¹ +roth schild +k ly +???? ???? +l hp +il ay +f hs +am ad +ide als +brad bury +bal boa +nic ot +kid nap +wol ve +tas manian +op t +matthi as +ãĥ³ ãĤ +super markets +mylittle pony +me lee +li ster +gr oun +fe dora +kind ness +en en +bra hms +¯\ _( +ros well +mar lene +ic u +re formation +or ail +he brides +dispar ities +terrac otta +swal lows +re id +influ encing +flu or +den e +tum our +blon des +thunder bird +sh eva +moga dishu +ka b +cre eps +i ving +ene ed +anno y +âĶ Ģ +intri gue +enqu iry +ar aj +tur al +kuber netes +end lessly +divi dends +tor a +ti sh +commemor ates +un ra +tri b +pon ty +ne m +diss ent +brew ingco +ðŁĺ ½ +nor mali +bi of +( ... 
+chil len +ì£ ¼ +mell on +av is +mccor mack +ing ra +enrich ed +custome rexperience +testo sterone +snu g +sett i +ger onimo +inqui rer +bre aches +very thing +bloom ing +mu ra +dispo s +bi de +de va +shade sof +in trin +sh ev +s ven +nayanth ara +gan esha +c ws +ber ta +label led +use um +nick named +ma han +car uso +ap ur +ðŁij Ĩ +w q +orphan age +discar ded +mag nu +lu e +je on +bridge port +pac ing +mercur y +( ðŁĵ¸ +marx ist +amphi bious +transplant ation +stit ching +then burg +gradu al +ãĤ Į +ro ft +ma ils +ine c +guy ana +dopp elg +ver o +re write +head less +harb augh +gate way +car sforsale +sw i +st is +mach t +un de +sura baya +stap leton +nur turing +mil ner +ya o +lma oooo +ko sh +arsen al +k ame +er ry +ar royo +dis misses +ru bbed +rc b +lew d +dil u +and or +vi de +ur in +inter sec +ha ar +al b +year swith +app leton +é al +ul livan +suc cu +monter rey +d mx +artem is +ron nie +farm land +s football +gro tto +anth i +ãĢ ģ +à® Ł +vid ya +jimmy fallon +ൠį +t zer +gravit ational +w thr +u hhh +e hr +tin ker +ti juana +scran ton +ram charan +bar clay +re van +m si +ka p +wr s +we thenorth +tor al +sat u +gro m +fac ep +erick son +z yn +se dge +oo dle +spur sofficial +ds p +sic ilian +soli hull +recei vers +ladak h +hend rick +ther i +presi ding +mc guinness +litt ers +gun nar +gh oul +wi b +n tv +kar o +fro ck +b lau +ampli fy +all is +ul lah +memo irs +kh loe +intercep tions +pet day +lo oney +con fin +ch ay +piyush goyal +frequ encies +ut z +event ual +warm ly +obli vion +an ka +ta it +âĿ¤ï¸ı . +director ial +ru lers +prince s +mu ck +stur ridge +deu ce +abri dged +bagu ette +un cles +pen du +min ding +forre ster +av ila +wall er +wall street +ment or +hin o +high way +crom well +fanart friday +mb i +co yle +a hi +tro ve +spie gel +pay tm +mcin tosh +jan sen +nit i +nash ville +len o +leicester shire +le gos +dic t +ðŁĵ ½ +sp ad +beverly hills +sy rah +separ ates +z ain +un fit +dra gs +tan ia +over flowing +hri thik +haw thorn +z ani +mac far +fi de +to tem +pe ds +fundament ally +cal ico +sin ner +j ä +hil de +ds d +ten ay +ta hit +mil f +lie b +inform ing +up lift +ra el +mortg ages +lec t +ii ii +guillau me +compos ites +old smobile +l end +gar th +com mish +bapti zed +scorpi ons +ru cker +bringback our +alli ance +thalap athy +tal i +sp ans +eri dge +wither spoon +lin da +sky lar +kor n +hom s +Ä į +sil enced +caf fe +ar ty +dist inguish +to wed +pun g +jessic a +ear nest +beau fort +t ama +study abroad +si khs +new bie +nav ratri +mar ble +loun ging +lit ter +dal it +so sa +iz es +gra de +com promising +tr iton +de tta +v j +chau ffe +spec tral +powe red +montess ori +artic ulate +hal ton +al co +ye y +mn twins +acoun ty +ðŁijı ðŁı¾ +âī Ī +mad men +kal a +gru m +chi k +ati s +su me +akh tar +job search +high lighter +bo ath +âĦ ¹ +tar zan +lam bo +âĽĦ ï¸ı +ox fam +dump ster +pretz els +mac os +incl ined +fac tual +adverti sers +shu i +pu ree +ml pfi +anti dote +cap o +pa str +merc ado +but ton +ar min +ag g +lol la +horri bly +er rands +christop he +time snow +monday motiv +li ss +scand als +mc i +dispropor tion +âĺ İ +sur pass +samar itan +so tho +pu rest +fl att +trivi atuesday +delec table +leop old +hermi one +chou dhary +en rich +¡ ¡ +subsi diary +ine qualities +bachel or +auto immune +la kota +i hop +ad jec +the simpsons +sh es +se k +gret chen +up stream +hin akhan +coper nic +x tina +lu g +tough ness +e ad +cli pped +bi us +sl v +fah ren +dee pak +ca u +x an +im mature +dig ni +bo bs +shred ding +but tery +accommod ations +de ven +chun ks +super league +sky bet 
+kil dare +je et +ë į +ce k +wrec ks +pro pane +oh l +tb d +quo i +trum pp +mi mo +reluct ant +ver ne +o ic +ma gh +ar nau +se ver +li dge +stair way +kicchasu deep +ðŁĶ º +mach ining +aama admi +ot i +c da +al it +pan y +inst alls +ac ct +e shop +di em +hard well +fulfill ment +sc afe +qu ack +extrac ts +swee tened +fi ghton +f di +d inger +wal tham +us ur +refe rees +seok jin +gran n +af rin +th n +sch af +par cels +bet is +amar ine +nom an +kh tar +mor itz +cou pling +bar ons +ðŁIJ ¸ +à ¸ +sl p +sad ler +x ander +tri ad +mc millan +kh z +divi ding +ìĹijìĨ Į +dar yl +zed d +le ys +pla ques +flu ori +tipper ary +on nell +di dier +lang ford +im c +the sun +bir dies +ar cha +ye ssss +t di +dar ia +cand ace +al tam +pal aces +ch it +sant am +event ful +book of +ad b +mon stax +cre ole +co el +âĸ ½ +we aren +sten nis +she ath +ati sm +gron ingen +mlpfi m +le pre +wrong ly +rsp ca +rendez vous +acknowle dging +pel vic +solic itor +sla ys +nue stra +lo d +is lander +fer oci +fashion show +ra ss +dge on +adole scents +sma shes +negli gence +grate ful +ved ere +sw oop +ing l +apol ice +vand alism +gan n +jo ao +di supdates +zimbab we +under age +radi ance +w of +bour geo +pla s +cr ani +gh ue +wrec kem +warran ts +re form +jim mie +at wood +ys l +neil himself +l bj +i man +tan to +nois se +ver bs +equip o +al together +mam ent +l ice +dou glass +tier ney +pri med +j hal +furn itu +braz ili +v ill +past els +n ison +u ff +paral ysis +jay e +im po +ðŁij ģ +strate gically +pakistan is +was sup +super bike +thank u +tru elove +sha ikh +israel is +vi p +to g +li en +la ker +grey hounds +cul ars +bian chi +balot elli +ar ran +loo s +str ates +he bron +ar vo +sunder land +the al +tomb stone +sand man +c pac +thanks giving +love him +lat ino +an in +aka if +ĭ ãĤ +tor quay +di est +alli anz +ðŁĺ ķ +golf club +cl lr +wal cott +sch nau +promp ted +nomin ating +len nox +val et +mon ro +may ward +e ph +ðŁĶ Ķ +inter oper +r da +re flex +arm chair +ê° ķ +stri pper +por ti +ph arm +ham za +ni reland +ne ue +h pv +port foli +sun burn +fris bee +be al +bapti ste +x h +ty m +pr ati +o vers +haz rat +deser t +der ry +us ky +em mett +ach arya +)_/ ¯ +shu d +may a +ham ill +ra im +nr c +fitt ings +cur vy +ðŁı ĩ +ster ling +ॠĢ +wal kin +short cuts +mil ly +ast ur +alpha be +pl i +pe z +miss you +rad ford +ml g +ta eyang +notjust lakes +du mps +seren dip +le ur +ra ving +e ster +de priv +absc bn +ðŁijĩ ðŁı» +scar city +o cr +mean ings +cap t +da hl +fer mentation +bri oche +to win +out lander +massi mo +en cro +ðŁ¥ ³ +buil t +po tam +kir i +tm w +monit ored +k ites +peoples vote +gray son +íģ ¬ +afri ka +a dies +i vote +gy ne +g annon +di x +c mc +ou ral +fox andfriends +bel i +ig ne +gl an +katrin akaif +co politics +qual itative +p si +lu cci +disc oura +âĺ ® +kel li +gau tam +carac as +reale st +pu la +in us +hill top +make aw +atten borough +tw y +r arity +peck ham +ma hon +corn elius +clin icians +ton line +tb i +paradi se +ka si +inev it +fresh ness +colling wood +lun atic +defen se +cop d +in fra +wain wright +sains bury +alab am +te ma +lac o +chec ker +releg ated +tren t +stal ks +huff post +bhubanes war +ast ral +share your +prim rose +hi me +cat an +end ment +en dow +cle mens +mal oney +hil ary +game time +den ise +collabor ators +b wo +radic als +gue tta +ici on +au a +snap matic +sat chel +excav ation +base man +s ão +gn ation +fel d +surve y +shah zad +ma st +anirud hofficial +tru cker +ot ago +geo graph +ethe l +âļ¡ï¸ı âļ¡ï¸ı +s ver +mu tt +internetof things +ancho red +wh ouse +bang la +bal main +ç¹ ĭãģ 
+break fa +á Ģ +twi ster +te tris +ca v +stag s +g z +au b +stor med +hel ens +yar mouth +st asy +gustav o +co sc +vin son +up p +sc ricket +assump tions +app e +nu h +u er +pre mise +n aga +e amon +coron ary +na f +north side +el mer +ro tar +out lining +el f +re surg +kat elyn +in can +hyster ia +ce e +am bani +pro lly +Į ãĤĬãģ +ax es +san jose +rem brandt +mag pie +even ly +scor sese +qu aint +f g +b buk +indian football +weare all +spd wy +pis ces +ec g +âĺħâĺħâĺħâĺħ âĺħ +pre orders +: | +ni pple +sal azar +ju me +jail break +min n +bas sett +ze tta +jef free +ad jun +tic on +san diego +drink local +chol era +solic itors +o bo +com post +ni an +wr a +tre ach +ic ic +profession al +del ve +leg ate +histor ia +cro issant +con noisse +nam o +palli ative +chem trails +i ority +global warming +comic art +behavi oural +re sted +li as +cli mates +Ł ãģĦ +rut land +nou rish +menopau se +hot ties +demen ti +ve spa +mel ville +anal ogue +tz man +str ung +im perfect +gl are +cir cling +ros berg +rec o +oc ity +lo ire +em be +do ssier +ne el +nan do +me a +gal vani +fin esse +ag p +berke ley +asi m +âĺº âĺº +quil ted +ish ere +un matched +po tion +for z +at re +selfi es +juli ana +ðŁļ ¶ +âĸ º +mel ton +âłĢâłĢâłĢâłĢ âłĢâłĢâłĢâłĢ +spin rilla +pur cell +ed p +at leti +tony awards +ra ja +pro gno +mol ten +stu ff +p ally +nobel prize +âĻ» ï¸ı +spiritu al +spe ake +sa sha +bri um +tru ss +critici ze +assassinscre ed +yor uba +u lo +fire man +workin progress +ef cc +fla res +ro bot +hi kers +cl l +shado wing +pat sy +leh man +c ns +å ± +guad al +à± į +ra pe +r honda +paralle ls +son ja +langu age +land ings +z ola +cr amps +bur ning +apprais al +jol la +ham m +kas a +gul ly +f go +uly sses +ri be +ðŁĴ Ħ +ib u +eti enne +bri ar +fin ely +comb ating +y ql +go tham +we chat +to paz +primar ies +l se +iz z +hel e +dispon ible +cy stic +bel ichick +th rush +kansas city +ge om +soli di +red bubble +by stand +cambridge shire +par fait +ast le +ow o +ind ore +stom ping +sm elly +ðŁ¤ ĸ +locom o +adm itting +hol me +clock wise +min sk +mc co +for get +ev p +cam ra +ab ella +yo tes +universit yof +mé xico +silver ado +ric ket +crom bie +pu j +eradic ate +deli ght +y go +glam ping +vic a +du ggan +coun ters +cf d +sc our +react js +pu ram +paras ites +in ki +vill en +stel la +li mbo +ang as +k cr +ðŁĴļðŁĴļ ðŁĴļ +vap ori +mum ford +oli gar +à ¼ +al oo +boo ties +ad r +k elli +dru mmers +av ici +nature uk +ron al +in trac +un splash +le che +g oma +el ine +envir o +bi onic +bu eno +mi k +av in +star ling +em powers +cake day +boy cot +ðŁĴļ ðŁĴļ +ðŁĮ¸ ðŁĮ¸ +v ach +m ci +fractu res +ger i +sk ing +exclu ded +lu ce +ja ve +ig gy +evi den +aki stan +a wn +mor als +luci fer +ha ban +tumb ling +sunday motivation +mo sley +captain america +sch icago +the one +mo td +d ts +ðŁIJ ¼ +rep ell +ii i +locu st +geo spatial +mer sey +immer se +desc end +ber nade +j s +boat sales +win der +cran k +sing leton +candid acy +ben a +ðŁı» âĢį +high lander +ol t +k prs +healthy lifestyle +four teen +end the +ith aca +circul ated +r ans +pre valent +ha vas +splend or +roo ster +kalamaz oo +jewell ers +enne dy +rou sey +es y +cann ons +ornam ental +// // +ren don +win ne +mol ding +eid mubarak +coun tess +simon a +ha wa +fo es +du ster +sb u +por tray +mar ries +goo dday +cho co +achi ever +ðŁĺ¹ ðŁĺ¹ +pre neur +tr amp +tom i +n bat +garden chat +farra khan +ever glades +ab ru +sou sa +se ce +homes wee +terre strial +bar it +sri devi +ol u +mel inda +f rick +can dies +ðŁĺŃ ðŁĴķ +qu reshi +family fun +exor cist +cardin al +ny t +dies el +cu 
mulus +capric orn +si ology +lor na +dou gie +an die +super sport +c fl +п ÑĢи +say ang +pe ek +ภĬ +lo be +j em +ing lis +gg led +c sn +amne sty +chu ps +ba es +sau er +ðŁı IJ +mongo lian +en et +back street +dr illed +acce ssing +ce o +b se +ai ken +pur r +wor sen +whe res +war k +testi fying +bu ri +bla st +aw g +ðŁĵ ĭ +re defining +hear ing +u ci +c mp +bon i +tail oring +ta ji +noc chi +em t +stephen king +ne et +compla ins +campaig ner +luci ano +twili ght +ti esto +pas sports +flo yd +cathe dr +na ked +caregi ver +b coz +ade cides +ku ri +ly k +br aries +dren ched +disc lose +ðŁĴª ðŁı½ +le blanc +je tty +gar ty +chip mun +b su +rhyth mic +ic z +fri d +anne x +ame x +solo ist +lanc ers +arro whead +speci fication +simul ated +na is +inver te +bo wing +wor ship +f z +abo ss +sha q +ì¶ ķ +challeng ers +an arch +aamaadmi party +ãħĭãħĭ ãħĭ +suffol k +so corro +sn ell +cla dding +absor bing +shaw a +particip ates +ðŁį Ķ +book stores +bak u +seap ort +ko jima +gab y +pack ard +electr ician +let it +mo wing +fa wad +young jae +hot mail +men ing +u rie +intim acy +con ti +: ") +lifeis good +in ciner +i dri +craz iness +jour nos +fran chi +bott len +al da +ff es +k x +south we +air a +clay ton +sco ti +f j +bri ga +ðŁ¤ĺ ðŁı» +demonstr ators +y z +stor k +na q +casc ades +travel chat +plat a +pad ma +fran ci +at tain +bat girl +lom bard +hoo s +d dos +neon atal +discla imer +r ss +r ant +di sen +tex aste +so cal +frac tal +cam ry +stri fe +sn acking +mu h +sant ander +mor ons +gra f +par ades +hu ston +dru pal +mi ento +kir stel +hy de +vom it +forti fied +sphin x +da v +bir yani +win nings +s baseball +mer ged +lovel ondon +ling ering +dream big +car leton +liveli hood +djan go +astri d +gri ds +down e +bru ised +s ne +scarec row +hel ium +f nc +bi ggs +an ter +restor ative +em pires +ab del +life style +kiwan is +colloqui um +me en +pr ick +anti que +ze b +mi mic +edmon ds +ðŁijĬ ðŁijĬ +q ing +pp el +mc gill +interpre ting +âŀ ķ +rash ad +do ka +narr ator +electro magnetic +ash by +sau ra +iran deal +âģ īï¸ı +krish nan +in di +ff en +bre a +os man +multin ational +chi ppe +recruit ers +aus biz +p ounding +re gen +cur sor +refu sal +mac s +in ak +ax ial +wa ifu +up cycled +hindu stan +cas sini +carly le +scrat ches +re ef +man atee +eat ery +ðŁĵ ¢ +un condition +sen pai +on ther +comic book +pro sciutto +de mar +mi se +ma ge +fre ec +aye sha +al der +android games +ley ton +ho ck +door way +chicagof ire +aali yah +sw elling +bi x +. 
ðŁĺĤ +evan kirstel +torpe do +kon stant +genevie ve +ma ia +ha user +do torg +hide ous +fi k +sp raw +e ek +z appa +wan dered +' ' +ra jan +bam bi +( $) +wid ening +tool box +sa ir +illumin ating +pra ys +out patient +i w +day o +lo b +sw fl +sha des +gu ms +coo kin +ko di +gri ffin +traum ati +ste a +slaugh tered +god bless +air time +pseu do +b sa +hau led +ar if +à¸Ńภĩ +le l +wc po +mil iti +char ters +worl da +ru k +k gs +digital india +is able +idyl lic +esp ino +marie tta +e bo +team canada +ab our +wil ton +rock stars +fav ored +phys ic +wrink le +tb r +d print +ball arat +ad al +z ey +ðŁĺį ðŁĶ¥ +tom lin +mt r +pal sy +fener bah +tight en +phil ia +ir oning +ry u +b ant +enqu ire +ca ir +abur ger +tru n +green berg +chau han +ir ina +sh ani +trend setter +pre tt +zaf ar +alo ve +v ici +pan ic +no o +lu stre +disrup ted +bal lis +son sof +mon si +inst ac +ake st +ëĭ ¤ +kw ame +horror movies +distric t +sau cy +mb an +ar mies +with drawn +med ics +loft us +er oom +be kind +ar ns +all on +un ison +davi ds +cr at +nicot ine +so or +sm x +on co +cospla ying +zombi es +har ms +e ger +ro sy +moon shine +fe in +ce tt +du brov +reg ents +ben itez +ðŁijıðŁı¼ ðŁijıðŁı¼ +ste c +m alia +prioriti ze +ic eland +ft se +v amo +lam ont +homo sexuality +bre es +regu i +cb p +te j +sky sports +deter gent +sha sta +de rel +conserv ancy +colori zed +accol ades +vis o +show your +nan ow +bice ps +us ability +bi m +dailys ketch +pearl jam +stran gest +mega deth +broad casts +bar ren +ar ton +chri ss +confi gu +lu res +is the +e ul +railway ana +global health +gi anni +u aap +s lum +consci ously +ab re +n up +bud get +v ada +e sch +real ness +er ased +th unt +be z +armist ice +ðŁij ¹ +sh run +o led +driver less +ðŁ¤· ðŁı»âĢįâĻĢï¸ı +won dr +sk an +sal aam +mother land +h wang +gen o +gang nam +tw right +endor sing +en ic +ador ation +pau sed +patric ks +do cked +plat te +ff xv +ethnic ity +auto show +side show +after life +re located +orphan ed +food network +dare to +and ra +sla ps +v live +swim s +re imagined +mist le +re vise +real ity +bhar ti +ðŁĴĻ ðŁĴĽ +late st +prou dest +gra sses +lan yard +fresh est +carcin oma +anom aly +zieg ler +sum ner +ly rix +gor g +is d +av el +swild life +me squ +john cena +euro league +sab er +master ful +yar ra +cogn ition +jacob son +abo lic +sir loin +shuk la +moj ito +su pere +st weet +me z +e sa +rudol f +gur a +where you +tt m +win s +trust worthy +ny k +bra den +table top +good food +es on +be k +lingui stic +gra ys +ch ath +h cs +mon i +de ans +cu ssions +ch ell +slo ws +he mi +d app +shar pie +boo sters +a os +str ack +se dona +mu eller +hard wick +or nate +thor a +sal ud +o twol +ch um +mi ho +for age +thel ittle +tear ful +ones elf +min dy +sm g +gmb h +emer ald +ðŁĶ´ âļªï¸ı +tu tti +recep tions +re vising +i brox +tope ka +sal ami +expan se +i books +dob son +cli o +at s +ðŁļ Į +mo ha +is ance +shu tters +moo t +jan ine +marvel comics +jor dani +pos er +kenne th +hy ung +de ja +ase ball +speci ality +eu ston +classic car +had ith +ðŁIJ ī +chas ing +iz o +gros ven +ag lia +thisdayin history +t row +om ile +hu ar +by n +sal ine +div ine +demon ic +ty ran +han dover +revit alization +pa ella +cryp tic +se dg +m end +dun kirk +bre d +wal d +sport scar +a ard +whe aton +da ener +k lan +br t +bakhta war +spi res +schu bert +ro ti +poli sh +o se +ag ame +wonder con +prote stant +bo sa +ðŁĺ Ł +d ü +joy ride +ger trude +âĿ Ŀ +gil a +v h +tw a +tra v +swal lowed +star ve +la in +ent ren +rei ki +su kh +cra ic +az u +web page +kee fe +hypo the +hir sch +hel le +camp ground 
+w amy +tra vi +sha hi +san deep +ru i +han uman +dw p +reposit ory +no or +no ff +un real +p ell +black history +har vick +ma scar +pay ee +pa sha +gastron omy +d ÃŃ +ai g +rosen thal +open day +embelli shed +t tip +sun bathing +go pack +end ome +ï¸ı # +invali d +final four +st fu +squish y +ra sta +mo sch +jam esc +die trich +sel a +mel b +el vi +t dp +sun i +sli t +j ha +bi za +spi ked +l li +l illard +vam pi +syno psis +az har +kendrick lamar +ĮãĤĬãģ ŁãģĦ +heart less +country file +air play +arrog ance +pre e +virtu oso +ãħłãħł ãħłãħł +raj u +le bu +for ward +tu g +dro s +mondaymotiv aton +concep cion +thel o +pad i +looo ol +ÑĢ од +it ss +eth ical +end uro +__ : +expend iture +mon ste +mas king +terri ers +ib is +e mber +cu mple +punctu ation +pi per +ir vin +ade e +yy yyyy +flash backs +cel sius +don nie +bo gota +ben evol +the script +shil pa +pro se +fin dia +ze ke +ne ko +do ves +blues lyrix +fro sh +sowe to +mp lo +al ai +sab i +raq qa +wf tv +stro ller +ian somerhalder +ðŁĶ ª +an on +mo seley +! ?!? +sta king +mol y +car tri +c sg +ast or +transc end +ma er +de ux +cow girl +sas k +pun ter +ma ken +o ates +love tt +grow ler +sag in +v n +ssi ble +officeof rg +y mc +sab ar +faul ty +ap ha +ak on +ðŁij « +snow don +ae w +raise the +ðĿ ĵ +grue some +clement ine +sp ing +lat a +worlden viron +mi mic +can aria +bakhtawar bz +ao a +fal a +ãĤ Ń +avi va +you uuu +thi gh +la dders +gu mbo +tz ky +fu zz +plastic pollution +est ate +strength ened +k ant +dr in +cal vert +transform ational +frigh tened +mac lean +elited angerous +ear thy +t son +to da +j nu +.. , +mic hal +i ban +je ong +is real +sim coe +exclu sives +blue bells +ben e +te u +pil sner +pens ke +athe ists +m pu +cartag ena +ðŁĴĹ ðŁĴĹ +million aires +kk kk +it ar +subscri ptions +remo te +ma fi +hin ton +w cc +ho k +ds b +ab leton +sevent y +pun ks +e indhoven +sh one +mcfar lane +lim popo +empha si +à ¼ +sin fo +pe tre +man grove +ch ino +ber tie +play lists +push awards +p af +deb bie +c do +r ino +ðŁı¾ âĢįâĻĤï¸ı +fol ke +bon nar +th ine +sl an +hal ter +evi e +aw some +vul tures +spar ky +seiz ures +âľ Ķ +ram one +ine ffe +al n +pro ctor +ast ra +the voice +gro te +sci on +dead line +am aya +tain ted +patter ned +exce eding +cross fit +kay lee +drop box +ru shes +tack led +mo by +retro gamer +n cbd +benef itting +shay kh +guild hall +gen try +dream cast +dread ed +bun dled +th aw +revol ving +n pt +kylie jenner +imagin ative +ron i +over came +family time +ds burg +car naval +relation ship +recogni zable +cor oner +ho le +fan fic +emir ates +bur ritos +analy se +thin ner +ne es +galli poli +bl r +cat woman +-- >> +au lt +ada ily +nau ghty +ili o +solit aire +mtv br +jocel yn +arun ach +rep ent +south gate +hy acin +essenti al +fent on +and um +it or +go pal +sl inger +po sei +aw il +wi elding +ra ila +eli as +a sto +à ¤ +tend ency +str ata +ker t +< - +im acele +da es +sti mulus +han ley +fit nes +ec stasy +lim ous +ha iling +ðŁ¤ Ń +chis wick +tar ies +sla v +pul i +moderni zation +black mail +b ingham +h fx ++ + +ðŁĩ®ðŁĩ ³ +ni v +we a +profess or +k off +bol ster +su ave +sequ ences +pepper oni +not te +dre n +ãģ¨ ç¹ĭãģ +hs v +o ga +ap tly +z ad +excel si +rin ka +mol dova +min n +ma bel +conferen cing +bas ing +of er +ob si +hamill himself +care less +brief ed +inhe rent +par ish +dub nation +town sville +sar awak +gee ky +doncaster isgreat +was abi +gu p +phen o +dra inthe +carrie underwood +ble eds +bbc world +ane w +alta f +dul wich +ani ston +w ti +sumat ra +gra fton +bl n +me ster +bode ga +re go +es q +an jo +sump 
tuous +mai sie +ï¿ ½ +wil t +jak ob +el vis +se pul +mu ster +air pollution +president e +happy monday +exten sively +fl ondon +t ls +play ing +pe ed +din ho +var dy +pi ka +n iro +au cus +ðŁį ¦ +nu ll +el ondon +juvent us +imag ines +dis ab +lit o +d ura +work places +promo te +mc caf +wood work +waw x +à® ª +tt ino +shar i +sem per +better together +ðŁijĬ ðŁı» +ze bra +pon dering +en chil +ho m +cosm ic +tan z +mo cked +ec cc +ath ed +abo lish +prop eller +paris agreement +assemb lies +indu stry +fraudul ent +pe sa +chang min +ax x +ðŁĴ µ +irr ational +cu sa +ramad han +octa via +on elove +jac ki +bar ak +taxi der +seri ous +nathan fillion +mc en +ch k +po part +grav ity +copp ola +reading fc +illu sions +j ig +ww x +re sh +ex porting +buzz ard +âĻ ¤ +p cm +lan apar +ko s +arom as +antal ya +ww dc +ven a +phil a +ball in +ðŁij Ħ +quin ta +ma o +f ery +eigh ty +sentim ents +safe guarding +r wa +pu ffs +luc ille +de cath +sl u +nu gent +de ter +braz il +ze iss +super bowl +subsi dy +alter n +hi dalgo +enz ymes +ä ½ +tag ne +hair dresser +adri en +walk out +oppo ses +can tina +bed side +af an +ðŁĶ Ĺ +prophe tic +dan es +un successful +super charged +pk k +exem ption +hart le +secu lar +cli pping +br s +united way +c net +pat chy +ha gan +e en +âļ ľ +var a +sym pathi +never trump +affir mation +om f +ny cfc +ma ja +sur ro +keer th +up scale +sandal wood +mon archy +kno bs +å ĭ +po tholes +hunger games +ter races +na sir +coun sell +welcome to +wa q +se aman +m ita +stun ningly +on theroad +in ability +) !! +bon go +ant v +sp ut +worldenviron mentday +resu sc +y td +fi m +eun hyuk +sa chin +rose anne +cler mont +ape c +am ina +v ening +n antes +al most +sin us +ex as +ty l +ti en +ple ad +lanc s +bur naby +re k +jo om +observ ers +disco graphy +cl g +âĻ ¦ +sn ack +r ti +o ily +crystal li +bru te +web development +topp ings +la f +an is +ad der +reli ving +car lin +battle of +we g +syri an +pon t +n dc +lagh ate +yu ma +sp p +p iti +ro bbing +mart ing +rey kja +raj put +nc ds +kie wicz +âĢ¢ âĢ¢ +vam pire +substan tially +opio ids +nepal i +k line +ar oo +under stand +lit t +u it +thro mbo +sar ies +qu ot +b alling +t tr +s gh +philip p +br ant +ac l +m ello +whit taker +. 
; +defi ant +b gc +repl ying +mir ren +metamor pho +sch wab +bul ge +utili zed +pick ering +par don +d sa +ภĪ +doo ley +cumul ative +Ð » +ur gency +e mir ++ /- +¦ Ī +ot as +âı ³ +station ed +grape vine +ar ac +karan johar +f ancy +sau l +coo gs +lgbt q +ا٠ħ +jav i +u mmer +pl l +den is +dai pur +pu ffin +lewi sham +fand om +co pe +ves matter +s ve +hel pless +deo dor +ostr ich +kaz an +friday the +con dor +v x +sophom ores +rob les +cu tt +cli mbers +ë¦ ¬ +sle g +sn f +mac ys +hydr ating +grou pe +po yn +mou lin +hg tv +lmfa ooo +sulph ur +asdfghj kl +annab elle +hump back +bra ved +viswas am +multi purpose +hu midi +escor ted +barb ican +f ad +cor sa +ðŁ¤ « +pi ppa +here to +can y +ser gi +or cas +o vie +ed ou +s any +glob alization +man cini +food truck +f is +defi brill +sch re +sma fia +love wins +la ut +k aka +hol lande +game on +resurg ence +out side +olympi ad +int an +abstr action +rapi d +pal om +cal le +jas min +attack ers +swag g +mit ra +ky lo +à® ² +her mitage +gor do +e ira +so sfam +roll out +exc ite +sy nod +mer rill +c als +as sa +liveli hoods +ju ve +the black +gopack go +ant lers +alban ian +wool ly +qu iche +puri fication +are th +smar thome +ne k +all blacks +mex icans +is m +ger ms +comple xion +mar ck +u shi +ðŁIJ IJ +char l +ca stic +till erson +giuli ani +biode gradable +mal bec +bo is +ju bil +im es +r ame +gene tic +esp nu +ch ley +so ho +go pher +g sc +buu ren +cu be +bridesma ids +webin ars +to e +mani pur +viol ently +notic ias +ex changing +chi ev +replac eable +muay thai +bu ss +sp il +instal ment +div ya +cait lin +o lim +fil tering +whirl wind +sta red +prior it +pr am +pompe ii +mono logue +k ite +bu ka +âĢ¦ .. +vac cine +bre ro +woz ni +sol ent +re ferr +my rt +gridi ron +galatasar ay +fro ze +clare mont +ðŁ¥ ĥ +victori as +ssel dorf +pa stures +net neutrality +ch or +ðŁij ģ +ಠ¿ +we ho +symp tom +jo sel +in ous +dragon con +power ball +p te +four thofjuly +ec la +ear buds +where abouts +salt life +depriv ation +ch ter +wi ggle +syste m +ps st +ch az +d any +ri mo +oax aca +lanapar rilla +barcel on +melanch oly +way back +ho tro +n si +l illy +kur o +ja han +intellec t +board game +ðŁı Ĭ +sneak peek +k prc +jail s +cand el +zan zi +mor timer +star ch +ra gs +p fa +long live +k art +gir ona +cro cker +christop h +precau tions +war ship +per m +paren t +van gogh +gif ford +allegh eny +ra yn +ut m +sten cil +rec alling +pen ney +z azzle +ìĥ Ŀ +hin ds +aren as +nu ev +law ler +gu in +do this +ðŁij ķ +ì¶ķ íķĺ +we g +ti b +ri din +complex es +turbul ent +pe sos +de marcus +vall arta +sam sun +kis ses +hein rich +deport es +wil ms +ur d +then ext +inki gayo +ho wi +fir sts +carri age +clean liness +mas war +is ch +ax el +si zzle +road house +fr ans +ent ourage +co bble +boo th +benedic t +tal on +fc u +year ofthe +ray on +raider nation +fo yle +ko val +pi anos +l pg +bur mese +man ure +geo caching +cosc ino +b np +fer ra +stro phy +mar ais +ce es +legen dof +kat niss +eno ch +av ed +you know +d prk +ðŁĺ¢ ðŁĺ¢ +sp un +pro st +sor rows +cent red +ke a +gal icia +? 
ðŁ¤Ķ +ÑĢод а +bou chard +ðŁĴĻ ðŁĴľ +yu i +seed lings +jon ah +reco vers +ny rd +board room +su ma +my japs +tun g +sha i +ir gc +eli o +wag ons +ka shi +polic emen +john nie +ale coscino +shop ify +dot ted +de tri +va w +to fficial +in your +chal mers +trac ed +no vi +by es +ari el +nipp on +la pel +gri ez +b gs +fool ing +d ita +vijay sethu +nm wx +as ot +kr anti +hel m +ve di +sic kest +mo chi +k abo +shru bs +he red +b sp +sq m +ham r +dul kar +anth a +nr f +avoid ance +at en +publi x +be arers +nas i +ha p +h ells +ðŁĸ ¥ +ภ· +thelast jedi +oh wx +ðŁį « +wa hoo +there se +rec aps +ss nhq +bird photography +v ay +pet ti +pau lo +bel vedere +( * +gr l +du vet +c pec +sa it +por sch +meas urable +avi ators +fre mantle +bre en +on om +me and +life saving +eu ref +en don +embar as +aira sia +el is +dun kin +star magic +s ill +porto bello +ki efer +ex e +mu ted +ãģ ¦ +we thepeople +logi a +liber al +theforce awakens +min ed +haun ts +freck les +care taker +s india +âķ IJ +dev lin +list on +direction er +oh n +fi garo +em manuel +du bois +cl ones +bru ise +ðŁİĪ ðŁİī +disin fe +der matology +as r +s watch +dis comfort +tam anna +pi day +mack en +k atic +delu sional +shaw nee +gu d +al bino +p ali +din gh +cucu mbers +coffe y +anticip ating +treas ured +web summit +shel tered +sav or +pedago gy +m gs +sh ma +s bu +den ali +cam pos +bubble gum +o ir +le aps +y ler +r one +sansk rit +min t +meat less +futuri st +du de +a vel +prote sted +squ ire +z aki +sz n +har court +cycl one +bour dain +gather ings +d ant +advent urer +parag on +alt man +dd ing +ban erjee +snorkel ing +mother well +mis sy +en der +glo ws +ki wis +chick pea +por o +e fron +app t +u y +speci fied +gab by +e strada +com bos +bour bon +vin i +var un +steph ani +key words +car vings +amit abh +wr ought +tw al +re els +clu bbing +ubi quit +cri t +ambed kar +æ Ļ +prun ing +vaccin ated +boe ing +s ks +lo ona +hypno sis +edel man +pho l +he w +colo sse +mckin sey +u on +to te +sacrific ing +ox i +n ang +e mu +пÑĢи ÑĢода +m th +kers wednesday +argu ed +timel apse +ris king +regul ating +ni gh +likeli hood +cu bic +au ction +rein for +pi stor +no ses +ye l +snu ggles +pe i +jean ette +ta ku +ri th +guy z +ภŀ +y te +ver ted +pay soff +jau regui +hoo ligans +procedu ral +mi b +har dy +el eng +chec kers +all ine +the met +prou dof +keerth yofficial +collabor ator +ni u +infl icted +adv ani +re twee +memor iam +f icial +ti ghter +sal em +re viewers +br ics +ben digo +am ell +tur kish +sush maswar +paul son +pal awan +mol lie +stitch er +s burgh +ir u +hay dn +en ers +aro a +u zzi +saraj evo +hel a +apol lo +nine ty +vac a +sp on +vent u +jel ena +hei fer +avo ids +sp ine +pri ze +mar ist +re creating +me de +woo den +find lay +ro fl +n di +compreh end +yu go +y ü +to work +u fos +son ar +pi ston +recor ding +tent ative +art forsale +pel lets +fre do +ÙĪ ر +mu ses +custom ization +pro found +is ner +ide ally +si am +plan kton +cm dr +man ger +fran ken +customiz able +ठ® +walk away +swi vel +vast ly +no ton +lex a +ex moor +z as +tan te +reduc tions +lol ly +hip sters +benef ited +ë ² +ww www +mascul ine +fi ji +dre y +ph ill +ane ous +nic ol +men dez +disapp ro +ch ner +through s +shen mue +east man +ðŁIJ İ +yu ck +under tale +re ys +go beavs +eng en +c na +mer r +bir k +ãģ¨ç¹ĭãģ ĮãĤĬãģŁãģĦ +âĥ£ @ +yn na +ste ed +offen der +at um +vani shing +presi denti +love them +g nocchi +fri ggin +per il +mad hya +ag ne +dee jay +mar nock +m tb +fold able +@ ___ +stand re +bron x +bow ski +fin ite +cro ckett +b sf +ge tit +seren awilliams +mir o +ignati 
us +sla y +rin se +fon due +sel dom +s more +gan i +dy ce +dmit ry +cru mb +late post +pri mark +oh ana +flor als +do a +remembrance day +d ds +azi one +toon ami +air port +æĿ ± +th ad +fi st +dine sh +dr who +ad words +admi rer +pro je +kyrgy z +à « +manife station +le wan +j ic +thi bau +le ased +van ity +nouri shed +never theless +aug mente +fu elled +che ad +wil shere +ru di +p z +my co +mor ro +herbali fe +hardro ck +de man +dre ality +sp ades +ce vic +bha i +bar on +ultimat efan +hou news +to bi +stru t +ke el +affili ation +the masters +sm al +hu e +este ban +con v +om nic +datab ases +co v +ter ti +st g +snoop dogg +metab ol +leth bridge +ðŁı» âĢįâĻĢï¸ı +year ling +residente vil +nws l +iy aki +griez mann +c ous +ðŁĵĿ : +tor ian +sam i +ðŁĶ¥ðŁĶ¥ ðŁĶ¥ðŁĶ¥ðŁĶ¥ +g are +alli ances +whit field +we ther +refin ing +coy i +kra ken +ðŁĺĺ âĿ¤ +singul arity +lil i +h ns +bol dand +waw rinka +misogy ny +lo vers +c q +b dg +ad ona +gar ter +women of +sc d +recogn ising +mun a +str ou +sign alling +lare do +hell boy +alek sand +un available +pedi atric +as in +mer ia +ri shi +futuri sm +w ye +polari zed +e we +pro pel +in forms +cre ase +~ " +arti ston +like for +heidel berg +er ra +life in +len ny +inter rupt +cohe rent +ca z +vick ers +le veled +f bs +cab ins +bu mmed +apost les +we h +ten don +souven irs +infu ri +pier ce +asse t +m las +go th +di ggin +ann as +yl or +th waite +sw el +pan era +mur derers +croo ked +bs go +ac u +a on +re an +one of +ko hl +bloo dh +pest icide +lost dog +fle xing +ëĤ ĺ +su pra +eter nally +ðŁļ Ļ +pa olo +ol an +mom o +is elle +captain marvel +s lou +mistak enly +akhi lesh +mer t +il inan +bu on +bal kan +mir ro +mill en +der ail +dam on +tit i +bi os +re don +pic ard +par te +ðŁ¤ Ł +Ø º +son ics +fir sth +dd c +veg ans +tur ban +ni gan +lot tie +lyn don +star buck +pink floyd +life styles +am ara +a she +r sc +val a +sm er +cw gc +cli ent +buen as +jag an +coo ps +ðŁijij ðŁijij +speci alizes +snag ged +g lar +ben net +wildlife wednesday +bow den +pi k +art in +empor ium +ar l +re ba +pas ser +disappo ints +additi ve +âľĬ ðŁı½ +bay er +missou la +ha skell +comm ences +ni x +ne man +explo ited +plastic surgery +cc d +aso cial +vo t +sie gel +fro ome +kap am +far a +e ha +pro bes +mw f +meet ing +p bb +ak ins +mistle toe +kingdom hearts +for kids +ec r +bal e +escor ts +adidas originals +k wa +k ts +hallo ffame +ðŁĺį . 
+wag s +pot ted +o wing +honey comb +he fty +uro logy +mer le +b pd +stri pping +re ich +k state +gu ay +yon ge +shak ti +g loom +bat t +son om +n ery +el ba +blan ks +hel le +triple ts +bom bay +ak arta +ab ia +transm itted +rol f +ja is +angular js +fi erc +m ss +trac e +ॠĩ +tom bs +old man +kom bucha +fo l +e health +cere als +are lli +in ari +ðŁĴ © +wo l +liber ties +fa wn +af firm +nun avut +hyster ical +k drama +art es +âĢ¢âĢ¢âĢ¢âĢ¢ âĢ¢âĢ¢âĢ¢âĢ¢ +valent in +man slaughter +gal es +eo in +energi zed +del s +with draws +st les +sar castic +ram esh +incredi bles +lock hart +ya wn +ultimatefan live +oooooooo oooooooo +mu en +guru dev +te er +pe eling +new snow +lingui stics +direc tv +ag end +uni lever +ru ger +han dedly +ero se +li mel +the c +royal ties +fini shers +nr g +m gt +fid get +com ps +bac on +aggre ssively +ab it +ch â +tar de +slu gger +q anda +gre ening +d ats +ensla ved +spec tor +o ye +fre ef +b hand +stop brexit +mis conceptions +cav a +ðŁĺįðŁĺįðŁĺįðŁĺį ðŁĺįðŁĺįðŁĺįðŁĺį +multit asking +hou sel +ferre ira +cen time +ank les +jo dh +hel ly +fro me +out tuesday +nar nia +bal aji +l bloggers +jyo ti +ðŁį ĩ +lan cia +cap ri +y ap +nat ash +down fall +." âĢĶ +à ® +ligam ent +coat ings +ai ded +hi ko +fall ing +encryp ted +yeg food +infringe ment +cu di +ce p +ðŁĺį ðŁĺĤ +tra d +super rugby +ed win +wh iche +vi meo +lay ne +in vigor +he he +dubrov nik +bie ber +u tr +sham an +op ers +ham ill +en ig +di f +ar um +scrap book +min h +diver gence +mckin non +life time +guter res +wil le +ple as +patt y +mic ron +k z +dom aine +ru sher +m ds +ches ney +screw driver +âģ© , +sle dge +hau er +chan a +stam ina +sprink ler +pl n +he ff +bol ton +om on +car rington +accor dion +jor ge +inter ception +in puts +gu ll +tran scription +vanu atu +it ical +eth os +tic h +spac ey +pee king +u mi +ha ger +psycho tic +illi an +illi a +bonnar oo +an ese +pu c +laghate parth +en hall +econom ical +dre dge +% - +u we +tu bular +scoun cil +pe asants +fl er +tumb ler +he p +ford ham +row ley +initi als +ev asion +er nation +plu gins +coch ran +c attle +acid ity +ðŁİĬ ðŁİī +re grann +jump man +ef ace +x ma +patri archy +esco bar +cristi an +tip ton +nu eva +hack ney +back seat +kill arney +aid an +sta dion +simul taneous +ida ho +a je +u th +figu re +clo s +bur k +volun tar +rec ite +macfar lane +cur few +bou do +w gn +sti x +sla p +scrat ched +philli p +jour ne +ex pelled +wa z +u ke +tati ana +ou e +ho pp +dimit ri +ðŁĵ £ +mato logist +electri fying +blu ffs +bill smafia +az cardinals +y aa +x mas +shar a +r ith +g ills +dre s +bar ton +authori zation +imperi alism +home of +to do +foot path +band width +visit spain +moh sin +erup ted +mi ki +insig nia +mike l +ss h +ger a +bank holiday +aw an +t weak +star craft +e al +construc tion +skelet ons +le ep +ine m +bar clay +ship wreck +monsi eur +yo h +ron t +form ative +ser o +le p +horse man +hoo sier +haz mat +cylin ders +cen ti +ðŁĴ¥ðŁĴ¥ ðŁĴ¥ +re em +na ire +mus ically +gras shopper +est onian +termin ology +ro main +blogger rt +tox in +stan ce +cultiv ated +an ast +ðŁIJ į +shi mano +go pher +ene i +recycla ble +gam ification +fight for +c q +avoc ados +ke ys +eli ke +gly cer +shak ur +mobili zation +gal ley +expla in +ex changed +pe th +obe dience +illa ge +en nis +ãĥ ŀ +wi v +walla bies +ma ar +ig ers +fin tech +fin alized +wo j +meaning less +in field +onna ise +e et +bron te +pass ages +ðŁij § +strick land +northern lights +lom ond +h tc +wr ay +shi fter +di alog +ðŁį į +>> >>>> +te atime +ste ch +sic huan +qu ill +fran ca +comple mentary +bar 
rington +marcu s +mal am +goo oo +for sa +elec tra +af s +âĹ Ĩ +tri fe +sn azzy +fo lia +and olan +after dark +wood son +stra de +litt lest +o gun +con wy +co wards +ðŁĺĤðŁĺĤðŁĺĤðŁĺĤ ðŁĺĤðŁĺĤðŁĺĤ +íĬ ¸ +se ul +mur phy +dun ks +kapil shar +jo achim +wom ack +equal ity +aver ages +a ine +ðŁ¦ Ī +tac ular +dis ability +u ked +mid century +bar thol +teas ers +tab ern +nj caa +sp out +op i +ku bball +bl om +so ar +popu lism +meth yl +ðŁijĬ ðŁı¼ +o spre +alo ils +ðŁĵ ĸ +ðŁĮ ļ +x er +sp illing +publ ica +car dam +adi sh +sa cha +p kg +bu da +lyric ist +i bc +gru mp +ho ver +hal ep +anti body +anem one +âĻ¥âĻ¥ âĻ¥âĻ¥ +m cl +litho graph +cc u +s fest +path ic +calli ster +otta wa +gun sn +rut ger +hali but +en vision +differenti ate +ðŁļĢ ðŁļĢ +pir an +lat el +uc n +trou bad +ra ine +fierc ely +learn english +lea se +wex mondays +em it +dray ton +bur rell +scuba diving +hol ler +dr u +clo cked +w ral +ap ro +trans lucent +w bo +patri arch +mo ja +lan nister +fish ery +ne derland +mil dly +mi rai +ma ko +ja p +ðŁĺ©ðŁĺ© ðŁĺ© +pro statec +p anna +ar ama +under taking +tomp kins +ne op +soli ds +sav oury +e ames +cut lery +wood bridge +steam er +ri zzo +wild cat +rat na +lamin ated +kin eni +jal ap +ai des +acknowle dges +?! ?!?! +! ðŁİī +w afc +mag gio +ha ves +dar je +of i +gr il +v asi +bru x +mo hd +fake speare +arn old +r mb +for be +wal leye +ro di +therapeu tics +strate gi +ob ste +mu dder +download able +dd ings +d ca +asi angames +campe on +appropri ation +th century +ram atta +dra ped +bul lion +mu c +one x +se greg +ophel ia +bod ily +âĿ¤ ðŁĺį +wi zar +te ased +ade my +to id +sur a +lazar us +sn ickers +ma se +lo h +bow ed +bibli o +x change +har lan +gho shal +flavor ful +bha gat +alle z +whiche ver +ten stein +disc er +organ iser +mt g +dream liner +t se +hok kaido +mo k +indulg ent +hick man +blin ded +al yn +aaa ah +sp ool +lough borough +inter pret +et v +aristo tle +optimi zing +avici i +madu rai +ju li +naw az +mat chups +ab ide +paint ing +w elling +vel i +octag on +in scribed +po king +plac er +life cycle +kili g +g sp +eli ves +cle ments +na sheed +me sut +incarcer ated +dist illed +wal ang +delic acy +del gado +che z +ch ita +ad ero +tu x +pati l +o do +abh cosmetics +tv c +p bc +in accurate +hardwork paysoff +ball er +quot ation +merchandi sing +ga stri +defen ses +dro gba +bex hill +ban kno +win ona +si eg +p gs +hahah ha +agu chi +su bram +mirac le +de sch +li bre +ba cher +ent ine +bbcra di +lou dest +r ps +pi erc +fr yer +storm trooper +rafael nadal +pas co +exhau stion +epic onetsy +rc tid +kel lie +ga ines +d bz +sm riti +s bridge +lim ited +cla w +technic al +bio graphical +ado red +ภ° +exclu de +ac adia +key boards +fur man +so ca +sur u +ni ps +sw aps +server less +run e +pu ffy +north ampton +nish ings +hen der +cartri dges +gun shot +ðŁĵ ¹ +fil ament +respon dents +pey ton +mountaine er +mer ging +life span +intimid ation +p afc +nl wx +expan sive +pur r +f ck +ca e +at ti +tele thon +so hn +mend el +lo pes +dor i +un broken +te red +tast ings +in active +disin tegr +t assel +share the +pi ano +is lay +air space +z awa +ricci ardo +ming ton +fresh er +cur ry +re vs +pharo ah +h mv +exhilar ating +wh oo +lin kin +kri spy +competen cy +ste wards +ne bu +kat su +ad mins +baz ar +as ar +giving back +s summit +song z +lin us +raj kumar +farm ington +fanta sia +ðŁĺ´ ðŁĺ´ +so bri +lis se +barry more +pri sm +blo b +sen ew +mono xide +exp ire +eigh teen +di pper +xi ao +kil t +hin ch +bbc sport +bam boo +p ter +ex al +ðŁ¦ ĭ +ham lin +expe ditions +star gazing +food security +wy 
lie +ul f +st ingly +on storm +lo eb +bro ome +bn ha +pancre atic +eli ve +!!!!!!!! !!! +ther apper +ortho pedic +avengers endgame +antit rust +ìļ ° +go te +om d +off side +gy llen +win eries +white water +ad l +lu pita +exce eds +consi sted +chew bacca +ash leigh +nhl jets +is san +sh ld +hay at +cran berries +ðŁ¤ĺ ðŁı½ +rock the +spring training +fall out +dairy free +wa j +un decided +so wn +rc n +north wales +htt r +fu mble +d its +comp elled +popu list +min ted +blan chett +. '' +pro pulsion +m illa +au berg +her tz +h ta +u daipur +serendip ity +azte cs +als ace +ðŁIJ ij +lu n +sho es +char li +gar za +ðŁĴ Ł +pro biotics +fox tv +ol is +mi ff +loc alized +diffu ser +si gue +fun ko +rend ous +ðŁĴ ij +jeky ll diff --git a/ldm_patched/modules/sd1_tokenizer/special_tokens_map.json b/ldm_patched/modules/sd1_tokenizer/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..2c2130b544c0c5a72d5d00da071ba130a9800fb2 --- /dev/null +++ b/ldm_patched/modules/sd1_tokenizer/special_tokens_map.json @@ -0,0 +1,24 @@ +{ + "bos_token": { + "content": "<|startoftext|>", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|endoftext|>", + "unk_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + } +} diff --git a/ldm_patched/modules/sd1_tokenizer/tokenizer_config.json b/ldm_patched/modules/sd1_tokenizer/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ba7bf706515bc60487ad0e1816b4929b82542d6 --- /dev/null +++ b/ldm_patched/modules/sd1_tokenizer/tokenizer_config.json @@ -0,0 +1,34 @@ +{ + "add_prefix_space": false, + "bos_token": { + "__type": "AddedToken", + "content": "<|startoftext|>", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "do_lower_case": true, + "eos_token": { + "__type": "AddedToken", + "content": "<|endoftext|>", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "errors": "replace", + "model_max_length": 77, + "name_or_path": "openai/clip-vit-large-patch14", + "pad_token": "<|endoftext|>", + "special_tokens_map_file": "./special_tokens_map.json", + "tokenizer_class": "CLIPTokenizer", + "unk_token": { + "__type": "AddedToken", + "content": "<|endoftext|>", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + } +} diff --git a/ldm_patched/modules/sd1_tokenizer/vocab.json b/ldm_patched/modules/sd1_tokenizer/vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..469be27c5c010538f845f518c4f5e8574c78f7c8 --- /dev/null +++ b/ldm_patched/modules/sd1_tokenizer/vocab.json @@ -0,0 +1,49410 @@ +{ + "!": 0, + "!!": 1443, + "!!!": 11194, + "!!!!": 4003, + "!!!!!!!!": 11281, + "!!!!!!!!!!!!!!!!": 30146, + "!!!!!!!!!!!": 49339, + "!!!!!!!!!!": 35579, + "!!!!!!!!!": 28560, + "!!!!!!!!": 21622, + "!!!!!!!": 15203, + "!!!!!!": 9168, + "!!!!!": 5203, + "!!!!": 2360, + "!!!\"": 28048, + "!!!)": 42532, + "!!!": 995, + "!!\"": 20556, + "!!#": 34997, + "!!)": 28352, + "!!": 748, + "!!@": 40705, + "!\"": 2947, + "!\"@": 43819, + "!#": 9670, + "!'": 13222, + "!),": 37904, + "!).": 26225, + "!)": 4571, + "!*": 37737, + "!,": 29325, + "!-": 43499, + "!...": 22121, + "!..": 35475, + "!.": 22517, + "!:)": 31671, + "!:": 17545, + "!": 256, + "!?!": 
29767, + "!?!?": 47081, + "!?": 6004, + "!@": 15117, + "!]": 34466, + "!âĢ¦": 35068, + "!âĿ¤ï¸ı": 32559, + "!ðŁİī": 49085, + "!ðŁĺĬ": 43434, + "!ðŁĺį": 36438, + "\"": 1, + "\"!": 10377, + "\"\"": 41530, + "\"\"\"": 25539, + "\"\"": 8575, + "\"#": 8345, + "\"'": 31065, + "\"(": 32741, + "\")": 13112, + "\",": 4332, + "\"-": 9375, + "\"....": 37785, + "\"...": 9049, + "\"..": 25403, + "\".": 2811, + "\"/": 39486, + "\":": 7811, + "\";": 37549, + "\"": 257, + "\"?": 11727, + "\"@": 1512, + "\"@_": 20236, + "\"[": 36930, + "\"âĢ¦": 33993, + "\"âĢĶ": 41151, + "#": 2, + "##": 15483, + "#...": 31491, + "#:": 30144, + "#": 258, + "#@": 35062, + "#âĢ¦": 12834, + "#âĢİ": 34262, + "$": 3, + "$$": 24233, + "$$$": 31859, + "$$": 14929, + "$)": 39460, + "$.": 34682, + "$": 259, + "%": 4, + "%!": 35070, + "%),": 37819, + "%)": 16063, + "%,": 14505, + "%-": 48784, + "%.": 12475, + "%;": 33379, + "%": 260, + "&": 5, + "&&": 27791, + "&": 261, + "'": 6, + "'!": 13781, + "'\"": 19479, + "'#": 15319, + "''": 46594, + "''": 8445, + "')": 19175, + "',": 5662, + "'-": 26152, + "'...": 20474, + "'.": 4645, + "':": 7182, + "';": 44517, + "'": 262, + "'?": 17242, + "'@": 26397, + "'d": 1896, + "'ll": 1342, + "'m": 880, + "'re": 982, + "'s": 568, + "'t": 713, + "'ve": 1200, + "'âĢ¦": 42120, + "(": 7, + "(!)": 30253, + "(\"": 18741, + "(#": 6229, + "($)": 46597, + "($": 15186, + "(&": 15042, + "('": 18235, + "((": 22944, + "(((": 33287, + "((": 13796, + "().": 41737, + "()": 8475, + "(*": 48004, + "(*": 39575, + "(+": 12903, + "(-": 20228, + "(...": 45159, + "(.": 43055, + "(:": 8528, + "(;": 23983, + "(": 263, + "(?)": 22885, + "(@": 2181, + "(£": 33987, + "(©": 44886, + "(ðŁĵ·:": 34610, + "(ðŁĵ·": 37999, + "(ðŁĵ¸:": 44422, + "(ðŁĵ¸": 45204, + ")": 8, + ")!!": 47518, + ")!": 7805, + ")\"": 13046, + ")#": 39981, + ")'": 23613, + ")(": 27956, + "))": 13720, + "))))": 42911, + "))))": 34181, + ")))": 18305, + "))": 5167, + "),": 2361, + ")-": 19034, + ")...": 15274, + ")..": 41822, + ").": 1818, + ")/": 26616, + "):": 4143, + ");": 19686, + ")": 264, + ")?": 18765, + ")@": 41928, + ")_/": 45028, + ")_/¯": 45781, + ")âĢ¦": 41844, + "*": 9, + "*)": 30956, + "**": 9825, + "****": 21326, + "********": 42974, + "*****": 43571, + "****": 25167, + "***": 7829, + "**": 4441, + "*,": 41895, + "*-*": 23568, + "*.": 31304, + "*": 265, + "*_*": 44535, + "+": 10, + "+)": 34810, + "++": 47298, + "+++": 35986, + "++": 19056, + "+,": 35885, + "+.": 25238, + "+/-": 47614, + "+": 266, + ",": 11, + ",\"": 3823, + ",#": 11215, + ",&": 26905, + ",'": 10599, + ",)": 44493, + ",,": 21340, + ",,,,": 33225, + ",,,": 14811, + ",,": 8844, + ",-": 29821, + ",...": 20365, + ",.": 41277, + ",": 267, + ",@": 13975, + ",âĢ¦": 14601, + "-": 12, + "-\"": 18646, + "-#": 10151, + "-$": 24946, + "-'": 28010, + "-(": 33345, + "-)": 3535, + "-*": 21527, + "--": 2154, + "----": 5753, + "--------": 11772, + "----------------": 23122, + "----": 30164, + "---->": 35999, + "---": 11079, + "--->": 14518, + "--": 2432, + "-->": 6422, + "-->>": 47252, + "-.-": 32765, + "-...": 43147, + "-.": 44040, + "-": 268, + "->": 5081, + "-@": 10087, + "-_-": 27227, + "-__": 42718, + "-âĢ¦": 30047, + ".": 13, + ".!!": 37805, + ".!": 14030, + ".\"": 18650, + ".\"-": 21234, + ".\"": 1081, + ".\"âĢĶ": 48703, + ".#": 5014, + ".'\"": 41558, + ".''": 49379, + ".'": 5938, + ".(": 22294, + ".)": 5376, + ".*": 26145, + ".,": 5276, + ".-": 12481, + "..": 608, + "..!!": 23707, + "..!": 17994, + "..\"": 15229, + "..#": 15735, + "..,": 47143, + "...": 3002, + "...!!!": 38351, + "...!!": 
39915, + "...!": 16860, + "...\"": 5240, + "...#": 8195, + "...&": 44979, + "...'": 23167, + "...(": 37981, + "...)": 14040, + "...,": 42717, + "....": 2386, + "....\"": 26689, + "....#": 20346, + ".....": 34151, + ".....#": 38867, + "........": 8246, + "................": 24855, + "............": 42965, + "...........": 35008, + "..........": 25526, + ".........": 19881, + "........": 14720, + ".......": 9917, + "......": 5590, + ".....": 3104, + "....": 1390, + "....@": 29790, + "...:": 34570, + "...": 678, + "...?": 16388, + "...@": 12672, + "..": 852, + "..?": 23875, + "..@": 21124, + "./": 31975, + ".:": 15811, + ".;": 47596, + ".": 269, + ".<": 29442, + ".?": 29294, + ".@": 1230, + ".]": 33511, + ".~": 42651, + ".âĢ¦": 18047, + ".âĿ¤ï¸ı": 39085, + ".âłĢ": 30097, + ".ðŁĺĤ": 46580, + "/": 14, + "/#": 13217, + "/$": 36266, + "/-": 19811, + "/.": 39382, + "//": 15348, + "////": 46271, + "///": 22734, + "//": 3502, + "/": 270, + "/@": 8216, + "0": 15, + "0": 271, + "1": 16, + "1": 272, + "2": 17, + "2": 273, + "3": 18, + "3": 274, + "4": 19, + "4": 275, + "5": 20, + "5": 276, + "6": 21, + "6": 277, + "7": 22, + "7": 278, + "8": 23, + "8": 279, + "9": 24, + "9": 280, + ":": 25, + ":\"": 29498, + ":\")": 46432, + ":\"": 12089, + ":#": 26625, + ":$": 33769, + ":'": 8017, + ":'(": 21250, + ":')": 10701, + ":'": 23851, + ":((": 42496, + ":(": 5965, + ":)": 11070, + ":))))": 42339, + ":)))": 21840, + ":))": 10164, + ":).": 39010, + ":)": 1408, + ":*": 12617, + ":-": 13021, + ":-(": 25137, + ":-)": 4223, + ":-": 10323, + ":...": 42140, + "://": 12441, + ":/": 13604, + "::": 33077, + ":::": 43818, + "::": 9788, + ":": 281, + ":>": 39677, + ":@": 14339, + ":]": 43486, + ":|": 45986, + ":âĢ¦": 22365, + ";": 26, + ";))": 41873, + ";)": 3661, + ";-": 35657, + ";-)": 10475, + ";;": 34824, + ";;": 24492, + ";": 282, + "<": 27, + "<-": 47280, + "": 34308, + "<<": 24588, + "<": 283, + "<<": 16482, + "<<<": 35054, + "<|endoftext|>": 49407, + "<|startoftext|>": 49406, + "=": 28, + "=))": 39587, + "=)": 17840, + "=": 284, + "==": 11748, + "====": 21734, + "========": 38952, + "==>": 29688, + "=>": 9714, + ">": 29, + ">.<": 38507, + ">:": 36196, + ">": 285, + "><": 28015, + ">>": 8270, + ">>": 2988, + ">>>": 6395, + ">>>>": 18461, + ">>>>": 18435, + ">>>>>": 32972, + ">>>>>>": 48947, + ">>>>>>>>": 41947, + ">_": 44144, + "?": 30, + "?!": 9785, + "?!!": 25342, + "?!\"": 29315, + "?!": 2835, + "?!?!": 16349, + "?!?!?!": 49084, + "?!?!?": 37619, + "?!?": 11395, + "?\"": 3283, + "?#": 24018, + "?'": 13610, + "?)": 9626, + "?,": 41628, + "?...": 22641, + "?..": 43905, + "?.": 41251, + "?:": 21067, + "?": 286, + "??": 5195, + "??!!": 43219, + "??!": 37341, + "??\"": 44996, + "??": 2197, + "???": 40017, + "???": 3824, + "????": 15936, + "????": 10362, + "?????": 21370, + "??????": 34589, + "????????": 45091, + "?@": 29258, + "?ðŁ¤Ķ": 47928, + "@": 31, + "@#": 39397, + "@.": 43730, + "@/": 28639, + "@": 287, + "@@": 30314, + "@_": 2692, + "@__": 17042, + "@___": 48308, + "A": 32, + "A": 288, + "B": 33, + "B": 289, + "C": 34, + "C": 290, + "D": 35, + "D": 291, + "E": 36, + "E": 292, + "F": 37, + "F": 293, + "G": 38, + "G": 294, + "H": 39, + "H": 295, + "I": 40, + "I": 296, + "J": 41, + "J": 297, + "K": 42, + "K": 298, + "L": 43, + "L": 299, + "M": 44, + "M": 300, + "N": 45, + "N": 301, + "O": 46, + "O": 302, + "P": 47, + "P": 303, + "Q": 48, + "Q": 304, + "R": 49, + "R": 305, + "S": 50, + "S": 306, + "T": 51, + "T": 307, + "U": 52, + "U": 308, + "V": 53, + "V": 309, + "W": 54, + "W": 310, + "X": 55, + "X": 311, + 
"Y": 56, + "Y": 312, + "Z": 57, + "Z": 313, + "[": 58, + "[#": 11115, + "[...": 39975, + "[...]": 43790, + "[": 314, + "[@": 15148, + "[]": 22240, + "\\": 59, + "\\'": 41239, + "\\": 315, + "]": 60, + "]\"": 39434, + "],": 34067, + "].": 26262, + "]:": 21641, + "]": 316, + "][#": 39009, + "][": 29329, + "^": 61, + "^)": 30720, + "^-": 43516, + "^.": 31552, + "^.^": 35791, + "^": 317, + "^^": 34454, + "^^": 9064, + "^_": 14423, + "^_^": 15995, + "_": 62, + "_'": 44701, + "_(": 36951, + "_)": 37393, + "_*": 36237, + "_,": 31417, + "_-": 23193, + "_.": 26841, + "_/": 37647, + "_:": 13109, + "_": 318, + "__": 2355, + "__:": 47043, + "__": 3838, + "___": 43812, + "___": 13530, + "____": 4727, + "____": 25350, + "_____": 38803, + "________": 9549, + "________________": 20115, + "`": 63, + "`": 319, + "a": 64, + "a": 320, + "aa": 1821, + "aa": 3894, + "aaa": 14376, + "aaa": 9583, + "aaaa": 6727, + "aaaa": 19336, + "aaaaa": 31095, + "aaaaaa": 44413, + "aaaaaaaa": 23126, + "aaaah": 49151, + "aaah": 35856, + "aaay": 37846, + "aab": 34108, + "aac": 23251, + "aac": 11346, + "aad": 20464, + "aad": 35894, + "aaf": 37638, + "aaf": 31534, + "aag": 42174, + "aah": 28990, + "aaj": 28727, + "aaj": 43411, + "aak": 37739, + "aal": 22268, + "aal": 30208, + "aali": 27896, + "aaliyah": 46577, + "aam": 12943, + "aam": 22775, + "aama": 45018, + "aamaadmi": 45563, + "aamaadmiparty": 46406, + "aamir": 27456, + "aan": 20705, + "aan": 13426, + "aand": 38054, + "aap": 12023, + "aap": 12052, + "aapl": 34516, + "aar": 4695, + "aar": 13234, + "aard": 46932, + "aaron": 13948, + "aaron": 7709, + "aas": 28542, + "aas": 32205, + "aat": 34018, + "aat": 35004, + "aau": 35426, + "aay": 38281, + "aay": 40249, + "aaz": 26770, + "ab": 596, + "ab": 3937, + "aba": 44204, + "aba": 11102, + "abad": 33444, + "abad": 7155, + "aban": 41662, + "aband": 8595, + "abandon": 28805, + "abandoned": 11227, + "abar": 17860, + "abar": 39805, + "abas": 25402, + "abay": 43542, + "abb": 38954, + "abb": 38297, + "abba": 30870, + "abbas": 37494, + "abbas": 24412, + "abbey": 31927, + "abbey": 10132, + "abbie": 39949, + "abbo": 13536, + "abbot": 44046, + "abbott": 43737, + "abbott": 15649, + "abbrevi": 44843, + "abby": 30586, + "abby": 14694, + "abc": 13137, + "abc": 5334, + "abcnews": 31566, + "abd": 44093, + "abdel": 46511, + "abdomin": 35335, + "abdominal": 39328, + "abdu": 13361, + "abduc": 17884, + "abducted": 31520, + "abduction": 36984, + "abdul": 14227, + "abdul": 15593, + "abdullah": 21317, + "abe": 15856, + "abe": 12734, + "abee": 36037, + "abel": 31938, + "abel": 25318, + "abella": 46156, + "aben": 40865, + "aber": 7828, + "aber": 41867, + "aberdeen": 30539, + "aberdeen": 17236, + "abh": 27484, + "abh": 33649, + "abhcosmetics": 49189, + "abhi": 18113, + "abhin": 44045, + "abhishek": 44502, + "abi": 16867, + "abi": 14161, + "abia": 48604, + "abide": 49163, + "abig": 20863, + "abigail": 25686, + "abil": 21135, + "abilities": 8724, + "ability": 35146, + "ability": 3024, + "abit": 48668, + "ablanc": 33716, + "able": 10102, + "able": 863, + "abled": 10655, + "ableg": 24055, + "ables": 8486, + "ableton": 47169, + "ably": 6748, + "abnormal": 40934, + "abo": 2889, + "abo": 21861, + "aboard": 11661, + "abol": 31768, + "abolic": 46827, + "abolish": 47403, + "aboo": 42433, + "abor": 8416, + "aboriginal": 20422, + "abortion": 12336, + "abortions": 43218, + "aboss": 46401, + "abou": 36455, + "abou": 44053, + "abound": 41037, + "abour": 46637, + "about": 20204, + "about": 781, + "abouts": 36339, + "above": 35019, + "above": 4348, + "aboy": 37077, + "abpoli": 
44779, + "abq": 38767, + "abr": 44932, + "abra": 10694, + "abra": 35087, + "abraham": 40623, + "abraham": 15869, + "abram": 33255, + "abrams": 29852, + "abre": 22472, + "abre": 46756, + "abri": 28605, + "abridged": 45333, + "abroad": 11253, + "abru": 46295, + "abs": 18431, + "abs": 11109, + "absc": 25389, + "abscbn": 44260, + "abscbn": 45810, + "absen": 32453, + "absence": 19240, + "absent": 30363, + "absol": 4624, + "absolu": 7055, + "absolut": 4666, + "absolute": 7501, + "absolutely": 4703, + "absor": 14303, + "absorb": 35806, + "absorbed": 45059, + "absorbing": 46412, + "absorption": 42210, + "abstr": 7530, + "abstract": 23885, + "abstract": 10197, + "abstractart": 31170, + "abstraction": 47696, + "abstracts": 40065, + "absur": 21639, + "absurd": 29757, + "abt": 9850, + "abu": 9167, + "abu": 11787, + "abud": 20180, + "abudha": 21450, + "abudhabi": 25256, + "abuja": 23371, + "abun": 20544, + "abundance": 23236, + "abundant": 31611, + "abur": 23377, + "aburger": 46660, + "abuse": 7678, + "abused": 23855, + "abuses": 37132, + "abusing": 36558, + "abusive": 26858, + "abv": 34172, + "aby": 16342, + "aby": 31378, + "abyss": 33632, + "abz": 42292, + "ac": 546, + "ac": 2816, + "aca": 9213, + "acab": 41388, + "acacia": 44047, + "acad": 32537, + "acade": 2892, + "academia": 22662, + "academic": 31178, + "academic": 7935, + "academics": 26417, + "academies": 42569, + "academy": 29968, + "academy": 4041, + "acadi": 41455, + "acadia": 49236, + "acam": 26172, + "acan": 42227, + "acan": 26318, + "acap": 32357, + "acar": 22232, + "acare": 16961, + "acc": 26805, + "acc": 9318, + "acca": 30883, + "acce": 8564, + "acceler": 10161, + "accelerate": 23619, + "accelerated": 38513, + "accelerating": 41821, + "acceleration": 39387, + "accelerator": 25261, + "accent": 28110, + "accent": 18931, + "accents": 31738, + "accenture": 41853, + "accep": 4616, + "accept": 16447, + "accept": 9338, + "acceptable": 14209, + "acceptance": 17090, + "accepted": 9159, + "accepting": 12855, + "accepts": 22338, + "access": 7596, + "access": 3822, + "accessi": 10787, + "accessibility": 23407, + "accessible": 13977, + "accessing": 46339, + "accessories": 10220, + "accessory": 20417, + "acci": 4263, + "acci": 33943, + "accident": 6608, + "accidental": 24895, + "accidentally": 11061, + "accidents": 22072, + "acclaimed": 21172, + "acco": 44730, + "accol": 33858, + "accolades": 46731, + "accom": 23658, + "accommo": 34495, + "accommod": 14386, + "accommodate": 34708, + "accommodation": 18066, + "accommodations": 45536, + "accomp": 24985, + "accompan": 14746, + "accompanied": 20715, + "accompany": 34142, + "accompanying": 38179, + "accompli": 10205, + "accomplish": 25542, + "accomplished": 16462, + "accomplishment": 26100, + "accomplishments": 24965, + "accor": 4182, + "accord": 34293, + "accord": 28513, + "according": 4717, + "accordingly": 35535, + "accordion": 48760, + "accoun": 3081, + "account": 18424, + "account": 4684, + "accountability": 19377, + "accountable": 24216, + "accountant": 31026, + "accountants": 37222, + "accounted": 43951, + "accounting": 14805, + "accounts": 9974, + "accra": 31900, + "accred": 17451, + "accreditation": 27015, + "accredited": 27647, + "acct": 45569, + "accu": 5618, + "accumul": 19275, + "accumulation": 37112, + "accur": 6551, + "accuracy": 18423, + "accurate": 8858, + "accurately": 24206, + "accusations": 33615, + "accuse": 39414, + "accused": 9434, + "accuses": 27496, + "accusing": 41474, + "acdc": 45067, + "ace": 2675, + "ace": 804, + "acea": 35219, + "aceae": 38153, + "acele": 40868, + "aceous": 
33610, + "acer": 37990, + "acer": 25809, + "aces": 5725, + "acet": 28735, + "acf": 38389, + "ach": 972, + "ach": 987, + "acha": 22686, + "acharya": 45780, + "achat": 32706, + "ache": 27771, + "ache": 7214, + "ached": 17048, + "acher": 38442, + "acher": 17936, + "achers": 25051, + "aches": 14823, + "achi": 3264, + "achi": 9087, + "achiev": 8160, + "achieve": 14798, + "achieve": 8175, + "achieved": 12359, + "achievement": 8245, + "achievements": 16114, + "achiever": 46286, + "achievers": 44544, + "achieves": 40123, + "achieving": 16120, + "achilles": 33327, + "achim": 42335, + "aching": 12864, + "acho": 33130, + "achs": 41195, + "aci": 4359, + "aci": 34100, + "acia": 30163, + "acial": 32422, + "acid": 35474, + "acid": 10085, + "acidity": 48800, + "acids": 27751, + "acies": 20162, + "acin": 39442, + "acing": 9442, + "acio": 26202, + "acion": 44965, + "acion": 24968, + "acional": 26435, + "aciones": 35832, + "acious": 16020, + "acity": 7511, + "ación": 38175, + "ack": 877, + "ack": 725, + "acked": 5698, + "acker": 31201, + "acker": 7940, + "ackeray": 41843, + "acki": 42857, + "acking": 5515, + "ackles": 28503, + "acknow": 13563, + "acknowle": 18100, + "acknowledge": 25209, + "acknowledged": 35913, + "acknowledges": 49083, + "acknowledging": 45645, + "acks": 3858, + "acl": 47593, + "acl": 23073, + "acle": 6504, + "acles": 34164, + "aclu": 37354, + "acm": 39317, + "acmilan": 36500, + "acne": 24195, + "aco": 9463, + "aco": 8800, + "acol": 17431, + "acollege": 43468, + "acom": 17224, + "acom": 22342, + "acon": 11621, + "acon": 11571, + "aconf": 38851, + "acons": 31599, + "acor": 22076, + "acorn": 37537, + "acos": 39943, + "acosta": 31994, + "acou": 8794, + "acoun": 31295, + "acounty": 45449, + "acoustic": 10616, + "acoustics": 43873, + "acp": 19627, + "acqu": 7946, + "acquainted": 40713, + "acqui": 12194, + "acquire": 21576, + "acquired": 15932, + "acquires": 27376, + "acquiring": 42785, + "acquis": 14207, + "acquisition": 16543, + "acquisitions": 39649, + "acr": 43648, + "acre": 26749, + "acre": 9493, + "acres": 11630, + "acro": 21060, + "acrob": 40891, + "acron": 37770, + "across": 2500, + "acrosse": 40979, + "acruz": 40455, + "acry": 10440, + "acrylic": 12252, + "acs": 11782, + "act": 10305, + "act": 1393, + "acted": 10971, + "acti": 4786, + "acting": 6319, + "action": 12493, + "action": 1816, + "actions": 6271, + "activ": 3430, + "activate": 26737, + "activated": 22249, + "activation": 26769, + "active": 19009, + "active": 4046, + "actively": 18645, + "activi": 7230, + "activism": 20117, + "activist": 10850, + "activists": 12649, + "activities": 6514, + "activity": 6206, + "actment": 44807, + "acton": 36167, + "acton": 36697, + "actonclimate": 43797, + "actor": 12181, + "actor": 4035, + "actors": 9255, + "actorslife": 25117, + "actorvijay": 34033, + "actress": 5805, + "actresses": 33639, + "acts": 6816, + "actu": 2375, + "actual": 7488, + "actually": 2955, + "acu": 9204, + "acu": 48475, + "aculture": 38145, + "acup": 30869, + "acup": 27278, + "acupuncture": 40043, + "acur": 44719, + "acura": 30120, + "acus": 33710, + "acute": 19734, + "acy": 18717, + "acy": 2356, + "ad": 594, + "ad": 680, + "ada": 25785, + "ada": 1886, + "adaily": 47254, + "adal": 46646, + "adam": 6037, + "adam": 4944, + "adamlambert": 27659, + "adams": 7942, + "adan": 41802, + "adani": 37499, + "adap": 6341, + "adapt": 22666, + "adaptation": 16566, + "adapted": 26657, + "adapter": 21839, + "adapting": 44120, + "adaptive": 28672, + "adar": 27702, + "adar": 32681, + "adas": 23250, + "adata": 39500, + "aday": 31367, + "aday": 
10280, + "adays": 24337, + "adb": 45630, + "adc": 38201, + "add": 19408, + "add": 3536, + "addams": 38912, + "added": 4149, + "adder": 47557, + "addi": 36378, + "addic": 5709, + "addict": 14614, + "addicted": 16275, + "addiction": 11751, + "addictive": 29638, + "addicts": 29997, + "adding": 8676, + "addis": 43911, + "addison": 32369, + "additi": 26927, + "addition": 6698, + "additional": 10666, + "additions": 22575, + "additive": 48546, + "addo": 40001, + "address": 5834, + "addressed": 20817, + "addresses": 12702, + "addressing": 10594, + "adds": 9944, + "addy": 24746, + "ade": 2194, + "ade": 1928, + "adecides": 46374, + "aded": 9994, + "adee": 47054, + "adel": 4434, + "adel": 27308, + "adelaide": 38193, + "adelaide": 11611, + "adele": 42843, + "adele": 21220, + "adelrey": 43627, + "ademy": 49123, + "aden": 28669, + "aden": 28688, + "adena": 23648, + "adequ": 18232, + "adequate": 22281, + "ader": 21365, + "adero": 49185, + "aders": 27672, + "ades": 5793, + "adh": 42301, + "adhd": 32649, + "adhe": 21175, + "adhesive": 38429, + "adi": 2486, + "adi": 8779, + "adia": 26874, + "adic": 36780, + "adid": 8086, + "adidas": 22396, + "adidas": 9589, + "adidasoriginals": 48575, + "adies": 45834, + "adifference": 37217, + "adilla": 41167, + "ading": 15000, + "adio": 15060, + "adirond": 36843, + "adish": 49009, + "adity": 28596, + "aditya": 37186, + "adityanath": 44437, + "adjac": 32517, + "adjacent": 33836, + "adjec": 45512, + "adju": 16413, + "adjun": 45995, + "adjust": 13784, + "adjust": 28073, + "adjustable": 20476, + "adjusted": 30515, + "adjusting": 41132, + "adjustment": 36081, + "adjustments": 36331, + "adl": 49351, + "adler": 30222, + "adm": 9892, + "adm": 33604, + "admi": 11666, + "admin": 12528, + "admini": 6434, + "administr": 12174, + "administration": 9502, + "administrative": 22424, + "administrator": 22603, + "administrators": 36123, + "admins": 49297, + "admir": 17031, + "admiral": 21013, + "admiration": 39569, + "admire": 17791, + "admired": 36103, + "admirer": 48344, + "admiring": 29835, + "admission": 11315, + "admissions": 22463, + "admit": 13769, + "admits": 16332, + "admitted": 20427, + "admitting": 46148, + "adn": 40339, + "adnan": 42037, + "ado": 4775, + "ado": 2933, + "adobe": 29256, + "adobe": 16484, + "adog": 44913, + "adol": 33512, + "adole": 22704, + "adolescent": 36793, + "adolescents": 45656, + "adolf": 41179, + "adon": 25907, + "adona": 48419, + "adop": 4183, + "adopt": 16441, + "adopt": 11159, + "adoptable": 36905, + "adoptdont": 19674, + "adoptdontshop": 20089, + "adopted": 12538, + "adopting": 30158, + "adoption": 11544, + "adopts": 40853, + "ador": 4992, + "ador": 9162, + "adora": 40031, + "adorable": 6298, + "adoration": 46781, + "adore": 15502, + "adored": 49233, + "adores": 30290, + "adorned": 44953, + "ados": 20079, + "adox": 32188, + "adp": 44426, + "adr": 46189, + "adren": 24204, + "adrenaline": 35552, + "adri": 5935, + "adrian": 25012, + "adrian": 13163, + "adriana": 41363, + "adrid": 26562, + "adrien": 47469, + "adrienne": 40081, + "ads": 2485, + "adu": 16882, + "adu": 24446, + "adukone": 30511, + "adul": 7222, + "adult": 42209, + "adult": 7115, + "adulthood": 40964, + "adults": 9391, + "adv": 1647, + "adv": 21018, + "advan": 33411, + "advance": 27291, + "advance": 7022, + "advanced": 7465, + "advancement": 35437, + "advances": 15852, + "advancing": 21355, + "advani": 48189, + "advant": 7017, + "advantage": 8573, + "advantaged": 38361, + "advantages": 23506, + "adven": 41670, + "advent": 3071, + "advent": 15199, + "adventcalendar": 43492, + "adventur": 
29627, + "adventure": 17251, + "adventure": 4377, + "adventurer": 48098, + "adventures": 7941, + "adventurous": 31179, + "adver": 4806, + "adverse": 30348, + "adversity": 32516, + "advert": 19080, + "adverti": 5682, + "advertise": 31473, + "advertised": 38987, + "advertisement": 18713, + "advertiser": 41829, + "advertisers": 45472, + "advertising": 8158, + "adverts": 44306, + "advice": 4973, + "advis": 4634, + "advise": 25962, + "advised": 23196, + "adviser": 20367, + "advisers": 40984, + "advises": 42761, + "advising": 39648, + "advisor": 12380, + "advisors": 23197, + "advisory": 10224, + "advoc": 6657, + "advocacy": 14443, + "advocate": 12044, + "advocates": 17757, + "adwords": 48343, + "ady": 41446, + "ady": 8781, + "ae": 5548, + "ae": 4542, + "aea": 37048, + "aed": 26912, + "aege": 42304, + "ael": 41533, + "ael": 43340, + "aen": 43085, + "aer": 10195, + "aeri": 27685, + "aerial": 44866, + "aerial": 12440, + "aero": 10196, + "aero": 25026, + "aerob": 42824, + "aeron": 37286, + "aeronau": 42816, + "aerop": 27735, + "aerosmith": 43253, + "aerospace": 20530, + "aes": 10617, + "aes": 35677, + "aest": 40694, + "aesthe": 21181, + "aesthetic": 16179, + "aesthetics": 29295, + "aew": 47108, + "af": 702, + "af": 4391, + "afa": 24953, + "afan": 47474, + "afar": 41637, + "afar": 37866, + "afb": 27022, + "afc": 29742, + "afc": 6571, + "afcb": 44276, + "afcon": 30019, + "afd": 44626, + "afe": 30487, + "afe": 13912, + "afer": 44707, + "aff": 8849, + "aff": 14864, + "affair": 13998, + "affairs": 9830, + "affe": 4556, + "affect": 11361, + "affected": 9715, + "affecting": 18448, + "affection": 33780, + "affection": 28381, + "affectionate": 42578, + "affects": 17285, + "affili": 12120, + "affiliate": 18652, + "affiliated": 37540, + "affiliation": 48377, + "affinity": 41451, + "affir": 25343, + "affirm": 42711, + "affirm": 48625, + "affirmation": 47495, + "affl": 34036, + "affleck": 35584, + "afford": 7951, + "afford": 13223, + "affordability": 44828, + "affordable": 43944, + "affordable": 8926, + "afg": 33994, + "afgh": 9029, + "afghan": 15919, + "afghanistan": 9836, + "afi": 24074, + "afi": 31958, + "afil": 27209, + "afire": 42010, + "afirst": 38601, + "afl": 15132, + "afl": 14356, + "aflo": 41959, + "afm": 38385, + "afootball": 41694, + "afor": 43102, + "afore": 41468, + "afp": 18311, + "afraid": 9474, + "afri": 13888, + "afric": 2136, + "africa": 3093, + "african": 17471, + "african": 4736, + "africans": 26534, + "afridi": 37651, + "afrika": 45833, + "afrin": 45586, + "afro": 16267, + "afro": 21795, + "afs": 48960, + "aft": 22693, + "after": 2278, + "after": 953, + "afterdark": 48966, + "afterlife": 46790, + "aftermath": 20958, + "afterno": 22330, + "afternoon": 39035, + "afternoon": 2716, + "afternoons": 31631, + "afterparty": 35305, + "afterwards": 23911, + "ag": 602, + "ag": 5241, + "aga": 1050, + "aga": 4654, + "again": 1495, + "against": 23838, + "against": 1601, + "agame": 46943, + "agan": 42946, + "agan": 9178, + "agar": 13199, + "agar": 17544, + "agarwal": 43117, + "agas": 20430, + "agate": 25454, + "agatha": 43896, + "agave": 42671, + "agawa": 39433, + "agazine": 44942, + "age": 4758, + "age": 805, + "aged": 3889, + "ageing": 25349, + "agen": 10101, + "agen": 43696, + "agencies": 13887, + "agency": 44885, + "agency": 6270, + "agend": 48653, + "agenda": 8728, + "agent": 21210, + "agent": 6576, + "agents": 10199, + "agentsof": 37074, + "agentsofshield": 38801, + "ager": 44847, + "ager": 10443, + "agers": 22123, + "ages": 2321, + "agg": 45482, + "aggarwal": 39386, + "agger": 27836, + "aggi": 
36844, + "aggie": 44244, + "aggie": 37618, + "aggies": 31047, + "aggio": 36685, + "aggrav": 35203, + "aggre": 10426, + "aggreg": 41968, + "aggregate": 41318, + "aggression": 28900, + "aggressive": 16295, + "aggressively": 48667, + "agh": 17917, + "agh": 14402, + "aghan": 31276, + "agi": 24036, + "agi": 17645, + "agic": 37652, + "agile": 16276, + "agility": 32161, + "aging": 4336, + "agio": 41746, + "agirl": 35469, + "agle": 37035, + "agle": 16702, + "agles": 36374, + "agles": 22679, + "aglia": 46912, + "agm": 19162, + "agn": 36474, + "agna": 43626, + "agne": 29374, + "agne": 48303, + "agnes": 26213, + "agno": 41540, + "ago": 6276, + "ago": 1468, + "agomez": 27127, + "agon": 26775, + "agon": 14901, + "agony": 36977, + "agor": 38920, + "agos": 32657, + "agov": 34227, + "agp": 46048, + "agr": 36639, + "agra": 26660, + "agra": 29830, + "agram": 2447, + "agre": 3180, + "agreat": 37594, + "agree": 5953, + "agreed": 12774, + "agreeing": 40720, + "agreement": 8286, + "agreements": 25865, + "agrees": 17854, + "agri": 20527, + "agri": 30326, + "agricul": 7234, + "agricultural": 15440, + "agriculture": 9720, + "agro": 33178, + "agro": 44589, + "agron": 41314, + "agroup": 40099, + "ags": 16926, + "agt": 39681, + "agu": 3922, + "agu": 36544, + "agua": 18482, + "aguchi": 49206, + "ague": 2095, + "aguero": 42964, + "agues": 7000, + "aguil": 27946, + "aguilar": 44715, + "ah": 1772, + "ah": 1288, + "aha": 12082, + "aha": 8429, + "ahah": 38661, + "ahaha": 32423, + "ahahaha": 42620, + "aham": 36036, + "ahan": 45061, + "ahan": 19255, + "ahar": 31038, + "ahar": 38760, + "ahe": 27688, + "ahead": 3158, + "ahem": 39995, + "ahh": 13152, + "ahhh": 14769, + "ahhhh": 21054, + "ahhhhh": 36392, + "ahi": 45349, + "ahi": 24154, + "ahl": 30433, + "ahmad": 32167, + "ahmad": 16902, + "ahmadi": 38656, + "ahmadiyya": 44865, + "ahmed": 19491, + "ahmed": 12081, + "ahmedabad": 26966, + "ahn": 33405, + "aho": 28114, + "aho": 38444, + "ahora": 43113, + "ahouse": 33197, + "ahoy": 38652, + "ahs": 16937, + "ahu": 11908, + "ahu": 16515, + "ai": 2014, + "ai": 2215, + "aia": 27046, + "aib": 34780, + "aic": 29454, + "aid": 13723, + "aid": 5182, + "aida": 33830, + "aidan": 48814, + "aidan": 26945, + "aide": 31558, + "aide": 9746, + "aided": 48707, + "aiden": 40020, + "aides": 49082, + "aids": 11759, + "aig": 27295, + "aig": 46989, + "aii": 22478, + "aik": 42575, + "aiken": 46342, + "ail": 1457, + "ail": 9154, + "ailed": 38919, + "ailing": 29999, + "ails": 27024, + "aim": 6787, + "aim": 11255, + "aime": 39872, + "aimed": 20247, + "aimee": 36318, + "aiming": 21768, + "aimo": 36706, + "aims": 13326, + "ain": 8326, + "ain": 2210, + "aine": 48983, + "aine": 17634, + "ains": 27621, + "aint": 29543, + "aint": 13099, + "ainted": 39933, + "aioli": 43949, + "air": 1281, + "air": 1922, + "aira": 35085, + "aira": 46444, + "airasia": 48020, + "airbnb": 23098, + "airborne": 22755, + "airbus": 15324, + "aircraft": 7706, + "airdrop": 38434, + "aire": 7682, + "aired": 21938, + "aires": 17034, + "airfield": 40525, + "airforce": 23511, + "airing": 20453, + "airline": 14847, + "airlines": 8929, + "airmen": 44499, + "airplane": 16451, + "airplanes": 33319, + "airplay": 47024, + "airpollution": 47362, + "airport": 48337, + "airport": 3259, + "airports": 21543, + "airs": 18539, + "airshow": 27139, + "airsoft": 30134, + "airspace": 49280, + "airstrikes": 37220, + "airtel": 34784, + "airtime": 46617, + "airwaves": 43910, + "airways": 14299, + "airy": 44453, + "ais": 7616, + "ais": 11393, + "aise": 30505, + "aish": 21946, + "aisha": 40211, + "aishwar": 29687, + 
"aishwarya": 44019, + "aisle": 26917, + "ait": 25613, + "ait": 40814, + "aj": 3990, + "aj": 6342, + "aja": 42343, + "aja": 19633, + "ajax": 21933, + "ajay": 22494, + "ajay": 28726, + "ajaydevgn": 35515, + "aje": 48818, + "aje": 33315, + "ajes": 38791, + "aji": 26102, + "aji": 21153, + "ajit": 42261, + "ajith": 24118, + "ajo": 26958, + "aju": 36855, + "ak": 819, + "ak": 1196, + "aka": 19154, + "aka": 3412, + "akaif": 45736, + "akan": 43678, + "akan": 38244, + "akapoor": 40064, + "akarta": 48603, + "akb": 41962, + "akbar": 27180, + "ake": 10558, + "ake": 5776, + "aked": 6115, + "aker": 14245, + "aker": 3074, + "akers": 5788, + "akes": 4764, + "akest": 46679, + "akh": 14821, + "akh": 30660, + "akhan": 28158, + "akhi": 41660, + "akhilesh": 48495, + "akhtar": 45458, + "aki": 18173, + "aki": 6592, + "akin": 24630, + "akin": 13601, + "aking": 1809, + "akins": 48568, + "akira": 34001, + "akis": 27732, + "akistan": 46221, + "akley": 39908, + "ako": 44027, + "ako": 14541, + "akon": 47105, + "akos": 44659, + "akrish": 37434, + "akron": 26115, + "aks": 2953, + "aksh": 28226, + "akshay": 21483, + "akshay": 38914, + "akshaykumar": 23624, + "akshi": 42634, + "aku": 18151, + "aku": 20815, + "aky": 11977, + "al": 526, + "al": 566, + "ala": 12783, + "ala": 3449, + "alab": 6365, + "alabam": 45880, + "alabama": 8422, + "alach": 24622, + "alad": 23074, + "aladdin": 29951, + "alai": 47072, + "alain": 28999, + "alam": 16612, + "alam": 16012, + "alamo": 41922, + "alamo": 34632, + "alan": 9563, + "alan": 5773, + "alana": 43405, + "aland": 34304, + "aland": 6819, + "alar": 34333, + "alarm": 11321, + "alarming": 37209, + "alarms": 31236, + "alarts": 31422, + "alas": 7276, + "alas": 22412, + "alaska": 9562, + "alaskan": 33898, + "alastair": 42062, + "alay": 30289, + "alay": 36450, + "alaya": 36397, + "alb": 45248, + "alba": 25254, + "alban": 10882, + "albania": 29170, + "albanian": 47721, + "albans": 44119, + "albany": 17359, + "albat": 42797, + "albeit": 38984, + "alber": 6413, + "albert": 34174, + "albert": 9507, + "alberta": 11048, + "alberto": 22714, + "albi": 18512, + "albino": 48062, + "albion": 24071, + "albu": 2216, + "album": 40712, + "album": 2431, + "albums": 10705, + "albuquerque": 31079, + "alcat": 35361, + "alche": 37909, + "alchemist": 38913, + "alchemy": 39501, + "alco": 6848, + "alco": 45446, + "alcohol": 9426, + "alcoholic": 25098, + "ald": 4539, + "ald": 2928, + "alda": 46440, + "alde": 33114, + "alden": 17155, + "alden": 27710, + "aldenrichards": 20051, + "alder": 18220, + "alder": 46571, + "aldi": 23204, + "aldo": 9933, + "aldridge": 38084, + "alds": 14285, + "aldu": 6505, + "aldub": 10532, + "aldub": 15247, + "ale": 1440, + "ale": 1336, + "alea": 26518, + "aleague": 38909, + "alec": 29804, + "alec": 19954, + "alecoscino": 47948, + "aled": 4970, + "alee": 24515, + "alej": 23440, + "alejandro": 32950, + "alek": 26906, + "alek": 43310, + "aleksand": 48429, + "alem": 11825, + "aleppo": 19258, + "aler": 25674, + "aler": 27335, + "alert": 4662, + "alerts": 22144, + "ales": 44171, + "ales": 5962, + "aless": 21864, + "alessandro": 37344, + "alestine": 31945, + "alex": 2959, + "alex": 4134, + "alexa": 16273, + "alexand": 10696, + "alexander": 25527, + "alexander": 7563, + "alexandra": 19054, + "alexandre": 35711, + "alexandria": 21171, + "alexis": 35023, + "alexis": 14243, + "aley": 21635, + "alf": 27098, + "alfa": 23482, + "alfar": 38870, + "alfie": 28598, + "alfon": 31947, + "alfonso": 41784, + "alfre": 20982, + "alfred": 16553, + "alfredo": 32291, + "algae": 25654, + "algar": 36291, + "algarve": 
40290, + "alge": 24336, + "algebra": 33694, + "alger": 18568, + "algeria": 25257, + "algon": 33007, + "algori": 14912, + "algorithm": 23295, + "algorithms": 26039, + "alham": 23352, + "alhamdulil": 35129, + "alhamdulillah": 38982, + "ali": 835, + "ali": 3558, + "alia": 2492, + "aliaa": 36468, + "alian": 3464, + "alias": 40026, + "alibaba": 39231, + "alic": 25265, + "alice": 23759, + "alice": 9192, + "alici": 31630, + "alicia": 20914, + "alie": 8697, + "alien": 22846, + "alien": 9639, + "aliens": 14883, + "alier": 39493, + "alies": 38086, + "alife": 41347, + "alife": 21100, + "alig": 21272, + "alight": 36157, + "align": 31160, + "aligned": 29292, + "alignment": 27267, + "alik": 31141, + "alike": 12665, + "alim": 42075, + "alin": 42746, + "alin": 40063, + "alina": 39529, + "aline": 21799, + "aling": 5169, + "alion": 19049, + "alis": 21308, + "alis": 20114, + "alisa": 38918, + "alisation": 42143, + "alise": 36718, + "alised": 25099, + "alism": 5607, + "alison": 28653, + "alison": 16970, + "alist": 44900, + "alist": 3320, + "alistair": 40551, + "alistic": 22302, + "alists": 5653, + "alit": 45566, + "alities": 27925, + "ality": 1694, + "alive": 40467, + "alive": 4716, + "aliz": 30979, + "alization": 8026, + "alize": 10268, + "alized": 6141, + "alizer": 38922, + "alizes": 26181, + "alizing": 13023, + "alk": 30246, + "alk": 21577, + "alkal": 33450, + "alkaline": 39210, + "all": 813, + "all": 615, + "alla": 13884, + "alla": 14000, + "allabout": 43996, + "allah": 6378, + "allan": 36552, + "allan": 15404, + "allblacks": 47728, + "allday": 35862, + "alle": 4870, + "alle": 29478, + "alled": 7379, + "alleg": 7456, + "allegations": 16992, + "alleged": 12133, + "allegedly": 14177, + "alleges": 45051, + "allegh": 41479, + "allegheny": 47851, + "allegi": 28832, + "allegiance": 30955, + "allen": 16712, + "allen": 6386, + "allenge": 31387, + "aller": 10116, + "aller": 30630, + "allergic": 28809, + "allergies": 28247, + "allergy": 24408, + "allery": 32542, + "alles": 43354, + "allevi": 31682, + "alleviate": 44799, + "alley": 36205, + "alley": 10329, + "allez": 49137, + "alli": 4123, + "alli": 15268, + "alliance": 45404, + "alliance": 8945, + "alliances": 48403, + "allianz": 45740, + "allie": 25040, + "allied": 20045, + "allies": 17277, + "alligator": 28574, + "allin": 45007, + "allin": 22395, + "alline": 48182, + "alling": 2992, + "allis": 45309, + "allison": 34602, + "allison": 16578, + "allman": 42611, + "allo": 8107, + "allo": 18389, + "allocated": 42716, + "allocation": 35139, + "allon": 46693, + "allot": 26363, + "allotment": 33750, + "allow": 5645, + "allow": 6722, + "allowance": 35696, + "allowed": 7885, + "allowing": 12458, + "allows": 9966, + "alloy": 22467, + "alls": 1997, + "allstar": 31247, + "allstar": 22974, + "allstars": 31198, + "allthe": 29253, + "allu": 20157, + "alluarjun": 39333, + "allure": 41814, + "ally": 7461, + "ally": 769, + "alm": 28303, + "alma": 32933, + "alma": 18337, + "alman": 29394, + "almanac": 41268, + "almighty": 21898, + "almond": 15646, + "almonds": 30468, + "almost": 47534, + "almost": 2671, + "aln": 47203, + "alo": 3435, + "alo": 6183, + "aloe": 30728, + "alog": 15813, + "alogue": 9101, + "aloha": 23160, + "aloils": 49002, + "alom": 22236, + "alon": 14097, + "alon": 42846, + "alone": 4702, + "along": 8300, + "along": 2528, + "alongside": 8646, + "alonso": 25704, + "aloo": 46187, + "alore": 14323, + "alot": 16945, + "alou": 43180, + "aloud": 30028, + "alove": 46669, + "alove": 37045, + "alp": 32020, + "alp": 39342, + "alpac": 30128, + "alpaca": 42561, + "alph": 6720, + 
"alpha": 11807, + "alpha": 8624, + "alphabe": 45796, + "alphabet": 22335, + "alphon": 37865, + "alpine": 17055, + "alps": 18191, + "already": 2426, + "alright": 10866, + "als": 23982, + "als": 938, + "alsace": 49388, + "also": 1446, + "alt": 9995, + "alt": 10006, + "alta": 24470, + "alta": 25378, + "altaf": 47342, + "altam": 45624, + "altar": 16385, + "alter": 4949, + "alter": 21393, + "altered": 25201, + "altern": 47463, + "alternate": 15926, + "alternati": 16699, + "alternative": 37327, + "alternative": 8248, + "alternatives": 25041, + "alth": 23463, + "alth": 5863, + "although": 9421, + "alti": 35531, + "alties": 17276, + "altitude": 23241, + "altman": 48100, + "alto": 35053, + "alto": 17518, + "altogether": 45689, + "alton": 41331, + "alton": 36550, + "altrin": 38458, + "altrincham": 44718, + "alty": 5546, + "alu": 4776, + "alu": 27991, + "alum": 5404, + "alum": 10553, + "alumin": 14563, + "alumini": 22908, + "aluminium": 23631, + "aluminum": 15251, + "alumna": 30313, + "alumni": 6646, + "alumnus": 23633, + "alums": 30155, + "alv": 20928, + "alvar": 25196, + "alvarez": 26924, + "alvaro": 41941, + "alves": 38547, + "alvin": 27023, + "alway": 14046, + "alway": 43764, + "always": 24997, + "always": 1466, + "alwx": 32768, + "aly": 6468, + "aly": 12910, + "alyn": 49150, + "alyss": 29490, + "alyssa": 18898, + "alz": 12936, + "alz": 41128, + "alzheim": 15212, + "alzheimer": 21151, + "alzheimers": 34592, + "am": 548, + "am": 687, + "ama": 18206, + "ama": 1696, + "amad": 45095, + "amade": 37366, + "amag": 32049, + "amal": 15315, + "amal": 36753, + "aman": 19890, + "aman": 10110, + "amand": 14560, + "amanda": 10036, + "amar": 6424, + "amar": 19607, + "amara": 48522, + "amari": 42565, + "amarillo": 40449, + "amarine": 45591, + "amarketing": 30788, + "amas": 22716, + "amas": 15667, + "amat": 38664, + "amat": 25455, + "amate": 12453, + "amateur": 14287, + "amaya": 47210, + "amaz": 1185, + "amaze": 24846, + "amazed": 18944, + "amazing": 15949, + "amazing": 1370, + "amazingly": 20368, + "amazon": 13630, + "amazon": 4140, + "amb": 9042, + "amb": 16853, + "amba": 27003, + "ambani": 45967, + "ambas": 5634, + "ambassad": 5758, + "ambassador": 6795, + "ambassadors": 16832, + "ambed": 42089, + "ambedkar": 48131, + "amber": 18292, + "amber": 9986, + "ambi": 11844, + "ambient": 23447, + "ambigu": 35702, + "ambition": 20673, + "ambitions": 34152, + "ambitious": 18666, + "ambro": 17585, + "ambrose": 24253, + "ambu": 34423, + "ambul": 13944, + "ambulance": 15555, + "ambush": 40725, + "amc": 24942, + "amc": 16921, + "amd": 20845, + "ame": 3995, + "ame": 780, + "amed": 5660, + "ameen": 24229, + "amel": 31988, + "amel": 10960, + "ameli": 21599, + "amelia": 21433, + "amell": 48198, + "amen": 18716, + "amen": 12335, + "amend": 12425, + "amendment": 15019, + "amendments": 40901, + "amenities": 30096, + "ament": 27528, + "amer": 17081, + "amer": 16147, + "ameri": 40422, + "americ": 1283, + "america": 2224, + "americafirst": 43216, + "american": 8746, + "american": 2151, + "americana": 26221, + "americanair": 42538, + "americani": 39726, + "americans": 6676, + "americas": 33343, + "americas": 18142, + "ames": 5469, + "ameter": 23393, + "amethy": 30291, + "amethyst": 31485, + "amex": 46390, + "amg": 21324, + "amher": 32311, + "amherst": 39065, + "ami": 6100, + "ami": 3065, + "amic": 25824, + "amic": 21383, + "amid": 18908, + "amid": 11953, + "amide": 30952, + "amidst": 25172, + "amie": 36901, + "amig": 40294, + "amiga": 35329, + "amigo": 44991, + "amigos": 28176, + "amii": 35462, + "amiibo": 38871, + "amily": 36732, + 
"amin": 14337, + "amin": 20235, + "amina": 47531, + "amination": 30355, + "amine": 35823, + "aming": 3507, + "amino": 33464, + "amir": 26029, + "amir": 21973, + "amis": 29829, + "amish": 24958, + "amit": 15083, + "amit": 25255, + "amitabh": 48124, + "amitshah": 32374, + "aml": 43185, + "amma": 29786, + "amman": 29243, + "ammo": 33474, + "ammunition": 35060, + "amn": 24073, + "amne": 14596, + "amnesia": 41741, + "amnesty": 46330, + "amnesty": 21177, + "amo": 4833, + "amo": 11156, + "amodi": 9826, + "amon": 17492, + "amon": 24046, + "among": 12310, + "among": 4265, + "amongst": 12520, + "amoo": 26977, + "amor": 19977, + "amor": 15973, + "amore": 38937, + "amore": 22691, + "amores": 36338, + "amos": 18133, + "amoto": 25492, + "amount": 6403, + "amounts": 16747, + "amour": 29908, + "amovie": 41062, + "amp": 3521, + "amp": 6259, + "amped": 22640, + "amphi": 16379, + "amphibious": 45206, + "amphitheater": 41285, + "amphitheatre": 44039, + "ample": 34162, + "amples": 14536, + "ampli": 15647, + "amplifier": 31743, + "amplify": 45308, + "amps": 19252, + "ampton": 29410, + "ampton": 9347, + "amr": 30916, + "amreading": 16546, + "amrit": 33849, + "ams": 1396, + "amster": 9110, + "amsterdam": 9441, + "amtrak": 27855, + "amu": 11347, + "amu": 32336, + "amur": 35014, + "amura": 35487, + "amus": 36269, + "amuse": 21421, + "amuse": 44367, + "amused": 30212, + "amusement": 32570, + "amusic": 20266, + "amusing": 31789, + "amwriting": 9660, + "amy": 10547, + "amy": 5187, + "an": 514, + "an": 550, + "ana": 6588, + "ana": 1388, + "anab": 34742, + "anada": 27948, + "anag": 12115, + "anagh": 40774, + "anaheim": 23728, + "anak": 34814, + "anak": 38658, + "anal": 2785, + "analo": 34179, + "analog": 19963, + "analogue": 46031, + "analy": 4611, + "analyse": 47246, + "analyses": 39695, + "analysis": 5296, + "analyst": 14198, + "analysts": 28075, + "analytical": 34550, + "analytics": 8558, + "analyze": 28519, + "analyzing": 32107, + "anam": 29525, + "anan": 37215, + "anand": 25073, + "anand": 22083, + "anap": 41566, + "anarch": 46405, + "anarchi": 39879, + "anarchy": 27707, + "anas": 31382, + "anas": 12633, + "anast": 48902, + "anasta": 22915, + "anastasi": 36534, + "anastasia": 37975, + "anat": 10045, + "anath": 31277, + "anatom": 33759, + "anatomy": 15376, + "anc": 1124, + "anc": 17758, + "anca": 14583, + "ance": 7165, + "ance": 884, + "anced": 5071, + "ancer": 17415, + "ancers": 37296, + "ances": 3515, + "ancestor": 43904, + "ancestors": 24405, + "ancestral": 41615, + "ancestry": 30922, + "anch": 9489, + "anche": 34679, + "ancho": 26610, + "anchor": 20030, + "anchor": 13201, + "anchorage": 31950, + "anchored": 45926, + "anchors": 37830, + "anci": 4192, + "ancient": 31495, + "ancient": 5810, + "ancies": 21647, + "ancing": 7797, + "anco": 15459, + "ancy": 16282, + "ancy": 3633, + "and": 672, + "and": 537, + "anda": 2911, + "andalu": 31443, + "andco": 36302, + "ande": 26889, + "ande": 30354, + "ander": 3740, + "ander": 3935, + "anders": 10880, + "andersen": 32661, + "anderson": 26683, + "anderson": 6510, + "andes": 24052, + "andfriends": 36871, + "andhi": 21617, + "andhra": 32452, + "andi": 28870, + "andi": 14354, + "andie": 46318, + "andme": 42831, + "ando": 35950, + "ando": 5986, + "andolan": 48965, + "andon": 36488, + "andor": 45243, + "andover": 44177, + "andr": 22661, + "andra": 46795, + "andra": 21730, + "andre": 2657, + "andre": 9400, + "andrea": 10895, + "andreas": 20444, + "andrei": 42137, + "andres": 25197, + "andretti": 44291, + "andrew": 11717, + "andrew": 4847, + "andrews": 14506, + "andri": 37208, + 
"andro": 4417, + "andro": 17980, + "android": 24284, + "android": 5191, + "androidgames": 46572, + "andromeda": 42942, + "andré": 35609, + "ands": 32257, + "andthe": 22111, + "andu": 44200, + "andum": 47266, + "andy": 9447, + "andy": 2888, + "ane": 5846, + "ane": 3051, + "anec": 33965, + "anem": 41395, + "anemone": 49019, + "aneous": 48273, + "anes": 15381, + "anese": 48778, + "anesthe": 30622, + "anesthesia": 43353, + "anew": 39084, + "anew": 47341, + "anews": 20919, + "aney": 22387, + "anfield": 26993, + "ang": 883, + "ang": 2704, + "anga": 11641, + "angames": 43178, + "angan": 28264, + "angas": 46180, + "ange": 2960, + "ange": 3039, + "angel": 5029, + "angel": 5130, + "angela": 12354, + "angeles": 7382, + "angeli": 15265, + "angelic": 41038, + "angelica": 38582, + "angelina": 28890, + "angelo": 14342, + "angelou": 41328, + "angels": 7809, + "anger": 32737, + "anger": 6788, + "angerous": 39716, + "angers": 29756, + "angh": 34030, + "angi": 28003, + "angi": 24301, + "angie": 18859, + "angle": 21749, + "angle": 6946, + "angled": 32322, + "angler": 22284, + "anglers": 41608, + "angles": 18627, + "anglesey": 31850, + "anglia": 32076, + "anglic": 28322, + "anglican": 33284, + "angling": 36824, + "anglo": 39515, + "anglo": 30408, + "ango": 19090, + "angola": 36636, + "angor": 41740, + "angp": 19992, + "angry": 33910, + "angry": 9054, + "angs": 18441, + "angst": 41714, + "angu": 11209, + "angular": 43584, + "angular": 24981, + "angularjs": 48608, + "angus": 19688, + "ani": 1326, + "ani": 3624, + "ania": 9866, + "anian": 9945, + "anians": 39393, + "anic": 23113, + "anie": 26697, + "anie": 7671, + "anil": 28589, + "anil": 34619, + "anim": 2190, + "animal": 10697, + "animal": 4668, + "animalrights": 42859, + "animals": 4995, + "animate": 40076, + "animated": 13360, + "animation": 10344, + "animations": 42870, + "animator": 42591, + "anime": 23314, + "anime": 6469, + "anin": 45735, + "aning": 30972, + "anir": 27089, + "anirud": 35278, + "anirudhofficial": 45917, + "anis": 40986, + "anis": 47556, + "anism": 20947, + "anist": 16729, + "anistan": 9727, + "aniston": 47344, + "anit": 23683, + "anita": 18544, + "anium": 14794, + "anj": 22443, + "anja": 43440, + "anjali": 38834, + "anjo": 47353, + "ank": 13339, + "ank": 10029, + "anka": 45324, + "ankara": 34309, + "ankle": 14777, + "ankles": 48688, + "ann": 850, + "ann": 5424, + "anna": 13821, + "anna": 2160, + "annab": 22336, + "annabelle": 47661, + "annah": 39166, + "annah": 14327, + "annak": 41720, + "annan": 32166, + "annapolis": 34491, + "annas": 48467, + "anne": 9139, + "anne": 4083, + "anned": 27352, + "anner": 12642, + "annes": 24343, + "annette": 36821, + "annex": 42958, + "annex": 46389, + "anni": 2438, + "anni": 13728, + "annie": 37270, + "annie": 12173, + "annies": 43184, + "annihil": 32734, + "annis": 24742, + "anniv": 31399, + "anniver": 29671, + "annivers": 42836, + "anniversaire": 30882, + "anniversary": 3048, + "anno": 9901, + "anno": 26871, + "annon": 26385, + "annot": 30411, + "announ": 1806, + "announce": 3682, + "announced": 4103, + "announcement": 6932, + "announcements": 23735, + "announcer": 33626, + "announces": 6500, + "announcing": 11593, + "annoy": 45138, + "annoyed": 29863, + "annoying": 15248, + "annu": 21698, + "annual": 2906, + "annually": 23703, + "anny": 34313, + "anny": 5291, + "ano": 5617, + "ano": 2658, + "anom": 21612, + "anomaly": 46811, + "anon": 47079, + "anon": 13667, + "anonym": 38605, + "anonymous": 15036, + "anoo": 25690, + "anor": 13243, + "anor": 16596, + "anos": 20132, + "another": 29274, + "another": 1380, 
+ "anova": 24116, + "ans": 24586, + "ans": 885, + "ansari": 40748, + "ansel": 40356, + "answ": 3369, + "answe": 14391, + "answer": 4518, + "answered": 14499, + "answering": 18280, + "answers": 8692, + "ant": 1103, + "ant": 773, + "anta": 3023, + "antag": 41745, + "antal": 39355, + "antalya": 47440, + "antan": 32899, + "antarc": 21338, + "antarctic": 27077, + "antarctica": 22587, + "ante": 19311, + "ante": 9769, + "antebellum": 41683, + "antelope": 39177, + "anten": 35517, + "antenna": 26370, + "anter": 46508, + "antes": 14927, + "antgrasso": 39074, + "anth": 3737, + "anth": 29741, + "antha": 47981, + "anthe": 34167, + "anthem": 12504, + "anthi": 45261, + "anthology": 21009, + "anthony": 17477, + "anthony": 6113, + "anthro": 10019, + "anthropo": 18538, + "anthropology": 32407, + "anthus": 37639, + "anti": 3120, + "anti": 3564, + "antibio": 18954, + "antibiotic": 34387, + "antibiotics": 29499, + "antibody": 49018, + "antic": 8260, + "anticip": 11435, + "anticipate": 38280, + "anticipated": 18605, + "anticipating": 48067, + "anticipation": 26983, + "antics": 37126, + "antidote": 45476, + "antifa": 35926, + "antigua": 39910, + "antine": 17641, + "antino": 27818, + "antioxid": 23010, + "antioxidant": 37452, + "antioxidants": 34208, + "antiqu": 21745, + "antique": 46517, + "antique": 9060, + "antiques": 17365, + "antis": 19748, + "antisemitism": 36630, + "antit": 37833, + "antitrust": 49343, + "antlers": 47720, + "antly": 5265, + "anto": 16826, + "anto": 24486, + "antoine": 25188, + "anton": 5497, + "anton": 19644, + "antoni": 39958, + "antonio": 30497, + "antonio": 7842, + "antony": 30707, + "antrim": 40252, + "ants": 1589, + "antv": 47520, + "antw": 44460, + "antwer": 26970, + "antwerp": 33797, + "antz": 25684, + "anu": 8537, + "anu": 17152, + "anup": 29617, + "anus": 27084, + "anush": 22765, + "anushka": 42080, + "anushka": 39822, + "anushkasharma": 44203, + "anwar": 34261, + "anxi": 9021, + "anxiety": 11103, + "anxious": 27793, + "any": 1307, + "any": 1504, + "anya": 11173, + "anybody": 10071, + "anyi": 41632, + "anymore": 7372, + "anyone": 2302, + "anything": 3582, + "anytime": 13924, + "anyway": 8931, + "anyways": 19778, + "anywhere": 8863, + "anz": 14445, + "anz": 19425, + "anza": 14669, + "anzac": 31977, + "ao": 7313, + "ao": 5703, + "aoa": 47119, + "aoc": 31918, + "aofficial": 30840, + "aoki": 33602, + "aol": 40643, + "aon": 30928, + "aon": 48476, + "aor": 32044, + "aos": 46860, + "ap": 688, + "ap": 2728, + "apa": 36954, + "apa": 13537, + "apac": 34320, + "apache": 23921, + "apal": 38017, + "apan": 36562, + "apar": 9161, + "apark": 32528, + "apart": 6474, + "apart": 7803, + "aparthe": 25121, + "apartheid": 26597, + "apartment": 8285, + "apartments": 15791, + "aparty": 26767, + "apat": 31755, + "apathy": 18145, + "apc": 20300, + "apd": 44563, + "ape": 6098, + "ape": 2609, + "apec": 47530, + "aper": 13681, + "aper": 5858, + "apers": 15846, + "apes": 9550, + "apeu": 19040, + "apex": 41935, + "apex": 23712, + "aph": 16341, + "aph": 29491, + "apha": 47104, + "apho": 21758, + "aphra": 44147, + "api": 23342, + "api": 14674, + "apia": 44259, + "apic": 40679, + "aping": 18456, + "apink": 35725, + "apis": 37575, + "apk": 27648, + "apo": 4089, + "apo": 19758, + "apocaly": 13932, + "apocalypse": 17571, + "apocalyptic": 35675, + "apol": 5023, + "apolice": 45663, + "apolis": 9598, + "apollo": 48213, + "apollo": 11554, + "apolo": 31094, + "apolog": 25530, + "apologe": 42908, + "apologi": 14977, + "apologies": 21959, + "apologise": 39608, + "apologize": 22879, + "apologizes": 35298, + "apology": 20768, 
+ "apor": 21871, + "apore": 6679, + "apost": 20309, + "apostle": 33051, + "apostles": 48457, + "app": 882, + "app": 2231, + "appa": 4884, + "appa": 13110, + "appalach": 30523, + "appalachian": 36806, + "appalling": 44797, + "appar": 26698, + "apparatus": 37716, + "apparel": 13972, + "apparent": 23963, + "apparently": 5287, + "appe": 3748, + "appe": 45949, + "appeal": 9625, + "appealing": 25909, + "appeals": 22447, + "appear": 5544, + "appear": 9308, + "appearance": 7238, + "appearances": 17214, + "appeared": 11561, + "appearing": 18759, + "appears": 8743, + "appell": 43833, + "appen": 37201, + "appen": 26589, + "apper": 18780, + "appet": 21686, + "appeti": 24179, + "appetite": 24481, + "appetizer": 36065, + "applau": 24713, + "applaud": 42152, + "applause": 22650, + "apple": 8629, + "apple": 3055, + "applemusic": 21390, + "apples": 14032, + "appleton": 45250, + "appli": 15495, + "appliance": 33677, + "appliances": 22134, + "applic": 4235, + "applicable": 37927, + "applicants": 28035, + "application": 7241, + "applications": 7341, + "applied": 12636, + "applies": 24910, + "apply": 4356, + "applying": 17965, + "appo": 5433, + "appoint": 36190, + "appointed": 11087, + "appointment": 10890, + "appointments": 23439, + "appoints": 25132, + "apprais": 36972, + "appraisal": 46108, + "appreci": 3474, + "appreciate": 6263, + "appreciated": 9264, + "appreciates": 36573, + "appreciating": 39352, + "appreciation": 9212, + "appreciationday": 37438, + "appreciative": 45074, + "appren": 10582, + "apprentic": 15662, + "apprentice": 19122, + "apprentice": 17985, + "apprentices": 38252, + "apprenticeship": 26939, + "apprenticeships": 35425, + "appro": 2398, + "approach": 7781, + "approach": 6241, + "approached": 36499, + "approaches": 14962, + "approaching": 12164, + "appropri": 8446, + "appropriate": 10768, + "appropriately": 30383, + "appropriation": 49110, + "approval": 13549, + "approve": 19064, + "approved": 9412, + "approves": 18107, + "approx": 18266, + "approxim": 14201, + "approximately": 16128, + "apps": 7020, + "appstore": 31377, + "appt": 48112, + "appy": 34420, + "apr": 39396, + "apr": 11177, + "apra": 37027, + "apric": 25923, + "apricot": 30815, + "april": 23548, + "april": 2484, + "apro": 42712, + "apro": 49051, + "apron": 29502, + "aps": 8868, + "apse": 31843, + "apt": 17921, + "aptly": 47313, + "apu": 22166, + "apur": 36900, + "apur": 45193, + "aq": 14018, + "aq": 26862, + "aqu": 4458, + "aqua": 18613, + "aquaculture": 41885, + "aquaman": 35098, + "aquari": 37605, + "aquarium": 16814, + "aquarius": 38879, + "aquatic": 22658, + "aque": 35927, + "aque": 37268, + "aqui": 36826, + "aquino": 33796, + "ar": 516, + "ar": 625, + "ara": 24161, + "ara": 3340, + "arab": 5405, + "arab": 12028, + "arabia": 11746, + "arabian": 24663, + "arabic": 16709, + "arabs": 39155, + "arac": 47620, + "arach": 37689, + "arag": 41502, + "araj": 45142, + "arak": 23416, + "aram": 19223, + "aram": 21473, + "arama": 49066, + "aran": 20839, + "aran": 19641, + "aras": 36399, + "arat": 30856, + "arav": 35836, + "arbit": 20267, + "arbitr": 22702, + "arbitration": 34845, + "arbor": 33516, + "arbor": 24878, + "arboretum": 41719, + "arc": 4997, + "arc": 11592, + "arca": 25189, + "arca": 37612, + "arcade": 13331, + "arcadia": 38372, + "arch": 2458, + "arch": 8557, + "archa": 45619, + "archae": 10121, + "archaeological": 26163, + "archaeologists": 45035, + "archaeology": 14868, + "archan": 33359, + "archbishop": 23994, + "arche": 22474, + "archer": 21824, + "archers": 38407, + "archery": 23935, + "arches": 30771, + "archi": 4479, + 
"archie": 20557, + "archipel": 39750, + "archipelago": 43025, + "architec": 3359, + "architect": 12192, + "architects": 13290, + "architectural": 15360, + "architecture": 39038, + "architecture": 4920, + "archival": 39249, + "archive": 42257, + "archive": 10548, + "archived": 42379, + "archives": 9411, + "archy": 15643, + "arctic": 29716, + "arctic": 9138, + "ard": 3793, + "ard": 746, + "arden": 44600, + "arden": 27057, + "ardi": 23932, + "ardi": 19837, + "ardo": 35735, + "ardo": 9394, + "ards": 1654, + "ardu": 20906, + "arduino": 25398, + "are": 1076, + "are": 631, + "area": 2445, + "areas": 5429, + "arec": 18136, + "areclipse": 36030, + "ared": 5369, + "arel": 12798, + "arella": 24784, + "arelli": 48619, + "aren": 4033, + "aren": 4318, + "arena": 5463, + "arenas": 47860, + "arent": 37487, + "arer": 14857, + "arers": 33159, + "ares": 12224, + "arest": 11708, + "aret": 22247, + "areth": 47725, + "aretha": 42090, + "areyou": 37607, + "arez": 13108, + "arg": 27285, + "argent": 7812, + "argentina": 9789, + "argentine": 32582, + "argon": 40737, + "argos": 37443, + "argu": 7440, + "arguably": 30899, + "argue": 19788, + "argued": 48153, + "argues": 30045, + "arguing": 26549, + "argument": 16224, + "arguments": 24693, + "argus": 44300, + "argy": 21066, + "argyle": 36179, + "argyll": 40667, + "ari": 1221, + "ari": 3681, + "aria": 8883, + "arial": 42431, + "arian": 29980, + "arian": 6953, + "ariana": 14892, + "arianag": 23025, + "arianagrande": 23321, + "arianism": 44351, + "arians": 19104, + "arias": 22567, + "arie": 18774, + "ariel": 47959, + "ariel": 21025, + "aries": 5213, + "arif": 46621, + "arily": 12993, + "arin": 29564, + "arin": 18612, + "arina": 29271, + "arine": 29586, + "aring": 2142, + "ario": 8862, + "arios": 25392, + "aris": 15227, + "arise": 26490, + "arist": 12110, + "aristo": 25666, + "aristotle": 49156, + "arities": 31069, + "arity": 16608, + "arium": 11809, + "arius": 21482, + "ariz": 6516, + "arized": 40167, + "arizon": 28936, + "arizona": 7106, + "arjun": 24565, + "arjun": 20477, + "arjuna": 43835, + "ark": 11921, + "ark": 12010, + "arkansas": 12227, + "arkham": 36381, + "arl": 48542, + "arlington": 44940, + "arlington": 17865, + "arly": 3637, + "arm": 5671, + "arm": 4793, + "arma": 15887, + "arma": 38716, + "armad": 37897, + "armada": 34938, + "armagh": 44313, + "armani": 31314, + "armb": 37096, + "armchair": 45757, + "armed": 40471, + "armed": 8202, + "armen": 13145, + "armenia": 22008, + "armenian": 24891, + "armies": 46686, + "armin": 45481, + "arming": 19766, + "armist": 38150, + "armistice": 46765, + "armor": 16167, + "armored": 28214, + "armory": 38610, + "armour": 18503, + "armoured": 42514, + "arms": 5706, + "armstrong": 15005, + "army": 13541, + "army": 3133, + "armys": 27311, + "arn": 9348, + "arn": 37597, + "arnau": 45556, + "arne": 43509, + "arney": 35962, + "arnold": 49096, + "arnold": 13609, + "arns": 46692, + "aro": 7514, + "aro": 11551, + "aroa": 48209, + "arom": 16831, + "aroma": 40143, + "aroma": 26390, + "aromas": 47439, + "aromatherapy": 42584, + "aromatic": 39669, + "aron": 30855, + "aron": 28926, + "aroo": 47581, + "arora": 31897, + "arosa": 44264, + "arose": 44262, + "around": 35615, + "around": 1630, + "arqu": 35654, + "arquitec": 41703, + "arr": 39106, + "arr": 42489, + "arra": 32918, + "arra": 43827, + "arrahman": 44554, + "arran": 45722, + "arrang": 16711, + "arrange": 15410, + "arrange": 26311, + "arranged": 22451, + "arrangement": 23822, + "arrangements": 23792, + "arranging": 35321, + "array": 17293, + "arre": 4374, + "arrell": 28846, + "arrest": 
9320, + "arrested": 5845, + "arresting": 43930, + "arrests": 20683, + "arri": 2115, + "arrival": 9073, + "arrivals": 19583, + "arrive": 8851, + "arrived": 3514, + "arrives": 9905, + "arriving": 10884, + "arro": 15729, + "arrog": 26997, + "arrogance": 47025, + "arrogant": 40582, + "arrow": 30920, + "arrow": 11149, + "arrowhead": 46393, + "arrows": 24768, + "arroyo": 45237, + "ars": 42815, + "ars": 864, + "arse": 22665, + "arsen": 5330, + "arsenal": 45234, + "arsenal": 6084, + "arsene": 32117, + "arson": 29937, + "art": 1486, + "art": 794, + "arta": 12031, + "arte": 13482, + "arte": 12947, + "artem": 40387, + "artemis": 45256, + "arten": 37043, + "arter": 29449, + "artery": 40062, + "artes": 48629, + "artforsale": 48239, + "artgallery": 31982, + "arth": 7146, + "arth": 20265, + "arthistory": 39313, + "arthr": 20807, + "arthritis": 22916, + "arthro": 43255, + "arthur": 35660, + "arthur": 8550, + "arti": 1635, + "arti": 34601, + "artic": 3003, + "articho": 30937, + "artichoke": 39647, + "article": 3550, + "articles": 11939, + "articul": 40343, + "articulate": 45444, + "artif": 8950, + "artifact": 37718, + "artifacts": 30249, + "artificial": 19357, + "artificial": 12040, + "artificialintelligence": 20799, + "artillery": 24465, + "artin": 33168, + "artin": 48540, + "artis": 41794, + "artisan": 36389, + "artisan": 21535, + "artisans": 40140, + "artist": 14326, + "artist": 2456, + "artiste": 41402, + "artistic": 12421, + "artiston": 48443, + "artistry": 38570, + "artists": 4899, + "artistson": 32127, + "artistsontwitter": 39469, + "artlovers": 35617, + "arto": 28464, + "artof": 31751, + "artoftheday": 43990, + "arton": 46744, + "arts": 22040, + "arts": 3812, + "artsy": 31588, + "arturo": 38591, + "artwit": 36713, + "artwork": 4188, + "artworks": 26215, + "arty": 45417, + "arty": 25916, + "aru": 13757, + "aru": 23907, + "aruba": 40131, + "arugula": 40770, + "arum": 48732, + "arun": 16105, + "arun": 31877, + "arunach": 47260, + "arunjaitley": 44874, + "arus": 22644, + "arvin": 16971, + "arvind": 21209, + "arvind": 41079, + "arvindkejriwal": 22971, + "arvo": 45726, + "arwx": 29824, + "ary": 4617, + "ary": 856, + "arya": 23594, + "aryan": 34966, + "as": 587, + "as": 601, + "asa": 39676, + "asa": 11914, + "asad": 42376, + "asaki": 22455, + "asam": 40603, + "asan": 22379, + "asan": 17841, + "asana": 42363, + "asant": 25536, + "asants": 37766, + "asap": 24199, + "asap": 10822, + "asar": 24733, + "asar": 49299, + "asb": 31186, + "asbe": 32113, + "asbestos": 33765, + "asc": 22720, + "asc": 23305, + "ascen": 20767, + "ascension": 35499, + "ascent": 36625, + "asci": 12753, + "asco": 25578, + "asco": 17488, + "ascot": 23723, + "ascri": 15506, + "asd": 36988, + "asda": 29391, + "asdf": 36857, + "asdfghj": 42758, + "asdfghjkl": 47660, + "ase": 8083, + "ase": 894, + "asean": 24472, + "aseball": 46903, + "ased": 2134, + "asen": 41085, + "aser": 39615, + "aser": 7209, + "ases": 3762, + "asf": 25863, + "asg": 34813, + "ash": 2067, + "ash": 2612, + "asha": 40572, + "asha": 13472, + "ashamed": 20633, + "ashby": 46531, + "ashe": 48523, + "ashe": 31752, + "asher": 37585, + "ashes": 12587, + "asheville": 28897, + "ashford": 37796, + "ashi": 15563, + "ashi": 15934, + "ashish": 33145, + "ashland": 39938, + "ashleigh": 49356, + "ashley": 17825, + "ashley": 8957, + "asho": 20273, + "ashok": 38141, + "ashore": 31194, + "ashram": 43445, + "ashton": 43264, + "ashton": 12228, + "ashtra": 18118, + "asi": 3596, + "asi": 12562, + "asia": 5741, + "asian": 21737, + "asian": 7128, + "asiangames": 49108, + "asians": 36771, + "asics": 
31097, + "aside": 13676, + "asif": 37302, + "asim": 46050, + "asin": 48432, + "asin": 44347, + "asing": 4194, + "asingly": 15803, + "asion": 31753, + "asis": 12398, + "ask": 11027, + "ask": 2765, + "asked": 3993, + "asking": 5914, + "asks": 7953, + "asl": 41650, + "asleep": 10749, + "asley": 28206, + "asli": 44290, + "asm": 13851, + "asma": 38497, + "asmsg": 19839, + "aso": 30343, + "aso": 27932, + "asober": 43749, + "asocial": 48557, + "ason": 1163, + "asone": 31249, + "asons": 4249, + "asos": 37924, + "asot": 47968, + "asp": 17814, + "asp": 36666, + "asparag": 20301, + "asparagus": 20604, + "aspe": 10894, + "aspect": 19681, + "aspects": 18203, + "aspen": 35695, + "aspen": 25712, + "asper": 32991, + "asph": 28019, + "asphalt": 30574, + "aspir": 12669, + "aspirations": 36127, + "aspire": 24836, + "aspiring": 21862, + "asports": 43695, + "asr": 48052, + "asroma": 41000, + "ass": 12664, + "ass": 5301, + "assa": 47715, + "assad": 18699, + "assam": 19930, + "assan": 26352, + "assange": 27565, + "assas": 9603, + "assassin": 14366, + "assassin": 20029, + "assassinated": 40488, + "assassination": 24907, + "assassins": 34918, + "assassinscre": 36428, + "assassinscreed": 46082, + "assau": 7908, + "assaul": 19596, + "assault": 9679, + "assaulted": 30785, + "assaulting": 44143, + "asse": 3166, + "asse": 38600, + "assel": 37582, + "assemb": 5531, + "assemble": 26169, + "assembled": 22627, + "assemblies": 47406, + "assembling": 38670, + "assembly": 34542, + "assembly": 7059, + "assen": 38651, + "asser": 25665, + "asses": 21596, + "assess": 9209, + "assess": 23211, + "assessed": 44160, + "assessing": 31364, + "assessment": 10590, + "assessments": 32753, + "asset": 48463, + "asset": 13039, + "assets": 13170, + "assi": 2907, + "assi": 39540, + "assie": 31624, + "assign": 14190, + "assigned": 25767, + "assignment": 17342, + "assignments": 34257, + "assim": 36394, + "assimil": 43467, + "assist": 26558, + "assist": 10286, + "assistance": 11685, + "assistant": 6799, + "assistants": 31054, + "assisted": 18095, + "assisting": 24243, + "assists": 12675, + "assn": 44208, + "asso": 17617, + "assoc": 18891, + "associ": 3566, + "associate": 11777, + "associated": 11164, + "associates": 17358, + "association": 5578, + "associations": 33209, + "assor": 38604, + "assorted": 36701, + "assortment": 43112, + "asst": 24767, + "assu": 8328, + "assume": 19294, + "assumed": 37661, + "assuming": 29422, + "assump": 41182, + "assumption": 40773, + "assumptions": 45948, + "assurance": 28408, + "assure": 39161, + "assured": 25591, + "assures": 41988, + "assy": 29940, + "assy": 12963, + "ast": 1761, + "ast": 1242, + "asta": 43269, + "aste": 25033, + "aste": 25579, + "aster": 11013, + "aster": 9526, + "asteroid": 32253, + "asters": 33139, + "asth": 16684, + "asthma": 24610, + "asthour": 41238, + "astic": 15876, + "asting": 29984, + "astle": 46141, + "asto": 47275, + "aston": 24760, + "aston": 13879, + "astoni": 21962, + "astonishing": 27110, + "astonmartin": 40760, + "astor": 26391, + "astor": 47086, + "astoria": 34798, + "astounding": 37748, + "astr": 37609, + "astra": 47205, + "astra": 36079, + "astral": 45889, + "astri": 31243, + "astrid": 46499, + "astro": 8563, + "astro": 15318, + "astrology": 28526, + "astron": 7982, + "astronaut": 18376, + "astronauts": 29733, + "astronom": 23264, + "astronomer": 40036, + "astronomers": 44268, + "astronomical": 39775, + "astronomy": 17472, + "astrophotography": 38559, + "astros": 17598, + "asts": 10452, + "astu": 43137, + "astur": 45795, + "asu": 13157, + "asu": 16001, + "asun": 36044, + 
"asure": 3813, + "asus": 27269, + "aswell": 42978, + "asx": 38906, + "asy": 8524, + "asy": 2333, + "asylum": 15638, + "asym": 32539, + "at": 527, + "at": 536, + "ata": 4236, + "atable": 23909, + "atal": 24877, + "atal": 24797, + "atan": 33446, + "atar": 20128, + "atar": 7995, + "atari": 21549, + "atas": 30057, + "atay": 39518, + "atc": 28383, + "atch": 15938, + "atd": 33890, + "ate": 992, + "ate": 671, + "ateam": 42784, + "ateau": 16359, + "atec": 37352, + "atech": 31306, + "ated": 14589, + "ated": 943, + "atedly": 24698, + "atee": 32839, + "ateful": 5419, + "atelier": 29932, + "ately": 3862, + "atem": 17116, + "aten": 47984, + "atene": 30405, + "ateneo": 33904, + "ater": 18597, + "ater": 5877, + "ateral": 18819, + "aters": 22364, + "ates": 20370, + "ates": 1150, + "atest": 1705, + "ateur": 43677, + "atf": 28013, + "ath": 1374, + "ath": 1649, + "atha": 22530, + "atham": 23383, + "athan": 41260, + "athan": 26701, + "athe": 8963, + "athed": 47402, + "atheism": 25823, + "atheist": 22571, + "atheists": 47155, + "athen": 29112, + "athena": 30705, + "athens": 13524, + "ather": 6171, + "ather": 1817, + "athered": 34091, + "athers": 17266, + "athi": 28918, + "athing": 36069, + "athle": 3310, + "athlete": 7388, + "athletes": 7125, + "athletic": 33182, + "athletic": 9028, + "athletics": 7019, + "athlon": 14670, + "athome": 38217, + "athon": 4951, + "aths": 28835, + "athy": 34488, + "athy": 13183, + "ati": 591, + "ati": 6751, + "atia": 10908, + "atic": 20248, + "atic": 2647, + "atically": 13558, + "atics": 15666, + "atie": 30137, + "aties": 40060, + "atif": 41592, + "atiku": 37912, + "atile": 15474, + "atility": 23373, + "atime": 20158, + "atin": 36903, + "atin": 23047, + "atine": 39741, + "ating": 25653, + "ating": 1074, + "atio": 35401, + "ation": 2265, + "ation": 656, + "ational": 14205, + "ational": 3108, + "ationals": 44593, + "ationday": 20082, + "ations": 986, + "atis": 45456, + "atis": 41142, + "atism": 45638, + "ative": 18422, + "ative": 1648, + "atively": 11929, + "atives": 5629, + "ativity": 25166, + "atkins": 27734, + "atkinson": 28908, + "atl": 5411, + "atl": 10629, + "atla": 36043, + "atlan": 6818, + "atlanta": 39964, + "atlanta": 6839, + "atlantic": 28804, + "atlantic": 8189, + "atlantis": 27790, + "atlas": 15775, + "atle": 21170, + "atleast": 33231, + "atleti": 46067, + "atletico": 27501, + "atm": 14127, + "atmo": 8271, + "atmosphere": 10506, + "atmospheric": 24223, + "ato": 7987, + "ato": 4364, + "atoday": 26799, + "atom": 22418, + "atom": 24031, + "atomic": 18996, + "atoms": 41434, + "aton": 31525, + "aton": 10012, + "atop": 17455, + "ator": 10748, + "ator": 1962, + "atore": 28314, + "atorial": 32040, + "atories": 35678, + "atorium": 41306, + "ators": 3389, + "atory": 5920, + "atos": 41643, + "atour": 42967, + "atown": 24000, + "atp": 38105, + "atp": 19817, + "atr": 43247, + "atra": 20227, + "atra": 14401, + "atravel": 36981, + "atre": 46057, + "atri": 13882, + "atri": 38889, + "atric": 32238, + "atric": 13652, + "atrics": 36253, + "atrist": 41879, + "atrium": 29725, + "atrix": 43003, + "atro": 18724, + "atroc": 36197, + "atrocities": 37551, + "atry": 28334, + "ats": 46890, + "ats": 1032, + "atsu": 26531, + "att": 1017, + "att": 7103, + "atta": 7282, + "atta": 9146, + "attach": 43676, + "attach": 35653, + "attached": 11038, + "attachment": 28638, + "attack": 24971, + "attack": 3815, + "attacked": 12366, + "attacker": 39288, + "attackers": 47701, + "attacking": 16813, + "attacks": 7321, + "attain": 46459, + "attar": 37110, + "attemp": 4933, + "attempt": 7409, + "attempted": 17408, + 
"attempting": 18195, + "attempts": 15610, + "atten": 4084, + "atten": 32408, + "attenborough": 45860, + "attend": 9841, + "attend": 5802, + "attendance": 11928, + "attendant": 35424, + "attended": 8140, + "attendees": 14648, + "attending": 6696, + "attends": 22248, + "attention": 4936, + "atters": 30675, + "atthe": 21489, + "atti": 49265, + "atti": 16235, + "attic": 26766, + "attire": 21222, + "attitude": 10648, + "attitudes": 27611, + "attle": 14685, + "attle": 5030, + "attn": 25677, + "attor": 8856, + "attorney": 10372, + "attorneys": 29113, + "attrac": 7154, + "attract": 17010, + "attracted": 28493, + "attracting": 31909, + "attraction": 16807, + "attractions": 22307, + "attractive": 12231, + "attracts": 31024, + "attribu": 24624, + "attributed": 37520, + "attributes": 40763, + "attu": 43173, + "atty": 36705, + "atu": 15191, + "atu": 24295, + "atuesday": 34841, + "atul": 1744, + "atul": 43948, + "atum": 48295, + "atur": 14986, + "aturday": 29027, + "ature": 25305, + "ature": 4490, + "atures": 7358, + "atus": 14795, + "atv": 19598, + "atwood": 45680, + "atwork": 39680, + "atx": 34849, + "atx": 20136, + "aty": 40974, + "aty": 33107, + "atz": 30432, + "au": 627, + "au": 2566, + "aua": 45906, + "aub": 45938, + "auberg": 49382, + "aubre": 25899, + "aubrey": 34110, + "auburn": 42269, + "auburn": 14534, + "auc": 24489, + "auch": 43024, + "auck": 14588, + "auckland": 16072, + "auction": 48160, + "auction": 6462, + "auctioned": 41073, + "auctions": 24876, + "aucus": 47374, + "aud": 16107, + "aud": 19711, + "audi": 5091, + "audi": 10277, + "audible": 33227, + "audience": 6863, + "audiences": 22328, + "audio": 13792, + "audio": 5766, + "audiobook": 26282, + "audit": 12505, + "audit": 17625, + "auditi": 37377, + "audition": 18673, + "auditions": 21134, + "auditor": 38050, + "auditorium": 15063, + "audre": 16075, + "audrey": 18812, + "audu": 27934, + "audubon": 40275, + "auer": 33460, + "auf": 28924, + "aug": 15397, + "aug": 5720, + "auga": 22797, + "augh": 28310, + "augh": 14005, + "augmente": 48356, + "augmented": 32708, + "augu": 2610, + "august": 24353, + "august": 3171, + "augusta": 26144, + "augustine": 27397, + "augustus": 36835, + "auk": 19058, + "aul": 20695, + "aul": 34391, + "ault": 47253, + "ault": 10219, + "aun": 10608, + "aun": 38721, + "aunt": 12685, + "auntie": 23783, + "aunty": 29528, + "aur": 8156, + "aur": 17282, + "aura": 27728, + "aure": 36010, + "aureli": 35980, + "auror": 30067, + "aurora": 13500, + "aus": 10624, + "aus": 7630, + "ausa": 37384, + "ausbiz": 46543, + "ausch": 33926, + "auschwitz": 36523, + "ausopen": 27831, + "ausp": 35039, + "auspicious": 38806, + "auspol": 8241, + "aussi": 19762, + "aussie": 40230, + "aussie": 14424, + "aussies": 35727, + "aust": 26301, + "aust": 25418, + "austen": 29885, + "auster": 25030, + "austerity": 26982, + "austin": 12845, + "austin": 5125, + "austinmahone": 34678, + "austr": 2518, + "australi": 13798, + "australia": 3444, + "australian": 23630, + "australian": 6258, + "australians": 31488, + "austri": 8946, + "austria": 11960, + "austrian": 20638, + "ausv": 35206, + "ausvotes": 34661, + "aut": 12343, + "auth": 2381, + "auth": 38247, + "authent": 18158, + "authentic": 41266, + "authentic": 10369, + "authentication": 39746, + "authenticity": 35734, + "autho": 34552, + "author": 14447, + "author": 4358, + "authored": 37928, + "authori": 19207, + "authorities": 12729, + "authority": 10524, + "authorization": 48854, + "authorized": 28463, + "authors": 10765, + "auti": 8200, + "autism": 36256, + "autism": 11244, + "autisma": 43324, + 
"autistic": 29360, + "auto": 3917, + "auto": 5668, + "autobiography": 31509, + "autodesk": 40415, + "autograph": 10657, + "autograph": 13722, + "autographed": 16309, + "autographs": 17376, + "autoimmune": 45509, + "autom": 4114, + "automate": 43203, + "automated": 19022, + "automatic": 12126, + "automatically": 20725, + "automation": 12328, + "automobi": 44813, + "automobile": 25258, + "automotive": 12607, + "auton": 13100, + "autonews": 43975, + "autonom": 17870, + "autonomous": 20722, + "autonomy": 39223, + "autopsy": 44436, + "autos": 31118, + "autoshow": 46788, + "auts": 21140, + "autu": 5445, + "autum": 31783, + "autumn": 28940, + "autumn": 6110, + "autumnal": 35481, + "aux": 18154, + "aux": 8909, + "auxiliary": 37778, + "av": 722, + "av": 8484, + "ava": 12385, + "avage": 31505, + "avail": 1651, + "avail": 16686, + "availability": 17551, + "available": 1685, + "aval": 18012, + "avalan": 23970, + "avalanche": 25815, + "avalley": 45082, + "avalon": 30436, + "avan": 27971, + "avan": 33351, + "avant": 24305, + "avar": 33423, + "avatar": 18219, + "ave": 10062, + "ave": 4860, + "avec": 25828, + "aved": 47918, + "avel": 46817, + "avel": 48088, + "aven": 5963, + "aven": 32971, + "aveng": 21935, + "avenger": 24799, + "avengers": 39413, + "avengers": 12016, + "avengersendgame": 49342, + "avent": 22700, + "avenue": 7042, + "aver": 8788, + "aver": 11403, + "average": 6254, + "averaged": 37310, + "averages": 48982, + "averaging": 35266, + "avery": 20313, + "aves": 14023, + "avfc": 21304, + "avg": 19452, + "avgeek": 11114, + "avi": 3324, + "avi": 11297, + "avia": 38710, + "avian": 24115, + "aviation": 27717, + "aviation": 7617, + "aviator": 38921, + "aviators": 48011, + "avici": 46192, + "avicii": 49158, + "avid": 19118, + "avier": 14598, + "avila": 45339, + "aville": 40689, + "avin": 46204, + "avis": 45163, + "avis": 19765, + "aviv": 22130, + "aviva": 47122, + "aviz": 27607, + "avl": 44749, + "avo": 4496, + "avo": 32400, + "avoc": 12291, + "avocado": 14135, + "avocados": 48911, + "avoi": 16797, + "avoid": 30448, + "avoid": 5983, + "avoidance": 47983, + "avoided": 32103, + "avoiding": 22086, + "avoids": 48220, + "avon": 22790, + "avon": 17348, + "avril": 37763, + "avs": 31896, + "avut": 44472, + "avy": 29973, + "aw": 808, + "aw": 5557, + "awa": 4820, + "awa": 6872, + "await": 20769, + "awaited": 20092, + "awaiting": 14872, + "awaits": 15635, + "awak": 9776, + "awak": 41387, + "awake": 14695, + "awaken": 35412, + "awakening": 17017, + "awakens": 23191, + "awal": 42447, + "awal": 35090, + "awan": 48869, + "awan": 20420, + "awar": 5745, + "award": 36310, + "award": 2047, + "awarded": 7368, + "awarding": 37089, + "awards": 34528, + "awards": 2320, + "aware": 4427, + "aware": 7196, + "awareness": 19217, + "awareness": 4823, + "awarenessmonth": 34278, + "awarenessweek": 35294, + "away": 21088, + "away": 1520, + "aways": 12782, + "awaz": 18586, + "awd": 34846, + "awe": 1693, + "awe": 14106, + "aweather": 42142, + "aweather": 28681, + "awec": 38916, + "aweed": 29724, + "awesom": 16727, + "awesome": 30390, + "awesome": 1848, + "awesomeness": 22430, + "awful": 13617, + "awg": 46350, + "awgs": 35275, + "awh": 39566, + "awhile": 19171, + "awi": 15167, + "awil": 47271, + "awilliams": 42163, + "awk": 8888, + "awk": 40943, + "awkward": 42337, + "awkward": 10304, + "awn": 46222, + "awp": 43300, + "aws": 19658, + "awsome": 47196, + "awson": 36286, + "aww": 11568, + "awww": 15634, + "awwww": 26460, + "awx": 28385, + "ax": 3165, + "ax": 9203, + "axe": 19861, + "axel": 47889, + "axel": 32131, + "axes": 45970, + "axi": 
30672, + "axial": 46550, + "axis": 19614, + "axle": 39003, + "axx": 47411, + "ay": 658, + "ay": 551, + "aya": 5917, + "ayala": 39827, + "ayama": 41194, + "ayan": 37781, + "ayan": 16269, + "ayana": 37400, + "ayas": 40904, + "ayat": 44902, + "ayat": 35720, + "aye": 21661, + "aye": 12446, + "ayer": 24852, + "ayers": 42783, + "ayesha": 46570, + "ayi": 33025, + "ayles": 44706, + "ayne": 35669, + "ayo": 21929, + "ayo": 18708, + "ayr": 23002, + "ayr": 36473, + "ayrshire": 32687, + "ays": 785, + "ayu": 40769, + "ayurve": 27185, + "ayurveda": 38986, + "ayush": 44831, + "ayy": 32514, + "ayyy": 41052, + "az": 854, + "az": 5468, + "aza": 22883, + "azad": 37838, + "azalea": 34087, + "azam": 34727, + "azar": 27911, + "azcardinals": 48846, + "aze": 41157, + "aze": 28485, + "azer": 19169, + "azerbai": 20649, + "azerbaijan": 23888, + "azhar": 47019, + "azi": 23914, + "azi": 18452, + "azine": 29140, + "azione": 48335, + "aziz": 41205, + "aziz": 29630, + "azo": 41227, + "azon": 36854, + "azores": 42826, + "azte": 33270, + "aztec": 34749, + "aztecs": 49387, + "azu": 27701, + "azu": 46963, + "azul": 39807, + "azure": 18514, + "azwx": 30262, + "azy": 24783, + "azz": 9817, + "azz": 26453, + "azza": 22255, + "azzi": 18758, + "azzle": 39974, + "azzo": 26779, + "azzur": 37055, + "azzy": 44534, + "añ": 23716, + "años": 41634, + "b": 65, + "b": 321, + "ba": 932, + "ba": 1792, + "baa": 33004, + "baahu": 34145, + "baahubali": 38663, + "bab": 1202, + "bab": 19039, + "baba": 12631, + "babe": 31177, + "babe": 7716, + "babes": 14253, + "babies": 6635, + "babs": 36217, + "babu": 21623, + "baby": 7268, + "baby": 1794, + "babygirl": 39554, + "babylon": 31928, + "babymetal": 45013, + "babys": 22266, + "babysitting": 34186, + "bac": 2791, + "bac": 25867, + "bacca": 40708, + "bach": 11773, + "bach": 8758, + "bachchan": 17690, + "bachel": 11283, + "bachelor": 45508, + "bachelor": 16766, + "bachelore": 26009, + "bachelorette": 29093, + "bacher": 49211, + "back": 1663, + "back": 893, + "backbone": 35635, + "backdrop": 20802, + "backed": 12721, + "backer": 22183, + "backers": 32934, + "background": 5994, + "backgrounds": 28215, + "backing": 14935, + "backlash": 31519, + "backpack": 14894, + "backpacking": 29524, + "backpacks": 37063, + "backs": 7562, + "backseat": 48812, + "backstage": 9236, + "backstreet": 46337, + "backthe": 26127, + "backto": 18703, + "backtoschool": 28730, + "backtothe": 43059, + "backup": 14415, + "backward": 37964, + "backwards": 21283, + "backyard": 12608, + "bacon": 48666, + "bacon": 7104, + "bacter": 11814, + "bacteria": 16556, + "bacterial": 26101, + "bad": 2564, + "bad": 2103, + "bada": 37475, + "badan": 39149, + "badass": 11616, + "baddest": 38112, + "baden": 36690, + "bader": 42254, + "badge": 11301, + "badger": 32686, + "badger": 22363, + "badgers": 22521, + "badges": 20084, + "badlands": 43192, + "badly": 13684, + "badminton": 21412, + "badoo": 33192, + "bados": 25755, + "bae": 32834, + "bae": 6855, + "baek": 18557, + "baek": 32702, + "baekhyun": 21572, + "baes": 46332, + "baf": 13616, + "baff": 35693, + "bafta": 29199, + "bag": 3408, + "bag": 3365, + "bage": 9698, + "bagel": 28777, + "bagels": 37489, + "baggage": 31402, + "bagged": 34047, + "bagh": 21659, + "bagh": 37271, + "baghdad": 30763, + "bago": 25105, + "bags": 6136, + "bagu": 27749, + "baguette": 45334, + "bah": 8372, + "bah": 16685, + "baha": 29592, + "baham": 43718, + "bahamas": 21224, + "bahan": 28704, + "bahn": 33452, + "bahrain": 12503, + "bai": 6232, + "bai": 23339, + "bail": 22933, + "bail": 16986, + "bailey": 27535, + "bailey": 10180, 
+ "bain": 40784, + "bain": 21593, + "bair": 29059, + "baird": 40474, + "bait": 18010, + "baj": 20713, + "baja": 40418, + "baja": 28374, + "bajo": 32619, + "bak": 4059, + "bak": 23742, + "bakar": 41414, + "bake": 20736, + "bake": 11878, + "baked": 10364, + "baker": 27303, + "baker": 7743, + "bakers": 35293, + "bakers": 40231, + "bakersfield": 40149, + "bakery": 13377, + "bakes": 43057, + "bakhta": 44912, + "bakhtawar": 46937, + "bakhtawarbz": 47118, + "baking": 11467, + "baku": 46417, + "baku": 31852, + "bal": 1398, + "bal": 2282, + "bala": 20291, + "balaji": 48694, + "balance": 42894, + "balance": 6827, + "balanced": 15273, + "balances": 37733, + "balancing": 23541, + "balboa": 45098, + "balcony": 16169, + "bald": 11153, + "bald": 14875, + "baldhead": 29191, + "baldwin": 16242, + "bale": 48573, + "bale": 18873, + "bales": 42879, + "bali": 16432, + "bali": 10900, + "balkan": 48499, + "balkans": 42987, + "ball": 3807, + "ball": 1069, + "balla": 42246, + "ballad": 33472, + "ballarat": 46645, + "ballard": 31750, + "baller": 49194, + "baller": 25655, + "ballerina": 34962, + "ballers": 34173, + "ballet": 10703, + "balli": 29406, + "ballin": 47444, + "ballin": 33057, + "balling": 47588, + "ballis": 46675, + "ballistic": 36667, + "ballo": 8871, + "ballon": 36469, + "balloon": 13634, + "balloons": 18130, + "ballot": 14185, + "ballots": 35051, + "ballpark": 26080, + "ballroom": 15493, + "balls": 6927, + "bally": 17275, + "bally": 29451, + "balm": 24962, + "balmain": 45929, + "balo": 12395, + "baloch": 23173, + "balochistan": 21918, + "balot": 44615, + "balotelli": 45721, + "bals": 44154, + "balsam": 29121, + "balsamic": 32654, + "balt": 24441, + "balti": 8400, + "baltic": 23817, + "baltimore": 38502, + "baltimore": 9582, + "balu": 38093, + "bam": 6383, + "bam": 12686, + "bama": 20021, + "bambam": 34538, + "bambi": 46596, + "bamboo": 49322, + "bamboo": 16748, + "ban": 1159, + "ban": 2777, + "bana": 18428, + "banan": 38410, + "banana": 8922, + "bananas": 19121, + "banc": 39252, + "band": 4613, + "band": 1963, + "banda": 31865, + "bandai": 42054, + "bandana": 39265, + "bandcamp": 32229, + "banded": 37804, + "bandic": 44400, + "bandit": 27639, + "bandits": 33940, + "bandra": 41393, + "bands": 7858, + "bandung": 29512, + "bandwagon": 36432, + "bandwidth": 48859, + "bane": 9597, + "banerjee": 48102, + "banff": 29565, + "bang": 3524, + "bang": 6907, + "bangalore": 14697, + "banger": 24872, + "bangers": 38311, + "banging": 33033, + "bangkok": 12351, + "bangla": 10339, + "bangla": 45928, + "bangladesh": 11245, + "bangle": 37634, + "bangor": 31190, + "bangs": 27992, + "bangtan": 39131, + "bani": 19732, + "banjo": 27014, + "bank": 7061, + "bank": 2723, + "banker": 27316, + "bankers": 30599, + "bankholiday": 48868, + "banking": 9566, + "bankno": 49201, + "bankof": 39120, + "bankrup": 21904, + "bankrupt": 23077, + "bankrupt": 37288, + "bankruptcy": 23978, + "banks": 6367, + "banksy": 33350, + "bann": 5304, + "banned": 12012, + "banner": 9185, + "banners": 23145, + "banning": 26246, + "bannon": 29710, + "bano": 42947, + "banquet": 14254, + "bans": 15146, + "bant": 23301, + "bant": 46657, + "banter": 25535, + "bao": 39487, + "bao": 20408, + "bap": 7415, + "bap": 23754, + "bapti": 15477, + "baptism": 36765, + "baptist": 13274, + "baptiste": 45770, + "baptized": 45400, + "bar": 1040, + "bar": 2411, + "bara": 19345, + "barack": 18670, + "barack": 22481, + "barackobama": 18885, + "barak": 47419, + "barak": 16260, + "barang": 38446, + "barb": 24173, + "barb": 20913, + "barbados": 26992, + "barbar": 7906, + "barbara": 
10937, + "barbarian": 42530, + "barbe": 18372, + "barbecue": 23501, + "barber": 19517, + "barber": 12296, + "barbershop": 37707, + "barbican": 47668, + "barbie": 16923, + "barca": 22942, + "barcel": 6134, + "barcelon": 47820, + "barcelona": 6412, + "barclay": 48877, + "barclay": 45276, + "barclays": 29538, + "bard": 39812, + "bard": 17514, + "bare": 16023, + "bare": 14318, + "barefoot": 30327, + "barely": 12684, + "bargain": 15076, + "bargaining": 41282, + "bargains": 34126, + "barge": 28272, + "bari": 21428, + "bari": 28016, + "barista": 31078, + "barit": 46300, + "bark": 32333, + "bark": 16560, + "barker": 20618, + "barking": 32676, + "barkley": 30266, + "barley": 22607, + "barlow": 25483, + "barn": 10490, + "barn": 10942, + "barnab": 43272, + "barnard": 44332, + "barne": 42527, + "barnes": 13102, + "barnet": 41943, + "barnett": 27650, + "barney": 24563, + "barns": 43759, + "barnsley": 37109, + "barnsley": 32153, + "baro": 17422, + "baro": 30817, + "baron": 48371, + "baron": 19349, + "baroness": 45056, + "barons": 45596, + "baroque": 25065, + "barr": 39473, + "barr": 22492, + "barra": 28442, + "barra": 33542, + "barrabest": 41376, + "barrac": 40835, + "barracks": 35822, + "barre": 13840, + "barre": 38257, + "barred": 33261, + "barrel": 11703, + "barrels": 22059, + "barren": 46743, + "barrett": 18701, + "barri": 8660, + "barric": 29189, + "barrie": 27090, + "barrier": 15706, + "barriers": 16321, + "barrington": 48954, + "barron": 34881, + "barrow": 42568, + "barrow": 24983, + "barry": 18028, + "barry": 8461, + "barrymore": 49310, + "bars": 8616, + "barstool": 44826, + "bart": 14838, + "bart": 12870, + "bartender": 33498, + "barthol": 48989, + "bartlett": 37130, + "bartol": 38209, + "barton": 48853, + "barton": 20345, + "baru": 16356, + "barun": 38278, + "barunsob": 41398, + "barça": 32788, + "bas": 1244, + "bas": 11420, + "basa": 26142, + "base": 2776, + "base": 4579, + "baseball": 23479, + "baseball": 3470, + "based": 35196, + "based": 2812, + "basel": 42803, + "basel": 20903, + "baseline": 40648, + "baseman": 45910, + "basement": 14792, + "bases": 20496, + "bash": 20462, + "bash": 10972, + "bashing": 37545, + "bashir": 42799, + "basic": 40452, + "basic": 7696, + "basically": 9125, + "basics": 15825, + "basil": 19225, + "basil": 14936, + "basilica": 27879, + "basin": 16117, + "basing": 47321, + "basis": 12278, + "baske": 3713, + "basket": 10338, + "basketball": 40023, + "basketball": 3835, + "baskets": 27787, + "basking": 39769, + "basque": 37175, + "bass": 22831, + "bass": 5992, + "bassett": 45992, + "bassist": 26496, + "bast": 28092, + "basti": 8559, + "bastille": 41874, + "bat": 2121, + "bat": 6575, + "bata": 39277, + "batb": 33962, + "batch": 9413, + "bate": 25034, + "bate": 28277, + "bateman": 41635, + "bates": 21727, + "batgirl": 46460, + "bath": 6064, + "bath": 5713, + "bathing": 20144, + "bathro": 21201, + "bathroom": 8470, + "bathrooms": 26434, + "baths": 19442, + "bathtub": 39942, + "bathurst": 36365, + "bati": 23362, + "bati": 37589, + "batman": 27811, + "batman": 7223, + "baton": 24331, + "bats": 14984, + "batsman": 35432, + "batt": 2407, + "batt": 48595, + "battalion": 20820, + "batter": 12654, + "batter": 31855, + "battered": 34375, + "batteries": 16666, + "battersea": 35839, + "battery": 7870, + "batting": 17401, + "battle": 7344, + "battle": 3528, + "battled": 37837, + "battlefield": 16055, + "battlefront": 42214, + "battleof": 47560, + "battles": 14213, + "battleship": 35165, + "battling": 17268, + "bau": 6055, + "bau": 34840, + "bauer": 22903, + "baugh": 41301, + 
"baum": 19840, + "bautista": 31881, + "bav": 21075, + "bavaria": 39977, + "bavarian": 44458, + "baw": 19808, + "bax": 21216, + "baxter": 26168, + "bay": 3631, + "bay": 2174, + "baya": 31573, + "bayan": 43895, + "bayarea": 28260, + "bayer": 48548, + "bayer": 29183, + "bayern": 14666, + "baylor": 21721, + "bayou": 33955, + "bays": 40156, + "baz": 10430, + "baz": 25268, + "bazaar": 20070, + "bazar": 49298, + "bb": 1174, + "bb": 3529, + "bba": 27762, + "bball": 15664, + "bbb": 33535, + "bbc": 5123, + "bbc": 5188, + "bbcc": 39052, + "bbce": 33818, + "bbcnews": 29370, + "bbcone": 28259, + "bbcqt": 37343, + "bbcr": 35802, + "bbcra": 17115, + "bbcradi": 49213, + "bbcradio": 22876, + "bbcsport": 49321, + "bbcspringwatch": 37358, + "bbctwo": 40395, + "bbcworld": 47340, + "bbe": 37559, + "bbed": 9077, + "bber": 7933, + "bbers": 36494, + "bbhutto": 28085, + "bbhuttozardari": 28135, + "bbi": 37047, + "bbin": 38553, + "bbing": 9787, + "bbins": 42504, + "bbl": 21961, + "bble": 26570, + "bble": 5924, + "bbled": 37626, + "bbles": 18093, + "bblo": 21231, + "bbloggers": 26614, + "bbly": 43031, + "bbm": 25382, + "bbmas": 22145, + "bbn": 28427, + "bbnaija": 20984, + "bbo": 21892, + "bbq": 41270, + "bbq": 6726, + "bbs": 10002, + "bbuk": 45978, + "bby": 11166, + "bby": 3810, + "bc": 3116, + "bc": 2162, + "bcc": 41509, + "bcci": 36138, + "bce": 36510, + "bcfc": 34359, + "bch": 36684, + "bcn": 25766, + "bcoz": 46373, + "bcpoli": 24389, + "bcs": 24909, + "bcu": 28299, + "bd": 24358, + "bd": 11165, + "bday": 33022, + "bday": 5781, + "bdg": 48418, + "bds": 26732, + "be": 571, + "be": 655, + "bea": 21886, + "bea": 20925, + "beach": 6068, + "beach": 2117, + "beaches": 12183, + "beachlife": 43824, + "beacon": 36883, + "beacon": 18858, + "beacons": 39395, + "bead": 31621, + "bead": 23557, + "beaded": 26661, + "beads": 14099, + "beagle": 30044, + "beak": 36498, + "beal": 45769, + "beale": 39717, + "beam": 35339, + "beam": 13663, + "beams": 23993, + "bean": 16471, + "bean": 5328, + "beanie": 21534, + "beans": 8302, + "bear": 6375, + "bear": 4298, + "bearable": 38608, + "bearcats": 33242, + "beard": 26157, + "beard": 9052, + "bearded": 28459, + "beardown": 43687, + "beards": 33020, + "bearer": 30686, + "bearers": 47986, + "bearing": 18370, + "bearings": 42083, + "bearish": 34829, + "bears": 6182, + "beasley": 43349, + "beast": 20847, + "beast": 6957, + "beastmode": 43076, + "beasts": 21771, + "beat": 3774, + "beat": 3018, + "beaten": 10864, + "beater": 41974, + "beati": 44386, + "beating": 10078, + "beatles": 11961, + "beatport": 31421, + "beatrice": 36922, + "beats": 6289, + "beatthe": 40550, + "beatty": 39903, + "beatz": 33363, + "beau": 1016, + "beau": 14298, + "beaufort": 45423, + "beaumont": 32857, + "beaut": 24559, + "beauti": 1154, + "beauties": 14874, + "beautiful": 13662, + "beautiful": 1215, + "beautifully": 10627, + "beauty": 12881, + "beauty": 2488, + "beav": 23260, + "beaver": 26432, + "beaver": 22874, + "beavers": 34513, + "beavs": 43909, + "bebe": 23331, + "bec": 6899, + "bec": 10773, + "became": 5464, + "because": 32714, + "because": 1631, + "becca": 27088, + "bech": 44055, + "beck": 8256, + "beck": 10396, + "becker": 26918, + "beckett": 27249, + "beckham": 18764, + "becky": 32406, + "becky": 18921, + "become": 2989, + "becomes": 6766, + "becoming": 6208, + "bed": 4152, + "bed": 2722, + "bedding": 31761, + "bedford": 20779, + "bedi": 39181, + "bedro": 18415, + "bedroom": 8411, + "bedrooms": 23996, + "beds": 13914, + "bedside": 47473, + "bedtime": 22115, + "bee": 6097, + "bee": 5028, + "beech": 32733, + 
"beech": 27596, + "beef": 21703, + "beef": 6529, + "beek": 37915, + "been": 33986, + "been": 1025, + "beep": 33432, + "beer": 8885, + "beer": 2544, + "beers": 10907, + "bees": 36249, + "bees": 9100, + "beet": 12582, + "beet": 28621, + "beethoven": 23656, + "beetle": 16534, + "beetles": 36317, + "beetro": 29251, + "beetroot": 31638, + "beets": 36087, + "before": 20898, + "before": 1348, + "beg": 2219, + "beg": 22401, + "began": 8636, + "begg": 36769, + "begging": 25371, + "begin": 19197, + "begin": 4947, + "beginner": 24351, + "beginners": 21930, + "beginning": 5791, + "beginnings": 22581, + "begins": 4635, + "begs": 43531, + "begun": 10514, + "beh": 21971, + "beh": 41612, + "beha": 5737, + "behalf": 11470, + "behave": 28825, + "behaved": 41617, + "behavi": 6149, + "behaving": 40745, + "behavior": 10461, + "behavioral": 25135, + "behaviors": 37741, + "behaviour": 14655, + "behavioural": 46019, + "behe": 42329, + "behin": 2335, + "behind": 2403, + "behindthe": 21104, + "behindthescenes": 26253, + "behold": 15929, + "bei": 38991, + "bei": 23227, + "beige": 26677, + "beij": 11547, + "beijing": 11796, + "bein": 39117, + "bein": 24168, + "being": 13481, + "being": 1265, + "beings": 17998, + "beingsalmankhan": 19637, + "beir": 20176, + "beirut": 22352, + "beit": 26963, + "bek": 46846, + "bek": 26135, + "bekind": 46691, + "bel": 1308, + "bel": 3543, + "bela": 30555, + "belarus": 30849, + "belated": 20256, + "belfast": 35100, + "belfast": 10015, + "belgi": 7001, + "belgian": 15008, + "belgium": 10239, + "belgrade": 30502, + "beli": 1859, + "beli": 45842, + "belichick": 46132, + "belie": 20854, + "beliebers": 27714, + "belief": 14802, + "beliefs": 20575, + "believ": 4972, + "believe": 15819, + "believe": 2649, + "believed": 13380, + "believein": 24294, + "believeinfilm": 37375, + "believer": 26057, + "believers": 28434, + "believes": 12017, + "believing": 19551, + "belinda": 44415, + "belize": 27990, + "bell": 5417, + "bell": 3718, + "bella": 18282, + "bella": 10418, + "bellamy": 34461, + "bellator": 31985, + "belle": 13587, + "belle": 11496, + "belles": 40678, + "bellevue": 32715, + "belli": 43335, + "bellletstalk": 42695, + "bello": 21954, + "bells": 12811, + "bellum": 35493, + "belly": 25901, + "belly": 10404, + "belmont": 25612, + "belo": 8379, + "belo": 41649, + "belong": 16453, + "belong": 13596, + "belonged": 39893, + "belonging": 28193, + "belongs": 14395, + "beloved": 9363, + "below": 3788, + "bels": 43127, + "belt": 36416, + "belt": 7373, + "belts": 21888, + "belvedere": 48003, + "ben": 1465, + "ben": 3518, + "bena": 46249, + "bench": 17770, + "bench": 8771, + "benches": 36349, + "benchmark": 31775, + "bend": 22100, + "bend": 13332, + "bender": 22551, + "bendigo": 48197, + "bending": 33897, + "bene": 12091, + "bene": 47151, + "beneath": 16850, + "bened": 13216, + "benedic": 24402, + "benedict": 47896, + "benedict": 18027, + "benef": 3260, + "benefici": 38593, + "beneficial": 24660, + "beneficiaries": 42160, + "benefit": 6399, + "benefited": 48266, + "benefiting": 29474, + "benefits": 5465, + "benefitting": 47222, + "benevol": 47060, + "benfica": 33873, + "beng": 6962, + "bengal": 17404, + "bengal": 16374, + "bengali": 33774, + "bengals": 23737, + "bengaluru": 21707, + "benghazi": 25967, + "benin": 40296, + "benitez": 46711, + "benjam": 10550, + "benjamin": 38647, + "benjamin": 12131, + "benji": 43548, + "benn": 39097, + "bennet": 48536, + "bennett": 12186, + "benny": 42369, + "benny": 20595, + "beno": 35268, + "benoit": 44373, + "benson": 19578, + "bent": 9809, + "bent": 18369, + 
"bentley": 16859, + "benton": 30812, + "benz": 27937, + "benz": 13470, + "ber": 867, + "ber": 1516, + "bera": 32802, + "bere": 17458, + "bered": 9193, + "beren": 33654, + "beret": 41658, + "berg": 12022, + "berg": 3294, + "bergen": 22918, + "berger": 35933, + "berger": 13873, + "bergh": 35120, + "bergman": 42597, + "bergs": 43592, + "berk": 15633, + "berke": 14639, + "berkeley": 46049, + "berkeley": 16667, + "berkshire": 27300, + "berlin": 23532, + "berlin": 5891, + "berman": 21514, + "bermu": 21032, + "bermuda": 24644, + "bern": 9195, + "bern": 18382, + "bernade": 46242, + "bernar": 11962, + "bernard": 14579, + "bernardino": 35328, + "bernardo": 27137, + "bernardo": 28696, + "bernardokath": 29081, + "bernat": 40578, + "berni": 18798, + "bernie": 40093, + "bernie": 10503, + "berniesanders": 23745, + "bernstein": 33936, + "berra": 15089, + "berries": 8319, + "berry": 15334, + "berry": 3488, + "bers": 6408, + "berser": 39037, + "bert": 17340, + "bert": 2358, + "berta": 45187, + "berth": 28317, + "bertie": 47182, + "berto": 34073, + "bertr": 36962, + "bertrand": 41594, + "berts": 30205, + "berty": 35973, + "berwick": 40407, + "bery": 11411, + "bes": 26911, + "bes": 3635, + "beside": 13519, + "besides": 17596, + "bespoke": 15612, + "bess": 43791, + "best": 3419, + "best": 949, + "bestbuy": 29749, + "bestest": 31199, + "bestfan": 23880, + "bestfanarmy": 24590, + "bestfriend": 29832, + "bestfriend": 11856, + "bestfriends": 23555, + "besti": 35210, + "bestie": 17188, + "besties": 27346, + "besto": 28615, + "bestof": 27892, + "bestof": 39533, + "bestseller": 25841, + "bestselling": 28632, + "bet": 1051, + "bet": 4430, + "beta": 43188, + "beta": 9505, + "betes": 10255, + "beth": 9993, + "beth": 4892, + "bethan": 18781, + "bethany": 39130, + "bethany": 27952, + "bethe": 12624, + "bethel": 33410, + "bethesda": 32527, + "bethle": 30760, + "bethlehem": 31827, + "betis": 45590, + "beto": 33721, + "betra": 18436, + "betrayal": 33171, + "betrayed": 35692, + "bets": 17107, + "betsy": 28946, + "bett": 17715, + "bett": 20489, + "betta": 36387, + "bette": 35855, + "better": 10320, + "better": 1539, + "bettertogether": 47392, + "betting": 14319, + "betts": 38637, + "betty": 36175, + "betty": 14350, + "between": 1957, + "beu": 38660, + "bev": 40324, + "bev": 30968, + "bever": 9924, + "beverage": 18694, + "beverages": 28521, + "beverley": 39165, + "beverly": 30906, + "beverly": 16728, + "beverlyhills": 45363, + "beware": 14532, + "bewithyou": 36787, + "bex": 18676, + "bex": 24748, + "bexhill": 49200, + "bey": 3234, + "bey": 6767, + "beyon": 11447, + "beyonce": 16632, + "beyoncé": 19219, + "beyond": 22246, + "beyond": 4432, + "bez": 28592, + "bez": 46764, + "bezos": 45000, + "bf": 19858, + "bf": 7990, + "bfc": 37183, + "bff": 11984, + "bffs": 31462, + "bfi": 34244, + "bg": 16674, + "bg": 11295, + "bgc": 47598, + "bgs": 47963, + "bgt": 40665, + "bh": 9930, + "bh": 13603, + "bha": 6144, + "bha": 33068, + "bhafc": 30779, + "bhagat": 49136, + "bhai": 48370, + "bhai": 20508, + "bhak": 34501, + "bham": 31874, + "bham": 23491, + "bhan": 27356, + "bhand": 48679, + "bhar": 9108, + "bharat": 27454, + "bharat": 17430, + "bharti": 46803, + "bhat": 23784, + "bhatt": 36143, + "bhav": 44950, + "bhi": 28943, + "bhi": 21955, + "bhk": 45070, + "bhm": 38741, + "bho": 19721, + "bhopal": 44573, + "bhp": 29776, + "bhs": 29195, + "bhu": 9172, + "bhuban": 38729, + "bhubanes": 41213, + "bhubaneswar": 45888, + "bhushan": 40884, + "bhutan": 32391, + "bhutto": 30153, + "bi": 717, + "bi": 3035, + "bia": 3841, + "biaf": 26961, + "biafra": 
36355, + "bian": 19531, + "bian": 9027, + "bianca": 25854, + "bianchi": 45720, + "bians": 28141, + "bias": 11268, + "biased": 22178, + "bib": 44607, + "bib": 21022, + "bibi": 31182, + "bibl": 20912, + "bible": 26738, + "bible": 7583, + "bibli": 23465, + "biblical": 22841, + "biblio": 49131, + "bic": 5960, + "bic": 10675, + "bice": 35589, + "biceps": 46735, + "bick": 27238, + "bicy": 9247, + "bicycle": 11652, + "bicycles": 31326, + "bid": 21035, + "bid": 5553, + "bidding": 23237, + "bide": 45178, + "biden": 19451, + "bids": 16148, + "bie": 5561, + "bie": 4173, + "bieber": 48725, + "bieber": 7535, + "bien": 19176, + "bien": 25742, + "biennale": 33776, + "biennial": 36609, + "bier": 27226, + "bier": 23508, + "bies": 7867, + "big": 1915, + "big": 1205, + "bigbaldhead": 30325, + "bigbang": 41680, + "bigbang": 23734, + "bigdata": 9440, + "bige": 37762, + "bigfoot": 37095, + "bigg": 15312, + "bigg": 35399, + "biggboss": 27056, + "bigger": 6806, + "biggest": 19483, + "biggest": 3505, + "biggie": 28392, + "biggs": 46507, + "bigh": 18106, + "bighit": 35508, + "bigo": 14278, + "bigolive": 20735, + "bigotry": 37269, + "bigre": 36330, + "bih": 33471, + "bihar": 22849, + "bij": 42478, + "bik": 30306, + "bike": 11686, + "bike": 3701, + "biker": 36100, + "biker": 23449, + "bikers": 29468, + "bikes": 9227, + "bikin": 12638, + "biking": 19157, + "bikini": 14531, + "bil": 3092, + "bil": 20506, + "bilateral": 25599, + "bilbao": 34802, + "bild": 35512, + "bile": 25943, + "bilingual": 29623, + "bilities": 13582, + "bility": 4694, + "bill": 4444, + "bill": 2886, + "billboard": 10856, + "billboards": 34741, + "billed": 37558, + "billi": 7693, + "billie": 23990, + "billing": 31797, + "billings": 43615, + "billion": 14520, + "billion": 5729, + "billionaire": 19475, + "billionaires": 41590, + "billions": 20742, + "bills": 9810, + "billsmafia": 48845, + "billy": 15626, + "billy": 6814, + "bilt": 44770, + "bilt": 26654, + "bim": 46737, + "bim": 24775, + "bin": 4849, + "bin": 5346, + "binance": 43520, + "binary": 23497, + "bind": 44513, + "binder": 30541, + "binding": 21287, + "bine": 34848, + "bing": 24818, + "bing": 5665, + "binge": 22600, + "bingham": 43785, + "bingham": 47296, + "bingo": 18418, + "bino": 29172, + "bino": 24313, + "bins": 26934, + "bint": 43647, + "bio": 2830, + "bio": 5162, + "biode": 43502, + "biodegradable": 47740, + "biodiversity": 17428, + "biof": 45158, + "biographical": 49232, + "biography": 15423, + "biological": 18821, + "biologist": 35149, + "biology": 9796, + "biom": 13010, + "biomar": 44549, + "biomass": 36746, + "biome": 26218, + "biomed": 29280, + "biomedical": 33117, + "bionic": 46201, + "biop": 15009, + "biopic": 27942, + "bios": 48505, + "biotech": 22514, + "biotechnology": 40375, + "biotic": 33773, + "biotics": 41371, + "bious": 31845, + "bipartisan": 32266, + "bipolar": 37097, + "bique": 27809, + "bir": 921, + "bir": 16284, + "birch": 31569, + "birch": 22907, + "bird": 6908, + "bird": 3329, + "birdie": 29612, + "birdies": 45618, + "birding": 15851, + "birdman": 41915, + "birdphotography": 47999, + "birds": 41951, + "birds": 4337, + "birdwatching": 33497, + "birk": 48289, + "birken": 40661, + "birmin": 37482, + "birmingham": 38580, + "birmingham": 7720, + "birth": 1128, + "birth": 5397, + "birthday": 7381, + "birthday": 1166, + "birthdays": 17954, + "birthplace": 31429, + "biryani": 46489, + "bis": 5064, + "bis": 14461, + "biscu": 11532, + "biscuit": 18731, + "biscuits": 18248, + "bisexual": 36829, + "bish": 33690, + "bish": 31461, + "bishop": 20625, + "bishop": 8024, + "bishops": 
31579, + "bison": 19741, + "bistro": 21770, + "bit": 3010, + "bit": 2010, + "bitcoin": 30848, + "bitcoin": 6366, + "bite": 41613, + "biting": 23016, + "bits": 7747, + "bitt": 39251, + "bius": 45525, + "bix": 46579, + "biz": 8212, + "biz": 5431, + "biza": 47013, + "bizar": 14886, + "bizarre": 16965, + "bizhour": 39462, + "bizitalk": 34929, + "bj": 4592, + "bj": 18229, + "bjj": 27437, + "bjor": 26525, + "bjp": 37264, + "bjp": 6178, + "bk": 15099, + "bk": 14083, + "bkk": 36433, + "bl": 833, + "bl": 9467, + "bla": 2205, + "bla": 19630, + "blac": 21008, + "black": 2025, + "black": 1449, + "blackand": 12809, + "blackandwhite": 23688, + "blackandwhite": 19506, + "blackandwhitephotography": 27544, + "blackberry": 16470, + "blackbird": 38526, + "blackburn": 23789, + "blackfish": 42193, + "blackfriday": 16445, + "blackgirl": 43591, + "blackhawks": 19203, + "blackhistory": 46982, + "blackhistorymonth": 20135, + "blacklist": 30295, + "blacklivesmatter": 23467, + "blackmail": 47295, + "blackops": 43519, + "blackout": 21733, + "blackpanther": 36592, + "blackpink": 20339, + "blackpool": 21031, + "blacks": 16351, + "blackwell": 42642, + "blad": 36635, + "bladder": 33593, + "blade": 10264, + "blades": 16893, + "blah": 29212, + "blaine": 32457, + "blair": 31824, + "blair": 14749, + "blake": 20229, + "blake": 9579, + "blame": 10695, + "blamed": 32906, + "blames": 27841, + "blaming": 29287, + "blan": 4609, + "blanc": 30936, + "blanc": 13301, + "blanca": 40670, + "blanchard": 40177, + "blanche": 34875, + "blanchett": 49378, + "blanco": 26801, + "bland": 44372, + "bland": 30799, + "blank": 15134, + "blanket": 12878, + "blankets": 24042, + "blanks": 48599, + "blasio": 35553, + "blasphe": 36622, + "blast": 46349, + "blast": 5964, + "blasted": 38976, + "blaster": 36341, + "blasting": 26178, + "blasts": 23067, + "blat": 22048, + "blatant": 41391, + "blatt": 39138, + "blau": 45307, + "blaz": 43413, + "blaze": 15497, + "blazer": 17606, + "blazers": 16984, + "blazing": 25267, + "bldg": 22981, + "ble": 1447, + "ble": 1059, + "bleach": 27034, + "bleak": 40355, + "bled": 12006, + "bleed": 23027, + "bleed": 24791, + "bleedblue": 39160, + "bleeding": 20311, + "bleeds": 47339, + "blen": 25651, + "blend": 10780, + "blended": 25813, + "blender": 25066, + "blending": 34307, + "blends": 28572, + "bler": 31305, + "bler": 11979, + "blers": 26930, + "bles": 5763, + "bless": 9640, + "bless": 5387, + "blessed": 4411, + "blessing": 10729, + "blessings": 11185, + "bleu": 30114, + "blew": 18176, + "bley": 43176, + "bli": 1450, + "bli": 28051, + "blin": 9678, + "blin": 5406, + "blind": 17248, + "blind": 8351, + "blinded": 49149, + "blindness": 38812, + "blinds": 32449, + "bling": 39764, + "bling": 7097, + "blink": 18976, + "bliss": 28531, + "bliss": 12893, + "blissful": 42145, + "blit": 39327, + "blitz": 42151, + "blitz": 17548, + "blizz": 13075, + "blizzard": 16111, + "blk": 42950, + "blk": 22872, + "blm": 30957, + "bln": 47348, + "blo": 1204, + "blo": 25505, + "blob": 49312, + "bloc": 30961, + "block": 4638, + "block": 4593, + "blockade": 33489, + "blockbuster": 19939, + "blockchain": 6653, + "blocked": 9106, + "blocker": 44767, + "blocking": 12652, + "blocks": 10113, + "blog": 16376, + "blog": 2589, + "blogg": 33282, + "blogged": 41380, + "blogger": 21352, + "blogger": 7806, + "bloggerrt": 48898, + "bloggers": 11627, + "blogging": 18090, + "blogpost": 41842, + "blogs": 16682, + "bloke": 24384, + "blom": 48996, + "blon": 7958, + "blond": 32426, + "blonde": 10711, + "blondes": 45130, + "blondie": 39236, + "bloo": 2373, + "blood": 9231, 
+ "blood": 3590, + "blooded": 41946, + "bloodh": 48480, + "bloods": 39539, + "bloody": 38568, + "bloody": 9468, + "bloom": 7311, + "bloom": 10257, + "bloomberg": 43109, + "bloomberg": 21238, + "bloomfield": 40342, + "blooming": 45175, + "blooming": 19266, + "bloomington": 34731, + "blooms": 21439, + "bloss": 10017, + "blossom": 14472, + "blossoms": 21916, + "blot": 41710, + "blou": 44506, + "blouse": 23525, + "blow": 15230, + "blow": 10211, + "blower": 25832, + "blowing": 12087, + "blown": 11848, + "blowout": 34857, + "blows": 21063, + "blr": 47250, + "bls": 39458, + "blu": 1263, + "blu": 10273, + "blue": 3829, + "blue": 1746, + "bluebells": 47150, + "blueberries": 29551, + "blueberry": 18251, + "bluebird": 40747, + "bluec": 43194, + "bluef": 41174, + "bluegrass": 26241, + "bluejays": 18684, + "blueprint": 30594, + "blues": 17566, + "blues": 5159, + "blueslyrix": 47068, + "bluet": 13469, + "bluetooth": 14052, + "bluewave": 40025, + "bluff": 27232, + "bluffs": 48844, + "blum": 34818, + "blumen": 38714, + "blun": 34472, + "blunt": 19305, + "blur": 12102, + "blur": 27976, + "bluray": 36818, + "blurred": 38013, + "blurry": 21977, + "blush": 22889, + "blvd": 12578, + "bly": 20930, + "bly": 4426, + "bm": 4773, + "bm": 15916, + "bma": 42573, + "bmc": 27807, + "bmi": 40642, + "bmo": 39083, + "bms": 34074, + "bmw": 26637, + "bmw": 7869, + "bmx": 22535, + "bn": 10496, + "bn": 7992, + "bnb": 20010, + "bnha": 49336, + "bnp": 47910, + "bnw": 35903, + "bo": 647, + "bo": 2525, + "boa": 14732, + "boar": 7837, + "boar": 35473, + "board": 10419, + "board": 1972, + "boarded": 43052, + "boarder": 37414, + "boardgame": 47829, + "boardgames": 32646, + "boarding": 10086, + "boardroom": 47937, + "boards": 7963, + "boardwalk": 29043, + "boast": 44467, + "boasts": 30309, + "boat": 12426, + "boat": 4440, + "boath": 45461, + "boating": 21951, + "boats": 10080, + "boatsales": 46244, + "bob": 8444, + "bob": 4423, + "boba": 39948, + "bobb": 16891, + "bobble": 38796, + "bobblehead": 33451, + "bobby": 17847, + "bobby": 7816, + "bobc": 26153, + "bobcat": 37896, + "bobcats": 27568, + "bobo": 38939, + "bobs": 45533, + "boc": 27307, + "boc": 39042, + "boca": 26094, + "bock": 24961, + "bod": 17904, + "bod": 26340, + "boda": 42030, + "bode": 28452, + "bode": 40429, + "bodega": 47350, + "bodied": 36892, + "bodies": 9799, + "bodily": 49119, + "body": 7132, + "body": 1774, + "bodybuilding": 24538, + "bodyguard": 35565, + "boe": 23476, + "boe": 21773, + "boeh": 38002, + "boehner": 44599, + "boeing": 48135, + "boeing": 11857, + "boer": 44889, + "boer": 40768, + "bog": 23426, + "bog": 28318, + "bogo": 35769, + "bogota": 47059, + "bogus": 42907, + "boh": 43238, + "bohe": 40541, + "bohemi": 21552, + "bohemian": 25753, + "boho": 25444, + "boi": 37129, + "boi": 12673, + "boil": 31332, + "boiled": 23886, + "boiler": 28212, + "boiler": 25615, + "boiling": 32019, + "bois": 47742, + "bois": 21640, + "boise": 23304, + "bok": 26671, + "bok": 15289, + "boko": 30929, + "boks": 40216, + "bol": 2860, + "bol": 8413, + "bola": 12840, + "bold": 26975, + "bold": 8911, + "boldand": 48413, + "boldly": 44778, + "boli": 12722, + "bolic": 27343, + "bolivia": 28628, + "bollah": 36336, + "bolly": 25302, + "bollywood": 32448, + "bollywood": 9604, + "bolo": 40236, + "bolog": 22818, + "bologna": 27513, + "bolster": 47304, + "bolt": 13131, + "bolton": 48757, + "bolton": 16598, + "bolts": 26028, + "bom": 3012, + "bom": 19469, + "bomb": 18091, + "bomb": 6331, + "bombar": 25544, + "bombardier": 42700, + "bombay": 48602, + "bombay": 23890, + "bombed": 24542, + 
"bomber": 15436, + "bombers": 21786, + "bombing": 14475, + "bombings": 43236, + "bombs": 14410, + "bombshell": 36340, + "bon": 1871, + "bon": 4216, + "bona": 33342, + "bonanza": 40304, + "bond": 37022, + "bond": 6826, + "bonded": 37390, + "bondi": 40092, + "bonding": 19609, + "bonds": 15786, + "bone": 22502, + "bone": 6195, + "bones": 9476, + "bonfire": 23151, + "bongo": 47519, + "boni": 32269, + "boni": 46356, + "bonita": 42896, + "bonjour": 33176, + "bonkers": 39865, + "bonn": 38969, + "bonnar": 47191, + "bonnaroo": 48777, + "bonne": 25844, + "bonnet": 30636, + "bonnie": 18555, + "bono": 24476, + "bons": 42883, + "bonsai": 44129, + "bonus": 8164, + "bonuses": 35144, + "boo": 824, + "boo": 7317, + "boogie": 22639, + "book": 2828, + "book": 1116, + "bookboost": 31257, + "bookclub": 34438, + "bookday": 26327, + "booked": 12584, + "booker": 21302, + "bookfest": 39381, + "booking": 10145, + "bookings": 18345, + "booklet": 27405, + "bookmark": 33596, + "bookof": 45629, + "bookreview": 27362, + "books": 44382, + "books": 2161, + "bookshelf": 34821, + "bookshop": 24705, + "bookstore": 17999, + "bookstores": 46416, + "bookworm": 20743, + "boom": 9609, + "boom": 7121, + "boomer": 33819, + "boomer": 31766, + "boomers": 37988, + "booming": 33487, + "boon": 24979, + "boon": 35821, + "boone": 23453, + "boop": 45047, + "boost": 44639, + "boost": 6260, + "boosted": 37631, + "booster": 20877, + "boosters": 46859, + "boosting": 28480, + "boosts": 29247, + "boot": 10843, + "boot": 8087, + "bootcamp": 22051, + "booted": 42564, + "booth": 47895, + "booth": 3971, + "booths": 32653, + "booties": 46188, + "bootleg": 38139, + "boots": 7319, + "booze": 24341, + "bop": 19720, + "bor": 1141, + "bor": 15093, + "bora": 24736, + "bord": 36891, + "bordeaux": 22009, + "border": 16304, + "border": 6177, + "borderlands": 38676, + "borders": 13900, + "bore": 14084, + "bore": 24638, + "bored": 8933, + "boredom": 31460, + "boretum": 38902, + "borg": 14770, + "borgh": 17180, + "boring": 12519, + "boris": 31212, + "boris": 15704, + "borisjohnson": 44481, + "born": 17695, + "born": 2683, + "borne": 42910, + "borne": 9328, + "borneo": 33332, + "bornon": 41811, + "bornonthisday": 42757, + "boro": 26796, + "boro": 7974, + "borough": 22761, + "borough": 6203, + "borrow": 22293, + "borrowed": 28224, + "borrowing": 41045, + "borussia": 36764, + "bos": 14885, + "bos": 9644, + "bosa": 46946, + "bosch": 42009, + "bosch": 19466, + "bosco": 36960, + "bose": 23142, + "bosh": 42244, + "bosni": 42924, + "bosnia": 31396, + "boss": 17935, + "boss": 4206, + "bosses": 23906, + "boston": 11540, + "boston": 4399, + "bostonmarathon": 44533, + "bot": 4136, + "bot": 6947, + "botan": 12554, + "botanic": 32560, + "botanical": 21026, + "botany": 22612, + "botd": 34451, + "both": 36575, + "both": 2212, + "bother": 21125, + "bothered": 27997, + "botox": 43449, + "bots": 13721, + "botswana": 27584, + "bott": 3520, + "bott": 37225, + "bottle": 37306, + "bottle": 5392, + "bottled": 29331, + "bottlen": 46439, + "bottles": 9754, + "bottling": 42006, + "bottom": 32314, + "bottom": 5931, + "bottoms": 31524, + "bou": 3728, + "bou": 23165, + "bouchard": 47930, + "boudo": 48827, + "bought": 4142, + "boul": 24830, + "boulder": 18260, + "boule": 17652, + "boulevard": 19504, + "boun": 5993, + "bounce": 14316, + "bouncing": 32060, + "bouncy": 43415, + "bound": 15140, + "bound": 4567, + "boundaries": 18690, + "boundary": 21344, + "bounds": 37469, + "bounty": 21142, + "bouquet": 20961, + "bour": 2934, + "bour": 35486, + "bourbon": 48118, + "bourbon": 14652, + "bourdain": 
48095, + "bourg": 20690, + "bourgeo": 45672, + "bourn": 39143, + "bourne": 13789, + "bourne": 5192, + "bournemouth": 20911, + "bout": 19982, + "bout": 8123, + "bouti": 10926, + "boutique": 12179, + "bow": 2297, + "bow": 4040, + "bowden": 48538, + "bowed": 49130, + "bowel": 36880, + "bowen": 25368, + "bower": 40414, + "bowers": 42238, + "bowie": 13036, + "bowing": 46398, + "bowl": 26719, + "bowl": 3814, + "bowled": 39987, + "bowler": 25528, + "bowlers": 42632, + "bowles": 41611, + "bowling": 10390, + "bowls": 17787, + "bowman": 22052, + "bows": 17000, + "bowser": 38234, + "bowski": 48311, + "box": 2774, + "box": 2063, + "boxed": 24190, + "boxer": 40394, + "boxer": 15363, + "boxers": 31019, + "boxes": 8350, + "boxing": 33669, + "boxing": 5554, + "boy": 2927, + "boy": 1876, + "boyband": 31568, + "boyce": 44480, + "boycot": 46208, + "boycott": 31615, + "boycott": 19559, + "boyd": 18295, + "boyfriend": 7328, + "boyfriends": 36541, + "boyle": 22802, + "boys": 25223, + "boys": 2034, + "boyz": 16152, + "bp": 23410, + "bp": 11558, + "bpa": 43855, + "bpd": 48587, + "bpl": 28901, + "bpm": 40338, + "bps": 37794, + "br": 711, + "br": 7532, + "bra": 1195, + "bra": 5860, + "brac": 6663, + "brace": 8376, + "brace": 9183, + "bracelet": 8969, + "bracelets": 20027, + "braces": 19249, + "brack": 25676, + "bracket": 14780, + "brackets": 36183, + "brad": 4848, + "brad": 9405, + "bradbury": 45097, + "braden": 46842, + "bradford": 15062, + "bradley": 31905, + "bradley": 10952, + "brador": 24062, + "bradshaw": 37556, + "brady": 42494, + "brady": 11117, + "brae": 42874, + "brae": 40040, + "brag": 30110, + "bragg": 38545, + "bragging": 38199, + "brah": 20276, + "brahms": 45114, + "brai": 25048, + "braid": 31067, + "braided": 39997, + "braids": 34221, + "brain": 9454, + "brain": 4812, + "brains": 17129, + "brainstorming": 36607, + "braised": 28363, + "brake": 14937, + "brakes": 23456, + "bral": 31309, + "bram": 14815, + "bram": 39456, + "brampton": 35124, + "bran": 3684, + "bran": 28348, + "brance": 36072, + "brance": 15413, + "branch": 7998, + "branches": 15843, + "brand": 3910, + "brand": 2896, + "branded": 18097, + "brandi": 41003, + "branding": 10841, + "brando": 41892, + "brandon": 20423, + "brandon": 9166, + "brands": 8681, + "brandt": 22552, + "brandy": 26232, + "brane": 32340, + "branson": 28280, + "brant": 28951, + "brant": 47592, + "braries": 46377, + "brary": 24520, + "bras": 22611, + "brasil": 18991, + "brass": 24348, + "brass": 11655, + "brat": 26717, + "brat": 26631, + "brate": 41864, + "braun": 39129, + "braun": 29309, + "brave": 25461, + "brave": 7769, + "braved": 47663, + "bravely": 42303, + "bravery": 25831, + "braves": 14422, + "braving": 43258, + "bravo": 38613, + "bravo": 13006, + "braw": 37871, + "brawl": 26066, + "braxton": 37451, + "bray": 26256, + "bray": 22993, + "braz": 4625, + "brazil": 47459, + "brazil": 6305, + "brazili": 45697, + "brazilian": 12111, + "brb": 25316, + "brc": 40393, + "bre": 887, + "bre": 7782, + "brea": 7318, + "brea": 46538, + "breach": 21363, + "breaches": 45173, + "bread": 18886, + "bread": 5066, + "breads": 43064, + "break": 2206, + "break": 2568, + "breakable": 30691, + "breakaway": 42732, + "breakdown": 14519, + "breaker": 14814, + "breakers": 22270, + "breakfa": 45931, + "breakfast": 30210, + "breakfast": 3290, + "breaking": 14698, + "breaking": 2755, + "breakingbad": 38032, + "breakingnews": 23837, + "breakout": 16752, + "breaks": 7263, + "breakthrough": 18802, + "breakup": 38931, + "breast": 12930, + "breast": 9475, + "breastcancer": 40813, + "breastcancer": 
30065, + "breastfeeding": 29033, + "breasts": 37637, + "breath": 9508, + "breath": 9576, + "breathe": 11364, + "breathing": 14959, + "breathtaking": 14709, + "brecht": 34622, + "breck": 44598, + "bred": 46929, + "bred": 16008, + "bree": 7892, + "bree": 37138, + "breed": 28030, + "breed": 13791, + "breeders": 37472, + "breeding": 16544, + "breeds": 29021, + "breen": 48013, + "brees": 46721, + "breeze": 13125, + "breezy": 21451, + "breit": 23864, + "breitbart": 37926, + "brek": 35494, + "bremen": 39861, + "bren": 5209, + "brenda": 23786, + "brendan": 35134, + "brendan": 15414, + "brendon": 36756, + "brennan": 22372, + "brenner": 42941, + "brent": 31439, + "brent": 16355, + "brentwood": 33108, + "brero": 47781, + "bres": 32561, + "bret": 38020, + "bret": 32548, + "brethren": 43134, + "breton": 32290, + "brett": 22591, + "brett": 12394, + "brev": 42882, + "brevi": 39475, + "brew": 5048, + "brew": 7253, + "brewco": 33582, + "brewed": 23238, + "brewer": 20756, + "breweries": 35277, + "brewers": 17618, + "brewery": 8850, + "brewing": 8275, + "brewingco": 45155, + "brews": 21663, + "brewster": 40274, + "brex": 22726, + "brexit": 27666, + "brexit": 5801, + "brgy": 35983, + "bri": 1036, + "bri": 18636, + "bria": 35890, + "brian": 9824, + "brian": 4989, + "brianna": 32308, + "briar": 46119, + "bribe": 40042, + "bribery": 41792, + "bric": 27055, + "brice": 40190, + "brick": 13937, + "brick": 9518, + "bricks": 21029, + "brics": 48196, + "brid": 16995, + "bridal": 36875, + "bridal": 14284, + "bride": 18342, + "bride": 8964, + "brides": 18067, + "bridesma": 28356, + "bridesmaid": 43399, + "bridesmaids": 47754, + "bridg": 20623, + "bridge": 8647, + "bridge": 2465, + "bridgeport": 45201, + "bridges": 11811, + "bridget": 27073, + "bridgewater": 38732, + "bridging": 38109, + "brie": 26622, + "brief": 9435, + "brief": 8954, + "briefed": 47326, + "briefing": 12991, + "briefly": 26980, + "briefs": 29557, + "brien": 13504, + "brier": 43995, + "brig": 11081, + "briga": 46448, + "brigade": 16032, + "briggs": 28108, + "brigh": 6710, + "bright": 10383, + "bright": 4852, + "brighten": 18208, + "brightening": 43929, + "brighter": 18507, + "brightest": 26159, + "brightly": 36298, + "brightness": 42280, + "brighton": 28416, + "brighton": 9470, + "brigitte": 44421, + "brill": 27342, + "brill": 28601, + "brilli": 3821, + "brilliance": 28146, + "brilliant": 4106, + "brilliantly": 26803, + "brin": 25620, + "bring": 11596, + "bring": 2430, + "bringback": 28969, + "bringbackour": 45403, + "bringing": 4777, + "brings": 5138, + "brink": 39296, + "brink": 28796, + "brioche": 45818, + "bris": 9385, + "bris": 15783, + "brisban": 30431, + "brisbane": 42932, + "brisbane": 12407, + "brisk": 43646, + "brisket": 31920, + "bristol": 18159, + "bristol": 8010, + "brit": 2318, + "brit": 20066, + "britain": 40802, + "britain": 6272, + "britanni": 31373, + "britannia": 36188, + "brite": 33827, + "briti": 8155, + "british": 8651, + "british": 3504, + "britishmuseum": 41858, + "britney": 37192, + "britney": 21853, + "britneyspears": 42990, + "brits": 21832, + "britt": 10811, + "britt": 25976, + "brittany": 38187, + "brittany": 18818, + "britton": 37422, + "brium": 46079, + "brixton": 30056, + "bro": 927, + "bro": 4410, + "broad": 3491, + "broad": 12623, + "broadband": 21050, + "broadcast": 8967, + "broadcaster": 29005, + "broadcasting": 14403, + "broadcasts": 46742, + "broader": 36029, + "broadway": 34599, + "broadway": 9092, + "broc": 15587, + "broccoli": 19094, + "broch": 21419, + "brochure": 25275, + "brock": 14841, + "brock": 16745, + 
"brodie": 42150, + "brody": 29608, + "broke": 42165, + "broke": 6509, + "broken": 26126, + "broken": 5107, + "broker": 34032, + "broker": 20449, + "brokerage": 41327, + "brokers": 28271, + "brom": 18972, + "brom": 33296, + "bromance": 35353, + "bromley": 35715, + "bron": 4011, + "bron": 10243, + "bronco": 43488, + "bronco": 34370, + "broncos": 12516, + "bronson": 37042, + "bronte": 48936, + "bronx": 48310, + "bronx": 17183, + "brony": 21084, + "bronze": 8459, + "broo": 5204, + "brooch": 21207, + "brook": 4782, + "brook": 7322, + "brooke": 28576, + "brooke": 12549, + "brookes": 39707, + "brooklyn": 23253, + "brooklyn": 6983, + "brooks": 42779, + "brooks": 9991, + "broom": 32046, + "broom": 28008, + "broome": 49335, + "bros": 7776, + "broth": 29994, + "brotha": 33974, + "brother": 12697, + "brother": 3157, + "brotherhood": 19059, + "brothers": 4548, + "brou": 27874, + "brough": 21033, + "brought": 4222, + "brov": 42881, + "brow": 6547, + "brow": 15895, + "broward": 34719, + "brown": 6315, + "brown": 2866, + "browne": 28440, + "brownie": 23045, + "brownies": 22312, + "browning": 32241, + "browns": 14051, + "brows": 14998, + "browse": 19060, + "browser": 19768, + "browsing": 29318, + "brox": 43539, + "brs": 47485, + "brt": 46936, + "bru": 1698, + "bru": 31028, + "bruce": 21223, + "bruce": 7085, + "bruh": 17575, + "bruins": 14736, + "bruise": 48048, + "bruised": 46502, + "brum": 23862, + "brum": 28078, + "brun": 6870, + "brunch": 9113, + "brune": 29057, + "brunei": 41898, + "brunette": 35528, + "bruno": 14568, + "brunomars": 41156, + "brunswick": 24012, + "brush": 27969, + "brush": 8594, + "brushed": 30298, + "brushes": 21550, + "brushing": 35072, + "brussels": 11020, + "brut": 39499, + "brutal": 42144, + "brutal": 14556, + "brutality": 31348, + "brutally": 28132, + "brute": 47552, + "brux": 49093, + "bry": 6587, + "bry": 28228, + "bryan": 16134, + "bryan": 10412, + "bryant": 12256, + "bryce": 19895, + "bryn": 36569, + "bryn": 42877, + "bryson": 38990, + "bs": 11783, + "bs": 1329, + "bsa": 46619, + "bsb": 23070, + "bsbi": 41728, + "bsbibotany": 42086, + "bsc": 32031, + "bsd": 41848, + "bse": 46341, + "bsf": 48314, + "bsgo": 48474, + "bsp": 47977, + "bst": 19698, + "bsu": 46385, + "bt": 3317, + "bt": 4205, + "btc": 10315, + "btcc": 30759, + "btn": 44681, + "bto": 35516, + "btob": 29379, + "btr": 39767, + "bts": 15154, + "bts": 4007, + "btsarmy": 30302, + "btsbbmas": 35297, + "btsx": 44971, + "btv": 38541, + "btw": 9520, + "btwn": 28284, + "bu": 609, + "bu": 5831, + "bub": 27704, + "bub": 33158, + "bubb": 9739, + "bubba": 28149, + "bubble": 28687, + "bubble": 10799, + "bubblegum": 48078, + "bubbles": 17648, + "bubbly": 31034, + "buc": 8207, + "buccane": 32830, + "buccaneers": 38058, + "buch": 22623, + "bucha": 43582, + "buchan": 27237, + "buchanan": 28975, + "bucharest": 37013, + "buck": 6061, + "buck": 11433, + "bucket": 22596, + "bucket": 10498, + "bucketlist": 30778, + "buckets": 27168, + "buckeye": 34549, + "buckeyes": 30741, + "buckingham": 28736, + "buckle": 21948, + "buckley": 25905, + "bucks": 6103, + "bucky": 35916, + "bucs": 20011, + "bud": 2942, + "bud": 10737, + "buda": 18520, + "buda": 49012, + "budapest": 19202, + "budd": 7296, + "buddha": 13981, + "buddhism": 23744, + "buddhist": 18697, + "buddies": 14543, + "budding": 31992, + "buddy": 40948, + "buddy": 6557, + "budge": 32005, + "budget": 46758, + "budget": 5639, + "budgeting": 43789, + "budgets": 36419, + "buds": 14665, + "budweiser": 40900, + "buen": 15640, + "buena": 30876, + "buenas": 48529, + "bueno": 46202, + "buenos": 
26055, + "buf": 44417, + "buff": 5456, + "buff": 21416, + "buffal": 25836, + "buffalo": 31231, + "buffalo": 8054, + "buffalob": 38831, + "buffalobills": 44352, + "buffe": 13724, + "buffer": 33050, + "buffet": 17829, + "buffett": 34081, + "buffs": 28906, + "buffy": 33356, + "bug": 14453, + "bug": 8162, + "bugatti": 35451, + "buggy": 28963, + "bugs": 13850, + "buh": 31406, + "buhari": 14661, + "buick": 22000, + "buil": 1354, + "build": 22739, + "build": 3289, + "builder": 14474, + "builders": 17694, + "building": 21206, + "building": 2307, + "buildings": 8866, + "builds": 16449, + "buildthe": 41497, + "built": 45824, + "built": 3874, + "buk": 28084, + "buk": 24317, + "buka": 47778, + "bukit": 39888, + "bul": 2572, + "bul": 10200, + "bula": 18726, + "bulaga": 41575, + "bular": 32187, + "bulb": 22373, + "bulbs": 24808, + "bulgar": 15424, + "bulgaria": 20295, + "bulgarian": 38693, + "bulge": 47603, + "bulk": 19643, + "bull": 4537, + "bull": 6029, + "bulldo": 37675, + "bulldog": 34828, + "bulldog": 15611, + "bulldogs": 13916, + "bullet": 14340, + "bullet": 12465, + "bulletin": 19638, + "bulletproof": 43212, + "bullets": 22117, + "bullied": 34689, + "bullies": 39050, + "bullion": 49114, + "bullish": 22142, + "bullock": 33198, + "bullpen": 38081, + "bulls": 10313, + "bully": 43111, + "bully": 20190, + "bullying": 13548, + "bum": 27683, + "bum": 14226, + "bumble": 25585, + "bumble": 39303, + "bumblebee": 36911, + "bummed": 48456, + "bump": 9783, + "bump": 15877, + "bumped": 22495, + "bumper": 17881, + "bumping": 40196, + "bumps": 21115, + "bun": 2591, + "bun": 13665, + "bunch": 7796, + "bund": 41905, + "bunde": 18841, + "bundesliga": 21582, + "bundle": 11793, + "bundled": 47228, + "bundles": 29834, + "bundy": 37332, + "bung": 44748, + "bungal": 29549, + "bungalow": 33696, + "bunk": 41236, + "bunker": 23615, + "bunnies": 28998, + "bunny": 34198, + "bunny": 9258, + "buns": 22235, + "bunting": 30695, + "buon": 31350, + "buon": 48498, + "bur": 1039, + "bur": 17362, + "burbank": 34862, + "burberry": 30412, + "burch": 44588, + "burden": 18687, + "bure": 11902, + "bureau": 32098, + "bureau": 15400, + "burg": 19505, + "burg": 3499, + "burge": 20522, + "burger": 22356, + "burger": 6548, + "burgers": 13007, + "burgess": 26211, + "burgh": 18141, + "burgh": 4965, + "burgl": 25554, + "burglar": 43365, + "burglary": 32573, + "burgring": 40823, + "burgundy": 23650, + "buri": 46348, + "buri": 42614, + "burial": 22012, + "buried": 14233, + "burk": 48822, + "burke": 15340, + "burle": 27891, + "burlesque": 33732, + "burlington": 23370, + "burma": 30305, + "burmese": 47906, + "burn": 7934, + "burn": 4285, + "burnaby": 47541, + "burne": 27246, + "burned": 15022, + "burner": 23243, + "burnett": 28558, + "burnham": 36111, + "burning": 46107, + "burning": 8405, + "burnley": 24653, + "burnout": 36078, + "burns": 10234, + "burnt": 15185, + "burr": 30879, + "burrell": 49045, + "burrito": 23473, + "burritos": 47245, + "burroughs": 41337, + "burrows": 44846, + "burst": 13005, + "bursting": 32566, + "bursts": 37026, + "burt": 27162, + "burton": 42354, + "burton": 12704, + "burundi": 33595, + "bury": 12276, + "bury": 3899, + "burys": 32362, + "bus": 1319, + "bus": 2840, + "busan": 40172, + "busc": 35000, + "busch": 20475, + "buses": 12879, + "bush": 11191, + "bush": 6867, + "bushes": 37578, + "busiest": 32764, + "busine": 4598, + "busines": 25364, + "business": 8346, + "business": 1716, + "businesses": 7287, + "businessman": 25635, + "buss": 47764, + "bust": 31299, + "bust": 9959, + "busted": 18643, + "buster": 37219, + 
"buster": 12094, + "busters": 16362, + "busting": 29622, + "busy": 39332, + "busy": 4354, + "but": 2201, + "but": 767, + "butch": 35102, + "butcher": 18732, + "butchers": 42334, + "bute": 39240, + "butes": 14630, + "butler": 35867, + "butler": 10702, + "butt": 12500, + "butt": 31523, + "butte": 31678, + "butter": 5427, + "butter": 6952, + "butterflies": 16232, + "butterfly": 9738, + "buttermilk": 40180, + "butternut": 36867, + "buttery": 45535, + "button": 45480, + "button": 8007, + "buttons": 16188, + "butts": 25309, + "buu": 42313, + "buuren": 47752, + "buxton": 41370, + "buy": 11632, + "buy": 2131, + "buyer": 14682, + "buyers": 14663, + "buying": 6566, + "buys": 15560, + "buzz": 7866, + "buzz": 8706, + "buzzard": 47434, + "buzzer": 38064, + "buzzfeed": 26613, + "buzzing": 18511, + "bv": 18958, + "bv": 35861, + "bvb": 22454, + "bw": 17672, + "bw": 15120, + "bway": 26652, + "bwfc": 40918, + "bwo": 45902, + "bx": 33633, + "by": 1713, + "by": 638, + "bye": 20076, + "bye": 4460, + "byes": 47958, + "byl": 34994, + "byn": 46917, + "byn": 11890, + "byo": 28039, + "bypass": 26530, + "byr": 15534, + "byrd": 30369, + "byrne": 19676, + "byron": 43504, + "byron": 19775, + "bys": 26740, + "bystand": 46138, + "byte": 42798, + "bytes": 39538, + "bythe": 36621, + "byu": 41072, + "byu": 23770, + "byz": 35406, + "byzantine": 44081, + "bz": 13631, + "bé": 40365, + "bü": 38706, + "c": 66, + "c": 322, + "ca": 772, + "ca": 1684, + "caa": 19316, + "cab": 3033, + "cab": 11912, + "cabaret": 26263, + "cabbage": 18407, + "cabe": 32731, + "cabello": 34371, + "caber": 29062, + "cabernet": 33730, + "cabin": 14178, + "cabine": 23354, + "cabinet": 9937, + "cabinets": 33083, + "cabins": 48455, + "cable": 7925, + "cables": 22408, + "cabo": 37318, + "cabo": 28370, + "cabrera": 42338, + "cabs": 42048, + "cac": 8298, + "cac": 23872, + "cacao": 38022, + "cache": 28993, + "caching": 40655, + "cactus": 19794, + "cad": 6297, + "cad": 20166, + "caday": 34187, + "cadbury": 44698, + "caddy": 41521, + "cade": 10497, + "cade": 17306, + "cadet": 22764, + "cadets": 19160, + "cadillac": 18156, + "cae": 49264, + "caer": 28298, + "caes": 15740, + "caesar": 21642, + "caesars": 42162, + "caf": 3471, + "caf": 20867, + "cafc": 30748, + "cafe": 15201, + "cafe": 4979, + "cafes": 40166, + "cafeteria": 32817, + "caffe": 18258, + "caffe": 45416, + "caffeine": 22487, + "café": 15304, + "cag": 15714, + "cage": 11838, + "cages": 37939, + "cah": 40519, + "cahill": 33185, + "cai": 38971, + "cai": 36116, + "cain": 13747, + "caine": 16799, + "cair": 15804, + "cair": 46659, + "cairn": 31264, + "cairn": 42467, + "cairngor": 44067, + "cairns": 32941, + "cairo": 19615, + "cait": 14116, + "caitlin": 47768, + "caitlin": 26809, + "caitlyn": 35763, + "cajun": 43425, + "cajun": 33044, + "cak": 42986, + "cake": 15295, + "cake": 2972, + "cakeday": 46207, + "cakes": 5950, + "cal": 1198, + "cal": 6372, + "cala": 32133, + "calab": 31795, + "calais": 39886, + "calam": 28841, + "calc": 45055, + "calci": 22824, + "calcium": 27815, + "calcu": 15328, + "calcul": 15734, + "calculate": 37656, + "calculated": 40688, + "calculations": 44605, + "calculator": 26093, + "calculus": 35104, + "calcutta": 42901, + "calder": 29372, + "calder": 36817, + "caldwell": 30484, + "cale": 32674, + "caleb": 19619, + "caled": 28421, + "calend": 6057, + "calendar": 7122, + "calendars": 17229, + "calf": 17508, + "calgary": 27415, + "calgary": 10797, + "calhoun": 38929, + "cali": 2857, + "cali": 16337, + "caliber": 32820, + "calibr": 32597, + "calico": 45379, + "calif": 30839, + "califor": 3526, 
+ "californi": 21303, + "california": 3729, + "call": 7950, + "call": 1620, + "calla": 20658, + "callahan": 43313, + "callaway": 42596, + "callback": 44764, + "calle": 47699, + "calle": 38144, + "called": 2726, + "caller": 30666, + "calli": 16338, + "callie": 36512, + "calligraphy": 27775, + "calling": 4597, + "callister": 49026, + "callme": 42449, + "callof": 41280, + "calls": 4572, + "callum": 23224, + "calm": 34990, + "calm": 7011, + "calming": 30690, + "calorie": 32679, + "calories": 18029, + "cals": 47714, + "calum": 16405, + "calvary": 40169, + "calvert": 47134, + "calves": 31857, + "calvin": 27642, + "calvin": 17345, + "caly": 10244, + "calyp": 29851, + "cam": 1004, + "cam": 5982, + "camar": 31991, + "camber": 44362, + "cambo": 14662, + "cambodia": 17347, + "cambridge": 24651, + "cambridge": 9334, + "cambridgeshire": 46139, + "camden": 38735, + "camden": 17984, + "came": 1986, + "camel": 27005, + "camel": 21914, + "camels": 41357, + "cameo": 19492, + "camer": 4961, + "camera": 3934, + "cameraman": 43347, + "cameras": 12172, + "camero": 20320, + "cameron": 19634, + "cameron": 8057, + "camerondallas": 40587, + "cameroon": 24061, + "camil": 37745, + "camila": 19919, + "camilla": 38897, + "camille": 26741, + "camino": 28529, + "camo": 28702, + "camo": 19716, + "camogie": 39547, + "camou": 23588, + "camoufla": 23667, + "camouflage": 29049, + "camp": 2854, + "camp": 2877, + "campa": 2793, + "campaig": 9448, + "campaign": 44524, + "campaign": 3193, + "campaigner": 46364, + "campaigners": 40272, + "campaigning": 19594, + "campaigns": 15669, + "campan": 31765, + "campbell": 29094, + "campbell": 8806, + "campe": 16672, + "campeon": 49109, + "campeones": 30105, + "camper": 41914, + "camper": 24522, + "campers": 26619, + "campfire": 32530, + "campground": 46969, + "camping": 9982, + "campo": 27600, + "campos": 48077, + "camps": 12806, + "campsite": 44243, + "campu": 19687, + "campus": 4560, + "campuses": 31895, + "camra": 46155, + "camry": 46472, + "cams": 32590, + "can": 950, + "can": 753, + "cana": 28341, + "canad": 13193, + "canada": 2698, + "canadaday": 39800, + "canadi": 4329, + "canadian": 22160, + "canadian": 5255, + "canadians": 18989, + "canadiens": 40932, + "canal": 28585, + "canal": 9535, + "canals": 38483, + "canaria": 47117, + "canary": 40409, + "canary": 24523, + "canberra": 16719, + "canc": 43189, + "cancel": 12026, + "cancel": 21546, + "canceled": 25874, + "cancell": 28027, + "cancellation": 38765, + "cancelled": 13270, + "cancels": 34089, + "cancer": 12690, + "cancer": 3148, + "cancers": 33201, + "cancun": 34721, + "cand": 4986, + "candace": 45623, + "candel": 47834, + "candi": 6034, + "candice": 30024, + "candid": 7884, + "candid": 19206, + "candidacy": 46248, + "candidate": 6475, + "candidates": 8619, + "candied": 43982, + "candies": 46305, + "candle": 18995, + "candle": 12674, + "candlelight": 34724, + "candles": 15472, + "candy": 20741, + "candy": 6417, + "cane": 23644, + "cane": 14716, + "canelo": 43210, + "canes": 21902, + "cani": 35592, + "canine": 27380, + "cann": 4139, + "cann": 23709, + "cannab": 7577, + "cannabis": 31837, + "cannabis": 8861, + "canne": 44252, + "canned": 27290, + "cannes": 13773, + "canni": 26389, + "canning": 38621, + "cannon": 28771, + "cannon": 15661, + "cannons": 46269, + "cannot": 4785, + "canny": 26986, + "cano": 31668, + "cano": 25937, + "canoe": 23503, + "canola": 40389, + "canon": 17749, + "canon": 9310, + "canopy": 26061, + "cans": 13707, + "cant": 13395, + "cant": 5784, + "canteen": 39230, + "canter": 19301, + "canterbury": 22271, + 
"canti": 42845, + "cantina": 47472, + "canton": 37735, + "canton": 25363, + "cantore": 41769, + "cantwait": 33760, + "canu": 20171, + "canucks": 24321, + "canv": 30714, + "canvas": 22441, + "canvas": 7483, + "canvass": 40054, + "canvassing": 33783, + "cany": 47674, + "canyon": 41246, + "canyon": 9755, + "cao": 29207, + "cap": 1289, + "cap": 3938, + "capabilities": 19512, + "capability": 25885, + "capable": 14742, + "capac": 24665, + "capacity": 8970, + "capcom": 28342, + "cape": 10288, + "cape": 6631, + "capecod": 41339, + "capes": 38785, + "capetown": 20059, + "capit": 6889, + "capita": 41833, + "capital": 11198, + "capital": 5439, + "capitalism": 20068, + "capitalist": 37015, + "capitals": 29579, + "capitol": 43880, + "capitol": 11375, + "capo": 45477, + "capp": 16718, + "capped": 24659, + "capping": 42656, + "cappuccino": 37402, + "capri": 48699, + "capri": 30982, + "capric": 28667, + "capricorn": 46314, + "caps": 23185, + "capsu": 15608, + "capsul": 40341, + "capsule": 20627, + "capsules": 32870, + "capt": 45815, + "capt": 17369, + "captain": 14958, + "captain": 4621, + "captainamerica": 46229, + "captainmarvel": 48492, + "captains": 18706, + "caption": 11327, + "captions": 41878, + "captiv": 19776, + "captivating": 30580, + "captive": 29038, + "captivity": 41141, + "capture": 8818, + "captured": 8020, + "captures": 15305, + "capturing": 19548, + "capu": 44241, + "car": 811, + "car": 1615, + "cara": 20016, + "carab": 32251, + "carac": 30029, + "caracas": 45854, + "caramel": 14788, + "carameli": 41739, + "caramelized": 43854, + "carat": 32981, + "carav": 13814, + "caravan": 18566, + "carb": 21379, + "carbo": 43235, + "carbon": 14038, + "carbon": 7549, + "carbs": 29313, + "carcin": 31587, + "carcinoma": 46810, + "card": 10793, + "card": 2601, + "cardam": 49008, + "cardboard": 19845, + "cardi": 6211, + "cardi": 29677, + "cardiac": 21256, + "cardiff": 22488, + "cardiff": 9781, + "cardigan": 30501, + "cardin": 8457, + "cardinal": 46310, + "cardinal": 16472, + "cardinals": 12837, + "cardio": 15003, + "cardio": 23455, + "cardiology": 37276, + "cardiovascular": 29291, + "cardo": 40625, + "cards": 4094, + "care": 2050, + "care": 1776, + "cared": 27675, + "career": 20609, + "career": 3061, + "careers": 10090, + "careful": 11999, + "carefully": 15789, + "caregi": 22042, + "caregiver": 46372, + "caregivers": 35909, + "careless": 47325, + "carers": 26484, + "cares": 10968, + "caretaker": 48037, + "carey": 14895, + "cargo": 12490, + "cari": 18497, + "cari": 37273, + "carib": 9757, + "caribbean": 10368, + "caribou": 42135, + "caric": 25337, + "caricature": 38857, + "carina": 44357, + "caring": 13083, + "carl": 8273, + "carl": 9482, + "carla": 25552, + "carleton": 46496, + "carlin": 47559, + "carlisle": 23276, + "carlo": 17861, + "carlo": 15266, + "carlos": 9538, + "carlow": 44745, + "carls": 39635, + "carlson": 24114, + "carlton": 18934, + "carly": 23166, + "carly": 22689, + "carlyle": 46555, + "carmel": 30757, + "carmel": 25601, + "carmen": 41427, + "carmen": 18834, + "carmichael": 41657, + "carn": 21597, + "carnage": 31385, + "carnation": 44577, + "carnaval": 47238, + "carne": 17053, + "carne": 42885, + "carnegie": 25287, + "carney": 34194, + "carni": 8438, + "carnival": 36708, + "carnival": 10577, + "caro": 30317, + "caro": 29344, + "carol": 4242, + "carol": 11489, + "carole": 31955, + "carolin": 26418, + "carolina": 7027, + "caroline": 31064, + "caroline": 12641, + "carols": 33269, + "carolyn": 25825, + "carou": 32224, + "carousel": 36665, + "carp": 26085, + "carpen": 15584, + "carpenter": 18475, 
+ "carpet": 6922, + "carpets": 34612, + "carr": 26951, + "carr": 17136, + "carra": 32332, + "carre": 31114, + "carrera": 32952, + "carri": 4739, + "carriage": 47885, + "carriage": 21087, + "carrick": 44052, + "carrie": 30334, + "carrie": 15848, + "carried": 12960, + "carrier": 12308, + "carriers": 26865, + "carries": 17982, + "carrieunderwood": 47338, + "carrington": 48759, + "carroll": 41911, + "carroll": 14893, + "carrot": 15435, + "carrots": 19299, + "carry": 31863, + "carry": 6998, + "carrying": 9920, + "cars": 3346, + "carsforsale": 45222, + "carson": 41766, + "carson": 13171, + "cart": 27705, + "cart": 13065, + "cartag": 45042, + "cartagena": 47157, + "carte": 44949, + "cartel": 30529, + "carter": 27330, + "carter": 7260, + "cartier": 32951, + "carto": 5487, + "carton": 41812, + "cartoon": 33082, + "cartoon": 7651, + "cartoonist": 30793, + "cartoons": 17673, + "cartri": 47084, + "cartridge": 29432, + "cartridges": 49249, + "carts": 27581, + "cartunesapp": 32888, + "caruso": 45192, + "carve": 40152, + "carved": 15127, + "carver": 28850, + "carving": 19428, + "carvings": 48123, + "cary": 22844, + "cas": 1671, + "cas": 13831, + "casa": 14643, + "casablanc": 36572, + "casablanca": 41950, + "casc": 36714, + "casca": 43296, + "cascade": 29065, + "cascades": 46454, + "case": 17698, + "case": 2068, + "cases": 6888, + "casey": 24899, + "casey": 12836, + "cash": 11050, + "cash": 5131, + "cashback": 36368, + "cashe": 32233, + "cashew": 39531, + "cashi": 29517, + "cashier": 34547, + "cashmere": 34566, + "casi": 38350, + "casino": 10473, + "casio": 32261, + "cask": 26299, + "casm": 35198, + "casper": 35892, + "cass": 22556, + "cassandra": 35289, + "casser": 31093, + "casserole": 36045, + "cassette": 19717, + "cassi": 14942, + "cassidy": 21757, + "cassie": 29323, + "cassini": 46554, + "cast": 2509, + "cast": 1970, + "caste": 32693, + "casted": 33838, + "castel": 43306, + "castell": 31792, + "caster": 32101, + "caster": 8449, + "casters": 29721, + "castic": 47737, + "castillo": 30813, + "casting": 7087, + "castle": 12496, + "castle": 3540, + "castles": 24766, + "castro": 16950, + "casts": 10595, + "casu": 15345, + "casual": 10129, + "casually": 18840, + "casualties": 30244, + "casualty": 31222, + "cat": 1481, + "cat": 2368, + "cata": 42279, + "catal": 12792, + "catalan": 30532, + "catalina": 36576, + "catalo": 34740, + "catalog": 20036, + "catalogue": 20985, + "catalonia": 27039, + "catalunya": 44132, + "cataly": 15894, + "catalyst": 25387, + "catan": 45893, + "catap": 39514, + "catar": 35801, + "catastro": 22736, + "catastrophe": 41422, + "catastrophic": 34448, + "catch": 18901, + "catch": 3042, + "catcher": 15965, + "catchers": 39060, + "catches": 17213, + "catching": 8617, + "catchy": 37114, + "catday": 32243, + "cate": 6357, + "cate": 24510, + "cated": 31823, + "categor": 17006, + "categori": 40117, + "categories": 19971, + "category": 9432, + "cater": 16634, + "cater": 38101, + "catering": 16697, + "caterpillar": 27111, + "catfish": 26077, + "cath": 9196, + "cath": 30811, + "cathar": 43784, + "cathe": 7174, + "cathedr": 46370, + "cathedral": 7865, + "catherine": 35035, + "catherine": 12339, + "catho": 7595, + "cathol": 16315, + "catholic": 20382, + "catholic": 7757, + "catholics": 36808, + "cathy": 40326, + "cathy": 22731, + "cation": 21367, + "cato": 33558, + "cats": 38800, + "cats": 3989, + "catsofinstagram": 39901, + "catsoftwitter": 17273, + "catt": 37339, + "cattle": 48799, + "cattle": 13644, + "caturday": 20892, + "catwalk": 36565, + "catwoman": 47251, + "cau": 1121, + "cau": 45529, + 
"caucus": 18847, + "caught": 4520, + "caul": 23460, + "cauley": 41682, + "caulfield": 44906, + "cauli": 20123, + "cauliflower": 23802, + "cause": 18982, + "cause": 1394, + "caused": 8940, + "causes": 9775, + "causeway": 35034, + "causing": 10779, + "caution": 15656, + "cautious": 36579, + "cav": 4942, + "cav": 45935, + "cava": 48682, + "caval": 24537, + "cavali": 20783, + "cavalier": 44488, + "cavaliers": 30194, + "cavalry": 32467, + "cave": 25441, + "cave": 9654, + "cavendish": 42945, + "caver": 41487, + "caves": 22096, + "cavi": 27360, + "caviar": 31228, + "cavill": 40492, + "cavity": 43156, + "cavs": 16800, + "caw": 38405, + "caw": 43804, + "cawx": 26739, + "cay": 11876, + "cay": 37399, + "cayenne": 43650, + "cayman": 33737, + "caz": 48451, + "cb": 4034, + "cb": 8830, + "cba": 38472, + "cbb": 31487, + "cbc": 14096, + "cbc": 14523, + "cbd": 13176, + "cbe": 43639, + "cbi": 30875, + "cbj": 35608, + "cbn": 26579, + "cbp": 46723, + "cbr": 28762, + "cbs": 16788, + "cbs": 8009, + "cc": 2976, + "cc": 2021, + "cca": 17987, + "ccc": 21856, + "ccd": 48556, + "ccg": 37755, + "cch": 21789, + "cchini": 28467, + "cci": 32942, + "cci": 8196, + "ccl": 43773, + "ccm": 40435, + "cco": 28786, + "ccot": 24950, + "ccp": 43045, + "ccs": 30400, + "cctv": 23097, + "ccu": 49023, + "cd": 4308, + "cd": 4480, + "cda": 45565, + "cdc": 41098, + "cdc": 25779, + "cdn": 8886, + "cdn": 26802, + "cdnpoli": 11645, + "cdo": 47187, + "cdp": 39624, + "cds": 20784, + "cdt": 18455, + "ce": 685, + "ce": 629, + "cea": 28355, + "cean": 34409, + "cean": 37295, + "cease": 32856, + "cease": 25499, + "ceasefire": 38291, + "cebu": 20146, + "cec": 29694, + "cec": 40029, + "cecil": 26987, + "cecil": 27169, + "cecilia": 35440, + "ced": 25634, + "ced": 2323, + "cedar": 24167, + "cedar": 13799, + "cedric": 36608, + "cee": 45966, + "cee": 15015, + "cees": 47914, + "ceil": 27275, + "ceiling": 12374, + "ceilings": 33770, + "cek": 45544, + "cel": 2269, + "cel": 7597, + "cele": 1314, + "celeb": 38862, + "celeb": 19393, + "celebr": 1372, + "celebrate": 31414, + "celebrate": 2694, + "celebrated": 9184, + "celebrates": 7564, + "celebrating": 3382, + "celebration": 4615, + "celebrations": 10825, + "celebratory": 34115, + "celebrities": 17071, + "celebrity": 23981, + "celebrity": 7320, + "celebs": 19803, + "celed": 25741, + "celer": 9621, + "celery": 30990, + "celeste": 29364, + "celesti": 29497, + "celestial": 32669, + "celi": 25567, + "celia": 44489, + "celine": 33644, + "cell": 9316, + "cell": 5533, + "cellar": 24282, + "cellars": 44976, + "cellence": 34687, + "cello": 23013, + "cellphone": 39029, + "cells": 8890, + "cellu": 16791, + "cellular": 23268, + "cels": 24021, + "celsius": 47057, + "celtic": 21897, + "celtic": 10523, + "celticfc": 38612, + "celtics": 16226, + "cem": 41435, + "ceme": 10517, + "cement": 4369, + "cements": 19448, + "cemetery": 11660, + "cen": 1306, + "cen": 30106, + "cena": 21591, + "cence": 24410, + "cency": 41259, + "cene": 30038, + "censor": 24230, + "censor": 44709, + "censored": 30951, + "censorship": 27284, + "census": 23677, + "cent": 1784, + "cent": 3662, + "centenary": 22422, + "centennial": 20895, + "center": 16651, + "center": 2119, + "centered": 24584, + "centers": 14494, + "centi": 48889, + "centime": 48687, + "centr": 2370, + "central": 13448, + "central": 3339, + "centre": 26310, + "centre": 2916, + "centred": 47925, + "centres": 19354, + "centri": 30872, + "centric": 19297, + "centro": 37178, + "cents": 11934, + "centu": 16818, + "centuri": 36816, + "centuries": 19014, + "century": 26134, + "century": 4275, + 
"ceo": 46340, + "ceo": 3559, + "ceos": 28332, + "cep": 2632, + "cep": 48714, + "ceph": 44343, + "cept": 3678, + "ception": 12346, + "cer": 1364, + "cer": 1925, + "cera": 34608, + "ceram": 10677, + "ceramic": 15112, + "ceramics": 22438, + "cere": 3984, + "cere": 22085, + "cereal": 17581, + "cereals": 48618, + "cerebral": 39073, + "ceremon": 15796, + "ceremonial": 33281, + "ceremonies": 21547, + "ceremony": 5193, + "cern": 44851, + "cers": 13638, + "cert": 27522, + "certain": 8526, + "certain": 7883, + "certainly": 10883, + "certainty": 20054, + "certi": 4888, + "certific": 9443, + "certificate": 11786, + "certificates": 25281, + "certification": 14735, + "certified": 9288, + "cerv": 25738, + "cervical": 35953, + "ces": 28715, + "ces": 1604, + "cesar": 37025, + "cesar": 28603, + "cess": 2314, + "cess": 1554, + "cessna": 36596, + "cest": 27245, + "cester": 15769, + "cester": 12718, + "cet": 14960, + "cett": 46708, + "ceu": 37457, + "cevic": 48369, + "cey": 20971, + "cf": 10189, + "cf": 11171, + "cfa": 34521, + "cfb": 32931, + "cfc": 11577, + "cfd": 46171, + "cfl": 46320, + "cfl": 22332, + "cfo": 26937, + "cfp": 40756, + "cfr": 44033, + "cfs": 32835, + "cg": 27118, + "cg": 14740, + "cgc": 38775, + "cgi": 30520, + "ch": 540, + "ch": 634, + "cha": 1587, + "cha": 4541, + "chab": 26670, + "chad": 13095, + "chad": 12923, + "chae": 9460, + "chaf": 38123, + "chag": 27989, + "chai": 31590, + "chai": 18919, + "chain": 13898, + "chain": 3946, + "chained": 34402, + "chains": 14438, + "chainsaw": 37617, + "chainz": 39687, + "chair": 4728, + "chair": 4269, + "chaired": 31664, + "chairing": 42205, + "chairman": 6901, + "chairperson": 31584, + "chairs": 12033, + "chak": 13702, + "chak": 41713, + "chakra": 38304, + "chakra": 33241, + "chal": 7397, + "chal": 30809, + "chale": 38099, + "chalet": 37907, + "chalk": 31362, + "chalk": 17846, + "chall": 2073, + "challeng": 4138, + "challenge": 29462, + "challenge": 2836, + "challenged": 17380, + "challenger": 18228, + "challengers": 46404, + "challenges": 6280, + "challenging": 11754, + "chalmers": 47955, + "cham": 1290, + "cham": 19951, + "chamber": 18983, + "chamber": 7642, + "chamberlain": 32756, + "chambers": 16501, + "chamele": 34759, + "chameleon": 41317, + "champ": 36813, + "champ": 6602, + "champag": 10283, + "champagne": 11007, + "champi": 1680, + "champion": 2643, + "champion": 3950, + "champions": 4227, + "championship": 3429, + "championships": 7047, + "championsleague": 27638, + "champs": 6240, + "chan": 1255, + "chan": 6704, + "chana": 48752, + "chanc": 13931, + "chance": 32940, + "chance": 2594, + "chancellor": 15886, + "chances": 10870, + "chand": 7126, + "chand": 41508, + "chandelier": 30570, + "chandi": 12482, + "chandigarh": 34106, + "chandler": 17595, + "chandra": 27082, + "chandra": 25348, + "chanel": 16951, + "chang": 2233, + "chang": 16461, + "change": 11608, + "change": 1799, + "changeable": 41335, + "changed": 4907, + "changer": 18406, + "changers": 35185, + "changes": 4938, + "changing": 40384, + "changing": 5621, + "changmin": 47410, + "chann": 8804, + "channel": 25837, + "channel": 3847, + "channeling": 28197, + "channels": 13961, + "channing": 37417, + "chant": 18165, + "chant": 13521, + "chanting": 32111, + "chants": 22723, + "chanyeol": 18805, + "chao": 31815, + "chaos": 10853, + "chaotic": 33501, + "chap": 3825, + "chap": 21939, + "chapel": 40859, + "chapel": 10137, + "chaplain": 38348, + "chaplin": 32545, + "chapman": 17968, + "chapp": 20634, + "chaps": 36823, + "chapter": 6014, + "chapters": 22936, + "char": 1054, + "char": 16017, + 
"chara": 35668, + "charac": 2792, + "character": 10997, + "character": 4009, + "characterdesign": 38149, + "characteri": 20920, + "characteristic": 44747, + "characteristics": 26037, + "characters": 6564, + "charan": 31851, + "charcoal": 19268, + "chard": 17524, + "chardon": 26599, + "chardonnay": 28161, + "charge": 25032, + "charge": 5948, + "chargeable": 35664, + "charged": 7916, + "charger": 13090, + "chargers": 17352, + "charges": 8962, + "charging": 12514, + "chariot": 38811, + "charis": 24449, + "charisma": 45041, + "charismatic": 37205, + "charitable": 23256, + "charities": 18493, + "charity": 20008, + "charity": 4607, + "charitytuesday": 42794, + "charl": 47736, + "charle": 10217, + "charles": 27983, + "charles": 5127, + "charleston": 15478, + "charley": 38027, + "charli": 21784, + "charli": 49392, + "charlie": 16764, + "charlie": 6393, + "charlotte": 18445, + "charlotte": 7871, + "charlottesville": 32027, + "charlton": 27048, + "charm": 10876, + "charmed": 39790, + "charming": 12177, + "charms": 21944, + "charred": 44085, + "chart": 42685, + "chart": 5053, + "charted": 27939, + "charter": 42345, + "charter": 13569, + "chartered": 31298, + "charters": 46626, + "charting": 39841, + "charts": 10728, + "chas": 10717, + "chas": 29838, + "chase": 21503, + "chase": 3859, + "chased": 30342, + "chaser": 29560, + "chasers": 34158, + "chases": 45011, + "chasing": 46909, + "chasing": 13376, + "chassis": 29188, + "chast": 42176, + "chasu": 41352, + "chat": 5355, + "chat": 2402, + "chatbots": 43994, + "chate": 30377, + "chateau": 44582, + "chateau": 23520, + "chath": 46849, + "chatham": 32030, + "chats": 13263, + "chatt": 21618, + "chattanoo": 28009, + "chattanooga": 29866, + "chatted": 34124, + "chatter": 33473, + "chatter": 41103, + "chatting": 12401, + "chatur": 33839, + "chau": 11263, + "chau": 37536, + "chauffe": 45440, + "chauhan": 46663, + "chav": 28997, + "chavez": 27480, + "chaw": 39639, + "chay": 45317, + "chaz": 47815, + "chc": 36233, + "chd": 41645, + "che": 983, + "che": 3842, + "chea": 39580, + "chead": 48358, + "cheap": 27036, + "cheap": 8678, + "cheape": 26164, + "cheaper": 17776, + "cheapest": 26640, + "cheat": 18180, + "cheated": 34285, + "cheating": 19722, + "chec": 1113, + "check": 7672, + "check": 1217, + "checked": 10387, + "checker": 45883, + "checkers": 48181, + "checking": 7441, + "checklist": 26989, + "checkout": 13101, + "checkpoint": 27531, + "checks": 13737, + "ched": 11341, + "ched": 2146, + "cheddar": 20551, + "chee": 5326, + "chee": 20944, + "cheek": 40000, + "cheek": 21227, + "cheeks": 23019, + "cheeky": 15068, + "cheer": 9733, + "cheer": 6918, + "cheered": 38111, + "cheerful": 28882, + "cheering": 14289, + "cheerleader": 29072, + "cheerleaders": 22343, + "cheerleading": 36366, + "cheers": 6562, + "chees": 15182, + "cheese": 10738, + "cheese": 4108, + "cheeseburger": 41200, + "cheesecake": 17803, + "cheeses": 36076, + "cheesy": 22093, + "cheetah": 27431, + "chef": 12137, + "chef": 4895, + "chefs": 14486, + "chek": 43745, + "chel": 3084, + "chel": 25970, + "chell": 46854, + "chelle": 30141, + "chelms": 34936, + "chelmsford": 39890, + "chelse": 19071, + "chelsea": 6031, + "chelseafc": 25927, + "chelten": 18889, + "cheltenham": 21589, + "chem": 5667, + "chem": 13698, + "chemi": 7179, + "chemical": 39376, + "chemical": 9208, + "chemicals": 17426, + "chemist": 23138, + "chemistry": 8841, + "chemo": 33095, + "chemo": 36348, + "chemotherapy": 41412, + "chemtrails": 46015, + "chen": 5907, + "chen": 8983, + "cheney": 43522, + "cheng": 32512, + "cheng": 30190, + "chenko": 
29073, + "chennai": 28948, + "chennai": 12791, + "cheon": 11498, + "cheque": 28168, + "cher": 3597, + "cher": 3466, + "cheri": 26471, + "cherish": 20053, + "cherished": 42325, + "cherno": 35376, + "chernobyl": 40554, + "chero": 19844, + "cherokee": 22860, + "cherries": 27248, + "cherry": 21470, + "cherry": 7325, + "chers": 5789, + "chery": 38478, + "cheryl": 37784, + "cheryl": 20600, + "ches": 18346, + "ches": 1910, + "chesa": 28349, + "chesapeake": 32909, + "cheshire": 17130, + "chesney": 48747, + "chess": 27170, + "chess": 8397, + "chest": 18217, + "chest": 10563, + "chester": 10466, + "chester": 3343, + "chesterfield": 32975, + "chestnut": 21834, + "chet": 9663, + "chett": 24695, + "chev": 7152, + "chev": 41145, + "chevro": 12850, + "chevrolet": 13240, + "chevron": 33792, + "chevy": 16581, + "chew": 32645, + "chew": 22642, + "chewan": 23689, + "chewbacca": 49355, + "chewing": 31486, + "chewy": 42940, + "chey": 26968, + "chey": 31208, + "cheyenne": 34805, + "chez": 49183, + "chez": 10556, + "chf": 33021, + "chfield": 41619, + "chhat": 34127, + "chhattisgarh": 44246, + "chi": 1337, + "chi": 4039, + "chia": 19147, + "chiang": 33764, + "chibi": 22306, + "chic": 2627, + "chic": 9091, + "chica": 44190, + "chicag": 16778, + "chicago": 15038, + "chicago": 3530, + "chicagof": 40638, + "chicagofire": 46576, + "chicas": 40664, + "chichester": 43823, + "chick": 3170, + "chick": 11238, + "chicken": 26322, + "chicken": 3717, + "chickens": 21658, + "chickpea": 48109, + "chicks": 17810, + "chico": 30379, + "chie": 40046, + "chie": 12388, + "chief": 16830, + "chief": 3455, + "chiefs": 11419, + "chiev": 47761, + "chiff": 27407, + "chiffon": 31817, + "chig": 42952, + "chihu": 22857, + "chihuahu": 25437, + "chihuahua": 30181, + "chik": 45455, + "chil": 1333, + "child": 4392, + "child": 2913, + "childcare": 31133, + "childhood": 34772, + "childhood": 7551, + "childish": 31939, + "childre": 2135, + "children": 11101, + "children": 2153, + "childrens": 31551, + "childrens": 21553, + "childs": 39521, + "chile": 10022, + "chilean": 33186, + "chili": 13033, + "chill": 6498, + "chill": 6382, + "chilled": 23540, + "chillen": 45160, + "chilli": 26787, + "chilli": 17067, + "chillin": 10347, + "chilling": 10179, + "chillout": 39842, + "chills": 25460, + "chilly": 14450, + "chim": 10543, + "chimney": 26821, + "chimp": 44374, + "chin": 6555, + "chin": 8979, + "china": 38943, + "china": 2817, + "chinatown": 28582, + "chine": 4013, + "chinese": 30568, + "chinese": 4271, + "ching": 34621, + "ching": 1439, + "chino": 47181, + "chino": 27440, + "chinook": 41577, + "chinson": 33786, + "chio": 19650, + "chip": 19271, + "chip": 8730, + "chipmun": 46384, + "chipot": 17702, + "chipotle": 19284, + "chipp": 39854, + "chippe": 46541, + "chipped": 39892, + "chipping": 40323, + "chips": 8855, + "chir": 15564, + "chiro": 23413, + "chiroprac": 25987, + "chiropractic": 34437, + "chis": 19920, + "chistan": 20523, + "chiswick": 47290, + "chit": 13515, + "chit": 45626, + "chita": 49184, + "chitec": 39862, + "chive": 29222, + "chives": 34921, + "chk": 47424, + "chl": 38592, + "chley": 47748, + "chlo": 10374, + "chloe": 39966, + "chloe": 13992, + "chlor": 23135, + "chman": 35835, + "chment": 20848, + "chner": 48277, + "cho": 1327, + "cho": 5150, + "choa": 43077, + "choc": 32772, + "choc": 21983, + "choco": 46285, + "choco": 32692, + "chocol": 3443, + "chocolat": 44631, + "chocolate": 29389, + "chocolate": 3820, + "chocolates": 24120, + "choi": 23749, + "choic": 35606, + "choice": 23857, + "choice": 4051, + "choices": 11016, + "choir": 9214, 
+ "choirs": 43277, + "choke": 30231, + "choked": 43521, + "choker": 39642, + "choking": 39993, + "chol": 19802, + "cholera": 45999, + "cholester": 26861, + "cholesterol": 27982, + "chom": 25151, + "chon": 20416, + "chon": 21601, + "chondri": 37379, + "chong": 26220, + "choo": 3869, + "choo": 24437, + "chool": 29578, + "chools": 41958, + "choose": 22756, + "choose": 5073, + "chooses": 29923, + "choosing": 13475, + "chop": 10458, + "chop": 16663, + "chopin": 42256, + "chopped": 22580, + "chopper": 24011, + "chopping": 35375, + "chopra": 24258, + "chops": 26321, + "chor": 7567, + "chor": 47795, + "choral": 26684, + "chord": 33005, + "chords": 36152, + "choreo": 17443, + "choreographer": 35952, + "choreography": 32749, + "chores": 40483, + "chori": 25718, + "chorizo": 30802, + "chorus": 20869, + "chos": 26559, + "chose": 11090, + "chosen": 10044, + "chou": 16960, + "chou": 42917, + "choudhary": 45503, + "chow": 20257, + "chow": 21657, + "chowder": 37886, + "chp": 35896, + "chr": 36918, + "chri": 1135, + "chris": 9907, + "chris": 2978, + "chrisbrown": 41035, + "chriss": 46745, + "chrissy": 44762, + "chrissy": 40485, + "christ": 1403, + "christ": 6703, + "christchurch": 27100, + "christen": 31956, + "christensen": 42226, + "christi": 3328, + "christi": 33213, + "christian": 11792, + "christian": 4729, + "christianity": 20000, + "christians": 14842, + "christie": 16084, + "christin": 30189, + "christina": 15925, + "christine": 42610, + "christine": 14712, + "christma": 12039, + "christmas": 18174, + "christmas": 1677, + "christmaseve": 44381, + "christmass": 44873, + "christop": 7917, + "christoph": 47844, + "christophe": 45486, + "christopher": 33349, + "christopher": 9630, + "christy": 28331, + "chro": 13207, + "chromatic": 44207, + "chrome": 24843, + "chrome": 9529, + "chromo": 35809, + "chron": 5577, + "chron": 39781, + "chronic": 10115, + "chronic": 13677, + "chronicle": 20034, + "chronicles": 18905, + "chrono": 29387, + "chronograph": 38397, + "chry": 13508, + "chrysler": 20078, + "chs": 40277, + "chs": 8391, + "chsnews": 44919, + "cht": 11384, + "chter": 47811, + "chu": 3799, + "chu": 13622, + "chubby": 29109, + "chuck": 13211, + "chuck": 9894, + "chuckle": 35733, + "chucky": 42026, + "chuffed": 27233, + "chuk": 25878, + "chuk": 27221, + "chul": 33001, + "chum": 46869, + "chum": 41767, + "chun": 14693, + "chun": 25391, + "chung": 28418, + "chunk": 30275, + "chunks": 45538, + "chunky": 27978, + "chups": 46331, + "chur": 2309, + "church": 14956, + "church": 2735, + "churches": 15539, + "churchill": 17527, + "chus": 36246, + "chut": 28788, + "chutney": 36261, + "chy": 15131, + "chy": 8096, + "chyna": 43398, + "châ": 48669, + "ci": 698, + "ci": 5798, + "cia": 4019, + "cial": 1143, + "cian": 32323, + "ciao": 37677, + "ciara": 31369, + "cible": 28873, + "cic": 14539, + "cic": 21517, + "cid": 27359, + "cide": 34178, + "cider": 13547, + "cides": 41326, + "cie": 19730, + "cier": 24067, + "cies": 6785, + "cif": 35698, + "cigar": 26031, + "cigar": 16525, + "cigare": 13044, + "cigarette": 18548, + "cigarettes": 22750, + "cigars": 20750, + "cii": 42408, + "cil": 9217, + "cil": 2998, + "cilan": 33998, + "cilantro": 34568, + "cili": 18977, + "ciliation": 25294, + "cim": 30021, + "cin": 2396, + "cin": 25367, + "cina": 39467, + "cincin": 13291, + "cincinnati": 14197, + "cinco": 25131, + "cincode": 40930, + "cincodemayo": 42542, + "cincy": 30015, + "cincy": 30286, + "cinde": 20660, + "cinderella": 21515, + "cindy": 34439, + "cindy": 18532, + "cine": 4015, + "cine": 27451, + "cinema": 38251, + "cinema": 
6443, + "cinemas": 14845, + "cinematic": 25602, + "cinemato": 21919, + "cinematographer": 39059, + "cinematography": 33802, + "ciner": 39882, + "cing": 4014, + "cini": 25699, + "cinnam": 12768, + "cinnamon": 13460, + "cino": 18616, + "cio": 44584, + "cio": 9954, + "cion": 22024, + "ciones": 37155, + "cious": 38466, + "cip": 32884, + "cir": 2459, + "cir": 41135, + "circa": 10411, + "circle": 33574, + "circle": 7117, + "circles": 19411, + "circling": 46036, + "circu": 5143, + "circuit": 35583, + "circuit": 9801, + "circuits": 33260, + "circul": 16618, + "circular": 19733, + "circulare": 39525, + "circulareconomy": 39878, + "circulated": 46258, + "circulating": 42980, + "circulation": 27880, + "circum": 13406, + "circumstances": 18786, + "circus": 11833, + "cirque": 36049, + "cis": 9459, + "cis": 23513, + "cisco": 36689, + "cisco": 19290, + "cise": 19657, + "cisely": 33434, + "cision": 41957, + "cism": 24166, + "cist": 40906, + "cit": 4420, + "cit": 31294, + "citadel": 38036, + "citation": 33581, + "cite": 32641, + "cited": 25069, + "cites": 34490, + "citi": 4280, + "citi": 30270, + "cities": 5441, + "citing": 29088, + "citiz": 5816, + "citizen": 11720, + "citizen": 9814, + "citizens": 7949, + "citizenship": 17386, + "cito": 42636, + "citro": 27941, + "citroen": 35805, + "citrus": 17379, + "city": 5002, + "city": 1305, + "cityfc": 28751, + "cityo": 25709, + "cityof": 11595, + "cityscape": 40808, + "ciu": 39693, + "cius": 42559, + "civ": 40039, + "civic": 32240, + "civic": 11888, + "civil": 6923, + "civil": 6450, + "civilian": 21187, + "civilians": 18076, + "civilization": 22503, + "civilwar": 34524, + "ción": 44700, + "cj": 15238, + "cj": 15205, + "ck": 916, + "ck": 868, + "cke": 25224, + "cke": 40989, + "cked": 3441, + "cken": 25566, + "cker": 15509, + "cker": 4744, + "ckers": 37073, + "cket": 5525, + "ckett": 33899, + "ckey": 15029, + "ckey": 3657, + "cki": 36916, + "cki": 41055, + "cking": 4805, + "cko": 28818, + "cks": 2031, + "cky": 26229, + "cky": 3083, + "cl": 969, + "cl": 6482, + "cla": 940, + "cla": 20636, + "clad": 31606, + "cladding": 46411, + "clai": 29459, + "claim": 4290, + "claim": 6607, + "claimed": 9010, + "claiming": 15286, + "claims": 6852, + "clair": 31441, + "clair": 14039, + "claire": 20410, + "claire": 10460, + "clam": 13588, + "clam": 32598, + "clamation": 21793, + "clamp": 41501, + "clams": 38849, + "clan": 29252, + "clan": 14114, + "clancy": 37227, + "clans": 38279, + "clap": 30037, + "clap": 25546, + "clapham": 43619, + "clapton": 37683, + "clar": 3617, + "clara": 19468, + "clare": 18948, + "clare": 15927, + "claremont": 47789, + "clarence": 29320, + "clari": 15175, + "clarify": 37004, + "clarinet": 41178, + "clarity": 21323, + "clark": 13340, + "clark": 7521, + "clarke": 11548, + "clarkson": 25706, + "clas": 32003, + "clash": 38367, + "clash": 9359, + "clashes": 25193, + "clasico": 43567, + "class": 2876, + "class": 1874, + "classes": 6919, + "classi": 2507, + "classic": 9353, + "classic": 2713, + "classical": 22179, + "classical": 11355, + "classicalmusic": 27806, + "classiccar": 46906, + "classiccars": 21064, + "classics": 10634, + "classification": 26612, + "classified": 22056, + "classmate": 37090, + "classmates": 30062, + "classof": 25345, + "classroom": 9001, + "classrooms": 25768, + "classy": 11615, + "clau": 7526, + "claude": 17461, + "claudi": 39439, + "claudia": 21893, + "claudio": 31230, + "claus": 23317, + "clause": 26151, + "clave": 24111, + "claw": 49230, + "claw": 19106, + "claws": 29161, + "clay": 10402, + "clay": 8823, + "clays": 26128, + 
"clayton": 46445, + "clayton": 19413, + "clc": 31380, + "cle": 1321, + "cle": 2537, + "clean": 3572, + "clean": 3772, + "cleaned": 17468, + "cleanenergy": 43538, + "cleaner": 15619, + "cleaners": 33258, + "cleaning": 7210, + "cleanliness": 47886, + "cleans": 40827, + "cleanse": 28717, + "cleanser": 44170, + "cleansing": 25931, + "cleanup": 22353, + "clear": 4631, + "clear": 3143, + "clearance": 17959, + "cleared": 14880, + "clearer": 37031, + "clearing": 15481, + "clearly": 7767, + "clears": 29092, + "clearwater": 32124, + "cleary": 44342, + "cleats": 33486, + "cleavage": 44165, + "cled": 12827, + "clegg": 42915, + "clemens": 45896, + "clement": 22592, + "clement": 24714, + "clemente": 42461, + "clementine": 47112, + "clements": 49175, + "clemson": 38170, + "clemson": 19537, + "clen": 35547, + "cleo": 40344, + "cleop": 36287, + "cleopatra": 41212, + "cler": 11828, + "clergy": 42635, + "cleric": 43748, + "clerk": 22230, + "clermont": 47529, + "cles": 8077, + "cleve": 37599, + "clevel": 7701, + "cleveland": 30716, + "cleveland": 8430, + "clever": 30977, + "clever": 13385, + "clg": 47546, + "cli": 1503, + "clich": 44407, + "click": 16676, + "click": 3585, + "clicked": 29015, + "clicking": 26542, + "clicks": 31250, + "client": 48528, + "client": 7467, + "clients": 8114, + "clif": 13182, + "cliff": 23827, + "cliff": 10625, + "cliffe": 15170, + "clifford": 24226, + "cliffs": 20953, + "clifton": 23878, + "climat": 37283, + "climate": 7854, + "climate": 4589, + "climateaction": 31622, + "climatechange": 11055, + "climates": 46022, + "climax": 37033, + "climb": 7421, + "climb": 10649, + "climbed": 22528, + "climber": 36910, + "climbers": 47648, + "climbing": 9877, + "climbs": 29098, + "clin": 2879, + "clinch": 30404, + "clinched": 44064, + "cline": 37460, + "cling": 37068, + "cling": 4760, + "clinic": 7926, + "clinical": 35133, + "clinical": 9148, + "clinicians": 45866, + "clinics": 23330, + "clint": 37542, + "clint": 21160, + "clinton": 34403, + "clinton": 5820, + "clio": 46889, + "clip": 39712, + "clip": 9289, + "clipped": 45524, + "clipper": 42245, + "clippers": 23319, + "clipping": 47484, + "clips": 16594, + "clique": 34983, + "clive": 36086, + "clive": 21509, + "cll": 46091, + "cllr": 45743, + "cllr": 23034, + "clo": 1194, + "cloak": 36528, + "clock": 19878, + "clock": 6716, + "clocked": 49049, + "clocks": 25895, + "clockwise": 46150, + "clockwork": 42297, + "clon": 24477, + "clone": 22854, + "clones": 48047, + "clooney": 33161, + "clos": 48821, + "close": 10603, + "close": 2660, + "closed": 4552, + "closely": 13478, + "closer": 6377, + "closes": 11354, + "closest": 14975, + "closet": 14221, + "closeup": 35439, + "closing": 7101, + "closure": 13249, + "closures": 22923, + "cloth": 14559, + "clothes": 7080, + "clothing": 7425, + "clou": 4069, + "cloud": 12965, + "cloud": 3887, + "cloudcomputing": 41390, + "clouds": 6244, + "cloudy": 13106, + "clough": 42909, + "clover": 39574, + "clover": 22812, + "clow": 18386, + "clown": 15329, + "clowns": 30820, + "cls": 44251, + "clt": 29651, + "clt": 24236, + "clu": 996, + "club": 9642, + "club": 1736, + "clubbing": 48128, + "clubhouse": 26553, + "clubs": 9437, + "clue": 14994, + "clueless": 35350, + "clues": 23764, + "clusive": 41362, + "cluster": 15595, + "clusters": 33217, + "clut": 28507, + "clutch": 13953, + "clutter": 40804, + "cly": 12037, + "clyde": 39557, + "clyde": 18469, + "cm": 10190, + "cm": 3741, + "cma": 30554, + "cma": 31388, + "cmc": 45839, + "cmdr": 48250, + "cme": 34946, + "cmo": 24589, + "cmon": 42904, + "cmp": 46355, + "cms": 22520, + 
"cmt": 42727, + "cmu": 43046, + "cn": 3886, + "cn": 16200, + "cna": 48287, + "cnbc": 41242, + "cnbc": 24371, + "cnblue": 36018, + "cnc": 20571, + "cnet": 47487, + "cnews": 24319, + "cng": 41496, + "cnn": 22405, + "cnn": 8259, + "cns": 46095, + "cny": 31614, + "co": 622, + "co": 1320, + "coa": 29167, + "coach": 3275, + "coach": 2312, + "coached": 30228, + "coachella": 20222, + "coaches": 6924, + "coaching": 7766, + "coal": 10227, + "coal": 7919, + "coalition": 12920, + "coast": 6398, + "coast": 3720, + "coastal": 38246, + "coastal": 10852, + "coaster": 15944, + "coasters": 31548, + "coastguard": 40601, + "coastline": 27959, + "coasts": 42225, + "coat": 28869, + "coat": 7356, + "coated": 23401, + "coates": 36899, + "coating": 25369, + "coatings": 48706, + "coats": 18075, + "cob": 20140, + "cob": 32863, + "cobain": 36866, + "cobalt": 30896, + "cobb": 22719, + "cobble": 47894, + "cobra": 21574, + "coc": 23036, + "coc": 39498, + "coca": 21197, + "cocac": 26393, + "cocacola": 31248, + "cocaine": 20534, + "coch": 18599, + "cochran": 48798, + "cochrane": 41752, + "coco": 11850, + "coco": 13316, + "cocoa": 18074, + "cocon": 8597, + "coconut": 9581, + "cod": 16132, + "cod": 11915, + "code": 11582, + "code": 3217, + "coded": 33703, + "coden": 43914, + "coder": 41561, + "codes": 14566, + "codi": 39711, + "coding": 12647, + "cody": 23222, + "cody": 12666, + "coe": 15386, + "coed": 41028, + "coel": 45633, + "coer": 41198, + "coeur": 44986, + "coffe": 2255, + "coffee": 12898, + "coffee": 2453, + "coffees": 41184, + "coffey": 48066, + "cofficial": 18757, + "coffin": 29907, + "cog": 26362, + "cog": 35960, + "cogn": 12210, + "cognac": 44361, + "cognition": 46825, + "cognitive": 16584, + "cohe": 20669, + "cohen": 13381, + "coherent": 48450, + "cohort": 22782, + "coil": 25307, + "coim": 41528, + "coin": 14651, + "coin": 4170, + "coinci": 14015, + "coincidence": 19807, + "coins": 10530, + "coke": 39602, + "coke": 14035, + "col": 754, + "col": 9371, + "cola": 15444, + "colbert": 31647, + "colby": 32068, + "colchester": 31715, + "cold": 11146, + "cold": 3153, + "colder": 23859, + "coldest": 31438, + "coldplay": 27770, + "cole": 9305, + "cole": 8166, + "coleman": 15774, + "coles": 40265, + "coles": 30398, + "coli": 18877, + "coli": 15910, + "colin": 20989, + "colin": 10238, + "coliseum": 21836, + "coll": 25982, + "coll": 23898, + "colla": 2929, + "collab": 14013, + "collabor": 4437, + "collaborate": 21271, + "collaborated": 42265, + "collaborating": 25545, + "collaboration": 6642, + "collaborations": 36520, + "collaborative": 15841, + "collaborator": 48186, + "collaborators": 45901, + "collage": 11258, + "collagen": 36120, + "collap": 16881, + "collapse": 16520, + "collapsed": 25037, + "collapses": 43601, + "collar": 39662, + "collar": 13497, + "collateral": 44512, + "colle": 1801, + "colleague": 13067, + "colleagues": 8203, + "collec": 1733, + "collect": 10186, + "collected": 11980, + "collecti": 18530, + "collectible": 25680, + "collectibles": 21519, + "collecting": 10325, + "collection": 2548, + "collections": 12760, + "collective": 10162, + "collectively": 40687, + "collector": 13522, + "collectors": 20540, + "collects": 31576, + "colleen": 31020, + "college": 13512, + "college": 2229, + "colleges": 17357, + "collegi": 16311, + "collegiate": 18068, + "colli": 8262, + "collide": 27214, + "collie": 30611, + "collier": 35748, + "collin": 24056, + "collin": 32116, + "colling": 32319, + "collingwood": 45873, + "collins": 8684, + "collision": 15407, + "collo": 25115, + "colloqui": 37243, + "colloquium": 46514, + 
"collu": 25658, + "collusion": 33864, + "colo": 7300, + "colo": 27288, + "cologne": 22216, + "cology": 19187, + "colom": 8987, + "colombia": 12901, + "colombian": 28701, + "colombo": 33207, + "colon": 8280, + "colon": 29050, + "colonel": 22674, + "coloni": 22667, + "colonial": 16530, + "colonialism": 43385, + "colonies": 38738, + "colony": 18767, + "color": 4036, + "color": 3140, + "colorado": 34580, + "colorado": 6742, + "colorec": 41171, + "colored": 11775, + "colorful": 11444, + "colori": 28764, + "coloring": 17696, + "colorized": 46730, + "colors": 5389, + "colorstv": 28195, + "colorway": 44576, + "colossal": 40258, + "colosse": 48142, + "colossus": 34022, + "colour": 10240, + "colour": 4769, + "coloured": 17111, + "colourful": 15562, + "colouring": 31803, + "colours": 7626, + "cols": 35726, + "colt": 19726, + "colton": 32249, + "coltrane": 42333, + "colts": 16135, + "colum": 4164, + "columb": 31043, + "columbi": 25947, + "columbia": 9410, + "columbus": 11273, + "column": 10593, + "columnist": 28958, + "columns": 29056, + "com": 610, + "com": 2464, + "coma": 19620, + "comb": 3587, + "comb": 16380, + "combat": 35083, + "combat": 9275, + "combating": 46121, + "combe": 14363, + "combin": 25112, + "combination": 11312, + "combinations": 34950, + "combine": 12919, + "combined": 10427, + "combines": 22991, + "combining": 23561, + "combo": 10155, + "combos": 48117, + "combs": 30694, + "combu": 35629, + "combustion": 44654, + "comcast": 30043, + "come": 4225, + "come": 891, + "comeback": 8234, + "comedian": 13848, + "comedians": 33758, + "comedic": 43360, + "comedy": 19346, + "comedy": 4749, + "comer": 42997, + "comer": 20916, + "comers": 34436, + "comes": 2091, + "comet": 21405, + "comets": 40636, + "comey": 22957, + "comfor": 6563, + "comfort": 44000, + "comfort": 7808, + "comfortable": 8652, + "comfortably": 30392, + "comforting": 33835, + "comforts": 42243, + "comfy": 15736, + "comi": 40781, + "comic": 7729, + "comic": 4962, + "comicart": 46018, + "comicbook": 46564, + "comicbooks": 22018, + "comiccon": 18379, + "comicon": 43820, + "comics": 4256, + "comin": 18164, + "coming": 14916, + "coming": 1171, + "comingsoon": 19894, + "comm": 965, + "comm": 11413, + "comman": 39780, + "command": 18391, + "command": 11350, + "commander": 11265, + "commanders": 41667, + "commanding": 36933, + "commandments": 43409, + "commando": 31361, + "commands": 38163, + "comme": 29692, + "commemor": 9495, + "commemorate": 21242, + "commemorates": 45149, + "commemorating": 28734, + "commemoration": 29288, + "commemorative": 24623, + "commen": 15795, + "commence": 25059, + "commenced": 43908, + "commencement": 21666, + "commences": 48551, + "commend": 37555, + "commended": 40702, + "comment": 20035, + "comment": 5761, + "commentary": 14146, + "commentator": 32016, + "commented": 28328, + "commenting": 37292, + "comments": 6606, + "commer": 4028, + "commerce": 8333, + "commerci": 15601, + "commercial": 31802, + "commercial": 6287, + "commercials": 30724, + "commish": 45399, + "commissi": 6000, + "commission": 5292, + "commissioned": 16565, + "commissioner": 10221, + "commissioners": 30702, + "commissioning": 29585, + "commissions": 20668, + "commit": 3041, + "commit": 11797, + "commitment": 7770, + "commitments": 32136, + "commits": 20241, + "committed": 7907, + "committee": 5636, + "committees": 40504, + "committing": 21937, + "commod": 9496, + "commodities": 30350, + "commodity": 29041, + "commodore": 31129, + "common": 8414, + "common": 4176, + "commonly": 20344, + "commons": 16653, + "commonwealth": 16569, + 
"comms": 18832, + "commu": 9561, + "commun": 1515, + "communal": 32809, + "communi": 16164, + "communic": 4784, + "communicate": 19809, + "communication": 7999, + "communications": 10052, + "communion": 28579, + "communism": 35387, + "communist": 18602, + "communities": 6361, + "community": 14784, + "community": 1927, + "commute": 15898, + "commuter": 27782, + "commuters": 30823, + "commuting": 43503, + "como": 16236, + "comp": 2561, + "comp": 11679, + "compac": 40014, + "compact": 13690, + "compan": 1995, + "companies": 5361, + "companion": 14963, + "companions": 37124, + "company": 2634, + "compar": 7580, + "comparable": 27092, + "comparative": 33388, + "compare": 13771, + "compared": 10544, + "compares": 25104, + "comparing": 20564, + "comparison": 14186, + "comparisons": 40870, + "compart": 30072, + "compartment": 40383, + "compass": 19438, + "compassion": 14463, + "compassionate": 30193, + "compati": 17295, + "compatibility": 41614, + "compatible": 21286, + "compe": 5254, + "compelled": 49375, + "compelling": 21766, + "compen": 42079, + "compens": 15172, + "compensation": 18663, + "compet": 2932, + "compete": 10038, + "competed": 27767, + "competen": 31853, + "competence": 31165, + "competency": 49293, + "competent": 28113, + "competes": 39826, + "competing": 13068, + "competit": 15892, + "competiti": 32581, + "competition": 3742, + "competitions": 23259, + "competitive": 10687, + "competitiveness": 43209, + "competitor": 26633, + "competitors": 23638, + "compilation": 20446, + "compiled": 34579, + "compla": 7428, + "complain": 19292, + "complained": 42029, + "complaining": 20812, + "complains": 46363, + "complaint": 20391, + "complaints": 20020, + "comple": 1730, + "complement": 36624, + "complementary": 48953, + "complete": 3263, + "completed": 5976, + "completely": 5989, + "completes": 19321, + "completing": 14949, + "completion": 15915, + "complex": 16099, + "complex": 6324, + "complexes": 47870, + "complexion": 47732, + "complexity": 24815, + "compli": 5270, + "compliance": 14658, + "compliant": 29893, + "complic": 11460, + "complicated": 16621, + "complications": 29936, + "compliment": 25116, + "complimentary": 20948, + "compliments": 25477, + "comply": 36281, + "component": 21284, + "components": 16816, + "compos": 7783, + "compose": 43659, + "composed": 19916, + "composer": 12104, + "composers": 33314, + "composing": 40412, + "composite": 21606, + "composites": 45395, + "composition": 17510, + "compositions": 44652, + "compost": 46002, + "compost": 33307, + "compound": 19980, + "compounds": 33991, + "compre": 8483, + "compreh": 42976, + "comprehen": 12050, + "comprehend": 48230, + "comprehensive": 13854, + "compress": 33353, + "compressed": 42359, + "compression": 25638, + "compressor": 39607, + "compri": 29445, + "compromise": 26611, + "compromised": 38576, + "compromising": 45436, + "comps": 48665, + "compton": 28364, + "compu": 11639, + "compul": 25869, + "compulsory": 39345, + "computing": 12732, + "comra": 25553, + "comrade": 30844, + "comrades": 29282, + "coms": 30493, + "con": 616, + "con": 2457, + "cona": 30605, + "conan": 24750, + "conce": 9145, + "concealed": 35419, + "conceded": 37895, + "conceived": 39725, + "concentr": 11085, + "concentrate": 30846, + "concentrated": 36776, + "concentration": 18565, + "concep": 8389, + "concepcion": 47035, + "concept": 6353, + "conceptart": 31162, + "conception": 30510, + "conceptions": 40307, + "concepts": 16763, + "conceptu": 42745, + "conceptual": 34070, + "concer": 2228, + "concern": 12928, + "concerned": 12020, + 
"concerning": 21772, + "concerns": 11134, + "concert": 32180, + "concert": 3066, + "concerto": 24710, + "concerts": 14418, + "concession": 38117, + "concessions": 43981, + "concier": 28859, + "concierge": 39850, + "conclave": 38098, + "conclu": 9627, + "conclude": 37525, + "concluded": 27825, + "concludes": 30634, + "conclusion": 20932, + "conclusions": 39507, + "conco": 43034, + "concor": 19913, + "concord": 26448, + "concordia": 35492, + "concours": 36282, + "concourse": 37793, + "concre": 43658, + "concrete": 9637, + "concussion": 28321, + "condem": 13287, + "condemn": 27212, + "condemned": 35145, + "condemns": 32092, + "conden": 24816, + "conditi": 11170, + "condition": 36978, + "condition": 7336, + "conditional": 24671, + "conditioned": 37014, + "conditioner": 31239, + "conditioning": 18181, + "conditions": 5892, + "condo": 19952, + "condol": 18661, + "condolences": 20836, + "condom": 39021, + "condomin": 42589, + "condoms": 37878, + "condor": 47643, + "condos": 42342, + "condu": 40772, + "conduc": 5379, + "conduct": 11647, + "conducted": 13080, + "conducting": 16787, + "conductor": 22317, + "conducts": 32084, + "cone": 39279, + "cone": 10266, + "cones": 26718, + "coney": 41837, + "conf": 6477, + "confe": 1968, + "confeder": 17104, + "confederate": 24864, + "confederation": 43484, + "conferen": 37961, + "conference": 2230, + "conferences": 22811, + "conferencing": 47320, + "confess": 38860, + "confession": 22572, + "confessions": 29404, + "confetti": 37923, + "confi": 5005, + "confidence": 8510, + "confident": 12365, + "confidential": 28712, + "configu": 46746, + "configur": 26950, + "configuration": 33378, + "confin": 45316, + "confined": 40973, + "confir": 3930, + "confirm": 12130, + "confirmation": 19645, + "confirmed": 6346, + "confirming": 38433, + "confirms": 11803, + "confis": 36285, + "confit": 42241, + "confl": 8173, + "conflic": 19029, + "conflict": 10397, + "conflicting": 43894, + "conflicts": 28713, + "confor": 40933, + "confron": 20033, + "confront": 38382, + "confrontation": 41478, + "confu": 6890, + "confuse": 37503, + "confused": 10946, + "confusing": 24683, + "confusion": 20493, + "cong": 24407, + "conge": 20013, + "congestion": 24432, + "congo": 20334, + "congr": 1227, + "congrats": 1887, + "congratul": 1750, + "congratulate": 16633, + "congratulated": 42004, + "congratulates": 24580, + "congratulating": 30967, + "congratulation": 24751, + "congratulations": 1864, + "congre": 7947, + "congreg": 40727, + "congregation": 32618, + "congress": 12452, + "congress": 4599, + "congressional": 15239, + "congressman": 17145, + "congresswoman": 37317, + "coni": 39031, + "coni": 36651, + "conj": 41543, + "conju": 33821, + "conjunction": 34226, + "conley": 44536, + "conline": 37593, + "conn": 41836, + "conn": 20329, + "conne": 8437, + "connec": 29933, + "connect": 19969, + "connected": 27506, + "connecting": 41429, + "connection": 26840, + "connections": 37161, + "connie": 25739, + "connoisse": 46012, + "connol": 27739, + "connolly": 29537, + "connor": 21984, + "connor": 10218, + "conom": 2664, + "conomy": 22529, + "conor": 29955, + "conor": 19478, + "conqu": 13382, + "conquer": 38585, + "conquer": 19821, + "conquered": 27099, + "conquering": 43778, + "conquest": 35367, + "conrad": 22073, + "cons": 10311, + "consci": 9427, + "conscience": 27310, + "conscious": 14914, + "consciously": 46755, + "consciousness": 17894, + "conse": 34887, + "consecu": 12084, + "consecutive": 12413, + "consen": 23110, + "consensus": 25071, + "consent": 21922, + "consequ": 13003, + "consequence": 42262, 
+ "consequences": 15682, + "conserv": 4649, + "conservancy": 46729, + "conservation": 37616, + "conservation": 8322, + "conservative": 11421, + "conservatives": 17631, + "conservatory": 32140, + "conserve": 34231, + "consi": 2899, + "consider": 12471, + "consider": 6734, + "considerable": 38256, + "considerably": 38510, + "consideration": 24310, + "considerations": 33700, + "considered": 9487, + "considering": 10761, + "considers": 24691, + "consist": 10410, + "consist": 33735, + "consisted": 49354, + "consistency": 25683, + "consistent": 16439, + "consistently": 23799, + "consisting": 39241, + "consists": 23458, + "consol": 27869, + "consolation": 38888, + "console": 13403, + "consoles": 33136, + "consoli": 21586, + "consolidation": 41111, + "consor": 27108, + "consortium": 29988, + "conspir": 12680, + "conspiracy": 15236, + "const": 3826, + "constable": 29179, + "constan": 38718, + "constance": 40682, + "constant": 32000, + "constant": 13111, + "constantine": 30640, + "constantly": 14336, + "constell": 21913, + "constellation": 25991, + "constitu": 6299, + "constituency": 22464, + "constituents": 32075, + "constitution": 12157, + "constitutional": 16091, + "constra": 28973, + "constraints": 41910, + "constru": 3983, + "construc": 13321, + "construct": 24467, + "constructed": 16876, + "constructing": 33653, + "construction": 48873, + "construction": 4585, + "constructive": 31810, + "consu": 4689, + "consul": 5295, + "consul": 33630, + "consulate": 34341, + "consult": 9438, + "consult": 26727, + "consultancy": 31735, + "consultant": 14196, + "consultants": 27203, + "consultation": 15777, + "consultations": 43424, + "consulting": 15883, + "consume": 28919, + "consumed": 29653, + "consumer": 34408, + "consumer": 10422, + "consumers": 14014, + "consuming": 30607, + "consumption": 14904, + "cont": 2036, + "cont": 21425, + "contact": 39367, + "contact": 3523, + "contacted": 37331, + "contacts": 22789, + "contag": 29259, + "contagious": 33984, + "contain": 9948, + "contain": 15187, + "contained": 23836, + "container": 14913, + "containers": 20448, + "containing": 20281, + "contains": 12844, + "contamin": 24662, + "contaminated": 35773, + "contamination": 31770, + "conte": 15402, + "conte": 26882, + "contempl": 21924, + "contemplating": 33854, + "contempor": 14538, + "contemporary": 16607, + "contemporary": 8859, + "contemporaryart": 20212, + "contempt": 39293, + "conten": 42201, + "contender": 23573, + "contenders": 29711, + "content": 15526, + "content": 4750, + "contentmarketing": 20429, + "contents": 14850, + "contest": 23103, + "contest": 4576, + "contestalert": 27313, + "contestant": 25682, + "contestants": 28062, + "contested": 37845, + "contests": 32210, + "contex": 42015, + "context": 13089, + "conti": 46431, + "conti": 40842, + "contin": 1918, + "continent": 19623, + "continental": 14089, + "continents": 38642, + "conting": 27104, + "contingent": 36467, + "continu": 4688, + "continually": 34086, + "continuation": 38964, + "continue": 3942, + "continued": 10150, + "continues": 4305, + "continuing": 11009, + "continuity": 34035, + "continuous": 17033, + "continuously": 29634, + "continuum": 44978, + "contour": 34733, + "contr": 22871, + "contra": 9880, + "contra": 38620, + "contrac": 7581, + "contracep": 35109, + "contract": 6120, + "contracting": 39091, + "contractor": 21429, + "contractors": 22427, + "contracts": 16563, + "contradic": 27957, + "contrary": 32805, + "contrast": 18501, + "contrasting": 40758, + "contribu": 4753, + "contribute": 14112, + "contributed": 19397, + 
"contributes": 34203, + "contributing": 21762, + "contribution": 11116, + "contributions": 14465, + "contributor": 24553, + "contributors": 32908, + "contro": 2372, + "control": 9963, + "control": 3366, + "controlled": 14140, + "controller": 12929, + "controllers": 30374, + "controlling": 26427, + "controls": 15746, + "controversi": 13674, + "controversial": 14617, + "controversy": 18659, + "conv": 48382, + "conve": 18421, + "conven": 7283, + "conveni": 33278, + "convenience": 17859, + "convenient": 18978, + "conveniently": 40844, + "convention": 6752, + "conventional": 20835, + "conventions": 41404, + "conver": 6336, + "convergence": 35381, + "convers": 4577, + "conversation": 5690, + "conversations": 12326, + "converse": 24149, + "conversion": 15111, + "conversions": 44137, + "convert": 20074, + "converted": 20808, + "converter": 34611, + "convertible": 19608, + "converting": 34674, + "converts": 42470, + "convey": 38342, + "convic": 11150, + "convicted": 18668, + "conviction": 24967, + "convictions": 44366, + "convin": 12889, + "convince": 20351, + "convinced": 17388, + "convincing": 27742, + "convo": 19372, + "convocation": 30674, + "convos": 44842, + "convoy": 30292, + "conway": 21410, + "conwy": 48971, + "cony": 14501, + "coo": 1664, + "coo": 21691, + "coogs": 47624, + "cook": 9726, + "cook": 5977, + "cookbook": 21086, + "cooke": 29979, + "cooked": 11452, + "cooker": 23806, + "cookery": 38779, + "cookie": 9367, + "cookies": 8320, + "cookin": 46610, + "cooking": 39248, + "cooking": 6283, + "cookout": 39743, + "cooks": 24256, + "cool": 5594, + "cool": 2077, + "cooled": 37170, + "cooler": 11078, + "coolest": 10566, + "cooling": 15291, + "coom": 41726, + "coon": 34260, + "coon": 16958, + "coop": 39917, + "coop": 18910, + "cooper": 7264, + "cooper": 8133, + "cooperate": 42936, + "cooperation": 11785, + "cooperative": 24517, + "coops": 48531, + "coordin": 8187, + "coordinate": 38250, + "coordinated": 32540, + "coordinating": 40075, + "coordination": 25611, + "coordinator": 13967, + "coors": 36025, + "cop": 3196, + "cop": 7070, + "copa": 22749, + "copd": 45876, + "cope": 47635, + "cope": 12564, + "copeland": 37604, + "copen": 15637, + "copenhagen": 17390, + "coper": 41891, + "copernic": 45519, + "copied": 36770, + "copies": 9851, + "coping": 30545, + "copolitics": 45846, + "copp": 20937, + "copped": 42229, + "copper": 24741, + "copper": 10333, + "coppola": 47427, + "cops": 10719, + "copter": 28049, + "copy": 11376, + "copy": 4509, + "copying": 38925, + "copyright": 15778, + "cor": 851, + "cor": 18559, + "cora": 34953, + "coral": 31220, + "coral": 12054, + "corbett": 35699, + "corbin": 35578, + "corbyn": 14026, + "cord": 40893, + "cord": 11181, + "corden": 41999, + "cordi": 41681, + "cordless": 44412, + "cords": 22164, + "core": 19622, + "core": 5000, + "cores": 37874, + "corey": 31279, + "corey": 15288, + "corgi": 31320, + "cori": 26508, + "coriander": 37491, + "corin": 17716, + "corinthians": 34471, + "cork": 18148, + "cork": 10376, + "corn": 5202, + "corn": 5894, + "cornelius": 45865, + "cornell": 38689, + "cornell": 20859, + "corner": 18509, + "corner": 5253, + "corners": 19584, + "cornerstone": 36280, + "cornish": 23774, + "cornwall": 37903, + "cornwall": 10777, + "coron": 13210, + "corona": 25564, + "coronado": 43946, + "coronary": 45955, + "coronation": 25014, + "coroner": 47241, + "corp": 29203, + "corp": 10918, + "corpor": 4258, + "corporal": 42445, + "corporate": 33877, + "corporate": 6838, + "corporation": 11282, + "corporations": 25482, + "corps": 11330, + "corpse": 29408, + 
"corpus": 31672, + "correc": 5011, + "correct": 8340, + "corrected": 35628, + "correction": 20843, + "correctional": 38030, + "corrections": 37507, + "correctly": 15359, + "correlation": 29218, + "correspon": 20203, + "correspondent": 29996, + "corri": 12974, + "corridor": 20592, + "corrie": 23961, + "corro": 24936, + "corro": 42033, + "corrosion": 39191, + "corru": 6501, + "corrup": 30429, + "corrupt": 15194, + "corruption": 9141, + "corsa": 47670, + "corsair": 42367, + "corset": 40408, + "cortex": 40109, + "cortez": 30461, + "corvette": 24367, + "cory": 23221, + "cory": 18329, + "cos": 5865, + "cos": 5700, + "cosby": 30324, + "cosc": 45944, + "coscino": 47909, + "cose": 26495, + "cosm": 37486, + "cosme": 9628, + "cosmetic": 23918, + "cosmetics": 12896, + "cosmic": 47398, + "cosmic": 18304, + "cosmo": 12829, + "cosmo": 32072, + "cosmopolitan": 35518, + "cosmos": 22151, + "cospla": 15149, + "cosplay": 42401, + "cosplay": 6435, + "cosplayer": 30215, + "cosplaying": 46701, + "cost": 11360, + "cost": 4713, + "costa": 10480, + "costar": 28659, + "costarica": 31272, + "costco": 31045, + "costello": 30667, + "costing": 39193, + "costly": 30170, + "costs": 7628, + "costu": 5786, + "costume": 7235, + "costumes": 15150, + "cosy": 22848, + "cot": 4718, + "cot": 5871, + "cote": 44234, + "cote": 20751, + "cotland": 32576, + "cotsw": 23303, + "cotswolds": 35546, + "cott": 8211, + "cott": 11349, + "cottage": 12155, + "cottages": 34405, + "cotton": 22218, + "cotton": 7050, + "cou": 1368, + "couch": 12724, + "cougar": 35028, + "cougar": 27042, + "cougars": 20425, + "cough": 35631, + "cough": 18498, + "cougs": 28482, + "coul": 22483, + "could": 44812, + "could": 1510, + "couldn": 4072, + "couldnt": 29042, + "coulter": 42291, + "coun": 939, + "counc": 12927, + "council": 18187, + "council": 3620, + "councill": 15732, + "councillor": 21179, + "councillors": 29695, + "councilman": 40833, + "councils": 29938, + "counsel": 13780, + "counsel": 19814, + "counseling": 25000, + "counsell": 47510, + "counselling": 40581, + "counselor": 26148, + "counselors": 38688, + "count": 6073, + "count": 5887, + "countdown": 39559, + "countdown": 7500, + "counted": 23149, + "counter": 10134, + "counter": 7352, + "counterfe": 33067, + "counterfeit": 44242, + "counterpart": 39216, + "counterparts": 42106, + "counters": 46170, + "countess": 46276, + "counties": 12338, + "counting": 9723, + "countless": 21819, + "countries": 5489, + "country": 7896, + "country": 2157, + "countryfile": 47023, + "countrymusic": 30372, + "countryside": 16303, + "counts": 12264, + "county": 18734, + "county": 2116, + "coup": 9871, + "coup": 16479, + "coupe": 16773, + "couple": 40136, + "couple": 3377, + "coupled": 37153, + "couples": 14752, + "coupling": 45595, + "coupon": 14019, + "coupons": 23945, + "cour": 1391, + "coura": 4436, + "courage": 9828, + "courageous": 25005, + "courier": 27217, + "cours": 21493, + "course": 43225, + "course": 2613, + "courses": 9464, + "court": 16837, + "court": 2908, + "courte": 5088, + "courtesy": 5228, + "courthouse": 22205, + "courtney": 33601, + "courtney": 15990, + "courtroom": 41071, + "courts": 13514, + "courty": 20121, + "courtyard": 21900, + "cous": 48397, + "cousin": 7780, + "cousins": 14073, + "cout": 29118, + "coutinho": 35530, + "couture": 14808, + "cov": 19384, + "cov": 48385, + "cove": 21700, + "cove": 14708, + "coven": 12483, + "covenant": 29647, + "coventry": 18007, + "cover": 13534, + "cover": 2202, + "coverage": 6810, + "covered": 5603, + "covering": 9462, + "covers": 7745, + "covert": 40134, + 
"coveted": 36119, + "covington": 43196, + "cow": 5076, + "cow": 9706, + "cowan": 42699, + "coward": 33729, + "cowards": 48972, + "cowboy": 25833, + "cowboy": 13657, + "cowboys": 11864, + "cowboysnation": 43082, + "cowell": 39015, + "cowgirl": 47090, + "coworker": 30727, + "coworkers": 30821, + "coworking": 36034, + "cows": 15204, + "cowx": 23831, + "cox": 25784, + "cox": 11597, + "coy": 12765, + "coy": 15742, + "coyi": 48407, + "coyle": 45348, + "coyne": 44729, + "coyo": 16614, + "coyote": 26586, + "coyotes": 30423, + "coys": 19736, + "coz": 39922, + "coz": 14282, + "cozy": 14873, + "cp": 7905, + "cp": 9130, + "cpa": 30095, + "cpac": 45731, + "cpc": 26125, + "cpd": 23402, + "cpec": 48007, + "cpfc": 27553, + "cpi": 41795, + "cpl": 26852, + "cpr": 25134, + "cps": 27078, + "cpt": 32892, + "cpu": 27700, + "cq": 48910, + "cq": 48417, + "cr": 1075, + "cr": 3483, + "cra": 1184, + "cra": 18362, + "crab": 27382, + "crab": 11574, + "crabs": 30908, + "crack": 11222, + "crack": 10334, + "crackdown": 29527, + "cracked": 19826, + "cracker": 16298, + "crackers": 26200, + "cracking": 13008, + "cracks": 21426, + "cracy": 24749, + "cradle": 29384, + "crae": 40438, + "craf": 10873, + "craft": 7717, + "craft": 3588, + "craftbeer": 12371, + "crafted": 12424, + "crafthour": 42324, + "crafting": 26886, + "crafts": 33276, + "crafts": 13383, + "craftsman": 39528, + "craftsmanship": 36682, + "crafty": 32317, + "craic": 46962, + "craig": 14042, + "craig": 8061, + "craigslist": 43865, + "cram": 29809, + "cramer": 44592, + "cramps": 46106, + "cran": 7761, + "cranberries": 49361, + "cranberry": 23824, + "crane": 14626, + "cranes": 26979, + "crani": 45674, + "crank": 46246, + "crank": 32283, + "cranston": 44340, + "crap": 11899, + "crappy": 30475, + "crash": 37150, + "crash": 5033, + "crashed": 16638, + "crashes": 17013, + "crashing": 24991, + "crat": 46696, + "crate": 24756, + "crater": 22663, + "crates": 30172, + "cratic": 32175, + "crative": 39999, + "crats": 43056, + "crave": 33397, + "craven": 33625, + "craving": 18344, + "cravings": 34476, + "craw": 7400, + "crawfish": 42772, + "crawford": 15918, + "crawl": 20106, + "crawler": 41012, + "crawley": 42316, + "crawling": 37066, + "cray": 24184, + "cray": 27032, + "crayon": 41801, + "crayons": 43508, + "craz": 25776, + "craze": 30637, + "craziest": 32690, + "craziness": 46436, + "crazy": 17540, + "crazy": 3578, + "crc": 25618, + "cre": 798, + "cre": 17762, + "cream": 23184, + "cream": 3867, + "creams": 41447, + "creamy": 17206, + "crease": 48441, + "create": 30949, + "create": 3380, + "created": 4080, + "creates": 10361, + "creati": 6714, + "creating": 5524, + "creation": 38293, + "creation": 6900, + "creations": 17411, + "creative": 15237, + "creative": 4450, + "creatives": 29352, + "creativity": 9636, + "creator": 10173, + "creators": 17981, + "creature": 14317, + "creatures": 13938, + "cred": 7314, + "cred": 22377, + "credenti": 29487, + "credentials": 33422, + "credi": 21097, + "credibility": 34984, + "credible": 32983, + "credit": 21467, + "credit": 3900, + "credited": 32480, + "credits": 10654, + "creds": 43462, + "cree": 33961, + "cree": 36014, + "creed": 18845, + "creek": 26120, + "creek": 5526, + "creep": 8153, + "creep": 26084, + "creeper": 38662, + "creeping": 29697, + "creeps": 45135, + "creepy": 11943, + "creighton": 42823, + "creme": 22681, + "creole": 45632, + "crepe": 38611, + "crescent": 18211, + "cress": 39124, + "crest": 35985, + "crest": 15760, + "crested": 36656, + "crete": 8584, + "crew": 21560, + "crew": 3462, + "crewe": 43284, + "crews": 10463, 
+ "cri": 1621, + "cri": 38962, + "crib": 23271, + "cric": 4328, + "cricke": 19098, + "cricket": 21859, + "cricket": 5373, + "cricketer": 28439, + "cricketers": 43986, + "cried": 15290, + "cries": 19769, + "crime": 13872, + "crime": 4896, + "crimea": 28614, + "crimes": 11827, + "crimin": 5874, + "criminal": 30197, + "criminal": 8255, + "criminals": 18783, + "crimson": 19437, + "cringe": 42588, + "cripp": 33588, + "cris": 37818, + "crises": 36403, + "crisis": 5712, + "crisp": 15145, + "crispr": 39784, + "crisps": 35744, + "crispy": 16458, + "criss": 29708, + "cristi": 12699, + "cristian": 48808, + "cristiano": 14807, + "cristina": 33395, + "cristo": 38315, + "crit": 3613, + "crit": 48130, + "criteri": 33627, + "criteria": 24849, + "criterion": 43841, + "criti": 25333, + "critic": 12417, + "critic": 19361, + "critical": 15314, + "critical": 6808, + "critically": 21570, + "criticalrole": 33606, + "criticalrole": 22742, + "criticalrolefanart": 43663, + "critici": 20333, + "criticism": 17405, + "criticize": 46081, + "criticized": 41557, + "critics": 16946, + "critique": 32982, + "critters": 35423, + "crm": 22610, + "cro": 1192, + "cro": 22522, + "croati": 28072, + "croatia": 13323, + "croatian": 34795, + "croc": 43350, + "croche": 35352, + "crochet": 17554, + "crock": 41685, + "crocker": 47843, + "crockett": 48313, + "crocod": 24519, + "crocodile": 24757, + "crocs": 38988, + "croft": 16657, + "croissant": 46011, + "croix": 44735, + "crom": 25082, + "crombie": 46162, + "cromwell": 45345, + "cron": 17361, + "croo": 16443, + "crook": 43744, + "crooked": 48473, + "crooked": 25644, + "crooks": 44226, + "crop": 40751, + "crop": 9955, + "cropped": 31139, + "crops": 16290, + "crore": 18274, + "crores": 37281, + "cros": 16670, + "crosby": 21095, + "cross": 5266, + "cross": 3417, + "crossed": 11731, + "crosses": 20473, + "crossfit": 47214, + "crossfit": 20395, + "crossing": 8673, + "crossings": 43517, + "crossover": 17194, + "crossroads": 27427, + "crossword": 32945, + "crou": 31206, + "crouch": 36506, + "crow": 3138, + "crow": 16019, + "crowd": 12036, + "crowd": 4570, + "crowded": 20182, + "crowdfunding": 17971, + "crowds": 16092, + "crowe": 33560, + "crowley": 32287, + "crown": 22190, + "crown": 6902, + "crowned": 16109, + "crowns": 33229, + "crows": 27134, + "croy": 21676, + "croydon": 27116, + "crs": 28449, + "crt": 43877, + "cru": 1815, + "cru": 29788, + "cruci": 18499, + "crucial": 12396, + "crude": 20677, + "cruel": 16073, + "cruel": 17573, + "cruelty": 20675, + "cruis": 27721, + "cruise": 36425, + "cruise": 6764, + "cruiser": 21394, + "cruises": 19214, + "cruising": 19743, + "crum": 43268, + "crumb": 48327, + "crumb": 39909, + "crumble": 36595, + "crumbs": 35893, + "crun": 17407, + "crunch": 16620, + "crunchy": 31366, + "crusad": 19133, + "crusade": 36846, + "crusader": 40171, + "crusaders": 31319, + "crush": 22296, + "crush": 7610, + "crushed": 18270, + "crusher": 44923, + "crushes": 35844, + "crushing": 20790, + "crust": 23136, + "crusted": 37314, + "cruz": 33689, + "cruz": 8403, + "cry": 2837, + "cry": 6290, + "crying": 6828, + "cryo": 32215, + "cryp": 4865, + "crypt": 37814, + "cryptic": 46925, + "crypto": 8080, + "crypto": 9608, + "cryptocurrencies": 33329, + "cryptocurrency": 12070, + "cryst": 15891, + "crystal": 17387, + "crystal": 6517, + "crystalli": 47551, + "crystals": 18350, + "cs": 11978, + "cs": 2804, + "csa": 26355, + "csc": 41727, + "csc": 37266, + "csd": 36913, + "cse": 41659, + "csg": 47085, + "csgo": 28928, + "csi": 41750, + "csi": 28070, + "csk": 43036, + "csm": 40061, + "csn": 
46329, + "cso": 43864, + "csp": 39243, + "csr": 32105, + "csr": 24598, + "csrracing": 44193, + "css": 41418, + "css": 19846, + "cst": 17016, + "csu": 35948, + "csu": 31261, + "csw": 41031, + "ct": 3381, + "ct": 1122, + "cta": 28397, + "ctar": 27842, + "ctc": 34123, + "cte": 31410, + "cted": 2910, + "ctf": 35250, + "cthulhu": 41064, + "cting": 7985, + "ction": 17578, + "ction": 1569, + "ctions": 7021, + "ctive": 9313, + "cto": 17445, + "ctor": 8108, + "ctr": 35602, + "ctr": 18481, + "cts": 6936, + "ctto": 25118, + "ctu": 20834, + "cture": 17668, + "ctv": 21213, + "ctv": 27590, + "cu": 729, + "cu": 11224, + "cuando": 40388, + "cub": 16938, + "cub": 19972, + "cuba": 11576, + "cuban": 15536, + "cube": 47753, + "cube": 11353, + "cubes": 31413, + "cubic": 48159, + "cubic": 29614, + "cubs": 9858, + "cuck": 26364, + "cuckoo": 38062, + "cucu": 16705, + "cucumber": 19787, + "cucumbers": 48065, + "cud": 42684, + "cudd": 12820, + "cuddle": 19568, + "cuddles": 24001, + "cuddling": 29696, + "cuddly": 36208, + "cudi": 48713, + "cue": 13424, + "cuer": 39506, + "cues": 35719, + "cuff": 34693, + "cuff": 22414, + "cufflinks": 43938, + "cuffs": 37221, + "cuis": 9938, + "cuisine": 10605, + "cuk": 34838, + "cul": 1877, + "cula": 35935, + "cular": 10940, + "culars": 45719, + "cule": 31066, + "cules": 18984, + "culin": 14772, + "culinary": 16466, + "cull": 21880, + "cull": 42061, + "cullen": 25973, + "culmin": 33778, + "culo": 36305, + "culprit": 41593, + "cult": 11965, + "cultiv": 16781, + "cultivate": 42983, + "cultivated": 48901, + "cultivation": 41539, + "cultur": 20780, + "cultural": 34908, + "cultural": 6753, + "culturally": 36783, + "culture": 20197, + "culture": 3673, + "cultured": 40176, + "cultures": 19552, + "culver": 42103, + "cum": 20142, + "cum": 27119, + "cumb": 10858, + "cumber": 15309, + "cumberbatch": 27541, + "cumberland": 28747, + "cumbri": 32010, + "cumbria": 17953, + "cumin": 42285, + "cumple": 47050, + "cumul": 42961, + "cumulative": 47610, + "cumulus": 46313, + "cun": 12423, + "cun": 29532, + "cunningham": 25321, + "cuomo": 25681, + "cup": 5059, + "cup": 1937, + "cupboard": 32074, + "cupcake": 17025, + "cupcakes": 12747, + "cupid": 34885, + "cuppa": 28077, + "cups": 11463, + "cur": 1092, + "cur": 33073, + "curated": 20341, + "curator": 20753, + "curb": 21931, + "curd": 38881, + "cure": 36758, + "cure": 9088, + "cured": 26248, + "cures": 38204, + "curfew": 48826, + "curi": 12640, + "curing": 44169, + "curiosity": 21583, + "curious": 9865, + "curl": 24306, + "curled": 43734, + "curling": 18543, + "curls": 24340, + "curly": 20795, + "curran": 40999, + "currant": 43501, + "curren": 6142, + "currencies": 23530, + "currency": 7853, + "current": 3653, + "currently": 3792, + "currents": 35450, + "curric": 16201, + "curriculum": 17947, + "currie": 39385, + "curry": 49285, + "curry": 8051, + "curse": 18479, + "cursed": 26408, + "cursor": 46546, + "curt": 38137, + "curtain": 17223, + "curtains": 30223, + "curti": 39925, + "curtis": 13808, + "curve": 15792, + "curved": 25789, + "curves": 22814, + "curvy": 45788, + "cus": 2736, + "cusa": 47414, + "cuse": 37950, + "cush": 43731, + "cushi": 15333, + "cushion": 20853, + "cushions": 34163, + "cussion": 16658, + "cussions": 46853, + "cust": 20900, + "custard": 26516, + "custo": 4376, + "custody": 16176, + "custom": 2662, + "custom": 4996, + "custome": 41323, + "customer": 24035, + "customer": 5102, + "customerexperience": 45167, + "customers": 5528, + "customerservice": 40611, + "customiz": 41793, + "customizable": 48253, + "customization": 48244, + 
"customize": 32179, + "customized": 23229, + "customs": 16880, + "cut": 10511, + "cut": 3032, + "cute": 16031, + "cute": 2242, + "cuteness": 19342, + "cuter": 27151, + "cutest": 8032, + "cuth": 44328, + "cutie": 10733, + "cuties": 40939, + "cuties": 23420, + "cutiesaturday": 41883, + "cutler": 40428, + "cutlery": 49073, + "cutout": 45016, + "cuts": 7435, + "cutt": 27338, + "cutt": 47647, + "cutter": 19719, + "cutters": 44783, + "cutting": 7266, + "cuz": 9215, + "cv": 13531, + "cv": 13947, + "cvs": 29603, + "cw": 10652, + "cw": 11065, + "cwc": 19179, + "cwgc": 48527, + "cws": 45186, + "cx": 44457, + "cx": 14283, + "cy": 1470, + "cy": 1678, + "cyber": 5830, + "cyber": 10210, + "cybercrime": 41772, + "cybermonday": 36578, + "cyberpunk": 36896, + "cybersecurity": 10581, + "cyborg": 36650, + "cycl": 9791, + "cycle": 19083, + "cycle": 5072, + "cycled": 31055, + "cycles": 14605, + "cycli": 12201, + "cycling": 26353, + "cycling": 6321, + "cyclist": 20686, + "cyclists": 20303, + "cyclo": 18122, + "cyclone": 48094, + "cyclone": 20917, + "cyclones": 34669, + "cylin": 18569, + "cylinder": 22092, + "cylinders": 48888, + "cymb": 36677, + "cymru": 24005, + "cyn": 14324, + "cynthi": 41994, + "cynthia": 23748, + "cyp": 14809, + "cypress": 25347, + "cypri": 36481, + "cyprus": 15263, + "cyril": 36028, + "cyrus": 14204, + "cystic": 46131, + "cyto": 31864, + "cz": 22898, + "cz": 22921, + "cze": 12152, + "czech": 43151, + "czech": 16141, + "cé": 36454, + "cé": 18317, + "d": 67, + "d": 323, + "da": 925, + "da": 1140, + "daa": 32642, + "daan": 44814, + "dab": 10413, + "dab": 22900, + "dac": 16222, + "dac": 27478, + "daca": 28477, + "dach": 34166, + "dachsh": 41641, + "dachshund": 42720, + "dad": 4346, + "dad": 2639, + "dada": 31325, + "daddy": 29466, + "daddy": 6546, + "dade": 23299, + "dades": 28289, + "dads": 12741, + "dae": 23358, + "dae": 15422, + "daener": 46934, + "daes": 47282, + "daesh": 35047, + "daf": 9972, + "daf": 36704, + "daffodils": 44769, + "daft": 36347, + "dag": 11434, + "dag": 25650, + "dagger": 34251, + "dah": 16976, + "dah": 11776, + "dahl": 45816, + "dahl": 22621, + "dahlia": 41768, + "dai": 13559, + "dai": 10632, + "dail": 14676, + "dailies": 21260, + "daily": 6689, + "daily": 2873, + "dailynews": 43466, + "dailys": 43160, + "dailysketch": 46738, + "daim": 40421, + "dain": 32222, + "dain": 28315, + "daipur": 47631, + "dair": 19998, + "dair": 42078, + "dairy": 25243, + "dairy": 10302, + "dairyfree": 49366, + "dais": 10502, + "daisi": 39947, + "daisies": 40654, + "daisy": 39310, + "daisy": 12865, + "dak": 6999, + "dak": 16095, + "dakar": 31137, + "dakota": 38522, + "dakota": 12358, + "dal": 2476, + "dal": 5601, + "dala": 42675, + "dalai": 41222, + "dalail": 35169, + "dalailama": 35849, + "dale": 11533, + "dale": 4677, + "dalejr": 38207, + "dales": 29031, + "daley": 28544, + "dalgo": 43614, + "dali": 36735, + "dali": 25703, + "dalit": 45432, + "dall": 43631, + "dalla": 16772, + "dallas": 27414, + "dallas": 5759, + "dallascowboys": 33016, + "dalmati": 44275, + "dalton": 21488, + "daly": 24873, + "dam": 1880, + "dam": 4926, + "damage": 6822, + "damaged": 13568, + "damages": 28842, + "damaging": 20610, + "damas": 23345, + "damascus": 25396, + "dame": 10069, + "dames": 44548, + "dami": 17783, + "damian": 43307, + "damian": 25375, + "damien": 25090, + "dammit": 31057, + "damn": 37409, + "damn": 4451, + "damned": 28428, + "damon": 48503, + "damon": 18244, + "damp": 26520, + "dams": 37680, + "dan": 2257, + "dan": 2284, + "dana": 44834, + "dana": 13777, + "danao": 38598, + "danc": 3945, + "dance": 
10619, + "dance": 2724, + "danced": 32891, + "dancehall": 33300, + "dancer": 11400, + "dancers": 13153, + "dances": 24083, + "dancing": 33280, + "dancing": 6226, + "dand": 12593, + "dandelion": 38903, + "dandy": 31932, + "dane": 19330, + "danes": 47477, + "dang": 4283, + "dang": 14992, + "danger": 20083, + "danger": 11212, + "dangerous": 7350, + "dangerously": 35012, + "dangers": 23726, + "dangle": 39907, + "dani": 3001, + "dani": 17009, + "daniel": 7859, + "daniel": 4981, + "daniela": 44466, + "danielle": 30396, + "danielle": 15292, + "danielpadilla": 34702, + "daniels": 16146, + "danish": 15467, + "dank": 31849, + "dann": 11951, + "danny": 14950, + "danny": 7621, + "dano": 29703, + "dans": 16241, + "dant": 48097, + "dant": 28237, + "dante": 21911, + "danube": 44594, + "dany": 47816, + "dao": 36099, + "dap": 12149, + "dap": 38034, + "daph": 24591, + "daphne": 31687, + "dapl": 34478, + "dapp": 46857, + "dapper": 26071, + "daq": 25381, + "dar": 1377, + "dar": 6242, + "dara": 17064, + "darby": 34366, + "darcy": 32916, + "dare": 14833, + "dare": 9863, + "daredevil": 28849, + "dares": 42973, + "dareto": 46794, + "dari": 16292, + "dari": 14552, + "daria": 45622, + "daries": 18184, + "daring": 28166, + "dario": 33918, + "darius": 32606, + "darje": 49089, + "dark": 5724, + "dark": 3144, + "darker": 18737, + "darkest": 25898, + "darkness": 10521, + "darling": 13048, + "darlings": 39961, + "darlington": 34565, + "darn": 26059, + "darrell": 33522, + "darren": 20263, + "darren": 12275, + "darry": 29200, + "darryl": 35359, + "darshan": 34564, + "dart": 14001, + "dart": 19841, + "darth": 41304, + "darth": 23164, + "dartmoor": 31477, + "dartmouth": 29667, + "darts": 15246, + "darwin": 43013, + "darwin": 20926, + "daryl": 45607, + "daryl": 24532, + "das": 9940, + "das": 7359, + "dash": 13858, + "dash": 10206, + "dashboard": 27679, + "dashi": 12876, + "dashing": 33825, + "dat": 1717, + "dat": 9445, + "data": 14876, + "data": 2281, + "datab": 11941, + "database": 14678, + "databases": 48384, + "datac": 27329, + "datacenter": 40133, + "datasci": 14496, + "datascience": 15748, + "dataviz": 28138, + "date": 34300, + "date": 1524, + "dated": 13564, + "dates": 7228, + "dating": 8534, + "dation": 15311, + "datlantic": 34270, + "dato": 36075, + "dats": 48674, + "dau": 3162, + "dau": 33828, + "daugh": 42523, + "daughter": 3944, + "daughters": 13585, + "daun": 29470, + "dav": 3700, + "dav": 46488, + "davao": 31502, + "dave": 10089, + "dave": 5077, + "daven": 28350, + "davenport": 34624, + "davey": 33391, + "davi": 1732, + "david": 4640, + "david": 2259, + "davidbowie": 44448, + "davido": 35989, + "davids": 46695, + "davidson": 13166, + "davies": 13120, + "davin": 43187, + "davis": 24426, + "davis": 5536, + "davison": 43725, + "davos": 31887, + "davy": 41565, + "daw": 5971, + "daw": 24404, + "dawg": 18660, + "dawgs": 26431, + "dawn": 30590, + "dawn": 7689, + "dawson": 18611, + "dax": 29458, + "day": 1405, + "day": 575, + "daya": 38165, + "daybreak": 33862, + "daycare": 36363, + "daydream": 41587, + "dayin": 20332, + "daylight": 20809, + "dayo": 29856, + "dayo": 46605, + "dayof": 16272, + "dayofthe": 38043, + "days": 1161, + "daysof": 12379, + "daysofcode": 36537, + "daysto": 29886, + "daystogo": 42198, + "dayswild": 42052, + "daytime": 22830, + "dayton": 35729, + "dayton": 20262, + "daytona": 16335, + "dayweekend": 44526, + "dayz": 35949, + "daz": 15449, + "daz": 43844, + "daze": 33591, + "dazz": 17149, + "dazzle": 41164, + "dazzling": 28821, + "db": 19100, + "db": 8128, + "dbacks": 31175, + "dbs": 40558, + "dbz": 
49226, + "dc": 5074, + "dc": 2743, + "dca": 49107, + "dcc": 33747, + "dccomics": 17610, + "dcfc": 35526, + "dci": 35336, + "dcs": 42878, + "dcu": 42647, + "dd": 1353, + "dd": 3766, + "dda": 35202, + "ddad": 39049, + "dday": 32689, + "dday": 26243, + "ddc": 48513, + "ddd": 24183, + "dddd": 35362, + "dden": 5013, + "dder": 9300, + "dders": 24827, + "ddi": 44450, + "ddin": 17175, + "dding": 48101, + "dding": 8974, + "ddings": 49106, + "ddington": 29238, + "ddle": 17633, + "ddle": 8357, + "ddled": 38392, + "ddles": 33901, + "ddleston": 25647, + "ddling": 30981, + "ddlovato": 28244, + "ddos": 46463, + "ddr": 26027, + "dds": 48334, + "ddu": 43836, + "ddy": 14981, + "ddy": 7876, + "de": 561, + "de": 654, + "dea": 18477, + "deacon": 29155, + "dead": 3906, + "dead": 2747, + "deadliest": 40811, + "deadline": 47209, + "deadline": 8458, + "deadlines": 44959, + "deadly": 10756, + "deadpool": 21471, + "deaf": 28229, + "deaf": 18358, + "deal": 7249, + "deal": 2696, + "dealer": 15218, + "dealers": 21697, + "dealership": 32096, + "dealing": 13138, + "deals": 4469, + "dealt": 30101, + "dean": 13807, + "dean": 5828, + "deandre": 43635, + "deans": 46852, + "dear": 15696, + "dear": 3817, + "dearest": 24880, + "dearly": 31880, + "deas": 34715, + "death": 7163, + "death": 2767, + "deaths": 12253, + "deau": 12399, + "deaux": 19883, + "deb": 2987, + "deb": 25687, + "debat": 32082, + "debate": 5196, + "debates": 19239, + "debating": 23472, + "debbie": 47186, + "debbie": 16735, + "debit": 32410, + "debor": 16738, + "deborah": 40997, + "deborah": 22150, + "debra": 33233, + "debris": 19208, + "debt": 8932, + "debts": 38770, + "debu": 9790, + "debun": 33123, + "debut": 42608, + "debut": 4085, + "debuted": 25215, + "debuting": 34817, + "debuts": 17044, + "dec": 3063, + "dec": 4628, + "deca": 33428, + "decad": 29914, + "decade": 11099, + "decadent": 41716, + "decades": 10488, + "decal": 26678, + "decals": 37606, + "decan": 40677, + "decat": 35334, + "decath": 47455, + "decatur": 38540, + "decay": 22703, + "dece": 3534, + "deceased": 30035, + "december": 3864, + "decent": 10698, + "decentr": 28960, + "decentralized": 38485, + "decep": 33529, + "deception": 33046, + "deci": 2262, + "decide": 8447, + "decided": 4939, + "decides": 17269, + "deciding": 22513, + "decision": 5575, + "decisions": 9903, + "decisive": 28690, + "deck": 24885, + "deck": 6943, + "decked": 39096, + "decker": 21449, + "decks": 23968, + "decl": 7091, + "decla": 10739, + "declan": 42341, + "declar": 18040, + "declaration": 19714, + "declare": 19856, + "declared": 13845, + "declares": 23641, + "declaring": 33273, + "decline": 15084, + "declined": 28911, + "declines": 40478, + "declining": 29221, + "deco": 26412, + "deco": 16422, + "decor": 5148, + "decor": 6928, + "decorate": 23651, + "decorated": 15917, + "decorating": 16968, + "decoration": 16029, + "decorations": 19158, + "decorative": 19289, + "decre": 12284, + "decrease": 24703, + "decreased": 33913, + "decreasing": 43763, + "decree": 43327, + "ded": 16744, + "ded": 1241, + "dedic": 4701, + "dedicate": 27610, + "dedicated": 6770, + "dedication": 10188, + "dedly": 36204, + "deduc": 22799, + "dee": 5268, + "dee": 6705, + "deed": 30260, + "deeds": 24516, + "deejay": 48304, + "deejay": 44511, + "deemed": 28102, + "deen": 26456, + "deen": 12912, + "deep": 5462, + "deep": 3383, + "deepak": 45528, + "deeper": 15224, + "deepest": 22245, + "deephouse": 35684, + "deepi": 19371, + "deepika": 34120, + "deepikap": 29903, + "deepikapadukone": 30646, + "deeplear": 22181, + "deeplearning": 24362, + "deeply": 11449, 
+ "deer": 19454, + "deer": 8700, + "deere": 32901, + "dees": 12547, + "deets": 35537, + "def": 2044, + "def": 11649, + "defam": 35670, + "defamation": 42741, + "default": 21650, + "defe": 4148, + "defeat": 8477, + "defeated": 8927, + "defeating": 22594, + "defeats": 16317, + "defect": 44013, + "defects": 37485, + "defen": 3619, + "defence": 30307, + "defence": 9659, + "defend": 21970, + "defend": 11397, + "defended": 27161, + "defender": 10618, + "defenders": 20063, + "defending": 13098, + "defends": 20134, + "defense": 45875, + "defense": 6021, + "defenseman": 43714, + "defenses": 49198, + "defensive": 10824, + "defi": 17244, + "defiance": 36186, + "defiant": 47597, + "defibrill": 47684, + "defic": 18022, + "defici": 23387, + "deficiency": 30685, + "deficit": 20156, + "defin": 3188, + "define": 14919, + "defined": 15278, + "defines": 28218, + "defining": 20504, + "definite": 40793, + "definitely": 4824, + "definition": 11405, + "definitive": 25298, + "defl": 31467, + "deforestation": 41330, + "defstar": 36427, + "defy": 39148, + "defying": 38496, + "deg": 38498, + "degra": 28939, + "degradation": 44468, + "degre": 4653, + "degree": 7119, + "degrees": 8000, + "deh": 35582, + "dei": 33833, + "dei": 23279, + "deir": 42948, + "deity": 42574, + "deja": 46902, + "dek": 23901, + "dekalb": 37775, + "del": 1233, + "del": 2003, + "dela": 37986, + "delaney": 31528, + "delav": 23706, + "delavin": 40477, + "delavin": 40776, + "delavinkisses": 40631, + "delaware": 17547, + "delay": 12955, + "delay": 10934, + "delayed": 14567, + "delaying": 43781, + "delays": 11232, + "dele": 7922, + "dele": 33431, + "delec": 38615, + "delectable": 45500, + "deleg": 8046, + "delegate": 27259, + "delegates": 14623, + "delegation": 14632, + "delete": 19204, + "deleted": 16588, + "deleting": 41857, + "delft": 42749, + "delgado": 49182, + "delhi": 26723, + "delhi": 5717, + "deli": 1932, + "deli": 18601, + "delia": 33193, + "deliber": 18316, + "deliberate": 38271, + "deliberately": 35163, + "delic": 13366, + "delicacy": 49181, + "delicate": 18768, + "delici": 19993, + "delicious": 3959, + "deliciously": 39589, + "deliciousness": 42819, + "delight": 46165, + "delight": 13073, + "delighted": 5943, + "delightful": 15513, + "delights": 25330, + "deline": 18797, + "delines": 13562, + "delish": 25093, + "deliver": 19561, + "deliver": 7396, + "delivered": 7278, + "deliveries": 29336, + "delivering": 9943, + "delivers": 11753, + "delivery": 5619, + "dell": 24381, + "dell": 10242, + "della": 22986, + "delle": 35963, + "deloit": 29428, + "deloitte": 38667, + "dels": 48636, + "delta": 32250, + "delta": 8768, + "delu": 18779, + "delusional": 48059, + "delux": 13709, + "deluxe": 14056, + "delve": 46008, + "dely": 15040, + "dem": 3251, + "dem": 7825, + "dema": 40268, + "dema": 45046, + "deman": 48366, + "demand": 13072, + "demand": 5650, + "demanded": 33699, + "demanding": 17099, + "demands": 14241, + "demar": 46566, + "demarcus": 47873, + "demb": 35930, + "demdebate": 43973, + "deme": 25143, + "demean": 37376, + "demen": 12604, + "dementi": 46028, + "dementia": 14047, + "demetri": 39553, + "demi": 32879, + "demi": 14480, + "demise": 28756, + "demo": 2930, + "demo": 7380, + "democr": 3573, + "democracy": 7758, + "democrat": 15431, + "democratic": 9149, + "democrats": 8865, + "demographic": 31308, + "demol": 19382, + "demolished": 26537, + "demolition": 22237, + "demon": 5635, + "demon": 12085, + "demonetisation": 41338, + "demonic": 46920, + "demons": 18388, + "demonstr": 8579, + "demonstrate": 22231, + "demonstrated": 29477, + 
"demonstrates": 24806, + "demonstrating": 22107, + "demonstration": 16722, + "demonstrations": 33964, + "demonstrators": 46450, + "demos": 19304, + "demp": 22490, + "dempsey": 30188, + "dems": 10989, + "demsin": 42664, + "demsinphilly": 43091, + "den": 1177, + "den": 1181, + "dena": 32431, + "denali": 48076, + "dence": 3370, + "dency": 11659, + "dend": 37447, + "dends": 43985, + "dene": 45128, + "dened": 19571, + "deng": 43098, + "deng": 41788, + "dengue": 41932, + "denham": 39180, + "deni": 21995, + "denial": 25716, + "denied": 15780, + "denies": 19565, + "denim": 13606, + "denis": 47630, + "denis": 18750, + "denise": 45900, + "denise": 20899, + "denmark": 13268, + "dennis": 32738, + "dennis": 10534, + "denny": 26808, + "denomin": 41016, + "dens": 16533, + "dense": 19353, + "density": 22431, + "dent": 3593, + "dent": 1258, + "dental": 24635, + "dental": 8382, + "dentally": 10346, + "dented": 21923, + "denti": 4418, + "dential": 5459, + "dentist": 17816, + "dentistry": 25754, + "dently": 28817, + "denton": 23567, + "dents": 1517, + "denver": 27847, + "denver": 8569, + "deny": 18679, + "denying": 32771, + "denzel": 42503, + "deo": 26406, + "deo": 12121, + "deodor": 47639, + "deol": 41902, + "deon": 31466, + "deon": 16079, + "dep": 6079, + "dep": 24370, + "depar": 10794, + "depart": 5343, + "depart": 30649, + "departed": 32541, + "departing": 26902, + "department": 5744, + "departments": 29523, + "departs": 38998, + "departure": 17850, + "depe": 36118, + "depend": 13894, + "depend": 27371, + "dependence": 40243, + "dependent": 23280, + "depending": 23673, + "depends": 20497, + "depic": 11307, + "depicted": 34637, + "depicting": 24970, + "depiction": 31071, + "depicts": 29340, + "deple": 38504, + "deplo": 9356, + "deplor": 39232, + "deploy": 26944, + "deployed": 20009, + "deploying": 42212, + "deployment": 20183, + "depo": 14276, + "depor": 36110, + "deport": 23389, + "deportation": 36617, + "deported": 39320, + "deportes": 47878, + "depos": 21266, + "deposit": 16775, + "deposits": 30740, + "depot": 12589, + "depp": 24941, + "depre": 7107, + "depress": 38869, + "depressed": 23269, + "depressing": 29235, + "depression": 10023, + "depri": 28587, + "depriv": 45809, + "deprivation": 47810, + "deprived": 39140, + "dept": 9201, + "depth": 10350, + "depths": 28855, + "depu": 6912, + "deputies": 24914, + "deputy": 7932, + "der": 839, + "der": 801, + "dera": 20696, + "derail": 48502, + "derby": 13904, + "derby": 7177, + "derbyshire": 22147, + "derdale": 21513, + "dere": 5701, + "dere": 44194, + "dered": 3776, + "derek": 22461, + "derek": 11205, + "derel": 46728, + "derer": 11289, + "derers": 20882, + "deri": 34573, + "derick": 33908, + "dering": 6076, + "deriv": 33458, + "derived": 26461, + "derland": 35488, + "derman": 29740, + "dermatology": 48051, + "dern": 30086, + "dero": 37203, + "dero": 34026, + "derrick": 21798, + "derry": 45777, + "derry": 20535, + "ders": 37307, + "ders": 1923, + "derson": 12677, + "dery": 17172, + "des": 6797, + "des": 1437, + "desai": 35316, + "desc": 13866, + "descen": 32318, + "descend": 26004, + "descend": 46241, + "descendants": 36323, + "descending": 36620, + "descent": 19375, + "desch": 49209, + "descri": 4637, + "describe": 10967, + "described": 14671, + "describes": 13678, + "describing": 24239, + "descrip": 41832, + "description": 13951, + "descriptions": 40653, + "desde": 42218, + "dese": 27195, + "deser": 3659, + "desert": 45776, + "desert": 7301, + "deserted": 41560, + "deserve": 7043, + "deserved": 10061, + "deserves": 9079, + "deserving": 26615, + "desh": 
25320, + "desh": 7448, + "deshi": 42769, + "desi": 6772, + "desi": 26635, + "desig": 1250, + "design": 8359, + "design": 1681, + "designated": 24119, + "designation": 41155, + "designed": 4486, + "designer": 35640, + "designer": 5728, + "designers": 12720, + "designing": 13467, + "designs": 6747, + "designthinking": 32450, + "desirable": 32368, + "desire": 11858, + "desired": 28631, + "desires": 27598, + "desk": 11937, + "desk": 6550, + "desks": 41014, + "desktop": 14345, + "desmond": 27821, + "desol": 41258, + "desp": 3642, + "despair": 28097, + "desper": 10144, + "desperate": 15072, + "desperately": 21993, + "despic": 32442, + "despicable": 37158, + "despite": 5325, + "dess": 7096, + "dess": 10001, + "dessert": 9753, + "desserts": 22948, + "desses": 43913, + "dest": 6540, + "dest": 4549, + "destin": 4934, + "destination": 32191, + "destination": 9179, + "destinations": 16981, + "destined": 28525, + "destiny": 39875, + "destiny": 10867, + "destro": 8287, + "destroy": 8308, + "destroy": 11930, + "destroyed": 9965, + "destroyer": 25291, + "destroying": 19613, + "destroys": 27634, + "destruc": 22945, + "destruction": 14281, + "destructive": 29591, + "det": 28966, + "det": 15366, + "deta": 1914, + "detached": 26252, + "detail": 7657, + "detailed": 12609, + "detailing": 23163, + "details": 2353, + "detained": 20260, + "dete": 5606, + "detec": 17991, + "detect": 22744, + "detected": 26988, + "detecting": 41290, + "detection": 16220, + "detective": 13672, + "detectives": 27994, + "detector": 27689, + "detectors": 45063, + "detention": 16908, + "deter": 10742, + "deter": 47458, + "detergent": 46726, + "deterior": 28512, + "determin": 8325, + "determination": 17410, + "determine": 16768, + "determined": 14371, + "determines": 42192, + "determining": 39884, + "deth": 38375, + "deto": 39710, + "deton": 39335, + "detour": 31211, + "detox": 22459, + "detri": 47951, + "detro": 6210, + "detroit": 19404, + "detroit": 7073, + "detta": 45438, + "dette": 35750, + "deu": 21457, + "deuce": 45332, + "deus": 37625, + "deut": 14970, + "deutsch": 30389, + "deutsche": 32760, + "deutschland": 36878, + "deux": 47089, + "dev": 2797, + "dev": 3670, + "deva": 45179, + "devan": 37072, + "devast": 12913, + "devastated": 29865, + "devastating": 19280, + "devastation": 42452, + "devel": 1820, + "develop": 1966, + "develop": 7708, + "developed": 8763, + "developer": 10929, + "developers": 13248, + "developing": 8131, + "development": 2855, + "developmental": 29347, + "developments": 17393, + "develops": 29895, + "deven": 45537, + "devgn": 29871, + "devi": 12926, + "devi": 20717, + "deviant": 25593, + "deviantart": 26046, + "device": 8163, + "devices": 9067, + "devil": 8894, + "devil": 8043, + "deville": 34329, + "devils": 11683, + "devin": 31193, + "devin": 20996, + "devine": 33019, + "devlin": 48040, + "devo": 11861, + "devo": 43444, + "devon": 16205, + "devon": 10046, + "devops": 21504, + "devos": 40646, + "devote": 37777, + "devoted": 24561, + "devotees": 39759, + "devotion": 25821, + "devotional": 35456, + "devs": 27374, + "dew": 31952, + "dew": 16358, + "dewey": 40399, + "dex": 10030, + "dex": 13790, + "dexpo": 42502, + "dexter": 45049, + "dexter": 22781, + "dey": 11829, + "dez": 23190, + "dez": 8122, + "df": 12908, + "df": 10468, + "dfc": 41903, + "dfs": 32880, + "dfw": 20439, + "dg": 2394, + "dg": 9742, + "dgate": 41684, + "dge": 4016, + "dge": 1360, + "dged": 11830, + "dgeon": 45655, + "dgers": 8733, + "dges": 5432, + "dging": 9565, + "dh": 6669, + "dh": 9960, + "dha": 11629, + "dha": 27377, + "dhabi": 22349, + 
"dhaka": 32877, + "dham": 29635, + "dham": 30838, + "dhan": 12542, + "dhan": 28569, + "dhanush": 26162, + "dhanush": 36200, + "dhanushkraja": 29266, + "dhar": 12397, + "dharma": 30536, + "dhary": 28706, + "dhawan": 44699, + "dhe": 29706, + "dheim": 44280, + "dhi": 31553, + "dhi": 26166, + "dho": 37834, + "dhoni": 25698, + "dhru": 40257, + "dhry": 39960, + "dhs": 26849, + "dhu": 32387, + "di": 570, + "di": 1618, + "dia": 7351, + "dia": 3357, + "diab": 15954, + "diabe": 19167, + "diabete": 43826, + "diabetes": 10319, + "diabetic": 30230, + "diablo": 23931, + "diag": 6851, + "diagno": 7736, + "diagnose": 44429, + "diagnosed": 16979, + "diagnosis": 15715, + "diagnostic": 26351, + "diagnostics": 37723, + "diagram": 22697, + "dial": 18416, + "dial": 11381, + "dialo": 30709, + "dialog": 48945, + "dialogue": 11288, + "dialogues": 40330, + "dialysis": 44798, + "diam": 4347, + "diameter": 27189, + "diamon": 8873, + "diamond": 18535, + "diamond": 6235, + "diamonds": 12687, + "dian": 16021, + "dian": 4998, + "diana": 12803, + "diane": 15855, + "dianne": 42299, + "dians": 21041, + "diaper": 34382, + "diapers": 39659, + "diar": 25932, + "diaries": 15541, + "diary": 10380, + "dias": 22137, + "dias": 29354, + "diaspora": 28390, + "diaz": 17688, + "dic": 1404, + "dic": 6717, + "dicap": 30023, + "dicaprio": 30755, + "dice": 14406, + "dick": 14413, + "dick": 9554, + "dickens": 33421, + "dict": 45360, + "dict": 15159, + "dictat": 26156, + "dictator": 27399, + "dictatorship": 37989, + "dictionary": 19699, + "did": 1861, + "did": 1335, + "diddy": 33527, + "didi": 34396, + "didier": 45614, + "didn": 2376, + "didnt": 13057, + "dido": 31725, + "didyou": 12295, + "didyouknow": 12506, + "die": 3150, + "die": 2082, + "diec": 27729, + "diecast": 37936, + "died": 3622, + "diego": 30940, + "diego": 6306, + "diem": 45571, + "dience": 33686, + "dient": 27231, + "dier": 29702, + "dier": 16394, + "dies": 20104, + "dies": 1862, + "diesel": 46312, + "diesel": 10591, + "diest": 45739, + "diet": 21295, + "diet": 6582, + "dietary": 29009, + "dietrich": 47005, + "diets": 35173, + "dif": 18656, + "dif": 48731, + "diff": 44073, + "diff": 20331, + "diffe": 1967, + "differ": 34620, + "differen": 14903, + "difference": 4731, + "differences": 14003, + "different": 2731, + "differenti": 21729, + "differential": 34027, + "differentiate": 49032, + "differently": 18325, + "diffic": 6140, + "difficult": 7405, + "difficulties": 23468, + "difficulty": 25245, + "diffu": 31603, + "diffuser": 49400, + "dig": 1831, + "dig": 9887, + "dige": 17820, + "digest": 20413, + "digestion": 40533, + "digestive": 32304, + "digg": 43240, + "digger": 35919, + "diggin": 48466, + "digging": 14971, + "digi": 15627, + "digi": 39361, + "digimon": 44181, + "digit": 14899, + "digit": 27472, + "digital": 4704, + "digital": 2794, + "digitalart": 16987, + "digitalhealth": 32190, + "digitalindia": 46630, + "digitally": 27543, + "digitalmarketing": 15299, + "digitaltransformation": 20047, + "digiti": 25935, + "digits": 31710, + "digni": 45532, + "dignit": 39497, + "dignity": 17744, + "digo": 35701, + "digs": 26877, + "dih": 43089, + "dii": 32755, + "dijk": 44444, + "dik": 38854, + "dik": 37747, + "dike": 42683, + "dil": 7643, + "dil": 17942, + "dile": 25428, + "dilemma": 29787, + "dilig": 30664, + "dill": 12318, + "dill": 27206, + "dillon": 21056, + "dilu": 45242, + "dim": 19576, + "dim": 17523, + "dime": 24443, + "dimen": 10935, + "dimension": 20479, + "dimensional": 25252, + "dimensions": 25086, + "diment": 43500, + "dimes": 44888, + "dimini": 37459, + "dimit": 22250, + 
"dimitri": 48840, + "dimp": 38853, + "din": 1462, + "din": 5673, + "dina": 36815, + "dinah": 30903, + "dine": 20951, + "dine": 12989, + "diner": 16963, + "dinesh": 48341, + "ding": 7545, + "ding": 796, + "dinger": 45580, + "dingh": 48064, + "dings": 5473, + "dington": 24804, + "dinho": 47370, + "dini": 20196, + "dining": 8658, + "dinner": 27548, + "dinner": 2571, + "dinners": 33570, + "dino": 9692, + "dino": 14077, + "dinosa": 18955, + "dinosaur": 15095, + "dinosaurs": 20387, + "dio": 3779, + "dio": 1521, + "dioce": 20763, + "diocese": 27091, + "dion": 42899, + "dion": 16250, + "dior": 23655, + "dios": 37563, + "dious": 27417, + "dioxide": 38102, + "dip": 19918, + "dip": 11343, + "dipl": 8490, + "diplo": 38115, + "diplom": 11169, + "diploma": 21251, + "diplomacy": 23798, + "diplomat": 32828, + "diplomatic": 23782, + "diplomats": 44126, + "dipped": 30610, + "dipper": 49317, + "dipping": 33544, + "dips": 37522, + "dir": 4251, + "dir": 8478, + "dire": 38355, + "dire": 25664, + "direc": 1534, + "direct": 43224, + "direct": 6016, + "directed": 8392, + "directing": 21817, + "direction": 15923, + "direction": 5407, + "directional": 38687, + "directioner": 48042, + "directioners": 22055, + "directions": 16440, + "directive": 40630, + "directly": 9701, + "director": 20337, + "director": 2681, + "directorial": 45327, + "directors": 11940, + "directory": 25272, + "directs": 34349, + "directv": 48652, + "dirk": 28171, + "dirt": 31415, + "dirt": 11795, + "dirty": 20127, + "dirty": 7615, + "dis": 1518, + "dis": 6112, + "disa": 3882, + "disab": 47380, + "disabilities": 17350, + "disability": 48986, + "disability": 13261, + "disabled": 13613, + "disadvantaged": 40577, + "disagree": 23199, + "disapp": 5384, + "disappear": 21148, + "disappear": 25173, + "disappearance": 35929, + "disappeared": 23139, + "disappearing": 35819, + "disappears": 44406, + "disappo": 7605, + "disappoint": 25446, + "disappointed": 13794, + "disappointing": 21941, + "disappointment": 23884, + "disappoints": 48545, + "disappro": 48276, + "disar": 42971, + "disaster": 9072, + "disasters": 26976, + "disastrous": 35790, + "disc": 1472, + "disc": 10712, + "discar": 40532, + "discarded": 45197, + "discer": 49140, + "dischar": 22671, + "discharge": 32485, + "disci": 9559, + "discip": 38951, + "discipl": 10467, + "disciples": 39366, + "disciplinary": 20232, + "discipline": 18903, + "disciplines": 42032, + "discla": 40248, + "disclaimer": 46465, + "disclo": 17481, + "disclose": 46379, + "disclosed": 30905, + "disclosure": 26502, + "disco": 2475, + "disco": 11964, + "discography": 47545, + "discomfort": 48054, + "discord": 23582, + "discoun": 18515, + "discount": 7638, + "discounted": 20993, + "discounts": 18186, + "discoura": 45850, + "discourse": 29441, + "discover": 10539, + "discover": 4834, + "discovered": 6986, + "discoveries": 29308, + "discovering": 17967, + "discovers": 29719, + "discovery": 40491, + "discovery": 8027, + "discre": 20616, + "discrimin": 11721, + "discrimination": 14775, + "discs": 29270, + "discu": 1984, + "discus": 41828, + "discuss": 4312, + "discussed": 11300, + "discusses": 8116, + "discussing": 5900, + "discussion": 5060, + "discussions": 13806, + "dise": 4262, + "disease": 5336, + "diseases": 12035, + "disen": 46468, + "disgrace": 29877, + "disgraceful": 44146, + "disgu": 9793, + "disguise": 27803, + "disguised": 37149, + "disgusted": 41977, + "disgusting": 16218, + "dish": 11039, + "dish": 4531, + "disha": 42498, + "dishes": 11412, + "dishon": 30777, + "dishu": 44728, + "dishwasher": 40524, + "disin": 19484, + 
"disinfe": 48050, + "disintegr": 49275, + "disk": 17970, + "dislike": 30796, + "dism": 30836, + "dism": 38821, + "dismant": 36557, + "dismiss": 43287, + "dismissal": 42068, + "dismissed": 30087, + "dismisses": 45238, + "disney": 6729, + "disney": 4696, + "disneyland": 39481, + "disneyland": 13661, + "disneyworld": 28469, + "diso": 26305, + "disobe": 42841, + "dison": 19310, + "disorder": 12635, + "disorders": 17114, + "disp": 11073, + "dispar": 24633, + "disparities": 45122, + "dispat": 28652, + "dispatch": 26306, + "dispen": 19077, + "dispenser": 40116, + "disper": 34499, + "displa": 9326, + "displac": 17718, + "displaced": 22817, + "displacement": 37931, + "display": 4456, + "displayed": 18967, + "displaying": 26468, + "displays": 15648, + "dispo": 13651, + "dispon": 38872, + "disponible": 46130, + "dispos": 45177, + "disposable": 37275, + "disposal": 28231, + "dispro": 32927, + "dispropor": 40354, + "disproportion": 45492, + "disregard": 43869, + "disrespect": 34055, + "disrespectful": 41723, + "disru": 13763, + "disrup": 14641, + "disrupt": 25214, + "disrupted": 46674, + "disrupting": 42419, + "disruption": 19635, + "disruptive": 31554, + "diss": 10766, + "diss": 35688, + "dissec": 43879, + "dissemin": 40463, + "dissent": 45154, + "disser": 25560, + "dissertation": 29448, + "dissi": 25088, + "dissol": 27398, + "dissuper": 33461, + "dist": 5479, + "dist": 12116, + "distance": 7964, + "distances": 37078, + "distant": 18949, + "distill": 41586, + "distilled": 49179, + "distillery": 22200, + "distin": 11892, + "distinct": 25056, + "distinction": 28183, + "distinctive": 25486, + "distingui": 15053, + "distinguish": 45418, + "distinguished": 16513, + "distor": 23781, + "distortion": 43690, + "distr": 11885, + "distract": 39309, + "distracted": 24049, + "distraction": 32039, + "distress": 26866, + "distressed": 37515, + "distri": 5987, + "distribu": 6138, + "distribute": 32313, + "distributed": 16419, + "distributing": 35216, + "distribution": 10484, + "distributor": 28354, + "distributors": 44240, + "distric": 3208, + "district": 46683, + "district": 3506, + "districts": 17565, + "distur": 11732, + "disturb": 33018, + "disturb": 39449, + "disturbance": 42416, + "disturbed": 29967, + "disturbing": 21476, + "disupdates": 45667, + "dit": 5752, + "dit": 2524, + "dita": 47965, + "ditch": 43715, + "ditch": 19291, + "dited": 40392, + "diti": 2363, + "dition": 16452, + "dition": 3015, + "ditional": 4322, + "ditions": 4503, + "dito": 43705, + "dits": 49374, + "dity": 16436, + "dium": 2903, + "div": 5293, + "div": 14869, + "diva": 13605, + "divas": 23534, + "dive": 26042, + "dive": 9058, + "diver": 13119, + "diver": 22094, + "divergence": 48735, + "divergent": 36132, + "divers": 30241, + "divers": 27038, + "diverse": 11464, + "diversi": 24475, + "diversion": 38457, + "diversity": 35634, + "diversity": 6257, + "diverted": 41049, + "dives": 13893, + "divi": 8375, + "divid": 31337, + "divide": 18842, + "divided": 18689, + "dividend": 32067, + "dividends": 45146, + "dividing": 45605, + "divin": 21838, + "divine": 46919, + "divine": 10976, + "diving": 9886, + "divinity": 39754, + "divisi": 39196, + "division": 5378, + "divisional": 40912, + "divisions": 33715, + "divor": 13543, + "divorce": 17060, + "divorced": 39437, + "divya": 47767, + "diwali": 18218, + "dix": 45838, + "dix": 27620, + "dixie": 24484, + "dixit": 28279, + "dixon": 16086, + "diy": 28472, + "diy": 7845, + "diya": 36459, + "diz": 32740, + "dized": 36232, + "dizz": 40239, + "dizzy": 35464, + "dj": 3761, + "dj": 3723, + "djan": 35338, + 
"django": 46498, + "dji": 35284, + "dji": 28379, + "djing": 36113, + "djo": 19432, + "djoker": 42721, + "djokernole": 42830, + "djokovic": 27944, + "djs": 18117, + "dk": 20702, + "dk": 16196, + "dl": 12558, + "dl": 9373, + "dlc": 19079, + "dle": 11057, + "dle": 3287, + "dled": 23494, + "dler": 40279, + "dles": 7890, + "dless": 14997, + "dley": 12808, + "dling": 18221, + "dly": 3069, + "dm": 19070, + "dm": 4667, + "dma": 42903, + "dman": 18826, + "dmc": 28991, + "dmit": 31607, + "dmitry": 48326, + "dms": 19955, + "dmv": 27508, + "dmx": 45255, + "dn": 11552, + "dn": 7459, + "dna": 8790, + "dnb": 35422, + "dnc": 20237, + "dnd": 11678, + "dnr": 37051, + "dns": 39245, + "dnt": 26795, + "do": 639, + "do": 818, + "doa": 48332, + "dob": 29640, + "doba": 35605, + "dobbs": 43006, + "dobson": 46888, + "doc": 3009, + "doc": 7251, + "doch": 25101, + "dock": 17311, + "dock": 8997, + "docked": 46784, + "docker": 31152, + "docking": 40845, + "docks": 24091, + "docs": 15157, + "doctor": 7872, + "doctor": 5547, + "doctoral": 23649, + "doctorate": 39134, + "doctors": 9705, + "doctorwho": 12996, + "doctr": 28497, + "doctrine": 35612, + "docu": 4433, + "document": 29293, + "document": 15121, + "documentaries": 44209, + "documentary": 7881, + "documentation": 31560, + "documented": 22310, + "documenting": 37876, + "documents": 14105, + "dod": 13847, + "dod": 30187, + "dodd": 36748, + "dodge": 31263, + "dodge": 12093, + "dodgeball": 43244, + "dodger": 31641, + "dodgers": 12422, + "dodgy": 37727, + "doe": 13296, + "does": 2397, + "does": 1897, + "doesn": 2503, + "doesnt": 17937, + "dof": 8277, + "doff": 20193, + "dofficial": 42516, + "dog": 4326, + "dog": 1929, + "dogcelebration": 41819, + "dogday": 27475, + "doge": 42187, + "dogg": 20749, + "doggie": 32237, + "doggo": 42155, + "doggy": 26359, + "doglo": 40733, + "dogre": 40030, + "dogrescue": 44158, + "dogs": 42182, + "dogs": 3255, + "dogsoftwitter": 19415, + "doh": 23581, + "doha": 20908, + "doherty": 31774, + "doi": 36361, + "doin": 15412, + "doing": 37408, + "doing": 1960, + "doit": 32272, + "doit": 28109, + "doj": 25700, + "dojo": 35901, + "dok": 40547, + "dok": 41034, + "doka": 46528, + "dol": 2287, + "dol": 19170, + "dola": 38005, + "dolan": 27200, + "dolby": 42414, + "dolce": 30033, + "dolce": 30661, + "dole": 41040, + "doll": 27031, + "doll": 9286, + "dollar": 35092, + "dollar": 7474, + "dollars": 10669, + "dolls": 15090, + "dolly": 43281, + "dolly": 23821, + "dolom": 37137, + "dolores": 40741, + "dolph": 8900, + "dolph": 22257, + "dolphin": 42963, + "dolphin": 16464, + "dolphins": 14002, + "dom": 2164, + "dom": 1919, + "domain": 15492, + "domaine": 48744, + "domains": 36358, + "dome": 8515, + "dome": 9827, + "domen": 37584, + "domest": 21936, + "domestic": 28189, + "domestic": 9043, + "domin": 4361, + "dominance": 30546, + "dominant": 20565, + "dominate": 21431, + "dominated": 23048, + "dominates": 34043, + "dominating": 29303, + "domination": 30919, + "domingo": 24882, + "dominic": 39007, + "dominic": 19095, + "dominican": 22934, + "dominion": 27155, + "domino": 30752, + "dominos": 39770, + "domo": 44293, + "doms": 30126, + "don": 1067, + "don": 847, + "dona": 26789, + "donal": 42375, + "donald": 5990, + "donald": 4335, + "donaldson": 37783, + "donaldtrump": 6652, + "donat": 36384, + "donate": 6429, + "donated": 8705, + "donates": 26960, + "donating": 12621, + "donation": 7924, + "donations": 9928, + "doncaster": 38008, + "doncaster": 25352, + "doncasterisgreat": 47333, + "done": 5136, + "done": 1700, + "donegal": 24172, + "donesia": 41281, + "donet": 
33724, + "donetsk": 33999, + "dong": 26242, + "dong": 31478, + "dongha": 28365, + "donghae": 28945, + "donia": 24014, + "donkey": 21415, + "donkeys": 44644, + "donna": 9158, + "donne": 30897, + "donnein": 38308, + "donneinarte": 40193, + "donnell": 35118, + "donnelly": 39070, + "donnie": 47058, + "donnie": 30609, + "donny": 37291, + "donny": 32887, + "dono": 14840, + "donor": 18013, + "donors": 17887, + "donovan": 21499, + "dons": 22127, + "dont": 8094, + "dont": 4632, + "donut": 18471, + "donuts": 13970, + "doo": 4543, + "doo": 11643, + "doodle": 9388, + "doodled": 41030, + "doodles": 22156, + "doodling": 37548, + "dooley": 47609, + "doom": 23263, + "doom": 14344, + "doomed": 33251, + "doomsday": 41791, + "doon": 36612, + "doop": 33886, + "door": 7188, + "door": 2489, + "doors": 4228, + "doorstep": 19533, + "doorway": 46575, + "dop": 42381, + "dop": 31722, + "dope": 42587, + "dope": 10094, + "doping": 30285, + "dopp": 21774, + "doppelg": 45216, + "doppler": 42540, + "dor": 2766, + "dor": 8695, + "dora": 18104, + "dorado": 32350, + "dorchester": 32656, + "dore": 39423, + "dores": 34323, + "dorf": 17296, + "dori": 49270, + "doria": 43186, + "dorian": 44016, + "doris": 24285, + "dork": 36206, + "dorm": 24263, + "doro": 15498, + "doro": 37389, + "dorothy": 20805, + "dors": 31240, + "dorset": 42109, + "dorset": 16047, + "dorsey": 41607, + "dortmund": 24290, + "dory": 36135, + "dos": 44258, + "dos": 5474, + "dose": 11497, + "doses": 37873, + "dossier": 46042, + "dost": 44222, + "dot": 7473, + "dot": 7004, + "dota": 23085, + "dotcom": 12443, + "dote": 31202, + "dothis": 47864, + "dotnet": 43124, + "dotorg": 46587, + "dots": 19019, + "dotted": 47950, + "dou": 1756, + "dou": 23608, + "doub": 19631, + "double": 13013, + "double": 3200, + "doubled": 24948, + "doubleheader": 34668, + "doubles": 12539, + "doubling": 36850, + "doubt": 37071, + "doubt": 8671, + "doubts": 30894, + "douche": 44292, + "doug": 20271, + "doug": 10758, + "dough": 15785, + "dough": 14983, + "doughnut": 32555, + "doughnuts": 31124, + "dougie": 46317, + "dougla": 9140, + "douglas": 10065, + "douglass": 45692, + "doun": 44785, + "dov": 38856, + "dova": 26551, + "dove": 27511, + "dove": 18281, + "dover": 43019, + "dover": 14683, + "doves": 47067, + "dow": 8022, + "dow": 10688, + "dowell": 27344, + "down": 1833, + "down": 1136, + "downe": 46501, + "downed": 35814, + "downer": 42522, + "downers": 43739, + "downey": 29429, + "downfall": 48702, + "downhill": 27387, + "downing": 28140, + "download": 35076, + "download": 3794, + "downloadable": 49105, + "downloaded": 22961, + "downloading": 30519, + "downloads": 26481, + "downpour": 39034, + "downpours": 40160, + "downs": 10706, + "downside": 41937, + "downstairs": 28174, + "downstream": 43822, + "downtime": 41964, + "downton": 45023, + "downton": 42668, + "downtown": 18230, + "downtown": 5061, + "downward": 37430, + "dowski": 43556, + "dox": 44786, + "dox": 14510, + "doyle": 17728, + "doyou": 27256, + "doz": 31106, + "dozen": 16401, + "dozens": 17883, + "dp": 23820, + "dp": 6465, + "dprint": 46644, + "dprinting": 16194, + "dprk": 47920, + "dps": 34288, + "dq": 28741, + "dr": 1084, + "dr": 1701, + "dra": 1114, + "dra": 7402, + "drac": 20168, + "dracing": 41253, + "dracula": 25405, + "draf": 37426, + "draft": 30624, + "draft": 5198, + "drafted": 19129, + "drafting": 33528, + "drafts": 29194, + "drag": 8452, + "drag": 12463, + "dragged": 27884, + "dragging": 37069, + "dragon": 9187, + "dragon": 5471, + "dragonball": 40959, + "dragoncon": 47802, + "dragonfly": 32824, + "dragons": 10203, + 
"dragrace": 40762, + "drags": 45368, + "drain": 23347, + "drain": 19467, + "drainage": 25953, + "drained": 44630, + "drains": 43638, + "drainthe": 47337, + "drake": 32504, + "drake": 8958, + "dral": 7503, + "dram": 6937, + "dram": 32170, + "drama": 5055, + "dramas": 33467, + "dramati": 43512, + "dramatic": 11240, + "dramatically": 24495, + "drank": 21712, + "draped": 49113, + "drastic": 43159, + "drastically": 35478, + "drau": 18621, + "draw": 17675, + "draw": 4001, + "drawer": 23219, + "drawers": 38975, + "drawing": 36996, + "drawing": 3610, + "drawings": 13397, + "drawn": 8893, + "draws": 12043, + "dray": 25562, + "drayton": 49044, + "drc": 21434, + "dre": 960, + "dre": 14584, + "dread": 17412, + "dread": 31403, + "dreaded": 47227, + "dreadful": 35846, + "dreality": 48367, + "dream": 4595, + "dream": 2984, + "dreambig": 46495, + "dreamcast": 47226, + "dreamed": 27984, + "dreamer": 25692, + "dreamers": 27194, + "dreaming": 11662, + "dreamliner": 49143, + "dreams": 4405, + "dreamt": 43743, + "dreamteam": 40090, + "dreamy": 23517, + "dred": 10903, + "dredge": 48783, + "dren": 29068, + "dren": 47309, + "drenched": 46378, + "dres": 48852, + "dres": 44697, + "dresden": 34836, + "dress": 12622, + "dress": 2595, + "dressage": 36144, + "dressed": 6559, + "dresser": 26346, + "dresses": 8184, + "dressing": 6348, + "drew": 18792, + "drew": 5281, + "drex": 33985, + "drey": 48271, + "dri": 1203, + "dri": 28833, + "drian": 36870, + "dribb": 42153, + "dric": 23448, + "dridge": 22956, + "drie": 40170, + "dried": 16037, + "drier": 39877, + "dries": 33857, + "drif": 33585, + "drift": 18194, + "drifting": 30276, + "drill": 11626, + "drilled": 46338, + "drilling": 18634, + "drills": 24378, + "drin": 3375, + "drin": 47133, + "drink": 14131, + "drink": 3979, + "drinking": 5778, + "drinklocal": 45998, + "drinks": 6732, + "drip": 24050, + "dripping": 38787, + "dris": 35804, + "drive": 11402, + "drive": 2620, + "driven": 9314, + "driver": 27563, + "driver": 4383, + "driverless": 46769, + "drivers": 7384, + "drives": 11441, + "driveway": 26273, + "driving": 37800, + "driving": 4161, + "drizzle": 28240, + "drm": 39674, + "dro": 1494, + "dro": 12442, + "drogba": 49199, + "droid": 38016, + "drome": 9157, + "dron": 43898, + "dron": 23360, + "drone": 33557, + "drone": 9397, + "drones": 14006, + "droo": 30715, + "drool": 41554, + "drooling": 44360, + "drop": 16407, + "drop": 3387, + "dropbox": 47216, + "dropped": 6792, + "dropping": 8339, + "drops": 6437, + "dros": 47033, + "drou": 38558, + "drought": 13935, + "drove": 13753, + "drow": 21159, + "drown": 28571, + "drowned": 34005, + "drowning": 24618, + "drs": 21257, + "dru": 2275, + "dru": 49048, + "drug": 20601, + "drug": 5600, + "drugs": 8021, + "druid": 40297, + "drum": 13353, + "drum": 8698, + "drummer": 13618, + "drummers": 46191, + "drumming": 35480, + "drummond": 42213, + "drums": 11690, + "drun": 15488, + "drunk": 37398, + "drunk": 8232, + "drunken": 28196, + "drupal": 46481, + "drush": 43009, + "drwho": 48342, + "dry": 13544, + "dry": 4501, + "dryer": 24425, + "drying": 23203, + "ds": 3361, + "ds": 646, + "dsa": 47607, + "dsb": 47168, + "dsb": 14257, + "dsburg": 47237, + "dsc": 37240, + "dsd": 45383, + "dsley": 40740, + "dslr": 33740, + "dsm": 39502, + "dson": 40310, + "dsp": 45291, + "dss": 41580, + "dstv": 35027, + "dt": 13104, + "dt": 7427, + "dthe": 13863, + "dtla": 31885, + "dtm": 42407, + "dts": 46233, + "du": 691, + "du": 3686, + "dua": 25244, + "dual": 39739, + "dual": 5347, + "duane": 38946, + "dub": 14526, + "dub": 13144, + "duba": 5485, + "dubai": 
32599, + "dubai": 5985, + "dubbed": 27740, + "dublin": 20707, + "dublin": 6145, + "dubnation": 47329, + "dubois": 48046, + "dubrov": 46709, + "dubrovnik": 48724, + "dubs": 27013, + "dubstep": 38303, + "dubu": 43257, + "duc": 979, + "duc": 36446, + "ducati": 28570, + "ducation": 17197, + "duce": 3660, + "duchess": 21713, + "duck": 12708, + "duck": 6910, + "ducks": 11202, + "duct": 26829, + "dude": 48087, + "dude": 5710, + "dudes": 14449, + "dudley": 27324, + "due": 2887, + "duel": 27143, + "dues": 37646, + "duet": 25457, + "duf": 38713, + "duff": 38071, + "duff": 21934, + "duffy": 23599, + "dug": 22743, + "dug": 21000, + "dugg": 40523, + "duggan": 46169, + "dugout": 36831, + "duh": 26716, + "dui": 29693, + "duk": 14160, + "duke": 18402, + "duke": 7732, + "dukes": 27914, + "dul": 6738, + "dulce": 44872, + "dulil": 32565, + "dulkar": 47980, + "dull": 19433, + "dulu": 28865, + "duluth": 32109, + "dulwich": 47343, + "dum": 13400, + "dum": 11564, + "dumb": 15901, + "dumb": 12464, + "dumbass": 38980, + "dummies": 40899, + "dummy": 34246, + "dump": 12655, + "dump": 17146, + "dumped": 23768, + "dumping": 31707, + "dumplings": 35495, + "dumps": 45804, + "dumpster": 45467, + "dun": 2616, + "dun": 18284, + "dunbar": 41453, + "duncan": 31084, + "duncan": 13502, + "dundal": 38185, + "dundas": 39300, + "dundee": 18619, + "dune": 32833, + "dune": 28208, + "dunedin": 40121, + "dunes": 23526, + "dung": 33712, + "dungeon": 28812, + "dungeon": 22931, + "dungeons": 42572, + "dungeonsand": 34970, + "dungeonsanddragons": 35497, + "dunham": 42501, + "duni": 43454, + "dunk": 17222, + "dunkin": 48022, + "dunkin": 36415, + "dunkirk": 46928, + "dunks": 48977, + "dunlop": 34753, + "dunn": 19185, + "dunne": 38538, + "dunno": 24502, + "duo": 8696, + "dup": 36805, + "dup": 10445, + "duper": 44850, + "duplex": 41186, + "duplic": 28992, + "dupont": 35994, + "dur": 4355, + "dur": 23230, + "dura": 28173, + "dura": 47382, + "durability": 43671, + "durable": 22285, + "duran": 28185, + "durango": 44443, + "durant": 24861, + "duras": 27518, + "duration": 31663, + "durban": 24474, + "dure": 19108, + "durga": 38456, + "durham": 26765, + "durham": 14335, + "during": 1590, + "dus": 9931, + "dusa": 28546, + "dusk": 19708, + "dust": 29723, + "dust": 8349, + "dusted": 38274, + "duster": 46280, + "dustin": 42423, + "dustin": 21235, + "dusting": 41756, + "dusty": 22029, + "dut": 32625, + "dutch": 22277, + "dutch": 7991, + "duter": 21624, + "duterte": 22371, + "duties": 19603, + "dutt": 30081, + "dutton": 42771, + "duty": 6458, + "duval": 42459, + "duvet": 48006, + "dux": 28562, + "dv": 4288, + "dv": 26265, + "dvd": 7170, + "dvds": 36655, + "dvn": 29811, + "dvr": 29210, + "dw": 8455, + "dw": 19997, + "dwar": 13487, + "dwarf": 22643, + "dwayne": 31395, + "dwell": 27549, + "dwell": 18755, + "dwelling": 37098, + "dwight": 22473, + "dwp": 46976, + "dwts": 30220, + "dwyer": 43878, + "dx": 22717, + "dx": 15679, + "dy": 1444, + "dy": 907, + "dyce": 48325, + "dye": 37159, + "dye": 15997, + "dyed": 24906, + "dyer": 29495, + "dyes": 39874, + "dying": 5115, + "dyk": 12142, + "dyke": 32632, + "dylan": 21004, + "dylan": 9900, + "dyn": 44289, + "dyn": 30669, + "dynam": 5735, + "dynamic": 10057, + "dynamics": 14329, + "dynamite": 29003, + "dynamo": 28281, + "dynasty": 14593, + "dyne": 42756, + "dyou": 11484, + "dyour": 22525, + "dys": 11022, + "dys": 38384, + "dysfunction": 36865, + "dysfunctional": 40757, + "dysle": 33681, + "dyslexia": 43199, + "dyson": 34475, + "dyssey": 17435, + "dystop": 28276, + "dystopian": 38915, + "dz": 24421, + "dz": 22913, + 
"dé": 25466, + "dü": 46948, + "dÃŃ": 46988, + "e": 68, + "e": 324, + "ea": 2150, + "ea": 8100, + "eable": 20693, + "each": 31442, + "each": 2416, + "eachother": 40792, + "ead": 42556, + "ead": 45523, + "eae": 27446, + "eag": 3743, + "eager": 21551, + "eagerly": 30094, + "eagle": 20207, + "eagle": 7517, + "eagles": 6920, + "eal": 48872, + "ealing": 40484, + "eames": 49072, + "eamon": 45954, + "ean": 13327, + "ear": 1055, + "ear": 8373, + "earbuds": 47807, + "eared": 9127, + "earl": 30573, + "earl": 14235, + "earle": 40292, + "earlier": 4297, + "earliest": 22097, + "early": 15840, + "early": 2090, + "earn": 33977, + "earn": 8465, + "earned": 8898, + "earnest": 45422, + "earning": 14550, + "earnings": 15912, + "earns": 16760, + "earp": 35296, + "earphones": 44905, + "earring": 28664, + "earrings": 9136, + "ears": 9861, + "eart": 7086, + "earth": 5184, + "earth": 3475, + "earthand": 34229, + "earthandclouds": 34480, + "earthday": 19481, + "earthquake": 10060, + "earthquakes": 32895, + "earthy": 47139, + "earts": 38824, + "eas": 5740, + "ease": 13574, + "easier": 8817, + "easiest": 26314, + "easily": 8197, + "easing": 44825, + "easport": 42251, + "east": 5022, + "east": 2602, + "eastbound": 28827, + "eastbourne": 38455, + "eastenders": 23545, + "easter": 14783, + "easter": 4811, + "eastern": 34522, + "eastern": 6311, + "eastman": 48280, + "easton": 29619, + "eastside": 42650, + "eastwood": 28270, + "easy": 18308, + "easy": 3176, + "eat": 5418, + "eat": 3384, + "eaten": 16750, + "eater": 24060, + "eaters": 37645, + "eatery": 46559, + "eating": 4371, + "eatlocal": 42868, + "eaton": 28462, + "eats": 13188, + "eau": 17608, + "eazy": 36536, + "eb": 12283, + "eb": 8677, + "eba": 40889, + "ebay": 34412, + "ebay": 4099, + "eber": 34020, + "ebo": 46635, + "ebola": 15864, + "ebon": 22013, + "ebony": 30651, + "ebook": 13122, + "ebooks": 25774, + "ec": 747, + "ec": 10879, + "eca": 18465, + "ecar": 34500, + "ecb": 26205, + "ecc": 33128, + "eccc": 47401, + "eccentric": 43228, + "eccle": 27494, + "ece": 2163, + "eces": 5905, + "ecg": 45983, + "ech": 15797, + "ech": 31147, + "echel": 41233, + "echo": 17366, + "echo": 13989, + "echoes": 32564, + "eci": 31936, + "eck": 25866, + "eck": 15969, + "ecker": 39661, + "ecker": 40890, + "ecla": 47806, + "eclec": 25114, + "eclectic": 28382, + "eclip": 30841, + "eclipse": 11505, + "eclub": 38983, + "eco": 5106, + "eco": 10077, + "ecofriendly": 43412, + "ecol": 22706, + "ecological": 25127, + "ecology": 18578, + "ecommerce": 15529, + "econ": 26755, + "econ": 21158, + "econom": 2768, + "economic": 36649, + "economic": 5259, + "economical": 48782, + "economically": 39406, + "economics": 12625, + "economies": 27136, + "economist": 18836, + "economists": 43701, + "economy": 5644, + "ecor": 28962, + "ecosystem": 15788, + "ecosystems": 28725, + "ecoun": 27924, + "ecr": 48572, + "ecraft": 11439, + "ecs": 23485, + "ecstasy": 47286, + "ecstatic": 36244, + "ect": 25168, + "ecu": 13087, + "ecu": 32919, + "ecuador": 19813, + "ecz": 43530, + "ed": 843, + "ed": 538, + "eda": 10804, + "edad": 44724, + "eday": 39258, + "edc": 21245, + "edchat": 14702, + "edd": 35431, + "eddi": 42930, + "eddie": 22748, + "eddie": 9517, + "eddy": 25959, + "ede": 29632, + "eded": 19555, + "edel": 20460, + "edelman": 48139, + "eden": 23621, + "eden": 13741, + "eder": 16249, + "edes": 36247, + "edfringe": 27402, + "edg": 35955, + "edgar": 33543, + "edgar": 17914, + "edge": 16914, + "edge": 5461, + "edged": 39188, + "edges": 20938, + "edgy": 35393, + "edi": 8750, + "edi": 27148, + "edible": 19795, + "edic": 
25184, + "edics": 30641, + "edin": 6524, + "edinburgh": 27574, + "edinburgh": 8068, + "eding": 5742, + "edison": 25846, + "edit": 8239, + "edit": 8013, + "edited": 13945, + "edith": 28597, + "editing": 10178, + "edition": 3062, + "editions": 21664, + "editor": 7661, + "editorial": 12325, + "editors": 19486, + "edits": 24945, + "edm": 37843, + "edm": 13539, + "edmon": 11275, + "edmond": 41581, + "edmonds": 46520, + "edmonton": 37311, + "edmonton": 15058, + "edmun": 36561, + "edmund": 27567, + "edna": 39002, + "edo": 29145, + "edo": 18096, + "edon": 41467, + "edor": 30184, + "edou": 47678, + "edp": 46066, + "eds": 1941, + "edsheeran": 30386, + "edt": 15071, + "edtech": 41825, + "edtech": 15262, + "edu": 11757, + "edu": 11799, + "eduardo": 30604, + "educ": 2200, + "educate": 17563, + "educated": 21447, + "education": 22358, + "education": 2806, + "educational": 10400, + "educator": 19875, + "educators": 15420, + "edwar": 27586, + "edward": 26184, + "edward": 7450, + "edwards": 12627, + "edwin": 48718, + "edwin": 22471, + "edy": 17072, + "edy": 4144, + "ee": 2644, + "ee": 4708, + "eed": 17513, + "eee": 24632, + "eee": 9361, + "eeee": 11696, + "eeee": 17570, + "eeeee": 26938, + "eeeeee": 41407, + "eek": 46591, + "eel": 27462, + "eels": 44416, + "eem": 27236, + "een": 47490, + "een": 21230, + "eer": 35409, + "eer": 31846, + "eera": 36664, + "eerie": 33846, + "ees": 40308, + "eet": 48935, + "eez": 39033, + "ef": 1490, + "ef": 1829, + "efa": 16999, + "eface": 48804, + "efan": 33556, + "efc": 22065, + "efcc": 46087, + "efer": 26199, + "eff": 20548, + "eff": 21715, + "effe": 2808, + "effec": 3943, + "effect": 5436, + "effective": 6837, + "effectively": 17516, + "effectiveness": 26847, + "effects": 7331, + "effic": 36004, + "efficacy": 39937, + "effici": 6670, + "efficiency": 11823, + "efficient": 11334, + "efficiently": 32915, + "effor": 6356, + "effort": 40078, + "effort": 6255, + "effortless": 41639, + "effortlessly": 42320, + "efforts": 6847, + "efish": 35813, + "efl": 27172, + "efron": 48111, + "efs": 7389, + "eg": 8053, + "eg": 14599, + "ega": 41193, + "egan": 42943, + "eger": 46704, + "eger": 22767, + "egg": 13778, + "egg": 5911, + "eggplant": 34906, + "eggs": 7099, + "ego": 34712, + "ego": 14250, + "egos": 43992, + "egre": 27044, + "egret": 42002, + "egy": 5224, + "egyp": 10250, + "egypt": 7267, + "egyptian": 12428, + "eh": 9277, + "eh": 9135, + "eha": 48563, + "ehealth": 48617, + "ehr": 45271, + "ehs": 44648, + "ei": 4006, + "ei": 18264, + "eic": 40251, + "eid": 28038, + "eid": 13979, + "eidmubarak": 46275, + "eiffel": 29720, + "eigh": 13468, + "eight": 7910, + "eighteen": 49316, + "eighth": 21237, + "eighty": 47449, + "eil": 29457, + "eileen": 31468, + "ein": 29944, + "ein": 24524, + "eindhoven": 47172, + "eing": 7702, + "einstein": 20587, + "eira": 47708, + "eis": 13802, + "eisen": 25273, + "eisenhower": 35562, + "either": 6036, + "ej": 19887, + "ej": 25009, + "ejec": 29771, + "ek": 4212, + "ek": 2092, + "el": 544, + "el": 832, + "ela": 11284, + "ela": 3787, + "elab": 38866, + "elabor": 26034, + "elaborate": 33855, + "elaine": 22523, + "elan": 17763, + "elan": 18399, + "eland": 24930, + "eland": 6275, + "elas": 41078, + "elast": 27479, + "elastic": 30282, + "elba": 48598, + "elbow": 21965, + "eld": 5684, + "elder": 11791, + "elder": 14416, + "elderly": 15455, + "elders": 28617, + "eldest": 33503, + "elding": 28223, + "elds": 13466, + "ele": 2084, + "ele": 9766, + "eleague": 36577, + "eleanor": 18604, + "elearning": 29969, + "elec": 1564, + "elec": 38768, + "elect": 15336, + "elected": 
8828, + "election": 19312, + "election": 4247, + "electionday": 40540, + "elections": 6949, + "elector": 16465, + "electoral": 19544, + "electr": 3654, + "electra": 48959, + "electri": 23927, + "electric": 19547, + "electric": 5031, + "electrical": 12176, + "electrician": 46422, + "electricity": 10950, + "electrifying": 48843, + "electro": 11648, + "electro": 23244, + "electromagnetic": 46530, + "electron": 33396, + "electronic": 33865, + "electronic": 9273, + "electronica": 43119, + "electronics": 13081, + "eled": 20357, + "elee": 44112, + "eleg": 8075, + "elegance": 19146, + "elegant": 11124, + "elek": 34559, + "elem": 25406, + "element": 14909, + "elementary": 8143, + "elements": 10925, + "elen": 30654, + "elen": 39164, + "elena": 19421, + "eleng": 48180, + "eleph": 7554, + "elephant": 10299, + "elephants": 16871, + "eler": 24646, + "eless": 15244, + "eless": 30837, + "elets": 19400, + "elev": 7921, + "elevate": 26736, + "elevated": 23967, + "elevation": 23826, + "elevator": 19021, + "eleven": 31617, + "eleven": 17795, + "elf": 45961, + "elf": 11924, + "elfie": 39955, + "elg": 28790, + "elgin": 31868, + "eli": 1018, + "eli": 6292, + "elia": 10956, + "elian": 42508, + "elias": 47274, + "elias": 29902, + "elic": 34743, + "elic": 13492, + "elie": 38677, + "elie": 26501, + "elier": 14634, + "elife": 37429, + "elife": 12719, + "eligibility": 34937, + "eligible": 16978, + "elijah": 26065, + "elike": 48913, + "elim": 9296, + "elimin": 11386, + "eliminate": 19655, + "eliminated": 29075, + "eliminating": 36619, + "elimination": 24176, + "elin": 25353, + "elin": 13458, + "eline": 46199, + "eline": 7153, + "eling": 9990, + "elio": 47943, + "elion": 30682, + "elions": 44159, + "eliot": 33326, + "elis": 23411, + "elis": 48021, + "elisa": 25610, + "elisa": 44051, + "elisabeth": 33127, + "elise": 27124, + "elit": 40882, + "elite": 32277, + "elite": 6553, + "elited": 43943, + "elitedangerous": 47138, + "elites": 35975, + "elius": 35623, + "elive": 49338, + "elive": 23505, + "elives": 49174, + "elix": 32926, + "elixir": 42887, + "eliz": 42844, + "eliza": 6132, + "eliza": 29992, + "elizabeth": 22397, + "elizabeth": 7026, + "elk": 34013, + "elk": 21896, + "ell": 826, + "ell": 812, + "ella": 20692, + "ella": 2957, + "elland": 43326, + "ellar": 38443, + "ellas": 37053, + "elle": 12818, + "elle": 4765, + "elled": 13146, + "ellen": 14007, + "ellen": 12312, + "ellenshow": 34812, + "eller": 20927, + "eller": 4465, + "ellers": 19010, + "elles": 24431, + "elli": 3367, + "elli": 6673, + "ellic": 38905, + "ellie": 16769, + "ellier": 44054, + "ellin": 40374, + "elling": 2220, + "ellington": 34477, + "ellini": 43256, + "elliot": 20761, + "elliott": 44456, + "elliott": 13788, + "ellip": 44816, + "ellis": 11553, + "ellison": 32295, + "ello": 2512, + "ellor": 14594, + "ells": 2433, + "ellu": 35560, + "elly": 8041, + "elly": 20355, + "elm": 25199, + "elm": 22082, + "elman": 33622, + "elmer": 45958, + "elmo": 32150, + "elo": 6170, + "elo": 13490, + "elon": 26381, + "elon": 20406, + "elondon": 47377, + "elong": 44363, + "elonmusk": 37076, + "elope": 23367, + "eloqu": 37795, + "elos": 44733, + "elot": 43490, + "elove": 43319, + "elove": 19165, + "elover": 21732, + "elovers": 33946, + "els": 35958, + "els": 1645, + "elsa": 22050, + "else": 18857, + "else": 3344, + "elsewhere": 22906, + "elson": 19624, + "elt": 18692, + "elton": 20758, + "elu": 14208, + "elusive": 28903, + "elves": 29111, + "elvi": 47008, + "elvis": 47359, + "elvis": 14498, + "elxn": 37726, + "ely": 12189, + "ely": 1273, + "elyn": 29691, + "elyn": 18126, + 
"em": 908, + "em": 2270, + "ema": 7002, + "ema": 11131, + "emabiggest": 23101, + "emabiggestfans": 29587, + "email": 33537, + "email": 4462, + "emailed": 40470, + "emailmarketing": 40188, + "emails": 12871, + "eman": 24416, + "eman": 36868, + "emancip": 42996, + "emanuel": 35232, + "emb": 3692, + "embar": 8266, + "embaras": 48019, + "embark": 33953, + "embarra": 11382, + "embarrass": 27183, + "embarrassed": 28217, + "embarrassing": 19653, + "embarrassment": 41346, + "embassy": 13598, + "embe": 46041, + "embed": 19703, + "embedded": 22046, + "embelli": 32144, + "embellished": 46992, + "ember": 47049, + "emblem": 21163, + "embo": 23065, + "embr": 35267, + "embrac": 16928, + "embrace": 12118, + "embraced": 35739, + "embraces": 38404, + "embracing": 22196, + "embro": 12550, + "embroi": 18667, + "embroide": 21530, + "embroidered": 22381, + "embroidery": 20823, + "emc": 20897, + "emc": 31602, + "emcee": 42038, + "eme": 22910, + "eme": 21548, + "emea": 40352, + "emed": 11028, + "emen": 22033, + "ement": 40841, + "ement": 2057, + "ements": 11058, + "emer": 3132, + "emer": 25727, + "emerald": 46878, + "emerald": 16980, + "emerge": 22182, + "emerged": 26425, + "emergen": 24096, + "emergence": 39867, + "emergencies": 35759, + "emergency": 44038, + "emergency": 5897, + "emerges": 30801, + "emerging": 38174, + "emerging": 11113, + "emeritus": 35333, + "emerson": 24147, + "emery": 32678, + "emi": 44327, + "emi": 18525, + "emil": 26794, + "emil": 40624, + "emile": 43926, + "emili": 20709, + "emilia": 34238, + "emilio": 39722, + "emily": 14545, + "emily": 7640, + "emin": 17227, + "emin": 23995, + "eminem": 22129, + "eminent": 33779, + "eming": 40398, + "emir": 13337, + "emir": 47613, + "emirates": 47244, + "emirates": 17867, + "emission": 27761, + "emissions": 14172, + "emit": 49043, + "emma": 18177, + "emma": 7445, + "emmanuel": 48045, + "emmanuel": 20411, + "emmett": 45779, + "emmy": 35625, + "emmy": 17089, + "emmys": 21875, + "emo": 3738, + "emo": 19381, + "emoji": 16327, + "emojis": 27870, + "emon": 34406, + "emor": 45034, + "emory": 44274, + "emotion": 17464, + "emotional": 7357, + "emotionally": 24088, + "emotions": 12904, + "emp": 3831, + "emp": 41004, + "empathy": 22420, + "emper": 12522, + "emperor": 13828, + "empha": 16237, + "emphasi": 47176, + "emphasis": 29588, + "empire": 26212, + "empire": 7614, + "empires": 46510, + "emplo": 3409, + "employ": 37290, + "employ": 39626, + "employe": 5037, + "employed": 26567, + "employee": 36631, + "employee": 9560, + "employees": 7377, + "employer": 21296, + "employers": 17647, + "employment": 10959, + "empor": 27386, + "emporium": 48541, + "empower": 13612, + "empower": 17230, + "empowered": 29087, + "empowering": 20086, + "empowerment": 15747, + "empowers": 46206, + "empress": 26656, + "empty": 41203, + "empty": 7893, + "emra": 39259, + "ems": 2858, + "emt": 46360, + "emu": 48149, + "emu": 29296, + "emul": 23272, + "emy": 31076, + "en": 524, + "en": 576, + "ena": 3452, + "enab": 17308, + "enable": 15642, + "enabled": 23666, + "enables": 23417, + "enabling": 23590, + "enam": 41486, + "enamel": 22746, + "enary": 13132, + "enas": 34536, + "enation": 20860, + "enberg": 15658, + "enburg": 28430, + "enc": 33169, + "enca": 37774, + "encan": 30345, + "encapsul": 40874, + "ence": 6495, + "ence": 954, + "enced": 6549, + "ences": 3777, + "enchan": 17290, + "enchanted": 28258, + "enchanting": 32531, + "enchil": 47396, + "enci": 32207, + "encia": 30068, + "encies": 18729, + "encing": 10326, + "enclosed": 43243, + "enclosure": 37419, + "encom": 44026, + "encore": 
20549, + "encoun": 17309, + "encounter": 13164, + "encountered": 32492, + "encounters": 25399, + "encoura": 6169, + "encourage": 12090, + "encouraged": 20299, + "encouragement": 24959, + "encourages": 23848, + "encouraging": 15875, + "encro": 45822, + "encry": 28600, + "encryp": 42928, + "encrypted": 48710, + "encryption": 31423, + "ency": 3484, + "encyclo": 32104, + "encyclopedia": 38376, + "end": 945, + "end": 806, + "enda": 6735, + "endale": 20290, + "endange": 13990, + "endangered": 14931, + "ende": 11373, + "ende": 40306, + "endeav": 18134, + "endeavor": 40502, + "endeavors": 44394, + "endeavour": 38035, + "ended": 2622, + "endemic": 41241, + "endent": 16265, + "ender": 48106, + "ender": 12383, + "enders": 7418, + "endez": 43850, + "endgame": 23042, + "endi": 31359, + "ending": 2695, + "endings": 36516, + "endish": 38841, + "endless": 12688, + "endlessly": 45145, + "endment": 45894, + "endo": 13476, + "endo": 15830, + "endocr": 36486, + "endof": 40786, + "endome": 46996, + "endon": 48018, + "endor": 8092, + "endorf": 37249, + "endorse": 28819, + "endorsed": 24307, + "endorsement": 21205, + "endorses": 34603, + "endorsing": 46779, + "endow": 45895, + "endra": 22321, + "ends": 1339, + "endthe": 46256, + "endu": 26032, + "endur": 19557, + "endurance": 21027, + "endure": 32419, + "enduring": 30851, + "enduro": 47042, + "ene": 3297, + "ene": 6049, + "ened": 2494, + "eneed": 45137, + "enegger": 33235, + "enei": 48906, + "enemies": 15824, + "enemy": 10310, + "enen": 45113, + "ener": 2244, + "ener": 13600, + "energ": 39451, + "energetic": 24197, + "energi": 23044, + "energies": 42374, + "energized": 48635, + "energy": 14974, + "energy": 2650, + "energye": 32271, + "energyefficiency": 40586, + "eners": 48208, + "enes": 42066, + "eness": 11806, + "enet": 46336, + "enew": 29672, + "enews": 13442, + "eney": 20706, + "enez": 33110, + "enf": 38167, + "enfield": 27808, + "enfor": 10592, + "enforce": 40224, + "enforced": 44597, + "enforcement": 12460, + "eng": 1035, + "eng": 6730, + "enga": 22297, + "engag": 6793, + "engage": 11089, + "engaged": 11475, + "engagement": 7281, + "engaging": 13060, + "enge": 26279, + "enge": 2742, + "engel": 38265, + "engen": 48286, + "enger": 6618, + "engers": 7533, + "engine": 3355, + "engine": 5857, + "engineer": 40151, + "engineer": 8517, + "engineered": 26580, + "engineering": 5273, + "engineers": 11494, + "engines": 14487, + "england": 20904, + "england": 3595, + "english": 15942, + "english": 3469, + "engra": 17560, + "engraved": 29421, + "engraving": 33309, + "engul": 43655, + "engv": 28401, + "enh": 7449, + "enhall": 48781, + "enham": 24592, + "enhan": 26827, + "enhance": 13993, + "enhanced": 16070, + "enhancement": 35601, + "enhances": 38259, + "enhancing": 25986, + "eni": 4395, + "eni": 17538, + "enic": 46780, + "enic": 28292, + "enig": 19754, + "enig": 48730, + "enight": 32848, + "enight": 20640, + "enigma": 34998, + "ening": 1133, + "enium": 34380, + "enix": 25720, + "enjo": 1498, + "enjoy": 12981, + "enjoy": 2218, + "enjoyable": 17444, + "enjoyed": 5045, + "enjoying": 3603, + "enjoyment": 34905, + "enjoys": 17024, + "enka": 43942, + "enko": 25312, + "enlar": 38136, + "enligh": 21364, + "enlighten": 28200, + "enlightened": 44032, + "enlightening": 44005, + "enlightenment": 29255, + "enlisted": 43555, + "enly": 43023, + "enn": 43563, + "enna": 8095, + "enne": 21176, + "enne": 11518, + "ennedy": 46266, + "ennes": 43613, + "enni": 7049, + "ennial": 14220, + "ennis": 48923, + "ennis": 26309, + "eno": 9429, + "eno": 12843, + "enoch": 47917, + "enor": 13955, + 
"enormous": 20129, + "enos": 44759, + "enote": 44955, + "enough": 2744, + "enow": 26876, + "enqu": 28417, + "enqui": 22810, + "enquire": 46658, + "enquiries": 31901, + "enquiry": 45141, + "enri": 18915, + "enrich": 20058, + "enrich": 45504, + "enriched": 45166, + "enrichment": 32903, + "enrique": 25489, + "enrol": 44279, + "enroll": 23739, + "enroll": 30366, + "enrolled": 36853, + "enrollment": 24875, + "enroute": 40548, + "ens": 41799, + "ens": 1323, + "ense": 12657, + "ense": 27658, + "ensemble": 14843, + "ensis": 32842, + "ensla": 37535, + "enslaved": 48675, + "ensure": 7492, + "ensures": 29707, + "ensuring": 19403, + "ent": 724, + "ent": 621, + "enta": 17681, + "ental": 32342, + "ental": 6168, + "entary": 9833, + "entation": 37412, + "ente": 17433, + "ente": 9935, + "ented": 3800, + "entennial": 43088, + "enter": 2963, + "enter": 3819, + "entered": 10679, + "entering": 12580, + "enterpri": 7339, + "enterprise": 9220, + "enterprises": 21219, + "enters": 15287, + "entertain": 5566, + "entertain": 23510, + "entertained": 30631, + "entertainer": 28674, + "entertaining": 13897, + "entertainment": 6166, + "entes": 24213, + "enthr": 36202, + "enthusi": 9631, + "enthusiasm": 20525, + "enthusiast": 27153, + "enthusiastic": 22068, + "enthusiasts": 27514, + "enti": 1938, + "ential": 5194, + "entially": 37695, + "entic": 10340, + "entine": 49212, + "enting": 20526, + "entire": 4709, + "entirely": 13911, + "entirety": 43242, + "entit": 15209, + "entities": 38134, + "entitled": 18680, + "entity": 28455, + "ently": 2922, + "ento": 21917, + "ento": 8762, + "entom": 31676, + "entourage": 47893, + "entr": 7129, + "entrance": 9129, + "entrata": 27304, + "entre": 34188, + "entre": 19600, + "entren": 46959, + "entrepre": 4583, + "entreprene": 4789, + "entrepreneu": 26784, + "entrepreneur": 12119, + "entrepreneur": 8033, + "entrepreneurial": 28261, + "entrepreneurs": 11054, + "entrepreneurship": 12858, + "entries": 13766, + "entry": 5362, + "ents": 870, + "entu": 6650, + "enty": 5657, + "enu": 23430, + "env": 32280, + "env": 39207, + "envel": 20052, + "envelope": 27358, + "envir": 3512, + "enviro": 46200, + "environ": 3599, + "environment": 33039, + "environment": 5501, + "environmental": 7831, + "environmentally": 32855, + "environments": 19577, + "envision": 49031, + "envoy": 29263, + "envy": 21017, + "eny": 20482, + "enya": 36509, + "enyc": 39520, + "enz": 25805, + "enz": 31873, + "enza": 25239, + "enzie": 14839, + "enzo": 31543, + "enzyme": 40348, + "enzymes": 47465, + "eo": 16054, + "eo": 11712, + "eoin": 48634, + "eon": 31915, + "eos": 17805, + "ep": 1178, + "ep": 1117, + "epa": 15866, + "epage": 26931, + "epaper": 33584, + "epcot": 32524, + "eper": 43071, + "eph": 45752, + "eph": 41240, + "ephe": 25129, + "epi": 7219, + "epi": 34641, + "epic": 12683, + "epic": 4991, + "epiconetsy": 49222, + "epide": 17382, + "epidemi": 44447, + "epidemic": 21522, + "epile": 23150, + "epilepsy": 29547, + "epilo": 31291, + "epilots": 39766, + "epiph": 40561, + "epiphany": 43251, + "epis": 24616, + "episcop": 28037, + "episcopal": 31221, + "episo": 2708, + "episode": 2965, + "episodes": 11837, + "epit": 21967, + "epitome": 35114, + "epl": 25950, + "epo": 25810, + "epp": 39054, + "epp": 39593, + "eps": 4090, + "epsilon": 40019, + "epsom": 40364, + "epstein": 34688, + "eq": 39331, + "eq": 33692, + "equ": 2563, + "equal": 17373, + "equal": 10433, + "equality": 48981, + "equality": 9578, + "equally": 18172, + "equals": 30278, + "equation": 28591, + "equations": 38225, + "eque": 19518, + "equestrian": 24728, + "equi": 8752, 
+ "equili": 43262, + "equine": 33801, + "equinox": 32652, + "equip": 6526, + "equip": 36979, + "equipment": 6893, + "equipo": 45688, + "equipped": 18331, + "equitable": 44717, + "equities": 44015, + "equity": 11293, + "equivalent": 19489, + "er": 517, + "er": 528, + "era": 30548, + "era": 2072, + "erable": 18801, + "erad": 24194, + "eradic": 36346, + "eradicate": 46164, + "eral": 6222, + "eran": 13069, + "eras": 19325, + "eras": 39090, + "erase": 33893, + "erased": 46762, + "erasmus": 38935, + "erc": 5360, + "erc": 32382, + "erd": 25645, + "erdo": 21112, + "erdogan": 24453, + "ere": 17907, + "ere": 642, + "erec": 21526, + "erected": 39365, + "ered": 9097, + "eres": 15751, + "ergon": 38120, + "ergy": 19550, + "eri": 2769, + "eri": 9509, + "eria": 11634, + "erial": 5409, + "eric": 1206, + "eric": 5396, + "erica": 13208, + "erich": 26070, + "erick": 27434, + "erick": 36959, + "erickson": 45286, + "ericsson": 39645, + "eridge": 45408, + "erie": 7005, + "eries": 9099, + "erik": 22805, + "erik": 16532, + "erika": 25531, + "erin": 17532, + "erin": 11333, + "erina": 25176, + "ering": 1785, + "erit": 23335, + "eritrea": 30738, + "erjee": 41665, + "erly": 14380, + "erm": 31649, + "erman": 17990, + "ern": 6992, + "ern": 12140, + "ernal": 20868, + "ernan": 34617, + "ernation": 48796, + "erne": 33930, + "ernest": 23006, + "ernie": 23636, + "ernity": 14653, + "erno": 40812, + "ernst": 30099, + "ero": 3211, + "ero": 3732, + "erock": 38206, + "eron": 32837, + "eroom": 46690, + "eros": 30597, + "erose": 48657, + "erosion": 30174, + "erotic": 30708, + "erotica": 39126, + "erous": 6384, + "eroy": 36461, + "erp": 28268, + "err": 22479, + "err": 25346, + "erra": 48446, + "errands": 45485, + "error": 12097, + "errors": 21195, + "erry": 45236, + "erry": 24124, + "ers": 4840, + "ers": 612, + "ersfc": 37925, + "ership": 2884, + "erson": 25780, + "erson": 6811, + "ert": 40325, + "ert": 3112, + "erta": 32007, + "erton": 26245, + "erts": 12921, + "eru": 36068, + "erun": 41642, + "erup": 17093, + "erupted": 48862, + "eruption": 33705, + "erville": 37557, + "erwin": 43724, + "ery": 12467, + "ery": 1692, + "erz": 38711, + "es": 957, + "es": 542, + "esa": 46834, + "esa": 12489, + "esanders": 23099, + "esc": 3330, + "esc": 28420, + "escal": 15902, + "escap": 11499, + "escape": 32484, + "escape": 7568, + "escaped": 18707, + "escapes": 29916, + "escaping": 21767, + "escar": 39229, + "escence": 37972, + "esch": 46760, + "esch": 41945, + "esco": 32482, + "escobar": 48807, + "escor": 24360, + "escort": 24976, + "escorted": 47667, + "escorts": 48574, + "escu": 36517, + "esday": 19553, + "ese": 18766, + "ese": 2260, + "esg": 41674, + "esh": 17119, + "esh": 13407, + "esha": 28799, + "eshop": 38451, + "eshop": 45570, + "eshopsuk": 39349, + "esi": 30064, + "esis": 12414, + "esk": 19359, + "esl": 26201, + "eso": 29890, + "eso": 28921, + "esof": 17047, + "eson": 46845, + "esp": 3849, + "esp": 13870, + "espa": 37301, + "espan": 41731, + "españa": 41118, + "especially": 4878, + "esper": 29216, + "espino": 46633, + "espionage": 43498, + "espn": 22917, + "espn": 7540, + "espnu": 47747, + "espo": 34381, + "esports": 16035, + "espresso": 17098, + "esq": 47352, + "esqu": 34616, + "esque": 25877, + "ess": 3118, + "ess": 9764, + "essa": 39125, + "essay": 12751, + "essays": 27328, + "esse": 22305, + "essen": 30489, + "essence": 17830, + "essenti": 11163, + "essential": 47264, + "essential": 6895, + "essentially": 30042, + "essentials": 16191, + "essex": 30563, + "essex": 11623, + "est": 2291, + "est": 1509, + "esta": 41449, + "esta": 10135, + 
"estab": 7010, + "establi": 8412, + "establish": 19709, + "established": 13143, + "establishing": 29420, + "establishment": 20213, + "estas": 39072, + "estate": 47130, + "estate": 6159, + "estates": 26054, + "este": 12968, + "este": 20579, + "esteban": 48381, + "esteem": 31541, + "esteemed": 36293, + "ester": 45808, + "esthe": 18468, + "esther": 24393, + "estim": 8904, + "estimate": 21883, + "estimated": 16665, + "estimates": 21957, + "esto": 31589, + "esto": 23958, + "estonia": 26260, + "estonian": 48895, + "estrada": 48116, + "estre": 31271, + "estu": 26272, + "estuary": 35269, + "esur": 35758, + "esville": 39187, + "esy": 46268, + "et": 1169, + "et": 875, + "eta": 8761, + "etal": 25221, + "etary": 13074, + "etc": 5353, + "etched": 40411, + "etching": 41375, + "ete": 38820, + "ete": 40245, + "eter": 8587, + "eter": 17007, + "eternal": 13732, + "eternally": 48486, + "eternity": 23832, + "eters": 18392, + "etf": 31661, + "eth": 4819, + "eth": 5927, + "ethan": 24245, + "ethan": 15958, + "ethanol": 38166, + "ethe": 21312, + "ethel": 45921, + "ether": 23349, + "ethere": 18705, + "ethereal": 40925, + "ethereum": 19612, + "ethernet": 35026, + "ethi": 10327, + "ethic": 39104, + "ethical": 47041, + "ethical": 17679, + "ethics": 13355, + "ethiop": 10897, + "ethiopia": 13920, + "ethiopian": 24507, + "ethnic": 30522, + "ethnic": 16344, + "ethnicity": 46787, + "ethno": 34225, + "ethos": 48768, + "eti": 11188, + "eti": 30394, + "etienne": 46118, + "eties": 15137, + "etihad": 38489, + "etiquette": 37957, + "etis": 38216, + "etisation": 39733, + "etna": 41940, + "eto": 27829, + "eto": 33837, + "eton": 44339, + "etour": 41462, + "etr": 23012, + "etres": 42838, + "ets": 3442, + "etsy": 13237, + "etsy": 6282, + "etsym": 22902, + "etsymntt": 25416, + "etsyshop": 44643, + "ett": 32729, + "ett": 24998, + "etta": 30466, + "ette": 19981, + "ette": 5212, + "ettes": 35326, + "etto": 44219, + "etty": 40759, + "etu": 36593, + "etv": 49155, + "etv": 20325, + "etwork": 20585, + "ety": 25920, + "ety": 2746, + "etz": 36181, + "etz": 25301, + "eu": 1506, + "eu": 3238, + "eucalyp": 41068, + "eucalyptus": 42351, + "euchar": 38362, + "eugen": 30678, + "eugene": 17760, + "eul": 46749, + "eun": 16431, + "eun": 26219, + "eunhyuk": 47526, + "eup": 44435, + "euph": 21386, + "euphoria": 41051, + "eur": 18343, + "eur": 12018, + "eura": 32605, + "eure": 25311, + "euref": 48017, + "eureka": 31686, + "euro": 2039, + "euro": 8463, + "euroleague": 46821, + "europa": 18290, + "europale": 42473, + "europaleague": 44029, + "europarl": 44922, + "europe": 4198, + "europe": 3848, + "european": 26712, + "european": 4759, + "europeans": 37082, + "euros": 22274, + "eurovision": 17593, + "eurozone": 42555, + "eurusd": 40895, + "eus": 44214, + "euston": 46905, + "euthan": 43280, + "euve": 40652, + "eux": 25019, + "ev": 776, + "ev": 10133, + "eva": 6845, + "evacu": 13187, + "evacuated": 26806, + "evacuation": 27353, + "eval": 25139, + "eval": 9703, + "evalu": 10314, + "evaluate": 27174, + "evaluating": 34541, + "evaluation": 17640, + "evan": 12821, + "evan": 12847, + "evangel": 20518, + "evangeli": 21372, + "evangelical": 36151, + "evangelist": 42275, + "evankirstel": 46581, + "evans": 8836, + "evansville": 44782, + "evapor": 33352, + "evasion": 48795, + "eve": 5732, + "eve": 1866, + "eved": 19820, + "evel": 39315, + "evelyn": 26687, + "evement": 8210, + "even": 6359, + "even": 1427, + "evening": 34487, + "evening": 2285, + "evenings": 19994, + "evenly": 45974, + "event": 10612, + "event": 1655, + "eventful": 45628, + "evento": 38155, + 
"eventprofs": 24980, + "events": 3667, + "eventu": 14055, + "eventual": 45321, + "eventually": 14397, + "ever": 888, + "ever": 1247, + "everest": 21722, + "everett": 25456, + "everglades": 46294, + "evergreen": 23852, + "everlasting": 32849, + "evers": 31914, + "everton": 13315, + "every": 1091, + "every": 1505, + "everybody": 5901, + "everyday": 25049, + "everyday": 5160, + "everyone": 1584, + "everything": 36376, + "everything": 2410, + "everytime": 16911, + "everywhere": 6364, + "eves": 7323, + "evi": 5348, + "evi": 36989, + "evic": 21336, + "eviction": 37111, + "eviden": 46220, + "evidence": 6439, + "evident": 34529, + "evie": 47195, + "evil": 23218, + "evil": 6006, + "eville": 16143, + "eving": 24729, + "evo": 17962, + "evo": 13169, + "evoc": 43133, + "evol": 5350, + "evolu": 7725, + "evolution": 8902, + "evolutionary": 30629, + "evolve": 23406, + "evolved": 22613, + "evolving": 23675, + "evp": 46154, + "evs": 33576, + "ew": 11942, + "ew": 15428, + "ewan": 40247, + "ewe": 48438, + "ewing": 38873, + "ews": 9878, + "ex": 659, + "ex": 4118, + "exac": 5460, + "exact": 12651, + "exactly": 5840, + "exagger": 29766, + "exal": 49324, + "exam": 4428, + "exam": 8785, + "examination": 20970, + "examine": 25728, + "examined": 44004, + "examiner": 29149, + "examines": 28160, + "examining": 30616, + "example": 6228, + "examples": 14790, + "exams": 14028, + "exas": 47536, + "exc": 1302, + "excav": 20733, + "excavation": 45909, + "exce": 10999, + "exceed": 32521, + "exceeded": 36221, + "exceeding": 47213, + "exceeds": 49353, + "excel": 28351, + "excel": 18754, + "excell": 3298, + "excellence": 8171, + "excellency": 36503, + "excellent": 4239, + "excelsi": 47315, + "excep": 8882, + "except": 8541, + "exception": 25018, + "exceptional": 13425, + "exceptionally": 29306, + "excer": 17737, + "excerpt": 20586, + "excess": 22491, + "excessive": 21332, + "exchange": 6616, + "exchanged": 48919, + "exchanges": 29730, + "exchanging": 47760, + "excit": 10510, + "excite": 47711, + "excited": 1889, + "excitement": 11407, + "exciting": 4300, + "exclu": 3114, + "exclude": 49235, + "excluded": 46216, + "excluding": 44326, + "exclusion": 40219, + "exclusive": 3747, + "exclusively": 13565, + "exclusives": 47149, + "excu": 7324, + "excur": 27533, + "excursion": 34869, + "excuse": 9266, + "excuses": 19388, + "exe": 3554, + "exe": 48027, + "exec": 15052, + "execs": 35728, + "execu": 4360, + "execute": 36405, + "executed": 20432, + "execution": 18085, + "executive": 5944, + "executives": 24357, + "exem": 19753, + "exemp": 28602, + "exempl": 36371, + "exemplary": 39123, + "exempli": 41934, + "exempt": 44278, + "exemption": 47481, + "exer": 40295, + "exerc": 5932, + "exercise": 7016, + "exercises": 19669, + "exercising": 39036, + "exeter": 32137, + "exeter": 18837, + "exfoli": 38823, + "exhau": 11154, + "exhaust": 21812, + "exhausted": 21741, + "exhausting": 40035, + "exhaustion": 49221, + "exhi": 3022, + "exhib": 3783, + "exhibit": 24992, + "exhibit": 8209, + "exhibiting": 23889, + "exhibition": 4219, + "exhibitions": 28311, + "exhibitor": 44192, + "exhibitors": 38542, + "exhibits": 30093, + "exhilar": 40262, + "exhilarating": 49289, + "exi": 5297, + "exico": 38712, + "exile": 28566, + "exist": 10899, + "exist": 9645, + "existed": 23198, + "existence": 13832, + "existent": 43541, + "existential": 38752, + "existing": 12886, + "exists": 14608, + "exit": 9374, + "exited": 37581, + "exiting": 39577, + "exits": 34943, + "exmoor": 48260, + "exo": 15600, + "exo": 5842, + "exodus": 30098, + "exol": 42856, + "exop": 35288, + 
"exoplan": 37980, + "exor": 24506, + "exorcist": 46309, + "exotic": 15639, + "exp": 9923, + "exp": 19066, + "expan": 7512, + "expand": 10382, + "expand": 13141, + "expanded": 18390, + "expanding": 15755, + "expands": 22223, + "expanse": 46886, + "expansion": 10138, + "expansive": 49261, + "expat": 43900, + "expe": 2560, + "expect": 9802, + "expect": 5716, + "expectation": 34273, + "expectations": 12529, + "expected": 5573, + "expecting": 12525, + "expects": 24536, + "expedition": 16761, + "expeditions": 49327, + "expelled": 48834, + "expen": 7216, + "expend": 29302, + "expenditure": 47044, + "expense": 28473, + "expenses": 21797, + "expensive": 9649, + "exper": 1533, + "experi": 4723, + "experience": 31867, + "experience": 2415, + "experienced": 10417, + "experiences": 8233, + "experiencing": 16643, + "experiential": 44952, + "experim": 6697, + "experiment": 13079, + "experimental": 16539, + "experimenting": 28263, + "experiments": 21077, + "expert": 6284, + "expertise": 16555, + "experts": 6960, + "expi": 26850, + "expir": 35077, + "expire": 49315, + "expired": 30200, + "expires": 34739, + "expl": 3261, + "expla": 3517, + "explain": 48918, + "explain": 7304, + "explained": 14229, + "explaining": 13136, + "explains": 6655, + "explan": 13294, + "explanation": 16577, + "explanations": 34383, + "explic": 21011, + "explicit": 33228, + "explo": 3586, + "explode": 31262, + "exploded": 28947, + "explodes": 38119, + "exploding": 34683, + "exploit": 36953, + "exploited": 48554, + "explor": 11958, + "exploration": 14043, + "explore": 10405, + "explore": 5147, + "explorebc": 38754, + "explorecanada": 36600, + "explored": 25016, + "explorer": 15776, + "explorers": 28491, + "explores": 13996, + "exploring": 7584, + "explosion": 13785, + "explosions": 38646, + "explosive": 18888, + "explosives": 44705, + "expo": 7820, + "expo": 6344, + "expon": 27905, + "export": 14444, + "exporting": 47433, + "exports": 20088, + "expose": 23181, + "exposed": 12180, + "exposes": 33575, + "exposing": 28362, + "exposition": 36943, + "exposure": 11903, + "expre": 6085, + "express": 18553, + "express": 5642, + "expressed": 20777, + "expresses": 31931, + "expressing": 30207, + "expression": 11357, + "expressions": 20314, + "expressive": 42060, + "expressway": 31658, + "exquis": 16575, + "exquisite": 17958, + "ext": 5711, + "ext": 20072, + "exten": 5555, + "extend": 14492, + "extended": 9614, + "extending": 25652, + "extends": 20688, + "extension": 10275, + "extensions": 24525, + "extensive": 16870, + "extensively": 47365, + "extent": 24913, + "exter": 9797, + "exterior": 19352, + "extermin": 41671, + "external": 15028, + "extin": 13553, + "extinct": 24488, + "extinction": 21186, + "extingui": 38567, + "extor": 35620, + "extr": 29082, + "extra": 6416, + "extra": 4231, + "extrac": 18550, + "extract": 18962, + "extraction": 28789, + "extracts": 45576, + "extraordin": 23628, + "extraordinaire": 30909, + "extraordinary": 10982, + "extras": 29817, + "extravag": 22299, + "extravaganza": 29461, + "extre": 3978, + "extreme": 38357, + "extreme": 8331, + "extremely": 6519, + "extremism": 31493, + "extremist": 36383, + "extremists": 41425, + "extru": 43010, + "ey": 1541, + "ey": 1477, + "eyang": 28915, + "eye": 5034, + "eye": 3272, + "eyebrow": 34250, + "eyebrows": 19923, + "eyed": 15512, + "eyeing": 34916, + "eyel": 17075, + "eyelashes": 42074, + "eyeliner": 33354, + "eyeon": 25126, + "eyes": 3095, + "eyeshadow": 35213, + "eyewear": 30165, + "eyewitness": 36258, + "eyou": 31996, + "eyour": 40229, + "eyre": 44115, + "ez": 10082, + "ez": 
8387, + "eze": 25993, + "eze": 27229, + "ezekiel": 41428, + "ezra": 27552, + "f": 69, + "f": 325, + "fa": 778, + "fa": 2800, + "faa": 27577, + "fab": 2833, + "fab": 5492, + "faber": 43461, + "faber": 42488, + "fabi": 29425, + "fabian": 34539, + "fabio": 31666, + "fabric": 16217, + "fabric": 10033, + "fabricated": 40851, + "fabrication": 33476, + "fabrics": 23159, + "fabulous": 5189, + "fac": 1053, + "fac": 35438, + "facade": 29217, + "face": 2545, + "face": 1710, + "facebook": 36156, + "facebook": 2943, + "faced": 10941, + "faceli": 32023, + "facelift": 36380, + "faceoff": 42710, + "facep": 45285, + "faces": 4905, + "faceted": 43435, + "facetime": 24076, + "facial": 11909, + "facil": 39973, + "facilit": 13567, + "facilitate": 26733, + "facilitated": 43853, + "facilitating": 34796, + "facilities": 10388, + "facility": 8165, + "facing": 7619, + "fact": 17189, + "fact": 3598, + "factfriday": 27953, + "faction": 14629, + "factor": 21082, + "factor": 8124, + "factories": 36492, + "factors": 12733, + "factory": 42483, + "factory": 6072, + "facts": 5085, + "factual": 45471, + "faculty": 9504, + "facup": 25283, + "fad": 12632, + "fad": 47669, + "fade": 20486, + "faded": 26051, + "fades": 40441, + "fading": 32882, + "fadnavis": 38945, + "faf": 31052, + "faf": 43903, + "fag": 25617, + "fag": 39305, + "fah": 25495, + "fah": 35429, + "fahren": 45527, + "fai": 20519, + "fai": 26384, + "fail": 7105, + "fail": 6801, + "failed": 8314, + "failing": 15757, + "fails": 13388, + "failure": 8732, + "failures": 25442, + "faint": 30807, + "fair": 3031, + "fair": 2849, + "fairbanks": 43962, + "faire": 34745, + "faire": 20798, + "fairfax": 29368, + "fairfield": 29664, + "fairgrounds": 38325, + "fairi": 28884, + "fairies": 33590, + "fairly": 14961, + "fairmont": 41547, + "fairness": 29388, + "fairs": 8655, + "fairtrade": 33361, + "fairview": 43479, + "fairway": 44022, + "fairy": 17021, + "fairy": 10444, + "fairytale": 28944, + "fais": 23542, + "faisal": 35459, + "fait": 20567, + "faith": 10653, + "faith": 5080, + "faithful": 15511, + "faiz": 41775, + "fake": 18794, + "fake": 5777, + "faken": 22853, + "fakenews": 26943, + "fakespeare": 49095, + "fal": 2778, + "fal": 40494, + "fala": 47120, + "falcon": 22498, + "falcon": 13571, + "falcons": 13834, + "falk": 34648, + "falkirk": 44080, + "fall": 6489, + "fall": 2359, + "fallen": 8688, + "falling": 48709, + "falling": 7293, + "fallon": 39596, + "fallon": 21281, + "fallontonight": 44627, + "fallout": 49365, + "fallout": 16009, + "falls": 4778, + "falmouth": 38261, + "false": 38948, + "false": 9078, + "falsely": 42321, + "fam": 1058, + "fam": 5128, + "fame": 6573, + "famed": 23302, + "famer": 24554, + "famil": 3395, + "famili": 8488, + "familia": 25622, + "familiar": 10020, + "families": 4612, + "family": 8137, + "family": 1315, + "familyfun": 46308, + "familytime": 47236, + "familytravel": 38222, + "famine": 35847, + "famous": 44811, + "famous": 4096, + "famously": 44505, + "fan": 1675, + "fan": 2261, + "fanart": 41059, + "fanart": 7855, + "fanartfriday": 45346, + "fanatic": 36643, + "fanatics": 39610, + "fanbase": 36921, + "fanboy": 43369, + "fanc": 29017, + "fancafe": 45080, + "fanci": 35908, + "fanclub": 31530, + "fancy": 47622, + "fancy": 6733, + "fand": 19684, + "fandom": 47634, + "fandom": 11534, + "fanfest": 42916, + "fanfic": 47243, + "fang": 14269, + "fang": 27428, + "fangirl": 28813, + "fangirling": 39463, + "fanning": 37282, + "fanny": 30401, + "fans": 32454, + "fans": 1840, + "fansign": 25288, + "fant": 4467, + "fanta": 2703, + "fantaken": 39412, + "fantasia": 
49306, + "fantastic": 31289, + "fantastic": 2935, + "fantasy": 15124, + "fantasy": 5267, + "fantasyfootball": 35713, + "fao": 31155, + "faq": 28533, + "far": 1578, + "far": 2384, + "fara": 48562, + "farage": 28340, + "farah": 31547, + "fare": 8620, + "fare": 6461, + "fares": 27525, + "farewell": 10734, + "fargo": 18870, + "fari": 26197, + "farley": 43761, + "farm": 9066, + "farm": 3985, + "farmer": 19735, + "farmer": 10474, + "farmers": 29752, + "farmers": 6402, + "farmersmarket": 41808, + "farmhouse": 26293, + "farming": 10399, + "farmington": 49305, + "farmland": 45258, + "farms": 11277, + "farn": 27527, + "faroo": 39147, + "farra": 33657, + "farrakhan": 46293, + "farrell": 24234, + "fart": 34664, + "farther": 42233, + "fas": 4830, + "fas": 42995, + "fasci": 17191, + "fascin": 7327, + "fascinated": 32964, + "fascinating": 8640, + "fascism": 28213, + "fascist": 23870, + "fascists": 43598, + "fash": 42682, + "fashi": 2099, + "fashion": 6976, + "fashion": 2444, + "fashionable": 24597, + "fashionblogger": 31726, + "fashioned": 21563, + "fashioni": 26062, + "fashionista": 30415, + "fashions": 37601, + "fashionshow": 45653, + "fashionweek": 28684, + "fass": 42398, + "fast": 8509, + "fast": 1953, + "fasten": 44990, + "faster": 8835, + "fastest": 9808, + "fasting": 24656, + "fat": 4751, + "fat": 5484, + "fatal": 12124, + "fatalities": 44168, + "fatally": 34069, + "fate": 26315, + "fate": 11734, + "father": 11607, + "father": 3224, + "fathers": 12780, + "fathersday": 16731, + "fati": 13430, + "fatigue": 23747, + "fatima": 28202, + "fats": 30151, + "fatt": 44131, + "fatty": 22953, + "fau": 5571, + "fau": 31381, + "faucet": 44273, + "faul": 16230, + "faulkner": 37840, + "fault": 13862, + "faults": 42752, + "faulty": 47103, + "fauna": 30808, + "faust": 44772, + "faux": 19429, + "fav": 1355, + "fav": 5426, + "fave": 7272, + "faves": 18003, + "favor": 1766, + "favor": 12160, + "favorable": 35392, + "favored": 46640, + "favorite": 35262, + "favorite": 1916, + "favorited": 36926, + "favorites": 10564, + "favors": 36085, + "favour": 3111, + "favour": 20469, + "favourite": 3342, + "favourites": 16585, + "favs": 18879, + "faw": 21800, + "fawad": 46425, + "fawn": 48624, + "fax": 32535, + "fax": 9337, + "fay": 8939, + "fay": 40074, + "faye": 30257, + "fayette": 32043, + "fayette": 19782, + "fayetteville": 37771, + "fayre": 34982, + "faz": 26238, + "faze": 44880, + "fb": 22637, + "fb": 3307, + "fball": 29663, + "fbf": 20004, + "fbi": 10293, + "fbloggers": 41389, + "fbs": 48454, + "fc": 4278, + "fc": 1399, + "fca": 24540, + "fcb": 26639, + "fcb": 25045, + "fcbarcelona": 32174, + "fcbayern": 35033, + "fcblive": 44608, + "fcc": 21240, + "fck": 40080, + "fck": 49263, + "fcofficial": 27805, + "fcs": 32095, + "fcu": 47898, + "fd": 16972, + "fd": 11525, + "fda": 17823, + "fdi": 45579, + "fdn": 18563, + "fdny": 41084, + "fdr": 42298, + "fe": 623, + "fe": 873, + "fear": 8744, + "fear": 5402, + "feared": 31154, + "fearless": 17470, + "fears": 13867, + "fearthe": 33449, + "feasi": 34977, + "feast": 37963, + "feast": 9564, + "feat": 1703, + "feat": 5611, + "feather": 24905, + "feather": 17871, + "feathers": 21138, + "featherweight": 44939, + "feature": 30413, + "feature": 4527, + "featured": 4743, + "features": 4643, + "featuring": 3706, + "feb": 4317, + "febru": 4202, + "february": 4248, + "fect": 31293, + "fed": 22518, + "fed": 7035, + "feder": 4737, + "federal": 6369, + "federation": 15530, + "federer": 18246, + "federico": 40539, + "fedex": 32603, + "fedora": 45111, + "feds": 30593, + "fee": 28242, + "fee": 9224, + 
"feed": 6662, + "feed": 5839, + "feedback": 8683, + "feeder": 24482, + "feeders": 44523, + "feeding": 9879, + "feeds": 21788, + "feel": 2408, + "feel": 2051, + "feelin": 19903, + "feeling": 33087, + "feeling": 3045, + "feelings": 9452, + "feels": 4808, + "feelthe": 22322, + "feelthebern": 27743, + "fees": 11765, + "feet": 4804, + "fei": 23441, + "fei": 34217, + "fein": 46707, + "feinstein": 41313, + "fel": 2081, + "fel": 20304, + "feld": 45913, + "feld": 14219, + "feldman": 41942, + "feli": 7498, + "felic": 25845, + "felici": 23379, + "felicia": 41139, + "felicidades": 41648, + "felicity": 35123, + "feline": 29471, + "felipe": 27681, + "felix": 33455, + "felix": 16514, + "feliz": 26104, + "feliz": 20221, + "fell": 33540, + "fell": 6266, + "fella": 17586, + "fellas": 18787, + "feller": 29226, + "fellow": 12099, + "fellow": 5242, + "fellows": 15766, + "fellowship": 13857, + "felony": 31068, + "felt": 5413, + "fem": 24574, + "fem": 36615, + "fema": 41721, + "female": 22062, + "female": 3970, + "females": 21028, + "femi": 38607, + "femin": 11423, + "femini": 11894, + "feminine": 24911, + "feminism": 18784, + "feminist": 14921, + "feminists": 38809, + "femme": 31331, + "fen": 5509, + "fen": 25024, + "fence": 12679, + "fences": 34312, + "fencing": 23489, + "fender": 17117, + "fener": 41208, + "fenerbah": 46652, + "feng": 33291, + "fennel": 28689, + "fent": 26395, + "fenton": 47265, + "fenway": 29206, + "fer": 1765, + "fer": 2897, + "fera": 37705, + "feral": 29972, + "ferdin": 25541, + "ferdinand": 27591, + "fere": 43144, + "feren": 35652, + "ference": 19984, + "ferg": 44938, + "fergie": 39119, + "fergu": 10988, + "fergus": 42041, + "ferguson": 11904, + "fermentation": 45817, + "fermented": 36886, + "fern": 10747, + "fern": 21685, + "fernandes": 44391, + "fernandez": 23436, + "fernando": 17140, + "ferns": 38277, + "feroci": 45652, + "ferr": 7256, + "ferra": 47911, + "ferrari": 9606, + "ferre": 29626, + "ferred": 10432, + "ferreira": 48686, + "ferrell": 41112, + "ferrer": 38904, + "ferri": 42008, + "ferries": 28489, + "ferris": 27532, + "ferry": 38936, + "ferry": 10278, + "fers": 12378, + "fert": 14925, + "fert": 43662, + "fertil": 41987, + "fertile": 44837, + "fertili": 23912, + "fertility": 23528, + "fertilizer": 36786, + "fery": 47448, + "fes": 32300, + "fest": 17383, + "fest": 2590, + "festa": 42124, + "festi": 1943, + "festiv": 19222, + "festival": 20946, + "festival": 2240, + "festivals": 17834, + "festive": 9533, + "festivities": 21020, + "fet": 21409, + "feta": 31705, + "fetal": 42031, + "fetch": 30271, + "fete": 34629, + "fett": 37979, + "fetus": 26768, + "feu": 24912, + "feu": 32990, + "feud": 27365, + "fever": 40896, + "fever": 9989, + "fevre": 43861, + "few": 1939, + "fewer": 19128, + "fex": 41584, + "fex": 26392, + "fey": 39069, + "fey": 23298, + "fez": 43081, + "ff": 1021, + "ff": 1304, + "ffa": 15355, + "ffame": 42873, + "ffc": 19832, + "ffe": 1138, + "ffe": 8631, + "ffect": 29151, + "ffed": 8448, + "ffee": 26377, + "ffel": 22656, + "ffen": 46537, + "ffer": 27369, + "ffer": 11636, + "ffers": 32163, + "fferty": 44771, + "ffes": 46441, + "ffey": 30138, + "fff": 28106, + "ffi": 19961, + "ffic": 4762, + "ffice": 26044, + "ffici": 3639, + "fficial": 39818, + "fficial": 6463, + "fficiency": 27800, + "fficient": 20424, + "ffin": 12779, + "ffin": 7367, + "ffing": 16592, + "ffins": 17898, + "ffl": 39490, + "ffle": 7749, + "ffler": 39819, + "ffles": 19344, + "ffman": 15823, + "ffo": 42264, + "ffs": 4424, + "ffxiv": 26569, + "ffxv": 46786, + "ffy": 26404, + "ffy": 7795, + "fg": 45977, + "fg": 
6823, + "fgm": 32178, + "fgo": 46113, + "fh": 21649, + "fh": 21010, + "fhs": 45094, + "fi": 701, + "fi": 3589, + "fia": 8827, + "fiable": 34373, + "fianc": 27752, + "fiance": 44114, + "fiancé": 34039, + "fiasco": 40944, + "fiat": 16740, + "fiawec": 39485, + "fib": 40594, + "fiba": 34993, + "fiber": 35074, + "fiber": 12612, + "fibers": 44587, + "fibre": 21401, + "fibro": 21294, + "fibrosis": 36307, + "fic": 1788, + "fic": 2059, + "fica": 26952, + "fically": 14854, + "fication": 4523, + "fications": 12512, + "ficial": 48192, + "fics": 42505, + "fiction": 6218, + "fictional": 25570, + "fid": 34197, + "fid": 23966, + "fidd": 25218, + "fiddle": 35968, + "fide": 45375, + "fidel": 21740, + "fidel": 36837, + "fidelity": 30109, + "fidget": 48664, + "fie": 28487, + "fie": 10348, + "fied": 29642, + "fied": 2853, + "fiel": 1361, + "field": 7571, + "field": 1570, + "fielder": 11046, + "fieldhouse": 37969, + "fielding": 30465, + "fields": 6494, + "fieldwork": 33155, + "fiends": 37869, + "fier": 11167, + "fier": 10598, + "fierc": 48609, + "fierce": 13896, + "fiercely": 49039, + "fiers": 16113, + "fiery": 24557, + "fies": 9537, + "fiesta": 14580, + "fif": 5309, + "fifa": 21976, + "fifa": 8516, + "fifaworldcup": 38819, + "fifawwc": 41329, + "fife": 24374, + "fifteen": 29504, + "fifth": 25515, + "fifth": 8772, + "fifthharmony": 31075, + "fifty": 24456, + "fifty": 15978, + "fig": 4814, + "fig": 20719, + "figaro": 48044, + "figh": 23274, + "fight": 5262, + "fight": 2757, + "fighter": 35884, + "fighter": 6438, + "fighters": 7371, + "fightfor": 48909, + "fightfor": 35740, + "fighting": 38625, + "fighting": 4652, + "fighton": 45578, + "fights": 12132, + "figs": 38882, + "figu": 6390, + "figur": 16948, + "figurative": 44042, + "figure": 48820, + "figure": 5274, + "figured": 15630, + "figures": 8739, + "figurine": 33306, + "figuring": 31513, + "fiji": 48270, + "fiji": 18285, + "fik": 46589, + "fil": 1142, + "fil": 14915, + "fila": 30992, + "filament": 49252, + "file": 12545, + "file": 4512, + "filed": 13864, + "files": 7850, + "filet": 43155, + "fili": 9590, + "filing": 16576, + "filip": 14368, + "filipino": 19153, + "fill": 15904, + "fill": 6277, + "filled": 5589, + "filler": 32816, + "fillers": 45005, + "fillet": 39276, + "filling": 9736, + "fillion": 38048, + "fillmore": 43922, + "fills": 21750, + "filly": 27690, + "film": 5117, + "film": 1860, + "filmed": 15801, + "filmfare": 42224, + "filmfest": 24508, + "filmfestival": 28066, + "filming": 6866, + "filmmaker": 17202, + "filmmakers": 24896, + "filmmaking": 18226, + "films": 5370, + "fils": 40271, + "filter": 7541, + "filtered": 29926, + "filtering": 47770, + "filters": 18385, + "filth": 39713, + "filthy": 26899, + "filtr": 21408, + "filtration": 42036, + "fim": 47525, + "fin": 735, + "fin": 10663, + "fina": 34497, + "final": 11968, + "final": 1755, + "finale": 7844, + "finalfantasy": 44543, + "finalfour": 46999, + "finalist": 12620, + "finalists": 13422, + "finalized": 48930, + "finally": 1992, + "finals": 4536, + "finan": 4807, + "finance": 6117, + "finances": 28767, + "financi": 12846, + "financial": 19783, + "financial": 4930, + "financially": 28124, + "financing": 18375, + "finch": 18523, + "find": 18638, + "find": 1416, + "finder": 15045, + "finders": 43884, + "findia": 47064, + "finding": 37455, + "finding": 6002, + "findings": 16529, + "findlay": 48227, + "findom": 36463, + "finds": 6680, + "findyour": 25936, + "findyourpark": 38924, + "fine": 12042, + "fine": 3797, + "fineart": 7484, + "fineart": 16005, + "fineartamerica": 7724, + "fined": 20094, + 
"finely": 46120, + "finer": 36681, + "fines": 25053, + "finesse": 46047, + "finest": 7707, + "fing": 6485, + "fing": 17955, + "finger": 13480, + "finger": 8895, + "fingerprint": 39579, + "fingers": 9690, + "fini": 2405, + "finish": 42178, + "finish": 3958, + "finished": 3078, + "finisher": 38636, + "finishers": 48661, + "finishes": 13078, + "finishing": 7912, + "finite": 48312, + "finity": 41463, + "finity": 21273, + "fink": 40158, + "finland": 10775, + "finley": 41652, + "finn": 28479, + "finn": 16925, + "finna": 35180, + "finnish": 19616, + "fino": 30083, + "fins": 32810, + "fintech": 48929, + "fintech": 8899, + "fion": 27476, + "fiona": 20099, + "fior": 37086, + "fiore": 44997, + "fioren": 33188, + "fiorentina": 43713, + "fios": 42521, + "fir": 770, + "fir": 16233, + "fire": 2951, + "fire": 1769, + "firearm": 40311, + "firearms": 23960, + "fireball": 40543, + "firec": 42806, + "fired": 8846, + "firefighter": 20498, + "firefighters": 12600, + "firefly": 33997, + "firefox": 35372, + "fireman": 46085, + "firen": 34752, + "firenze": 38445, + "fireplace": 23050, + "fires": 8749, + "fireside": 36185, + "firework": 40750, + "fireworks": 10641, + "firing": 15105, + "firm": 16936, + "firm": 7705, + "firmly": 29156, + "firms": 13655, + "firmware": 42691, + "first": 6853, + "first": 874, + "firstdayof": 44297, + "firsth": 48512, + "firsts": 47884, + "firth": 26078, + "fis": 7846, + "fis": 47683, + "fiscal": 20825, + "fischer": 26532, + "fish": 6431, + "fish": 2759, + "fisher": 11175, + "fisher": 9176, + "fisheries": 24612, + "fisherman": 25055, + "fishermen": 28547, + "fishers": 42065, + "fishery": 49057, + "fishes": 35470, + "fishing": 31703, + "fishing": 4935, + "fishy": 35665, + "fist": 48340, + "fist": 17085, + "fit": 2366, + "fit": 2478, + "fitbit": 33768, + "fitch": 44614, + "fitfam": 20662, + "fitnes": 47285, + "fitness": 20044, + "fitness": 4838, + "fits": 6401, + "fitt": 32994, + "fitted": 14863, + "fitter": 42096, + "fitters": 32364, + "fitting": 11769, + "fittings": 45787, + "fitz": 11120, + "fitz": 25913, + "fitzgerald": 20606, + "fitzpatrick": 37141, + "fiu": 38374, + "five": 19508, + "five": 3127, + "fives": 44066, + "fix": 4596, + "fix": 6028, + "fixed": 9393, + "fixes": 25473, + "fixing": 17423, + "fixture": 17317, + "fixtures": 19904, + "fizz": 31242, + "fj": 43183, + "fj": 46447, + "fjor": 31260, + "fk": 12410, + "fl": 1082, + "fl": 2685, + "fla": 1577, + "fla": 20292, + "flag": 11536, + "flag": 4859, + "flagged": 45012, + "flags": 12221, + "flagship": 19779, + "flagstaff": 40406, + "flair": 24938, + "flake": 21221, + "flakes": 20934, + "flam": 10559, + "flame": 40351, + "flame": 13484, + "flamen": 28826, + "flamenco": 37362, + "flames": 13441, + "flamin": 42693, + "flaming": 34782, + "flamingo": 30323, + "flan": 14572, + "flanagan": 28641, + "flanders": 34837, + "flank": 44553, + "flann": 39510, + "flannel": 37807, + "flap": 35253, + "flappy": 40241, + "flare": 21185, + "flares": 46088, + "flash": 6089, + "flash": 5815, + "flashback": 14616, + "flashback": 11988, + "flashbackfriday": 15014, + "flashbacks": 47056, + "flashes": 31259, + "flashing": 31764, + "flashlight": 37256, + "flask": 36194, + "flat": 8986, + "flat": 6313, + "flats": 17228, + "flatt": 45498, + "flattering": 43267, + "flaun": 41421, + "flav": 7191, + "flavo": 28895, + "flavor": 31835, + "flavor": 11818, + "flavored": 29350, + "flavorful": 49135, + "flavors": 16930, + "flavour": 17026, + "flavoured": 42397, + "flavours": 21083, + "flaw": 14268, + "flaw": 34978, + "flawed": 35136, + "flawless": 15531, + "flaws": 
30492, + "flax": 43443, + "fle": 2428, + "fle": 44964, + "flea": 24883, + "fleck": 28143, + "fled": 26731, + "flee": 19427, + "flee": 30167, + "fleece": 25038, + "fleeing": 30543, + "fleek": 43513, + "fleet": 35922, + "fleet": 9147, + "fleetwood": 28883, + "fleming": 25769, + "fler": 48789, + "flesh": 17495, + "flet": 16102, + "fletcher": 19810, + "fleur": 28593, + "flew": 13768, + "flex": 16426, + "flex": 12038, + "flexi": 10032, + "flexibility": 22547, + "flexible": 14502, + "flexing": 48483, + "fli": 2472, + "flick": 13746, + "flick": 23414, + "flickr": 17755, + "flies": 8070, + "flight": 24701, + "flight": 3795, + "flights": 10515, + "flin": 24730, + "flin": 43816, + "flinders": 44647, + "fling": 22768, + "flint": 28306, + "flint": 18324, + "flip": 20385, + "flip": 11035, + "flipk": 30829, + "flipkart": 33154, + "flipped": 28144, + "flipping": 25881, + "flips": 35089, + "flir": 24330, + "flirt": 38352, + "flirting": 35243, + "flix": 40663, + "flo": 1945, + "flo": 20711, + "float": 16123, + "floating": 12619, + "floats": 33272, + "flock": 36297, + "flock": 21822, + "flondon": 47366, + "floo": 4062, + "flood": 23793, + "flood": 7148, + "flooded": 19706, + "flooding": 10204, + "floods": 16369, + "floor": 23657, + "floor": 4125, + "flooring": 19227, + "floors": 15671, + "flop": 22994, + "floppy": 38267, + "flops": 29146, + "flor": 15784, + "flor": 41669, + "flora": 18906, + "floral": 10732, + "florals": 48331, + "floren": 37706, + "florence": 11617, + "flores": 21537, + "flori": 3482, + "florian": 41861, + "florida": 34264, + "florida": 3966, + "florist": 38403, + "floss": 36453, + "flotus": 35181, + "flour": 18592, + "flouri": 23239, + "flourish": 36038, + "flow": 2180, + "flow": 5608, + "flower": 12772, + "flower": 4055, + "flowering": 19953, + "flowers": 4023, + "flowing": 14922, + "flown": 25659, + "flows": 16715, + "floyd": 46369, + "floyd": 13656, + "flu": 3698, + "flu": 13528, + "fluctu": 40181, + "fluence": 38169, + "fluent": 30025, + "fluff": 31174, + "fluffy": 40346, + "fluffy": 17054, + "fluid": 43803, + "fluid": 16717, + "fluids": 41490, + "fluor": 45127, + "fluore": 26974, + "fluorescent": 35036, + "fluori": 45611, + "flur": 31591, + "flush": 25777, + "flushing": 43754, + "flute": 23746, + "flux": 25249, + "flwx": 30907, + "fly": 5666, + "fly": 3228, + "flye": 30873, + "flyeagles": 39927, + "flyeaglesfly": 39931, + "flyer": 11875, + "flyers": 14181, + "flyfishing": 31800, + "flying": 20782, + "flying": 4610, + "flyn": 40676, + "flynn": 15721, + "flyo": 33506, + "flyover": 38083, + "fm": 13715, + "fm": 3689, + "fman": 25152, + "fml": 26730, + "fmr": 32875, + "fn": 22773, + "fn": 21763, + "fnc": 46506, + "fo": 898, + "fo": 6157, + "foal": 40386, + "foam": 30039, + "foam": 14587, + "foamed": 26711, + "fob": 40315, + "focal": 30934, + "focu": 5827, + "focus": 4353, + "focused": 9319, + "focuses": 20093, + "focusing": 15551, + "fod": 31015, + "fod": 43299, + "fodils": 44411, + "foe": 22952, + "foes": 46279, + "fog": 9417, + "foggy": 19770, + "foil": 17302, + "fol": 1106, + "fol": 48616, + "fold": 35201, + "fold": 11021, + "foldable": 48307, + "folded": 25233, + "folder": 25717, + "folding": 15464, + "folds": 24266, + "foley": 22850, + "foli": 7713, + "folia": 48964, + "foliage": 26350, + "folio": 10772, + "folk": 10665, + "folk": 6032, + "folke": 47190, + "folkl": 27273, + "folklore": 22133, + "folklore": 28620, + "folklorethursday": 23270, + "folks": 5422, + "follo": 41417, + "follow": 1964, + "follow": 1979, + "followart": 40957, + "followback": 33863, + "followed": 6499, + 
"follower": 17039, + "followers": 4856, + "following": 3473, + "followme": 29668, + "followparty": 44757, + "follows": 11287, + "followthe": 30747, + "folly": 41408, + "folsom": 42108, + "fom": 34540, + "fon": 5017, + "fon": 38318, + "fond": 19964, + "fonda": 44609, + "fondue": 48321, + "fone": 40672, + "font": 37610, + "font": 16248, + "fontaine": 37864, + "fontana": 43643, + "fontein": 45062, + "fonts": 32801, + "foo": 1183, + "foo": 23435, + "food": 4586, + "food": 1559, + "foodand": 38317, + "foodbank": 31926, + "foodie": 30762, + "foodie": 9847, + "foodies": 22416, + "foodnetwork": 46793, + "foods": 7057, + "foodsecurity": 49329, + "foodtruck": 47682, + "fool": 23959, + "fool": 12212, + "fooled": 28761, + "fooling": 47964, + "foolish": 33824, + "fools": 15946, + "foot": 6702, + "foot": 4738, + "footage": 11130, + "footb": 33466, + "football": 9376, + "football": 1882, + "footballer": 20646, + "footballers": 30269, + "footed": 38040, + "footh": 25951, + "foothills": 37020, + "footpath": 48858, + "footprint": 23206, + "footprints": 39640, + "footsteps": 27289, + "footwear": 22772, + "footy": 39866, + "footy": 18922, + "for": 645, + "for": 556, + "forage": 46871, + "foraging": 39056, + "forall": 17824, + "forbe": 49098, + "forbes": 13925, + "forbi": 24754, + "forbidden": 25164, + "force": 12068, + "force": 2869, + "forced": 8201, + "forces": 5381, + "forchange": 35848, + "forcing": 21573, + "ford": 3751, + "ford": 1623, + "fordfc": 28581, + "fordham": 48792, + "fords": 29351, + "fordshire": 14645, + "fore": 1484, + "fore": 1332, + "forec": 34155, + "forecast": 7361, + "forecasting": 38133, + "forecasts": 27696, + "foreclo": 44916, + "forefront": 37679, + "foreground": 35186, + "forehead": 25394, + "foreig": 26497, + "foreign": 42255, + "foreign": 6046, + "foreigners": 38549, + "foreman": 36174, + "foremost": 42128, + "foren": 16526, + "forensic": 23158, + "forensics": 38763, + "forest": 18760, + "forest": 4167, + "forestation": 33939, + "forestry": 26281, + "forests": 14095, + "forever": 14748, + "forever": 3225, + "forevery": 40605, + "forex": 40200, + "forex": 17395, + "forfe": 44871, + "forge": 19232, + "forged": 28105, + "forget": 46153, + "forget": 2678, + "forgets": 35613, + "forgetting": 25452, + "forgi": 22080, + "forgive": 15332, + "forgiven": 44894, + "forgiveness": 23585, + "forgood": 39169, + "forgot": 6483, + "forgotten": 7994, + "fork": 24501, + "fork": 13700, + "forkids": 48571, + "forklift": 43202, + "forks": 28769, + "forlife": 17624, + "form": 1157, + "form": 1907, + "forma": 38829, + "formal": 12978, + "formally": 24867, + "format": 16252, + "format": 11874, + "formation": 2510, + "formations": 37715, + "formative": 48882, + "formats": 32085, + "forme": 42085, + "formed": 6528, + "former": 2276, + "formerly": 20866, + "formid": 38599, + "formidable": 39834, + "forming": 15443, + "formity": 42290, + "forms": 5161, + "formu": 8689, + "formul": 23923, + "formula": 24485, + "formula": 10776, + "formulae": 34586, + "formulated": 45066, + "forre": 38876, + "forrest": 25205, + "forrester": 45338, + "forsa": 48958, + "forsale": 13303, + "forster": 42923, + "forsy": 29629, + "forsyth": 40952, + "fort": 12300, + "fort": 2921, + "forte": 44350, + "forte": 27367, + "forth": 17068, + "forth": 11932, + "forthcoming": 19989, + "forthe": 12521, + "forti": 26984, + "fortified": 46486, + "fortn": 14428, + "fortnight": 39235, + "fortnite": 38734, + "fortnite": 17890, + "fortress": 19988, + "fortun": 6950, + "fortunate": 19898, + "fortunately": 34358, + "fortune": 40931, + "fortune": 
11451, + "fortunes": 41989, + "forty": 24399, + "forum": 37851, + "forum": 4538, + "forums": 31518, + "forwar": 34364, + "forward": 47031, + "forward": 2342, + "forwards": 38974, + "foryou": 35150, + "forz": 46056, + "forza": 33293, + "forza": 28089, + "fos": 36925, + "fos": 22081, + "foss": 14240, + "foss": 37911, + "fossil": 20419, + "fossil": 15202, + "fossilfriday": 26079, + "fossils": 30652, + "foster": 26778, + "foster": 8139, + "fostering": 35996, + "fosters": 37644, + "foto": 15908, + "foto": 12823, + "fotogra": 23687, + "fotografia": 40256, + "fotos": 26124, + "fou": 14516, + "fought": 10844, + "foul": 19784, + "foun": 3154, + "found": 3454, + "found": 1546, + "foundation": 4058, + "foundations": 25219, + "founded": 12240, + "founder": 5145, + "founders": 14602, + "founding": 15317, + "foundry": 31426, + "fountain": 44863, + "fountain": 13405, + "fountains": 37411, + "four": 5113, + "four": 2721, + "foursquare": 34484, + "fourteen": 46255, + "fourth": 7516, + "fourthofjuly": 47805, + "fow": 17084, + "fowl": 31685, + "fowler": 20980, + "fox": 5007, + "fox": 3240, + "foxandfriends": 45841, + "foxes": 24145, + "foxnews": 18830, + "foxsports": 39267, + "foxtv": 49396, + "foxx": 32993, + "foxy": 27945, + "foy": 30284, + "foyer": 38011, + "foyle": 47902, + "fp": 28058, + "fp": 8941, + "fpl": 27970, + "fpp": 36464, + "fps": 25300, + "fpv": 43175, + "fr": 936, + "fr": 5512, + "fra": 3368, + "fra": 15644, + "frac": 15607, + "fracking": 21894, + "fractal": 46471, + "fraction": 26788, + "fractu": 25847, + "fracture": 28995, + "fractured": 37421, + "fractures": 46213, + "frag": 13093, + "fragile": 23579, + "fragment": 39209, + "fragments": 41424, + "fragr": 15403, + "fragrance": 17874, + "fragrances": 44567, + "fragrant": 37030, + "fram": 27987, + "frame": 11029, + "frame": 6481, + "framed": 13135, + "frames": 15479, + "framework": 13195, + "frameworks": 43136, + "framing": 24539, + "frampton": 41733, + "fran": 2118, + "fran": 18878, + "franc": 3872, + "franc": 42340, + "franca": 48952, + "france": 12045, + "france": 3552, + "frances": 20803, + "francesca": 32327, + "francesco": 25816, + "franch": 11756, + "franchi": 46438, + "franchise": 13664, + "franci": 46458, + "francis": 22187, + "francis": 7660, + "francisco": 6887, + "franco": 17934, + "franco": 17052, + "francois": 29317, + "frank": 5390, + "frank": 5229, + "franken": 20487, + "franken": 48252, + "frankenstein": 26410, + "frankfur": 17442, + "frankfurt": 18598, + "franki": 39227, + "frankie": 38373, + "frankie": 16215, + "franklin": 40935, + "franklin": 9999, + "frankly": 38015, + "franks": 42855, + "frans": 47892, + "franz": 25449, + "franç": 38381, + "fraser": 39082, + "fraser": 16754, + "frat": 15225, + "frat": 39292, + "fraternity": 24433, + "frau": 23063, + "fraud": 40647, + "fraud": 9961, + "fraudul": 42655, + "fraudulent": 47408, + "fray": 41154, + "frazier": 32841, + "frc": 41507, + "fre": 821, + "fre": 43165, + "freak": 20352, + "freak": 13701, + "freaked": 43511, + "freakin": 23900, + "freaking": 11992, + "freaks": 27009, + "freaky": 31583, + "freck": 33328, + "freckles": 48036, + "fred": 9486, + "fred": 6678, + "freddie": 41890, + "freddie": 17014, + "freddy": 24394, + "freder": 10745, + "frederic": 41165, + "frederick": 37103, + "frederick": 18570, + "fredo": 48241, + "free": 2065, + "free": 1139, + "freebie": 35865, + "freebies": 28630, + "freec": 46569, + "freed": 12585, + "freed": 23392, + "freedom": 17992, + "freedom": 4511, + "freedoms": 32500, + "freef": 48678, + "freel": 14174, + "freelance": 21942, + "freely": 
24436, + "freeman": 16450, + "freep": 32499, + "freepalestine": 39242, + "freer": 44676, + "frees": 27455, + "freestyle": 15594, + "freeway": 24927, + "freeze": 14187, + "freezer": 25390, + "freezing": 12499, + "frei": 30183, + "freight": 17023, + "fremantle": 48012, + "fremont": 34578, + "fren": 2919, + "french": 13118, + "french": 3461, + "frenzy": 30084, + "frequ": 9211, + "frequencies": 45319, + "frequency": 18825, + "frequent": 19836, + "frequently": 22434, + "fresco": 31609, + "fresh": 4065, + "fresh": 2975, + "fresher": 49284, + "freshers": 35810, + "freshest": 46809, + "freshly": 16081, + "freshman": 9381, + "freshmen": 21292, + "freshness": 45872, + "freshwater": 24803, + "fresno": 40879, + "fresno": 20995, + "fret": 40510, + "freud": 40787, + "frey": 22136, + "frey": 9082, + "fri": 815, + "fri": 6882, + "friars": 30513, + "fric": 18981, + "frick": 46304, + "friction": 38563, + "frid": 46388, + "frida": 36001, + "friday": 6350, + "friday": 1461, + "fridayfeeling": 11952, + "fridaymotivation": 38544, + "fridaynight": 44858, + "fridayreads": 37736, + "fridays": 15589, + "fridaythe": 47642, + "fridge": 13491, + "fridges": 40734, + "frie": 36999, + "fried": 13743, + "fried": 7310, + "friedman": 29402, + "friedrich": 34171, + "friend": 3017, + "friend": 1625, + "friendly": 44612, + "friendly": 4681, + "friends": 38875, + "friends": 1574, + "friendship": 42674, + "friendship": 7679, + "friendships": 28840, + "fries": 11369, + "frifotos": 40493, + "friger": 20785, + "friggin": 48300, + "frigh": 34831, + "fright": 24277, + "fright": 40207, + "frightened": 47136, + "frightening": 39290, + "fringe": 10640, + "fris": 37252, + "frisbee": 45768, + "frisco": 35945, + "frit": 34614, + "fritz": 29860, + "friyay": 38887, + "frm": 12951, + "fro": 626, + "fro": 26603, + "frock": 45306, + "frog": 26494, + "frog": 11438, + "frogs": 20781, + "from": 8330, + "from": 633, + "frome": 48691, + "fromhome": 41477, + "fromthe": 18756, + "fron": 1847, + "fron": 18036, + "front": 10996, + "front": 2184, + "frontal": 35794, + "frontier": 18253, + "frontiers": 38396, + "frontline": 29589, + "frontman": 36775, + "fronts": 26846, + "froome": 48560, + "frosh": 47069, + "frost": 39420, + "frost": 11619, + "frosted": 35988, + "frosting": 33872, + "frosty": 22760, + "froze": 47788, + "frozen": 42464, + "frozen": 8507, + "frs": 26216, + "fru": 3248, + "fruit": 16771, + "fruit": 5190, + "fruitful": 31494, + "fruits": 13282, + "fruity": 22320, + "frustr": 16046, + "frustrated": 25111, + "frustrating": 31342, + "frustration": 30535, + "fry": 33914, + "fry": 13686, + "fryer": 49217, + "frying": 38516, + "fs": 23699, + "fs": 3854, + "fsa": 33373, + "fsu": 44185, + "fsu": 19317, + "ft": 3391, + "ft": 981, + "fta": 41975, + "ftc": 33752, + "fted": 5612, + "fter": 25063, + "fthe": 22886, + "ftheday": 9823, + "fting": 6174, + "fton": 26605, + "ftp": 42649, + "fts": 3767, + "ftse": 46717, + "ftw": 19298, + "fty": 17494, + "fu": 665, + "fu": 9098, + "fuch": 42617, + "fudge": 24270, + "fue": 43723, + "fuego": 41500, + "fuel": 21113, + "fuel": 5945, + "fueled": 28792, + "fueling": 38793, + "fuelled": 48357, + "fuels": 19365, + "fuentes": 44393, + "fuer": 29645, + "fug": 29227, + "fugitive": 39257, + "fuji": 15573, + "fuji": 21634, + "fujifilm": 24765, + "fuk": 31051, + "fuku": 20728, + "fukushima": 33929, + "ful": 1814, + "ful": 857, + "fulbright": 41834, + "fulfill": 43675, + "fulfill": 27467, + "fulfilled": 29919, + "fulfilling": 30621, + "fulfillment": 45573, + "fulham": 25574, + "full": 9407, + "full": 1476, + "fuller": 20225, + 
"fullerton": 42822, + "fullest": 35603, + "fully": 39142, + "fully": 2401, + "fulness": 10526, + "fuls": 41606, + "fulton": 26725, + "fum": 38393, + "fumble": 49373, + "fun": 1229, + "fun": 1499, + "func": 8679, + "function": 8093, + "functional": 12885, + "functionality": 33316, + "functioning": 25479, + "functions": 18001, + "fund": 19089, + "fund": 4877, + "fundam": 11670, + "fundament": 18852, + "fundamental": 17627, + "fundamentally": 45378, + "fundamentals": 27887, + "funday": 15439, + "funded": 10588, + "funding": 5588, + "fundra": 6201, + "fundraiser": 10049, + "fundraising": 10755, + "funds": 7066, + "funer": 40693, + "funeral": 10606, + "funfact": 31596, + "funfactfriday": 40710, + "fungal": 38838, + "fungi": 27837, + "fungus": 30677, + "funk": 37353, + "funk": 13372, + "funko": 49402, + "funko": 23697, + "funky": 16492, + "funnel": 27862, + "funnier": 42232, + "funniest": 15557, + "funny": 19124, + "funny": 3789, + "funrun": 34185, + "fur": 2395, + "fur": 9686, + "furi": 40816, + "furious": 17522, + "furman": 49238, + "furn": 21348, + "furnace": 31913, + "furnished": 37388, + "furnitu": 45696, + "furniture": 7993, + "furry": 33414, + "furry": 15351, + "fursuit": 25306, + "fursuit": 43083, + "fursuitfriday": 27917, + "further": 5583, + "fury": 14404, + "fus": 18419, + "fuse": 23386, + "fused": 38994, + "fusion": 44661, + "fusion": 9364, + "fuss": 26331, + "fut": 21460, + "fut": 34049, + "futbol": 33014, + "futsal": 20558, + "futu": 33454, + "futur": 38840, + "future": 7959, + "future": 1904, + "futureof": 22599, + "futureofwork": 33202, + "futures": 13488, + "futuri": 19068, + "futurism": 48435, + "futurist": 48086, + "futuristic": 30987, + "fuzz": 47128, + "fuzz": 40443, + "fuzzy": 25876, + "fv": 29795, + "fw": 23934, + "fw": 5277, + "fwd": 27052, + "fx": 17807, + "fx": 9025, + "fy": 8440, + "fy": 2702, + "fyi": 16014, + "fying": 5294, + "fz": 46400, + "fé": 34072, + "g": 70, + "g": 326, + "ga": 1275, + "ga": 1531, + "gaa": 10715, + "gaal": 40867, + "gaard": 24645, + "gab": 3927, + "gab": 37382, + "gabbana": 36272, + "gabby": 48115, + "gabby": 24567, + "gabe": 18916, + "gabi": 41931, + "gable": 33387, + "gables": 40928, + "gabri": 8311, + "gabriel": 31684, + "gabriel": 13244, + "gabrielle": 33572, + "gaby": 46420, + "gac": 32520, + "gad": 7786, + "gad": 44651, + "gadget": 25525, + "gadgets": 22840, + "gado": 29489, + "gae": 22003, + "gael": 35663, + "gaelic": 31173, + "gaf": 21354, + "gaf": 32670, + "gag": 14121, + "gag": 18844, + "gaga": 9782, + "gage": 21081, + "gah": 27750, + "gai": 24214, + "gai": 25153, + "gaia": 41269, + "gail": 41160, + "gail": 27676, + "gain": 21536, + "gain": 6202, + "gaine": 35747, + "gained": 14489, + "gaines": 49225, + "gainesville": 40427, + "gaining": 15260, + "gains": 42751, + "gains": 12107, + "gal": 2001, + "gal": 4488, + "gala": 7211, + "galac": 18864, + "galactic": 25514, + "galap": 41115, + "galapagos": 44057, + "galat": 39853, + "galatasar": 42413, + "galatasaray": 47787, + "galax": 5647, + "galaxies": 32435, + "galaxy": 32130, + "galaxy": 6545, + "gale": 37658, + "gale": 21380, + "galerie": 44539, + "gales": 48633, + "gali": 17546, + "gali": 30552, + "galicia": 47927, + "galileo": 39671, + "gall": 3011, + "gall": 33374, + "galla": 16847, + "gallagher": 19168, + "galleria": 40656, + "galleries": 22304, + "gallery": 36648, + "gallery": 3830, + "galley": 48917, + "galli": 22568, + "gallipoli": 47249, + "gallo": 37350, + "gallo": 33265, + "gallon": 24615, + "gallons": 29335, + "galloway": 27796, + "galore": 22286, + "gals": 20125, + "galvani": 
46046, + "galve": 34328, + "galveston": 36003, + "galway": 38045, + "galway": 17112, + "gam": 1162, + "gam": 34195, + "gama": 35873, + "gambia": 32988, + "gamble": 26121, + "gambling": 20287, + "game": 2882, + "game": 1063, + "gameart": 31490, + "gameboy": 40951, + "gamecube": 44079, + "gameday": 9241, + "gamedev": 7544, + "gameinsight": 42626, + "gameof": 10987, + "gameofthrones": 11822, + "gameon": 47691, + "gameplay": 16794, + "gamer": 12595, + "gamer": 11598, + "gamergate": 25961, + "gamers": 16166, + "gamersunite": 26423, + "games": 18551, + "games": 1955, + "gamescom": 37003, + "gamestop": 39436, + "gametime": 45899, + "gami": 42025, + "gamification": 48908, + "gaming": 28803, + "gaming": 4017, + "gamma": 22180, + "gamo": 39325, + "gan": 1822, + "gan": 1670, + "gand": 8399, + "ganda": 27261, + "gander": 44508, + "gandhi": 12322, + "ganesh": 30362, + "ganesha": 45185, + "gang": 8066, + "gang": 5674, + "ganga": 36275, + "gangnam": 46777, + "gangs": 29844, + "gangsta": 37365, + "gangster": 26514, + "gani": 48324, + "gann": 45665, + "gannon": 45837, + "gano": 25304, + "gao": 26556, + "gaon": 19279, + "gap": 29906, + "gap": 7609, + "gaps": 25296, + "gar": 1099, + "gar": 5824, + "gara": 28710, + "garage": 8474, + "garbage": 13760, + "garci": 44658, + "garcia": 10529, + "gard": 7751, + "gard": 21003, + "garda": 31906, + "garde": 22649, + "garden": 4674, + "garden": 2756, + "gardenchat": 46292, + "gardener": 28554, + "gardeners": 38205, + "gardening": 10483, + "gardens": 6152, + "gardiner": 43121, + "gardner": 18710, + "gare": 5633, + "gare": 48402, + "gareth": 37140, + "gareth": 18175, + "garfield": 26728, + "garh": 16762, + "gari": 40898, + "gari": 43080, + "garis": 37839, + "garland": 23418, + "garlic": 9685, + "garment": 31418, + "garments": 43341, + "garmin": 39885, + "garner": 20340, + "garnet": 37669, + "garo": 30388, + "garrett": 15881, + "garri": 21764, + "garrison": 30108, + "garros": 40425, + "garry": 24398, + "gars": 12055, + "gart": 18380, + "gart": 18751, + "garten": 14684, + "garter": 48420, + "garth": 45398, + "garth": 24469, + "gartner": 43334, + "gartner": 29678, + "garty": 46383, + "garu": 31140, + "garvey": 39511, + "garwal": 38623, + "gary": 10535, + "gary": 4516, + "garza": 49393, + "gas": 5047, + "gas": 2474, + "gases": 36971, + "gasoline": 27691, + "gasp": 43762, + "gaston": 40669, + "gastri": 49197, + "gastro": 23740, + "gastron": 30699, + "gastronomy": 46987, + "gat": 5314, + "gat": 18941, + "gata": 44575, + "gate": 8071, + "gate": 3302, + "gated": 23997, + "gates": 9472, + "gateshead": 40051, + "gateway": 45221, + "gateway": 14943, + "gather": 36345, + "gather": 12602, + "gathered": 14646, + "gathering": 9197, + "gatherings": 48096, + "gathers": 39250, + "gating": 27561, + "gation": 11095, + "gations": 33906, + "gato": 44492, + "gator": 20216, + "gator": 16390, + "gatorade": 36354, + "gators": 17173, + "gatory": 24796, + "gatsby": 32586, + "gatwick": 37122, + "gau": 5919, + "gau": 43068, + "gauge": 18728, + "gaunt": 31862, + "gauntlet": 37163, + "gautam": 45853, + "gautam": 31356, + "gauteng": 40333, + "gav": 8966, + "gave": 3485, + "gavin": 32974, + "gavin": 16389, + "gaw": 15405, + "gawd": 43239, + "gawx": 43420, + "gay": 7460, + "gay": 5627, + "gaya": 39477, + "gaye": 41401, + "gayle": 29998, + "gayo": 36768, + "gays": 28001, + "gaz": 4837, + "gaz": 36475, + "gaza": 38391, + "gaza": 10112, + "gazaunderattack": 42458, + "gaze": 23212, + "gazette": 20443, + "gazing": 28373, + "gb": 8727, + "gb": 4619, + "gba": 18528, + "gbbo": 34474, + "gbc": 42993, + "gbp": 
27391, + "gbr": 31984, + "gby": 40509, + "gc": 8577, + "gc": 6043, + "gcc": 26804, + "gcse": 28763, + "gcu": 34137, + "gd": 13264, + "gd": 14604, + "gdc": 32793, + "gden": 44928, + "gdp": 17100, + "gdpr": 22963, + "ge": 619, + "ge": 710, + "gea": 26790, + "gear": 15532, + "gear": 4802, + "gearbox": 42454, + "geared": 33903, + "gearing": 19027, + "gears": 21147, + "geaux": 36313, + "gecko": 38616, + "ged": 17252, + "ged": 3480, + "geddon": 31720, + "gedly": 13991, + "gee": 9806, + "gee": 9071, + "geek": 17920, + "geek": 7135, + "geeks": 20110, + "geeky": 47332, + "geel": 25906, + "geelong": 34555, + "gees": 38088, + "geese": 26413, + "geez": 42394, + "geh": 30320, + "geist": 38290, + "gel": 7343, + "gel": 5697, + "gelato": 29577, + "gels": 42552, + "gely": 14637, + "gem": 14261, + "gem": 7613, + "gement": 19495, + "gemini": 23086, + "gemma": 23952, + "gems": 14355, + "gemstone": 27747, + "gemstones": 43972, + "gen": 1024, + "gen": 3278, + "gence": 16088, + "gency": 5245, + "gend": 33247, + "gender": 22976, + "gender": 5906, + "gendere": 35824, + "genderequality": 43338, + "gene": 5822, + "gene": 7962, + "genealo": 24142, + "genealogy": 29381, + "gener": 1832, + "general": 20576, + "general": 3658, + "generally": 19256, + "generals": 30296, + "generate": 16896, + "generated": 19450, + "generates": 33938, + "generating": 23882, + "generation": 41211, + "generation": 4883, + "generational": 34506, + "generations": 12247, + "generative": 29472, + "generator": 19399, + "generators": 41917, + "generic": 26978, + "generosity": 23015, + "generous": 12570, + "generously": 35113, + "genes": 19683, + "genesis": 13518, + "genetic": 47746, + "genetic": 13578, + "genetically": 36745, + "genetics": 18276, + "geneva": 14799, + "genevie": 41633, + "genevieve": 46584, + "geni": 22334, + "genic": 15750, + "genie": 24221, + "genital": 32960, + "genius": 8235, + "geniuses": 41406, + "geno": 41544, + "geno": 46776, + "genoa": 43993, + "genoci": 14687, + "genocide": 15903, + "genome": 23991, + "genomic": 44371, + "genomics": 26227, + "genre": 14249, + "genres": 30340, + "gens": 17449, + "gent": 3685, + "gent": 7139, + "gente": 34325, + "gentle": 7262, + "gentle": 13577, + "gentleman": 13293, + "gentlemen": 11692, + "gently": 17187, + "gento": 28320, + "gentri": 41148, + "gentry": 47225, + "gents": 18862, + "genu": 9182, + "genuine": 12184, + "genuinely": 20006, + "genus": 38161, + "geny": 35323, + "geo": 5038, + "geo": 11604, + "geocaching": 47908, + "geof": 20629, + "geoff": 33697, + "geoff": 20386, + "geoffrey": 29520, + "geograph": 45920, + "geographic": 22635, + "geographical": 39380, + "geography": 17101, + "geological": 38380, + "geology": 21578, + "geom": 46135, + "geome": 12958, + "geometric": 22419, + "geometry": 21731, + "geon": 20844, + "geon": 7295, + "geons": 15914, + "geopol": 39758, + "geor": 2549, + "georg": 43126, + "george": 8377, + "george": 3296, + "georges": 25042, + "georgetown": 22970, + "georgie": 42115, + "georgina": 43892, + "geospatial": 46238, + "geothermal": 38413, + "geous": 3068, + "ger": 1291, + "ger": 1502, + "gera": 48867, + "gerald": 29901, + "gerald": 13269, + "gerard": 35979, + "gerard": 20826, + "gerber": 45058, + "gered": 40179, + "geri": 41664, + "geri": 46214, + "gering": 24077, + "germain": 38786, + "german": 14972, + "german": 4710, + "germans": 28400, + "germany": 4464, + "germin": 44721, + "germs": 47731, + "geronimo": 45171, + "gerrard": 26538, + "gerry": 29825, + "gerry": 23026, + "gers": 3314, + "gertrude": 46950, + "gervais": 36527, + "gery": 32845, + "ges": 3316, 
+ "gest": 11843, + "gest": 2033, + "gesture": 21780, + "gestures": 43524, + "get": 5670, + "get": 779, + "geta": 13155, + "getaway": 16131, + "gether": 27224, + "getic": 20661, + "getin": 25822, + "getit": 44891, + "getit": 48315, + "getoutside": 35644, + "gets": 39448, + "gets": 2127, + "gett": 6647, + "gett": 27965, + "gettable": 15620, + "gette": 29800, + "gettin": 13428, + "getting": 30885, + "getting": 1500, + "getty": 31185, + "getty": 13965, + "gettys": 35189, + "gettysburg": 37062, + "getyour": 42159, + "gey": 29289, + "gf": 28953, + "gf": 10846, + "gfriend": 35245, + "gfs": 37553, + "gg": 1129, + "gg": 3286, + "gga": 26003, + "ggan": 25626, + "gge": 21521, + "gge": 31659, + "gged": 6095, + "gger": 12367, + "gger": 3493, + "ggers": 7480, + "ggg": 20143, + "gggg": 33513, + "ggi": 21662, + "ggin": 17160, + "gging": 4966, + "ggins": 12444, + "ggle": 34981, + "ggle": 11430, + "ggled": 46328, + "ggles": 14703, + "ggling": 16523, + "ggly": 39407, + "ggs": 4797, + "ggy": 24935, + "ggy": 6476, + "gh": 583, + "gh": 790, + "gha": 10010, + "gha": 25183, + "gham": 21456, + "ghan": 18945, + "ghan": 6624, + "ghana": 30330, + "ghana": 9731, + "ghanaian": 34223, + "ghani": 36699, + "ghar": 37334, + "ghar": 36973, + "ghat": 43989, + "ghaz": 37493, + "ghc": 42139, + "ghe": 10754, + "ghe": 28561, + "ghead": 40783, + "ghee": 34794, + "gher": 21542, + "gher": 14796, + "ghet": 18447, + "ghetti": 17485, + "ghetto": 22403, + "ghi": 22436, + "ghi": 22279, + "ghibli": 40555, + "ghj": 38439, + "ghlin": 24131, + "gho": 4307, + "ghorn": 38094, + "ghosh": 43279, + "ghoshal": 49134, + "ghost": 11417, + "ghost": 7108, + "ghostbusters": 25462, + "ghostly": 44901, + "ghosts": 16737, + "ghou": 35843, + "ghoul": 45302, + "ghouse": 38238, + "ghs": 14157, + "ght": 1413, + "ght": 630, + "ghted": 4963, + "ghter": 2427, + "ghters": 12994, + "ghtful": 8334, + "ghting": 3019, + "ghtly": 6993, + "ghtning": 39740, + "ghton": 16353, + "ghts": 1259, + "ghty": 20968, + "ghty": 5866, + "ghu": 25808, + "ghue": 45675, + "ghyun": 25010, + "ghz": 24325, + "gi": 707, + "gi": 4478, + "gia": 8864, + "giac": 35444, + "giam": 39623, + "gian": 17274, + "gian": 12866, + "gianni": 46752, + "giant": 23668, + "giant": 4687, + "giants": 7076, + "giar": 34241, + "gib": 9816, + "gibb": 18964, + "gibbons": 31974, + "gibbs": 26488, + "gibility": 33297, + "gible": 13159, + "gibr": 20206, + "gibraltar": 23988, + "gibson": 37420, + "gibson": 12178, + "gic": 27900, + "gic": 2570, + "gical": 32973, + "gically": 26320, + "gid": 36774, + "gid": 21413, + "giddy": 40894, + "gideon": 43867, + "gidi": 30603, + "gie": 11459, + "gie": 3991, + "gier": 28974, + "gies": 5505, + "gif": 11363, + "gif": 11677, + "gifford": 47850, + "gifs": 37643, + "gift": 20569, + "gift": 2733, + "gifted": 15110, + "giftide": 20152, + "giftideas": 23487, + "gifting": 39546, + "gifts": 5836, + "gig": 26981, + "gig": 7471, + "gigab": 34530, + "gigan": 24104, + "gigantic": 31507, + "giggle": 36426, + "giggles": 42731, + "giggs": 44692, + "gigi": 44106, + "gigi": 26171, + "gigs": 20316, + "gil": 3997, + "gil": 10088, + "gila": 46952, + "gilbert": 14154, + "gilded": 44341, + "giles": 24802, + "gill": 14280, + "gill": 12003, + "gille": 29610, + "gilles": 39590, + "gillespie": 36242, + "gillette": 38603, + "gilli": 13695, + "gillian": 28753, + "gills": 48851, + "gilmore": 27603, + "gilt": 44378, + "gim": 31284, + "gimm": 40692, + "gimme": 21525, + "gin": 3374, + "gin": 4941, + "gina": 15604, + "gine": 27482, + "ging": 10829, + "ging": 3905, + "ginger": 16287, + "ginger": 9718, + 
"gingerbread": 23692, + "gini": 35768, + "gino": 36521, + "gins": 18328, + "gio": 16329, + "gio": 8050, + "gion": 41226, + "gior": 14920, + "giorgio": 33271, + "giorno": 33310, + "gios": 41927, + "gious": 14419, + "giov": 21404, + "giovanni": 26574, + "gipp": 41351, + "gir": 1077, + "gir": 25481, + "gira": 16949, + "giraffe": 22826, + "giri": 31709, + "girl": 3914, + "girl": 1611, + "girlfriend": 8217, + "girlfriends": 30736, + "girlpower": 37433, + "girls": 15480, + "girls": 1917, + "girly": 29605, + "giro": 39664, + "giro": 26454, + "girona": 47842, + "giroud": 41177, + "gis": 16266, + "gis": 12773, + "gist": 21241, + "git": 16060, + "git": 20918, + "gita": 40838, + "github": 31196, + "giu": 17931, + "giuli": 29762, + "giuliani": 47739, + "giuse": 29385, + "giuseppe": 33563, + "give": 4120, + "give": 1781, + "giveaway": 5310, + "giveaways": 18974, + "giveback": 41385, + "given": 33323, + "given": 4302, + "givenchy": 38245, + "giver": 43339, + "gives": 3926, + "giveup": 35485, + "giving": 14673, + "giving": 2339, + "givingback": 49300, + "givingtuesday": 23556, + "giz": 29237, + "gk": 38953, + "gk": 18719, + "gl": 1849, + "gl": 14751, + "gla": 1523, + "gla": 36904, + "glaci": 14924, + "glacial": 40782, + "glacier": 19282, + "glaciers": 42528, + "glad": 20841, + "glad": 4761, + "glades": 37432, + "gladi": 21742, + "gladiator": 38477, + "gladiators": 41087, + "gladly": 41598, + "gladys": 43168, + "glam": 8738, + "glam": 16905, + "glamorous": 22896, + "glamour": 42876, + "glamour": 17499, + "glamping": 46167, + "glan": 40482, + "glan": 45844, + "glance": 26557, + "gland": 41441, + "glar": 48535, + "glar": 41702, + "glare": 46035, + "glas": 29935, + "glas": 43654, + "glasgo": 6757, + "glasgow": 29990, + "glasgow": 7363, + "glass": 16305, + "glass": 3313, + "glasses": 6116, + "glaston": 26848, + "glastonbury": 28233, + "glau": 39171, + "glaze": 28112, + "glazed": 24122, + "gle": 7166, + "gle": 2865, + "glee": 32379, + "glee": 21614, + "glen": 6158, + "glen": 11049, + "glend": 38332, + "glendale": 33043, + "glenn": 32004, + "glenn": 12861, + "gler": 34649, + "gley": 21998, + "gli": 5896, + "gli": 28791, + "glia": 22217, + "glide": 37321, + "glider": 41636, + "glimp": 12888, + "glimpse": 13817, + "glio": 29785, + "glit": 21079, + "glitch": 29563, + "glitter": 16528, + "glitz": 44542, + "glo": 1721, + "glo": 30474, + "glob": 13363, + "global": 6707, + "global": 2779, + "globalgoals": 33211, + "globalhealth": 46751, + "globalization": 47680, + "globally": 17775, + "globalwarming": 46017, + "globe": 19436, + "globe": 9368, + "globes": 38085, + "glock": 38818, + "glomer": 43689, + "gloom": 48594, + "gloomy": 32199, + "glori": 7270, + "gloria": 19244, + "glorious": 9171, + "glory": 36107, + "glory": 7285, + "glos": 40633, + "gloss": 38258, + "gloss": 22014, + "glossy": 29802, + "glou": 15989, + "gloucester": 28133, + "gloucester": 23835, + "gloucestershire": 33789, + "glove": 16078, + "glover": 21594, + "gloves": 12363, + "glow": 30472, + "glow": 10111, + "glowing": 18437, + "glows": 48107, + "glu": 5952, + "glu": 32281, + "glucose": 34642, + "glue": 22103, + "glued": 38135, + "gluten": 15482, + "gluten": 15524, + "glutenfree": 16138, + "gly": 13027, + "glycer": 48914, + "gm": 18743, + "gm": 5918, + "gma": 18155, + "gmail": 11119, + "gman": 41043, + "gman": 36936, + "gmb": 35934, + "gmb": 31799, + "gmbh": 46877, + "gmc": 27257, + "gmo": 23486, + "gms": 36987, + "gmt": 13803, + "gn": 2455, + "gn": 9831, + "gna": 23009, + "gnation": 45912, + "gne": 25407, + "gni": 5104, + "gnment": 25110, + "gno": 
23376, + "gno": 43686, + "gnocchi": 48299, + "gnome": 33643, + "gnon": 20561, + "go": 650, + "go": 861, + "goa": 14399, + "goal": 9003, + "goal": 3321, + "goalie": 20723, + "goalkeeper": 16601, + "goals": 3295, + "goalscorer": 43547, + "goaltender": 44151, + "goat": 34082, + "goat": 9530, + "goats": 18393, + "gob": 29559, + "gobeavs": 48285, + "goblin": 26223, + "goblue": 25232, + "gobucks": 29175, + "gocougs": 34202, + "god": 4190, + "god": 1731, + "godawgs": 40436, + "godbless": 46616, + "godbless": 44007, + "godd": 16589, + "goddamn": 28495, + "goddard": 37827, + "goddess": 10808, + "godfather": 26222, + "godfrey": 40148, + "godis": 38521, + "godly": 42438, + "gods": 33620, + "gods": 10328, + "goducks": 35889, + "godzilla": 23369, + "goe": 22084, + "goers": 27784, + "goes": 43581, + "goes": 2635, + "gof": 17537, + "goff": 34399, + "goftheday": 39360, + "gofund": 34445, + "gofundme": 34686, + "gog": 42949, + "goggles": 31027, + "gogh": 19697, + "gogo": 22688, + "gogreen": 36279, + "gohawks": 34884, + "goi": 24917, + "goin": 13939, + "going": 25787, + "going": 1245, + "goku": 29550, + "gol": 1537, + "gol": 18257, + "gola": 41090, + "gold": 4999, + "gold": 2209, + "goldberg": 25161, + "goldcoast": 34634, + "golden": 10763, + "golden": 3878, + "goldeng": 20650, + "goldenglobes": 26842, + "goldfish": 40293, + "goldie": 42805, + "goldman": 27164, + "golds": 30526, + "golds": 40283, + "goldsmith": 40214, + "gole": 41297, + "golf": 9096, + "golf": 3096, + "golfclub": 45742, + "golfer": 24579, + "golfers": 28441, + "golfing": 31379, + "goli": 29265, + "goliath": 41602, + "gom": 7051, + "goma": 46198, + "gomes": 39128, + "gomez": 16433, + "gon": 1854, + "gon": 3379, + "gona": 34835, + "gone": 35135, + "gone": 3601, + "gong": 28486, + "gonna": 2562, + "gonz": 10587, + "gonzaga": 36241, + "gonzale": 17512, + "gonzales": 31265, + "gonzalez": 18198, + "goo": 1381, + "goo": 17882, + "good": 2185, + "good": 886, + "goodbye": 6968, + "goodday": 46284, + "goode": 42076, + "goodfood": 46844, + "goodfriday": 40360, + "goodie": 29213, + "goodies": 13308, + "goodluck": 19718, + "goodman": 24146, + "goodmorning": 14421, + "goodness": 10531, + "goodnight": 8540, + "goodreads": 31629, + "goods": 9340, + "goodtimes": 22570, + "goodvibes": 43146, + "goodwill": 24902, + "goodwin": 28080, + "goodwood": 30008, + "goody": 35937, + "goodyear": 42858, + "goofy": 26879, + "goog": 18581, + "google": 12195, + "google": 3460, + "googled": 40345, + "googleplay": 37309, + "goon": 15267, + "goons": 30440, + "goooo": 35876, + "goooo": 48957, + "goose": 21445, + "goose": 13822, + "goosebumps": 32254, + "gop": 18942, + "gop": 6250, + "gopack": 46995, + "gopackgo": 47719, + "gopal": 47268, + "gopdebate": 39806, + "gopher": 47750, + "gopher": 48905, + "gophers": 31957, + "gopro": 17511, + "gor": 1747, + "gor": 29827, + "gordo": 47707, + "gordon": 20485, + "gordon": 8244, + "gore": 30311, + "gore": 17872, + "gorg": 46815, + "gorge": 35548, + "gorge": 20038, + "gorgeous": 3241, + "gori": 12461, + "goria": 43359, + "gorilla": 37910, + "gorilla": 21994, + "gorman": 35741, + "goro": 44977, + "gory": 7160, + "gos": 20517, + "gos": 5693, + "gosh": 15395, + "gosling": 35320, + "gosp": 9617, + "gospel": 11313, + "goss": 39734, + "goss": 36924, + "gossi": 15684, + "gossip": 18963, + "got": 10125, + "got": 1005, + "gota": 36693, + "gotcha": 43275, + "gote": 49345, + "goth": 48465, + "goth": 20437, + "gotham": 46123, + "gotham": 18299, + "gothic": 15426, + "goti": 9497, + "goto": 39715, + "gots": 35215, + "gott": 5089, + "gott": 36466, + 
"gotta": 4633, + "gotten": 5889, + "gotti": 41881, + "gotv": 36089, + "gou": 10520, + "gou": 36555, + "gouache": 43314, + "goul": 33187, + "gould": 31087, + "gour": 13580, + "gourmet": 19111, + "gov": 4022, + "gov": 4564, + "gove": 36997, + "govegan": 38886, + "gover": 10471, + "gover": 16759, + "govern": 2351, + "govern": 32404, + "governance": 13386, + "governing": 30946, + "government": 3149, + "governmental": 42609, + "governments": 19582, + "governor": 17459, + "governor": 6630, + "governors": 26881, + "govin": 42451, + "govt": 5345, + "govuk": 28830, + "gow": 21885, + "gow": 33788, + "gowan": 31307, + "gower": 43448, + "gown": 13719, + "gowns": 38029, + "goyal": 35105, + "gp": 19329, + "gp": 5051, + "gpa": 24098, + "gps": 13639, + "gpu": 38561, + "gq": 40286, + "gq": 31324, + "gr": 709, + "gr": 6062, + "gra": 782, + "gra": 15276, + "grab": 4646, + "grabbed": 22856, + "grabbing": 26440, + "grabs": 17076, + "grac": 11323, + "grace": 13225, + "grace": 5142, + "graced": 31894, + "graceful": 25242, + "graces": 38629, + "graci": 11174, + "gracias": 16463, + "gracie": 23235, + "gracing": 37263, + "gracious": 29044, + "grad": 19869, + "grad": 7291, + "gradable": 41529, + "grade": 45435, + "grade": 3394, + "graded": 13823, + "grader": 23930, + "graders": 10930, + "grades": 10838, + "gradient": 36885, + "grading": 19016, + "grads": 17811, + "gradu": 3230, + "gradual": 45210, + "gradually": 32192, + "graduate": 6675, + "graduated": 15128, + "graduates": 12236, + "graduating": 14819, + "graduation": 8060, + "grady": 33980, + "graeme": 30192, + "graf": 46478, + "graf": 39765, + "graff": 10656, + "graffiti": 11676, + "graft": 32698, + "grafton": 47347, + "graham": 19805, + "graham": 7711, + "grail": 37184, + "grain": 44003, + "grain": 12109, + "grains": 25791, + "gral": 25631, + "gram": 2949, + "gram": 2338, + "grammar": 16077, + "grammy": 15388, + "grammys": 18121, + "grams": 6294, + "gran": 3892, + "gran": 14493, + "granada": 31172, + "grand": 3058, + "grand": 2991, + "grandad": 29148, + "grandchildren": 36856, + "granddaughter": 29460, + "grande": 37514, + "grande": 10757, + "grandes": 36382, + "grandfather": 15346, + "grandma": 10525, + "grandmother": 17469, + "grandpa": 14582, + "grandparents": 21311, + "grandprix": 39358, + "grandson": 20766, + "grandstand": 43172, + "grange": 45027, + "grange": 23850, + "granger": 42968, + "granite": 18813, + "grann": 45585, + "granny": 22710, + "granola": 34271, + "grant": 18682, + "grant": 5442, + "granted": 14156, + "granth": 41283, + "grants": 15123, + "grape": 19131, + "grape": 15959, + "grapefruit": 28347, + "grapes": 18580, + "grapevine": 47619, + "graph": 1349, + "graph": 4407, + "graphene": 38387, + "grapher": 14987, + "graphers": 32088, + "graphic": 15653, + "graphic": 4245, + "graphical": 20878, + "graphicdesign": 21907, + "graphics": 9492, + "graphies": 40164, + "graphite": 29447, + "graphs": 24670, + "graphy": 4897, + "grapp": 30843, + "gras": 31517, + "gras": 17584, + "grasp": 34975, + "grass": 11584, + "grass": 5922, + "grasses": 46807, + "grasshopper": 48894, + "grassi": 42294, + "grasso": 34808, + "grassroots": 21991, + "grassy": 44140, + "grat": 9221, + "grate": 32463, + "grateful": 45659, + "grateful": 5730, + "grati": 36402, + "gratis": 33638, + "gratitude": 12614, + "grav": 20663, + "grave": 16606, + "grave": 9981, + "gravel": 27054, + "graves": 17665, + "graveyard": 31176, + "gravit": 26150, + "gravitational": 45268, + "gravity": 47426, + "gravity": 15160, + "gravy": 21225, + "gray": 12703, + "gray": 7048, + "grays": 46848, + 
"grayson": 45831, + "grayson": 25471, + "grazi": 42427, + "grazie": 38698, + "grazing": 29889, + "grc": 44069, + "gre": 689, + "gre": 17878, + "grease": 24132, + "greasy": 44376, + "great": 3265, + "great": 830, + "greate": 31930, + "greater": 32725, + "greater": 7033, + "greatest": 39080, + "greatest": 4153, + "greatly": 13978, + "greatness": 14189, + "greats": 21855, + "greaves": 42350, + "greco": 39103, + "gree": 9987, + "gree": 30774, + "greece": 6965, + "greed": 26147, + "greedy": 33301, + "greek": 23844, + "greek": 6842, + "greeks": 35866, + "green": 2762, + "green": 1901, + "greenberg": 46662, + "greene": 16383, + "greener": 31169, + "greenery": 42493, + "greenfield": 39924, + "greeng": 42077, + "greenhouse": 20819, + "greening": 48673, + "greenland": 27345, + "greenpeace": 44755, + "greens": 10235, + "greensboro": 33436, + "greenville": 25156, + "greenway": 35205, + "greenwich": 18658, + "greenwood": 25782, + "greer": 34345, + "greet": 11042, + "greet": 11997, + "greeted": 24546, + "greeting": 17754, + "greetings": 11569, + "greets": 25464, + "greg": 6894, + "greg": 7943, + "gregation": 20131, + "gregg": 39422, + "gregg": 22929, + "gregor": 33856, + "gregor": 16177, + "gregory": 16253, + "gren": 13941, + "gren": 20119, + "grenade": 33679, + "grenfell": 42107, + "gres": 39670, + "gress": 2752, + "gret": 30041, + "greta": 33443, + "gretchen": 45516, + "grette": 38774, + "grew": 10451, + "grey": 9190, + "grey": 5046, + "greyhound": 27363, + "greyhounds": 45718, + "greys": 44311, + "greysanatomy": 36833, + "gri": 2169, + "gri": 18484, + "grid": 29067, + "grid": 9882, + "gridi": 41063, + "gridiron": 47786, + "grids": 46500, + "grief": 21058, + "grier": 22016, + "griev": 36400, + "grieving": 42383, + "griez": 47962, + "griezmann": 48396, + "griff": 17855, + "griff": 35551, + "griffi": 28676, + "griffin": 46612, + "griffin": 13161, + "griffith": 24375, + "griffiths": 34182, + "gril": 49091, + "grill": 44083, + "grill": 9519, + "grille": 34748, + "grilled": 10691, + "grilling": 28324, + "grills": 39464, + "grim": 20383, + "grim": 23635, + "grime": 37101, + "grimes": 25057, + "grimm": 27865, + "grims": 34861, + "grimsby": 41513, + "grin": 11033, + "grin": 28697, + "grinch": 40527, + "grind": 25730, + "grind": 11810, + "grinder": 31733, + "grinding": 21541, + "gring": 40135, + "grip": 15521, + "gripping": 34567, + "grips": 27819, + "gris": 29150, + "grit": 22037, + "grit": 22087, + "grits": 44307, + "gritty": 33704, + "grizz": 14877, + "grizz": 44088, + "grizzlies": 25594, + "grizzly": 29676, + "grl": 48005, + "gro": 1464, + "gro": 12691, + "grocer": 11633, + "groceries": 32409, + "grocery": 13826, + "grom": 45284, + "gron": 22345, + "groningen": 45639, + "groo": 9015, + "groom": 39883, + "groom": 22813, + "grooming": 25575, + "groot": 37708, + "groove": 39484, + "groove": 17680, + "grooves": 43954, + "groovy": 30143, + "gros": 26834, + "gros": 32639, + "gross": 31080, + "gross": 11541, + "grosven": 46911, + "grote": 47207, + "grotto": 45260, + "grou": 1582, + "groun": 45110, + "ground": 9558, + "ground": 2461, + "groundbreaking": 21006, + "grounded": 27799, + "grounds": 8454, + "groundwater": 39457, + "group": 19045, + "group": 1771, + "groupe": 47654, + "groups": 6776, + "grouse": 36327, + "grove": 31756, + "grove": 7463, + "grover": 31345, + "groves": 27306, + "grow": 3179, + "grow": 4559, + "grower": 44925, + "growers": 25689, + "growing": 28429, + "growing": 4425, + "growingup": 43433, + "growler": 47096, + "grown": 41762, + "grown": 7120, + "grows": 13352, + "growth": 17925, + 
"growth": 4026, + "growthhacking": 25963, + "grp": 27321, + "grt": 28557, + "gru": 5957, + "grub": 34019, + "grue": 42047, + "gruesome": 47111, + "grum": 45454, + "grump": 49015, + "grumpy": 23610, + "grun": 16203, + "grunge": 33745, + "gry": 16140, + "gry": 5364, + "gs": 25818, + "gs": 1345, + "gsa": 40433, + "gsc": 47751, + "gshore": 43392, + "gsm": 32181, + "gsp": 49173, + "gst": 22239, + "gt": 16151, + "gt": 4725, + "gta": 14826, + "gta": 15338, + "gtaonline": 27292, + "gtav": 27283, + "gti": 39954, + "gto": 39071, + "gtr": 33407, + "gts": 37338, + "gtx": 35230, + "gu": 700, + "gu": 12916, + "gua": 23751, + "guacam": 37477, + "guacamole": 40115, + "guad": 22966, + "guadal": 46097, + "guadalu": 36994, + "guadalupe": 38360, + "guam": 37325, + "guan": 44191, + "guan": 42406, + "guang": 27019, + "guangzhou": 37857, + "guar": 4119, + "guaran": 9242, + "guarantee": 17421, + "guaranteed": 14731, + "guarantees": 40154, + "guard": 30776, + "guard": 4901, + "guarded": 40602, + "guardi": 12008, + "guardia": 43628, + "guardian": 23713, + "guardian": 9498, + "guardians": 21479, + "guarding": 24966, + "guardiola": 32100, + "guards": 12810, + "guatem": 19423, + "guatemala": 21670, + "guay": 48591, + "guay": 24247, + "gubernat": 41400, + "gubernatorial": 41618, + "gucci": 16779, + "gud": 48061, + "gud": 22378, + "gue": 2030, + "gue": 2917, + "gued": 38893, + "guel": 23146, + "guelph": 27660, + "guer": 10391, + "guern": 29277, + "guernsey": 33982, + "guerra": 38215, + "guerrero": 31967, + "guerrilla": 36715, + "gues": 39971, + "gues": 12601, + "guess": 35506, + "guess": 3135, + "guessed": 28005, + "guesses": 30623, + "guessing": 21891, + "guest": 27349, + "guest": 3781, + "guests": 6212, + "guet": 36797, + "guetta": 45904, + "guez": 12313, + "gug": 31358, + "guggen": 35086, + "guggenheim": 37135, + "gui": 2587, + "gui": 25746, + "guid": 11437, + "guidance": 12508, + "guide": 21845, + "guide": 3555, + "guided": 13194, + "guidelines": 16591, + "guides": 14375, + "guiding": 22759, + "guido": 41818, + "guil": 5008, + "guild": 19755, + "guild": 16597, + "guildford": 34450, + "guildhall": 47224, + "guillau": 41123, + "guillaume": 45394, + "guiller": 33660, + "guillermo": 39524, + "guilt": 26354, + "guilty": 9761, + "guin": 13284, + "guin": 47863, + "guine": 13759, + "guinea": 18537, + "guinness": 16648, + "guire": 18209, + "guise": 42024, + "guit": 3759, + "guitar": 21746, + "guitar": 5084, + "guitarist": 13035, + "guitars": 15023, + "guj": 34935, + "gujar": 12698, + "gujarat": 14714, + "guk": 20280, + "gul": 5530, + "gul": 21350, + "gula": 27426, + "gular": 34969, + "gulf": 22101, + "gulf": 11279, + "gull": 48764, + "gull": 28778, + "gulls": 37501, + "gully": 46112, + "gum": 22041, + "gum": 11235, + "gumb": 40147, + "gumbo": 47126, + "gummy": 34276, + "gums": 46609, + "gun": 2748, + "gun": 3496, + "guna": 43333, + "gundam": 26087, + "gundy": 21162, + "gunman": 32743, + "gunmen": 44738, + "gunn": 27473, + "gunna": 24002, + "gunnar": 45301, + "gunner": 35285, + "gunners": 37788, + "guns": 7591, + "gunsense": 44781, + "gunshot": 49250, + "gunsn": 49028, + "gup": 38632, + "gup": 47335, + "gupta": 15905, + "gur": 3218, + "gur": 30224, + "gura": 46836, + "gurgaon": 33240, + "guri": 43888, + "gurl": 25445, + "gurmee": 35482, + "gurmeetramrahim": 36549, + "guru": 18629, + "guru": 10800, + "gurudev": 48647, + "gus": 8018, + "gust": 24629, + "gusta": 23024, + "gusta": 44196, + "gustav": 32062, + "gustav": 37921, + "gustave": 43170, + "gustavo": 45943, + "gusto": 37937, + "gusts": 20896, + "gusty": 27589, + "gut": 
24780, + "gut": 13486, + "guter": 44963, + "guterres": 48738, + "guth": 31696, + "guthrie": 33164, + "gutier": 32773, + "gutierrez": 33739, + "guts": 25983, + "gutted": 26524, + "gutter": 40537, + "guwa": 43063, + "guwahati": 45045, + "guy": 10008, + "guy": 2149, + "guyana": 45215, + "guyen": 28031, + "guys": 43588, + "guys": 1791, + "guyz": 48170, + "guzman": 37960, + "gv": 15462, + "gv": 17336, + "gw": 7172, + "gw": 15717, + "gwen": 32165, + "gwen": 24182, + "gwin": 43005, + "gwy": 32226, + "gwyne": 36923, + "gx": 40227, + "gy": 2168, + "gy": 1164, + "gya": 43214, + "gyan": 43814, + "gye": 21728, + "gyllen": 49348, + "gym": 9902, + "gym": 5222, + "gymna": 13517, + "gymnasium": 42847, + "gymnast": 42658, + "gymnastics": 20116, + "gyn": 39603, + "gyne": 45836, + "gyp": 40053, + "gypsy": 22354, + "gypt": 41921, + "gz": 45937, + "gz": 35841, + "gö": 40778, + "gü": 31907, + "h": 71, + "h": 327, + "ha": 560, + "ha": 1429, + "haa": 26814, + "haal": 35869, + "haan": 36284, + "haar": 45247, + "haar": 35859, + "haas": 27443, + "haasan": 26601, + "hab": 20573, + "hab": 20002, + "haban": 46225, + "haber": 44737, + "habit": 8491, + "habit": 17215, + "habitat": 11747, + "habitats": 35344, + "habits": 14540, + "habs": 27489, + "hac": 20343, + "hace": 43623, + "haci": 40674, + "hack": 6610, + "hack": 11182, + "hackathon": 25182, + "hacked": 19575, + "hacker": 22376, + "hackers": 21498, + "hacking": 12939, + "hackney": 48811, + "hackney": 24928, + "hacks": 19965, + "had": 10660, + "had": 1100, + "hadi": 39058, + "hadid": 26415, + "hadith": 46907, + "hadley": 44995, + "hadn": 21480, + "hadoop": 43868, + "hae": 30723, + "hae": 27193, + "hafi": 39914, + "hag": 26855, + "hag": 43207, + "hagan": 47489, + "hagen": 14664, + "hager": 48773, + "hagg": 26324, + "hague": 28988, + "hah": 18108, + "hah": 13680, + "haha": 1913, + "haha": 3060, + "hahah": 27253, + "hahah": 15441, + "hahaha": 4722, + "hahahah": 37513, + "hahahah": 20096, + "hahahaha": 8058, + "hahahaha": 9501, + "hahahahah": 33334, + "hahahahaha": 16347, + "hahahahahaha": 26487, + "hahahahahahaha": 43653, + "hahahahahahahaha": 36126, + "hahahha": 49205, + "hahn": 35596, + "hai": 8734, + "hai": 5234, + "haider": 42200, + "haiku": 19542, + "hail": 15272, + "hail": 8634, + "hailed": 44604, + "hailey": 27703, + "hailing": 47288, + "hails": 32571, + "hailstate": 35063, + "hain": 23861, + "hair": 4658, + "hair": 2225, + "haircare": 43682, + "haircut": 14711, + "hairdresser": 47468, + "haired": 27202, + "hairs": 27951, + "hairstyle": 22324, + "hairstyles": 40627, + "hairy": 26513, + "haiti": 17368, + "haitian": 37577, + "haj": 27885, + "haj": 43191, + "haji": 41889, + "hajj": 35576, + "hak": 25142, + "hak": 40671, + "haka": 44011, + "hake": 41663, + "hal": 1296, + "hal": 8708, + "hala": 25918, + "halal": 34216, + "halam": 29061, + "halamadrid": 31132, + "halder": 32201, + "hale": 37038, + "hale": 14701, + "halen": 39204, + "halep": 49017, + "haley": 37330, + "haley": 16839, + "half": 7453, + "half": 2349, + "halftime": 13742, + "halfway": 16736, + "hali": 9860, + "hali": 43030, + "halibut": 49030, + "halifax": 13411, + "hall": 6850, + "hall": 2140, + "halla": 29569, + "halle": 27763, + "halle": 32239, + "hallelujah": 36993, + "halli": 32665, + "hallmark": 31040, + "hallmark": 32053, + "hallmarkchannel": 36840, + "hallo": 3463, + "halloffame": 48578, + "halloween": 28537, + "halloween": 3739, + "halls": 18052, + "hallucin": 35385, + "hallway": 26845, + "halo": 33331, + "halo": 11918, + "halsey": 34256, + "halt": 25640, + "halter": 47194, + "halton": 45445, + 
"ham": 1522, + "ham": 1714, + "hama": 17944, + "hamas": 14818, + "hamburg": 18409, + "hamburger": 33928, + "hamid": 32377, + "hamil": 6725, + "hamill": 45784, + "hamill": 48729, + "hamillhimself": 47324, + "hamilton": 22448, + "hamilton": 7684, + "hamlet": 27722, + "hamlin": 49326, + "hamm": 46110, + "hammer": 15331, + "hammer": 9401, + "hammered": 37251, + "hammers": 35649, + "hammersmith": 42127, + "hammock": 33682, + "hammond": 21761, + "hamont": 18518, + "hamp": 6665, + "hamper": 27692, + "hampshire": 16006, + "hampstead": 37340, + "hampton": 36582, + "hampton": 12285, + "hamptons": 42415, + "hamr": 47979, + "hamradio": 36712, + "hams": 25619, + "hamster": 33313, + "hamstring": 39990, + "hamza": 45762, + "han": 1545, + "han": 3565, + "hana": 16801, + "hand": 1722, + "hand": 2463, + "handbag": 22654, + "handbags": 35667, + "handball": 27988, + "handbook": 25147, + "handcrafted": 22185, + "handed": 10881, + "handedly": 48656, + "handel": 40072, + "handful": 23725, + "handheld": 26812, + "handic": 17812, + "handicap": 27063, + "handicapp": 42349, + "handing": 19196, + "handle": 43681, + "handle": 7245, + "handled": 26824, + "handler": 29097, + "handles": 22124, + "handling": 14071, + "handmade": 18054, + "handmade": 6737, + "handmadehour": 25724, + "handover": 46922, + "hands": 3500, + "handshake": 38418, + "handsome": 7438, + "handwriting": 29986, + "handwritten": 35192, + "handy": 13479, + "hane": 28411, + "hang": 3351, + "hang": 5592, + "hangar": 33439, + "hanged": 40807, + "hanger": 28905, + "hangin": 22670, + "hanging": 4850, + "hangout": 17572, + "hangover": 20755, + "hangs": 21785, + "hani": 39944, + "hani": 18374, + "hank": 35993, + "hank": 17655, + "hanks": 29943, + "hanley": 47284, + "hann": 5584, + "hanna": 10075, + "hannah": 18622, + "hannah": 9142, + "hannel": 43477, + "hanni": 19493, + "hannibal": 25149, + "hannity": 24569, + "hannover": 39976, + "hanoi": 36134, + "hanover": 33246, + "hans": 35172, + "hans": 16628, + "hansen": 19729, + "hanson": 24602, + "hant": 40641, + "hanuk": 32774, + "hanukkah": 34247, + "hanuman": 46975, + "hao": 27184, + "hap": 44981, + "hap": 47988, + "happ": 784, + "happen": 21486, + "happen": 4506, + "happened": 4402, + "happening": 4284, + "happeningnow": 43107, + "happenings": 41998, + "happens": 4988, + "happier": 14118, + "happiest": 13811, + "happily": 17316, + "happiness": 5096, + "happy": 2952, + "happy": 900, + "happybirthday": 9651, + "happybirthday": 12207, + "happydays": 25106, + "happye": 33922, + "happyeaster": 38745, + "happyfathersday": 43534, + "happyfriday": 33340, + "happyhalloween": 28750, + "happyholidays": 32186, + "happyhour": 32036, + "happymonday": 47364, + "happymothersday": 42425, + "happynewyear": 18655, + "happythanksgiving": 40593, + "happyvalentinesday": 42403, + "haps": 9114, + "haq": 32445, + "har": 915, + "har": 5888, + "hara": 10367, + "haram": 35732, + "haram": 22950, + "haran": 27921, + "harare": 43562, + "haras": 26644, + "harass": 16481, + "harassed": 43067, + "harassment": 16641, + "harat": 28984, + "harb": 5856, + "harbaugh": 45220, + "harbor": 40686, + "harbor": 10202, + "harbour": 35430, + "harbour": 10011, + "harcourt": 48093, + "hard": 3312, + "hard": 1626, + "hardcover": 31123, + "harden": 27350, + "harder": 12274, + "hardest": 15258, + "hardin": 43802, + "harding": 24382, + "hardly": 17363, + "hardro": 28126, + "hardrock": 48365, + "hardrock": 40739, + "hards": 44048, + "hardship": 45085, + "hardt": 17922, + "hardware": 11957, + "hardwell": 45572, + "hardwick": 46864, + "hardwood": 28167, + "hardwork": 
42554, + "hardwork": 27404, + "hardworking": 28095, + "hardworkpaysoff": 49193, + "hardy": 48179, + "hardy": 14113, + "hare": 27903, + "hare": 18464, + "harga": 39738, + "hari": 25472, + "hari": 8981, + "harlan": 49133, + "harle": 29096, + "harlem": 17771, + "harley": 24702, + "harley": 13632, + "harleydavidson": 39183, + "harlow": 34113, + "harm": 16656, + "harm": 14452, + "harman": 42434, + "harmed": 39637, + "harmful": 21725, + "harmless": 44369, + "harmon": 10828, + "harmon": 28729, + "harmony": 10785, + "harms": 46703, + "harne": 43323, + "harness": 23205, + "harold": 16917, + "harp": 27339, + "harper": 31288, + "harper": 12634, + "harri": 6639, + "harrier": 37372, + "harriet": 27154, + "harrington": 34340, + "harris": 25356, + "harris": 6925, + "harrisburg": 40590, + "harrison": 34389, + "harrison": 10540, + "harro": 18939, + "harrogate": 30842, + "harrow": 38807, + "harry": 11094, + "harry": 3600, + "harrypotter": 23375, + "harsh": 30596, + "harsh": 16944, + "hart": 9335, + "hart": 7752, + "hartford": 23434, + "harth": 35619, + "hartle": 47482, + "hartley": 31268, + "hartman": 43294, + "haru": 35099, + "harvard": 28118, + "harvard": 12848, + "harve": 6405, + "harvest": 44495, + "harvest": 8971, + "harvested": 35899, + "harvesting": 26674, + "harvey": 33289, + "harvey": 9586, + "harvick": 46983, + "haryana": 27661, + "has": 13855, + "has": 791, + "hasan": 30049, + "hasbro": 37405, + "hash": 6338, + "hash": 19199, + "hashi": 41831, + "hashmi": 35852, + "hashtag": 34015, + "hashtag": 9238, + "hashtags": 23514, + "haskell": 48550, + "hasn": 9143, + "hass": 9298, + "hassan": 15829, + "hassee": 37117, + "hassel": 32204, + "hassle": 35762, + "hast": 18146, + "hasta": 36623, + "hastings": 22035, + "hat": 3447, + "hat": 3801, + "hatch": 24202, + "hatch": 17809, + "hatchback": 42348, + "hatched": 42158, + "hate": 23546, + "hate": 3753, + "hated": 21298, + "hateful": 36418, + "hater": 36917, + "haters": 14027, + "hates": 14957, + "hatfield": 38448, + "hath": 27894, + "hath": 34416, + "hathaway": 31801, + "hati": 26045, + "hating": 25668, + "hatred": 19046, + "hats": 9812, + "hatt": 8747, + "hatton": 44861, + "hau": 5152, + "hauer": 48751, + "haul": 23743, + "haul": 12332, + "hauled": 46620, + "hauling": 43132, + "haun": 9676, + "haunt": 31039, + "haunted": 14944, + "haunting": 24034, + "haunts": 48035, + "haus": 41755, + "haus": 16478, + "hausen": 33338, + "hauser": 46586, + "haute": 28854, + "hav": 13443, + "hav": 20447, + "havan": 36304, + "havana": 23357, + "havas": 46261, + "have": 18053, + "have": 720, + "haven": 33074, + "haven": 3871, + "havent": 29130, + "haver": 27876, + "haves": 49088, + "havin": 31937, + "having": 1977, + "havoc": 24447, + "haw": 2788, + "haw": 26954, + "hawa": 6067, + "hawa": 46278, + "hawai": 15800, + "hawaii": 32413, + "hawaii": 8265, + "hawaiian": 17734, + "hawan": 27765, + "hawk": 14704, + "hawk": 8218, + "hawke": 38178, + "hawker": 39051, + "hawkeye": 38666, + "hawkeyes": 34266, + "hawking": 33437, + "hawkins": 19740, + "hawks": 44806, + "hawks": 5841, + "hawthorn": 45372, + "hawthorne": 36730, + "hay": 4871, + "hay": 11367, + "haya": 41325, + "hayat": 49360, + "hayden": 19806, + "haydn": 48207, + "haye": 36583, + "hayes": 13555, + "hayley": 39986, + "hayley": 22204, + "haynes": 30496, + "hays": 41524, + "hayward": 29400, + "haz": 5040, + "haz": 39921, + "hazard": 26174, + "hazard": 15178, + "hazardous": 27102, + "hazards": 30639, + "haze": 22785, + "hazel": 19838, + "hazel": 21882, + "hazelnut": 35816, + "hazi": 22740, + "hazmat": 48887, + "hazrat": 45775, + 
"hazy": 32655, + "hb": 6854, + "hb": 12576, + "hbcu": 40008, + "hbd": 25277, + "hbd": 13594, + "hbo": 15252, + "hc": 15831, + "hc": 7821, + "hcs": 46850, + "hd": 11601, + "hd": 4414, + "hdd": 40508, + "hdmi": 33302, + "hdr": 28065, + "he": 651, + "he": 797, + "hea": 27150, + "hea": 32790, + "head": 1603, + "head": 1375, + "headache": 23849, + "headaches": 38025, + "headband": 28556, + "headed": 6153, + "header": 11077, + "heading": 4409, + "headless": 45219, + "headlights": 42422, + "headline": 10891, + "headliner": 38880, + "headlines": 14706, + "headlining": 26971, + "headphone": 37524, + "headphones": 14906, + "headquarters": 13041, + "heads": 5174, + "headset": 23883, + "headshot": 34890, + "heal": 1231, + "heal": 13833, + "healed": 31456, + "healer": 38328, + "healey": 38985, + "healing": 9295, + "heals": 32384, + "health": 2145, + "health": 1728, + "healthand": 43704, + "healthcare": 42500, + "healthcare": 6023, + "healthier": 18242, + "healthtech": 42694, + "healthy": 10330, + "healthy": 3782, + "healthye": 31532, + "healthyeating": 33761, + "healthyfood": 39996, + "healthylifestyle": 46254, + "healthyliving": 27293, + "healy": 34299, + "heap": 34781, + "heaps": 44446, + "hear": 2749, + "hear": 2584, + "heard": 4063, + "hearing": 46353, + "hearing": 5541, + "hearings": 33175, + "hearn": 36613, + "hears": 25395, + "heart": 4975, + "heart": 1936, + "heartbeat": 29154, + "heartbreak": 29281, + "heartbreaking": 21322, + "heartbroken": 35383, + "hearted": 21679, + "heartfelt": 22904, + "hearth": 31563, + "hearthstone": 34054, + "hearti": 29345, + "hearties": 44572, + "heartland": 31923, + "heartless": 47022, + "heartnews": 40426, + "hearts": 5516, + "heartw": 30002, + "heartwarming": 34080, + "hearty": 26994, + "heat": 12175, + "heat": 4403, + "heated": 17057, + "heater": 23246, + "heath": 12794, + "heath": 11719, + "heather": 20230, + "heather": 12470, + "heathrow": 24171, + "heating": 12478, + "heaton": 34557, + "heats": 36106, + "heatwave": 25726, + "heav": 2409, + "heaven": 15520, + "heaven": 5545, + "heavenly": 19117, + "heavens": 26026, + "heavier": 31253, + "heaviest": 33268, + "heavily": 14123, + "heavy": 12048, + "heavy": 4200, + "heavymetal": 39804, + "heavyweight": 17448, + "heb": 24700, + "heb": 34515, + "hebdo": 41817, + "hebrew": 27298, + "hebrides": 45121, + "hebron": 45725, + "hec": 18932, + "heck": 22985, + "heck": 14427, + "hectares": 44162, + "hectic": 37245, + "hector": 25852, + "hed": 18271, + "hedge": 16229, + "hedge": 20294, + "hedgehog": 21940, + "hedges": 41345, + "hee": 18364, + "hee": 15773, + "heechul": 42487, + "heed": 15118, + "heel": 33646, + "heel": 16861, + "heels": 10909, + "heem": 30061, + "heer": 40473, + "hef": 29473, + "heff": 48756, + "hefty": 48584, + "heg": 41995, + "heh": 25834, + "hehe": 48723, + "hehe": 10658, + "hehehe": 24138, + "hei": 6101, + "hei": 29051, + "heidel": 42927, + "heidelberg": 48445, + "heidi": 44860, + "heidi": 23867, + "heifer": 48219, + "heigh": 43883, + "height": 10788, + "heights": 8418, + "heim": 10931, + "heim": 9768, + "heimer": 39517, + "hein": 15487, + "hein": 43206, + "heine": 28742, + "heineken": 36874, + "heinrich": 47877, + "heinz": 32359, + "heir": 27083, + "heir": 34007, + "heirloom": 34232, + "heirs": 43834, + "heis": 21849, + "heisman": 34537, + "heist": 31035, + "heit": 37255, + "hel": 919, + "hel": 11579, + "hela": 48212, + "held": 4042, + "hele": 46129, + "helen": 17576, + "helen": 11291, + "helena": 23109, + "helene": 41591, + "helens": 45940, + "heli": 33874, + "heli": 40183, + "helicop": 10035, + 
"helicopter": 11956, + "helicopters": 26922, + "helium": 46505, + "helix": 35247, + "hell": 8410, + "hell": 4141, + "hella": 19800, + "hellboy": 48428, + "helle": 48600, + "helle": 46968, + "hellenic": 42544, + "heller": 44464, + "hello": 12887, + "hello": 3306, + "hells": 47989, + "helly": 48690, + "helm": 47970, + "helm": 19520, + "helmet": 11122, + "helmets": 21843, + "help": 8641, + "help": 1318, + "helped": 4845, + "helper": 29321, + "helpers": 36316, + "helpful": 12695, + "helping": 3875, + "helpless": 47638, + "helpline": 43101, + "helps": 5144, + "helsin": 17842, + "helsinki": 19626, + "hem": 20270, + "hem": 11148, + "hemi": 14256, + "hemi": 46856, + "heming": 30819, + "hemingway": 33470, + "hemisphere": 32767, + "hemmings": 34882, + "hemo": 43788, + "hemp": 28225, + "hemp": 18467, + "hems": 32451, + "hemsworth": 39428, + "hen": 2385, + "hen": 8047, + "hence": 23640, + "hend": 11560, + "hender": 49248, + "henderson": 14348, + "hendrick": 45296, + "hendricks": 37588, + "hendrix": 23605, + "henge": 33104, + "henley": 27853, + "henna": 39455, + "hennessy": 42667, + "henri": 19431, + "henri": 21610, + "henrik": 35772, + "henry": 16018, + "henry": 5508, + "hens": 31742, + "henson": 32935, + "hep": 17724, + "hep": 48791, + "hepat": 23767, + "hepatitis": 32169, + "hepburn": 26348, + "her": 1223, + "her": 899, + "hera": 38724, + "heral": 37809, + "herald": 27625, + "herald": 12851, + "herb": 26116, + "herb": 15302, + "herbal": 21868, + "herbali": 44087, + "herbalife": 48364, + "herbert": 19935, + "herbs": 17320, + "hercules": 26539, + "herd": 36142, + "herd": 18589, + "here": 9134, + "here": 763, + "hered": 47976, + "hereford": 35543, + "heres": 13566, + "hereto": 47673, + "heri": 31392, + "herit": 4720, + "heritag": 38273, + "heritage": 20962, + "heritage": 5455, + "herman": 31890, + "herman": 21568, + "hermann": 40942, + "hermes": 34563, + "hermi": 35265, + "hermione": 45502, + "hermit": 43953, + "hermitage": 47706, + "hermo": 40967, + "hermosa": 42531, + "hern": 30571, + "hern": 43576, + "hernandez": 17707, + "hero": 7338, + "hero": 3756, + "heroes": 38010, + "heroes": 5506, + "heroic": 24255, + "heroin": 23841, + "heroine": 27420, + "heron": 22593, + "heros": 37642, + "herr": 38537, + "herrera": 27755, + "herring": 30211, + "hers": 25359, + "herself": 9207, + "hersh": 20379, + "hershey": 29734, + "hert": 26744, + "hertfordshire": 41070, + "herts": 35784, + "herty": 23454, + "hertz": 49383, + "hes": 30553, + "hes": 12784, + "hesit": 23933, + "hesitate": 34967, + "hess": 41888, + "hester": 31105, + "het": 37527, + "het": 19678, + "hetero": 26405, + "heu": 20105, + "heughan": 32298, + "hew": 48141, + "hew": 43051, + "hewitt": 28871, + "hex": 16255, + "hex": 31241, + "hey": 10759, + "hey": 2189, + "hez": 34591, + "hezbollah": 37636, + "hf": 26606, + "hf": 20603, + "hfx": 47297, + "hg": 23986, + "hg": 26237, + "hgtv": 47657, + "hh": 3280, + "hh": 5180, + "hhh": 8281, + "hhhh": 19391, + "hhhh": 13121, + "hhhhh": 24246, + "hhhhhh": 37278, + "hhs": 27006, + "hi": 677, + "hi": 1883, + "hia": 20672, + "hiatus": 27823, + "hib": 15922, + "hiber": 38799, + "hibis": 36226, + "hibiscus": 36460, + "hibition": 24658, + "hibs": 42814, + "hic": 3549, + "hic": 38079, + "hick": 14813, + "hickman": 49148, + "hickory": 29905, + "hicks": 23429, + "hid": 15552, + "hid": 14451, + "hidalgo": 47464, + "hidden": 28305, + "hidden": 7029, + "hiddleston": 31444, + "hide": 17725, + "hide": 9379, + "hideous": 46588, + "hides": 30800, + "hiding": 11371, + "hie": 15763, + "hier": 23433, + "hier": 29913, + "hierarchy": 
44442, + "hifi": 38168, + "hig": 38108, + "higgins": 21783, + "high": 1487, + "high": 1400, + "higher": 5321, + "highered": 27072, + "highest": 5317, + "highland": 32244, + "highland": 16062, + "highlander": 46251, + "highlanders": 40445, + "highlands": 16883, + "highlight": 8264, + "highlighted": 22252, + "highlighter": 45460, + "highlighting": 17344, + "highlights": 6173, + "highly": 5302, + "highness": 38694, + "highs": 15144, + "highschool": 23102, + "highway": 45344, + "highway": 7620, + "highways": 28007, + "higu": 39115, + "hihi": 36240, + "hii": 42315, + "hijab": 31407, + "hika": 41356, + "hikari": 44624, + "hike": 9404, + "hiked": 36471, + "hiker": 40947, + "hikers": 46090, + "hikes": 27076, + "hiking": 9118, + "hiko": 48708, + "hil": 3508, + "hil": 17927, + "hila": 38837, + "hilar": 37337, + "hilari": 7784, + "hilarious": 8358, + "hilariously": 43476, + "hilary": 45898, + "hilary": 25415, + "hilde": 45382, + "hill": 3671, + "hill": 2682, + "hillary": 13257, + "hillary": 7074, + "hillaryclinton": 15357, + "hilli": 32513, + "hills": 24178, + "hills": 5289, + "hillsborough": 32157, + "hillside": 37194, + "hilltop": 45858, + "hilly": 32483, + "hilton": 33621, + "hilton": 14012, + "him": 4128, + "him": 1269, + "himach": 29132, + "himachal": 35461, + "himalay": 17552, + "himalayan": 30318, + "himalayas": 32872, + "hime": 45892, + "himself": 4530, + "himss": 41730, + "hin": 1676, + "hin": 37930, + "hina": 40571, + "hinakhan": 45518, + "hinch": 49320, + "hind": 34460, + "hind": 23293, + "hindi": 14967, + "hinds": 47859, + "hindu": 17587, + "hindu": 12053, + "hinduism": 40592, + "hindus": 25701, + "hindustan": 46553, + "hines": 37462, + "hing": 37968, + "hini": 33564, + "hino": 45343, + "hint": 11868, + "hinton": 47165, + "hints": 20594, + "hio": 32897, + "hip": 11725, + "hip": 6584, + "hipho": 8819, + "hiphop": 26598, + "hiphop": 10914, + "hipp": 13607, + "hippie": 28637, + "hippo": 28398, + "hippo": 36729, + "hips": 30191, + "hipstamatic": 31002, + "hipster": 19987, + "hipsters": 48265, + "hir": 4959, + "hir": 14728, + "hira": 42577, + "hire": 32356, + "hire": 8243, + "hired": 17602, + "hires": 24133, + "hiring": 7835, + "hiro": 17396, + "hiro": 20588, + "hiroshima": 33867, + "hirsch": 46967, + "his": 15211, + "his": 787, + "hism": 23502, + "hispan": 16843, + "hispanic": 22676, + "hist": 21710, + "hist": 13779, + "histo": 33479, + "histor": 2993, + "historia": 46010, + "historian": 20697, + "historians": 35200, + "historic": 30195, + "historic": 5726, + "historical": 34154, + "historical": 8039, + "historically": 30445, + "histories": 34736, + "history": 11142, + "history": 1695, + "historymonth": 19356, + "historyof": 35905, + "hit": 5453, + "hit": 2341, + "hitch": 22937, + "hitch": 36203, + "hitler": 16518, + "hitman": 33290, + "hits": 4712, + "hitter": 23538, + "hitters": 39724, + "hitting": 7957, + "hiv": 44410, + "hiv": 11018, + "hive": 38162, + "hive": 18521, + "hiya": 42393, + "hk": 22648, + "hk": 12307, + "hl": 8297, + "hl": 5956, + "hle": 32389, + "hler": 35418, + "hm": 17913, + "hm": 7631, + "hmm": 13725, + "hmmm": 17032, + "hmmmm": 34598, + "hms": 14625, + "hmu": 21630, + "hmv": 49288, + "hn": 22905, + "hn": 7478, + "hns": 48412, + "ho": 606, + "ho": 2971, + "hoa": 37517, + "hoar": 31628, + "hoax": 33438, + "hob": 18212, + "hobart": 31646, + "hobb": 16175, + "hobbies": 36370, + "hobbit": 23207, + "hobbs": 34343, + "hobby": 41120, + "hobby": 17557, + "hobo": 34613, + "hobo": 41334, + "hoboken": 41568, + "hoc": 35880, + "hoch": 43772, + "hock": 34914, + "hock": 46574, + 
"hockey": 16499, + "hockey": 4111, + "hoco": 34771, + "hod": 31062, + "hodg": 23660, + "hodge": 40585, + "hodges": 35061, + "hodgson": 37044, + "hoe": 32502, + "hoe": 11262, + "hoek": 40073, + "hoes": 21164, + "hof": 20186, + "hof": 12789, + "hofer": 38654, + "hoff": 32860, + "hoff": 22751, + "hofficial": 41949, + "hoffman": 22026, + "hog": 12075, + "hog": 13255, + "hogan": 19757, + "hogg": 42005, + "hogs": 23242, + "hogwarts": 29168, + "hoh": 43947, + "hoi": 39295, + "hok": 26942, + "hok": 47167, + "hokies": 35168, + "hokkaido": 49145, + "hol": 1187, + "hol": 7349, + "hola": 28724, + "hold": 36496, + "hold": 3254, + "holden": 21869, + "holder": 7862, + "holders": 10074, + "holding": 5050, + "holdings": 24832, + "holds": 7286, + "hole": 47242, + "hole": 5341, + "holes": 11266, + "holi": 2093, + "holi": 21926, + "holic": 16348, + "holics": 29782, + "holiday": 13168, + "holiday": 2878, + "holidays": 5372, + "holiness": 37259, + "holistic": 26300, + "holl": 27699, + "holla": 26500, + "holland": 31608, + "holland": 9978, + "hollande": 47690, + "holler": 49047, + "holli": 24019, + "holliday": 41624, + "hollow": 41221, + "hollow": 16691, + "holloway": 29435, + "holly": 12731, + "holly": 11923, + "hollyo": 41525, + "hollyoaks": 43352, + "hollywood": 24655, + "hollywood": 5518, + "holm": 34758, + "holm": 12739, + "holme": 46149, + "holmes": 12756, + "holo": 10317, + "holocau": 14688, + "holocaust": 16476, + "hols": 33344, + "holt": 18868, + "holtz": 44743, + "holy": 13910, + "holy": 4874, + "hom": 906, + "hom": 47397, + "homa": 9557, + "homage": 17746, + "home": 2143, + "home": 1137, + "homebrew": 35046, + "homec": 33869, + "homecoming": 9008, + "homedecor": 15695, + "homedepot": 38707, + "homegrown": 32554, + "homeitems": 42972, + "homeland": 21633, + "homeless": 18403, + "homeless": 9661, + "homelessness": 19851, + "homemade": 7889, + "homeof": 48856, + "homeowner": 37267, + "homeowners": 29882, + "homepage": 29828, + "homer": 29307, + "homer": 16931, + "homers": 38333, + "homes": 19480, + "homes": 5416, + "homeschool": 40994, + "homestead": 32609, + "homeswee": 46298, + "hometown": 12238, + "homework": 12495, + "homicide": 21520, + "homie": 12540, + "homies": 18893, + "homme": 26193, + "homo": 18129, + "homo": 30504, + "homophobia": 37875, + "homophobic": 40975, + "homosexual": 44288, + "homosexuality": 46720, + "homs": 45413, + "hon": 1279, + "hon": 10296, + "honda": 8553, + "honduras": 29715, + "hone": 38640, + "honest": 7814, + "honest": 9602, + "honestly": 9155, + "honesty": 24939, + "honey": 9843, + "honey": 6406, + "honeycomb": 48583, + "honeymoon": 22527, + "hong": 12144, + "hong": 8598, + "hongkong": 16659, + "honi": 17918, + "honolulu": 28096, + "honor": 9206, + "honor": 3402, + "honorable": 19498, + "honorary": 15675, + "honore": 25868, + "honored": 5494, + "honoree": 38993, + "honorees": 43012, + "honoring": 10771, + "honors": 10248, + "honour": 8240, + "honourable": 29855, + "honoured": 11945, + "honouring": 37754, + "honours": 22558, + "hoo": 2300, + "hoo": 7920, + "hood": 18681, + "hood": 3222, + "hooded": 33631, + "hoodie": 13444, + "hoodies": 25974, + "hoods": 16664, + "hoof": 44555, + "hook": 30488, + "hook": 10395, + "hookah": 34214, + "hooked": 18138, + "hookem": 31465, + "hooker": 37891, + "hooking": 35240, + "hooks": 25068, + "hooligans": 48176, + "hoon": 21368, + "hooo": 44538, + "hoop": 31516, + "hoop": 19573, + "hooper": 35221, + "hoops": 9351, + "hoor": 22155, + "hooray": 24940, + "hoos": 46462, + "hoosier": 48886, + "hoosiers": 42780, + "hoot": 29164, + "hoover": 
25691, + "hop": 10848, + "hop": 5833, + "hope": 5263, + "hope": 1683, + "hoped": 30628, + "hopeful": 21453, + "hopefully": 7602, + "hopeless": 35586, + "hopes": 10018, + "hoping": 7207, + "hopkins": 17821, + "hopp": 48839, + "hopped": 34220, + "hopper": 21748, + "hopping": 27606, + "hoppy": 38359, + "hops": 21137, + "hor": 1407, + "hor": 33847, + "hora": 26013, + "horace": 39282, + "horan": 26857, + "horde": 44947, + "hore": 15380, + "horiz": 8144, + "horizon": 17924, + "horizon": 11920, + "horizons": 29685, + "horizontal": 25775, + "hormon": 27096, + "hormone": 31283, + "hormones": 35162, + "horn": 15771, + "horn": 9607, + "horne": 38143, + "horned": 34526, + "hornet": 28739, + "hornets": 20124, + "horns": 22109, + "horny": 32622, + "horo": 21500, + "horoscope": 38453, + "horowitz": 44669, + "horri": 8656, + "horrible": 13726, + "horribly": 45484, + "horrific": 25314, + "horrifying": 38901, + "horror": 13787, + "horror": 5032, + "horrormovies": 46682, + "horrors": 33321, + "horse": 8562, + "horse": 4558, + "horseback": 43673, + "horseman": 48885, + "horsepower": 36882, + "horser": 23096, + "horseracing": 30693, + "horses": 8809, + "horseshoe": 29242, + "horst": 37182, + "hort": 19482, + "horticul": 27141, + "horticulture": 39998, + "horton": 25945, + "hortons": 38422, + "horus": 29794, + "hos": 44320, + "hos": 25008, + "hosa": 44618, + "hose": 19662, + "hoseok": 38817, + "hosp": 2847, + "hosp": 37853, + "hospice": 20533, + "hospit": 7180, + "hospital": 29399, + "hospital": 3851, + "hospitality": 11657, + "hospitalized": 36915, + "hospitals": 13816, + "host": 17403, + "host": 3953, + "hostage": 26119, + "hoste": 31700, + "hosted": 6017, + "hostel": 27225, + "hostess": 39692, + "hostile": 28074, + "hosting": 4857, + "hosts": 8718, + "hot": 2851, + "hot": 2069, + "hota": 43289, + "hotdog": 43758, + "hotel": 14591, + "hotel": 2738, + "hotels": 8654, + "hotline": 30516, + "hotmail": 46427, + "hotness": 39803, + "hotra": 27109, + "hotro": 47823, + "hotspot": 36606, + "hotspur": 35176, + "hotter": 23591, + "hottest": 8279, + "hottie": 22804, + "hotties": 46027, + "hou": 1011, + "hou": 10122, + "hough": 44529, + "houghton": 36133, + "houn": 39273, + "houn": 33607, + "hound": 33996, + "hound": 13561, + "hounds": 21178, + "hounews": 48373, + "hour": 14930, + "hour": 2232, + "hourly": 30918, + "hours": 2382, + "house": 4107, + "house": 1212, + "housed": 37518, + "household": 12412, + "households": 27167, + "housel": 48685, + "housemusic": 28468, + "houseof": 19928, + "houses": 7791, + "housewives": 38523, + "housing": 32924, + "housing": 5734, + "houston": 16564, + "houston": 5663, + "hov": 40291, + "hove": 29674, + "hoven": 35559, + "hover": 36252, + "hover": 49016, + "hovering": 43437, + "how": 7470, + "how": 829, + "howar": 37672, + "howard": 25447, + "howard": 7632, + "howdy": 42216, + "howe": 8179, + "howe": 24614, + "howell": 25297, + "hower": 32920, + "however": 8467, + "howi": 47883, + "howie": 42939, + "howl": 40332, + "howling": 41771, + "howto": 38191, + "howto": 44060, + "hoy": 39625, + "hoy": 13278, + "hoya": 40978, + "hp": 23753, + "hp": 6371, + "hpa": 30983, + "hpc": 39936, + "hpe": 33787, + "hpv": 45765, + "hq": 33571, + "hq": 4693, + "hr": 4810, + "hr": 4086, + "hra": 21320, + "hra": 17212, + "hrc": 18139, + "hrh": 29103, + "hri": 21068, + "hrithik": 45371, + "hrs": 7157, + "hru": 24127, + "hrw": 25064, + "hs": 9343, + "hs": 2466, + "hsbc": 31508, + "hsc": 43510, + "hse": 34057, + "hsfb": 29539, + "hsv": 47311, + "ht": 11123, + "ht": 7801, + "hta": 23452, + "hta": 49384, + "htafc": 
42821, + "htc": 48942, + "htc": 17635, + "html": 18231, + "hts": 43710, + "htt": 10620, + "http": 15066, + "https": 30901, + "httr": 49372, + "httweets": 43198, + "hu": 845, + "hu": 5949, + "hua": 22138, + "huan": 41405, + "huang": 32013, + "huar": 46916, + "huawe": 17709, + "huawei": 21128, + "hub": 18775, + "hub": 7028, + "hubb": 23183, + "hubbard": 33288, + "hubble": 30421, + "hubby": 16947, + "hubert": 40699, + "hubs": 29327, + "huck": 22909, + "huckabee": 43666, + "hud": 7169, + "hud": 28563, + "hudder": 22629, + "huddersfield": 24220, + "huddle": 33435, + "hudson": 25873, + "hudson": 11260, + "hue": 48380, + "hue": 21465, + "hues": 38003, + "huey": 39663, + "huff": 18746, + "huff": 44999, + "huffpost": 45887, + "hug": 40790, + "hug": 10359, + "huge": 2699, + "hugely": 24648, + "hugged": 41333, + "hugging": 27058, + "hugh": 8723, + "hugh": 15385, + "hughes": 11418, + "hugo": 43935, + "hugo": 17132, + "hugs": 14248, + "huh": 13348, + "huhu": 32134, + "hui": 29978, + "hul": 7911, + "hula": 40145, + "hulk": 17637, + "hull": 25154, + "hull": 10375, + "hulu": 24666, + "hum": 5823, + "hum": 16283, + "human": 3175, + "human": 2751, + "humane": 20220, + "humanitarian": 14170, + "humanities": 24949, + "humanity": 9420, + "humanright": 44385, + "humanrights": 14148, + "humans": 8324, + "humb": 9988, + "humber": 30602, + "humber": 38063, + "humble": 38703, + "humble": 10889, + "humbled": 19682, + "humbling": 39757, + "humbold": 24739, + "humboldt": 31389, + "hume": 38197, + "humid": 14778, + "humid": 27447, + "humidi": 47666, + "humidity": 15469, + "humil": 27205, + "humili": 25332, + "humility": 28535, + "humming": 26515, + "hummingbird": 33072, + "hummus": 31785, + "humor": 29369, + "humor": 11186, + "humorous": 38173, + "humour": 19161, + "hump": 16673, + "hump": 24529, + "humpback": 47662, + "humpday": 27693, + "humph": 19767, + "humphrey": 31549, + "hun": 1616, + "hun": 10795, + "hundre": 8505, + "hundred": 11898, + "hundreds": 8879, + "hung": 13825, + "hungar": 19420, + "hungarian": 23325, + "hungary": 17232, + "hunger": 25565, + "hunger": 10184, + "hungergames": 47507, + "hungover": 41110, + "hungry": 44845, + "hungry": 8451, + "hunk": 33912, + "hunt": 16498, + "hunt": 5774, + "hunted": 37373, + "hunter": 16531, + "hunter": 6099, + "hunters": 16115, + "hunting": 27830, + "hunting": 7507, + "huntington": 23521, + "hunts": 34041, + "huntsville": 34544, + "hur": 2305, + "hur": 34523, + "hurd": 44915, + "hurdle": 27486, + "hurdles": 25440, + "huri": 42486, + "hurley": 30166, + "hurling": 24738, + "huron": 36147, + "hurrah": 40599, + "hurric": 6543, + "hurrican": 36105, + "hurricane": 24051, + "hurricane": 8782, + "hurricanes": 22357, + "hurry": 10921, + "hurst": 44742, + "hurst": 11760, + "hurt": 7413, + "hurting": 24017, + "hurts": 13059, + "hus": 5111, + "hus": 35853, + "husband": 6179, + "husbands": 33612, + "hush": 28728, + "husk": 19246, + "huskers": 26946, + "huskies": 20988, + "husky": 20421, + "huss": 13733, + "hussain": 17940, + "hussein": 31336, + "hust": 27279, + "hustle": 15709, + "huston": 46480, + "hut": 20924, + "hut": 16503, + "hutch": 31018, + "hutch": 33203, + "hutchinson": 35721, + "hutto": 27662, + "hutton": 38321, + "hv": 17209, + "hv": 18593, + "hvac": 27492, + "hw": 27491, + "hw": 18876, + "hwa": 32352, + "hwan": 44390, + "hwang": 46775, + "hwy": 13812, + "hy": 1441, + "hy": 17827, + "hya": 31600, + "hyacin": 47263, + "hyatt": 44856, + "hyatt": 25146, + "hybri": 9084, + "hybrid": 10156, + "hyd": 42382, + "hyde": 46484, + "hyde": 16343, + "hyder": 13960, + "hyderabad": 
14801, + "hydr": 8031, + "hydra": 44414, + "hydra": 40420, + "hydrange": 43298, + "hydrate": 29628, + "hydrated": 23300, + "hydrating": 47653, + "hydration": 24174, + "hydrau": 26017, + "hydraulic": 26189, + "hydro": 8368, + "hydro": 22595, + "hydrogen": 20974, + "hye": 32724, + "hye": 25792, + "hygi": 16277, + "hygiene": 19591, + "hymn": 41350, + "hyo": 38960, + "hyo": 35078, + "hyp": 16964, + "hype": 30353, + "hype": 11111, + "hyped": 22507, + "hyper": 7997, + "hyper": 22146, + "hypertension": 40698, + "hypno": 23355, + "hypnosis": 48138, + "hypnoti": 40440, + "hypo": 10252, + "hypocr": 30711, + "hypocri": 25606, + "hypocrisy": 26296, + "hypocrite": 44125, + "hypothe": 46966, + "hypothesis": 44956, + "hyster": 24235, + "hysteria": 45965, + "hysterical": 48627, + "hyuk": 20452, + "hyun": 11831, + "hyun": 8589, + "hyundai": 17094, + "hyung": 46901, + "hyung": 16551, + "hz": 32533, + "i": 72, + "i": 328, + "ia": 12486, + "ia": 1073, + "iac": 32838, + "iac": 44063, + "iaf": 40789, + "iah": 35052, + "iain": 30103, + "ial": 11530, + "ial": 1974, + "ials": 20940, + "iam": 3579, + "iam": 11415, + "iambic": 43668, + "iambicpent": 43891, + "iamsrk": 15103, + "ian": 7723, + "ian": 1800, + "ians": 6451, + "iansomerhalder": 47077, + "iart": 18413, + "iartg": 18669, + "ias": 32303, + "ias": 14620, + "ib": 3962, + "ib": 13554, + "iba": 39763, + "ibadan": 44691, + "iban": 47145, + "ibc": 49014, + "ibd": 40732, + "iber": 23814, + "ibi": 12337, + "ibis": 47048, + "ibiza": 13853, + "ible": 37792, + "ibles": 44102, + "ibm": 23415, + "ibm": 13918, + "ibn": 25729, + "ibooks": 46887, + "ibra": 15476, + "ibrahi": 40350, + "ibrahim": 20816, + "ibrox": 46883, + "ibs": 41993, + "ibu": 43587, + "ibu": 46117, + "ic": 535, + "ic": 1029, + "ica": 2576, + "icago": 37492, + "ical": 6082, + "ical": 1110, + "ically": 3161, + "icals": 13999, + "ican": 17653, + "ican": 5246, + "icans": 20511, + "icar": 37211, + "ication": 21629, + "icc": 12945, + "ice": 2739, + "ice": 733, + "iceberg": 33662, + "icec": 13636, + "icecream": 21334, + "iced": 8049, + "icelan": 34114, + "iceland": 46716, + "iceland": 11935, + "icelandic": 34705, + "ices": 1931, + "ich": 5333, + "ich": 1232, + "icha": 31453, + "iche": 28972, + "iche": 21143, + "ichi": 21669, + "ichi": 14647, + "ichick": 45022, + "ichiro": 43787, + "ici": 948, + "ici": 22189, + "icia": 11774, + "icial": 17543, + "icial": 6397, + "ician": 40522, + "ician": 5374, + "icians": 6264, + "iciary": 21329, + "icic": 46006, + "icide": 6558, + "icides": 28253, + "icing": 7676, + "icio": 24207, + "icion": 45905, + "icious": 3325, + "icist": 21165, + "icists": 42171, + "icity": 7243, + "ick": 1168, + "ick": 1068, + "icked": 39799, + "icker": 40357, + "ickers": 30701, + "icki": 35468, + "icking": 6619, + "icks": 3727, + "icky": 11587, + "icn": 44516, + "ico": 13697, + "ico": 3040, + "icom": 17693, + "icom": 29796, + "icon": 13843, + "icon": 5646, + "iconic": 6959, + "icons": 15553, + "icop": 9389, + "icos": 32002, + "ics": 1324, + "ict": 6349, + "icted": 36515, + "iction": 40560, + "icton": 36548, + "icu": 45118, + "icu": 30443, + "icular": 40660, + "icus": 31459, + "icy": 28780, + "icy": 3495, + "icymi": 5315, + "icz": 46387, + "id": 1568, + "id": 1014, + "ida": 11032, + "ida": 11600, + "idad": 22462, + "idaho": 48817, + "idaho": 15165, + "idal": 39684, + "idan": 17929, + "idc": 22386, + "ide": 1909, + "ide": 14104, + "idea": 3612, + "ideal": 8789, + "ideally": 48247, + "ideals": 45096, + "ideas": 4452, + "ident": 7113, + "identi": 6009, + "identical": 25587, + "identification": 23337, + 
"identified": 15217, + "identifies": 35712, + "identify": 10949, + "identifying": 23589, + "identities": 34292, + "identity": 8892, + "ideology": 25840, + "iders": 8980, + "ides": 31791, + "idf": 28987, + "idge": 35567, + "idh": 44325, + "idi": 9611, + "idi": 14264, + "idio": 15994, + "idiot": 14087, + "idiots": 20856, + "idk": 8972, + "idle": 34754, + "idlib": 36199, + "ido": 6763, + "ido": 29641, + "idol": 24866, + "idol": 8884, + "idols": 21398, + "idr": 10106, + "idri": 46435, + "idris": 41312, + "ids": 6111, + "idu": 28655, + "idy": 33058, + "idyl": 44879, + "idyllic": 46632, + "ie": 6789, + "ie": 1718, + "iec": 44773, + "ied": 10059, + "ieee": 39860, + "iel": 27875, + "iel": 22729, + "ience": 1542, + "ient": 13115, + "ier": 33173, + "ier": 5912, + "iers": 45060, + "ies": 27912, + "ies": 963, + "iest": 10818, + "if": 8063, + "if": 878, + "ifa": 37574, + "ifc": 36524, + "ife": 41172, + "ife": 19590, + "iff": 35753, + "ification": 35755, + "ified": 41403, + "ift": 31143, + "iftar": 35153, + "ifu": 41523, + "ify": 32807, + "ig": 1089, + "ig": 3072, + "iga": 16493, + "igan": 27468, + "igans": 25419, + "igbo": 44591, + "ige": 10806, + "igen": 33070, + "iger": 30758, + "iger": 20685, + "igers": 40755, + "igers": 48928, + "iggy": 46219, + "iggy": 27604, + "igh": 2712, + "igh": 5451, + "ight": 14571, + "ight": 897, + "ighton": 35292, + "igi": 21901, + "igle": 29912, + "iglesias": 39432, + "ign": 7303, + "ign": 2326, + "ignati": 37573, + "ignatius": 48318, + "igne": 45843, + "ignite": 25210, + "ignition": 36115, + "igno": 15375, + "ignor": 7653, + "ignorance": 22735, + "ignorant": 26933, + "ignore": 12304, + "ignored": 20428, + "ignores": 40129, + "ignoring": 23969, + "igor": 33024, + "igs": 31344, + "igu": 21279, + "ih": 12162, + "ih": 34135, + "ihear": 13043, + "iheart": 30332, + "iheartawards": 18811, + "iheartradio": 25934, + "ihop": 45511, + "ihri": 39108, + "ihrithik": 39326, + "ii": 5103, + "ii": 2329, + "iii": 46236, + "iii": 6572, + "iiii": 20133, + "iiii": 45393, + "iiot": 30704, + "iit": 39330, + "iit": 33238, + "ij": 7337, + "ija": 42802, + "ik": 3903, + "ik": 10177, + "ika": 18188, + "ike": 12329, + "ike": 19696, + "ikea": 20528, + "iker": 38653, + "ikh": 44655, + "ikh": 12758, + "iklan": 32028, + "iklan": 29584, + "iko": 35659, + "iko": 39272, + "ikon": 38543, + "ikon": 19156, + "iku": 17780, + "il": 543, + "il": 958, + "ila": 4344, + "ilah": 32211, + "ilan": 13889, + "ilan": 28076, + "iland": 20957, + "ilation": 16180, + "ilay": 45093, + "ild": 22278, + "ild": 17164, + "ile": 18398, + "ile": 989, + "iled": 3358, + "iler": 22446, + "iler": 3615, + "ilers": 8975, + "iles": 42274, + "ili": 2076, + "ili": 19601, + "ilia": 14855, + "ilian": 10272, + "iliary": 32585, + "ilife": 42835, + "ilike": 44989, + "ilinan": 48497, + "iling": 3299, + "ilio": 47256, + "ilion": 12561, + "ilis": 43442, + "ilit": 11178, + "ilities": 5446, + "ility": 1787, + "ilive": 26478, + "ill": 828, + "ill": 660, + "illa": 8877, + "illa": 3043, + "illac": 17218, + "illage": 48922, + "illard": 21920, + "illary": 33667, + "illas": 23404, + "ille": 18213, + "ille": 5559, + "illed": 2527, + "illeg": 35808, + "illegal": 7983, + "illegally": 24466, + "illegals": 40490, + "iller": 23341, + "iller": 2956, + "illers": 30547, + "illery": 14514, + "illes": 20037, + "illi": 1086, + "illi": 25187, + "illia": 48776, + "illiams": 30301, + "illian": 48775, + "illian": 17355, + "illic": 37152, + "illicit": 40998, + "illie": 26083, + "illin": 35868, + "illing": 2803, + "illini": 28957, + "illino": 8920, + "illinois": 9414, + 
"illion": 35542, + "illion": 2035, + "illness": 11145, + "illnesses": 33861, + "illo": 34153, + "illo": 7588, + "illon": 20516, + "ills": 1900, + "illu": 3025, + "illumin": 11446, + "illuminate": 43261, + "illuminated": 28814, + "illuminati": 34551, + "illuminating": 46601, + "illumination": 43680, + "illus": 41386, + "illusion": 20318, + "illusions": 47429, + "illustr": 6268, + "illustrate": 37468, + "illustrated": 13151, + "illustrates": 38129, + "illustrating": 43322, + "illustration": 6052, + "illustrations": 17852, + "illustrator": 16649, + "illustri": 43116, + "illustrious": 44304, + "illy": 11707, + "illy": 9532, + "ilm": 36326, + "ilo": 4220, + "ilo": 14835, + "ilove": 7183, + "ilove": 32914, + "iloveart": 41114, + "ilovemy": 28863, + "iloveyou": 28829, + "ils": 1543, + "ilt": 25334, + "ilton": 28494, + "ilu": 27337, + "ilwx": 43777, + "ily": 4881, + "ily": 1026, + "ilya": 33377, + "ilysm": 29228, + "im": 732, + "im": 1496, + "ima": 2414, + "ima": 6432, + "imac": 40675, + "imacele": 47281, + "imag": 2316, + "image": 24101, + "image": 2867, + "imagery": 22828, + "images": 4952, + "imagin": 18178, + "imaginary": 30417, + "imagination": 13783, + "imaginative": 47233, + "imagine": 35752, + "imagine": 4826, + "imagined": 18478, + "imagines": 47379, + "imaging": 14231, + "imagining": 27384, + "imam": 37552, + "imam": 19024, + "iman": 45684, + "iman": 16247, + "imation": 44566, + "imax": 32066, + "imc": 45616, + "imdanielpadilla": 36357, + "imdb": 30407, + "ime": 44937, + "ime": 31151, + "imel": 31594, + "iment": 37157, + "imer": 21802, + "imes": 47744, + "imf": 28403, + "img": 24157, + "imi": 23559, + "imin": 23942, + "imit": 23462, + "imitation": 41630, + "imma": 19487, + "immac": 25085, + "immaculate": 29649, + "immature": 45531, + "immedi": 7366, + "immediate": 14440, + "immediately": 10108, + "immen": 17278, + "immense": 22722, + "immensely": 35013, + "immer": 13954, + "immerse": 46240, + "immersion": 31861, + "immersive": 27521, + "immigr": 5851, + "immigrant": 16474, + "immigrants": 14460, + "immigration": 9588, + "imminent": 27299, + "immort": 39244, + "immortal": 24717, + "immun": 8961, + "immune": 15606, + "immuni": 44571, + "immunity": 26254, + "immuno": 24361, + "immunology": 44483, + "immunotherapy": 39185, + "imo": 26349, + "imo": 13738, + "imp": 3335, + "imp": 31037, + "impac": 7573, + "impact": 33036, + "impact": 3844, + "impacted": 21424, + "impactful": 41631, + "impacting": 29359, + "impacts": 15069, + "impair": 36451, + "impaired": 28028, + "impairment": 44501, + "impala": 36641, + "impe": 23612, + "impeach": 16874, + "impeach": 43497, + "impeachment": 32979, + "impeachtrump": 38006, + "impecc": 34511, + "impeccable": 40111, + "impending": 34486, + "imper": 7727, + "imperative": 39833, + "imperfect": 46034, + "imperi": 30911, + "imperial": 32425, + "imperial": 12361, + "imperialism": 48855, + "imperson": 25551, + "implant": 33106, + "implants": 32202, + "imple": 7423, + "implement": 17966, + "implementation": 15102, + "implemented": 24315, + "implementing": 22862, + "implic": 15269, + "implications": 19229, + "implo": 40337, + "impo": 45704, + "import": 2336, + "import": 16294, + "importance": 6821, + "important": 2829, + "importantly": 21580, + "imported": 28798, + "imports": 25286, + "impose": 35879, + "imposed": 25871, + "imposing": 42289, + "impossible": 9815, + "impre": 3763, + "impress": 20015, + "impressed": 9689, + "impression": 14468, + "impressionism": 36114, + "impressionist": 44904, + "impressions": 22276, + "impressive": 6634, + "imprint": 43863, + 
"imprison": 22141, + "imprisoned": 32999, + "imprisonment": 39024, + "impro": 2531, + "impromp": 28100, + "impromptu": 28611, + "improv": 22868, + "improve": 4971, + "improved": 9446, + "improvement": 10790, + "improvements": 16320, + "improves": 18035, + "improving": 10381, + "improvis": 32343, + "improvised": 40886, + "impulse": 29683, + "impy": 42690, + "imran": 19647, + "imran": 19212, + "imrankhan": 25956, + "imrankhanpti": 26688, + "ims": 17800, + "imsa": 37262, + "imv": 35731, + "imvkohli": 37136, + "imwith": 26822, + "imwithher": 32651, + "in": 512, + "in": 530, + "ina": 18026, + "ina": 1366, + "inability": 47517, + "inaccurate": 49192, + "inaction": 41916, + "inactive": 49274, + "inadequate": 43403, + "inak": 46549, + "inal": 19178, + "inals": 26438, + "inan": 26204, + "inappropriate": 26722, + "inari": 48620, + "inary": 11337, + "inas": 36731, + "inas": 12362, + "inated": 38530, + "ination": 4706, + "inau": 10832, + "inaugu": 11309, + "inaugur": 11448, + "inaugural": 11340, + "inaugurated": 29011, + "inauguration": 16805, + "inbound": 24420, + "inbox": 18683, + "inc": 14570, + "inc": 4438, + "incan": 45964, + "incar": 18070, + "incarcer": 26334, + "incarcerated": 49178, + "incarceration": 39887, + "incase": 30463, + "ince": 44303, + "incen": 13259, + "incense": 35059, + "incentive": 29024, + "incentives": 29813, + "inception": 36653, + "inch": 6523, + "incheon": 30645, + "inches": 10809, + "inci": 5747, + "incidence": 43371, + "incident": 10103, + "incidents": 22120, + "incindia": 26161, + "inciner": 46434, + "incl": 27857, + "incl": 13338, + "inclined": 45470, + "inclu": 1738, + "include": 5942, + "included": 7414, + "includes": 6197, + "including": 2814, + "inclusion": 12079, + "inclusive": 13393, + "income": 8044, + "incoming": 15416, + "incomparable": 36027, + "incompetent": 45069, + "incomplete": 34040, + "incon": 42372, + "inconvenience": 40563, + "incorpor": 19335, + "incorporate": 34168, + "incorporated": 29494, + "incorporating": 40303, + "incorrect": 31872, + "incre": 1870, + "increase": 5230, + "increased": 9156, + "increases": 13797, + "increasing": 10270, + "increasingly": 16106, + "incredi": 2883, + "incredible": 22128, + "incredible": 3457, + "incredibleindia": 24680, + "incredibles": 48641, + "incredibly": 9513, + "incu": 38830, + "incub": 24587, + "incubator": 35736, + "incumb": 32246, + "incumbent": 38038, + "incur": 42356, + "ind": 5386, + "ind": 4655, + "inda": 15710, + "inde": 2645, + "indeed": 10031, + "indefin": 29501, + "indefinitely": 43750, + "independ": 4147, + "independence": 23117, + "independence": 7955, + "independenceday": 25971, + "independent": 33844, + "independent": 7088, + "independently": 39831, + "inder": 29225, + "index": 35209, + "index": 9458, + "indhoven": 44229, + "indi": 1098, + "indi": 46536, + "india": 27067, + "india": 1762, + "indian": 7685, + "indian": 3606, + "indiana": 8615, + "indianapolis": 17196, + "indianfootball": 45979, + "indians": 10271, + "indic": 7136, + "indicate": 26679, + "indicated": 39416, + "indicates": 29412, + "indication": 38539, + "indicator": 24776, + "indicators": 30054, + "indicted": 34992, + "indictment": 42278, + "indie": 5260, + "indie": 9383, + "indiedev": 10863, + "indiefilm": 22588, + "indiegame": 17969, + "indiegamedev": 40466, + "indiegames": 35864, + "indiegogo": 38057, + "indies": 23618, + "indiffe": 41372, + "indigen": 8348, + "indigenous": 9303, + "indigo": 21002, + "indira": 43887, + "indirec": 26398, + "indirect": 35416, + "indivi": 5649, + "individu": 9574, + "individual": 8512, + 
"individually": 33782, + "individuals": 11990, + "indo": 26303, + "indo": 18297, + "indom": 42926, + "indone": 6180, + "indonesia": 7229, + "indonesian": 19593, + "indoor": 44478, + "indoor": 9546, + "indoors": 22973, + "indore": 46143, + "indu": 2298, + "induc": 7973, + "induced": 24103, + "inducted": 20596, + "inductee": 39558, + "inductees": 44796, + "induction": 18338, + "indul": 19402, + "indulg": 28388, + "indulge": 24851, + "indulgence": 40856, + "indulgent": 49147, + "industri": 5082, + "industrial": 30853, + "industrial": 7520, + "industries": 11700, + "industry": 47407, + "industry": 3318, + "indv": 16942, + "indy": 9821, + "indy": 10098, + "indycar": 20484, + "indyref": 22569, + "ine": 855, + "ine": 715, + "ineau": 38122, + "inec": 45214, + "ined": 2038, + "inee": 43252, + "inee": 7986, + "inees": 13056, + "ineffe": 47202, + "inely": 18234, + "inem": 48876, + "inema": 29232, + "inen": 44365, + "inequalities": 45507, + "inequality": 17372, + "iner": 17438, + "iner": 5155, + "iners": 41863, + "ines": 2137, + "inese": 35966, + "iness": 1463, + "inet": 8121, + "inette": 38911, + "inev": 19527, + "inevit": 45871, + "inevitable": 25004, + "inews": 24300, + "inexpensive": 38614, + "iney": 30254, + "inez": 12700, + "inf": 1529, + "inf": 35241, + "infamous": 18688, + "infan": 17219, + "infant": 19192, + "infantry": 21655, + "infants": 34726, + "infe": 7164, + "infec": 26088, + "infected": 26136, + "infection": 14774, + "infections": 22227, + "infectious": 29157, + "infeld": 25035, + "infer": 16258, + "inferno": 31290, + "infertility": 40701, + "infield": 48933, + "infiltr": 28683, + "infin": 6246, + "infinite": 12748, + "infiniti": 34644, + "infinity": 34863, + "infinity": 12895, + "infl": 7627, + "inflam": 16080, + "inflammation": 24893, + "inflammatory": 26831, + "inflatable": 30135, + "inflation": 17497, + "inflicted": 48188, + "influ": 4835, + "influen": 13229, + "influence": 9199, + "influenced": 21183, + "influencer": 25013, + "influencers": 29891, + "influences": 24926, + "influencing": 45126, + "influential": 17553, + "influenza": 39897, + "info": 5680, + "info": 2222, + "infographic": 10076, + "infographics": 33172, + "infor": 31773, + "inform": 10241, + "inform": 19449, + "informal": 25705, + "informat": 29625, + "informatics": 35685, + "information": 3204, + "informative": 19364, + "informed": 13876, + "informing": 45388, + "informs": 48440, + "infosec": 17863, + "infr": 29718, + "infra": 7312, + "infra": 45877, + "infrared": 22867, + "infrastructure": 9034, + "infringe": 44882, + "infringement": 48712, + "infront": 37668, + "infu": 15048, + "infuri": 48461, + "infused": 21461, + "infusion": 43464, + "ing": 653, + "ing": 519, + "inga": 15233, + "ingco": 40444, + "ingday": 16561, + "ingdon": 38731, + "inge": 11790, + "inge": 7071, + "inged": 30046, + "ingen": 19088, + "ingeni": 36884, + "inger": 33883, + "inger": 3541, + "ingfor": 33430, + "ingh": 9170, + "ingh": 30495, + "ingham": 24497, + "ingham": 4291, + "inghamshire": 39289, + "inghour": 42728, + "inging": 4066, + "ingl": 45662, + "ingle": 22228, + "ingle": 17005, + "ingles": 24490, + "ingley": 44428, + "inglis": 46327, + "ingly": 4796, + "ingnow": 34766, + "ingo": 30175, + "ingo": 9012, + "ingra": 45165, + "ingrad": 44124, + "ingram": 26998, + "ingredi": 9272, + "ingredient": 19799, + "ingredients": 11788, + "ingrid": 33496, + "ings": 895, + "ingthe": 20170, + "ingtips": 39373, + "ington": 11846, + "ington": 2156, + "ingu": 8714, + "ingual": 22795, + "ingue": 36838, + "ingui": 12788, + "inguish": 36146, + "inha": 32612, + 
"inhabit": 36189, + "inhabitants": 44968, + "inhal": 30786, + "inhe": 32617, + "inher": 24611, + "inherent": 47327, + "inherit": 34322, + "inheritance": 39341, + "inherited": 39111, + "inhi": 25557, + "inhibit": 32196, + "inho": 12984, + "ini": 6154, + "ini": 3581, + "inian": 36638, + "inim": 38717, + "inindia": 34021, + "ining": 1389, + "inist": 30976, + "init": 42670, + "initi": 4580, + "initial": 13980, + "initially": 28123, + "initials": 48794, + "initiated": 27756, + "initiation": 41009, + "initiative": 8152, + "initiatives": 16549, + "inity": 22126, + "inj": 5112, + "injec": 13688, + "injection": 21438, + "inju": 5006, + "injured": 7505, + "injuries": 9481, + "injury": 6223, + "injustice": 20541, + "ink": 4547, + "ink": 967, + "inka": 40685, + "inked": 29356, + "inki": 46176, + "inkigayo": 47882, + "inking": 37586, + "inks": 20966, + "inktober": 9387, + "inland": 21943, + "inlet": 35161, + "inline": 45004, + "inlove": 28415, + "inmate": 32341, + "inmates": 28216, + "inmy": 42657, + "inn": 27260, + "inn": 5569, + "inna": 35088, + "inner": 24512, + "inner": 6955, + "inning": 4415, + "innings": 11580, + "innis": 44059, + "inno": 7961, + "innocence": 26383, + "innocent": 11241, + "innov": 2890, + "innovate": 24549, + "innovation": 33063, + "innovation": 4272, + "innovations": 18817, + "innovative": 8494, + "innovator": 34735, + "innovators": 27834, + "ino": 4211, + "ino": 2691, + "inoa": 25649, + "inos": 21828, + "inous": 47801, + "inox": 22698, + "input": 16952, + "inputs": 48763, + "inqu": 10628, + "inqui": 18527, + "inquirer": 45172, + "inquiries": 29469, + "inquiry": 15865, + "inquis": 31171, + "inr": 36325, + "ins": 12786, + "ins": 1041, + "insan": 7875, + "insane": 10260, + "insanely": 27846, + "insanity": 26645, + "inscribed": 49168, + "inscription": 41127, + "insec": 15744, + "insect": 21297, + "insects": 18714, + "insecure": 35112, + "insecurity": 36964, + "inser": 13830, + "insert": 18807, + "insi": 3453, + "inside": 19141, + "inside": 2912, + "insider": 13300, + "insiders": 32171, + "insig": 40503, + "insight": 8795, + "insightful": 20354, + "insights": 8729, + "insignia": 48864, + "insist": 35504, + "insisted": 40423, + "insists": 27255, + "inski": 32630, + "insky": 24607, + "insol": 42366, + "insom": 21755, + "insomni": 42040, + "insomnia": 30598, + "inson": 21007, + "insp": 1597, + "inspec": 7915, + "inspect": 40815, + "inspecting": 40565, + "inspection": 15142, + "inspections": 39513, + "inspector": 20514, + "inspir": 2573, + "inspiration": 4195, + "inspirational": 41936, + "inspirational": 9855, + "inspirations": 35093, + "inspire": 27901, + "inspire": 8583, + "inspired": 39849, + "inspired": 3516, + "inspires": 17245, + "inspiring": 41847, + "inspiring": 5705, + "inspo": 26897, + "inst": 1264, + "inst": 1581, + "insta": 22411, + "insta": 11694, + "instability": 41377, + "instac": 46678, + "instaf": 33800, + "instag": 14612, + "instagood": 23718, + "instagram": 27910, + "instagram": 2659, + "instal": 38805, + "install": 6940, + "install": 11168, + "installation": 9358, + "installations": 27909, + "installed": 8807, + "installing": 18301, + "installment": 25315, + "installs": 45568, + "instalment": 47766, + "instance": 34572, + "instant": 38810, + "instant": 10635, + "instantly": 17703, + "instap": 23758, + "instapic": 34378, + "instaweather": 43078, + "instaweatherpro": 43150, + "inste": 3571, + "instead": 4191, + "instein": 13421, + "instem": 27030, + "instin": 23382, + "instinct": 30544, + "institu": 4257, + "institute": 5861, + "institutes": 43674, + "institution": 
18823, + "institutional": 27442, + "institutions": 15207, + "instore": 41679, + "instru": 4544, + "instruc": 19648, + "instruction": 19407, + "instructional": 31022, + "instructions": 17040, + "instructor": 16087, + "instructors": 31998, + "instrument": 42196, + "instrument": 15806, + "instrumental": 23041, + "instruments": 14793, + "instyle": 41321, + "insu": 8805, + "insul": 9615, + "insulated": 42051, + "insulation": 28194, + "insulin": 29311, + "insult": 26673, + "insulting": 39646, + "insults": 40451, + "insur": 5024, + "insurance": 5870, + "insured": 31321, + "insurers": 43142, + "insurtech": 28716, + "int": 1828, + "int": 1207, + "inta": 38314, + "intact": 26870, + "intake": 19539, + "intan": 47695, + "inte": 1598, + "inte": 41900, + "intech": 26504, + "inted": 6147, + "integr": 5151, + "integral": 27018, + "integrate": 25735, + "integrated": 12797, + "integrating": 31555, + "integration": 12583, + "integrity": 14791, + "intel": 11778, + "intel": 11426, + "intellec": 13281, + "intellect": 47828, + "intellectu": 31966, + "intellectual": 18069, + "intelli": 5324, + "intellig": 5632, + "intelligence": 6846, + "intelligent": 14063, + "inten": 2967, + "intend": 36674, + "intended": 16812, + "intense": 10258, + "intensi": 22928, + "intensity": 19956, + "intensive": 21049, + "intent": 18881, + "intention": 26786, + "intentional": 29536, + "intentionally": 31215, + "intentions": 26710, + "inter": 1006, + "inter": 10093, + "interact": 21736, + "interacting": 35045, + "interaction": 17650, + "interactions": 22162, + "interactive": 9456, + "intercep": 23676, + "interception": 48762, + "interceptions": 45313, + "interchange": 34222, + "intercontinental": 31983, + "interdisciplinary": 38132, + "intere": 2008, + "interest": 5095, + "interested": 4620, + "interesting": 3628, + "interests": 16425, + "interface": 18753, + "interfaith": 38399, + "interference": 29099, + "interim": 19509, + "interior": 10700, + "interior": 7305, + "interiordesign": 12902, + "interiors": 14836, + "intermedi": 20246, + "intermediate": 24304, + "intermission": 44805, + "intermitt": 44946, + "intern": 9976, + "intern": 14068, + "internal": 11285, + "internally": 41134, + "internation": 42534, + "international": 8566, + "international": 2436, + "internationaldayof": 41518, + "internationally": 24059, + "internationalwomensday": 17682, + "interne": 32713, + "internet": 30180, + "internet": 4757, + "internetof": 44449, + "internetofthings": 45925, + "interns": 19902, + "internship": 16661, + "internships": 39410, + "interoper": 45754, + "interpre": 11162, + "interpret": 49154, + "interpret": 40459, + "interpretation": 20652, + "interpreted": 42157, + "interpreting": 46525, + "interro": 29548, + "interrup": 21609, + "interrupt": 48449, + "interrupted": 30288, + "intersec": 45246, + "intersection": 19210, + "interstate": 21963, + "interstellar": 41506, + "interval": 36032, + "intervals": 44884, + "interven": 18245, + "intervention": 16804, + "interventions": 28848, + "interview": 2885, + "interviewed": 11688, + "interviewing": 16399, + "interviews": 9910, + "intestin": 37938, + "intestinal": 38896, + "inthe": 7486, + "inti": 14459, + "intim": 38832, + "intimacy": 46430, + "intimate": 16382, + "intimid": 24041, + "intimidating": 44405, + "intimidation": 49258, + "inting": 15571, + "intl": 38186, + "intl": 14224, + "intment": 9020, + "intments": 21420, + "into": 35235, + "into": 1095, + "intoler": 28534, + "intolerance": 37808, + "intothe": 38511, + "intra": 20922, + "intrac": 46195, + "intram": 40956, + "intre": 29397, + 
"intrepid": 39127, + "intri": 15421, + "intric": 23763, + "intricate": 29616, + "intrigu": 18856, + "intrigue": 45140, + "intrigued": 40034, + "intriguing": 24334, + "intrin": 45181, + "intro": 2999, + "intro": 13224, + "introduc": 3621, + "introduce": 9813, + "introduced": 10446, + "introduces": 12933, + "introducing": 6256, + "introduction": 11812, + "introductory": 38121, + "intru": 22949, + "ints": 2514, + "intu": 17225, + "intuition": 40897, + "intuitive": 35224, + "inu": 21131, + "inuit": 41250, + "inus": 45857, + "inv": 2279, + "inv": 43786, + "inva": 10084, + "invade": 34609, + "invaded": 32596, + "invaders": 35188, + "invading": 40101, + "invali": 31592, + "invalid": 46998, + "invaluable": 33976, + "invasi": 38100, + "invasion": 13378, + "invasive": 19554, + "inve": 2024, + "inven": 26233, + "invent": 11665, + "invent": 23558, + "invented": 14100, + "invention": 23607, + "inventions": 44914, + "inventor": 22836, + "inventory": 19444, + "inver": 12061, + "inverness": 33080, + "inverte": 46397, + "inverted": 40709, + "invest": 4180, + "invest": 9716, + "invested": 22536, + "investig": 4626, + "investigate": 15703, + "investigated": 29180, + "investigates": 29621, + "investigating": 13713, + "investigation": 8194, + "investigations": 24020, + "investigative": 30233, + "investigator": 30528, + "investigators": 24121, + "investin": 40195, + "investing": 10554, + "investment": 5605, + "investments": 14675, + "investor": 15490, + "investors": 10486, + "invests": 38378, + "invic": 25253, + "invigor": 48722, + "invin": 30252, + "invincible": 38052, + "invisible": 16093, + "invit": 12454, + "invitation": 15032, + "invitational": 14511, + "invitations": 40120, + "invite": 8109, + "invited": 7731, + "invites": 16034, + "inviting": 14349, + "invo": 29417, + "invol": 4000, + "involve": 26325, + "involved": 5320, + "involvement": 19502, + "involves": 22652, + "involving": 14786, + "inwx": 35674, + "iny": 23257, + "inyour": 47954, + "io": 3167, + "io": 3752, + "ioc": 43018, + "iom": 33000, + "iom": 31135, + "ion": 14871, + "ion": 3668, + "ions": 26289, + "ior": 7354, + "ior": 2498, + "iority": 46016, + "iors": 6427, + "ios": 6614, + "iot": 32694, + "iot": 6627, + "iota": 37294, + "ious": 6994, + "iously": 38233, + "iow": 7439, + "iowa": 38847, + "iowa": 8290, + "ip": 1719, + "ip": 8600, + "ipa": 11199, + "ipad": 39067, + "ipad": 7491, + "ipads": 35281, + "ipc": 41981, + "iphone": 26030, + "iphone": 4314, + "iphones": 37561, + "ipl": 13440, + "ipment": 37824, + "ipo": 40218, + "ipo": 24090, + "ipod": 17889, + "ipp": 31706, + "ips": 26910, + "ipsw": 22221, + "ipswich": 24494, + "iq": 15554, + "iq": 19996, + "iqbal": 33553, + "ir": 582, + "ir": 742, + "ira": 4923, + "ira": 5371, + "irah": 35724, + "iran": 19273, + "iran": 5075, + "irandeal": 46533, + "irani": 37984, + "iranian": 14158, + "iraq": 8543, + "iraqi": 18617, + "irc": 41527, + "ird": 2770, + "ire": 3013, + "ire": 1454, + "ired": 32728, + "ired": 2995, + "ireland": 32806, + "ireland": 4157, + "irene": 21600, + "ires": 12435, + "irez": 21581, + "irgc": 47942, + "iri": 2155, + "iri": 13880, + "irical": 33366, + "irie": 42979, + "irina": 46664, + "iring": 10169, + "iris": 16437, + "irish": 9386, + "irish": 4889, + "irl": 34494, + "irl": 8570, + "irling": 26493, + "irls": 24344, + "irma": 22406, + "irn": 42603, + "iro": 23209, + "iro": 7280, + "iron": 7699, + "iron": 5391, + "ironic": 24518, + "ironically": 36779, + "ironing": 46655, + "ironman": 20330, + "irons": 30032, + "irony": 20681, + "irport": 27769, + "irr": 24641, + "irrational": 
47413, + "irregular": 38692, + "irrelevant": 34677, + "irresi": 31200, + "irresistible": 35252, + "irresponsible": 44714, + "irri": 21484, + "irrigation": 23761, + "irrit": 24218, + "irs": 6086, + "irst": 32701, + "iru": 48206, + "irvin": 47053, + "irvine": 24201, + "irving": 19738, + "irwin": 23750, + "iry": 7239, + "is": 595, + "is": 533, + "isa": 11034, + "isa": 6536, + "isaac": 37544, + "isaac": 13659, + "isab": 13357, + "isabel": 27466, + "isabella": 26192, + "isabelle": 31072, + "isable": 46631, + "isai": 15365, + "isaiah": 17952, + "isak": 40619, + "isance": 46893, + "isation": 7194, + "isback": 43811, + "isc": 39316, + "isch": 47888, + "isco": 5736, + "iscoming": 26458, + "isd": 46816, + "isd": 12002, + "ise": 7669, + "ise": 1479, + "ised": 2861, + "iselle": 48491, + "iser": 23080, + "iser": 5626, + "isers": 34879, + "ises": 5153, + "isf": 44036, + "isgreat": 34595, + "ish": 6844, + "ish": 1061, + "isha": 28050, + "ishable": 37949, + "ished": 35341, + "ishere": 46053, + "ishi": 26224, + "ishq": 27996, + "ishqba": 32503, + "ishqbaaaz": 36591, + "isi": 7233, + "isi": 17880, + "isil": 34636, + "isin": 37676, + "ising": 3426, + "isis": 7531, + "isk": 30171, + "isl": 31368, + "isla": 22807, + "islam": 6003, + "islam": 8770, + "islamabad": 19959, + "islamic": 31627, + "islamic": 9552, + "islamist": 38798, + "islamophobia": 43459, + "island": 13408, + "island": 2619, + "islander": 45651, + "islanders": 27804, + "islands": 7145, + "islay": 49279, + "isle": 19082, + "isle": 11849, + "isleof": 24718, + "isles": 21816, + "islife": 26433, + "islington": 34945, + "ism": 47730, + "ism": 1935, + "isma": 43937, + "ismail": 36140, + "isme": 43570, + "ismo": 41926, + "isms": 18700, + "isn": 2923, + "isner": 48246, + "isnow": 43694, + "isnt": 19416, + "iso": 2462, + "iso": 12263, + "isol": 11414, + "isolated": 19044, + "isolation": 26400, + "ison": 12949, + "ison": 4553, + "isons": 33318, + "isoo": 35857, + "isp": 31397, + "isp": 39041, + "isra": 3591, + "israel": 20837, + "israel": 4779, + "israeli": 8994, + "israelis": 45713, + "isreal": 47147, + "isro": 44841, + "iss": 11738, + "iss": 4950, + "issa": 38579, + "issa": 7560, + "issan": 49358, + "issance": 40828, + "issant": 38828, + "isse": 18986, + "ission": 37946, + "issu": 2049, + "issue": 3202, + "issued": 9246, + "issues": 4082, + "issuing": 37226, + "ist": 9751, + "ist": 2304, + "istanbul": 12258, + "istandwith": 33820, + "iste": 32563, + "ister": 14555, + "isthe": 46748, + "istic": 29556, + "ists": 8426, + "isu": 17030, + "isu": 23328, + "it": 529, + "it": 585, + "ita": 36920, + "ita": 2864, + "itable": 8915, + "ital": 2306, + "ital": 1660, + "itali": 11644, + "italia": 11025, + "italian": 20264, + "italian": 5175, + "italians": 44744, + "italk": 32894, + "italy": 4052, + "itan": 18383, + "itans": 40711, + "itar": 47161, + "itarian": 11599, + "itary": 17604, + "itas": 31634, + "itas": 13436, + "itate": 42457, + "itated": 36744, + "itation": 5070, + "itative": 22892, + "itc": 36449, + "itch": 2387, + "itch": 8147, + "itchen": 32664, + "itchy": 41980, + "ite": 2732, + "ite": 802, + "iteam": 37828, + "itec": 3099, + "itec": 43936, + "itech": 44215, + "itech": 23040, + "ited": 8603, + "ited": 1108, + "itel": 44638, + "itely": 4605, + "item": 8532, + "items": 6207, + "iter": 7938, + "iter": 19773, + "iteracy": 39634, + "iterate": 43106, + "iteration": 38790, + "ites": 2454, + "itez": 42131, + "itf": 35436, + "itfc": 36519, + "ith": 6133, + "ith": 1757, + "ithaca": 46257, + "iti": 760, + "iti": 6165, + "itia": 22634, + "itian": 23365, + "itic": 
11950, + "itical": 48767, + "itics": 33967, + "ities": 41423, + "ities": 1480, + "itim": 15676, + "itiner": 32803, + "itinerary": 41564, + "iting": 1257, + "ition": 25263, + "ition": 1104, + "itions": 5540, + "itious": 13329, + "itis": 33539, + "itis": 8388, + "itive": 3067, + "itly": 42240, + "ito": 22167, + "ito": 4661, + "iton": 21119, + "itor": 47267, + "itor": 4584, + "itors": 22005, + "itos": 24560, + "its": 7140, + "its": 902, + "itsa": 45032, + "itself": 7290, + "itsme": 41125, + "itss": 47040, + "itt": 1031, + "itt": 11228, + "itta": 21233, + "itte": 31962, + "itted": 24429, + "itten": 30014, + "itten": 4343, + "itter": 11456, + "itters": 13082, + "itti": 28629, + "ittin": 25646, + "itting": 3147, + "ittle": 24208, + "ittle": 21366, + "ittles": 38989, + "itton": 25707, + "itty": 35096, + "itu": 1668, + "itu": 32128, + "itude": 43382, + "itude": 5012, + "itudes": 20459, + "itunes": 7007, + "itup": 35838, + "iture": 25547, + "itus": 24364, + "itutes": 32883, + "itv": 20159, + "itv": 12805, + "ity": 2480, + "ity": 696, + "itya": 32055, + "itz": 14544, + "itz": 7807, + "iu": 14292, + "iu": 15575, + "ium": 10762, + "ius": 6740, + "iv": 6775, + "iv": 9315, + "iva": 42463, + "ivan": 15544, + "ivan": 15689, + "ivanka": 37914, + "ive": 26885, + "ive": 8653, + "ived": 15654, + "iver": 36849, + "iver": 44254, + "ives": 27333, + "ivf": 39159, + "iving": 45136, + "ivory": 16776, + "ivote": 45835, + "ivy": 36939, + "ivy": 16045, + "iw": 13058, + "iw": 46604, + "iwant": 42747, + "iwd": 16815, + "iwm": 44237, + "ix": 13272, + "ix": 8756, + "iy": 13704, + "iya": 18595, + "iyaki": 48395, + "iz": 2845, + "iz": 8407, + "iza": 37704, + "ization": 10847, + "ize": 10885, + "ized": 7690, + "izen": 34776, + "izer": 23895, + "izes": 45434, + "izing": 17354, + "izo": 46910, + "izz": 31779, + "izz": 46128, + "izzy": 28861, + "j": 73, + "j": 329, + "ja": 1586, + "ja": 2641, + "jaan": 25052, + "jab": 8059, + "jab": 9439, + "jac": 2293, + "jac": 30198, + "jace": 43286, + "jack": 2679, + "jack": 3267, + "jacked": 27923, + "jacket": 6164, + "jackets": 14745, + "jacki": 47418, + "jackie": 28023, + "jackie": 11716, + "jacking": 40929, + "jackman": 35723, + "jackpot": 23926, + "jacks": 19649, + "jackson": 12321, + "jackson": 4363, + "jacksonville": 19263, + "jaco": 6840, + "jacob": 14385, + "jacob": 9222, + "jacobs": 17482, + "jacobson": 46826, + "jacqu": 14495, + "jacqueline": 22843, + "jacques": 17799, + "jad": 12976, + "jad": 38691, + "jada": 37416, + "jade": 25123, + "jade": 14513, + "jaden": 37174, + "jadine": 37445, + "jae": 16869, + "jae": 15765, + "jaejoong": 43610, + "jaf": 19362, + "jag": 7984, + "jag": 36236, + "jagan": 48530, + "jagger": 30835, + "jags": 31086, + "jagu": 10096, + "jaguar": 44777, + "jaguar": 14757, + "jaguars": 21854, + "jah": 20067, + "jah": 11084, + "jahan": 44404, + "jahan": 47827, + "jai": 10542, + "jai": 13819, + "jail": 18574, + "jail": 9332, + "jailbreak": 45990, + "jailed": 19456, + "jails": 47833, + "jaime": 24716, + "jain": 21999, + "jaipur": 23593, + "jais": 48607, + "jait": 28910, + "jaitley": 32776, + "jak": 9225, + "jak": 30589, + "jakarta": 15471, + "jake": 13140, + "jake": 7419, + "jakob": 47358, + "jal": 8380, + "jal": 26773, + "jalan": 27270, + "jalap": 49081, + "jalape": 34263, + "jalapeño": 43017, + "jalen": 33548, + "jam": 1434, + "jam": 5201, + "jama": 8977, + "jama": 35366, + "jamaica": 13019, + "jamaican": 25144, + "jamal": 26108, + "jambo": 35599, + "jamboree": 38506, + "jame": 12341, + "james": 6963, + "james": 2392, + "jamesbond": 44704, + "jamesc": 47004, + 
"jameson": 31731, + "jami": 15092, + "jamie": 16454, + "jamie": 8078, + "jamiedor": 34310, + "jamiedornan": 34896, + "jammed": 35590, + "jammin": 35223, + "jamming": 25862, + "jammu": 25926, + "jams": 20243, + "jan": 1891, + "jan": 3334, + "jana": 18182, + "jane": 12389, + "jane": 6736, + "janeiro": 31740, + "janet": 29665, + "janet": 15872, + "jang": 41526, + "jang": 22074, + "jani": 22606, + "janice": 36048, + "janine": 46896, + "janis": 44233, + "jann": 35377, + "jans": 22578, + "jansen": 45354, + "janu": 3623, + "january": 3697, + "jap": 2299, + "jap": 49062, + "japan": 4502, + "japan": 3400, + "japanese": 27211, + "japanese": 4925, + "japs": 42121, + "jar": 5120, + "jar": 10837, + "jard": 25778, + "jardin": 37371, + "jare": 17654, + "jared": 35597, + "jared": 12571, + "jaredle": 36739, + "jaredleto": 37106, + "jaro": 35505, + "jarpad": 44497, + "jarre": 23385, + "jarrett": 30531, + "jars": 27583, + "jarvis": 29286, + "jas": 4492, + "jas": 17559, + "jasmin": 42989, + "jasmin": 47700, + "jasmine": 17056, + "jason": 10009, + "jason": 5395, + "jasper": 19827, + "jat": 26106, + "jau": 26932, + "jauregui": 48175, + "jav": 6234, + "java": 12918, + "javascri": 16289, + "javascript": 16423, + "jave": 46218, + "javed": 42268, + "javelin": 41701, + "javi": 47627, + "javier": 23307, + "jaw": 14804, + "jaw": 17307, + "jawa": 44790, + "jaws": 25491, + "jax": 22348, + "jax": 12390, + "jay": 3427, + "jay": 4155, + "jaya": 21960, + "jayanti": 37732, + "jaye": 45703, + "jayne": 35228, + "jays": 12393, + "jaz": 3465, + "jaz": 32874, + "jazeera": 38260, + "jazz": 11488, + "jazz": 4528, + "jazzfest": 36683, + "jazzy": 28191, + "jb": 21915, + "jb": 13637, + "jc": 14991, + "jc": 11517, + "jd": 18289, + "jd": 14125, + "jdm": 42013, + "je": 1013, + "je": 8776, + "jeal": 9964, + "jealous": 11093, + "jealousy": 37654, + "jean": 13943, + "jean": 6473, + "jeanette": 48167, + "jeanne": 29201, + "jeans": 10157, + "jeb": 35101, + "jec": 1347, + "ject": 6070, + "jed": 12166, + "jed": 38748, + "jeddah": 40982, + "jedi": 16681, + "jee": 29250, + "jee": 14870, + "jeep": 16593, + "jeep": 11286, + "jeeplife": 43100, + "jeet": 45542, + "jeet": 30944, + "jef": 10276, + "jeff": 6245, + "jeff": 5550, + "jefferson": 44711, + "jefferson": 13976, + "jeffery": 41470, + "jeffree": 45994, + "jeffrey": 32886, + "jeffrey": 16027, + "jeho": 42437, + "jeky": 43893, + "jekyll": 49405, + "jel": 9794, + "jelena": 48218, + "jelly": 19110, + "jelly": 13762, + "jellyfish": 30988, + "jem": 46326, + "jem": 37530, + "jen": 2554, + "jen": 12997, + "jenkins": 16162, + "jenn": 33921, + "jenn": 29869, + "jenna": 17125, + "jenner": 14260, + "jenni": 6774, + "jennie": 28875, + "jennifer": 19786, + "jennifer": 8613, + "jennings": 21564, + "jenny": 20165, + "jenny": 13414, + "jens": 40806, + "jensen": 35558, + "jensen": 19004, + "jensenackles": 41011, + "jeon": 45200, + "jeon": 43337, + "jeong": 47146, + "jeong": 39264, + "jeopar": 22988, + "jeopardy": 29613, + "jer": 2310, + "jer": 35307, + "jere": 5614, + "jeremi": 22362, + "jeremiah": 27301, + "jeremy": 14656, + "jeremy": 8127, + "jeremycorbyn": 37484, + "jeric": 25084, + "jericho": 28892, + "jerk": 23917, + "jerky": 40079, + "jermaine": 40722, + "jerome": 19876, + "jerry": 18163, + "jerry": 9164, + "jersey": 21921, + "jersey": 4471, + "jerseys": 15518, + "jerus": 12257, + "jerusalem": 12557, + "jes": 7686, + "jes": 35826, + "jess": 5313, + "jess": 13758, + "jesse": 23112, + "jesse": 11770, + "jessi": 24373, + "jessic": 14881, + "jessica": 45421, + "jessica": 8178, + "jessie": 19424, + "jester": 
44225, + "jesu": 19777, + "jesuit": 33234, + "jesus": 4070, + "jet": 11515, + "jet": 6565, + "jetblue": 45021, + "jeter": 38450, + "jets": 38584, + "jets": 10025, + "jett": 44541, + "jetty": 46382, + "jew": 27450, + "jewel": 4880, + "jewel": 17591, + "jewell": 9777, + "jewellers": 46265, + "jewellery": 11192, + "jewelry": 28018, + "jewelry": 6039, + "jewels": 20205, + "jewish": 29594, + "jewish": 9104, + "jews": 14200, + "jf": 31130, + "jf": 33718, + "jfc": 43652, + "jfk": 18486, + "jg": 41986, + "jg": 35138, + "jh": 24858, + "jh": 21485, + "jha": 47012, + "jha": 38092, + "jhal": 45695, + "jhar": 31546, + "jharkhand": 39001, + "jhb": 34631, + "ji": 3252, + "ji": 2697, + "jia": 32907, + "jian": 33427, + "jiang": 43309, + "jiang": 25762, + "jic": 48350, + "jic": 40215, + "jid": 24403, + "jie": 40005, + "jig": 15136, + "jig": 47430, + "jigsaw": 32987, + "jiha": 23194, + "jihad": 29637, + "jihoon": 44765, + "jil": 36225, + "jill": 24136, + "jill": 15254, + "jillian": 37820, + "jim": 3190, + "jim": 4550, + "jima": 20679, + "jimcantore": 43950, + "jimenez": 35947, + "jimi": 30565, + "jimin": 16286, + "jimmie": 45679, + "jimmy": 12215, + "jimmy": 6817, + "jimmyfallon": 45265, + "jin": 7927, + "jin": 8485, + "jind": 40609, + "jing": 34933, + "jing": 28607, + "jingle": 28699, + "jinnah": 43141, + "jinping": 39308, + "jinx": 42977, + "jinyoung": 38051, + "jio": 40501, + "jis": 25988, + "jis": 23515, + "jisoo": 43070, + "jit": 11947, + "jit": 20308, + "jitsu": 24530, + "jiu": 43351, + "jiu": 44123, + "jj": 12502, + "jj": 12790, + "jk": 20189, + "jk": 9702, + "jkt": 21494, + "jl": 25027, + "jl": 22911, + "jlo": 31017, + "jm": 24044, + "jm": 18657, + "jn": 24576, + "jn": 21717, + "jnr": 37145, + "jnu": 47142, + "jo": 683, + "jo": 3804, + "joachim": 48979, + "joan": 28064, + "joan": 12710, + "joann": 35484, + "joanna": 25357, + "joanne": 43736, + "joanne": 25092, + "joao": 45666, + "joaqu": 25140, + "joaquin": 30745, + "job": 13114, + "job": 2075, + "jobs": 3735, + "jobsearch": 45459, + "joburg": 39343, + "jocel": 36879, + "jocelyn": 47259, + "jock": 34485, + "jockey": 20126, + "jodh": 48689, + "jodi": 36812, + "jodi": 26888, + "jodie": 33100, + "jody": 32959, + "joe": 9309, + "joe": 3305, + "joel": 19819, + "joel": 11429, + "joes": 34756, + "joey": 16281, + "joey": 10455, + "jog": 37967, + "jog": 31691, + "jogging": 37922, + "joh": 1201, + "johan": 17416, + "johan": 27789, + "johann": 31180, + "johanna": 41494, + "johannes": 37779, + "johannesburg": 28377, + "johansson": 41512, + "johar": 34871, + "john": 2004, + "john": 1742, + "johncena": 46820, + "johnnie": 47947, + "johnny": 14464, + "johnny": 6904, + "johns": 14515, + "johnson": 26036, + "johnson": 4010, + "johnston": 19791, + "johnstone": 40766, + "johor": 34750, + "join": 14737, + "join": 1384, + "joined": 4954, + "joining": 5118, + "joins": 5681, + "joint": 6640, + "jointhe": 30422, + "jointly": 37471, + "joints": 27204, + "jojo": 41484, + "jojo": 22075, + "joke": 7198, + "joker": 18200, + "jokers": 44101, + "jokes": 11336, + "joking": 26112, + "joko": 44975, + "jol": 9174, + "jol": 36470, + "jolie": 31633, + "jolla": 46109, + "jolly": 21516, + "jom": 32152, + "jon": 3026, + "jon": 6139, + "jona": 6629, + "jonah": 47934, + "jonah": 27556, + "jonas": 42373, + "jonas": 13650, + "jonathan": 19026, + "jonathan": 7762, + "jone": 33934, + "jones": 19091, + "jones": 3538, + "jong": 20214, + "jong": 14726, + "jonghyun": 29023, + "jongin": 36957, + "joni": 43177, + "jonny": 28454, + "jonny": 21895, + "joo": 25807, + "joo": 27680, + "joom": 47543, + 
"joon": 18547, + "joong": 26544, + "jop": 30486, + "joplin": 42688, + "jor": 2482, + "jor": 31595, + "jordan": 14644, + "jordan": 4388, + "jordani": 46898, + "jordi": 44795, + "jorge": 48761, + "jorge": 18225, + "jos": 20560, + "jos": 19661, + "jose": 4647, + "jose": 7075, + "josef": 36584, + "josel": 47800, + "joseph": 14163, + "joseph": 6478, + "josephine": 34866, + "josh": 9998, + "josh": 5679, + "joshi": 24786, + "joshu": 9112, + "joshua": 11852, + "josi": 33583, + "josie": 33167, + "joss": 42834, + "josé": 27922, + "jou": 19921, + "jou": 32029, + "jour": 2078, + "jour": 17142, + "journ": 4563, + "journal": 6626, + "journalism": 10123, + "journalist": 9914, + "journalists": 12249, + "journals": 24391, + "journe": 48833, + "journey": 32156, + "journey": 3749, + "journeys": 23329, + "journo": 37034, + "journos": 46437, + "jovi": 33866, + "joy": 6308, + "joy": 4273, + "joyce": 43753, + "joyce": 15275, + "joye": 34052, + "joyeux": 41876, + "joyful": 24139, + "joyous": 32245, + "joyride": 46949, + "joys": 22996, + "jp": 18249, + "jp": 10557, + "jpg": 36950, + "jpn": 36212, + "jr": 13973, + "jr": 3605, + "js": 46243, + "js": 8006, + "jst": 26523, + "jt": 39480, + "jt": 18119, + "ju": 669, + "ju": 9970, + "jual": 38720, + "juan": 17148, + "juan": 9274, + "juana": 9081, + "jubi": 15485, + "jubil": 47743, + "jubilee": 16907, + "juco": 31570, + "jud": 8363, + "juda": 32478, + "judah": 41066, + "judaism": 42217, + "judas": 39532, + "judd": 29770, + "judg": 20012, + "judge": 16824, + "judge": 5656, + "judged": 33453, + "judgement": 25246, + "judges": 12575, + "judging": 16570, + "judgment": 24191, + "judi": 42546, + "judice": 28032, + "judicial": 19579, + "judiciary": 24545, + "judith": 24047, + "judo": 27011, + "judy": 34663, + "judy": 16510, + "jug": 27619, + "jugg": 38628, + "juic": 38761, + "juice": 37954, + "juice": 6916, + "juices": 36757, + "juicy": 17623, + "juju": 43020, + "juke": 32519, + "jukebox": 36411, + "jul": 34662, + "jul": 15975, + "jule": 40819, + "jules": 21996, + "juli": 3614, + "juli": 49160, + "julia": 10207, + "julian": 25459, + "julian": 12643, + "juliana": 46059, + "julie": 22534, + "julie": 10505, + "julien": 32595, + "juliet": 20641, + "juliette": 44804, + "julio": 24888, + "julius": 20870, + "july": 2272, + "jum": 20791, + "jumbo": 24678, + "jume": 45989, + "jump": 5519, + "jump": 6423, + "jumped": 16901, + "jumper": 16558, + "jumpers": 36485, + "jumping": 11476, + "jumpman": 48803, + "jumps": 18911, + "jumpsuit": 31044, + "jun": 1637, + "jun": 7719, + "junction": 11320, + "june": 23188, + "june": 2345, + "jung": 13086, + "jung": 13031, + "jungkook": 20040, + "jungle": 42421, + "jungle": 10865, + "juni": 4029, + "junior": 21167, + "junior": 5027, + "juniors": 16811, + "juniper": 33829, + "junk": 16000, + "junkie": 27613, + "junkies": 41207, + "juno": 28845, + "junto": 34282, + "jupit": 15270, + "jupiter": 16212, + "jur": 15896, + "jura": 14715, + "jurassic": 28844, + "jurassic": 21255, + "jurgen": 39263, + "juris": 37010, + "jurisdic": 37714, + "jury": 12931, + "jus": 14999, + "just": 1770, + "just": 761, + "justi": 14700, + "justic": 30399, + "justice": 16904, + "justice": 3604, + "justicefor": 25812, + "justiceleague": 41929, + "justices": 44356, + "justified": 34546, + "justify": 28192, + "justin": 7537, + "justin": 4394, + "justinbieber": 12501, + "justine": 34418, + "justintrudeau": 32184, + "justsaying": 42922, + "juve": 47717, + "juve": 23092, + "juven": 12944, + "juvenile": 19333, + "juvent": 13908, + "juventus": 47378, + "juventus": 16208, + "jux": 33552, + 
"juxta": 34964, + "jv": 37932, + "jv": 11805, + "jw": 30221, + "jw": 24215, + "jy": 20979, + "jyo": 27378, + "jyoti": 48696, + "jä": 45381, + "k": 74, + "k": 330, + "ka": 1595, + "ka": 1525, + "kaa": 34496, + "kab": 6554, + "kab": 45134, + "kabaddi": 41749, + "kabir": 38619, + "kabo": 47974, + "kabul": 26160, + "kac": 21693, + "kach": 14341, + "kad": 10901, + "kade": 41130, + "kaduna": 38053, + "kae": 22542, + "kaeper": 30070, + "kaepernick": 30713, + "kaf": 19870, + "kag": 13666, + "kag": 31003, + "kah": 16068, + "kah": 15463, + "kahn": 35397, + "kai": 12752, + "kai": 9601, + "kaido": 40255, + "kail": 23623, + "kaine": 39028, + "kair": 33027, + "kaiser": 43685, + "kaiser": 29960, + "kait": 19326, + "kaitlyn": 34948, + "kaj": 44788, + "kaj": 40381, + "kak": 10401, + "kak": 40128, + "kaka": 47689, + "kaku": 30900, + "kal": 4187, + "kal": 18712, + "kala": 45453, + "kala": 33105, + "kalam": 40142, + "kalamaz": 42328, + "kalamazoo": 46264, + "kalb": 34483, + "kale": 17162, + "kale": 16625, + "kaleido": 41144, + "kali": 17844, + "kali": 26964, + "kalin": 42776, + "kalyan": 23825, + "kam": 4104, + "kam": 26011, + "kamal": 31371, + "kamal": 28619, + "kamala": 45003, + "kame": 45235, + "kamen": 40738, + "kami": 28707, + "kamloops": 36602, + "kamp": 35179, + "kamp": 29522, + "kampala": 37134, + "kan": 2532, + "kan": 8101, + "kana": 35178, + "kand": 17478, + "kane": 32218, + "kane": 9765, + "kang": 12226, + "kang": 20789, + "kangar": 20622, + "kangaroo": 25513, + "kani": 40907, + "kani": 41948, + "kann": 18533, + "kannada": 30053, + "kano": 28201, + "kans": 34012, + "kansas": 25507, + "kansas": 6539, + "kansascity": 46134, + "kant": 39923, + "kant": 47132, + "kanth": 24427, + "kanu": 44565, + "kany": 13590, + "kanye": 29680, + "kanye": 14965, + "kanyewest": 31943, + "kap": 6804, + "kap": 45279, + "kapam": 48561, + "kapil": 32337, + "kapil": 42709, + "kapilshar": 48978, + "kaplan": 37401, + "kapoor": 9117, + "kapp": 36717, + "kappa": 20239, + "kapur": 42371, + "kar": 1813, + "kar": 5933, + "kara": 12552, + "karab": 40916, + "karachi": 13671, + "karak": 40372, + "karan": 20077, + "karan": 20931, + "karanjohar": 47621, + "karao": 16262, + "karaoke": 16640, + "karate": 21211, + "kardashi": 13619, + "kardashian": 14578, + "kare": 14310, + "kare": 38354, + "kareem": 38885, + "kareena": 41569, + "karen": 17719, + "karen": 10349, + "kari": 15339, + "kari": 15161, + "karim": 33477, + "karin": 43917, + "karina": 40250, + "karl": 20967, + "karl": 13134, + "karla": 42309, + "karma": 17658, + "karnat": 13994, + "karnataka": 15515, + "karo": 45305, + "kart": 47841, + "kart": 21310, + "karthik": 41397, + "karti": 23053, + "kartikeyan": 32584, + "karting": 41655, + "kas": 6119, + "kas": 14372, + "kasa": 46111, + "kash": 6954, + "kash": 21371, + "kashi": 47945, + "kashmir": 20251, + "kashmir": 10783, + "kashmiri": 35331, + "kasi": 45870, + "kasi": 32819, + "kasich": 39666, + "kat": 2844, + "kat": 9341, + "kata": 14558, + "kate": 11620, + "kate": 6699, + "katelyn": 45963, + "kath": 7386, + "kath": 19745, + "katharine": 41473, + "katherine": 17687, + "kathle": 18721, + "kathleen": 21709, + "kathmandu": 34456, + "kathniel": 36159, + "kathr": 14905, + "kathryn": 33142, + "kathryn": 19999, + "kathy": 34775, + "kathy": 18795, + "kati": 6515, + "kati": 29928, + "katic": 48058, + "katie": 24117, + "katie": 9076, + "katniss": 47916, + "kato": 27573, + "katrin": 31282, + "katrina": 21397, + "katrinakaif": 45845, + "kats": 44213, + "katsu": 49296, + "katsu": 43712, + "katy": 17609, + "katy": 14435, + "katyperry": 28309, + 
"katz": 30790, + "kau": 9299, + "kau": 36895, + "kauai": 44050, + "kaufman": 37188, + "kaur": 30518, + "kav": 10228, + "kavan": 18576, + "kavanaugh": 20252, + "kaw": 10842, + "kaw": 42719, + "kawa": 33244, + "kawaii": 26891, + "kawasaki": 28227, + "kawhi": 41220, + "kay": 4673, + "kay": 9862, + "kaya": 22752, + "kayak": 27043, + "kayaking": 28977, + "kaye": 33003, + "kayla": 17139, + "kaylee": 47215, + "kayo": 37021, + "kaz": 8812, + "kaz": 39622, + "kazakh": 25451, + "kazakhstan": 26720, + "kazan": 47641, + "kb": 27381, + "kb": 19960, + "kbs": 27418, + "kc": 10869, + "kc": 8638, + "kca": 14347, + "kcon": 39970, + "kcr": 46181, + "kd": 21826, + "kd": 15597, + "kday": 31074, + "kdrama": 48628, + "ke": 643, + "ke": 618, + "kea": 47926, + "kean": 43288, + "keane": 28635, + "keanu": 40608, + "kear": 21562, + "kearney": 36435, + "keating": 40045, + "keaton": 29975, + "kebab": 36497, + "ked": 11730, + "ked": 1243, + "kee": 9724, + "kee": 6760, + "keef": 42323, + "keefe": 46965, + "keegan": 31122, + "keel": 48376, + "keen": 17714, + "keen": 13218, + "keenan": 36276, + "keep": 2924, + "keep": 1726, + "keeper": 7650, + "keepers": 16130, + "keepin": 41712, + "keeping": 38371, + "keeping": 4873, + "keepit": 28044, + "keeps": 6333, + "keer": 27412, + "keerth": 47500, + "keerthyofficial": 48185, + "kees": 10791, + "keg": 32785, + "keh": 41272, + "keh": 36983, + "kei": 18735, + "kei": 24835, + "keith": 18762, + "keith": 8252, + "kej": 15674, + "kejri": 16617, + "kejriwal": 17334, + "keke": 39195, + "kel": 2825, + "kel": 7553, + "kele": 41765, + "kell": 16082, + "kell": 40103, + "keller": 21407, + "kelley": 23776, + "kelli": 45852, + "kelli": 46190, + "kellie": 49224, + "kellogg": 44218, + "kelly": 13417, + "kelly": 5220, + "kelown": 31708, + "kelowna": 32963, + "kelsey": 42295, + "kelsey": 23018, + "kelvin": 32859, + "kem": 31013, + "kem": 17349, + "kemp": 18302, + "kemp": 25325, + "ken": 1838, + "ken": 1702, + "kend": 7497, + "kendal": 44836, + "kendall": 34607, + "kendall": 16238, + "kendra": 36074, + "kendrick": 41787, + "kendrick": 21953, + "kendricklamar": 47020, + "kenne": 6209, + "kennedy": 38631, + "kennedy": 9004, + "kennel": 39595, + "kenneth": 46900, + "kenneth": 17839, + "kenney": 41373, + "kenny": 20185, + "kenny": 9595, + "kens": 29765, + "kensing": 21505, + "kensington": 24988, + "kent": 13875, + "kent": 8214, + "kentu": 9045, + "kentucky": 32230, + "kentucky": 10014, + "keny": 17374, + "kenya": 6181, + "kenyan": 22624, + "kenyans": 36263, + "kenyatta": 31012, + "kenzie": 38087, + "keo": 43062, + "kept": 7737, + "ker": 2352, + "ker": 1485, + "keral": 35122, + "kerala": 11881, + "kered": 26690, + "kerel": 32232, + "keri": 43447, + "kermit": 40908, + "kern": 40150, + "kernel": 40684, + "kerr": 20491, + "kerri": 41849, + "kerry": 24795, + "kerry": 13097, + "kers": 30347, + "kers": 2880, + "kershaw": 40785, + "kerson": 42810, + "kerswednesday": 48152, + "kert": 47279, + "kes": 38398, + "kes": 1115, + "kesh": 19751, + "kesha": 36526, + "kest": 15080, + "ket": 2715, + "ket": 1236, + "ketball": 38240, + "ketch": 22590, + "ketch": 35371, + "ketchup": 26724, + "kete": 25404, + "keted": 41396, + "keting": 15951, + "keto": 27485, + "keto": 28754, + "kets": 1632, + "kett": 23124, + "kett": 10312, + "kettering": 43779, + "kettle": 41992, + "kettle": 24303, + "kev": 22758, + "kev": 29419, + "kevin": 9419, + "kevin": 4685, + "kew": 38014, + "kew": 31409, + "kex": 30251, + "key": 2891, + "key": 1458, + "keyan": 27617, + "keyboard": 13017, + "keyboards": 49237, + "keychain": 31050, + "keye": 40516, + 
"keye": 20635, + "keyes": 18336, + "keynes": 32462, + "keynote": 7556, + "keys": 48912, + "keys": 6355, + "keystone": 30688, + "keyword": 42284, + "keywords": 48122, + "kf": 33308, + "kf": 42119, + "kfc": 22032, + "kg": 36772, + "kg": 7817, + "kgs": 46629, + "kh": 2166, + "kh": 7452, + "kha": 7333, + "kha": 18929, + "khair": 43742, + "khaki": 41646, + "khal": 13070, + "khaled": 29343, + "khali": 11324, + "khalid": 27166, + "khalifa": 21389, + "khalil": 36229, + "kham": 24892, + "khan": 13318, + "khan": 3873, + "khand": 43384, + "khand": 31110, + "khanna": 29931, + "khar": 18340, + "khar": 28578, + "khart": 37458, + "khat": 43290, + "khe": 26360, + "kher": 43843, + "khi": 39062, + "khi": 42925, + "khil": 34101, + "khloe": 45312, + "kho": 14022, + "kho": 28774, + "khou": 30656, + "khs": 21239, + "khtar": 45593, + "khu": 14041, + "khur": 32083, + "khy": 40917, + "khz": 45604, + "ki": 848, + "ki": 2608, + "kia": 8712, + "kian": 43961, + "kian": 25708, + "kians": 44010, + "kib": 43108, + "kiba": 37207, + "kic": 24003, + "kic": 27633, + "kicchasu": 44665, + "kicchasudeep": 45560, + "kick": 4102, + "kick": 4289, + "kickass": 39299, + "kickboxing": 36041, + "kicked": 12479, + "kicker": 26338, + "kickin": 34597, + "kicking": 7802, + "kickoff": 10245, + "kicks": 6989, + "kickstart": 40780, + "kickstarter": 13228, + "kid": 3948, + "kid": 3551, + "kidd": 24082, + "kidding": 14535, + "kiddo": 36360, + "kiddos": 29205, + "kidlit": 39064, + "kidlit": 33515, + "kidlitart": 41600, + "kidman": 44931, + "kidnap": 45100, + "kidnapp": 16183, + "kidnapped": 24737, + "kidnapping": 32361, + "kidney": 37835, + "kidney": 14610, + "kids": 15561, + "kids": 1911, + "kidz": 41938, + "kie": 8544, + "kie": 3094, + "kiefer": 48026, + "kiel": 40940, + "kiel": 25509, + "kien": 28782, + "kier": 20403, + "kier": 35575, + "kieran": 29231, + "kies": 36601, + "kies": 4993, + "kiest": 29755, + "kiev": 24585, + "kiewicz": 47574, + "kigali": 40278, + "kii": 39340, + "kik": 36176, + "kiki": 23962, + "kiko": 40861, + "kil": 4912, + "kil": 39337, + "kildare": 45541, + "kili": 24386, + "kilig": 49172, + "kilimanjaro": 43470, + "kilkenny": 33805, + "kill": 6163, + "kill": 4367, + "killa": 41355, + "killarney": 48813, + "killed": 3733, + "killer": 28230, + "killer": 6613, + "killers": 17614, + "killin": 25903, + "killing": 37977, + "killing": 5923, + "killings": 24918, + "kills": 9795, + "kiln": 44150, + "kilo": 39281, + "kilom": 26285, + "kilometers": 39192, + "kilometres": 43278, + "kilt": 49319, + "kim": 4639, + "kim": 4606, + "kimber": 16796, + "kimberley": 39859, + "kimberly": 27465, + "kimchi": 41027, + "kimi": 31536, + "kimkardashian": 35400, + "kimmel": 27820, + "kimono": 40024, + "kin": 1442, + "kin": 2667, + "kina": 28518, + "kind": 7204, + "kind": 3044, + "kinda": 6612, + "kinder": 12711, + "kinder": 24159, + "kindergarten": 16749, + "kindle": 24704, + "kindle": 10746, + "kindleunlimited": 32164, + "kindly": 13952, + "kindness": 45112, + "kindness": 10614, + "kinds": 14879, + "kine": 17607, + "kineni": 49080, + "kinetic": 37699, + "king": 2365, + "king": 674, + "kingdom": 21870, + "kingdom": 7364, + "kingdomhearts": 48570, + "kingdoms": 43890, + "kingfisher": 34330, + "kingjames": 33153, + "kingly": 33642, + "kingof": 27878, + "kings": 18590, + "kings": 4232, + "kingsley": 41807, + "kingston": 40736, + "kingston": 15393, + "kini": 41644, + "kinky": 37006, + "kinney": 37233, + "kino": 39000, + "kins": 31060, + "kins": 4386, + "kinson": 12095, + "kio": 28210, + "kio": 39401, + "kiosk": 39146, + "kip": 27636, + "kip": 15986, + 
"kipp": 43329, + "kir": 3476, + "kir": 32949, + "kira": 33038, + "kiran": 43234, + "kiran": 36603, + "kirby": 17065, + "kiri": 34170, + "kiri": 45826, + "kirk": 10639, + "kirk": 11508, + "kirkland": 43061, + "kiro": 39749, + "kirstel": 46483, + "kirsten": 31813, + "kirsty": 37787, + "kis": 3199, + "kis": 22796, + "kish": 25662, + "kiss": 43757, + "kiss": 5946, + "kissed": 22561, + "kisses": 47876, + "kisses": 11220, + "kissing": 18637, + "kistan": 29580, + "kit": 4566, + "kit": 4274, + "kita": 29961, + "kitch": 3850, + "kitchen": 18131, + "kitchen": 4485, + "kitchener": 34428, + "kitchens": 28301, + "kite": 47777, + "kite": 19867, + "kites": 45829, + "kits": 13730, + "kitt": 10840, + "kitten": 13063, + "kittens": 17216, + "kitties": 36013, + "kitty": 25067, + "kitty": 8417, + "kiwan": 38709, + "kiwanis": 46513, + "kiwi": 22440, + "kiwis": 48108, + "kiya": 41610, + "kj": 27385, + "kj": 28238, + "kja": 41048, + "kjv": 37387, + "kk": 4390, + "kk": 10849, + "kka": 19002, + "kke": 44239, + "kker": 32399, + "kki": 44672, + "kkk": 20073, + "kkkk": 15834, + "kkkk": 47160, + "kkkkkkkk": 31042, + "kko": 43965, + "kkr": 40855, + "kl": 8498, + "kl": 14134, + "kla": 11249, + "klan": 46935, + "klar": 41374, + "klaus": 31788, + "kle": 7612, + "kle": 7432, + "klein": 33475, + "klein": 17579, + "kley": 18594, + "kli": 31640, + "klin": 44809, + "klin": 41647, + "kline": 47580, + "kling": 40270, + "klm": 38859, + "klo": 15296, + "klopp": 26446, + "kltu": 25978, + "klu": 21852, + "kly": 45090, + "km": 29954, + "km": 4590, + "kman": 33312, + "kms": 24996, + "kn": 4825, + "kn": 23693, + "knapp": 33945, + "kne": 6358, + "knee": 9897, + "knees": 19115, + "kner": 31578, + "knew": 5009, + "kni": 6312, + "knick": 33286, + "knicks": 17657, + "knife": 44176, + "knife": 8960, + "knigh": 43099, + "knight": 17949, + "knight": 7355, + "knights": 10385, + "knit": 18745, + "knit": 14313, + "knitted": 28151, + "knitting": 18863, + "knives": 20910, + "kno": 1482, + "kno": 25362, + "knob": 29736, + "knobs": 47504, + "knock": 14195, + "knock": 11583, + "knocked": 15325, + "knocking": 20380, + "knockout": 22602, + "knocks": 24296, + "knoll": 43882, + "knot": 18412, + "knots": 32428, + "know": 4179, + "know": 1038, + "knowing": 9267, + "knowledge": 27864, + "knowledge": 5510, + "knowledgeable": 43391, + "knowles": 32631, + "known": 3102, + "knows": 4309, + "knowyour": 30773, + "knox": 18630, + "knox": 21833, + "knoxville": 23232, + "knu": 14812, + "knuck": 21333, + "knuckle": 42023, + "knuckles": 40127, + "knw": 40803, + "ko": 1313, + "ko": 2448, + "koala": 36654, + "kobe": 42644, + "kobe": 14470, + "kobo": 42390, + "koch": 25331, + "kochi": 36710, + "kodak": 30425, + "kodi": 46611, + "kof": 17528, + "koff": 47303, + "kofi": 40400, + "koh": 13379, + "koh": 31216, + "kohl": 48479, + "kohli": 17549, + "koi": 28150, + "kojima": 46419, + "kok": 32045, + "kok": 11225, + "koko": 42426, + "koko": 40003, + "kol": 7142, + "kol": 31023, + "kolkata": 18011, + "kom": 6686, + "kom": 24181, + "kombat": 29670, + "kombucha": 48615, + "komo": 31820, + "kon": 5743, + "kon": 29519, + "kona": 30203, + "kong": 31784, + "kong": 6506, + "konstant": 46583, + "koo": 12225, + "koo": 40472, + "kook": 16003, + "kool": 36755, + "kool": 26444, + "kop": 16623, + "kop": 38999, + "kor": 6428, + "kor": 24175, + "kore": 3919, + "korea": 5915, + "korean": 31949, + "korean": 8034, + "kori": 42842, + "korn": 45412, + "korn": 31492, + "kors": 34535, + "kos": 47438, + "kos": 22951, + "kosh": 45233, + "kosher": 36502, + "koso": 23892, + "kosovo": 28343, + "kot": 23323, 
+ "kot": 20701, + "kota": 21735, + "koto": 40945, + "koto": 29977, + "kou": 18502, + "kou": 39614, + "kour": 34134, + "kov": 17733, + "kov": 15156, + "kova": 26185, + "koval": 47903, + "kovic": 16886, + "kovich": 44794, + "kovsky": 33384, + "kow": 29764, + "kow": 23919, + "kowski": 17649, + "koz": 29598, + "kp": 16174, + "kp": 16894, + "kpa": 38759, + "kph": 41138, + "kpk": 42094, + "kpmg": 38243, + "kpop": 29534, + "kpop": 15859, + "kprc": 47832, + "kprs": 46253, + "kr": 7309, + "kr": 14107, + "kra": 5762, + "kraft": 28057, + "kraja": 29016, + "kraken": 48408, + "krakow": 40033, + "kram": 19075, + "kramer": 27495, + "kran": 33243, + "kranti": 47969, + "krat": 30470, + "kre": 8362, + "kreme": 43140, + "kremlin": 33979, + "kri": 3679, + "kris": 35251, + "kris": 12261, + "krish": 11487, + "krishna": 15863, + "krishnan": 46535, + "krispy": 49292, + "krist": 16490, + "kristen": 28881, + "kristen": 16644, + "kristi": 26895, + "kristin": 35408, + "kristin": 26785, + "kristina": 33180, + "krit": 36265, + "kro": 16193, + "kroger": 36344, + "kron": 25999, + "kru": 10609, + "kruger": 32948, + "krun": 43084, + "kry": 13995, + "krystal": 36554, + "ks": 10470, + "ks": 662, + "ksa": 25439, + "ksh": 36594, + "kst": 17420, + "kstate": 48590, + "ksu": 43496, + "kswx": 36180, + "kt": 17238, + "kt": 7792, + "ktm": 33989, + "ktn": 42170, + "kton": 37848, + "kts": 48577, + "ktv": 36444, + "ku": 1836, + "ku": 4827, + "kuala": 30336, + "kubball": 48995, + "kuber": 41336, + "kubernetes": 45144, + "kubrick": 37032, + "kuch": 39394, + "kud": 40818, + "kudos": 14481, + "kul": 11325, + "kul": 31514, + "kum": 18086, + "kum": 28148, + "kuma": 43139, + "kuma": 33920, + "kumar": 22329, + "kumar": 7674, + "kumb": 31391, + "kun": 6849, + "kun": 21842, + "kung": 39656, + "kung": 22347, + "kunst": 37881, + "kup": 39023, + "kups": 27240, + "kur": 4862, + "kurdi": 23504, + "kurdish": 21644, + "kurdistan": 24459, + "kurds": 20888, + "kuri": 46375, + "kuro": 28239, + "kuro": 47826, + "kurt": 31903, + "kurt": 14527, + "kus": 27618, + "kus": 27505, + "kush": 22264, + "kush": 24594, + "kushner": 36716, + "kut": 17283, + "kut": 36965, + "kuwait": 19679, + "kuya": 34815, + "kuz": 33253, + "kv": 27594, + "kv": 34249, + "kw": 10072, + "kw": 18339, + "kwa": 32784, + "kwa": 48576, + "kwame": 46681, + "kwan": 37100, + "kwan": 39447, + "kwang": 40260, + "kwe": 26050, + "kwi": 35327, + "kwon": 36369, + "kx": 28190, + "kx": 46442, + "ky": 2018, + "ky": 2383, + "kya": 29142, + "kyc": 37758, + "kyiv": 36422, + "kyle": 15847, + "kyle": 7539, + "kylie": 28282, + "kylie": 17983, + "kyliejenner": 47232, + "kylo": 47704, + "kyo": 13150, + "kyo": 6281, + "kyoto": 23223, + "kyr": 26329, + "kyrgy": 40013, + "kyrgyz": 48346, + "kyrie": 21857, + "kyu": 28296, + "kyu": 25490, + "kyuhyun": 37229, + "kyung": 41058, + "kyungsoo": 30280, + "kywx": 39940, + "kz": 48743, + "kz": 36848, + "kzn": 38264, + "kö": 32437, + "l": 75, + "l": 331, + "la": 572, + "la": 1210, + "laa": 44642, + "lab": 3537, + "lab": 4352, + "labe": 25749, + "label": 12235, + "label": 9093, + "labeled": 32720, + "labeling": 36825, + "labelled": 45188, + "labels": 17413, + "lable": 31879, + "labor": 11201, + "labor": 7878, + "laboratories": 43421, + "laboratory": 17664, + "laborday": 39324, + "labou": 32700, + "labour": 19586, + "labour": 6019, + "labourdoorstep": 37008, + "labout": 35961, + "labra": 37067, + "labrador": 25409, + "labs": 12021, + "laby": 29131, + "labyrin": 31782, + "labyrinth": 35594, + "lac": 4477, + "lac": 16189, + "lace": 30012, + "lace": 5421, + "laced": 36800, + 
"laces": 23281, + "lacey": 31754, + "lach": 30558, + "lack": 24915, + "lack": 8069, + "lacking": 30080, + "lacks": 34388, + "laco": 45882, + "lacrosse": 12915, + "lacy": 38645, + "lad": 15991, + "lad": 10707, + "ladak": 42312, + "ladakh": 45295, + "ladder": 16637, + "ladders": 47125, + "lade": 26447, + "laden": 28634, + "ladi": 12934, + "ladies": 28932, + "ladies": 3431, + "lads": 9803, + "lady": 7275, + "lady": 2909, + "ladybird": 43389, + "ladybug": 40038, + "ladygaga": 21232, + "laf": 47555, + "lafayette": 22683, + "lag": 30932, + "lag": 20394, + "laga": 30161, + "lage": 24369, + "lager": 36811, + "lager": 22989, + "lagh": 37237, + "laghate": 47565, + "laghateparth": 48780, + "lagi": 39786, + "lago": 42698, + "lago": 31476, + "lagoon": 22753, + "lagos": 12728, + "lagun": 18500, + "laguna": 23609, + "lah": 27315, + "lah": 4299, + "lahat": 42164, + "lahore": 16733, + "lai": 23947, + "laid": 42560, + "laid": 11160, + "lain": 46958, + "lain": 17151, + "laine": 35860, + "lair": 31981, + "lais": 34923, + "lak": 12890, + "lak": 26793, + "lake": 6441, + "lake": 2553, + "lakedistrict": 26437, + "lakel": 26133, + "lakeland": 34306, + "laker": 45717, + "lakers": 13570, + "lakes": 9265, + "lakeshore": 42595, + "lakeside": 30915, + "lakewood": 36417, + "lakh": 21487, + "lakhs": 37985, + "lakings": 34289, + "lakota": 45510, + "laksh": 24937, + "lakshmi": 39682, + "lal": 12301, + "lal": 19430, + "lala": 33661, + "lali": 21726, + "laliga": 32383, + "lam": 2022, + "lam": 5704, + "lama": 26049, + "lamar": 28678, + "lamar": 17284, + "lamb": 19863, + "lamb": 10034, + "lambda": 36687, + "lambert": 14574, + "lambeth": 43410, + "lambo": 45464, + "lamborgh": 18709, + "lamborghini": 19462, + "lambs": 30361, + "lame": 23192, + "lamin": 22337, + "laminated": 49079, + "lamo": 41461, + "lamont": 46719, + "lamp": 26700, + "lamp": 10725, + "lampard": 39989, + "lamps": 23424, + "lan": 1193, + "lan": 4872, + "lana": 15406, + "lanapar": 47437, + "lanaparrilla": 47819, + "lanc": 11872, + "lanca": 15694, + "lancashire": 20939, + "lancaster": 16446, + "lance": 26025, + "lance": 11609, + "lancer": 38195, + "lancers": 46392, + "lancia": 48698, + "lancs": 47540, + "land": 1567, + "land": 973, + "lande": 36556, + "landed": 9873, + "lander": 37247, + "lander": 9666, + "landers": 20019, + "landfall": 38465, + "landfill": 34947, + "landia": 41384, + "landing": 8292, + "landings": 46104, + "landlord": 28938, + "landlords": 35283, + "landmark": 15208, + "landmarks": 30393, + "lando": 25463, + "lando": 7065, + "landon": 32748, + "landrover": 38125, + "landry": 36137, + "lands": 40223, + "lands": 2961, + "landsc": 4384, + "landscape": 21123, + "landscape": 5727, + "landscapephotography": 28125, + "landscapes": 15344, + "landscaping": 25642, + "landslide": 31954, + "lane": 25534, + "lane": 3980, + "lanes": 10345, + "laney": 38552, + "lang": 7969, + "lang": 8578, + "lange": 32021, + "langford": 45615, + "langley": 28595, + "langu": 4095, + "language": 46103, + "language": 4781, + "languages": 13527, + "lani": 22964, + "lanka": 16221, + "lankan": 40531, + "lannister": 49056, + "lans": 43550, + "lansing": 30805, + "lant": 44504, + "lanta": 44768, + "lantern": 17185, + "lanterns": 33676, + "lantic": 32601, + "lantic": 27678, + "lants": 38425, + "lanyard": 46808, + "lao": 32475, + "lao": 29521, + "laos": 34353, + "lap": 7213, + "lap": 8639, + "lapd": 32557, + "lapel": 47961, + "lapland": 43633, + "laps": 18711, + "lapse": 33365, + "laptop": 10464, + "laptops": 32189, + "laq": 45026, + "lar": 1592, + "lar": 1652, + "lara": 19435, + "lard": 
40347, + "lare": 22415, + "laredo": 48427, + "large": 40234, + "large": 3638, + "largely": 21418, + "larger": 12567, + "largest": 4960, + "largo": 44161, + "lari": 34676, + "lark": 43164, + "lark": 23536, + "larkin": 34769, + "larry": 18642, + "larry": 8242, + "lars": 8669, + "larsen": 39721, + "larson": 27973, + "larvae": 44840, + "las": 8295, + "las": 2552, + "lasag": 31210, + "lasagna": 40683, + "lasalle": 43866, + "laser": 25607, + "laser": 9885, + "lasers": 37060, + "lash": 31995, + "lash": 18480, + "lashes": 21015, + "lass": 24203, + "lass": 18263, + "lassic": 39430, + "last": 10600, + "last": 952, + "lasted": 25711, + "lasting": 13434, + "lastnight": 30159, + "lasts": 20141, + "lasvegas": 17789, + "lat": 1591, + "lat": 28437, + "lata": 47114, + "latam": 40012, + "late": 13267, + "late": 2325, + "latel": 49035, + "lately": 11824, + "latepost": 48328, + "later": 24109, + "later": 2941, + "lateral": 26646, + "latest": 46805, + "latest": 2053, + "latex": 27520, + "lati": 16357, + "latimes": 43356, + "latin": 16695, + "latin": 9888, + "latina": 27936, + "latino": 45734, + "latino": 19470, + "latinos": 40233, + "lation": 6191, + "latitude": 37392, + "lative": 15719, + "lator": 9291, + "lators": 28278, + "latt": 33561, + "latte": 17697, + "latter": 26198, + "latvia": 30034, + "lau": 1853, + "lau": 23090, + "lauderdale": 24352, + "laugh": 4969, + "laugh": 6332, + "laughed": 16746, + "laughing": 8301, + "laughs": 14322, + "laughter": 10722, + "laun": 2944, + "launch": 31168, + "launch": 2904, + "launched": 6125, + "launcher": 35782, + "launches": 7023, + "launching": 8565, + "laundering": 34079, + "laundry": 14797, + "laur": 15256, + "laura": 17091, + "laura": 7763, + "laure": 16932, + "laureate": 25675, + "laurel": 43370, + "laurel": 19942, + "lauren": 10456, + "lauren": 7634, + "laurence": 29353, + "laurent": 23226, + "laurie": 20326, + "laus": 38895, + "laus": 28111, + "lause": 22269, + "laut": 47688, + "lav": 13767, + "lav": 26919, + "lava": 16765, + "laven": 15047, + "lavender": 16033, + "laver": 28188, + "lavish": 35443, + "law": 2874, + "law": 2606, + "lawful": 33845, + "lawler": 47862, + "lawless": 39468, + "lawmaker": 37169, + "lawmakers": 21190, + "lawn": 31675, + "lawn": 11024, + "lawrence": 32221, + "lawrence": 8820, + "laws": 7306, + "lawson": 22152, + "lawsuit": 14346, + "lawsuits": 44331, + "lawyer": 10552, + "lawyers": 14232, + "lax": 17750, + "lax": 10024, + "lay": 7205, + "lay": 6360, + "laye": 25995, + "layer": 12411, + "layered": 28520, + "layers": 15900, + "laying": 12333, + "layla": 45050, + "layne": 48721, + "layo": 21738, + "layoffs": 29019, + "layout": 17314, + "lays": 19546, + "layton": 38061, + "laz": 18806, + "lazar": 33075, + "lazarus": 49126, + "laze": 41559, + "lazer": 43735, + "lazio": 33010, + "lazy": 32614, + "lazy": 10753, + "lb": 21958, + "lb": 7422, + "lbc": 37694, + "lbj": 45683, + "lbloggers": 48695, + "lbs": 8912, + "lc": 9584, + "lc": 7225, + "lcd": 21356, + "lcfc": 25339, + "lcs": 32279, + "ld": 1431, + "ld": 730, + "lder": 6945, + "lders": 43221, + "ldn": 37050, + "ldn": 2517, + "ldnont": 25827, + "ldnt": 21690, + "ldr": 37279, + "lds": 31235, + "le": 534, + "le": 579, + "lea": 2246, + "lea": 13324, + "leach": 35527, + "lead": 1328, + "lead": 2784, + "leader": 14806, + "leader": 3236, + "leaderboard": 34519, + "leaders": 3546, + "leadership": 36876, + "leadership": 3652, + "leading": 3833, + "leads": 5335, + "leaf": 9377, + "leaf": 7232, + "leaflet": 38289, + "leaflets": 39014, + "leafs": 16688, + "leafy": 42616, + "leagu": 13317, + "league": 16635, 
+ "league": 2313, + "leagueof": 26022, + "leagueoflegends": 31737, + "leagues": 19888, + "leah": 24350, + "leah": 19308, + "leak": 42900, + "leak": 15489, + "leaked": 14353, + "leaking": 34097, + "leaks": 15657, + "leam": 39606, + "lean": 12447, + "lean": 8208, + "leaning": 24411, + "leanne": 41448, + "leans": 9357, + "leap": 29129, + "leap": 15392, + "leaps": 48080, + "lear": 1146, + "lear": 27663, + "learn": 16959, + "learn": 1768, + "learned": 6048, + "learnenglish": 49040, + "learner": 33547, + "learners": 19572, + "learning": 22632, + "learning": 2378, + "learns": 17569, + "learnt": 18959, + "leary": 36051, + "lease": 49041, + "lease": 14394, + "leased": 48352, + "leash": 36192, + "leasing": 29160, + "least": 3651, + "leather": 21417, + "leather": 5862, + "leau": 26498, + "leav": 3198, + "leave": 37512, + "leave": 3258, + "leaves": 5579, + "leaving": 5216, + "leban": 9360, + "lebanese": 23819, + "lebanon": 11695, + "leblanc": 46381, + "lebo": 44184, + "lebron": 11971, + "lebu": 47030, + "lec": 944, + "lec": 35374, + "leche": 46197, + "lect": 45392, + "lection": 18252, + "lections": 30995, + "lecture": 6617, + "lecturer": 23795, + "lectures": 21118, + "led": 8767, + "led": 912, + "ledge": 23647, + "ledge": 4815, + "ledger": 26817, + "leds": 36763, + "lee": 6224, + "lee": 2592, + "leed": 16483, + "leed": 40206, + "leeds": 38900, + "leeds": 7420, + "leek": 34585, + "leeminho": 37831, + "leen": 35311, + "leen": 15940, + "leep": 48875, + "leep": 10191, + "lees": 29324, + "lees": 34056, + "lef": 9152, + "left": 33949, + "left": 1823, + "leftist": 35143, + "lefto": 17437, + "leftover": 26414, + "leftovers": 28481, + "lefty": 33935, + "leg": 1211, + "leg": 4924, + "lega": 38674, + "legacy": 44108, + "legacy": 6447, + "legal": 17743, + "legal": 3998, + "legalization": 40584, + "legalize": 42921, + "legally": 14152, + "legate": 46009, + "lege": 8065, + "legen": 6105, + "legend": 5480, + "legend": 3539, + "legendary": 6053, + "legendof": 47915, + "legends": 6396, + "leges": 15356, + "legg": 18474, + "legg": 32511, + "legged": 25830, + "leggings": 22895, + "leggo": 43441, + "legi": 11183, + "legion": 35503, + "legion": 14525, + "legis": 7200, + "legislat": 16486, + "legislation": 14143, + "legislative": 16755, + "legislators": 31572, + "legislature": 22309, + "legit": 12563, + "legitim": 17656, + "legitimate": 24491, + "lego": 28117, + "lego": 7849, + "legos": 45359, + "legs": 7072, + "leh": 19105, + "leh": 29298, + "lehead": 28090, + "lehigh": 34527, + "lehman": 46094, + "lei": 15828, + "lei": 21830, + "leia": 32723, + "leic": 35073, + "leica": 30206, + "leice": 10026, + "leicester": 28795, + "leicester": 11510, + "leicestershire": 45358, + "leigh": 14849, + "leigh": 9292, + "leighton": 30782, + "leila": 41342, + "lein": 20026, + "lein": 28551, + "leinster": 32242, + "leip": 36401, + "leipzig": 41860, + "leis": 13133, + "leisure": 15849, + "leit": 35446, + "leith": 34141, + "lek": 26626, + "lek": 36535, + "lel": 46623, + "lele": 26075, + "lem": 10213, + "lem": 8428, + "leman": 24478, + "lemans": 26694, + "lement": 9693, + "lements": 15833, + "lemme": 23318, + "lemon": 12272, + "lemon": 7184, + "lemonade": 18884, + "lemons": 29576, + "lemore": 41147, + "len": 3687, + "len": 2159, + "lena": 22038, + "lend": 45397, + "lend": 24987, + "lender": 44734, + "lenders": 42443, + "lending": 20209, + "lene": 17628, + "leness": 36551, + "leng": 7861, + "length": 10130, + "lengths": 31858, + "lengthy": 32624, + "lenin": 41760, + "lennon": 18360, + "lennox": 45748, + "lenny": 48448, + "lenny": 30124, + "leno": 
45357, + "lenovo": 25886, + "lens": 8666, + "lenses": 21264, + "lent": 20943, + "lent": 22605, + "lentil": 41511, + "lentils": 44269, + "leo": 24008, + "leo": 8312, + "leon": 6581, + "leon": 9763, + "leonard": 43849, + "leonard": 13142, + "leonardo": 20282, + "leone": 22864, + "leop": 11234, + "leopard": 15931, + "leopards": 40996, + "leopold": 45501, + "lep": 48884, + "leppard": 41656, + "lepre": 45641, + "ler": 5587, + "ler": 1803, + "lero": 15067, + "lerosis": 35455, + "leroy": 32441, + "lers": 6247, + "lery": 38184, + "les": 4339, + "les": 840, + "lesbian": 17419, + "lesbians": 43182, + "lesh": 32282, + "lesley": 25506, + "lesli": 13649, + "leslie": 16244, + "lesn": 39568, + "lesnar": 42223, + "less": 3242, + "less": 1285, + "lesser": 20369, + "lessly": 13103, + "lessness": 24847, + "lesson": 7714, + "lessons": 7199, + "lest": 24372, + "lest": 6794, + "lester": 23157, + "lester": 24023, + "lestwe": 29726, + "lestweforget": 30273, + "let": 1898, + "let": 1094, + "leta": 34319, + "lete": 34078, + "letes": 6815, + "leth": 30022, + "leth": 42462, + "lethal": 21905, + "lethbridge": 48390, + "leti": 34176, + "letics": 14504, + "letit": 46423, + "leto": 32203, + "leton": 37674, + "leton": 7462, + "lets": 10448, + "lets": 3243, + "letsgo": 16967, + "letsgo": 29789, + "letstalk": 35591, + "lett": 22428, + "lett": 9778, + "lette": 41798, + "lette": 10301, + "letter": 15567, + "letter": 4861, + "lettering": 26382, + "letterman": 38447, + "letters": 9181, + "letting": 9510, + "letto": 35449, + "lettu": 17933, + "lettuce": 18573, + "leu": 15691, + "leuke": 31031, + "leukemia": 32097, + "leum": 21571, + "leur": 45806, + "lev": 17022, + "lev": 29950, + "levan": 42543, + "leve": 36271, + "level": 21682, + "level": 2931, + "leveled": 48453, + "levels": 6295, + "leven": 44792, + "leven": 34729, + "lever": 20178, + "lever": 23094, + "leverage": 24030, + "leveraging": 37948, + "levi": 25630, + "levi": 19113, + "leviathan": 41736, + "levin": 36949, + "levine": 26594, + "levit": 22715, + "levy": 17147, + "lew": 5063, + "lew": 25329, + "lewan": 48349, + "lewd": 45241, + "lewes": 40431, + "lewi": 19589, + "lewis": 22043, + "lewis": 6020, + "lewisham": 37385, + "lewisham": 47633, + "lewishamilton": 42960, + "lewood": 37951, + "lex": 6586, + "lex": 9658, + "lexa": 48259, + "lexi": 44231, + "lexi": 24679, + "lexington": 22308, + "lexus": 20694, + "ley": 2565, + "ley": 1066, + "leye": 37061, + "leys": 45609, + "leys": 14834, + "leyton": 46573, + "lez": 26442, + "lf": 33960, + "lf": 22078, + "lfc": 37826, + "lfc": 8267, + "lfw": 28514, + "lg": 4546, + "lg": 11368, + "lga": 39348, + "lgb": 25401, + "lgbt": 11743, + "lgbt": 9592, + "lgbti": 42730, + "lgbtq": 47625, + "lgbtq": 14939, + "lgm": 39389, + "lh": 27794, + "lh": 31159, + "lhp": 45092, + "lhs": 33170, + "li": 554, + "li": 4250, + "lia": 26118, + "lia": 6964, + "liability": 29139, + "liaison": 39294, + "liam": 5258, + "liam": 7167, + "lian": 18058, + "liance": 40864, + "liar": 16334, + "liars": 23863, + "lias": 46021, + "lib": 10249, + "lib": 13345, + "libby": 36832, + "libdems": 40869, + "liber": 3425, + "liberal": 48032, + "liberal": 9985, + "liberalism": 40018, + "liberals": 15981, + "liberated": 38690, + "liberation": 19507, + "liberia": 32208, + "libertarian": 35067, + "liberties": 48623, + "liberty": 23397, + "liberty": 8480, + "libr": 2856, + "libra": 43038, + "librarian": 25148, + "librarians": 37806, + "libraries": 14277, + "library": 25713, + "library": 3519, + "libre": 49210, + "libre": 31681, + "libs": 26401, + "liby": 36390, + "libya": 16417, + 
"libyan": 42319, + "lic": 2508, + "lic": 3376, + "lice": 45691, + "licen": 6706, + "licence": 20550, + "license": 10337, + "licensed": 18752, + "licenses": 36414, + "licensing": 24219, + "lich": 23979, + "lich": 25875, + "lick": 29197, + "lick": 17541, + "licking": 33013, + "licks": 42117, + "lics": 44552, + "lid": 39369, + "lid": 17678, + "lidge": 45558, + "lido": 35683, + "lids": 41609, + "lie": 6570, + "lie": 2538, + "lieb": 45387, + "liebe": 37749, + "lied": 6486, + "lief": 38428, + "lien": 45716, + "lier": 3626, + "liers": 19303, + "lies": 37236, + "lies": 3205, + "liest": 14020, + "liet": 41107, + "lieu": 20401, + "lieu": 35313, + "lieutenant": 22538, + "lif": 16456, + "life": 2666, + "life": 970, + "lifeat": 27801, + "lifeboat": 37404, + "lifecycle": 49171, + "lifein": 48447, + "lifeis": 24824, + "lifeisgood": 46433, + "lifel": 15025, + "lifeline": 38438, + "lifelong": 21358, + "lifeof": 36061, + "lifesaving": 48016, + "lifespan": 49257, + "lifestyle": 46512, + "lifestyle": 7037, + "lifestyles": 48521, + "lifetime": 48737, + "lifetime": 9107, + "liff": 34404, + "liffe": 38942, + "lift": 33146, + "lift": 6779, + "lifted": 16783, + "lifter": 38555, + "lifting": 10857, + "lifts": 18291, + "lig": 19915, + "lig": 38493, + "liga": 16802, + "ligam": 31077, + "ligament": 48705, + "ligan": 27962, + "ligans": 42133, + "ligh": 7510, + "light": 3885, + "light": 1395, + "lighted": 18404, + "lighten": 32717, + "lightening": 28170, + "lighter": 14102, + "lighthouse": 13717, + "lighting": 5799, + "lightly": 26878, + "lightning": 7756, + "lightroom": 41454, + "lights": 3073, + "lightweight": 16278, + "ligu": 42920, + "ligue": 29196, + "lik": 4831, + "lik": 18495, + "like": 9175, + "like": 789, + "liked": 7112, + "likefor": 48444, + "likeli": 40666, + "likelihood": 48158, + "likely": 5256, + "liken": 36084, + "likes": 4724, + "liking": 16810, + "lil": 6012, + "lil": 4461, + "lilac": 33647, + "lili": 26686, + "lili": 48411, + "lilies": 38110, + "lillard": 47016, + "lille": 38705, + "lilli": 40920, + "lillian": 41563, + "lilly": 47825, + "lilly": 21815, + "lily": 23803, + "lily": 10647, + "lim": 2377, + "lim": 17204, + "lima": 17589, + "limb": 27061, + "limb": 32363, + "limbo": 46179, + "limbs": 34886, + "lime": 17385, + "lime": 11193, + "limel": 48658, + "limer": 16915, + "limerick": 19501, + "limestone": 27272, + "limit": 18933, + "limit": 9973, + "limitations": 32730, + "limited": 49229, + "limited": 3472, + "limiting": 35812, + "limitless": 35833, + "limits": 11966, + "limo": 33166, + "limous": 47287, + "limpopo": 47175, + "lin": 1254, + "lin": 2424, + "lina": 26110, + "lincol": 6239, + "lincoln": 16957, + "lincoln": 7454, + "lincolnshire": 29014, + "lind": 6492, + "linda": 45410, + "linda": 10760, + "linden": 44076, + "linden": 34832, + "lindo": 38467, + "lindsay": 29846, + "lindsay": 16858, + "lindsey": 29475, + "lindsey": 18128, + "line": 3674, + "line": 1148, + "linear": 19816, + "linebacker": 29848, + "lined": 11842, + "lineman": 31501, + "linen": 20032, + "liner": 11618, + "liners": 24463, + "lines": 3418, + "liness": 28633, + "lineup": 7316, + "lineups": 33589, + "ling": 4851, + "ling": 1358, + "linger": 29593, + "lingerie": 18473, + "lingering": 46494, + "lings": 11390, + "lington": 27673, + "lington": 9002, + "lingu": 34449, + "lingui": 29942, + "linguistic": 46847, + "linguistics": 48651, + "lining": 11589, + "link": 18433, + "link": 2468, + "linke": 15088, + "linked": 11059, + "linkedin": 16302, + "linkin": 40287, + "linkin": 49291, + "linking": 23296, + "links": 8113, + "linn": 37431, + 
"lino": 41189, + "lino": 34995, + "lins": 6567, + "linson": 15401, + "linton": 36479, + "linus": 49303, + "linux": 14061, + "lio": 19395, + "lion": 8872, + "lion": 5567, + "lionel": 19441, + "lions": 7093, + "lip": 8630, + "lip": 8546, + "lipo": 38795, + "lipp": 38074, + "lips": 8847, + "lipse": 10351, + "lipstick": 15618, + "liqu": 6310, + "lique": 32680, + "liqueur": 43612, + "liqui": 33817, + "liquid": 18366, + "liquid": 10158, + "liquidity": 42812, + "liquor": 17828, + "lis": 7297, + "lis": 12749, + "lisa": 25236, + "lisa": 7424, + "lisam": 43072, + "lisboa": 40052, + "lisbon": 17708, + "lish": 12658, + "lish": 2354, + "lished": 22620, + "lisle": 21529, + "lism": 34390, + "liss": 45489, + "liss": 35433, + "lisse": 49309, + "list": 1734, + "list": 1998, + "lista": 37812, + "listed": 6457, + "listen": 17454, + "listen": 2672, + "listened": 15347, + "listener": 34819, + "listeners": 26901, + "listening": 3656, + "listens": 25912, + "lister": 45109, + "listing": 8145, + "listings": 21987, + "liston": 48041, + "lists": 12281, + "lit": 2213, + "lit": 4350, + "lita": 30100, + "lite": 29273, + "lite": 13694, + "litecoin": 39063, + "liter": 3085, + "liter": 34904, + "literacy": 12841, + "literal": 24269, + "literally": 4719, + "literary": 13586, + "literature": 11072, + "litfest": 40369, + "lith": 37005, + "lithium": 22794, + "litho": 31088, + "lithograph": 49022, + "lithu": 21045, + "lithuania": 27068, + "liti": 24292, + "litigation": 31769, + "lito": 47381, + "litre": 25786, + "litres": 39919, + "litt": 1216, + "litt": 47583, + "litter": 45431, + "litter": 17118, + "litters": 45300, + "little": 7024, + "little": 1274, + "littlemix": 29731, + "littlest": 48969, + "litur": 36830, + "litz": 30357, + "liu": 20466, + "liv": 13895, + "liv": 19901, + "livan": 12785, + "live": 3215, + "live": 1064, + "lived": 8867, + "livel": 17973, + "liveli": 26566, + "livelihood": 46497, + "livelihoods": 47716, + "lively": 19663, + "liveme": 35396, + "livemusic": 15688, + "liven": 41057, + "liveon": 22815, + "livepd": 38742, + "livepd": 31899, + "liver": 4755, + "liver": 12639, + "liverpool": 29778, + "liverpool": 5366, + "livery": 23248, + "lives": 3247, + "livesmatter": 20348, + "livestock": 22079, + "livestream": 16844, + "livetweet": 38546, + "livin": 28061, + "living": 10965, + "living": 2815, + "livingston": 30551, + "lix": 45068, + "liz": 8632, + "liz": 12242, + "liza": 28787, + "lizard": 17221, + "lizards": 41991, + "lizasober": 44487, + "lizasoberano": 45076, + "lizz": 34430, + "lizzie": 29530, + "lizzy": 32306, + "lj": 34211, + "lj": 32273, + "lju": 44562, + "lk": 39110, + "lk": 26596, + "lka": 21881, + "ll": 1657, + "ll": 865, + "lla": 15419, + "llama": 36679, + "llan": 17281, + "llan": 38728, + "lland": 31150, + "llc": 17161, + "lle": 26550, + "lle": 29732, + "llen": 41197, + "ller": 7722, + "llers": 26426, + "lli": 47015, + "lli": 13368, + "llis": 25518, + "lll": 27177, + "llll": 34874, + "llll": 43485, + "llo": 19293, + "lloy": 10092, + "lloyd": 33339, + "lloyd": 12400, + "llp": 28042, + "lls": 40535, + "lly": 26379, + "lm": 6981, + "lm": 15282, + "lma": 4493, + "lmao": 5121, + "lmaoo": 32623, + "lmaooo": 33362, + "lmaoooo": 45232, + "lmfa": 8928, + "lmfao": 11068, + "lmfaooo": 47658, + "lmp": 43575, + "lms": 30381, + "ln": 31644, + "ln": 18654, + "lng": 22339, + "lnp": 39679, + "lo": 549, + "lo": 2982, + "loa": 39678, + "load": 4515, + "load": 2834, + "loaded": 6756, + "loader": 28492, + "loading": 9975, + "loads": 8691, + "loaf": 26467, + "loaf": 18273, + "loan": 28431, + "loan": 8176, + "loans": 
14206, + "lob": 11197, + "lob": 46606, + "lobal": 34574, + "lobb": 27698, + "lobby": 12449, + "lobbying": 36047, + "lobe": 46325, + "lobes": 24148, + "lobo": 39323, + "lobos": 36586, + "lobster": 13793, + "loc": 1378, + "loc": 25826, + "local": 9202, + "local": 2029, + "localized": 49399, + "locally": 15603, + "locals": 15041, + "locate": 20490, + "located": 5677, + "location": 4372, + "locations": 9580, + "loch": 20188, + "loch": 14101, + "lock": 7201, + "lock": 4381, + "lockdown": 35636, + "locke": 29698, + "locked": 8371, + "locker": 14053, + "lockhart": 48642, + "lockheed": 36637, + "locking": 19978, + "locks": 13212, + "lockscreen": 42439, + "loco": 25555, + "locom": 22798, + "locomo": 46147, + "locomotive": 30439, + "locu": 33635, + "locust": 46237, + "lod": 45650, + "lodge": 10504, + "loe": 30113, + "loe": 25484, + "loeb": 49334, + "lof": 15011, + "loff": 31008, + "loft": 35707, + "loft": 20049, + "loftus": 46689, + "log": 3239, + "log": 7383, + "logan": 20655, + "logan": 10569, + "logans": 40752, + "logg": 43002, + "logged": 31457, + "logger": 39089, + "logging": 24444, + "logi": 3177, + "logia": 48031, + "logic": 10670, + "logical": 4791, + "logically": 24782, + "logie": 33445, + "logies": 7378, + "login": 31121, + "logist": 7407, + "logistics": 14755, + "logists": 12233, + "logne": 19911, + "logo": 31480, + "logo": 5750, + "logos": 24879, + "logs": 22745, + "logue": 27785, + "logy": 22721, + "logy": 1659, + "loh": 49129, + "loh": 37983, + "loi": 35128, + "loid": 31408, + "loin": 21760, + "loire": 46040, + "lois": 27040, + "lok": 19908, + "lok": 23575, + "loki": 24435, + "lol": 10721, + "lol": 1824, + "lola": 19065, + "lolita": 42615, + "lolla": 45483, + "lolli": 27906, + "lollipop": 34605, + "lolly": 48264, + "lolo": 16895, + "lolo": 37481, + "lolol": 25280, + "lololol": 34738, + "lolz": 35260, + "lom": 9279, + "loma": 42889, + "lombar": 25493, + "lombard": 46461, + "lombardi": 44346, + "lomond": 48941, + "lon": 1235, + "lon": 6507, + "london": 6835, + "london": 1789, + "londonmarathon": 35018, + "lone": 22220, + "lone": 13576, + "lonel": 28872, + "loneliness": 30310, + "lonely": 34509, + "lonely": 12368, + "lonelyplanet": 44984, + "long": 4792, + "long": 1538, + "longe": 25793, + "longer": 5349, + "longest": 10731, + "longevity": 35354, + "longh": 20286, + "longhorn": 41047, + "longhorns": 38295, + "longing": 38482, + "longlive": 47840, + "longs": 43618, + "longtime": 19685, + "loo": 731, + "loo": 11804, + "look": 8874, + "look": 1012, + "lookalike": 38307, + "lookbook": 39184, + "looked": 4913, + "lookin": 11254, + "looking": 36898, + "looking": 1312, + "lookout": 18330, + "looks": 1606, + "lool": 33125, + "loom": 37440, + "loom": 17199, + "looming": 35384, + "looms": 30550, + "loon": 28222, + "loona": 48137, + "looney": 45315, + "looo": 20902, + "loool": 36016, + "looool": 47038, + "looooo": 31484, + "loop": 19606, + "loop": 10408, + "loops": 21625, + "loos": 45723, + "loose": 43815, + "loose": 9786, + "loot": 21518, + "lop": 36734, + "lop": 17066, + "lopes": 49269, + "lopez": 12982, + "lor": 2179, + "lor": 11335, + "lord": 18896, + "lord": 3486, + "lorde": 35483, + "lords": 14969, + "lore": 12880, + "lore": 27218, + "loren": 13602, + "loren": 33398, + "lorenzo": 21342, + "lores": 34510, + "loretta": 40863, + "lori": 20164, + "lori": 23095, + "lorna": 46316, + "lorraine": 27602, + "lorry": 31354, + "los": 32217, + "los": 3087, + "losange": 14037, + "losangeles": 14638, + "lose": 43318, + "lose": 5354, + "loser": 18168, + "losers": 23201, + "loses": 14263, + "losing": 7918, + 
"loss": 34761, + "loss": 4327, + "losses": 16909, + "lost": 14258, + "lost": 2624, + "lostdog": 48482, + "lot": 5132, + "lot": 1954, + "loth": 43625, + "lothian": 31360, + "lothing": 42058, + "lotion": 25260, + "lotr": 34165, + "lots": 2958, + "lott": 42854, + "lotta": 29125, + "lotte": 16535, + "lotte": 7274, + "lottery": 16975, + "lottie": 48517, + "lotto": 28265, + "lotus": 13824, + "lou": 2207, + "lou": 9745, + "loubout": 38369, + "loud": 22884, + "loud": 7464, + "louder": 25904, + "loudest": 49214, + "loudly": 39256, + "lough": 21927, + "lough": 28045, + "loughborough": 49153, + "loui": 42173, + "louie": 25790, + "louis": 8916, + "louis": 4459, + "louisa": 40011, + "louise": 32275, + "louise": 13076, + "louisi": 12187, + "louisiana": 12946, + "louisville": 13860, + "louisvuitton": 44911, + "loun": 6466, + "lounge": 7141, + "lounging": 45430, + "lour": 29383, + "lourdes": 45071, + "louvre": 36995, + "lov": 8923, + "lov": 21229, + "lova": 37394, + "lovable": 38565, + "lovato": 18960, + "love": 2618, + "love": 793, + "lovecraft": 42405, + "loved": 3249, + "lovefl": 38884, + "loveher": 38306, + "lovehim": 45733, + "loveis": 30931, + "loveisland": 30970, + "loveislove": 43603, + "loveit": 24764, + "lovel": 8999, + "lovelies": 31412, + "lovelondon": 46493, + "lovely": 33250, + "lovely": 2165, + "lovemy": 20041, + "lovemyjob": 40130, + "loven": 33754, + "lover": 28508, + "lover": 7168, + "lovers": 48416, + "lovers": 5973, + "loves": 37773, + "loves": 3925, + "lovethe": 33040, + "lovethem": 48298, + "lovett": 47095, + "lovewins": 47687, + "loveyou": 39226, + "loveyou": 25964, + "loveyour": 26462, + "lovin": 33442, + "lovin": 16354, + "loving": 29568, + "loving": 3721, + "lovingly": 44100, + "low": 1049, + "low": 1042, + "loway": 16104, + "lowe": 17910, + "lowed": 22733, + "lowell": 24458, + "lower": 32578, + "lower": 4909, + "lowered": 34968, + "lowering": 35261, + "lowers": 36398, + "lowes": 38515, + "lowest": 12098, + "lowing": 8283, + "lowkey": 29481, + "lowry": 27444, + "lows": 4406, + "lox": 41725, + "loy": 4519, + "loy": 23929, + "loyal": 13032, + "loyalty": 14686, + "loyd": 44212, + "loyed": 29279, + "loyment": 18307, + "loyola": 32569, + "lp": 22282, + "lp": 6392, + "lpc": 44092, + "lpg": 47905, + "lpga": 34295, + "lps": 32094, + "lr": 20572, + "lr": 7041, + "lrt": 32996, + "ls": 19051, + "ls": 1268, + "lsd": 43766, + "lse": 46127, + "lse": 43886, + "lsu": 35428, + "lsu": 15672, + "lt": 13642, + "lt": 3333, + "ltc": 27664, + "ltd": 6802, + "lte": 25202, + "lton": 14237, + "lu": 664, + "lu": 9657, + "lub": 22469, + "lub": 11836, + "lubbock": 37660, + "lubric": 40963, + "luc": 7013, + "luc": 28014, + "luca": 21053, + "lucas": 23425, + "lucas": 10225, + "lucci": 45849, + "luce": 46217, + "lucent": 41552, + "lucer": 36042, + "luch": 36646, + "lucha": 38449, + "luci": 8787, + "lucia": 22290, + "luciano": 46365, + "lucid": 44540, + "lucie": 39461, + "lucifer": 46224, + "lucifer": 27687, + "lucille": 47454, + "lucin": 27523, + "luck": 9647, + "luck": 2820, + "luckiest": 42469, + "luckily": 20100, + "lucknow": 29407, + "lucky": 20495, + "lucky": 4133, + "lucrative": 41485, + "lucy": 17262, + "lucy": 10120, + "lud": 14288, + "lude": 28755, + "ludo": 40141, + "ludwig": 30633, + "lue": 45199, + "luf": 25264, + "lufc": 17818, + "luffy": 39047, + "lufthan": 37769, + "lufthansa": 39145, + "lug": 45521, + "lugg": 19673, + "luggage": 20138, + "luhan": 20975, + "luigi": 28444, + "luis": 25231, + "luis": 11339, + "luiz": 39633, + "lujah": 31639, + "luk": 21652, + "luka": 34878, + "lukaku": 37177, + 
"lukas": 37941, + "luke": 11970, + "luke": 5652, + "lul": 20861, + "lulla": 37019, + "lullaby": 41676, + "lulu": 32052, + "lulu": 26935, + "lum": 18112, + "lum": 5997, + "lumb": 36231, + "lumber": 27421, + "lumber": 34692, + "lumi": 41437, + "lumia": 31912, + "lumin": 15867, + "luminous": 37913, + "lump": 38704, + "lumpur": 34411, + "lun": 3221, + "lun": 49390, + "luna": 14425, + "lunar": 16043, + "lunatic": 45874, + "lunch": 10954, + "lunch": 2772, + "luncheon": 15104, + "lunches": 29705, + "lunchtime": 14330, + "lund": 30975, + "lund": 20181, + "lunes": 35648, + "lung": 38479, + "lung": 16271, + "lungs": 27366, + "lup": 27413, + "lupita": 49352, + "lupus": 36017, + "lur": 14439, + "lure": 31376, + "lures": 46747, + "lurking": 29941, + "lus": 7158, + "lusci": 38004, + "luscious": 39935, + "lush": 40382, + "lush": 16263, + "lust": 42071, + "lust": 12662, + "lustre": 46673, + "luther": 21848, + "luther": 17208, + "lutheran": 27341, + "luton": 28288, + "luv": 24726, + "luv": 8502, + "lux": 3439, + "lux": 16704, + "luxe": 26373, + "luxemb": 21314, + "luxembour": 22712, + "luxembourg": 23949, + "luxu": 16112, + "luxurious": 17292, + "luxury": 12083, + "luxury": 5247, + "luxurytravel": 29010, + "luz": 41008, + "lv": 10862, + "lv": 11184, + "lvl": 31256, + "lw": 40515, + "lw": 35115, + "lx": 30789, + "ly": 1251, + "ly": 597, + "lydia": 24316, + "lyf": 43688, + "lyfe": 30787, + "lyft": 32944, + "lying": 7175, + "lyk": 46376, + "lyle": 36828, + "lym": 20087, + "lyme": 31167, + "lymph": 30073, + "lymphoma": 37648, + "lyn": 3957, + "lyn": 5054, + "lynch": 31586, + "lynch": 13560, + "lynd": 33416, + "lynda": 42959, + "lyndon": 48518, + "lynn": 25303, + "lynn": 10667, + "lynne": 26900, + "lynx": 28941, + "lyon": 17176, + "lyons": 29453, + "lyric": 24366, + "lyric": 21291, + "lyrical": 33358, + "lyricist": 49013, + "lyrics": 9551, + "lyrix": 46814, + "lys": 45054, + "lyte": 40059, + "lywood": 4012, + "lz": 30818, + "lé": 39641, + "m": 76, + "m": 332, + "ma": 577, + "ma": 1226, + "maa": 42774, + "maa": 21555, + "maan": 33668, + "maar": 48927, + "maas": 43332, + "mab": 35639, + "mabel": 47319, + "mable": 23001, + "mably": 40082, + "mabu": 44682, + "mac": 1961, + "mac": 4945, + "macar": 21558, + "macaroni": 41824, + "macarthur": 36785, + "macau": 43984, + "macau": 33370, + "macbeth": 36321, + "macbook": 20617, + "macdonald": 20315, + "mace": 44869, + "maced": 21102, + "macedonia": 27071, + "macfar": 45374, + "macfarlane": 48825, + "mach": 2637, + "mach": 35091, + "machado": 42318, + "mache": 43220, + "macher": 29330, + "machi": 41783, + "machin": 17972, + "machine": 11539, + "machine": 4169, + "machinelearning": 13621, + "machinery": 21858, + "machines": 11108, + "machining": 45562, + "macho": 43977, + "macht": 45225, + "macin": 36533, + "mack": 8590, + "mack": 12145, + "mackay": 32497, + "macken": 48057, + "mackenzie": 22351, + "mackerel": 35002, + "mackin": 26010, + "macklemore": 41758, + "macle": 33843, + "maclean": 47137, + "macleod": 43684, + "macmillan": 36364, + "macmillan": 35191, + "macon": 35818, + "macos": 45469, + "macqu": 38365, + "macquarie": 40858, + "macro": 20891, + "macro": 16626, + "macron": 24859, + "macs": 46548, + "macy": 17113, + "macys": 47652, + "mad": 2740, + "mad": 3843, + "mada": 37799, + "madagas": 24758, + "madagascar": 25744, + "madam": 33634, + "madam": 27538, + "madame": 23507, + "madd": 31717, + "madden": 19093, + "maddie": 39959, + "maddie": 18875, + "maddow": 32644, + "maddy": 31734, + "made": 5388, + "made": 1105, + "madein": 13670, + "madeira": 33810, + "madel": 
34532, + "madele": 29831, + "madeleine": 33264, + "madeline": 33905, + "madewith": 28627, + "madewithunity": 43190, + "madhu": 23000, + "madhuri": 38346, + "madhuridixit": 43889, + "madhya": 48302, + "madi": 6527, + "madi": 27282, + "madison": 24798, + "madison": 8791, + "madmen": 45452, + "madness": 8755, + "madon": 44852, + "madonna": 14137, + "madra": 27416, + "madras": 42046, + "madre": 42130, + "madri": 5529, + "madrid": 5909, + "mads": 41201, + "madu": 34913, + "madurai": 49159, + "maduro": 32912, + "mae": 16898, + "mae": 17339, + "maer": 47088, + "maestro": 24140, + "mafi": 47164, + "mafia": 14890, + "mag": 1191, + "mag": 4508, + "maga": 8694, + "magaz": 2974, + "magazine": 3113, + "magazines": 22253, + "magdal": 29673, + "mage": 46568, + "mage": 10923, + "magee": 43872, + "magenta": 38091, + "magento": 42442, + "mages": 31059, + "maggi": 29611, + "maggie": 41443, + "maggie": 14524, + "maggio": 49087, + "magh": 45555, + "magi": 19270, + "magic": 13061, + "magic": 3778, + "magical": 36408, + "magical": 7823, + "magician": 26368, + "magin": 42678, + "maging": 41310, + "magn": 10290, + "magna": 34076, + "magne": 9921, + "magnesium": 36379, + "magnet": 18240, + "magnetic": 13838, + "magnets": 33030, + "magni": 24297, + "magnific": 9725, + "magnificent": 10724, + "magnitude": 22955, + "magno": 21184, + "magnolia": 27123, + "magnu": 45198, + "magnum": 23496, + "magnus": 26275, + "magpie": 45973, + "mags": 31021, + "maguire": 26470, + "mah": 7206, + "mah": 10801, + "maha": 12237, + "maha": 33983, + "mahal": 22301, + "mahan": 45191, + "mahar": 11635, + "maharaj": 38488, + "maharashtra": 19328, + "mahat": 32434, + "mahatma": 40530, + "mahe": 15756, + "maher": 29826, + "mahesh": 33448, + "mahesh": 22095, + "mahi": 32529, + "mahi": 38659, + "mahin": 24113, + "mahindra": 31285, + "mahmoud": 41361, + "mahog": 30804, + "mahogany": 33084, + "mahon": 45864, + "mahon": 20371, + "mahone": 26634, + "mai": 7138, + "mai": 14595, + "maia": 46585, + "maid": 23148, + "maid": 10226, + "maidan": 37346, + "maiden": 37011, + "maiden": 13809, + "maids": 27305, + "maidstone": 44395, + "mail": 10478, + "mail": 2614, + "mailbox": 31482, + "mailed": 42314, + "mailing": 26680, + "mailonline": 26021, + "mails": 45213, + "main": 3904, + "main": 2623, + "maine": 18639, + "maine": 7836, + "mained": 15609, + "mainedcm": 15845, + "mainland": 27629, + "mainly": 15280, + "mains": 33656, + "mainst": 42102, + "mainstream": 18034, + "maintain": 12954, + "maintained": 26665, + "maintaining": 21964, + "maintains": 38335, + "mainten": 9399, + "maintenance": 9610, + "mais": 28153, + "maisie": 47355, + "maison": 37065, + "maison": 27626, + "mait": 26387, + "maize": 35386, + "maj": 2948, + "maj": 28723, + "maja": 47498, + "maje": 9852, + "majestic": 15335, + "majesty": 21188, + "major": 8008, + "major": 3350, + "majority": 10508, + "majors": 23597, + "mak": 11271, + "mak": 19253, + "makar": 42242, + "makati": 39402, + "make": 3232, + "make": 1078, + "makeaw": 45859, + "makeinindia": 42739, + "makeit": 26308, + "maken": 47093, + "makeover": 17926, + "maker": 15196, + "maker": 4836, + "makers": 6577, + "makerspace": 42400, + "makes": 2088, + "makeshift": 43274, + "makeu": 41707, + "makeup": 26402, + "makeup": 5853, + "makeyourown": 34090, + "makeyourownlane": 34823, + "maki": 34514, + "makin": 43096, + "makin": 22407, + "making": 17976, + "making": 1665, + "makk": 39852, + "maknae": 44118, + "mako": 49061, + "mal": 1662, + "mal": 3796, + "mala": 28290, + "malade": 36928, + "malaga": 35395, + "malala": 41137, + "malam": 48956, + 
"malaria": 24929, + "malawi": 23405, + "malay": 5323, + "malay": 42430, + "malayalam": 34860, + "malaysi": 39668, + "malaysia": 8146, + "malaysian": 21136, + "malbec": 47741, + "malcol": 12645, + "malcolm": 14139, + "maldives": 16795, + "male": 11326, + "male": 2801, + "males": 14426, + "malhotra": 28866, + "mali": 6701, + "mali": 22669, + "malia": 46714, + "malibu": 21723, + "malicious": 42147, + "malign": 41122, + "malik": 11394, + "mall": 10984, + "mall": 6220, + "mallorca": 28082, + "mallory": 38968, + "malls": 36447, + "malm": 44071, + "malnutrition": 41153, + "malo": 43518, + "malone": 19852, + "maloney": 45897, + "mals": 25370, + "malt": 21688, + "malta": 16989, + "maltese": 39838, + "malvern": 39356, + "malware": 24153, + "mam": 4404, + "mam": 17778, + "mama": 7133, + "mamamoo": 36012, + "mamas": 42395, + "mamba": 44189, + "mament": 45690, + "mami": 43858, + "mamma": 34893, + "mammal": 33385, + "mammals": 31987, + "mammoth": 28022, + "man": 723, + "man": 786, + "mana": 29467, + "mana": 15837, + "manafort": 40108, + "manag": 1830, + "manage": 9770, + "managed": 7928, + "management": 3319, + "manager": 3898, + "managerial": 44261, + "managers": 12853, + "manages": 29699, + "managing": 10892, + "manas": 44188, + "manatee": 46558, + "mance": 2324, + "manchester": 24424, + "manchester": 4651, + "mancini": 47681, + "mancity": 31538, + "mancrush": 36945, + "mancrushmonday": 39307, + "mand": 4325, + "mand": 27244, + "mandala": 41106, + "mandarin": 26455, + "mandate": 26228, + "mandatory": 19934, + "mandel": 34960, + "mandela": 16280, + "mandi": 38961, + "mandir": 35815, + "mando": 34006, + "mands": 12340, + "mandu": 31440, + "mandy": 41505, + "mandy": 24302, + "mane": 44471, + "mane": 16044, + "maneu": 33216, + "mang": 25616, + "mang": 31096, + "manga": 11873, + "mangal": 43027, + "manger": 48251, + "mango": 43831, + "mango": 13962, + "mangrove": 47180, + "manhatt": 10152, + "manhattan": 10961, + "mani": 5654, + "mani": 10718, + "mania": 8435, + "maniac": 31814, + "maniacs": 41444, + "manian": 40077, + "manic": 23017, + "manic": 37825, + "manicure": 33637, + "manife": 14379, + "manifest": 34422, + "manifestation": 48348, + "manifesto": 20907, + "manil": 38827, + "manila": 10969, + "manipu": 40261, + "manipul": 19237, + "manipulation": 30277, + "manipur": 47757, + "manish": 41759, + "manish": 44720, + "manit": 15693, + "manitoba": 20342, + "manjaro": 41489, + "mankind": 24155, + "manly": 25194, + "mann": 19396, + "mann": 4783, + "manne": 30160, + "manned": 26139, + "mannequin": 43388, + "manner": 20700, + "manners": 31693, + "manning": 15996, + "manny": 37054, + "manny": 20933, + "mano": 15753, + "mano": 24016, + "manoj": 41146, + "manor": 41830, + "manor": 13614, + "mans": 28422, + "mans": 7746, + "mansfield": 25543, + "manship": 15460, + "mansion": 13404, + "manslaughter": 48632, + "manson": 26715, + "mant": 25122, + "mant": 27037, + "manta": 41431, + "mantis": 39946, + "mantle": 22159, + "mantra": 25162, + "manu": 3404, + "manu": 25799, + "manual": 12268, + "manuel": 29171, + "manuel": 9567, + "manufac": 5105, + "manufacture": 27741, + "manufactured": 24010, + "manufacturer": 15668, + "manufacturers": 18763, + "manufacturing": 8386, + "manure": 47907, + "manus": 28181, + "manuscript": 24365, + "manuscripts": 40765, + "manutd": 20994, + "many": 28484, + "many": 1346, + "manziel": 40637, + "mao": 47447, + "mao": 25605, + "maori": 43400, + "map": 25180, + "map": 3923, + "maple": 21980, + "maple": 10570, + "mapleleafs": 41257, + "mapoli": 28768, + "mapp": 36894, + "mapped": 41596, + 
"mapping": 15231, + "maps": 8765, + "mapu": 42082, + "mar": 675, + "mar": 3091, + "mara": 15655, + "marais": 47913, + "maran": 44732, + "marath": 16274, + "marathi": 34102, + "marathon": 40764, + "marathon": 5910, + "marau": 38475, + "marbella": 36182, + "marble": 45429, + "marble": 13071, + "marbles": 42931, + "marc": 14054, + "marc": 9075, + "marca": 38242, + "marcel": 17726, + "marcel": 24652, + "marcelo": 35939, + "march": 10638, + "march": 2227, + "marche": 36173, + "marched": 37976, + "marches": 38249, + "marchfor": 31721, + "marching": 15082, + "marchmadness": 28555, + "marci": 36698, + "marcia": 41075, + "marck": 47733, + "marco": 24719, + "marco": 10924, + "marcor": 39945, + "marcorubio": 41143, + "marcos": 21696, + "marcu": 20760, + "marcus": 48955, + "marcus": 9895, + "mardi": 39728, + "mardi": 29229, + "mardigras": 43343, + "mare": 26512, + "mare": 8870, + "mares": 19724, + "marg": 44014, + "margar": 16838, + "margare": 10232, + "margaret": 12185, + "margarita": 25958, + "margaritas": 42679, + "margate": 37428, + "margin": 19464, + "margin": 21357, + "marginal": 38320, + "margins": 33763, + "margot": 37144, + "mari": 2603, + "mari": 19322, + "maria": 41109, + "maria": 6595, + "mariachi": 44299, + "mariah": 31214, + "mariah": 24789, + "mariahcarey": 36538, + "marian": 41129, + "marian": 24677, + "mariana": 44224, + "marianne": 32214, + "mariano": 43988, + "marie": 20657, + "marie": 7864, + "marietta": 46634, + "marig": 41002, + "marijuana": 9864, + "maril": 14611, + "marilyn": 38959, + "marilyn": 18489, + "marin": 8910, + "marin": 23992, + "marina": 12060, + "marinated": 33406, + "marine": 20674, + "marine": 5746, + "mariner": 39972, + "mariners": 19086, + "marines": 15018, + "marino": 30878, + "mario": 39176, + "mario": 7600, + "marion": 37765, + "marion": 18397, + "maris": 21512, + "maris": 33093, + "marisa": 42938, + "mariska": 44703, + "marissa": 31219, + "marist": 48223, + "mariti": 13124, + "maritime": 14331, + "marj": 38639, + "mark": 3805, + "mark": 2110, + "marke": 2399, + "marked": 12360, + "marker": 18170, + "markers": 23664, + "market": 11614, + "market": 2196, + "marketer": 33482, + "marketers": 23682, + "marketing": 19535, + "marketing": 2905, + "marketplace": 18241, + "markets": 7292, + "markham": 39817, + "marking": 14705, + "markings": 41046, + "markle": 32672, + "marko": 38338, + "marks": 5466, + "markus": 33725, + "marl": 24922, + "marlborough": 43515, + "marlene": 45117, + "marley": 16504, + "marlin": 34275, + "marlins": 23309, + "marlon": 32995, + "marmalade": 39068, + "marnock": 48305, + "maro": 27029, + "maroon": 20501, + "marqu": 20704, + "marque": 13012, + "marquee": 27725, + "marquette": 37624, + "marquez": 27317, + "marquis": 33530, + "marr": 32871, + "marrake": 37125, + "marrakech": 39006, + "marri": 3839, + "marriage": 38047, + "marriage": 7040, + "marriages": 38190, + "married": 6791, + "marries": 46283, + "marriott": 19211, + "marrow": 31030, + "marry": 13288, + "marrying": 40507, + "mars": 41469, + "mars": 7496, + "marsden": 43344, + "marse": 26577, + "marseille": 30365, + "marsh": 9237, + "marsh": 13505, + "marsha": 21491, + "marshal": 26608, + "marshall": 30939, + "marshall": 9811, + "marshals": 44175, + "marshes": 43450, + "marshmal": 21069, + "marshmallow": 28530, + "marshmallows": 39471, + "mart": 2348, + "mart": 7772, + "marta": 32858, + "martens": 43211, + "marth": 34493, + "martha": 16427, + "marti": 20577, + "martial": 17088, + "martialarts": 35895, + "martian": 30214, + "martin": 6929, + "martin": 3690, + "martina": 34393, + "martinez": 
13913, + "marting": 47570, + "martini": 22199, + "martino": 41675, + "martins": 30569, + "marty": 9926, + "marty": 17169, + "martyn": 44075, + "martyr": 36155, + "martyr": 26067, + "martyrdom": 43110, + "martyred": 39114, + "martyrs": 24707, + "maru": 37413, + "maru": 31838, + "marvel": 13835, + "marvel": 5996, + "marvelcomics": 46897, + "marvell": 26576, + "marvellous": 28402, + "marvelous": 25487, + "marvin": 19675, + "marx": 30559, + "marx": 26001, + "marxist": 45205, + "mary": 5146, + "mary": 2676, + "maryam": 33636, + "maryam": 36393, + "maryland": 11379, + "marys": 40905, + "marys": 40228, + "mas": 5226, + "mas": 1412, + "masa": 24995, + "masa": 41868, + "masala": 31483, + "masc": 23564, + "mascar": 46984, + "mascara": 31635, + "mascot": 13983, + "mascots": 43266, + "mascul": 25589, + "masculine": 48269, + "masculinity": 40465, + "mase": 49128, + "maser": 25798, + "maserati": 30442, + "mash": 12317, + "mash": 15680, + "mashable": 41026, + "mashed": 27395, + "mashup": 27079, + "masi": 35965, + "masjid": 31420, + "mask": 19262, + "mask": 8306, + "masked": 25757, + "masking": 47046, + "masks": 19055, + "maslow": 44359, + "mason": 17424, + "mason": 9699, + "masonic": 36491, + "masonry": 30764, + "masons": 37195, + "masqu": 26593, + "masquer": 29604, + "masquerade": 36944, + "mass": 4636, + "mass": 4854, + "massach": 14484, + "massachuse": 14577, + "massachusetts": 14756, + "massacre": 14696, + "massage": 13055, + "masse": 41735, + "masses": 22978, + "massey": 29868, + "massi": 17239, + "massimo": 45821, + "massive": 4818, + "massively": 34297, + "mast": 45916, + "mast": 27920, + "master": 4534, + "master": 3498, + "mastercard": 40542, + "masterchef": 34809, + "masterclass": 17529, + "mastered": 32616, + "masterful": 46823, + "mastering": 28326, + "mastermind": 34029, + "masterpiece": 12066, + "masterpieces": 37596, + "masters": 6913, + "mastery": 34800, + "mastiff": 42311, + "maswar": 47887, + "mat": 905, + "mat": 9063, + "mata": 17270, + "match": 7733, + "match": 2439, + "matcha": 32433, + "matchday": 15947, + "matched": 17792, + "matches": 8609, + "matching": 11840, + "matchup": 19355, + "matchups": 49162, + "mate": 6137, + "mate": 2936, + "mated": 33813, + "mateo": 34991, + "mater": 23724, + "materi": 7084, + "material": 7118, + "materials": 8161, + "maternal": 26131, + "maternity": 23894, + "mates": 5817, + "math": 13277, + "math": 6025, + "mathe": 8725, + "mathemat": 11901, + "mathematical": 25609, + "mathematician": 41036, + "mathematics": 20113, + "mathew": 36333, + "mathews": 37120, + "mathi": 23014, + "mathieu": 40417, + "maths": 14763, + "mati": 12716, + "mati": 32268, + "matic": 36859, + "matic": 7900, + "matically": 38282, + "matics": 23634, + "matil": 26751, + "matilda": 36308, + "matin": 44849, + "matinee": 38525, + "mating": 34346, + "mation": 11701, + "matisse": 43446, + "mato": 13127, + "matologist": 48842, + "matology": 27940, + "matory": 25519, + "matri": 27041, + "matrix": 18078, + "mats": 22259, + "matsu": 30242, + "matt": 7972, + "matt": 3972, + "mattb": 42791, + "matte": 31237, + "matte": 19771, + "mattel": 35365, + "matteo": 33120, + "matter": 30471, + "matter": 3828, + "matters": 5708, + "matth": 41846, + "matthe": 5116, + "matthew": 17588, + "matthew": 7008, + "matthews": 16739, + "matthi": 29853, + "matthias": 45104, + "matti": 39840, + "mattress": 23438, + "matty": 31233, + "matty": 29176, + "matu": 40616, + "matur": 22897, + "mature": 14417, + "maturity": 28047, + "mau": 8134, + "mau": 23033, + "maui": 20463, + "maul": 30725, + "maur": 10574, + "maure": 
25191, + "maureen": 31723, + "maurice": 20200, + "mauricio": 39066, + "mauriti": 28406, + "mauritius": 29305, + "mauro": 41691, + "mav": 25697, + "maver": 16700, + "maverick": 27425, + "mavericks": 30092, + "mavs": 30665, + "maw": 39351, + "maw": 42271, + "mawards": 37682, + "max": 4898, + "max": 3902, + "maxi": 8554, + "maxi": 23266, + "maxim": 19892, + "maxim": 38574, + "maximize": 28673, + "maximum": 13162, + "maximus": 44312, + "maxine": 38468, + "maxwell": 19611, + "maxx": 37466, + "may": 1686, + "may": 1270, + "maya": 45783, + "maya": 12987, + "mayan": 37952, + "maybe": 3746, + "mayday": 29957, + "mayer": 21196, + "mayfair": 35171, + "mayfield": 33933, + "mayhem": 21502, + "maymay": 26600, + "maymay": 33853, + "maymayentrata": 30480, + "maynard": 32487, + "mayne": 35771, + "mayo": 22449, + "mayo": 11280, + "mayor": 15429, + "mayor": 4676, + "mayoral": 28983, + "mayorof": 43533, + "mayors": 28501, + "mays": 35445, + "maythe": 42281, + "mayward": 45751, + "mayward": 23519, + "mayweather": 22774, + "maz": 9177, + "maz": 36215, + "mazda": 18506, + "maze": 21988, + "mazz": 29439, + "mañ": 37059, + "mañana": 39354, + "mb": 758, + "mb": 3996, + "mba": 8329, + "mban": 46685, + "mbar": 44452, + "mbb": 10736, + "mbc": 20137, + "mbe": 38395, + "mbe": 27004, + "mber": 5467, + "mber": 1034, + "mberg": 26372, + "mbers": 5443, + "mbi": 45347, + "mble": 20310, + "mble": 4756, + "mbles": 28693, + "mbling": 28604, + "mbo": 25733, + "mbo": 11319, + "mbps": 44896, + "mbs": 10370, + "mbta": 38979, + "mbu": 42228, + "mbuhari": 36752, + "mc": 1278, + "mc": 4126, + "mca": 40570, + "mca": 14635, + "mcal": 28663, + "mcar": 43776, + "mcbride": 35080, + "mcc": 21192, + "mccabe": 37628, + "mccaf": 47385, + "mccain": 20397, + "mccall": 34844, + "mccann": 27140, + "mccar": 9570, + "mccarthy": 16974, + "mccartney": 19958, + "mccl": 24709, + "mccla": 43672, + "mccle": 40139, + "mcclure": 44945, + "mcco": 46152, + "mccon": 32638, + "mccor": 23057, + "mccormack": 45164, + "mccormick": 39088, + "mccoy": 20218, + "mccr": 41996, + "mccre": 25393, + "mccul": 38833, + "mccull": 41782, + "mcd": 28930, + "mcder": 27355, + "mcdermott": 34504, + "mcdon": 12171, + "mcdonald": 10741, + "mcdonalds": 17674, + "mcdonnell": 34360, + "mcdowell": 34119, + "mce": 26864, + "mcel": 28752, + "mcen": 47423, + "mcfad": 36976, + "mcfadden": 42105, + "mcfar": 29020, + "mcfarlane": 47174, + "mcfc": 16416, + "mcfly": 38211, + "mcg": 42507, + "mcg": 27995, + "mcgee": 29223, + "mcgill": 46524, + "mcgill": 35511, + "mcgin": 29596, + "mcgowan": 40462, + "mcgr": 25169, + "mcgra": 29367, + "mcgrath": 28759, + "mcgraw": 40950, + "mcgregor": 19642, + "mcgu": 34294, + "mcguinness": 45299, + "mcguire": 32635, + "mci": 46212, + "mci": 45491, + "mcil": 30481, + "mcin": 18770, + "mcintosh": 45353, + "mcintyre": 33369, + "mck": 6781, + "mckay": 33611, + "mcke": 27424, + "mckee": 43529, + "mcken": 42619, + "mckenna": 24924, + "mckenzie": 25502, + "mckin": 15437, + "mckinley": 39891, + "mckinney": 33554, + "mckinnon": 48736, + "mckinsey": 48143, + "mcl": 49021, + "mcla": 12565, + "mclaren": 37381, + "mclaren": 16789, + "mclau": 32285, + "mclaughlin": 35346, + "mcle": 25299, + "mclean": 28666, + "mcleod": 40259, + "mcm": 12251, + "mcmahon": 24026, + "mcmaster": 42703, + "mcmillan": 45603, + "mcn": 42919, + "mcnam": 32682, + "mcnamara": 37506, + "mcne": 42545, + "mco": 33723, + "mcqueen": 22544, + "mcr": 29884, + "mcr": 16966, + "mcs": 27020, + "mcu": 30403, + "md": 8637, + "md": 4732, + "mdc": 38773, + "mdc": 41761, + "mds": 48746, + "mdt": 40822, + "me": 613, 
+ "me": 614, + "mea": 46045, + "mea": 17711, + "mead": 12134, + "mead": 21567, + "meade": 37218, + "meado": 16402, + "meadow": 25213, + "meadow": 17195, + "meadows": 17178, + "meal": 29662, + "meal": 5478, + "meals": 11229, + "mean": 4189, + "mean": 3450, + "meand": 48015, + "meaning": 14586, + "meaning": 8342, + "meaningful": 17480, + "meaningless": 48932, + "meanings": 45814, + "means": 3494, + "meant": 8674, + "meantime": 27499, + "meanwhile": 9650, + "meas": 5867, + "measles": 38230, + "measurable": 48010, + "measure": 15261, + "measure": 10579, + "measured": 23154, + "measurement": 20973, + "measurements": 29894, + "measures": 11936, + "measuring": 18064, + "meat": 10805, + "meat": 6480, + "meatball": 43642, + "meatballs": 29233, + "meath": 37920, + "meatless": 48085, + "meats": 29558, + "mec": 27432, + "mecca": 36095, + "mech": 38305, + "mechan": 6715, + "mechanic": 24582, + "mechanical": 14467, + "mechanics": 20536, + "mechanism": 22576, + "mechanisms": 28610, + "meck": 41908, + "med": 1948, + "med": 2177, + "meda": 33614, + "medal": 29714, + "medal": 6974, + "medalist": 21040, + "medalists": 43397, + "medalli": 31349, + "medallion": 43469, + "medallist": 41472, + "medals": 14710, + "mede": 48225, + "meded": 27627, + "medi": 1436, + "media": 22064, + "media": 1895, + "mediac": 37490, + "median": 30491, + "mediation": 42829, + "medic": 3602, + "medic": 35441, + "medicaid": 25421, + "medical": 18432, + "medical": 4116, + "medicare": 23710, + "medication": 23771, + "medications": 37181, + "medicinal": 28772, + "medicine": 5616, + "medicines": 26541, + "medics": 46688, + "medieval": 38956, + "medieval": 10789, + "medina": 27281, + "mediocre": 41170, + "medit": 19130, + "meditate": 38039, + "meditation": 10827, + "mediter": 14194, + "mediterran": 14358, + "mediterranean": 15327, + "medium": 8675, + "medley": 24793, + "meds": 25075, + "medtech": 42044, + "medusa": 44216, + "medway": 42286, + "mee": 1725, + "mee": 14075, + "meek": 28935, + "meen": 37940, + "meen": 46515, + "meer": 26714, + "meer": 27555, + "meet": 5714, + "meet": 1633, + "meeting": 48566, + "meeting": 2071, + "meetings": 9980, + "meets": 5972, + "meetthe": 27575, + "meetup": 15430, + "meg": 11500, + "meg": 16186, + "mega": 15979, + "mega": 9068, + "megab": 38103, + "megadeth": 46741, + "megal": 37650, + "megam": 26073, + "megan": 19127, + "megan": 11503, + "megap": 33624, + "megat": 35581, + "megh": 31192, + "meghan": 39939, + "meghan": 18261, + "meh": 10512, + "meh": 22211, + "mehta": 25031, + "mei": 22564, + "mei": 25198, + "meier": 29812, + "mein": 28857, + "mein": 21466, + "meister": 28407, + "mek": 44645, + "mel": 1902, + "mel": 6834, + "mela": 35032, + "melan": 22261, + "melanch": 44818, + "melancholy": 47821, + "melani": 34031, + "melania": 32796, + "melanie": 22153, + "melanoma": 40862, + "melb": 47007, + "melb": 28980, + "melbourne": 28387, + "melbourne": 6995, + "melee": 45108, + "meli": 28885, + "melinda": 46303, + "melis": 18913, + "melissa": 41866, + "melissa": 13030, + "mell": 22531, + "mell": 41583, + "mello": 47594, + "mellon": 45162, + "mellow": 32034, + "melo": 10354, + "melo": 22374, + "melodic": 41877, + "melodies": 38412, + "melody": 19119, + "melon": 12146, + "melrose": 36296, + "melt": 22209, + "melt": 15957, + "meltdown": 30613, + "melted": 23037, + "melting": 19247, + "melton": 46062, + "melts": 31446, + "melville": 46030, + "melvin": 31544, + "mely": 6373, + "mem": 4937, + "mem": 34944, + "memb": 2114, + "member": 29566, + "member": 1640, + "members": 2567, + "membership": 11562, + "membrane": 
34088, + "meme": 35157, + "meme": 9169, + "memes": 12828, + "memo": 15967, + "memo": 19334, + "memoir": 20532, + "memoirs": 45311, + "memor": 1858, + "memorab": 26271, + "memorabilia": 27488, + "memorable": 13172, + "memorial": 16285, + "memorial": 4642, + "memorialday": 21598, + "memoriam": 48191, + "memories": 4304, + "memory": 44766, + "memory": 5137, + "memph": 10285, + "memphis": 38432, + "memphis": 11298, + "men": 1552, + "men": 1656, + "mena": 23052, + "menace": 29949, + "mend": 8151, + "mend": 46927, + "mendel": 49268, + "mendes": 18060, + "mendez": 48275, + "mendo": 19327, + "mendoza": 23680, + "meng": 37102, + "meng": 37450, + "mening": 46428, + "menon": 38255, + "menopau": 34974, + "menopause": 46026, + "mens": 16924, + "mens": 10495, + "mensfashion": 27578, + "menstru": 28345, + "menstrual": 40915, + "menswear": 18803, + "ment": 1585, + "ment": 777, + "mental": 8611, + "mental": 3448, + "mentalhealth": 20593, + "mentalhealth": 13022, + "mentality": 26647, + "mentally": 14307, + "mentary": 4468, + "mentation": 9512, + "mentday": 40397, + "mente": 40302, + "mente": 36396, + "mented": 9249, + "menting": 14471, + "mention": 43881, + "mention": 6762, + "mentioned": 11948, + "mentioning": 34290, + "mentions": 12334, + "mento": 30582, + "mentor": 45342, + "mentor": 11642, + "mentoring": 19610, + "mentors": 20945, + "mentorship": 33878, + "ments": 1827, + "menu": 6225, + "menus": 33534, + "meo": 30792, + "meow": 39965, + "meow": 17246, + "mep": 27095, + "mer": 1316, + "mer": 2452, + "mera": 20028, + "merc": 34357, + "merc": 44399, + "mercado": 45479, + "merce": 8409, + "mercede": 34959, + "mercedes": 26403, + "mercedes": 10685, + "mercedesam": 40107, + "mercedesbenz": 32347, + "mercen": 40301, + "mercer": 21632, + "merch": 11504, + "merchandi": 14954, + "merchandise": 16808, + "merchandising": 49196, + "merchant": 19563, + "merchants": 34427, + "merci": 23364, + "merci": 29378, + "mercur": 11471, + "mercury": 45203, + "mercury": 12653, + "mercy": 33249, + "mercy": 10815, + "mere": 29657, + "mere": 10342, + "mered": 24657, + "mered": 32297, + "meredith": 25103, + "merely": 28718, + "merge": 30406, + "merged": 46492, + "merger": 24744, + "merging": 49256, + "meri": 17993, + "meri": 36109, + "meria": 48433, + "meric": 27097, + "merica": 30561, + "meridi": 37901, + "meridian": 31195, + "mering": 41060, + "meringue": 41661, + "merino": 42648, + "merit": 20830, + "merkel": 24715, + "merle": 48586, + "merlin": 26517, + "merlot": 40424, + "mermaid": 16064, + "mermaids": 43617, + "mero": 19097, + "merr": 48288, + "merri": 21462, + "merrill": 47713, + "merritt": 36462, + "merry": 14167, + "merry": 5779, + "merrychristmas": 19672, + "mers": 4199, + "mersal": 36711, + "mersey": 25248, + "mersey": 46239, + "merseyside": 35382, + "mert": 48496, + "merton": 35315, + "mery": 40873, + "meryl": 35787, + "mes": 28432, + "mes": 3029, + "mesa": 18956, + "mese": 42018, + "mesh": 15030, + "mesm": 18695, + "mesmer": 38435, + "mesmeri": 25985, + "mesmerizing": 35637, + "meso": 25537, + "mesqu": 46819, + "mess": 2490, + "mess": 8188, + "message": 3918, + "messages": 9390, + "messaging": 23234, + "messe": 40391, + "messed": 23580, + "messenger": 17389, + "messi": 19394, + "messi": 11252, + "messiah": 28737, + "messing": 23144, + "messy": 15987, + "mest": 23780, + "mester": 47349, + "mesut": 49177, + "met": 5249, + "met": 2340, + "meta": 14803, + "meta": 22701, + "metab": 16150, + "metabol": 48389, + "metaboli": 25573, + "metabolic": 34311, + "metabolism": 27824, + "metal": 8935, + "metal": 4044, + "metall": 
19084, + "metallic": 17257, + "metallica": 24079, + "metals": 21375, + "metam": 28862, + "metamor": 39030, + "metamorpho": 47601, + "metaph": 24189, + "metaphor": 34233, + "metast": 41973, + "mete": 11226, + "meteor": 26429, + "meteor": 26823, + "meteoro": 25948, + "meteorologist": 42849, + "meter": 10104, + "meters": 13247, + "metgala": 30089, + "meth": 21867, + "meth": 26177, + "methane": 37565, + "metho": 5770, + "method": 10284, + "methodist": 25165, + "methodo": 28488, + "methodology": 37316, + "methods": 12200, + "methyl": 48999, + "metmuseum": 28207, + "meto": 25679, + "metoo": 24722, + "metr": 15086, + "metre": 27889, + "metres": 19798, + "metric": 19950, + "metrical": 40704, + "metrics": 24396, + "metro": 7257, + "metro": 6784, + "metroid": 39957, + "metropolis": 40476, + "metropolitan": 19013, + "metry": 20039, + "mets": 9633, + "mett": 28081, + "metz": 40506, + "meu": 34520, + "mew": 40368, + "mex": 3213, + "mex": 18387, + "mexic": 31728, + "mexican": 37442, + "mexican": 8186, + "mexicans": 47729, + "mexico": 31834, + "mexico": 4604, + "mey": 28584, + "mey": 27777, + "meyer": 13963, + "meyers": 32326, + "mez": 30615, + "mez": 46833, + "mezz": 38771, + "mf": 18199, + "mf": 11067, + "mfa": 24107, + "mfc": 39474, + "mfg": 21912, + "mfw": 27309, + "mg": 10003, + "mg": 8014, + "mga": 23954, + "mgm": 27572, + "mgmt": 22288, + "mgr": 31500, + "mgs": 48073, + "mgt": 48663, + "mh": 9962, + "mh": 10834, + "mha": 41944, + "mhealth": 41225, + "mhs": 28815, + "mhz": 31550, + "mi": 714, + "mi": 2251, + "mia": 5852, + "miam": 31053, + "miami": 15106, + "miami": 4891, + "mian": 24792, + "miaw": 36046, + "mib": 48178, + "mic": 1213, + "mic": 3816, + "mica": 41551, + "micah": 33870, + "mice": 19030, + "mich": 25628, + "mich": 23029, + "micha": 2083, + "michael": 6051, + "michael": 2511, + "michaela": 41897, + "michaeljackson": 33532, + "michaels": 23868, + "michal": 47144, + "miche": 37966, + "micheal": 43709, + "michel": 5158, + "michel": 17153, + "michelangelo": 41245, + "michele": 20642, + "michelin": 26330, + "michelle": 19028, + "michelle": 8625, + "michi": 5658, + "michigan": 32344, + "michigan": 6296, + "mick": 15171, + "mick": 12592, + "mickey": 41813, + "mickey": 13053, + "micky": 43011, + "micro": 3160, + "micro": 11374, + "microbes": 44671, + "microbi": 19496, + "microbial": 30335, + "microbiology": 35348, + "microbiome": 35148, + "micron": 48742, + "microphone": 24643, + "micropoetry": 35997, + "microscope": 29114, + "microscopy": 38431, + "microsof": 42424, + "microsoft": 38650, + "microsoft": 7254, + "microwave": 24240, + "mics": 16554, + "mid": 2192, + "mid": 4734, + "midcentury": 48988, + "midd": 2983, + "midday": 23390, + "middle": 9849, + "middle": 3694, + "middleeast": 32783, + "middles": 29769, + "middlesbrough": 32436, + "middlesex": 39154, + "middleton": 23627, + "middleweight": 35829, + "midfield": 28116, + "midfielder": 13423, + "midget": 30734, + "midi": 39496, + "midi": 27326, + "midland": 24822, + "midlands": 18062, + "midnight": 35746, + "midnight": 6302, + "mids": 40821, + "midst": 24752, + "midsummer": 35234, + "midterm": 34365, + "midterms": 32015, + "midtown": 26069, + "midway": 26536, + "midweek": 29120, + "midwest": 16627, + "midwi": 44802, + "midwife": 37681, + "midwives": 42355, + "mie": 20865, + "mie": 10555, + "miento": 46482, + "mier": 36490, + "mies": 8840, + "miff": 49398, + "mig": 28743, + "might": 2727, + "mighty": 26632, + "mighty": 7815, + "mign": 41678, + "migos": 44640, + "migr": 3736, + "migra": 28186, + "migraine": 35360, + "migrant": 18902, + 
"migrants": 15814, + "migrate": 41804, + "migrating": 43604, + "migration": 11891, + "migu": 12279, + "miguel": 33672, + "miguel": 14436, + "miho": 46870, + "mii": 39896, + "mik": 15096, + "mik": 46203, + "mika": 28609, + "mika": 25185, + "mike": 5884, + "mike": 3178, + "mikel": 48865, + "mikequind": 33508, + "mikequindazzi": 33551, + "mikey": 34934, + "mikey": 23368, + "mikha": 30999, + "mikhail": 38327, + "miki": 48863, + "miko": 35413, + "miku": 37703, + "mil": 1469, + "mil": 12826, + "mila": 26183, + "milan": 30380, + "milan": 8552, + "milano": 18585, + "milb": 42248, + "mild": 16085, + "mildly": 49059, + "mile": 7833, + "mile": 6243, + "mileage": 30579, + "miler": 44680, + "miles": 3446, + "milestone": 13485, + "milestones": 34025, + "miley": 25336, + "miley": 14321, + "mileycyrus": 28528, + "milf": 45386, + "milford": 35840, + "mili": 16698, + "miliband": 41440, + "milit": 3715, + "militant": 33629, + "militants": 23974, + "military": 24498, + "military": 4323, + "militi": 46625, + "militia": 32114, + "milk": 13409, + "milk": 5205, + "milkshake": 29066, + "milky": 37320, + "milky": 21120, + "milkyway": 43246, + "mill": 4221, + "mill": 6637, + "milla": 49381, + "millan": 34930, + "millan": 22188, + "millar": 41851, + "mille": 34066, + "millen": 48501, + "millenni": 10406, + "millennial": 28357, + "millennials": 18804, + "millennium": 21116, + "miller": 21699, + "miller": 5733, + "milli": 5340, + "millie": 29283, + "milling": 39133, + "million": 13154, + "million": 2506, + "millionaire": 25179, + "millionaires": 47159, + "millions": 8492, + "mills": 10331, + "millwall": 35902, + "milly": 45794, + "milne": 44590, + "milner": 45230, + "milo": 24548, + "milton": 39004, + "milton": 17360, + "milwau": 13452, + "milwaukee": 14259, + "mim": 39379, + "mimi": 27086, + "mimic": 47116, + "mimic": 46519, + "mimo": 45551, + "min": 771, + "min": 3331, + "mina": 15281, + "minaj": 25136, + "minal": 40222, + "minat": 33275, + "mince": 32396, + "mind": 5890, + "mind": 2575, + "mindanao": 44228, + "minded": 21330, + "mindful": 28457, + "mindfulness": 15707, + "minding": 45337, + "minds": 9244, + "mindset": 14217, + "mindy": 46875, + "mindy": 38551, + "mine": 20149, + "mine": 3347, + "minecraft": 15678, + "mined": 48034, + "minent": 12533, + "miner": 14109, + "miner": 26572, + "mineral": 17692, + "minerals": 21169, + "miners": 22119, + "mines": 16211, + "ming": 10868, + "ming": 2107, + "mingham": 7590, + "mingle": 38437, + "mingly": 36909, + "mington": 49283, + "mington": 23119, + "minh": 48734, + "minho": 21318, + "mini": 1810, + "mini": 3954, + "miniature": 44298, + "miniature": 16377, + "miniatures": 38816, + "minic": 31522, + "minim": 10005, + "minimal": 18458, + "minimalism": 42594, + "minimalist": 26641, + "minimize": 38697, + "minimum": 12244, + "minindia": 28458, + "mining": 8473, + "minion": 28622, + "minions": 27035, + "minis": 33409, + "minis": 35976, + "minister": 25688, + "minister": 3569, + "ministerial": 33008, + "ministers": 16406, + "ministries": 27895, + "ministry": 8742, + "mink": 42017, + "minn": 45991, + "minn": 47318, + "minne": 7083, + "minneapolis": 16977, + "minneso": 9380, + "minnesota": 9968, + "minnie": 24493, + "mino": 22791, + "minogue": 44202, + "minor": 8522, + "minorities": 28119, + "minority": 16210, + "minors": 36789, + "mins": 6196, + "minsk": 46151, + "minster": 11189, + "mint": 48084, + "mint": 7506, + "minted": 49377, + "minton": 20050, + "minu": 29064, + "minus": 15358, + "minute": 28931, + "minute": 4497, + "minutes": 3056, + "mio": 26366, + "mir": 2750, + "mir": 
6585, + "mira": 21665, + "mira": 22762, + "mirac": 13685, + "miracle": 49208, + "miracle": 11543, + "miracles": 23478, + "miraculous": 38671, + "mirage": 28679, + "mirai": 49060, + "mirand": 32367, + "miranda": 17590, + "mire": 38140, + "mire": 30140, + "miri": 22273, + "miriam": 30950, + "miro": 34851, + "miro": 48317, + "mirren": 47600, + "mirro": 48500, + "mirror": 29823, + "mirror": 7220, + "mirrors": 21823, + "mirza": 36440, + "mis": 866, + "mis": 11239, + "mischief": 33896, + "misconceptions": 48681, + "misconduct": 30601, + "mise": 46567, + "mise": 17267, + "miser": 33394, + "miserable": 26196, + "misery": 28360, + "mises": 24390, + "misfits": 42708, + "mish": 15494, + "mish": 20981, + "misha": 35434, + "mishra": 33042, + "misleading": 30862, + "mism": 15948, + "miso": 27657, + "miso": 33441, + "misogy": 31315, + "misogyny": 48415, + "miss": 6984, + "miss": 1526, + "missal": 38337, + "missed": 3955, + "misses": 15844, + "missi": 3008, + "missile": 14411, + "missiles": 27868, + "missin": 36209, + "missing": 23509, + "missing": 3423, + "mission": 12738, + "mission": 2406, + "missionaries": 40580, + "missionary": 27915, + "missions": 6990, + "mississ": 26483, + "mississauga": 28393, + "mississi": 11687, + "mississippi": 12232, + "missou": 30710, + "missoula": 48549, + "missouri": 11835, + "missuni": 26347, + "missuniverse": 28766, + "missy": 48105, + "missy": 31515, + "missyou": 45799, + "mist": 12610, + "mist": 11946, + "mistak": 20478, + "mistake": 11303, + "mistaken": 29182, + "mistakenly": 48494, + "mistakes": 12824, + "mister": 26949, + "mister": 18895, + "mistle": 46800, + "mistletoe": 48569, + "mistre": 42039, + "mistress": 24349, + "mists": 28636, + "misty": 18799, + "misunderstood": 41574, + "misuse": 40970, + "mit": 3303, + "mit": 4551, + "mita": 47514, + "mitage": 27964, + "mitch": 6969, + "mitch": 14150, + "mitchell": 39339, + "mitchell": 9007, + "mite": 26929, + "mith": 21752, + "mith": 17948, + "miti": 17857, + "mitigate": 42273, + "mitigation": 35514, + "mito": 38254, + "mitochondri": 42132, + "mitra": 47703, + "mits": 24086, + "mitsu": 17905, + "mitsubi": 21604, + "mitsubishi": 23030, + "mitt": 17321, + "mitt": 21341, + "mitted": 10307, + "mitting": 27938, + "mitz": 41827, + "mium": 35891, + "miwx": 43941, + "mix": 3210, + "mix": 3285, + "mixed": 29376, + "mixed": 6780, + "mixer": 17200, + "mixers": 39175, + "mixes": 19061, + "mixing": 15588, + "mixtape": 11044, + "mixture": 28286, + "miy": 25695, + "miya": 36257, + "miz": 20881, + "miz": 30795, + "mize": 19076, + "mized": 43418, + "mizing": 38715, + "mizz": 19985, + "mizzou": 26165, + "mj": 13117, + "mj": 14733, + "mk": 11581, + "mk": 8937, + "mke": 36642, + "mkt": 24814, + "ml": 3627, + "ml": 5780, + "mla": 16723, + "mlas": 48464, + "mlb": 21039, + "mlb": 7482, + "mley": 40329, + "mlg": 45801, + "mlin": 24556, + "mlk": 17941, + "mlkday": 39905, + "mlm": 37611, + "mln": 18971, + "mlp": 23620, + "mlpfi": 45475, + "mlpfim": 45640, + "mls": 13077, + "mm": 1028, + "mm": 2848, + "mma": 34140, + "mma": 6096, + "mmc": 44253, + "mme": 13105, + "mmed": 19570, + "mmer": 35717, + "mmer": 7508, + "mmers": 28128, + "mmes": 42862, + "mmi": 34147, + "mming": 21038, + "mming": 16507, + "mmings": 31357, + "mmit": 41050, + "mmj": 43015, + "mmm": 37908, + "mmm": 7641, + "mmmm": 36312, + "mmmm": 13180, + "mmmmm": 21808, + "mmmmmm": 43740, + "mmo": 30418, + "mmon": 41131, + "mmor": 36657, + "mmorpg": 39476, + "mms": 37803, + "mmva": 42666, + "mmy": 28837, + "mmy": 8722, + "mn": 5086, + "mn": 4057, + "mna": 34877, + "mnd": 44776, + "mnet": 
34129, + "mnf": 41105, + "mnl": 32980, + "mnleg": 42653, + "mns": 39040, + "mnt": 21477, + "mntwins": 45448, + "mnwild": 39044, + "mnwx": 39592, + "mo": 617, + "mo": 2080, + "moa": 33174, + "moana": 43241, + "mob": 2818, + "mob": 12754, + "mobi": 9451, + "mobil": 26343, + "mobil": 29815, + "mobile": 12935, + "mobile": 3451, + "mobiles": 44302, + "mobili": 20770, + "mobility": 12546, + "mobilization": 48916, + "moby": 47219, + "moc": 41439, + "moc": 36992, + "mocha": 28425, + "mochi": 47973, + "mock": 15641, + "mock": 12759, + "mocked": 47400, + "mocking": 28692, + "mocking": 37870, + "mocks": 35142, + "mod": 6362, + "mod": 10893, + "moda": 25814, + "modal": 33157, + "mode": 20402, + "mode": 6493, + "model": 4591, + "model": 2863, + "modeled": 39527, + "modeling": 13706, + "modelling": 19946, + "models": 6176, + "moder": 2894, + "moderate": 16435, + "moderated": 27928, + "moderating": 34242, + "moderator": 32659, + "modern": 11706, + "modern": 4077, + "modernart": 34417, + "moderni": 24328, + "modernism": 39601, + "modernist": 36773, + "modernization": 47294, + "modes": 30454, + "modest": 25436, + "modi": 9047, + "modi": 7774, + "modification": 37630, + "modified": 17964, + "modo": 36820, + "mods": 23843, + "modu": 9036, + "modular": 22437, + "module": 16757, + "modules": 30575, + "moe": 38655, + "moe": 17938, + "mof": 30798, + "moff": 27160, + "mog": 42362, + "moga": 41732, + "mogadishu": 45133, + "mogul": 41320, + "moh": 18979, + "moh": 35388, + "moha": 46892, + "moham": 7923, + "mohamed": 18472, + "mohammad": 19926, + "mohammed": 16168, + "mohan": 26521, + "mohan": 23586, + "mohawk": 34942, + "mohd": 49094, + "mohsin": 48861, + "moi": 20691, + "moi": 21825, + "moil": 30349, + "moines": 32091, + "moist": 19831, + "moist": 33263, + "moisture": 20412, + "moisturi": 25942, + "moj": 34505, + "moja": 49055, + "mojito": 46830, + "mojo": 25204, + "mok": 49146, + "mol": 4246, + "mol": 31582, + "mold": 21846, + "molding": 46274, + "moldova": 47317, + "mole": 9927, + "mole": 23529, + "molecular": 19370, + "molecule": 39233, + "molecules": 35643, + "molina": 34201, + "mollie": 48203, + "molly": 24368, + "molly": 12573, + "molo": 41510, + "mology": 32255, + "molten": 46071, + "moly": 47083, + "mom": 1614, + "mom": 2543, + "moma": 33605, + "mombasa": 40340, + "moment": 12197, + "moment": 2495, + "momento": 30078, + "moments": 5251, + "momentum": 15722, + "momlife": 43825, + "momma": 14508, + "mommy": 12456, + "momo": 48490, + "momo": 25980, + "moms": 28446, + "moms": 10042, + "momsdemand": 33744, + "mon": 749, + "mon": 2173, + "mona": 19143, + "monaco": 14938, + "monaghan": 39797, + "monarch": 27235, + "monarch": 22619, + "monarchs": 36750, + "monarchy": 47503, + "monaster": 19422, + "monastery": 21850, + "monc": 34847, + "moncton": 44962, + "mond": 14522, + "mond": 4475, + "monday": 6205, + "monday": 2098, + "mondaymorning": 40089, + "mondaymotiv": 45488, + "mondaymotivation": 8198, + "mondaymotivaton": 47034, + "mondays": 13815, + "monde": 29339, + "mondo": 36207, + "monds": 20317, + "mone": 25990, + "monet": 24499, + "monetary": 26394, + "moneti": 38056, + "money": 12743, + "money": 2327, + "mong": 43566, + "monger": 38928, + "mongers": 27670, + "mongo": 20680, + "mongolia": 27144, + "mongolian": 46335, + "moni": 46851, + "monia": 31161, + "monic": 30893, + "monica": 13540, + "monit": 9014, + "monitor": 10198, + "monitored": 45828, + "monitoring": 11030, + "monitors": 30478, + "monk": 30557, + "monk": 16424, + "monkey": 29597, + "monkey": 9465, + "monkeys": 15781, + "monks": 29090, + "monmouth": 
36929, + "mono": 8220, + "mono": 22537, + "monochrome": 25576, + "monogram": 39665, + "monologue": 47776, + "monopoly": 25241, + "monoxide": 49314, + "monro": 45750, + "monroe": 13625, + "mons": 19885, + "monsanto": 37592, + "monsi": 46677, + "monsieur": 48879, + "monsoon": 18872, + "monsta": 30718, + "monstax": 45631, + "monste": 47045, + "monster": 14454, + "monster": 6060, + "monsters": 11546, + "mont": 5186, + "mont": 5382, + "montag": 37202, + "montage": 32325, + "montal": 42126, + "montan": 28405, + "montana": 11436, + "monte": 8711, + "monte": 14667, + "montene": 28538, + "montenegro": 30378, + "monter": 36673, + "monterey": 23388, + "monterrey": 45254, + "montess": 43205, + "montessori": 45443, + "montgom": 13852, + "montgomery": 14951, + "month": 7680, + "month": 1924, + "monthly": 8764, + "months": 3109, + "monthsary": 42420, + "monton": 41961, + "montp": 39523, + "montre": 8434, + "montreal": 9262, + "montrose": 42347, + "monty": 43997, + "monty": 24038, + "monu": 9748, + "monument": 12019, + "monumental": 31297, + "monuments": 26916, + "mony": 4117, + "monza": 40380, + "moo": 4953, + "moo": 24626, + "mood": 42358, + "mood": 5394, + "moods": 43727, + "moody": 17170, + "moom": 36887, + "moon": 6334, + "moon": 3293, + "mooney": 37942, + "moonlight": 20001, + "moons": 29887, + "moonshine": 46706, + "moor": 14817, + "moor": 11877, + "moore": 28613, + "moore": 6708, + "moors": 32577, + "moose": 37562, + "moose": 17338, + "moot": 46895, + "mop": 33900, + "mopar": 41166, + "mor": 657, + "mor": 18614, + "mora": 29262, + "moral": 11246, + "morale": 39404, + "morales": 27117, + "morality": 34133, + "morally": 42519, + "morals": 46223, + "moran": 21557, + "moray": 44569, + "more": 5434, + "more": 750, + "morecam": 37305, + "morecambe": 43414, + "mored": 20195, + "moreland": 44135, + "moreno": 24826, + "morethan": 30889, + "morg": 34284, + "morgan": 15432, + "morgan": 6075, + "morgen": 35106, + "mori": 25710, + "mori": 29514, + "moris": 43131, + "moritz": 45594, + "morley": 40439, + "mormon": 27715, + "morn": 22393, + "mornin": 28327, + "morning": 10769, + "morning": 1119, + "mornings": 12106, + "moro": 31613, + "moroc": 11996, + "moroccan": 27546, + "morocco": 15228, + "moron": 31875, + "morons": 46477, + "morow": 40779, + "morph": 23915, + "morph": 41700, + "morphe": 38978, + "morpho": 38622, + "morrha": 43044, + "morri": 9876, + "morris": 22560, + "morris": 9090, + "morrison": 40961, + "morrison": 14094, + "morrisons": 40965, + "morrissey": 30040, + "morro": 48363, + "morrow": 21611, + "mors": 13064, + "morse": 25282, + "mort": 24257, + "mort": 30583, + "mortal": 31883, + "mortal": 14680, + "mortality": 20347, + "mortar": 27258, + "mortg": 12069, + "mortgage": 13988, + "mortgages": 45391, + "mortimer": 47836, + "morton": 20698, + "morty": 37391, + "mory": 22633, + "mos": 28658, + "mos": 9593, + "mosa": 14164, + "mosa": 23809, + "mosaic": 17506, + "mosch": 47003, + "mosco": 9840, + "moscow": 10371, + "moseley": 47080, + "moses": 18451, + "mosley": 46228, + "mosqu": 15215, + "mosque": 12694, + "mosques": 41214, + "mosquit": 39699, + "mosquito": 25083, + "mosquitoes": 41870, + "moss": 25107, + "moss": 12815, + "most": 7034, + "most": 1096, + "mostly": 8829, + "mosul": 29165, + "mot": 16352, + "mot": 15452, + "mota": 42499, + "motd": 46232, + "motel": 26191, + "moth": 33208, + "moth": 11736, + "mother": 7455, + "mother": 3050, + "motherhood": 32274, + "motherland": 46774, + "mothers": 10546, + "mothersday": 15583, + "motherwell": 48104, + "moths": 29086, + "moti": 38210, + "motif": 35373, + 
"motion": 32139, + "motion": 7860, + "motiv": 3183, + "motivate": 26771, + "motivated": 16521, + "motivates": 44684, + "motivating": 37720, + "motivation": 26117, + "motivation": 4193, + "motivational": 32832, + "motivational": 20472, + "motivationmonday": 28703, + "motive": 36669, + "motley": 42553, + "motm": 41192, + "moto": 10646, + "moto": 11431, + "motocross": 34562, + "motogp": 16615, + "motor": 3975, + "motor": 7659, + "motorbike": 33341, + "motorcycle": 10297, + "motorcycles": 24869, + "motoring": 44491, + "motorists": 32766, + "motorola": 33738, + "motors": 14989, + "motorsport": 18371, + "motorsports": 24264, + "motorway": 31808, + "motown": 32685, + "mott": 44570, + "mott": 21708, + "motto": 23338, + "mou": 2809, + "mou": 25289, + "moud": 37698, + "moul": 25725, + "mould": 36743, + "moulin": 47656, + "moun": 2023, + "mound": 21414, + "mount": 20553, + "mount": 5532, + "mountain": 14547, + "mountain": 3965, + "mountaine": 24841, + "mountaineer": 49255, + "mountains": 5873, + "mounted": 17897, + "mounting": 29910, + "mounts": 36767, + "mour": 9053, + "mour": 42446, + "moured": 29555, + "mourinho": 18536, + "mourn": 33592, + "mourning": 24169, + "mourns": 42811, + "mous": 24837, + "mous": 17425, + "mouse": 33032, + "mouse": 9301, + "mousse": 31869, + "moustache": 32795, + "mouth": 15152, + "mouth": 4932, + "mouths": 38518, + "mov": 23950, + "move": 16624, + "move": 2783, + "moved": 6997, + "movember": 23474, + "movement": 5208, + "movements": 19665, + "mover": 37673, + "movers": 33957, + "moves": 6880, + "movi": 1707, + "movic": 43838, + "movie": 11247, + "movie": 2016, + "movies": 4772, + "moving": 32160, + "moving": 3584, + "mow": 31006, + "mow": 36329, + "mower": 30895, + "mowing": 46424, + "mowx": 44263, + "moy": 27276, + "moy": 34205, + "moyes": 37119, + "moz": 14761, + "moz": 43738, + "mozam": 26648, + "mozambique": 28831, + "mozart": 22132, + "mozz": 26317, + "mozzarella": 27845, + "mp": 1037, + "mp": 1246, + "mpa": 30749, + "mpc": 38560, + "mpd": 33814, + "mped": 28134, + "mper": 22803, + "mpg": 39830, + "mpg": 37454, + "mpgvip": 42149, + "mph": 5306, + "mpi": 43263, + "mping": 27999, + "mple": 21139, + "mplo": 47071, + "mpls": 34298, + "mpo": 33674, + "mpp": 39570, + "mps": 5504, + "mption": 9717, + "mpton": 27448, + "mpu": 47156, + "mpus": 25864, + "mpy": 17192, + "mq": 19103, + "mqm": 24687, + "mr": 3139, + "mr": 1982, + "mra": 44568, + "mrc": 25897, + "mri": 24773, + "mrs": 25003, + "mrs": 4255, + "mrt": 30256, + "mru": 22370, + "mrw": 15303, + "ms": 3525, + "ms": 988, + "msa": 36306, + "msc": 31826, + "msc": 20529, + "msd": 25804, + "msd": 36407, + "msdhoni": 32850, + "msf": 36239, + "msg": 44430, + "msg": 10928, + "msh": 41751, + "msi": 43597, + "msi": 45278, + "msk": 38501, + "msl": 42736, + "msm": 22210, + "msn": 18824, + "msn": 41042, + "msnbc": 20245, + "mson": 27773, + "mson": 12298, + "msp": 41445, + "msp": 22318, + "mss": 42136, + "mss": 48610, + "mst": 26335, + "msu": 26763, + "msu": 17298, + "mswx": 42957, + "msy": 43919, + "mt": 4252, + "mt": 3284, + "mta": 28691, + "mtb": 48306, + "mtb": 18747, + "mtc": 42482, + "mtg": 49142, + "mtg": 13648, + "mth": 48151, + "mtl": 22135, + "mtn": 26041, + "mtn": 18953, + "mtr": 46650, + "mts": 38751, + "mtv": 8099, + "mtv": 12555, + "mtvbr": 47258, + "mtvhottest": 16751, + "mtvstars": 19948, + "mu": 670, + "mu": 6411, + "mua": 21395, + "muay": 44910, + "muaythai": 47763, + "mubarak": 17957, + "muc": 49115, + "much": 14300, + "much": 1238, + "mucha": 42191, + "muchas": 26278, + "mucho": 19864, + "muck": 44731, + "muck": 
45330, + "mud": 17491, + "mud": 11673, + "mudder": 49104, + "muddy": 21524, + "mue": 44383, + "mue": 40717, + "mueller": 46863, + "mueller": 14719, + "muen": 48646, + "muer": 33840, + "muf": 33852, + "mufc": 9013, + "muffin": 22696, + "muffins": 25922, + "mufti": 44930, + "mug": 16339, + "mug": 9722, + "mugabe": 36441, + "mughal": 37508, + "mugs": 22852, + "mugshot": 40028, + "muh": 36335, + "muh": 46475, + "muham": 10043, + "muhammad": 12259, + "muir": 44650, + "muir": 24745, + "muj": 44635, + "muk": 17327, + "muk": 32600, + "mukher": 34575, + "mukherjee": 37862, + "mul": 1899, + "mul": 43193, + "mula": 40937, + "mulator": 17463, + "mulberry": 39221, + "mule": 28695, + "mull": 17313, + "mull": 35310, + "mulled": 44641, + "mullen": 30797, + "muller": 33956, + "mullet": 35010, + "mulligan": 44336, + "mullins": 41265, + "mult": 34219, + "multi": 3947, + "multi": 6400, + "multic": 21683, + "multicul": 28004, + "multicultural": 34667, + "multil": 27975, + "multimedia": 27977, + "multin": 38996, + "multinational": 46540, + "multip": 40314, + "multiplayer": 27460, + "multiple": 6470, + "multipurpose": 47665, + "multit": 27814, + "multitasking": 48684, + "mulus": 26180, + "mum": 15565, + "mum": 4030, + "mumb": 5850, + "mumbai": 24279, + "mumbai": 6971, + "mumford": 46184, + "mummy": 16301, + "mums": 17868, + "mun": 2617, + "mun": 21059, + "muna": 48424, + "munch": 23587, + "munch": 33299, + "munchies": 44324, + "munchkin": 41305, + "mund": 14244, + "mundo": 20990, + "muni": 27327, + "muni": 39795, + "munich": 13526, + "munici": 12159, + "municipal": 43667, + "municipal": 16600, + "municipality": 29987, + "munition": 32668, + "munro": 36501, + "munster": 27201, + "mup": 21966, + "muppet": 40598, + "muppets": 40187, + "mups": 42195, + "mur": 2144, + "mur": 18293, + "mura": 45176, + "mural": 12315, + "murals": 31499, + "murder": 28136, + "murder": 5787, + "murdered": 13158, + "murderer": 26956, + "murderers": 48472, + "murdering": 36055, + "murders": 22409, + "murdoch": 29037, + "murphy": 48976, + "murphy": 8914, + "murray": 31978, + "murray": 7513, + "murs": 38783, + "mus": 2198, + "mus": 8103, + "musa": 30540, + "musc": 5696, + "muscat": 33322, + "muscle": 27323, + "muscle": 9269, + "muscles": 16786, + "muscular": 30606, + "muse": 2369, + "muse": 15686, + "museo": 36457, + "muses": 48243, + "museu": 27087, + "museum": 15602, + "museum": 2786, + "museums": 15542, + "museumweek": 37996, + "mush": 7635, + "mushroom": 13011, + "mushrooms": 14730, + "musi": 15628, + "music": 4110, + "music": 1179, + "musica": 26668, + "musical": 36002, + "musical": 5173, + "musically": 48893, + "musicals": 36974, + "musichistory": 37890, + "musician": 11179, + "musicians": 12498, + "musicislife": 43311, + "musicmonday": 35887, + "musicvideo": 26764, + "musik": 32986, + "musings": 44961, + "musique": 42250, + "musk": 32143, + "musk": 19063, + "muskete": 32775, + "musketeers": 37993, + "musko": 34987, + "muskoka": 40832, + "musli": 4958, + "muslim": 43795, + "muslim": 7060, + "muslims": 10513, + "muss": 41493, + "mussels": 33393, + "must": 6783, + "must": 2048, + "mustache": 23451, + "mustaf": 23596, + "mustafa": 29000, + "mustang": 42361, + "mustang": 13309, + "mustangs": 22500, + "mustard": 15794, + "muster": 47361, + "mustread": 28978, + "mut": 12598, + "mut": 22839, + "mutant": 28384, + "mutation": 38626, + "mutations": 39651, + "mute": 31252, + "muted": 48028, + "muth": 34280, + "mutil": 39950, + "mutt": 45924, + "mutu": 17574, + "mutual": 15055, + "mutuals": 31158, + "muy": 44625, + "mv": 10580, + "mv": 8269, + 
"mvc": 40549, + "mvp": 8905, + "mw": 16725, + "mw": 11206, + "mwc": 24289, + "mwf": 48565, + "mx": 21947, + "mx": 9575, + "my": 1152, + "my": 607, + "mya": 31401, + "myal": 42735, + "myan": 13761, + "myanmar": 14764, + "myart": 38826, + "myco": 48362, + "mydayin": 41896, + "mydayinla": 42801, + "mydubai": 43475, + "mye": 27551, + "myel": 40084, + "myers": 15993, + "myjaps": 47939, + "myle": 43700, + "myles": 25511, + "mylife": 30537, + "mylittle": 37757, + "mylittlepony": 45107, + "myo": 16206, + "myr": 20272, + "myra": 35694, + "myri": 34972, + "myrt": 47785, + "myrtle": 27768, + "mys": 11724, + "myself": 3245, + "mysore": 44924, + "myspace": 41382, + "myster": 4669, + "mysteries": 20605, + "mysterious": 12650, + "mystery": 39828, + "mystery": 6711, + "mysti": 28711, + "mystic": 36264, + "mystic": 23722, + "mystical": 34122, + "myth": 20322, + "myth": 13878, + "mythical": 34377, + "mytho": 43857, + "mythology": 22496, + "myths": 18675, + "mz": 29509, + "mz": 33400, + "mzan": 36322, + "mzansi": 43301, + "má": 36842, + "mé": 21890, + "méxico": 46159, + "mü": 28142, + "mün": 41235, + "n": 77, + "n": 333, + "na": 1097, + "na": 1272, + "naa": 37738, + "naacp": 32176, + "nab": 6951, + "nab": 19440, + "nabe": 35111, + "naby": 24800, + "nac": 14557, + "nac": 18950, + "nach": 12168, + "nach": 43622, + "nacho": 35647, + "nachos": 32847, + "nacht": 37261, + "nacional": 38782, + "nad": 6204, + "nad": 43928, + "nada": 31683, + "nadal": 20814, + "nade": 24908, + "nadi": 30512, + "nadia": 27487, + "nadine": 23356, + "nadu": 20936, + "nae": 19374, + "naf": 16161, + "naf": 45956, + "nafta": 43123, + "nag": 6694, + "nag": 23902, + "naga": 45953, + "naga": 38997, + "nagar": 17490, + "nage": 41219, + "nago": 38349, + "nagoya": 43303, + "nagpur": 43328, + "nah": 26421, + "nah": 11129, + "nahi": 35244, + "nai": 6230, + "nai": 10692, + "naia": 31340, + "naidu": 42429, + "naija": 16326, + "naik": 34424, + "nail": 19459, + "nail": 9059, + "nailart": 43532, + "nailed": 19035, + "nails": 8469, + "nair": 27107, + "naira": 39450, + "naire": 48892, + "nairobi": 17756, + "nais": 46396, + "naissance": 44761, + "naive": 43362, + "naj": 30985, + "naji": 32589, + "nak": 9248, + "nak": 25550, + "naked": 46371, + "naked": 11478, + "naku": 39864, + "nal": 14132, + "nal": 3119, + "nale": 27198, + "nall": 32869, + "nally": 26158, + "nam": 1410, + "nam": 12344, + "nama": 39586, + "naman": 27635, + "namaste": 35549, + "name": 18160, + "name": 1981, + "named": 3194, + "nameis": 40831, + "nament": 3916, + "naments": 16540, + "names": 6130, + "namesake": 41298, + "nami": 20393, + "namibia": 23731, + "naming": 19367, + "namjoon": 31986, + "namm": 35524, + "namo": 46013, + "namo": 24854, + "nan": 4375, + "nan": 7750, + "nana": 18761, + "nanaimo": 40518, + "nancy": 21511, + "nancy": 11425, + "nand": 20435, + "nandez": 12764, + "nando": 46044, + "nang": 48148, + "nani": 27980, + "nanny": 31104, + "nano": 15835, + "nano": 22006, + "nanop": 34177, + "nanotechnology": 42235, + "nanow": 46734, + "nant": 22526, + "nantes": 47533, + "nantucket": 41573, + "nao": 39319, + "naom": 34955, + "naomi": 20173, + "nap": 6568, + "nap": 11012, + "napa": 20545, + "napier": 40875, + "napkin": 38930, + "naples": 23560, + "napo": 18715, + "napol": 20122, + "napoleon": 24969, + "napoli": 22445, + "napp": 11359, + "napping": 37657, + "naps": 31317, + "naq": 46453, + "nar": 2977, + "nar": 20145, + "nara": 33823, + "narcis": 25229, + "narcissi": 35442, + "narco": 38461, + "nard": 18216, + "nare": 34853, + "naren": 8468, + "narendr": 9807, + "narendra": 25848, + 
"narendramodi": 9853, + "narnia": 48693, + "narr": 11845, + "narrated": 43609, + "narrative": 15933, + "narratives": 35117, + "narrator": 46529, + "narrow": 24006, + "narrow": 16652, + "narrowly": 29747, + "naruto": 22732, + "nas": 3090, + "nas": 15250, + "nasa": 6841, + "nasal": 42853, + "nascar": 25723, + "nascar": 7868, + "nasdaq": 26629, + "nash": 6771, + "nash": 13620, + "nasheed": 49176, + "nashgrier": 33372, + "nashville": 45356, + "nashville": 8585, + "nasi": 47987, + "nasir": 47509, + "nassau": 34048, + "nasser": 43559, + "nasty": 32930, + "nasty": 8709, + "nat": 1276, + "nat": 11310, + "nata": 39392, + "natal": 28516, + "natali": 20296, + "natalia": 32978, + "natalie": 36634, + "natalie": 13595, + "natash": 48701, + "natasha": 23093, + "nate": 26643, + "nate": 7587, + "natgeo": 33009, + "natgeo": 25046, + "nath": 22203, + "nath": 19843, + "nathan": 13028, + "nathan": 9711, + "nathanfillion": 47422, + "nathaniel": 32667, + "nati": 1060, + "nati": 13384, + "natic": 44944, + "natin": 44358, + "nation": 2317, + "nation": 2670, + "national": 3126, + "national": 1362, + "nationalbestfriend": 42222, + "nationaldogday": 32227, + "nationalism": 29867, + "nationalist": 25058, + "nationality": 44451, + "nationally": 15130, + "nationalpark": 33060, + "nationalparks": 41204, + "nationals": 10784, + "nationaltrust": 34051, + "nations": 7654, + "nationwide": 13795, + "native": 20639, + "native": 4562, + "natives": 36060, + "nativity": 33988, + "natl": 39225, + "natl": 34465, + "nato": 13139, + "nats": 21106, + "natu": 2775, + "natur": 6800, + "natural": 13198, + "natural": 3288, + "naturally": 12995, + "naturals": 44686, + "nature": 9382, + "nature": 2625, + "naturelovers": 41514, + "naturephotography": 22533, + "natures": 15616, + "natureuk": 46193, + "nau": 5955, + "nau": 32878, + "naught": 41001, + "naughty": 47255, + "naughty": 15101, + "nautical": 31660, + "nav": 3413, + "nav": 25308, + "navajo": 35523, + "naval": 44725, + "naval": 13273, + "navar": 24848, + "navarro": 37104, + "nave": 42704, + "naveen": 43837, + "naver": 32534, + "navi": 16159, + "navi": 44848, + "navig": 12507, + "navigate": 24400, + "navigating": 33134, + "navigation": 20148, + "navigator": 38910, + "navis": 36377, + "navratri": 45428, + "navy": 28414, + "navy": 5598, + "naw": 16259, + "naw": 30500, + "nawaz": 49161, + "nawaz": 19523, + "nax": 38299, + "nay": 11704, + "nay": 16182, + "naya": 38917, + "nayanth": 38157, + "nayanthara": 45184, + "naz": 6363, + "naz": 35534, + "nazi": 12972, + "nazis": 21778, + "nb": 6459, + "nb": 6813, + "nba": 22524, + "nba": 5139, + "nbad": 43458, + "nbaf": 30127, + "nbafinals": 33803, + "nbap": 41956, + "nbaplayoffs": 43860, + "nbat": 46291, + "nbc": 9352, + "nbc": 8799, + "nbd": 24526, + "nbl": 42652, + "nc": 5021, + "nc": 4911, + "nca": 6921, + "ncaa": 9418, + "ncbd": 47221, + "ncc": 33195, + "ncc": 36686, + "ncds": 47573, + "ncfc": 31274, + "ncis": 33617, + "ncpol": 40562, + "ncr": 38474, + "ncs": 42689, + "nct": 27723, + "nct": 20319, + "ncwx": 36166, + "nd": 5625, + "nd": 1764, + "nda": 32862, + "ndc": 47564, + "ndi": 48229, + "ndp": 19257, + "nds": 31347, + "ndtv": 26261, + "ne": 557, + "ne": 1422, + "nea": 24068, + "neal": 33652, + "neal": 16730, + "near": 11296, + "near": 2252, + "nearby": 13314, + "nearest": 18985, + "nearing": 26571, + "nearly": 4816, + "nears": 37710, + "neat": 43201, + "neat": 15465, + "neath": 18315, + "neau": 31559, + "neb": 40209, + "nebra": 13371, + "nebraska": 14565, + "nebu": 49295, + "nebula": 22532, + "nec": 25109, + "nec": 22992, + "necess": 6961, + 
"necessarily": 25853, + "necessary": 8955, + "necessities": 43483, + "necessity": 33163, + "neck": 6066, + "neck": 6906, + "necklace": 7385, + "necklaces": 32276, + "necks": 29701, + "nectar": 33683, + "ned": 16030, + "ned": 1369, + "nederland": 49058, + "nee": 20494, + "nee": 10601, + "need": 3229, + "need": 1262, + "needed": 4049, + "needing": 22894, + "needle": 44490, + "needle": 19886, + "needles": 27250, + "needless": 39984, + "needs": 2536, + "needy": 30150, + "neel": 33092, + "neel": 46043, + "neer": 34245, + "nees": 47248, + "neet": 46362, + "neg": 5513, + "negan": 42623, + "negative": 8869, + "negatively": 40254, + "negativity": 34658, + "neglec": 18827, + "neglect": 33680, + "neglected": 31893, + "negli": 32594, + "negligence": 45658, + "negoti": 10216, + "negotiate": 32969, + "negotiating": 35510, + "negotiation": 36504, + "negotiations": 20433, + "negr": 42190, + "negro": 26554, + "neh": 40416, + "neh": 41697, + "neha": 44463, + "nehru": 30316, + "nei": 9366, + "neigh": 4061, + "neighb": 6534, + "neighbor": 7759, + "neighbor": 14485, + "neighborhood": 9471, + "neighborhoods": 26713, + "neighboring": 44754, + "neighbors": 13037, + "neighbour": 15858, + "neighbour": 23719, + "neighbourhood": 20312, + "neighbours": 17594, + "neil": 13591, + "neil": 8030, + "neilhimself": 45682, + "neill": 19324, + "neither": 14398, + "nek": 47727, + "neko": 47066, + "nel": 5476, + "nel": 2693, + "nell": 27081, + "nell": 8117, + "nelly": 21166, + "nels": 19296, + "nelson": 24774, + "nelson": 8586, + "nem": 45153, + "neman": 48553, + "neme": 30993, + "nemesis": 37811, + "nemo": 30441, + "nen": 17817, + "nen": 15451, + "nene": 44167, + "neo": 14562, + "neo": 11017, + "neon": 21043, + "neon": 13919, + "neonatal": 46464, + "neop": 49069, + "nep": 20739, + "nep": 41960, + "nepal": 25597, + "nepal": 10066, + "nepali": 47579, + "neph": 27926, + "nephe": 41810, + "nephew": 11689, + "nephews": 43747, + "nephro": 43054, + "neptune": 30566, + "ner": 2064, + "ner": 998, + "nerd": 24452, + "nerd": 12273, + "nerds": 22609, + "nerdy": 33124, + "nered": 17583, + "nerf": 42914, + "nering": 20226, + "nero": 29048, + "ners": 2129, + "nerve": 18571, + "nerves": 27813, + "nervous": 13928, + "nery": 48597, + "nes": 5457, + "nes": 4980, + "nesburg": 27159, + "nese": 32220, + "ness": 7187, + "ness": 1294, + "nesses": 20107, + "nessy": 32939, + "nest": 20302, + "nest": 8719, + "nesting": 28860, + "nestle": 43967, + "nestled": 38107, + "nests": 41133, + "net": 1851, + "net": 2315, + "netany": 23137, + "netanyahu": 23583, + "netball": 19761, + "netes": 44335, + "netfli": 6304, + "netflix": 35325, + "netflix": 6600, + "nether": 9946, + "netherlands": 11060, + "neti": 43980, + "netneutrality": 47794, + "nets": 8582, + "nett": 23403, + "nett": 6975, + "nette": 13271, + "network": 23285, + "network": 3304, + "networking": 9818, + "networks": 10004, + "neu": 3855, + "neu": 43342, + "neue": 45764, + "neur": 19001, + "neur": 31976, + "neural": 26388, + "neuro": 7401, + "neuro": 36000, + "neurological": 41718, + "neurology": 43197, + "neurons": 40442, + "neuroscience": 23381, + "neutr": 17207, + "neutral": 17011, + "neutrality": 26511, + "neutron": 44056, + "nev": 10236, + "nev": 43645, + "neva": 43304, + "nevada": 13499, + "neve": 44099, + "neve": 44023, + "never": 6746, + "never": 1426, + "neveragain": 45053, + "neverforget": 19242, + "nevergiveup": 42497, + "neverland": 41483, + "nevertheless": 48355, + "nevertrump": 47494, + "neville": 19269, + "nevis": 43670, + "new": 1218, + "new": 686, + "newark": 20240, + "newbie": 45427, + 
"newborn": 18320, + "newbury": 34169, + "newcastle": 41955, + "newcastle": 9302, + "newcomer": 30648, + "newcomers": 44037, + "newe": 40068, + "newell": 41436, + "newer": 33099, + "newest": 4990, + "newfound": 25250, + "newfoundland": 28079, + "newh": 18546, + "newin": 31911, + "newjersey": 32621, + "newly": 42186, + "newly": 7056, + "newman": 15815, + "newmarket": 38617, + "newmexico": 35238, + "newmusic": 32510, + "newmusic": 17201, + "newor": 25969, + "neworleans": 31205, + "newport": 42580, + "newport": 14846, + "newprofile": 14633, + "newprofilepic": 14754, + "newrelease": 34793, + "news": 6216, + "news": 1120, + "newsat": 43979, + "newsc": 28656, + "newscast": 45031, + "newsle": 10727, + "newsletter": 11069, + "newsnow": 48650, + "newsp": 7109, + "newspaper": 8786, + "newspapers": 22423, + "newsroom": 23200, + "newt": 37224, + "newton": 33122, + "newton": 12606, + "newtown": 31747, + "newyear": 22161, + "newyear": 12999, + "newyearseve": 37587, + "newyork": 18140, + "newyork": 10454, + "newyorkcity": 30460, + "newyorker": 39732, + "newzealand": 21117, + "nex": 6897, + "nex": 39720, + "next": 12434, + "next": 1131, + "nextgen": 41933, + "nexus": 19053, + "ney": 3857, + "ney": 1438, + "neymar": 21878, + "neys": 12616, + "nez": 27388, + "nf": 15195, + "nf": 25643, + "nfamily": 20098, + "nfc": 23695, + "nffc": 27893, + "nfl": 11219, + "nfl": 4691, + "nfldraft": 25002, + "ng": 10352, + "ng": 5215, + "nga": 35477, + "ngc": 29046, + "ngo": 38740, + "ngo": 24821, + "ngos": 34627, + "nguyen": 29947, + "nh": 3760, + "nh": 10803, + "nhc": 44817, + "nhl": 12290, + "nhl": 8167, + "nhlbruins": 39081, + "nhljets": 49357, + "nhm": 39483, + "nhpolitics": 36125, + "nhq": 42368, + "nhra": 30052, + "nhs": 23282, + "nhs": 7695, + "ni": 697, + "ni": 3256, + "nia": 3098, + "niag": 18071, + "niagar": 39298, + "niagara": 18965, + "niall": 41354, + "niall": 8327, + "niallo": 22855, + "niallofficial": 23084, + "niam": 39347, + "nian": 46003, + "nib": 31049, + "nic": 2109, + "nic": 6651, + "nica": 29040, + "nicar": 25119, + "nicaragua": 28423, + "nice": 28386, + "nice": 1805, + "nicely": 12303, + "nicer": 29488, + "nicest": 22967, + "niche": 25279, + "nichol": 7668, + "nicholas": 39814, + "nicholas": 13148, + "nicholls": 38846, + "nichols": 22730, + "nicholson": 28745, + "nick": 4209, + "nick": 4253, + "nickel": 22034, + "nickelo": 28668, + "nickelodeon": 33279, + "nicki": 17738, + "nickimin": 27390, + "nickiminaj": 27593, + "nickjonas": 43862, + "nickname": 24731, + "nicknamed": 45190, + "nicks": 15049, + "nicky": 28893, + "nicky": 22091, + "nico": 20850, + "nico": 17779, + "nicol": 9919, + "nicol": 48274, + "nicola": 21791, + "nicolas": 43813, + "nicolas": 18918, + "nicole": 21246, + "nicole": 10000, + "nicot": 45099, + "nicotine": 46697, + "nie": 9524, + "nie": 3501, + "niece": 12795, + "nieces": 44877, + "niel": 19109, + "niel": 26837, + "niels": 37154, + "nielsen": 28372, + "nier": 13014, + "nies": 10586, + "niest": 15007, + "nieu": 29781, + "nific": 4748, + "nifty": 25604, + "nig": 27933, + "nig": 28099, + "nigan": 48516, + "nigel": 33919, + "nigel": 15153, + "niger": 4524, + "niger": 29920, + "nigeri": 40913, + "nigeria": 6106, + "nigerian": 12167, + "nigerians": 25358, + "nigh": 13525, + "nigh": 48157, + "night": 3870, + "night": 930, + "nightclub": 20418, + "nighter": 41349, + "nighting": 36211, + "nightingale": 40696, + "nightlife": 28823, + "nightly": 28868, + "nightmare": 12867, + "nightmares": 24032, + "nightout": 44257, + "nights": 4296, + "nighttime": 38147, + "nightw": 39956, + "nih": 25783, + 
"nik": 5126, + "nik": 13705, + "nike": 16300, + "nike": 5783, + "nikeplus": 43154, + "niki": 36136, + "nikita": 37118, + "nikk": 38596, + "nikki": 23156, + "nikki": 16689, + "niko": 43771, + "nikol": 27430, + "nikola": 42146, + "nikon": 25488, + "nikon": 13849, + "nikov": 43960, + "nil": 16852, + "nil": 35030, + "nile": 24252, + "nim": 30402, + "nim": 42093, + "nima": 42586, + "nin": 5794, + "nin": 14145, + "nina": 13891, + "nine": 16213, + "nine": 7330, + "ninety": 48214, + "ning": 6050, + "ning": 762, + "ningham": 23395, + "ningly": 43537, + "nings": 4588, + "nington": 26214, + "ninj": 23225, + "ninja": 11969, + "ninjas": 42796, + "nino": 25633, + "ninten": 6184, + "nintendo": 13969, + "nintendo": 7886, + "nintendoswitch": 16404, + "ninth": 22770, + "nip": 33889, + "nip": 22333, + "nipp": 24634, + "nipple": 45987, + "nipples": 44774, + "nippon": 47960, + "nips": 49241, + "nir": 15503, + "nir": 40057, + "nireland": 45763, + "niro": 47373, + "nirvana": 28300, + "nis": 5609, + "nis": 3786, + "nish": 19834, + "nish": 13256, + "nished": 24141, + "nishi": 32386, + "nishings": 49247, + "nison": 45700, + "niss": 39043, + "nissan": 37635, + "nissan": 11082, + "nist": 17782, + "nister": 36640, + "nit": 4087, + "nit": 19011, + "nite": 8427, + "niti": 43964, + "niti": 45355, + "nitin": 37529, + "nitro": 30726, + "nitrogen": 30706, + "niture": 7840, + "nity": 12707, + "niu": 48187, + "niv": 47300, + "niversary": 29643, + "nix": 48552, + "nix": 32278, + "nixon": 20671, + "nj": 8343, + "nj": 6672, + "njcaa": 48992, + "njpw": 38992, + "nk": 22708, + "nk": 17456, + "nko": 36353, + "nl": 12057, + "nl": 7655, + "nli": 37502, + "nlp": 35680, + "nlwx": 49260, + "nm": 15956, + "nm": 11370, + "nmd": 43331, + "nme": 40454, + "nmwx": 47967, + "nn": 8947, + "nn": 12925, + "nnn": 26277, + "nnnn": 41420, + "no": 578, + "no": 871, + "noaa": 27557, + "noah": 28806, + "noah": 11519, + "nobel": 33742, + "nobel": 15605, + "nobelprize": 46074, + "noble": 29430, + "noble": 12051, + "nobody": 7009, + "noc": 16988, + "noc": 44420, + "nocchi": 46359, + "noch": 38672, + "noche": 29689, + "noches": 44166, + "nock": 16993, + "noctur": 26291, + "nocturnal": 41738, + "nod": 18648, + "nodapl": 39079, + "node": 31434, + "node": 24871, + "nodejs": 39262, + "nodes": 40534, + "noel": 38406, + "noel": 17496, + "nof": 29505, + "noff": 46979, + "nofilter": 16418, + "nog": 31157, + "noh": 40775, + "noi": 43115, + "noi": 39889, + "noida": 33404, + "noir": 39291, + "noir": 12953, + "nois": 22057, + "noise": 41018, + "noise": 9307, + "noises": 31575, + "noisse": 45686, + "noisy": 33495, + "nokia": 17731, + "nol": 8055, + "nola": 13289, + "nolan": 17323, + "nold": 40322, + "nole": 34654, + "noles": 40569, + "nollywood": 43145, + "nology": 42221, + "nom": 2981, + "nom": 12799, + "nomad": 27849, + "noman": 45592, + "nomin": 5643, + "nominate": 17122, + "nominated": 8710, + "nominating": 45747, + "nomination": 14136, + "nominations": 17124, + "nominee": 14122, + "nominees": 17873, + "nomnom": 26962, + "nomore": 35126, + "noms": 35706, + "non": 4282, + "non": 3353, + "none": 29644, + "none": 8906, + "nonetheless": 39675, + "nonfiction": 31654, + "nonprofit": 19315, + "nonprofits": 37935, + "nonsense": 19136, + "nonstop": 30300, + "nont": 25207, + "noo": 6759, + "noo": 46672, + "noodle": 19521, + "noodles": 15782, + "nook": 30088, + "noon": 37693, + "noon": 2347, + "noor": 46978, + "noor": 31323, + "nope": 15625, + "nor": 1062, + "nor": 6190, + "nora": 25890, + "norcal": 41970, + "nord": 19261, + "nord": 36067, + "nordic": 36439, + "nordic": 
20734, + "nordstrom": 38562, + "norfolk": 30232, + "norfolk": 12202, + "norm": 10990, + "norm": 22457, + "norma": 35757, + "normal": 28748, + "normal": 5967, + "normali": 45157, + "normally": 15870, + "norman": 22027, + "norman": 11338, + "normandy": 23840, + "normani": 44596, + "norms": 33011, + "norris": 21814, + "norse": 36559, + "norte": 35638, + "north": 3468, + "north": 2188, + "northampton": 49246, + "northampton": 26175, + "northan": 37081, + "northbound": 24228, + "northcarolina": 43386, + "northe": 24675, + "northeast": 42673, + "northeast": 13009, + "northeastern": 28297, + "northeasthour": 42869, + "norther": 26908, + "northern": 17210, + "northern": 5049, + "northernlights": 48940, + "northkorea": 38495, + "northside": 45957, + "northumber": 22295, + "northumberland": 22922, + "northwales": 49371, + "northwest": 12894, + "northwestern": 23685, + "norton": 18032, + "norway": 8780, + "norwe": 14414, + "norwegian": 15971, + "norwich": 37629, + "norwich": 15812, + "norwood": 37889, + "nos": 13420, + "nose": 24192, + "nose": 8231, + "noses": 48163, + "nostal": 12076, + "nostalgia": 16622, + "nostalgic": 24468, + "not": 2534, + "not": 783, + "notable": 22023, + "notch": 19476, + "notdead": 42059, + "note": 10910, + "note": 3246, + "notebook": 16365, + "notebooks": 37623, + "noted": 22501, + "notes": 5795, + "nothin": 24291, + "nothing": 28412, + "nothing": 2586, + "noti": 10686, + "notic": 6915, + "notice": 6683, + "noticeable": 40857, + "noticed": 9324, + "notices": 33459, + "noticias": 47759, + "noticing": 37571, + "notification": 22512, + "notifications": 23169, + "notified": 39454, + "noting": 38649, + "notion": 37856, + "notjust": 33212, + "notjustlakes": 45803, + "notmy": 39301, + "noto": 29878, + "noton": 48258, + "notor": 21711, + "notori": 44065, + "notorious": 22489, + "notre": 24397, + "notre": 15306, + "notredame": 34077, + "notsorry": 34361, + "nott": 9333, + "nott": 34989, + "notte": 47308, + "nottingham": 12852, + "notts": 25598, + "nou": 8751, + "nou": 30953, + "noun": 33663, + "nouri": 23796, + "nourish": 46025, + "nourished": 48354, + "nous": 29485, + "nouveau": 29948, + "nouvel": 34215, + "nov": 2264, + "nov": 4293, + "nova": 11236, + "novak": 26465, + "novasco": 33785, + "novascotia": 34744, + "novation": 39753, + "nove": 30507, + "novel": 15044, + "novel": 6080, + "novelist": 27314, + "novella": 42770, + "novels": 16040, + "novelty": 37750, + "november": 3680, + "nover": 37465, + "novi": 47957, + "novice": 33743, + "novo": 27504, + "novo": 36581, + "now": 2040, + "now": 692, + "nowadays": 26155, + "nowhere": 14108, + "nowplaying": 3708, + "nowwatching": 30852, + "nox": 27406, + "noxi": 39304, + "noxious": 42833, + "noy": 32787, + "np": 18205, + "np": 6314, + "npa": 42378, + "npc": 33966, + "npr": 39941, + "npr": 24078, + "nps": 22025, + "npt": 47231, + "nr": 6574, + "nr": 9713, + "nra": 17286, + "nrc": 45786, + "nrf": 47982, + "nrg": 48662, + "nrl": 27142, + "nrl": 18127, + "ns": 12405, + "ns": 1373, + "nsa": 23004, + "nsc": 32792, + "nsd": 36659, + "nsf": 34180, + "nsfw": 19847, + "nsi": 47824, + "nsw": 21301, + "nsw": 11693, + "nswpol": 44434, + "nt": 10902, + "nt": 3207, + "ntr": 30845, + "nts": 43775, + "ntt": 22859, + "ntv": 24807, + "ntv": 45304, + "nu": 1156, + "nu": 9444, + "nucle": 25693, + "nuclear": 34136, + "nuclear": 7279, + "nude": 16630, + "nudes": 32122, + "nue": 22834, + "nuestra": 45649, + "nuestro": 38590, + "nuev": 47861, + "nueva": 48810, + "nuevo": 30265, + "nufc": 15720, + "nuff": 37324, + "nug": 13471, + "nugent": 47457, + "nugget": 
25448, + "nuggets": 18970, + "nuh": 45950, + "nuit": 38815, + "nuk": 39228, + "nuke": 39399, + "nul": 29358, + "null": 47376, + "num": 17896, + "num": 30534, + "numb": 34639, + "numb": 39427, + "number": 44078, + "number": 2842, + "numbered": 25975, + "numbers": 6121, + "numer": 11442, + "numerous": 17082, + "numis": 39100, + "nun": 12511, + "nun": 28540, + "nunavut": 48626, + "nunes": 40697, + "nuns": 44061, + "nup": 46757, + "nur": 3920, + "nur": 33493, + "nure": 42480, + "nurse": 37547, + "nurse": 10058, + "nursery": 15540, + "nurses": 12938, + "nursing": 11126, + "nurture": 38865, + "nurturing": 45229, + "nus": 25157, + "nus": 18239, + "nut": 10358, + "nut": 6491, + "nutcracker": 36733, + "nutella": 27312, + "nutr": 6198, + "nutri": 15470, + "nutrient": 32900, + "nutrients": 24668, + "nutriti": 17978, + "nutrition": 41546, + "nutrition": 7989, + "nutritional": 26457, + "nutritious": 30387, + "nuts": 8644, + "nutshell": 26659, + "nutty": 39846, + "nv": 17217, + "nv": 16985, + "nvi": 22847, + "nvidia": 27325, + "nw": 7826, + "nw": 7030, + "nwa": 34237, + "nwo": 40976, + "nws": 23333, + "nws": 30998, + "nwsl": 48394, + "nwt": 25029, + "nx": 18810, + "nx": 16997, + "nxt": 35037, + "nxt": 17804, + "ny": 1383, + "ny": 1350, + "nya": 24165, + "nyc": 13304, + "nyc": 2832, + "nycc": 27187, + "nycfc": 47497, + "nye": 40723, + "nye": 13416, + "nyfw": 21089, + "nyk": 46841, + "nylon": 25915, + "nyo": 41534, + "nyo": 44586, + "nypd": 42293, + "nypd": 18279, + "nyr": 32538, + "nyrd": 47936, + "nys": 36375, + "nys": 23423, + "nyse": 32650, + "nyt": 46311, + "nyt": 12816, + "nytimes": 13772, + "nyu": 43143, + "nyu": 31355, + "nz": 10142, + "nz": 7082, + "o": 78, + "o": 334, + "oa": 11994, + "oahu": 37790, + "oak": 6010, + "oak": 7221, + "oakland": 42663, + "oakland": 12077, + "oakley": 27810, + "oaks": 16734, + "oakville": 38500, + "oasis": 18185, + "oat": 20095, + "oat": 34132, + "oates": 47094, + "oath": 20108, + "oatmeal": 26374, + "oats": 24150, + "oax": 43090, + "oaxaca": 47818, + "ob": 1411, + "ob": 14908, + "oba": 42902, + "oba": 15147, + "obam": 13174, + "obama": 4276, + "obamacare": 18005, + "obe": 11897, + "obe": 29117, + "obedience": 48921, + "ober": 15284, + "obese": 41757, + "obesity": 19499, + "obey": 26926, + "obi": 21454, + "obi": 18414, + "obile": 20513, + "obitu": 39218, + "obituary": 43580, + "objec": 7970, + "object": 14115, + "objective": 23663, + "objectives": 30238, + "objects": 13770, + "obl": 31452, + "oblast": 42672, + "obli": 11416, + "obligation": 34473, + "obligations": 38232, + "obligatory": 35020, + "oblivion": 45323, + "obo": 46001, + "obo": 26618, + "obrien": 31946, + "obs": 39162, + "obsc": 20392, + "obscure": 33337, + "obse": 8433, + "observ": 9050, + "observation": 20250, + "observations": 27409, + "observatory": 21236, + "observe": 23217, + "observed": 21267, + "observer": 22077, + "observers": 47544, + "observing": 28359, + "obsessed": 9744, + "obsession": 15718, + "obsi": 47323, + "obsole": 35561, + "obsolete": 40628, + "obst": 29398, + "obstac": 24075, + "obstacle": 29751, + "obstacles": 24480, + "obste": 49103, + "obstru": 44876, + "obstruc": 38762, + "obstruction": 40240, + "obtain": 26555, + "obtained": 29322, + "obvious": 13959, + "obviously": 10068, + "oc": 1566, + "oc": 6603, + "oca": 31120, + "ocal": 38148, + "occ": 43940, + "occa": 8530, + "occasion": 12280, + "occasional": 33059, + "occasionally": 32479, + "occasions": 26154, + "occer": 20804, + "occi": 42994, + "occu": 7863, + "occult": 42529, + "occup": 11152, + "occupation": 18624, + "occupational": 
30644, + "occupied": 17271, + "occupy": 22453, + "occupy": 24210, + "occur": 11264, + "occur": 21813, + "occurred": 19850, + "occurrence": 40615, + "occurring": 31335, + "occurs": 26563, + "ocd": 35904, + "oce": 3509, + "ocean": 12941, + "ocean": 4918, + "oceans": 16792, + "och": 29334, + "och": 32011, + "oche": 33045, + "oci": 9891, + "ocity": 46039, + "ock": 33579, + "ock": 21313, + "ocks": 22410, + "oclock": 36274, + "oco": 32553, + "ocon": 33090, + "ocr": 45813, + "ocre": 40320, + "ocs": 27297, + "oct": 4565, + "octa": 23444, + "octag": 37768, + "octagon": 49167, + "octane": 43040, + "octavia": 47416, + "octo": 31032, + "october": 3481, + "octopus": 22327, + "ocu": 22709, + "oculus": 30082, + "od": 4886, + "od": 9719, + "oda": 24777, + "oday": 41954, + "odd": 15525, + "odd": 11387, + "oddly": 34213, + "odds": 11555, + "ode": 19125, + "ode": 19639, + "odell": 41556, + "odessa": 43574, + "odi": 12223, + "odi": 18853, + "odin": 35175, + "odisha": 15737, + "odo": 49188, + "odo": 40993, + "odor": 39509, + "odu": 35095, + "odu": 39904, + "odyssey": 19991, + "oe": 24251, + "oe": 11667, + "oec": 24288, + "oecd": 30816, + "oem": 29650, + "oes": 3643, + "of": 684, + "of": 539, + "ofa": 29774, + "ofc": 19877, + "ofe": 30000, + "ofer": 47322, + "off": 892, + "off": 1007, + "offe": 8261, + "offee": 34059, + "offen": 7231, + "offence": 34594, + "offences": 33972, + "offended": 30765, + "offender": 48294, + "offenders": 35878, + "offense": 15253, + "offensive": 11037, + "offer": 20607, + "offer": 3271, + "offered": 9395, + "offering": 6896, + "offerings": 24535, + "offers": 4679, + "offic": 3276, + "office": 18033, + "office": 2171, + "officeof": 38750, + "officeofrg": 47100, + "officer": 4683, + "officers": 6335, + "offices": 10933, + "offici": 1401, + "official": 5768, + "official": 1868, + "officially": 4226, + "officials": 7658, + "officiel": 26548, + "offl": 16851, + "offline": 22724, + "offro": 32198, + "offroad": 37173, + "offs": 23987, + "offseason": 25485, + "offset": 28843, + "offshore": 15496, + "offside": 49347, + "offspring": 38635, + "offthe": 38189, + "ofi": 36692, + "ofi": 49090, + "oficial": 18061, + "oft": 16693, + "oftball": 39768, + "often": 4864, + "ofthe": 7592, + "oftheday": 6988, + "oftheweek": 20654, + "oftheyear": 33975, + "og": 11542, + "og": 8555, + "oga": 47312, + "ogden": 42011, + "ogil": 39013, + "ography": 22399, + "ogue": 24761, + "ogun": 48970, + "oh": 5648, + "oh": 1779, + "ohana": 48330, + "ohh": 23076, + "ohhh": 27697, + "ohhhh": 40201, + "ohi": 5207, + "ohio": 18951, + "ohio": 6155, + "ohiostate": 41324, + "ohl": 45547, + "ohl": 41095, + "ohmy": 29758, + "ohn": 48043, + "ohs": 39542, + "ohwx": 47993, + "oi": 27357, + "oi": 13934, + "oic": 45554, + "oid": 14758, + "oids": 21847, + "oil": 11973, + "oil": 2870, + "oiland": 32316, + "oilandgas": 34130, + "oilers": 21627, + "oilpainting": 34279, + "oils": 17886, + "oily": 47550, + "oir": 48079, + "oir": 37113, + "ois": 23262, + "oit": 18453, + "oitnb": 34865, + "oj": 30986, + "oj": 34553, + "ok": 1944, + "ok": 2481, + "oka": 42258, + "oka": 19092, + "okan": 41263, + "okanagan": 43233, + "okay": 4917, + "okc": 42418, + "okc": 18357, + "oke": 26636, + "oke": 23598, + "oki": 20390, + "okin": 30687, + "okinawa": 35877, + "okla": 9431, + "oklahoma": 10170, + "oko": 26892, + "oko": 26095, + "okstate": 36356, + "oktoberfest": 32026, + "oku": 45010, + "oku": 43829, + "okwx": 27336, + "ol": 562, + "ol": 2985, + "ola": 20499, + "ola": 3373, + "olaf": 39709, + "olan": 48489, + "olan": 24227, + "oland": 26452, + "olas": 40800, + 
"old": 4931, + "old": 896, + "olde": 37731, + "older": 7700, + "oldest": 9285, + "oldham": 29929, + "oldie": 35280, + "oldies": 36278, + "oldman": 48614, + "olds": 8580, + "oldschool": 44384, + "oldschool": 25133, + "oldsmobile": 45396, + "ole": 9089, + "ole": 1947, + "oled": 46768, + "oler": 24069, + "oles": 16962, + "olf": 16346, + "olga": 34779, + "oli": 3811, + "oli": 8810, + "olic": 31341, + "oligar": 46185, + "olim": 47769, + "olin": 37823, + "olin": 18283, + "olina": 34711, + "oline": 17441, + "oling": 38033, + "olini": 36040, + "olis": 49397, + "olithic": 35574, + "olive": 22486, + "olive": 9898, + "oliver": 22882, + "oliver": 9261, + "olives": 27149, + "olivi": 20773, + "olivia": 11697, + "olivier": 23891, + "oll": 32270, + "oll": 15510, + "olla": 31908, + "ollie": 24434, + "olls": 42697, + "olly": 23998, + "olo": 14628, + "olo": 7606, + "ological": 12345, + "ologist": 23442, + "ologists": 30912, + "ology": 4627, + "olor": 29245, + "olph": 25077, + "ols": 2236, + "olsen": 26307, + "olson": 28046, + "olt": 46252, + "olu": 16502, + "olu": 46302, + "olulu": 27645, + "oly": 20323, + "oly": 24823, + "olym": 3594, + "olympi": 13597, + "olympia": 23965, + "olympiad": 47694, + "olympian": 25420, + "olympians": 44583, + "olympic": 26099, + "olympic": 6388, + "olympics": 7629, + "olympus": 30960, + "om": 547, + "om": 3932, + "oma": 44603, + "oma": 5358, + "omaha": 16509, + "oman": 22088, + "oman": 10871, + "omar": 19488, + "omar": 13367, + "omars": 37099, + "omas": 36023, + "omat": 40788, + "omb": 34447, + "ombe": 35967, + "omd": 49346, + "ome": 3693, + "ome": 5832, + "omed": 16835, + "omega": 13465, + "omelette": 38789, + "omen": 9969, + "omen": 25469, + "oment": 43683, + "omeo": 39844, + "omer": 24087, + "omer": 17902, + "omes": 25736, + "ometer": 20060, + "ometric": 38702, + "omez": 12541, + "omf": 47496, + "omfg": 12523, + "omg": 35233, + "omg": 3186, + "omi": 24097, + "omi": 10341, + "omic": 40536, + "omic": 12793, + "omics": 15138, + "omile": 46915, + "omin": 16457, + "omination": 42571, + "oming": 10796, + "ominous": 40914, + "omni": 18793, + "omni": 39489, + "omnibus": 44760, + "omnic": 48383, + "omo": 14478, + "omo": 11066, + "omon": 48758, + "omor": 29431, + "oms": 3770, + "omusic": 38965, + "omy": 40805, + "omy": 6884, + "on": 521, + "on": 525, + "ona": 2687, + "onair": 29511, + "onal": 918, + "onboard": 21689, + "once": 16331, + "once": 2654, + "onceupon": 28122, + "onceuponatime": 33505, + "onco": 46700, + "oncology": 24593, + "ond": 27918, + "ond": 2636, + "onda": 32643, + "onday": 29864, + "onde": 44532, + "ondo": 29529, + "ondon": 42043, + "ondon": 11851, + "one": 1980, + "one": 637, + "onec": 27746, + "oned": 28012, + "oned": 4698, + "onedirection": 16245, + "onee": 44433, + "oneill": 44808, + "onelove": 47417, + "onent": 12147, + "onents": 11709, + "oneof": 48478, + "onep": 20440, + "onepiece": 43153, + "oneplus": 25981, + "oner": 30055, + "oner": 6071, + "oners": 12324, + "ones": 20757, + "ones": 1575, + "oneself": 46874, + "onesie": 33237, + "oness": 25379, + "onet": 36058, + "oneteam": 41094, + "onetsy": 33392, + "onew": 43848, + "onews": 18696, + "onex": 49116, + "oney": 44498, + "oney": 9408, + "onf": 41790, + "onfox": 29874, + "ong": 2787, + "ong": 846, + "onga": 30259, + "ongchang": 35071, + "ongi": 21754, + "ongo": 31226, + "ongoing": 10393, + "ongs": 12143, + "oni": 4385, + "oni": 8048, + "onia": 8001, + "onial": 27599, + "onian": 21090, + "onic": 15838, + "onic": 3711, + "onica": 14631, + "onics": 9779, + "onie": 35249, + "onies": 22601, + "onimo": 41271, + 
"oning": 5197, + "onion": 10985, + "onions": 15255, + "onist": 10099, + "onists": 19659, + "onix": 27370, + "onized": 43657, + "onlin": 31103, + "online": 12940, + "online": 2027, + "onlinemarketing": 41820, + "onlineshopping": 38587, + "only": 11646, + "only": 1033, + "onlyin": 32947, + "onna": 25438, + "onna": 35458, + "onnaise": 48934, + "onne": 23466, + "onnell": 45613, + "ono": 28165, + "ono": 14388, + "onom": 48014, + "onomy": 36873, + "onpoli": 20708, + "ons": 26076, + "ons": 708, + "onsale": 36324, + "onset": 30527, + "onsite": 37336, + "onstage": 21821, + "onstorm": 49333, + "ont": 34303, + "ont": 11157, + "ontari": 6739, + "ontario": 42766, + "ontario": 7436, + "onte": 34723, + "onthe": 12241, + "onther": 46563, + "ontheroad": 47516, + "onthisday": 6862, + "onto": 11745, + "onto": 3141, + "ontology": 37364, + "ontour": 32155, + "onu": 44142, + "onward": 34827, + "onwards": 20682, + "ony": 9490, + "ony": 2926, + "onym": 11483, + "onymous": 13038, + "onyx": 31353, + "oo": 574, + "oo": 2822, + "ood": 16429, + "ood": 738, + "oodle": 45289, + "oods": 44660, + "oof": 42270, + "ooh": 16806, + "ook": 22326, + "ook": 8394, + "ooks": 31082, + "ool": 37702, + "ool": 929, + "oom": 22786, + "oom": 15002, + "oomf": 40607, + "oon": 35651, + "oon": 7100, + "ooo": 9571, + "oooh": 28927, + "oooo": 4002, + "oooo": 13643, + "ooooo": 12532, + "oooooo": 43590, + "oooooo": 20372, + "ooooooo": 30859, + "oooooooo": 15473, + "oooooooo": 43408, + "oooooooooooooooo": 48645, + "oop": 7326, + "ooper": 39906, + "oops": 9116, + "oor": 35239, + "oos": 9896, + "oosa": 30834, + "oose": 38941, + "oot": 17667, + "ootball": 28914, + "ootd": 16547, + "ooth": 12682, + "oott": 34316, + "ooza": 22809, + "op": 676, + "op": 3691, + "opa": 28949, + "opal": 28982, + "opar": 18167, + "opath": 33079, + "opathic": 37521, + "opathy": 28466, + "opau": 27239, + "opd": 38288, + "ope": 31694, + "ope": 11440, + "opec": 33138, + "opel": 36952, + "open": 3647, + "open": 1488, + "openaccess": 26591, + "opend": 28069, + "opendata": 35709, + "openday": 46991, + "opened": 5303, + "opener": 8998, + "openhouse": 36091, + "opening": 33728, + "opening": 2516, + "openingday": 36359, + "openings": 27643, + "openly": 23005, + "opens": 4801, + "opensource": 29930, + "oper": 2796, + "oper": 37533, + "opera": 8056, + "operate": 19306, + "operated": 23031, + "operates": 38675, + "operating": 12218, + "operation": 27173, + "operation": 7639, + "operational": 18237, + "operations": 8106, + "operative": 28380, + "operator": 15972, + "operators": 19267, + "opers": 48728, + "opes": 37258, + "oph": 6796, + "opha": 38634, + "ophel": 45017, + "ophelia": 49118, + "ophi": 44547, + "ophile": 35915, + "opho": 12900, + "ophobia": 21111, + "ophobic": 29934, + "ophon": 25120, + "ophone": 26345, + "ophthal": 33135, + "ophy": 28539, + "opi": 40056, + "opi": 48994, + "opin": 7636, + "opini": 14825, + "opinion": 7843, + "opinions": 16192, + "opio": 17371, + "opioid": 22833, + "opioids": 47578, + "opla": 36270, + "ople": 25663, + "opol": 15173, + "opoly": 23729, + "opor": 39650, + "opoulos": 42020, + "opp": 2020, + "opp": 21024, + "oppa": 23637, + "oppo": 7399, + "oppo": 41770, + "opponent": 17002, + "opponents": 19664, + "oppor": 2914, + "opportun": 2939, + "opportunities": 5978, + "opportunity": 4004, + "oppos": 10091, + "oppose": 23617, + "opposed": 22509, + "opposes": 47471, + "opposing": 24376, + "opposite": 12872, + "opposition": 11062, + "oppre": 17341, + "oppressed": 41492, + "oppression": 30650, + "opra": 28291, + "oprah": 22562, + "opry": 35340, + "ops": 3054, 
+ "opt": 45103, + "opt": 27188, + "opted": 42035, + "opti": 6580, + "optic": 25190, + "optic": 24755, + "optical": 16822, + "optics": 27165, + "optim": 22331, + "optimal": 25235, + "optimi": 9737, + "optimis": 39459, + "optimism": 25226, + "optimist": 44581, + "optimistic": 23104, + "optimization": 25125, + "optimize": 30456, + "optimized": 43939, + "optimizing": 49157, + "optimum": 35974, + "optimus": 43453, + "option": 8464, + "optional": 25411, + "options": 7063, + "optome": 35533, + "opul": 39858, + "opus": 33295, + "opy": 21835, + "or": 523, + "or": 541, + "ora": 4301, + "orac": 24673, + "oracle": 37308, + "oracle": 15966, + "orah": 40820, + "orail": 45120, + "oral": 32490, + "oral": 6007, + "orama": 33619, + "oran": 32209, + "oran": 28395, + "orang": 22116, + "orange": 13957, + "orange": 4287, + "oranges": 32417, + "orangu": 36112, + "orb": 28894, + "orb": 36958, + "orbit": 19713, + "orbital": 40312, + "orc": 44305, + "orca": 18631, + "orcas": 47676, + "orch": 11893, + "orchar": 40226, + "orchard": 19530, + "orche": 8004, + "orchestr": 42937, + "orchestra": 9573, + "orchestral": 40285, + "orchi": 23696, + "orchid": 18678, + "orchids": 28376, + "ord": 26903, + "ord": 11502, + "orda": 33462, + "ordained": 38302, + "order": 24613, + "order": 2191, + "ordered": 8335, + "ordering": 19588, + "orderly": 43457, + "orders": 6187, + "ordin": 4378, + "ordinance": 38583, + "ordinary": 8012, + "ore": 3580, + "ore": 1423, + "orean": 36696, + "ored": 5133, + "oregon": 21759, + "oregon": 8035, + "oren": 21645, + "oreo": 21873, + "oreos": 41688, + "ores": 17328, + "org": 3401, + "org": 5593, + "organ": 3338, + "organ": 13213, + "organi": 3636, + "organic": 24080, + "organic": 5980, + "organics": 44199, + "organis": 13204, + "organisation": 15868, + "organisations": 20651, + "organise": 36073, + "organised": 13191, + "organiser": 49141, + "organisers": 35778, + "organising": 22787, + "organisms": 37041, + "organiz": 11107, + "organization": 8064, + "organizational": 29510, + "organizations": 13453, + "organize": 19973, + "organized": 10681, + "organizer": 23905, + "organizers": 27191, + "organizing": 15779, + "organs": 29872, + "orgs": 29500, + "ori": 1540, + "ori": 8693, + "oria": 11474, + "orial": 8648, + "orian": 21193, + "oric": 43810, + "orice": 41341, + "orie": 18815, + "orient": 13149, + "orient": 30770, + "oriental": 23056, + "orientation": 16873, + "oriente": 40390, + "oriented": 24596, + "orienteering": 42985, + "ories": 5934, + "orig": 2273, + "orig": 38463, + "origami": 31832, + "origin": 2555, + "origin": 12372, + "original": 18496, + "original": 3117, + "originally": 12849, + "originals": 16953, + "originated": 41823, + "origins": 16291, + "orin": 39863, + "oring": 3006, + "orio": 24308, + "orioles": 21430, + "orion": 21765, + "oris": 37064, + "orities": 7903, + "ority": 5556, + "orium": 12015, + "ork": 22202, + "ork": 37235, + "orkney": 34254, + "orl": 39465, + "orlando": 32247, + "orlando": 7827, + "orleans": 11127, + "orm": 38464, + "orn": 25412, + "orn": 8130, + "ornam": 36122, + "ornament": 23409, + "ornamental": 46270, + "ornaments": 28968, + "ornate": 46865, + "orni": 27713, + "ornithology": 38275, + "orns": 19340, + "oro": 9848, + "oro": 14573, + "orous": 19286, + "orph": 17318, + "orphan": 22718, + "orphan": 28994, + "orphanage": 45196, + "orphaned": 46792, + "orphans": 36588, + "orphe": 39186, + "orr": 32977, + "ors": 1127, + "orship": 20846, + "ort": 1019, + "ortega": 39727, + "orth": 22584, + "orth": 24461, + "ortho": 11366, + "orthodon": 37730, + "orthodox": 19008, + 
"orthop": 42123, + "orthopedic": 49341, + "ortiz": 23544, + "orton": 37238, + "oru": 44629, + "oru": 31281, + "orum": 42724, + "orwell": 41218, + "ory": 16983, + "ory": 1985, + "os": 2211, + "os": 1299, + "osa": 16340, + "osa": 17237, + "osaka": 21347, + "osborne": 22402, + "osbourne": 43376, + "osc": 5092, + "oscar": 21157, + "oscar": 8191, + "oscars": 11098, + "osce": 37303, + "oscill": 38272, + "ose": 46942, + "ose": 22541, + "osh": 30717, + "osh": 35011, + "osha": 33907, + "oshi": 34770, + "osi": 25247, + "osi": 17636, + "osis": 13903, + "osity": 12730, + "oslo": 20547, + "osm": 31626, + "osman": 46539, + "oso": 42793, + "oso": 21285, + "osp": 24387, + "ospre": 49001, + "osprey": 37893, + "oss": 29362, + "oss": 34640, + "ost": 23701, + "ost": 18749, + "oste": 20632, + "osteo": 43163, + "oster": 31781, + "ostr": 33673, + "ostrich": 47640, + "osu": 29480, + "osu": 19818, + "oswald": 38471, + "ot": 1863, + "ot": 2062, + "ota": 17509, + "ota": 8741, + "otago": 45919, + "otaku": 40743, + "otas": 47616, + "otc": 37934, + "otd": 5683, + "ote": 28511, + "ote": 19744, + "otes": 27280, + "oth": 33262, + "oth": 33519, + "other": 9758, + "other": 1010, + "others": 3326, + "otherwise": 12376, + "oti": 19567, + "oti": 45564, + "otic": 9671, + "otis": 28246, + "otive": 10877, + "oto": 23946, + "oto": 23399, + "otp": 29822, + "otr": 38685, + "ots": 5769, + "ott": 10167, + "ott": 7936, + "otta": 7623, + "otta": 20941, + "ottawa": 49027, + "ottawa": 9019, + "otte": 35214, + "otter": 34710, + "otter": 22456, + "otters": 38883, + "otti": 36721, + "ottnews": 33995, + "otto": 17730, + "ottoman": 27503, + "otw": 35259, + "otwol": 46868, + "ou": 520, + "ou": 6544, + "ouat": 32954, + "ouch": 13493, + "oud": 1359, + "oue": 48838, + "ouf": 34618, + "ough": 4204, + "ough": 991, + "ought": 2253, + "oughton": 36860, + "oui": 39421, + "ouk": 21796, + "oul": 20253, + "oul": 8081, + "ould": 859, + "oulos": 32808, + "oun": 636, + "oun": 20960, + "ounce": 15027, + "ounces": 30299, + "ound": 2013, + "ound": 853, + "oundation": 40132, + "ounded": 9634, + "ounding": 11944, + "ounds": 2753, + "oung": 35875, + "oung": 25341, + "ounge": 29427, + "ount": 43801, + "ount": 4172, + "ounts": 10963, + "oup": 32815, + "our": 727, + "our": 581, + "oura": 29806, + "oura": 36352, + "ourable": 24126, + "ourage": 34525, + "oural": 45840, + "oured": 6956, + "ouri": 12696, + "ouring": 12000, + "ourism": 25496, + "ourke": 26480, + "ourlives": 37541, + "ouro": 41224, + "ours": 1491, + "ourse": 15415, + "ourselves": 10124, + "ourt": 22960, + "oury": 29484, + "ous": 1987, + "ous": 879, + "ouse": 32048, + "ouse": 7603, + "ouses": 33666, + "ously": 2501, + "ousness": 10689, + "ousy": 28302, + "out": 1130, + "out": 620, + "outa": 35187, + "outage": 27320, + "outages": 40353, + "outback": 28532, + "outbound": 41256, + "outbreak": 20103, + "outcome": 16552, + "outcomes": 14016, + "outdated": 38313, + "outdoor": 19184, + "outdoor": 6368, + "outdoors": 10469, + "oute": 44180, + "outed": 34435, + "outer": 30499, + "outer": 14188, + "outes": 39600, + "outfield": 41826, + "outfit": 6525, + "outfits": 16366, + "outfitters": 37725, + "outfy": 34920, + "outgoing": 27302, + "outh": 16933, + "outh": 8111, + "outine": 35452, + "outing": 11251, + "outlander": 45820, + "outlander": 17095, + "outlaw": 37498, + "outlaw": 27340, + "outlaws": 30935, + "outlet": 16855, + "outlets": 20822, + "outline": 26894, + "outlines": 29159, + "outlining": 45960, + "outlook": 12983, + "outof": 43958, + "outpatient": 46603, + "outpost": 44622, + "output": 17255, + "outra": 14262, 
+ "outrage": 23577, + "outraged": 43402, + "outrageous": 29342, + "outre": 14373, + "outreach": 15297, + "outright": 38200, + "outs": 5790, + "outsi": 22515, + "outside": 47693, + "outside": 2782, + "outsider": 41196, + "outsiders": 41742, + "outskirts": 42088, + "outsourcing": 34543, + "outstanding": 6387, + "outta": 15807, + "outtuesday": 48692, + "outw": 34650, + "oux": 40960, + "oux": 14228, + "ov": 6420, + "ov": 8479, + "ova": 12762, + "oval": 15039, + "ovarian": 42913, + "ovation": 24333, + "ove": 8649, + "ove": 15456, + "oven": 44620, + "oven": 12579, + "over": 1658, + "over": 962, + "overall": 6914, + "overboard": 42982, + "overcame": 47235, + "overcast": 36942, + "overcome": 14365, + "overcoming": 29348, + "overdose": 27017, + "overdrive": 40088, + "overdue": 30240, + "overflow": 32885, + "overflowing": 45370, + "overhaul": 31531, + "overhead": 20321, + "overland": 38808, + "overlay": 44827, + "overload": 24327, + "overlook": 35767, + "overlooked": 27632, + "overlooking": 17319, + "overly": 28820, + "overnight": 9913, + "overpass": 44310, + "overrated": 38214, + "overs": 45774, + "overs": 17329, + "overseas": 15100, + "oversight": 32494, + "oversized": 31557, + "overtime": 19347, + "overturned": 31048, + "overview": 14789, + "overwatch": 18124, + "overweight": 43465, + "overwhel": 12204, + "overwhelmed": 23459, + "overwhelming": 20306, + "overwhelmingly": 43549, + "ovi": 32508, + "ovic": 22417, + "ovich": 27623, + "ovie": 47677, + "ovo": 41920, + "ovo": 18065, + "ovski": 26167, + "ow": 2032, + "ow": 2250, + "owa": 32770, + "owe": 19073, + "owed": 37641, + "owen": 24838, + "owen": 12056, + "owens": 20664, + "owes": 35069, + "owing": 48582, + "owl": 34332, + "owl": 9899, + "owls": 18247, + "own": 3845, + "own": 1758, + "owned": 8536, + "owner": 5019, + "owners": 7712, + "ownership": 16583, + "owning": 24661, + "owns": 17533, + "owo": 46142, + "ows": 27423, + "owski": 22573, + "ox": 3282, + "ox": 12071, + "oxfam": 45466, + "oxford": 28588, + "oxford": 8824, + "oxfordshire": 37855, + "oxi": 33731, + "oxi": 48147, + "oxid": 17701, + "oxide": 28235, + "oxo": 37088, + "oxy": 12432, + "oxygen": 16214, + "oy": 6638, + "oy": 12437, + "oya": 38894, + "oye": 48677, + "oyster": 40545, + "oyster": 17253, + "oysters": 22672, + "oz": 10584, + "oz": 6044, + "ozar": 31848, + "ozil": 41365, + "ozone": 37052, + "ozzy": 39549, + "p": 79, + "p": 335, + "pa": 765, + "pa": 2217, + "paa": 32812, + "pab": 9354, + "pablo": 42172, + "pablo": 14473, + "pac": 2332, + "pac": 7608, + "pace": 40600, + "pace": 9450, + "paced": 32611, + "pacers": 23976, + "paces": 43001, + "paci": 5699, + "pacific": 19723, + "pacific": 6654, + "pacing": 45202, + "pack": 2711, + "pack": 3420, + "package": 7053, + "packaged": 29656, + "packages": 14305, + "packaging": 11658, + "packard": 46421, + "packed": 5883, + "packer": 28209, + "packers": 14294, + "packet": 25022, + "packets": 40448, + "packing": 9829, + "packs": 11086, + "paco": 41364, + "pacqui": 28456, + "pacquiao": 30485, + "pact": 27182, + "pad": 3798, + "pad": 7601, + "padded": 42253, + "paddington": 33162, + "paddle": 38276, + "paddle": 20811, + "paddling": 40645, + "paddock": 29590, + "paddy": 33103, + "paddy": 19855, + "padi": 47037, + "padilla": 22380, + "padma": 44595, + "padma": 46457, + "padre": 38343, + "padres": 22829, + "pads": 17353, + "paedi": 41488, + "paella": 46924, + "paf": 47185, + "pafc": 49259, + "pag": 4151, + "pag": 30525, + "pagan": 27854, + "page": 14996, + "page": 2504, + "pageant": 22139, + "pages": 8082, + "pagoda": 44309, + "pah": 41054, + "pah": 
26884, + "pai": 20624, + "pai": 21198, + "paid": 5057, + "paige": 33659, + "paige": 16022, + "paign": 31796, + "pain": 2141, + "pain": 4495, + "paine": 38069, + "painful": 16361, + "pains": 25639, + "paint": 7948, + "paint": 5185, + "paintball": 39730, + "painted": 6433, + "painter": 10888, + "painters": 35703, + "painting": 49164, + "painting": 3086, + "paintings": 9956, + "paints": 21672, + "pair": 19848, + "pair": 4038, + "paired": 12433, + "pairing": 16313, + "pairings": 41152, + "pairs": 9950, + "pais": 16878, + "paisley": 22954, + "pajam": 24110, + "pajama": 40244, + "pajamas": 37231, + "pak": 13186, + "pak": 9094, + "paki": 3438, + "pakistan": 10713, + "pakistan": 3994, + "pakistani": 14050, + "pakistanis": 45707, + "pakv": 38196, + "pal": 1850, + "pal": 3611, + "pala": 17895, + "palace": 6381, + "palaces": 45625, + "palad": 28371, + "palae": 43379, + "palais": 35673, + "palate": 34666, + "palawan": 48202, + "palazzo": 36006, + "pale": 4768, + "pale": 12518, + "paleo": 36741, + "paleo": 22198, + "paler": 38028, + "palermo": 40635, + "palestin": 9449, + "palestine": 11682, + "palestinian": 11764, + "palestinians": 21874, + "palette": 13901, + "pali": 48063, + "palin": 40153, + "palis": 44256, + "pality": 27296, + "pall": 35817, + "palla": 21208, + "palladium": 37888, + "pallet": 39057, + "palli": 28954, + "palliative": 46014, + "pally": 46073, + "palm": 19651, + "palm": 8612, + "palma": 29888, + "palmer": 40112, + "palmer": 13633, + "palms": 27059, + "palo": 31562, + "palom": 47698, + "palooza": 25861, + "pals": 11043, + "palsy": 46651, + "pam": 8228, + "pam": 18513, + "pamela": 26991, + "pamp": 37653, + "pamper": 44345, + "pamph": 41332, + "pan": 1072, + "pan": 7437, + "panam": 24606, + "panama": 15522, + "panas": 26207, + "panasonic": 29750, + "pancake": 18723, + "pancakes": 15308, + "panch": 27251, + "pancra": 42472, + "pancre": 27708, + "pancreatic": 49337, + "pancy": 41625, + "pand": 5631, + "panda": 12952, + "pandas": 35119, + "pande": 38419, + "pandey": 34895, + "pandit": 41191, + "pandor": 30250, + "pandora": 17727, + "pandoramusic": 42344, + "pane": 27470, + "panel": 3724, + "paneli": 19410, + "panelist": 39719, + "panelists": 24619, + "panels": 12735, + "panera": 48471, + "pang": 16756, + "pang": 23672, + "panhandle": 40919, + "pani": 36092, + "panic": 46671, + "panic": 14124, + "panini": 30410, + "pann": 42302, + "panna": 49065, + "pano": 36165, + "panor": 12962, + "panorama": 19763, + "panoramic": 22563, + "pans": 35204, + "pant": 22550, + "panther": 22825, + "panther": 13262, + "panthers": 10494, + "panties": 32515, + "panto": 28776, + "pantry": 25608, + "pants": 5003, + "panty": 44217, + "pany": 45567, + "panzer": 41159, + "pao": 33790, + "paola": 44689, + "paolo": 48488, + "paolo": 21133, + "pap": 1884, + "pap": 30756, + "papa": 12211, + "papar": 32782, + "paparazzi": 37842, + "papaya": 44098, + "paper": 8680, + "paper": 2802, + "paperback": 17928, + "papers": 8204, + "paperwork": 35785, + "papi": 35177, + "papp": 26361, + "paprika": 44793, + "papua": 32629, + "par": 699, + "par": 9163, + "para": 18355, + "para": 8976, + "parach": 23147, + "parachute": 30122, + "parad": 37143, + "parade": 5809, + "parades": 46479, + "paradi": 6658, + "paradig": 27786, + "paradigm": 33485, + "paradise": 45869, + "paradise": 7247, + "paradox": 33109, + "parag": 11866, + "paragon": 48099, + "paragra": 24903, + "paragraph": 28499, + "paragu": 38021, + "paraguay": 43579, + "paral": 15143, + "paralle": 13184, + "parallel": 18201, + "paralleled": 42520, + "parallels": 46101, + "paraly": 
30255, + "paralym": 18727, + "paralympic": 30806, + "paralympics": 37162, + "paralysis": 45702, + "param": 12250, + "parame": 27106, + "paramedic": 34630, + "paramedics": 35991, + "parameters": 44890, + "paramore": 34401, + "paramount": 26642, + "parano": 30283, + "paranoid": 43029, + "paranor": 16940, + "paranormal": 19047, + "parap": 41091, + "paras": 15198, + "parasite": 42460, + "parasites": 46175, + "parc": 30914, + "parcel": 30367, + "parcels": 45589, + "pard": 18773, + "pardon": 47606, + "pardon": 26565, + "pare": 18202, + "pared": 5498, + "paren": 3106, + "parent": 47848, + "parent": 10183, + "parental": 28339, + "parenthood": 23887, + "parenting": 14529, + "parents": 3731, + "pares": 12420, + "parfait": 46140, + "pari": 17961, + "pari": 27979, + "paris": 13982, + "paris": 3445, + "parisagreement": 47405, + "parish": 47328, + "parish": 13020, + "parisi": 45081, + "parisian": 38512, + "parity": 42734, + "park": 4985, + "park": 1452, + "parked": 16487, + "parker": 31119, + "parker": 8365, + "parkin": 34868, + "parking": 5984, + "parkinson": 28129, + "parkland": 31287, + "parkrun": 25747, + "parks": 6873, + "parkway": 19882, + "parl": 30373, + "parl": 29897, + "parliam": 5941, + "parliament": 41599, + "parliament": 7151, + "parliamentary": 17912, + "parlor": 38253, + "parlour": 37829, + "parma": 36077, + "parme": 26295, + "parmesan": 27274, + "paro": 17429, + "parody": 24318, + "parole": 32158, + "parr": 44113, + "parrish": 43043, + "parrot": 23565, + "parry": 40604, + "parsley": 30077, + "parsons": 22505, + "part": 1872, + "part": 1551, + "parte": 48508, + "parth": 34790, + "parti": 10509, + "partial": 18957, + "partially": 21269, + "partic": 2871, + "partici": 9540, + "particip": 4400, + "participant": 27674, + "participants": 10237, + "participate": 9433, + "participated": 14252, + "participates": 46414, + "participating": 11535, + "participation": 13529, + "particle": 27716, + "particles": 27012, + "particul": 11408, + "particular": 14098, + "particularly": 12170, + "parties": 9032, + "parting": 32844, + "partisan": 20772, + "partist": 44713, + "partition": 42219, + "partly": 21459, + "partner": 5210, + "partner": 4568, + "partnered": 21402, + "partnering": 21182, + "partners": 5568, + "partnership": 6123, + "partnerships": 17418, + "parton": 43245, + "partridge": 34872, + "parts": 5149, + "party": 12877, + "party": 1580, + "partying": 25702, + "pas": 1341, + "pas": 9525, + "pasadena": 25892, + "pascal": 28626, + "pasco": 49220, + "pascu": 42692, + "pash": 23936, + "pasha": 46986, + "paso": 18542, + "pasqu": 44941, + "pass": 5016, + "pass": 3511, + "passage": 16477, + "passages": 48937, + "passed": 4957, + "passenger": 12311, + "passengers": 12781, + "passer": 48544, + "passes": 7633, + "passi": 32471, + "passing": 6589, + "passion": 8822, + "passion": 5332, + "passionate": 10947, + "passionately": 44028, + "passions": 38441, + "passive": 23171, + "passover": 38426, + "passport": 14739, + "passports": 46368, + "password": 20258, + "passwords": 43095, + "past": 7315, + "past": 2729, + "pasta": 10441, + "paste": 34765, + "paste": 17038, + "pastel": 19457, + "pastels": 45699, + "pastor": 19792, + "pastor": 9664, + "pastoral": 37191, + "pastors": 30959, + "pastr": 45478, + "pastries": 39409, + "pastry": 18582, + "pasture": 34764, + "pastures": 47793, + "pat": 1300, + "pat": 7036, + "patag": 29862, + "patagonia": 32786, + "patch": 29284, + "patch": 8721, + "patches": 22104, + "patchwork": 44675, + "patchy": 47488, + "pate": 42122, + "pate": 42098, + "patel": 14168, + "patent": 14692, + 
"patented": 37277, + "patents": 33911, + "paterson": 36560, + "path": 7408, + "path": 5035, + "pathetic": 18222, + "pathfinder": 35415, + "pathi": 34976, + "pathi": 27347, + "pathic": 49025, + "patho": 18534, + "pathology": 23290, + "paths": 16333, + "pathway": 23488, + "pathways": 24690, + "pathy": 13330, + "pati": 2799, + "pati": 26708, + "patience": 13575, + "patient": 30139, + "patient": 6262, + "patiently": 22980, + "patients": 5543, + "patil": 49187, + "patio": 14304, + "pational": 30627, + "patna": 45025, + "patory": 41859, + "patreon": 17165, + "patri": 4771, + "patriarch": 49054, + "patriarchy": 48806, + "patric": 12569, + "patrice": 40731, + "patricia": 18143, + "patrick": 12078, + "patrick": 5286, + "patricks": 46783, + "patriot": 28896, + "patriot": 15692, + "patrioti": 35520, + "patriotic": 20217, + "patriotism": 35807, + "patriots": 8707, + "patro": 31650, + "patrol": 10073, + "patrolling": 39344, + "patrols": 35978, + "patron": 26658, + "patron": 17683, + "patrons": 28308, + "pats": 24874, + "patsy": 46093, + "patt": 12637, + "patter": 4982, + "pattern": 7447, + "patterned": 47212, + "patterns": 11637, + "patterson": 21384, + "patti": 44927, + "patti": 26123, + "pattinson": 32474, + "patton": 29026, + "patty": 48741, + "patty": 18321, + "pau": 1834, + "pau": 35970, + "paul": 6035, + "paul": 2597, + "paula": 37363, + "paula": 16777, + "pauline": 30438, + "paulo": 48002, + "paulo": 21628, + "pauls": 41413, + "pauls": 40010, + "paulson": 48201, + "pause": 19439, + "paused": 46782, + "pav": 6661, + "pave": 37107, + "paved": 27898, + "pavel": 43152, + "pavement": 27669, + "pavilion": 13374, + "paving": 28651, + "paw": 14009, + "paw": 16016, + "pawan": 29754, + "pawankalyan": 33702, + "pawn": 43195, + "paws": 16714, + "pax": 20007, + "pax": 19033, + "paxton": 38347, + "pay": 2642, + "pay": 3345, + "payback": 36413, + "paycheck": 45078, + "payday": 26957, + "payee": 46985, + "payer": 41503, + "paying": 8341, + "payment": 10596, + "payments": 11832, + "payne": 12775, + "paypal": 21442, + "payroll": 31610, + "pays": 10845, + "paysoff": 48174, + "paytm": 45352, + "payton": 27348, + "paz": 22267, + "pb": 20112, + "pb": 10981, + "pba": 28205, + "pbb": 48567, + "pbb": 40589, + "pbc": 49191, + "pbl": 35166, + "pbr": 32998, + "pbs": 17908, + "pc": 6782, + "pc": 3808, + "pca": 35705, + "pcb": 26235, + "pcc": 36059, + "pci": 38957, + "pcm": 47436, + "pcr": 35704, + "pcs": 11917, + "pcso": 31963, + "pct": 22168, + "pd": 4387, + "pd": 4675, + "pdates": 16842, + "pdc": 40498, + "pdf": 15181, + "pdp": 24601, + "pdt": 21743, + "pdx": 25470, + "pdx": 16153, + "pe": 661, + "pe": 956, + "pea": 13915, + "peabo": 34083, + "peabody": 41244, + "peac": 34615, + "peace": 6249, + "peace": 3021, + "peaceful": 9461, + "peacefully": 30530, + "peacekeeping": 43630, + "peach": 10522, + "peach": 11538, + "peaches": 27216, + "peak": 18572, + "peak": 6026, + "peakdistrict": 41289, + "peake": 24810, + "peaked": 36391, + "peaks": 14067, + "pean": 11563, + "peanu": 25843, + "peanut": 12491, + "peanuts": 26503, + "pear": 4910, + "pear": 18820, + "pearce": 25996, + "pearl": 21806, + "pearl": 8560, + "pearljam": 46739, + "pearls": 19581, + "pears": 39565, + "pearson": 20461, + "peas": 15937, + "peasant": 40621, + "peasants": 48788, + "peat": 26914, + "pebble": 28056, + "pebbles": 40155, + "pec": 32447, + "pec": 17611, + "pecan": 32177, + "peck": 25186, + "peck": 29234, + "pecker": 30169, + "peckham": 45863, + "pecu": 34200, + "peculiar": 42808, + "ped": 13197, + "ped": 2966, + "pedago": 34590, + "pedagogy": 48072, + 
"pedal": 32943, + "pedal": 19621, + "pedals": 38535, + "pede": 12862, + "pede": 19560, + "pedestri": 30027, + "pedestrian": 18256, + "pedestrians": 33895, + "pedi": 12967, + "pedia": 11733, + "pediatric": 48431, + "pediatric": 22071, + "pedic": 35319, + "pedic": 44528, + "pedro": 29963, + "pedro": 15114, + "peds": 45377, + "pee": 12988, + "pee": 11196, + "peed": 47369, + "peek": 46323, + "peek": 7569, + "peeking": 48771, + "peel": 34386, + "peel": 17158, + "peeled": 33533, + "peeling": 48649, + "peep": 25425, + "peep": 16857, + "peeps": 11681, + "peer": 32416, + "peer": 14432, + "peers": 21626, + "pees": 31830, + "peg": 32182, + "peg": 11207, + "pegas": 30018, + "pegasus": 37822, + "peggy": 24271, + "pei": 48166, + "pei": 12917, + "pel": 4286, + "pel": 7006, + "pele": 44105, + "pelican": 34131, + "pelicans": 29363, + "pell": 46981, + "pelle": 31267, + "pelled": 32506, + "pellegr": 38529, + "pellets": 48240, + "pelo": 40192, + "pelo": 40238, + "pelosi": 22169, + "pelvic": 45646, + "pemb": 19880, + "pembro": 24084, + "pembroke": 36702, + "pembroke": 40044, + "pembrokeshire": 40695, + "pen": 1501, + "pen": 5356, + "pena": 35788, + "penalties": 25417, + "penalty": 11491, + "penang": 29545, + "penc": 20065, + "pence": 18002, + "pencil": 41303, + "pencil": 11200, + "pencils": 21909, + "pend": 3052, + "pendant": 12415, + "pendants": 44117, + "pending": 12770, + "pendleton": 44272, + "pendu": 45336, + "penelope": 36703, + "penetr": 26058, + "peng": 42955, + "peng": 39200, + "pengu": 8854, + "penguin": 28249, + "penguin": 14952, + "penguins": 16557, + "peninsu": 13464, + "peninsula": 14070, + "penn": 7760, + "penn": 11128, + "pennant": 43971, + "penned": 45077, + "penney": 47856, + "pennies": 43094, + "pennsylvania": 13673, + "penny": 20400, + "penny": 11388, + "pens": 13307, + "pens": 13310, + "pensac": 30925, + "pensacola": 33573, + "pension": 32840, + "pension": 17764, + "pensions": 29773, + "penske": 47154, + "pent": 10699, + "pent": 22725, + "pentagon": 23133, + "pente": 33165, + "penthouse": 32673, + "penultimate": 36553, + "peop": 1030, + "people": 10573, + "people": 1047, + "peoples": 28241, + "peoples": 14627, + "peopleschoice": 32418, + "peoplesvote": 45830, + "peoria": 36985, + "pep": 12761, + "pep": 14898, + "pepe": 24778, + "pepp": 34425, + "pepper": 14861, + "pepper": 8253, + "peppermint": 30321, + "pepperoni": 47307, + "peppers": 14650, + "pepsi": 21307, + "per": 703, + "per": 1284, + "pera": 26294, + "perce": 24135, + "perceived": 38436, + "percent": 16328, + "percent": 9017, + "percentage": 19477, + "percep": 28017, + "perception": 20591, + "perceptions": 38138, + "perch": 34281, + "perched": 40071, + "percu": 41722, + "percussion": 23980, + "percy": 23940, + "pere": 8665, + "pere": 36300, + "pered": 24509, + "peregr": 37479, + "peregrine": 44546, + "pereira": 43927, + "peren": 24564, + "perenni": 26996, + "perennial": 34038, + "perez": 15107, + "perf": 22816, + "perfe": 1624, + "perfec": 6599, + "perfect": 17261, + "perfect": 1878, + "perfection": 9646, + "perfectly": 8037, + "perfecto": 42898, + "perfor": 2311, + "perform": 3866, + "perform": 5940, + "performan": 8973, + "performance": 2714, + "performances": 9553, + "performed": 9997, + "performer": 17061, + "performers": 18476, + "performing": 5170, + "performs": 13839, + "perfu": 14214, + "perfume": 17525, + "perhaps": 9297, + "peri": 12618, + "peri": 44068, + "perience": 19302, + "peril": 40119, + "peril": 48301, + "perimeter": 38499, + "pering": 29746, + "perio": 5101, + "period": 6131, + "periodic": 36476, + "periods": 
24401, + "periph": 35308, + "peripheral": 43901, + "peris": 19461, + "periscope": 21668, + "perk": 33424, + "perkins": 20057, + "perks": 17660, + "perl": 44018, + "perm": 47847, + "perman": 9018, + "permanent": 11144, + "permanently": 25584, + "perme": 42456, + "permission": 15822, + "permit": 21950, + "permits": 33267, + "permitted": 44380, + "pero": 23551, + "perpe": 15749, + "perpetr": 33376, + "perpetu": 30132, + "perpetual": 32018, + "perrie": 32691, + "perry": 28478, + "perry": 7899, + "pers": 3688, + "pers": 10710, + "perse": 27498, + "persecu": 22878, + "persecution": 32009, + "perseverance": 29820, + "persi": 11509, + "persian": 19859, + "persist": 19412, + "persist": 40938, + "persistence": 34588, + "persistent": 29028, + "person": 3510, + "person": 2533, + "persona": 18401, + "personal": 10114, + "personal": 4121, + "personalised": 24186, + "personalities": 27888, + "personality": 10386, + "personalized": 17845, + "personally": 13885, + "personnel": 14546, + "persons": 14592, + "perspec": 17997, + "perspective": 8996, + "perspectives": 18777, + "persu": 20972, + "pert": 36970, + "pert": 16306, + "perth": 19067, + "perth": 11011, + "peru": 20612, + "peru": 12964, + "peruvian": 30822, + "pes": 38368, + "pes": 2598, + "pesa": 47409, + "pesc": 44044, + "pesh": 33184, + "peshaw": 28524, + "peshawar": 29230, + "pesky": 42512, + "pesos": 47872, + "pessi": 43902, + "pest": 20130, + "pest": 9425, + "pesticide": 48481, + "pesticides": 37868, + "pesto": 26186, + "pests": 41919, + "pet": 2167, + "pet": 3703, + "peta": 28785, + "petal": 38430, + "petal": 40469, + "petals": 26064, + "petday": 45314, + "pete": 14479, + "pete": 8571, + "peter": 5093, + "peter": 3696, + "peterborough": 26012, + "peters": 16336, + "petersburg": 21052, + "petersen": 39794, + "peterson": 16877, + "peth": 48920, + "petit": 36437, + "petit": 21276, + "petite": 27213, + "petition": 10975, + "petitions": 43536, + "petr": 29808, + "petra": 31300, + "petre": 47179, + "petri": 31831, + "petro": 8716, + "petrol": 18149, + "petroleum": 22063, + "petron": 42875, + "pets": 7663, + "pett": 27051, + "petti": 48001, + "petting": 44334, + "petty": 17324, + "peu": 21411, + "peuge": 22893, + "peugeot": 24129, + "pew": 21608, + "pew": 30783, + "pewdie": 41882, + "pewdiepie": 42563, + "pex": 43765, + "pey": 14966, + "pey": 30933, + "peyton": 49254, + "peyton": 20307, + "pez": 45798, + "pez": 10482, + "pf": 16680, + "pf": 12572, + "pfa": 47839, + "pfc": 35007, + "pff": 44121, + "pfi": 29810, + "pfw": 31229, + "pg": 12476, + "pg": 5211, + "pga": 13351, + "pgat": 36514, + "pgatour": 40094, + "pgh": 44862, + "pgh": 30031, + "pgs": 49204, + "ph": 745, + "ph": 2042, + "pha": 4443, + "pha": 26255, + "phal": 19962, + "phan": 8731, + "phan": 40126, + "phant": 36998, + "phantom": 37688, + "phantom": 14490, + "phar": 5570, + "phara": 35792, + "pharaoh": 40437, + "pharm": 45761, + "pharma": 17831, + "pharmac": 8193, + "pharmaceu": 19490, + "pharmaceutical": 25217, + "pharmaceuticals": 44623, + "pharmacist": 41024, + "pharmacists": 44337, + "pharmacy": 15293, + "pharo": 42308, + "pharoah": 49287, + "pharrell": 31316, + "phase": 8304, + "phases": 35337, + "phat": 42492, + "phc": 41102, + "phd": 20875, + "phd": 8472, + "phdchat": 39564, + "phdlife": 39638, + "phe": 4787, + "phe": 19853, + "pheasant": 41983, + "phee": 41292, + "phel": 23711, + "phelps": 27128, + "phen": 7718, + "pheno": 47336, + "phenom": 31673, + "phenom": 39618, + "phenomen": 11304, + "phenomena": 41538, + "phenomenal": 15035, + "phenomenon": 24464, + "pher": 9194, + "pher": 
19828, + "phers": 29531, + "pherson": 36421, + "phew": 10295, + "phi": 2239, + "phi": 12220, + "phia": 9228, + "phic": 3977, + "phie": 30237, + "phies": 17062, + "phil": 2821, + "phil": 6199, + "phila": 47443, + "philadel": 9428, + "philadelphia": 9749, + "philanthro": 16587, + "philanthropist": 44153, + "philanthropy": 25047, + "philately": 33695, + "phile": 36543, + "philharmon": 25228, + "philharmonic": 31699, + "phili": 4277, + "philia": 46654, + "philip": 20748, + "philip": 11074, + "philipp": 5623, + "philipp": 47591, + "philippe": 20942, + "philippine": 17629, + "philippines": 8149, + "philips": 25175, + "phill": 42346, + "phill": 48272, + "philli": 6456, + "phillies": 18748, + "phillip": 48832, + "phillip": 19323, + "phillips": 11041, + "philly": 19545, + "philly": 7785, + "philos": 8395, + "philosop": 20349, + "philosoph": 10187, + "philosopher": 25220, + "philosophical": 32628, + "philosophy": 12213, + "phils": 38573, + "phin": 33816, + "phine": 40985, + "phins": 40210, + "phish": 36897, + "phishing": 36546, + "phl": 25603, + "pho": 816, + "pho": 22707, + "phobia": 28749, + "phoe": 22673, + "phoebe": 27582, + "phoeni": 6778, + "phoenix": 20615, + "phoenix": 7793, + "phol": 48140, + "phon": 19602, + "phon": 31115, + "phone": 15486, + "phone": 1951, + "phones": 6351, + "phony": 31925, + "phora": 31363, + "phosp": 22638, + "photo": 1153, + "photo": 1125, + "photobomb": 37075, + "photobook": 41894, + "photog": 28115, + "photogenic": 36108, + "photogra": 36754, + "photograph": 1688, + "photograph": 8853, + "photographed": 11573, + "photographer": 5748, + "photographers": 17141, + "photographic": 22053, + "photographing": 30074, + "photographs": 15759, + "photography": 33183, + "photography": 2108, + "photom": 32223, + "photoo": 11106, + "photooftheday": 11933, + "photos": 2479, + "photoshoot": 11121, + "photoshop": 12419, + "photoshopped": 35738, + "phouse": 27848, + "php": 17370, + "phra": 12777, + "phrase": 18809, + "phrases": 35264, + "phs": 16495, + "phu": 21274, + "phuket": 34028, + "phx": 35466, + "phx": 29507, + "phy": 6484, + "phy": 4292, + "phyl": 35600, + "phyllis": 37844, + "phys": 3734, + "phys": 37894, + "physi": 13782, + "physic": 46641, + "physical": 44127, + "physical": 6671, + "physically": 18105, + "physician": 21055, + "physicians": 26702, + "physicist": 29052, + "physics": 9369, + "physio": 29574, + "physio": 29177, + "physiology": 32349, + "physique": 42884, + "phyto": 42197, + "pi": 741, + "pi": 5357, + "pia": 8918, + "pian": 24637, + "pianist": 21048, + "piano": 49278, + "piano": 7894, + "pianos": 47904, + "piazza": 28496, + "pic": 901, + "pic": 1282, + "pical": 5482, + "picard": 48507, + "picasso": 21481, + "piccad": 33876, + "piccadilly": 37287, + "piccollage": 43621, + "pick": 6379, + "pick": 3142, + "picked": 6018, + "picker": 43105, + "pickering": 47605, + "picket": 33559, + "picking": 9545, + "pickle": 24570, + "pickled": 21705, + "pickles": 25001, + "picks": 8551, + "pickup": 15382, + "pickups": 33383, + "picnic": 12007, + "pico": 23363, + "picoftheday": 18319, + "pics": 2559, + "pict": 18778, + "pictorial": 40640, + "picture": 11663, + "picture": 1674, + "pictured": 7647, + "pictures": 3646, + "picturesque": 24894, + "pid": 5225, + "piday": 48056, + "pie": 12065, + "pie": 5319, + "piece": 39632, + "piece": 2754, + "pieces": 6194, + "pied": 24686, + "pied": 12713, + "piedmont": 39691, + "pier": 5641, + "pier": 11348, + "pierc": 49216, + "pierce": 48462, + "pierce": 16782, + "pierced": 32799, + "piercing": 22557, + "piero": 43125, + "pierre": 34670, + 
"pierre": 11985, + "piers": 29030, + "pies": 6898, + "pieter": 44801, + "pietro": 42169, + "piff": 40719, + "pig": 12009, + "pig": 9619, + "pigeon": 18008, + "pigeons": 32910, + "piggy": 28245, + "pigment": 40284, + "pigs": 16228, + "pik": 48539, + "pika": 47372, + "pikach": 27268, + "pikachu": 28107, + "pike": 33457, + "pike": 14011, + "pil": 2893, + "pil": 20645, + "pilates": 29518, + "pile": 44403, + "pile": 13930, + "piled": 26873, + "piles": 31968, + "pilgri": 13966, + "pilgrim": 32662, + "pilgrimage": 24335, + "pilgrims": 31370, + "piling": 43050, + "pilip": 27234, + "pilipinas": 32392, + "pill": 14830, + "pill": 19226, + "pillar": 17322, + "pillars": 22054, + "pillow": 42237, + "pillow": 12182, + "pillows": 26499, + "pills": 23964, + "pilo": 37526, + "pilot": 31619, + "pilot": 6687, + "pilots": 15586, + "pilsner": 47153, + "pim": 15285, + "pim": 35472, + "pimp": 35789, + "pin": 2629, + "pin": 5164, + "pinball": 31679, + "pinch": 26114, + "pine": 9398, + "pine": 7374, + "pineapple": 14831, + "pines": 20338, + "ping": 23720, + "ping": 2089, + "pinion": 40557, + "pink": 11151, + "pink": 3360, + "pinkfloyd": 48520, + "pinky": 29803, + "pinn": 31448, + "pinnacle": 32754, + "pinned": 12165, + "pinning": 44515, + "pino": 36633, + "pinot": 41399, + "pinot": 21146, + "pinoy": 43578, + "pinoy": 35258, + "pins": 14619, + "pinst": 41173, + "pint": 42537, + "pint": 13584, + "pinterest": 15379, + "pinto": 35992, + "pints": 27935, + "pinup": 37349, + "pio": 22108, + "pion": 36728, + "pion": 29190, + "pione": 7975, + "pioneer": 34892, + "pioneer": 12459, + "pioneering": 25933, + "pioneers": 22383, + "pious": 42441, + "pip": 30854, + "pipe": 29333, + "pipe": 10459, + "pipel": 12387, + "pipeline": 14151, + "pipelines": 39683, + "piper": 47052, + "piper": 16293, + "pipes": 16991, + "piping": 40744, + "pippa": 47672, + "pir": 4351, + "pir": 38899, + "piracy": 39452, + "piran": 49034, + "pirate": 38680, + "pirate": 13592, + "pirates": 10442, + "pire": 16613, + "pires": 14988, + "pis": 9230, + "pis": 44441, + "pisa": 43632, + "pisces": 45982, + "piss": 20818, + "pissed": 17989, + "pist": 15556, + "pist": 32826, + "pistachi": 29760, + "pistachio": 36320, + "pistol": 20480, + "piston": 48236, + "pistons": 27242, + "pistor": 48162, + "pit": 2946, + "pit": 7476, + "pita": 27070, + "pitbull": 25295, + "pitch": 8992, + "pitch": 5872, + "pitched": 28447, + "pitcher": 13445, + "pitchers": 27835, + "pitches": 21005, + "pitching": 16455, + "piti": 47568, + "pits": 24144, + "pitt": 7607, + "pitt": 15599, + "pitts": 9531, + "pittsburgh": 10453, + "pity": 24380, + "pius": 39988, + "pivo": 18009, + "pivot": 31805, + "pivotal": 31432, + "pix": 6185, + "pix": 13088, + "pixar": 27493, + "pixel": 14384, + "pixel": 13241, + "pixelart": 18516, + "pixels": 34099, + "pixie": 35573, + "piyu": 30772, + "piyush": 36191, + "piyushgoyal": 45318, + "pizz": 3897, + "pizza": 4474, + "pizzas": 30647, + "pizzeria": 44174, + "pj": 12524, + "pj": 17179, + "pjnet": 22011, + "pjs": 36009, + "pk": 10149, + "pk": 10991, + "pkg": 49011, + "pkk": 47480, + "pknot": 41779, + "pkwy": 36827, + "pl": 712, + "pl": 5678, + "pla": 841, + "pla": 19945, + "plac": 2331, + "place": 14884, + "place": 1445, + "placed": 9729, + "placement": 16724, + "placements": 43885, + "placer": 49170, + "places": 4448, + "placing": 18531, + "plague": 25360, + "plaid": 23291, + "plain": 22776, + "plain": 10709, + "plains": 16345, + "plan": 1740, + "plan": 2970, + "pland": 24801, + "plane": 22728, + "plane": 5363, + "planes": 12581, + "planet": 16833, + "planet": 5172, + 
"planetary": 28361, + "planets": 22315, + "plank": 30991, + "plankton": 48249, + "plann": 6409, + "planned": 8169, + "planner": 18083, + "planners": 33664, + "planning": 4446, + "plano": 34063, + "plans": 4181, + "plant": 8521, + "plant": 3912, + "plantation": 20014, + "plantbased": 33720, + "planted": 14286, + "planter": 34453, + "planters": 43661, + "planting": 13922, + "plants": 5829, + "plaque": 16097, + "plaques": 45610, + "plar": 26754, + "plas": 45673, + "plasma": 24999, + "plaster": 31980, + "plastic": 15645, + "plastic": 6102, + "plasticpollution": 47129, + "plastics": 20999, + "plasticsurgery": 48555, + "plat": 3172, + "plata": 46456, + "plate": 28744, + "plate": 5135, + "plateau": 29301, + "plated": 21161, + "plates": 11485, + "platform": 5549, + "platforms": 13551, + "platin": 10267, + "plating": 44564, + "platinum": 10979, + "plato": 41101, + "platoon": 41254, + "platt": 44459, + "platt": 40097, + "platte": 46785, + "platter": 29071, + "platz": 40878, + "plau": 39139, + "play": 1222, + "play": 1453, + "playa": 23756, + "playable": 33885, + "playback": 39194, + "playbook": 34856, + "playboy": 24383, + "played": 3432, + "player": 24503, + "player": 2477, + "players": 3030, + "playful": 23871, + "playground": 15861, + "playhouse": 23254, + "playin": 24674, + "playing": 47368, + "playing": 1629, + "playlist": 9180, + "playlists": 47183, + "playo": 5804, + "playoff": 9655, + "playoffs": 9548, + "plays": 5134, + "playstation": 11332, + "playtime": 43037, + "playwright": 32070, + "plaza": 8943, + "plc": 16827, + "ple": 926, + "ple": 1619, + "plea": 21956, + "plead": 47539, + "pleads": 31425, + "plear": 21362, + "pleas": 8481, + "pleas": 48740, + "pleasant": 12271, + "please": 41074, + "please": 1474, + "pleased": 6107, + "pleasing": 32893, + "pleasure": 5854, + "pleasures": 29513, + "pledge": 11507, + "pledged": 36799, + "pledges": 26746, + "pledis": 41202, + "plein": 43429, + "plenary": 19891, + "plenty": 7524, + "pler": 17677, + "ples": 6248, + "pless": 39821, + "pless": 17059, + "plets": 43230, + "plex": 23765, + "plex": 15241, + "pley": 19543, + "pli": 30001, + "pli": 45797, + "plic": 5806, + "plicity": 19823, + "plight": 40317, + "plin": 44531, + "plin": 32335, + "pline": 25376, + "pling": 12899, + "plings": 31184, + "pll": 47629, + "pll": 25266, + "pln": 48755, + "plo": 1778, + "plo": 43523, + "plor": 34695, + "plot": 9918, + "plots": 25672, + "plotting": 30751, + "plough": 33811, + "plow": 38363, + "pls": 5572, + "plu": 2052, + "plug": 12628, + "plugged": 23261, + "plugin": 31278, + "plugins": 48797, + "plugs": 28083, + "plum": 26267, + "plum": 16202, + "plumb": 21769, + "plumber": 43478, + "plumbing": 24647, + "plume": 39495, + "plun": 15122, + "plunge": 26506, + "plur": 44664, + "plus": 3097, + "plush": 18926, + "pluto": 26380, + "ply": 17249, + "ply": 28705, + "plying": 36071, + "plym": 11907, + "plymouth": 13786, + "plz": 10538, + "pm": 13699, + "pm": 990, + "pmi": 41206, + "pmln": 23208, + "pmo": 18782, + "pmoindia": 20374, + "pms": 44223, + "pn": 14431, + "pn": 13774, + "pnc": 37148, + "pne": 30966, + "pneu": 28714, + "pneumonia": 42906, + "png": 20992, + "pnp": 25972, + "pnpp": 42175, + "pnw": 31521, + "po": 628, + "po": 3057, + "poa": 43912, + "poached": 27665, + "poaching": 35140, + "poc": 13232, + "poc": 27780, + "pocaly": 37987, + "pocalypse": 42307, + "poche": 38336, + "poche": 39022, + "pocket": 29147, + "pocket": 8504, + "pockets": 19566, + "pocon": 41850, + "pod": 3583, + "pod": 7446, + "podcast": 39654, + "podcast": 4294, + "podcasting": 40106, + "podcasts": 
19392, + "pode": 33368, + "poder": 24960, + "podernfamily": 26620, + "podi": 32853, + "podium": 14093, + "pods": 18776, + "poe": 4746, + "poe": 19254, + "poem": 9436, + "poems": 15577, + "poet": 41019, + "poet": 9872, + "poetic": 26365, + "poetry": 20192, + "poetry": 6038, + "poetryday": 39255, + "poets": 19804, + "pof": 40850, + "poff": 28236, + "pogba": 25998, + "poign": 29682, + "poignant": 32138, + "poin": 9074, + "point": 13280, + "point": 2301, + "pointe": 24631, + "pointed": 20703, + "pointer": 29883, + "pointers": 36760, + "pointing": 19233, + "pointless": 33586, + "points": 3396, + "pois": 17008, + "poise": 45087, + "poised": 27354, + "poison": 30722, + "poison": 17074, + "poisoned": 43624, + "poisoning": 25750, + "poisonous": 37131, + "pok": 15387, + "poke": 6892, + "poke": 23186, + "pokemon": 16239, + "pokemon": 9528, + "pokemongo": 23985, + "poker": 30735, + "poker": 11865, + "pokes": 40221, + "poking": 49169, + "poké": 20656, + "pokémon": 22066, + "pol": 977, + "pol": 7649, + "pola": 43876, + "poland": 9834, + "polar": 21432, + "polar": 12214, + "polari": 27919, + "polaris": 37965, + "polarized": 48437, + "polaro": 25237, + "polaroid": 30427, + "poldark": 41322, + "pole": 26682, + "pole": 8170, + "poles": 22585, + "poli": 9675, + "poli": 5414, + "polic": 16126, + "police": 15535, + "police": 2120, + "policeman": 37713, + "policemen": 47946, + "polici": 10819, + "policies": 10993, + "policing": 20969, + "policy": 30173, + "policy": 4660, + "polio": 30533, + "polis": 16133, + "polish": 46941, + "polish": 9632, + "polished": 21478, + "polishing": 43629, + "polit": 2247, + "politan": 15337, + "polite": 31497, + "politi": 40597, + "politic": 33333, + "political": 37744, + "political": 4197, + "politically": 24323, + "politician": 15960, + "politicians": 12914, + "politico": 39403, + "politics": 4929, + "polk": 33317, + "polka": 29476, + "poll": 7032, + "pollen": 27651, + "pollin": 19152, + "pollinators": 36599, + "polling": 18024, + "pollo": 42755, + "pollock": 37614, + "polls": 11813, + "pollu": 8370, + "polluted": 43346, + "pollution": 10384, + "polly": 31204, + "polo": 35928, + "polo": 10229, + "poly": 6833, + "poly": 18367, + "polye": 31730, + "polyester": 38514, + "polym": 23626, + "polymer": 29993, + "polyne": 38892, + "polyvore": 24771, + "pom": 7548, + "pom": 24280, + "pome": 27963, + "pomegran": 29326, + "pomegranate": 32415, + "pomer": 35156, + "pomona": 41690, + "pompe": 18352, + "pompeii": 47775, + "pompeo": 34351, + "pompey": 35079, + "pon": 3809, + "pon": 22391, + "ponce": 43637, + "pond": 10750, + "ponder": 36863, + "pondering": 47395, + "ponds": 31033, + "pone": 32183, + "pong": 40546, + "pong": 17710, + "ponies": 34157, + "pons": 41255, + "pont": 47563, + "pont": 22997, + "ponte": 40892, + "ponti": 15527, + "pontiac": 25373, + "pontifex": 33566, + "ponty": 45152, + "pony": 24438, + "pony": 12678, + "ponytail": 43265, + "poo": 6601, + "poo": 14389, + "pooch": 37037, + "poodle": 34961, + "pooh": 27103, + "pooja": 35676, + "pool": 12484, + "pool": 2831, + "poole": 26290, + "pools": 18736, + "poolside": 35509, + "poon": 33799, + "poon": 36178, + "poop": 23310, + "poor": 14528, + "poor": 3665, + "poorest": 40771, + "poorly": 21101, + "pop": 6530, + "pop": 2852, + "popart": 47425, + "popcorn": 15034, + "pope": 16994, + "pope": 9283, + "popefrancis": 37254, + "poplar": 38726, + "popo": 38835, + "popo": 35572, + "popp": 13156, + "popped": 14934, + "poppies": 30385, + "poppin": 28536, + "popping": 18152, + "poppins": 41216, + "poppy": 32194, + "poppy": 15447, + "pops": 
11705, + "popsic": 38481, + "popu": 3785, + "popul": 6593, + "popular": 15854, + "popular": 4368, + "popularity": 19235, + "populated": 38420, + "population": 8423, + "populations": 23797, + "populism": 48998, + "populist": 49376, + "popup": 33053, + "por": 817, + "por": 7697, + "pora": 23537, + "porcel": 19409, + "porcelain": 20451, + "porch": 17154, + "pore": 28267, + "pork": 40379, + "pork": 7897, + "poro": 48110, + "porridge": 34924, + "porsch": 48009, + "porsche": 44049, + "porsche": 8783, + "port": 1641, + "port": 1418, + "porta": 45037, + "portable": 11949, + "portage": 32087, + "portal": 14982, + "porte": 28654, + "ported": 16879, + "porter": 28319, + "porter": 10318, + "porters": 15670, + "portfoli": 45766, + "portfolio": 11938, + "porth": 37425, + "porti": 45760, + "porting": 26052, + "portion": 13739, + "portions": 22914, + "portland": 38366, + "portland": 8880, + "portman": 34755, + "porto": 24853, + "porto": 18947, + "portobello": 48025, + "portra": 4175, + "portrait": 39312, + "portrait": 5352, + "portraits": 14203, + "portray": 46282, + "portrayal": 39238, + "portrayed": 36093, + "ports": 7734, + "portsm": 17063, + "portsmouth": 19074, + "portu": 7159, + "portugal": 9503, + "portugue": 17498, + "portuguese": 18019, + "pos": 1780, + "pos": 11839, + "pose": 25478, + "pose": 4230, + "posed": 5206, + "posei": 47270, + "poser": 46899, + "poses": 9773, + "posey": 34852, + "posh": 26748, + "posing": 10518, + "posit": 28793, + "positi": 7895, + "position": 4657, + "positioned": 34482, + "positioning": 30657, + "positions": 12188, + "positive": 21811, + "positive": 4844, + "positively": 24688, + "positivity": 19966, + "poss": 39745, + "posse": 17414, + "posse": 28413, + "possess": 36810, + "possessed": 36220, + "possession": 16154, + "possessions": 40588, + "possi": 2521, + "possibilities": 17932, + "possibility": 18517, + "possible": 3134, + "possibly": 8601, + "possum": 38575, + "post": 3489, + "post": 1549, + "postage": 27570, + "postal": 21687, + "postcard": 14785, + "postcards": 23922, + "postdoc": 41013, + "posted": 4752, + "poster": 22881, + "poster": 3574, + "posters": 9673, + "postgame": 34873, + "postgraduate": 31997, + "posthum": 42410, + "posting": 7559, + "postman": 38285, + "postpon": 23247, + "postponed": 25097, + "posts": 7824, + "postseason": 24521, + "posture": 29681, + "posure": 35539, + "pot": 3547, + "pot": 5168, + "potam": 45825, + "potassi": 36889, + "potassium": 37147, + "potat": 5975, + "potato": 8527, + "potatoes": 11567, + "potd": 28765, + "pote": 41869, + "poten": 4454, + "potent": 26082, + "potenti": 44104, + "potential": 5100, + "potentially": 16508, + "potholes": 47506, + "potion": 46055, + "potom": 38848, + "potomac": 43372, + "pots": 19234, + "pott": 28698, + "potted": 48581, + "potter": 24975, + "potter": 9026, + "pottery": 18396, + "potts": 39839, + "potty": 43569, + "potus": 8740, + "pou": 9423, + "pouch": 26811, + "poul": 22485, + "poultry": 31005, + "poun": 33719, + "pound": 33809, + "pound": 10674, + "pounding": 46544, + "pounds": 10752, + "pour": 33112, + "pour": 8180, + "poured": 26621, + "pouring": 16098, + "pours": 26005, + "pout": 39621, + "poutine": 43768, + "pov": 25731, + "pover": 8432, + "pover": 29464, + "poverty": 9095, + "pow": 1317, + "pow": 17745, + "powder": 32427, + "powder": 9674, + "powe": 36955, + "powell": 13305, + "power": 2789, + "power": 1807, + "powerball": 47803, + "powered": 45442, + "powered": 7332, + "powerful": 4875, + "powerhouse": 22858, + "powering": 16231, + "powerof": 31961, + "powerpoint": 38940, + 
"powerrangers": 40620, + "powers": 9422, + "pox": 43649, + "poy": 34737, + "poyn": 47655, + "poz": 39953, + "pp": 604, + "pp": 4186, + "ppa": 10416, + "ppard": 23391, + "ppc": 27778, + "ppe": 24573, + "ppe": 11867, + "pped": 1873, + "ppel": 46523, + "ppen": 30663, + "pper": 6719, + "pper": 2440, + "ppers": 5232, + "ppery": 27833, + "ppet": 20744, + "ppets": 25849, + "ppg": 27433, + "ppi": 9594, + "ppie": 33795, + "ppin": 8076, + "pping": 22214, + "pping": 1682, + "ppings": 35687, + "ppl": 6758, + "pple": 12302, + "ppm": 42053, + "ppo": 10215, + "ppor": 37613, + "ppp": 14017, + "pps": 10683, + "ppv": 38864, + "ppy": 30360, + "ppy": 3860, + "pr": 766, + "pr": 4150, + "pra": 1865, + "pra": 19285, + "prab": 17901, + "prabhas": 29959, + "prabhu": 31529, + "prac": 2243, + "practi": 29995, + "practic": 5495, + "practical": 10792, + "practically": 25588, + "practice": 3349, + "practiced": 36749, + "practices": 9040, + "practicing": 12750, + "practise": 38938, + "practising": 36478, + "practiti": 19909, + "practitioner": 32591, + "practitioners": 29045, + "prada": 29456, + "pradesh": 15384, + "prado": 44141, + "prag": 31025, + "prague": 14940, + "prairi": 12629, + "prairie": 14753, + "praise": 10013, + "praised": 27649, + "praises": 23049, + "praising": 36961, + "prakash": 43708, + "prakash": 25366, + "pram": 47774, + "pran": 20048, + "prank": 23654, + "pras": 41562, + "prasad": 29562, + "prat": 23069, + "prati": 45773, + "pratt": 37863, + "pratt": 23396, + "prawn": 33102, + "prawns": 34903, + "pray": 12671, + "pray": 6041, + "prayed": 34665, + "prayer": 41452, + "prayer": 6583, + "prayers": 8393, + "prayfor": 18443, + "praying": 11550, + "prays": 46602, + "prc": 28781, + "pre": 679, + "pre": 2900, + "preach": 22545, + "preacher": 29357, + "preaching": 23642, + "precau": 36532, + "precautions": 47845, + "prece": 15361, + "preci": 5470, + "precin": 27908, + "precinct": 32587, + "precious": 8226, + "precipit": 27463, + "precipitation": 33399, + "precise": 24457, + "precisely": 34954, + "precision": 44021, + "precision": 15621, + "pred": 40370, + "predat": 13364, + "predator": 20653, + "predators": 25569, + "prede": 38454, + "predecess": 38963, + "predic": 4876, + "predict": 16900, + "predictable": 25344, + "predicted": 18702, + "predicting": 30414, + "prediction": 16296, + "predictions": 15125, + "predictive": 29798, + "predicts": 25960, + "preds": 40125, + "pree": 47026, + "preet": 30131, + "prefe": 14542, + "prefecture": 32890, + "prefer": 33426, + "prefer": 11450, + "preference": 35057, + "preferences": 38118, + "preferred": 18772, + "prefers": 38528, + "pregame": 18575, + "pregn": 7190, + "pregnancy": 12769, + "pregnant": 11195, + "prehistoric": 32750, + "prejudice": 28337, + "preli": 15523, + "prelimin": 19990, + "preliminary": 20997, + "prelims": 43223, + "prelude": 42966, + "prem": 32090, + "prem": 21724, + "premature": 39253, + "premi": 2413, + "premier": 16996, + "premier": 5539, + "premiere": 5367, + "premiered": 27652, + "premieres": 19907, + "premiering": 32615, + "premierleague": 22608, + "premiers": 44883, + "premiership": 23665, + "premiosm": 38460, + "premiosmtvmiaw": 38630, + "premise": 45952, + "premises": 27266, + "premium": 8011, + "pren": 20801, + "preneur": 46288, + "preorder": 16703, + "preorders": 45985, + "prep": 6430, + "prep": 7277, + "prepa": 26270, + "prepaid": 42934, + "prepar": 4968, + "preparation": 11651, + "preparations": 19135, + "prepare": 7014, + "prepared": 7677, + "preparedness": 29492, + "prepares": 16375, + "preparing": 7365, + "prepped": 34379, + "prepping": 
16459, + "preps": 14765, + "prequel": 40461, + "pres": 1385, + "pres": 8529, + "presale": 27135, + "presby": 30447, + "presbyter": 33959, + "presbyterian": 35370, + "preschool": 24354, + "prescott": 29392, + "prescri": 14851, + "prescribed": 36968, + "prescription": 23061, + "preseason": 13813, + "presen": 16742, + "presence": 8848, + "present": 2344, + "present": 2881, + "presentation": 4594, + "presentations": 16998, + "presented": 4587, + "presenter": 18587, + "presenters": 32759, + "presenting": 5339, + "presents": 4215, + "preserv": 17616, + "preservation": 21074, + "preserve": 15570, + "preserved": 23161, + "preserves": 44881, + "preserving": 32315, + "presi": 1697, + "presiden": 43374, + "presidency": 18077, + "president": 19900, + "president": 1940, + "presidente": 47363, + "presidenti": 48297, + "presidential": 8503, + "presidents": 16726, + "presiding": 45298, + "presley": 30013, + "press": 4124, + "press": 2124, + "pressed": 20080, + "presser": 27826, + "presses": 33748, + "pressing": 20893, + "pressure": 6083, + "pressures": 38487, + "prest": 41840, + "presti": 12245, + "prestige": 29328, + "prestigious": 15888, + "presto": 42211, + "preston": 37335, + "preston": 15179, + "presu": 21667, + "presumably": 42562, + "pret": 9652, + "preten": 15871, + "pretend": 18111, + "pretending": 21306, + "pretoria": 36080, + "prett": 46667, + "prettier": 31745, + "prettiest": 22866, + "pretty": 18286, + "pretty": 2111, + "pretz": 24890, + "pretzel": 36707, + "pretzels": 45468, + "prev": 20274, + "prevail": 31637, + "prevalence": 41729, + "prevalent": 46260, + "preven": 29382, + "prevent": 26436, + "prevent": 7968, + "preventable": 44250, + "prevented": 35356, + "preventing": 21756, + "prevention": 9500, + "preventive": 40949, + "prevents": 31746, + "preview": 4449, + "previews": 20279, + "previous": 9252, + "previously": 13359, + "prey": 17131, + "prez": 17956, + "pri": 955, + "pri": 23400, + "pric": 24275, + "price": 13254, + "price": 2827, + "priced": 16934, + "priceless": 15743, + "prices": 5954, + "pricing": 14800, + "prick": 43921, + "prick": 46516, + "pride": 15323, + "pride": 3436, + "pridemonth": 41410, + "prie": 22477, + "priest": 38756, + "priest": 14222, + "priests": 30005, + "prim": 22004, + "prima": 35611, + "prima": 33277, + "primal": 36604, + "primar": 21579, + "primaries": 46126, + "primarily": 29465, + "primark": 48329, + "primary": 35024, + "primary": 5814, + "primavera": 44899, + "prime": 14162, + "prime": 5183, + "primed": 45694, + "primer": 22388, + "primetime": 29763, + "primitive": 37467, + "primo": 43215, + "primrose": 45891, + "prin": 1588, + "prince": 9457, + "prince": 4735, + "princes": 45329, + "princes": 30136, + "princess": 24123, + "princess": 5079, + "princesses": 34161, + "princeton": 22433, + "princi": 5129, + "principal": 33599, + "principal": 8860, + "principals": 27524, + "principle": 19595, + "principles": 13755, + "print": 17851, + "print": 3557, + "printable": 29648, + "printed": 7978, + "printer": 14521, + "printers": 27881, + "printing": 7369, + "printmaking": 38669, + "prints": 7704, + "prior": 20328, + "prior": 10572, + "priorit": 47773, + "prioriti": 28822, + "priorities": 15232, + "prioritize": 46715, + "priority": 12451, + "priory": 38665, + "prisc": 32468, + "priscilla": 42396, + "prise": 23343, + "prism": 49311, + "prism": 34356, + "prison": 9281, + "prison": 6622, + "prisoner": 21427, + "prisoners": 17460, + "prisons": 26607, + "pristine": 30618, + "prit": 41668, + "prit": 37523, + "prith": 39173, + "prius": 43561, + "priv": 3270, + "privacy": 
10437, + "private": 20362, + "private": 4439, + "privately": 32970, + "privati": 27379, + "privi": 8367, + "privileg": 18015, + "privilege": 11537, + "privileged": 18166, + "prix": 10875, + "priya": 31275, + "priyan": 16488, + "priyanka": 31959, + "priyankach": 30030, + "priyankachopra": 30264, + "prize": 48222, + "prize": 4521, + "prized": 38769, + "prizes": 9268, + "prk": 37094, + "pro": 644, + "pro": 2630, + "proactive": 33364, + "prob": 17706, + "prob": 24007, + "probab": 3907, + "probability": 32637, + "probable": 42444, + "probably": 4047, + "probation": 36531, + "probe": 14359, + "probes": 48564, + "probiotics": 49395, + "proble": 2719, + "problem": 4324, + "problematic": 33767, + "problems": 4671, + "probs": 16330, + "probz": 34243, + "proc": 38417, + "proce": 4076, + "procedu": 18204, + "procedural": 48177, + "procedure": 20163, + "procedures": 21109, + "proceed": 26664, + "proceed": 33894, + "proceedings": 26953, + "proceeds": 11882, + "process": 17291, + "process": 4078, + "processed": 23816, + "processes": 15169, + "processing": 11737, + "procession": 26288, + "processor": 22838, + "processors": 43634, + "proclaimed": 34489, + "proclamation": 32065, + "procra": 25361, + "procrastin": 25586, + "procrastination": 42825, + "procreate": 39336, + "proctor": 47204, + "procu": 21001, + "procurement": 23733, + "prod": 44349, + "prod": 11991, + "prodi": 27759, + "prodigy": 31973, + "produ": 27852, + "produc": 1471, + "produce": 7529, + "produced": 7479, + "producer": 7064, + "producers": 13883, + "produces": 19940, + "producing": 13579, + "product": 32602, + "product": 4306, + "production": 4146, + "productions": 14166, + "productive": 9697, + "productivity": 12800, + "products": 3964, + "prof": 15043, + "prof": 5488, + "profe": 2611, + "profess": 5486, + "professi": 3705, + "profession": 8104, + "profession": 19671, + "professional": 46007, + "professional": 4774, + "professionalism": 41252, + "professionally": 33892, + "professionals": 10165, + "professor": 47302, + "professor": 6092, + "professors": 27758, + "profici": 34685, + "profile": 14291, + "profile": 6444, + "profiles": 22070, + "profiling": 37123, + "profit": 16941, + "profit": 7909, + "profitable": 25465, + "profits": 13410, + "profound": 48245, + "profound": 22998, + "profs": 19260, + "prog": 22219, + "progno": 46070, + "program": 4162, + "program": 2737, + "programme": 6322, + "programmer": 37001, + "programmes": 20468, + "programming": 10831, + "programs": 7345, + "progre": 7069, + "progress": 4421, + "progressi": 23297, + "progressing": 32346, + "progression": 24772, + "progressive": 12208, + "progressives": 41709, + "prohi": 41124, + "prohib": 45040, + "prohibition": 34440, + "proj": 39156, + "proje": 48345, + "projec": 1610, + "project": 15911, + "project": 1965, + "projected": 22873, + "projection": 22384, + "projections": 34638, + "projector": 27816, + "projects": 5090, + "proli": 19710, + "prolife": 32126, + "prolifer": 39018, + "prolific": 27839, + "prolly": 45968, + "prolon": 35379, + "prolonged": 41972, + "prom": 40363, + "prom": 7944, + "prome": 34355, + "promen": 33578, + "promenade": 35522, + "promethe": 44183, + "promin": 35217, + "prominent": 19172, + "promis": 3963, + "promise": 6745, + "promised": 11516, + "promises": 12064, + "promising": 14183, + "promo": 3037, + "promo": 6755, + "promos": 35044, + "promote": 47384, + "promote": 8003, + "promoted": 16395, + "promoter": 33081, + "promotes": 20169, + "promoting": 9695, + "promotion": 9259, + "promotional": 17619, + "promotions": 19142, + "promp": 11671, + 
"prompt": 20198, + "prompted": 45746, + "prompts": 33490, + "proms": 37759, + "pron": 13285, + "prone": 30964, + "pronoun": 23022, + "pronounce": 40489, + "pronounced": 34109, + "pronto": 44296, + "proof": 17020, + "proof": 5248, + "proofing": 35679, + "proofs": 41023, + "prop": 19123, + "prop": 16254, + "propag": 12151, + "propaganda": 14718, + "propane": 45546, + "propel": 48439, + "propeller": 47404, + "proper": 3577, + "proper": 8205, + "properly": 12560, + "properties": 10922, + "property": 26486, + "property": 5043, + "prophe": 9662, + "prophecy": 32501, + "prophet": 15549, + "prophetic": 47476, + "prophets": 39441, + "propor": 35016, + "proportion": 35775, + "proportions": 39391, + "propos": 9455, + "proposal": 12139, + "proposals": 20568, + "propose": 28471, + "proposed": 10615, + "proposes": 27133, + "proposing": 42631, + "proposition": 44780, + "propri": 28243, + "props": 15249, + "propulsion": 49380, + "pros": 33925, + "pros": 14147, + "prosciutto": 46565, + "prose": 47063, + "prose": 28675, + "prosecco": 28839, + "prosecu": 12136, + "prosecution": 30902, + "prosecutor": 23736, + "prosecutors": 31656, + "prosp": 24242, + "prospec": 12693, + "prospect": 11211, + "prospective": 28034, + "prospects": 15372, + "prosper": 16121, + "prosper": 33526, + "prosperity": 17203, + "prosperous": 28252, + "prost": 47923, + "prostate": 28808, + "prostatec": 49064, + "prosthetic": 44602, + "prostitu": 37333, + "protag": 28950, + "protagonist": 38183, + "prote": 1845, + "protec": 5640, + "protect": 25563, + "protect": 4817, + "protected": 12266, + "protecting": 11710, + "protection": 6238, + "protections": 33772, + "protective": 17028, + "protector": 20441, + "protectors": 45039, + "protects": 21889, + "protein": 8088, + "proteins": 28661, + "protest": 6279, + "protestant": 46945, + "protested": 48089, + "protester": 42073, + "protesters": 12660, + "protesting": 18788, + "protestors": 27822, + "protests": 12450, + "proto": 8672, + "proto": 44958, + "protocol": 19938, + "protocols": 39631, + "proton": 40009, + "prototype": 16675, + "prototyping": 42081, + "prou": 5739, + "proud": 11080, + "proud": 1679, + "prouder": 39585, + "proudest": 46806, + "proudly": 11203, + "proudof": 48184, + "proudtobe": 35043, + "prov": 23772, + "prov": 35021, + "prove": 10107, + "proved": 16473, + "proven": 35405, + "proven": 14569, + "provence": 28067, + "prover": 18312, + "proverb": 34419, + "proverbs": 27016, + "proves": 16119, + "provi": 2289, + "provide": 4832, + "provided": 9046, + "providence": 19331, + "provider": 14409, + "providers": 17120, + "provides": 7161, + "providing": 7250, + "provin": 12074, + "province": 8978, + "provinces": 35050, + "provincial": 16002, + "proving": 18055, + "provision": 30148, + "provisional": 36008, + "provisions": 39269, + "provo": 15367, + "provoc": 31618, + "provocative": 43809, + "provoking": 25510, + "provost": 36627, + "prow": 38737, + "prowrestling": 39825, + "prox": 41616, + "proxim": 31436, + "proximity": 38298, + "proxy": 31680, + "prs": 23879, + "pru": 12961, + "pruitt": 39453, + "prun": 29029, + "pruning": 48133, + "pry": 31965, + "pryor": 43375, + "ps": 3982, + "ps": 814, + "psa": 14031, + "psal": 13859, + "psalm": 17995, + "psalms": 35003, + "psb": 37017, + "psc": 43118, + "psd": 28810, + "pse": 19737, + "pse": 5423, + "pseu": 24919, + "pseudo": 46618, + "psg": 17123, + "psi": 45848, + "psi": 24533, + "psic": 29299, + "psis": 33041, + "psl": 21373, + "psn": 36781, + "pso": 27045, + "pson": 7487, + "psori": 44688, + "psp": 32769, + "pss": 35718, + "pss": 42535, + 
"psst": 47814, + "pst": 12692, + "psu": 41286, + "psu": 28338, + "psv": 44530, + "psy": 3576, + "psy": 11056, + "psych": 31041, + "psych": 20509, + "psyched": 19932, + "psyched": 35199, + "psychedelic": 23292, + "psychi": 18147, + "psychiatric": 30578, + "psychiatry": 39706, + "psychic": 24916, + "psycho": 6472, + "psycho": 22154, + "psychological": 18153, + "psychologist": 32827, + "psychology": 12352, + "psychop": 30112, + "psychotic": 48774, + "pt": 11139, + "pt": 1459, + "pta": 11586, + "ptbo": 40481, + "ptc": 44646, + "pte": 47804, + "pter": 49323, + "pti": 29375, + "pti": 10491, + "ptic": 20670, + "ption": 3479, + "ptions": 24963, + "pto": 31372, + "pto": 34092, + "pton": 19780, + "pts": 5886, + "ptsd": 23973, + "ptv": 42402, + "pu": 755, + "pu": 11780, + "pub": 20720, + "pub": 6301, + "puberty": 44122, + "pubg": 31496, + "publ": 3434, + "publi": 1617, + "public": 3592, + "public": 2122, + "publica": 49007, + "publication": 13538, + "publications": 27334, + "publichealth": 35872, + "publicity": 20831, + "publicly": 18554, + "publish": 19032, + "published": 4311, + "publisher": 20455, + "publishers": 25222, + "publishes": 35633, + "publishing": 10994, + "publix": 47985, + "pubs": 21099, + "puc": 48779, + "puck": 17550, + "pud": 39234, + "pudding": 14025, + "puddle": 33545, + "pue": 20161, + "pueblo": 33076, + "puer": 8968, + "puerto": 12289, + "puertor": 22757, + "puertorico": 26356, + "puff": 44477, + "puff": 17184, + "puffin": 47632, + "puffs": 47453, + "puffy": 49245, + "pug": 20950, + "pug": 17739, + "pugchat": 42266, + "pugh": 41302, + "puglia": 38345, + "pugs": 39425, + "puj": 46163, + "puja": 33753, + "puk": 31811, + "pul": 2469, + "pul": 40512, + "pula": 45856, + "puli": 47293, + "pulit": 27745, + "pulitzer": 31419, + "pull": 20155, + "pull": 6857, + "pulled": 8525, + "pulling": 12897, + "pullman": 40203, + "pullover": 44020, + "pulls": 16041, + "pulmon": 32613, + "pulmonary": 39132, + "pulp": 25410, + "pulse": 40091, + "pulse": 12485, + "pulses": 42177, + "pulsion": 35398, + "pum": 37497, + "puma": 20858, + "pump": 5179, + "pump": 9173, + "pumped": 12796, + "pumping": 25150, + "pumpkin": 36386, + "pumpkin": 8842, + "pumpkins": 23787, + "pumps": 18540, + "pun": 2707, + "pun": 19929, + "punc": 43907, + "punch": 29332, + "punch": 10730, + "punched": 31689, + "punches": 35279, + "punching": 33468, + "punctu": 31565, + "punctuation": 47051, + "pundit": 41466, + "pune": 32593, + "pune": 14488, + "pung": 45420, + "puni": 11479, + "punish": 34569, + "punished": 31598, + "punisher": 38509, + "punishment": 19099, + "punjab": 19405, + "punjab": 12883, + "punjabi": 25430, + "punk": 28933, + "punk": 7246, + "punks": 47171, + "puns": 35231, + "punt": 32699, + "punta": 34112, + "punter": 47092, + "pup": 11926, + "pup": 11302, + "pupil": 27265, + "pupils": 13628, + "pupp": 7116, + "puppet": 18439, + "puppets": 28475, + "puppies": 14820, + "puppy": 25431, + "puppy": 6829, + "puppylove": 40849, + "pups": 20778, + "pur": 1727, + "pur": 6265, + "pura": 25596, + "puram": 46174, + "purcell": 46065, + "purch": 8384, + "purchase": 5481, + "purchased": 13399, + "purchases": 21887, + "purchasing": 20718, + "purdu": 40691, + "purdue": 22280, + "pure": 14202, + "pure": 5979, + "puree": 45474, + "purely": 32459, + "puremichigan": 39783, + "purest": 45497, + "purge": 33514, + "puri": 16910, + "puri": 21974, + "purification": 47724, + "purity": 29780, + "purple": 17837, + "purple": 5496, + "purpose": 33492, + "purpose": 7391, + "purposes": 22020, + "purr": 49262, + "purr": 46343, + "purse": 16480, + 
"pursue": 19463, + "pursuing": 26424, + "pursuit": 16469, + "purée": 40981, + "pus": 13841, + "pusa": 40825, + "push": 16028, + "push": 6831, + "pushaw": 35407, + "pushaward": 35448, + "pushawards": 47184, + "pushed": 16155, + "pushes": 23828, + "pushing": 11549, + "put": 29535, + "put": 1983, + "putin": 10693, + "putnam": 40235, + "puts": 7898, + "putt": 30279, + "putter": 44723, + "putting": 5154, + "puzz": 19760, + "puzzle": 12875, + "puzzles": 27986, + "pv": 14517, + "pv": 13495, + "pvc": 26959, + "pvp": 44172, + "pvt": 29898, + "pw": 19419, + "pw": 16067, + "pwc": 22965, + "px": 24790, + "px": 10262, + "pxrtg": 36262, + "py": 4005, + "py": 7504, + "pye": 31099, + "pyeongchang": 36066, + "pyg": 41450, + "pyram": 14405, + "pyramid": 18725, + "pyramids": 36877, + "pyrene": 36740, + "pyrenees": 39744, + "pyro": 39762, + "python": 13370, + "pz": 48361, + "pé": 43167, + "q": 80, + "q": 336, + "qa": 24944, + "qa": 16360, + "qad": 27844, + "qadri": 35672, + "qaeda": 31246, + "qanda": 48672, + "qanon": 19182, + "qant": 35404, + "qantas": 43250, + "qatar": 32804, + "qatar": 10872, + "qb": 8073, + "qbs": 38188, + "qc": 17406, + "qe": 30974, + "qf": 27215, + "qi": 25054, + "qi": 11256, + "qing": 46522, + "qing": 34339, + "ql": 28366, + "qld": 23039, + "qld": 13765, + "qldpol": 42296, + "qm": 42148, + "qotd": 24504, + "qpr": 24788, + "qq": 31960, + "qr": 18193, + "qs": 14364, + "qt": 15013, + "qtr": 44803, + "qu": 666, + "qu": 28646, + "qua": 20363, + "quack": 45575, + "quad": 11656, + "quad": 13419, + "quadcopter": 39792, + "quadru": 35831, + "quaid": 34265, + "quail": 34392, + "quaint": 45976, + "quake": 8421, + "quaker": 43395, + "quakes": 24572, + "qual": 9979, + "qual": 32405, + "qualcomm": 38683, + "quali": 4574, + "qualification": 21508, + "qualifications": 35225, + "qualified": 11927, + "qualifier": 18733, + "qualifiers": 21388, + "qualifies": 35820, + "qualify": 17019, + "qualifying": 11895, + "qualitative": 45847, + "qualities": 20488, + "quality": 28545, + "quality": 3027, + "quan": 11669, + "quan": 27490, + "quand": 28198, + "quant": 15050, + "quanti": 31540, + "quantitative": 40583, + "quantities": 33917, + "quantity": 26920, + "quantum": 15320, + "quar": 3856, + "quare": 42549, + "quarry": 27601, + "quart": 7851, + "quarter": 8816, + "quarter": 6632, + "quarterback": 16545, + "quarterfinal": 37992, + "quarterfinals": 28971, + "quarterly": 23350, + "quarters": 10146, + "quartet": 18056, + "quartz": 17752, + "quat": 25715, + "quattro": 40300, + "quay": 40276, + "quay": 17304, + "que": 1147, + "que": 2319, + "quebec": 15373, + "queen": 6407, + "queen": 2997, + "queenof": 44398, + "queens": 22943, + "queens": 9330, + "queensland": 15168, + "queer": 38874, + "queer": 18161, + "quel": 39774, + "quel": 21879, + "quen": 23876, + "quen": 38324, + "quent": 23808, + "quentin": 27530, + "quer": 17378, + "quer": 26859, + "quered": 23210, + "queries": 32958, + "querque": 30338, + "query": 27464, + "ques": 25328, + "ques": 7715, + "queso": 40110, + "quest": 31653, + "quest": 4846, + "questi": 2391, + "question": 18961, + "question": 4382, + "questionable": 30733, + "questioned": 31847, + "questioning": 24887, + "questions": 3883, + "quests": 44611, + "quet": 8513, + "quets": 39055, + "quetta": 38326, + "quette": 18993, + "queu": 32705, + "queue": 18549, + "queues": 40649, + "queuing": 44082, + "quez": 18677, + "quezon": 41117, + "qui": 1912, + "qui": 18046, + "quic": 26474, + "quiche": 47723, + "quick": 5969, + "quick": 3712, + "quicker": 29211, + "quickest": 37734, + "quickly": 7787, + "quid": 
30732, + "quie": 43875, + "quien": 43482, + "quiere": 42723, + "quiero": 32567, + "quiet": 17853, + "quiet": 7557, + "quietly": 22208, + "quig": 44690, + "quil": 12305, + "quill": 48951, + "quilt": 23977, + "quilted": 46052, + "quin": 8607, + "quin": 17167, + "quincy": 27640, + "quind": 32339, + "quinn": 12306, + "quinoa": 26703, + "quins": 39701, + "quint": 26898, + "quinta": 47446, + "quinte": 22098, + "quintess": 37538, + "quintet": 35125, + "quipment": 42813, + "quir": 15943, + "quirky": 25044, + "quis": 15064, + "quist": 25128, + "quit": 19358, + "quit": 11140, + "quite": 4135, + "quito": 35828, + "quits": 32505, + "quitting": 33871, + "quity": 33133, + "quiz": 31197, + "quiz": 8344, + "quizz": 35041, + "quo": 3046, + "quo": 28127, + "quoi": 45549, + "quot": 5452, + "quot": 47587, + "quota": 42097, + "quotation": 49195, + "quote": 15446, + "quote": 4020, + "quoted": 27706, + "quoteoftheday": 19975, + "quotes": 5808, + "quoting": 31651, + "qur": 37782, + "quran": 19690, + "qureshi": 46307, + "qvist": 42322, + "qx": 45038, + "r": 81, + "r": 337, + "ra": 559, + "ra": 1735, + "raa": 44344, + "rab": 14816, + "rab": 33224, + "rabb": 6875, + "rabbi": 20959, + "rabbit": 10274, + "rabbits": 27028, + "rabhu": 25806, + "rable": 10182, + "rac": 1773, + "rac": 30462, + "raccoon": 29516, + "race": 10978, + "race": 2471, + "racec": 18814, + "racecourse": 25036, + "raced": 36021, + "racer": 16798, + "racers": 33603, + "races": 8605, + "raceway": 24650, + "rach": 6876, + "rach": 33429, + "racha": 21952, + "racha": 35022, + "rachael": 29095, + "rachel": 13511, + "rachel": 8029, + "raci": 33381, + "racial": 13801, + "racially": 43577, + "racing": 23306, + "racing": 3699, + "racism": 11276, + "racist": 9684, + "racists": 41777, + "rack": 24600, + "rack": 12034, + "racket": 37691, + "racks": 21191, + "rad": 4473, + "rad": 8238, + "rada": 30437, + "radar": 9672, + "radcliffe": 33096, + "rade": 44494, + "rade": 17911, + "rader": 45002, + "radford": 45800, + "radha": 43122, + "radi": 5772, + "radial": 42028, + "radiance": 45670, + "radiant": 25614, + "radiation": 18210, + "radiator": 39372, + "radic": 18082, + "radical": 13712, + "radicals": 45903, + "radio": 7176, + "radio": 2638, + "radioactive": 34704, + "radiodisney": 36483, + "radiohead": 39472, + "radiology": 29684, + "radios": 43669, + "radish": 37789, + "radius": 37570, + "rado": 29784, + "rae": 21646, + "rae": 15051, + "rael": 45390, + "raer": 44561, + "raf": 11495, + "raf": 11490, + "rafa": 14352, + "rafa": 24850, + "rafael": 38221, + "rafael": 19216, + "rafaelnadal": 49219, + "raff": 34900, + "raffic": 32928, + "raffle": 13752, + "raffles": 43489, + "rafi": 35304, + "raft": 9233, + "rafting": 36309, + "rag": 13958, + "rag": 20687, + "rage": 8593, + "rages": 34253, + "ragh": 35642, + "ragha": 40972, + "raging": 25015, + "ragn": 24125, + "ragnar": 34385, + "ragnarok": 41856, + "ragon": 34768, + "rags": 47838, + "rah": 12277, + "rah": 8766, + "raheem": 43317, + "rahim": 24152, + "rahman": 19680, + "rahu": 13129, + "rahul": 37239, + "rahul": 17440, + "rahulg": 27510, + "rahulgandhi": 28293, + "rai": 9165, + "rai": 9638, + "raid": 6877, + "raided": 43417, + "raider": 27368, + "raider": 21455, + "raidernation": 47901, + "raiders": 11817, + "raids": 26655, + "rail": 4573, + "rail": 6879, + "raila": 47273, + "railminindia": 35557, + "railroad": 17080, + "rails": 23427, + "railway": 27614, + "railway": 7856, + "railwayana": 46750, + "railways": 20765, + "raim": 45785, + "rain": 3128, + "rain": 2443, + "raina": 30564, + "rainbow": 24562, + "rainbow": 6286, 
+ "rainbows": 30483, + "raine": 49038, + "raine": 6871, + "rained": 32310, + "rainf": 15024, + "rainfall": 15350, + "rainforest": 22823, + "rainier": 37850, + "raining": 13964, + "rains": 14272, + "rainy": 10222, + "rais": 14729, + "raise": 24249, + "raise": 5078, + "raised": 6027, + "raiser": 33555, + "raises": 13297, + "raisethe": 47109, + "raisin": 36864, + "raising": 6883, + "raj": 5958, + "raj": 10813, + "raja": 46069, + "raja": 19150, + "rajan": 46595, + "rajas": 16185, + "rajasthan": 18017, + "raje": 21899, + "rajesh": 43602, + "raji": 27569, + "rajini": 29600, + "rajini": 40622, + "rajinikanth": 32922, + "rajiv": 40197, + "rajkumar": 49304, + "rajput": 47572, + "raju": 47029, + "rak": 13523, + "rak": 26287, + "rake": 26825, + "rake": 32712, + "rakesh": 41083, + "ral": 8062, + "ral": 1406, + "rale": 14192, + "raleigh": 18207, + "rall": 23249, + "rallies": 25230, + "rally": 18882, + "rally": 5041, + "rallying": 36836, + "ralph": 25290, + "ralph": 12234, + "ram": 1976, + "ram": 2007, + "rama": 22112, + "ramad": 12736, + "ramadan": 15547, + "ramadhan": 47415, + "raman": 39816, + "ramapho": 43963, + "ramaphosa": 44993, + "ramatta": 49112, + "rambo": 41855, + "ramcharan": 45275, + "rame": 47745, + "ramen": 18892, + "ramesh": 48640, + "ramesh": 40186, + "rami": 43016, + "ramirez": 23877, + "ramon": 27958, + "ramone": 47201, + "ramos": 21046, + "ramp": 14271, + "rampage": 32077, + "rampant": 41985, + "ramps": 35257, + "rams": 10292, + "ramsay": 26259, + "ramsey": 19215, + "ran": 1433, + "ran": 4031, + "rana": 22143, + "ranbir": 40881, + "rance": 29034, + "ranch": 43955, + "ranch": 10659, + "rancho": 26258, + "rand": 5628, + "rand": 18718, + "randall": 23639, + "rande": 21469, + "randolph": 29899, + "random": 11396, + "random": 6160, + "randomly": 17272, + "rands": 39153, + "randy": 29479, + "randy": 13279, + "rane": 28852, + "rang": 4043, + "rang": 24377, + "range": 13627, + "range": 3818, + "ranger": 31472, + "ranger": 13593, + "rangers": 7664, + "ranges": 25685, + "ranging": 25946, + "rani": 29264, + "rani": 22631, + "rank": 11501, + "ranked": 8307, + "rankin": 37539, + "ranking": 12347, + "rankings": 12596, + "ranks": 14469, + "rano": 18608, + "rans": 46259, + "ransom": 28523, + "ransom": 34646, + "ransomware": 33815, + "rant": 46467, + "rant": 9819, + "rants": 34014, + "ranveer": 32402, + "ranveer": 41482, + "ranveerofficial": 42116, + "rao": 16913, + "rap": 7773, + "rap": 7348, + "rape": 46099, + "rape": 10070, + "raped": 23700, + "rapha": 22754, + "raphael": 30091, + "rapi": 8610, + "rapid": 47697, + "rapid": 12205, + "rapidly": 16710, + "rapids": 18848, + "raping": 44926, + "rapist": 33360, + "rapp": 19283, + "rapper": 11860, + "rappers": 30315, + "rapping": 42864, + "raps": 37887, + "raptor": 26762, + "raptors": 17035, + "raq": 39787, + "raq": 43312, + "raqqa": 47074, + "raquel": 44338, + "rar": 26819, + "rar": 24605, + "rard": 21012, + "rare": 18992, + "rare": 3865, + "rarely": 17315, + "rarest": 43237, + "rarity": 45862, + "ras": 23492, + "ras": 8224, + "rasc": 30085, + "rascal": 43481, + "rash": 14917, + "rash": 30608, + "rashad": 46527, + "rasheed": 41638, + "rashi": 19426, + "rashid": 26757, + "rasp": 10487, + "raspberries": 37742, + "raspberry": 40162, + "raspberry": 13615, + "raspberrypi": 43934, + "rass": 45654, + "rasta": 47002, + "rat": 3806, + "rat": 8985, + "rata": 28568, + "ratchet": 25078, + "rate": 5068, + "rated": 8183, + "rates": 6864, + "rath": 18268, + "rath": 39772, + "rather": 5252, + "rati": 11486, + "rating": 10567, + "ratings": 14176, + "ratio": 15893, + 
"ration": 27002, + "ration": 35662, + "rational": 33086, + "ratna": 49078, + "ratri": 32288, + "rats": 19043, + "ratt": 20737, + "ratt": 34785, + "rattle": 40824, + "rattle": 41839, + "rau": 27744, + "raul": 30218, + "raun": 41169, + "rav": 14367, + "rav": 23606, + "rave": 38784, + "rave": 17601, + "ravel": 27927, + "raven": 10269, + "raven": 16803, + "ravens": 17946, + "ravi": 22947, + "ravi": 19538, + "ravin": 39099, + "raving": 45807, + "raviol": 41104, + "ravioli": 43460, + "raw": 10166, + "raw": 6323, + "rawlings": 40662, + "rax": 38520, + "ray": 5312, + "ray": 3077, + "raya": 29991, + "raymond": 16683, + "rayn": 47852, + "rayon": 47900, + "rays": 11064, + "raz": 9700, + "raz": 19087, + "raza": 37724, + "razer": 33832, + "razor": 24934, + "razor": 21300, + "razz": 43769, + "rb": 12740, + "rb": 7477, + "rbc": 37500, + "rbi": 15687, + "rbs": 29102, + "rc": 7575, + "rc": 7457, + "rca": 33942, + "rcb": 45240, + "rcmp": 31489, + "rcn": 49370, + "rctid": 49223, + "rd": 13501, + "rd": 1973, + "rda": 45755, + "rdr": 44364, + "rds": 32378, + "re": 515, + "re": 810, + "rea": 11521, + "reach": 4483, + "reach": 4279, + "reached": 6878, + "reaches": 14462, + "reaching": 11358, + "react": 36566, + "react": 15065, + "reacted": 42515, + "reacting": 40595, + "reaction": 7189, + "reactions": 18438, + "reactive": 42072, + "reactjs": 46173, + "reactor": 32037, + "reacts": 23115, + "read": 933, + "read": 1199, + "reader": 9884, + "readers": 10335, + "readiness": 28131, + "reading": 17556, + "reading": 2337, + "readingfc": 47428, + "readings": 23361, + "reads": 6597, + "ready": 17351, + "ready": 1112, + "reagan": 17767, + "real": 2017, + "real": 1532, + "realdonaldtrump": 7025, + "reale": 5930, + "realest": 45855, + "realestate": 32937, + "realestate": 6569, + "reali": 4185, + "realis": 38114, + "realise": 14773, + "realised": 17945, + "realising": 39537, + "realism": 20024, + "realist": 30248, + "realistic": 16157, + "realities": 32443, + "reality": 46802, + "reality": 5004, + "realization": 40402, + "realize": 7538, + "realized": 10489, + "realizes": 42918, + "realizing": 23284, + "reall": 39686, + "really": 43249, + "really": 1414, + "realm": 23083, + "realmadrid": 27866, + "realms": 43033, + "realness": 46761, + "realtime": 44002, + "realtime": 38203, + "realtor": 18038, + "realtors": 31759, + "realty": 20471, + "ream": 37242, + "ream": 15219, + "rean": 48477, + "reap": 31334, + "reaper": 29922, + "rear": 39652, + "rear": 10223, + "reas": 9121, + "reason": 12882, + "reason": 3893, + "reasonable": 18558, + "reasonably": 38589, + "reasoning": 30341, + "reasons": 5686, + "reau": 32398, + "reb": 12370, + "reb": 18796, + "reba": 48543, + "rebate": 43817, + "rebe": 25227, + "rebec": 10774, + "rebecca": 12892, + "rebel": 8185, + "rebel": 12248, + "rebellion": 22170, + "rebels": 13623, + "rebirth": 33303, + "reboot": 22385, + "reborn": 30229, + "reboun": 43381, + "rebound": 31280, + "rebounds": 19190, + "rebs": 28164, + "rebu": 43162, + "rebuild": 20022, + "rebuilding": 30880, + "rebuilt": 33137, + "rec": 1020, + "rec": 11243, + "recall": 15151, + "recalled": 32142, + "recalling": 47855, + "recalls": 24740, + "recap": 29816, + "recap": 8337, + "recaps": 47997, + "recard": 35536, + "rece": 1890, + "recei": 2148, + "receip": 38503, + "receipt": 30479, + "receipts": 41181, + "receive": 4800, + "received": 4178, + "receiver": 17659, + "receivers": 45294, + "receives": 10027, + "receiving": 7252, + "recent": 3969, + "recently": 4482, + "recep": 17450, + "reception": 8364, + "receptions": 46881, + "receptor": 41835, 
+ "recess": 38182, + "recession": 27176, + "recharge": 29396, + "rechargeable": 37516, + "reci": 2037, + "recipe": 28923, + "recipe": 4614, + "recipeoftheday": 38727, + "recipes": 9243, + "recipi": 10136, + "recipient": 13703, + "recipients": 18940, + "recipro": 41789, + "recital": 23457, + "recite": 48824, + "reck": 11715, + "reckless": 26284, + "reckon": 23854, + "recl": 42277, + "reclaim": 35969, + "reclaimed": 32648, + "reco": 2535, + "reco": 46038, + "recogn": 6343, + "recogni": 5329, + "recognise": 19824, + "recognised": 20986, + "recognising": 48423, + "recognition": 9415, + "recognizable": 47240, + "recognize": 10905, + "recognized": 9929, + "recognizes": 26909, + "recognizing": 19666, + "recomm": 4540, + "recommend": 11628, + "recommend": 8942, + "recommendation": 20118, + "recommendations": 16516, + "recommended": 11100, + "recommending": 44301, + "recommends": 22940, + "recon": 15371, + "recon": 28996, + "reconciliation": 26451, + "reconstruction": 24955, + "recor": 1723, + "record": 21328, + "record": 2717, + "recorded": 9392, + "recorder": 26747, + "recording": 48237, + "recording": 6942, + "recordings": 19715, + "records": 4529, + "recover": 16785, + "recovered": 16444, + "recovering": 19005, + "recovers": 47935, + "recovery": 6591, + "recre": 22148, + "recreate": 29775, + "recreated": 40888, + "recreating": 48224, + "recreation": 17331, + "recreational": 24329, + "recru": 4745, + "recruit": 9011, + "recruit": 15585, + "recruited": 36518, + "recruiter": 43120, + "recruiters": 46542, + "recruiting": 10533, + "recruitment": 10541, + "recruits": 22647, + "recs": 33069, + "rectan": 43041, + "rectangular": 43321, + "rector": 41585, + "recu": 26798, + "recur": 19983, + "recurring": 35912, + "recy": 6790, + "recycla": 40659, + "recyclable": 48907, + "recycle": 19366, + "recycled": 16829, + "recycling": 12566, + "red": 1893, + "red": 736, + "redbubble": 46137, + "redbull": 29483, + "redbull": 29219, + "redcarpet": 32259, + "redcross": 30659, + "redd": 22149, + "redd": 40618, + "redding": 41061, + "reddish": 43383, + "reddit": 15226, + "reddy": 23028, + "rede": 10913, + "redeem": 37449, + "redefining": 46352, + "redemption": 20233, + "redesign": 24188, + "redesigned": 33111, + "redevelopment": 30322, + "redhead": 36267, + "redi": 7976, + "redman": 44753, + "redmond": 39627, + "rednation": 28180, + "rednationrising": 28262, + "redneck": 39105, + "redness": 22626, + "redo": 42524, + "redon": 48506, + "redro": 37722, + "reds": 11221, + "redskins": 19023, + "redsox": 19144, + "reduc": 5015, + "reduce": 6604, + "reduced": 10821, + "reduces": 20539, + "reducing": 13836, + "reduction": 12219, + "reductions": 48263, + "redux": 43014, + "redvelvet": 41845, + "redwings": 31058, + "redwood": 31748, + "ree": 9282, + "ree": 5813, + "reebok": 26734, + "reece": 30457, + "reed": 26209, + "reed": 10435, + "reedus": 32865, + "reef": 46557, + "reef": 15624, + "reefs": 34459, + "reel": 34467, + "reel": 17166, + "reels": 48127, + "reem": 48891, + "reen": 21638, + "reen": 23679, + "rees": 18314, + "reese": 20929, + "reeves": 23060, + "ref": 4067, + "ref": 9591, + "refe": 5624, + "refer": 18425, + "refer": 22325, + "referee": 20398, + "referees": 45583, + "referen": 13535, + "reference": 10214, + "references": 24009, + "referendum": 16732, + "referr": 47784, + "referral": 30219, + "referred": 22969, + "referring": 29797, + "refers": 30069, + "refill": 37859, + "refin": 13455, + "refined": 26098, + "refinery": 31393, + "refining": 48406, + "reflec": 4608, + "reflect": 13373, + "reflected": 28732, + 
"reflecting": 19700, + "reflection": 11884, + "reflections": 16647, + "reflective": 27008, + "reflects": 15821, + "reflex": 45756, + "reflex": 36050, + "reform": 45678, + "reform": 8875, + "reformation": 45119, + "reformed": 40880, + "reforms": 19274, + "refr": 34850, + "refre": 11995, + "refresh": 17836, + "refresh": 23288, + "refreshed": 35925, + "refresher": 41481, + "refreshing": 14159, + "refreshments": 31127, + "refriger": 21076, + "refrigerator": 36662, + "refs": 35595, + "refu": 3545, + "refuge": 5638, + "refuge": 17432, + "refugee": 11556, + "refugees": 42687, + "refugees": 8316, + "refund": 28899, + "refur": 15519, + "refurbi": 18259, + "refurbished": 26190, + "refurbishment": 35803, + "refusal": 46547, + "refuse": 16412, + "refused": 17190, + "refuses": 20085, + "refusing": 26704, + "reg": 5472, + "reg": 12353, + "regain": 37510, + "regal": 31512, + "regal": 25028, + "regan": 34062, + "regar": 5881, + "regard": 21801, + "regarded": 32017, + "regarding": 8493, + "regardless": 17220, + "regards": 23079, + "regatta": 26316, + "regen": 46545, + "regency": 29341, + "regeneration": 29257, + "regent": 30455, + "regents": 46710, + "regg": 12757, + "reggae": 37821, + "reggae": 15214, + "reggie": 21872, + "regi": 1608, + "regime": 11378, + "regiment": 18603, + "regin": 23287, + "regina": 16841, + "region": 16542, + "region": 4341, + "regional": 5552, + "regionals": 26043, + "regions": 14530, + "regis": 28094, + "register": 3967, + "registered": 10254, + "registering": 33510, + "registr": 29193, + "registration": 7302, + "registrations": 38423, + "registry": 30020, + "rego": 47351, + "regram": 30329, + "regrann": 48802, + "regre": 8627, + "regression": 43733, + "regret": 14374, + "regrets": 23231, + "regu": 3411, + "regui": 46722, + "regul": 11847, + "regular": 14882, + "regular": 6307, + "regularly": 17263, + "regulat": 14575, + "regulate": 33494, + "regulated": 31384, + "regulating": 48156, + "regulation": 14267, + "regulations": 16654, + "regulator": 30364, + "regulators": 35837, + "regulatory": 17717, + "reh": 21492, + "reha": 10193, + "rehab": 16973, + "rehabil": 17930, + "rehabilitation": 21042, + "rehear": 7273, + "rehearsal": 11482, + "rehearsals": 17977, + "rehearsing": 23125, + "rehman": 39206, + "rei": 15343, + "rei": 26033, + "reic": 41230, + "reich": 48589, + "reich": 28929, + "reid": 45125, + "reid": 11744, + "reig": 13092, + "reign": 41419, + "reign": 14827, + "reigning": 28409, + "reigns": 21217, + "reiki": 46960, + "reilly": 28120, + "reim": 35421, + "reimagined": 46799, + "reimbur": 39857, + "rein": 9240, + "rein": 45009, + "reina": 43847, + "reinde": 23810, + "reindeer": 25072, + "reinfor": 48161, + "reinforced": 41909, + "reinst": 33969, + "reinvent": 38171, + "reissue": 34042, + "reiter": 35394, + "rejec": 9958, + "reject": 22435, + "rejected": 17505, + "rejection": 32264, + "rejects": 23155, + "rejo": 20150, + "rejoice": 24712, + "rejuven": 26332, + "rek": 47542, + "rek": 19201, + "rel": 1825, + "rel": 5233, + "rela": 4362, + "reland": 15220, + "relat": 27192, + "relatable": 31010, + "relate": 17520, + "related": 5880, + "relates": 36064, + "relating": 27373, + "relation": 4561, + "relation": 16207, + "relations": 10100, + "relationship": 47239, + "relationship": 5837, + "relationships": 10610, + "relative": 17265, + "relatively": 18351, + "relatives": 21981, + "relax": 6777, + "relax": 9035, + "relaxation": 22194, + "relaxed": 18999, + "relaxing": 10256, + "relay": 12403, + "relays": 28404, + "rele": 1602, + "release": 29100, + "release": 2706, + "released": 3410, + 
"releases": 7393, + "releasethe": 44008, + "releasing": 10321, + "releg": 23378, + "relegated": 45884, + "relegation": 35040, + "relent": 22213, + "relentless": 27207, + "relessly": 33927, + "relev": 9349, + "relevance": 31400, + "relevant": 10568, + "reli": 2674, + "reliability": 27220, + "reliable": 13714, + "reliance": 27727, + "relic": 27802, + "relics": 43208, + "relief": 7518, + "relies": 41579, + "relieve": 28623, + "relieved": 36597, + "religi": 4940, + "religion": 8803, + "religions": 31189, + "religious": 8289, + "relish": 35550, + "relive": 23939, + "reliving": 47558, + "rell": 28802, + "rell": 7127, + "rella": 9952, + "relle": 31390, + "reloaded": 38908, + "relocated": 46791, + "relocation": 39198, + "rels": 23320, + "relu": 32058, + "reluct": 32549, + "reluctant": 45552, + "rely": 4158, + "relying": 42168, + "rem": 15098, + "rem": 21637, + "rema": 4569, + "remain": 29144, + "remain": 6415, + "remainder": 41672, + "remained": 23714, + "remaining": 11392, + "remains": 6807, + "remake": 16234, + "remark": 11136, + "remarkable": 12404, + "remarkably": 39087, + "remarks": 15001, + "remastered": 24932, + "rematch": 26473, + "rembrandt": 45972, + "reme": 20071, + "remedi": 18442, + "remedies": 25581, + "remedy": 25794, + "remem": 7966, + "rememb": 7062, + "remember": 22045, + "remember": 2195, + "remembered": 11763, + "remembering": 8135, + "remembers": 12551, + "remembrance": 40321, + "remembrance": 15860, + "remembranceday": 48333, + "rement": 7173, + "rements": 12667, + "remi": 41693, + "remin": 3216, + "remind": 9868, + "reminded": 12309, + "reminder": 5565, + "reminders": 34121, + "reminding": 19976, + "reminds": 8303, + "remington": 43527, + "reminis": 17723, + "reminiscent": 41704, + "reminiscing": 32552, + "remix": 8519, + "remixes": 31011, + "remn": 29127, + "remnants": 39032, + "remo": 4064, + "remo": 33259, + "remodel": 34159, + "remodel": 37495, + "remodeling": 41432, + "remote": 47163, + "remote": 9687, + "remotely": 32375, + "removable": 44095, + "removal": 13679, + "remove": 9709, + "removed": 10289, + "remover": 44267, + "removes": 29018, + "removing": 18504, + "remy": 30434, + "ren": 737, + "ren": 2596, + "rena": 12591, + "renais": 15409, + "renaissance": 16007, + "renal": 36096, + "renamed": 31535, + "renault": 17600, + "rence": 19245, + "rence": 1553, + "rences": 8545, + "rend": 33932, + "rend": 22851, + "render": 39752, + "render": 13024, + "rendered": 23652, + "rendering": 21339, + "renders": 39419, + "rendez": 43293, + "rendezvous": 45644, + "rendition": 28891, + "rendon": 46272, + "rendous": 49403, + "rends": 38842, + "rene": 15438, + "rene": 12597, + "renee": 23480, + "reneg": 29909, + "renegade": 41229, + "renergy": 37151, + "renew": 6645, + "renew": 22015, + "renewable": 31269, + "renewable": 15941, + "renewableenergy": 33357, + "renewables": 21619, + "renewal": 21270, + "renewed": 20524, + "renfre": 45043, + "reng": 36795, + "reno": 11520, + "reno": 12831, + "renov": 9984, + "renovated": 23839, + "renovation": 17121, + "renovations": 31311, + "renowned": 14727, + "rens": 18183, + "renshaw": 44445, + "rent": 17377, + "rent": 1609, + "rental": 12193, + "rentals": 24105, + "rented": 35932, + "rential": 31692, + "renting": 37662, + "rently": 2615, + "rents": 31109, + "reo": 15963, + "reo": 26854, + "reon": 15761, + "reopen": 26883, + "reopened": 32868, + "reopening": 36663, + "reopens": 40644, + "rep": 4229, + "rep": 6487, + "repair": 8419, + "repaired": 32953, + "repairing": 38534, + "repairs": 16297, + "repar": 34065, + "repe": 5785, + "repeal": 42622, + 
"repeal": 23938, + "repeat": 10192, + "repeated": 27904, + "repeatedly": 26630, + "repeating": 33834, + "repeats": 39158, + "repell": 46235, + "repent": 47261, + "reper": 29085, + "repet": 38533, + "repl": 13047, + "replac": 6069, + "replace": 9466, + "replaceable": 47762, + "replaced": 13200, + "replacement": 10835, + "replaces": 27781, + "replacing": 18647, + "replay": 16875, + "repleni": 44839, + "replic": 21651, + "replica": 18125, + "replied": 24238, + "replies": 18808, + "reply": 8965, + "replying": 47599, + "repor": 2628, + "report": 2417, + "reported": 7598, + "reportedly": 10953, + "reporter": 11019, + "reporters": 18454, + "reporting": 9218, + "reports": 4908, + "reposit": 41276, + "repository": 46977, + "repost": 33147, + "repost": 7217, + "repostapp": 38388, + "reposting": 20223, + "reppin": 19163, + "repping": 22574, + "repre": 3397, + "represent": 8293, + "represent": 8406, + "representation": 13520, + "representative": 13175, + "representatives": 15591, + "represented": 12299, + "representing": 7561, + "represents": 14433, + "repri": 31854, + "reproduction": 35714, + "reproductive": 25522, + "reps": 14265, + "reptile": 36938, + "reptiles": 38679, + "republic": 6376, + "republic": 7185, + "republican": 9842, + "republicans": 12384, + "repur": 41852, + "req": 42411, + "requ": 10664, + "reque": 9539, + "request": 7813, + "requested": 16199, + "requesting": 33245, + "requests": 17087, + "requi": 4863, + "requiem": 40316, + "require": 14437, + "required": 8500, + "requirement": 27146, + "requirements": 12860, + "requires": 13396, + "requiring": 33425, + "requis": 42602, + "rer": 41295, + "rer": 3407, + "rera": 14301, + "rero": 21860, + "rers": 18869, + "res": 4466, + "res": 934, + "resc": 3956, + "rescheduled": 43553, + "rescu": 8618, + "rescue": 28567, + "rescue": 5718, + "rescued": 11919, + "rescues": 32439, + "rescuing": 43770, + "rese": 13000, + "resear": 6090, + "research": 25694, + "research": 2379, + "researched": 42733, + "researcher": 18334, + "researchers": 9522, + "researching": 24544, + "reseller": 35391, + "resemb": 16916, + "resemblance": 26856, + "resemble": 37230, + "resembles": 35417, + "reser": 16420, + "reserv": 11906, + "reservation": 20289, + "reservations": 19307, + "reserve": 6911, + "reserved": 19796, + "reserves": 19705, + "reservoir": 20574, + "reset": 26250, + "resh": 47432, + "reshi": 39435, + "resi": 2152, + "residen": 22311, + "residence": 11672, + "residences": 38855, + "residency": 18545, + "resident": 9016, + "residente": 44637, + "residentevil": 48393, + "residential": 11002, + "residents": 6008, + "resign": 23584, + "resignation": 24779, + "resigned": 31014, + "resigns": 29738, + "resil": 10932, + "resili": 39212, + "resilience": 15271, + "resilient": 24694, + "resin": 24156, + "resist": 37345, + "resist": 9587, + "resistance": 7392, + "resistant": 17542, + "resisting": 43679, + "resolution": 9977, + "resolutions": 26816, + "resolve": 20787, + "resolved": 28807, + "reson": 18092, + "resonance": 42310, + "resort": 6594, + "resorts": 18839, + "resource": 43729, + "resource": 9760, + "resources": 6723, + "respec": 7466, + "respect": 31411, + "respect": 4916, + "respected": 19126, + "respectful": 24379, + "respecting": 36172, + "respective": 25817, + "respectively": 28794, + "respects": 23553, + "respir": 20771, + "respiratory": 24483, + "respon": 2421, + "respond": 12355, + "responded": 21121, + "respondents": 49253, + "responders": 25155, + "responding": 18037, + "responds": 17436, + "response": 5399, + "responses": 19006, + "responsi": 5490, + 
"responsibilities": 30375, + "responsibility": 11272, + "responsible": 8936, + "responsibly": 33675, + "responsive": 21544, + "ress": 34651, + "ress": 13629, + "resso": 15133, + "rest": 10974, + "rest": 2539, + "restart": 37378, + "restaur": 3775, + "restaurant": 41930, + "restaurant": 4489, + "restaurants": 11714, + "rested": 46020, + "resting": 18044, + "restless": 36724, + "restling": 30076, + "resto": 11118, + "resto": 41666, + "restock": 34060, + "restocked": 36966, + "restor": 8984, + "restoration": 11989, + "restorative": 46509, + "restore": 14008, + "restored": 14238, + "restoring": 24406, + "restra": 25424, + "restric": 11036, + "restricted": 27197, + "restriction": 44282, + "restrictions": 19884, + "restroom": 43423, + "restructuring": 43260, + "rests": 33775, + "resu": 10095, + "resul": 2655, + "result": 5659, + "resulted": 26449, + "resulting": 24581, + "results": 3790, + "resume": 15077, + "resumes": 30268, + "resur": 14865, + "resurg": 45962, + "resurgence": 47692, + "resurrec": 18487, + "resurrection": 25811, + "resusc": 47523, + "ret": 20500, + "ret": 10048, + "reta": 20153, + "retail": 14910, + "retail": 6455, + "retailer": 22549, + "retailers": 19418, + "retain": 24430, + "retained": 42737, + "retaining": 35571, + "retains": 42583, + "retali": 33101, + "retar": 29964, + "retarded": 44111, + "retention": 26247, + "rethink": 29078, + "rethinking": 42951, + "reti": 4721, + "retin": 31270, + "retina": 36919, + "retire": 18846, + "retired": 11477, + "retirement": 9205, + "retires": 29060, + "retiring": 21200, + "retrac": 32735, + "retreat": 11210, + "retri": 16918, + "retriever": 28394, + "retro": 6535, + "retro": 7755, + "retrogamer": 47220, + "retrogaming": 11316, + "retrospective": 27105, + "rett": 41082, + "rett": 8425, + "rette": 33066, + "return": 43042, + "return": 3458, + "returned": 10476, + "returning": 9290, + "returns": 5020, + "retwee": 48190, + "retweet": 3195, + "retweeted": 12705, + "retweeting": 32345, + "retweets": 10160, + "rety": 41550, + "reu": 20255, + "reu": 40371, + "reuben": 40450, + "reunion": 10247, + "reunite": 26179, + "reunited": 13516, + "reusable": 30395, + "reuse": 26535, + "reut": 15210, + "reuters": 15569, + "rev": 8424, + "rev": 11789, + "revamp": 29819, + "revamped": 36420, + "revan": 45277, + "reve": 3115, + "reveal": 8052, + "revealed": 7171, + "revealing": 21321, + "reveals": 6621, + "revel": 14133, + "revelation": 24053, + "revelations": 36163, + "reven": 10171, + "revenge": 12717, + "revenue": 10637, + "revenues": 33348, + "rever": 14829, + "rever": 41913, + "revere": 44187, + "reverend": 34407, + "revers": 20726, + "reversal": 33367, + "reverse": 12812, + "reversed": 42485, + "reversi": 31601, + "reversible": 34212, + "revi": 8317, + "review": 2268, + "reviewed": 16678, + "reviewer": 36409, + "reviewers": 48195, + "reviewing": 20458, + "reviews": 7227, + "revise": 46801, + "revised": 22806, + "revising": 46882, + "revision": 20335, + "revisit": 26568, + "revisited": 34302, + "revisiting": 33144, + "revit": 26367, + "revitalization": 46923, + "revival": 14142, + "revive": 26450, + "revived": 42912, + "revo": 28660, + "revol": 13447, + "revolt": 31697, + "revolu": 4900, + "revolution": 17699, + "revolution": 6644, + "revolutionary": 14734, + "revolver": 38747, + "revolving": 47230, + "revs": 49286, + "revue": 43428, + "rew": 37564, + "rewar": 15857, + "reward": 11223, + "rewarded": 27163, + "rewarding": 23351, + "rewards": 15235, + "rewatch": 35610, + "rewatching": 41287, + "rewind": 26867, + "rewrite": 45218, + "rex": 13002, + "rex": 
10904, + "rexperience": 33924, + "rey": 9681, + "rey": 4517, + "reyes": 18255, + "reykja": 47571, + "reyn": 11998, + "reynolds": 14309, + "reys": 48284, + "rez": 27597, + "rez": 15192, + "reza": 35888, + "rf": 35529, + "rf": 16368, + "rfc": 19003, + "rfid": 40204, + "rg": 33055, + "rg": 14897, + "rgb": 36128, + "rgv": 33685, + "rh": 8745, + "rh": 22404, + "rha": 19473, + "rhapso": 32532, + "rhapsody": 35774, + "rhe": 9186, + "rhea": 28612, + "rhetor": 24359, + "rhetoric": 29985, + "rhett": 42984, + "rheu": 42953, + "rhi": 21212, + "rhin": 12269, + "rhine": 22863, + "rhine": 44833, + "rhinestone": 30450, + "rhino": 41744, + "rhino": 20056, + "rhinos": 30671, + "rho": 7637, + "rhode": 39302, + "rhode": 27907, + "rhodes": 17785, + "rhon": 25882, + "rhonda": 46100, + "rhp": 27199, + "rhs": 24551, + "rhu": 23897, + "rhubarb": 30213, + "rhy": 7740, + "rhyme": 37356, + "rhymes": 33143, + "rhys": 28647, + "rhyth": 27069, + "rhythm": 16172, + "rhythmic": 46386, + "rhythms": 40872, + "ri": 553, + "ri": 2574, + "ria": 3650, + "rial": 15200, + "rian": 7788, + "rib": 44634, + "rib": 18298, + "riba": 44992, + "ribb": 10081, + "ribbon": 12114, + "ribbons": 35271, + "ribe": 46115, + "ribs": 17519, + "ric": 920, + "ric": 4798, + "rica": 14230, + "rical": 18109, + "rican": 30958, + "ricardo": 23140, + "ricci": 35783, + "ricciardo": 49282, + "rice": 36362, + "rice": 4741, + "rich": 5223, + "rich": 4021, + "richar": 9350, + "richard": 9080, + "richard": 4470, + "richards": 11372, + "richardson": 15984, + "riche": 23286, + "richer": 34138, + "riches": 37093, + "richest": 25572, + "richi": 38934, + "richie": 19797, + "richland": 43079, + "richmond": 34143, + "richmond": 11292, + "richter": 37591, + "rick": 6237, + "rick": 3064, + "ricket": 46161, + "ricket": 23671, + "ricks": 23111, + "ricky": 19188, + "ricky": 12814, + "rico": 37962, + "rico": 11362, + "ricotta": 38473, + "rics": 7353, + "ricul": 6980, + "rid": 18103, + "rid": 9874, + "ridd": 21990, + "ridden": 32025, + "riddle": 31839, + "ride": 15816, + "ride": 2994, + "rider": 31056, + "rider": 9707, + "riders": 10826, + "rides": 11308, + "ridg": 42646, + "ridge": 16580, + "ridge": 6352, + "ridic": 9624, + "ridiculous": 12659, + "ridiculously": 25661, + "ridin": 47869, + "riding": 6765, + "ridley": 27883, + "rie": 14824, + "rie": 5322, + "ried": 7552, + "riel": 26696, + "rien": 35237, + "rier": 40714, + "rier": 13336, + "ries": 28179, + "ries": 3059, + "riesling": 36372, + "rif": 7044, + "riff": 30359, + "rifle": 15354, + "rifles": 25678, + "rift": 26681, + "rig": 18462, + "rig": 13871, + "riga": 36626, + "rigged": 35897, + "rigging": 38160, + "riggs": 40328, + "righ": 15391, + "right": 13341, + "right": 1155, + "righte": 20762, + "righteous": 28169, + "righteousness": 42481, + "rightful": 42601, + "rightly": 42669, + "rights": 3336, + "rigid": 43138, + "rigor": 36788, + "rigorous": 41654, + "rigs": 42893, + "rihanna": 13744, + "rij": 41097, + "rik": 31136, + "rik": 27832, + "rika": 28580, + "ril": 12270, + "ril": 2388, + "riley": 35056, + "riley": 12260, + "rill": 23705, + "rilla": 43956, + "rilla": 18685, + "rim": 28147, + "rim": 12199, + "rime": 27064, + "rimin": 11527, + "rimo": 47817, + "rims": 34327, + "rin": 5859, + "rin": 11739, + "rina": 12869, + "rine": 24952, + "ring": 8318, + "ring": 2540, + "ringed": 44712, + "ringer": 35761, + "ringing": 26035, + "ringo": 38845, + "rings": 5751, + "rington": 12455, + "rink": 21497, + "rinka": 47316, + "rino": 47188, + "rinse": 48320, + "rio": 15681, + "rio": 5782, + "rion": 31623, + "rion": 34046, + "rios": 
32814, + "riot": 32636, + "riot": 14218, + "riots": 24844, + "rious": 6340, + "rip": 10353, + "rip": 4243, + "ripe": 22832, + "ripley": 41589, + "ripp": 25276, + "ripped": 17815, + "ripper": 35347, + "ripping": 29126, + "ripple": 24825, + "rips": 30182, + "rir": 36792, + "ris": 6108, + "ris": 1999, + "rise": 13641, + "rise": 3151, + "risen": 23653, + "risers": 44983, + "rises": 13362, + "riseup": 35760, + "rish": 18378, + "rish": 18927, + "rishi": 48434, + "rising": 30452, + "rising": 5448, + "risis": 37998, + "risk": 27967, + "risk": 4213, + "risking": 48155, + "risks": 12474, + "risky": 27630, + "risotto": 31471, + "rist": 40610, + "rit": 5156, + "rit": 17333, + "rita": 16178, + "ritchie": 30997, + "rite": 39318, + "rite": 18429, + "rites": 36160, + "rith": 48169, + "rith": 48850, + "riti": 32904, + "rito": 19379, + "ritos": 33507, + "ritt": 26092, + "ritter": 34854, + "ritu": 13391, + "ritual": 19712, + "rituals": 31145, + "ritz": 39151, + "ritz": 25627, + "rium": 33884, + "riv": 25113, + "rival": 13412, + "rival": 15629, + "rivalry": 19511, + "rivals": 15135, + "rive": 27588, + "rive": 34917, + "river": 5239, + "river": 2473, + "rivera": 18275, + "riverdale": 28304, + "riverfront": 44439, + "rivers": 10723, + "riverside": 15809, + "riveting": 44024, + "riviera": 25851, + "rix": 43407, + "rix": 9483, + "riya": 36908, + "riyad": 31564, + "riyadh": 33577, + "riz": 18426, + "riz": 35411, + "rizal": 41555, + "rizio": 40191, + "rizz": 34826, + "rizzo": 49076, + "rj": 26016, + "rj": 20949, + "rk": 38725, + "rk": 21422, + "rl": 18041, + "rl": 14590, + "rlly": 43222, + "rly": 25954, + "rm": 20202, + "rm": 8431, + "rmb": 49097, + "rms": 40529, + "rn": 13206, + "rn": 7666, + "rna": 24566, + "rnb": 31556, + "rnc": 35309, + "rnli": 29748, + "ro": 532, + "ro": 2795, + "roa": 8313, + "roach": 31073, + "road": 4370, + "road": 1759, + "roadhouse": 47891, + "roadmap": 30111, + "roads": 6189, + "roadsafety": 39992, + "roadshow": 21168, + "roadside": 26928, + "roadster": 28920, + "roadto": 24681, + "roadtrip": 15094, + "roadway": 42744, + "roam": 34045, + "roaming": 29240, + "roano": 34184, + "roanoke": 36587, + "roar": 34193, + "roar": 18483, + "roaring": 26428, + "roast": 11404, + "roasted": 10479, + "roasting": 32228, + "rob": 2668, + "rob": 6442, + "robb": 14059, + "robb": 39673, + "robbed": 24163, + "robber": 35545, + "robbers": 40852, + "robbery": 16393, + "robbi": 44898, + "robbie": 37200, + "robbie": 15970, + "robbing": 47569, + "robbins": 23461, + "robby": 44128, + "robe": 23116, + "rober": 4532, + "robert": 8811, + "robert": 3929, + "roberta": 43373, + "roberto": 42645, + "roberto": 16227, + "roberts": 10366, + "robertson": 17643, + "robes": 29304, + "robi": 16743, + "robin": 6681, + "robin": 7988, + "robins": 35502, + "robinson": 8523, + "robles": 47646, + "roblo": 27481, + "roblox": 37798, + "robo": 4672, + "robo": 36057, + "robot": 46089, + "robot": 8797, + "robotic": 23975, + "robotics": 13546, + "robots": 13473, + "robson": 31113, + "robust": 22780, + "robyn": 34533, + "roc": 3268, + "roc": 13776, + "rocco": 30009, + "roch": 23788, + "rochdale": 41880, + "roche": 31776, + "rochelle": 40161, + "rochester": 18057, + "rock": 2640, + "rock": 2172, + "rockab": 39353, + "rockabilly": 45019, + "rocke": 19914, + "rocked": 16116, + "rockefeller": 35476, + "rocker": 29008, + "rockers": 32338, + "rocket": 25435, + "rocket": 8383, + "rockets": 13292, + "rockford": 41039, + "rockies": 20621, + "rockin": 12073, + "rocking": 7081, + "rockn": 24442, + "rocknroll": 27840, + "rocks": 6135, + "rockstar": 
23603, + "rockstar": 18000, + "rockstargames": 27516, + "rockstars": 46639, + "rockthe": 49363, + "rockwell": 34747, + "rocky": 33481, + "rocky": 9648, + "rod": 9712, + "rod": 8291, + "roddy": 42332, + "rode": 18449, + "rodeo": 18250, + "rodgers": 17612, + "rodi": 49100, + "rodney": 21753, + "rodri": 11053, + "rodrigo": 33944, + "rodriguez": 14057, + "rods": 28618, + "roe": 27671, + "roe": 9996, + "rof": 33029, + "rofl": 48228, + "roft": 45212, + "rog": 34269, + "rog": 34017, + "rogen": 23380, + "roger": 13929, + "roger": 7735, + "rogerfederer": 40182, + "rogers": 10661, + "rogue": 32575, + "rogue": 15162, + "roh": 14933, + "roh": 29840, + "rohan": 39848, + "rohing": 23600, + "rohingya": 26146, + "rohit": 44649, + "rohit": 24299, + "roi": 21877, + "rok": 36807, + "rol": 3393, + "rol": 7818, + "roland": 33713, + "roland": 19569, + "role": 18485, + "role": 3414, + "roles": 11871, + "rolex": 21093, + "rolf": 48606, + "roll": 4711, + "roll": 3341, + "rolled": 11982, + "roller": 21034, + "roller": 12342, + "rollercoaster": 38248, + "rollers": 36941, + "rollin": 27545, + "rolling": 24250, + "rolling": 6347, + "rollingstones": 41309, + "rollins": 27724, + "rollout": 47710, + "rollover": 39214, + "rolls": 8614, + "rolltide": 28101, + "rom": 11377, + "rom": 19205, + "roma": 44134, + "roma": 11631, + "romain": 48897, + "roman": 4416, + "roman": 7370, + "romance": 7215, + "romania": 15884, + "romanian": 30866, + "romano": 38409, + "romans": 23066, + "romantic": 41457, + "romantic": 8821, + "rome": 9406, + "rome": 5243, + "romeo": 14429, + "romero": 23694, + "romney": 19287, + "romo": 32248, + "romper": 43699, + "ron": 2393, + "ron": 3372, + "rona": 42385, + "ronal": 46194, + "ronald": 15683, + "ronaldo": 13463, + "ronan": 34971, + "rond": 31935, + "ronda": 37436, + "rondo": 43756, + "rone": 48082, + "rone": 32763, + "roni": 47234, + "ronnie": 45257, + "ronnie": 16421, + "rons": 19536, + "ront": 48881, + "roo": 1249, + "roo": 31227, + "rood": 38007, + "roof": 9120, + "roof": 6449, + "roofing": 24415, + "roofs": 34635, + "rooftop": 16319, + "rook": 35918, + "rookie": 9771, + "rookies": 31917, + "room": 8845, + "room": 1530, + "roomie": 36851, + "roommate": 19825, + "roommates": 37323, + "rooms": 6328, + "rooney": 17712, + "roos": 32938, + "roosevel": 17644, + "roosevelt": 18488, + "rooster": 46263, + "rooster": 30926, + "roosters": 43693, + "root": 25930, + "root": 9728, + "rooted": 30428, + "rooting": 25523, + "roots": 8084, + "rop": 43401, + "rope": 9953, + "ropes": 30506, + "ror": 8668, + "ror": 2843, + "rors": 12072, + "rory": 42804, + "rory": 17813, + "ros": 5288, + "ros": 6930, + "rosa": 14393, + "rosal": 30397, + "rosario": 33640, + "rosary": 33098, + "rosberg": 46037, + "rose": 6146, + "rose": 3568, + "roseanne": 47528, + "rosel": 33616, + "rosemary": 19472, + "rosen": 13214, + "rosen": 36424, + "rosenberg": 43558, + "rosenthal": 46990, + "roses": 9061, + "rosetta": 43800, + "rosewood": 38686, + "rosie": 43049, + "rosie": 16888, + "ross": 8801, + "ross": 2158, + "rosse": 11602, + "rossi": 24817, + "rosso": 33023, + "roster": 12487, + "roswell": 45116, + "rosy": 46705, + "rosé": 28006, + "rot": 10055, + "rot": 9643, + "rotar": 45959, + "rotary": 14654, + "rotating": 32265, + "rotation": 18089, + "rotc": 32252, + "roth": 17741, + "roth": 19139, + "rother": 23174, + "rotherham": 37687, + "rothschild": 45089, + "roti": 46940, + "roto": 34698, + "rotor": 42991, + "rots": 16642, + "rott": 34806, + "rotten": 24324, + "rotter": 22614, + "rotterdam": 23422, + "rotun": 42970, + "rou": 2964, + "rou": 
34783, + "roud": 28375, + "rouge": 16209, + "rough": 11699, + "rough": 8511, + "roughly": 21910, + "roughs": 37598, + "rouhani": 39912, + "roulette": 39930, + "roun": 5602, + "round": 9403, + "round": 2522, + "roundabout": 29953, + "rounded": 26973, + "rounder": 37024, + "rounding": 40208, + "rounds": 11242, + "roundtable": 19386, + "roundup": 17503, + "roup": 29220, + "rourke": 38753, + "rous": 33645, + "rous": 34531, + "rousey": 46267, + "rout": 7502, + "rout": 41778, + "route": 5261, + "router": 29962, + "routes": 14923, + "routine": 12319, + "routines": 44074, + "routing": 44086, + "roux": 43416, + "rov": 23971, + "rove": 30130, + "rover": 12776, + "rovers": 16373, + "row": 5275, + "row": 1044, + "rowan": 26240, + "rowdy": 32141, + "rowe": 28323, + "rowed": 22615, + "rower": 43345, + "rowers": 41806, + "rowing": 12807, + "rowland": 33037, + "rowley": 48793, + "rowling": 29371, + "rown": 22287, + "rown": 25060, + "rows": 9409, + "rox": 14111, + "rox": 41033, + "roxy": 28093, + "roy": 2128, + "roy": 6354, + "royal": 6691, + "royal": 3853, + "royale": 20630, + "royalnavy": 41545, + "royals": 13335, + "royalties": 48660, + "royalty": 18296, + "royalwedding": 27461, + "royce": 18444, + "royd": 41476, + "royo": 39357, + "roz": 28989, + "roz": 37250, + "rp": 17305, + "rp": 8174, + "rpa": 41872, + "rpg": 12445, + "rpm": 23715, + "rps": 49215, + "rr": 5311, + "rr": 9126, + "rrp": 36967, + "rrr": 18267, + "rrrr": 25561, + "rrrr": 34444, + "rs": 6978, + "rs": 1724, + "rsa": 29437, + "rsc": 48524, + "rsd": 34426, + "rsi": 39046, + "rsl": 44752, + "rsp": 16381, + "rspb": 38508, + "rspb": 36727, + "rspca": 45643, + "rss": 46466, + "rss": 22350, + "rstats": 38700, + "rsvp": 9774, + "rt": 8959, + "rt": 8991, + "rtc": 31648, + "rte": 33822, + "rte": 23322, + "rtg": 22028, + "rti": 47549, + "rtr": 43999, + "rts": 8496, + "rtw": 34673, + "ru": 681, + "ru": 13735, + "rub": 15862, + "rub": 22586, + "rubb": 19597, + "rubbed": 45239, + "rubber": 31131, + "rubber": 11331, + "rubbing": 41262, + "rubbish": 21108, + "rubble": 42230, + "ruben": 44058, + "ruben": 29722, + "rubi": 27856, + "rubin": 34128, + "rubio": 24244, + "rubs": 43422, + "ruby": 24552, + "ruby": 11493, + "ruck": 27449, + "rucker": 45402, + "rud": 35256, + "rudd": 31836, + "rude": 16548, + "rudi": 48360, + "rudol": 40927, + "rudolf": 46835, + "rudolph": 30119, + "rudy": 38226, + "rudy": 22131, + "rue": 38024, + "rue": 19276, + "rufc": 45084, + "ruff": 28177, + "ruff": 30304, + "rufus": 39322, + "rug": 4217, + "rug": 19220, + "rugby": 15091, + "rugby": 4964, + "rugbyleague": 44419, + "ruger": 48655, + "rugged": 25225, + "rugs": 29946, + "rui": 46974, + "ruin": 16256, + "ruined": 17231, + "ruining": 29952, + "ruins": 16094, + "ruiz": 27873, + "ruk": 46628, + "rukh": 43075, + "rukh": 27631, + "rule": 31643, + "rule": 6175, + "ruled": 16324, + "ruler": 26286, + "rulers": 45328, + "rules": 5272, + "ruling": 14690, + "rum": 9223, + "rum": 11233, + "rumb": 42432, + "rumble": 18900, + "rumi": 31428, + "rumor": 22254, + "rumored": 36694, + "rumors": 16160, + "rumour": 34296, + "rumours": 20716, + "rump": 29366, + "run": 1639, + "run": 1934, + "runaway": 28851, + "runchat": 25838, + "rundown": 41100, + "rune": 33882, + "rune": 49244, + "runner": 37370, + "runner": 7913, + "runners": 10571, + "runnin": 43130, + "running": 24451, + "running": 2761, + "runoff": 38564, + "runs": 5586, + "runway": 13927, + "rup": 7996, + "rup": 14980, + "rupaul": 44211, + "rupee": 43916, + "rupees": 44110, + "rupert": 25625, + "rupt": 23055, + "ruption": 35403, + "rural": 
28801, + "rural": 8737, + "rus": 35811, + "rus": 5998, + "rush": 12148, + "rush": 6973, + "rushed": 28104, + "rusher": 48745, + "rushes": 47217, + "rushing": 20284, + "russ": 6285, + "russ": 20764, + "russell": 26122, + "russell": 8150, + "russi": 2600, + "russia": 4018, + "russian": 30731, + "russian": 4868, + "russians": 25413, + "russo": 30679, + "rust": 28682, + "rust": 14212, + "rustic": 19822, + "rusty": 43966, + "rusty": 22646, + "rut": 14973, + "rut": 39102, + "rutger": 49029, + "rutgers": 28934, + "ruth": 15798, + "ruth": 12029, + "ruther": 26676, + "rutherford": 31070, + "ruthless": 36063, + "rutland": 46024, + "ruto": 43702, + "ruz": 23275, + "rv": 17135, + "rv": 17951, + "rva": 24278, + "rw": 9085, + "rw": 22926, + "rwa": 47452, + "rwand": 31758, + "rwanda": 15427, + "rwby": 39698, + "rwc": 32321, + "rx": 41188, + "rx": 15945, + "ry": 1511, + "ry": 913, + "ryan": 8682, + "ryan": 4053, + "ryanair": 43526, + "ryder": 43564, + "ryder": 21805, + "rye": 24015, + "rye": 17409, + "rying": 7838, + "ryn": 37728, + "ryo": 24460, + "rys": 21654, + "ryu": 46656, + "ryu": 34604, + "ré": 29106, + "s": 82, + "s": 338, + "sa": 774, + "sa": 1344, + "saa": 13429, + "saab": 27158, + "saad": 36530, + "saas": 25761, + "saat": 33151, + "sab": 3233, + "sab": 23213, + "saba": 38344, + "sabah": 32854, + "saban": 41620, + "sabar": 47102, + "sabbath": 26008, + "sabc": 30010, + "sabcnews": 41093, + "saber": 46822, + "saber": 25624, + "sabha": 23431, + "sabi": 47073, + "sabine": 44062, + "sable": 19224, + "sabot": 30700, + "sabotage": 40496, + "sabre": 35110, + "sabres": 29620, + "sabrin": 37029, + "sabrina": 24994, + "sac": 3632, + "sac": 12905, + "sach": 30168, + "sacha": 49010, + "sachin": 47527, + "sachin": 30297, + "sachs": 31451, + "sack": 28964, + "sack": 14979, + "sacked": 27519, + "sacks": 26441, + "sacram": 13334, + "sacramento": 16065, + "sacred": 40612, + "sacred": 12477, + "sacri": 15283, + "sacrif": 12117, + "sacrific": 16919, + "sacrifice": 12556, + "sacrificed": 31116, + "sacrifices": 28858, + "sacrificing": 48146, + "sad": 2810, + "sad": 3719, + "saddened": 27720, + "saddest": 34925, + "saddle": 30469, + "saddle": 20283, + "sade": 27429, + "sadh": 40955, + "sadi": 22207, + "sadie": 30333, + "sadiq": 44107, + "sadler": 45600, + "sadly": 11603, + "sadness": 20399, + "sae": 38633, + "sae": 34883, + "saeed": 29745, + "saf": 2125, + "saf": 25760, + "safar": 23443, + "safari": 14091, + "safarilive": 34816, + "safc": 27998, + "safe": 2901, + "safe": 2996, + "safeguard": 42249, + "safeguarding": 47451, + "safely": 11513, + "safer": 40124, + "safer": 15504, + "safest": 38973, + "safety": 19050, + "safety": 3406, + "safetyfirst": 43608, + "saffron": 27529, + "sag": 6609, + "sag": 30048, + "saga": 15758, + "sagan": 37193, + "sagar": 42518, + "sage": 25800, + "sage": 7509, + "sages": 25979, + "sagin": 47097, + "sagitt": 44685, + "sagu": 44708, + "sah": 30943, + "sah": 26342, + "saha": 36062, + "sahara": 24599, + "saharan": 44255, + "sahi": 24608, + "sahib": 34150, + "sai": 16048, + "sai": 10886, + "said": 40319, + "said": 1946, + "saif": 44164, + "saig": 36328, + "saigon": 41081, + "sail": 7528, + "sail": 12156, + "sailed": 43047, + "sailing": 11003, + "sailor": 28002, + "sailor": 16076, + "sailormoon": 40673, + "sailors": 25355, + "sails": 27526, + "sain": 21226, + "sain": 40378, + "sains": 24860, + "sainsbury": 45879, + "sainsburys": 36934, + "saint": 11274, + "saint": 5599, + "saints": 8769, + "saintsfc": 31102, + "sair": 46600, + "sair": 30971, + "saire": 28087, + "saison": 33256, + "sait": 48008, 
+ "saj": 33580, + "sak": 11511, + "sak": 35900, + "saka": 33609, + "sake": 12874, + "sakh": 43945, + "saki": 40514, + "saku": 37550, + "sakura": 24162, + "sal": 980, + "sal": 6126, + "sala": 17300, + "salaam": 46773, + "salad": 6188, + "salads": 30948, + "salah": 22516, + "salam": 19007, + "salam": 33963, + "salamat": 44696, + "salami": 46885, + "salaries": 33132, + "salary": 16312, + "salazar": 45988, + "sale": 17786, + "sale": 1690, + "saleh": 38353, + "salem": 48194, + "salem": 16884, + "sales": 13347, + "sales": 3765, + "salesforce": 22680, + "salesman": 37633, + "salford": 25629, + "sali": 15411, + "salim": 42760, + "salinas": 41990, + "saline": 46918, + "salis": 20667, + "salis": 39378, + "salisbury": 24763, + "sall": 27122, + "sall": 20883, + "salle": 23738, + "sally": 29542, + "sally": 13349, + "salman": 13754, + "salman": 16219, + "salmankhan": 15177, + "salmon": 37040, + "salmon": 9137, + "salom": 38268, + "salon": 33916, + "salon": 11105, + "saloon": 26038, + "sals": 16307, + "salsa": 16442, + "salt": 12763, + "salt": 6611, + "salted": 26313, + "saltlife": 47809, + "salts": 40559, + "saltwater": 43616, + "salty": 20678, + "salu": 31711, + "salud": 46867, + "salut": 44998, + "salute": 44908, + "salute": 9747, + "salutes": 32762, + "salv": 8299, + "salvador": 20874, + "salvage": 33131, + "salvation": 19534, + "salvatore": 38772, + "salz": 33594, + "salzburg": 43396, + "sam": 1644, + "sam": 3730, + "sama": 19272, + "samanth": 11465, + "samantha": 15466, + "samanthap": 38266, + "samanthaprabhu": 38643, + "samar": 21820, + "samaritan": 45495, + "samba": 37190, + "same": 23062, + "same": 2208, + "samheughan": 36255, + "sami": 48400, + "sami": 24322, + "sammy": 31091, + "sammy": 16758, + "samo": 30006, + "samoa": 34932, + "samp": 31225, + "sample": 9542, + "sampler": 40629, + "samples": 13387, + "sampling": 19522, + "sampson": 39983, + "sams": 44667, + "samson": 34659, + "samsun": 47875, + "samsung": 35369, + "samsung": 8115, + "samu": 7646, + "samuel": 30612, + "samuel": 12787, + "samurai": 21739, + "san": 1489, + "san": 2223, + "sana": 19434, + "sanantonio": 34714, + "sanat": 29091, + "sanatomy": 36052, + "sanc": 7398, + "sance": 15930, + "sanchez": 13971, + "sanctioned": 43032, + "sanctions": 17790, + "sanctu": 12712, + "sanctuary": 14044, + "sand": 2147, + "sand": 5094, + "sandal": 36445, + "sandal": 42185, + "sandals": 20731, + "sandalwood": 47502, + "sandeep": 46973, + "sander": 34111, + "sanders": 10429, + "sanderson": 36198, + "sandi": 44249, + "sandiego": 45997, + "sandiego": 15793, + "sandman": 45730, + "sando": 35921, + "sandoval": 44157, + "sandra": 33733, + "sandra": 13415, + "sandro": 42389, + "sands": 5936, + "sandstone": 36796, + "sandwich": 17050, + "sandwich": 8687, + "sandwiches": 19667, + "sandy": 29679, + "sandy": 10355, + "sane": 23419, + "sanford": 32330, + "sanfrancisco": 20254, + "sang": 13235, + "sang": 11684, + "sange": 12466, + "sangria": 42665, + "sani": 39137, + "sani": 34492, + "sanitary": 33842, + "sanitation": 25414, + "saniti": 43987, + "sanity": 30517, + "sanjay": 31712, + "sanjay": 25796, + "sanje": 40405, + "sanjose": 45971, + "sank": 43692, + "sano": 34053, + "sans": 16982, + "sansk": 39689, + "sanskrit": 48083, + "sant": 8356, + "sant": 23120, + "santa": 22175, + "santa": 4555, + "santac": 28876, + "santam": 45627, + "santana": 27033, + "santander": 46476, + "santi": 13856, + "santiago": 16568, + "santo": 29631, + "santo": 18400, + "santor": 28448, + "santorini": 39573, + "santos": 16582, + "sany": 47679, + "sao": 28026, + "sap": 8089, + "sap": 
11591, + "sapi": 40016, + "sapp": 13427, + "sapp": 40729, + "sapphire": 22044, + "sar": 1808, + "sar": 9424, + "sara": 37196, + "sara": 10063, + "sarab": 40716, + "sarac": 35722, + "sarah": 9086, + "sarah": 5327, + "saraj": 42592, + "sarajevo": 48211, + "saras": 20373, + "sarasota": 31990, + "sarato": 24845, + "saratoga": 29496, + "sarawak": 47331, + "sarcasm": 37246, + "sarcastic": 48639, + "sardar": 41786, + "sarde": 43925, + "sardin": 27383, + "sardinia": 41025, + "sare": 13051, + "saree": 30860, + "sargent": 34864, + "sari": 42327, + "sari": 20261, + "saries": 47586, + "sarkar": 30673, + "sarko": 33658, + "sarkodie": 42848, + "sarmy": 20954, + "sart": 33006, + "sary": 15398, + "sas": 3960, + "sas": 5235, + "sash": 35656, + "sasha": 46078, + "sasha": 20894, + "sasia": 44751, + "sask": 47091, + "sask": 30416, + "saskat": 17102, + "saskatchewan": 23899, + "saskatoon": 31128, + "sass": 31351, + "sassy": 20827, + "sat": 1382, + "sat": 3279, + "sata": 41520, + "satan": 19446, + "satanic": 38224, + "satchel": 45908, + "sate": 35749, + "satell": 9031, + "satellite": 10316, + "satellites": 28483, + "sath": 29675, + "sathletics": 30154, + "sati": 7038, + "satin": 21803, + "sation": 23674, + "sations": 31232, + "satire": 29875, + "satis": 9906, + "satisf": 22941, + "satisfaction": 19925, + "satisfied": 18101, + "satisfy": 29444, + "satisfying": 23755, + "sato": 34376, + "satu": 45283, + "satur": 1634, + "saturated": 32466, + "saturday": 12537, + "saturday": 1748, + "saturdaymorning": 29053, + "saturdaymotivation": 40843, + "saturdays": 18930, + "saturn": 17312, + "saty": 39426, + "sau": 2096, + "sau": 19455, + "sauce": 5520, + "saucer": 42272, + "sauces": 40367, + "saucy": 46684, + "saudi": 24511, + "saudi": 8548, + "saudiarabia": 28680, + "sauer": 46333, + "saul": 47623, + "saul": 23252, + "sault": 40361, + "sauna": 35460, + "saunders": 23794, + "saur": 13227, + "saura": 46532, + "saurus": 22118, + "saus": 36121, + "sausage": 11855, + "sausages": 31593, + "sauté": 36290, + "sautéed": 38517, + "sauvi": 30116, + "sauvignon": 32745, + "sav": 2248, + "sav": 26533, + "sava": 40198, + "savag": 43039, + "savage": 11859, + "savannah": 18662, + "save": 5895, + "save": 2673, + "saved": 7137, + "saveour": 33390, + "saver": 20987, + "savers": 31416, + "saves": 12907, + "savethe": 18031, + "savi": 14721, + "saving": 28498, + "saving": 6979, + "savings": 10651, + "savior": 24762, + "saviour": 35800, + "savor": 48071, + "savory": 32992, + "savoury": 49071, + "savoy": 39552, + "savvy": 29278, + "saw": 12429, + "saw": 2425, + "sawa": 39613, + "sawards": 29012, + "sawyer": 27726, + "sax": 14169, + "sax": 23766, + "saxon": 31856, + "saxophon": 43760, + "saxophone": 32296, + "say": 3047, + "say": 1451, + "saya": 35170, + "sayang": 46322, + "sayers": 44116, + "sayin": 23662, + "saying": 4455, + "says": 1563, + "saz": 35577, + "sb": 5576, + "sb": 4977, + "sba": 44970, + "sback": 43840, + "sband": 27539, + "sbaseball": 46491, + "sbball": 39190, + "sbc": 31404, + "sberg": 20358, + "sbi": 41369, + "sbk": 39211, + "sboro": 18909, + "sbridge": 49228, + "sbs": 18883, + "sbu": 48075, + "sbu": 46281, + "sburg": 7390, + "sburgh": 48205, + "sbury": 14081, + "sby": 26519, + "sby": 10287, + "sc": 663, + "sc": 3219, + "sca": 11001, + "scab": 31716, + "scaf": 28981, + "scafe": 45574, + "scaffolding": 41687, + "scal": 10859, + "scala": 37997, + "scalable": 44084, + "scale": 37817, + "scale": 5879, + "scaled": 41923, + "scales": 22891, + "scaling": 29116, + "scallo": 19936, + "scallop": 39544, + "scallops": 31430, + "scalp": 38898, 
+ "scam": 17620, + "scam": 13215, + "scamp": 28451, + "scams": 34395, + "scan": 10650, + "scan": 11261, + "scanada": 27121, + "scand": 8110, + "scandal": 35420, + "scandal": 11622, + "scandals": 45490, + "scandin": 32014, + "scandinavian": 35661, + "scanned": 43719, + "scanner": 24185, + "scanning": 24092, + "scans": 31251, + "scap": 35883, + "scape": 36005, + "scape": 12314, + "scapes": 31933, + "scar": 4171, + "scar": 18088, + "scarborough": 24254, + "scarce": 38572, + "scarcity": 45812, + "scare": 33536, + "scare": 15920, + "scarec": 38814, + "scarecrow": 46504, + "scared": 9870, + "scares": 34096, + "scarf": 13365, + "scari": 27050, + "scariest": 37213, + "scarlet": 20389, + "scarlett": 28325, + "scars": 20747, + "scarves": 29249, + "scary": 9250, + "scat": 13899, + "scattered": 22090, + "scavenger": 36778, + "scc": 19458, + "scd": 48422, + "scen": 2204, + "scenario": 20456, + "scenarios": 31346, + "scence": 33418, + "scene": 3562, + "scenery": 16025, + "scenes": 5415, + "scenic": 15394, + "scent": 36277, + "scent": 7683, + "scented": 27190, + "scenter": 23059, + "scentre": 39371, + "scents": 26336, + "scep": 24439, + "scfc": 38578, + "sch": 844, + "sch": 7542, + "scha": 42809, + "schaf": 45588, + "schaft": 41010, + "schal": 35568, + "schalke": 41029, + "schallenge": 43665, + "schan": 31328, + "schar": 15085, + "schat": 31842, + "schau": 35830, + "sche": 3038, + "sche": 7289, + "schedu": 4207, + "schedule": 5521, + "scheduled": 10986, + "schedules": 28986, + "scheduling": 32216, + "scheer": 26776, + "schel": 39881, + "schel": 38569, + "schem": 17720, + "scheme": 9024, + "schemes": 22958, + "schen": 22738, + "scher": 21925, + "scher": 21299, + "schi": 13731, + "schi": 24984, + "schicago": 46230, + "schiff": 39431, + "schild": 32148, + "schiz": 33230, + "schizoph": 40004, + "schizophre": 41163, + "schle": 32022, + "schmid": 17375, + "schmidt": 18463, + "schnau": 45745, + "schnei": 19941, + "schneider": 22972, + "schnit": 40903, + "scho": 2493, + "schoice": 23860, + "schol": 4498, + "scholar": 7192, + "scholar": 12830, + "scholarly": 41065, + "scholars": 13818, + "scholarship": 9070, + "scholarships": 17866, + "scholastic": 35743, + "schoo": 20721, + "school": 6063, + "school": 1228, + "schooled": 44722, + "schoolers": 31455, + "schooling": 28608, + "schools": 3513, + "schre": 47685, + "schri": 25453, + "schro": 32381, + "schu": 11318, + "schubert": 46939, + "schul": 14945, + "schultz": 30308, + "schulz": 39572, + "schumacher": 39208, + "schumer": 25313, + "schur": 42475, + "schwab": 47602, + "schwar": 13985, + "schwartz": 30617, + "schwarz": 27074, + "schwarzenegger": 33860, + "schwe": 25324, + "sci": 2267, + "sci": 8309, + "sciart": 31704, + "scicom": 28606, + "scicomm": 29573, + "scien": 39261, + "science": 10201, + "science": 2497, + "sciencefiction": 39170, + "sciences": 11481, + "scienti": 4338, + "scientific": 9750, + "scientist": 11083, + "scientists": 8045, + "sciento": 36193, + "scientology": 44694, + "scifi": 41862, + "scifi": 12230, + "scion": 47208, + "sciss": 25667, + "scissors": 30867, + "sciutto": 44392, + "sclerosis": 39446, + "sclub": 20017, + "sco": 1065, + "sco": 4763, + "scoe": 31164, + "scol": 13599, + "scoll": 44895, + "scollege": 39536, + "scom": 26407, + "scon": 17163, + "scon": 29272, + "scones": 36443, + "sconf": 39704, + "scoo": 14199, + "scooby": 34469, + "scoop": 13829, + "scoops": 41360, + "scope": 7979, + "scopes": 30328, + "scopic": 23869, + "scopy": 20018, + "scor": 8442, + "score": 12067, + "score": 4431, + "scoreboard": 30104, + "scorecard": 38128, + 
"scored": 6143, + "scoreless": 33469, + "scorer": 16572, + "scorers": 26699, + "scores": 7039, + "scoring": 9198, + "scorpi": 15445, + "scorpio": 34331, + "scorpion": 28461, + "scorpions": 45401, + "scorsese": 45975, + "scot": 2496, + "scot": 9271, + "scotch": 16687, + "scoti": 46446, + "scotia": 27859, + "scotland": 29174, + "scotland": 4203, + "scots": 17260, + "scotsman": 39612, + "scott": 7775, + "scott": 3664, + "scotti": 6227, + "scottish": 18039, + "scottish": 7442, + "scottsdale": 27817, + "scotty": 39697, + "scotty": 26836, + "scotus": 21720, + "scou": 44909, + "scoun": 16110, + "scouncil": 48787, + "scountry": 40432, + "scour": 46172, + "scout": 32213, + "scout": 10786, + "scouting": 19072, + "scouts": 14837, + "scow": 27929, + "scowboys": 31386, + "scp": 45030, + "scr": 36131, + "scra": 11187, + "scrabble": 39488, + "scram": 17289, + "scramble": 32688, + "scrambled": 39026, + "scran": 41774, + "scranton": 45274, + "scrap": 27950, + "scrap": 21695, + "scrapbook": 48733, + "scrapped": 43325, + "scraps": 40809, + "scrat": 9572, + "scratch": 13258, + "scratched": 48831, + "scratches": 46556, + "scratching": 44617, + "scre": 1795, + "scream": 31645, + "scream": 13239, + "screamed": 35427, + "screaming": 12891, + "screams": 23989, + "screen": 5351, + "screen": 3750, + "screened": 31450, + "screening": 6688, + "screenings": 27655, + "screenplay": 30058, + "screens": 12689, + "screenshot": 20637, + "screenshot": 12646, + "screenshots": 26783, + "screenshotsaturday": 21406, + "screenwriter": 37293, + "screenwriting": 35465, + "screw": 25529, + "screw": 14225, + "screwdriver": 48748, + "screwed": 30592, + "screws": 38292, + "scri": 2139, + "scrib": 34259, + "scribe": 36228, + "scribed": 38334, + "scricket": 45947, + "scrim": 21978, + "scrimmage": 25216, + "scrip": 11955, + "script": 8374, + "scripted": 40513, + "scription": 26604, + "scriptions": 39512, + "scripts": 20109, + "scripture": 27186, + "scro": 30768, + "scroll": 24160, + "scrolling": 28889, + "scrolls": 38113, + "scroo": 42263, + "scru": 7589, + "scrub": 23432, + "scrubs": 37919, + "scrum": 29047, + "scrump": 39791, + "scrumptious": 40987, + "scrutiny": 34305, + "scs": 26853, + "sct": 39284, + "scu": 8181, + "scu": 32135, + "scuba": 39053, + "scuba": 20559, + "scubadiving": 49046, + "scue": 25955, + "scul": 4948, + "scully": 36598, + "sculp": 6093, + "sculpt": 45044, + "sculpted": 41296, + "sculpting": 44389, + "sculptor": 29409, + "sculpture": 8757, + "sculptures": 20378, + "scum": 29655, + "scumb": 44525, + "scup": 21506, + "scur": 32742, + "scwx": 41966, + "scy": 27471, + "sd": 3080, + "sd": 4159, + "sda": 25548, + "sdale": 12327, + "sday": 5902, + "sday": 1376, + "sdays": 14491, + "sdc": 40992, + "sdcc": 13246, + "sden": 17241, + "sdf": 34681, + "sdg": 20177, + "sdgs": 16261, + "sdk": 40015, + "sdlive": 34561, + "sdn": 41925, + "sdsu": 41284, + "se": 567, + "se": 611, + "sea": 5970, + "sea": 2102, + "seab": 15728, + "seabir": 42558, + "seac": 35626, + "seaf": 9336, + "seafood": 12472, + "seag": 15730, + "seagu": 38076, + "seagull": 38858, + "seagulls": 42215, + "seahawks": 15341, + "seal": 21381, + "seal": 10159, + "sealed": 13358, + "sealing": 42992, + "seals": 18179, + "seam": 13710, + "seam": 44201, + "seaman": 47513, + "seamless": 29373, + "seamus": 40175, + "sean": 11406, + "sean": 6077, + "seanhannity": 43316, + "seap": 29983, + "seaport": 46418, + "sear": 1612, + "search": 23129, + "search": 1920, + "searched": 28961, + "searches": 26378, + "searching": 10626, + "seared": 29727, + "sears": 26693, + "seas": 7329, + 
"seas": 9556, + "seascape": 42593, + "seaside": 18867, + "season": 19288, + "season": 1367, + "seasonal": 14215, + "seasoned": 28399, + "seasoning": 43439, + "seasons": 8635, + "seat": 19670, + "seat": 4922, + "seated": 23953, + "seater": 37543, + "seating": 16240, + "seats": 6944, + "seattle": 24388, + "seattle": 6274, + "seau": 32263, + "seaw": 32658, + "seaweed": 30204, + "seaworld": 27422, + "seb": 35766, + "seb": 25171, + "sebasti": 10324, + "sebastian": 43792, + "sebastian": 13181, + "sebring": 41086, + "sec": 2875, + "sec": 5338, + "seca": 37847, + "secco": 27394, + "sece": 46297, + "seclu": 42392, + "secon": 1846, + "second": 9329, + "second": 2241, + "secondary": 13107, + "seconds": 6541, + "secre": 2460, + "secret": 20710, + "secret": 4145, + "secretari": 29515, + "secretariat": 31767, + "secretary": 6552, + "secretly": 21400, + "secrets": 9735, + "secs": 28665, + "sect": 15772, + "section": 34986, + "section": 4853, + "sectional": 21876, + "sections": 20061, + "sector": 6579, + "sectors": 22173, + "secu": 4894, + "secular": 47483, + "secular": 27560, + "secur": 2557, + "secure": 44763, + "secure": 7515, + "secured": 16848, + "secures": 31567, + "securing": 24759, + "securities": 25080, + "security": 31245, + "security": 2741, + "sed": 14034, + "sed": 1252, + "sedan": 24237, + "sedg": 46926, + "sedge": 45288, + "sedi": 29269, + "sedly": 31771, + "sedona": 46862, + "seduc": 19933, + "seductive": 43721, + "see": 1751, + "see": 862, + "seed": 14064, + "seed": 6488, + "seeded": 33688, + "seeding": 40050, + "seedlings": 47933, + "seeds": 9128, + "seeing": 3214, + "seek": 8839, + "seeker": 28011, + "seekers": 20732, + "seeking": 8592, + "seeks": 12594, + "seem": 20043, + "seem": 7523, + "seemed": 17240, + "seemingly": 25917, + "seems": 4453, + "seen": 36273, + "seen": 2041, + "seer": 32486, + "sees": 7594, + "seeyou": 41279, + "sef": 27453, + "seg": 10551, + "sega": 16122, + "segment": 15615, + "segments": 43053, + "segreg": 49117, + "segregation": 39086, + "segu": 33156, + "segun": 43087, + "seh": 27536, + "seh": 41430, + "sehun": 17705, + "sei": 13130, + "sei": 15907, + "sein": 24669, + "seine": 41378, + "seinfeld": 33706, + "seis": 25559, + "seismic": 38459, + "seiz": 22171, + "seize": 26624, + "seized": 15826, + "seizure": 36804, + "seizures": 47199, + "sek": 45515, + "sek": 25880, + "sel": 1000, + "sel": 4098, + "sela": 47006, + "selamat": 37692, + "selangor": 44402, + "selby": 43546, + "selca": 38606, + "selcaday": 35924, + "seldom": 48322, + "sele": 29137, + "selec": 3014, + "select": 8690, + "selected": 6881, + "selecting": 32696, + "selection": 6724, + "selections": 24099, + "selective": 28686, + "selects": 32902, + "selen": 19970, + "selena": 14677, + "selenagomez": 27653, + "seley": 30556, + "self": 10139, + "self": 1322, + "selfcare": 39560, + "selfi": 3007, + "selfie": 26735, + "selfie": 3666, + "selfies": 46058, + "selfies": 10050, + "selfish": 26907, + "selfless": 34236, + "sell": 10279, + "sell": 5119, + "seller": 11779, + "sellers": 16562, + "selling": 4396, + "sells": 14306, + "selma": 36652, + "sels": 42070, + "selves": 4505, + "sely": 8402, + "sem": 8645, + "sem": 17106, + "sema": 31816, + "seman": 29119, + "seman": 28378, + "semana": 41780, + "semb": 36054, + "seme": 10855, + "sement": 10714, + "sements": 31449, + "semester": 11905, + "semi": 11023, + "semi": 6684, + "semic": 26967, + "semicon": 34315, + "semiconduc": 35646, + "semiconductor": 43551, + "semifinal": 22935, + "semifinals": 21863, + "semin": 5595, + "seminar": 7269, + "seminars": 34870, + "seminary": 
31655, + "seminole": 42956, + "semis": 24013, + "semit": 22628, + "semite": 23721, + "semitic": 34894, + "semitism": 25911, + "semper": 47391, + "sen": 1057, + "sen": 2249, + "sena": 21584, + "senate": 30703, + "senate": 6843, + "senator": 20871, + "senator": 8495, + "senators": 16889, + "send": 27684, + "send": 3625, + "sending": 6985, + "sends": 10817, + "sene": 25269, + "seneca": 33419, + "senegal": 28255, + "senew": 49313, + "seng": 43022, + "seng": 29971, + "senior": 19865, + "senior": 3415, + "seniors": 8138, + "senna": 36195, + "senpai": 46562, + "sens": 5218, + "sens": 22837, + "sensation": 19383, + "sensational": 23051, + "sense": 29162, + "sense": 4747, + "sensei": 36158, + "senses": 21809, + "sensi": 38802, + "sensible": 30635, + "sensing": 29236, + "sensiti": 20531, + "sensitive": 13734, + "sensitivity": 27788, + "sensor": 15330, + "sensors": 20356, + "sensory": 21831, + "sensu": 28157, + "sensual": 40860, + "sent": 6200, + "sent": 3676, + "sentence": 12737, + "sentenced": 17773, + "sentences": 25858, + "sentencing": 34394, + "senti": 19042, + "sentim": 25102, + "sentiment": 25949, + "sentimental": 40070, + "sentiments": 47450, + "sentin": 20042, + "sentinel": 23123, + "senting": 3924, + "seo": 24743, + "seo": 8622, + "seok": 34697, + "seok": 22482, + "seokjin": 45584, + "seoul": 13253, + "sep": 3212, + "sep": 10434, + "separ": 6859, + "separate": 13886, + "separated": 22163, + "separately": 41904, + "separates": 45365, + "separati": 39377, + "separating": 43480, + "separation": 22007, + "sephora": 38414, + "sepsis": 40205, + "sept": 5380, + "septe": 3672, + "september": 3707, + "septic": 34690, + "sepul": 47360, + "seq": 44379, + "sequ": 5491, + "seque": 44662, + "sequel": 15701, + "sequence": 18833, + "sequences": 47306, + "sequencing": 33484, + "sequo": 32781, + "sequoia": 42404, + "ser": 803, + "ser": 2771, + "sera": 28250, + "serbia": 19038, + "serbian": 33687, + "sere": 35770, + "seren": 7880, + "serena": 19519, + "serenawilliams": 48316, + "serendip": 45805, + "serendipity": 49386, + "serene": 28269, + "serenity": 24187, + "serge": 13477, + "serge": 35700, + "sergeant": 22049, + "sergei": 39870, + "sergey": 35390, + "sergi": 47675, + "sergio": 18359, + "seri": 2763, + "seri": 37509, + "serial": 14216, + "serie": 19752, + "seriea": 32660, + "series": 1857, + "serious": 47421, + "serious": 4770, + "seriously": 4885, + "sermon": 24884, + "sero": 48883, + "serpent": 37084, + "serpent": 35364, + "serra": 39851, + "serrano": 44236, + "sers": 13509, + "serum": 25385, + "serv": 1297, + "serv": 24571, + "servant": 20810, + "servants": 29652, + "serve": 39202, + "serve": 2838, + "served": 4740, + "server": 36458, + "server": 8398, + "serverless": 49243, + "servers": 22262, + "serves": 9915, + "servic": 27115, + "service": 21496, + "service": 2086, + "serviced": 44687, + "services": 3100, + "servicing": 41300, + "serving": 5722, + "sery": 14279, + "ses": 23708, + "ses": 1386, + "sesame": 21706, + "sese": 37128, + "sesh": 24274, + "session": 2550, + "sessions": 6327, + "set": 7965, + "set": 1167, + "setback": 43605, + "seth": 20005, + "seth": 11870, + "sethu": 38933, + "setlist": 33141, + "seton": 43799, + "sets": 4650, + "sett": 4984, + "sett": 17567, + "sette": 14613, + "setter": 23153, + "settes": 44145, + "setti": 45170, + "setting": 5264, + "settings": 18628, + "settle": 15075, + "settled": 18310, + "settlement": 16494, + "settlements": 36605, + "settlers": 35671, + "settles": 41498, + "settling": 22036, + "setup": 11092, + "seu": 31539, + "seul": 48975, + "seum": 18838, + 
"seun": 24209, + "seung": 32393, + "seung": 33711, + "seungri": 41627, + "seuss": 34441, + "sev": 26585, + "sev": 37600, + "seva": 42604, + "seve": 21458, + "seve": 22468, + "sevel": 17439, + "seven": 7874, + "seven": 5757, + "sevens": 29911, + "sevent": 43048, + "seventeen": 19337, + "seventh": 17568, + "seventy": 47170, + "sever": 3250, + "sever": 45557, + "several": 5560, + "severance": 26194, + "severe": 6215, + "severely": 24417, + "severn": 34626, + "severy": 34207, + "sevilla": 24947, + "seville": 34988, + "sew": 28640, + "sewage": 32777, + "sewer": 28294, + "sewing": 15974, + "sewn": 42118, + "sex": 3548, + "sex": 5937, + "sexi": 20562, + "sexiest": 25426, + "sexism": 32059, + "sexist": 33047, + "sexu": 14741, + "sexual": 6749, + "sexuality": 21244, + "sexually": 23032, + "sexy": 21019, + "sexy": 38127, + "sey": 6317, + "sey": 2258, + "seychel": 36809, + "seychelles": 38519, + "seye": 35604, + "seym": 22657, + "seymour": 25850, + "seys": 15081, + "sez": 42377, + "señ": 43368, + "sf": 4435, + "sf": 4915, + "sfa": 32675, + "sfam": 37649, + "sfb": 27930, + "sfc": 14129, + "sfest": 49024, + "sff": 42056, + "sfgiants": 20923, + "sfield": 11801, + "sfo": 39182, + "sfootball": 45259, + "sfor": 9115, + "sford": 28917, + "sforsale": 28888, + "sfw": 18073, + "sfx": 37995, + "sg": 9599, + "sg": 7611, + "sga": 33049, + "sgate": 27558, + "sgh": 47590, + "sgo": 5393, + "sgo": 21044, + "sgt": 13748, + "sh": 552, + "sh": 849, + "sha": 1514, + "sha": 3337, + "shaa": 44221, + "shab": 8323, + "shabbat": 38042, + "shabby": 28838, + "shack": 23866, + "shack": 18785, + "shad": 3182, + "shad": 23874, + "shade": 34554, + "shade": 10097, + "shaded": 43506, + "shades": 46608, + "shades": 9270, + "shadesof": 45180, + "shading": 37348, + "shado": 9325, + "shadow": 15243, + "shadow": 7068, + "shadowhun": 19931, + "shadowhunters": 24834, + "shadowing": 46092, + "shadows": 12971, + "shady": 22158, + "shaf": 12032, + "shaft": 21545, + "shag": 22439, + "shaggy": 42662, + "shah": 13203, + "shah": 8439, + "shahe": 23643, + "shaheed": 30060, + "shaheer": 43969, + "shahi": 46972, + "shahid": 25696, + "shahid": 27138, + "shahidkapoor": 29892, + "shahzad": 45915, + "shai": 47941, + "shaikh": 45712, + "shail": 37603, + "shair": 43135, + "shak": 8385, + "shake": 8206, + "shake": 8251, + "shaken": 38237, + "shaker": 26210, + "shakers": 38411, + "shakes": 19668, + "shakespe": 9890, + "shakespeare": 22499, + "shakespeare": 12488, + "shakespearesunday": 32320, + "shaking": 19101, + "shakira": 40795, + "shakti": 48593, + "shakti": 32458, + "shakur": 48915, + "shal": 15056, + "shal": 28175, + "shale": 32864, + "shall": 4742, + "shallow": 23730, + "shalom": 31339, + "sham": 6453, + "sham": 9005, + "shaman": 48727, + "shambles": 40799, + "shame": 14776, + "shame": 7593, + "shameful": 28283, + "shameless": 25380, + "shaming": 40553, + "shampoo": 23944, + "shamrock": 34199, + "shan": 5171, + "shan": 8834, + "shana": 44835, + "shand": 29101, + "shane": 26863, + "shane": 11572, + "shang": 11141, + "shanghai": 12742, + "shani": 46665, + "shank": 24685, + "shankar": 24108, + "shann": 9932, + "shannon": 22842, + "shannon": 13581, + "shant": 36610, + "shap": 5581, + "shape": 26925, + "shape": 6448, + "shaped": 10127, + "shapes": 15377, + "shaping": 18632, + "shapiro": 32110, + "shaq": 46402, + "shaq": 26843, + "shar": 1669, + "shar": 36542, + "shara": 48849, + "sharapo": 36489, + "sharapova": 36671, + "shard": 42207, + "share": 7585, + "share": 1978, + "shared": 5368, + "shareholder": 38241, + "shareholders": 34778, + "sharepoint": 
39213, + "shares": 4974, + "sharethe": 49277, + "shareyour": 45890, + "shari": 27738, + "shari": 47390, + "sharia": 37244, + "sharif": 15501, + "sharing": 3567, + "sharjah": 33420, + "shark": 15836, + "shark": 7980, + "sharks": 10047, + "sharkweek": 39571, + "sharma": 10105, + "sharon": 28722, + "sharon": 14138, + "sharp": 17126, + "sharp": 8157, + "sharpe": 34374, + "sharpen": 41465, + "sharpie": 46858, + "sharply": 37185, + "shasta": 46727, + "shat": 12169, + "shat": 44388, + "shatter": 45008, + "shattered": 26820, + "shau": 13750, + "shaun": 23446, + "shaun": 16669, + "shav": 11410, + "shave": 17735, + "shaved": 25571, + "shaving": 24261, + "shaw": 6122, + "shaw": 6805, + "shawa": 46413, + "shawl": 35132, + "shawn": 16677, + "shawn": 10970, + "shawnee": 48060, + "shawnmendes": 27277, + "shawty": 38026, + "shay": 10778, + "shay": 18361, + "shaykh": 47223, + "shaz": 18618, + "shazam": 29063, + "shc": 43419, + "shd": 37729, + "she": 1729, + "she": 1043, + "shea": 20407, + "shead": 44287, + "shead": 20434, + "shealth": 41743, + "shealth": 22197, + "shear": 27974, + "shear": 32108, + "shearer": 40505, + "sheath": 45637, + "shed": 16586, + "shed": 1492, + "shedding": 33608, + "sheds": 25921, + "shee": 23450, + "shee": 34321, + "sheed": 26105, + "sheehan": 41809, + "sheen": 25025, + "sheep": 23604, + "sheep": 9629, + "sheer": 17577, + "sheeran": 18561, + "sheet": 7298, + "sheets": 12744, + "shef": 8237, + "sheff": 38844, + "sheff": 43821, + "sheffiel": 26940, + "sheffield": 41763, + "sheffield": 10420, + "sheffieldissuper": 33628, + "sheh": 31667, + "sheikh": 15031, + "sheil": 42765, + "sheila": 25734, + "shek": 33285, + "shel": 3159, + "shelby": 36906, + "shelby": 16885, + "sheldon": 25079, + "shelf": 10955, + "shell": 23374, + "shell": 6648, + "shelley": 22497, + "shelling": 43166, + "shells": 19265, + "shelly": 37461, + "shelter": 8599, + "sheltered": 48070, + "shelters": 24312, + "shelton": 24471, + "shelves": 16225, + "shem": 40299, + "shen": 10154, + "shen": 31098, + "shenan": 20965, + "shenando": 44666, + "shenanigans": 26590, + "shenko": 39751, + "shenmue": 48279, + "shenzhen": 38970, + "shep": 33757, + "shep": 44857, + "shepard": 26810, + "shepher": 11008, + "shepherd": 13242, + "shepherds": 42792, + "sheppard": 37304, + "sher": 3570, + "sher": 4510, + "sheraton": 39400, + "shere": 21507, + "sheri": 9235, + "sheridan": 27085, + "sheriff": 10309, + "sherlock": 17294, + "sherman": 17822, + "sherry": 44348, + "sherry": 24689, + "shers": 14141, + "sherwood": 24527, + "sheryl": 39773, + "shes": 45514, + "shes": 2502, + "shet": 15850, + "shetland": 29595, + "shetty": 25533, + "shev": 45182, + "sheva": 45132, + "shh": 35025, + "shhh": 36932, + "shi": 823, + "shi": 3533, + "shia": 23791, + "shibu": 36177, + "shibuya": 41623, + "shie": 26638, + "shiel": 33413, + "shield": 8670, + "shields": 19085, + "shies": 35312, + "shif": 35317, + "shift": 43767, + "shift": 6905, + "shifted": 34429, + "shifter": 48944, + "shifting": 21992, + "shifts": 23957, + "shik": 36980, + "shil": 14370, + "shill": 32121, + "shill": 30090, + "shilpa": 47062, + "shilpa": 40690, + "shim": 11986, + "shim": 32780, + "shima": 14382, + "shimano": 48904, + "shimi": 40517, + "shimmer": 38792, + "shin": 5664, + "shin": 11784, + "shinde": 41516, + "shine": 17582, + "shine": 3780, + "shinee": 19660, + "shines": 16015, + "shing": 38641, + "shing": 1743, + "shining": 10485, + "shino": 43074, + "shiny": 12190, + "ship": 7645, + "ship": 1158, + "shipment": 28553, + "shipp": 34709, + "shipped": 15279, + "shippers": 44789, + "shipping": 
5721, + "ships": 3262, + "shipwreck": 48878, + "shipy": 26828, + "shipyard": 31273, + "shir": 1956, + "shiraz": 35618, + "shire": 11975, + "shire": 2968, + "shirehour": 32456, + "shirley": 18189, + "shiro": 26048, + "shirt": 27576, + "shirt": 2523, + "shirtless": 28959, + "shirts": 5803, + "shistory": 34979, + "shiv": 18042, + "shiv": 37121, + "shiva": 33881, + "shiva": 21174, + "shka": 38944, + "shld": 49359, + "shma": 48074, + "shment": 8802, + "shments": 18822, + "sho": 719, + "sho": 13756, + "shock": 19617, + "shock": 8736, + "shocked": 15787, + "shocker": 37971, + "shockey": 22258, + "shocking": 13394, + "shocks": 31886, + "shoe": 16308, + "shoe": 7342, + "shoes": 49391, + "shoes": 4079, + "shol": 21472, + "sholm": 44139, + "shome": 42701, + "shon": 19526, + "shon": 37621, + "shone": 47173, + "shoo": 1975, + "shook": 20730, + "shoops": 29956, + "shoot": 12531, + "shoot": 3704, + "shooter": 13645, + "shooters": 31902, + "shooting": 3992, + "shootings": 26753, + "shootout": 20666, + "shoots": 14144, + "shop": 5738, + "shop": 1557, + "shopify": 47949, + "shoplocal": 21775, + "shopp": 38486, + "shoppe": 38236, + "shopped": 28088, + "shopper": 24346, + "shoppers": 22316, + "shopping": 42101, + "shopping": 4266, + "shops": 6467, + "shopsmall": 35942, + "shor": 3209, + "shore": 14717, + "shore": 5928, + "shored": 33140, + "shoreditch": 35042, + "shoreline": 34807, + "shores": 18102, + "short": 6803, + "short": 3005, + "shortage": 19910, + "shortages": 38730, + "shortcuts": 45793, + "shorten": 41711, + "shorter": 20350, + "shortest": 33717, + "shortfilm": 37204, + "shorth": 37397, + "shortlist": 28163, + "shortlisted": 20631, + "shortly": 11967, + "shorts": 9680, + "shorty": 33502, + "shot": 9805, + "shot": 2000, + "shotel": 42365, + "shotgun": 21643, + "shots": 5342, + "shou": 3890, + "shoul": 29847, + "should": 14947, + "should": 1535, + "shoulder": 8476, + "shoulders": 18738, + "shouldn": 9416, + "shour": 20025, + "shouse": 28671, + "shout": 7335, + "shout": 5214, + "shouted": 44397, + "shouting": 26464, + "shoutout": 8274, + "shouts": 26709, + "shovel": 31778, + "show": 2133, + "show": 1080, + "showbiz": 34156, + "showcas": 14290, + "showcase": 7265, + "showcased": 35786, + "showcases": 26266, + "showcasing": 17036, + "showdown": 15576, + "showed": 7150, + "shower": 7777, + "showers": 9893, + "showing": 3649, + "shown": 8506, + "showroom": 16821, + "shows": 2665, + "showtime": 40576, + "showtime": 15442, + "showyour": 46733, + "shp": 38341, + "shq": 21145, + "shr": 10118, + "shra": 21360, + "shradd": 28172, + "shraddha": 35208, + "shraddhakapoor": 40385, + "shre": 12101, + "shred": 19756, + "shred": 33017, + "shredded": 31772, + "shredding": 45534, + "shree": 37410, + "shrek": 35009, + "shrews": 26411, + "shrewsbury": 30921, + "shri": 8838, + "shri": 11424, + "shrimp": 12727, + "shrin": 24865, + "shrine": 16156, + "shrink": 34957, + "shrinking": 41243, + "shrm": 44163, + "shro": 15259, + "shroff": 32081, + "shrop": 22630, + "shropshire": 26344, + "shru": 14911, + "shrub": 41464, + "shrubs": 47975, + "shrun": 46767, + "shs": 16184, + "sht": 44210, + "shti": 38927, + "shu": 2872, + "shu": 17651, + "shua": 33771, + "shub": 40552, + "shud": 45782, + "shuff": 42641, + "shuffle": 21681, + "shui": 45473, + "shuk": 29927, + "shukla": 46829, + "shul": 30721, + "shum": 37383, + "shun": 24479, + "shun": 39594, + "shur": 41032, + "shut": 8702, + "shut": 8282, + "shutdown": 16051, + "shutout": 24385, + "shuts": 28313, + "shutt": 31866, + "shutter": 36235, + "shutter": 33902, + "shutters": 46894, + 
"shutting": 31383, + "shuttle": 15842, + "shwar": 41640, + "shy": 22678, + "shy": 9682, + "si": 564, + "si": 2990, + "sia": 2357, + "siam": 29686, + "siam": 48248, + "siamese": 43161, + "sian": 28510, + "sian": 6221, + "sians": 26583, + "sias": 28645, + "siber": 22206, + "siberia": 39969, + "siberian": 34058, + "sibl": 14338, + "sible": 14507, + "sibling": 43060, + "sibling": 23779, + "siblings": 17156, + "sic": 8278, + "sic": 1118, + "sica": 34125, + "sical": 33875, + "sichuan": 48950, + "sicilian": 45292, + "sicily": 23179, + "sick": 11143, + "sick": 5359, + "sickest": 47972, + "sickle": 41459, + "sickness": 28898, + "sics": 26297, + "sid": 10117, + "sid": 15119, + "sidd": 19842, + "siddi": 35227, + "side": 5869, + "side": 1145, + "sided": 21061, + "sidekick": 44683, + "sidel": 43557, + "sideline": 32056, + "sidelines": 31046, + "sider": 30581, + "siders": 41249, + "sides": 7578, + "sideshow": 46789, + "sidewalk": 23278, + "sidewalks": 43583, + "sideways": 35593, + "siding": 38758, + "sidney": 22598, + "sie": 8533, + "sie": 5685, + "sieg": 49203, + "siege": 18460, + "siegel": 48559, + "siem": 18434, + "siemens": 30147, + "siempre": 44030, + "siena": 33336, + "sienna": 40373, + "sier": 10028, + "sier": 7444, + "sierra": 13552, + "siers": 35923, + "sies": 16367, + "siest": 18323, + "sif": 29300, + "sig": 872, + "sig": 19145, + "sigh": 36303, + "sigh": 15505, + "sighs": 44579, + "sight": 16897, + "sight": 6329, + "sighted": 33034, + "sighting": 17507, + "sightings": 30004, + "sights": 17364, + "sightseeing": 34210, + "sigma": 45075, + "sigma": 15697, + "sign": 5538, + "sign": 2292, + "signage": 21156, + "signal": 10781, + "signaling": 38492, + "signalling": 48426, + "signals": 17150, + "signation": 24347, + "signature": 9189, + "signatures": 21865, + "signed": 3163, + "signee": 39778, + "signi": 34023, + "signific": 6374, + "significance": 23769, + "significant": 8735, + "significantly": 16187, + "signing": 4401, + "signingday": 40282, + "signings": 27731, + "signs": 4659, + "signup": 40791, + "sigue": 49401, + "sii": 36672, + "sik": 19974, + "sik": 22413, + "sika": 31144, + "sikh": 21829, + "sikhs": 45426, + "sil": 1556, + "sil": 8315, + "sila": 41754, + "sile": 37620, + "silen": 39048, + "silence": 8462, + "silenced": 45415, + "silent": 30352, + "silent": 8487, + "silently": 42640, + "silhou": 20589, + "silhouette": 26149, + "silic": 23830, + "silicon": 32412, + "silicon": 17888, + "silicone": 28221, + "silk": 25891, + "silk": 9743, + "silky": 29554, + "sill": 42468, + "sill": 48024, + "silly": 11883, + "silon": 31841, + "sils": 39708, + "silva": 16489, + "silve": 37697, + "silver": 7525, + "silver": 3467, + "silverado": 46160, + "silverstone": 29666, + "silvia": 37289, + "sim": 5026, + "sim": 10740, + "sima": 35871, + "simba": 39492, + "simcoe": 47148, + "sime": 28329, + "simi": 38073, + "simil": 7202, + "similar": 8547, + "similarities": 34716, + "simm": 13001, + "simmons": 14699, + "simo": 37171, + "simon": 8796, + "simon": 6668, + "simona": 46277, + "simone": 19062, + "simons": 33097, + "simp": 2542, + "simple": 19018, + "simple": 4129, + "simpler": 35489, + "simplest": 39588, + "simpli": 16868, + "simplicity": 21262, + "simplified": 36647, + "simplify": 35479, + "simply": 25637, + "simply": 6151, + "simpson": 41805, + "simpson": 11750, + "simpsons": 21092, + "sims": 14021, + "simul": 9845, + "simulated": 46395, + "simulation": 18610, + "simulator": 20821, + "simultaneous": 48816, + "simultaneously": 28575, + "sin": 1303, + "sin": 3421, + "sina": 19541, + "sinai": 33226, + "sinatra": 
27262, + "sinc": 30464, + "since": 1855, + "sincere": 24513, + "sincere": 24886, + "sincerely": 25673, + "sinclair": 23100, + "sind": 39598, + "sind": 30877, + "sindh": 20754, + "sindia": 48038, + "sine": 22741, + "sine": 33793, + "sinfo": 47178, + "sing": 1387, + "sing": 1197, + "singapo": 27861, + "singapore": 28879, + "singapore": 6754, + "singer": 33880, + "singer": 5108, + "singers": 15613, + "singersongwriter": 44585, + "singh": 19445, + "singh": 5715, + "singing": 5864, + "single": 19524, + "single": 2688, + "singles": 12025, + "singleton": 46247, + "singly": 16619, + "sings": 13635, + "singul": 34003, + "singular": 44009, + "singularity": 48410, + "sinha": 29416, + "sini": 41781, + "sini": 26319, + "sinister": 31313, + "sink": 37232, + "sink": 14551, + "sinking": 27949, + "sinks": 32710, + "sinn": 36315, + "sinner": 45380, + "sinners": 43436, + "sino": 29759, + "sins": 9345, + "sinthe": 30737, + "sinu": 37351, + "sinus": 47535, + "sio": 10807, + "siob": 40954, + "siology": 46315, + "sion": 5676, + "sion": 1015, + "sional": 14533, + "sionally": 30754, + "sions": 4060, + "sioux": 44695, + "sioux": 24954, + "sip": 16096, + "sipping": 28527, + "sir": 10708, + "sir": 3846, + "sire": 28450, + "siren": 33026, + "sirens": 35907, + "siri": 13986, + "siri": 18394, + "sirius": 23574, + "sirius": 34999, + "siriusxm": 29833, + "sirloin": 46828, + "sis": 18132, + "sis": 2580, + "sisd": 27132, + "sisi": 37892, + "siss": 42929, + "sissy": 27564, + "sist": 20520, + "sista": 37448, + "sister": 17417, + "sister": 3677, + "sisterhood": 37313, + "sisters": 6404, + "sit": 7387, + "sit": 4037, + "sitcom": 30426, + "site": 26792, + "site": 1988, + "sites": 7236, + "sith": 41499, + "sito": 42613, + "sits": 12726, + "sitt": 42988, + "sitter": 40777, + "sittin": 40887, + "sitting": 4919, + "situ": 5562, + "situ": 42536, + "situated": 22030, + "situation": 7144, + "situations": 19096, + "sity": 38177, + "sity": 5477, + "siu": 40174, + "sium": 8090, + "sius": 27595, + "siva": 20991, + "sivan": 36931, + "sive": 23572, + "sive": 1875, + "sively": 10343, + "siveness": 39667, + "sives": 23896, + "sivity": 42738, + "siwon": 29055, + "six": 5968, + "six": 4093, + "sixers": 25941, + "sixteen": 28677, + "sixth": 12909, + "sixties": 44948, + "sixty": 32588, + "siya": 44440, + "size": 38377, + "size": 3235, + "sized": 9832, + "sizes": 10253, + "sizing": 28330, + "sizz": 23778, + "sizzle": 47890, + "sizzling": 35799, + "sj": 7536, + "sj": 16010, + "sjo": 42012, + "sk": 909, + "sk": 2058, + "ska": 7495, + "skag": 31948, + "skan": 46772, + "skar": 27587, + "skar": 26835, + "skate": 13740, + "skate": 12745, + "skateboard": 31777, + "skateboarding": 31352, + "skater": 30337, + "skaters": 39824, + "skates": 31479, + "skc": 44551, + "ske": 6261, + "ske": 25516, + "skel": 36564, + "skelet": 27075, + "skeletal": 37369, + "skeleton": 20062, + "skeletons": 48874, + "skell": 40801, + "skep": 27772, + "skeptical": 44934, + "sker": 37640, + "sker": 33600, + "sket": 3744, + "sketch": 11767, + "sketch": 5269, + "sketchbook": 18899, + "sketched": 38581, + "sketches": 17622, + "sketching": 23228, + "sketchy": 41582, + "skey": 37453, + "ski": 3327, + "ski": 3428, + "skid": 36574, + "skid": 32099, + "skier": 42585, + "skies": 7244, + "skiing": 14400, + "skil": 24543, + "skill": 15598, + "skill": 10604, + "skilled": 17535, + "skillet": 40568, + "skills": 4113, + "skim": 33191, + "skin": 5821, + "skin": 3575, + "skincare": 12648, + "skine": 37300, + "sking": 46215, + "skinned": 42199, + "skinner": 30261, + "skinny": 42729, + "skinny": 15457, 
+ "skins": 11594, + "skip": 39793, + "skip": 14296, + "skipped": 40639, + "skipper": 22226, + "skipping": 34867, + "skir": 8919, + "skirt": 12386, + "skirts": 24840, + "skis": 32843, + "skit": 43573, + "skitchen": 42820, + "skittles": 43213, + "sko": 15141, + "sko": 23493, + "skoda": 38668, + "skool": 26743, + "skril": 43149, + "skrillex": 43651, + "sks": 48136, + "sku": 10836, + "skul": 17561, + "skull": 34068, + "skull": 12092, + "skulls": 31804, + "skunk": 42194, + "sky": 3075, + "sky": 2390, + "skybet": 45540, + "skye": 21475, + "skyl": 43554, + "skylar": 45411, + "skyline": 14606, + "skymap": 41734, + "skynews": 40977, + "skype": 17069, + "skyrim": 33693, + "skysports": 39845, + "skysports": 46725, + "skywalker": 32936, + "sl": 2621, + "sl": 7489, + "sla": 2725, + "sla": 26707, + "slab": 24241, + "slabs": 42818, + "slack": 37108, + "slack": 30142, + "slade": 33546, + "slain": 35972, + "slalom": 43540, + "slam": 14891, + "slam": 10131, + "slammed": 29772, + "slams": 18907, + "slan": 44663, + "slan": 47193, + "sland": 11294, + "slang": 33655, + "slap": 48830, + "slap": 21751, + "slapped": 38861, + "slaps": 46796, + "slash": 19749, + "slat": 38966, + "slate": 17919, + "slated": 36094, + "slater": 25968, + "slaugh": 26782, + "slaughter": 19815, + "slaughtered": 46615, + "slav": 47292, + "slava": 41797, + "slave": 14029, + "slavery": 15754, + "slaves": 23833, + "slaw": 28178, + "slay": 48319, + "slay": 19380, + "slayed": 44870, + "slayer": 21605, + "slaying": 27812, + "slays": 45648, + "slc": 21972, + "sle": 1709, + "sleague": 23336, + "sled": 28438, + "sledge": 48750, + "slee": 17642, + "slee": 38977, + "sleek": 23187, + "sleep": 4656, + "sleep": 3840, + "sleeper": 28709, + "sleeping": 6982, + "sleepless": 39779, + "sleepover": 39415, + "sleeps": 16610, + "sleepy": 32572, + "sleepy": 14497, + "sleet": 36948, + "sleeve": 35270, + "sleeve": 10536, + "sleeveless": 38049, + "sleeves": 19691, + "sleg": 47650, + "sleigh": 30865, + "slender": 40331, + "slept": 20388, + "sler": 14066, + "sley": 17198, + "sley": 6496, + "sli": 1811, + "sli": 44824, + "slic": 19692, + "slice": 13431, + "sliced": 28121, + "slices": 28424, + "slick": 18341, + "slide": 27828, + "slide": 8837, + "slider": 37861, + "sliders": 40700, + "slides": 15939, + "slideshow": 42817, + "sliding": 21468, + "slife": 15448, + "sliga": 21080, + "slight": 14297, + "slightly": 8456, + "sligo": 30424, + "slike": 38744, + "slim": 35226, + "slim": 12364, + "slime": 29107, + "sling": 28021, + "sling": 32607, + "slinger": 47269, + "slions": 43363, + "slip": 39785, + "slip": 12105, + "slipknot": 41816, + "slipped": 30344, + "slipper": 39644, + "slippers": 26509, + "slippery": 30814, + "slipping": 36301, + "slips": 30632, + "slist": 33749, + "slit": 47011, + "slive": 31652, + "slo": 4303, + "slo": 36083, + "sloan": 29110, + "sloane": 41553, + "slogan": 23398, + "slogans": 42795, + "slope": 22769, + "slopes": 24066, + "sloppy": 36154, + "slot": 14500, + "sloth": 30007, + "slots": 19238, + "slou": 48493, + "slovak": 23315, + "slovakia": 25994, + "sloven": 17018, + "slovenia": 21037, + "slow": 6674, + "slow": 5444, + "slowdown": 38421, + "slowed": 43793, + "slower": 29181, + "slowing": 29839, + "slowly": 9568, + "slows": 46855, + "slp": 45599, + "slr": 21325, + "sls": 33651, + "slt": 39283, + "sltd": 36388, + "slu": 7224, + "slu": 47456, + "slug": 34190, + "slugger": 48671, + "slum": 46754, + "slumber": 44295, + "slump": 35588, + "slur": 30476, + "slush": 39815, + "slv": 45526, + "sly": 28145, + "sly": 21062, + "sm": 978, + "sm": 2764, + "sma": 
4357, + "sma": 11854, + "smack": 21280, + "smack": 30026, + "smackdown": 26138, + "smafia": 47686, + "smag": 32212, + "smal": 48379, + "small": 5244, + "small": 2442, + "smallbiz": 41724, + "smallbiz": 18987, + "smallbusiness": 21316, + "smalle": 18490, + "smaller": 12431, + "smallest": 18686, + "smalls": 41696, + "sman": 9612, + "smar": 3201, + "smart": 5383, + "smart": 4115, + "smartcities": 34822, + "smartcity": 33973, + "smarter": 18990, + "smartest": 37092, + "smarthome": 47726, + "smartphone": 11290, + "smartphones": 22212, + "smartwatch": 35798, + "smash": 17258, + "smash": 10332, + "smashbros": 44897, + "smashed": 18410, + "smashes": 45657, + "smashing": 19632, + "smatter": 16537, + "smb": 30446, + "smc": 31375, + "smc": 28312, + "smd": 34582, + "sme": 11758, + "sme": 15650, + "smear": 37546, + "smel": 28476, + "smell": 9688, + "smelling": 32493, + "smells": 14668, + "smelly": 46145, + "smen": 15961, + "smer": 48526, + "smere": 39629, + "smes": 26141, + "smg": 46876, + "smh": 9623, + "smi": 5655, + "smi": 40049, + "smil": 33937, + "smile": 27641, + "smile": 3490, + "smiled": 34362, + "smiles": 8726, + "smiley": 22925, + "smiling": 9200, + "smir": 24667, + "smith": 10527, + "smith": 2915, + "smiths": 27872, + "smithson": 25372, + "smithsonian": 31209, + "smm": 19510, + "smma": 42370, + "smo": 2513, + "smo": 13437, + "smobile": 38923, + "smog": 44425, + "smoke": 20381, + "smoke": 6664, + "smoked": 11161, + "smoker": 32348, + "smokers": 29571, + "smokes": 40336, + "smokey": 23670, + "smokin": 32825, + "smoking": 9038, + "smoky": 25549, + "smol": 29939, + "smol": 40403, + "smoo": 5430, + "smooth": 10958, + "smooth": 8990, + "smoother": 44271, + "smoothie": 16668, + "smoothies": 34458, + "smoothly": 32380, + "smore": 48323, + "smp": 32260, + "smriti": 49227, + "sms": 10409, + "smt": 26672, + "smtown": 26072, + "smu": 10878, + "smu": 30458, + "smug": 41021, + "smugg": 28130, + "smuggling": 34146, + "smur": 24708, + "smusic": 19191, + "smw": 44929, + "smx": 46699, + "smy": 14381, + "smyth": 44822, + "sn": 1672, + "sn": 5844, + "sna": 4032, + "snack": 47548, + "snack": 10039, + "snacking": 46474, + "snacks": 12349, + "snag": 34789, + "snag": 28043, + "snagged": 48534, + "snail": 23132, + "snails": 34928, + "snake": 30133, + "snake": 8798, + "snakes": 19605, + "snap": 4578, + "snap": 7404, + "snapback": 31234, + "snapchat": 7799, + "snapmatic": 45907, + "snapp": 10185, + "snapped": 15543, + "snapper": 31677, + "snapping": 31581, + "snaps": 16890, + "snapshot": 18243, + "snar": 30810, + "snare": 40651, + "snat": 18457, + "snatch": 35302, + "snatched": 44821, + "snation": 14362, + "snazzy": 48963, + "snc": 39918, + "sne": 3791, + "sne": 46503, + "sneak": 27871, + "sneak": 6917, + "sneaker": 31698, + "sneaker": 24781, + "sneakers": 17397, + "sneaking": 34633, + "sneakpeek": 47831, + "sneaks": 40926, + "sneaky": 21293, + "snee": 42095, + "snell": 46410, + "sner": 31424, + "snes": 26667, + "snews": 18623, + "snf": 47651, + "sng": 41549, + "snhl": 43093, + "sni": 7186, + "sni": 35570, + "snickers": 49127, + "sniff": 37841, + "snip": 42954, + "sniper": 22157, + "snippet": 37531, + "snippets": 44001, + "snl": 16011, + "sno": 8567, + "sno": 17802, + "snoo": 11352, + "snooker": 25657, + "snoop": 44503, + "snoop": 27754, + "snoopdogg": 48388, + "snoopy": 41967, + "snooze": 40718, + "snor": 16590, + "snoring": 44560, + "snorkel": 44285, + "snorkeling": 48103, + "snow": 3880, + "snow": 2583, + "snowball": 39254, + "snowboard": 33403, + "snowboarding": 32397, + "snowday": 37982, + "snowden": 32154, + 
"snowdon": 47107, + "snowdonia": 36088, + "snowed": 45073, + "snowfall": 21714, + "snowflake": 33447, + "snowflakes": 38618, + "snowing": 21443, + "snowman": 22668, + "snowstorm": 38777, + "snowy": 14191, + "snp": 15301, + "sns": 36343, + "snsd": 27961, + "snt": 34834, + "snu": 9694, + "snuck": 36522, + "snug": 45169, + "snuggle": 31327, + "snuggles": 48165, + "sny": 17526, + "snyder": 22106, + "snz": 37678, + "so": 759, + "so": 706, + "soa": 39584, + "soak": 24839, + "soaked": 26592, + "soaking": 26750, + "soap": 26086, + "soap": 11088, + "soaps": 40958, + "soar": 48997, + "soar": 22241, + "soaring": 27968, + "soars": 41348, + "sob": 24900, + "sob": 35507, + "sobbing": 36691, + "sober": 30969, + "sober": 24487, + "sobre": 42768, + "sobri": 49308, + "sobs": 43636, + "soc": 3253, + "soc": 7741, + "soca": 49239, + "socal": 46470, + "socal": 20450, + "soccer": 16268, + "soccer": 4233, + "socceroos": 41997, + "socent": 30831, + "sochi": 21014, + "soci": 1720, + "social": 4803, + "social": 2346, + "socialism": 23372, + "socialist": 18450, + "socialists": 43839, + "socially": 24555, + "socialmedi": 23813, + "socialmedia": 9600, + "socialmediamarketing": 31790, + "societal": 40058, + "societies": 25855, + "society": 3757, + "socio": 44319, + "socio": 42790, + "sociology": 32373, + "sock": 29801, + "sock": 18277, + "socket": 28657, + "socks": 8774, + "socorro": 46409, + "socute": 45086, + "sod": 31435, + "soda": 13533, + "sodium": 29070, + "soe": 44136, + "soe": 25498, + "soever": 34024, + "sof": 1571, + "sof": 41187, + "sofa": 15723, + "soff": 35290, + "soff": 30684, + "sofficial": 20563, + "sofi": 41537, + "sofia": 18914, + "sofinstagram": 17301, + "soft": 12778, + "soft": 3773, + "softball": 8369, + "softer": 44462, + "softhe": 23127, + "softly": 34958, + "software": 35941, + "software": 5847, + "softwitter": 11311, + "sog": 44775, + "soggy": 41168, + "sohn": 49267, + "soho": 47749, + "soho": 17592, + "soi": 40495, + "soil": 33417, + "soil": 9216, + "soils": 34891, + "soir": 43427, + "sok": 43456, + "sol": 1175, + "sol": 9941, + "sola": 40086, + "solace": 42567, + "solar": 16990, + "solar": 5199, + "solareclipse": 44727, + "sold": 33116, + "sold": 3939, + "soldi": 5098, + "soldier": 9355, + "soldiers": 7547, + "sole": 10519, + "sole": 8576, + "soleil": 33148, + "solely": 27913, + "solent": 47783, + "soles": 22682, + "soli": 3911, + "solic": 19369, + "solicitor": 45647, + "solicitors": 46000, + "solid": 30626, + "solid": 6148, + "solidar": 10415, + "solidarity": 10983, + "solidi": 46136, + "solids": 49070, + "solihull": 45293, + "solit": 37039, + "solitaire": 47257, + "solitary": 33094, + "solitude": 33199, + "solo": 17626, + "solo": 5797, + "soloist": 46391, + "solom": 15768, + "solomon": 19785, + "solos": 44868, + "solst": 20298, + "solstice": 21359, + "solu": 2487, + "solution": 4575, + "solutions": 5140, + "solve": 8917, + "solved": 13451, + "solves": 42740, + "solving": 15581, + "som": 734, + "som": 10672, + "soma": 36170, + "somal": 40281, + "somali": 26231, + "somalia": 17051, + "somaliland": 43315, + "some": 1132, + "some": 836, + "somebody": 8305, + "someday": 17127, + "somehow": 11735, + "someone": 2100, + "somer": 9656, + "somerhalder": 33990, + "somerset": 14926, + "somerville": 41409, + "somes": 38124, + "somethin": 33541, + "something": 28316, + "something": 2006, + "sometime": 21464, + "sometimes": 4237, + "somewhat": 17864, + "somewhere": 8119, + "somm": 42726, + "somme": 30625, + "sommer": 44954, + "somos": 24951, + "son": 1176, + "son": 825, + "sona": 21249, + "sonam": 40096, + 
"sonar": 48235, + "sonata": 37009, + "sone": 29599, + "song": 6868, + "song": 2295, + "songs": 4641, + "songwriter": 13034, + "songwriters": 39583, + "songwriting": 33567, + "songz": 49302, + "soni": 34899, + "soni": 35911, + "sonia": 20409, + "sonic": 23785, + "sonic": 9132, + "sonics": 48511, + "sonja": 46102, + "sonline": 23412, + "sonny": 43000, + "sonny": 20880, + "sono": 44109, + "sonom": 48596, + "sonoma": 26269, + "sons": 5502, + "sonsof": 46676, + "sont": 31063, + "sonthe": 40923, + "sony": 16042, + "sony": 8748, + "sonya": 39172, + "soo": 5517, + "soo": 8602, + "soom": 39771, + "soon": 27559, + "soon": 1745, + "sooner": 18968, + "sooners": 30449, + "sooo": 11526, + "soooo": 13658, + "sooooo": 21199, + "soooooo": 34859, + "soor": 46698, + "soothe": 44424, + "soothing": 27730, + "sop": 3974, + "sop": 19194, + "soph": 34963, + "sophi": 6192, + "sophia": 16790, + "sophie": 38648, + "sophie": 12357, + "sophistic": 17646, + "sophisticated": 20833, + "sophom": 13696, + "sophomore": 15242, + "sophomores": 47645, + "soprano": 28880, + "soproud": 44479, + "sor": 1852, + "sor": 16872, + "sora": 38719, + "sorbet": 39994, + "sore": 43330, + "sore": 15454, + "sored": 6731, + "soren": 38907, + "sorg": 28152, + "sori": 38588, + "sorority": 30059, + "soros": 33248, + "sorren": 44012, + "sorrow": 28020, + "sorrows": 47924, + "sorry": 25745, + "sorry": 3675, + "sorrynotsorry": 37105, + "sort": 8450, + "sorta": 34700, + "sorted": 13221, + "sorting": 19198, + "sorts": 12577, + "sory": 16257, + "sos": 25145, + "sos": 5792, + "sosa": 45433, + "sosfam": 47709, + "sot": 41542, + "sot": 34116, + "sothe": 32145, + "sotho": 45496, + "soto": 27947, + "sotto": 26047, + "sotu": 32286, + "sou": 1101, + "sou": 24293, + "sought": 18874, + "soul": 8701, + "soul": 3755, + "soulful": 30196, + "soulmate": 38130, + "souls": 10951, + "soun": 19474, + "sound": 5236, + "sound": 3608, + "soundcheck": 31394, + "soundcloud": 15190, + "sounded": 28287, + "sounders": 44933, + "sounding": 21351, + "sounds": 5694, + "soundtrack": 11389, + "soup": 7077, + "soups": 45052, + "sour": 2235, + "sour": 12049, + "source": 23698, + "source": 3634, + "sourced": 23340, + "sources": 5124, + "sourcing": 19574, + "sourdough": 29921, + "souri": 11674, + "sous": 32093, + "sousa": 46296, + "sout": 38156, + "sout": 32732, + "south": 2938, + "south": 2045, + "southafrica": 15184, + "southampton": 15767, + "southbank": 44173, + "southbound": 22932, + "southeast": 13942, + "southeastern": 26813, + "southend": 25583, + "souther": 33330, + "southern": 17704, + "southern": 5036, + "southgate": 47262, + "southkorea": 43552, + "southport": 37446, + "southside": 36436, + "southsudan": 30419, + "southwark": 39098, + "southwe": 46443, + "southwest": 13320, + "southwestern": 30157, + "souven": 20210, + "souvenir": 24811, + "souvenirs": 48460, + "souza": 29424, + "sov": 29737, + "sover": 31876, + "sovere": 17736, + "sovereign": 29418, + "sovereign": 26337, + "sovereignty": 31701, + "soviet": 14274, + "sow": 33089, + "sowe": 36130, + "soweto": 47070, + "sown": 49369, + "sox": 39556, + "sox": 8657, + "soy": 16524, + "soy": 15010, + "soybean": 34606, + "soybeans": 40840, + "soyu": 39578, + "soyuz": 43842, + "sp": 588, + "sp": 4393, + "spa": 7852, + "spa": 6692, + "spac": 10336, + "space": 7857, + "space": 2138, + "spacecraft": 25940, + "spaces": 9006, + "spaceship": 34317, + "spacex": 22511, + "spacey": 48770, + "spacious": 24769, + "spad": 45362, + "spade": 32562, + "spades": 48368, + "spaghetti": 18440, + "spain": 5083, + "spal": 26018, + "spam": 29712, + 
"spam": 14624, + "span": 4270, + "span": 14537, + "spandex": 41686, + "spani": 16721, + "spaniel": 35435, + "spanish": 29966, + "spanish": 6013, + "spann": 25323, + "spanning": 38638, + "spans": 45407, + "spaper": 34548, + "spar": 3378, + "spar": 34576, + "spare": 12615, + "spares": 39505, + "spark": 9555, + "spark": 11047, + "sparked": 32647, + "sparkle": 18287, + "sparkles": 36410, + "sparkling": 17893, + "sparkly": 30542, + "sparks": 15046, + "sparky": 47198, + "sparring": 42161, + "sparrow": 22888, + "spart": 10143, + "sparta": 38401, + "spartan": 26582, + "spartan": 24225, + "spartans": 20457, + "sparty": 36477, + "spas": 31714, + "spati": 19200, + "spatial": 22022, + "spaw": 31605, + "spawn": 29166, + "spay": 40634, + "spc": 20492, + "spca": 37018, + "spd": 37717, + "spd": 28307, + "spdwy": 45981, + "spe": 876, + "spe": 36676, + "speak": 20599, + "speak": 4208, + "speake": 46077, + "speaker": 25764, + "speaker": 4914, + "speakers": 7675, + "speaking": 3714, + "speaks": 5661, + "spear": 23277, + "spear": 30420, + "speare": 43859, + "spears": 20242, + "spec": 1711, + "spec": 18596, + "speci": 1969, + "special": 11422, + "special": 1689, + "specialist": 10630, + "specialists": 21719, + "speciality": 46904, + "specialized": 23265, + "specializes": 48533, + "specially": 4513, + "specials": 11983, + "specialty": 18262, + "species": 6330, + "specific": 10528, + "specifically": 17174, + "specification": 46394, + "specifications": 39705, + "specified": 48114, + "specimen": 30263, + "specimens": 42715, + "specs": 24093, + "spect": 3416, + "spectac": 7242, + "spectacle": 34342, + "spectacular": 8404, + "spectator": 32372, + "spectators": 39306, + "spective": 6633, + "spector": 48676, + "spectral": 45441, + "spectre": 35998, + "spectro": 27646, + "spectrum": 13532, + "specul": 19209, + "speculation": 30898, + "sped": 38813, + "spee": 4050, + "speech": 19556, + "speech": 4902, + "speeches": 25208, + "speechless": 23152, + "speed": 6860, + "speed": 4163, + "speeding": 27264, + "speeds": 22017, + "speedway": 11480, + "speedy": 21603, + "spel": 41887, + "spell": 22784, + "spell": 11230, + "spelled": 24339, + "spelling": 15614, + "spells": 25335, + "spelt": 38316, + "spen": 5087, + "spence": 33324, + "spencer": 27509, + "spencer": 10678, + "spend": 4664, + "spending": 5961, + "spends": 22508, + "spent": 4429, + "speople": 33035, + "sper": 8213, + "sper": 15313, + "sperm": 35781, + "sperson": 22687, + "spf": 34973, + "spg": 34623, + "sph": 28909, + "sph": 24684, + "sphe": 33691, + "spher": 18349, + "sphere": 6987, + "spheres": 37478, + "spheric": 21744, + "sphin": 39237, + "sphinx": 46487, + "spho": 20442, + "sphoto": 38594, + "sphy": 43808, + "spi": 3174, + "spi": 37080, + "spic": 17264, + "spice": 29761, + "spice": 10141, + "spiced": 24267, + "spicer": 37627, + "spices": 21194, + "spicy": 10915, + "spide": 36801, + "spider": 11963, + "spider": 7622, + "spiderman": 39808, + "spiderman": 18427, + "spiders": 23141, + "spidey": 41706, + "spie": 28573, + "spie": 28746, + "spied": 43998, + "spiegel": 45351, + "spiel": 28435, + "spiel": 37690, + "spielberg": 37569, + "spies": 25374, + "spieth": 43254, + "spike": 35306, + "spike": 15310, + "spiked": 47014, + "spikes": 29582, + "spil": 47765, + "spill": 43933, + "spill": 18006, + "spilled": 33206, + "spilling": 49006, + "spills": 35796, + "spin": 6288, + "spin": 9226, + "spinach": 14747, + "spinal": 23925, + "spine": 48221, + "spine": 19646, + "sping": 47113, + "spinner": 29924, + "spinning": 13987, + "spino": 40848, + "spinoff": 42513, + "spinrilla": 46064, 
+ "spins": 27243, + "spion": 39604, + "spionage": 41838, + "spir": 3745, + "spiral": 19873, + "spiration": 38126, + "spire": 27439, + "spired": 40650, + "spires": 46938, + "spiri": 4024, + "spirit": 18224, + "spirit": 4071, + "spirited": 34701, + "spirits": 13192, + "spiritu": 7237, + "spiritual": 46076, + "spiritual": 9473, + "spirituality": 22165, + "spiro": 40085, + "spit": 18115, + "spit": 23177, + "spite": 26060, + "spitfire": 31126, + "spitting": 40721, + "spl": 2470, + "spl": 33052, + "spla": 4809, + "splac": 16059, + "splace": 38743, + "splash": 43641, + "splash": 11879, + "splat": 15733, + "splatoon": 22565, + "splay": 3169, + "splen": 18552, + "splend": 29861, + "splendid": 21016, + "splendor": 46262, + "splin": 38090, + "split": 25443, + "split": 9109, + "splits": 34897, + "splitting": 37210, + "splus": 40866, + "spn": 35467, + "spn": 19414, + "spnfamily": 38566, + "spo": 1261, + "spo": 21085, + "spock": 43918, + "spoil": 25600, + "spoiled": 21399, + "spoiler": 16512, + "spoilers": 18326, + "spoils": 42436, + "spoilt": 35358, + "spokane": 24528, + "spoke": 13890, + "spoke": 6518, + "spoken": 12979, + "spokesman": 31632, + "spokesperson": 26234, + "spol": 22476, + "spol": 8132, + "spoli": 34301, + "spolice": 37406, + "spon": 1715, + "spon": 48216, + "sponge": 22861, + "sponge": 24345, + "spongebob": 25089, + "spons": 5597, + "sponsor": 10424, + "sponsor": 7574, + "sponsored": 7197, + "sponsoring": 16181, + "sponsors": 11005, + "sponsorship": 17632, + "spontaneous": 32465, + "spoo": 11248, + "spooky": 15369, + "spool": 49152, + "spoon": 27001, + "spoon": 14024, + "spoons": 29661, + "spor": 1475, + "spor": 33746, + "sport": 4379, + "sport": 2364, + "sporting": 32620, + "sporting": 8944, + "sports": 6436, + "sports": 2054, + "sportsc": 40114, + "sportscar": 46931, + "sportscenter": 39157, + "sportsman": 39020, + "sportsmanship": 34858, + "sportsnet": 34144, + "sportswear": 39747, + "sporty": 33346, + "spot": 3223, + "spot": 3049, + "spotify": 7193, + "spotlight": 7901, + "spots": 7670, + "spotted": 4533, + "spotter": 30742, + "spotting": 15885, + "spouse": 24724, + "spout": 48993, + "spp": 47567, + "spr": 1536, + "spr": 19417, + "spra": 12966, + "spraw": 46590, + "spray": 37885, + "spray": 10449, + "sprayed": 40022, + "spraying": 39224, + "spre": 18740, + "spread": 20620, + "spread": 5284, + "spreading": 11821, + "spreads": 27579, + "spree": 21851, + "spri": 35498, + "spride": 26685, + "spring": 5166, + "spring": 2420, + "springbreak": 37753, + "springer": 30117, + "springfield": 16599, + "springs": 7308, + "springst": 32132, + "springsteen": 28367, + "springtime": 28285, + "springtraining": 49364, + "springwatch": 29239, + "sprink": 15817, + "sprinkle": 42897, + "sprinkler": 48754, + "sprinkles": 37326, + "sprint": 29248, + "sprint": 10751, + "sprinter": 36947, + "sprints": 36404, + "sprite": 32544, + "spro": 13902, + "spro": 37403, + "sproject": 37802, + "sproud": 37686, + "sprout": 35863, + "sprouts": 25756, + "spru": 17041, + "spruce": 23812, + "sprung": 32968, + "sps": 13869, + "spu": 23566, + "spun": 47922, + "spun": 32852, + "spur": 15206, + "spur": 20361, + "spurs": 10916, + "spursofficial": 45290, + "sput": 47521, + "spx": 20584, + "spy": 13861, + "spy": 6656, + "spyder": 39952, + "spying": 36227, + "sq": 9370, + "sq": 11590, + "sqft": 41912, + "sql": 42759, + "sql": 18938, + "sqm": 47978, + "sqn": 41209, + "squ": 1653, + "squad": 13892, + "squad": 4234, + "squadron": 18579, + "squads": 36590, + "square": 19314, + "square": 3999, + "squared": 32967, + "squares": 26972, + 
"squash": 13312, + "squat": 44628, + "squat": 30680, + "squats": 40213, + "sque": 9721, + "sque": 8097, + "squee": 14420, + "squeeze": 21684, + "squeezed": 40413, + "squid": 42057, + "squid": 22553, + "squir": 9683, + "squire": 48090, + "squirrel": 14004, + "squirrels": 26623, + "squish": 42607, + "squishy": 47001, + "sr": 3437, + "sr": 5428, + "srbachchan": 32353, + "src": 23445, + "sre": 17748, + "sri": 11051, + "sri": 9276, + "sridevi": 46301, + "srilan": 15559, + "srilanka": 16922, + "srin": 26818, + "srinagar": 33671, + "srini": 41899, + "sriracha": 42743, + "sris": 27851, + "srisri": 32966, + "srk": 44982, + "srk": 11216, + "srl": 33808, + "srp": 43004, + "srs": 41764, + "srsly": 44179, + "srt": 28139, + "sru": 44152, + "srugby": 40526, + "ss": 690, + "ss": 632, + "ssa": 6088, + "ssal": 31330, + "ssal": 35936, + "ssb": 37511, + "ssc": 21692, + "ssc": 20364, + "ssd": 23107, + "sse": 9030, + "sse": 8938, + "ssed": 38755, + "ssed": 1804, + "ssel": 17402, + "ssel": 19373, + "sseldorf": 47792, + "ssell": 42388, + "ssels": 8355, + "ssen": 39408, + "ssen": 22645, + "sser": 20445, + "sses": 1802, + "ssett": 44103, + "ssf": 33239, + "ssg": 40707, + "ssh": 48866, + "ssi": 834, + "ssi": 14953, + "ssia": 22238, + "ssian": 31218, + "ssible": 47099, + "ssic": 27774, + "ssic": 17077, + "ssie": 7572, + "ssier": 26422, + "ssil": 15026, + "ssin": 42660, + "ssing": 2112, + "ssion": 16050, + "ssion": 1627, + "ssional": 13727, + "ssionism": 24787, + "ssionist": 27682, + "ssions": 4137, + "ssive": 2734, + "ssively": 28060, + "ssl": 32195, + "ssler": 30287, + "ssly": 24904, + "ssn": 39116, + "ssnhq": 47998, + "sso": 25900, + "sso": 7914, + "ssoccer": 32546, + "sson": 36124, + "sson": 7271, + "ssor": 35152, + "ssp": 31101, + "ssr": 39880, + "sss": 11176, + "ssss": 30676, + "ssss": 15880, + "sssss": 24298, + "sst": 40396, + "ssu": 35351, + "ssummit": 49301, + "ssus": 31286, + "ssw": 36937, + "ssy": 22519, + "ssy": 8661, + "st": 522, + "st": 545, + "sta": 1363, + "sta": 2745, + "stab": 7726, + "stab": 29974, + "stabbed": 24534, + "stabbing": 25474, + "stabil": 42576, + "stabili": 23903, + "stability": 16716, + "stable": 44427, + "stable": 10492, + "stables": 34218, + "stac": 10175, + "stacey": 41653, + "stacey": 24262, + "stache": 23616, + "stack": 24723, + "stack": 11257, + "stacked": 24990, + "stacking": 39836, + "stacks": 24734, + "stacy": 26628, + "stad": 15832, + "stad": 16485, + "stade": 38198, + "stadi": 26587, + "stadion": 48815, + "stadium": 3390, + "stadiums": 38852, + "stadt": 22713, + "staf": 2367, + "staff": 31188, + "staff": 2813, + "staffer": 38494, + "staffers": 44994, + "staffing": 32932, + "stafford": 25006, + "staffordshire": 29198, + "staffs": 36098, + "stag": 12088, + "stag": 20277, + "stage": 23182, + "stage": 2170, + "staged": 19906, + "stages": 12297, + "staggering": 37315, + "staging": 27026, + "stagram": 19503, + "stags": 45936, + "stain": 3933, + "stain": 14603, + "stained": 13751, + "staining": 32523, + "stainless": 12320, + "stains": 32008, + "stair": 7240, + "stair": 17662, + "staircase": 22777, + "stairs": 9577, + "stairway": 45559, + "stak": 39144, + "stake": 15955, + "stake": 7937, + "stakeholder": 39122, + "stakeholders": 22968, + "stakes": 7519, + "staking": 47082, + "stal": 3861, + "stal": 5535, + "stale": 42471, + "stalert": 25450, + "stalin": 28346, + "stalk": 40826, + "stalk": 14878, + "stalker": 26777, + "stalking": 24721, + "stalks": 45886, + "stall": 24636, + "stall": 12058, + "stalled": 40362, + "stallion": 28273, + "stallions": 44787, + "stallone": 40969, + 
"stalls": 25427, + "stam": 4663, + "stamatic": 30904, + "stamford": 27843, + "stamina": 48753, + "stamp": 28694, + "stamp": 12771, + "stampcollecting": 42852, + "stamped": 38356, + "stampede": 25384, + "stamps": 13827, + "stan": 2203, + "stan": 2434, + "stana": 33311, + "stanbul": 11231, + "stance": 48900, + "stance": 3542, + "stances": 15054, + "stand": 1819, + "stand": 2087, + "standalone": 44887, + "standard": 35780, + "standard": 5807, + "standardi": 30247, + "standards": 9022, + "standby": 36184, + "standing": 39934, + "standing": 2862, + "standings": 19835, + "standoff": 31821, + "standout": 23131, + "standre": 48309, + "stands": 6446, + "standup": 35108, + "standup": 24964, + "standwith": 19540, + "stanford": 36219, + "stanford": 15087, + "stang": 12536, + "stani": 38228, + "stanis": 37711, + "stanley": 19048, + "stanley": 10079, + "stanleycup": 28662, + "stans": 26564, + "stant": 41576, + "stant": 4906, + "stanton": 25400, + "stap": 10438, + "staple": 22695, + "staples": 23646, + "stapleton": 45228, + "star": 993, + "star": 1565, + "starbuck": 48519, + "starbucks": 9499, + "starch": 47837, + "starcraft": 48871, + "stardom": 44616, + "stardust": 34337, + "stare": 18094, + "stared": 47772, + "stares": 37916, + "starfish": 44283, + "stargate": 41099, + "stargazing": 49328, + "staring": 13800, + "stark": 40446, + "stark": 15353, + "starlight": 32197, + "starling": 46205, + "starmagic": 48023, + "starplus": 37815, + "starr": 19186, + "starred": 24180, + "starrer": 41311, + "starring": 6660, + "starry": 30963, + "stars": 2895, + "starship": 37166, + "start": 17466, + "start": 1572, + "started": 2760, + "starter": 7800, + "starters": 22222, + "starting": 2530, + "startrek": 30642, + "startrek": 15349, + "starts": 3105, + "startu": 6996, + "startup": 18049, + "startup": 5882, + "startups": 9056, + "starve": 46957, + "starving": 30473, + "starwar": 17287, + "starwars": 26239, + "starwars": 7887, + "starz": 25928, + "stas": 19866, + "stash": 27711, + "stasy": 45942, + "stat": 3004, + "stat": 15216, + "state": 3492, + "state": 1295, + "statec": 33931, + "stated": 19629, + "statedept": 41458, + "statefair": 40305, + "statement": 5401, + "statements": 19513, + "staten": 38263, + "stateof": 35195, + "states": 22125, + "states": 4218, + "statesman": 35301, + "stateu": 44248, + "statewide": 29561, + "stati": 9622, + "static": 16363, + "stating": 35147, + "station": 13498, + "station": 2631, + "stationary": 29493, + "stationed": 47618, + "stationery": 33851, + "stations": 10051, + "statistical": 29349, + "statistics": 14165, + "stats": 7294, + "statu": 32481, + "statue": 8222, + "statues": 24363, + "status": 6414, + "stau": 28550, + "staur": 3709, + "stav": 20285, + "stax": 32235, + "stay": 4714, + "stay": 2277, + "stayed": 13805, + "staying": 8993, + "stays": 13311, + "staytuned": 39285, + "stc": 29859, + "std": 30477, + "ste": 795, + "ste": 2686, + "stea": 46614, + "stead": 16101, + "stead": 11031, + "steadily": 35049, + "steady": 12937, + "steak": 26955, + "steak": 8913, + "steakhouse": 35031, + "steaks": 30655, + "steal": 37070, + "steal": 10181, + "stealing": 14242, + "steals": 20224, + "stealth": 25327, + "steam": 10962, + "steam": 6972, + "steamboat": 41121, + "steamed": 29007, + "steamer": 49075, + "steaming": 43746, + "steampunk": 24130, + "steamy": 43104, + "stec": 46713, + "stech": 48949, + "stech": 32455, + "sted": 20426, + "sted": 1356, + "stee": 31793, + "steed": 48293, + "steel": 6938, + "steel": 4726, + "steele": 19460, + "steelers": 14430, + "steen": 42851, + "steen": 18625, + 
"steep": 28648, + "steep": 20714, + "steer": 27612, + "steering": 19833, + "stef": 29158, + "stefan": 15004, + "stefan": 18829, + "stefani": 38319, + "stefano": 30719, + "steff": 30075, + "stein": 13653, + "stein": 5818, + "steiner": 36314, + "stel": 9102, + "stel": 10798, + "stell": 22355, + "stella": 46178, + "stella": 17869, + "stellar": 13810, + "stellen": 42754, + "stem": 24342, + "stem": 6761, + "stemc": 40486, + "stems": 31503, + "sten": 7652, + "sten": 7877, + "stencil": 47854, + "stennis": 45636, + "step": 15572, + "step": 3348, + "steph": 3522, + "steph": 16251, + "stephan": 37312, + "stephani": 48121, + "stephanie": 14361, + "stephen": 10421, + "stephen": 6078, + "stephenking": 46361, + "stephens": 22256, + "stephenson": 37280, + "stepped": 18384, + "stepping": 15906, + "steps": 5408, + "ster": 1022, + "ster": 881, + "stere": 9229, + "stered": 6935, + "stereo": 15992, + "stereo": 17400, + "stereotypes": 27890, + "steria": 38804, + "stering": 14175, + "sterling": 45790, + "sterling": 9378, + "stern": 36254, + "stern": 2945, + "steroids": 37670, + "sterone": 39418, + "sters": 2132, + "stery": 24232, + "stest": 8556, + "stev": 11640, + "steve": 7412, + "steve": 3803, + "steven": 10973, + "steven": 8016, + "stevens": 13877, + "stevenson": 25091, + "stevie": 42104, + "stevie": 18969, + "stew": 17906, + "stewar": 28453, + "steward": 34980, + "steward": 43355, + "stewards": 49294, + "stewardship": 36720, + "stewart": 8120, + "stfu": 47000, + "stg": 48387, + "stgeorge": 43698, + "sth": 13456, + "sth": 34004, + "sthe": 16491, + "sthel": 42863, + "sti": 860, + "sti": 12439, + "stia": 26492, + "stible": 25835, + "stic": 5868, + "stic": 1561, + "stical": 16660, + "stically": 19041, + "stick": 5483, + "stick": 4987, + "sticker": 11270, + "stickers": 11613, + "sticking": 21021, + "sticks": 10016, + "sticky": 18887, + "stics": 5449, + "stie": 38164, + "stie": 11000, + "stier": 42069, + "sties": 16428, + "stiff": 43471, + "stiff": 21441, + "stig": 4088, + "stig": 42551, + "stigate": 15390, + "stigma": 20619, + "stik": 42247, + "stil": 21790, + "stil": 37519, + "stiles": 33028, + "still": 13209, + "still": 1170, + "stills": 20259, + "stim": 18269, + "stime": 24711, + "stimul": 16434, + "stimulate": 42380, + "stimulating": 41237, + "stimulation": 39530, + "stimulus": 47283, + "stin": 2588, + "stin": 4025, + "stina": 22359, + "stine": 7098, + "sting": 19868, + "sting": 1271, + "stingly": 49332, + "stingray": 43229, + "stink": 38213, + "stinky": 44957, + "stino": 40658, + "stint": 33531, + "stion": 10812, + "stip": 39869, + "stips": 44756, + "stique": 43305, + "stir": 12416, + "stir": 19564, + "stirling": 23128, + "stirring": 39205, + "stis": 45224, + "stit": 14110, + "stitch": 30003, + "stitch": 14771, + "stitched": 36540, + "stitcher": 48204, + "stitches": 32360, + "stitching": 45208, + "stitu": 14585, + "stitutes": 40479, + "stive": 22426, + "stix": 48829, + "stjohn": 36153, + "stl": 14179, + "stl": 12527, + "stlblues": 44138, + "stlcards": 28644, + "stle": 7698, + "stles": 48638, + "stlouis": 40358, + "stlouis": 39516, + "stm": 28333, + "stn": 27175, + "sto": 928, + "sto": 5723, + "stock": 5899, + "stock": 3206, + "stocked": 23552, + "stockholm": 16024, + "stocki": 42944, + "stocking": 17335, + "stockings": 28040, + "stockmarket": 40359, + "stockport": 35569, + "stocks": 9321, + "stockton": 26130, + "stoday": 22392, + "stok": 43782, + "stoke": 31338, + "stoke": 13550, + "stoked": 13160, + "stokes": 27512, + "stol": 11401, + "stol": 6700, + "stole": 10995, + "stolen": 8704, + "stolic": 45020, + 
"stom": 2343, + "stom": 38068, + "stoma": 43545, + "stomach": 14722, + "stomp": 40165, + "stomping": 46144, + "ston": 4101, + "ston": 1839, + "stone": 7694, + "stone": 2441, + "stoned": 36248, + "stonehenge": 42417, + "stoner": 35131, + "stoner": 29115, + "stones": 42659, + "stones": 6885, + "stonewall": 39688, + "stoney": 44198, + "stony": 41717, + "stony": 35691, + "stoo": 24505, + "stood": 9151, + "stool": 34413, + "stool": 22314, + "stop": 6005, + "stop": 1691, + "stopbrexit": 48680, + "stopp": 15738, + "stopped": 6015, + "stopper": 32147, + "stoppers": 34457, + "stopping": 10735, + "stops": 9822, + "stopthe": 26463, + "stor": 809, + "stor": 17740, + "storage": 6824, + "store": 17769, + "store": 2183, + "stored": 28257, + "stores": 6370, + "storey": 24025, + "storians": 34628, + "stories": 3784, + "storing": 40087, + "stork": 46452, + "storm": 7434, + "storm": 2819, + "stormed": 45939, + "stormhour": 12161, + "storming": 24842, + "storms": 6464, + "stormtrooper": 49218, + "stormy": 20075, + "stors": 7178, + "story": 6512, + "story": 1134, + "storyline": 37079, + "storymonth": 23717, + "storyteller": 35882, + "storytelling": 14457, + "storytime": 44197, + "stos": 19281, + "stou": 37168, + "stour": 37361, + "stour": 21928, + "stout": 16550, + "stove": 21423, + "stow": 44284, + "stow": 17046, + "stowe": 34196, + "stown": 28071, + "stown": 7939, + "stp": 30576, + "stpatrick": 21343, + "stpatricksday": 22747, + "str": 807, + "str": 15913, + "stra": 1894, + "stra": 6253, + "strack": 46861, + "strada": 31134, + "strade": 48968, + "straigh": 31016, + "straight": 22114, + "straight": 4241, + "strain": 16887, + "strains": 38067, + "strait": 22946, + "straits": 41984, + "stral": 23289, + "stralia": 42510, + "stran": 18411, + "strand": 18214, + "strand": 17826, + "stranded": 22975, + "strang": 11138, + "strange": 33380, + "strange": 7288, + "strangely": 37566, + "stranger": 35541, + "stranger": 14149, + "strangers": 20684, + "strangerthings": 43271, + "strangest": 46740, + "strap": 13946, + "strapped": 40922, + "straps": 31213, + "stras": 36814, + "stras": 42125, + "strasbourg": 39576, + "strat": 11345, + "strat": 32925, + "strata": 47278, + "strate": 3532, + "strate": 28758, + "strategi": 49102, + "strategic": 10246, + "strategically": 45706, + "strategies": 9942, + "strategist": 37180, + "strategy": 5637, + "strates": 45724, + "stratford": 23955, + "strath": 21997, + "stration": 3156, + "strato": 28878, + "strauss": 32033, + "strava": 34625, + "stravel": 43494, + "straw": 7430, + "straw": 16438, + "strawberries": 17796, + "strawberry": 10233, + "straws": 33048, + "stray": 30784, + "stray": 15712, + "stre": 1079, + "stre": 19652, + "stread": 27797, + "streak": 11749, + "streaks": 42092, + "stream": 8659, + "stream": 3322, + "streamed": 26280, + "streamer": 25178, + "streamers": 19937, + "streaming": 6278, + "streamline": 44917, + "streams": 13545, + "stree": 35082, + "stree": 32438, + "streep": 38701, + "street": 4839, + "street": 2012, + "streetart": 12948, + "streetcar": 34268, + "streetfood": 44486, + "streetphotography": 20786, + "streets": 6058, + "streetstyle": 39118, + "streetwear": 37298, + "strel": 39685, + "stren": 4349, + "streng": 4472, + "strength": 15475, + "strength": 5959, + "strengthen": 16318, + "strengthened": 47131, + "strengthening": 23475, + "strengthens": 40280, + "strengths": 29268, + "stress": 17297, + "stress": 5843, + "stressed": 16497, + "stresses": 32112, + "stressful": 24268, + "stressing": 35917, + "stret": 12265, + "stretch": 10064, + "stretched": 29393, + 
"stretches": 32231, + "stretching": 24423, + "stri": 1493, + "stri": 27795, + "stria": 39620, + "strial": 30217, + "strian": 12924, + "stric": 2607, + "strick": 25181, + "strickland": 48939, + "strict": 21585, + "strictly": 16475, + "stride": 36024, + "strides": 37355, + "stries": 18171, + "strife": 46473, + "strike": 20774, + "strike": 5767, + "striker": 12448, + "strikers": 33465, + "strikes": 9280, + "striking": 13392, + "string": 25512, + "string": 9696, + "strings": 15699, + "strip": 9317, + "stripe": 19368, + "striped": 22192, + "stripes": 14239, + "stripped": 26602, + "stripper": 45759, + "stripping": 48588, + "strips": 19000, + "strive": 22140, + "striving": 37671, + "stro": 3121, + "stro": 6186, + "stroke": 44621, + "stroke": 10403, + "strokes": 26595, + "strol": 30123, + "stroll": 15924, + "stroller": 47076, + "strolling": 40911, + "strom": 14707, + "stron": 4165, + "strong": 10436, + "strong": 2389, + "stronger": 27760, + "stronger": 9245, + "strongertogether": 38532, + "strongest": 16171, + "strongh": 38678, + "strongly": 15507, + "strophy": 47912, + "strou": 48425, + "stroud": 39895, + "strous": 23752, + "stru": 1666, + "struc": 3311, + "struck": 10861, + "struction": 12497, + "structural": 16899, + "structure": 5285, + "structured": 27147, + "structures": 14171, + "structuring": 37496, + "strugg": 5176, + "struggle": 8443, + "struggled": 32921, + "struggles": 17446, + "struggling": 12135, + "struly": 34118, + "strum": 37632, + "strung": 46033, + "strust": 23920, + "strut": 48375, + "stry": 17325, + "stry": 2245, + "sts": 1088, + "stu": 858, + "stu": 23531, + "stuart": 32054, + "stuart": 11723, + "stub": 27066, + "stubborn": 38955, + "stuck": 6596, + "stud": 22368, + "stud": 13319, + "studded": 29153, + "studen": 44156, + "student": 14681, + "student": 2556, + "students": 1712, + "studi": 5691, + "studied": 21369, + "studies": 6426, + "studio": 17798, + "studio": 3155, + "studios": 6231, + "studs": 27571, + "study": 21051, + "study": 3123, + "studyabroad": 45425, + "studying": 8826, + "stuff": 46072, + "stuff": 3487, + "stuffed": 11781, + "stuffing": 31612, + "stuffs": 43455, + "stuk": 32424, + "stumb": 16784, + "stumble": 39045, + "stumbled": 21776, + "stump": 32064, + "stun": 3088, + "stun": 37959, + "stunned": 34034, + "stunner": 29965, + "stunning": 3769, + "stunningly": 47515, + "stuns": 43796, + "stunt": 19905, + "stunts": 40118, + "stupi": 18975, + "stupid": 42600, + "stupid": 8085, + "stupidity": 33766, + "stur": 10676, + "sturdy": 43780, + "stures": 27223, + "sturgeon": 31580, + "sturi": 21747, + "sturridge": 45331, + "stutt": 30444, + "stuttgart": 32219, + "stv": 27060, + "stv": 9708, + "stweet": 46832, + "stweets": 39174, + "stx": 42548, + "sty": 1421, + "sty": 2920, + "style": 12356, + "style": 1844, + "styled": 17974, + "styles": 6948, + "styli": 38577, + "styling": 14597, + "stylish": 10378, + "stylist": 15928, + "styn": 41394, + "su": 605, + "su": 2937, + "sua": 42448, + "suarez": 21437, + "suave": 47305, + "sub": 1783, + "sub": 7765, + "subaru": 21319, + "subjec": 16090, + "subject": 10300, + "subjects": 22099, + "subli": 16350, + "sublime": 22367, + "submarine": 19968, + "submer": 27156, + "submerged": 43171, + "submission": 16571, + "submissions": 21566, + "submit": 10423, + "submitted": 15189, + "submitting": 38788, + "subram": 49207, + "subs": 16398, + "subscri": 5838, + "subscribe": 9839, + "subscribed": 44867, + "subscriber": 36292, + "subscribers": 17337, + "subscription": 17979, + "subscriptions": 47162, + "subsequ": 33598, + "subsequent": 44323, + 
"subsi": 14856, + "subsidi": 45029, + "subsidiary": 45506, + "subsidies": 37685, + "subsidy": 47462, + "substan": 17487, + "substance": 19309, + "substances": 36834, + "substantial": 27171, + "substantially": 47577, + "substitu": 18529, + "substitute": 25340, + "subtitles": 39479, + "subtle": 16536, + "subur": 12517, + "suburb": 37664, + "suburban": 23570, + "suburbs": 25317, + "subway": 12196, + "suc": 1869, + "succe": 7981, + "succeed": 13556, + "succeeded": 41077, + "succes": 39019, + "success": 3695, + "success": 3034, + "successes": 29436, + "successful": 4670, + "successfully": 9934, + "succession": 38491, + "successive": 41319, + "successor": 34774, + "succu": 45253, + "succul": 25671, + "succulent": 35236, + "such": 2046, + "suction": 42786, + "sud": 8067, + "sud": 33714, + "sudan": 31149, + "sudan": 13474, + "sudanese": 42837, + "sudbury": 32488, + "sudden": 10833, + "sudden": 15433, + "suddenly": 11076, + "sue": 14045, + "sue": 6641, + "sued": 22225, + "suede": 21036, + "sues": 17105, + "suf": 21204, + "suf": 22579, + "sufc": 37091, + "suff": 4866, + "suffe": 13510, + "suffer": 13557, + "suffered": 14766, + "suffering": 10140, + "suffers": 22389, + "sufficient": 28410, + "suffol": 13775, + "suffolk": 46408, + "suffolk": 15685, + "suffra": 34596, + "suffrage": 39567, + "sufi": 39756, + "sug": 3189, + "suga": 28757, + "sugar": 12418, + "sugar": 5574, + "sugge": 6345, + "suggest": 13356, + "suggested": 18790, + "suggesti": 15033, + "suggesting": 29792, + "suggestion": 23741, + "suggestions": 16052, + "suggests": 13333, + "suho": 32744, + "sui": 24972, + "suici": 16372, + "suicidal": 37165, + "suicide": 31310, + "suicide": 8247, + "suing": 18309, + "suisse": 35964, + "suit": 11887, + "suit": 3940, + "suitable": 17476, + "suitcase": 27792, + "suite": 9346, + "suited": 25919, + "suites": 21523, + "suits": 9949, + "suk": 24820, + "suk": 6886, + "suka": 44017, + "suke": 25590, + "sukh": 46961, + "suki": 32704, + "sul": 1767, + "sul": 19879, + "sula": 34713, + "sula": 26143, + "sullivan": 14477, + "sully": 37752, + "sulph": 37234, + "sulphur": 47659, + "sultan": 35650, + "sultan": 17049, + "sum": 7054, + "sum": 8257, + "suma": 47938, + "sumat": 32640, + "sumatra": 47346, + "sume": 45457, + "sumi": 41248, + "summ": 1309, + "summar": 34657, + "summari": 31993, + "summary": 13435, + "summed": 34912, + "summer": 5500, + "summer": 1673, + "summers": 18254, + "summerslam": 40264, + "summertime": 19025, + "summit": 30011, + "summit": 3768, + "summon": 27622, + "summon": 39782, + "sumner": 46813, + "sumo": 33734, + "sump": 34252, + "sumptuous": 47354, + "sums": 13325, + "sun": 968, + "sun": 2176, + "sunbathing": 46994, + "sunburn": 45767, + "sund": 40735, + "sundae": 38078, + "sundance": 24128, + "sundar": 44936, + "sunday": 6649, + "sunday": 1706, + "sundayfunday": 21565, + "sundaymorning": 24809, + "sundaymotivation": 46227, + "sundays": 15827, + "sundaywith": 26469, + "sundaywithmarsha": 26662, + "sunder": 15097, + "sunderland": 45727, + "sunderland": 18851, + "sundown": 44438, + "sune": 41096, + "sunflower": 21559, + "sunflowers": 39809, + "sung": 16903, + "sung": 6047, + "sunglasses": 12906, + "suni": 17663, + "suni": 47010, + "sunil": 32861, + "sunite": 21382, + "sunited": 35276, + "sunk": 37534, + "sunken": 43473, + "sunlight": 17996, + "sunni": 44315, + "sunny": 15632, + "sunny": 5438, + "sunrise": 5610, + "suns": 18322, + "sunscreen": 29355, + "sunset": 37880, + "sunset": 3424, + "sunsets": 17721, + "sunshine": 32761, + "sunshine": 5385, + "suny": 41308, + "sup": 19078, + "sup": 8249, + 
"supdates": 24177, + "super": 1642, + "super": 1994, + "superb": 8930, + "superbike": 45709, + "superbowl": 47461, + "superbowl": 16467, + "supercar": 27021, + "supercars": 32185, + "supercell": 43227, + "supercharged": 47479, + "supere": 46831, + "superfood": 41715, + "supergirl": 25771, + "superhero": 14049, + "superheroes": 23334, + "superint": 17615, + "superintendent": 19020, + "superior": 13205, + "superjunior": 40475, + "superleague": 45539, + "superman": 11237, + "supermarket": 19897, + "supermarkets": 45106, + "supermodel": 41963, + "supermoon": 36571, + "supernatural": 15484, + "supernova": 39843, + "superrugby": 48717, + "supersonic": 42019, + "supersport": 46319, + "superst": 38202, + "superstar": 32551, + "superstar": 10472, + "superstars": 25797, + "supervis": 12709, + "supervised": 41316, + "supervision": 36234, + "supervisor": 20366, + "supervisors": 37958, + "superyacht": 42714, + "supp": 1023, + "supper": 15727, + "supple": 31431, + "supplement": 19924, + "supplements": 21265, + "supplied": 24106, + "supplier": 18043, + "suppliers": 24196, + "supplies": 9384, + "supply": 25074, + "supply": 6389, + "supplychain": 31224, + "supplying": 32739, + "suppo": 6941, + "suppor": 2104, + "support": 12062, + "support": 1425, + "supported": 8038, + "supporter": 12992, + "supporters": 7403, + "supportindiefilm": 43976, + "supporting": 3976, + "supportive": 18313, + "supportlocal": 43852, + "supports": 8336, + "supportsmall": 30941, + "supportsmallstreamers": 36097, + "suppose": 18924, + "supposed": 9119, + "supposedly": 32302, + "suppre": 20542, + "suppression": 36508, + "supra": 48485, + "supre": 5875, + "supremac": 28643, + "supremacist": 39005, + "supremacy": 28913, + "supreme": 35222, + "supreme": 7468, + "supt": 23625, + "sur": 1090, + "sur": 7123, + "sura": 33412, + "sura": 49125, + "surabaya": 45227, + "surance": 22184, + "surat": 30201, + "sure": 14320, + "sure": 1650, + "sured": 36869, + "surely": 11409, + "sures": 12725, + "suresh": 32118, + "suresh": 31464, + "sureshpp": 41924, + "sureshpprabhu": 42050, + "surf": 10176, + "surf": 10322, + "surface": 7744, + "surfaces": 20746, + "surfer": 24925, + "surfers": 34842, + "surfing": 15762, + "surg": 13045, + "surge": 17457, + "surgeon": 16039, + "surgeons": 26000, + "surger": 5122, + "surgeries": 34940, + "surgery": 5344, + "surgical": 16386, + "suri": 14130, + "suri": 33952, + "suring": 16817, + "suriya": 17832, + "surpass": 45494, + "surpassed": 25648, + "surplus": 29413, + "surpri": 3244, + "surprise": 5099, + "surprised": 8949, + "surprises": 16920, + "surprising": 14964, + "surprisingly": 17367, + "surreal": 18408, + "surrealism": 41773, + "surrender": 20964, + "surrendered": 44601, + "surrey": 26489, + "surrey": 14315, + "surro": 47499, + "surroun": 8250, + "surround": 26543, + "surround": 22999, + "surrounded": 13589, + "surrounding": 12544, + "surroundings": 26915, + "surrounds": 39012, + "suru": 49240, + "surve": 8952, + "surveill": 15408, + "surveillance": 15578, + "survey": 45914, + "survey": 6809, + "surveying": 33085, + "surveys": 25096, + "survi": 3440, + "surviv": 12922, + "survival": 10172, + "survive": 10431, + "survived": 13483, + "survives": 30927, + "surviving": 18609, + "survivor": 31934, + "survivor": 10944, + "survivors": 13711, + "surya": 37767, + "sus": 8091, + "sus": 3036, + "susa": 20546, + "susan": 19922, + "susan": 10168, + "suscep": 44270, + "sush": 22298, + "sushi": 11729, + "sushmaswar": 48200, + "susie": 32284, + "susp": 7971, + "suspec": 10298, + "suspect": 9065, + "suspected": 15579, + 
"suspects": 18265, + "suspen": 10578, + "suspend": 41007, + "suspended": 13126, + "suspends": 39535, + "suspense": 21556, + "suspension": 15417, + "suspici": 25714, + "suspicion": 34910, + "suspicious": 19862, + "sussex": 31244, + "sussex": 13266, + "sustain": 4644, + "sustain": 28156, + "sustainability": 9635, + "sustainable": 23645, + "sustainable": 7078, + "sustained": 22699, + "sustaining": 44418, + "sut": 23984, + "sut": 28956, + "sutherland": 27592, + "sutton": 39359, + "sutton": 18564, + "suv": 15985, + "suz": 9957, + "suzanne": 24617, + "suzu": 36289, + "suzuki": 16892, + "suzy": 26552, + "sv": 6508, + "sv": 17083, + "svc": 45065, + "sve": 47637, + "sven": 37786, + "sven": 45183, + "sver": 45923, + "sville": 44580, + "sville": 6741, + "svp": 28465, + "svt": 42014, + "svu": 32123, + "sw": 1220, + "sw": 4457, + "swa": 4707, + "swa": 31916, + "swach": 20862, + "swachhb": 31898, + "swachhbharat": 36927, + "swag": 8852, + "swag": 8177, + "swagg": 47702, + "swagger": 35797, + "swain": 43226, + "swal": 13433, + "swallow": 28979, + "swallowed": 46956, + "swallows": 45124, + "swam": 42539, + "swami": 25021, + "swamp": 41953, + "swamp": 16595, + "swamy": 28445, + "swan": 8215, + "swan": 12530, + "swana": 24699, + "swans": 19516, + "swansea": 16567, + "swanson": 34797, + "swap": 15234, + "swapped": 39077, + "swapping": 44702, + "swaps": 49242, + "swar": 11680, + "swarm": 31577, + "swarovski": 28515, + "swat": 32547, + "swat": 26482, + "swatch": 48053, + "sway": 26443, + "sway": 26617, + "swc": 42231, + "swe": 2350, + "swe": 38070, + "swear": 7406, + "swearing": 32627, + "sweat": 10282, + "sweat": 12663, + "sweater": 11455, + "sweaters": 31303, + "sweating": 33215, + "sweats": 39321, + "sweatshirt": 22442, + "sweaty": 28419, + "sweden": 8760, + "swedish": 11585, + "swee": 1812, + "sweek": 30017, + "sweeney": 27286, + "sweep": 23220, + "sweep": 13669, + "sweeping": 25719, + "sweeps": 26887, + "sweepstakes": 25992, + "sweet": 10957, + "sweet": 2418, + "sweetened": 45577, + "sweeter": 32873, + "sweetest": 15180, + "sweethe": 16316, + "sweetheart": 18079, + "sweetie": 24450, + "sweetness": 29713, + "sweets": 18045, + "swel": 48470, + "swell": 35538, + "swell": 21490, + "swelling": 46578, + "swept": 23311, + "swer": 30514, + "swfc": 30227, + "swfl": 46607, + "swi": 3881, + "swi": 45223, + "swick": 17159, + "swif": 28548, + "swift": 34843, + "swift": 8229, + "swild": 33909, + "swild": 38696, + "swildlife": 46818, + "swim": 4928, + "swim": 7681, + "swimmer": 25475, + "swimmers": 27776, + "swimming": 7411, + "swims": 46798, + "swimsuit": 25504, + "swimwear": 31889, + "swin": 14554, + "swin": 40798, + "swindon": 29540, + "swine": 31166, + "swing": 25292, + "swing": 7429, + "swinging": 26760, + "swings": 29141, + "swipe": 31828, + "swire": 42753, + "swirl": 35795, + "swis": 23611, + "swish": 38571, + "swiss": 37917, + "swiss": 9287, + "swit": 3726, + "switch": 22480, + "switch": 5893, + "switched": 22869, + "switches": 33569, + "switching": 21155, + "swith": 17299, + "switzer": 9835, + "switzerland": 9912, + "swivel": 48256, + "swo": 38673, + "swol": 29575, + "swollen": 36129, + "swoo": 29744, + "swood": 24158, + "swoon": 37028, + "swoop": 45661, + "sword": 33294, + "sword": 11356, + "swords": 27181, + "swork": 42722, + "sworld": 33305, + "sworn": 21130, + "sworth": 13322, + "swt": 38878, + "swx": 20597, + "sx": 9402, + "sx": 17806, + "sxsw": 13369, + "sy": 974, + "sy": 2126, + "sya": 35017, + "sycam": 34911, + "sycamore": 43086, + "syd": 4525, + "syd": 22504, + "sydney": 15878, + "sydney": 5278, + 
"syed": 27624, + "syfy": 32047, + "sykes": 27287, + "syl": 6452, + "sylla": 41708, + "sylvania": 12011, + "sylve": 28369, + "sylvester": 37214, + "sylvia": 25670, + "sym": 3645, + "sym": 40327, + "symb": 22987, + "symbol": 13085, + "symboli": 22019, + "symbolic": 33177, + "symbolism": 44679, + "symbols": 25476, + "symmetry": 31427, + "symp": 11468, + "sympathi": 47493, + "sympathy": 32477, + "symph": 9544, + "symphonic": 42639, + "symphony": 11180, + "sympo": 9730, + "symposium": 9971, + "symptom": 47799, + "symptoms": 12956, + "syn": 3758, + "syn": 36090, + "synago": 30945, + "synagogue": 33518, + "sync": 20081, + "synchron": 23943, + "syndic": 21098, + "syndicate": 28779, + "syndrome": 10927, + "syner": 22283, + "synergy": 32012, + "syno": 31533, + "synod": 47712, + "synopsis": 47018, + "synth": 33841, + "synth": 24462, + "synthe": 22604, + "synthesi": 33565, + "synthesis": 21602, + "synthesizer": 44077, + "synthetic": 19917, + "syou": 26742, + "syour": 21718, + "syrac": 17279, + "syracuse": 19640, + "syrah": 45364, + "syri": 18917, + "syria": 5563, + "syrian": 47562, + "syrian": 10041, + "syrians": 41392, + "syrup": 16611, + "sys": 26726, + "syste": 1933, + "system": 47813, + "system": 2422, + "systematic": 28586, + "systemic": 33807, + "systems": 4828, + "sz": 13438, + "sz": 15879, + "sze": 44507, + "szn": 48092, + "são": 45911, + "sé": 37879, + "t": 83, + "t": 339, + "ta": 648, + "ta": 1397, + "taa": 43874, + "tab": 2648, + "tab": 14724, + "tabby": 36145, + "tabern": 48991, + "tability": 15770, + "table": 12108, + "table": 2175, + "tableau": 39723, + "tables": 7822, + "tablet": 12494, + "tabletop": 46843, + "tabletop": 25773, + "tablets": 20436, + "tably": 24440, + "taboo": 38400, + "tabs": 29163, + "tac": 3145, + "tac": 22653, + "tache": 39239, + "tack": 6339, + "tack": 34446, + "tackle": 10294, + "tackled": 47218, + "tackles": 18021, + "tackling": 19628, + "taco": 31924, + "taco": 12436, + "tacoma": 25397, + "tacos": 14090, + "tactic": 40377, + "tactical": 17137, + "tactics": 16410, + "tacular": 48985, + "tad": 15890, + "tad": 19860, + "tado": 40846, + "tae": 15257, + "tae": 15580, + "taehyung": 24642, + "taek": 30753, + "taekwondo": 39963, + "taemin": 30600, + "taeyang": 45802, + "taeyeon": 27389, + "taf": 29660, + "taft": 42141, + "tag": 3456, + "tag": 3640, + "tage": 2669, + "tages": 39902, + "tagged": 12969, + "tagging": 25138, + "tagne": 47467, + "tags": 11606, + "tah": 14822, + "tah": 7090, + "tahit": 45385, + "tahoe": 26140, + "tai": 6511, + "tai": 13040, + "taiji": 30185, + "tail": 7156, + "tail": 4132, + "tailed": 20626, + "tailgate": 23168, + "tailgating": 42625, + "tailo": 27230, + "tailor": 29870, + "tailored": 28275, + "tailoring": 46357, + "tails": 16066, + "tain": 2841, + "tain": 1908, + "taine": 21214, + "taine": 32299, + "tained": 10212, + "taining": 7565, + "tainment": 30063, + "tains": 3952, + "tainted": 47211, + "taipei": 24356, + "tair": 29143, + "tairp": 43707, + "tait": 45325, + "taiwan": 36319, + "taiwan": 12626, + "taiwanese": 41416, + "taj": 28937, + "taj": 24805, + "taji": 46358, + "tak": 15070, + "tak": 14458, + "taka": 24070, + "taka": 40968, + "take": 5052, + "take": 1172, + "takeaway": 25737, + "takeaways": 32080, + "takeme": 41748, + "taken": 2807, + "takeoff": 32789, + "takeover": 11863, + "taker": 17939, + "takers": 30775, + "takes": 2633, + "takin": 30890, + "taking": 2019, + "taku": 48168, + "tal": 976, + "tal": 2066, + "tala": 29845, + "talaga": 35349, + "talbot": 30585, + "tale": 33971, + "tale": 7798, + "talent": 30435, + "talent": 5114, + 
"talented": 5331, + "talents": 16136, + "tales": 9469, + "tali": 12122, + "tali": 45406, + "taliban": 20788, + "talis": 36480, + "tality": 15631, + "talk": 12462, + "talk": 1841, + "talked": 10153, + "talkin": 26040, + "talking": 31463, + "talking": 2578, + "talks": 3237, + "tall": 11664, + "tall": 7771, + "talla": 21528, + "tallade": 44220, + "tallahassee": 37832, + "taller": 23470, + "tallest": 19774, + "tallinn": 45079, + "tally": 16323, + "talon": 47897, + "tam": 2661, + "tam": 12246, + "tama": 45424, + "tamanna": 48055, + "tamar": 22901, + "tamara": 35697, + "tame": 38557, + "tame": 32778, + "tamed": 40575, + "tami": 39429, + "tamil": 23046, + "tamil": 14033, + "tamilnadu": 32371, + "tamine": 42566, + "tammy": 28396, + "tampa": 10906, + "tampab": 37852, + "tamu": 34105, + "tan": 2123, + "tan": 5039, + "tana": 21396, + "tand": 20244, + "tandem": 33756, + "tane": 13344, + "tane": 24923, + "taneous": 22275, + "taneously": 24422, + "tang": 10425, + "tang": 20794, + "tanger": 31844, + "tangerine": 42045, + "tangible": 44823, + "tangle": 36568, + "tangled": 33587, + "tango": 24089, + "tani": 31374, + "tani": 32985, + "tania": 45369, + "tank": 29858, + "tank": 6172, + "tanker": 25020, + "tanks": 14223, + "tann": 19174, + "tanner": 22001, + "tanning": 27985, + "tans": 27332, + "tant": 41383, + "tant": 41695, + "tante": 48262, + "tanto": 45685, + "tany": 34410, + "tanya": 26800, + "tanz": 47399, + "tanzania": 15711, + "tao": 29084, + "tao": 18923, + "tap": 17923, + "tap": 7888, + "tapas": 27361, + "tape": 18332, + "tape": 5749, + "taped": 33219, + "tapes": 17903, + "tapestry": 33525, + "taping": 24355, + "tapp": 27644, + "tapp": 27764, + "tapped": 26649, + "tapping": 27882, + "tapro": 34415, + "taproom": 40266, + "taps": 23267, + "tar": 2002, + "tar": 6977, + "tara": 15264, + "tarak": 37813, + "taran": 32370, + "tarantino": 41180, + "tarde": 48670, + "tardis": 35410, + "tares": 34587, + "targe": 9620, + "target": 38556, + "target": 5400, + "targeted": 14968, + "targeting": 15818, + "targets": 12468, + "tari": 4238, + "tari": 38012, + "tarian": 11762, + "tarians": 42789, + "taries": 47291, + "tariff": 40220, + "tariffs": 28335, + "tariq": 42526, + "tarmac": 44294, + "taro": 26264, + "tarot": 23702, + "tart": 16707, + "tart": 14120, + "tartan": 35064, + "tarts": 29799, + "tary": 31729, + "tary": 5065, + "tarzan": 45463, + "tas": 6538, + "tas": 10163, + "tash": 35272, + "tasha": 44967, + "task": 39189, + "task": 10549, + "tasks": 19453, + "tasmania": 22429, + "tasmanian": 45102, + "tassel": 49276, + "tast": 10839, + "taste": 14314, + "taste": 5219, + "tasted": 22827, + "tasteof": 38097, + "taster": 29743, + "tastes": 13736, + "tastic": 21337, + "tasting": 7656, + "tastings": 49273, + "tasty": 43390, + "tasty": 8568, + "tat": 2652, + "tat": 21592, + "tata": 19300, + "tate": 44476, + "tate": 13295, + "tath": 27566, + "tati": 31433, + "tatiana": 48837, + "tation": 5280, + "tations": 32324, + "tator": 18791, + "tators": 37206, + "tats": 44557, + "tatt": 9232, + "tatted": 41605, + "tattoo": 15980, + "tattoo": 6325, + "tattooed": 28541, + "tattoos": 14900, + "tatum": 26103, + "tau": 6620, + "tau": 20510, + "taught": 9306, + "taun": 23910, + "taunton": 40681, + "taurus": 32881, + "taver": 37776, + "tavern": 18644, + "taw": 33868, + "taw": 40289, + "tawa": 29035, + "tawards": 14351, + "tax": 4581, + "tax": 3879, + "taxation": 36847, + "taxes": 11462, + "taxi": 25160, + "taxi": 11380, + "taxider": 47420, + "taxis": 34009, + "taxpay": 17986, + "taxpayer": 30978, + "taxpayers": 25503, + "tay": 6542, + 
"tay": 15073, + "taya": 38484, + "tayl": 3913, + "taylor": 9044, + "taylor": 3961, + "taylorswift": 18936, + "tayo": 33941, + "taz": 41475, + "taz": 31870, + "tb": 1990, + "tb": 7490, + "tba": 34363, + "tball": 8390, + "tball": 1467, + "tbc": 31807, + "tbd": 45548, + "tbh": 13238, + "tbi": 45868, + "tbl": 42962, + "tbli": 43664, + "tblightning": 44178, + "tbo": 34255, + "tbr": 46643, + "tbs": 37368, + "tbt": 2950, + "tc": 6820, + "tc": 5454, + "tca": 35116, + "tch": 10744, + "tch": 4048, + "tches": 42001, + "tcm": 21501, + "tcm": 26588, + "tcmparty": 24338, + "tcot": 8995, + "tcs": 39107, + "tcu": 26791, + "td": 20578, + "td": 3192, + "tdf": 21844, + "tdi": 45621, + "tdp": 47009, + "tds": 20238, + "tdsb": 29836, + "te": 600, + "te": 756, + "tea": 41053, + "tea": 3274, + "teach": 2043, + "teach": 6865, + "teacher": 18051, + "teacher": 4008, + "teachers": 5069, + "teaches": 17110, + "teaching": 5141, + "teachings": 32119, + "teal": 22821, + "team": 2085, + "team": 1027, + "teamcanada": 46636, + "teamed": 20590, + "teamgb": 40971, + "teaming": 24392, + "teammate": 17900, + "teammates": 13921, + "teams": 3891, + "teamsisd": 34703, + "teamusa": 28625, + "teamwork": 14657, + "teaparty": 33065, + "teapo": 35745, + "teapot": 40749, + "tear": 15802, + "tear": 11862, + "tearful": 46873, + "tearing": 24785, + "tears": 7688, + "teas": 23003, + "teas": 29314, + "tease": 25163, + "teased": 49122, + "teaser": 8982, + "teasers": 48990, + "teases": 28509, + "teasing": 36507, + "teat": 26376, + "teatime": 48948, + "teatro": 35756, + "teau": 24931, + "tebow": 37797, + "tec": 17381, + "tec": 11612, + "tech": 1782, + "tech": 2061, + "techcrunch": 42110, + "techn": 6252, + "technews": 31787, + "technic": 16639, + "technic": 37666, + "technical": 49231, + "technical": 7582, + "technically": 23180, + "technician": 22540, + "technicians": 35513, + "techno": 2599, + "techno": 17564, + "technological": 23068, + "technologies": 10040, + "technology": 3089, + "techs": 41353, + "ted": 4841, + "ted": 775, + "tedcruz": 27517, + "teddy": 25758, + "teddy": 11798, + "tedly": 8539, + "tedu": 42517, + "tedx": 17950, + "tedx": 41504, + "tee": 12676, + "tee": 3385, + "teed": 13692, + "teen": 5398, + "teen": 4697, + "teenage": 14069, + "teenager": 19338, + "teenagers": 25989, + "teenchoice": 28203, + "teens": 12375, + "teenth": 20249, + "teenwolf": 40067, + "teeny": 41622, + "teer": 48648, + "tees": 9641, + "teessi": 43295, + "teeth": 8225, + "tega": 29508, + "tegr": 39801, + "teh": 18720, + "teh": 29601, + "tehran": 26399, + "tein": 33223, + "tej": 46724, + "tek": 17489, + "tek": 18294, + "tekken": 29843, + "tel": 4978, + "tel": 2226, + "telang": 23469, + "telangana": 26386, + "tele": 3103, + "tele": 32851, + "telecom": 21057, + "telecommunications": 39900, + "telegram": 26780, + "telegraph": 14713, + "telephone": 17243, + "telescope": 19037, + "telethon": 49266, + "televised": 39470, + "television": 8608, + "telford": 38323, + "tell": 16069, + "tell": 2330, + "teller": 20415, + "tellers": 42707, + "telling": 5507, + "tells": 5217, + "tellu": 42511, + "telly": 31475, + "tels": 43607, + "telugu": 22927, + "tely": 5630, + "tem": 2404, + "tem": 17536, + "tema": 45881, + "teme": 43378, + "temp": 2684, + "temp": 11097, + "tempe": 36723, + "temper": 5981, + "temper": 35521, + "temperature": 9543, + "temperatures": 11575, + "tempered": 40521, + "tempest": 36053, + "templ": 16679, + "template": 18591, + "templates": 30498, + "temple": 21841, + "temple": 5620, + "temples": 24024, + "tempo": 19625, + "tempor": 4858, + "temporal": 43656, 
+ "temporarily": 23189, + "temporary": 6513, + "temps": 11668, + "tempt": 28460, + "temptation": 30118, + "tempted": 26226, + "tempting": 34876, + "ten": 1149, + "ten": 2581, + "tenant": 16954, + "tenants": 26023, + "tenay": 45384, + "tenberg": 31329, + "tend": 17630, + "tend": 21252, + "tendency": 47277, + "tender": 23020, + "tender": 9838, + "tenderloin": 42750, + "tenders": 44741, + "tending": 35084, + "tendon": 48459, + "tends": 39962, + "tene": 24868, + "tened": 13682, + "tener": 29054, + "teneri": 28000, + "tenerife": 29401, + "teners": 41307, + "teness": 18018, + "teng": 34016, + "teng": 28474, + "tennant": 29310, + "tennes": 9514, + "tennessee": 10053, + "tennis": 31504, + "tennis": 5298, + "tenor": 30521, + "tens": 14062, + "tense": 23518, + "tension": 15221, + "tensions": 24224, + "tenstein": 49139, + "tent": 18505, + "tent": 10782, + "tentative": 48238, + "tenth": 27483, + "tention": 12191, + "tents": 30730, + "tenure": 30739, + "teo": 18665, + "tep": 31806, + "tequ": 17502, + "tequila": 18510, + "ter": 704, + "ter": 652, + "tera": 15155, + "teras": 44830, + "tere": 11329, + "tered": 49272, + "tered": 4389, + "terence": 33806, + "teresa": 19081, + "teri": 30917, + "teria": 22685, + "terie": 42276, + "tering": 7929, + "term": 40991, + "term": 4780, + "termin": 4766, + "terminal": 11816, + "terminals": 44091, + "terminator": 29609, + "terminology": 48896, + "terms": 8663, + "tern": 41572, + "tern": 12959, + "terns": 25251, + "tero": 20727, + "tero": 24697, + "terps": 41471, + "terr": 3921, + "terra": 22366, + "terra": 18816, + "terrac": 28549, + "terrace": 13820, + "terraces": 47508, + "terracotta": 45123, + "terrain": 20184, + "terran": 43726, + "terre": 33888, + "terre": 27537, + "terrell": 39494, + "terrence": 38746, + "terrestrial": 46299, + "terri": 4504, + "terri": 36722, + "terrible": 9741, + "terribly": 34558, + "terrier": 14455, + "terriers": 47047, + "terrific": 13837, + "terrified": 28204, + "terrifying": 18526, + "territ": 10720, + "territorial": 39163, + "territories": 32846, + "territory": 13936, + "terror": 9596, + "terror": 9327, + "terrori": 6836, + "terrorism": 10583, + "terrorist": 10575, + "terrorists": 12835, + "terry": 19378, + "terry": 8561, + "ters": 24102, + "ters": 1737, + "terti": 48386, + "tery": 4184, + "tes": 8019, + "tes": 3609, + "tesco": 15434, + "tese": 33320, + "tesla": 12254, + "tess": 21807, + "tess": 20840, + "tessa": 32063, + "test": 7738, + "test": 1628, + "testam": 23477, + "testament": 24609, + "tested": 10576, + "tester": 32707, + "testi": 18373, + "testic": 42364, + "testify": 33088, + "testifying": 46347, + "testim": 12553, + "testimonial": 28834, + "testimony": 18672, + "testing": 4967, + "testo": 42428, + "testosterone": 45168, + "tests": 8715, + "tet": 40468, + "tet": 13275, + "tetra": 40902, + "tetris": 45934, + "teu": 47152, + "teuk": 39979, + "teur": 27120, + "tex": 2056, + "tex": 11728, + "texan": 35287, + "texan": 38386, + "texans": 17580, + "texanscheer": 43717, + "texas": 15713, + "texas": 3403, + "texaste": 46469, + "text": 18169, + "text": 4160, + "textbook": 25952, + "textbooks": 44041, + "texted": 29004, + "textile": 19789, + "textiles": 24326, + "texting": 18600, + "texts": 12767, + "texture": 16505, + "textured": 32168, + "textures": 28063, + "tey": 32395, + "tez": 22664, + "tf": 18828, + "tf": 5001, + "tfc": 30186, + "tfl": 29918, + "tford": 22493, + "tful": 17108, + "tfw": 16741, + "tg": 7665, + "tg": 11981, + "tgif": 14483, + "th": 513, + "th": 640, + "tha": 18470, + "tha": 4715, + "thab": 38219, + "thad": 48339, + 
"thai": 28054, + "thai": 8825, + "thail": 7258, + "thailand": 7469, + "thak": 22801, + "thakur": 38427, + "thal": 7967, + "thal": 12323, + "thala": 17784, + "thalai": 25206, + "thalaivar": 44918, + "thalap": 39789, + "thalapathy": 45405, + "thalapathy": 23324, + "thall": 36007, + "tham": 11761, + "tham": 8896, + "thames": 43472, + "thames": 15321, + "than": 792, + "than": 1126, + "thand": 44465, + "thane": 21463, + "thang": 24870, + "thani": 31322, + "thank": 2790, + "thank": 1144, + "thanked": 32079, + "thankful": 38839, + "thankful": 6217, + "thankfully": 22089, + "thanking": 21989, + "thanks": 5672, + "thanks": 1085, + "thanksgiving": 45732, + "thanksgiving": 6167, + "thanku": 45710, + "thankyou": 18050, + "thankyou": 9911, + "thanniversary": 35564, + "thanos": 36709, + "thanx": 25095, + "thar": 14396, + "thar": 38843, + "thard": 43474, + "that": 6303, + "that": 682, + "thatcher": 32496, + "thats": 44636, + "thats": 9254, + "thaw": 26081, + "thaw": 47229, + "thbewithyou": 41067, + "thc": 20091, + "thcentury": 49111, + "thd": 28219, + "thday": 37801, + "the": 599, + "the": 518, + "thea": 15935, + "thea": 25429, + "thead": 25259, + "theal": 45728, + "thealth": 31398, + "thear": 43283, + "theart": 44678, + "theast": 8378, + "theastern": 17877, + "theat": 2263, + "theater": 39438, + "theater": 6128, + "theaters": 14689, + "theatre": 19857, + "theatre": 3292, + "theatres": 21680, + "theatrical": 26833, + "theband": 27695, + "thebeatles": 35645, + "thebest": 40883, + "thebest": 25856, + "thebig": 24732, + "theblack": 47718, + "thec": 48659, + "thed": 31405, + "thedaily": 33550, + "theday": 4408, + "thedream": 39417, + "thee": 44475, + "thee": 15108, + "theeconomist": 44518, + "theellenshow": 35342, + "thefilm": 31665, + "theflash": 25434, + "theforce": 40002, + "theforceawakens": 48033, + "theft": 13286, + "thefuture": 34287, + "thegame": 24428, + "thegood": 28594, + "thegreat": 28721, + "thei": 44522, + "their": 911, + "theirs": 29297, + "thel": 5403, + "thelast": 23495, + "thelastjedi": 47992, + "theless": 27712, + "theli": 15277, + "thelittle": 46872, + "thelo": 47036, + "thelove": 40668, + "thelove": 43200, + "them": 5435, + "them": 1180, + "themasters": 48378, + "theme": 38524, + "theme": 5849, + "themed": 10126, + "themes": 17849, + "themet": 48183, + "themovie": 27062, + "themselves": 6503, + "then": 5929, + "then": 1594, + "thenburg": 45209, + "thene": 17012, + "thenew": 24212, + "thenext": 47881, + "thenight": 43336, + "theno": 37172, + "thenorth": 34338, + "theo": 17043, + "theo": 18084, + "theod": 26653, + "theodore": 30743, + "theological": 41162, + "theology": 24095, + "theon": 34653, + "theone": 46231, + "theopen": 41438, + "theore": 22690, + "theoretical": 35585, + "theori": 34804, + "theories": 23937, + "theory": 7143, + "thepeople": 33597, + "thepersonal": 29981, + "thepersonalnetwork": 30016, + "thephoto": 18303, + "thephotohour": 18607, + "ther": 1160, + "ther": 743, + "therap": 4499, + "therapeu": 19332, + "therapeutic": 23240, + "therapeutics": 49101, + "therapies": 30179, + "therapist": 20608, + "therapists": 34763, + "therapper": 49340, + "therapy": 5257, + "there": 5283, + "there": 997, + "thereal": 8074, + "thereal": 41140, + "thereby": 43308, + "thered": 10208, + "therefore": 16865, + "theres": 18494, + "theresa": 14126, + "therese": 47996, + "theresistance": 22845, + "theri": 28967, + "theri": 45297, + "therine": 26807, + "therine": 9239, + "thering": 7891, + "therland": 25351, + "thermal": 13689, + "thermo": 22303, + "thermom": 31138, + "thermometer": 38172, + 
"thermost": 42391, + "thern": 10919, + "thern": 3137, + "thero": 13165, + "theroad": 29807, + "therock": 30036, + "theroy": 38146, + "thers": 1959, + "thes": 40556, + "thes": 6460, + "thescript": 47061, + "these": 40366, + "these": 1071, + "theses": 39388, + "thesimpsons": 45513, + "thesims": 34192, + "thesis": 10673, + "thessal": 41491, + "thessaloni": 41753, + "thest": 35343, + "thesun": 45617, + "theta": 27694, + "thetic": 7954, + "thetimes": 36039, + "thevamp": 33701, + "thevoice": 47206, + "thevoice": 30258, + "thewalkingdead": 18087, + "thewanted": 43008, + "theworld": 44988, + "theworld": 17475, + "thex": 35990, + "they": 15174, + "they": 889, + "theyre": 28266, + "thfc": 17729, + "thi": 2362, + "thi": 9111, + "thia": 17943, + "thiago": 44537, + "thian": 23214, + "thians": 28187, + "thibau": 48351, + "thic": 26107, + "thic": 11794, + "thick": 18417, + "thick": 11006, + "thicker": 43302, + "thickness": 40754, + "thief": 18508, + "thier": 25595, + "thierry": 32929, + "thieves": 17899, + "thigh": 47124, + "thigh": 22877, + "thighs": 30847, + "thik": 20512, + "thika": 44619, + "thill": 31266, + "thim": 42331, + "thin": 2178, + "thin": 7847, + "thine": 47192, + "thing": 7499, + "thing": 946, + "things": 30670, + "things": 1739, + "thingsto": 43924, + "thingy": 36888, + "think": 9820, + "think": 1331, + "thinkbig": 26015, + "thinkbigsundaywithmarsha": 26666, + "thinker": 34577, + "thinkers": 32779, + "thinkin": 34443, + "thinking": 3291, + "thinks": 6109, + "thinner": 47247, + "thir": 6030, + "third": 32102, + "third": 3981, + "thirds": 42582, + "thirst": 23563, + "thirsty": 39731, + "thirsty": 17521, + "thirteen": 34209, + "thirty": 20813, + "thiru": 43292, + "this": 4340, + "this": 589, + "thisday": 6532, + "thisdayin": 33641, + "thisdayinhistory": 46913, + "thisi": 7299, + "thisis": 14887, + "thismorning": 36245, + "thistle": 29039, + "thistory": 28904, + "thium": 21804, + "thletics": 17765, + "thm": 10407, + "thman": 30079, + "thms": 19874, + "thn": 44155, + "thn": 45587, + "thnx": 25480, + "tho": 1325, + "tho": 5025, + "thof": 18943, + "thofjuly": 21613, + "thol": 29319, + "thole": 31029, + "tholes": 42465, + "thology": 9881, + "thom": 2585, + "thom": 24094, + "thomas": 12574, + "thomas": 3888, + "thome": 21289, + "thomp": 37274, + "thompson": 42181, + "thompson": 8535, + "thomson": 24151, + "thon": 38776, + "thon": 8924, + "thong": 37058, + "thood": 15623, + "thor": 4130, + "thor": 13691, + "thora": 46866, + "thorn": 12957, + "thorn": 18466, + "thorne": 18025, + "thorns": 33650, + "thornton": 23592, + "thorough": 15294, + "thorough": 34788, + "thoroughbred": 43248, + "thoroughly": 19750, + "thorpe": 18099, + "thos": 41965, + "those": 1753, + "thot": 33736, + "thou": 1513, + "thou": 17781, + "though": 2846, + "thought": 23948, + "thought": 2449, + "thoughtful": 19592, + "thoughts": 3618, + "thour": 27125, + "thousand": 9344, + "thousands": 7089, + "thouse": 40318, + "thouse": 7819, + "thoven": 23078, + "thr": 1111, + "thr": 19138, + "thra": 17761, + "thra": 32797, + "thrash": 38262, + "thre": 1607, + "thread": 31108, + "thread": 8815, + "threads": 24957, + "threat": 7527, + "threat": 7212, + "threaten": 26097, + "threatened": 16391, + "threatening": 16400, + "threatens": 20555, + "threats": 12766, + "three": 21615, + "three": 2097, + "thren": 41776, + "thresh": 29779, + "threshold": 33791, + "threw": 12746, + "thri": 8713, + "thrift": 27779, + "thrill": 21023, + "thrilled": 7879, + "thriller": 9653, + "thrilling": 20101, + "thrills": 39829, + "thrive": 17669, + "thriving": 22677, + 
"thro": 2101, + "thro": 28624, + "throat": 16371, + "thrombo": 47585, + "throne": 15999, + "thrones": 8072, + "throp": 34939, + "throttle": 37139, + "through": 6091, + "through": 1417, + "throughout": 6721, + "throughs": 48278, + "throw": 3315, + "throw": 6293, + "throwback": 6001, + "throwback": 5058, + "throwbackthursday": 6326, + "thrower": 40199, + "throwing": 9734, + "thrown": 15079, + "throws": 14723, + "thru": 23856, + "thru": 6162, + "thrush": 46133, + "thrust": 40202, + "ths": 2079, + "tht": 23554, + "thu": 3837, + "thu": 14153, + "thub": 25660, + "thug": 37212, + "thug": 18137, + "thugs": 27686, + "thul": 28368, + "thulhu": 37560, + "thum": 14679, + "thumb": 19514, + "thumb": 18674, + "thumbnail": 32365, + "thumbs": 17599, + "thun": 32267, + "thunder": 6161, + "thunder": 8951, + "thunderbird": 45131, + "thunderbirds": 44286, + "thunderbolt": 43596, + "thunderstorm": 12005, + "thunderstorms": 19525, + "thunt": 46763, + "thur": 1837, + "thur": 21704, + "thurman": 41291, + "thurs": 9908, + "thursday": 11218, + "thursday": 2221, + "thursdaymotivation": 39375, + "thursdays": 21444, + "thursdaythoughts": 14866, + "thurst": 33970, + "thus": 12457, + "thusi": 9488, + "thwaite": 48469, + "thweeksary": 30871, + "thx": 5913, + "thy": 7804, + "thy": 3362, + "thyme": 29805, + "thyro": 25174, + "thyroid": 32558, + "ti": 555, + "ti": 2605, + "tia": 6709, + "tial": 2826, + "tially": 14503, + "tian": 23011, + "tian": 8125, + "tians": 35182, + "tiara": 38322, + "tib": 47868, + "tibet": 19927, + "tibet": 22234, + "tibetan": 24057, + "tible": 11453, + "tic": 890, + "tic": 1550, + "tica": 9669, + "tical": 34191, + "tical": 4342, + "tically": 13375, + "ticals": 30861, + "tice": 3122, + "tich": 48769, + "tician": 43358, + "ticism": 26491, + "tick": 24640, + "tick": 15617, + "ticket": 25740, + "ticket": 4500, + "ticketing": 44432, + "tickets": 2015, + "ticking": 35842, + "tickle": 42999, + "ticks": 40269, + "tico": 17670, + "ticon": 45996, + "tics": 2419, + "ticul": 15538, + "ticus": 44277, + "tid": 26002, + "tid": 23727, + "tidal": 21949, + "tide": 15698, + "tide": 9105, + "tides": 25524, + "tidy": 23858, + "tie": 14072, + "tie": 3422, + "tied": 9889, + "tiem": 34762, + "tien": 47538, + "tiene": 43438, + "tier": 14390, + "tier": 6598, + "tierney": 45693, + "tiers": 24604, + "ties": 25556, + "ties": 2499, + "tiest": 18300, + "tiesto": 46367, + "tif": 23216, + "tiff": 11112, + "tiff": 20699, + "tiffany": 30467, + "tiffany": 14446, + "tification": 43923, + "tified": 40854, + "tiful": 29123, + "tify": 6677, + "tig": 31999, + "tiger": 11954, + "tiger": 6531, + "tigers": 6934, + "tigh": 31365, + "tight": 25763, + "tight": 9123, + "tighten": 46653, + "tighter": 48193, + "tightly": 37568, + "tights": 29581, + "tijuana": 45273, + "tik": 24986, + "tik": 32403, + "tiki": 30107, + "til": 6124, + "til": 1763, + "tile": 26217, + "tile": 8227, + "tiles": 10607, + "tility": 38180, + "till": 17462, + "till": 4267, + "tilla": 26063, + "tillerson": 47738, + "tilly": 41199, + "tilt": 23601, + "tim": 1292, + "tim": 3863, + "timate": 4754, + "timb": 26627, + "timber": 14441, + "timber": 16246, + "timberlake": 28274, + "timbers": 39911, + "timberwolves": 41190, + "time": 3764, + "time": 788, + "timed": 32727, + "timehop": 19944, + "timel": 23549, + "timelapse": 48154, + "timeless": 15558, + "timeline": 11492, + "timely": 19250, + "timeout": 41536, + "timer": 19725, + "timers": 44574, + "times": 26445, + "times": 1661, + "timesnow": 45487, + "timesof": 32522, + "timesofindia": 44182, + "timetable": 31971, + "timeto": 29187, + 
"timing": 13624, + "timm": 22444, + "timmy": 33252, + "timo": 13390, + "timo": 33777, + "timothy": 42087, + "timothy": 18560, + "timp": 42166, + "tin": 1310, + "tin": 5420, + "tina": 9257, + "tinder": 24287, + "tine": 22341, + "ting": 7451, + "ting": 694, + "tinged": 44829, + "tings": 35332, + "tini": 26839, + "tink": 39278, + "tinker": 45272, + "tinker": 40910, + "tino": 20538, + "tins": 37359, + "tint": 40497, + "tinted": 42618, + "tiny": 21716, + "tiny": 5591, + "tio": 27562, + "tion": 2274, + "tion": 740, + "tional": 22460, + "tional": 2986, + "tionality": 24514, + "tionally": 12409, + "tionary": 8381, + "tione": 44318, + "tioned": 9083, + "tioning": 15528, + "tionist": 25732, + "tions": 1371, + "tious": 14255, + "tip": 15383, + "tip": 4623, + "tipoff": 44521, + "tipp": 32294, + "tipped": 31878, + "tipper": 38095, + "tipperary": 45612, + "tipping": 27827, + "tips": 3173, + "tipton": 48809, + "tiptuesday": 42112, + "tique": 37772, + "tir": 25467, + "tir": 38462, + "tire": 29128, + "tire": 9362, + "tired": 6533, + "tireless": 39835, + "tirelessly": 41548, + "tires": 15533, + "tiring": 42630, + "tiru": 36033, + "tis": 7839, + "tis": 7394, + "tise": 13745, + "tisgarh": 40538, + "tish": 45148, + "tish": 28784, + "tism": 27113, + "tiss": 28155, + "tissue": 15368, + "tissues": 32172, + "tist": 7902, + "tista": 25580, + "tists": 25944, + "tit": 1991, + "tit": 13202, + "tita": 40936, + "titan": 13496, + "titan": 15516, + "titanic": 20729, + "titanium": 24409, + "titans": 13066, + "titi": 17434, + "titi": 48504, + "title": 28033, + "title": 3644, + "titled": 9939, + "titles": 9780, + "tito": 26838, + "titus": 36102, + "tium": 21975, + "tiv": 1835, + "tiva": 41886, + "tive": 14640, + "tive": 1420, + "tively": 9883, + "tiveness": 20955, + "tives": 7570, + "tivity": 9859, + "tivo": 32162, + "tix": 5835, + "tiz": 19376, + "tj": 18890, + "tj": 18988, + "tk": 22344, + "tk": 20676, + "tko": 37347, + "tks": 38739, + "tl": 14325, + "tl": 8190, + "tland": 30697, + "tlap": 41976, + "tlc": 22047, + "tle": 39141, + "tle": 5825, + "tles": 39363, + "tless": 17427, + "tlot": 41080, + "tls": 47367, + "tly": 37483, + "tly": 1646, + "tm": 9430, + "tm": 7789, + "tman": 20796, + "tmc": 35263, + "tment": 26485, + "tml": 39445, + "tmltalk": 42260, + "tmnt": 32444, + "tmobile": 34901, + "tmr": 35906, + "tmrw": 16496, + "tms": 44496, + "tmund": 23801, + "tmw": 45827, + "tmz": 37248, + "tn": 3827, + "tn": 7248, + "tna": 21150, + "tnam": 8079, + "tner": 34922, + "tness": 35212, + "tney": 9523, + "tng": 35898, + "tnt": 20659, + "tnx": 38220, + "to": 580, + "to": 531, + "toa": 17916, + "toad": 26096, + "toast": 24654, + "toast": 10920, + "toasted": 23533, + "toaster": 39061, + "toasty": 44726, + "tob": 24260, + "tobac": 12611, + "tobacco": 13905, + "tobago": 39482, + "tobe": 17534, + "tobe": 28740, + "tober": 18162, + "tober": 2925, + "toberfest": 26249, + "tobi": 40335, + "tobi": 48374, + "tobias": 32464, + "tobin": 42466, + "toby": 29659, + "toby": 18333, + "toc": 41907, + "toc": 30643, + "tock": 25274, + "tod": 38239, + "tod": 33568, + "toda": 47141, + "todas": 36150, + "today": 11800, + "today": 721, + "todayin": 32957, + "todays": 13513, + "todayshow": 29739, + "todd": 10398, + "todd": 9951, + "toddler": 17772, + "toddlers": 36719, + "toddy": 38926, + "todo": 48857, + "todo": 23087, + "todos": 33355, + "toe": 47756, + "toe": 11344, + "toes": 16511, + "tof": 6659, + "toff": 27319, + "toffee": 34880, + "tofficial": 47953, + "tofthe": 23678, + "toftheday": 20566, + "tofu": 24692, + "tog": 45715, + "toge": 1903, + 
"together": 17858, + "together": 1952, + "togo": 26729, + "tography": 33968, + "toh": 26851, + "toi": 7472, + "toi": 26941, + "toid": 49124, + "toile": 43148, + "toilet": 11071, + "toilets": 24027, + "toire": 39534, + "tok": 16690, + "tok": 27010, + "token": 32634, + "token": 17134, + "tokens": 23562, + "tokyo": 35038, + "tokyo": 6667, + "tol": 4678, + "tol": 32962, + "told": 3527, + "tole": 15677, + "toledo": 19812, + "toler": 12150, + "tolerance": 20377, + "tolerant": 38536, + "tolerate": 35556, + "tolkien": 32989, + "toll": 44090, + "toll": 14155, + "tollywood": 42016, + "tology": 34799, + "tom": 999, + "tom": 2435, + "toma": 42360, + "toma": 44710, + "tomas": 35944, + "tomas": 27178, + "tomat": 12041, + "tomato": 9867, + "tomatoes": 13004, + "tomb": 37187, + "tomb": 15582, + "tombs": 48613, + "tombstone": 45729, + "tome": 24137, + "tome": 24283, + "tomi": 46290, + "tomlin": 46649, + "tomlinson": 17484, + "tommorow": 42871, + "tommy": 16573, + "tommy": 8876, + "tomo": 31223, + "tomo": 34434, + "tomor": 1277, + "tomorrow": 19728, + "tomorrow": 1293, + "tomorrowland": 34951, + "tomorrows": 32258, + "tomorrowspaper": 35005, + "tomorrowspaperstoday": 35190, + "tomp": 43544, + "tompkins": 49068, + "toms": 10545, + "tomy": 18730, + "ton": 838, + "ton": 917, + "tona": 13459, + "tone": 32366, + "tone": 8408, + "toned": 29426, + "toner": 40614, + "tones": 14744, + "tong": 21510, + "tonga": 37882, + "tongue": 44820, + "tongue": 13626, + "tongues": 39837, + "toni": 17766, + "toni": 17171, + "tonic": 17808, + "tonics": 34647, + "tonight": 1009, + "tonights": 23312, + "tonite": 13449, + "tonka": 42781, + "tonline": 45867, + "tonne": 42450, + "tonnes": 24813, + "tons": 7555, + "tony": 9150, + "tony": 4767, + "tonyawards": 46068, + "too": 1843, + "too": 1256, + "took": 2280, + "tool": 13718, + "tool": 5999, + "toolbox": 46599, + "toolkit": 29849, + "tools": 5771, + "toom": 27550, + "toon": 24664, + "toon": 19701, + "toonami": 48336, + "toons": 35345, + "toor": 42590, + "tooth": 15316, + "tooth": 12030, + "toothbrush": 36841, + "toothpaste": 37322, + "tooting": 42969, + "top": 5534, + "top": 1253, + "topaz": 46125, + "tope": 32149, + "tope": 42239, + "topeka": 46884, + "topia": 29618, + "topic": 8720, + "topical": 37464, + "topics": 11916, + "topless": 37415, + "topo": 23008, + "topoli": 30152, + "topp": 19529, + "topped": 12588, + "topper": 31780, + "toppers": 41651, + "topping": 21071, + "toppings": 47554, + "topps": 20201, + "tops": 8154, + "topshop": 40953, + "topus": 21495, + "tor": 937, + "tor": 1208, + "tora": 45147, + "torah": 37945, + "toral": 45282, + "torch": 31921, + "torch": 15820, + "tore": 38066, + "tore": 19385, + "tored": 38046, + "torg": 33214, + "tori": 17689, + "tori": 17539, + "toria": 23732, + "torial": 28029, + "torian": 48399, + "tories": 14193, + "torino": 29178, + "torio": 34235, + "torn": 8572, + "torn": 18023, + "tornad": 24676, + "tornado": 9062, + "tornadoes": 28254, + "toro": 17892, + "toron": 37407, + "toronto": 16866, + "toronto": 4514, + "torpe": 34093, + "torpedo": 46582, + "torquay": 45738, + "torque": 31940, + "torre": 39563, + "torre": 38009, + "torrent": 42317, + "torrential": 41158, + "torres": 16049, + "tors": 2546, + "tortilla": 32683, + "torto": 24170, + "tortoise": 30178, + "torture": 16013, + "tortured": 29900, + "tory": 29390, + "tory": 4214, + "tos": 6094, + "tosc": 37719, + "tose": 38154, + "tosh": 17109, + "toshi": 31744, + "toss": 19656, + "tossed": 31296, + "tot": 4618, + "tot": 23659, + "total": 13507, + "total": 4445, + "totally": 5440, + "totals": 
25772, + "tote": 48145, + "tote": 19031, + "totem": 45376, + "totes": 37199, + "tothe": 12222, + "toto": 39823, + "tots": 24978, + "totten": 14360, + "tottenham": 14889, + "tou": 1879, + "tou": 29261, + "touch": 9480, + "touch": 4526, + "touchdown": 18664, + "touchdowns": 37905, + "touched": 13190, + "touches": 14832, + "touching": 14088, + "touchscreen": 39095, + "tough": 12063, + "tough": 5499, + "tougher": 33722, + "toughest": 23773, + "toughness": 45522, + "toulou": 27145, + "toulouse": 30267, + "tour": 2710, + "tour": 1760, + "tourde": 39247, + "toured": 27654, + "touri": 4224, + "touring": 11853, + "tourism": 23661, + "tourism": 6556, + "tourist": 12123, + "tourists": 15546, + "tournament": 4097, + "tournaments": 23058, + "tourney": 12603, + "tours": 8948, + "tous": 37424, + "tout": 22300, + "touts": 41274, + "tov": 28970, + "tow": 11557, + "tow": 18653, + "toward": 8508, + "towards": 4447, + "towed": 45419, + "towel": 15953, + "towels": 26578, + "tower": 26669, + "tower": 4730, + "towering": 39444, + "towers": 12701, + "towie": 44613, + "towin": 45819, + "towing": 36963, + "town": 4068, + "town": 1605, + "townfc": 33981, + "townhall": 33408, + "townhouse": 40178, + "towns": 14173, + "townsend": 26826, + "township": 14622, + "townsville": 47330, + "towork": 48233, + "tox": 7742, + "tox": 16145, + "toxic": 27436, + "toxic": 12348, + "toxicity": 41234, + "toxin": 48899, + "toxins": 36618, + "toy": 14387, + "toy": 5988, + "toya": 37602, + "toyo": 7644, + "toyota": 8908, + "toys": 39508, + "toys": 7162, + "tp": 23760, + "tp": 15188, + "tpp": 29411, + "tps": 35246, + "tq": 43066, + "tr": 635, + "tr": 6337, + "tra": 752, + "tra": 2483, + "trac": 2266, + "trace": 48611, + "trace": 14767, + "traced": 47956, + "traces": 30913, + "tracey": 25558, + "tracing": 27897, + "track": 10887, + "track": 2700, + "tracked": 27049, + "tracker": 18123, + "tracking": 10428, + "tracklist": 39777, + "tracks": 7579, + "tract": 4690, + "traction": 10644, + "tractor": 14607, + "tractors": 37854, + "tracy": 32984, + "tracy": 15508, + "trad": 48716, + "trad": 38037, + "trade": 10457, + "trade": 3629, + "traded": 18860, + "trademark": 25011, + "trader": 17700, + "traders": 19112, + "trades": 18519, + "trading": 40083, + "trading": 6520, + "tradio": 20689, + "tradition": 20838, + "tradition": 8784, + "traditional": 41113, + "traditional": 5604, + "traditionally": 35532, + "traditions": 18016, + "traf": 3227, + "trafal": 32461, + "trafalgar": 36969, + "traff": 31571, + "traffic": 12080, + "traffic": 3399, + "trafficking": 15983, + "trafford": 22912, + "trage": 12430, + "tragedy": 14082, + "tragic": 14828, + "tragically": 39599, + "trail": 11523, + "trail": 4921, + "trailblazer": 41015, + "trailblazers": 35954, + "trailer": 4700, + "trailers": 24862, + "trailing": 37427, + "trails": 10633, + "train": 9122, + "train": 3231, + "trained": 10874, + "trainee": 25795, + "trainees": 30382, + "trainer": 9767, + "trainers": 18871, + "training": 34508, + "training": 2199, + "trains": 9541, + "trait": 35160, + "traitor": 31760, + "traitors": 42633, + "traits": 25748, + "trajec": 42042, + "trak": 24065, + "tral": 14609, + "tram": 9800, + "tram": 17500, + "tramp": 46289, + "trampol": 32905, + "trampoline": 42800, + "tramrahim": 35220, + "tran": 1357, + "tran": 22031, + "trance": 30584, + "trance": 18671, + "trancefamily": 39630, + "trane": 35779, + "tranqu": 18912, + "tranquil": 35764, + "tranquility": 36688, + "trans": 1826, + "trans": 8126, + "transaction": 24881, + "transactions": 21653, + "transat": 37872, + "transatlantic": 
40703, + "transc": 21073, + "transcend": 47087, + "transcript": 39008, + "transcription": 48765, + "transfer": 22659, + "transfer": 7134, + "transferred": 29700, + "transferring": 40924, + "transfers": 21621, + "transform": 8142, + "transform": 12288, + "transformation": 34204, + "transformation": 7832, + "transformational": 47135, + "transformationtuesday": 36511, + "transformative": 38106, + "transformed": 17453, + "transformer": 38235, + "transformers": 17843, + "transforming": 44470, + "transforming": 19251, + "transforms": 30312, + "transgender": 17732, + "transi": 32236, + "transit": 10174, + "transiti": 22939, + "transition": 11391, + "transitional": 41519, + "transitioning": 43586, + "transitions": 39374, + "transl": 12243, + "translate": 22655, + "translated": 20752, + "translates": 36334, + "translating": 42156, + "translation": 12153, + "translations": 41367, + "translator": 36230, + "translucent": 49052, + "transm": 18861, + "transmission": 16103, + "transmitted": 48605, + "transmitter": 40457, + "transp": 11726, + "transpa": 18524, + "transparen": 16108, + "transparency": 16828, + "transparent": 19017, + "transpl": 16038, + "transplant": 41871, + "transplant": 18771, + "transplantation": 45207, + "transpor": 19406, + "transport": 10231, + "transport": 7362, + "transportation": 10911, + "transported": 29089, + "transporter": 43568, + "transporting": 42259, + "trap": 36224, + "trap": 9677, + "trape": 42435, + "trapped": 15592, + "traps": 28517, + "tras": 30638, + "trash": 39215, + "trash": 9798, + "traum": 22263, + "trauma": 13846, + "traumati": 46613, + "traumatic": 29958, + "trav": 7586, + "trav": 46955, + "trave": 35357, + "travel": 2824, + "travel": 1949, + "travelblog": 35957, + "travelblogger": 25494, + "travelchat": 46455, + "traveled": 20384, + "traveler": 17794, + "travelers": 20644, + "travelgram": 40069, + "traveling": 9365, + "travelled": 23428, + "traveller": 22546, + "travellers": 29583, + "travelling": 11190, + "travelphotography": 22808, + "travelpics": 32293, + "travels": 11472, + "traveltips": 36260, + "traveltuesday": 16713, + "traverse": 35058, + "travi": 46971, + "travis": 27441, + "travis": 12287, + "traw": 42288, + "trax": 34421, + "tray": 38470, + "tray": 14621, + "trays": 39798, + "trc": 41803, + "tre": 975, + "tre": 6033, + "treach": 46005, + "tread": 26182, + "tread": 35658, + "treadmill": 37780, + "treas": 8591, + "treason": 28103, + "treasure": 9922, + "treasured": 48068, + "treasurer": 26985, + "treasures": 16500, + "treasury": 20956, + "treat": 3968, + "treat": 3901, + "treated": 9772, + "treating": 13842, + "treatment": 4869, + "treatments": 15839, + "treats": 8878, + "treaty": 19967, + "treble": 33194, + "trecht": 33812, + "tree": 13354, + "tree": 2677, + "treehouse": 42387, + "trees": 4682, + "trek": 13236, + "trek": 8136, + "trekking": 25293, + "trell": 35159, + "tremb": 44043, + "tremend": 14659, + "tremendous": 15988, + "tren": 2579, + "trench": 23846, + "trenches": 38723, + "trend": 19986, + "trend": 6643, + "trending": 6087, + "trends": 7015, + "trendsetter": 46666, + "trendy": 23072, + "trent": 45885, + "trent": 15548, + "trenton": 37470, + "tres": 23569, + "tress": 4733, + "tresses": 24273, + "trevor": 23437, + "trevor": 13219, + "trex": 42114, + "trey": 36670, + "trey": 16939, + "tri": 924, + "tri": 9618, + "triad": 45602, + "trial": 5991, + "trials": 10992, + "triangle": 14615, + "triathlon": 18080, + "trib": 45151, + "tribal": 16629, + "tribe": 19943, + "tribe": 11365, + "tribeca": 35184, + "tribes": 26546, + "tribu": 3028, + "tribun": 
14311, + "tribunal": 32911, + "tribune": 18556, + "tribute": 5493, + "tributes": 15537, + "tric": 9511, + "tric": 4081, + "trich": 39519, + "trick": 17177, + "trick": 8172, + "tricks": 13177, + "tricky": 22319, + "trics": 31437, + "trident": 35491, + "tridge": 18722, + "tried": 4554, + "tries": 4315, + "trife": 48962, + "trigge": 30509, + "trigger": 16158, + "triggered": 30924, + "triggers": 37319, + "tright": 29915, + "tril": 40626, + "trill": 39297, + "trilli": 39350, + "trillion": 20160, + "trilo": 15183, + "trilogy": 16862, + "trim": 14182, + "trimmed": 40657, + "trin": 6628, + "trinidad": 26244, + "trinity": 30744, + "trinity": 12267, + "trio": 10263, + "trip": 23421, + "trip": 2529, + "tripad": 37189, + "tripadvisor": 38708, + "triple": 16519, + "triple": 7673, + "triplets": 48601, + "tripod": 36141, + "tripoli": 40095, + "trippin": 43073, + "tripping": 35229, + "trippy": 35137, + "trips": 12292, + "tris": 29690, + "trish": 40511, + "trish": 37179, + "trisha": 39152, + "tristan": 25497, + "trit": 37087, + "triton": 45437, + "triu": 14782, + "trium": 21065, + "triumph": 26507, + "triumph": 15307, + "triumphant": 41918, + "trivi": 21228, + "trivia": 10642, + "triviatuesday": 45499, + "trix": 41017, + "tro": 1046, + "tro": 3332, + "trock": 44368, + "trojan": 30653, + "trojans": 25310, + "trol": 10306, + "troll": 39737, + "troll": 17103, + "trolley": 25124, + "trolling": 28552, + "trolls": 20890, + "tromb": 32390, + "trombone": 44423, + "tron": 19057, + "tron": 10684, + "tronic": 34258, + "tronics": 34397, + "troom": 23691, + "troop": 12492, + "troop": 24054, + "trooper": 18327, + "troopers": 23576, + "troops": 10109, + "trop": 31585, + "trope": 41150, + "trophies": 20998, + "trophy": 42676, + "trophy": 6502, + "tropic": 21794, + "tropic": 36736, + "tropical": 41699, + "tropical": 8686, + "tropics": 36940, + "tros": 40456, + "trose": 36022, + "trot": 30453, + "trotter": 38287, + "trou": 5181, + "troubad": 49037, + "trouble": 25669, + "trouble": 7848, + "troubled": 25568, + "troubles": 27254, + "trough": 39761, + "troupe": 34803, + "trous": 19727, + "trousers": 23172, + "trout": 14853, + "trove": 45350, + "trow": 46914, + "troy": 26283, + "troy": 12819, + "trs": 24770, + "tru": 931, + "tru": 25326, + "truck": 14781, + "truck": 4629, + "trucker": 45918, + "truckers": 43404, + "trucking": 26208, + "trucks": 9569, + "trude": 39017, + "trudeau": 15752, + "true": 13096, + "true": 2328, + "truec": 37583, + "truelove": 45711, + "truffle": 23064, + "truffles": 37057, + "truly": 4545, + "trum": 11766, + "trum": 11399, + "truman": 29414, + "trump": 9124, + "trump": 1797, + "trumpet": 23681, + "trumpp": 45550, + "trumprussia": 39135, + "trumps": 29793, + "trumptrain": 43595, + "trun": 16163, + "trun": 46661, + "trunk": 18347, + "trunks": 38531, + "truro": 43507, + "truss": 46080, + "trust": 17691, + "trust": 3876, + "truste": 17356, + "trusted": 16538, + "trustee": 30803, + "trustees": 28853, + "trusting": 33221, + "trusts": 27507, + "trustworthy": 46840, + "trusty": 37955, + "truth": 21335, + "truth": 4319, + "truths": 27179, + "trx": 31620, + "try": 4487, + "try": 1209, + "tryin": 31085, + "trying": 2551, + "tryna": 15702, + "tryout": 43832, + "tryouts": 28053, + "ts": 2290, + "ts": 590, + "tsa": 25977, + "tsal": 20438, + "tsb": 45015, + "tsc": 37437, + "tsch": 38778, + "tsd": 20611, + "tse": 49144, + "tsfor": 42654, + "tsford": 32823, + "tsh": 42872, + "tshirt": 14907, + "tshirts": 29377, + "tsi": 40048, + "tsi": 37867, + "tsk": 43600, + "tsla": 35681, + "tsm": 43452, + "tsman": 20046, + "tsn": 
44921, + "tsn": 26896, + "tson": 42353, + "tson": 47140, + "tsp": 34230, + "tsu": 13950, + "tsu": 20175, + "tsun": 19155, + "tsunami": 24286, + "tsville": 29080, + "tt": 971, + "tt": 1402, + "tta": 2646, + "ttc": 27668, + "tte": 23105, + "tte": 3070, + "tted": 15163, + "tten": 11351, + "tten": 17479, + "tter": 18691, + "tter": 5165, + "tters": 6318, + "ttes": 9293, + "tti": 5237, + "ttin": 36589, + "tting": 1188, + "ttino": 47389, + "ttip": 46993, + "ttle": 9253, + "ttm": 46838, + "tto": 8759, + "tto": 8105, + "tton": 10562, + "ttot": 12480, + "ttp": 30828, + "ttr": 47589, + "tts": 11570, + "ttt": 17256, + "tttt": 33119, + "ttu": 44006, + "ttv": 24281, + "tty": 11457, + "tty": 1856, + "tu": 764, + "tu": 5760, + "tua": 41344, + "tual": 4799, + "tuan": 37297, + "tub": 34907, + "tub": 15450, + "tube": 38229, + "tube": 3308, + "tuber": 30371, + "tuberculo": 42606, + "tuberculosis": 43129, + "tubes": 22870, + "tubing": 40794, + "tubs": 41705, + "tubular": 48786, + "tuc": 14456, + "tuc": 43871, + "tuck": 22398, + "tucked": 26923, + "tucker": 39703, + "tucker": 15726, + "tucket": 32677, + "tucson": 17250, + "tudor": 24547, + "tue": 17515, + "tues": 2283, + "tues": 12113, + "tuesday": 10209, + "tuesday": 2519, + "tuesdaymotivation": 25432, + "tuesdays": 23195, + "tuesdaythoughts": 17988, + "tuf": 44510, + "tuff": 38868, + "tug": 47032, + "tug": 27902, + "tuition": 21129, + "tuk": 39271, + "tuk": 14993, + "tul": 9069, + "tul": 40837, + "tula": 36332, + "tulane": 44893, + "tulip": 28389, + "tulips": 30886, + "tulsa": 18850, + "tum": 12932, + "tum": 8843, + "tumb": 8831, + "tumble": 38284, + "tumbler": 48790, + "tumbling": 46226, + "tumblr": 11841, + "tummy": 26053, + "tumor": 22616, + "tumors": 39894, + "tumour": 45129, + "tun": 1415, + "tun": 21349, + "tuna": 15037, + "tundra": 39899, + "tune": 11427, + "tune": 3300, + "tuned": 5898, + "tunein": 16809, + "tuner": 42905, + "tunes": 31688, + "tunes": 10810, + "tunesapp": 32550, + "tung": 47940, + "tung": 31092, + "tuni": 16270, + "tunic": 43495, + "tuning": 19585, + "tunisia": 23346, + "tunnel": 11096, + "tunnels": 29814, + "tuous": 28738, + "tup": 37956, + "tup": 4507, + "tupac": 31506, + "tups": 44855, + "tur": 985, + "tur": 17182, + "tura": 16127, + "tural": 45143, + "tural": 4261, + "turb": 18973, + "turban": 48515, + "turbine": 26880, + "turbines": 38863, + "turbo": 23578, + "turbo": 13668, + "turbul": 31100, + "turbulent": 47871, + "ture": 4321, + "ture": 941, + "tured": 3987, + "turer": 11993, + "turers": 16956, + "tures": 2400, + "turf": 36762, + "turf": 12510, + "turi": 11896, + "turin": 36251, + "turing": 5812, + "turismo": 30202, + "turk": 8254, + "turk": 32507, + "turkey": 35977, + "turkey": 4790, + "turkeys": 37991, + "turkish": 48199, + "turkish": 9278, + "turks": 34344, + "turmeric": 34044, + "turmoil": 37751, + "turn": 5522, + "turn": 2105, + "turnaround": 32719, + "turnbull": 27863, + "turned": 3771, + "turner": 42867, + "turner": 8777, + "turning": 4976, + "turno": 21377, + "turnout": 11654, + "turnover": 30794, + "turnpike": 38301, + "turns": 3185, + "turnt": 28887, + "turntable": 37953, + "turnup": 30591, + "turo": 29224, + "turquo": 19390, + "turquoise": 19899, + "turt": 13716, + "turtle": 35943, + "turtle": 10912, + "turtles": 17862, + "tus": 24828, + "tus": 7079, + "tusc": 17909, + "tuscal": 42638, + "tuscaloosa": 44375, + "tuscan": 42865, + "tuscany": 20885, + "tuss": 31741, + "tut": 35121, + "tutor": 10054, + "tutor": 27858, + "tutorial": 12857, + "tutorials": 30973, + "tutoring": 37532, + "tutti": 46880, + "tutu": 35845, + 
"tux": 28720, + "tux": 49186, + "tuxedo": 40173, + "tv": 3197, + "tv": 1583, + "tvc": 49190, + "tvd": 25889, + "tvmiaw": 38554, + "tvn": 44232, + "tvs": 27114, + "tvtime": 19947, + "tvxq": 43968, + "tw": 966, + "tw": 12842, + "twa": 46954, + "twain": 30689, + "twal": 48126, + "tware": 5707, + "twc": 41217, + "twd": 29440, + "twd": 19343, + "twdfamily": 38218, + "twe": 18365, + "tweak": 48870, + "tweaks": 42661, + "twee": 1330, + "tweed": 26904, + "tweeps": 14928, + "tweet": 11826, + "tweet": 1842, + "tweeta": 32024, + "tweetapicture": 40596, + "tweeted": 7841, + "tweeter": 32876, + "tweeters": 31713, + "tweeting": 8901, + "tweets": 3560, + "tweetyour": 45033, + "twel": 14476, + "twelf": 39443, + "twelfth": 44072, + "twell": 38722, + "twell": 30162, + "twelve": 19694, + "twent": 27027, + "twenti": 35167, + "twenty": 13016, + "twentyon": 39609, + "twentyonepilots": 40007, + "twer": 13923, + "twerk": 28506, + "twi": 5537, + "twice": 6970, + "twick": 34326, + "twickenham": 39619, + "twil": 12804, + "twili": 35754, + "twilight": 46366, + "twilight": 14512, + "twill": 43703, + "twin": 9342, + "twin": 6769, + "twine": 42775, + "twinkle": 36545, + "twinning": 30156, + "twinpeaks": 32042, + "twins": 8040, + "twist": 10589, + "twisted": 18233, + "twister": 45933, + "twists": 34149, + "twit": 1643, + "twit": 18704, + "twitart": 27709, + "twitch": 13251, + "twitch": 9153, + "twitter": 7546, + "twitter": 1989, + "twitterkurds": 32722, + "twitterstorians": 35389, + "two": 17211, + "two": 1237, + "twol": 31964, + "twood": 40404, + "twood": 13245, + "twp": 33283, + "twright": 46778, + "twt": 6825, + "twx": 26830, + "twy": 45861, + "tx": 6636, + "tx": 5200, + "txhsfb": 34757, + "txlege": 26995, + "txst": 40761, + "txt": 24595, + "txwx": 22995, + "ty": 1260, + "ty": 744, + "tya": 41273, + "tycoon": 36803, + "tye": 43097, + "tyfree": 41215, + "tyga": 41952, + "tying": 22559, + "tyl": 47537, + "tyler": 14787, + "tyler": 7058, + "tym": 45772, + "tyne": 27000, + "tyne": 29729, + "tyour": 16823, + "type": 15673, + "type": 3877, + "typed": 40753, + "typeface": 44969, + "types": 7543, + "typewriter": 42180, + "typho": 17486, + "typhoon": 21110, + "typic": 21648, + "typical": 9854, + "typically": 23175, + "typing": 20102, + "typo": 18831, + "typo": 29076, + "typography": 24332, + "tyr": 15590, + "tyran": 46921, + "tyranny": 35402, + "tyre": 38330, + "tyre": 16864, + "tyres": 21376, + "tyrone": 30226, + "tyson": 16616, + "tz": 7710, + "tz": 4983, + "tzer": 45267, + "tzky": 47127, + "tzman": 46032, + "tzu": 34354, + "té": 27208, + "té": 39694, + "u": 84, + "u": 340, + "ua": 34075, + "ua": 8441, + "uaap": 46753, + "uaap": 43774, + "uab": 35587, + "uae": 9752, + "ual": 1921, + "ually": 10767, + "uan": 33062, + "uas": 38339, + "uav": 30303, + "ub": 18430, + "ub": 13494, + "uba": 29768, + "ubc": 42479, + "ubc": 29455, + "ube": 30892, + "uber": 25896, + "uber": 10668, + "ubi": 26758, + "ubio": 32867, + "ubiquit": 48129, + "ubis": 28248, + "ubisoft": 32051, + "ubs": 43851, + "ubun": 28184, + "ubuntu": 30791, + "uc": 4903, + "uc": 12438, + "uca": 30942, + "ucc": 44844, + "ucc": 29138, + "ucci": 30746, + "uccino": 30409, + "ucd": 44746, + "ucd": 43514, + "ucf": 24414, + "uch": 19465, + "uch": 22394, + "uchi": 37473, + "uci": 46354, + "uci": 28925, + "uck": 34189, + "ucl": 12013, + "ucl": 13647, + "ucla": 37667, + "ucla": 17259, + "ucn": 49036, + "uconn": 30549, + "ud": 6560, + "ud": 5765, + "uda": 22800, + "udaipur": 49385, + "uddin": 43035, + "ude": 37016, + "ude": 35194, + "ue": 16696, + "ue": 1190, + "uefa": 19189, + 
"uel": 24231, + "uer": 45951, + "ues": 2526, + "uf": 17777, + "uf": 19230, + "ufc": 20396, + "ufc": 6490, + "uff": 45701, + "ufo": 19443, + "ufos": 48234, + "ug": 3754, + "ug": 16061, + "uga": 16056, + "ugand": 25965, + "uganda": 11125, + "ugandan": 44206, + "ugby": 30658, + "ugh": 39736, + "ugh": 12755, + "ugliest": 43543, + "ugly": 36070, + "ugly": 8159, + "ugu": 18144, + "uh": 17661, + "uh": 9219, + "uhc": 44974, + "uhh": 35938, + "uhhh": 45270, + "uhm": 35614, + "uhur": 29434, + "uhuru": 35690, + "ui": 17326, + "ui": 11458, + "uil": 29395, + "uit": 30696, + "uit": 47584, + "uj": 33266, + "uji": 39672, + "uk": 2294, + "uk": 1432, + "uka": 23294, + "uke": 48836, + "uke": 28577, + "uked": 48987, + "uki": 37435, + "uki": 9009, + "ukin": 34996, + "ukip": 20360, + "uklabour": 36902, + "ukmfg": 38764, + "uko": 33562, + "ukone": 24682, + "ukrain": 15468, + "ukraine": 7768, + "ukrainian": 16927, + "ukrunchat": 34481, + "uku": 29541, + "uku": 36082, + "ukulele": 39094, + "ul": 914, + "ul": 6625, + "ula": 34104, + "ula": 9506, + "ular": 4927, + "ulary": 21701, + "ulate": 20467, + "ulation": 32896, + "ule": 35616, + "ules": 26274, + "ulf": 49331, + "uli": 41841, + "uli": 22174, + "ull": 33254, + "ulla": 30577, + "ullah": 45310, + "ullivan": 45252, + "ulls": 37418, + "ulo": 46084, + "ulo": 36738, + "ulous": 42490, + "ulous": 4281, + "ulously": 20167, + "ulster": 29709, + "ulster": 24639, + "ult": 4380, + "ulti": 11925, + "ulties": 21884, + "ultimat": 16522, + "ultimate": 34684, + "ultimate": 5377, + "ultimatefan": 48372, + "ultimatefanlive": 48644, + "ultimately": 23023, + "ultr": 25636, + "ultra": 11398, + "ultra": 8118, + "ultram": 44519, + "ultrasound": 29717, + "ulture": 22272, + "ulty": 8036, + "ulu": 41815, + "ulu": 15659, + "ulum": 17235, + "uly": 33220, + "ulysses": 46114, + "um": 1622, + "um": 1008, + "uma": 29982, + "uma": 9256, + "uman": 27112, + "umar": 25656, + "umass": 39390, + "umatic": 45006, + "umb": 7493, + "umber": 19195, + "umbrel": 34773, + "umbrella": 17143, + "umbrellas": 42782, + "umbria": 39287, + "umc": 39491, + "umd": 42067, + "ume": 38480, + "umen": 42832, + "uments": 25924, + "umer": 23539, + "umes": 21403, + "umi": 48772, + "umi": 15458, + "umich": 41294, + "umin": 31542, + "umm": 26129, + "umm": 21215, + "ummer": 47628, + "ummm": 33665, + "umni": 31739, + "ump": 22224, + "umpire": 36214, + "ums": 8643, + "umu": 39788, + "un": 569, + "un": 2271, + "una": 6385, + "unable": 17793, + "unacceptable": 25234, + "unanim": 20800, + "unanimous": 33520, + "unanimously": 31798, + "unanswered": 43611, + "unarmed": 41541, + "unas": 41366, + "unavailable": 48430, + "unaware": 33347, + "unbeat": 37056, + "unbeatable": 40267, + "unbeaten": 19228, + "unbeliev": 11383, + "unbelievable": 13306, + "unbelievably": 33781, + "unborn": 37257, + "unboxing": 32866, + "unbreakable": 32956, + "unbroken": 49271, + "unc": 24921, + "unc": 15322, + "uncanny": 32556, + "uncertain": 30384, + "uncertainty": 23956, + "unch": 1527, + "unchanged": 34272, + "uncharted": 34560, + "unci": 25521, + "unciation": 34117, + "uncle": 31537, + "uncle": 8002, + "unclear": 32955, + "uncles": 45335, + "uncomfortable": 22470, + "uncommon": 34888, + "uncondition": 46561, + "unconditional": 31112, + "unconscious": 34791, + "unconstitutional": 43585, + "unconventional": 39440, + "uncover": 33031, + "uncovered": 28234, + "uncture": 38736, + "uncut": 41056, + "und": 9762, + "und": 9732, + "unda": 39932, + "undant": 25377, + "unday": 29338, + "unde": 45226, + "undead": 40105, + "undecided": 49368, + "undefeated": 15326, + 
"undeni": 38424, + "under": 1473, + "under": 1798, + "underage": 45669, + "underattack": 35075, + "undercover": 21595, + "underdog": 44266, + "undere": 21675, + "underestim": 23348, + "underestimate": 31794, + "undergo": 31545, + "undergoing": 26419, + "undergrad": 38331, + "undergraduate": 24320, + "underground": 9396, + "undering": 30826, + "underlying": 31812, + "undermine": 42839, + "underneath": 20857, + "underrated": 19494, + "unders": 20376, + "understand": 47582, + "understand": 4600, + "understanding": 7522, + "understands": 21607, + "understatement": 38296, + "understood": 17303, + "undertaker": 40144, + "undertaking": 49067, + "undertale": 48283, + "underthe": 41161, + "underwater": 14760, + "underway": 6273, + "underwear": 21154, + "underwood": 21474, + "underworld": 34760, + "undi": 23845, + "undisclosed": 39334, + "undo": 35454, + "undocumented": 35414, + "undoub": 38836, + "undoubtedly": 42204, + "undp": 26691, + "une": 4522, + "une": 10966, + "unearth": 32716, + "unearthed": 36632, + "unemp": 15139, + "unemployed": 32721, + "unemployment": 19350, + "unes": 6394, + "unesco": 16216, + "uneven": 43204, + "unex": 9484, + "unexpe": 10802, + "unexpec": 31829, + "unexpected": 12293, + "unexpectedly": 35622, + "unf": 29285, + "unfair": 22193, + "unfinished": 26526, + "unfit": 45367, + "unfold": 38681, + "unfollow": 38797, + "unfor": 14010, + "unforgettable": 16173, + "unfortun": 10194, + "unfortunate": 22361, + "unfortunately": 12863, + "unfpa": 45048, + "ung": 10439, + "ung": 4334, + "unga": 19151, + "ungsoo": 25582, + "unh": 25365, + "unhappy": 26528, + "unhcr": 43451, + "unhealthy": 30994, + "uni": 1107, + "uni": 5926, + "unic": 7648, + "unicef": 38286, + "unicef": 19259, + "unicorn": 15660, + "unicorns": 35183, + "unidenti": 33707, + "unidentified": 35563, + "unification": 45036, + "unified": 20876, + "uniform": 11075, + "uniforms": 17838, + "unil": 32388, + "unilever": 48654, + "uniof": 21218, + "union": 14210, + "union": 3503, + "unions": 18353, + "unis": 30482, + "unis": 39266, + "unisex": 27609, + "unison": 46694, + "unit": 28522, + "unit": 5695, + "unite": 15078, + "unite": 11305, + "uniteblue": 20935, + "united": 10898, + "united": 2690, + "unitedstates": 39636, + "unitedway": 47486, + "unites": 32061, + "uniting": 31318, + "units": 10394, + "unity": 38300, + "unity": 8581, + "univ": 36680, + "univ": 14896, + "univer": 15574, + "univers": 5855, + "universal": 19148, + "universal": 8754, + "universe": 6104, + "universi": 41692, + "universit": 26019, + "universities": 16408, + "university": 40728, + "university": 2182, + "universityof": 46158, + "unk": 5542, + "unknown": 8685, + "unl": 43807, + "unlawful": 42305, + "unle": 19677, + "unlea": 23893, + "unleash": 26706, + "unleashed": 27955, + "unless": 10602, + "unlike": 16694, + "unlikely": 18904, + "unlimited": 11015, + "unlock": 18649, + "unlocked": 16770, + "unlocking": 40810, + "unlucky": 35029, + "unlv": 42283, + "unmanned": 36751, + "unmatched": 46054, + "unn": 38364, + "unnamed": 44985, + "unnecessary": 24100, + "unner": 31481, + "unning": 43282, + "unnoticed": 42807, + "uno": 32446, + "uno": 17078, + "unofficial": 22506, + "unpacking": 43589, + "unpaid": 32811, + "unparalleled": 44396, + "unplugged": 31724, + "unpopular": 40232, + "unprece": 23054, + "unprecedented": 23344, + "unpredictable": 38684, + "unra": 45150, + "unreal": 46980, + "unreal": 15636, + "unrelated": 38644, + "unreleased": 29654, + "unrest": 36452, + "uns": 25908, + "unsafe": 32071, + "unsc": 36395, + "unseen": 19069, + "unsigned": 39346, + 
"unsolved": 40836, + "unsplash": 46196, + "unstable": 34730, + "unstopp": 22105, + "unstoppable": 23484, + "unsuccessful": 47478, + "unsung": 33015, + "unsure": 26396, + "unt": 19654, + "unt": 6537, + "until": 1942, + "untitled": 21309, + "unto": 19801, + "untold": 32206, + "untouch": 44509, + "untouched": 42764, + "unused": 29636, + "unusual": 12613, + "unusually": 36465, + "unve": 6685, + "unveil": 20483, + "unveiled": 13572, + "unveiling": 20327, + "unveils": 15057, + "unwanted": 25285, + "unwind": 34064, + "unya": 37142, + "uo": 30874, + "uo": 36162, + "uof": 11155, + "uoft": 37329, + "uon": 48144, + "uous": 40185, + "up": 1083, + "up": 705, + "upa": 31727, + "upbeat": 39201, + "upcoming": 4196, + "upcycled": 46552, + "upd": 3226, + "update": 2491, + "updated": 5974, + "updates": 4904, + "updating": 22792, + "uper": 38082, + "uper": 33056, + "upfront": 42064, + "upgrade": 10365, + "upgraded": 18577, + "upgrades": 21253, + "upgrading": 34368, + "uph": 14128, + "uphill": 42767, + "uphol": 26195, + "uphold": 43897, + "upholstery": 44556, + "upl": 41939, + "uplift": 45389, + "uplifting": 29546, + "upload": 13968, + "uploaded": 16793, + "uploading": 30145, + "upon": 23524, + "upon": 5067, + "upp": 19549, + "upp": 45946, + "upper": 22465, + "upper": 7067, + "upri": 15982, + "upright": 29818, + "uprising": 26006, + "upro": 28922, + "ups": 6926, + "upscale": 47501, + "upset": 11214, + "upsets": 42637, + "upside": 15362, + "upstairs": 21387, + "upstate": 33335, + "upstream": 45517, + "upthe": 31510, + "upto": 26575, + "upton": 31910, + "uptown": 23807, + "upward": 32526, + "upwards": 34915, + "uq": 39591, + "ur": 565, + "ur": 1775, + "ura": 29337, + "ura": 3544, + "urable": 40194, + "ural": 23547, + "ural": 33948, + "uran": 16197, + "uranium": 29850, + "urban": 7931, + "urban": 5800, + "urbanart": 40834, + "urd": 47880, + "urday": 19742, + "urdu": 29976, + "ure": 5514, + "ure": 726, + "ured": 4210, + "urer": 20864, + "ures": 2288, + "urg": 35995, + "urge": 14852, + "urged": 23790, + "urgency": 47612, + "urgent": 13693, + "urgently": 34534, + "urges": 16692, + "urging": 27748, + "uri": 11052, + "uri": 8699, + "urie": 46429, + "urin": 45245, + "urine": 28864, + "uring": 1351, + "url": 23464, + "urn": 38075, + "uro": 17343, + "uro": 5925, + "urology": 48585, + "urope": 14918, + "urs": 4794, + "urself": 31942, + "urst": 19181, + "urstruly": 34751, + "urstrulymahesh": 35314, + "ursula": 38390, + "urt": 24309, + "uru": 16322, + "uru": 11768, + "uruguay": 27931, + "urus": 14246, + "urve": 24583, + "ury": 8642, + "ury": 2106, + "us": 904, + "us": 718, + "usa": 9491, + "usa": 2547, + "usability": 46736, + "usable": 22890, + "usaf": 25017, + "usage": 19137, + "usaid": 34507, + "usair": 36742, + "usairforce": 42179, + "usarmy": 19132, + "usatoday": 40263, + "usav": 36056, + "usb": 10281, + "usc": 13346, + "usc": 14995, + "uscg": 43932, + "usd": 7485, + "usda": 25829, + "use": 4419, + "use": 1483, + "used": 32289, + "used": 2026, + "useful": 9784, + "useless": 20154, + "usemb": 39700, + "user": 21248, + "user": 7031, + "username": 28162, + "users": 7433, + "uses": 5282, + "useum": 45189, + "usf": 32385, + "usf": 28942, + "usgs": 35103, + "ush": 12001, + "ush": 18335, + "usher": 27411, + "ushi": 47734, + "usi": 25540, + "usic": 34909, + "usic": 16753, + "using": 1996, + "usky": 45778, + "usl": 42113, + "usm": 40041, + "usmc": 21678, + "usmnt": 30662, + "usn": 40579, + "usnavy": 24500, + "usnews": 43752, + "uso": 21539, + "usopen": 21782, + "usp": 26651, + "usps": 39980, + "usrc": 33274, + "uss": 11545, + 
"uss": 9260, + "ussia": 29553, + "ussoccer": 42828, + "ussr": 32697, + "ust": 35501, + "ust": 24725, + "usu": 4254, + "usu": 40434, + "usual": 6129, + "usually": 8296, + "usur": 45582, + "uswnt": 35255, + "ut": 1419, + "ut": 3641, + "uta": 42706, + "uta": 25925, + "utah": 27474, + "utah": 9312, + "utc": 18196, + "utd": 10493, + "ute": 16856, + "ute": 3130, + "uten": 32089, + "uter": 39197, + "utes": 2850, + "uth": 48819, + "uth": 44750, + "uti": 24568, + "util": 28824, + "utili": 17015, + "utilities": 27210, + "utility": 14941, + "utilize": 36861, + "utilized": 47604, + "utilizing": 40212, + "utm": 47853, + "utmost": 42352, + "uto": 18866, + "uto": 13683, + "utopia": 34433, + "utpol": 42605, + "utr": 48726, + "utrecht": 37216, + "uts": 11740, + "utsa": 37528, + "utt": 17096, + "uttar": 40168, + "uttarak": 33755, + "uttarakhand": 35655, + "utter": 18769, + "utter": 24558, + "utterly": 21353, + "utto": 42183, + "utv": 36351, + "utz": 45320, + "uu": 5702, + "uu": 14553, + "uuu": 44355, + "uuu": 27656, + "uuuu": 16720, + "uuuu": 40797, + "uv": 23777, + "uv": 15977, + "uva": 23908, + "uw": 13933, + "uw": 19166, + "uwe": 48785, + "uwu": 35544, + "ux": 9251, + "ux": 6213, + "uy": 31929, + "uy": 48113, + "uz": 19398, + "uz": 36991, + "uzbe": 43007, + "uzbekistan": 45024, + "uzzi": 48210, + "v": 85, + "v": 341, + "va": 4648, + "va": 1892, + "vaa": 37488, + "vable": 23088, + "vac": 3125, + "vac": 34085, + "vaca": 48215, + "vacancies": 26333, + "vacancy": 21247, + "vacant": 25262, + "vacation": 28336, + "vacation": 6561, + "vacations": 29002, + "vacay": 44716, + "vacc": 13342, + "vaccin": 19164, + "vaccinated": 48134, + "vaccination": 32518, + "vaccine": 47780, + "vaccine": 17493, + "vaccines": 25860, + "vach": 46211, + "vacu": 16058, + "vacuum": 18420, + "vad": 11880, + "vada": 46759, + "vader": 21908, + "vae": 39384, + "vag": 13015, + "vague": 42154, + "vah": 26921, + "vai": 26893, + "vai": 36802, + "vail": 21189, + "vain": 25538, + "vais": 28719, + "vaj": 34206, + "vak": 16288, + "vak": 41597, + "val": 1214, + "val": 1560, + "vala": 48525, + "valdez": 40617, + "vale": 35554, + "vale": 10820, + "valedic": 43525, + "valen": 12630, + "valence": 30225, + "valenci": 34183, + "valencia": 16559, + "valent": 3655, + "valent": 15300, + "valentin": 48631, + "valentina": 43741, + "valentine": 11208, + "valentine": 5876, + "valentines": 10259, + "valentinesday": 12369, + "valentino": 29624, + "valeri": 31951, + "valerie": 25592, + "valet": 45749, + "vali": 8230, + "valiant": 33804, + "valid": 15126, + "validation": 32536, + "valkyrie": 42326, + "vall": 23523, + "vall": 35295, + "vallarta": 47874, + "valle": 24857, + "valle": 29105, + "valley": 18354, + "valley": 3136, + "valleys": 28649, + "valor": 30930, + "vals": 7431, + "valu": 6291, + "valuable": 10056, + "valuation": 25894, + "value": 41358, + "value": 4602, + "valued": 17801, + "values": 8857, + "valve": 17001, + "valves": 33517, + "vam": 9983, + "vamo": 46718, + "vamos": 30346, + "vamp": 10680, + "vampi": 47017, + "vampire": 47576, + "vampire": 13220, + "vampires": 30868, + "vamps": 44810, + "van": 2446, + "van": 2451, + "vana": 20543, + "vanc": 6320, + "vance": 31447, + "vancou": 6750, + "vancouver": 31904, + "vancouver": 7208, + "vand": 11691, + "vandalism": 45664, + "vander": 16264, + "vanderbilt": 33524, + "vandy": 39268, + "vane": 43828, + "vaness": 13328, + "vanessa": 16836, + "vangogh": 47849, + "vanguard": 27916, + "vani": 15396, + "vani": 26459, + "vania": 10998, + "vanilla": 11974, + "vanished": 43783, + "vanishing": 48296, + "vanity": 48353, 
+ "vanity": 22938, + "vans": 11711, + "vant": 26298, + "vantage": 31749, + "vanu": 42892, + "vanuatu": 48766, + "vap": 10462, + "vape": 25423, + "vape": 20219, + "vaping": 29403, + "vapor": 37167, + "vapor": 30729, + "vapori": 46183, + "var": 3187, + "var": 12998, + "vara": 47492, + "varan": 36585, + "varanasi": 39364, + "vard": 21866, + "vard": 8773, + "vardy": 47371, + "vare": 38159, + "vares": 42895, + "vargas": 32752, + "vari": 3354, + "variable": 26416, + "varian": 34334, + "variant": 20293, + "variants": 38312, + "variation": 26420, + "variations": 29025, + "varied": 32334, + "varies": 32543, + "varieties": 23805, + "variety": 8396, + "various": 7395, + "varsity": 43716, + "varsity": 8574, + "varun": 48120, + "varun": 22069, + "vary": 18855, + "varying": 36456, + "vas": 5669, + "vas": 5995, + "vasc": 40995, + "vascular": 19218, + "vase": 20431, + "vasi": 49092, + "vast": 24413, + "vast": 16414, + "vastly": 48257, + "vat": 11588, + "vat": 18363, + "vatican": 21030, + "vation": 37884, + "vau": 6391, + "vaugh": 25158, + "vaughan": 21392, + "vaughn": 29013, + "vaul": 27469, + "vault": 15240, + "vaus": 40217, + "vaux": 27403, + "vauxhall": 29173, + "vaw": 47952, + "vay": 48000, + "vaz": 38142, + "vb": 29365, + "vb": 8778, + "vball": 38329, + "vc": 28670, + "vc": 7952, + "vcs": 43528, + "vcu": 40102, + "vd": 9515, + "vday": 42055, + "ve": 673, + "ve": 563, + "vea": 43798, + "veal": 36616, + "veau": 24419, + "vec": 19912, + "vector": 40453, + "vector": 21533, + "ved": 19515, + "ved": 1102, + "veda": 44401, + "vedere": 45660, + "vedi": 47971, + "vee": 35708, + "vee": 17073, + "veen": 22432, + "veer": 21243, + "veer": 22058, + "veg": 9048, + "veg": 16460, + "vega": 22930, + "vegan": 15705, + "vegan": 5615, + "vegans": 48514, + "vegas": 20288, + "vegas": 4413, + "vege": 6219, + "vegetable": 15725, + "vegetables": 14119, + "vegetarian": 14600, + "vegetation": 33947, + "veggie": 19401, + "veggies": 16767, + "vehic": 3973, + "vehicle": 5299, + "vehicles": 8361, + "veil": 23516, + "vein": 29169, + "veins": 28867, + "veit": 30620, + "vel": 942, + "vel": 1287, + "vela": 34898, + "veld": 34011, + "veled": 15370, + "veli": 49166, + "veling": 37970, + "vell": 21173, + "vell": 32997, + "velo": 14357, + "velo": 33850, + "velocity": 23811, + "vels": 5109, + "velve": 37849, + "velvet": 11063, + "vely": 1708, + "vember": 3477, + "vement": 3129, + "vements": 11104, + "ven": 1240, + "ven": 1638, + "vena": 47442, + "vend": 10851, + "vending": 29202, + "vendor": 21261, + "vendors": 20353, + "vene": 5365, + "veness": 10516, + "venetian": 34336, + "venezia": 34139, + "venezu": 10939, + "venezuela": 12839, + "venezuelan": 34699, + "veng": 31526, + "venge": 27757, + "vengeance": 32057, + "veni": 31142, + "venice": 11010, + "vening": 47532, + "venison": 40037, + "venom": 42491, + "venom": 21588, + "vens": 20884, + "vent": 4373, + "vent": 5687, + "ventil": 39522, + "ventilation": 35066, + "venting": 15731, + "vention": 4122, + "vents": 12833, + "ventu": 48217, + "ventura": 20921, + "venture": 37046, + "venture": 12543, + "ventures": 20829, + "venue": 5097, + "venues": 18120, + "venus": 14691, + "ver": 624, + "ver": 667, + "vera": 13350, + "verage": 3725, + "verb": 34952, + "verbal": 26522, + "verbally": 39985, + "verbs": 45687, + "verde": 16935, + "verdi": 42306, + "verdict": 18030, + "vere": 11135, + "vere": 34707, + "vered": 2868, + "verge": 23913, + "veri": 11638, + "verification": 33521, + "verified": 22555, + "verify": 34722, + "vering": 4630, + "veriz": 19707, + "verizon": 21532, + "verma": 41261, + "vermont": 
19241, + "vern": 2214, + "vern": 12586, + "verne": 45553, + "vernon": 18348, + "vero": 45217, + "vero": 38208, + "verona": 31819, + "veronic": 39551, + "veronica": 24039, + "vers": 1219, + "vers": 2094, + "versa": 35765, + "versace": 25422, + "versail": 29857, + "versailles": 32129, + "versary": 2940, + "versatile": 18110, + "versatility": 41340, + "verse": 39466, + "verse": 3131, + "verses": 30769, + "versi": 8934, + "version": 3273, + "versions": 16190, + "versity": 1906, + "verst": 42484, + "verstappen": 45064, + "versus": 14548, + "versy": 18522, + "vert": 11742, + "verte": 35158, + "verted": 48173, + "verti": 30459, + "vertical": 14293, + "vertigo": 42477, + "verton": 40632, + "verts": 37265, + "very": 11698, + "very": 1070, + "veryday": 37944, + "verything": 45174, + "ves": 9616, + "ves": 1003, + "vesmatter": 47636, + "vespa": 46029, + "vessel": 16387, + "vessels": 22822, + "vest": 31657, + "vest": 12473, + "vesti": 40349, + "vests": 41906, + "vet": 12294, + "vet": 5951, + "veter": 4330, + "veteran": 20797, + "veteran": 8814, + "veterans": 7092, + "veteransday": 26409, + "veterin": 43959, + "veterinary": 25458, + "veto": 36570, + "vets": 13113, + "vette": 17045, + "vettel": 28700, + "vevo": 35141, + "vex": 36187, + "vex": 43978, + "vey": 34792, + "vey": 3884, + "vez": 35987, + "vez": 17226, + "vf": 25966, + "vfl": 33726, + "vfx": 30149, + "vg": 40591, + "vg": 22346, + "vh": 46953, + "vh": 23847, + "vhs": 21932, + "vi": 603, + "vi": 4259, + "via": 1048, + "viable": 25752, + "viadu": 37012, + "viaduct": 39113, + "vial": 39951, + "vian": 40487, + "vian": 16124, + "vibe": 37974, + "vibe": 12813, + "vibes": 7764, + "vibr": 9527, + "vibrant": 14270, + "vibration": 37456, + "vibrations": 43660, + "vic": 1555, + "vic": 4412, + "vica": 46168, + "vicar": 43899, + "vice": 43572, + "vice": 6931, + "vicente": 39411, + "vices": 8332, + "vich": 24143, + "vici": 46670, + "vicious": 25177, + "vick": 15116, + "vick": 29704, + "vickers": 48452, + "vicki": 34927, + "vicky": 37176, + "vicky": 25788, + "victi": 6861, + "victim": 9133, + "victims": 7131, + "victor": 2423, + "victor": 10690, + "victori": 17555, + "victoria": 39286, + "victoria": 6127, + "victorian": 12350, + "victorias": 47791, + "victories": 24577, + "victorious": 24033, + "victory": 36668, + "victory": 4127, + "vid": 17233, + "vid": 9284, + "vida": 19015, + "vidal": 36678, + "vide": 1334, + "vide": 45244, + "video": 9478, + "video": 1455, + "videogame": 35097, + "videogames": 21149, + "videos": 6081, + "vids": 23035, + "vidy": 29639, + "vidya": 45264, + "vie": 922, + "vie": 8538, + "vien": 36493, + "vienna": 12670, + "vier": 15352, + "vier": 11987, + "viera": 21114, + "viernes": 33826, + "vies": 22458, + "viest": 31979, + "viet": 17558, + "viet": 13128, + "vietnam": 19558, + "vietnam": 8623, + "vietnamese": 22382, + "view": 12004, + "view": 1093, + "viewed": 7226, + "viewer": 15061, + "viewers": 14275, + "viewing": 7124, + "viewpoint": 41604, + "views": 2758, + "vig": 8549, + "vig": 45083, + "vigil": 21538, + "vigil": 19896, + "vigilant": 43026, + "vigne": 40447, + "vigne": 34581, + "vigo": 44097, + "vigor": 26781, + "vii": 17759, + "viii": 20414, + "vijay": 12014, + "vijay": 10823, + "vijaysethu": 47966, + "vik": 10764, + "vik": 17181, + "vika": 39562, + "vikas": 37116, + "viking": 26663, + "viking": 15897, + "vikings": 11713, + "vikram": 41136, + "vikram": 24314, + "viktor": 36101, + "vil": 1338, + "vil": 3000, + "vila": 37505, + "vile": 27247, + "vill": 10481, + "vill": 45698, + "villa": 3203, + "villa": 7754, + "village": 34584, + 
"village": 4331, + "villagers": 34283, + "villages": 17621, + "villain": 15425, + "villains": 25271, + "villanova": 44025, + "villar": 35164, + "villas": 28907, + "ville": 11110, + "ville": 1930, + "villen": 46177, + "villi": 36907, + "vimeo": 48720, + "vin": 1379, + "vin": 2558, + "vina": 35682, + "vinai": 37396, + "vinaigrette": 39876, + "vinay": 43952, + "vince": 32429, + "vince": 6236, + "vincen": 33402, + "vincent": 29069, + "vincent": 10357, + "vinci": 30199, + "vind": 20275, + "vindic": 39582, + "vine": 8471, + "vine": 7721, + "vinegar": 23834, + "vines": 21268, + "vineyard": 16527, + "vineyards": 23082, + "ving": 5375, + "ving": 903, + "vingne": 42579, + "vings": 22510, + "vini": 48119, + "vinnie": 40885, + "vinny": 36794, + "vino": 14509, + "vinod": 43348, + "vins": 34820, + "vinson": 45945, + "vintag": 10936, + "vintage": 13654, + "vintage": 3266, + "viny": 40990, + "vinyl": 22835, + "vinyl": 5754, + "vio": 11913, + "vio": 20324, + "viol": 3164, + "viola": 27438, + "violate": 44875, + "violated": 38192, + "violating": 37554, + "violation": 22919, + "violations": 21969, + "violence": 5450, + "violent": 11565, + "violently": 47758, + "violet": 16118, + "violets": 42861, + "violin": 17058, + "violinist": 36299, + "vion": 35496, + "vious": 6418, + "viously": 7149, + "vip": 45714, + "vip": 7111, + "viper": 27401, + "vips": 41149, + "vir": 1790, + "vir": 25319, + "vira": 35910, + "viral": 11653, + "virat": 32473, + "virgil": 39076, + "virgin": 5651, + "virgin": 12103, + "virgini": 43426, + "virginia": 6728, + "virgo": 39978, + "viro": 32301, + "viron": 38309, + "virtu": 7977, + "virtual": 18059, + "virtual": 7790, + "virtually": 22475, + "virtualreality": 32608, + "virtue": 26860, + "virtues": 42167, + "virtuoso": 47027, + "virus": 11808, + "viruses": 34830, + "vis": 1301, + "vis": 5337, + "visa": 12802, + "visas": 41228, + "vise": 24977, + "vised": 14810, + "vish": 12024, + "vish": 29124, + "vishal": 33648, + "vishnu": 37816, + "visi": 1409, + "visibility": 15921, + "visible": 36658, + "visible": 8626, + "vising": 37439, + "vision": 11147, + "vision": 2515, + "visional": 24627, + "visionary": 22959, + "visions": 13804, + "visit": 3388, + "visit": 1600, + "visitation": 44370, + "visited": 5580, + "visiting": 4680, + "visitor": 13881, + "visitors": 9160, + "visits": 8489, + "visitscotland": 28760, + "visitspain": 48860, + "vism": 15514, + "viso": 46732, + "visor": 24217, + "vist": 21436, + "vista": 13865, + "visu": 7739, + "visual": 17004, + "visual": 7195, + "visualization": 28500, + "visualize": 45057, + "visually": 25743, + "visuals": 21315, + "viswas": 36513, + "viswasam": 47664, + "vit": 4056, + "vit": 35580, + "vita": 15700, + "vital": 32525, + "vital": 10585, + "vitality": 36385, + "vitam": 9856, + "vitamin": 13675, + "vitamins": 22582, + "vito": 36725, + "vity": 4893, + "vitz": 26188, + "vius": 41571, + "viv": 21827, + "viv": 35363, + "viva": 17399, + "vival": 35920, + "vive": 18980, + "vive": 24004, + "vivek": 36243, + "vivi": 11625, + "vivian": 30129, + "vivid": 22984, + "vivo": 28091, + "vivo": 25888, + "vix": 28976, + "vix": 34811, + "vixen": 38757, + "vixx": 32106, + "viz": 28251, + "viz": 31786, + "vj": 45439, + "vj": 30827, + "vk": 41893, + "vl": 37580, + "vl": 36442, + "vla": 23686, + "vlad": 41089, + "vladi": 19320, + "vladimir": 21702, + "vlive": 46797, + "vlog": 18894, + "vm": 16204, + "vm": 20269, + "vma": 35666, + "vmas": 30236, + "vmware": 29615, + "vn": 47098, + "vn": 25076, + "vo": 947, + "vo": 3951, + "voc": 4105, + "voc": 20855, + "vocab": 21346, + 
"vocabulary": 23804, + "vocal": 34037, + "vocal": 13147, + "vocali": 19134, + "vocalist": 22102, + "vocals": 17666, + "vocation": 20521, + "vocational": 33751, + "vod": 11820, + "vod": 35854, + "vodaf": 28436, + "vodafone": 38695, + "vodka": 13646, + "vogel": 44960, + "vogue": 24418, + "vogue": 13178, + "voic": 29185, + "voice": 13179, + "voice": 3386, + "voiced": 34352, + "voiceof": 44966, + "voiceover": 41979, + "voices": 9144, + "void": 21561, + "voip": 42762, + "voir": 16036, + "vol": 1343, + "vol": 7945, + "volatile": 41022, + "volatility": 32355, + "volcan": 9916, + "volcanic": 24072, + "volcano": 14581, + "volcanoes": 38055, + "voli": 40138, + "volk": 13432, + "volkswag": 14407, + "volkswagen": 15342, + "volley": 7130, + "volley": 34656, + "volleyball": 7458, + "volo": 44791, + "vols": 20404, + "volt": 26430, + "volta": 29879, + "volta": 33480, + "voltage": 23118, + "voltron": 39314, + "volu": 3563, + "volume": 8284, + "volumes": 22651, + "volun": 3356, + "voluntar": 48823, + "voluntary": 23815, + "volunte": 3556, + "volunteer": 32331, + "volunteer": 7114, + "volunteered": 34000, + "volunteering": 14902, + "volunteers": 5939, + "volution": 24043, + "volved": 42888, + "volvo": 39991, + "volvo": 16906, + "vom": 24198, + "vomit": 46485, + "von": 11269, + "von": 8497, + "voo": 19497, + "voodoo": 26869, + "voor": 34291, + "voor": 34464, + "vor": 8338, + "vor": 5308, + "vore": 18215, + "vortex": 30071, + "vos": 16863, + "vot": 48558, + "vote": 6830, + "vote": 2187, + "voted": 6454, + "votel": 41379, + "voter": 44474, + "voter": 14065, + "voters": 8925, + "votes": 6693, + "voting": 5756, + "vou": 11045, + "voucher": 18190, + "vouchers": 23384, + "vous": 10636, + "vow": 34787, + "vows": 21677, + "vox": 29215, + "vox": 22692, + "voy": 10622, + "voy": 15021, + "voyage": 16299, + "voyager": 29669, + "vp": 32758, + "vp": 3896, + "vpn": 38212, + "vr": 16840, + "vr": 5921, + "vre": 44500, + "vre": 17501, + "vs": 11385, + "vs": 1547, + "vsco": 26752, + "vsco": 32822, + "vscocam": 34694, + "vsky": 37791, + "vss": 31919, + "vt": 31732, + "vt": 10291, + "vu": 8664, + "vu": 13230, + "vue": 43915, + "vue": 19313, + "vuel": 31312, + "vuelta": 43856, + "vuitton": 26705, + "vul": 6856, + "vulcan": 34767, + "vulner": 11213, + "vulnerability": 28797, + "vulnerable": 14332, + "vulture": 34593, + "vultures": 47197, + "vv": 19264, + "vv": 35686, + "vw": 28650, + "vw": 13250, + "vx": 47644, + "vy": 11566, + "vy": 5157, + "w": 86, + "w": 342, + "wa": 869, + "wa": 2663, + "waa": 35874, + "wab": 19893, + "wab": 36852, + "wac": 27445, + "wac": 37947, + "wack": 22880, + "wack": 38270, + "wacky": 34318, + "waco": 36035, + "wad": 11133, + "wad": 30451, + "wada": 40006, + "wade": 40237, + "wade": 14180, + "wadi": 37253, + "waf": 17638, + "wafc": 49086, + "waff": 13940, + "waffle": 20375, + "waffles": 24205, + "wag": 5764, + "wag": 19177, + "wage": 10716, + "wager": 43430, + "wages": 19114, + "wagner": 18081, + "wagon": 13260, + "wagons": 47944, + "wags": 48580, + "wah": 24812, + "wah": 18014, + "wahl": 27500, + "wahlberg": 35151, + "wahoo": 47995, + "wai": 11469, + "wai": 21569, + "waifu": 46551, + "waikiki": 44907, + "wain": 28358, + "wain": 20120, + "wainwright": 45878, + "waist": 36946, + "waist": 18459, + "wait": 10021, + "wait": 1885, + "waite": 24272, + "waited": 18492, + "waiter": 32946, + "waitin": 44482, + "waiting": 2680, + "waitress": 39760, + "waitrose": 37164, + "waits": 21361, + "waiver": 42866, + "waj": 49367, + "wak": 11172, + "wak": 36015, + "waka": 42696, + "wake": 10501, + "wake": 5731, + 
"wakefield": 26358, + "wakes": 29108, + "wakeup": 26328, + "wakeup": 35380, + "wakeupamerica": 37474, + "waking": 13025, + "wal": 1056, + "wal": 6903, + "wala": 16468, + "walang": 49180, + "walcott": 45744, + "wald": 46930, + "wald": 15724, + "walden": 39311, + "waldo": 32440, + "waldorf": 38227, + "wale": 41247, + "wale": 20336, + "wales": 25383, + "wales": 5110, + "walgreens": 38490, + "wali": 37576, + "wali": 14768, + "walia": 44455, + "walk": 8588, + "walk": 2374, + "walkaway": 48255, + "walked": 8667, + "walker": 24735, + "walker": 6150, + "walkers": 23366, + "walkin": 45792, + "walking": 12644, + "walking": 3941, + "walkingdead": 14948, + "walkout": 47470, + "walks": 8192, + "walkway": 36614, + "wall": 4316, + "wall": 2569, + "walla": 26007, + "walla": 39982, + "wallabies": 48926, + "wallace": 12535, + "wallart": 36223, + "walled": 36567, + "waller": 45340, + "wallet": 12154, + "wallets": 38550, + "walleye": 49099, + "wallis": 42206, + "wallpaper": 10560, + "wallpapers": 29841, + "walls": 8258, + "wallstreet": 45341, + "wally": 26024, + "walmart": 11972, + "walnut": 16310, + "walnuts": 38294, + "walsall": 42935, + "walsh": 12856, + "walt": 23535, + "walt": 14312, + "waltdisneyworld": 36505, + "walter": 31156, + "walter": 10645, + "walters": 25532, + "waltham": 42742, + "waltham": 45581, + "walton": 19485, + "waltz": 35982, + "wam": 20503, + "wamy": 46970, + "wan": 2060, + "wan": 4557, + "wana": 30830, + "wand": 14636, + "wand": 28559, + "wanda": 25070, + "wander": 12985, + "wander": 24473, + "wandered": 46593, + "wanderers": 27540, + "wandering": 22597, + "wanderlust": 16129, + "wane": 27459, + "wang": 19731, + "wang": 11900, + "wani": 21674, + "wankers": 42189, + "wann": 23622, + "wanna": 35940, + "wanna": 3836, + "wannabe": 40730, + "wannaone": 44832, + "want": 18356, + "want": 1280, + "wanted": 3146, + "wanting": 12801, + "wants": 3107, + "wap": 27393, + "wap": 30368, + "waq": 47512, + "war": 984, + "war": 2238, + "wara": 21631, + "warbler": 33891, + "warcraft": 13660, + "ward": 7728, + "ward": 1460, + "warden": 27798, + "wardly": 30780, + "wardro": 14247, + "wardrobe": 15020, + "wards": 2593, + "ware": 7416, + "ware": 4476, + "wareagle": 35716, + "warehouse": 13054, + "wareness": 41601, + "wareness": 35870, + "wares": 30692, + "warfare": 15739, + "warhammer": 26832, + "warhol": 27554, + "wari": 20977, + "wark": 46346, + "wark": 15164, + "warlock": 42455, + "warm": 14725, + "warm": 3616, + "warmed": 36695, + "warmer": 14328, + "warmest": 30910, + "warming": 8606, + "warmly": 45322, + "warmongers": 33205, + "warms": 32917, + "warmth": 19636, + "warmup": 29904, + "warmups": 44094, + "warn": 19360, + "warned": 16409, + "warner": 28564, + "warner": 13402, + "warning": 4994, + "warnings": 18098, + "warns": 14086, + "waron": 38947, + "warp": 32411, + "warped": 32125, + "warran": 17392, + "warrant": 22554, + "warrants": 45677, + "warranty": 23999, + "warren": 23143, + "warren": 9234, + "warri": 4109, + "warrington": 31203, + "warrior": 18998, + "warrior": 8148, + "warriors": 6421, + "wars": 3931, + "warsaw": 21072, + "warship": 47846, + "wart": 43535, + "wart": 7346, + "wartime": 42998, + "warts": 21781, + "warwick": 23081, + "warwick": 22215, + "warwickshire": 36766, + "wary": 36213, + "was": 3398, + "was": 739, + "wasabi": 47334, + "wash": 3363, + "wash": 7810, + "washed": 14092, + "washer": 24085, + "washes": 38950, + "washing": 13029, + "washington": 16774, + "washington": 4365, + "washingtondc": 40225, + "washingtonpost": 28426, + "wasn": 5044, + "wasnt": 29607, + "wasp": 24889, + 
"wasps": 35300, + "wassup": 45708, + "wast": 28886, + "waste": 18157, + "waste": 6065, + "wasted": 18278, + "wasteland": 44035, + "wastewater": 34463, + "wasting": 25577, + "wat": 800, + "wat": 10621, + "wata": 42509, + "watch": 7046, + "watch": 1239, + "watchdog": 35303, + "watched": 5775, + "watcher": 35971, + "watchers": 28443, + "watches": 9521, + "watchin": 32432, + "watching": 2113, + "water": 2505, + "water": 1573, + "watercolor": 14211, + "watercolour": 18377, + "waterfall": 16403, + "waterfalls": 26692, + "waterford": 24448, + "waterfront": 16605, + "waterhouse": 45072, + "watering": 19871, + "waterloo": 17465, + "watermelon": 19889, + "waterproof": 17613, + "waters": 7753, + "watershed": 33204, + "waterstones": 45014, + "waterways": 37395, + "watford": 23162, + "watfordfc": 37328, + "wati": 27966, + "watkins": 22539, + "watson": 35490, + "watson": 9294, + "watt": 22899, + "watt": 15805, + "wattpad": 32351, + "watts": 14750, + "wau": 9479, + "wav": 6054, + "wave": 17530, + "wave": 4535, + "waved": 44657, + "waver": 25997, + "waves": 7882, + "waving": 26545, + "wavy": 31941, + "waw": 22039, + "wawrinka": 48414, + "wawx": 47387, + "wax": 18789, + "wax": 11910, + "waxing": 38781, + "way": 3079, + "way": 923, + "wayback": 47822, + "wayne": 23632, + "wayne": 7003, + "ways": 1248, + "waz": 20889, + "waz": 48835, + "wb": 10726, + "wb": 12377, + "wba": 22675, + "wbb": 14482, + "wbc": 26745, + "wbo": 49053, + "wbz": 35471, + "wc": 4842, + "wc": 5755, + "wcc": 47166, + "wcc": 34926, + "wcpo": 46624, + "wcs": 39916, + "wcvb": 32709, + "wcw": 9041, + "wd": 15998, + "wd": 7494, + "wdw": 40334, + "we": 598, + "we": 649, + "wea": 37146, + "wea": 47301, + "weak": 12128, + "weak": 10128, + "weaker": 39735, + "weakness": 21448, + "weaknesses": 43487, + "weal": 14759, + "wealth": 33150, + "wealth": 7904, + "wealthy": 22617, + "weap": 6156, + "weapon": 42612, + "weapon": 10537, + "weapons": 10007, + "wear": 12206, + "wear": 2839, + "wearab": 22983, + "wearable": 44943, + "wearable": 24973, + "wearables": 30319, + "weare": 4264, + "weare": 27867, + "weareall": 45980, + "wearec": 43620, + "wearen": 45635, + "weareone": 16149, + "weareoneexo": 16448, + "wearethe": 40242, + "wearing": 3309, + "wears": 11869, + "weary": 38766, + "weasel": 44308, + "weather": 8808, + "weather": 2237, + "weathercee": 44980, + "weatherchannel": 42138, + "weav": 22260, + "weave": 22450, + "weaver": 20297, + "weaving": 27131, + "web": 2055, + "web": 4601, + "webb": 15708, + "webber": 34248, + "webcam": 24211, + "webcam": 22589, + "webcamtoy": 27719, + "webcast": 28256, + "webcomic": 34286, + "webcomics": 39811, + "webdesign": 20470, + "webdev": 37000, + "webdevelopment": 47553, + "weber": 20179, + "webin": 8460, + "webinar": 8921, + "webinars": 47755, + "webpage": 46964, + "webs": 32829, + "webseries": 44819, + "website": 3364, + "websites": 19278, + "webster": 19471, + "websummit": 48069, + "wec": 33152, + "wechat": 46124, + "wed": 1687, + "wed": 3478, + "wedd": 7576, + "wedding": 11204, + "wedding": 3101, + "weddings": 15964, + "wedge": 21446, + "wedges": 33179, + "wedne": 2380, + "wednesday": 9311, + "wednesday": 2689, + "wednesdaymotivation": 37860, + "wednesdays": 24943, + "wednesdaywisdom": 11445, + "wedo": 43432, + "weds": 19107, + "wee": 716, + "wee": 8288, + "weed": 36935, + "weed": 8015, + "weeds": 26326, + "week": 1286, + "week": 994, + "weekday": 29244, + "weekdays": 44330, + "weekend": 17205, + "weekend": 1456, + "weekender": 36547, + "weekends": 14564, + "weekly": 34652, + "weekly": 5885, + "weeknd": 29925, + 
"weeks": 2898, + "weeksary": 24628, + "ween": 17517, + "ween": 1599, + "weep": 39270, + "weeping": 36629, + "weer": 32491, + "weet": 17742, + "weets": 13454, + "wef": 23313, + "weg": 47867, + "weg": 47561, + "wego": 44784, + "wego": 28220, + "weh": 48458, + "weh": 40313, + "weho": 47798, + "wei": 6958, + "wei": 20952, + "weibo": 20613, + "weigh": 10565, + "weigh": 17346, + "weighed": 33210, + "weighing": 24455, + "weighs": 20481, + "weight": 12723, + "weight": 3868, + "weighted": 43179, + "weightlifting": 36164, + "weightloss": 20359, + "weights": 21374, + "weil": 43720, + "weiler": 42203, + "wein": 29134, + "wein": 37684, + "weiner": 38822, + "weinstein": 34367, + "weir": 11299, + "weir": 25517, + "weird": 27981, + "weird": 5613, + "weirdest": 29482, + "weirdo": 32476, + "weis": 26251, + "weiser": 34833, + "weiss": 24794, + "wel": 1267, + "wel": 8042, + "welch": 25820, + "welcom": 11578, + "welcome": 18318, + "welcome": 1881, + "welcomed": 12590, + "welcomes": 9304, + "welcometo": 47511, + "welcoming": 8775, + "weld": 39776, + "welding": 24956, + "welfare": 12129, + "well": 3277, + "well": 1123, + "wellbeing": 14273, + "weller": 40921, + "welling": 49165, + "wellington": 15389, + "wellness": 40574, + "wellness": 9904, + "wells": 42705, + "wells": 9804, + "welove": 13573, + "welp": 28391, + "wels": 20852, + "welsh": 19173, + "welsh": 10977, + "welt": 38595, + "welter": 37115, + "welterweight": 39617, + "wemb": 15213, + "wembley": 16579, + "wen": 6590, + "wen": 11278, + "wend": 15166, + "wendell": 42091, + "wendy": 31616, + "wendy": 14074, + "wenger": 21105, + "went": 18633, + "went": 2437, + "wentworth": 36423, + "wentz": 39179, + "wer": 6316, + "wer": 2980, + "were": 15461, + "were": 1365, + "wered": 6605, + "weren": 13611, + "werewolf": 32001, + "werk": 30176, + "werner": 29917, + "wers": 7110, + "wes": 18620, + "wes": 14738, + "wesle": 29606, + "wesley": 17332, + "wesleyan": 32509, + "wesome": 33292, + "wess": 44431, + "west": 2973, + "west": 1593, + "westbound": 29208, + "westbrook": 26948, + "westchester": 36675, + "westcoast": 44610, + "westend": 44815, + "wester": 9846, + "western": 17079, + "western": 4463, + "westfield": 32309, + "westh": 36798, + "westin": 43232, + "westlake": 41535, + "westminster": 15158, + "weston": 22771, + "westside": 33762, + "westwood": 26371, + "westworld": 42287, + "wet": 12406, + "wet": 6682, + "weta": 40946, + "wethenorth": 45281, + "wethepeople": 48030, + "wether": 33794, + "wether": 48405, + "wetland": 37357, + "wetlands": 26547, + "wett": 41971, + "wetter": 43957, + "wewant": 39280, + "wewill": 37241, + "wex": 17234, + "wexford": 29876, + "wexmondays": 49042, + "wey": 30376, + "wey": 19781, + "weymouth": 41433, + "wf": 14576, + "wf": 22313, + "wfa": 44606, + "wfc": 36431, + "wfp": 35193, + "wftv": 47075, + "wg": 21091, + "wg": 25857, + "wga": 32354, + "wgn": 48828, + "wh": 573, + "wh": 13844, + "wha": 18994, + "wha": 25884, + "whal": 38967, + "whale": 37083, + "whale": 11650, + "whales": 17722, + "wham": 42506, + "whar": 15517, + "wharf": 22452, + "wharton": 43320, + "what": 4268, + "what": 768, + "whatcha": 37160, + "whate": 6695, + "whatever": 6743, + "whati": 23500, + "whats": 9263, + "whats": 13084, + "whatsapp": 10119, + "whatsoever": 39928, + "whatson": 35632, + "whatyou": 30508, + "whe": 2009, + "whead": 34583, + "wheat": 20505, + "wheat": 10303, + "wheaton": 46933, + "wheel": 7360, + "wheel": 6744, + "wheelchair": 17713, + "wheeler": 18405, + "wheeling": 34839, + "wheels": 8025, + "whel": 9792, + "whelan": 40715, + "when": 8753, + "when": 
827, + "whenever": 10500, + "where": 7052, + "where": 1234, + "whereabouts": 47808, + "whereas": 42234, + "wheres": 46345, + "wherever": 14103, + "whereyou": 46837, + "whether": 5903, + "whew": 39016, + "whey": 34556, + "whi": 4295, + "whi": 33129, + "which": 1448, + "whiche": 48719, + "whichever": 49138, + "whil": 8499, + "while": 1519, + "whilst": 8596, + "whim": 27766, + "whimsical": 42282, + "whip": 14412, + "whipped": 22323, + "whipping": 41567, + "whir": 20873, + "whirl": 30962, + "whirlwind": 47771, + "whis": 6024, + "whiskey": 41381, + "whiskey": 11610, + "whisky": 37567, + "whisky": 12599, + "whisp": 21986, + "whispe": 30356, + "whisper": 27616, + "whisperer": 41368, + "whispering": 42599, + "whispers": 29133, + "whist": 13640, + "whistle": 23972, + "whistle": 19746, + "whistleblower": 40410, + "whistler": 29633, + "whit": 4398, + "whit": 31498, + "whitaker": 35851, + "whitby": 30858, + "white": 4699, + "white": 1579, + "whiteboard": 40839, + "whitec": 24575, + "whitehall": 42827, + "whitehead": 43560, + "whitehouse": 20776, + "whitening": 35540, + "whitepaper": 42713, + "whites": 35886, + "whites": 18835, + "whitesox": 28816, + "whitewater": 49350, + "whitfield": 48404, + "whitley": 40564, + "whitman": 32394, + "whitney": 43021, + "whitney": 18048, + "whitt": 33784, + "whittaker": 47595, + "whl": 25801, + "who": 2969, + "who": 822, + "whoa": 16943, + "whoever": 11137, + "whois": 41884, + "whole": 10360, + "whole": 2954, + "wholefoods": 42840, + "wholesale": 18306, + "wholesome": 35959, + "whom": 38158, + "whom": 12873, + "whoo": 20003, + "whoo": 49290, + "whoop": 22060, + "whoops": 28433, + "whopping": 34384, + "whore": 31690, + "whos": 41460, + "whos": 27130, + "whose": 6933, + "whouse": 45927, + "whs": 26292, + "wht": 32470, + "whufc": 31695, + "whun": 18272, + "why": 11040, + "why": 1182, + "whyte": 42386, + "wi": 820, + "wi": 5585, + "wib": 45303, + "wic": 7834, + "wich": 9759, + "wich": 5238, + "wichita": 22566, + "wick": 6798, + "wick": 6479, + "wicked": 32579, + "wicked": 12825, + "wicker": 38096, + "wicket": 19180, + "wickets": 22110, + "wicklow": 39039, + "wicz": 30121, + "wid": 11886, + "wid": 20886, + "wide": 19341, + "wide": 3184, + "widely": 16195, + "widening": 46598, + "wider": 21263, + "widesp": 20598, + "widespread": 21258, + "widget": 43906, + "wido": 28068, + "widow": 19949, + "widows": 42129, + "width": 23571, + "wie": 21378, + "wie": 9131, + "wielding": 47272, + "wien": 38131, + "wiener": 40567, + "wies": 42788, + "wif": 37572, + "wife": 3607, + "wifey": 35282, + "wifi": 11026, + "wig": 23690, + "wig": 12216, + "wigan": 23130, + "wiggins": 32329, + "wiggle": 47812, + "wight": 41278, + "wight": 15545, + "wigs": 31207, + "wii": 8005, + "wiiu": 40980, + "wiki": 10373, + "wiki": 24265, + "wikileaks": 28731, + "wikipedia": 15176, + "wil": 1352, + "wil": 20581, + "wilbur": 43069, + "wilcox": 43231, + "wild": 2780, + "wild": 3220, + "wildatlantic": 35500, + "wildatlanticway": 35776, + "wildcard": 37360, + "wildcat": 49077, + "wildcat": 25870, + "wildcats": 15909, + "wilde": 23498, + "wilder": 14343, + "wilder": 23499, + "wilderness": 16506, + "wildest": 43028, + "wildfire": 22788, + "wildfires": 29184, + "wildflower": 27628, + "wildflower": 33181, + "wildflowerhour": 31302, + "wildflowers": 29136, + "wildlife": 13298, + "wildlife": 5250, + "wildlifephotography": 32307, + "wildlifewednesday": 48537, + "wildly": 35981, + "wildoz": 40113, + "wiley": 32747, + "wilhelm": 39696, + "wilkes": 39548, + "wilkins": 36986, + "wilkinson": 26797, + "will": 5062, + "will": 751, + 
"willam": 43276, + "willard": 44920, + "wille": 48739, + "willem": 38044, + "willi": 2256, + "william": 8420, + "william": 4705, + "williams": 38452, + "williams": 4075, + "williamsburg": 30683, + "williamson": 20793, + "willie": 13907, + "willing": 34160, + "willing": 11718, + "willingness": 40573, + "willis": 18491, + "willow": 33887, + "willow": 15665, + "wills": 26913, + "willy": 34502, + "willy": 19599, + "wilmington": 28052, + "wilms": 47879, + "wilshere": 48359, + "wilson": 23629, + "wilson": 5622, + "wilt": 23394, + "wilt": 47357, + "wilton": 46638, + "wiltshire": 28025, + "wim": 8662, + "wim": 27580, + "wimble": 11752, + "wimbledon": 12229, + "win": 831, + "win": 1225, + "winchester": 20647, + "wind": 6812, + "wind": 3630, + "winder": 44454, + "winder": 46245, + "winding": 22390, + "windmill": 34084, + "windo": 3110, + "window": 26675, + "window": 4879, + "windows": 5437, + "winds": 12668, + "winds": 7012, + "windshield": 33002, + "windsor": 44322, + "windsor": 12884, + "windy": 13446, + "wine": 7375, + "wine": 2604, + "winelover": 26357, + "winemaker": 41588, + "wineoclock": 43846, + "wineries": 49349, + "winery": 15500, + "wines": 8263, + "winetasting": 41288, + "winewednesday": 35447, + "wing": 8141, + "wing": 1340, + "winged": 24993, + "winger": 22727, + "winget": 44578, + "wings": 5178, + "wink": 34455, + "wink": 25859, + "winkle": 36430, + "winn": 38104, + "winne": 46273, + "winner": 32961, + "winner": 2520, + "winners": 4320, + "winni": 13018, + "winnie": 29022, + "winning": 42099, + "winning": 2577, + "winnings": 46490, + "winnipeg": 14369, + "winona": 49202, + "wins": 46839, + "wins": 2718, + "winslow": 39658, + "winston": 14848, + "winter": 7340, + "winter": 2541, + "winters": 21587, + "wintry": 39504, + "wip": 10447, + "wipe": 26761, + "wiped": 31822, + "wipes": 33463, + "wir": 16849, + "wir": 44838, + "wire": 7558, + "wire": 7794, + "wired": 18935, + "wireless": 9103, + "wires": 24311, + "wiring": 36434, + "wirral": 34675, + "wis": 3392, + "wis": 20405, + "wiscon": 9857, + "wisconsin": 10265, + "wisdom": 42474, + "wisdom": 5425, + "wise": 19116, + "wise": 5558, + "wisely": 26173, + "wiser": 44859, + "wish": 11328, + "wish": 2412, + "wished": 25883, + "wishes": 6045, + "wishing": 5307, + "wishlist": 31969, + "wit": 584, + "wit": 8531, + "witch": 20139, + "witch": 10083, + "witchcraft": 35065, + "witcher": 33684, + "witches": 21673, + "with": 1435, + "with": 593, + "withdra": 24696, + "withdraw": 31670, + "withdrawal": 25765, + "withdrawn": 46687, + "withdraws": 48637, + "wither": 39655, + "witherspoon": 45409, + "within": 4154, + "withme": 44670, + "without": 32836, + "without": 2193, + "withstand": 42236, + "withthe": 36872, + "withus": 30572, + "withyou": 30351, + "witne": 12096, + "witness": 8793, + "witnessed": 20187, + "witnesses": 22778, + "witnessing": 33618, + "wits": 30938, + "witt": 38194, + "witt": 17168, + "witter": 31597, + "witty": 29970, + "witz": 44186, + "witz": 13265, + "wiv": 48925, + "wives": 14378, + "wiwx": 44461, + "wiz": 7730, + "wiz": 23178, + "wizar": 49121, + "wizard": 30490, + "wizard": 14295, + "wizards": 19140, + "wizkid": 40146, + "wj": 19739, + "wj": 35453, + "wk": 11512, + "wk": 11528, + "wkend": 42336, + "wknd": 20851, + "wks": 25508, + "wku": 43377, + "wl": 13299, + "wl": 9613, + "wm": 20268, + "wm": 15790, + "wn": 1186, + "wn": 757, + "wnba": 32358, + "wned": 8628, + "wns": 12950, + "wnt": 22484, + "wny": 24833, + "wo": 1613, + "wo": 11132, + "woah": 17751, + "wob": 35984, + "woc": 39011, + "wod": 41522, + "woes": 27860, + "wof": 
45671, + "woj": 48931, + "wok": 28912, + "woke": 9331, + "woken": 43697, + "woking": 43931, + "wol": 2798, + "wol": 48622, + "wold": 42399, + "wolf": 9453, + "wolf": 5916, + "wolfe": 24989, + "wolff": 34369, + "wolfgang": 34061, + "wolfpack": 30887, + "wolve": 45101, + "wolver": 14334, + "wolverhampton": 34518, + "wolverine": 23353, + "wolverines": 42003, + "wolves": 9372, + "wom": 1087, + "womack": 48980, + "woman": 15716, + "woman": 2308, + "womanc": 35630, + "womancrush": 37721, + "womancrushwednesday": 39714, + "womanin": 30562, + "womaninbiz": 36482, + "womb": 37023, + "women": 3648, + "women": 1507, + "womenin": 13062, + "womeninscience": 41343, + "womeninstem": 29380, + "womenintech": 31470, + "womenof": 48421, + "womens": 12822, + "womens": 14408, + "womensart": 38548, + "womensday": 13956, + "womenshi": 22887, + "womenshistorymonth": 24982, + "womensmarch": 30102, + "won": 1528, + "won": 1749, + "wonder": 2070, + "wonder": 3936, + "wondercon": 46944, + "wondered": 15550, + "wonderful": 2582, + "wonderfully": 23245, + "wondering": 8360, + "wonderland": 13874, + "wonders": 14048, + "wonderwoman": 31000, + "wondo": 38402, + "wondr": 46771, + "wong": 17876, + "wonka": 43463, + "wont": 43174, + "wont": 15952, + "woo": 1867, + "woo": 9322, + "wood": 3269, + "wood": 1704, + "woodbridge": 49074, + "wooden": 48226, + "wooden": 9057, + "woodland": 44314, + "woodland": 17447, + "woodlands": 32430, + "woodley": 40566, + "woodpecker": 32684, + "woods": 6267, + "woodson": 48967, + "woodstock": 29486, + "woodward": 27419, + "woodwork": 47386, + "woodworking": 29267, + "woody": 38627, + "woody": 17144, + "woof": 34234, + "woof": 24028, + "woohoo": 20172, + "wook": 29192, + "wool": 9967, + "wool": 13283, + "woolf": 43728, + "woolly": 47722, + "woon": 33126, + "wooo": 43217, + "woop": 31884, + "woot": 22466, + "wor": 641, + "worcester": 22172, + "worcester": 19580, + "worcestershire": 38440, + "worcestershirehour": 43644, + "word": 8272, + "word": 2653, + "wordof": 33500, + "wordoftheday": 43594, + "wordpress": 15193, + "words": 31007, + "words": 2709, + "wore": 8953, + "work": 1636, + "work": 951, + "workday": 29735, + "worked": 5410, + "worker": 8098, + "workers": 4795, + "workflow": 28502, + "workforce": 14672, + "workin": 31825, + "workin": 26323, + "working": 20806, + "working": 1699, + "workinprogress": 46086, + "workout": 6773, + "workouts": 22779, + "workplace": 11959, + "workplaces": 47383, + "works": 2322, + "workshop": 3832, + "workshops": 12262, + "workspace": 34470, + "worl": 5221, + "world": 2334, + "world": 1002, + "worlda": 46627, + "worldbank": 36759, + "worldbookday": 31191, + "worldcup": 42525, + "worldcup": 8650, + "worlden": 44668, + "worldenviron": 47115, + "worldenvironmentday": 47522, + "worldly": 36268, + "worldo": 41698, + "worldof": 22636, + "worldre": 33951, + "worlds": 7691, + "worldseries": 26695, + "worldtour": 23202, + "worldwater": 41176, + "worldwaterday": 44520, + "worldwide": 6214, + "worm": 33709, + "worm": 10945, + "worms": 20231, + "worn": 9037, + "worried": 11911, + "worries": 17684, + "worry": 7534, + "worrying": 24058, + "worse": 8236, + "worsen": 46344, + "worshi": 31840, + "worship": 46399, + "worship": 9023, + "worst": 5719, + "wort": 30209, + "worth": 10671, + "worth": 2450, + "worthing": 39929, + "worthit": 40830, + "worthless": 44736, + "worths": 44633, + "worthwhile": 36295, + "worthy": 8881, + "worx": 44973, + "wot": 24863, + "wou": 5279, + "would": 39873, + "would": 1311, + "wouldn": 5878, + "wouldnt": 41595, + "wound": 19231, + "wounded": 14859, + 
"wounds": 21290, + "woven": 19830, + "wow": 22191, + "wow": 2781, + "woz": 44558, + "wozni": 47782, + "wp": 15378, + "wp": 13302, + "wpg": 35048, + "wps": 33386, + "wq": 45195, + "wr": 1189, + "wr": 8028, + "wra": 3852, + "wra": 46004, + "wral": 49050, + "wrangler": 30923, + "wrap": 7094, + "wrapped": 9875, + "wrapping": 15223, + "wraps": 18236, + "wrath": 29783, + "wray": 48943, + "wrc": 16004, + "wre": 3168, + "wreath": 23091, + "wrec": 20879, + "wreck": 28775, + "wreck": 15017, + "wrecked": 32695, + "wreckem": 45676, + "wrecking": 36956, + "wrecks": 45545, + "wren": 20191, + "wren": 31970, + "wrench": 30980, + "wrest": 4177, + "wrestle": 17097, + "wrestle": 28086, + "wrestlemania": 18849, + "wrestler": 19790, + "wrestlers": 25902, + "wrestling": 31292, + "wrestling": 5904, + "wrexham": 34479, + "wri": 7667, + "wri": 42007, + "wright": 28616, + "wright": 6991, + "wrights": 43711, + "wrigley": 33538, + "wrink": 22201, + "wrinkle": 46642, + "wrinkles": 35525, + "wrist": 19243, + "wrist": 16139, + "wristband": 36890, + "wristbands": 44864, + "writ": 2902, + "write": 28874, + "write": 4946, + "writer": 27886, + "writer": 4422, + "writers": 18742, + "writers": 7307, + "writerslife": 25007, + "writes": 8023, + "writing": 16053, + "writing": 2979, + "writingcommunity": 39178, + "writings": 36259, + "written": 5231, + "wro": 5447, + "wrong": 18381, + "wrong": 3669, + "wrongly": 45642, + "wrote": 5796, + "wrought": 48125, + "wrs": 45280, + "ws": 6300, + "ws": 799, + "wsb": 30681, + "wsbtv": 38394, + "wsj": 19764, + "wski": 12548, + "wsl": 43706, + "wsoc": 40253, + "wson": 33954, + "wsop": 41231, + "wsu": 44674, + "wsu": 32913, + "wsw": 43285, + "wt": 15873, + "wt": 12255, + "wta": 25984, + "wtc": 39718, + "wtf": 6891, + "wth": 23021, + "wthr": 45269, + "wti": 47345, + "wto": 36406, + "wts": 32159, + "wu": 9710, + "wu": 9837, + "wud": 43870, + "wul": 35154, + "wunder": 36661, + "wur": 24040, + "wurst": 44409, + "wusa": 40021, + "wut": 28590, + "wv": 18920, + "wv": 14743, + "wvu": 44878, + "wvu": 25879, + "ww": 3181, + "ww": 4491, + "wwc": 26505, + "wwdc": 47441, + "wwe": 12112, + "wwe": 5290, + "wwen": 23308, + "wwenetwork": 37228, + "wwenxt": 39898, + "wwer": 32038, + "wwf": 23332, + "wwfc": 42681, + "wwg": 35322, + "wwi": 20194, + "wwii": 10261, + "www": 26074, + "www": 9667, + "wwwbigbaldhead": 30761, + "wwww": 34224, + "wwww": 25200, + "wwwww": 48268, + "wwx": 47431, + "wx": 18192, + "wx": 3561, + "wy": 4665, + "wy": 7625, + "wyatt": 21660, + "wyd": 33113, + "wye": 48436, + "wye": 43751, + "wylie": 49330, + "wyn": 11802, + "wyn": 17504, + "wynn": 36117, + "wynne": 35951, + "wynonna": 41456, + "wynonnaearp": 43755, + "wyoming": 18693, + "x": 87, + "x": 343, + "xa": 24831, + "xan": 45530, + "xander": 45601, + "xavi": 36342, + "xavier": 41044, + "xavier": 18567, + "xb": 33678, + "xbox": 18063, + "xbox": 7748, + "xboxone": 27410, + "xc": 12515, + "xchange": 49132, + "xd": 6380, + "xe": 42886, + "xe": 19183, + "xen": 15568, + "xer": 49005, + "xf": 35274, + "xfactor": 25211, + "xfinity": 35107, + "xford": 34732, + "xh": 45771, + "xham": 25284, + "xi": 2467, + "xi": 7376, + "xia": 19854, + "xia": 20724, + "xian": 42570, + "xiao": 49318, + "xiaomi": 27477, + "xico": 38469, + "xide": 17398, + "xie": 40122, + "xie": 15976, + "xii": 36525, + "xiii": 28199, + "xim": 11217, + "xin": 27053, + "xin": 41517, + "xing": 14383, + "xion": 24164, + "xis": 35793, + "xit": 5316, + "xiumin": 36563, + "xiv": 16125, + "xj": 42453, + "xl": 36529, + "xl": 8833, + "xley": 38223, + "xm": 18626, + "xma": 48805, + "xmas": 
48848, + "xmas": 6425, + "xmen": 28708, + "xn": 25388, + "xo": 26936, + "xo": 9000, + "xon": 29186, + "xon": 8482, + "xox": 11531, + "xox": 34050, + "xoxo": 13313, + "xp": 15651, + "xper": 32200, + "xperia": 37615, + "xpo": 44377, + "xpress": 31809, + "xq": 40606, + "xr": 26276, + "xrp": 26965, + "xs": 16397, + "xt": 1052, + "xtina": 45520, + "xton": 32666, + "xton": 10597, + "xtra": 26969, + "xtre": 27025, + "xtreme": 33483, + "xu": 42063, + "xu": 37198, + "xv": 17768, + "xvi": 44031, + "xx": 5675, + "xx": 3553, + "xxl": 29777, + "xxx": 33923, + "xxx": 8352, + "xxxx": 32035, + "xxxx": 22819, + "xxxxx": 44195, + "xy": 20023, + "xy": 11443, + "y": 88, + "y": 344, + "ya": 5018, + "ya": 1430, + "yaa": 48847, + "yaa": 34498, + "yaan": 34680, + "yab": 27737, + "yach": 9039, + "yacht": 43806, + "yacht": 12859, + "yachts": 29260, + "yad": 13276, + "yad": 40047, + "yadav": 26650, + "yaf": 38019, + "yag": 35081, + "yah": 16170, + "yah": 12381, + "yaho": 37929, + "yahoo": 38152, + "yahoo": 16846, + "yak": 11014, + "yak": 29074, + "yaki": 44677, + "yaku": 29572, + "yakuza": 42628, + "yal": 16198, + "yal": 13418, + "yale": 39926, + "yale": 17157, + "yall": 9210, + "yam": 6666, + "yam": 19318, + "yama": 23512, + "yamaha": 18854, + "yan": 3949, + "yan": 4788, + "yana": 18698, + "yand": 38609, + "yang": 23818, + "yang": 12605, + "yani": 26439, + "yankee": 21554, + "yankees": 11889, + "yann": 40246, + "yann": 38657, + "yao": 45231, + "yap": 48700, + "yap": 34468, + "yar": 6786, + "yar": 23071, + "yard": 20234, + "yard": 4313, + "yards": 7550, + "yarmouth": 45941, + "yarn": 19702, + "yarra": 46824, + "yas": 8168, + "yas": 20570, + "yash": 30216, + "yash": 37836, + "yasi": 37700, + "yasss": 23873, + "yat": 29443, + "yat": 34965, + "yates": 27677, + "yatra": 38932, + "yav": 41275, + "yaw": 31989, + "yawn": 48643, + "yay": 20614, + "yay": 6712, + "yaya": 37608, + "yaz": 19348, + "yaz": 42252, + "yb": 41785, + "yb": 27615, + "yc": 11931, + "ycle": 38089, + "yd": 29896, + "yd": 9534, + "yday": 15899, + "yds": 24819, + "ye": 693, + "ye": 4582, + "yea": 13687, + "yeah": 29405, + "yeah": 3908, + "year": 5163, + "year": 935, + "yearbook": 21636, + "yearling": 48392, + "yearly": 24541, + "yearof": 31944, + "yearofthe": 47899, + "years": 30864, + "years": 1151, + "yearsof": 14932, + "yearswith": 45249, + "yeast": 25819, + "yeats": 44903, + "yed": 28137, + "yed": 3301, + "yee": 18114, + "yee": 23108, + "yeezy": 24901, + "yeg": 16854, + "yeg": 11976, + "yegfood": 48711, + "yeh": 21331, + "yel": 3323, + "yel": 48164, + "yell": 30824, + "yelled": 39199, + "yelling": 26581, + "yellow": 12059, + "yellow": 4481, + "yellowstone": 29241, + "yelp": 31674, + "yemen": 29276, + "yemen": 12513, + "yemeni": 44656, + "yemi": 42267, + "yen": 29602, + "yen": 17960, + "yeo": 32292, + "yeo": 43830, + "yeol": 15808, + "yeon": 16602, + "yep": 10964, + "yer": 15491, + "yer": 2371, + "yers": 3722, + "yes": 21620, + "yes": 1958, + "yess": 42778, + "yess": 40189, + "yesss": 36210, + "yessss": 45620, + "yester": 1905, + "yesterday": 1926, + "yesterdays": 36238, + "yesung": 38527, + "yet": 2296, + "yeti": 34228, + "yev": 39855, + "yew": 34660, + "yey": 45447, + "yg": 16396, + "ygk": 44758, + "ygo": 46166, + "yh": 41978, + "yi": 5826, + "yi": 14762, + "yield": 16825, + "yields": 24856, + "yikes": 25094, + "yin": 26476, + "yin": 23543, + "ying": 42933, + "ying": 910, + "yixing": 32120, + "yk": 30965, + "yl": 2656, + "yl": 4045, + "ylan": 41875, + "ylde": 42850, + "yle": 32305, + "yle": 10770, + "ylene": 34239, + "yler": 48081, + "yles": 42860, + 
"ylon": 22375, + "ylor": 48468, + "ym": 1786, + "ym": 19587, + "yman": 29077, + "ymc": 47101, + "ymca": 22369, + "yment": 8199, + "ymes": 39968, + "ymi": 5271, + "ymm": 37133, + "ymoun": 41426, + "ymouth": 36429, + "yn": 2823, + "yn": 4100, + "yne": 18238, + "ynes": 18020, + "ynn": 10499, + "ynna": 48292, + "ynwa": 27372, + "yo": 586, + "yo": 3497, + "yoda": 31922, + "yof": 5966, + "yofficial": 21818, + "yofthe": 43983, + "yog": 34985, + "yog": 36539, + "yoga": 25872, + "yoga": 5523, + "yogh": 32626, + "yoghurt": 33491, + "yogi": 22766, + "yogur": 16137, + "yogurt": 16819, + "yoh": 48880, + "yoke": 41969, + "yoko": 25929, + "yoko": 32256, + "yokohama": 42409, + "yol": 19387, + "yol": 35218, + "yolanda": 43845, + "yolo": 20905, + "yom": 34718, + "yom": 44527, + "yon": 10147, + "yon": 7604, + "yong": 27960, + "yong": 20887, + "yonge": 48592, + "yoo": 25842, + "yoo": 20775, + "yoon": 30863, + "yoon": 22113, + "yoona": 32736, + "yoongi": 24037, + "yor": 2028, + "yor": 21132, + "york": 5318, + "york": 2705, + "yorker": 23865, + "yorkers": 41041, + "yorks": 39093, + "yorkshi": 43367, + "yorkshire": 27007, + "yorkshire": 8633, + "yoruba": 46083, + "yos": 35607, + "yosemite": 25893, + "yoshi": 22920, + "yoshi": 25354, + "yot": 22875, + "yotes": 46157, + "yotpo": 26113, + "you": 1562, + "you": 592, + "youare": 33879, + "youcan": 32498, + "youknow": 47919, + "youknow": 41088, + "youn": 1596, + "young": 6939, + "young": 1888, + "younger": 10414, + "youngest": 12316, + "youngjae": 46426, + "youngster": 35881, + "youngsters": 28098, + "younow": 33831, + "your": 2130, + "your": 695, + "youre": 28344, + "youre": 19695, + "yourown": 28583, + "yours": 3834, + "yourself": 3053, + "yourselves": 19747, + "youth": 10743, + "youth": 3281, + "youthful": 37480, + "youths": 23614, + "youts": 22737, + "youtu": 13868, + "youtube": 31258, + "youtube": 3895, + "youtuber": 24720, + "youtubers": 36822, + "youu": 35055, + "youuu": 35324, + "youuuu": 47123, + "yoy": 41865, + "yp": 38370, + "yp": 34734, + "ypg": 37386, + "yql": 46122, + "yqr": 36881, + "yr": 18395, + "yr": 4333, + "yrs": 4822, + "ys": 1971, + "ys": 961, + "yser": 33121, + "ysis": 4843, + "ysl": 45681, + "ysm": 23842, + "yst": 40528, + "yt": 36777, + "yt": 14779, + "ytd": 47524, + "yte": 48172, + "yu": 3371, + "yu": 8887, + "yuan": 26236, + "yuck": 48282, + "yugo": 48231, + "yuh": 42547, + "yui": 47932, + "yuk": 17037, + "yuk": 24063, + "yuki": 34010, + "yukon": 27094, + "yul": 39832, + "yum": 6869, + "yum": 7259, + "yuma": 47566, + "yummy": 7687, + "yun": 14976, + "yun": 18288, + "yung": 44545, + "yung": 17676, + "yunho": 39748, + "yup": 13231, + "yur": 42533, + "yuri": 23823, + "yusuf": 33222, + "yuv": 36784, + "yves": 33698, + "yvon": 23327, + "yvonne": 32583, + "yvr": 29058, + "yw": 33741, + "yx": 35624, + "yxe": 34240, + "yy": 3433, + "yy": 8321, + "yya": 37444, + "yyc": 27542, + "yyc": 11741, + "yyj": 26203, + "yyy": 11514, + "yyyy": 38749, + "yyyy": 16955, + "yyyyy": 26089, + "yyyyyy": 47055, + "yz": 37579, + "yz": 46451, + "yü": 48232, + "z": 89, + "z": 345, + "za": 3710, + "za": 2186, + "zab": 22982, + "zable": 37002, + "zac": 25501, + "zac": 19159, + "zach": 13401, + "zach": 11815, + "zachary": 32401, + "zack": 30567, + "zack": 19120, + "zad": 47314, + "zad": 27838, + "zada": 34889, + "zaf": 21837, + "zafar": 46668, + "zag": 26091, + "zag": 29346, + "zagre": 34107, + "zagreb": 35355, + "zah": 23258, + "zah": 43297, + "zaha": 44408, + "zai": 44329, + "zai": 27065, + "zain": 34400, + "zain": 45366, + "zak": 13050, + "zak": 20738, + "zaki": 48091, + 
"zal": 20552, + "zal": 33298, + "zam": 7218, + "zam": 41578, + "zambia": 21671, + "zan": 7284, + "zan": 17835, + "zana": 39643, + "zand": 37712, + "zane": 34786, + "zani": 45373, + "zania": 15059, + "zano": 27637, + "zanzi": 47835, + "zap": 24134, + "zapp": 33504, + "zappa": 46592, + "zar": 5458, + "zar": 16392, + "zara": 24454, + "zardari": 20174, + "zas": 48261, + "zation": 3683, + "zawa": 49281, + "zay": 7102, + "zayed": 36726, + "zayn": 22292, + "zayn": 10308, + "zaynmalik": 25278, + "zazzle": 47857, + "ze": 2254, + "ze": 1298, + "zeal": 44951, + "zealand": 7618, + "zeb": 46518, + "zebra": 47394, + "zebra": 22548, + "zed": 21047, + "zed": 1993, + "zedd": 45608, + "zee": 25468, + "zee": 14080, + "zeiss": 47460, + "zeit": 37898, + "zeit": 37906, + "zek": 40829, + "zeke": 47065, + "zel": 10389, + "zel": 12027, + "zelda": 17138, + "zell": 39526, + "zen": 8518, + "zen": 3928, + "zend": 33478, + "zendaya": 35956, + "zenith": 44740, + "zens": 15298, + "zeph": 40726, + "zepp": 22977, + "zeppelin": 25408, + "zer": 6118, + "zer": 3716, + "zero": 14867, + "zero": 5848, + "zers": 9547, + "zes": 4073, + "zest": 37709, + "zet": 34098, + "zeta": 30954, + "zetta": 45993, + "zeus": 32800, + "zey": 46647, + "zh": 33389, + "zh": 41621, + "zhang": 21127, + "zhen": 37374, + "zhen": 33236, + "zhou": 17384, + "zhu": 42049, + "zi": 2651, + "zi": 5819, + "zia": 13764, + "zid": 30235, + "zidane": 34643, + "zie": 29316, + "zie": 8956, + "zieg": 40157, + "ziegler": 46812, + "ziel": 32151, + "zier": 15399, + "zies": 38001, + "ziest": 28159, + "zig": 15950, + "zig": 21345, + "ziggy": 39274, + "zik": 30125, + "zika": 28783, + "zil": 25039, + "zil": 33190, + "zilla": 17879, + "zim": 8112, + "zim": 22577, + "zimbab": 12373, + "zimbabwe": 45668, + "zimbabwe": 13583, + "zimmer": 27452, + "zimmer": 35211, + "zimmerman": 38231, + "zin": 14085, + "zin": 21278, + "zinc": 27458, + "zind": 26206, + "zindabad": 42208, + "zine": 16100, + "zing": 25062, + "zing": 3152, + "zinger": 42027, + "zio": 13906, + "zion": 31763, + "zion": 20963, + "zione": 36161, + "zionist": 33078, + "zip": 26479, + "zip": 16083, + "zipper": 33670, + "zir": 31892, + "zl": 39168, + "zlat": 32489, + "zlatan": 37877, + "zm": 43691, + "zman": 24248, + "zn": 18004, + "zo": 4397, + "zo": 5056, + "zodi": 22660, + "zodiac": 27753, + "zoe": 43114, + "zoe": 16662, + "zoey": 39871, + "zog": 40680, + "zol": 25939, + "zola": 46105, + "zom": 6623, + "zombi": 29452, + "zombie": 11819, + "zombies": 46702, + "zombies": 16517, + "zon": 15109, + "zon": 14618, + "zona": 42134, + "zone": 37197, + "zone": 4442, + "zones": 17247, + "zoning": 36790, + "zoo": 8182, + "zoo": 7147, + "zoom": 32671, + "zoom": 13909, + "zor": 17605, + "zou": 38072, + "zr": 39275, + "zs": 35248, + "zshq": 41442, + "zt": 42629, + "zu": 4091, + "zu": 14184, + "zucchini": 29873, + "zucker": 26890, + "zuckerberg": 30066, + "zul": 31146, + "zulu": 32821, + "zum": 35094, + "zuma": 23326, + "zumba": 32976, + "zun": 42440, + "zur": 17128, + "zurich": 21288, + "zw": 42188, + "zx": 31604, + "zy": 6615, + "zy": 2303, + "zyk": 39112, + "zyme": 36472, + "zyn": 45287, + "zz": 1544, + "zz": 4943, + "zza": 14642, + "zzi": 13974, + "zzie": 18635, + "zzle": 7873, + "zzled": 39075, + "zzo": 14036, + "zzy": 21275, + "zzy": 8353, + "zzz": 20055, + "zzzz": 35742, + "zzzz": 43103, + "{": 90, + "{": 346, + "{}": 39025, + "|": 91, + "|#": 31183, + "|": 347, + "|@": 41677, + "||": 7566, + "}": 92, + "}": 348, + "~": 93, + "~!": 31181, + "~\"": 48442, + "~": 349, + "~>": 43291, + "~@": 44247, + "~~": 11461, + "~~": 16671, + 
"~~~": 32472, + "~~~~": 28295, + "¡": 94, + "¡": 350, + "¡ï¸ı": 15113, + "¡ï¸ı": 4174, + "¡ľ": 43991, + "¢": 95, + "¢": 351, + "£": 96, + "£": 352, + "£ï¸ı": 18446, + "¤": 97, + "¤": 353, + "¥": 98, + "¥": 354, + "¦": 99, + "¦": 355, + "¦Ī": 47615, + "§": 100, + "§": 356, + "¨": 101, + "¨": 357, + "©": 102, + "©": 358, + "ª": 103, + "ª": 359, + "«": 104, + "«": 360, + "¬": 105, + "¬": 361, + "¬ë": 31736, + "®": 106, + "®": 362, + "¯": 107, + "¯": 363, + "°": 108, + "°:": 21787, + "°": 364, + "°ï¸ı": 34777, + "±": 109, + "±": 365, + "±ï¸ı": 41020, + "²": 110, + "²": 366, + "³": 111, + "³": 367, + "³ï¸ı": 22195, + "³ï¸ı": 24706, + "´": 112, + "´": 368, + "µ": 113, + "µ": 369, + "µï¸ı": 27605, + "¶": 114, + "¶": 370, + "·": 115, + "·": 371, + "¸": 116, + "¸": 372, + "¸ë": 19693, + "¹": 117, + "¹": 373, + "º": 118, + "º": 374, + "»": 119, + "»": 375, + "¼": 120, + "¼": 376, + "½": 121, + "½": 377, + "½ï¸ı": 31333, + "¾": 122, + "¾": 378, + "¿": 123, + "¿": 379, + "À": 124, + "À": 380, + "Á": 125, + "Á": 381, + "Â": 126, + "Â": 382, + "¡": 26868, + "¡": 10830, + "¡¡": 45505, + "¢": 41359, + "£": 31117, + "£": 1950, + "Â¥": 20199, + "¨": 19957, + "¨¨": 23089, + "¨¨¨¨": 41223, + "©": 31148, + "©": 5811, + "«": 14434, + "®": 30857, + "®": 8436, + "¯": 38682, + "¯": 43593, + "¯\\": 44096, + "¯\\_(": 45115, + "°": 21305, + "°": 6858, + "²": 41175, + "´": 30560, + "´": 12559, + "·": 14844, + "º": 28059, + "»": 31642, + "»": 7599, + "½": 33613, + "¿": 44559, + "¿": 17133, + "ÂŃ": 22618, + "Ã": 127, + "Ã": 383, + "á": 7261, + "á": 22229, + "án": 38340, + "án": 21385, + "â": 26170, + "ã": 19339, + "ão": 21141, + "ä": 10896, + "ä": 47276, + "än": 42787, + "Ã¥": 23176, + "æ": 42495, + "ç": 10067, + "ça": 22711, + "è": 12138, + "è": 37761, + "ère": 30272, + "ès": 41210, + "é": 3459, + "é": 4166, + "éal": 45251, + "ée": 13489, + "és": 20507, + "ê": 27515, + "ë": 29526, + "ë": 40520, + "î": 48704, + "ï": 35689, + "ñ": 6445, + "ña": 17753, + "ño": 16574, + "ños": 40104, + "ó": 8891, + "ó": 27733, + "ón": 13926, + "ô": 26815, + "ö": 7255, + "ö": 37423, + "ör": 31762, + "ø": 17483, + "ø": 45598, + "ú": 17963, + "ú": 36019, + "ü": 6522, + "ü": 47177, + "ür": 26132, + "ÃĹ": 16165, + "Ãł": 36149, + "Ãł": 21259, + "ÃŃ": 8366, + "ÃŃ": 23928, + "ÃŃa": 16609, + "ÃŃn": 33623, + "Ä": 128, + "Ä": 384, + "ı": 18562, + "ı": 41901, + "Äģ": 23134, + "Äĩ": 31719, + "Äį": 45414, + "ÄŁ": 26540, + "Å": 129, + "Å": 385, + "Å¡": 35621, + "ÅĤ": 40419, + "Åį": 41267, + "ÅŁ": 21254, + "ÅŁ": 40706, + "Æ": 130, + "Æ": 386, + "Ç": 131, + "Ç": 387, + "È": 132, + "È": 388, + "É": 133, + "É": 389, + "Ê": 134, + "Ê": 390, + "Ë": 135, + "Ë": 391, + "Ì": 136, + "Ì": 392, + "Ìĩ": 16384, + "Í": 137, + "Í": 393, + "Î": 138, + "Î": 394, + "Ï": 139, + "Ï": 395, + "Ïī": 38065, + "Ð": 140, + "Ð": 396, + "а": 16912, + "а": 27080, + "аÐ": 31090, + "в": 39813, + "е": 22176, + "и": 16701, + "иÐ": 29503, + "к": 27152, + "л": 47611, + "м": 38018, + "н": 22705, + "о": 13506, + "о": 29386, + "оÐ": 20978, + "од": 38416, + "оÑĤ": 28599, + "п": 26302, + "пÑĢи": 46321, + "пÑĢиÑĢода": 48150, + "Ñ": 141, + "Ñ": 397, + "ÑĢ": 16370, + "ÑĢи": 41092, + "ÑĢод": 47039, + "ÑĢода": 47929, + "Ñģ": 23669, + "ÑĤ": 17875, + "Ñĥ": 39729, + "ÑĦ": 27993, + "ÑĦоÑĤ": 35155, + "ÑĦоÑĤо": 38981, + "Ñĭ": 45001, + "Ò": 142, + "Ò": 398, + "Ó": 143, + "Ó": 399, + "Ô": 144, + "Ô": 400, + "Õ": 145, + "Õ": 401, + "Ö": 146, + "Ö": 402, + "×": 147, + "×": 403, + "Ø": 148, + "Ø": 404, + "ا": 6042, + "ا": 22625, + "اØ": 13189, + "ار": 40137, + "اÙ": 8453, + "اÙĦ": 12973, + "اÙħ": 47626, + 
"اÙĨ": 42773, + "اÙĨ": 33200, + "ب": 16378, + "ب": 35330, + "Ø©": 20915, + "ت": 18197, + "ت": 44333, + "ج": 26375, + "Ø®": 41495, + "د": 19872, + "د": 35566, + "ر": 10948, + "ر": 24933, + "رÙĬ": 43273, + "ز": 36169, + "س": 17856, + "Ø´": 28770, + "ص": 27271, + "Ø·": 32050, + "ع": 18843, + "غ": 48510, + "ØŃ": 25722, + "Ù": 149, + "Ù": 405, + "Ùģ": 24112, + "ÙĤ": 27585, + "Ùĥ": 33499, + "ÙĦ": 14251, + "ÙĦ": 37899, + "Ùħ": 12986, + "Ùħ": 29945, + "ÙĨ": 16655, + "ÙĨ": 25386, + "Ùĩ": 34274, + "Ùĩ": 31343, + "ÙĪ": 12203, + "ÙĪ": 38310, + "ÙĪر": 48242, + "ÙĬ": 12046, + "ÙĬ": 23853, + "Ú": 150, + "Ú": 406, + "Ú©": 26475, + "Û": 151, + "Û": 407, + "Ûģ": 40480, + "ÛĮ": 21452, + "ÛĮ": 32703, + "Ü": 152, + "Ü": 408, + "Ý": 153, + "Ý": 409, + "Þ": 154, + "Þ": 410, + "ß": 155, + "ß": 411, + "à": 156, + "à": 412, + "à¤": 3124, + "त": 27263, + "द": 29552, + "न": 26090, + "प": 44149, + "ब": 43599, + "म": 48254, + "म": 26774, + "य": 37299, + "र": 39136, + "र": 19052, + "ल": 30881, + "व": 39545, + "श": 43181, + "स": 28505, + "ह": 29446, + "ा": 37973, + "ा": 13343, + "ि": 26721, + "à¤Ĥ": 30833, + "à¤ķ": 22067, + "à¤Ĺ": 42598, + "à¤ľ": 39561, + "à¥": 7410, + "à¥Ģ": 45791, + "à¥Ģ": 25751, + "à¥ģ": 39653, + "à¥ĩ": 48612, + "à¥ĩ": 25130, + "à¥ĭ": 34452, + "à¥į": 19389, + "à¦": 11322, + "া": 41532, + "à§": 26339, + "à¨": 15741, + "à©": 32086, + "àª": 22990, + "à«": 48347, + "à¬": 32791, + "à®": 6022, + "த": 34691, + "ன": 43394, + "ப": 47388, + "à®®": 35463, + "à®°": 43270, + "ல": 47705, + "ா": 32831, + "ி": 27126, + "à®ķ": 36168, + "à®Ł": 45263, + "à¯": 11259, + "à¯ģ": 33115, + "à¯į": 16631, + "à°": 12100, + "à±": 23550, + "à±į": 46098, + "à²": 9992, + "ಿ": 47797, + "à³": 20745, + "à³į": 36148, + "à´": 15418, + "àµ": 27392, + "àµį": 45266, + "à¶": 29881, + "à·": 30766, + "à¸": 1777, + "ม": 26137, + "ม": 29570, + "ย": 27241, + "ย": 33091, + "ร": 32225, + "ร": 27331, + "ล": 34696, + "ล": 32746, + "ว": 26990, + "ว": 30245, + "ส": 37883, + "ส": 35737, + "ห": 33064, + "ะ": 43920, + "ะ": 49234, + "ั": 14978, + "า": 11529, + "า": 38476, + "าà¸": 12330, + "ิ": 17092, + "ี": 22421, + "ี": 20278, + "ีà¹Ī": 31511, + "ื": 47991, + "ุ": 30524, + "ู": 35273, + "à¸ģ": 30767, + "à¸ģà¸": 31474, + "à¸Ħ": 31757, + "à¸Ħà¸": 39628, + "à¸ĩ": 24603, + "à¸ĩ": 33382, + "à¸Ī": 47608, + "à¸Ĭ": 46324, + "à¸Ķ": 31107, + "à¸Ķ": 38825, + "à¸ķ": 40273, + "à¸ķ": 41108, + "à¸Ĺ": 36171, + "à¸Ļ": 17474, + "à¸Ļ": 17639, + "à¸Ļà¸": 23121, + "à¸ļ": 33859, + "à¸ļ": 39616, + "à¸ŀ": 48171, + "à¸Ń": 13398, + "à¸Ń": 32818, + "à¸Ńà¸": 14649, + "à¸Ńà¸ĩ": 46622, + "à¹": 4484, + "à¹Ģ": 13729, + "à¹Ģà¸": 14076, + "à¹ģà¸": 23916, + "à¹Ĥ": 33118, + "à¹ĥ": 40962, + "à¹Ħà¸": 31718, + "à¹ĩ": 38699, + "à¹Ī": 11722, + "à¹ī": 13123, + "à¹Į": 28353, + "à¼": 46186, + "à½": 39219, + "á": 157, + "á": 413, + "á´": 19036, + "áµ": 17330, + "áĢ": 45932, + "áĥ": 24829, + "áĥ¦": 32193, + "â": 158, + "â": 414, + "â¤": 25087, + "⤵ï¸ı": 36026, + "â¬": 7930, + "â¬ħï¸ı": 42111, + "â¬Ĩ": 27718, + "â¬Ĩï¸ı": 32798, + "â¬ĩ": 10917, + "â¬ĩ": 39370, + "â¬ĩï¸ı": 25621, + "â¬ĩï¸ı": 13984, + "â¬ĩï¸ıâ¬ĩï¸ı": 40159, + "âĢ": 728, + "âĢ¢": 9485, + "âĢ¢": 2701, + "âĢ¢âĢ¢": 15006, + "âĢ¢âĢ¢": 47575, + "âĢ¢âĢ¢âĢ¢âĢ¢": 27502, + "âĢ¢âĢ¢âĢ¢âĢ¢âĢ¢âĢ¢âĢ¢âĢ¢": 48630, + "âĢ¦": 7095, + "âĢ¦\"": 20215, + "âĢ¦..": 47779, + "âĢ¦.": 18615, + "âĢ¦/": 29842, + "âĢ¦": 959, + "âĢ¦âĢ¦": 40066, + "âĢ²": 32633, + "âĢ³": 25061, + "âĢ¼": 6578, + "âĢ¼ï¸ı": 15622, + "âĢ¼ï¸ı": 8310, + "âĢ¼ï¸ıâĢ¼ï¸ı": 33218, + "âĢĭ": 17086, + "âĢĭ": 9844, + "âĢį": 4244, + "âĢįâĻ": 5177, + "âĢįâĻĢï¸ı": 18897, + "âĢįâĻĢï¸ı": 9605, + "âĢįâĻĤ": 
8832, + "âĢįâĻĤï¸ı": 21779, + "âĢįâĻĤï¸ı": 10613, + "âĢİ": 31001, + "âĢIJ": 34512, + "âĢĵ": 21070, + "âĢĵ": 1224, + "âĢĶ": 6718, + "âĢĶ": 2005, + "âĢĶ>": 26341, + "âĢĶ@": 28470, + "âĢĶâĢĶ": 10037, + "âĢĶâĢĶ": 44800, + "âĢĶâĢĶâĢĶâĢĶ": 17797, + "âĢĶâĢĶâĢĶâĢĶâĢĶâĢĶâĢĶâĢĶ": 34432, + "âĢķ": 14236, + "âģ": 1667, + "âģ£": 31089, + "âģ£": 16845, + "âģ¦": 2773, + "âģ¦": 34855, + "âģ¦@": 2859, + "âģ¦âģ¦@": 27783, + "âģ©": 20097, + "âģ©,": 48749, + "âģ©.": 35777, + "âģ©": 2918, + "âģīï¸ı": 46534, + "âģł": 23881, + "âģł": 13503, + "âģłâģł": 33488, + "âĤ": 5227, + "âĤ¬": 34919, + "âĤ¬": 6309, + "âĤ¹": 21777, + "âĥ": 2805, + "âĥ£": 11250, + "âĥ£": 3076, + "âĥ£@": 48291, + "âĦ": 8604, + "âĦ¢": 29438, + "âĦ¢": 11675, + "âĦ¹": 45462, + "âĨ": 6059, + "âĨĴ": 7481, + "âĨĵ": 41603, + "âĩ": 27228, + "âĪ": 17788, + "âī": 22684, + "âīĪ": 45451, + "âĮ": 17848, + "âĮļ": 31301, + "âĮļï¸ı": 35931, + "âı": 7960, + "âı©": 40847, + "âı°": 12714, + "âı±": 33149, + "âı³": 47617, + "âĵ": 27400, + "âĶ": 13389, + "âĶĢ": 45139, + "âĶģ": 42022, + "âķ": 17027, + "âķIJ": 48039, + "âĸ": 4168, + "âĸª": 21203, + "âĸª": 36628, + "âĸªï¸ı": 24974, + "âĸ«": 39478, + "âĸ¬": 33798, + "âĸ¬âĸ¬": 36975, + "âĸ¶": 12509, + "âĸ¶": 21126, + "âĸ¶ï¸ı": 14442, + "âĸº": 46061, + "âĸº": 12086, + "âĸ½": 45634, + "âĸł": 36791, + "âĹ": 9323, + "âĹĨ": 48961, + "âĹı": 26999, + "âĺ": 1741, + "âĺ®": 45851, + "âĺ¹": 28811, + "âĺ¹ï¸ı": 39605, + "âĺº": 5010, + "âĺº": 8703, + "âĺºâĺº": 46051, + "âĺºï¸ı": 11506, + "âĺºï¸ı": 7779, + "âĺºï¸ıâĺºï¸ı": 41315, + "âĺ¼": 38877, + "âĺĢ": 32146, + "âĺĢ": 22242, + "âĺĢï¸ı": 12817, + "âĺĢï¸ı": 8219, + "âĺĢï¸ıâĺĢï¸ı": 44550, + "âĺģ": 25195, + "âĺģï¸ı": 35197, + "âĺĥ": 38972, + "âĺħ": 9339, + "âĺħ": 10643, + "âĺħâĺħ": 12681, + "âĺħâĺħ": 36644, + "âĺħâĺħâĺħâĺħ": 34431, + "âĺħâĺħâĺħâĺħ": 44034, + "âĺħâĺħâĺħâĺħâĺħ": 45984, + "âĺĨ": 23941, + "âĺĨ": 13439, + "âĺİ": 24045, + "âĺİ": 45493, + "âĺİï¸ı": 27219, + "âĺij": 20983, + "âĺij": 42300, + "âĺijï¸ı": 22291, + "âĺĶï¸ı": 31238, + "âĺķ": 11454, + "âĺķ": 26561, + "âĺķï¸ı": 25839, + "âĺķï¸ı": 15499, + "âĺĺ": 23483, + "âĺĺï¸ı": 31454, + "âĺĿ": 21982, + "âĺĿï¸ı": 38891, + "âĺŀ": 31255, + "âĺłï¸ı": 34672, + "âĻ": 1548, + "âĻ¡": 11091, + "âĻ¡": 6251, + "âĻ¡âĻ¡": 22360, + "âĻ¡âĻ¡": 34267, + "âĻ¡âĻ¡âĻ¡": 36611, + "âĻ¤": 47435, + "âĻ¥": 4622, + "âĻ¥": 3405, + "âĻ¥âĻ¥": 12975, + "âĻ¥âĻ¥": 19604, + "âĻ¥âĻ¥âĻ¥": 23255, + "âĻ¥âĻ¥âĻ¥âĻ¥": 49020, + "âĻ¥ï¸ı": 17774, + "âĻ¥ï¸ı": 10561, + "âĻ¥ï¸ıâĻ¥ï¸ı": 40309, + "âĻ¦": 32376, + "âĻ¦": 47547, + "âĻ©": 30339, + "âĻ©âĻ«": 31636, + "âĻª": 27364, + "âĻª": 12382, + "âĻ«": 39217, + "âĻ«": 10814, + "âĻ¬": 24753, + "âĻ»": 39611, + "âĻ»ï¸ı": 46075, + "âļ": 2234, + "âļ¡": 40098, + "âļ¡": 20712, + "âļ¡ï¸ı": 19500, + "âļ¡ï¸ı": 11605, + "âļ¡ï¸ıâļ¡ï¸ı": 45922, + "âļª": 11922, + "âļª": 36373, + "âļªï¸ı": 22251, + "âļªï¸ı": 17885, + "âļ«": 15374, + "âļ«ï¸ı": 26529, + "âļ«ï¸ı": 24649, + "âļ½": 4867, + "âļ½": 13173, + "âļ½âļ½": 43259, + "âļ½ï¸ı": 11342, + "âļ½ï¸ı": 6768, + "âļ½ï¸ıâļ½ï¸ı": 30358, + "âļ½ï¸ıâļ½ï¸ı": 44148, + "âļ¾": 11314, + "âļ¾": 34717, + "âļ¾ï¸ı": 24727, + "âļ¾ï¸ı": 14858, + "âļĵ": 23522, + "âļĵï¸ı": 35299, + "âļĶï¸ı": 29361, + "âļľ": 47491, + "âļł": 39203, + "âļłï¸ı": 40966, + "âļłï¸ı": 15596, + "âĽ": 7956, + "âĽ³ï¸ı": 29204, + "âĽĦ": 30668, + "âĽĦï¸ı": 45465, + "âľ": 1508, + "⾨": 7181, + "⾨": 3531, + "⾨⾨": 35174, + "⾨⾨": 21985, + "⾨⾨⾨": 39424, + "âľĤ": 38602, + "âľħ": 29544, + "âľħ": 5564, + "âľĪ": 10682, + "âľĪ": 30712, + "âľĪï¸ı": 26176, + "âľĪï¸ı": 13413, + "âľĬ": 12392, + "âľĬ": 17819, + "âľĬðŁı½": 48547, + "âľĬðŁı¾": 41185, + "âľĭ": 39383, + 
"âľĭ": 30239, + "âľĮ": 6419, + "âľĮ": 12656, + "âľĮï¸ı": 21906, + "âľĮï¸ı": 12239, + "âľĮðŁı»": 30538, + "âľĮðŁı¼": 30588, + "âľį": 20872, + "âľįï¸ı": 30888, + "âľı": 32574, + "âľıï¸ı": 40724, + "âľĵ": 36700, + "âľĶ": 47200, + "âľĶ": 13749, + "âľĶï¸ı": 40544, + "âľĶï¸ı": 9191, + "âľĸï¸ı": 44133, + "âľĿ": 42220, + "âĿ": 1045, + "âĿ£": 37007, + "âĿ£": 25623, + "âĿ£ï¸ı": 25240, + "âĿ¤": 1266, + "âĿ¤": 2720, + "âĿ¤âĿ¤": 9033, + "âĿ¤âĿ¤": 14058, + "âĿ¤âĿ¤âĿ¤": 16708, + "âĿ¤âĿ¤âĿ¤âĿ¤": 37918, + "âĿ¤âĿ¤âĿ¤âĿ¤": 43970, + "âĿ¤ï¸ı": 2626, + "âĿ¤ï¸ı#": 30281, + "âĿ¤ï¸ı.": 45326, + "âĿ¤ï¸ı": 1752, + "âĿ¤ï¸ı@": 31187, + "âĿ¤ï¸ıâĿ¤ï¸ı": 6713, + "âĿ¤ï¸ıâĿ¤ï¸ı": 10363, + "âĿ¤ï¸ıâĿ¤ï¸ıâĿ¤ï¸ı": 12282, + "âĿ¤ï¸ıâĿ¤ï¸ıâĿ¤ï¸ıâĿ¤ï¸ı": 39167, + "âĿ¤ï¸ıâĿ¤ï¸ıâĿ¤ï¸ıâĿ¤ï¸ı": 29880, + "âĿ¤ï¸ıðŁĴĻ": 37380, + "âĿ¤ï¸ıðŁĺį": 37272, + "âĿ¤ï¸ıðŁĺĺ": 41800, + "âĿ¤ðŁĺį": 49120, + "âĿ¥": 36914, + "âĿĦ": 8501, + "âĿĦ": 30494, + "âĿĦï¸ı": 16834, + "âĿĦï¸ı": 12402, + "âĿĦï¸ıâĿĦï¸ı": 41626, + "âĿĮ": 44485, + "âĿĮ": 17975, + "âĿĵ": 29791, + "âĿĹ": 12868, + "âĿĹ": 29079, + "âĿĹï¸ı": 28642, + "âĿĹï¸ı": 17391, + "âĿĿ": 46951, + "âŀ": 3257, + "âŀ¡": 12854, + "âŀ¡ï¸ı": 31860, + "âŀ¡ï¸ı": 4956, + "âŀ¤": 18651, + "âŀķ": 46526, + "âŀĸ": 21327, + "âŀĸ": 34902, + "âŀĸâŀĸ": 23316, + "âŀĸâŀĸâŀĸâŀĸ": 40401, + "âŀľ": 23775, + "âł": 5689, + "âłĢ": 9691, + "âłĢ": 8621, + "âłĢâłĢ": 11466, + "âłĢâłĢ": 39092, + "âłĢâłĢâłĢâłĢ": 20976, + "âłĢâłĢâłĢâłĢâłĢâłĢâłĢâłĢ": 46063, + "âŃ": 5527, + "âŃIJ": 6410, + "âŃIJ": 19012, + "âŃIJâŃIJ": 32663, + "âŃIJï¸ı": 12427, + "âŃIJï¸ı": 10251, + "âŃIJï¸ıâŃIJï¸ı": 18640, + "âŃIJï¸ıâŃIJï¸ıâŃIJï¸ı": 40746, + "ã": 159, + "ã": 415, + "ãĢ": 4092, + "ãĢģ": 45262, + "ãĢĤ": 38060, + "ãĢĤ": 38000, + "ãĢĬ": 39920, + "ãĢĭ": 32898, + "ãĢĮ": 18116, + "ãĢį": 19149, + "ãĢİ": 26947, + "ãĢı": 30293, + "ãĢIJ": 12534, + "ãĢij": 12990, + "ãĢľ": 39581, + "ãģ": 4813, + "ãģ¦": 48029, + "ãģ¨": 34671, + "ãģ¨ç¹ĭãģ": 47310, + "ãģ¨ç¹ĭãģĮãĤĬãģŁãģĦ": 48290, + "ãģª": 29104, + "ãģ®": 21575, + "ãģ·": 44130, + "ãģĦ": 33523, + "ãģĦ": 38850, + "ãģĨ": 44235, + "ãģį": 42184, + "ãĤ": 3909, + "ãĤ¢": 26560, + "ãĤ¤": 19319, + "ãĤ¤ãĥ": 36294, + "ãĤ«": 37367, + "ãĤ¯": 31574, + "ãĤ·": 37665, + "ãĤ¸": 32234, + "ãĤ¸ãĥ": 43491, + "ãĤ¹": 22694, + "ãĤ¹": 39220, + "ãĤ¹ãĥ": 32421, + "ãĤ¿": 34941, + "ãĤĬãģ": 40500, + "ãĤĮ": 45211, + "ãĤŃ": 47121, + "ãĥ": 2429, + "ãĥ©": 23007, + "ãĥª": 32115, + "ãĥ«": 33257, + "ãĥ¬": 32965, + "ãĥ³": 17671, + "ãĥ³": 26875, + "ãĥ³ãĤ": 45105, + "ãĥ³ãĥ": 25914, + "ãĥ»": 8415, + "ãĥ»": 11158, + "ãĥ»ãĥ»": 13949, + "ãĥ»ãĥ»ãĥ»": 14234, + "ãĥ¼": 13457, + "ãĥ¼": 30391, + "ãĥ¼ãĥ": 18584, + "ãĥĥ": 28902, + "ãĥĦ": 32173, + "ãĥĪ": 42384, + "ãĥİ": 39967, + "ãĥķãĤ": 33371, + "ãĥŀ": 48924, + "ãĥŃ": 35827, + "ãħ": 5947, + "ãħ¤": 21096, + "ãħ¤ãħ¤": 22583, + "ãħ¤ãħ¤ãħ¤ãħ¤": 39329, + "ãħĭ": 13052, + "ãħĭ": 25108, + "ãħĭãħĭ": 16604, + "ãħĭãħĭ": 42581, + "ãħĭãħĭãħĭ": 46407, + "ãħĭãħĭãħĭãħĭ": 39362, + "ãħł": 16089, + "ãħł": 25781, + "ãħłãħł": 22021, + "ãħłãħł": 34398, + "ãħłãħłãħłãħł": 47028, + "ä": 160, + "ä": 416, + "ä¸": 19759, + "ä¹": 41854, + "äº": 21078, + "人": 36839, + "ä»": 37743, + "ä½": 47466, + "å": 161, + "å": 417, + "å¤": 23170, + "å¥": 29290, + "å®": 27047, + "å°": 34720, + "å±": 46096, + "å¸": 42021, + "å¹": 38780, + "åħ": 34314, + "åĨ": 27972, + "åĨĻ": 44653, + "åĪ": 42748, + "åĭ": 47505, + "åı": 34517, + "åIJ": 41673, + "åĽ": 39027, + "åľ": 37746, + "åŃ": 35751, + "æ": 162, + "æ": 418, + "æĸ": 29032, + "æĹ": 22265, + "æĹ¥": 39121, + "æĹ¥": 37156, + "æĺ": 42891, + "æĻ": 48132, + "æľ": 19277, + "æľ¬": 44353, + "æĿ": 27667, + "æĿ±": 48338, + "ç": 
163, + "ç": 419, + "ç¥": 26369, + "ç¥Ń": 42557, + "çµ": 37810, + "ç¹": 43431, + "ç¹ĭãģ": 45930, + "çĶ": 20211, + "çĶŁ": 33375, + "çľ": 33440, + "羣": 41570, + "è": 164, + "è": 420, + "èª": 34002, + "èªķ": 41293, + "é": 165, + "é": 421, + "éģ": 44854, + "éĩ": 38283, + "ê": 166, + "ê": 422, + "ê°": 21122, + "ê°ĵ": 41076, + "ê°ĵìĦ¸ë¸IJ": 41689, + "ê°ķ": 45758, + "ê²": 35555, + "ê³": 36216, + "êµ": 31871, + "ê·": 42680, + "ê¸": 32495, + "ê¹": 24531, + "ê¹Ģ": 25203, + "ë": 167, + "ë": 423, + "ë¦": 24621, + "리": 47649, + "ë§": 28024, + "ë§Ī": 40027, + "ëª": 36311, + "ë¯": 19528, + "민": 34442, + "민": 44632, + "ë°": 15810, + "ë°©": 23273, + "ë°©íĥ": 25081, + "ë°©íĥĦ": 25641, + "ë°©íĥĦìĨĮëħĦëĭ": 26068, + "ë°©íĥĦìĨĮëħĦëĭ¨": 27129, + "ë°ķ": 40988, + "ë²": 48267, + "ë³": 44693, + "ë¹": 24193, + "ëĤ": 27252, + "ëĤĺ": 48484, + "ëĭ": 13094, + "ëĭ¤": 46680, + "ëĭĪ": 33708, + "ëį": 45543, + "ëı": 31972, + "ëĵ": 30850, + "ëĿ": 44317, + "ì": 168, + "ì": 424, + "ì£": 39856, + "주": 45161, + "ì¤": 31153, + "ì§": 16279, + "ì§Ģ": 28836, + "ì§Ħ": 38890, + "ì°": 40742, + "ì¶": 42476, + "ì¶ķ": 46403, + "ì¶ķíķĺ": 47866, + "ì¹": 45088, + "ìĤ": 31061, + "ìĥ": 30587, + "ìĥĿ": 47858, + "ìĦ": 15074, + "ìĦ¸ë": 29254, + "ìĦ¸ë¸": 29658, + "ìĦ¸ë¸IJ": 41415, + "ìĨ": 15115, + "ìĨĮë": 20515, + "ìĨĮëħ": 21391, + "ìĨĮëħĦëĭ": 25887, + "ìĪ": 32757, + "ìĬ": 12125, + "ìĬ¤": 20305, + "ìĬ¤": 23829, + "ìĭ": 23924, + "ìķ": 16071, + "ìķĦ": 23233, + "ìĸ": 31625, + "ìĹ": 13252, + "ìĹIJ": 37622, + "ìĹij": 31036, + "ìĹijìĨ": 42763, + "ìĹijìĨĮ": 45606, + "ìĺ": 21144, + "ìĻ": 39405, + "ìļ": 18541, + "ìļ°": 38415, + "ìļ°": 49344, + "ìĽ": 22543, + "ìĽIJ": 36495, + "ìľ": 20909, + "ìľł": 42890, + "ìĿ": 8276, + "ìĿ´": 12286, + "ìĿ´": 34746, + "ìĿ´ì": 37590, + "ìĿ¼": 43406, + "ìŀ": 20849, + "ìł": 20580, + "ìłķ": 34725, + "í": 169, + "í": 425, + "íģ": 35641, + "íģ¬": 45832, + "íĤ": 43565, + "íĥ": 15012, + "íĥĢ": 41126, + "íĥľ": 37663, + "íĬ": 23215, + "íĬ¸": 48974, + "íĬ¸": 39820, + "íĭ": 34350, + "íĶ": 29450, + "íķ": 15197, + "íķ´": 35286, + "íķĺ": 33992, + "íĺ": 15962, + "íĺ¸": 39657, + "íĺĦ": 34645, + "íĻ": 31882, + "î": 170, + "î": 426, + "îĢ": 36288, + "îĦ": 35368, + "îĮ": 41006, + "îIJ": 16929, + "îIJĴ": 40100, + "ï": 171, + "ï": 427, + "ï¸": 842, + "ï¸İ": 24029, + "ï¸ı": 1392, + "ï¸ı#": 46997, + "ï¸ı:": 32604, + "ï¸ı": 1001, + "ï¸ı@": 34600, + "ï¸ıâĥ£": 17394, + "ï¸ıâĥ£-": 40376, + "ï¸ıâĥ£": 4603, + "ï¿": 27850, + "�": 47356, + "�": 39802, + "ð": 172, + "ð": 428, + "ðĿ": 6874, + "ðĿIJ": 15889, + "ðĿij": 43794, + "ðĿĴ": 43387, + "ðĿĵ": 47110, + "ðĿĹ": 18865, + "ðĿĺ": 26109, + "ðĿĻ": 29415, + "ðŁ": 558, + "ðŁ¤": 1793, + "ðŁ¤£": 9665, + "ðŁ¤£": 9909, + "ðŁ¤£ðŁ¤£": 16430, + "ðŁ¤£ðŁ¤£": 31009, + "ðŁ¤£ðŁ¤£ðŁ¤£": 32262, + "ðŁ¤¤": 39550, + "ðŁ¤¤": 26759, + "ðŁ¤¦": 17186, + "ðŁ¤§": 40983, + "ðŁ¤©": 27351, + "ðŁ¤©": 16074, + "ðŁ¤ª": 44230, + "ðŁ¤ª": 24920, + "ðŁ¤«": 47671, + "ðŁ¤¯": 37595, + "ðŁ¤·": 13185, + "ðŁ¤·ðŁı»âĢįâĻĢï¸ı": 46770, + "ðŁ¤ij": 34801, + "ðŁ¤ĵ": 36580, + "ðŁ¤ĵ": 18928, + "ðŁ¤Ķ": 12706, + "ðŁ¤Ķ": 6497, + "ðŁ¤ĶðŁ¤Ķ": 28490, + "ðŁ¤ĶðŁ¤ĶðŁ¤Ķ": 43361, + "ðŁ¤ĸ": 46146, + "ðŁ¤Ĺ": 16646, + "ðŁ¤Ĺ": 10465, + "ðŁ¤ĹðŁ¤Ĺ": 44321, + "ðŁ¤ĺ": 10623, + "ðŁ¤ĺ": 17288, + "ðŁ¤ĺðŁı»": 46449, + "ðŁ¤ĺðŁı»": 30891, + "ðŁ¤ĺðŁı¼": 31458, + "ðŁ¤ĺðŁı½": 49362, + "ðŁ¤Ļ": 23800, + "ðŁ¤Ļ": 39101, + "ðŁ¤Ŀ": 35242, + "ðŁ¤ŀ": 29463, + "ðŁ¤ŀ": 38597, + "ðŁ¤Ł": 48509, + "ðŁ¤ł": 36737, + "ðŁ¤Ń": 47289, + "ðŁ¥": 4156, + "ðŁ¥°": 29246, + "ðŁ¥°": 17597, + "ðŁ¥³": 45823, + "ðŁ¥³": 28055, + "ðŁ¥º": 43380, + "ðŁ¥º": 36858, + "ðŁ¥Ĥ": 43805, + "ðŁ¥Ĥ": 25212, + "ðŁ¥ĥ": 47790, + 
"ðŁ¥ĩ": 34372, + "ðŁ¥ĩ": 20069, + "ðŁ¥Ī": 35858, + "ðŁ¥ī": 36782, + "ðŁ¥Ĭ": 29275, + "ðŁ¦": 6040, + "ðŁ¦ģ": 36367, + "ðŁ¦ģ": 26056, + "ðŁ¦ĥ": 40184, + "ðŁ¦Ħ": 37659, + "ðŁ¦ħ": 28800, + "ðŁ¦Ī": 48984, + "ðŁ¦ĭ": 49325, + "ðŁ¦ĭ": 28985, + "ðŁ§": 8792, + "ðŁ§¡": 30996, + "ðŁ§¡": 24578, + "ðŁ§IJ": 33549, + "ðŁħ": 22010, + "ðŁĨ": 9536, + "ðŁĨķ": 34956, + "ðŁĨĺ": 39868, + "ðŁĨļ": 16325, + "ðŁĩ": 1173, + "ðŁĩ¦": 12469, + "ðŁĩ¦": 28565, + "ðŁĩ¦ðŁĩ": 33196, + "ðŁĩ¦ðŁĩ·": 41629, + "ðŁĩ¦ðŁĩº": 25192, + "ðŁĩ§": 14660, + "ðŁĩ§ðŁĩ": 37342, + "ðŁĩ§ðŁĩª": 38794, + "ðŁĩ§ðŁĩ·": 28182, + "ðŁĩ¨": 8889, + "ðŁĩ¨ðŁĩ": 8989, + "ðŁĩ¨ðŁĩ¦": 34324, + "ðŁĩ¨ðŁĩ¦": 16364, + "ðŁĩ¨ðŁĩ³": 36819, + "ðŁĩ¨ðŁĩŃ": 41119, + "ðŁĩ©": 15222, + "ðŁĩ©ðŁĩ": 36350, + "ðŁĩ©ðŁĩª": 21531, + "ðŁĩª": 11428, + "ðŁĩª": 12331, + "ðŁĩªðŁĩ": 13917, + "ðŁĩªðŁĩ¸": 22177, + "ðŁĩªðŁĩº": 34655, + "ðŁĩ«": 12977, + "ðŁĩ«ðŁĩ·": 39109, + "ðŁĩ«ðŁĩ·": 16223, + "ðŁĩ¬": 8129, + "ðŁĩ¬ðŁĩ": 8354, + "ðŁĩ¬ðŁĩ§": 23762, + "ðŁĩ¬ðŁĩ§": 11559, + "ðŁĩ®": 8268, + "ðŁĩ®ðŁĩ": 8347, + "ðŁĩ®ðŁĩª": 34148, + "ðŁĩ®ðŁĩ³": 47299, + "ðŁĩ®ðŁĩ³": 23602, + "ðŁĩ®ðŁĩ¹": 42034, + "ðŁĩ®ðŁĩ¹": 17070, + "ðŁĩ¯": 20090, + "ðŁĩ¯ðŁĩ": 22924, + "ðŁĩ¯ðŁĩµ": 26527, + "ðŁĩ°": 28232, + "ðŁĩ±": 29533, + "ðŁĩ±ðŁĩ": 40941, + "ðŁĩ²": 16411, + "ðŁĩ²ðŁĩ": 17562, + "ðŁĩ²ðŁĩ½": 32073, + "ðŁĩ³": 16645, + "ðŁĩ³ðŁĩ": 17747, + "ðŁĩ³ðŁĩ±": 36747, + "ðŁĩµ": 12127, + "ðŁĩµðŁĩ": 13608, + "ðŁĩµðŁĩ°": 37764, + "ðŁĩµðŁĩ¹": 42621, + "ðŁĩµðŁĩŃ": 42777, + "ðŁĩ·": 16026, + "ðŁĩ·": 9869, + "ðŁĩ·ðŁĩº": 37902, + "ðŁĩ¸": 19447, + "ðŁĩ¸ðŁĩ": 33325, + "ðŁĩ¸ðŁĩª": 39260, + "ðŁĩ¹": 21810, + "ðŁĩ¹ðŁĩ": 36250, + "ðŁĩº": 4054, + "ðŁĩº": 17467, + "ðŁĩºðŁĩ": 4131, + "ðŁĩºðŁĩ¸": 8907, + "ðŁĩºðŁĩ¸": 5688, + "ðŁĩºðŁĩ¸ðŁĩºðŁĩ¸": 18739, + "ðŁĩºðŁĩ¸ðŁĩºðŁĩ¸": 41411, + "ðŁĩºðŁĩ¸ðŁĩºðŁĩ¸ðŁĩºðŁĩ¸": 43357, + "ðŁĩ¿": 25520, + "ðŁĩ¿ðŁĩ¦": 36982, + "ðŁĩŃ": 30370, + "ðŁĮ": 1576, + "ðŁĮ±": 35318, + "ðŁĮ±": 20665, + "ðŁĮ²": 34071, + "ðŁĮ²": 28154, + "ðŁĮ³": 44265, + "ðŁĮ³": 28543, + "ðŁĮ´": 20643, + "ðŁĮ´": 15968, + "ðŁĮµ": 40871, + "ðŁĮ·": 32328, + "ðŁĮ·": 24259, + "ðŁĮ¸": 16314, + "ðŁĮ¸": 10980, + "ðŁĮ¸ðŁĮ¸": 46210, + "ðŁĮ¹": 14990, + "ðŁĮ¹": 10662, + "ðŁĮ¹ðŁĮ¹": 37933, + "ðŁĮº": 27608, + "ðŁĮº": 19829, + "ðŁĮ»": 27196, + "ðŁĮ»": 19772, + "ðŁĮ¼": 36484, + "ðŁĮ¼": 26312, + "ðŁĮ¾": 39796, + "ðŁĮ¿": 27736, + "ðŁĮ¿": 18588, + "ðŁĮĢ": 34348, + "ðŁĮħ": 27547, + "ðŁĮĪ": 23038, + "ðŁĮĪ": 13042, + "ðŁĮĬ": 20465, + "ðŁĮĬ": 14302, + "ðŁĮĮ": 43393, + "ðŁĮį": 34931, + "ðŁĮį": 18641, + "ðŁĮİ": 31125, + "ðŁĮİ": 16969, + "ðŁĮı": 31527, + "ðŁĮIJ": 33071, + "ðŁĮĻ": 42330, + "ðŁĮĻ": 23283, + "ðŁĮļ": 49004, + "ðŁĮļ": 27877, + "ðŁĮŀ": 21152, + "ðŁĮŀ": 12980, + "ðŁĮŁ": 13196, + "ðŁĮŁ": 8542, + "ðŁĮŁðŁĮŁ": 26014, + "ðŁį": 2011, + "ðŁį¦": 47375, + "ðŁį¦": 32032, + "ðŁį©": 38379, + "ðŁįª": 38958, + "ðŁį«": 47994, + "ðŁį«": 33401, + "ðŁį°": 43732, + "ðŁį°": 30051, + "ðŁį³": 37441, + "ðŁį´": 41531, + "ðŁį´": 25338, + "ðŁį·": 24445, + "ðŁį·": 18072, + "ðŁį¸": 43058, + "ðŁį¸": 31217, + "ðŁį¹": 35598, + "ðŁįº": 31081, + "ðŁįº": 21590, + "ðŁį»": 22793, + "ðŁį»": 13167, + "ðŁį¾": 27294, + "ðŁį¾": 21656, + "ðŁįĢ": 22865, + "ðŁįĢ": 15764, + "ðŁįģ": 29837, + "ðŁįģ": 23075, + "ðŁįĤ": 35015, + "ðŁįĤ": 25721, + "ðŁįĥ": 27157, + "ðŁįĥ": 20147, + "ðŁįĩ": 48697, + "ðŁįĬ": 35001, + "ðŁįĬ": 28036, + "ðŁįĭ": 39543, + "ðŁįĮ": 44987, + "ðŁįį": 48946, + "ðŁįİ": 32069, + "ðŁįij": 32889, + "ðŁįĴ": 33160, + "ðŁįĵ": 44739, + "ðŁįĵ": 33456, + "ðŁįĶ": 46415, + "ðŁįĶ": 36031, + "ðŁįķ": 31469, + "ðŁįķ": 23904, + "ðŁįŃ": 42100, + "ðŁİ": 1165, + "ðŁİ£": 43158, + "ðŁİ¤": 23490, + "ðŁİ¤": 15690, + 
"ðŁİ¥": 22186, + "ðŁİ¥:": 43640, + "ðŁİ¥": 13233, + "ðŁİ§": 31254, + "ðŁİ§": 14266, + "ðŁİ¨": 31953, + "ðŁİ¨": 13461, + "ðŁİ©": 37701, + "ðŁİ«": 30331, + "ðŁİ¬": 36020, + "ðŁİ¬": 18150, + "ðŁİ®": 29312, + "ðŁİ¯": 23114, + "ðŁİµ": 27435, + "ðŁİµ": 14946, + "ðŁİ¶": 11755, + "ðŁİ¶": 6011, + "ðŁİ¶ðŁİ¶": 36283, + "ðŁİ¸": 29135, + "ðŁİ¸": 22122, + "ðŁİ¹": 43493, + "ðŁİ¼": 34949, + "ðŁİ¼": 23757, + "ðŁİ¾": 41982, + "ðŁİ¾": 24222, + "ðŁİĢ": 34347, + "ðŁİĢ": 20151, + "ðŁİģ": 18368, + "ðŁİģ": 13462, + "ðŁİĤ": 13026, + "ðŁİĤ": 10392, + "ðŁİĤðŁİĤ": 39338, + "ðŁİĥ": 22622, + "ðŁİĥ": 16780, + "ðŁİĦ": 12942, + "ðŁİĦ": 11267, + "ðŁİħ": 17685, + "ðŁİħ": 24276, + "ðŁİĨ": 39222, + "ðŁİĪ": 16142, + "ðŁİĪ": 14448, + "ðŁİĪðŁİī": 48049, + "ðŁİī": 4310, + "ðŁİī:": 17310, + "ðŁİī": 3986, + "ðŁİīðŁİ": 11473, + "ðŁİīðŁİĪ": 40499, + "ðŁİīðŁİĪ": 34008, + "ðŁİīðŁİī": 25159, + "ðŁİīðŁİī": 13450, + "ðŁİīðŁİīðŁİī": 20828, + "ðŁİīðŁİĬ": 31662, + "ðŁİīðŁİĬ": 30781, + "ðŁİĬ": 22763, + "ðŁİĬ": 22425, + "ðŁİĬðŁİī": 48801, + "ðŁİĵ": 28916, + "ðŁİĵ": 18744, + "ðŁİĻ": 29001, + "ðŁİĻ": 29753, + "ðŁİĻï¸ı": 44205, + "ðŁİŁ": 19248, + "ðŁİŁ": 21107, + "ðŁİŁï¸ı": 30243, + "ðŁİŃ": 28856, + "ðŁı": 1109, + "ðŁı¡": 27318, + "ðŁı³ï¸ı": 26844, + "ðŁı³ï¸ıâĢį": 27093, + "ðŁı³ï¸ıâĢįðŁĮĪ": 32610, + "ðŁı´": 39690, + "ðŁı´": 19704, + "ðŁı»": 5042, + "ðŁı»": 3702, + "ðŁı»âĢį": 46250, + "ðŁı»âĢįâĻĢï¸ı": 48391, + "ðŁı»âĢįâĻĢï¸ı": 23595, + "ðŁı»âĢįâĻĤï¸ı": 30984, + "ðŁı¼": 6193, + "ðŁı¼": 4027, + "ðŁı¼âĢįâĻĢï¸ı": 28955, + "ðŁı½": 8514, + "ðŁı½": 6114, + "ðŁı½âĢįâĻĢï¸ı": 37036, + "ðŁı½âĢįâĻĤï¸ı": 43157, + "ðŁı¾": 10230, + "ðŁı¾": 7778, + "ðŁı¾âĢįâĻĤï¸ı": 47189, + "ðŁı¿": 29854, + "ðŁı¿": 21094, + "ðŁıĢ": 13708, + "ðŁıĢ": 8813, + "ðŁıĢðŁıĢ": 43169, + "ðŁıģ": 29423, + "ðŁıģ": 17473, + "ðŁıĥ": 16820, + "ðŁıĥ": 32751, + "ðŁıħ": 25500, + "ðŁıĨ": 9585, + "ðŁıĨ": 5596, + "ðŁıĨðŁıĨ": 18946, + "ðŁıĨðŁıĨ": 38269, + "ðŁıĨðŁıĨðŁıĨ": 44484, + "ðŁıĩ": 45789, + "ðŁıĩ": 40288, + "ðŁıĪ": 16144, + "ðŁıĪ": 10477, + "ðŁıī": 26020, + "ðŁıĬ": 33061, + "ðŁıĬ": 47830, + "ðŁıĮ": 41116, + "ðŁıı": 32460, + "ðŁıIJ": 46334, + "ðŁıIJ": 29433, + "ðŁıĴ": 37756, + "ðŁıŁ": 35914, + "ðŁıŁ": 26472, + "ðŁıŁï¸ı": 42627, + "ðŁıł": 33727, + "ðŁIJ": 2074, + "ðŁIJ¢": 37049, + "ðŁIJ£": 39597, + "ðŁIJ¥": 42981, + "ðŁIJ¦": 37260, + "ðŁIJ¬": 44238, + "ðŁIJ¯": 34825, + "ðŁIJ¯": 26111, + "ðŁIJ°": 35378, + "ðŁIJ°": 25050, + "ðŁIJ±": 35710, + "ðŁIJ±": 22979, + "ðŁIJ´": 33509, + "ðŁIJ¶": 14466, + "ðŁIJ¶": 10631, + "ðŁIJ·": 38408, + "ðŁIJ¸": 45597, + "ðŁIJ¸": 40298, + "ðŁIJº": 44281, + "ðŁIJº": 31445, + "ðŁIJ»": 30750, + "ðŁIJ»": 25322, + "ðŁIJ¼": 46234, + "ðŁIJ¾": 16057, + "ðŁIJ¾": 11317, + "ðŁIJ¾ðŁIJ¾": 42202, + "ðŁIJī": 46908, + "ðŁIJĬ": 43974, + "ðŁIJį": 48903, + "ðŁIJį": 30177, + "ðŁIJİ": 48281, + "ðŁIJİ": 32726, + "ðŁIJIJ": 47735, + "ðŁIJIJ": 27954, + "ðŁIJij": 49389, + "ðŁIJķ": 41069, + "ðŁIJĺ": 38733, + "ðŁIJĿ": 30619, + "ðŁIJĿ": 20111, + "ðŁIJŁ": 42084, + "ðŁIJŁ": 29989, + "ðŁIJł": 42725, + "ðŁij": 964, + "ðŁij£": 39755, + "ðŁij§": 48938, + "ðŁij¨": 18966, + "ðŁij¨âĢį": 25023, + "ðŁij©": 18800, + "ðŁij©âĢį": 26304, + "ðŁij«": 47106, + "ðŁij«": 35457, + "ðŁij®": 42686, + "ðŁij¯": 25910, + "ðŁij¯": 20582, + "ðŁij¶": 26187, + "ðŁij¶": 33189, + "ðŁij¸": 26268, + "ðŁij¸": 36645, + "ðŁij¹": 46766, + "ðŁij»": 24625, + "ðŁij»": 16243, + "ðŁij¼": 25270, + "ðŁij¼": 31083, + "ðŁij½": 42677, + "ðŁij½": 26257, + "ðŁijĢ": 11524, + "ðŁijĢ": 5908, + "ðŁijĢðŁijĢ": 31561, + "ðŁijģ": 47796, + "ðŁijģ": 45705, + "ðŁijĦ": 47445, + "ðŁijħ": 31833, + "ðŁijħ": 24672, + "ðŁijĨ": 42975, + "ðŁijĨ": 45194, + "ðŁijĩ": 7662, 
+ "ðŁijĩ": 7475, + "ðŁijĩðŁı»": 45811, + "ðŁijĩðŁı»": 32813, + "ðŁijĩðŁı¼": 37504, + "ðŁijĩðŁijĩ": 17915, + "ðŁijĩðŁijĩ": 31891, + "ðŁijĩðŁijĩðŁijĩ": 35627, + "ðŁijĪ": 32794, + "ðŁijĪ": 20832, + "ðŁijī": 9477, + "ðŁijī": 3988, + "ðŁijīðŁı»": 23481, + "ðŁijīðŁı¼": 27534, + "ðŁijīðŁı½": 38059, + "ðŁijīðŁijī": 41480, + "ðŁijĬ": 8897, + "ðŁijĬ": 9704, + "ðŁijĬðŁı»": 47393, + "ðŁijĬðŁı»": 29152, + "ðŁijĬðŁı¼": 49000, + "ðŁijĬðŁı¼": 30115, + "ðŁijĬðŁijĬ": 46521, + "ðŁijĭ": 19351, + "ðŁijĭ": 17686, + "ðŁijĮ": 4890, + "ðŁijĮ": 4494, + "ðŁijĮðŁı»": 31818, + "ðŁijĮðŁı»": 18606, + "ðŁijĮðŁı¼": 37655, + "ðŁijĮðŁı¼": 20031, + "ðŁijĮðŁı½": 35834, + "ðŁijĮðŁijĮ": 36139, + "ðŁijĮðŁijĮ": 21435, + "ðŁijĮðŁijĮðŁijĮ": 40876, + "ðŁijį": 4686, + "ðŁijį": 4201, + "ðŁijįðŁı»": 25803, + "ðŁijįðŁı»": 15129, + "ðŁijįðŁı¼": 37285, + "ðŁijįðŁı¼": 19689, + "ðŁijįðŁı½": 43722, + "ðŁijįðŁijį": 33012, + "ðŁijįðŁijį": 18997, + "ðŁijįðŁijįðŁijį": 37284, + "ðŁijİ": 39702, + "ðŁijİ": 32568, + "ðŁijı": 3802, + "ðŁijı": 4829, + "ðŁijıðŁı»": 19236, + "ðŁijıðŁı»": 17029, + "ðŁijıðŁı»ðŁijıðŁı»": 35254, + "ðŁijıðŁı¼": 24496, + "ðŁijıðŁı¼": 19979, + "ðŁijıðŁı¼ðŁijıðŁı¼": 46712, + "ðŁijıðŁı½": 40796, + "ðŁijıðŁı½": 33978, + "ðŁijıðŁı¾": 45450, + "ðŁijıðŁijı": 10356, + "ðŁijıðŁijı": 16706, + "ðŁijıðŁijıðŁijı": 17254, + "ðŁijIJ": 40877, + "ðŁijij": 14955, + "ðŁijij": 8717, + "ðŁijijðŁijij": 48532, + "ðŁijķ": 47865, + "ðŁijŁ": 41183, + "ðŁijł": 41264, + "ðŁijŃ": 34175, + "ðŁijŃ": 27943, + "ðŁĴ": 837, + "ðŁĴ¡": 24081, + "ðŁĴ£": 36862, + "ðŁĴ£": 29006, + "ðŁĴ¤": 34706, + "ðŁĴ¤": 25632, + "ðŁĴ¥": 12209, + "ðŁĴ¥": 7347, + "ðŁĴ¥ðŁĴ¥": 27396, + "ðŁĴ¥ðŁĴ¥": 39246, + "ðŁĴ¥ðŁĴ¥ðŁĴ¥": 48890, + "ðŁĴ¦": 21180, + "ðŁĴ¦": 14060, + "ðŁĴ¦ðŁĴ¦": 44469, + "ðŁĴ§": 34095, + "ðŁĴ¨": 27408, + "ðŁĴ¨": 17891, + "ðŁĴ©": 48621, + "ðŁĴ©": 28847, + "ðŁĴª": 5475, + "ðŁĴª": 6440, + "ðŁĴªðŁı»": 31669, + "ðŁĴªðŁı»": 21903, + "ðŁĴªðŁı¼": 32041, + "ðŁĴªðŁı¼": 20759, + "ðŁĴªðŁı½": 46380, + "ðŁĴªðŁı½": 31111, + "ðŁĴªðŁı¾": 39398, + "ðŁĴªðŁĴª": 24747, + "ðŁĴªðŁĴªðŁĴª": 39913, + "ðŁĴ«": 25770, + "ðŁĴ«": 12526, + "ðŁĴ¬": 30947, + "ðŁĴ¯": 10611, + "ðŁĴ¯": 7018, + "ðŁĴ¯ðŁĴ¯": 30234, + "ðŁĴ¯ðŁĴ¯": 44070, + "ðŁĴ°": 20454, + "ðŁĴ°": 14078, + "ðŁĴ°ðŁĴ°": 41747, + "ðŁĴµ": 47412, + "ðŁĴµ": 38041, + "ðŁĴ¸": 37696, + "ðŁĴ¸": 25957, + "ðŁĴ»": 33433, + "ðŁĴ»": 18135, + "ðŁĴ¿": 39541, + "ðŁĴĢ": 14888, + "ðŁĴĢ": 12158, + "ðŁĴĢðŁĴĢ": 30884, + "ðŁĴģ": 13997, + "ðŁĴģ": 14392, + "ðŁĴĥ": 9947, + "ðŁĴĥ": 14333, + "ðŁĴĥðŁı»": 38624, + "ðŁĴĥðŁĴĥ": 28041, + "ðŁĴĦ": 46116, + "ðŁĴĦ": 34571, + "ðŁĴħ": 27457, + "ðŁĴħ": 32414, + "ðŁĴī": 44316, + "ðŁĴī": 30503, + "ðŁĴĭ": 12217, + "ðŁĴĭ": 7417, + "ðŁĴĭðŁĴĭ": 29214, + "ðŁĴĮ": 40817, + "ðŁĴį": 35850, + "ðŁĴį": 24898, + "ðŁĴİ": 25938, + "ðŁĴİ": 15874, + "ðŁĴIJ": 27375, + "ðŁĴIJ": 20554, + "ðŁĴij": 49404, + "ðŁĴĵ": 20628, + "ðŁĴĵ": 12568, + "ðŁĴĵðŁĴĵ": 43505, + "ðŁĴĶ": 18880, + "ðŁĴĶ": 10704, + "ðŁĴĶðŁĴĶ": 44673, + "ðŁĴķ": 5412, + "ðŁĴķ": 3082, + "ðŁĴķðŁĴķ": 23106, + "ðŁĴķðŁĴķ": 14117, + "ðŁĴķðŁĴķðŁĴķ": 26772, + "ðŁĴĸ": 8466, + "ðŁĴĸ": 5582, + "ðŁĴĸðŁĴĸ": 19562, + "ðŁĴĸðŁĴĸ": 30595, + "ðŁĴĸðŁĴĸðŁĴĸ": 33915, + "ðŁĴĹ": 10148, + "ðŁĴĹ": 6690, + "ðŁĴĹðŁĴĹ": 47158, + "ðŁĴĹðŁĴĹ": 24064, + "ðŁĴĹðŁĴĹðŁĴĹ": 36990, + "ðŁĴĺ": 18223, + "ðŁĴĺ": 10816, + "ðŁĴĺðŁĴĺ": 40464, + "ðŁĴĻ": 5305, + "ðŁĴĻ": 4074, + "ðŁĴĻðŁĴĻ": 17833, + "ðŁĴĻðŁĴĻ": 27101, + "ðŁĴĻðŁĴĻðŁĴĻ": 30698, + "ðŁĴĻðŁĴĽ": 46804, + "ðŁĴĻðŁĴĽ": 26230, + "ðŁĴĻðŁĴľ": 47931, + "ðŁĴĻðŁĴľ": 42541, + "ðŁĴļ": 8102, + "ðŁĴļ": 6521, + "ðŁĴļðŁĴļ": 27497, + "ðŁĴļðŁĴļ": 46209, + "ðŁĴļðŁĴļðŁĴļ": 46182, + "ðŁĴļðŁĴĽ": 41232, + 
"ðŁĴĽ": 8221, + "ðŁĴĽ": 6233, + "ðŁĴĽðŁĴĻ": 36337, + "ðŁĴĽðŁĴļ": 37994, + "ðŁĴĽðŁĴĽ": 32420, + "ðŁĴľ": 6832, + "ðŁĴľ": 4882, + "ðŁĴľðŁĴľ": 17280, + "ðŁĴľðŁĴľ": 28211, + "ðŁĴľðŁĴľðŁĴľ": 31004, + "ðŁĴĿ": 36761, + "ðŁĴĿ": 22002, + "ðŁĴŀ": 14862, + "ðŁĴŀ": 8988, + "ðŁĴŀðŁĴŀ": 36448, + "ðŁĴŁ": 49394, + "ðŁĴŁ": 28828, + "ðŁĴŃ": 33848, + "ðŁĵ": 1497, + "ðŁĵ¢": 46560, + "ðŁĵ¢": 20901, + "ðŁĵ£": 48841, + "ðŁĵ£": 21282, + "ðŁĵ°:": 28952, + "ðŁĵ°": 14985, + "ðŁĵ±": 36104, + "ðŁĵ±": 20824, + "ðŁĵ²": 19363, + "ðŁĵ·": 6966, + "ðŁĵ·:": 8294, + "ðŁĵ·": 5551, + "ðŁĵ·@": 40032, + "ðŁĵ¸": 8401, + "ðŁĵ¸:": 10379, + "ðŁĵ¸": 6074, + "ðŁĵ¸@": 39660, + "ðŁĵ¹": 49251, + "ðŁĵº": 21792, + "ðŁĵº:": 29728, + "ðŁĵº": 10450, + "ðŁĵ»": 32711, + "ðŁĵ»": 15882, + "ðŁĵ½": 45361, + "ðŁĵħ": 21277, + "ðŁĵĨ": 23471, + "ðŁĵĪ": 23359, + "ðŁĵĬ": 22244, + "ðŁĵĭ": 46351, + "ðŁĵĮ": 22289, + "ðŁĵį": 25043, + "ðŁĵį:": 36845, + "ðŁĵį": 8903, + "ðŁĵĸ": 49003, + "ðŁĵĸ": 23043, + "ðŁĵļ": 25433, + "ðŁĵļ": 15566, + "ðŁĵĿ": 31888, + "ðŁĵĿ:": 48398, + "ðŁĵĿ": 15853, + "ðŁĵŀ": 24022, + "ðŁĶ": 1428, + "ðŁĶ¥": 3191, + "ðŁĶ¥#": 44354, + "ðŁĶ¥": 3016, + "ðŁĶ¥ðŁĶ¥": 5692, + "ðŁĶ¥ðŁĶ¥": 11771, + "ðŁĶ¥ðŁĶ¥ðŁĶ¥": 11004, + "ðŁĶ¥ðŁĶ¥ðŁĶ¥ðŁĶ¥": 23408, + "ðŁĶ¥ðŁĶ¥ðŁĶ¥ðŁĶ¥": 30989, + "ðŁĶ¥ðŁĶ¥ðŁĶ¥ðŁĶ¥ðŁĶ¥": 48401, + "ðŁĶ¥ðŁĶĹ": 35130, + "ðŁĶª": 47078, + "ðŁĶª": 34545, + "ðŁĶ«": 38116, + "ðŁĶ«": 20583, + "ðŁĶ¬": 44227, + "ðŁĶ®": 38077, + "ðŁĶ´": 12408, + "ðŁĶ´": 10854, + "ðŁĶ´âļªï¸ı": 46879, + "ðŁĶ´âļªï¸ı": 40055, + "ðŁĶµ": 17531, + "ðŁĶµ": 17193, + "ðŁĶµâļªï¸ı": 42412, + "ðŁĶ¶": 42880, + "ðŁĶ¶": 36222, + "ðŁĶ·": 37740, + "ðŁĶ¸": 24200, + "ðŁĶ¹": 19995, + "ðŁĶº": 45561, + "ðŁĶģ": 41299, + "ðŁĶĬ": 32580, + "ðŁĶĬ": 20502, + "ðŁĶİ": 44935, + "ðŁĶij": 35127, + "ðŁĶĴ": 44972, + "ðŁĶĶ": 45753, + "ðŁĶĹ": 47475, + "ðŁĶĹ": 14561, + "ðŁĶĺ": 38995, + "ðŁĶľ": 36011, + "ðŁĶĿ": 44387, + "ðŁĶĿ": 29506, + "ðŁķ": 7692, + "ðŁķº": 33958, + "ðŁķĬ": 42624, + "ðŁķĬ": 37760, + "ðŁĸ": 6269, + "ðŁĸ¤": 17603, + "ðŁĸ¤": 10860, + "ðŁĸ¥": 47990, + "ðŁĹ": 7045, + "ðŁĹ£": 33232, + "ðŁĹ£": 18583, + "ðŁĹ£ï¸ı": 37476, + "ðŁĹĵ": 34335, + "ðŁĹĵ": 28773, + "ðŁĹĵï¸ı": 39847, + "ðŁĺ": 668, + "ðŁĺ¡": 21968, + "ðŁĺ¡": 17452, + "ðŁĺ¡ðŁĺ¡": 37223, + "ðŁĺ¢": 14308, + "ðŁĺ¢": 9925, + "ðŁĺ¢ðŁĺ¢": 32923, + "ðŁĺ¢ðŁĺ¢": 47921, + "ðŁĺ£": 32718, + "ðŁĺ¤": 26872, + "ðŁĺ¤": 20740, + "ðŁĺ¥": 38383, + "ðŁĺ¥": 23951, + "ðŁĺ¨": 38080, + "ðŁĺ©": 9051, + "ðŁĺ©": 9494, + "ðŁĺ©ðŁĺ©": 22820, + "ðŁĺ©ðŁĺ©": 38031, + "ðŁĺ©ðŁĺ©ðŁĺ©": 49063, + "ðŁĺª": 38181, + "ðŁĺª": 22243, + "ðŁĺ«": 25141, + "ðŁĺ«": 22340, + "ðŁĺ¬": 23704, + "ðŁĺ¬": 14549, + "ðŁĺ®": 40163, + "ðŁĺ®": 21616, + "ðŁĺ¯": 37858, + "ðŁĺ°": 34728, + "ðŁĺ±": 10938, + "ðŁĺ±": 9055, + "ðŁĺ±ðŁĺ±": 22061, + "ðŁĺ±ðŁĺ±": 40767, + "ðŁĺ±ðŁĺ±ðŁĺ±": 40909, + "ðŁĺ²": 40460, + "ðŁĺ²": 24620, + "ðŁĺ³": 12047, + "ðŁĺ³": 8223, + "ðŁĺ³ðŁĺ³": 32592, + "ðŁĺ´": 23527, + "ðŁĺ´": 16415, + "ðŁĺ´ðŁĺ´": 49307, + "ðŁĺµ": 39368, + "ðŁĺ¶": 35207, + "ðŁĺ·": 37943, + "ðŁĺ·": 25759, + "ðŁĺ¸": 36912, + "ðŁĺ¹": 26477, + "ðŁĺ¹": 26573, + "ðŁĺ¹ðŁĺ¹": 46287, + "ðŁĺº": 40613, + "ðŁĺ»": 15453, + "ðŁĺ»": 12911, + "ðŁĺ»ðŁĺ»": 34414, + "ðŁĺ¼": 44245, + "ðŁĺ½": 45156, + "ðŁĺĢ": 12832, + "ðŁĺĢ": 7334, + "ðŁĺĢðŁĺĢ": 34503, + "ðŁĺģ": 6967, + "ðŁĺģ": 4821, + "ðŁĺģðŁĺģ": 37900, + "ðŁĺģðŁĺģ": 19213, + "ðŁĺģðŁĺģðŁĺģ": 29083, + "ðŁĺĤ": 1424, + "ðŁĺĤ)": 42643, + "ðŁĺĤ.": 42550, + "ðŁĺĤ": 1558, + "ðŁĺĤâĿ¤ï¸ı": 36412, + "ðŁĺĤðŁijĮ": 42000, + "ðŁĺĤðŁĺĤ": 2286, + "ðŁĺĤðŁĺĤ": 4112, + "ðŁĺĤðŁĺĤðŁĺĤ": 22233, + "ðŁĺĤðŁĺĤðŁĺĤ": 4887, + "ðŁĺĤðŁĺĤðŁĺĤðŁĺĤ": 9936, + "ðŁĺĤðŁĺĤðŁĺĤðŁĺĤ": 11522, + "ðŁĺĤðŁĺĤðŁĺĤðŁĺĤðŁĺĤ": 19295, 
+ "ðŁĺĤðŁĺĤðŁĺĤðŁĺĤðŁĺĤðŁĺĤ": 33415, + "ðŁĺĤðŁĺĤðŁĺĤðŁĺĤðŁĺĤðŁĺĤðŁĺĤ": 48973, + "ðŁĺĤðŁĺĤðŁĺĤðŁĺĤðŁĺĤðŁĺĤðŁĺĤðŁĺĤ": 28504, + "ðŁĺĤðŁĺį": 43128, + "ðŁĺĤðŁĺŃ": 28965, + "ðŁĺĤðŁĺŃ": 25802, + "ðŁĺĥ": 14079, + "ðŁĺĥ": 8520, + "ðŁĺĥðŁĺĥ": 38358, + "ðŁĺĦ": 12141, + "ðŁĺĦ": 7624, + "ðŁĺĦðŁĺĦ": 32312, + "ðŁĺħ": 15245, + "ðŁĺħ": 9188, + "ðŁĺħðŁĺħ": 39078, + "ðŁĺĨ": 16541, + "ðŁĺĨ": 10943, + "ðŁĺĨðŁĺĨ": 39503, + "ðŁĺĩ": 21694, + "ðŁĺĩ": 13091, + "ðŁĺĪ": 14377, + "ðŁĺĪ": 9756, + "ðŁĺĪðŁĺĪ": 44473, + "ðŁĺī": 9740, + "ðŁĺī": 4955, + "ðŁĺīðŁĺī": 40430, + "ðŁĺĬ": 4692, + "ðŁĺĬ": 3020, + "ðŁĺĬâĿ¤ï¸ı": 43606, + "ðŁĺĬðŁĺĬ": 12838, + "ðŁĺĬðŁĺĬ": 20842, + "ðŁĺĬðŁĺĬðŁĺĬ": 28685, + "ðŁĺĬðŁĺĬðŁĺĬðŁĺĬ": 35519, + "ðŁĺĭ": 12391, + "ðŁĺĭ": 7203, + "ðŁĺĭðŁĺĭ": 33304, + "ðŁĺĮ": 19221, + "ðŁĺĮ": 12163, + "ðŁĺį": 1796, + "ðŁĺį#": 42357, + "ðŁĺį.": 48579, + "ðŁĺį": 1754, + "ðŁĺįâĿ¤": 29122, + "ðŁĺįâĿ¤ï¸ı": 21945, + "ðŁĺįðŁijĮ": 41005, + "ðŁĺįðŁĴķ": 35946, + "ðŁĺįðŁĶ¥": 46648, + "ðŁĺįðŁĺĤ": 48715, + "ðŁĺįðŁĺį": 3663, + "ðŁĺįðŁĺį": 6471, + "ðŁĺįðŁĺįðŁĺį": 30614, + "ðŁĺįðŁĺįðŁĺį": 7703, + "ðŁĺįðŁĺįðŁĺįðŁĺį": 16603, + "ðŁĺįðŁĺįðŁĺįðŁĺį": 18925, + "ðŁĺįðŁĺįðŁĺįðŁĺįðŁĺį": 32078, + "ðŁĺįðŁĺįðŁĺįðŁĺįðŁĺįðŁĺįðŁĺįðŁĺį": 48683, + "ðŁĺįðŁĺĺ": 29646, + "ðŁĺįðŁĺĺ": 19849, + "ðŁĺįðŁĺŃ": 39555, + "ðŁĺİ": 7426, + "ðŁĺİ": 4345, + "ðŁĺİðŁĺİ": 24048, + "ðŁĺİðŁĺİðŁĺİ": 39742, + "ðŁĺı": 11624, + "ðŁĺı": 6909, + "ðŁĺıðŁĺı": 38151, + "ðŁĺIJ": 38586, + "ðŁĺIJ": 19618, + "ðŁĺij": 32469, + "ðŁĺij": 18937, + "ðŁĺĴ": 20792, + "ðŁĺĴ": 11702, + "ðŁĺĵ": 28733, + "ðŁĺĶ": 19532, + "ðŁĺĶ": 11432, + "ðŁĺķ": 45741, + "ðŁĺķ": 20602, + "ðŁĺĸ": 35006, + "ðŁĺĺ": 4240, + "ðŁĺĺ": 3352, + "ðŁĺĺâĿ¤": 48409, + "ðŁĺĺâĿ¤ï¸ı": 39150, + "ðŁĺĺðŁĺį": 38176, + "ðŁĺĺðŁĺĺ": 15663, + "ðŁĺĺðŁĺĺ": 10507, + "ðŁĺĺðŁĺĺðŁĺĺ": 20208, + "ðŁĺĺðŁĺĺðŁĺĺðŁĺĺ": 44892, + "ðŁĺĻ": 36201, + "ðŁĺĻ": 29209, + "ðŁĺļ": 24897, + "ðŁĺļ": 19102, + "ðŁĺĽ": 24550, + "ðŁĺĽ": 15745, + "ðŁĺľ": 13226, + "ðŁĺľ": 7830, + "ðŁĺľðŁĺľ": 43065, + "ðŁĺĿ": 20064, + "ðŁĺĿ": 12970, + "ðŁĺŀ": 40458, + "ðŁĺŀ": 21103, + "ðŁĺŁ": 46947, + "ðŁĺł": 34094, + "ðŁĺŃ": 2962, + "ðŁĺŃ": 3915, + "ðŁĺŃâĿ¤ï¸ı": 29567, + "ðŁĺŃðŁĴķ": 46306, + "ðŁĺŃðŁĺĤ": 38505, + "ðŁĺŃðŁĺį": 36893, + "ðŁĺŃðŁĺŃ": 5300, + "ðŁĺŃðŁĺŃ": 11834, + "ðŁĺŃðŁĺŃðŁĺŃ": 44089, + "ðŁĺŃðŁĺŃðŁĺŃ": 13116, + "ðŁĺŃðŁĺŃðŁĺŃðŁĺŃ": 19793, + "ðŁĺŃðŁĺŃðŁĺŃðŁĺŃ": 27322, + "ðŁĺŃðŁĺŃðŁĺŃðŁĺŃðŁĺŃ": 43366, + "ðŁĻ": 1478, + "ðŁĻĢ": 43092, + "ðŁĻĤ": 32006, + "ðŁĻĤ": 14860, + "ðŁĻĥ": 27222, + "ðŁĻĥ": 15652, + "ðŁĻĦ": 20648, + "ðŁĻĦ": 13049, + "ðŁĻħ": 42702, + "ðŁĻĨ": 30050, + "ðŁĻĨ": 35730, + "ðŁĻĪ": 12661, + "ðŁĻĪ": 9516, + "ðŁĻĪðŁĻĪ": 41796, + "ðŁĻĬ": 23684, + "ðŁĻĬ": 16636, + "ðŁĻĭ": 19193, + "ðŁĻĭ": 30274, + "ðŁĻĮ": 4366, + "ðŁĻĮ": 4855, + "ðŁĻĮðŁı»": 26756, + "ðŁĻĮðŁı»": 15799, + "ðŁĻĮðŁı¼": 26584, + "ðŁĻĮðŁı¼": 15364, + "ðŁĻĮðŁı½": 36660, + "ðŁĻĮðŁı½": 22962, + "ðŁĻĮðŁı¾": 38023, + "ðŁĻĮðŁı¾": 26466, + "ðŁĻĮðŁĻĮ": 21202, + "ðŁĻĮðŁĻĮ": 30430, + "ðŁĻĮðŁĻĮðŁĻĮ": 37127, + "ðŁĻı": 4260, + "ðŁĻı": 5503, + "ðŁĻıðŁı»": 25100, + "ðŁĻıðŁı»": 16650, + "ðŁĻıðŁı¼": 31163, + "ðŁĻıðŁı¼": 18952, + "ðŁĻıðŁı½": 34103, + "ðŁĻıðŁı½": 21540, + "ðŁĻıðŁı¾": 34277, + "ðŁĻıðŁı¾": 21979, + "ðŁĻıðŁĻı": 18227, + "ðŁĻıðŁĻı": 26510, + "ðŁĻıðŁĻıðŁĻı": 31702, + "ðŁļ": 2730, + "ðŁļ¨": 12198, + "ðŁļ¨": 6056, + "ðŁļ¨ðŁļ¨": 36487, + "ðŁļ¨ðŁļ¨": 21440, + "ðŁļ¨ðŁļ¨ðŁļ¨": 41515, + "ðŁļ©": 44514, + "ðŁļ«": 35291, + "ðŁļ²": 37085, + "ðŁļ´": 30825, + "ðŁļ¶": 46060, + "ðŁļĢ": 22400, + "ðŁļĢ": 13542, + "ðŁļĢðŁļĢ": 49033, + "ðŁļĤ": 38949, + "ðŁļĮ": 46891, + "ðŁļĹ": 33054, + "ðŁļĹ": 22783, + "ðŁļĺ": 35825, + "ðŁļĻ": 48487, 
+ "ðŁĽ": 11306, + "ñ": 173, + "ñ": 429, + "ò": 174, + "ò": 430, + "ó": 175, + "ó": 431, + "ô": 176, + "ô": 432, + "õ": 177, + "õ": 433, + "ö": 178, + "ö": 434, + "÷": 179, + "÷": 435, + "ø": 180, + "ø": 436, + "ù": 181, + "ù": 437, + "ú": 182, + "ú": 438, + "û": 183, + "û": 439, + "ü": 184, + "ü": 440, + "ý": 185, + "ý": 441, + "þ": 186, + "þ": 442, + "ÿ": 187, + "ÿ": 443, + "Ā": 188, + "Ā": 444, + "ā": 189, + "ā": 445, + "Ă": 190, + "Ă": 446, + "ă": 191, + "ă": 447, + "Ą": 192, + "Ą": 448, + "ą": 193, + "ą": 449, + "Ć": 194, + "Ć": 450, + "ć": 195, + "ć": 451, + "Ĉ": 196, + "Ĉ": 452, + "ĉ": 197, + "ĉ": 453, + "Ċ": 198, + "Ċ": 454, + "ċ": 199, + "ċ": 455, + "Č": 200, + "Č": 456, + "č": 201, + "č": 457, + "Ď": 202, + "Ď": 458, + "ď": 203, + "ď": 459, + "Đ": 204, + "Đ": 460, + "đ": 205, + "đ": 461, + "Ē": 206, + "Ē": 462, + "ē": 207, + "ē": 463, + "Ĕ": 208, + "Ĕ": 464, + "ĕ": 209, + "ĕ": 465, + "Ė": 210, + "Ė": 466, + "ė": 211, + "ė": 467, + "Ę": 212, + "Ę": 468, + "ę": 213, + "ę": 469, + "Ě": 214, + "Ě": 470, + "ě": 215, + "ě": 471, + "Ĝ": 216, + "Ĝ": 472, + "ĝ": 217, + "ĝ": 473, + "Ğ": 218, + "Ğ": 474, + "ğ": 219, + "ğ": 475, + "Ġ": 220, + "Ġ": 476, + "ġ": 221, + "ġ": 477, + "Ģ": 222, + "Ģ": 478, + "Ģï¸ı": 9668, + "Ģï¸ı": 5511, + "ģ": 223, + "ģ": 479, + "ģà¸": 15016, + "Ĥ": 224, + "Ĥ": 480, + "Ĥâĸ": 29036, + "ĤâĸĤâĸ": 30832, + "ĥ": 225, + "ĥ": 481, + "Ħ": 226, + "Ħ": 482, + "Ħà¸": 20537, + "Ħë": 34462, + "Ħëĭ": 25170, + "ħ": 227, + "ħ": 483, + "ħï¸ı": 33950, + "Ĩ": 228, + "Ĩ": 484, + "ĩ": 229, + "ĩ": 485, + "Ī": 230, + "Ī": 486, + "ī": 231, + "ī": 487, + "īï¸ı": 37463, + "Ĭ": 232, + "Ĭ": 488, + "Ĭãģ": 30294, + "ĭ": 233, + "ĭ": 489, + "ĭãģ": 36218, + "ĭãĤ": 45737, + "Į": 234, + "Į": 490, + "ĮãĤĬãģ": 45969, + "ĮãĤĬãģŁãģĦ": 47021, + "Įë": 17003, + "į": 235, + "į": 491, + "İ": 236, + "İ": 492, + "ı": 237, + "ı": 493, + "IJ": 238, + "IJ": 494, + "ij": 239, + "ij": 495, + "Ĵ": 240, + "Ĵ": 496, + "ĵ": 241, + "ĵ": 497, + "Ķ": 242, + "Ķ": 498, + "Ķë": 37978, + "Ķï¸ı": 24395, + "Ķï¸ı": 7443, + "ķ": 243, + "ķ": 499, + "ķãĤ": 26609, + "ķï¸ı": 44853, + "ĸ": 244, + "ĸ": 500, + "ĸï¸ı": 28877, + "Ĺ": 245, + "Ĺ": 501, + "ĺ": 246, + "ĺ": 502, + "Ļ": 247, + "Ļ": 503, + "ļ": 248, + "ļ": 504, + "Ľ": 249, + "Ľ": 505, + "ľ": 250, + "ľ": 506, + "ľë": 39810, + "Ŀ": 251, + "Ŀ": 507, + "ŀ": 252, + "ŀ": 508, + "Ł": 253, + "Ł": 509, + "ŁãģĦ": 46023, + "ł": 254, + "ł": 510, + "łï¸ı": 27899, + "łï¸ı": 12715, + "łĪ": 43364, + "Ń": 255, + "Ń": 511 +} diff --git a/ldm_patched/modules/sd2_clip.py b/ldm_patched/modules/sd2_clip.py new file mode 100644 index 0000000000000000000000000000000000000000..2caec52bed46c429ca777ce216e9e33544545335 --- /dev/null +++ b/ldm_patched/modules/sd2_clip.py @@ -0,0 +1,28 @@ +# Taken from https://github.com/comfyanonymous/ComfyUI +# This file is only for reference, and not used in the backend or runtime. 
+ + +from ldm_patched.modules import sd1_clip +import torch +import os + +class SD2ClipHModel(sd1_clip.SDClipModel): + def __init__(self, arch="ViT-H-14", device="cpu", max_length=77, freeze=True, layer="penultimate", layer_idx=None, dtype=None): + if layer == "penultimate": + layer="hidden" + layer_idx=-2 + + textmodel_json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "sd2_clip_config.json") + super().__init__(device=device, freeze=freeze, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, dtype=dtype, special_tokens={"start": 49406, "end": 49407, "pad": 0}) + +class SD2ClipHTokenizer(sd1_clip.SDTokenizer): + def __init__(self, tokenizer_path=None, embedding_directory=None): + super().__init__(tokenizer_path, pad_with_end=False, embedding_directory=embedding_directory, embedding_size=1024) + +class SD2Tokenizer(sd1_clip.SD1Tokenizer): + def __init__(self, embedding_directory=None): + super().__init__(embedding_directory=embedding_directory, clip_name="h", tokenizer=SD2ClipHTokenizer) + +class SD2ClipModel(sd1_clip.SD1ClipModel): + def __init__(self, device="cpu", dtype=None, **kwargs): + super().__init__(device=device, dtype=dtype, clip_name="h", clip_model=SD2ClipHModel, **kwargs) diff --git a/ldm_patched/modules/sd2_clip_config.json b/ldm_patched/modules/sd2_clip_config.json new file mode 100644 index 0000000000000000000000000000000000000000..85cec832be9a1d0957245a8d125af398829f247e --- /dev/null +++ b/ldm_patched/modules/sd2_clip_config.json @@ -0,0 +1,23 @@ +{ + "architectures": [ + "CLIPTextModel" + ], + "attention_dropout": 0.0, + "bos_token_id": 0, + "dropout": 0.0, + "eos_token_id": 2, + "hidden_act": "gelu", + "hidden_size": 1024, + "initializer_factor": 1.0, + "initializer_range": 0.02, + "intermediate_size": 4096, + "layer_norm_eps": 1e-05, + "max_position_embeddings": 77, + "model_type": "clip_text_model", + "num_attention_heads": 16, + "num_hidden_layers": 24, + "pad_token_id": 1, + "projection_dim": 1024, + "torch_dtype": "float32", + "vocab_size": 49408 +} diff --git a/ldm_patched/modules/sdxl_clip.py b/ldm_patched/modules/sdxl_clip.py new file mode 100644 index 0000000000000000000000000000000000000000..8839e7bbc7dde0087b175f9fc0e9b07b9e636a65 --- /dev/null +++ b/ldm_patched/modules/sdxl_clip.py @@ -0,0 +1,70 @@ +# Taken from https://github.com/comfyanonymous/ComfyUI +# This file is only for reference, and not used in the backend or runtime. 
+ + +from ldm_patched.modules import sd1_clip +import torch +import os + +class SDXLClipG(sd1_clip.SDClipModel): + def __init__(self, device="cpu", max_length=77, freeze=True, layer="penultimate", layer_idx=None, dtype=None): + if layer == "penultimate": + layer="hidden" + layer_idx=-2 + + textmodel_json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_config_bigg.json") + super().__init__(device=device, freeze=freeze, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, dtype=dtype, + special_tokens={"start": 49406, "end": 49407, "pad": 0}, layer_norm_hidden_state=False) + + def load_sd(self, sd): + return super().load_sd(sd) + +class SDXLClipGTokenizer(sd1_clip.SDTokenizer): + def __init__(self, tokenizer_path=None, embedding_directory=None): + super().__init__(tokenizer_path, pad_with_end=False, embedding_directory=embedding_directory, embedding_size=1280, embedding_key='clip_g') + + +class SDXLTokenizer: + def __init__(self, embedding_directory=None): + self.clip_l = sd1_clip.SDTokenizer(embedding_directory=embedding_directory) + self.clip_g = SDXLClipGTokenizer(embedding_directory=embedding_directory) + + def tokenize_with_weights(self, text:str, return_word_ids=False): + out = {} + out["g"] = self.clip_g.tokenize_with_weights(text, return_word_ids) + out["l"] = self.clip_l.tokenize_with_weights(text, return_word_ids) + return out + + def untokenize(self, token_weight_pair): + return self.clip_g.untokenize(token_weight_pair) + +class SDXLClipModel(torch.nn.Module): + def __init__(self, device="cpu", dtype=None): + super().__init__() + self.clip_l = sd1_clip.SDClipModel(layer="hidden", layer_idx=-2, device=device, dtype=dtype, layer_norm_hidden_state=False) + self.clip_g = SDXLClipG(device=device, dtype=dtype) + + def clip_layer(self, layer_idx): + self.clip_l.clip_layer(layer_idx) + self.clip_g.clip_layer(layer_idx) + + def reset_clip_layer(self): + self.clip_g.reset_clip_layer() + self.clip_l.reset_clip_layer() + + def encode_token_weights(self, token_weight_pairs): + token_weight_pairs_g = token_weight_pairs["g"] + token_weight_pairs_l = token_weight_pairs["l"] + g_out, g_pooled = self.clip_g.encode_token_weights(token_weight_pairs_g) + l_out, l_pooled = self.clip_l.encode_token_weights(token_weight_pairs_l) + return torch.cat([l_out, g_out], dim=-1), g_pooled + + def load_sd(self, sd): + if "text_model.encoder.layers.30.mlp.fc1.weight" in sd: + return self.clip_g.load_sd(sd) + else: + return self.clip_l.load_sd(sd) + +class SDXLRefinerClipModel(sd1_clip.SD1ClipModel): + def __init__(self, device="cpu", dtype=None): + super().__init__(device=device, dtype=dtype, clip_name="g", clip_model=SDXLClipG) diff --git a/ldm_patched/modules/supported_models.py b/ldm_patched/modules/supported_models.py new file mode 100644 index 0000000000000000000000000000000000000000..de21c10d4e3aea67abb4e9e2c192bd70aaa7f77d --- /dev/null +++ b/ldm_patched/modules/supported_models.py @@ -0,0 +1,313 @@ +# Taken from https://github.com/comfyanonymous/ComfyUI + + +import torch +from . import model_base +from . import utils + +from . import sd1_clip +from . import sd2_clip +from . import sdxl_clip + +from . import supported_models_base +from . import latent_formats + +from . 
import diffusers_convert + +class SD15(supported_models_base.BASE): + unet_config = { + "context_dim": 768, + "model_channels": 320, + "use_linear_in_transformer": False, + "adm_in_channels": None, + "use_temporal_attention": False, + } + + unet_extra_config = { + "num_heads": 8, + "num_head_channels": -1, + } + + latent_format = latent_formats.SD15 + + def process_clip_state_dict(self, state_dict): + k = list(state_dict.keys()) + for x in k: + if x.startswith("cond_stage_model.transformer.") and not x.startswith("cond_stage_model.transformer.text_model."): + y = x.replace("cond_stage_model.transformer.", "cond_stage_model.transformer.text_model.") + state_dict[y] = state_dict.pop(x) + + if 'cond_stage_model.transformer.text_model.embeddings.position_ids' in state_dict: + ids = state_dict['cond_stage_model.transformer.text_model.embeddings.position_ids'] + if ids.dtype == torch.float32: + state_dict['cond_stage_model.transformer.text_model.embeddings.position_ids'] = ids.round() + + replace_prefix = {} + replace_prefix["cond_stage_model."] = "cond_stage_model.clip_l." + state_dict = utils.state_dict_prefix_replace(state_dict, replace_prefix) + return state_dict + + def process_clip_state_dict_for_saving(self, state_dict): + replace_prefix = {"clip_l.": "cond_stage_model."} + return utils.state_dict_prefix_replace(state_dict, replace_prefix) + + def clip_target(self): + return supported_models_base.ClipTarget(sd1_clip.SD1Tokenizer, sd1_clip.SD1ClipModel) + +class SD20(supported_models_base.BASE): + unet_config = { + "context_dim": 1024, + "model_channels": 320, + "use_linear_in_transformer": True, + "adm_in_channels": None, + "use_temporal_attention": False, + } + + latent_format = latent_formats.SD15 + + def model_type(self, state_dict, prefix=""): + if self.unet_config["in_channels"] == 4: #SD2.0 inpainting models are not v prediction + k = "{}output_blocks.11.1.transformer_blocks.0.norm1.bias".format(prefix) + out = state_dict[k] + if torch.std(out, unbiased=False) > 0.09: # not sure how well this will actually work. I guess we will find out. + return model_base.ModelType.V_PREDICTION + return model_base.ModelType.EPS + + def process_clip_state_dict(self, state_dict): + replace_prefix = {} + replace_prefix["conditioner.embedders.0.model."] = "cond_stage_model.model." #SD2 in sgm format + state_dict = utils.state_dict_prefix_replace(state_dict, replace_prefix) + + state_dict = utils.transformers_convert(state_dict, "cond_stage_model.model.", "cond_stage_model.clip_h.transformer.text_model.", 24) + return state_dict + + def process_clip_state_dict_for_saving(self, state_dict): + replace_prefix = {} + replace_prefix["clip_h"] = "cond_stage_model.model" + state_dict = utils.state_dict_prefix_replace(state_dict, replace_prefix) + state_dict = diffusers_convert.convert_text_enc_state_dict_v20(state_dict) + return state_dict + + def clip_target(self): + return supported_models_base.ClipTarget(sd2_clip.SD2Tokenizer, sd2_clip.SD2ClipModel) + +class SD21UnclipL(SD20): + unet_config = { + "context_dim": 1024, + "model_channels": 320, + "use_linear_in_transformer": True, + "adm_in_channels": 1536, + "use_temporal_attention": False, + } + + clip_vision_prefix = "embedder.model.visual." 
+ noise_aug_config = {"noise_schedule_config": {"timesteps": 1000, "beta_schedule": "squaredcos_cap_v2"}, "timestep_dim": 768} + + +class SD21UnclipH(SD20): + unet_config = { + "context_dim": 1024, + "model_channels": 320, + "use_linear_in_transformer": True, + "adm_in_channels": 2048, + "use_temporal_attention": False, + } + + clip_vision_prefix = "embedder.model.visual." + noise_aug_config = {"noise_schedule_config": {"timesteps": 1000, "beta_schedule": "squaredcos_cap_v2"}, "timestep_dim": 1024} + +class SDXLRefiner(supported_models_base.BASE): + unet_config = { + "model_channels": 384, + "use_linear_in_transformer": True, + "context_dim": 1280, + "adm_in_channels": 2560, + "transformer_depth": [0, 0, 4, 4, 4, 4, 0, 0], + "use_temporal_attention": False, + } + + latent_format = latent_formats.SDXL + + def get_model(self, state_dict, prefix="", device=None): + return model_base.SDXLRefiner(self, device=device) + + def process_clip_state_dict(self, state_dict): + keys_to_replace = {} + replace_prefix = {} + + state_dict = utils.transformers_convert(state_dict, "conditioner.embedders.0.model.", "cond_stage_model.clip_g.transformer.text_model.", 32) + keys_to_replace["conditioner.embedders.0.model.text_projection"] = "cond_stage_model.clip_g.text_projection" + keys_to_replace["conditioner.embedders.0.model.logit_scale"] = "cond_stage_model.clip_g.logit_scale" + + state_dict = utils.state_dict_key_replace(state_dict, keys_to_replace) + return state_dict + + def process_clip_state_dict_for_saving(self, state_dict): + replace_prefix = {} + state_dict_g = diffusers_convert.convert_text_enc_state_dict_v20(state_dict, "clip_g") + if "clip_g.transformer.text_model.embeddings.position_ids" in state_dict_g: + state_dict_g.pop("clip_g.transformer.text_model.embeddings.position_ids") + replace_prefix["clip_g"] = "conditioner.embedders.0.model" + state_dict_g = utils.state_dict_prefix_replace(state_dict_g, replace_prefix) + return state_dict_g + + def clip_target(self): + return supported_models_base.ClipTarget(sdxl_clip.SDXLTokenizer, sdxl_clip.SDXLRefinerClipModel) + +class SDXL(supported_models_base.BASE): + unet_config = { + "model_channels": 320, + "use_linear_in_transformer": True, + "transformer_depth": [0, 0, 2, 2, 10, 10], + "context_dim": 2048, + "adm_in_channels": 2816, + "use_temporal_attention": False, + } + + latent_format = latent_formats.SDXL + + def model_type(self, state_dict, prefix=""): + if "v_pred" in state_dict: + return model_base.ModelType.V_PREDICTION + else: + return model_base.ModelType.EPS + + def get_model(self, state_dict, prefix="", device=None): + out = model_base.SDXL(self, model_type=self.model_type(state_dict, prefix), device=device) + if self.inpaint_model(): + out.set_inpaint() + return out + + def process_clip_state_dict(self, state_dict): + keys_to_replace = {} + replace_prefix = {} + + replace_prefix["conditioner.embedders.0.transformer.text_model"] = "cond_stage_model.clip_l.transformer.text_model" + state_dict = utils.transformers_convert(state_dict, "conditioner.embedders.1.model.", "cond_stage_model.clip_g.transformer.text_model.", 32) + keys_to_replace["conditioner.embedders.1.model.text_projection"] = "cond_stage_model.clip_g.text_projection" + keys_to_replace["conditioner.embedders.1.model.text_projection.weight"] = "cond_stage_model.clip_g.text_projection" + keys_to_replace["conditioner.embedders.1.model.logit_scale"] = "cond_stage_model.clip_g.logit_scale" + + state_dict = utils.state_dict_prefix_replace(state_dict, replace_prefix) + state_dict = 
utils.state_dict_key_replace(state_dict, keys_to_replace) + return state_dict + + def process_clip_state_dict_for_saving(self, state_dict): + replace_prefix = {} + keys_to_replace = {} + state_dict_g = diffusers_convert.convert_text_enc_state_dict_v20(state_dict, "clip_g") + if "clip_g.transformer.text_model.embeddings.position_ids" in state_dict_g: + state_dict_g.pop("clip_g.transformer.text_model.embeddings.position_ids") + for k in state_dict: + if k.startswith("clip_l"): + state_dict_g[k] = state_dict[k] + + replace_prefix["clip_g"] = "conditioner.embedders.1.model" + replace_prefix["clip_l"] = "conditioner.embedders.0" + state_dict_g = utils.state_dict_prefix_replace(state_dict_g, replace_prefix) + return state_dict_g + + def clip_target(self): + return supported_models_base.ClipTarget(sdxl_clip.SDXLTokenizer, sdxl_clip.SDXLClipModel) + +class SSD1B(SDXL): + unet_config = { + "model_channels": 320, + "use_linear_in_transformer": True, + "transformer_depth": [0, 0, 2, 2, 4, 4], + "context_dim": 2048, + "adm_in_channels": 2816, + "use_temporal_attention": False, + } + +class Segmind_Vega(SDXL): + unet_config = { + "model_channels": 320, + "use_linear_in_transformer": True, + "transformer_depth": [0, 0, 1, 1, 2, 2], + "context_dim": 2048, + "adm_in_channels": 2816, + "use_temporal_attention": False, + } + +class SVD_img2vid(supported_models_base.BASE): + unet_config = { + "model_channels": 320, + "in_channels": 8, + "use_linear_in_transformer": True, + "transformer_depth": [1, 1, 1, 1, 1, 1, 0, 0], + "context_dim": 1024, + "adm_in_channels": 768, + "use_temporal_attention": True, + "use_temporal_resblock": True + } + + clip_vision_prefix = "conditioner.embedders.0.open_clip.model.visual." + + latent_format = latent_formats.SD15 + + sampling_settings = {"sigma_max": 700.0, "sigma_min": 0.002} + + def get_model(self, state_dict, prefix="", device=None): + out = model_base.SVD_img2vid(self, device=device) + return out + + def clip_target(self): + return None + +class Stable_Zero123(supported_models_base.BASE): + unet_config = { + "context_dim": 768, + "model_channels": 320, + "use_linear_in_transformer": False, + "adm_in_channels": None, + "use_temporal_attention": False, + "in_channels": 8, + } + + unet_extra_config = { + "num_heads": 8, + "num_head_channels": -1, + } + + clip_vision_prefix = "cond_stage_model.model.visual." 
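SSD1B and Segmind_Vega above reuse the SDXL code path and differ from it only in their transformer_depth signature, which is how checkpoint detection tells the three apart. A rough, self-contained sketch of that selection idea (mirroring the matches() loop in supported_models_base.py further down in this diff; the SIGNATURES table and pick() helper here are illustrative, not part of the code):

# Trimmed-down signature table; the real classes carry more keys and the real
# walk goes over the `models` list defined at the end of supported_models.py.
SIGNATURES = {
    "SDXL": {"transformer_depth": [0, 0, 2, 2, 10, 10], "context_dim": 2048, "adm_in_channels": 2816},
    "SSD1B": {"transformer_depth": [0, 0, 2, 2, 4, 4], "context_dim": 2048, "adm_in_channels": 2816},
    "Segmind_Vega": {"transformer_depth": [0, 0, 1, 1, 2, 2], "context_dim": 2048, "adm_in_channels": 2816},
}

def pick(detected):
    # Same idea as BASE.matches(): the first candidate whose declared keys all
    # agree with the detected UNet config wins.
    for name, sig in SIGNATURES.items():
        if all(detected.get(k) == v for k, v in sig.items()):
            return name
    return None

print(pick({"transformer_depth": [0, 0, 2, 2, 4, 4], "context_dim": 2048, "adm_in_channels": 2816}))
# -> "SSD1B": SDXL is rejected on transformer_depth, SSD1B matches every declared key.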
+ + latent_format = latent_formats.SD15 + + def get_model(self, state_dict, prefix="", device=None): + out = model_base.Stable_Zero123(self, device=device, cc_projection_weight=state_dict["cc_projection.weight"], cc_projection_bias=state_dict["cc_projection.bias"]) + return out + + def clip_target(self): + return None + +class SD_X4Upscaler(SD20): + unet_config = { + "context_dim": 1024, + "model_channels": 256, + 'in_channels': 7, + "use_linear_in_transformer": True, + "adm_in_channels": None, + "use_temporal_attention": False, + } + + unet_extra_config = { + "disable_self_attentions": [True, True, True, False], + "num_classes": 1000, + "num_heads": 8, + "num_head_channels": -1, + } + + latent_format = latent_formats.SD_X4 + + sampling_settings = { + "linear_start": 0.0001, + "linear_end": 0.02, + } + + def get_model(self, state_dict, prefix="", device=None): + out = model_base.SD_X4Upscaler(self, device=device) + return out + +models = [Stable_Zero123, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXLRefiner, SDXL, SSD1B, Segmind_Vega, SD_X4Upscaler] +models += [SVD_img2vid] diff --git a/ldm_patched/modules/supported_models_base.py b/ldm_patched/modules/supported_models_base.py new file mode 100644 index 0000000000000000000000000000000000000000..e94420df57cf3ebe8528d9db23d5052235ab9600 --- /dev/null +++ b/ldm_patched/modules/supported_models_base.py @@ -0,0 +1,87 @@ +# Taken from https://github.com/comfyanonymous/ComfyUI + + +import torch +from . import model_base +from . import utils +from . import latent_formats + +class ClipTarget: + def __init__(self, tokenizer, clip): + self.clip = clip + self.tokenizer = tokenizer + self.params = {} + +class BASE: + unet_config = {} + unet_extra_config = { + "num_heads": -1, + "num_head_channels": 64, + } + + clip_prefix = [] + clip_vision_prefix = None + noise_aug_config = None + sampling_settings = {} + latent_format = latent_formats.LatentFormat + vae_key_prefix = ["first_stage_model."] + + manual_cast_dtype = None + + @classmethod + def matches(s, unet_config): + for k in s.unet_config: + if s.unet_config[k] != unet_config[k]: + return False + return True + + def model_type(self, state_dict, prefix=""): + return model_base.ModelType.EPS + + def inpaint_model(self): + return self.unet_config["in_channels"] > 4 + + def __init__(self, unet_config): + self.unet_config = unet_config + self.latent_format = self.latent_format() + for x in self.unet_extra_config: + self.unet_config[x] = self.unet_extra_config[x] + + def get_model(self, state_dict, prefix="", device=None): + if self.noise_aug_config is not None: + out = model_base.SD21UNCLIP(self, self.noise_aug_config, model_type=self.model_type(state_dict, prefix), device=device) + else: + out = model_base.BaseModel(self, model_type=self.model_type(state_dict, prefix), device=device) + if self.inpaint_model(): + out.set_inpaint() + return out + + def process_clip_state_dict(self, state_dict): + return state_dict + + def process_unet_state_dict(self, state_dict): + return state_dict + + def process_vae_state_dict(self, state_dict): + return state_dict + + def process_clip_state_dict_for_saving(self, state_dict): + replace_prefix = {"": "cond_stage_model."} + return utils.state_dict_prefix_replace(state_dict, replace_prefix) + + def process_clip_vision_state_dict_for_saving(self, state_dict): + replace_prefix = {} + if self.clip_vision_prefix is not None: + replace_prefix[""] = self.clip_vision_prefix + return utils.state_dict_prefix_replace(state_dict, replace_prefix) + + def 
process_unet_state_dict_for_saving(self, state_dict): + replace_prefix = {"": "model.diffusion_model."} + return utils.state_dict_prefix_replace(state_dict, replace_prefix) + + def process_vae_state_dict_for_saving(self, state_dict): + replace_prefix = {"": "first_stage_model."} + return utils.state_dict_prefix_replace(state_dict, replace_prefix) + + def set_manual_cast(self, manual_cast_dtype): + self.manual_cast_dtype = manual_cast_dtype diff --git a/ldm_patched/modules/utils.py b/ldm_patched/modules/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..06a3c8262b596b45d2b29a35f540f4a686bdf10f --- /dev/null +++ b/ldm_patched/modules/utils.py @@ -0,0 +1,479 @@ +# 1st edit https://github.com/comfyanonymous/ComfyUI +# 2nd edit by Forge + + +import torch +import math +import struct +import ldm_patched.modules.checkpoint_pickle +import safetensors.torch +import numpy as np +from PIL import Image +from tqdm import tqdm + +def load_torch_file(ckpt, safe_load=False, device=None): + if device is None: + device = torch.device("cpu") + if ckpt.lower().endswith(".safetensors"): + sd = safetensors.torch.load_file(ckpt, device=device.type) + else: + if safe_load: + if not 'weights_only' in torch.load.__code__.co_varnames: + print("Warning torch.load doesn't support weights_only on this pytorch version, loading unsafely.") + safe_load = False + if safe_load: + pl_sd = torch.load(ckpt, map_location=device, weights_only=True) + else: + pl_sd = torch.load(ckpt, map_location=device, pickle_module=ldm_patched.modules.checkpoint_pickle) + if "global_step" in pl_sd: + print(f"Global Step: {pl_sd['global_step']}") + if "state_dict" in pl_sd: + sd = pl_sd["state_dict"] + else: + sd = pl_sd + return sd + +def save_torch_file(sd, ckpt, metadata=None): + if metadata is not None: + safetensors.torch.save_file(sd, ckpt, metadata=metadata) + else: + safetensors.torch.save_file(sd, ckpt) + +def calculate_parameters(sd, prefix=""): + params = 0 + for k in sd.keys(): + if k.startswith(prefix): + params += sd[k].nelement() + return params + +def state_dict_key_replace(state_dict, keys_to_replace): + for x in keys_to_replace: + if x in state_dict: + state_dict[keys_to_replace[x]] = state_dict.pop(x) + return state_dict + +def state_dict_prefix_replace(state_dict, replace_prefix, filter_keys=False): + if filter_keys: + out = {} + else: + out = state_dict + for rp in replace_prefix: + replace = list(map(lambda a: (a, "{}{}".format(replace_prefix[rp], a[len(rp):])), filter(lambda a: a.startswith(rp), state_dict.keys()))) + for x in replace: + w = state_dict.pop(x[0]) + out[x[1]] = w + return out + + +def transformers_convert(sd, prefix_from, prefix_to, number): + keys_to_replace = { + "{}positional_embedding": "{}embeddings.position_embedding.weight", + "{}token_embedding.weight": "{}embeddings.token_embedding.weight", + "{}ln_final.weight": "{}final_layer_norm.weight", + "{}ln_final.bias": "{}final_layer_norm.bias", + } + + for k in keys_to_replace: + x = k.format(prefix_from) + if x in sd: + sd[keys_to_replace[k].format(prefix_to)] = sd.pop(x) + + resblock_to_replace = { + "ln_1": "layer_norm1", + "ln_2": "layer_norm2", + "mlp.c_fc": "mlp.fc1", + "mlp.c_proj": "mlp.fc2", + "attn.out_proj": "self_attn.out_proj", + } + + for resblock in range(number): + for x in resblock_to_replace: + for y in ["weight", "bias"]: + k = "{}transformer.resblocks.{}.{}.{}".format(prefix_from, resblock, x, y) + k_to = "{}encoder.layers.{}.{}.{}".format(prefix_to, resblock, resblock_to_replace[x], y) + if k in sd: + 
sd[k_to] = sd.pop(k) + + for y in ["weight", "bias"]: + k_from = "{}transformer.resblocks.{}.attn.in_proj_{}".format(prefix_from, resblock, y) + if k_from in sd: + weights = sd.pop(k_from) + shape_from = weights.shape[0] // 3 + for x in range(3): + p = ["self_attn.q_proj", "self_attn.k_proj", "self_attn.v_proj"] + k_to = "{}encoder.layers.{}.{}.{}".format(prefix_to, resblock, p[x], y) + sd[k_to] = weights[shape_from*x:shape_from*(x + 1)] + return sd + +UNET_MAP_ATTENTIONS = { + "proj_in.weight", + "proj_in.bias", + "proj_out.weight", + "proj_out.bias", + "norm.weight", + "norm.bias", +} + +TRANSFORMER_BLOCKS = { + "norm1.weight", + "norm1.bias", + "norm2.weight", + "norm2.bias", + "norm3.weight", + "norm3.bias", + "attn1.to_q.weight", + "attn1.to_k.weight", + "attn1.to_v.weight", + "attn1.to_out.0.weight", + "attn1.to_out.0.bias", + "attn2.to_q.weight", + "attn2.to_k.weight", + "attn2.to_v.weight", + "attn2.to_out.0.weight", + "attn2.to_out.0.bias", + "ff.net.0.proj.weight", + "ff.net.0.proj.bias", + "ff.net.2.weight", + "ff.net.2.bias", +} + +UNET_MAP_RESNET = { + "in_layers.2.weight": "conv1.weight", + "in_layers.2.bias": "conv1.bias", + "emb_layers.1.weight": "time_emb_proj.weight", + "emb_layers.1.bias": "time_emb_proj.bias", + "out_layers.3.weight": "conv2.weight", + "out_layers.3.bias": "conv2.bias", + "skip_connection.weight": "conv_shortcut.weight", + "skip_connection.bias": "conv_shortcut.bias", + "in_layers.0.weight": "norm1.weight", + "in_layers.0.bias": "norm1.bias", + "out_layers.0.weight": "norm2.weight", + "out_layers.0.bias": "norm2.bias", +} + +UNET_MAP_BASIC = { + ("label_emb.0.0.weight", "class_embedding.linear_1.weight"), + ("label_emb.0.0.bias", "class_embedding.linear_1.bias"), + ("label_emb.0.2.weight", "class_embedding.linear_2.weight"), + ("label_emb.0.2.bias", "class_embedding.linear_2.bias"), + ("label_emb.0.0.weight", "add_embedding.linear_1.weight"), + ("label_emb.0.0.bias", "add_embedding.linear_1.bias"), + ("label_emb.0.2.weight", "add_embedding.linear_2.weight"), + ("label_emb.0.2.bias", "add_embedding.linear_2.bias"), + ("input_blocks.0.0.weight", "conv_in.weight"), + ("input_blocks.0.0.bias", "conv_in.bias"), + ("out.0.weight", "conv_norm_out.weight"), + ("out.0.bias", "conv_norm_out.bias"), + ("out.2.weight", "conv_out.weight"), + ("out.2.bias", "conv_out.bias"), + ("time_embed.0.weight", "time_embedding.linear_1.weight"), + ("time_embed.0.bias", "time_embedding.linear_1.bias"), + ("time_embed.2.weight", "time_embedding.linear_2.weight"), + ("time_embed.2.bias", "time_embedding.linear_2.bias") +} + +def unet_to_diffusers(unet_config): + num_res_blocks = unet_config["num_res_blocks"] + channel_mult = unet_config["channel_mult"] + transformer_depth = unet_config["transformer_depth"][:] + transformer_depth_output = unet_config["transformer_depth_output"][:] + num_blocks = len(channel_mult) + + transformers_mid = unet_config.get("transformer_depth_middle", None) + + diffusers_unet_map = {} + for x in range(num_blocks): + n = 1 + (num_res_blocks[x] + 1) * x + for i in range(num_res_blocks[x]): + for b in UNET_MAP_RESNET: + diffusers_unet_map["down_blocks.{}.resnets.{}.{}".format(x, i, UNET_MAP_RESNET[b])] = "input_blocks.{}.0.{}".format(n, b) + num_transformers = transformer_depth.pop(0) + if num_transformers > 0: + for b in UNET_MAP_ATTENTIONS: + diffusers_unet_map["down_blocks.{}.attentions.{}.{}".format(x, i, b)] = "input_blocks.{}.1.{}".format(n, b) + for t in range(num_transformers): + for b in TRANSFORMER_BLOCKS: + 
diffusers_unet_map["down_blocks.{}.attentions.{}.transformer_blocks.{}.{}".format(x, i, t, b)] = "input_blocks.{}.1.transformer_blocks.{}.{}".format(n, t, b) + n += 1 + for k in ["weight", "bias"]: + diffusers_unet_map["down_blocks.{}.downsamplers.0.conv.{}".format(x, k)] = "input_blocks.{}.0.op.{}".format(n, k) + + i = 0 + for b in UNET_MAP_ATTENTIONS: + diffusers_unet_map["mid_block.attentions.{}.{}".format(i, b)] = "middle_block.1.{}".format(b) + for t in range(transformers_mid): + for b in TRANSFORMER_BLOCKS: + diffusers_unet_map["mid_block.attentions.{}.transformer_blocks.{}.{}".format(i, t, b)] = "middle_block.1.transformer_blocks.{}.{}".format(t, b) + + for i, n in enumerate([0, 2]): + for b in UNET_MAP_RESNET: + diffusers_unet_map["mid_block.resnets.{}.{}".format(i, UNET_MAP_RESNET[b])] = "middle_block.{}.{}".format(n, b) + + num_res_blocks = list(reversed(num_res_blocks)) + for x in range(num_blocks): + n = (num_res_blocks[x] + 1) * x + l = num_res_blocks[x] + 1 + for i in range(l): + c = 0 + for b in UNET_MAP_RESNET: + diffusers_unet_map["up_blocks.{}.resnets.{}.{}".format(x, i, UNET_MAP_RESNET[b])] = "output_blocks.{}.0.{}".format(n, b) + c += 1 + num_transformers = transformer_depth_output.pop() + if num_transformers > 0: + c += 1 + for b in UNET_MAP_ATTENTIONS: + diffusers_unet_map["up_blocks.{}.attentions.{}.{}".format(x, i, b)] = "output_blocks.{}.1.{}".format(n, b) + for t in range(num_transformers): + for b in TRANSFORMER_BLOCKS: + diffusers_unet_map["up_blocks.{}.attentions.{}.transformer_blocks.{}.{}".format(x, i, t, b)] = "output_blocks.{}.1.transformer_blocks.{}.{}".format(n, t, b) + if i == l - 1: + for k in ["weight", "bias"]: + diffusers_unet_map["up_blocks.{}.upsamplers.0.conv.{}".format(x, k)] = "output_blocks.{}.{}.conv.{}".format(n, c, k) + n += 1 + + for k in UNET_MAP_BASIC: + diffusers_unet_map[k[1]] = k[0] + + return diffusers_unet_map + +def repeat_to_batch_size(tensor, batch_size): + if tensor.shape[0] > batch_size: + return tensor[:batch_size] + elif tensor.shape[0] < batch_size: + return tensor.repeat([math.ceil(batch_size / tensor.shape[0])] + [1] * (len(tensor.shape) - 1))[:batch_size] + return tensor + +def resize_to_batch_size(tensor, batch_size): + in_batch_size = tensor.shape[0] + if in_batch_size == batch_size: + return tensor + + if batch_size <= 1: + return tensor[:batch_size] + + output = torch.empty([batch_size] + list(tensor.shape)[1:], dtype=tensor.dtype, device=tensor.device) + if batch_size < in_batch_size: + scale = (in_batch_size - 1) / (batch_size - 1) + for i in range(batch_size): + output[i] = tensor[min(round(i * scale), in_batch_size - 1)] + else: + scale = in_batch_size / batch_size + for i in range(batch_size): + output[i] = tensor[min(math.floor((i + 0.5) * scale), in_batch_size - 1)] + + return output + +def convert_sd_to(state_dict, dtype): + keys = list(state_dict.keys()) + for k in keys: + state_dict[k] = state_dict[k].to(dtype) + return state_dict + +def safetensors_header(safetensors_path, max_size=100*1024*1024): + with open(safetensors_path, "rb") as f: + header = f.read(8) + length_of_header = struct.unpack(' max_size: + return None + return f.read(length_of_header) + +def set_attr(obj, attr, value): + attrs = attr.split(".") + for name in attrs[:-1]: + obj = getattr(obj, name) + prev = getattr(obj, attrs[-1]) + setattr(obj, attrs[-1], torch.nn.Parameter(value, requires_grad=False)) + del prev + +def set_attr_raw(obj, attr, value): + attrs = attr.split(".") + for name in attrs[:-1]: + obj = getattr(obj, name) + 
setattr(obj, attrs[-1], value) + +def copy_to_param(obj, attr, value): + # inplace update tensor instead of replacing it + attrs = attr.split(".") + for name in attrs[:-1]: + obj = getattr(obj, name) + prev = getattr(obj, attrs[-1]) + prev.data.copy_(value) + +def get_attr(obj, attr): + attrs = attr.split(".") + for name in attrs: + obj = getattr(obj, name) + return obj + +def bislerp(samples, width, height): + def slerp(b1, b2, r): + '''slerps batches b1, b2 according to ratio r, batches should be flat e.g. NxC''' + + c = b1.shape[-1] + + #norms + b1_norms = torch.norm(b1, dim=-1, keepdim=True) + b2_norms = torch.norm(b2, dim=-1, keepdim=True) + + #normalize + b1_normalized = b1 / b1_norms + b2_normalized = b2 / b2_norms + + #zero when norms are zero + b1_normalized[b1_norms.expand(-1,c) == 0.0] = 0.0 + b2_normalized[b2_norms.expand(-1,c) == 0.0] = 0.0 + + #slerp + dot = (b1_normalized*b2_normalized).sum(1) + omega = torch.acos(dot) + so = torch.sin(omega) + + #technically not mathematically correct, but more pleasing? + res = (torch.sin((1.0-r.squeeze(1))*omega)/so).unsqueeze(1)*b1_normalized + (torch.sin(r.squeeze(1)*omega)/so).unsqueeze(1) * b2_normalized + res *= (b1_norms * (1.0-r) + b2_norms * r).expand(-1,c) + + #edge cases for same or polar opposites + res[dot > 1 - 1e-5] = b1[dot > 1 - 1e-5] + res[dot < 1e-5 - 1] = (b1 * (1.0-r) + b2 * r)[dot < 1e-5 - 1] + return res + + def generate_bilinear_data(length_old, length_new, device): + coords_1 = torch.arange(length_old, dtype=torch.float32, device=device).reshape((1,1,1,-1)) + coords_1 = torch.nn.functional.interpolate(coords_1, size=(1, length_new), mode="bilinear") + ratios = coords_1 - coords_1.floor() + coords_1 = coords_1.to(torch.int64) + + coords_2 = torch.arange(length_old, dtype=torch.float32, device=device).reshape((1,1,1,-1)) + 1 + coords_2[:,:,:,-1] -= 1 + coords_2 = torch.nn.functional.interpolate(coords_2, size=(1, length_new), mode="bilinear") + coords_2 = coords_2.to(torch.int64) + return ratios, coords_1, coords_2 + + orig_dtype = samples.dtype + samples = samples.float() + n,c,h,w = samples.shape + h_new, w_new = (height, width) + + #linear w + ratios, coords_1, coords_2 = generate_bilinear_data(w, w_new, samples.device) + coords_1 = coords_1.expand((n, c, h, -1)) + coords_2 = coords_2.expand((n, c, h, -1)) + ratios = ratios.expand((n, 1, h, -1)) + + pass_1 = samples.gather(-1,coords_1).movedim(1, -1).reshape((-1,c)) + pass_2 = samples.gather(-1,coords_2).movedim(1, -1).reshape((-1,c)) + ratios = ratios.movedim(1, -1).reshape((-1,1)) + + result = slerp(pass_1, pass_2, ratios) + result = result.reshape(n, h, w_new, c).movedim(-1, 1) + + #linear h + ratios, coords_1, coords_2 = generate_bilinear_data(h, h_new, samples.device) + coords_1 = coords_1.reshape((1,1,-1,1)).expand((n, c, -1, w_new)) + coords_2 = coords_2.reshape((1,1,-1,1)).expand((n, c, -1, w_new)) + ratios = ratios.reshape((1,1,-1,1)).expand((n, 1, -1, w_new)) + + pass_1 = result.gather(-2,coords_1).movedim(1, -1).reshape((-1,c)) + pass_2 = result.gather(-2,coords_2).movedim(1, -1).reshape((-1,c)) + ratios = ratios.movedim(1, -1).reshape((-1,1)) + + result = slerp(pass_1, pass_2, ratios) + result = result.reshape(n, h_new, w_new, c).movedim(-1, 1) + return result.to(orig_dtype) + +def lanczos(samples, width, height): + images = [Image.fromarray(np.clip(255. 
* image.movedim(0, -1).cpu().numpy(), 0, 255).astype(np.uint8)) for image in samples] + images = [image.resize((width, height), resample=Image.Resampling.LANCZOS) for image in images] + images = [torch.from_numpy(np.array(image).astype(np.float32) / 255.0).movedim(-1, 0) for image in images] + result = torch.stack(images) + return result.to(samples.device, samples.dtype) + +def common_upscale(samples, width, height, upscale_method, crop): + if crop == "center": + old_width = samples.shape[3] + old_height = samples.shape[2] + old_aspect = old_width / old_height + new_aspect = width / height + x = 0 + y = 0 + if old_aspect > new_aspect: + x = round((old_width - old_width * (new_aspect / old_aspect)) / 2) + elif old_aspect < new_aspect: + y = round((old_height - old_height * (old_aspect / new_aspect)) / 2) + s = samples[:,:,y:old_height-y,x:old_width-x] + else: + s = samples + + if upscale_method == "bislerp": + return bislerp(s, width, height) + elif upscale_method == "lanczos": + return lanczos(s, width, height) + else: + return torch.nn.functional.interpolate(s, size=(height, width), mode=upscale_method) + +def get_tiled_scale_steps(width, height, tile_x, tile_y, overlap): + return math.ceil((height / (tile_y - overlap))) * math.ceil((width / (tile_x - overlap))) + +@torch.inference_mode() +def tiled_scale(samples, function, tile_x=64, tile_y=64, overlap = 8, upscale_amount = 4, out_channels = 3, output_device="cpu", pbar = None): + output = torch.empty((samples.shape[0], out_channels, round(samples.shape[2] * upscale_amount), round(samples.shape[3] * upscale_amount)), device=output_device) + for b in range(samples.shape[0]): + s = samples[b:b+1] + out = torch.zeros((s.shape[0], out_channels, round(s.shape[2] * upscale_amount), round(s.shape[3] * upscale_amount)), device=output_device) + out_div = torch.zeros((s.shape[0], out_channels, round(s.shape[2] * upscale_amount), round(s.shape[3] * upscale_amount)), device=output_device) + for y in range(0, s.shape[2], tile_y - overlap): + for x in range(0, s.shape[3], tile_x - overlap): + x = max(0, min(s.shape[-1] - overlap, x)) + y = max(0, min(s.shape[-2] - overlap, y)) + s_in = s[:,:,y:y+tile_y,x:x+tile_x] + + ps = function(s_in).to(output_device) + mask = torch.ones_like(ps) + feather = round(overlap * upscale_amount) + for t in range(feather): + mask[:,:,t:1+t,:] *= ((1.0/feather) * (t + 1)) + mask[:,:,mask.shape[2] -1 -t: mask.shape[2]-t,:] *= ((1.0/feather) * (t + 1)) + mask[:,:,:,t:1+t] *= ((1.0/feather) * (t + 1)) + mask[:,:,:,mask.shape[3]- 1 - t: mask.shape[3]- t] *= ((1.0/feather) * (t + 1)) + out[:,:,round(y*upscale_amount):round((y+tile_y)*upscale_amount),round(x*upscale_amount):round((x+tile_x)*upscale_amount)] += ps * mask + out_div[:,:,round(y*upscale_amount):round((y+tile_y)*upscale_amount),round(x*upscale_amount):round((x+tile_x)*upscale_amount)] += mask + if pbar is not None: + pbar.update(1) + + output[b:b+1] = out/out_div + return output + +PROGRESS_BAR_ENABLED = True +def set_progress_bar_enabled(enabled): + global PROGRESS_BAR_ENABLED + PROGRESS_BAR_ENABLED = enabled + +PROGRESS_BAR_HOOK = None +def set_progress_bar_global_hook(function): + global PROGRESS_BAR_HOOK + PROGRESS_BAR_HOOK = function + +class ProgressBar: + def __init__(self, total, title=None): + global PROGRESS_BAR_HOOK + self.total = total + self.current = 0 + self.hook = PROGRESS_BAR_HOOK + self.tqdm = tqdm(total=total, desc=title) + + def update_absolute(self, value, total=None, preview=None): + if total is not None: + self.total = total + if value > 
self.total: + value = self.total + inc = value - self.current + self.tqdm.update(inc) + self.current = value + if self.hook is not None: + self.hook(self.current, self.total, preview) + if self.current >= self.total: + self.tqdm.close() + + def update(self, value): + self.update_absolute(self.current + value) diff --git a/ldm_patched/pfn/__init__.py b/ldm_patched/pfn/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/ldm_patched/pfn/architecture/DAT.py b/ldm_patched/pfn/architecture/DAT.py new file mode 100644 index 0000000000000000000000000000000000000000..0bcc26ef422b73cef41744e2203901a3d290c2f0 --- /dev/null +++ b/ldm_patched/pfn/architecture/DAT.py @@ -0,0 +1,1182 @@ +# pylint: skip-file +import math +import re + +import numpy as np +import torch +import torch.nn as nn +import torch.utils.checkpoint as checkpoint +from einops import rearrange +from einops.layers.torch import Rearrange +from torch import Tensor +from torch.nn import functional as F + +from .timm.drop import DropPath +from .timm.weight_init import trunc_normal_ + + +def img2windows(img, H_sp, W_sp): + """ + Input: Image (B, C, H, W) + Output: Window Partition (B', N, C) + """ + B, C, H, W = img.shape + img_reshape = img.view(B, C, H // H_sp, H_sp, W // W_sp, W_sp) + img_perm = ( + img_reshape.permute(0, 2, 4, 3, 5, 1).contiguous().reshape(-1, H_sp * W_sp, C) + ) + return img_perm + + +def windows2img(img_splits_hw, H_sp, W_sp, H, W): + """ + Input: Window Partition (B', N, C) + Output: Image (B, H, W, C) + """ + B = int(img_splits_hw.shape[0] / (H * W / H_sp / W_sp)) + + img = img_splits_hw.view(B, H // H_sp, W // W_sp, H_sp, W_sp, -1) + img = img.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) + return img + + +class SpatialGate(nn.Module): + """Spatial-Gate. + Args: + dim (int): Half of input channels. + """ + + def __init__(self, dim): + super().__init__() + self.norm = nn.LayerNorm(dim) + self.conv = nn.Conv2d( + dim, dim, kernel_size=3, stride=1, padding=1, groups=dim + ) # DW Conv + + def forward(self, x, H, W): + # Split + x1, x2 = x.chunk(2, dim=-1) + B, N, C = x.shape + x2 = ( + self.conv(self.norm(x2).transpose(1, 2).contiguous().view(B, C // 2, H, W)) + .flatten(2) + .transpose(-1, -2) + .contiguous() + ) + + return x1 * x2 + + +class SGFN(nn.Module): + """Spatial-Gate Feed-Forward Network. + Args: + in_features (int): Number of input channels. + hidden_features (int | None): Number of hidden channels. Default: None + out_features (int | None): Number of output channels. Default: None + act_layer (nn.Module): Activation layer. Default: nn.GELU + drop (float): Dropout rate. 
Default: 0.0 + """ + + def __init__( + self, + in_features, + hidden_features=None, + out_features=None, + act_layer=nn.GELU, + drop=0.0, + ): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.sg = SpatialGate(hidden_features // 2) + self.fc2 = nn.Linear(hidden_features // 2, out_features) + self.drop = nn.Dropout(drop) + + def forward(self, x, H, W): + """ + Input: x: (B, H*W, C), H, W + Output: x: (B, H*W, C) + """ + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + + x = self.sg(x, H, W) + x = self.drop(x) + + x = self.fc2(x) + x = self.drop(x) + return x + + +class DynamicPosBias(nn.Module): + # The implementation builds on Crossformer code https://github.com/cheerss/CrossFormer/blob/main/models/crossformer.py + """Dynamic Relative Position Bias. + Args: + dim (int): Number of input channels. + num_heads (int): Number of attention heads. + residual (bool): If True, use residual strage to connect conv. + """ + + def __init__(self, dim, num_heads, residual): + super().__init__() + self.residual = residual + self.num_heads = num_heads + self.pos_dim = dim // 4 + self.pos_proj = nn.Linear(2, self.pos_dim) + self.pos1 = nn.Sequential( + nn.LayerNorm(self.pos_dim), + nn.ReLU(inplace=True), + nn.Linear(self.pos_dim, self.pos_dim), + ) + self.pos2 = nn.Sequential( + nn.LayerNorm(self.pos_dim), + nn.ReLU(inplace=True), + nn.Linear(self.pos_dim, self.pos_dim), + ) + self.pos3 = nn.Sequential( + nn.LayerNorm(self.pos_dim), + nn.ReLU(inplace=True), + nn.Linear(self.pos_dim, self.num_heads), + ) + + def forward(self, biases): + if self.residual: + pos = self.pos_proj(biases) # 2Gh-1 * 2Gw-1, heads + pos = pos + self.pos1(pos) + pos = pos + self.pos2(pos) + pos = self.pos3(pos) + else: + pos = self.pos3(self.pos2(self.pos1(self.pos_proj(biases)))) + return pos + + +class Spatial_Attention(nn.Module): + """Spatial Window Self-Attention. + It supports rectangle window (containing square window). + Args: + dim (int): Number of input channels. + idx (int): The indentix of window. (0/1) + split_size (tuple(int)): Height and Width of spatial window. + dim_out (int | None): The dimension of the attention output. Default: None + num_heads (int): Number of attention heads. Default: 6 + attn_drop (float): Dropout ratio of attention weight. Default: 0.0 + proj_drop (float): Dropout ratio of output. Default: 0.0 + qk_scale (float | None): Override default qk scale of head_dim ** -0.5 if set + position_bias (bool): The dynamic relative position bias. 
Default: True + """ + + def __init__( + self, + dim, + idx, + split_size=[8, 8], + dim_out=None, + num_heads=6, + attn_drop=0.0, + proj_drop=0.0, + qk_scale=None, + position_bias=True, + ): + super().__init__() + self.dim = dim + self.dim_out = dim_out or dim + self.split_size = split_size + self.num_heads = num_heads + self.idx = idx + self.position_bias = position_bias + + head_dim = dim // num_heads + self.scale = qk_scale or head_dim**-0.5 + + if idx == 0: + H_sp, W_sp = self.split_size[0], self.split_size[1] + elif idx == 1: + W_sp, H_sp = self.split_size[0], self.split_size[1] + else: + print("ERROR MODE", idx) + exit(0) + self.H_sp = H_sp + self.W_sp = W_sp + + if self.position_bias: + self.pos = DynamicPosBias(self.dim // 4, self.num_heads, residual=False) + # generate mother-set + position_bias_h = torch.arange(1 - self.H_sp, self.H_sp) + position_bias_w = torch.arange(1 - self.W_sp, self.W_sp) + biases = torch.stack(torch.meshgrid([position_bias_h, position_bias_w])) + biases = biases.flatten(1).transpose(0, 1).contiguous().float() + self.register_buffer("rpe_biases", biases) + + # get pair-wise relative position index for each token inside the window + coords_h = torch.arange(self.H_sp) + coords_w = torch.arange(self.W_sp) + coords = torch.stack(torch.meshgrid([coords_h, coords_w])) + coords_flatten = torch.flatten(coords, 1) + relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] + relative_coords = relative_coords.permute(1, 2, 0).contiguous() + relative_coords[:, :, 0] += self.H_sp - 1 + relative_coords[:, :, 1] += self.W_sp - 1 + relative_coords[:, :, 0] *= 2 * self.W_sp - 1 + relative_position_index = relative_coords.sum(-1) + self.register_buffer("relative_position_index", relative_position_index) + + self.attn_drop = nn.Dropout(attn_drop) + + def im2win(self, x, H, W): + B, N, C = x.shape + x = x.transpose(-2, -1).contiguous().view(B, C, H, W) + x = img2windows(x, self.H_sp, self.W_sp) + x = ( + x.reshape(-1, self.H_sp * self.W_sp, self.num_heads, C // self.num_heads) + .permute(0, 2, 1, 3) + .contiguous() + ) + return x + + def forward(self, qkv, H, W, mask=None): + """ + Input: qkv: (B, 3*L, C), H, W, mask: (B, N, N), N is the window size + Output: x (B, H, W, C) + """ + q, k, v = qkv[0], qkv[1], qkv[2] + + B, L, C = q.shape + assert L == H * W, "flatten img_tokens has wrong size" + + # partition the q,k,v, image to window + q = self.im2win(q, H, W) + k = self.im2win(k, H, W) + v = self.im2win(v, H, W) + + q = q * self.scale + attn = q @ k.transpose(-2, -1) # B head N C @ B head C N --> B head N N + + # calculate drpe + if self.position_bias: + pos = self.pos(self.rpe_biases) + # select position bias + relative_position_bias = pos[self.relative_position_index.view(-1)].view( + self.H_sp * self.W_sp, self.H_sp * self.W_sp, -1 + ) + relative_position_bias = relative_position_bias.permute( + 2, 0, 1 + ).contiguous() + attn = attn + relative_position_bias.unsqueeze(0) + + N = attn.shape[3] + + # use mask for shift window + if mask is not None: + nW = mask.shape[0] + attn = attn.view(B, nW, self.num_heads, N, N) + mask.unsqueeze(1).unsqueeze( + 0 + ) + attn = attn.view(-1, self.num_heads, N, N) + + attn = nn.functional.softmax(attn, dim=-1, dtype=attn.dtype) + attn = self.attn_drop(attn) + + x = attn @ v + x = x.transpose(1, 2).reshape( + -1, self.H_sp * self.W_sp, C + ) # B head N N @ B head N C + + # merge the window, window to image + x = windows2img(x, self.H_sp, self.W_sp, H, W) # B H' W' C + + return x + + +class 
Adaptive_Spatial_Attention(nn.Module): + # The implementation builds on CAT code https://github.com/Zhengchen1999/CAT + """Adaptive Spatial Self-Attention + Args: + dim (int): Number of input channels. + num_heads (int): Number of attention heads. Default: 6 + split_size (tuple(int)): Height and Width of spatial window. + shift_size (tuple(int)): Shift size for spatial window. + qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None): Override default qk scale of head_dim ** -0.5 if set. + drop (float): Dropout rate. Default: 0.0 + attn_drop (float): Attention dropout rate. Default: 0.0 + rg_idx (int): The indentix of Residual Group (RG) + b_idx (int): The indentix of Block in each RG + """ + + def __init__( + self, + dim, + num_heads, + reso=64, + split_size=[8, 8], + shift_size=[1, 2], + qkv_bias=False, + qk_scale=None, + drop=0.0, + attn_drop=0.0, + rg_idx=0, + b_idx=0, + ): + super().__init__() + self.dim = dim + self.num_heads = num_heads + self.split_size = split_size + self.shift_size = shift_size + self.b_idx = b_idx + self.rg_idx = rg_idx + self.patches_resolution = reso + self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) + + assert ( + 0 <= self.shift_size[0] < self.split_size[0] + ), "shift_size must in 0-split_size0" + assert ( + 0 <= self.shift_size[1] < self.split_size[1] + ), "shift_size must in 0-split_size1" + + self.branch_num = 2 + + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(drop) + + self.attns = nn.ModuleList( + [ + Spatial_Attention( + dim // 2, + idx=i, + split_size=split_size, + num_heads=num_heads // 2, + dim_out=dim // 2, + qk_scale=qk_scale, + attn_drop=attn_drop, + proj_drop=drop, + position_bias=True, + ) + for i in range(self.branch_num) + ] + ) + + if (self.rg_idx % 2 == 0 and self.b_idx > 0 and (self.b_idx - 2) % 4 == 0) or ( + self.rg_idx % 2 != 0 and self.b_idx % 4 == 0 + ): + attn_mask = self.calculate_mask( + self.patches_resolution, self.patches_resolution + ) + self.register_buffer("attn_mask_0", attn_mask[0]) + self.register_buffer("attn_mask_1", attn_mask[1]) + else: + attn_mask = None + self.register_buffer("attn_mask_0", None) + self.register_buffer("attn_mask_1", None) + + self.dwconv = nn.Sequential( + nn.Conv2d(dim, dim, kernel_size=3, stride=1, padding=1, groups=dim), + nn.BatchNorm2d(dim), + nn.GELU(), + ) + self.channel_interaction = nn.Sequential( + nn.AdaptiveAvgPool2d(1), + nn.Conv2d(dim, dim // 8, kernel_size=1), + nn.BatchNorm2d(dim // 8), + nn.GELU(), + nn.Conv2d(dim // 8, dim, kernel_size=1), + ) + self.spatial_interaction = nn.Sequential( + nn.Conv2d(dim, dim // 16, kernel_size=1), + nn.BatchNorm2d(dim // 16), + nn.GELU(), + nn.Conv2d(dim // 16, 1, kernel_size=1), + ) + + def calculate_mask(self, H, W): + # The implementation builds on Swin Transformer code https://github.com/microsoft/Swin-Transformer/blob/main/models/swin_transformer.py + # calculate attention mask for shift window + img_mask_0 = torch.zeros((1, H, W, 1)) # 1 H W 1 idx=0 + img_mask_1 = torch.zeros((1, H, W, 1)) # 1 H W 1 idx=1 + h_slices_0 = ( + slice(0, -self.split_size[0]), + slice(-self.split_size[0], -self.shift_size[0]), + slice(-self.shift_size[0], None), + ) + w_slices_0 = ( + slice(0, -self.split_size[1]), + slice(-self.split_size[1], -self.shift_size[1]), + slice(-self.shift_size[1], None), + ) + + h_slices_1 = ( + slice(0, -self.split_size[1]), + slice(-self.split_size[1], -self.shift_size[1]), + slice(-self.shift_size[1], None), + ) + w_slices_1 = ( + slice(0, -self.split_size[0]), + 
slice(-self.split_size[0], -self.shift_size[0]), + slice(-self.shift_size[0], None), + ) + cnt = 0 + for h in h_slices_0: + for w in w_slices_0: + img_mask_0[:, h, w, :] = cnt + cnt += 1 + cnt = 0 + for h in h_slices_1: + for w in w_slices_1: + img_mask_1[:, h, w, :] = cnt + cnt += 1 + + # calculate mask for window-0 + img_mask_0 = img_mask_0.view( + 1, + H // self.split_size[0], + self.split_size[0], + W // self.split_size[1], + self.split_size[1], + 1, + ) + img_mask_0 = ( + img_mask_0.permute(0, 1, 3, 2, 4, 5) + .contiguous() + .view(-1, self.split_size[0], self.split_size[1], 1) + ) # nW, sw[0], sw[1], 1 + mask_windows_0 = img_mask_0.view(-1, self.split_size[0] * self.split_size[1]) + attn_mask_0 = mask_windows_0.unsqueeze(1) - mask_windows_0.unsqueeze(2) + attn_mask_0 = attn_mask_0.masked_fill( + attn_mask_0 != 0, float(-100.0) + ).masked_fill(attn_mask_0 == 0, float(0.0)) + + # calculate mask for window-1 + img_mask_1 = img_mask_1.view( + 1, + H // self.split_size[1], + self.split_size[1], + W // self.split_size[0], + self.split_size[0], + 1, + ) + img_mask_1 = ( + img_mask_1.permute(0, 1, 3, 2, 4, 5) + .contiguous() + .view(-1, self.split_size[1], self.split_size[0], 1) + ) # nW, sw[1], sw[0], 1 + mask_windows_1 = img_mask_1.view(-1, self.split_size[1] * self.split_size[0]) + attn_mask_1 = mask_windows_1.unsqueeze(1) - mask_windows_1.unsqueeze(2) + attn_mask_1 = attn_mask_1.masked_fill( + attn_mask_1 != 0, float(-100.0) + ).masked_fill(attn_mask_1 == 0, float(0.0)) + + return attn_mask_0, attn_mask_1 + + def forward(self, x, H, W): + """ + Input: x: (B, H*W, C), H, W + Output: x: (B, H*W, C) + """ + B, L, C = x.shape + assert L == H * W, "flatten img_tokens has wrong size" + + qkv = self.qkv(x).reshape(B, -1, 3, C).permute(2, 0, 1, 3) # 3, B, HW, C + # V without partition + v = qkv[2].transpose(-2, -1).contiguous().view(B, C, H, W) + + # image padding + max_split_size = max(self.split_size[0], self.split_size[1]) + pad_l = pad_t = 0 + pad_r = (max_split_size - W % max_split_size) % max_split_size + pad_b = (max_split_size - H % max_split_size) % max_split_size + + qkv = qkv.reshape(3 * B, H, W, C).permute(0, 3, 1, 2) # 3B C H W + qkv = ( + F.pad(qkv, (pad_l, pad_r, pad_t, pad_b)) + .reshape(3, B, C, -1) + .transpose(-2, -1) + ) # l r t b + _H = pad_b + H + _W = pad_r + W + _L = _H * _W + + # window-0 and window-1 on split channels [C/2, C/2]; for square windows (e.g., 8x8), window-0 and window-1 can be merged + # shift in block: (0, 4, 8, ...), (2, 6, 10, ...), (0, 4, 8, ...), (2, 6, 10, ...), ... 
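For orientation, calculate_mask above follows the Swin-style shifted-window recipe used by the shift branch that follows: tokens are labelled by the pre-shift region they came from, and any query/key pair whose labels differ receives a large negative bias so attention cannot leak across the rolled boundary. A toy 1-D sketch of that rule (the labels are arbitrary, purely for illustration):

import torch

# One window of 4 tokens; the first three come from one pre-shift region and the
# last one was rolled in from another region.
labels = torch.tensor([0., 0., 0., 1.])

# Same rule as calculate_mask: pairwise label difference, then -100 wherever the
# labels differ and 0 where they agree.
attn_mask = labels.unsqueeze(0) - labels.unsqueeze(1)
attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0))

print(attn_mask)
# Adding this to the attention logits drives the cross-region softmax weights to ~0.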
+ if (self.rg_idx % 2 == 0 and self.b_idx > 0 and (self.b_idx - 2) % 4 == 0) or ( + self.rg_idx % 2 != 0 and self.b_idx % 4 == 0 + ): + qkv = qkv.view(3, B, _H, _W, C) + qkv_0 = torch.roll( + qkv[:, :, :, :, : C // 2], + shifts=(-self.shift_size[0], -self.shift_size[1]), + dims=(2, 3), + ) + qkv_0 = qkv_0.view(3, B, _L, C // 2) + qkv_1 = torch.roll( + qkv[:, :, :, :, C // 2 :], + shifts=(-self.shift_size[1], -self.shift_size[0]), + dims=(2, 3), + ) + qkv_1 = qkv_1.view(3, B, _L, C // 2) + + if self.patches_resolution != _H or self.patches_resolution != _W: + mask_tmp = self.calculate_mask(_H, _W) + x1_shift = self.attns[0](qkv_0, _H, _W, mask=mask_tmp[0].to(x.device)) + x2_shift = self.attns[1](qkv_1, _H, _W, mask=mask_tmp[1].to(x.device)) + else: + x1_shift = self.attns[0](qkv_0, _H, _W, mask=self.attn_mask_0) + x2_shift = self.attns[1](qkv_1, _H, _W, mask=self.attn_mask_1) + + x1 = torch.roll( + x1_shift, shifts=(self.shift_size[0], self.shift_size[1]), dims=(1, 2) + ) + x2 = torch.roll( + x2_shift, shifts=(self.shift_size[1], self.shift_size[0]), dims=(1, 2) + ) + x1 = x1[:, :H, :W, :].reshape(B, L, C // 2) + x2 = x2[:, :H, :W, :].reshape(B, L, C // 2) + # attention output + attened_x = torch.cat([x1, x2], dim=2) + + else: + x1 = self.attns[0](qkv[:, :, :, : C // 2], _H, _W)[:, :H, :W, :].reshape( + B, L, C // 2 + ) + x2 = self.attns[1](qkv[:, :, :, C // 2 :], _H, _W)[:, :H, :W, :].reshape( + B, L, C // 2 + ) + # attention output + attened_x = torch.cat([x1, x2], dim=2) + + # convolution output + conv_x = self.dwconv(v) + + # Adaptive Interaction Module (AIM) + # C-Map (before sigmoid) + channel_map = ( + self.channel_interaction(conv_x) + .permute(0, 2, 3, 1) + .contiguous() + .view(B, 1, C) + ) + # S-Map (before sigmoid) + attention_reshape = attened_x.transpose(-2, -1).contiguous().view(B, C, H, W) + spatial_map = self.spatial_interaction(attention_reshape) + + # C-I + attened_x = attened_x * torch.sigmoid(channel_map) + # S-I + conv_x = torch.sigmoid(spatial_map) * conv_x + conv_x = conv_x.permute(0, 2, 3, 1).contiguous().view(B, L, C) + + x = attened_x + conv_x + + x = self.proj(x) + x = self.proj_drop(x) + + return x + + +class Adaptive_Channel_Attention(nn.Module): + # The implementation builds on XCiT code https://github.com/facebookresearch/xcit + """Adaptive Channel Self-Attention + Args: + dim (int): Number of input channels. + num_heads (int): Number of attention heads. Default: 6 + qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None): Override default qk scale of head_dim ** -0.5 if set. + attn_drop (float): Attention dropout rate. Default: 0.0 + drop_path (float): Stochastic depth rate. 
Default: 0.0 + """ + + def __init__( + self, + dim, + num_heads=8, + qkv_bias=False, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + ): + super().__init__() + self.num_heads = num_heads + self.temperature = nn.Parameter(torch.ones(num_heads, 1, 1)) + + self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + + self.dwconv = nn.Sequential( + nn.Conv2d(dim, dim, kernel_size=3, stride=1, padding=1, groups=dim), + nn.BatchNorm2d(dim), + nn.GELU(), + ) + self.channel_interaction = nn.Sequential( + nn.AdaptiveAvgPool2d(1), + nn.Conv2d(dim, dim // 8, kernel_size=1), + nn.BatchNorm2d(dim // 8), + nn.GELU(), + nn.Conv2d(dim // 8, dim, kernel_size=1), + ) + self.spatial_interaction = nn.Sequential( + nn.Conv2d(dim, dim // 16, kernel_size=1), + nn.BatchNorm2d(dim // 16), + nn.GELU(), + nn.Conv2d(dim // 16, 1, kernel_size=1), + ) + + def forward(self, x, H, W): + """ + Input: x: (B, H*W, C), H, W + Output: x: (B, H*W, C) + """ + B, N, C = x.shape + qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads) + qkv = qkv.permute(2, 0, 3, 1, 4) + q, k, v = qkv[0], qkv[1], qkv[2] + + q = q.transpose(-2, -1) + k = k.transpose(-2, -1) + v = v.transpose(-2, -1) + + v_ = v.reshape(B, C, N).contiguous().view(B, C, H, W) + + q = torch.nn.functional.normalize(q, dim=-1) + k = torch.nn.functional.normalize(k, dim=-1) + + attn = (q @ k.transpose(-2, -1)) * self.temperature + attn = attn.softmax(dim=-1) + attn = self.attn_drop(attn) + + # attention output + attened_x = (attn @ v).permute(0, 3, 1, 2).reshape(B, N, C) + + # convolution output + conv_x = self.dwconv(v_) + + # Adaptive Interaction Module (AIM) + # C-Map (before sigmoid) + attention_reshape = attened_x.transpose(-2, -1).contiguous().view(B, C, H, W) + channel_map = self.channel_interaction(attention_reshape) + # S-Map (before sigmoid) + spatial_map = ( + self.spatial_interaction(conv_x) + .permute(0, 2, 3, 1) + .contiguous() + .view(B, N, 1) + ) + + # S-I + attened_x = attened_x * torch.sigmoid(spatial_map) + # C-I + conv_x = conv_x * torch.sigmoid(channel_map) + conv_x = conv_x.permute(0, 2, 3, 1).contiguous().view(B, N, C) + + x = attened_x + conv_x + + x = self.proj(x) + x = self.proj_drop(x) + + return x + + +class DATB(nn.Module): + def __init__( + self, + dim, + num_heads, + reso=64, + split_size=[2, 4], + shift_size=[1, 2], + expansion_factor=4.0, + qkv_bias=False, + qk_scale=None, + drop=0.0, + attn_drop=0.0, + drop_path=0.0, + act_layer=nn.GELU, + norm_layer=nn.LayerNorm, + rg_idx=0, + b_idx=0, + ): + super().__init__() + + self.norm1 = norm_layer(dim) + + if b_idx % 2 == 0: + # DSTB + self.attn = Adaptive_Spatial_Attention( + dim, + num_heads=num_heads, + reso=reso, + split_size=split_size, + shift_size=shift_size, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop, + attn_drop=attn_drop, + rg_idx=rg_idx, + b_idx=b_idx, + ) + else: + # DCTB + self.attn = Adaptive_Channel_Attention( + dim, + num_heads=num_heads, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop=attn_drop, + proj_drop=drop, + ) + self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity() + + ffn_hidden_dim = int(dim * expansion_factor) + self.ffn = SGFN( + in_features=dim, + hidden_features=ffn_hidden_dim, + out_features=dim, + act_layer=act_layer, + ) + self.norm2 = norm_layer(dim) + + def forward(self, x, x_size): + """ + Input: x: (B, H*W, C), x_size: (H, W) + Output: x: (B, H*W, C) + """ + H, W = x_size + x = x + 
self.drop_path(self.attn(self.norm1(x), H, W)) + x = x + self.drop_path(self.ffn(self.norm2(x), H, W)) + + return x + + +class ResidualGroup(nn.Module): + """ResidualGroup + Args: + dim (int): Number of input channels. + reso (int): Input resolution. + num_heads (int): Number of attention heads. + split_size (tuple(int)): Height and Width of spatial window. + expansion_factor (float): Ratio of ffn hidden dim to embedding dim. + qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None): Override default qk scale of head_dim ** -0.5 if set. Default: None + drop (float): Dropout rate. Default: 0 + attn_drop(float): Attention dropout rate. Default: 0 + drop_paths (float | None): Stochastic depth rate. + act_layer (nn.Module): Activation layer. Default: nn.GELU + norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm + depth (int): Number of dual aggregation Transformer blocks in residual group. + use_chk (bool): Whether to use checkpointing to save memory. + resi_connection: The convolutional block before residual connection. '1conv'/'3conv' + """ + + def __init__( + self, + dim, + reso, + num_heads, + split_size=[2, 4], + expansion_factor=4.0, + qkv_bias=False, + qk_scale=None, + drop=0.0, + attn_drop=0.0, + drop_paths=None, + act_layer=nn.GELU, + norm_layer=nn.LayerNorm, + depth=2, + use_chk=False, + resi_connection="1conv", + rg_idx=0, + ): + super().__init__() + self.use_chk = use_chk + self.reso = reso + + self.blocks = nn.ModuleList( + [ + DATB( + dim=dim, + num_heads=num_heads, + reso=reso, + split_size=split_size, + shift_size=[split_size[0] // 2, split_size[1] // 2], + expansion_factor=expansion_factor, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop, + attn_drop=attn_drop, + drop_path=drop_paths[i], + act_layer=act_layer, + norm_layer=norm_layer, + rg_idx=rg_idx, + b_idx=i, + ) + for i in range(depth) + ] + ) + + if resi_connection == "1conv": + self.conv = nn.Conv2d(dim, dim, 3, 1, 1) + elif resi_connection == "3conv": + self.conv = nn.Sequential( + nn.Conv2d(dim, dim // 4, 3, 1, 1), + nn.LeakyReLU(negative_slope=0.2, inplace=True), + nn.Conv2d(dim // 4, dim // 4, 1, 1, 0), + nn.LeakyReLU(negative_slope=0.2, inplace=True), + nn.Conv2d(dim // 4, dim, 3, 1, 1), + ) + + def forward(self, x, x_size): + """ + Input: x: (B, H*W, C), x_size: (H, W) + Output: x: (B, H*W, C) + """ + H, W = x_size + res = x + for blk in self.blocks: + if self.use_chk: + x = checkpoint.checkpoint(blk, x, x_size) + else: + x = blk(x, x_size) + x = rearrange(x, "b (h w) c -> b c h w", h=H, w=W) + x = self.conv(x) + x = rearrange(x, "b c h w -> b (h w) c") + x = res + x + + return x + + +class Upsample(nn.Sequential): + """Upsample module. + Args: + scale (int): Scale factor. Supported scales: 2^n and 3. + num_feat (int): Channel number of intermediate features. + """ + + def __init__(self, scale, num_feat): + m = [] + if (scale & (scale - 1)) == 0: # scale = 2^n + for _ in range(int(math.log(scale, 2))): + m.append(nn.Conv2d(num_feat, 4 * num_feat, 3, 1, 1)) + m.append(nn.PixelShuffle(2)) + elif scale == 3: + m.append(nn.Conv2d(num_feat, 9 * num_feat, 3, 1, 1)) + m.append(nn.PixelShuffle(3)) + else: + raise ValueError( + f"scale {scale} is not supported. " "Supported scales: 2^n and 3." + ) + super(Upsample, self).__init__(*m) + + +class UpsampleOneStep(nn.Sequential): + """UpsampleOneStep module (the difference with Upsample is that it always only has 1conv + 1pixelshuffle) + Used in lightweight SR to save parameters. 
+ + Args: + scale (int): Scale factor. Supported scales: 2^n and 3. + num_feat (int): Channel number of intermediate features. + + """ + + def __init__(self, scale, num_feat, num_out_ch, input_resolution=None): + self.num_feat = num_feat + self.input_resolution = input_resolution + m = [] + m.append(nn.Conv2d(num_feat, (scale**2) * num_out_ch, 3, 1, 1)) + m.append(nn.PixelShuffle(scale)) + super(UpsampleOneStep, self).__init__(*m) + + def flops(self): + h, w = self.input_resolution + flops = h * w * self.num_feat * 3 * 9 + return flops + + +class DAT(nn.Module): + """Dual Aggregation Transformer + Args: + img_size (int): Input image size. Default: 64 + in_chans (int): Number of input image channels. Default: 3 + embed_dim (int): Patch embedding dimension. Default: 180 + depths (tuple(int)): Depth of each residual group (number of DATB in each RG). + split_size (tuple(int)): Height and Width of spatial window. + num_heads (tuple(int)): Number of attention heads in different residual groups. + expansion_factor (float): Ratio of ffn hidden dim to embedding dim. Default: 4 + qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None): Override default qk scale of head_dim ** -0.5 if set. Default: None + drop_rate (float): Dropout rate. Default: 0 + attn_drop_rate (float): Attention dropout rate. Default: 0 + drop_path_rate (float): Stochastic depth rate. Default: 0.1 + act_layer (nn.Module): Activation layer. Default: nn.GELU + norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm + use_chk (bool): Whether to use checkpointing to save memory. + upscale: Upscale factor. 2/3/4 for image SR + img_range: Image range. 1. or 255. + resi_connection: The convolutional block before residual connection. 
'1conv'/'3conv' + """ + + def __init__(self, state_dict): + super().__init__() + + # defaults + img_size = 64 + in_chans = 3 + embed_dim = 180 + split_size = [2, 4] + depth = [2, 2, 2, 2] + num_heads = [2, 2, 2, 2] + expansion_factor = 4.0 + qkv_bias = True + qk_scale = None + drop_rate = 0.0 + attn_drop_rate = 0.0 + drop_path_rate = 0.1 + act_layer = nn.GELU + norm_layer = nn.LayerNorm + use_chk = False + upscale = 2 + img_range = 1.0 + resi_connection = "1conv" + upsampler = "pixelshuffle" + + self.model_arch = "DAT" + self.sub_type = "SR" + self.state = state_dict + + state_keys = state_dict.keys() + if "conv_before_upsample.0.weight" in state_keys: + if "conv_up1.weight" in state_keys: + upsampler = "nearest+conv" + else: + upsampler = "pixelshuffle" + supports_fp16 = False + elif "upsample.0.weight" in state_keys: + upsampler = "pixelshuffledirect" + else: + upsampler = "" + + num_feat = ( + state_dict.get("conv_before_upsample.0.weight", None).shape[1] + if state_dict.get("conv_before_upsample.weight", None) + else 64 + ) + + num_in_ch = state_dict["conv_first.weight"].shape[1] + in_chans = num_in_ch + if "conv_last.weight" in state_keys: + num_out_ch = state_dict["conv_last.weight"].shape[0] + else: + num_out_ch = num_in_ch + + upscale = 1 + if upsampler == "nearest+conv": + upsample_keys = [ + x for x in state_keys if "conv_up" in x and "bias" not in x + ] + + for upsample_key in upsample_keys: + upscale *= 2 + elif upsampler == "pixelshuffle": + upsample_keys = [ + x + for x in state_keys + if "upsample" in x and "conv" not in x and "bias" not in x + ] + for upsample_key in upsample_keys: + shape = state_dict[upsample_key].shape[0] + upscale *= math.sqrt(shape // num_feat) + upscale = int(upscale) + elif upsampler == "pixelshuffledirect": + upscale = int( + math.sqrt(state_dict["upsample.0.bias"].shape[0] // num_out_ch) + ) + + max_layer_num = 0 + max_block_num = 0 + for key in state_keys: + result = re.match(r"layers.(\d*).blocks.(\d*).norm1.weight", key) + if result: + layer_num, block_num = result.groups() + max_layer_num = max(max_layer_num, int(layer_num)) + max_block_num = max(max_block_num, int(block_num)) + + depth = [max_block_num + 1 for _ in range(max_layer_num + 1)] + + if "layers.0.blocks.1.attn.temperature" in state_keys: + num_heads_num = state_dict["layers.0.blocks.1.attn.temperature"].shape[0] + num_heads = [num_heads_num for _ in range(max_layer_num + 1)] + else: + num_heads = depth + + embed_dim = state_dict["conv_first.weight"].shape[0] + expansion_factor = float( + state_dict["layers.0.blocks.0.ffn.fc1.weight"].shape[0] / embed_dim + ) + + # TODO: could actually count the layers, but this should do + if "layers.0.conv.4.weight" in state_keys: + resi_connection = "3conv" + else: + resi_connection = "1conv" + + if "layers.0.blocks.2.attn.attn_mask_0" in state_keys: + attn_mask_0_x, attn_mask_0_y, attn_mask_0_z = state_dict[ + "layers.0.blocks.2.attn.attn_mask_0" + ].shape + + img_size = int(math.sqrt(attn_mask_0_x * attn_mask_0_y)) + + if "layers.0.blocks.0.attn.attns.0.rpe_biases" in state_keys: + split_sizes = ( + state_dict["layers.0.blocks.0.attn.attns.0.rpe_biases"][-1] + 1 + ) + split_size = [int(x) for x in split_sizes] + + self.in_nc = num_in_ch + self.out_nc = num_out_ch + self.num_feat = num_feat + self.embed_dim = embed_dim + self.num_heads = num_heads + self.depth = depth + self.scale = upscale + self.upsampler = upsampler + self.img_size = img_size + self.img_range = img_range + self.expansion_factor = expansion_factor + self.resi_connection = 
resi_connection + self.split_size = split_size + + self.supports_fp16 = False # Too much weirdness to support this at the moment + self.supports_bfp16 = True + self.min_size_restriction = 16 + + num_in_ch = in_chans + num_out_ch = in_chans + num_feat = 64 + self.img_range = img_range + if in_chans == 3: + rgb_mean = (0.4488, 0.4371, 0.4040) + self.mean = torch.Tensor(rgb_mean).view(1, 3, 1, 1) + else: + self.mean = torch.zeros(1, 1, 1, 1) + self.upscale = upscale + self.upsampler = upsampler + + # ------------------------- 1, Shallow Feature Extraction ------------------------- # + self.conv_first = nn.Conv2d(num_in_ch, embed_dim, 3, 1, 1) + + # ------------------------- 2, Deep Feature Extraction ------------------------- # + self.num_layers = len(depth) + self.use_chk = use_chk + self.num_features = ( + self.embed_dim + ) = embed_dim # num_features for consistency with other models + heads = num_heads + + self.before_RG = nn.Sequential( + Rearrange("b c h w -> b (h w) c"), nn.LayerNorm(embed_dim) + ) + + curr_dim = embed_dim + dpr = [ + x.item() for x in torch.linspace(0, drop_path_rate, np.sum(depth)) + ] # stochastic depth decay rule + + self.layers = nn.ModuleList() + for i in range(self.num_layers): + layer = ResidualGroup( + dim=embed_dim, + num_heads=heads[i], + reso=img_size, + split_size=split_size, + expansion_factor=expansion_factor, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_paths=dpr[sum(depth[:i]) : sum(depth[: i + 1])], + act_layer=act_layer, + norm_layer=norm_layer, + depth=depth[i], + use_chk=use_chk, + resi_connection=resi_connection, + rg_idx=i, + ) + self.layers.append(layer) + + self.norm = norm_layer(curr_dim) + # build the last conv layer in deep feature extraction + if resi_connection == "1conv": + self.conv_after_body = nn.Conv2d(embed_dim, embed_dim, 3, 1, 1) + elif resi_connection == "3conv": + # to save parameters and memory + self.conv_after_body = nn.Sequential( + nn.Conv2d(embed_dim, embed_dim // 4, 3, 1, 1), + nn.LeakyReLU(negative_slope=0.2, inplace=True), + nn.Conv2d(embed_dim // 4, embed_dim // 4, 1, 1, 0), + nn.LeakyReLU(negative_slope=0.2, inplace=True), + nn.Conv2d(embed_dim // 4, embed_dim, 3, 1, 1), + ) + + # ------------------------- 3, Reconstruction ------------------------- # + if self.upsampler == "pixelshuffle": + # for classical SR + self.conv_before_upsample = nn.Sequential( + nn.Conv2d(embed_dim, num_feat, 3, 1, 1), nn.LeakyReLU(inplace=True) + ) + self.upsample = Upsample(upscale, num_feat) + self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1) + elif self.upsampler == "pixelshuffledirect": + # for lightweight SR (to save parameters) + self.upsample = UpsampleOneStep( + upscale, embed_dim, num_out_ch, (img_size, img_size) + ) + + self.apply(self._init_weights) + self.load_state_dict(state_dict, strict=True) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=0.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance( + m, (nn.LayerNorm, nn.BatchNorm2d, nn.GroupNorm, nn.InstanceNorm2d) + ): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + + def forward_features(self, x): + _, _, H, W = x.shape + x_size = [H, W] + x = self.before_RG(x) + for layer in self.layers: + x = layer(x, x_size) + x = self.norm(x) + x = rearrange(x, "b (h w) c -> b c h w", h=H, w=W) + + return x + + def forward(self, x): + """ + Input: x: (B, C, H, W) + """ + self.mean = self.mean.type_as(x) + x = (x - 
self.mean) * self.img_range + + if self.upsampler == "pixelshuffle": + # for image SR + x = self.conv_first(x) + x = self.conv_after_body(self.forward_features(x)) + x + x = self.conv_before_upsample(x) + x = self.conv_last(self.upsample(x)) + elif self.upsampler == "pixelshuffledirect": + # for lightweight SR + x = self.conv_first(x) + x = self.conv_after_body(self.forward_features(x)) + x + x = self.upsample(x) + + x = x / self.img_range + self.mean + return x diff --git a/ldm_patched/pfn/architecture/HAT.py b/ldm_patched/pfn/architecture/HAT.py new file mode 100644 index 0000000000000000000000000000000000000000..6694742199bcbdb34ca197b941804dc68af353e7 --- /dev/null +++ b/ldm_patched/pfn/architecture/HAT.py @@ -0,0 +1,1277 @@ +# pylint: skip-file +# HAT from https://github.com/XPixelGroup/HAT/blob/main/hat/archs/hat_arch.py +import math +import re + +import torch +import torch.nn as nn +import torch.nn.functional as F +from einops import rearrange + +from .timm.helpers import to_2tuple +from .timm.weight_init import trunc_normal_ + + +def drop_path(x, drop_prob: float = 0.0, training: bool = False): + """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). + From: https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/layers/drop.py + """ + if drop_prob == 0.0 or not training: + return x + keep_prob = 1 - drop_prob + shape = (x.shape[0],) + (1,) * ( + x.ndim - 1 + ) # work with diff dim tensors, not just 2D ConvNets + random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device) + random_tensor.floor_() # binarize + output = x.div(keep_prob) * random_tensor + return output + + +class DropPath(nn.Module): + """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). + From: https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/layers/drop.py + """ + + def __init__(self, drop_prob=None): + super(DropPath, self).__init__() + self.drop_prob = drop_prob + + def forward(self, x): + return drop_path(x, self.drop_prob, self.training) # type: ignore + + +class ChannelAttention(nn.Module): + """Channel attention used in RCAN. + Args: + num_feat (int): Channel number of intermediate features. + squeeze_factor (int): Channel squeeze factor. Default: 16. 
+ """ + + def __init__(self, num_feat, squeeze_factor=16): + super(ChannelAttention, self).__init__() + self.attention = nn.Sequential( + nn.AdaptiveAvgPool2d(1), + nn.Conv2d(num_feat, num_feat // squeeze_factor, 1, padding=0), + nn.ReLU(inplace=True), + nn.Conv2d(num_feat // squeeze_factor, num_feat, 1, padding=0), + nn.Sigmoid(), + ) + + def forward(self, x): + y = self.attention(x) + return x * y + + +class CAB(nn.Module): + def __init__(self, num_feat, compress_ratio=3, squeeze_factor=30): + super(CAB, self).__init__() + + self.cab = nn.Sequential( + nn.Conv2d(num_feat, num_feat // compress_ratio, 3, 1, 1), + nn.GELU(), + nn.Conv2d(num_feat // compress_ratio, num_feat, 3, 1, 1), + ChannelAttention(num_feat, squeeze_factor), + ) + + def forward(self, x): + return self.cab(x) + + +class Mlp(nn.Module): + def __init__( + self, + in_features, + hidden_features=None, + out_features=None, + act_layer=nn.GELU, + drop=0.0, + ): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +def window_partition(x, window_size): + """ + Args: + x: (b, h, w, c) + window_size (int): window size + Returns: + windows: (num_windows*b, window_size, window_size, c) + """ + b, h, w, c = x.shape + x = x.view(b, h // window_size, window_size, w // window_size, window_size, c) + windows = ( + x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, c) + ) + return windows + + +def window_reverse(windows, window_size, h, w): + """ + Args: + windows: (num_windows*b, window_size, window_size, c) + window_size (int): Window size + h (int): Height of image + w (int): Width of image + Returns: + x: (b, h, w, c) + """ + b = int(windows.shape[0] / (h * w / window_size / window_size)) + x = windows.view( + b, h // window_size, w // window_size, window_size, window_size, -1 + ) + x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(b, h, w, -1) + return x + + +class WindowAttention(nn.Module): + r"""Window based multi-head self attention (W-MSA) module with relative position bias. + It supports both of shifted and non-shifted window. + Args: + dim (int): Number of input channels. + window_size (tuple[int]): The height and width of the window. + num_heads (int): Number of attention heads. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set + attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0 + proj_drop (float, optional): Dropout ratio of output. 
Default: 0.0 + """ + + def __init__( + self, + dim, + window_size, + num_heads, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + ): + super().__init__() + self.dim = dim + self.window_size = window_size # Wh, Ww + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = qk_scale or head_dim**-0.5 + + # define a parameter table of relative position bias + self.relative_position_bias_table = nn.Parameter( # type: ignore + torch.zeros((2 * window_size[0] - 1) * (2 * window_size[1] - 1), num_heads) + ) # 2*Wh-1 * 2*Ww-1, nH + + self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + + self.proj_drop = nn.Dropout(proj_drop) + + trunc_normal_(self.relative_position_bias_table, std=0.02) + self.softmax = nn.Softmax(dim=-1) + + def forward(self, x, rpi, mask=None): + """ + Args: + x: input features with shape of (num_windows*b, n, c) + mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None + """ + b_, n, c = x.shape + qkv = ( + self.qkv(x) + .reshape(b_, n, 3, self.num_heads, c // self.num_heads) + .permute(2, 0, 3, 1, 4) + ) + q, k, v = ( + qkv[0], + qkv[1], + qkv[2], + ) # make torchscript happy (cannot use tensor as tuple) + + q = q * self.scale + attn = q @ k.transpose(-2, -1) + + relative_position_bias = self.relative_position_bias_table[rpi.view(-1)].view( + self.window_size[0] * self.window_size[1], + self.window_size[0] * self.window_size[1], + -1, + ) # Wh*Ww,Wh*Ww,nH + relative_position_bias = relative_position_bias.permute( + 2, 0, 1 + ).contiguous() # nH, Wh*Ww, Wh*Ww + attn = attn + relative_position_bias.unsqueeze(0) + + if mask is not None: + nw = mask.shape[0] + attn = attn.view(b_ // nw, nw, self.num_heads, n, n) + mask.unsqueeze( + 1 + ).unsqueeze(0) + attn = attn.view(-1, self.num_heads, n, n) + attn = self.softmax(attn) + else: + attn = self.softmax(attn) + + attn = self.attn_drop(attn) + + x = (attn @ v).transpose(1, 2).reshape(b_, n, c) + x = self.proj(x) + x = self.proj_drop(x) + return x + + +class HAB(nn.Module): + r"""Hybrid Attention Block. + Args: + dim (int): Number of input channels. + input_resolution (tuple[int]): Input resolution. + num_heads (int): Number of attention heads. + window_size (int): Window size. + shift_size (int): Shift size for SW-MSA. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float, optional): Stochastic depth rate. Default: 0.0 + act_layer (nn.Module, optional): Activation layer. Default: nn.GELU + norm_layer (nn.Module, optional): Normalization layer. 
Default: nn.LayerNorm + """ + + def __init__( + self, + dim, + input_resolution, + num_heads, + window_size=7, + shift_size=0, + compress_ratio=3, + squeeze_factor=30, + conv_scale=0.01, + mlp_ratio=4.0, + qkv_bias=True, + qk_scale=None, + drop=0.0, + attn_drop=0.0, + drop_path=0.0, + act_layer=nn.GELU, + norm_layer=nn.LayerNorm, + ): + super().__init__() + self.dim = dim + self.input_resolution = input_resolution + self.num_heads = num_heads + self.window_size = window_size + self.shift_size = shift_size + self.mlp_ratio = mlp_ratio + if min(self.input_resolution) <= self.window_size: + # if window size is larger than input resolution, we don't partition windows + self.shift_size = 0 + self.window_size = min(self.input_resolution) + assert ( + 0 <= self.shift_size < self.window_size + ), "shift_size must in 0-window_size" + + self.norm1 = norm_layer(dim) + self.attn = WindowAttention( + dim, + window_size=to_2tuple(self.window_size), + num_heads=num_heads, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop=attn_drop, + proj_drop=drop, + ) + + self.conv_scale = conv_scale + self.conv_block = CAB( + num_feat=dim, compress_ratio=compress_ratio, squeeze_factor=squeeze_factor + ) + + self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp( + in_features=dim, + hidden_features=mlp_hidden_dim, + act_layer=act_layer, + drop=drop, + ) + + def forward(self, x, x_size, rpi_sa, attn_mask): + h, w = x_size + b, _, c = x.shape + # assert seq_len == h * w, "input feature has wrong size" + + shortcut = x + x = self.norm1(x) + x = x.view(b, h, w, c) + + # Conv_X + conv_x = self.conv_block(x.permute(0, 3, 1, 2)) + conv_x = conv_x.permute(0, 2, 3, 1).contiguous().view(b, h * w, c) + + # cyclic shift + if self.shift_size > 0: + shifted_x = torch.roll( + x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2) + ) + attn_mask = attn_mask + else: + shifted_x = x + attn_mask = None + + # partition windows + x_windows = window_partition( + shifted_x, self.window_size + ) # nw*b, window_size, window_size, c + x_windows = x_windows.view( + -1, self.window_size * self.window_size, c + ) # nw*b, window_size*window_size, c + + # W-MSA/SW-MSA (to be compatible for testing on images whose shapes are the multiple of window size + attn_windows = self.attn(x_windows, rpi=rpi_sa, mask=attn_mask) + + # merge windows + attn_windows = attn_windows.view(-1, self.window_size, self.window_size, c) + shifted_x = window_reverse(attn_windows, self.window_size, h, w) # b h' w' c + + # reverse cyclic shift + if self.shift_size > 0: + attn_x = torch.roll( + shifted_x, shifts=(self.shift_size, self.shift_size), dims=(1, 2) + ) + else: + attn_x = shifted_x + attn_x = attn_x.view(b, h * w, c) + + # FFN + x = shortcut + self.drop_path(attn_x) + conv_x * self.conv_scale + x = x + self.drop_path(self.mlp(self.norm2(x))) + + return x + + +class PatchMerging(nn.Module): + r"""Patch Merging Layer. + Args: + input_resolution (tuple[int]): Resolution of input feature. + dim (int): Number of input channels. + norm_layer (nn.Module, optional): Normalization layer. 
Default: nn.LayerNorm + """ + + def __init__(self, input_resolution, dim, norm_layer=nn.LayerNorm): + super().__init__() + self.input_resolution = input_resolution + self.dim = dim + self.reduction = nn.Linear(4 * dim, 2 * dim, bias=False) + self.norm = norm_layer(4 * dim) + + def forward(self, x): + """ + x: b, h*w, c + """ + h, w = self.input_resolution + b, seq_len, c = x.shape + assert seq_len == h * w, "input feature has wrong size" + assert h % 2 == 0 and w % 2 == 0, f"x size ({h}*{w}) are not even." + + x = x.view(b, h, w, c) + + x0 = x[:, 0::2, 0::2, :] # b h/2 w/2 c + x1 = x[:, 1::2, 0::2, :] # b h/2 w/2 c + x2 = x[:, 0::2, 1::2, :] # b h/2 w/2 c + x3 = x[:, 1::2, 1::2, :] # b h/2 w/2 c + x = torch.cat([x0, x1, x2, x3], -1) # b h/2 w/2 4*c + x = x.view(b, -1, 4 * c) # b h/2*w/2 4*c + + x = self.norm(x) + x = self.reduction(x) + + return x + + +class OCAB(nn.Module): + # overlapping cross-attention block + + def __init__( + self, + dim, + input_resolution, + window_size, + overlap_ratio, + num_heads, + qkv_bias=True, + qk_scale=None, + mlp_ratio=2, + norm_layer=nn.LayerNorm, + ): + super().__init__() + self.dim = dim + self.input_resolution = input_resolution + self.window_size = window_size + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = qk_scale or head_dim**-0.5 + self.overlap_win_size = int(window_size * overlap_ratio) + window_size + + self.norm1 = norm_layer(dim) + self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) + self.unfold = nn.Unfold( + kernel_size=(self.overlap_win_size, self.overlap_win_size), + stride=window_size, + padding=(self.overlap_win_size - window_size) // 2, + ) + + # define a parameter table of relative position bias + self.relative_position_bias_table = nn.Parameter( # type: ignore + torch.zeros( + (window_size + self.overlap_win_size - 1) + * (window_size + self.overlap_win_size - 1), + num_heads, + ) + ) # 2*Wh-1 * 2*Ww-1, nH + + trunc_normal_(self.relative_position_bias_table, std=0.02) + self.softmax = nn.Softmax(dim=-1) + + self.proj = nn.Linear(dim, dim) + + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp( + in_features=dim, hidden_features=mlp_hidden_dim, act_layer=nn.GELU + ) + + def forward(self, x, x_size, rpi): + h, w = x_size + b, _, c = x.shape + + shortcut = x + x = self.norm1(x) + x = x.view(b, h, w, c) + + qkv = self.qkv(x).reshape(b, h, w, 3, c).permute(3, 0, 4, 1, 2) # 3, b, c, h, w + q = qkv[0].permute(0, 2, 3, 1) # b, h, w, c + kv = torch.cat((qkv[1], qkv[2]), dim=1) # b, 2*c, h, w + + # partition windows + q_windows = window_partition( + q, self.window_size + ) # nw*b, window_size, window_size, c + q_windows = q_windows.view( + -1, self.window_size * self.window_size, c + ) # nw*b, window_size*window_size, c + + kv_windows = self.unfold(kv) # b, c*w*w, nw + kv_windows = rearrange( + kv_windows, + "b (nc ch owh oww) nw -> nc (b nw) (owh oww) ch", + nc=2, + ch=c, + owh=self.overlap_win_size, + oww=self.overlap_win_size, + ).contiguous() # 2, nw*b, ow*ow, c + # Do the above rearrangement without the rearrange function + # kv_windows = kv_windows.view( + # 2, b, self.overlap_win_size, self.overlap_win_size, c, -1 + # ) + # kv_windows = kv_windows.permute(0, 5, 1, 2, 3, 4).contiguous() + # kv_windows = kv_windows.view( + # 2, -1, self.overlap_win_size * self.overlap_win_size, c + # ) + + k_windows, v_windows = kv_windows[0], kv_windows[1] # nw*b, ow*ow, c + + b_, nq, _ = q_windows.shape + _, n, _ = k_windows.shape + d = self.dim // self.num_heads + q = q_windows.reshape(b_, nq, 
self.num_heads, d).permute( + 0, 2, 1, 3 + ) # nw*b, nH, nq, d + k = k_windows.reshape(b_, n, self.num_heads, d).permute( + 0, 2, 1, 3 + ) # nw*b, nH, n, d + v = v_windows.reshape(b_, n, self.num_heads, d).permute( + 0, 2, 1, 3 + ) # nw*b, nH, n, d + + q = q * self.scale + attn = q @ k.transpose(-2, -1) + + relative_position_bias = self.relative_position_bias_table[rpi.view(-1)].view( + self.window_size * self.window_size, + self.overlap_win_size * self.overlap_win_size, + -1, + ) # ws*ws, wse*wse, nH + relative_position_bias = relative_position_bias.permute( + 2, 0, 1 + ).contiguous() # nH, ws*ws, wse*wse + attn = attn + relative_position_bias.unsqueeze(0) + + attn = self.softmax(attn) + attn_windows = (attn @ v).transpose(1, 2).reshape(b_, nq, self.dim) + + # merge windows + attn_windows = attn_windows.view( + -1, self.window_size, self.window_size, self.dim + ) + x = window_reverse(attn_windows, self.window_size, h, w) # b h w c + x = x.view(b, h * w, self.dim) + + x = self.proj(x) + shortcut + + x = x + self.mlp(self.norm2(x)) + return x + + +class AttenBlocks(nn.Module): + """A series of attention blocks for one RHAG. + Args: + dim (int): Number of input channels. + input_resolution (tuple[int]): Input resolution. + depth (int): Number of blocks. + num_heads (int): Number of attention heads. + window_size (int): Local window size. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0 + norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm + downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None + use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False. 
+ """ + + def __init__( + self, + dim, + input_resolution, + depth, + num_heads, + window_size, + compress_ratio, + squeeze_factor, + conv_scale, + overlap_ratio, + mlp_ratio=4.0, + qkv_bias=True, + qk_scale=None, + drop=0.0, + attn_drop=0.0, + drop_path=0.0, + norm_layer=nn.LayerNorm, + downsample=None, + use_checkpoint=False, + ): + super().__init__() + self.dim = dim + self.input_resolution = input_resolution + self.depth = depth + self.use_checkpoint = use_checkpoint + + # build blocks + self.blocks = nn.ModuleList( + [ + HAB( + dim=dim, + input_resolution=input_resolution, + num_heads=num_heads, + window_size=window_size, + shift_size=0 if (i % 2 == 0) else window_size // 2, + compress_ratio=compress_ratio, + squeeze_factor=squeeze_factor, + conv_scale=conv_scale, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop, + attn_drop=attn_drop, + drop_path=drop_path[i] + if isinstance(drop_path, list) + else drop_path, + norm_layer=norm_layer, + ) + for i in range(depth) + ] + ) + + # OCAB + self.overlap_attn = OCAB( + dim=dim, + input_resolution=input_resolution, + window_size=window_size, + overlap_ratio=overlap_ratio, + num_heads=num_heads, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + mlp_ratio=mlp_ratio, # type: ignore + norm_layer=norm_layer, + ) + + # patch merging layer + if downsample is not None: + self.downsample = downsample( + input_resolution, dim=dim, norm_layer=norm_layer + ) + else: + self.downsample = None + + def forward(self, x, x_size, params): + for blk in self.blocks: + x = blk(x, x_size, params["rpi_sa"], params["attn_mask"]) + + x = self.overlap_attn(x, x_size, params["rpi_oca"]) + + if self.downsample is not None: + x = self.downsample(x) + return x + + +class RHAG(nn.Module): + """Residual Hybrid Attention Group (RHAG). + Args: + dim (int): Number of input channels. + input_resolution (tuple[int]): Input resolution. + depth (int): Number of blocks. + num_heads (int): Number of attention heads. + window_size (int): Local window size. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0 + norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm + downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None + use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False. + img_size: Input image size. + patch_size: Patch size. + resi_connection: The convolutional block before residual connection. 
+ """ + + def __init__( + self, + dim, + input_resolution, + depth, + num_heads, + window_size, + compress_ratio, + squeeze_factor, + conv_scale, + overlap_ratio, + mlp_ratio=4.0, + qkv_bias=True, + qk_scale=None, + drop=0.0, + attn_drop=0.0, + drop_path=0.0, + norm_layer=nn.LayerNorm, + downsample=None, + use_checkpoint=False, + img_size=224, + patch_size=4, + resi_connection="1conv", + ): + super(RHAG, self).__init__() + + self.dim = dim + self.input_resolution = input_resolution + + self.residual_group = AttenBlocks( + dim=dim, + input_resolution=input_resolution, + depth=depth, + num_heads=num_heads, + window_size=window_size, + compress_ratio=compress_ratio, + squeeze_factor=squeeze_factor, + conv_scale=conv_scale, + overlap_ratio=overlap_ratio, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop, + attn_drop=attn_drop, + drop_path=drop_path, + norm_layer=norm_layer, + downsample=downsample, + use_checkpoint=use_checkpoint, + ) + + if resi_connection == "1conv": + self.conv = nn.Conv2d(dim, dim, 3, 1, 1) + elif resi_connection == "identity": + self.conv = nn.Identity() + + self.patch_embed = PatchEmbed( + img_size=img_size, + patch_size=patch_size, + in_chans=0, + embed_dim=dim, + norm_layer=None, + ) + + self.patch_unembed = PatchUnEmbed( + img_size=img_size, + patch_size=patch_size, + in_chans=0, + embed_dim=dim, + norm_layer=None, + ) + + def forward(self, x, x_size, params): + return ( + self.patch_embed( + self.conv( + self.patch_unembed(self.residual_group(x, x_size, params), x_size) + ) + ) + + x + ) + + +class PatchEmbed(nn.Module): + r"""Image to Patch Embedding + Args: + img_size (int): Image size. Default: 224. + patch_size (int): Patch token size. Default: 4. + in_chans (int): Number of input image channels. Default: 3. + embed_dim (int): Number of linear projection output channels. Default: 96. + norm_layer (nn.Module, optional): Normalization layer. Default: None + """ + + def __init__( + self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None + ): + super().__init__() + img_size = to_2tuple(img_size) + patch_size = to_2tuple(patch_size) + patches_resolution = [ + img_size[0] // patch_size[0], # type: ignore + img_size[1] // patch_size[1], # type: ignore + ] + self.img_size = img_size + self.patch_size = patch_size + self.patches_resolution = patches_resolution + self.num_patches = patches_resolution[0] * patches_resolution[1] + + self.in_chans = in_chans + self.embed_dim = embed_dim + + if norm_layer is not None: + self.norm = norm_layer(embed_dim) + else: + self.norm = None + + def forward(self, x): + x = x.flatten(2).transpose(1, 2) # b Ph*Pw c + if self.norm is not None: + x = self.norm(x) + return x + + +class PatchUnEmbed(nn.Module): + r"""Image to Patch Unembedding + Args: + img_size (int): Image size. Default: 224. + patch_size (int): Patch token size. Default: 4. + in_chans (int): Number of input image channels. Default: 3. + embed_dim (int): Number of linear projection output channels. Default: 96. + norm_layer (nn.Module, optional): Normalization layer. 
Default: None + """ + + def __init__( + self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None + ): + super().__init__() + img_size = to_2tuple(img_size) + patch_size = to_2tuple(patch_size) + patches_resolution = [ + img_size[0] // patch_size[0], # type: ignore + img_size[1] // patch_size[1], # type: ignore + ] + self.img_size = img_size + self.patch_size = patch_size + self.patches_resolution = patches_resolution + self.num_patches = patches_resolution[0] * patches_resolution[1] + + self.in_chans = in_chans + self.embed_dim = embed_dim + + def forward(self, x, x_size): + x = ( + x.transpose(1, 2) + .contiguous() + .view(x.shape[0], self.embed_dim, x_size[0], x_size[1]) + ) # b Ph*Pw c + return x + + +class Upsample(nn.Sequential): + """Upsample module. + Args: + scale (int): Scale factor. Supported scales: 2^n and 3. + num_feat (int): Channel number of intermediate features. + """ + + def __init__(self, scale, num_feat): + m = [] + if (scale & (scale - 1)) == 0: # scale = 2^n + for _ in range(int(math.log(scale, 2))): + m.append(nn.Conv2d(num_feat, 4 * num_feat, 3, 1, 1)) + m.append(nn.PixelShuffle(2)) + elif scale == 3: + m.append(nn.Conv2d(num_feat, 9 * num_feat, 3, 1, 1)) + m.append(nn.PixelShuffle(3)) + else: + raise ValueError( + f"scale {scale} is not supported. " "Supported scales: 2^n and 3." + ) + super(Upsample, self).__init__(*m) + + +class HAT(nn.Module): + r"""Hybrid Attention Transformer + A PyTorch implementation of : `Activating More Pixels in Image Super-Resolution Transformer`. + Some codes are based on SwinIR. + Args: + img_size (int | tuple(int)): Input image size. Default 64 + patch_size (int | tuple(int)): Patch size. Default: 1 + in_chans (int): Number of input image channels. Default: 3 + embed_dim (int): Patch embedding dimension. Default: 96 + depths (tuple(int)): Depth of each Swin Transformer layer. + num_heads (tuple(int)): Number of attention heads in different layers. + window_size (int): Window size. Default: 7 + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4 + qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float): Override default qk scale of head_dim ** -0.5 if set. Default: None + drop_rate (float): Dropout rate. Default: 0 + attn_drop_rate (float): Attention dropout rate. Default: 0 + drop_path_rate (float): Stochastic depth rate. Default: 0.1 + norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm. + ape (bool): If True, add absolute position embedding to the patch embedding. Default: False + patch_norm (bool): If True, add normalization after patch embedding. Default: True + use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False + upscale: Upscale factor. 2/3/4/8 for image SR, 1 for denoising and compress artifact reduction + img_range: Image range. 1. or 255. + upsampler: The reconstruction reconstruction module. 'pixelshuffle'/'pixelshuffledirect'/'nearest+conv'/None + resi_connection: The convolutional block before residual connection. 
'1conv'/'3conv' + """ + + def __init__( + self, + state_dict, + **kwargs, + ): + super(HAT, self).__init__() + + # Defaults + img_size = 64 + patch_size = 1 + in_chans = 3 + embed_dim = 96 + depths = (6, 6, 6, 6) + num_heads = (6, 6, 6, 6) + window_size = 7 + compress_ratio = 3 + squeeze_factor = 30 + conv_scale = 0.01 + overlap_ratio = 0.5 + mlp_ratio = 4.0 + qkv_bias = True + qk_scale = None + drop_rate = 0.0 + attn_drop_rate = 0.0 + drop_path_rate = 0.1 + norm_layer = nn.LayerNorm + ape = False + patch_norm = True + use_checkpoint = False + upscale = 2 + img_range = 1.0 + upsampler = "" + resi_connection = "1conv" + + self.state = state_dict + self.model_arch = "HAT" + self.sub_type = "SR" + self.supports_fp16 = False + self.support_bf16 = True + self.min_size_restriction = 16 + + state_keys = list(state_dict.keys()) + + num_feat = state_dict["conv_last.weight"].shape[1] + in_chans = state_dict["conv_first.weight"].shape[1] + num_out_ch = state_dict["conv_last.weight"].shape[0] + embed_dim = state_dict["conv_first.weight"].shape[0] + + if "conv_before_upsample.0.weight" in state_keys: + if "conv_up1.weight" in state_keys: + upsampler = "nearest+conv" + else: + upsampler = "pixelshuffle" + supports_fp16 = False + elif "upsample.0.weight" in state_keys: + upsampler = "pixelshuffledirect" + else: + upsampler = "" + upscale = 1 + if upsampler == "nearest+conv": + upsample_keys = [ + x for x in state_keys if "conv_up" in x and "bias" not in x + ] + + for upsample_key in upsample_keys: + upscale *= 2 + elif upsampler == "pixelshuffle": + upsample_keys = [ + x + for x in state_keys + if "upsample" in x and "conv" not in x and "bias" not in x + ] + for upsample_key in upsample_keys: + shape = self.state[upsample_key].shape[0] + upscale *= math.sqrt(shape // num_feat) + upscale = int(upscale) + elif upsampler == "pixelshuffledirect": + upscale = int( + math.sqrt(self.state["upsample.0.bias"].shape[0] // num_out_ch) + ) + + max_layer_num = 0 + max_block_num = 0 + for key in state_keys: + result = re.match( + r"layers.(\d*).residual_group.blocks.(\d*).conv_block.cab.0.weight", key + ) + if result: + layer_num, block_num = result.groups() + max_layer_num = max(max_layer_num, int(layer_num)) + max_block_num = max(max_block_num, int(block_num)) + + depths = [max_block_num + 1 for _ in range(max_layer_num + 1)] + + if ( + "layers.0.residual_group.blocks.0.attn.relative_position_bias_table" + in state_keys + ): + num_heads_num = self.state[ + "layers.0.residual_group.blocks.0.attn.relative_position_bias_table" + ].shape[-1] + num_heads = [num_heads_num for _ in range(max_layer_num + 1)] + else: + num_heads = depths + + mlp_ratio = float( + self.state["layers.0.residual_group.blocks.0.mlp.fc1.bias"].shape[0] + / embed_dim + ) + + # TODO: could actually count the layers, but this should do + if "layers.0.conv.4.weight" in state_keys: + resi_connection = "3conv" + else: + resi_connection = "1conv" + + window_size = int(math.sqrt(self.state["relative_position_index_SA"].shape[0])) + + # Not sure if this is needed or used at all anywhere in HAT's config + if "layers.0.residual_group.blocks.1.attn_mask" in state_keys: + img_size = int( + math.sqrt( + self.state["layers.0.residual_group.blocks.1.attn_mask"].shape[0] + ) + * window_size + ) + + self.window_size = window_size + self.shift_size = window_size // 2 + self.overlap_ratio = overlap_ratio + + self.in_nc = in_chans + self.out_nc = num_out_ch + self.num_feat = num_feat + self.embed_dim = embed_dim + self.num_heads = num_heads + self.depths = depths + 
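        # Worked example (hypothetical 4x classical-SR HAT checkpoint): the state_dict
        # exposes upsample.0.weight and upsample.2.weight, each with shape[0] == 4 * num_feat,
        # so the pixelshuffle branch above multiplies upscale by sqrt(4) twice (1 -> 2 -> 4);
        # the regex over layers.N.residual_group.blocks.M.conv_block.cab.0.weight keys
        # recovers depths such as [6, 6, 6, 6], num_heads is read from the last dimension of
        # the relative_position_bias_table, and window_size is
        # int(math.sqrt(relative_position_index_SA.shape[0])) (e.g. 16 for a 256-entry index).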
self.window_size = window_size + self.mlp_ratio = mlp_ratio + self.scale = upscale + self.upsampler = upsampler + self.img_size = img_size + self.img_range = img_range + self.resi_connection = resi_connection + + num_in_ch = in_chans + # num_out_ch = in_chans + # num_feat = 64 + self.img_range = img_range + if in_chans == 3: + rgb_mean = (0.4488, 0.4371, 0.4040) + self.mean = torch.Tensor(rgb_mean).view(1, 3, 1, 1) + else: + self.mean = torch.zeros(1, 1, 1, 1) + self.upscale = upscale + self.upsampler = upsampler + + # relative position index + relative_position_index_SA = self.calculate_rpi_sa() + relative_position_index_OCA = self.calculate_rpi_oca() + self.register_buffer("relative_position_index_SA", relative_position_index_SA) + self.register_buffer("relative_position_index_OCA", relative_position_index_OCA) + + # ------------------------- 1, shallow feature extraction ------------------------- # + self.conv_first = nn.Conv2d(num_in_ch, embed_dim, 3, 1, 1) + + # ------------------------- 2, deep feature extraction ------------------------- # + self.num_layers = len(depths) + self.embed_dim = embed_dim + self.ape = ape + self.patch_norm = patch_norm + self.num_features = embed_dim + self.mlp_ratio = mlp_ratio + + # split image into non-overlapping patches + self.patch_embed = PatchEmbed( + img_size=img_size, + patch_size=patch_size, + in_chans=embed_dim, + embed_dim=embed_dim, + norm_layer=norm_layer if self.patch_norm else None, + ) + num_patches = self.patch_embed.num_patches + patches_resolution = self.patch_embed.patches_resolution + self.patches_resolution = patches_resolution + + # merge non-overlapping patches into image + self.patch_unembed = PatchUnEmbed( + img_size=img_size, + patch_size=patch_size, + in_chans=embed_dim, + embed_dim=embed_dim, + norm_layer=norm_layer if self.patch_norm else None, + ) + + # absolute position embedding + if self.ape: + self.absolute_pos_embed = nn.Parameter( # type: ignore[arg-type] + torch.zeros(1, num_patches, embed_dim) + ) + trunc_normal_(self.absolute_pos_embed, std=0.02) + + self.pos_drop = nn.Dropout(p=drop_rate) + + # stochastic depth + dpr = [ + x.item() for x in torch.linspace(0, drop_path_rate, sum(depths)) + ] # stochastic depth decay rule + + # build Residual Hybrid Attention Groups (RHAG) + self.layers = nn.ModuleList() + for i_layer in range(self.num_layers): + layer = RHAG( + dim=embed_dim, + input_resolution=(patches_resolution[0], patches_resolution[1]), + depth=depths[i_layer], + num_heads=num_heads[i_layer], + window_size=window_size, + compress_ratio=compress_ratio, + squeeze_factor=squeeze_factor, + conv_scale=conv_scale, + overlap_ratio=overlap_ratio, + mlp_ratio=self.mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[ + sum(depths[:i_layer]) : sum(depths[: i_layer + 1]) # type: ignore + ], # no impact on SR results + norm_layer=norm_layer, + downsample=None, + use_checkpoint=use_checkpoint, + img_size=img_size, + patch_size=patch_size, + resi_connection=resi_connection, + ) + self.layers.append(layer) + self.norm = norm_layer(self.num_features) + + # build the last conv layer in deep feature extraction + if resi_connection == "1conv": + self.conv_after_body = nn.Conv2d(embed_dim, embed_dim, 3, 1, 1) + elif resi_connection == "identity": + self.conv_after_body = nn.Identity() + + # ------------------------- 3, high quality image reconstruction ------------------------- # + if self.upsampler == "pixelshuffle": + # for classical SR + self.conv_before_upsample = 
nn.Sequential( + nn.Conv2d(embed_dim, num_feat, 3, 1, 1), nn.LeakyReLU(inplace=True) + ) + self.upsample = Upsample(upscale, num_feat) + self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1) + + self.apply(self._init_weights) + self.load_state_dict(self.state, strict=False) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=0.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + + def calculate_rpi_sa(self): + # calculate relative position index for SA + coords_h = torch.arange(self.window_size) + coords_w = torch.arange(self.window_size) + coords = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, Wh, Ww + coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww + relative_coords = ( + coords_flatten[:, :, None] - coords_flatten[:, None, :] + ) # 2, Wh*Ww, Wh*Ww + relative_coords = relative_coords.permute( + 1, 2, 0 + ).contiguous() # Wh*Ww, Wh*Ww, 2 + relative_coords[:, :, 0] += self.window_size - 1 # shift to start from 0 + relative_coords[:, :, 1] += self.window_size - 1 + relative_coords[:, :, 0] *= 2 * self.window_size - 1 + relative_position_index = relative_coords.sum(-1) # Wh*Ww, Wh*Ww + return relative_position_index + + def calculate_rpi_oca(self): + # calculate relative position index for OCA + window_size_ori = self.window_size + window_size_ext = self.window_size + int(self.overlap_ratio * self.window_size) + + coords_h = torch.arange(window_size_ori) + coords_w = torch.arange(window_size_ori) + coords_ori = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, ws, ws + coords_ori_flatten = torch.flatten(coords_ori, 1) # 2, ws*ws + + coords_h = torch.arange(window_size_ext) + coords_w = torch.arange(window_size_ext) + coords_ext = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, wse, wse + coords_ext_flatten = torch.flatten(coords_ext, 1) # 2, wse*wse + + relative_coords = ( + coords_ext_flatten[:, None, :] - coords_ori_flatten[:, :, None] + ) # 2, ws*ws, wse*wse + + relative_coords = relative_coords.permute( + 1, 2, 0 + ).contiguous() # ws*ws, wse*wse, 2 + relative_coords[:, :, 0] += ( + window_size_ori - window_size_ext + 1 + ) # shift to start from 0 + relative_coords[:, :, 1] += window_size_ori - window_size_ext + 1 + + relative_coords[:, :, 0] *= window_size_ori + window_size_ext - 1 + relative_position_index = relative_coords.sum(-1) + return relative_position_index + + def calculate_mask(self, x_size): + # calculate attention mask for SW-MSA + h, w = x_size + img_mask = torch.zeros((1, h, w, 1)) # 1 h w 1 + h_slices = ( + slice(0, -self.window_size), + slice(-self.window_size, -self.shift_size), + slice(-self.shift_size, None), + ) + w_slices = ( + slice(0, -self.window_size), + slice(-self.window_size, -self.shift_size), + slice(-self.shift_size, None), + ) + cnt = 0 + for h in h_slices: + for w in w_slices: + img_mask[:, h, w, :] = cnt + cnt += 1 + + mask_windows = window_partition( + img_mask, self.window_size + ) # nw, window_size, window_size, 1 + mask_windows = mask_windows.view(-1, self.window_size * self.window_size) + attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) + attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill( + attn_mask == 0, float(0.0) + ) + + return attn_mask + + @torch.jit.ignore # type: ignore + def no_weight_decay(self): + return {"absolute_pos_embed"} + + @torch.jit.ignore # type: ignore + def 
no_weight_decay_keywords(self): + return {"relative_position_bias_table"} + + def check_image_size(self, x): + _, _, h, w = x.size() + mod_pad_h = (self.window_size - h % self.window_size) % self.window_size + mod_pad_w = (self.window_size - w % self.window_size) % self.window_size + x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h), "reflect") + return x + + def forward_features(self, x): + x_size = (x.shape[2], x.shape[3]) + + # Calculate attention mask and relative position index in advance to speed up inference. + # The original code is very time-cosuming for large window size. + attn_mask = self.calculate_mask(x_size).to(x.device) + params = { + "attn_mask": attn_mask, + "rpi_sa": self.relative_position_index_SA, + "rpi_oca": self.relative_position_index_OCA, + } + + x = self.patch_embed(x) + if self.ape: + x = x + self.absolute_pos_embed + x = self.pos_drop(x) + + for layer in self.layers: + x = layer(x, x_size, params) + + x = self.norm(x) # b seq_len c + x = self.patch_unembed(x, x_size) + + return x + + def forward(self, x): + H, W = x.shape[2:] + self.mean = self.mean.type_as(x) + x = (x - self.mean) * self.img_range + x = self.check_image_size(x) + + if self.upsampler == "pixelshuffle": + # for classical SR + x = self.conv_first(x) + x = self.conv_after_body(self.forward_features(x)) + x + x = self.conv_before_upsample(x) + x = self.conv_last(self.upsample(x)) + + x = x / self.img_range + self.mean + + return x[:, :, : H * self.upscale, : W * self.upscale] diff --git a/ldm_patched/pfn/architecture/LICENSE-DAT b/ldm_patched/pfn/architecture/LICENSE-DAT new file mode 100644 index 0000000000000000000000000000000000000000..261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64 --- /dev/null +++ b/ldm_patched/pfn/architecture/LICENSE-DAT @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/ldm_patched/pfn/architecture/LICENSE-ESRGAN b/ldm_patched/pfn/architecture/LICENSE-ESRGAN new file mode 100644 index 0000000000000000000000000000000000000000..261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64 --- /dev/null +++ b/ldm_patched/pfn/architecture/LICENSE-ESRGAN @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/ldm_patched/pfn/architecture/LICENSE-HAT b/ldm_patched/pfn/architecture/LICENSE-HAT new file mode 100644 index 0000000000000000000000000000000000000000..003e97e96cbed07d07b5ff15831711181607edb3 --- /dev/null +++ b/ldm_patched/pfn/architecture/LICENSE-HAT @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022 Xiangyu Chen + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/ldm_patched/pfn/architecture/LICENSE-RealESRGAN b/ldm_patched/pfn/architecture/LICENSE-RealESRGAN new file mode 100644 index 0000000000000000000000000000000000000000..552a1eeaf01f4e7077013ed3496600c608f35202 --- /dev/null +++ b/ldm_patched/pfn/architecture/LICENSE-RealESRGAN @@ -0,0 +1,29 @@ +BSD 3-Clause License + +Copyright (c) 2021, Xintao Wang +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/ldm_patched/pfn/architecture/LICENSE-SCUNet b/ldm_patched/pfn/architecture/LICENSE-SCUNet new file mode 100644 index 0000000000000000000000000000000000000000..ff75c988f3482ab21da41f0d10068108be54ad88 --- /dev/null +++ b/ldm_patched/pfn/architecture/LICENSE-SCUNet @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. 
Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2022 Kai Zhang (cskaizhang@gmail.com, https://cszn.github.io/). All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/ldm_patched/pfn/architecture/LICENSE-SPSR b/ldm_patched/pfn/architecture/LICENSE-SPSR new file mode 100644 index 0000000000000000000000000000000000000000..3245f3f9e4f476ee3a283f41dd0d9db65544c222 --- /dev/null +++ b/ldm_patched/pfn/architecture/LICENSE-SPSR @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2018-2022 BasicSR Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/ldm_patched/pfn/architecture/LICENSE-SwiftSRGAN b/ldm_patched/pfn/architecture/LICENSE-SwiftSRGAN new file mode 100644 index 0000000000000000000000000000000000000000..0e259d42c996742e9e3cba14c677129b2c1b6311 --- /dev/null +++ b/ldm_patched/pfn/architecture/LICENSE-SwiftSRGAN @@ -0,0 +1,121 @@ +Creative Commons Legal Code + +CC0 1.0 Universal + + CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE + LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN + ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS + INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES + REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS + PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM + THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED + HEREUNDER. + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator +and subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for +the purpose of contributing to a commons of creative, cultural and +scientific works ("Commons") that the public can reliably and without fear +of later claims of infringement build upon, modify, incorporate in other +works, reuse and redistribute as freely as possible in any form whatsoever +and for any purposes, including without limitation commercial purposes. +These owners may contribute to the Commons to promote the ideal of a free +culture and the further production of creative, cultural and scientific +works, or to gain reputation or greater distribution for their Work in +part through the use and efforts of others. + +For these and/or other purposes and motivations, and without any +expectation of additional consideration or compensation, the person +associating CC0 with a Work (the "Affirmer"), to the extent that he or she +is an owner of Copyright and Related Rights in the Work, voluntarily +elects to apply CC0 to the Work and publicly distribute the Work under its +terms, with knowledge of his or her Copyright and Related Rights in the +Work and the meaning and intended legal effect of CC0 on those rights. + +1. 
Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not +limited to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, + communicate, and translate a Work; + ii. moral rights retained by the original author(s) and/or performer(s); +iii. publicity and privacy rights pertaining to a person's image or + likeness depicted in a Work; + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + v. rights protecting the extraction, dissemination, use and reuse of data + in a Work; + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation + thereof, including any amended or successor version of such + directive); and +vii. other similar, equivalent or corresponding rights throughout the + world based on applicable law or treaty, and any national + implementations thereof. + +2. Waiver. To the greatest extent permitted by, but not in contravention +of, applicable law, Affirmer hereby overtly, fully, permanently, +irrevocably and unconditionally waives, abandons, and surrenders all of +Affirmer's Copyright and Related Rights and associated claims and causes +of action, whether now known or unknown (including existing as well as +future claims and causes of action), in the Work (i) in all territories +worldwide, (ii) for the maximum duration provided by applicable law or +treaty (including future time extensions), (iii) in any current or future +medium and for any number of copies, and (iv) for any purpose whatsoever, +including without limitation commercial, advertising or promotional +purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each +member of the public at large and to the detriment of Affirmer's heirs and +successors, fully intending that such Waiver shall not be subject to +revocation, rescission, cancellation, termination, or any other legal or +equitable action to disrupt the quiet enjoyment of the Work by the public +as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason +be judged legally invalid or ineffective under applicable law, then the +Waiver shall be preserved to the maximum extent permitted taking into +account Affirmer's express Statement of Purpose. In addition, to the +extent the Waiver is so judged Affirmer hereby grants to each affected +person a royalty-free, non transferable, non sublicensable, non exclusive, +irrevocable and unconditional license to exercise Affirmer's Copyright and +Related Rights in the Work (i) in all territories worldwide, (ii) for the +maximum duration provided by applicable law or treaty (including future +time extensions), (iii) in any current or future medium and for any number +of copies, and (iv) for any purpose whatsoever, including without +limitation commercial, advertising or promotional purposes (the +"License"). The License shall be deemed effective as of the date CC0 was +applied by Affirmer to the Work. 
Should any part of the License for any +reason be judged legally invalid or ineffective under applicable law, such +partial invalidity or ineffectiveness shall not invalidate the remainder +of the License, and in such case Affirmer hereby affirms that he or she +will not (i) exercise any of his or her remaining Copyright and Related +Rights in the Work or (ii) assert any associated claims and causes of +action with respect to the Work, in either case contrary to Affirmer's +express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + b. Affirmer offers the Work as-is and makes no representations or + warranties of any kind concerning the Work, express, implied, + statutory or otherwise, including without limitation warranties of + title, merchantability, fitness for a particular purpose, non + infringement, or the absence of latent or other defects, accuracy, or + the present or absence of errors, whether or not discoverable, all to + the greatest extent permissible under applicable law. + c. Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without + limitation any person's Copyright and Related Rights in the Work. + Further, Affirmer disclaims responsibility for obtaining any necessary + consents, permissions or other rights required for any use of the + Work. + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to + this CC0 or use of the Work. diff --git a/ldm_patched/pfn/architecture/LICENSE-Swin2SR b/ldm_patched/pfn/architecture/LICENSE-Swin2SR new file mode 100644 index 0000000000000000000000000000000000000000..e5e4ee061a3f3fbad64bc837425716af7fb108f5 --- /dev/null +++ b/ldm_patched/pfn/architecture/LICENSE-Swin2SR @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [2021] [SwinIR Authors] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/ldm_patched/pfn/architecture/LICENSE-SwinIR b/ldm_patched/pfn/architecture/LICENSE-SwinIR new file mode 100644 index 0000000000000000000000000000000000000000..e5e4ee061a3f3fbad64bc837425716af7fb108f5 --- /dev/null +++ b/ldm_patched/pfn/architecture/LICENSE-SwinIR @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [2021] [SwinIR Authors] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/ldm_patched/pfn/architecture/LICENSE-lama b/ldm_patched/pfn/architecture/LICENSE-lama new file mode 100644 index 0000000000000000000000000000000000000000..ca822bb5f62a37a5a73f56a2d563b16dab46c03f --- /dev/null +++ b/ldm_patched/pfn/architecture/LICENSE-lama @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. 
Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [2021] Samsung Research + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/ldm_patched/pfn/architecture/LaMa.py b/ldm_patched/pfn/architecture/LaMa.py new file mode 100644 index 0000000000000000000000000000000000000000..a781f3e4dda789c06493fcf35a9803ee61efce73 --- /dev/null +++ b/ldm_patched/pfn/architecture/LaMa.py @@ -0,0 +1,694 @@ +# pylint: skip-file +""" +Model adapted from advimman's lama project: https://github.com/advimman/lama +""" + +# Fast Fourier Convolution NeurIPS 2020 +# original implementation https://github.com/pkumivision/FFC/blob/main/model_zoo/ffc.py +# paper https://proceedings.neurips.cc/paper/2020/file/2fd5d41ec6cfab47e32164d5624269b1-Paper.pdf + +from typing import List + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torchvision.transforms.functional import InterpolationMode, rotate + + +class LearnableSpatialTransformWrapper(nn.Module): + def __init__(self, impl, pad_coef=0.5, angle_init_range=80, train_angle=True): + super().__init__() + self.impl = impl + self.angle = torch.rand(1) * angle_init_range + if train_angle: + self.angle = nn.Parameter(self.angle, requires_grad=True) + self.pad_coef = pad_coef + + def forward(self, x): + if torch.is_tensor(x): + return self.inverse_transform(self.impl(self.transform(x)), x) + elif isinstance(x, tuple): + x_trans = tuple(self.transform(elem) for elem in x) + y_trans = self.impl(x_trans) + return tuple( + self.inverse_transform(elem, orig_x) for elem, orig_x in zip(y_trans, x) + ) + else: + raise ValueError(f"Unexpected input type {type(x)}") + + def transform(self, x): + height, width = x.shape[2:] + pad_h, pad_w = int(height * self.pad_coef), int(width * self.pad_coef) + x_padded = F.pad(x, [pad_w, pad_w, pad_h, pad_h], mode="reflect") + x_padded_rotated = rotate( + x_padded, self.angle.to(x_padded), InterpolationMode.BILINEAR, fill=0 + ) + + return x_padded_rotated + + def inverse_transform(self, y_padded_rotated, orig_x): + height, width = orig_x.shape[2:] + pad_h, pad_w = int(height * self.pad_coef), int(width * self.pad_coef) + + y_padded = rotate( + y_padded_rotated, + -self.angle.to(y_padded_rotated), + InterpolationMode.BILINEAR, + fill=0, + ) + y_height, y_width = y_padded.shape[2:] + y = y_padded[:, :, pad_h : y_height - pad_h, pad_w : y_width - pad_w] + return y + + +class SELayer(nn.Module): + def __init__(self, channel, reduction=16): + super(SELayer, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.fc = nn.Sequential( + nn.Linear(channel, channel // reduction, bias=False), + nn.ReLU(inplace=True), + nn.Linear(channel // reduction, channel, bias=False), + nn.Sigmoid(), + ) + + def forward(self, x): + b, c, _, _ = x.size() + y = self.avg_pool(x).view(b, c) + y = self.fc(y).view(b, c, 1, 1) + res = x * y.expand_as(x) + return res + + +class FourierUnit(nn.Module): + def __init__( + self, + in_channels, + out_channels, + groups=1, + spatial_scale_factor=None, + spatial_scale_mode="bilinear", + spectral_pos_encoding=False, + use_se=False, + se_kwargs=None, + ffc3d=False, + fft_norm="ortho", + ): + # bn_layer not used + super(FourierUnit, self).__init__() + self.groups = groups + + self.conv_layer = torch.nn.Conv2d( + 
in_channels=in_channels * 2 + (2 if spectral_pos_encoding else 0), + out_channels=out_channels * 2, + kernel_size=1, + stride=1, + padding=0, + groups=self.groups, + bias=False, + ) + self.bn = torch.nn.BatchNorm2d(out_channels * 2) + self.relu = torch.nn.ReLU(inplace=True) + + # squeeze and excitation block + self.use_se = use_se + if use_se: + if se_kwargs is None: + se_kwargs = {} + self.se = SELayer(self.conv_layer.in_channels, **se_kwargs) + + self.spatial_scale_factor = spatial_scale_factor + self.spatial_scale_mode = spatial_scale_mode + self.spectral_pos_encoding = spectral_pos_encoding + self.ffc3d = ffc3d + self.fft_norm = fft_norm + + def forward(self, x): + half_check = False + if x.type() == "torch.cuda.HalfTensor": + # half only works on gpu anyway + half_check = True + + batch = x.shape[0] + + if self.spatial_scale_factor is not None: + orig_size = x.shape[-2:] + x = F.interpolate( + x, + scale_factor=self.spatial_scale_factor, + mode=self.spatial_scale_mode, + align_corners=False, + ) + + # (batch, c, h, w/2+1, 2) + fft_dim = (-3, -2, -1) if self.ffc3d else (-2, -1) + if half_check == True: + ffted = torch.fft.rfftn( + x.float(), dim=fft_dim, norm=self.fft_norm + ) # .type(torch.cuda.HalfTensor) + else: + ffted = torch.fft.rfftn(x, dim=fft_dim, norm=self.fft_norm) + + ffted = torch.stack((ffted.real, ffted.imag), dim=-1) + ffted = ffted.permute(0, 1, 4, 2, 3).contiguous() # (batch, c, 2, h, w/2+1) + ffted = ffted.view( + ( + batch, + -1, + ) + + ffted.size()[3:] + ) + + if self.spectral_pos_encoding: + height, width = ffted.shape[-2:] + coords_vert = ( + torch.linspace(0, 1, height)[None, None, :, None] + .expand(batch, 1, height, width) + .to(ffted) + ) + coords_hor = ( + torch.linspace(0, 1, width)[None, None, None, :] + .expand(batch, 1, height, width) + .to(ffted) + ) + ffted = torch.cat((coords_vert, coords_hor, ffted), dim=1) + + if self.use_se: + ffted = self.se(ffted) + + if half_check == True: + ffted = self.conv_layer(ffted.half()) # (batch, c*2, h, w/2+1) + else: + ffted = self.conv_layer( + ffted + ) # .type(torch.cuda.FloatTensor) # (batch, c*2, h, w/2+1) + + ffted = self.relu(self.bn(ffted)) + # forcing to be always float + ffted = ffted.float() + + ffted = ( + ffted.view( + ( + batch, + -1, + 2, + ) + + ffted.size()[2:] + ) + .permute(0, 1, 3, 4, 2) + .contiguous() + ) # (batch,c, t, h, w/2+1, 2) + + ffted = torch.complex(ffted[..., 0], ffted[..., 1]) + + ifft_shape_slice = x.shape[-3:] if self.ffc3d else x.shape[-2:] + output = torch.fft.irfftn( + ffted, s=ifft_shape_slice, dim=fft_dim, norm=self.fft_norm + ) + + if half_check == True: + output = output.half() + + if self.spatial_scale_factor is not None: + output = F.interpolate( + output, + size=orig_size, + mode=self.spatial_scale_mode, + align_corners=False, + ) + + return output + + +class SpectralTransform(nn.Module): + def __init__( + self, + in_channels, + out_channels, + stride=1, + groups=1, + enable_lfu=True, + separable_fu=False, + **fu_kwargs, + ): + # bn_layer not used + super(SpectralTransform, self).__init__() + self.enable_lfu = enable_lfu + if stride == 2: + self.downsample = nn.AvgPool2d(kernel_size=(2, 2), stride=2) + else: + self.downsample = nn.Identity() + + self.stride = stride + self.conv1 = nn.Sequential( + nn.Conv2d( + in_channels, out_channels // 2, kernel_size=1, groups=groups, bias=False + ), + nn.BatchNorm2d(out_channels // 2), + nn.ReLU(inplace=True), + ) + fu_class = FourierUnit + self.fu = fu_class(out_channels // 2, out_channels // 2, groups, **fu_kwargs) + if 
self.enable_lfu: + self.lfu = fu_class(out_channels // 2, out_channels // 2, groups) + self.conv2 = torch.nn.Conv2d( + out_channels // 2, out_channels, kernel_size=1, groups=groups, bias=False + ) + + def forward(self, x): + x = self.downsample(x) + x = self.conv1(x) + output = self.fu(x) + + if self.enable_lfu: + _, c, h, _ = x.shape + split_no = 2 + split_s = h // split_no + xs = torch.cat( + torch.split(x[:, : c // 4], split_s, dim=-2), dim=1 + ).contiguous() + xs = torch.cat(torch.split(xs, split_s, dim=-1), dim=1).contiguous() + xs = self.lfu(xs) + xs = xs.repeat(1, 1, split_no, split_no).contiguous() + else: + xs = 0 + + output = self.conv2(x + output + xs) + + return output + + +class FFC(nn.Module): + def __init__( + self, + in_channels, + out_channels, + kernel_size, + ratio_gin, + ratio_gout, + stride=1, + padding=0, + dilation=1, + groups=1, + bias=False, + enable_lfu=True, + padding_type="reflect", + gated=False, + **spectral_kwargs, + ): + super(FFC, self).__init__() + + assert stride == 1 or stride == 2, "Stride should be 1 or 2." + self.stride = stride + + in_cg = int(in_channels * ratio_gin) + in_cl = in_channels - in_cg + out_cg = int(out_channels * ratio_gout) + out_cl = out_channels - out_cg + # groups_g = 1 if groups == 1 else int(groups * ratio_gout) + # groups_l = 1 if groups == 1 else groups - groups_g + + self.ratio_gin = ratio_gin + self.ratio_gout = ratio_gout + self.global_in_num = in_cg + + module = nn.Identity if in_cl == 0 or out_cl == 0 else nn.Conv2d + self.convl2l = module( + in_cl, + out_cl, + kernel_size, + stride, + padding, + dilation, + groups, + bias, + padding_mode=padding_type, + ) + module = nn.Identity if in_cl == 0 or out_cg == 0 else nn.Conv2d + self.convl2g = module( + in_cl, + out_cg, + kernel_size, + stride, + padding, + dilation, + groups, + bias, + padding_mode=padding_type, + ) + module = nn.Identity if in_cg == 0 or out_cl == 0 else nn.Conv2d + self.convg2l = module( + in_cg, + out_cl, + kernel_size, + stride, + padding, + dilation, + groups, + bias, + padding_mode=padding_type, + ) + module = nn.Identity if in_cg == 0 or out_cg == 0 else SpectralTransform + self.convg2g = module( + in_cg, + out_cg, + stride, + 1 if groups == 1 else groups // 2, + enable_lfu, + **spectral_kwargs, + ) + + self.gated = gated + module = ( + nn.Identity if in_cg == 0 or out_cl == 0 or not self.gated else nn.Conv2d + ) + self.gate = module(in_channels, 2, 1) + + def forward(self, x): + x_l, x_g = x if type(x) is tuple else (x, 0) + out_xl, out_xg = 0, 0 + + if self.gated: + total_input_parts = [x_l] + if torch.is_tensor(x_g): + total_input_parts.append(x_g) + total_input = torch.cat(total_input_parts, dim=1) + + gates = torch.sigmoid(self.gate(total_input)) + g2l_gate, l2g_gate = gates.chunk(2, dim=1) + else: + g2l_gate, l2g_gate = 1, 1 + + if self.ratio_gout != 1: + out_xl = self.convl2l(x_l) + self.convg2l(x_g) * g2l_gate + if self.ratio_gout != 0: + out_xg = self.convl2g(x_l) * l2g_gate + self.convg2g(x_g) + + return out_xl, out_xg + + +class FFC_BN_ACT(nn.Module): + def __init__( + self, + in_channels, + out_channels, + kernel_size, + ratio_gin, + ratio_gout, + stride=1, + padding=0, + dilation=1, + groups=1, + bias=False, + norm_layer=nn.BatchNorm2d, + activation_layer=nn.Identity, + padding_type="reflect", + enable_lfu=True, + **kwargs, + ): + super(FFC_BN_ACT, self).__init__() + self.ffc = FFC( + in_channels, + out_channels, + kernel_size, + ratio_gin, + ratio_gout, + stride, + padding, + dilation, + groups, + bias, + enable_lfu, + 
padding_type=padding_type, + **kwargs, + ) + lnorm = nn.Identity if ratio_gout == 1 else norm_layer + gnorm = nn.Identity if ratio_gout == 0 else norm_layer + global_channels = int(out_channels * ratio_gout) + self.bn_l = lnorm(out_channels - global_channels) + self.bn_g = gnorm(global_channels) + + lact = nn.Identity if ratio_gout == 1 else activation_layer + gact = nn.Identity if ratio_gout == 0 else activation_layer + self.act_l = lact(inplace=True) + self.act_g = gact(inplace=True) + + def forward(self, x): + x_l, x_g = self.ffc(x) + x_l = self.act_l(self.bn_l(x_l)) + x_g = self.act_g(self.bn_g(x_g)) + return x_l, x_g + + +class FFCResnetBlock(nn.Module): + def __init__( + self, + dim, + padding_type, + norm_layer, + activation_layer=nn.ReLU, + dilation=1, + spatial_transform_kwargs=None, + inline=False, + **conv_kwargs, + ): + super().__init__() + self.conv1 = FFC_BN_ACT( + dim, + dim, + kernel_size=3, + padding=dilation, + dilation=dilation, + norm_layer=norm_layer, + activation_layer=activation_layer, + padding_type=padding_type, + **conv_kwargs, + ) + self.conv2 = FFC_BN_ACT( + dim, + dim, + kernel_size=3, + padding=dilation, + dilation=dilation, + norm_layer=norm_layer, + activation_layer=activation_layer, + padding_type=padding_type, + **conv_kwargs, + ) + if spatial_transform_kwargs is not None: + self.conv1 = LearnableSpatialTransformWrapper( + self.conv1, **spatial_transform_kwargs + ) + self.conv2 = LearnableSpatialTransformWrapper( + self.conv2, **spatial_transform_kwargs + ) + self.inline = inline + + def forward(self, x): + if self.inline: + x_l, x_g = ( + x[:, : -self.conv1.ffc.global_in_num], + x[:, -self.conv1.ffc.global_in_num :], + ) + else: + x_l, x_g = x if type(x) is tuple else (x, 0) + + id_l, id_g = x_l, x_g + + x_l, x_g = self.conv1((x_l, x_g)) + x_l, x_g = self.conv2((x_l, x_g)) + + x_l, x_g = id_l + x_l, id_g + x_g + out = x_l, x_g + if self.inline: + out = torch.cat(out, dim=1) + return out + + +class ConcatTupleLayer(nn.Module): + def forward(self, x): + assert isinstance(x, tuple) + x_l, x_g = x + assert torch.is_tensor(x_l) or torch.is_tensor(x_g) + if not torch.is_tensor(x_g): + return x_l + return torch.cat(x, dim=1) + + +class FFCResNetGenerator(nn.Module): + def __init__( + self, + input_nc, + output_nc, + ngf=64, + n_downsampling=3, + n_blocks=18, + norm_layer=nn.BatchNorm2d, + padding_type="reflect", + activation_layer=nn.ReLU, + up_norm_layer=nn.BatchNorm2d, + up_activation=nn.ReLU(True), + init_conv_kwargs={}, + downsample_conv_kwargs={}, + resnet_conv_kwargs={}, + spatial_transform_layers=None, + spatial_transform_kwargs={}, + max_features=1024, + out_ffc=False, + out_ffc_kwargs={}, + ): + assert n_blocks >= 0 + super().__init__() + """ + init_conv_kwargs = {'ratio_gin': 0, 'ratio_gout': 0, 'enable_lfu': False} + downsample_conv_kwargs = {'ratio_gin': '${generator.init_conv_kwargs.ratio_gout}', 'ratio_gout': '${generator.downsample_conv_kwargs.ratio_gin}', 'enable_lfu': False} + resnet_conv_kwargs = {'ratio_gin': 0.75, 'ratio_gout': '${generator.resnet_conv_kwargs.ratio_gin}', 'enable_lfu': False} + spatial_transform_kwargs = {} + out_ffc_kwargs = {} + """ + """ + print(input_nc, output_nc, ngf, n_downsampling, n_blocks, norm_layer, + padding_type, activation_layer, + up_norm_layer, up_activation, + spatial_transform_layers, + add_out_act, max_features, out_ffc, file=sys.stderr) + + 4 3 64 3 18 + reflect + + ReLU(inplace=True) + None sigmoid 1024 False + """ + init_conv_kwargs = {"ratio_gin": 0, "ratio_gout": 0, "enable_lfu": False} + 
downsample_conv_kwargs = {"ratio_gin": 0, "ratio_gout": 0, "enable_lfu": False} + resnet_conv_kwargs = { + "ratio_gin": 0.75, + "ratio_gout": 0.75, + "enable_lfu": False, + } + spatial_transform_kwargs = {} + out_ffc_kwargs = {} + + model = [ + nn.ReflectionPad2d(3), + FFC_BN_ACT( + input_nc, + ngf, + kernel_size=7, + padding=0, + norm_layer=norm_layer, + activation_layer=activation_layer, + **init_conv_kwargs, + ), + ] + + ### downsample + for i in range(n_downsampling): + mult = 2**i + if i == n_downsampling - 1: + cur_conv_kwargs = dict(downsample_conv_kwargs) + cur_conv_kwargs["ratio_gout"] = resnet_conv_kwargs.get("ratio_gin", 0) + else: + cur_conv_kwargs = downsample_conv_kwargs + model += [ + FFC_BN_ACT( + min(max_features, ngf * mult), + min(max_features, ngf * mult * 2), + kernel_size=3, + stride=2, + padding=1, + norm_layer=norm_layer, + activation_layer=activation_layer, + **cur_conv_kwargs, + ) + ] + + mult = 2**n_downsampling + feats_num_bottleneck = min(max_features, ngf * mult) + + ### resnet blocks + for i in range(n_blocks): + cur_resblock = FFCResnetBlock( + feats_num_bottleneck, + padding_type=padding_type, + activation_layer=activation_layer, + norm_layer=norm_layer, + **resnet_conv_kwargs, + ) + if spatial_transform_layers is not None and i in spatial_transform_layers: + cur_resblock = LearnableSpatialTransformWrapper( + cur_resblock, **spatial_transform_kwargs + ) + model += [cur_resblock] + + model += [ConcatTupleLayer()] + + ### upsample + for i in range(n_downsampling): + mult = 2 ** (n_downsampling - i) + model += [ + nn.ConvTranspose2d( + min(max_features, ngf * mult), + min(max_features, int(ngf * mult / 2)), + kernel_size=3, + stride=2, + padding=1, + output_padding=1, + ), + up_norm_layer(min(max_features, int(ngf * mult / 2))), + up_activation, + ] + + if out_ffc: + model += [ + FFCResnetBlock( + ngf, + padding_type=padding_type, + activation_layer=activation_layer, + norm_layer=norm_layer, + inline=True, + **out_ffc_kwargs, + ) + ] + + model += [ + nn.ReflectionPad2d(3), + nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0), + ] + model.append(nn.Sigmoid()) + self.model = nn.Sequential(*model) + + def forward(self, image, mask): + return self.model(torch.cat([image, mask], dim=1)) + + +class LaMa(nn.Module): + def __init__(self, state_dict) -> None: + super(LaMa, self).__init__() + self.model_arch = "LaMa" + self.sub_type = "Inpaint" + self.in_nc = 4 + self.out_nc = 3 + self.scale = 1 + + self.min_size = None + self.pad_mod = 8 + self.pad_to_square = False + + self.model = FFCResNetGenerator(self.in_nc, self.out_nc) + self.state = { + k.replace("generator.model", "model.model"): v + for k, v in state_dict.items() + } + + self.supports_fp16 = False + self.support_bf16 = True + + self.load_state_dict(self.state, strict=False) + + def forward(self, img, mask): + masked_img = img * (1 - mask) + inpainted_mask = mask * self.model.forward(masked_img, mask) + result = inpainted_mask + (1 - mask) * img + return result diff --git a/ldm_patched/pfn/architecture/OmniSR/ChannelAttention.py b/ldm_patched/pfn/architecture/OmniSR/ChannelAttention.py new file mode 100644 index 0000000000000000000000000000000000000000..f4d52aa1e063d274b7aec7bd1ace77b19eb2ca61 --- /dev/null +++ b/ldm_patched/pfn/architecture/OmniSR/ChannelAttention.py @@ -0,0 +1,110 @@ +import math + +import torch.nn as nn + + +class CA_layer(nn.Module): + def __init__(self, channel, reduction=16): + super(CA_layer, self).__init__() + # global average pooling + self.gap = nn.AdaptiveAvgPool2d(1) + self.fc = 
nn.Sequential( + nn.Conv2d(channel, channel // reduction, kernel_size=(1, 1), bias=False), + nn.GELU(), + nn.Conv2d(channel // reduction, channel, kernel_size=(1, 1), bias=False), + # nn.Sigmoid() + ) + + def forward(self, x): + y = self.fc(self.gap(x)) + return x * y.expand_as(x) + + +class Simple_CA_layer(nn.Module): + def __init__(self, channel): + super(Simple_CA_layer, self).__init__() + self.gap = nn.AdaptiveAvgPool2d(1) + self.fc = nn.Conv2d( + in_channels=channel, + out_channels=channel, + kernel_size=1, + padding=0, + stride=1, + groups=1, + bias=True, + ) + + def forward(self, x): + return x * self.fc(self.gap(x)) + + +class ECA_layer(nn.Module): + """Constructs a ECA module. + Args: + channel: Number of channels of the input feature map + k_size: Adaptive selection of kernel size + """ + + def __init__(self, channel): + super(ECA_layer, self).__init__() + + b = 1 + gamma = 2 + k_size = int(abs(math.log(channel, 2) + b) / gamma) + k_size = k_size if k_size % 2 else k_size + 1 + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.conv = nn.Conv1d( + 1, 1, kernel_size=k_size, padding=(k_size - 1) // 2, bias=False + ) + # self.sigmoid = nn.Sigmoid() + + def forward(self, x): + # x: input features with shape [b, c, h, w] + # b, c, h, w = x.size() + + # feature descriptor on the global spatial information + y = self.avg_pool(x) + + # Two different branches of ECA module + y = self.conv(y.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1) + + # Multi-scale information fusion + # y = self.sigmoid(y) + + return x * y.expand_as(x) + + +class ECA_MaxPool_layer(nn.Module): + """Constructs a ECA module. + Args: + channel: Number of channels of the input feature map + k_size: Adaptive selection of kernel size + """ + + def __init__(self, channel): + super(ECA_MaxPool_layer, self).__init__() + + b = 1 + gamma = 2 + k_size = int(abs(math.log(channel, 2) + b) / gamma) + k_size = k_size if k_size % 2 else k_size + 1 + self.max_pool = nn.AdaptiveMaxPool2d(1) + self.conv = nn.Conv1d( + 1, 1, kernel_size=k_size, padding=(k_size - 1) // 2, bias=False + ) + # self.sigmoid = nn.Sigmoid() + + def forward(self, x): + # x: input features with shape [b, c, h, w] + # b, c, h, w = x.size() + + # feature descriptor on the global spatial information + y = self.max_pool(x) + + # Two different branches of ECA module + y = self.conv(y.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1) + + # Multi-scale information fusion + # y = self.sigmoid(y) + + return x * y.expand_as(x) diff --git a/ldm_patched/pfn/architecture/OmniSR/LICENSE b/ldm_patched/pfn/architecture/OmniSR/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64 --- /dev/null +++ b/ldm_patched/pfn/architecture/OmniSR/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/ldm_patched/pfn/architecture/OmniSR/OSA.py b/ldm_patched/pfn/architecture/OmniSR/OSA.py new file mode 100644 index 0000000000000000000000000000000000000000..d7a129696b254b022fa6fc54dc85befcc19ffc2c --- /dev/null +++ b/ldm_patched/pfn/architecture/OmniSR/OSA.py @@ -0,0 +1,577 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +############################################################# +# File: OSA.py +# Created Date: Tuesday April 28th 2022 +# Author: Chen Xuanhong +# Email: chenxuanhongzju@outlook.com +# Last Modified: Sunday, 23rd April 2023 3:07:42 pm +# Modified By: Chen Xuanhong +# Copyright (c) 2020 Shanghai Jiao Tong University +############################################################# + +import torch +import torch.nn.functional as F +from einops import rearrange, repeat +from einops.layers.torch import Rearrange, Reduce +from torch import einsum, nn + +from .layernorm import LayerNorm2d + +# helpers + + +def exists(val): + return val is not None + + +def default(val, d): + return val if exists(val) else d + + +def cast_tuple(val, length=1): + return val if isinstance(val, tuple) else ((val,) * length) + + +# helper classes + + +class PreNormResidual(nn.Module): + def __init__(self, dim, fn): + super().__init__() + self.norm = nn.LayerNorm(dim) + self.fn = fn + + def forward(self, x): + return self.fn(self.norm(x)) + x + + +class Conv_PreNormResidual(nn.Module): + def __init__(self, dim, fn): + super().__init__() + self.norm = LayerNorm2d(dim) + self.fn = fn + + def forward(self, x): + return self.fn(self.norm(x)) + x + + +class FeedForward(nn.Module): + def __init__(self, dim, mult=2, dropout=0.0): + super().__init__() + inner_dim = int(dim * mult) + self.net = nn.Sequential( + nn.Linear(dim, inner_dim), + nn.GELU(), + nn.Dropout(dropout), + nn.Linear(inner_dim, dim), + nn.Dropout(dropout), + ) + + def forward(self, x): + return self.net(x) + + +class Conv_FeedForward(nn.Module): + def __init__(self, dim, mult=2, dropout=0.0): + super().__init__() + inner_dim = int(dim * mult) + self.net = nn.Sequential( + nn.Conv2d(dim, inner_dim, 1, 1, 0), + nn.GELU(), + nn.Dropout(dropout), + nn.Conv2d(inner_dim, dim, 1, 1, 0), + nn.Dropout(dropout), + ) + + def forward(self, x): + return self.net(x) + + +class Gated_Conv_FeedForward(nn.Module): + def __init__(self, dim, mult=1, bias=False, dropout=0.0): + super().__init__() + + hidden_features = int(dim * mult) + + self.project_in = nn.Conv2d(dim, hidden_features * 2, kernel_size=1, bias=bias) + + self.dwconv = nn.Conv2d( + hidden_features * 2, + hidden_features * 2, + kernel_size=3, + stride=1, + padding=1, + groups=hidden_features * 2, + bias=bias, + ) + + self.project_out = nn.Conv2d(hidden_features, dim, kernel_size=1, bias=bias) + + def forward(self, x): + x = self.project_in(x) + x1, x2 = self.dwconv(x).chunk(2, dim=1) + x = F.gelu(x1) * x2 + x = self.project_out(x) + return x + + +# MBConv + + +class SqueezeExcitation(nn.Module): + def __init__(self, dim, shrinkage_rate=0.25): + super().__init__() + hidden_dim = int(dim * shrinkage_rate) + + self.gate = nn.Sequential( + Reduce("b c h w -> b c", "mean"), + nn.Linear(dim, hidden_dim, bias=False), + nn.SiLU(), + nn.Linear(hidden_dim, dim, bias=False), + nn.Sigmoid(), + Rearrange("b c -> b c 1 1"), + ) + + def forward(self, x): + return x * self.gate(x) + + +class MBConvResidual(nn.Module): + def __init__(self, fn, dropout=0.0): + super().__init__() + self.fn = fn + self.dropsample = Dropsample(dropout) + + def forward(self, x): + out = self.fn(x) + out = 
self.dropsample(out) + return out + x + + +class Dropsample(nn.Module): + def __init__(self, prob=0): + super().__init__() + self.prob = prob + + def forward(self, x): + device = x.device + + if self.prob == 0.0 or (not self.training): + return x + + keep_mask = ( + torch.FloatTensor((x.shape[0], 1, 1, 1), device=device).uniform_() + > self.prob + ) + return x * keep_mask / (1 - self.prob) + + +def MBConv( + dim_in, dim_out, *, downsample, expansion_rate=4, shrinkage_rate=0.25, dropout=0.0 +): + hidden_dim = int(expansion_rate * dim_out) + stride = 2 if downsample else 1 + + net = nn.Sequential( + nn.Conv2d(dim_in, hidden_dim, 1), + # nn.BatchNorm2d(hidden_dim), + nn.GELU(), + nn.Conv2d( + hidden_dim, hidden_dim, 3, stride=stride, padding=1, groups=hidden_dim + ), + # nn.BatchNorm2d(hidden_dim), + nn.GELU(), + SqueezeExcitation(hidden_dim, shrinkage_rate=shrinkage_rate), + nn.Conv2d(hidden_dim, dim_out, 1), + # nn.BatchNorm2d(dim_out) + ) + + if dim_in == dim_out and not downsample: + net = MBConvResidual(net, dropout=dropout) + + return net + + +# attention related classes +class Attention(nn.Module): + def __init__( + self, + dim, + dim_head=32, + dropout=0.0, + window_size=7, + with_pe=True, + ): + super().__init__() + assert ( + dim % dim_head + ) == 0, "dimension should be divisible by dimension per head" + + self.heads = dim // dim_head + self.scale = dim_head**-0.5 + self.with_pe = with_pe + + self.to_qkv = nn.Linear(dim, dim * 3, bias=False) + + self.attend = nn.Sequential(nn.Softmax(dim=-1), nn.Dropout(dropout)) + + self.to_out = nn.Sequential( + nn.Linear(dim, dim, bias=False), nn.Dropout(dropout) + ) + + # relative positional bias + if self.with_pe: + self.rel_pos_bias = nn.Embedding((2 * window_size - 1) ** 2, self.heads) + + pos = torch.arange(window_size) + grid = torch.stack(torch.meshgrid(pos, pos)) + grid = rearrange(grid, "c i j -> (i j) c") + rel_pos = rearrange(grid, "i ... -> i 1 ...") - rearrange( + grid, "j ... -> 1 j ..." + ) + rel_pos += window_size - 1 + rel_pos_indices = (rel_pos * torch.tensor([2 * window_size - 1, 1])).sum( + dim=-1 + ) + + self.register_buffer("rel_pos_indices", rel_pos_indices, persistent=False) + + def forward(self, x): + batch, height, width, window_height, window_width, _, device, h = ( + *x.shape, + x.device, + self.heads, + ) + + # flatten + + x = rearrange(x, "b x y w1 w2 d -> (b x y) (w1 w2) d") + + # project for queries, keys, values + + q, k, v = self.to_qkv(x).chunk(3, dim=-1) + + # split heads + + q, k, v = map(lambda t: rearrange(t, "b n (h d ) -> b h n d", h=h), (q, k, v)) + + # scale + + q = q * self.scale + + # sim + + sim = einsum("b h i d, b h j d -> b h i j", q, k) + + # add positional bias + if self.with_pe: + bias = self.rel_pos_bias(self.rel_pos_indices) + sim = sim + rearrange(bias, "i j h -> h i j") + + # attention + + attn = self.attend(sim) + + # aggregate + + out = einsum("b h i j, b h j d -> b h i d", attn, v) + + # merge heads + + out = rearrange( + out, "b h (w1 w2) d -> b w1 w2 (h d)", w1=window_height, w2=window_width + ) + + # combine heads out + + out = self.to_out(out) + return rearrange(out, "(b x y) ... 
-> b x y ...", x=height, y=width) + + +class Block_Attention(nn.Module): + def __init__( + self, + dim, + dim_head=32, + bias=False, + dropout=0.0, + window_size=7, + with_pe=True, + ): + super().__init__() + assert ( + dim % dim_head + ) == 0, "dimension should be divisible by dimension per head" + + self.heads = dim // dim_head + self.ps = window_size + self.scale = dim_head**-0.5 + self.with_pe = with_pe + + self.qkv = nn.Conv2d(dim, dim * 3, kernel_size=1, bias=bias) + self.qkv_dwconv = nn.Conv2d( + dim * 3, + dim * 3, + kernel_size=3, + stride=1, + padding=1, + groups=dim * 3, + bias=bias, + ) + + self.attend = nn.Sequential(nn.Softmax(dim=-1), nn.Dropout(dropout)) + + self.to_out = nn.Conv2d(dim, dim, kernel_size=1, bias=bias) + + def forward(self, x): + # project for queries, keys, values + b, c, h, w = x.shape + + qkv = self.qkv_dwconv(self.qkv(x)) + q, k, v = qkv.chunk(3, dim=1) + + # split heads + + q, k, v = map( + lambda t: rearrange( + t, + "b (h d) (x w1) (y w2) -> (b x y) h (w1 w2) d", + h=self.heads, + w1=self.ps, + w2=self.ps, + ), + (q, k, v), + ) + + # scale + + q = q * self.scale + + # sim + + sim = einsum("b h i d, b h j d -> b h i j", q, k) + + # attention + attn = self.attend(sim) + + # aggregate + + out = einsum("b h i j, b h j d -> b h i d", attn, v) + + # merge heads + out = rearrange( + out, + "(b x y) head (w1 w2) d -> b (head d) (x w1) (y w2)", + x=h // self.ps, + y=w // self.ps, + head=self.heads, + w1=self.ps, + w2=self.ps, + ) + + out = self.to_out(out) + return out + + +class Channel_Attention(nn.Module): + def __init__(self, dim, heads, bias=False, dropout=0.0, window_size=7): + super(Channel_Attention, self).__init__() + self.heads = heads + + self.temperature = nn.Parameter(torch.ones(heads, 1, 1)) + + self.ps = window_size + + self.qkv = nn.Conv2d(dim, dim * 3, kernel_size=1, bias=bias) + self.qkv_dwconv = nn.Conv2d( + dim * 3, + dim * 3, + kernel_size=3, + stride=1, + padding=1, + groups=dim * 3, + bias=bias, + ) + self.project_out = nn.Conv2d(dim, dim, kernel_size=1, bias=bias) + + def forward(self, x): + b, c, h, w = x.shape + + qkv = self.qkv_dwconv(self.qkv(x)) + qkv = qkv.chunk(3, dim=1) + + q, k, v = map( + lambda t: rearrange( + t, + "b (head d) (h ph) (w pw) -> b (h w) head d (ph pw)", + ph=self.ps, + pw=self.ps, + head=self.heads, + ), + qkv, + ) + + q = F.normalize(q, dim=-1) + k = F.normalize(k, dim=-1) + + attn = (q @ k.transpose(-2, -1)) * self.temperature + attn = attn.softmax(dim=-1) + out = attn @ v + + out = rearrange( + out, + "b (h w) head d (ph pw) -> b (head d) (h ph) (w pw)", + h=h // self.ps, + w=w // self.ps, + ph=self.ps, + pw=self.ps, + head=self.heads, + ) + + out = self.project_out(out) + + return out + + +class Channel_Attention_grid(nn.Module): + def __init__(self, dim, heads, bias=False, dropout=0.0, window_size=7): + super(Channel_Attention_grid, self).__init__() + self.heads = heads + + self.temperature = nn.Parameter(torch.ones(heads, 1, 1)) + + self.ps = window_size + + self.qkv = nn.Conv2d(dim, dim * 3, kernel_size=1, bias=bias) + self.qkv_dwconv = nn.Conv2d( + dim * 3, + dim * 3, + kernel_size=3, + stride=1, + padding=1, + groups=dim * 3, + bias=bias, + ) + self.project_out = nn.Conv2d(dim, dim, kernel_size=1, bias=bias) + + def forward(self, x): + b, c, h, w = x.shape + + qkv = self.qkv_dwconv(self.qkv(x)) + qkv = qkv.chunk(3, dim=1) + + q, k, v = map( + lambda t: rearrange( + t, + "b (head d) (h ph) (w pw) -> b (ph pw) head d (h w)", + ph=self.ps, + pw=self.ps, + head=self.heads, + ), + qkv, + ) + + q = 
F.normalize(q, dim=-1) + k = F.normalize(k, dim=-1) + + attn = (q @ k.transpose(-2, -1)) * self.temperature + attn = attn.softmax(dim=-1) + out = attn @ v + + out = rearrange( + out, + "b (ph pw) head d (h w) -> b (head d) (h ph) (w pw)", + h=h // self.ps, + w=w // self.ps, + ph=self.ps, + pw=self.ps, + head=self.heads, + ) + + out = self.project_out(out) + + return out + + +class OSA_Block(nn.Module): + def __init__( + self, + channel_num=64, + bias=True, + ffn_bias=True, + window_size=8, + with_pe=False, + dropout=0.0, + ): + super(OSA_Block, self).__init__() + + w = window_size + + self.layer = nn.Sequential( + MBConv( + channel_num, + channel_num, + downsample=False, + expansion_rate=1, + shrinkage_rate=0.25, + ), + Rearrange( + "b d (x w1) (y w2) -> b x y w1 w2 d", w1=w, w2=w + ), # block-like attention + PreNormResidual( + channel_num, + Attention( + dim=channel_num, + dim_head=channel_num // 4, + dropout=dropout, + window_size=window_size, + with_pe=with_pe, + ), + ), + Rearrange("b x y w1 w2 d -> b d (x w1) (y w2)"), + Conv_PreNormResidual( + channel_num, Gated_Conv_FeedForward(dim=channel_num, dropout=dropout) + ), + # channel-like attention + Conv_PreNormResidual( + channel_num, + Channel_Attention( + dim=channel_num, heads=4, dropout=dropout, window_size=window_size + ), + ), + Conv_PreNormResidual( + channel_num, Gated_Conv_FeedForward(dim=channel_num, dropout=dropout) + ), + Rearrange( + "b d (w1 x) (w2 y) -> b x y w1 w2 d", w1=w, w2=w + ), # grid-like attention + PreNormResidual( + channel_num, + Attention( + dim=channel_num, + dim_head=channel_num // 4, + dropout=dropout, + window_size=window_size, + with_pe=with_pe, + ), + ), + Rearrange("b x y w1 w2 d -> b d (w1 x) (w2 y)"), + Conv_PreNormResidual( + channel_num, Gated_Conv_FeedForward(dim=channel_num, dropout=dropout) + ), + # channel-like attention + Conv_PreNormResidual( + channel_num, + Channel_Attention_grid( + dim=channel_num, heads=4, dropout=dropout, window_size=window_size + ), + ), + Conv_PreNormResidual( + channel_num, Gated_Conv_FeedForward(dim=channel_num, dropout=dropout) + ), + ) + + def forward(self, x): + out = self.layer(x) + return out diff --git a/ldm_patched/pfn/architecture/OmniSR/OSAG.py b/ldm_patched/pfn/architecture/OmniSR/OSAG.py new file mode 100644 index 0000000000000000000000000000000000000000..477e81f9da4eb1db9b5ec418549d75dd591209ec --- /dev/null +++ b/ldm_patched/pfn/architecture/OmniSR/OSAG.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +############################################################# +# File: OSAG.py +# Created Date: Tuesday April 28th 2022 +# Author: Chen Xuanhong +# Email: chenxuanhongzju@outlook.com +# Last Modified: Sunday, 23rd April 2023 3:08:49 pm +# Modified By: Chen Xuanhong +# Copyright (c) 2020 Shanghai Jiao Tong University +############################################################# + + +import torch.nn as nn + +from .esa import ESA +from .OSA import OSA_Block + + +class OSAG(nn.Module): + def __init__( + self, + channel_num=64, + bias=True, + block_num=4, + ffn_bias=False, + window_size=0, + pe=False, + ): + super(OSAG, self).__init__() + + # print("window_size: %d" % (window_size)) + # print("with_pe", pe) + # print("ffn_bias: %d" % (ffn_bias)) + + # block_script_name = kwargs.get("block_script_name", "OSA") + # block_class_name = kwargs.get("block_class_name", "OSA_Block") + + # script_name = "." 
+ block_script_name + # package = __import__(script_name, fromlist=True) + block_class = OSA_Block # getattr(package, block_class_name) + group_list = [] + for _ in range(block_num): + temp_res = block_class( + channel_num, + bias, + ffn_bias=ffn_bias, + window_size=window_size, + with_pe=pe, + ) + group_list.append(temp_res) + group_list.append(nn.Conv2d(channel_num, channel_num, 1, 1, 0, bias=bias)) + self.residual_layer = nn.Sequential(*group_list) + esa_channel = max(channel_num // 4, 16) + self.esa = ESA(esa_channel, channel_num) + + def forward(self, x): + out = self.residual_layer(x) + out = out + x + return self.esa(out) diff --git a/ldm_patched/pfn/architecture/OmniSR/OmniSR.py b/ldm_patched/pfn/architecture/OmniSR/OmniSR.py new file mode 100644 index 0000000000000000000000000000000000000000..1e1c3f35e657fb972d4209456719a61163831385 --- /dev/null +++ b/ldm_patched/pfn/architecture/OmniSR/OmniSR.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +############################################################# +# File: OmniSR.py +# Created Date: Tuesday April 28th 2022 +# Author: Chen Xuanhong +# Email: chenxuanhongzju@outlook.com +# Last Modified: Sunday, 23rd April 2023 3:06:36 pm +# Modified By: Chen Xuanhong +# Copyright (c) 2020 Shanghai Jiao Tong University +############################################################# + +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from .OSAG import OSAG +from .pixelshuffle import pixelshuffle_block + + +class OmniSR(nn.Module): + def __init__( + self, + state_dict, + **kwargs, + ): + super(OmniSR, self).__init__() + self.state = state_dict + + bias = True # Fine to assume this for now + block_num = 1 # Fine to assume this for now + ffn_bias = True + pe = True + + num_feat = state_dict["input.weight"].shape[0] or 64 + num_in_ch = state_dict["input.weight"].shape[1] or 3 + num_out_ch = num_in_ch # we can just assume this for now. 
pixelshuffle smh + + pixelshuffle_shape = state_dict["up.0.weight"].shape[0] + up_scale = math.sqrt(pixelshuffle_shape / num_out_ch) + if up_scale - int(up_scale) > 0: + print( + "out_nc is probably different than in_nc, scale calculation might be wrong" + ) + up_scale = int(up_scale) + res_num = 0 + for key in state_dict.keys(): + if "residual_layer" in key: + temp_res_num = int(key.split(".")[1]) + if temp_res_num > res_num: + res_num = temp_res_num + res_num = res_num + 1 # zero-indexed + + residual_layer = [] + self.res_num = res_num + + if ( + "residual_layer.0.residual_layer.0.layer.2.fn.rel_pos_bias.weight" + in state_dict.keys() + ): + rel_pos_bias_weight = state_dict[ + "residual_layer.0.residual_layer.0.layer.2.fn.rel_pos_bias.weight" + ].shape[0] + self.window_size = int((math.sqrt(rel_pos_bias_weight) + 1) / 2) + else: + self.window_size = 8 + + self.up_scale = up_scale + + for _ in range(res_num): + temp_res = OSAG( + channel_num=num_feat, + bias=bias, + block_num=block_num, + ffn_bias=ffn_bias, + window_size=self.window_size, + pe=pe, + ) + residual_layer.append(temp_res) + self.residual_layer = nn.Sequential(*residual_layer) + self.input = nn.Conv2d( + in_channels=num_in_ch, + out_channels=num_feat, + kernel_size=3, + stride=1, + padding=1, + bias=bias, + ) + self.output = nn.Conv2d( + in_channels=num_feat, + out_channels=num_feat, + kernel_size=3, + stride=1, + padding=1, + bias=bias, + ) + self.up = pixelshuffle_block(num_feat, num_out_ch, up_scale, bias=bias) + + # self.tail = pixelshuffle_block(num_feat,num_out_ch,up_scale,bias=bias) + + # for m in self.modules(): + # if isinstance(m, nn.Conv2d): + # n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + # m.weight.data.normal_(0, sqrt(2. / n)) + + # chaiNNer specific stuff + self.model_arch = "OmniSR" + self.sub_type = "SR" + self.in_nc = num_in_ch + self.out_nc = num_out_ch + self.num_feat = num_feat + self.scale = up_scale + + self.supports_fp16 = True # TODO: Test this + self.supports_bfp16 = True + self.min_size_restriction = 16 + + self.load_state_dict(state_dict, strict=False) + + def check_image_size(self, x): + _, _, h, w = x.size() + # import pdb; pdb.set_trace() + mod_pad_h = (self.window_size - h % self.window_size) % self.window_size + mod_pad_w = (self.window_size - w % self.window_size) % self.window_size + # x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h), 'reflect') + x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h), "constant", 0) + return x + + def forward(self, x): + H, W = x.shape[2:] + x = self.check_image_size(x) + + residual = self.input(x) + out = self.residual_layer(residual) + + # origin + out = torch.add(self.output(out), residual) + out = self.up(out) + + out = out[:, :, : H * self.up_scale, : W * self.up_scale] + return out diff --git a/ldm_patched/pfn/architecture/OmniSR/esa.py b/ldm_patched/pfn/architecture/OmniSR/esa.py new file mode 100644 index 0000000000000000000000000000000000000000..f9ce7f7a60bfe20b3737eaa2e3110fd460a2d104 --- /dev/null +++ b/ldm_patched/pfn/architecture/OmniSR/esa.py @@ -0,0 +1,294 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +############################################################# +# File: esa.py +# Created Date: Tuesday April 28th 2022 +# Author: Chen Xuanhong +# Email: chenxuanhongzju@outlook.com +# Last Modified: Thursday, 20th April 2023 9:28:06 am +# Modified By: Chen Xuanhong +# Copyright (c) 2020 Shanghai Jiao Tong University +############################################################# + +import torch +import torch.nn as nn +import torch.nn.functional 
as F + +from .layernorm import LayerNorm2d + + +def moment(x, dim=(2, 3), k=2): + assert len(x.size()) == 4 + mean = torch.mean(x, dim=dim).unsqueeze(-1).unsqueeze(-1) + mk = (1 / (x.size(2) * x.size(3))) * torch.sum(torch.pow(x - mean, k), dim=dim) + return mk + + +class ESA(nn.Module): + """ + Modification of Enhanced Spatial Attention (ESA), which is proposed by + `Residual Feature Aggregation Network for Image Super-Resolution` + Note: `conv_max` and `conv3_` are NOT used here, so the corresponding codes + are deleted. + """ + + def __init__(self, esa_channels, n_feats, conv=nn.Conv2d): + super(ESA, self).__init__() + f = esa_channels + self.conv1 = conv(n_feats, f, kernel_size=1) + self.conv_f = conv(f, f, kernel_size=1) + self.conv2 = conv(f, f, kernel_size=3, stride=2, padding=0) + self.conv3 = conv(f, f, kernel_size=3, padding=1) + self.conv4 = conv(f, n_feats, kernel_size=1) + self.sigmoid = nn.Sigmoid() + self.relu = nn.ReLU(inplace=True) + + def forward(self, x): + c1_ = self.conv1(x) + c1 = self.conv2(c1_) + v_max = F.max_pool2d(c1, kernel_size=7, stride=3) + c3 = self.conv3(v_max) + c3 = F.interpolate( + c3, (x.size(2), x.size(3)), mode="bilinear", align_corners=False + ) + cf = self.conv_f(c1_) + c4 = self.conv4(c3 + cf) + m = self.sigmoid(c4) + return x * m + + +class LK_ESA(nn.Module): + def __init__( + self, esa_channels, n_feats, conv=nn.Conv2d, kernel_expand=1, bias=True + ): + super(LK_ESA, self).__init__() + f = esa_channels + self.conv1 = conv(n_feats, f, kernel_size=1) + self.conv_f = conv(f, f, kernel_size=1) + + kernel_size = 17 + kernel_expand = kernel_expand + padding = kernel_size // 2 + + self.vec_conv = nn.Conv2d( + in_channels=f * kernel_expand, + out_channels=f * kernel_expand, + kernel_size=(1, kernel_size), + padding=(0, padding), + groups=2, + bias=bias, + ) + self.vec_conv3x1 = nn.Conv2d( + in_channels=f * kernel_expand, + out_channels=f * kernel_expand, + kernel_size=(1, 3), + padding=(0, 1), + groups=2, + bias=bias, + ) + + self.hor_conv = nn.Conv2d( + in_channels=f * kernel_expand, + out_channels=f * kernel_expand, + kernel_size=(kernel_size, 1), + padding=(padding, 0), + groups=2, + bias=bias, + ) + self.hor_conv1x3 = nn.Conv2d( + in_channels=f * kernel_expand, + out_channels=f * kernel_expand, + kernel_size=(3, 1), + padding=(1, 0), + groups=2, + bias=bias, + ) + + self.conv4 = conv(f, n_feats, kernel_size=1) + self.sigmoid = nn.Sigmoid() + self.relu = nn.ReLU(inplace=True) + + def forward(self, x): + c1_ = self.conv1(x) + + res = self.vec_conv(c1_) + self.vec_conv3x1(c1_) + res = self.hor_conv(res) + self.hor_conv1x3(res) + + cf = self.conv_f(c1_) + c4 = self.conv4(res + cf) + m = self.sigmoid(c4) + return x * m + + +class LK_ESA_LN(nn.Module): + def __init__( + self, esa_channels, n_feats, conv=nn.Conv2d, kernel_expand=1, bias=True + ): + super(LK_ESA_LN, self).__init__() + f = esa_channels + self.conv1 = conv(n_feats, f, kernel_size=1) + self.conv_f = conv(f, f, kernel_size=1) + + kernel_size = 17 + kernel_expand = kernel_expand + padding = kernel_size // 2 + + self.norm = LayerNorm2d(n_feats) + + self.vec_conv = nn.Conv2d( + in_channels=f * kernel_expand, + out_channels=f * kernel_expand, + kernel_size=(1, kernel_size), + padding=(0, padding), + groups=2, + bias=bias, + ) + self.vec_conv3x1 = nn.Conv2d( + in_channels=f * kernel_expand, + out_channels=f * kernel_expand, + kernel_size=(1, 3), + padding=(0, 1), + groups=2, + bias=bias, + ) + + self.hor_conv = nn.Conv2d( + in_channels=f * kernel_expand, + out_channels=f * kernel_expand, + 
kernel_size=(kernel_size, 1), + padding=(padding, 0), + groups=2, + bias=bias, + ) + self.hor_conv1x3 = nn.Conv2d( + in_channels=f * kernel_expand, + out_channels=f * kernel_expand, + kernel_size=(3, 1), + padding=(1, 0), + groups=2, + bias=bias, + ) + + self.conv4 = conv(f, n_feats, kernel_size=1) + self.sigmoid = nn.Sigmoid() + self.relu = nn.ReLU(inplace=True) + + def forward(self, x): + c1_ = self.norm(x) + c1_ = self.conv1(c1_) + + res = self.vec_conv(c1_) + self.vec_conv3x1(c1_) + res = self.hor_conv(res) + self.hor_conv1x3(res) + + cf = self.conv_f(c1_) + c4 = self.conv4(res + cf) + m = self.sigmoid(c4) + return x * m + + +class AdaGuidedFilter(nn.Module): + def __init__( + self, esa_channels, n_feats, conv=nn.Conv2d, kernel_expand=1, bias=True + ): + super(AdaGuidedFilter, self).__init__() + + self.gap = nn.AdaptiveAvgPool2d(1) + self.fc = nn.Conv2d( + in_channels=n_feats, + out_channels=1, + kernel_size=1, + padding=0, + stride=1, + groups=1, + bias=True, + ) + + self.r = 5 + + def box_filter(self, x, r): + channel = x.shape[1] + kernel_size = 2 * r + 1 + weight = 1.0 / (kernel_size**2) + box_kernel = weight * torch.ones( + (channel, 1, kernel_size, kernel_size), dtype=torch.float32, device=x.device + ) + output = F.conv2d(x, weight=box_kernel, stride=1, padding=r, groups=channel) + return output + + def forward(self, x): + _, _, H, W = x.shape + N = self.box_filter( + torch.ones((1, 1, H, W), dtype=x.dtype, device=x.device), self.r + ) + + # epsilon = self.fc(self.gap(x)) + # epsilon = torch.pow(epsilon, 2) + epsilon = 1e-2 + + mean_x = self.box_filter(x, self.r) / N + var_x = self.box_filter(x * x, self.r) / N - mean_x * mean_x + + A = var_x / (var_x + epsilon) + b = (1 - A) * mean_x + m = A * x + b + + # mean_A = self.box_filter(A, self.r) / N + # mean_b = self.box_filter(b, self.r) / N + # m = mean_A * x + mean_b + return x * m + + +class AdaConvGuidedFilter(nn.Module): + def __init__( + self, esa_channels, n_feats, conv=nn.Conv2d, kernel_expand=1, bias=True + ): + super(AdaConvGuidedFilter, self).__init__() + f = esa_channels + + self.conv_f = conv(f, f, kernel_size=1) + + kernel_size = 17 + kernel_expand = kernel_expand + padding = kernel_size // 2 + + self.vec_conv = nn.Conv2d( + in_channels=f, + out_channels=f, + kernel_size=(1, kernel_size), + padding=(0, padding), + groups=f, + bias=bias, + ) + + self.hor_conv = nn.Conv2d( + in_channels=f, + out_channels=f, + kernel_size=(kernel_size, 1), + padding=(padding, 0), + groups=f, + bias=bias, + ) + + self.gap = nn.AdaptiveAvgPool2d(1) + self.fc = nn.Conv2d( + in_channels=f, + out_channels=f, + kernel_size=1, + padding=0, + stride=1, + groups=1, + bias=True, + ) + + def forward(self, x): + y = self.vec_conv(x) + y = self.hor_conv(y) + + sigma = torch.pow(y, 2) + epsilon = self.fc(self.gap(y)) + + weight = sigma / (sigma + epsilon) + + m = weight * x + (1 - weight) + + return x * m diff --git a/ldm_patched/pfn/architecture/OmniSR/layernorm.py b/ldm_patched/pfn/architecture/OmniSR/layernorm.py new file mode 100644 index 0000000000000000000000000000000000000000..731a25f7542d45757a284648055d7c6ffad4c3fd --- /dev/null +++ b/ldm_patched/pfn/architecture/OmniSR/layernorm.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +############################################################# +# File: layernorm.py +# Created Date: Tuesday April 28th 2022 +# Author: Chen Xuanhong +# Email: chenxuanhongzju@outlook.com +# Last Modified: Thursday, 20th April 2023 9:28:20 am +# Modified By: Chen Xuanhong +# Copyright (c) 2020 
Shanghai Jiao Tong University +############################################################# + +import torch +import torch.nn as nn + + +class LayerNormFunction(torch.autograd.Function): + @staticmethod + def forward(ctx, x, weight, bias, eps): + ctx.eps = eps + N, C, H, W = x.size() + mu = x.mean(1, keepdim=True) + var = (x - mu).pow(2).mean(1, keepdim=True) + y = (x - mu) / (var + eps).sqrt() + ctx.save_for_backward(y, var, weight) + y = weight.view(1, C, 1, 1) * y + bias.view(1, C, 1, 1) + return y + + @staticmethod + def backward(ctx, grad_output): + eps = ctx.eps + + N, C, H, W = grad_output.size() + y, var, weight = ctx.saved_variables + g = grad_output * weight.view(1, C, 1, 1) + mean_g = g.mean(dim=1, keepdim=True) + + mean_gy = (g * y).mean(dim=1, keepdim=True) + gx = 1.0 / torch.sqrt(var + eps) * (g - y * mean_gy - mean_g) + return ( + gx, + (grad_output * y).sum(dim=3).sum(dim=2).sum(dim=0), + grad_output.sum(dim=3).sum(dim=2).sum(dim=0), + None, + ) + + +class LayerNorm2d(nn.Module): + def __init__(self, channels, eps=1e-6): + super(LayerNorm2d, self).__init__() + self.register_parameter("weight", nn.Parameter(torch.ones(channels))) + self.register_parameter("bias", nn.Parameter(torch.zeros(channels))) + self.eps = eps + + def forward(self, x): + return LayerNormFunction.apply(x, self.weight, self.bias, self.eps) + + +class GRN(nn.Module): + """GRN (Global Response Normalization) layer""" + + def __init__(self, dim): + super().__init__() + self.gamma = nn.Parameter(torch.zeros(1, dim, 1, 1)) + self.beta = nn.Parameter(torch.zeros(1, dim, 1, 1)) + + def forward(self, x): + Gx = torch.norm(x, p=2, dim=(2, 3), keepdim=True) + Nx = Gx / (Gx.mean(dim=1, keepdim=True) + 1e-6) + return self.gamma * (x * Nx) + self.beta + x diff --git a/ldm_patched/pfn/architecture/OmniSR/pixelshuffle.py b/ldm_patched/pfn/architecture/OmniSR/pixelshuffle.py new file mode 100644 index 0000000000000000000000000000000000000000..4260fb7c9d8d912e34899ce7877595b617f9bb02 --- /dev/null +++ b/ldm_patched/pfn/architecture/OmniSR/pixelshuffle.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +############################################################# +# File: pixelshuffle.py +# Created Date: Friday July 1st 2022 +# Author: Chen Xuanhong +# Email: chenxuanhongzju@outlook.com +# Last Modified: Friday, 1st July 2022 10:18:39 am +# Modified By: Chen Xuanhong +# Copyright (c) 2022 Shanghai Jiao Tong University +############################################################# + +import torch.nn as nn + + +def pixelshuffle_block( + in_channels, out_channels, upscale_factor=2, kernel_size=3, bias=False +): + """ + Upsample features according to `upscale_factor`. + """ + padding = kernel_size // 2 + conv = nn.Conv2d( + in_channels, + out_channels * (upscale_factor**2), + kernel_size, + padding=1, + bias=bias, + ) + pixel_shuffle = nn.PixelShuffle(upscale_factor) + return nn.Sequential(*[conv, pixel_shuffle]) diff --git a/ldm_patched/pfn/architecture/RRDB.py b/ldm_patched/pfn/architecture/RRDB.py new file mode 100644 index 0000000000000000000000000000000000000000..b50db7c24a8e6edc9154168a3d807c9219cb8cea --- /dev/null +++ b/ldm_patched/pfn/architecture/RRDB.py @@ -0,0 +1,296 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import functools +import math +import re +from collections import OrderedDict + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from . 
import block as B + + +# Borrowed from https://github.com/rlaphoenix/VSGAN/blob/master/vsgan/archs/ESRGAN.py +# Which enhanced stuff that was already here +class RRDBNet(nn.Module): + def __init__( + self, + state_dict, + norm=None, + act: str = "leakyrelu", + upsampler: str = "upconv", + mode: B.ConvMode = "CNA", + ) -> None: + """ + ESRGAN - Enhanced Super-Resolution Generative Adversarial Networks. + By Xintao Wang, Ke Yu, Shixiang Wu, Jinjin Gu, Yihao Liu, Chao Dong, Yu Qiao, + and Chen Change Loy. + This is old-arch Residual in Residual Dense Block Network and is not + the newest revision that's available at github.com/xinntao/ESRGAN. + This is on purpose, the newest Network has severely limited the + potential use of the Network with no benefits. + This network supports model files from both new and old-arch. + Args: + norm: Normalization layer + act: Activation layer + upsampler: Upsample layer. upconv, pixel_shuffle + mode: Convolution mode + """ + super(RRDBNet, self).__init__() + self.model_arch = "ESRGAN" + self.sub_type = "SR" + + self.state = state_dict + self.norm = norm + self.act = act + self.upsampler = upsampler + self.mode = mode + + self.state_map = { + # currently supports old, new, and newer RRDBNet arch models + # ESRGAN, BSRGAN/RealSR, Real-ESRGAN + "model.0.weight": ("conv_first.weight",), + "model.0.bias": ("conv_first.bias",), + "model.1.sub./NB/.weight": ("trunk_conv.weight", "conv_body.weight"), + "model.1.sub./NB/.bias": ("trunk_conv.bias", "conv_body.bias"), + r"model.1.sub.\1.RDB\2.conv\3.0.\4": ( + r"RRDB_trunk\.(\d+)\.RDB(\d)\.conv(\d+)\.(weight|bias)", + r"body\.(\d+)\.rdb(\d)\.conv(\d+)\.(weight|bias)", + ), + } + if "params_ema" in self.state: + self.state = self.state["params_ema"] + # self.model_arch = "RealESRGAN" + self.num_blocks = self.get_num_blocks() + self.plus = any("conv1x1" in k for k in self.state.keys()) + if self.plus: + self.model_arch = "ESRGAN+" + + self.state = self.new_to_old_arch(self.state) + + self.key_arr = list(self.state.keys()) + + self.in_nc: int = self.state[self.key_arr[0]].shape[1] + self.out_nc: int = self.state[self.key_arr[-1]].shape[0] + + self.scale: int = self.get_scale() + self.num_filters: int = self.state[self.key_arr[0]].shape[0] + + c2x2 = False + if self.state["model.0.weight"].shape[-2] == 2: + c2x2 = True + self.scale = round(math.sqrt(self.scale / 4)) + self.model_arch = "ESRGAN-2c2" + + self.supports_fp16 = True + self.supports_bfp16 = True + self.min_size_restriction = None + + # Detect if pixelunshuffle was used (Real-ESRGAN) + if self.in_nc in (self.out_nc * 4, self.out_nc * 16) and self.out_nc in ( + self.in_nc / 4, + self.in_nc / 16, + ): + self.shuffle_factor = int(math.sqrt(self.in_nc / self.out_nc)) + else: + self.shuffle_factor = None + + upsample_block = { + "upconv": B.upconv_block, + "pixel_shuffle": B.pixelshuffle_block, + }.get(self.upsampler) + if upsample_block is None: + raise NotImplementedError(f"Upsample mode [{self.upsampler}] is not found") + + if self.scale == 3: + upsample_blocks = upsample_block( + in_nc=self.num_filters, + out_nc=self.num_filters, + upscale_factor=3, + act_type=self.act, + c2x2=c2x2, + ) + else: + upsample_blocks = [ + upsample_block( + in_nc=self.num_filters, + out_nc=self.num_filters, + act_type=self.act, + c2x2=c2x2, + ) + for _ in range(int(math.log(self.scale, 2))) + ] + + self.model = B.sequential( + # fea conv + B.conv_block( + in_nc=self.in_nc, + out_nc=self.num_filters, + kernel_size=3, + norm_type=None, + act_type=None, + c2x2=c2x2, + ), + 
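# trunk: the RRDB blocks and LR conv below are wrapped in a ShortcutBlock, i.e. a residual connection around the whole body +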
B.ShortcutBlock( + B.sequential( + # rrdb blocks + *[ + B.RRDB( + nf=self.num_filters, + kernel_size=3, + gc=32, + stride=1, + bias=True, + pad_type="zero", + norm_type=self.norm, + act_type=self.act, + mode="CNA", + plus=self.plus, + c2x2=c2x2, + ) + for _ in range(self.num_blocks) + ], + # lr conv + B.conv_block( + in_nc=self.num_filters, + out_nc=self.num_filters, + kernel_size=3, + norm_type=self.norm, + act_type=None, + mode=self.mode, + c2x2=c2x2, + ), + ) + ), + *upsample_blocks, + # hr_conv0 + B.conv_block( + in_nc=self.num_filters, + out_nc=self.num_filters, + kernel_size=3, + norm_type=None, + act_type=self.act, + c2x2=c2x2, + ), + # hr_conv1 + B.conv_block( + in_nc=self.num_filters, + out_nc=self.out_nc, + kernel_size=3, + norm_type=None, + act_type=None, + c2x2=c2x2, + ), + ) + + # Adjust these properties for calculations outside of the model + if self.shuffle_factor: + self.in_nc //= self.shuffle_factor**2 + self.scale //= self.shuffle_factor + + self.load_state_dict(self.state, strict=False) + + def new_to_old_arch(self, state): + """Convert a new-arch model state dictionary to an old-arch dictionary.""" + if "params_ema" in state: + state = state["params_ema"] + + if "conv_first.weight" not in state: + # model is already old arch, this is a loose check, but should be sufficient + return state + + # add nb to state keys + for kind in ("weight", "bias"): + self.state_map[f"model.1.sub.{self.num_blocks}.{kind}"] = self.state_map[ + f"model.1.sub./NB/.{kind}" + ] + del self.state_map[f"model.1.sub./NB/.{kind}"] + + old_state = OrderedDict() + for old_key, new_keys in self.state_map.items(): + for new_key in new_keys: + if r"\1" in old_key: + for k, v in state.items(): + sub = re.sub(new_key, old_key, k) + if sub != k: + old_state[sub] = v + else: + if new_key in state: + old_state[old_key] = state[new_key] + + # upconv layers + max_upconv = 0 + for key in state.keys(): + match = re.match(r"(upconv|conv_up)(\d)\.(weight|bias)", key) + if match is not None: + _, key_num, key_type = match.groups() + old_state[f"model.{int(key_num) * 3}.{key_type}"] = state[key] + max_upconv = max(max_upconv, int(key_num) * 3) + + # final layers + for key in state.keys(): + if key in ("HRconv.weight", "conv_hr.weight"): + old_state[f"model.{max_upconv + 2}.weight"] = state[key] + elif key in ("HRconv.bias", "conv_hr.bias"): + old_state[f"model.{max_upconv + 2}.bias"] = state[key] + elif key in ("conv_last.weight",): + old_state[f"model.{max_upconv + 4}.weight"] = state[key] + elif key in ("conv_last.bias",): + old_state[f"model.{max_upconv + 4}.bias"] = state[key] + + # Sort by first numeric value of each layer + def compare(item1, item2): + parts1 = item1.split(".") + parts2 = item2.split(".") + int1 = int(parts1[1]) + int2 = int(parts2[1]) + return int1 - int2 + + sorted_keys = sorted(old_state.keys(), key=functools.cmp_to_key(compare)) + + # Rebuild the output dict in the right order + out_dict = OrderedDict((k, old_state[k]) for k in sorted_keys) + + return out_dict + + def get_scale(self, min_part: int = 6) -> int: + n = 0 + for part in list(self.state): + parts = part.split(".")[1:] + if len(parts) == 2: + part_num = int(parts[0]) + if part_num > min_part and parts[1] == "weight": + n += 1 + return 2**n + + def get_num_blocks(self) -> int: + nbs = [] + state_keys = self.state_map[r"model.1.sub.\1.RDB\2.conv\3.0.\4"] + ( + r"model\.\d+\.sub\.(\d+)\.RDB(\d+)\.conv(\d+)\.0\.(weight|bias)", + ) + for state_key in state_keys: + for k in self.state: + m = re.search(state_key, k) + if m: + 
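# the first capture group is the residual block index; max(nbs) + 1 below yields the block count +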
nbs.append(int(m.group(1))) + if nbs: + break + return max(*nbs) + 1 + + def forward(self, x): + if self.shuffle_factor: + _, _, h, w = x.size() + mod_pad_h = ( + self.shuffle_factor - h % self.shuffle_factor + ) % self.shuffle_factor + mod_pad_w = ( + self.shuffle_factor - w % self.shuffle_factor + ) % self.shuffle_factor + x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h), "reflect") + x = torch.pixel_unshuffle(x, downscale_factor=self.shuffle_factor) + x = self.model(x) + return x[:, :, : h * self.scale, : w * self.scale] + return self.model(x) diff --git a/ldm_patched/pfn/architecture/SCUNet.py b/ldm_patched/pfn/architecture/SCUNet.py new file mode 100644 index 0000000000000000000000000000000000000000..b8354a873085140e9ff7d582c43ba9818ed9524e --- /dev/null +++ b/ldm_patched/pfn/architecture/SCUNet.py @@ -0,0 +1,455 @@ +# pylint: skip-file +# ----------------------------------------------------------------------------------- +# SCUNet: Practical Blind Denoising via Swin-Conv-UNet and Data Synthesis, https://arxiv.org/abs/2203.13278 +# Zhang, Kai and Li, Yawei and Liang, Jingyun and Cao, Jiezhang and Zhang, Yulun and Tang, Hao and Timofte, Radu and Van Gool, Luc +# ----------------------------------------------------------------------------------- + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from einops import rearrange +from einops.layers.torch import Rearrange + +from .timm.drop import DropPath +from .timm.weight_init import trunc_normal_ + + +# Borrowed from https://github.com/cszn/SCUNet/blob/main/models/network_scunet.py +class WMSA(nn.Module): + """Self-attention module in Swin Transformer""" + + def __init__(self, input_dim, output_dim, head_dim, window_size, type): + super(WMSA, self).__init__() + self.input_dim = input_dim + self.output_dim = output_dim + self.head_dim = head_dim + self.scale = self.head_dim**-0.5 + self.n_heads = input_dim // head_dim + self.window_size = window_size + self.type = type + self.embedding_layer = nn.Linear(self.input_dim, 3 * self.input_dim, bias=True) + + self.relative_position_params = nn.Parameter( + torch.zeros((2 * window_size - 1) * (2 * window_size - 1), self.n_heads) + ) + # TODO recover + # self.relative_position_params = nn.Parameter(torch.zeros(self.n_heads, 2 * window_size - 1, 2 * window_size -1)) + self.relative_position_params = nn.Parameter( + torch.zeros((2 * window_size - 1) * (2 * window_size - 1), self.n_heads) + ) + + self.linear = nn.Linear(self.input_dim, self.output_dim) + + trunc_normal_(self.relative_position_params, std=0.02) + self.relative_position_params = torch.nn.Parameter( + self.relative_position_params.view( + 2 * window_size - 1, 2 * window_size - 1, self.n_heads + ) + .transpose(1, 2) + .transpose(0, 1) + ) + + def generate_mask(self, h, w, p, shift): + """generating the mask of SW-MSA + Args: + shift: shift parameters in CyclicShift. + Returns: + attn_mask: should be (1 1 w p p), + """ + # supporting square. + attn_mask = torch.zeros( + h, + w, + p, + p, + p, + p, + dtype=torch.bool, + device=self.relative_position_params.device, + ) + if self.type == "W": + return attn_mask + + s = p - shift + attn_mask[-1, :, :s, :, s:, :] = True + attn_mask[-1, :, s:, :, :s, :] = True + attn_mask[:, -1, :, :s, :, s:] = True + attn_mask[:, -1, :, s:, :, :s] = True + attn_mask = rearrange( + attn_mask, "w1 w2 p1 p2 p3 p4 -> 1 1 (w1 w2) (p1 p2) (p3 p4)" + ) + return attn_mask + + def forward(self, x): + """Forward pass of Window Multi-head Self-attention module. 
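+ For the shifted-window ("SW") type the input is cyclically rolled by half a window before partitioning and rolled back afterwards; generate_mask hides attention across the roll boundary.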
+ Args: + x: input tensor with shape of [b h w c]; + attn_mask: attention mask, fill -inf where the value is True; + Returns: + output: tensor shape [b h w c] + """ + if self.type != "W": + x = torch.roll( + x, + shifts=(-(self.window_size // 2), -(self.window_size // 2)), + dims=(1, 2), + ) + + x = rearrange( + x, + "b (w1 p1) (w2 p2) c -> b w1 w2 p1 p2 c", + p1=self.window_size, + p2=self.window_size, + ) + h_windows = x.size(1) + w_windows = x.size(2) + # square validation + # assert h_windows == w_windows + + x = rearrange( + x, + "b w1 w2 p1 p2 c -> b (w1 w2) (p1 p2) c", + p1=self.window_size, + p2=self.window_size, + ) + qkv = self.embedding_layer(x) + q, k, v = rearrange( + qkv, "b nw np (threeh c) -> threeh b nw np c", c=self.head_dim + ).chunk(3, dim=0) + sim = torch.einsum("hbwpc,hbwqc->hbwpq", q, k) * self.scale + # Adding learnable relative embedding + sim = sim + rearrange(self.relative_embedding(), "h p q -> h 1 1 p q") + # Using Attn Mask to distinguish different subwindows. + if self.type != "W": + attn_mask = self.generate_mask( + h_windows, w_windows, self.window_size, shift=self.window_size // 2 + ) + sim = sim.masked_fill_(attn_mask, float("-inf")) + + probs = nn.functional.softmax(sim, dim=-1) + output = torch.einsum("hbwij,hbwjc->hbwic", probs, v) + output = rearrange(output, "h b w p c -> b w p (h c)") + output = self.linear(output) + output = rearrange( + output, + "b (w1 w2) (p1 p2) c -> b (w1 p1) (w2 p2) c", + w1=h_windows, + p1=self.window_size, + ) + + if self.type != "W": + output = torch.roll( + output, + shifts=(self.window_size // 2, self.window_size // 2), + dims=(1, 2), + ) + + return output + + def relative_embedding(self): + cord = torch.tensor( + np.array( + [ + [i, j] + for i in range(self.window_size) + for j in range(self.window_size) + ] + ) + ) + relation = cord[:, None, :] - cord[None, :, :] + self.window_size - 1 + # negative is allowed + return self.relative_position_params[ + :, relation[:, :, 0].long(), relation[:, :, 1].long() + ] + + +class Block(nn.Module): + def __init__( + self, + input_dim, + output_dim, + head_dim, + window_size, + drop_path, + type="W", + input_resolution=None, + ): + """SwinTransformer Block""" + super(Block, self).__init__() + self.input_dim = input_dim + self.output_dim = output_dim + assert type in ["W", "SW"] + self.type = type + if input_resolution <= window_size: + self.type = "W" + + self.ln1 = nn.LayerNorm(input_dim) + self.msa = WMSA(input_dim, input_dim, head_dim, window_size, self.type) + self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity() + self.ln2 = nn.LayerNorm(input_dim) + self.mlp = nn.Sequential( + nn.Linear(input_dim, 4 * input_dim), + nn.GELU(), + nn.Linear(4 * input_dim, output_dim), + ) + + def forward(self, x): + x = x + self.drop_path(self.msa(self.ln1(x))) + x = x + self.drop_path(self.mlp(self.ln2(x))) + return x + + +class ConvTransBlock(nn.Module): + def __init__( + self, + conv_dim, + trans_dim, + head_dim, + window_size, + drop_path, + type="W", + input_resolution=None, + ): + """SwinTransformer and Conv Block""" + super(ConvTransBlock, self).__init__() + self.conv_dim = conv_dim + self.trans_dim = trans_dim + self.head_dim = head_dim + self.window_size = window_size + self.drop_path = drop_path + self.type = type + self.input_resolution = input_resolution + + assert self.type in ["W", "SW"] + if self.input_resolution <= self.window_size: + self.type = "W" + + self.trans_block = Block( + self.trans_dim, + self.trans_dim, + self.head_dim, + self.window_size, + 
self.drop_path, + self.type, + self.input_resolution, + ) + self.conv1_1 = nn.Conv2d( + self.conv_dim + self.trans_dim, + self.conv_dim + self.trans_dim, + 1, + 1, + 0, + bias=True, + ) + self.conv1_2 = nn.Conv2d( + self.conv_dim + self.trans_dim, + self.conv_dim + self.trans_dim, + 1, + 1, + 0, + bias=True, + ) + + self.conv_block = nn.Sequential( + nn.Conv2d(self.conv_dim, self.conv_dim, 3, 1, 1, bias=False), + nn.ReLU(True), + nn.Conv2d(self.conv_dim, self.conv_dim, 3, 1, 1, bias=False), + ) + + def forward(self, x): + conv_x, trans_x = torch.split( + self.conv1_1(x), (self.conv_dim, self.trans_dim), dim=1 + ) + conv_x = self.conv_block(conv_x) + conv_x + trans_x = Rearrange("b c h w -> b h w c")(trans_x) + trans_x = self.trans_block(trans_x) + trans_x = Rearrange("b h w c -> b c h w")(trans_x) + res = self.conv1_2(torch.cat((conv_x, trans_x), dim=1)) + x = x + res + + return x + + +class SCUNet(nn.Module): + def __init__( + self, + state_dict, + in_nc=3, + config=[4, 4, 4, 4, 4, 4, 4], + dim=64, + drop_path_rate=0.0, + input_resolution=256, + ): + super(SCUNet, self).__init__() + self.model_arch = "SCUNet" + self.sub_type = "SR" + + self.num_filters: int = 0 + + self.state = state_dict + self.config = config + self.dim = dim + self.head_dim = 32 + self.window_size = 8 + + self.in_nc = in_nc + self.out_nc = self.in_nc + self.scale = 1 + self.supports_fp16 = True + + # drop path rate for each layer + dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(config))] + + self.m_head = [nn.Conv2d(in_nc, dim, 3, 1, 1, bias=False)] + + begin = 0 + self.m_down1 = [ + ConvTransBlock( + dim // 2, + dim // 2, + self.head_dim, + self.window_size, + dpr[i + begin], + "W" if not i % 2 else "SW", + input_resolution, + ) + for i in range(config[0]) + ] + [nn.Conv2d(dim, 2 * dim, 2, 2, 0, bias=False)] + + begin += config[0] + self.m_down2 = [ + ConvTransBlock( + dim, + dim, + self.head_dim, + self.window_size, + dpr[i + begin], + "W" if not i % 2 else "SW", + input_resolution // 2, + ) + for i in range(config[1]) + ] + [nn.Conv2d(2 * dim, 4 * dim, 2, 2, 0, bias=False)] + + begin += config[1] + self.m_down3 = [ + ConvTransBlock( + 2 * dim, + 2 * dim, + self.head_dim, + self.window_size, + dpr[i + begin], + "W" if not i % 2 else "SW", + input_resolution // 4, + ) + for i in range(config[2]) + ] + [nn.Conv2d(4 * dim, 8 * dim, 2, 2, 0, bias=False)] + + begin += config[2] + self.m_body = [ + ConvTransBlock( + 4 * dim, + 4 * dim, + self.head_dim, + self.window_size, + dpr[i + begin], + "W" if not i % 2 else "SW", + input_resolution // 8, + ) + for i in range(config[3]) + ] + + begin += config[3] + self.m_up3 = [ + nn.ConvTranspose2d(8 * dim, 4 * dim, 2, 2, 0, bias=False), + ] + [ + ConvTransBlock( + 2 * dim, + 2 * dim, + self.head_dim, + self.window_size, + dpr[i + begin], + "W" if not i % 2 else "SW", + input_resolution // 4, + ) + for i in range(config[4]) + ] + + begin += config[4] + self.m_up2 = [ + nn.ConvTranspose2d(4 * dim, 2 * dim, 2, 2, 0, bias=False), + ] + [ + ConvTransBlock( + dim, + dim, + self.head_dim, + self.window_size, + dpr[i + begin], + "W" if not i % 2 else "SW", + input_resolution // 2, + ) + for i in range(config[5]) + ] + + begin += config[5] + self.m_up1 = [ + nn.ConvTranspose2d(2 * dim, dim, 2, 2, 0, bias=False), + ] + [ + ConvTransBlock( + dim // 2, + dim // 2, + self.head_dim, + self.window_size, + dpr[i + begin], + "W" if not i % 2 else "SW", + input_resolution, + ) + for i in range(config[6]) + ] + + self.m_tail = [nn.Conv2d(dim, in_nc, 3, 1, 1, bias=False)] + + 
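# wrap each stage list (head, down1-3, body, up3-1, tail) in nn.Sequential; forward() below adds U-Net style skip connections between the stages +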
self.m_head = nn.Sequential(*self.m_head) + self.m_down1 = nn.Sequential(*self.m_down1) + self.m_down2 = nn.Sequential(*self.m_down2) + self.m_down3 = nn.Sequential(*self.m_down3) + self.m_body = nn.Sequential(*self.m_body) + self.m_up3 = nn.Sequential(*self.m_up3) + self.m_up2 = nn.Sequential(*self.m_up2) + self.m_up1 = nn.Sequential(*self.m_up1) + self.m_tail = nn.Sequential(*self.m_tail) + # self.apply(self._init_weights) + self.load_state_dict(state_dict, strict=True) + + def check_image_size(self, x): + _, _, h, w = x.size() + mod_pad_h = (64 - h % 64) % 64 + mod_pad_w = (64 - w % 64) % 64 + x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h), "reflect") + return x + + def forward(self, x0): + h, w = x0.size()[-2:] + x0 = self.check_image_size(x0) + + x1 = self.m_head(x0) + x2 = self.m_down1(x1) + x3 = self.m_down2(x2) + x4 = self.m_down3(x3) + x = self.m_body(x4) + x = self.m_up3(x + x4) + x = self.m_up2(x + x3) + x = self.m_up1(x + x2) + x = self.m_tail(x + x1) + + x = x[:, :, :h, :w] + return x + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=0.02) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) diff --git a/ldm_patched/pfn/architecture/SPSR.py b/ldm_patched/pfn/architecture/SPSR.py new file mode 100644 index 0000000000000000000000000000000000000000..c3cefff190292a63cf61fe3fa9c28131dac4f369 --- /dev/null +++ b/ldm_patched/pfn/architecture/SPSR.py @@ -0,0 +1,383 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from . import block as B + + +class Get_gradient_nopadding(nn.Module): + def __init__(self): + super(Get_gradient_nopadding, self).__init__() + kernel_v = [[0, -1, 0], [0, 0, 0], [0, 1, 0]] + kernel_h = [[0, 0, 0], [-1, 0, 1], [0, 0, 0]] + kernel_h = torch.FloatTensor(kernel_h).unsqueeze(0).unsqueeze(0) + kernel_v = torch.FloatTensor(kernel_v).unsqueeze(0).unsqueeze(0) + self.weight_h = nn.Parameter(data=kernel_h, requires_grad=False) # type: ignore + + self.weight_v = nn.Parameter(data=kernel_v, requires_grad=False) # type: ignore + + def forward(self, x): + x_list = [] + for i in range(x.shape[1]): + x_i = x[:, i] + x_i_v = F.conv2d(x_i.unsqueeze(1), self.weight_v, padding=1) + x_i_h = F.conv2d(x_i.unsqueeze(1), self.weight_h, padding=1) + x_i = torch.sqrt(torch.pow(x_i_v, 2) + torch.pow(x_i_h, 2) + 1e-6) + x_list.append(x_i) + + x = torch.cat(x_list, dim=1) + + return x + + +class SPSRNet(nn.Module): + def __init__( + self, + state_dict, + norm=None, + act: str = "leakyrelu", + upsampler: str = "upconv", + mode: B.ConvMode = "CNA", + ): + super(SPSRNet, self).__init__() + self.model_arch = "SPSR" + self.sub_type = "SR" + + self.state = state_dict + self.norm = norm + self.act = act + self.upsampler = upsampler + self.mode = mode + + self.num_blocks = self.get_num_blocks() + + self.in_nc: int = self.state["model.0.weight"].shape[1] + self.out_nc: int = self.state["f_HR_conv1.0.bias"].shape[0] + + self.scale = self.get_scale(4) + self.num_filters: int = self.state["model.0.weight"].shape[0] + + self.supports_fp16 = True + self.supports_bfp16 = True + self.min_size_restriction = None + + n_upscale = int(math.log(self.scale, 2)) + if self.scale == 3: + n_upscale = 1 + + fea_conv = B.conv_block( + self.in_nc, self.num_filters, kernel_size=3, norm_type=None, act_type=None + ) + rb_blocks = [ + B.RRDB( + self.num_filters, + kernel_size=3, + gc=32, 
+ stride=1, + bias=True, + pad_type="zero", + norm_type=norm, + act_type=act, + mode="CNA", + ) + for _ in range(self.num_blocks) + ] + LR_conv = B.conv_block( + self.num_filters, + self.num_filters, + kernel_size=3, + norm_type=norm, + act_type=None, + mode=mode, + ) + + if upsampler == "upconv": + upsample_block = B.upconv_block + elif upsampler == "pixelshuffle": + upsample_block = B.pixelshuffle_block + else: + raise NotImplementedError(f"upsample mode [{upsampler}] is not found") + if self.scale == 3: + a_upsampler = upsample_block( + self.num_filters, self.num_filters, 3, act_type=act + ) + else: + a_upsampler = [ + upsample_block(self.num_filters, self.num_filters, act_type=act) + for _ in range(n_upscale) + ] + self.HR_conv0_new = B.conv_block( + self.num_filters, + self.num_filters, + kernel_size=3, + norm_type=None, + act_type=act, + ) + self.HR_conv1_new = B.conv_block( + self.num_filters, + self.num_filters, + kernel_size=3, + norm_type=None, + act_type=None, + ) + + self.model = B.sequential( + fea_conv, + B.ShortcutBlockSPSR(B.sequential(*rb_blocks, LR_conv)), + *a_upsampler, + self.HR_conv0_new, + ) + + self.get_g_nopadding = Get_gradient_nopadding() + + self.b_fea_conv = B.conv_block( + self.in_nc, self.num_filters, kernel_size=3, norm_type=None, act_type=None + ) + + self.b_concat_1 = B.conv_block( + 2 * self.num_filters, + self.num_filters, + kernel_size=3, + norm_type=None, + act_type=None, + ) + self.b_block_1 = B.RRDB( + self.num_filters * 2, + kernel_size=3, + gc=32, + stride=1, + bias=True, + pad_type="zero", + norm_type=norm, + act_type=act, + mode="CNA", + ) + + self.b_concat_2 = B.conv_block( + 2 * self.num_filters, + self.num_filters, + kernel_size=3, + norm_type=None, + act_type=None, + ) + self.b_block_2 = B.RRDB( + self.num_filters * 2, + kernel_size=3, + gc=32, + stride=1, + bias=True, + pad_type="zero", + norm_type=norm, + act_type=act, + mode="CNA", + ) + + self.b_concat_3 = B.conv_block( + 2 * self.num_filters, + self.num_filters, + kernel_size=3, + norm_type=None, + act_type=None, + ) + self.b_block_3 = B.RRDB( + self.num_filters * 2, + kernel_size=3, + gc=32, + stride=1, + bias=True, + pad_type="zero", + norm_type=norm, + act_type=act, + mode="CNA", + ) + + self.b_concat_4 = B.conv_block( + 2 * self.num_filters, + self.num_filters, + kernel_size=3, + norm_type=None, + act_type=None, + ) + self.b_block_4 = B.RRDB( + self.num_filters * 2, + kernel_size=3, + gc=32, + stride=1, + bias=True, + pad_type="zero", + norm_type=norm, + act_type=act, + mode="CNA", + ) + + self.b_LR_conv = B.conv_block( + self.num_filters, + self.num_filters, + kernel_size=3, + norm_type=norm, + act_type=None, + mode=mode, + ) + + if upsampler == "upconv": + upsample_block = B.upconv_block + elif upsampler == "pixelshuffle": + upsample_block = B.pixelshuffle_block + else: + raise NotImplementedError(f"upsample mode [{upsampler}] is not found") + if self.scale == 3: + b_upsampler = upsample_block( + self.num_filters, self.num_filters, 3, act_type=act + ) + else: + b_upsampler = [ + upsample_block(self.num_filters, self.num_filters, act_type=act) + for _ in range(n_upscale) + ] + + b_HR_conv0 = B.conv_block( + self.num_filters, + self.num_filters, + kernel_size=3, + norm_type=None, + act_type=act, + ) + b_HR_conv1 = B.conv_block( + self.num_filters, + self.num_filters, + kernel_size=3, + norm_type=None, + act_type=None, + ) + + self.b_module = B.sequential(*b_upsampler, b_HR_conv0, b_HR_conv1) + + self.conv_w = B.conv_block( + self.num_filters, self.out_nc, kernel_size=1, 
norm_type=None, act_type=None + ) + + self.f_concat = B.conv_block( + self.num_filters * 2, + self.num_filters, + kernel_size=3, + norm_type=None, + act_type=None, + ) + + self.f_block = B.RRDB( + self.num_filters * 2, + kernel_size=3, + gc=32, + stride=1, + bias=True, + pad_type="zero", + norm_type=norm, + act_type=act, + mode="CNA", + ) + + self.f_HR_conv0 = B.conv_block( + self.num_filters, + self.num_filters, + kernel_size=3, + norm_type=None, + act_type=act, + ) + self.f_HR_conv1 = B.conv_block( + self.num_filters, self.out_nc, kernel_size=3, norm_type=None, act_type=None + ) + + self.load_state_dict(self.state, strict=False) + + def get_scale(self, min_part: int = 4) -> int: + n = 0 + for part in list(self.state): + parts = part.split(".") + if len(parts) == 3: + part_num = int(parts[1]) + if part_num > min_part and parts[0] == "model" and parts[2] == "weight": + n += 1 + return 2**n + + def get_num_blocks(self) -> int: + nb = 0 + for part in list(self.state): + parts = part.split(".") + n_parts = len(parts) + if n_parts == 5 and parts[2] == "sub": + nb = int(parts[3]) + return nb + + def forward(self, x): + x_grad = self.get_g_nopadding(x) + x = self.model[0](x) + + x, block_list = self.model[1](x) + + x_ori = x + for i in range(5): + x = block_list[i](x) + x_fea1 = x + + for i in range(5): + x = block_list[i + 5](x) + x_fea2 = x + + for i in range(5): + x = block_list[i + 10](x) + x_fea3 = x + + for i in range(5): + x = block_list[i + 15](x) + x_fea4 = x + + x = block_list[20:](x) + # short cut + x = x_ori + x + x = self.model[2:](x) + x = self.HR_conv1_new(x) + + x_b_fea = self.b_fea_conv(x_grad) + x_cat_1 = torch.cat([x_b_fea, x_fea1], dim=1) + + x_cat_1 = self.b_block_1(x_cat_1) + x_cat_1 = self.b_concat_1(x_cat_1) + + x_cat_2 = torch.cat([x_cat_1, x_fea2], dim=1) + + x_cat_2 = self.b_block_2(x_cat_2) + x_cat_2 = self.b_concat_2(x_cat_2) + + x_cat_3 = torch.cat([x_cat_2, x_fea3], dim=1) + + x_cat_3 = self.b_block_3(x_cat_3) + x_cat_3 = self.b_concat_3(x_cat_3) + + x_cat_4 = torch.cat([x_cat_3, x_fea4], dim=1) + + x_cat_4 = self.b_block_4(x_cat_4) + x_cat_4 = self.b_concat_4(x_cat_4) + + x_cat_4 = self.b_LR_conv(x_cat_4) + + # short cut + x_cat_4 = x_cat_4 + x_b_fea + x_branch = self.b_module(x_cat_4) + + # x_out_branch = self.conv_w(x_branch) + ######## + x_branch_d = x_branch + x_f_cat = torch.cat([x_branch_d, x], dim=1) + x_f_cat = self.f_block(x_f_cat) + x_out = self.f_concat(x_f_cat) + x_out = self.f_HR_conv0(x_out) + x_out = self.f_HR_conv1(x_out) + + ######### + # return x_out_branch, x_out, x_grad + return x_out diff --git a/ldm_patched/pfn/architecture/SRVGG.py b/ldm_patched/pfn/architecture/SRVGG.py new file mode 100644 index 0000000000000000000000000000000000000000..7a8ec37ae5dc4effd0ba688cf4c3a51801e1f2c9 --- /dev/null +++ b/ldm_patched/pfn/architecture/SRVGG.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import math + +import torch.nn as nn +import torch.nn.functional as F + + +class SRVGGNetCompact(nn.Module): + """A compact VGG-style network structure for super-resolution. + It is a compact network structure, which performs upsampling in the last layer and no convolution is + conducted on the HR feature space. + Args: + num_in_ch (int): Channel number of inputs. Default: 3. + num_out_ch (int): Channel number of outputs. Default: 3. + num_feat (int): Channel number of intermediate features. Default: 64. + num_conv (int): Number of convolution layers in the body network. Default: 16. + upscale (int): Upsampling factor. Default: 4. 
+ act_type (str): Activation type, options: 'relu', 'prelu', 'leakyrelu'. Default: prelu. + """ + + def __init__( + self, + state_dict, + act_type: str = "prelu", + ): + super(SRVGGNetCompact, self).__init__() + self.model_arch = "SRVGG (RealESRGAN)" + self.sub_type = "SR" + + self.act_type = act_type + + self.state = state_dict + + if "params" in self.state: + self.state = self.state["params"] + + self.key_arr = list(self.state.keys()) + + self.in_nc = self.get_in_nc() + self.num_feat = self.get_num_feats() + self.num_conv = self.get_num_conv() + self.out_nc = self.in_nc # :( + self.pixelshuffle_shape = None # Defined in get_scale() + self.scale = self.get_scale() + + self.supports_fp16 = True + self.supports_bfp16 = True + self.min_size_restriction = None + + self.body = nn.ModuleList() + # the first conv + self.body.append(nn.Conv2d(self.in_nc, self.num_feat, 3, 1, 1)) + # the first activation + if act_type == "relu": + activation = nn.ReLU(inplace=True) + elif act_type == "prelu": + activation = nn.PReLU(num_parameters=self.num_feat) + elif act_type == "leakyrelu": + activation = nn.LeakyReLU(negative_slope=0.1, inplace=True) + self.body.append(activation) # type: ignore + + # the body structure + for _ in range(self.num_conv): + self.body.append(nn.Conv2d(self.num_feat, self.num_feat, 3, 1, 1)) + # activation + if act_type == "relu": + activation = nn.ReLU(inplace=True) + elif act_type == "prelu": + activation = nn.PReLU(num_parameters=self.num_feat) + elif act_type == "leakyrelu": + activation = nn.LeakyReLU(negative_slope=0.1, inplace=True) + self.body.append(activation) # type: ignore + + # the last conv + self.body.append(nn.Conv2d(self.num_feat, self.pixelshuffle_shape, 3, 1, 1)) # type: ignore + # upsample + self.upsampler = nn.PixelShuffle(self.scale) + + self.load_state_dict(self.state, strict=False) + + def get_num_conv(self) -> int: + return (int(self.key_arr[-1].split(".")[1]) - 2) // 2 + + def get_num_feats(self) -> int: + return self.state[self.key_arr[0]].shape[0] + + def get_in_nc(self) -> int: + return self.state[self.key_arr[0]].shape[1] + + def get_scale(self) -> int: + self.pixelshuffle_shape = self.state[self.key_arr[-1]].shape[0] + # Assume out_nc is the same as in_nc + # I cant think of a better way to do that + self.out_nc = self.in_nc + scale = math.sqrt(self.pixelshuffle_shape / self.out_nc) + if scale - int(scale) > 0: + print( + "out_nc is probably different than in_nc, scale calculation might be wrong" + ) + scale = int(scale) + return scale + + def forward(self, x): + out = x + for i in range(0, len(self.body)): + out = self.body[i](out) + + out = self.upsampler(out) + # add the nearest upsampled image, so that the network learns the residual + base = F.interpolate(x, scale_factor=self.scale, mode="nearest") + out += base + return out diff --git a/ldm_patched/pfn/architecture/SwiftSRGAN.py b/ldm_patched/pfn/architecture/SwiftSRGAN.py new file mode 100644 index 0000000000000000000000000000000000000000..dbb7725b08dc2462661b7ba45db605a06fadacb9 --- /dev/null +++ b/ldm_patched/pfn/architecture/SwiftSRGAN.py @@ -0,0 +1,161 @@ +# From https://github.com/Koushik0901/Swift-SRGAN/blob/master/swift-srgan/models.py + +import torch +from torch import nn + + +class SeperableConv2d(nn.Module): + def __init__( + self, in_channels, out_channels, kernel_size, stride=1, padding=1, bias=True + ): + super(SeperableConv2d, self).__init__() + self.depthwise = nn.Conv2d( + in_channels, + in_channels, + kernel_size=kernel_size, + stride=stride, + groups=in_channels, + bias=bias, 
+ padding=padding, + ) + self.pointwise = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=bias) + + def forward(self, x): + return self.pointwise(self.depthwise(x)) + + +class ConvBlock(nn.Module): + def __init__( + self, + in_channels, + out_channels, + use_act=True, + use_bn=True, + discriminator=False, + **kwargs, + ): + super(ConvBlock, self).__init__() + + self.use_act = use_act + self.cnn = SeperableConv2d(in_channels, out_channels, **kwargs, bias=not use_bn) + self.bn = nn.BatchNorm2d(out_channels) if use_bn else nn.Identity() + self.act = ( + nn.LeakyReLU(0.2, inplace=True) + if discriminator + else nn.PReLU(num_parameters=out_channels) + ) + + def forward(self, x): + return self.act(self.bn(self.cnn(x))) if self.use_act else self.bn(self.cnn(x)) + + +class UpsampleBlock(nn.Module): + def __init__(self, in_channels, scale_factor): + super(UpsampleBlock, self).__init__() + + self.conv = SeperableConv2d( + in_channels, + in_channels * scale_factor**2, + kernel_size=3, + stride=1, + padding=1, + ) + self.ps = nn.PixelShuffle( + scale_factor + ) # (in_channels * 4, H, W) -> (in_channels, H*2, W*2) + self.act = nn.PReLU(num_parameters=in_channels) + + def forward(self, x): + return self.act(self.ps(self.conv(x))) + + +class ResidualBlock(nn.Module): + def __init__(self, in_channels): + super(ResidualBlock, self).__init__() + + self.block1 = ConvBlock( + in_channels, in_channels, kernel_size=3, stride=1, padding=1 + ) + self.block2 = ConvBlock( + in_channels, in_channels, kernel_size=3, stride=1, padding=1, use_act=False + ) + + def forward(self, x): + out = self.block1(x) + out = self.block2(out) + return out + x + + +class Generator(nn.Module): + """Swift-SRGAN Generator + Args: + in_channels (int): number of input image channels. + num_channels (int): number of hidden channels. + num_blocks (int): number of residual blocks. + upscale_factor (int): factor to upscale the image [2x, 4x, 8x]. 
+ Returns: + torch.Tensor: super resolution image + """ + + def __init__( + self, + state_dict, + ): + super(Generator, self).__init__() + self.model_arch = "Swift-SRGAN" + self.sub_type = "SR" + self.state = state_dict + if "model" in self.state: + self.state = self.state["model"] + + self.in_nc: int = self.state["initial.cnn.depthwise.weight"].shape[0] + self.out_nc: int = self.state["final_conv.pointwise.weight"].shape[0] + self.num_filters: int = self.state["initial.cnn.pointwise.weight"].shape[0] + self.num_blocks = len( + set([x.split(".")[1] for x in self.state.keys() if "residual" in x]) + ) + self.scale: int = 2 ** len( + set([x.split(".")[1] for x in self.state.keys() if "upsampler" in x]) + ) + + in_channels = self.in_nc + num_channels = self.num_filters + num_blocks = self.num_blocks + upscale_factor = self.scale + + self.supports_fp16 = True + self.supports_bfp16 = True + self.min_size_restriction = None + + self.initial = ConvBlock( + in_channels, num_channels, kernel_size=9, stride=1, padding=4, use_bn=False + ) + self.residual = nn.Sequential( + *[ResidualBlock(num_channels) for _ in range(num_blocks)] + ) + self.convblock = ConvBlock( + num_channels, + num_channels, + kernel_size=3, + stride=1, + padding=1, + use_act=False, + ) + self.upsampler = nn.Sequential( + *[ + UpsampleBlock(num_channels, scale_factor=2) + for _ in range(upscale_factor // 2) + ] + ) + self.final_conv = SeperableConv2d( + num_channels, in_channels, kernel_size=9, stride=1, padding=4 + ) + + self.load_state_dict(self.state, strict=False) + + def forward(self, x): + initial = self.initial(x) + x = self.residual(initial) + x = self.convblock(x) + initial + x = self.upsampler(x) + return (torch.tanh(self.final_conv(x)) + 1) / 2 diff --git a/ldm_patched/pfn/architecture/Swin2SR.py b/ldm_patched/pfn/architecture/Swin2SR.py new file mode 100644 index 0000000000000000000000000000000000000000..cb57ecfc4ada45a6b087247017732437b1af0fcc --- /dev/null +++ b/ldm_patched/pfn/architecture/Swin2SR.py @@ -0,0 +1,1377 @@ +# pylint: skip-file +# ----------------------------------------------------------------------------------- +# Swin2SR: Swin2SR: SwinV2 Transformer for Compressed Image Super-Resolution and Restoration, https://arxiv.org/abs/2209.11345 +# Written by Conde and Choi et al. 
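+# Uses SwinV2-style attention: scaled cosine similarity with a learnable, clamped per-head logit scale and a continuous log-spaced relative position bias generated by a small MLP.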
+# From: https://raw.githubusercontent.com/mv-lab/swin2sr/main/models/network_swin2sr.py +# ----------------------------------------------------------------------------------- + +import math +import re + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.checkpoint as checkpoint + +# Originally from the timm package +from .timm.drop import DropPath +from .timm.helpers import to_2tuple +from .timm.weight_init import trunc_normal_ + + +class Mlp(nn.Module): + def __init__( + self, + in_features, + hidden_features=None, + out_features=None, + act_layer=nn.GELU, + drop=0.0, + ): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +def window_partition(x, window_size): + """ + Args: + x: (B, H, W, C) + window_size (int): window size + Returns: + windows: (num_windows*B, window_size, window_size, C) + """ + B, H, W, C = x.shape + x = x.view(B, H // window_size, window_size, W // window_size, window_size, C) + windows = ( + x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C) + ) + return windows + + +def window_reverse(windows, window_size, H, W): + """ + Args: + windows: (num_windows*B, window_size, window_size, C) + window_size (int): Window size + H (int): Height of image + W (int): Width of image + Returns: + x: (B, H, W, C) + """ + B = int(windows.shape[0] / (H * W / window_size / window_size)) + x = windows.view( + B, H // window_size, W // window_size, window_size, window_size, -1 + ) + x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) + return x + + +class WindowAttention(nn.Module): + r"""Window based multi-head self attention (W-MSA) module with relative position bias. + It supports both of shifted and non-shifted window. + Args: + dim (int): Number of input channels. + window_size (tuple[int]): The height and width of the window. + num_heads (int): Number of attention heads. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0 + proj_drop (float, optional): Dropout ratio of output. Default: 0.0 + pretrained_window_size (tuple[int]): The height and width of the window in pre-training. 
+ """ + + def __init__( + self, + dim, + window_size, + num_heads, + qkv_bias=True, + attn_drop=0.0, + proj_drop=0.0, + pretrained_window_size=[0, 0], + ): + super().__init__() + self.dim = dim + self.window_size = window_size # Wh, Ww + self.pretrained_window_size = pretrained_window_size + self.num_heads = num_heads + + self.logit_scale = nn.Parameter(torch.log(10 * torch.ones((num_heads, 1, 1))), requires_grad=True) # type: ignore + + # mlp to generate continuous relative position bias + self.cpb_mlp = nn.Sequential( + nn.Linear(2, 512, bias=True), + nn.ReLU(inplace=True), + nn.Linear(512, num_heads, bias=False), + ) + + # get relative_coords_table + relative_coords_h = torch.arange( + -(self.window_size[0] - 1), self.window_size[0], dtype=torch.float32 + ) + relative_coords_w = torch.arange( + -(self.window_size[1] - 1), self.window_size[1], dtype=torch.float32 + ) + relative_coords_table = ( + torch.stack(torch.meshgrid([relative_coords_h, relative_coords_w])) + .permute(1, 2, 0) + .contiguous() + .unsqueeze(0) + ) # 1, 2*Wh-1, 2*Ww-1, 2 + if pretrained_window_size[0] > 0: + relative_coords_table[:, :, :, 0] /= pretrained_window_size[0] - 1 + relative_coords_table[:, :, :, 1] /= pretrained_window_size[1] - 1 + else: + relative_coords_table[:, :, :, 0] /= self.window_size[0] - 1 + relative_coords_table[:, :, :, 1] /= self.window_size[1] - 1 + relative_coords_table *= 8 # normalize to -8, 8 + relative_coords_table = ( + torch.sign(relative_coords_table) + * torch.log2(torch.abs(relative_coords_table) + 1.0) + / np.log2(8) + ) + + self.register_buffer("relative_coords_table", relative_coords_table) + + # get pair-wise relative position index for each token inside the window + coords_h = torch.arange(self.window_size[0]) + coords_w = torch.arange(self.window_size[1]) + coords = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, Wh, Ww + coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww + relative_coords = ( + coords_flatten[:, :, None] - coords_flatten[:, None, :] + ) # 2, Wh*Ww, Wh*Ww + relative_coords = relative_coords.permute( + 1, 2, 0 + ).contiguous() # Wh*Ww, Wh*Ww, 2 + relative_coords[:, :, 0] += self.window_size[0] - 1 # shift to start from 0 + relative_coords[:, :, 1] += self.window_size[1] - 1 + relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1 + relative_position_index = relative_coords.sum(-1) # Wh*Ww, Wh*Ww + self.register_buffer("relative_position_index", relative_position_index) + + self.qkv = nn.Linear(dim, dim * 3, bias=False) + if qkv_bias: + self.q_bias = nn.Parameter(torch.zeros(dim)) # type: ignore + self.v_bias = nn.Parameter(torch.zeros(dim)) # type: ignore + else: + self.q_bias = None + self.v_bias = None + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + self.softmax = nn.Softmax(dim=-1) + + def forward(self, x, mask=None): + """ + Args: + x: input features with shape of (num_windows*B, N, C) + mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None + """ + B_, N, C = x.shape + qkv_bias = None + if self.q_bias is not None: + qkv_bias = torch.cat((self.q_bias, torch.zeros_like(self.v_bias, requires_grad=False), self.v_bias)) # type: ignore + qkv = F.linear(input=x, weight=self.qkv.weight, bias=qkv_bias) + qkv = qkv.reshape(B_, N, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4) + q, k, v = ( + qkv[0], + qkv[1], + qkv[2], + ) # make torchscript happy (cannot use tensor as tuple) + + # cosine attention + attn = F.normalize(q, dim=-1) @ F.normalize(k, dim=-1).transpose(-2, -1) + 
logit_scale = torch.clamp( + self.logit_scale, + max=torch.log(torch.tensor(1.0 / 0.01)).to(self.logit_scale.device), + ).exp() + attn = attn * logit_scale + + relative_position_bias_table = self.cpb_mlp(self.relative_coords_table).view( + -1, self.num_heads + ) + relative_position_bias = relative_position_bias_table[self.relative_position_index.view(-1)].view( # type: ignore + self.window_size[0] * self.window_size[1], + self.window_size[0] * self.window_size[1], + -1, + ) # Wh*Ww,Wh*Ww,nH + relative_position_bias = relative_position_bias.permute( + 2, 0, 1 + ).contiguous() # nH, Wh*Ww, Wh*Ww + relative_position_bias = 16 * torch.sigmoid(relative_position_bias) + attn = attn + relative_position_bias.unsqueeze(0) + + if mask is not None: + nW = mask.shape[0] + attn = attn.view(B_ // nW, nW, self.num_heads, N, N) + mask.unsqueeze( + 1 + ).unsqueeze(0) + attn = attn.view(-1, self.num_heads, N, N) + attn = self.softmax(attn) + else: + attn = self.softmax(attn) + + attn = self.attn_drop(attn) + + x = (attn @ v).transpose(1, 2).reshape(B_, N, C) + x = self.proj(x) + x = self.proj_drop(x) + return x + + def extra_repr(self) -> str: + return ( + f"dim={self.dim}, window_size={self.window_size}, " + f"pretrained_window_size={self.pretrained_window_size}, num_heads={self.num_heads}" + ) + + def flops(self, N): + # calculate flops for 1 window with token length of N + flops = 0 + # qkv = self.qkv(x) + flops += N * self.dim * 3 * self.dim + # attn = (q @ k.transpose(-2, -1)) + flops += self.num_heads * N * (self.dim // self.num_heads) * N + # x = (attn @ v) + flops += self.num_heads * N * N * (self.dim // self.num_heads) + # x = self.proj(x) + flops += N * self.dim * self.dim + return flops + + +class SwinTransformerBlock(nn.Module): + r"""Swin Transformer Block. + Args: + dim (int): Number of input channels. + input_resolution (tuple[int]): Input resulotion. + num_heads (int): Number of attention heads. + window_size (int): Window size. + shift_size (int): Shift size for SW-MSA. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float, optional): Stochastic depth rate. Default: 0.0 + act_layer (nn.Module, optional): Activation layer. Default: nn.GELU + norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm + pretrained_window_size (int): Window size in pre-training. 
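+ Attention is restricted to (optionally shifted) local windows; calculate_mask builds the additive -100 mask used for the shifted case.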
+ """ + + def __init__( + self, + dim, + input_resolution, + num_heads, + window_size=7, + shift_size=0, + mlp_ratio=4.0, + qkv_bias=True, + drop=0.0, + attn_drop=0.0, + drop_path=0.0, + act_layer=nn.GELU, + norm_layer=nn.LayerNorm, + pretrained_window_size=0, + ): + super().__init__() + self.dim = dim + self.input_resolution = input_resolution + self.num_heads = num_heads + self.window_size = window_size + self.shift_size = shift_size + self.mlp_ratio = mlp_ratio + if min(self.input_resolution) <= self.window_size: + # if window size is larger than input resolution, we don't partition windows + self.shift_size = 0 + self.window_size = min(self.input_resolution) + assert ( + 0 <= self.shift_size < self.window_size + ), "shift_size must in 0-window_size" + + self.norm1 = norm_layer(dim) + self.attn = WindowAttention( + dim, + window_size=to_2tuple(self.window_size), + num_heads=num_heads, + qkv_bias=qkv_bias, + attn_drop=attn_drop, + proj_drop=drop, + pretrained_window_size=to_2tuple(pretrained_window_size), + ) + + self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp( + in_features=dim, + hidden_features=mlp_hidden_dim, + act_layer=act_layer, + drop=drop, + ) + + if self.shift_size > 0: + attn_mask = self.calculate_mask(self.input_resolution) + else: + attn_mask = None + + self.register_buffer("attn_mask", attn_mask) + + def calculate_mask(self, x_size): + # calculate attention mask for SW-MSA + H, W = x_size + img_mask = torch.zeros((1, H, W, 1)) # 1 H W 1 + h_slices = ( + slice(0, -self.window_size), + slice(-self.window_size, -self.shift_size), + slice(-self.shift_size, None), + ) + w_slices = ( + slice(0, -self.window_size), + slice(-self.window_size, -self.shift_size), + slice(-self.shift_size, None), + ) + cnt = 0 + for h in h_slices: + for w in w_slices: + img_mask[:, h, w, :] = cnt + cnt += 1 + + mask_windows = window_partition( + img_mask, self.window_size + ) # nW, window_size, window_size, 1 + mask_windows = mask_windows.view(-1, self.window_size * self.window_size) + attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) + attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill( + attn_mask == 0, float(0.0) + ) + + return attn_mask + + def forward(self, x, x_size): + H, W = x_size + B, L, C = x.shape + # assert L == H * W, "input feature has wrong size" + + shortcut = x + x = x.view(B, H, W, C) + + # cyclic shift + if self.shift_size > 0: + shifted_x = torch.roll( + x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2) + ) + else: + shifted_x = x + + # partition windows + x_windows = window_partition( + shifted_x, self.window_size + ) # nW*B, window_size, window_size, C + x_windows = x_windows.view( + -1, self.window_size * self.window_size, C + ) # nW*B, window_size*window_size, C + + # W-MSA/SW-MSA (to be compatible for testing on images whose shapes are the multiple of window size + if self.input_resolution == x_size: + attn_windows = self.attn( + x_windows, mask=self.attn_mask + ) # nW*B, window_size*window_size, C + else: + attn_windows = self.attn( + x_windows, mask=self.calculate_mask(x_size).to(x.device) + ) + + # merge windows + attn_windows = attn_windows.view(-1, self.window_size, self.window_size, C) + shifted_x = window_reverse(attn_windows, self.window_size, H, W) # B H' W' C + + # reverse cyclic shift + if self.shift_size > 0: + x = torch.roll( + shifted_x, shifts=(self.shift_size, self.shift_size), dims=(1, 2) + ) + 
else: + x = shifted_x + x = x.view(B, H * W, C) + x = shortcut + self.drop_path(self.norm1(x)) + + # FFN + x = x + self.drop_path(self.norm2(self.mlp(x))) + + return x + + def extra_repr(self) -> str: + return ( + f"dim={self.dim}, input_resolution={self.input_resolution}, num_heads={self.num_heads}, " + f"window_size={self.window_size}, shift_size={self.shift_size}, mlp_ratio={self.mlp_ratio}" + ) + + def flops(self): + flops = 0 + H, W = self.input_resolution + # norm1 + flops += self.dim * H * W + # W-MSA/SW-MSA + nW = H * W / self.window_size / self.window_size + flops += nW * self.attn.flops(self.window_size * self.window_size) + # mlp + flops += 2 * H * W * self.dim * self.dim * self.mlp_ratio + # norm2 + flops += self.dim * H * W + return flops + + +class PatchMerging(nn.Module): + r"""Patch Merging Layer. + Args: + input_resolution (tuple[int]): Resolution of input feature. + dim (int): Number of input channels. + norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm + """ + + def __init__(self, input_resolution, dim, norm_layer=nn.LayerNorm): + super().__init__() + self.input_resolution = input_resolution + self.dim = dim + self.reduction = nn.Linear(4 * dim, 2 * dim, bias=False) + self.norm = norm_layer(2 * dim) + + def forward(self, x): + """ + x: B, H*W, C + """ + H, W = self.input_resolution + B, L, C = x.shape + assert L == H * W, "input feature has wrong size" + assert H % 2 == 0 and W % 2 == 0, f"x size ({H}*{W}) are not even." + + x = x.view(B, H, W, C) + + x0 = x[:, 0::2, 0::2, :] # B H/2 W/2 C + x1 = x[:, 1::2, 0::2, :] # B H/2 W/2 C + x2 = x[:, 0::2, 1::2, :] # B H/2 W/2 C + x3 = x[:, 1::2, 1::2, :] # B H/2 W/2 C + x = torch.cat([x0, x1, x2, x3], -1) # B H/2 W/2 4*C + x = x.view(B, -1, 4 * C) # B H/2*W/2 4*C + + x = self.reduction(x) + x = self.norm(x) + + return x + + def extra_repr(self) -> str: + return f"input_resolution={self.input_resolution}, dim={self.dim}" + + def flops(self): + H, W = self.input_resolution + flops = (H // 2) * (W // 2) * 4 * self.dim * 2 * self.dim + flops += H * W * self.dim // 2 + return flops + + +class BasicLayer(nn.Module): + """A basic Swin Transformer layer for one stage. + Args: + dim (int): Number of input channels. + input_resolution (tuple[int]): Input resolution. + depth (int): Number of blocks. + num_heads (int): Number of attention heads. + window_size (int): Local window size. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0 + norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm + downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None + use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False. + pretrained_window_size (int): Local window size in pre-training. 
+ """ + + def __init__( + self, + dim, + input_resolution, + depth, + num_heads, + window_size, + mlp_ratio=4.0, + qkv_bias=True, + drop=0.0, + attn_drop=0.0, + drop_path=0.0, + norm_layer=nn.LayerNorm, + downsample=None, + use_checkpoint=False, + pretrained_window_size=0, + ): + super().__init__() + self.dim = dim + self.input_resolution = input_resolution + self.depth = depth + self.use_checkpoint = use_checkpoint + + # build blocks + self.blocks = nn.ModuleList( + [ + SwinTransformerBlock( + dim=dim, + input_resolution=input_resolution, + num_heads=num_heads, + window_size=window_size, + shift_size=0 if (i % 2 == 0) else window_size // 2, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + drop=drop, + attn_drop=attn_drop, + drop_path=drop_path[i] + if isinstance(drop_path, list) + else drop_path, + norm_layer=norm_layer, + pretrained_window_size=pretrained_window_size, + ) + for i in range(depth) + ] + ) + + # patch merging layer + if downsample is not None: + self.downsample = downsample( + input_resolution, dim=dim, norm_layer=norm_layer + ) + else: + self.downsample = None + + def forward(self, x, x_size): + for blk in self.blocks: + if self.use_checkpoint: + x = checkpoint.checkpoint(blk, x, x_size) + else: + x = blk(x, x_size) + if self.downsample is not None: + x = self.downsample(x) + return x + + def extra_repr(self) -> str: + return f"dim={self.dim}, input_resolution={self.input_resolution}, depth={self.depth}" + + def flops(self): + flops = 0 + for blk in self.blocks: + flops += blk.flops() # type: ignore + if self.downsample is not None: + flops += self.downsample.flops() + return flops + + def _init_respostnorm(self): + for blk in self.blocks: + nn.init.constant_(blk.norm1.bias, 0) # type: ignore + nn.init.constant_(blk.norm1.weight, 0) # type: ignore + nn.init.constant_(blk.norm2.bias, 0) # type: ignore + nn.init.constant_(blk.norm2.weight, 0) # type: ignore + + +class PatchEmbed(nn.Module): + r"""Image to Patch Embedding + Args: + img_size (int): Image size. Default: 224. + patch_size (int): Patch token size. Default: 4. + in_chans (int): Number of input image channels. Default: 3. + embed_dim (int): Number of linear projection output channels. Default: 96. + norm_layer (nn.Module, optional): Normalization layer. Default: None + """ + + def __init__( + self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None + ): + super().__init__() + img_size = to_2tuple(img_size) + patch_size = to_2tuple(patch_size) + patches_resolution = [img_size[0] // patch_size[0], img_size[1] // patch_size[1]] # type: ignore + self.img_size = img_size + self.patch_size = patch_size + self.patches_resolution = patches_resolution + self.num_patches = patches_resolution[0] * patches_resolution[1] + + self.in_chans = in_chans + self.embed_dim = embed_dim + + self.proj = nn.Conv2d( + in_chans, embed_dim, kernel_size=patch_size, stride=patch_size # type: ignore + ) + if norm_layer is not None: + self.norm = norm_layer(embed_dim) + else: + self.norm = None + + def forward(self, x): + B, C, H, W = x.shape + # FIXME look at relaxing size constraints + # assert H == self.img_size[0] and W == self.img_size[1], + # f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})." 
+ x = self.proj(x).flatten(2).transpose(1, 2) # B Ph*Pw C + if self.norm is not None: + x = self.norm(x) + return x + + def flops(self): + Ho, Wo = self.patches_resolution + flops = Ho * Wo * self.embed_dim * self.in_chans * (self.patch_size[0] * self.patch_size[1]) # type: ignore + if self.norm is not None: + flops += Ho * Wo * self.embed_dim + return flops + + +class RSTB(nn.Module): + """Residual Swin Transformer Block (RSTB). + + Args: + dim (int): Number of input channels. + input_resolution (tuple[int]): Input resolution. + depth (int): Number of blocks. + num_heads (int): Number of attention heads. + window_size (int): Local window size. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0 + norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm + downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None + use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False. + img_size: Input image size. + patch_size: Patch size. + resi_connection: The convolutional block before residual connection. + """ + + def __init__( + self, + dim, + input_resolution, + depth, + num_heads, + window_size, + mlp_ratio=4.0, + qkv_bias=True, + drop=0.0, + attn_drop=0.0, + drop_path=0.0, + norm_layer=nn.LayerNorm, + downsample=None, + use_checkpoint=False, + img_size=224, + patch_size=4, + resi_connection="1conv", + ): + super(RSTB, self).__init__() + + self.dim = dim + self.input_resolution = input_resolution + + self.residual_group = BasicLayer( + dim=dim, + input_resolution=input_resolution, + depth=depth, + num_heads=num_heads, + window_size=window_size, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + drop=drop, + attn_drop=attn_drop, + drop_path=drop_path, + norm_layer=norm_layer, + downsample=downsample, + use_checkpoint=use_checkpoint, + ) + + if resi_connection == "1conv": + self.conv = nn.Conv2d(dim, dim, 3, 1, 1) + elif resi_connection == "3conv": + # to save parameters and memory + self.conv = nn.Sequential( + nn.Conv2d(dim, dim // 4, 3, 1, 1), + nn.LeakyReLU(negative_slope=0.2, inplace=True), + nn.Conv2d(dim // 4, dim // 4, 1, 1, 0), + nn.LeakyReLU(negative_slope=0.2, inplace=True), + nn.Conv2d(dim // 4, dim, 3, 1, 1), + ) + + self.patch_embed = PatchEmbed( + img_size=img_size, + patch_size=patch_size, + in_chans=dim, + embed_dim=dim, + norm_layer=None, + ) + + self.patch_unembed = PatchUnEmbed( + img_size=img_size, + patch_size=patch_size, + in_chans=dim, + embed_dim=dim, + norm_layer=None, + ) + + def forward(self, x, x_size): + return ( + self.patch_embed( + self.conv(self.patch_unembed(self.residual_group(x, x_size), x_size)) + ) + + x + ) + + def flops(self): + flops = 0 + flops += self.residual_group.flops() + H, W = self.input_resolution + flops += H * W * self.dim * self.dim * 9 + flops += self.patch_embed.flops() + flops += self.patch_unembed.flops() + + return flops + + +class PatchUnEmbed(nn.Module): + r"""Image to Patch Unembedding + + Args: + img_size (int): Image size. Default: 224. + patch_size (int): Patch token size. Default: 4. + in_chans (int): Number of input image channels. Default: 3. + embed_dim (int): Number of linear projection output channels. Default: 96. 
+ norm_layer (nn.Module, optional): Normalization layer. Default: None + """ + + def __init__( + self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None + ): + super().__init__() + img_size = to_2tuple(img_size) + patch_size = to_2tuple(patch_size) + patches_resolution = [img_size[0] // patch_size[0], img_size[1] // patch_size[1]] # type: ignore + self.img_size = img_size + self.patch_size = patch_size + self.patches_resolution = patches_resolution + self.num_patches = patches_resolution[0] * patches_resolution[1] + + self.in_chans = in_chans + self.embed_dim = embed_dim + + def forward(self, x, x_size): + B, HW, C = x.shape + x = x.transpose(1, 2).view(B, self.embed_dim, x_size[0], x_size[1]) # B Ph*Pw C + return x + + def flops(self): + flops = 0 + return flops + + +class Upsample(nn.Sequential): + """Upsample module. + + Args: + scale (int): Scale factor. Supported scales: 2^n and 3. + num_feat (int): Channel number of intermediate features. + """ + + def __init__(self, scale, num_feat): + m = [] + if (scale & (scale - 1)) == 0: # scale = 2^n + for _ in range(int(math.log(scale, 2))): + m.append(nn.Conv2d(num_feat, 4 * num_feat, 3, 1, 1)) + m.append(nn.PixelShuffle(2)) + elif scale == 3: + m.append(nn.Conv2d(num_feat, 9 * num_feat, 3, 1, 1)) + m.append(nn.PixelShuffle(3)) + else: + raise ValueError( + f"scale {scale} is not supported. " "Supported scales: 2^n and 3." + ) + super(Upsample, self).__init__(*m) + + +class Upsample_hf(nn.Sequential): + """Upsample module. + + Args: + scale (int): Scale factor. Supported scales: 2^n and 3. + num_feat (int): Channel number of intermediate features. + """ + + def __init__(self, scale, num_feat): + m = [] + if (scale & (scale - 1)) == 0: # scale = 2^n + for _ in range(int(math.log(scale, 2))): + m.append(nn.Conv2d(num_feat, 4 * num_feat, 3, 1, 1)) + m.append(nn.PixelShuffle(2)) + elif scale == 3: + m.append(nn.Conv2d(num_feat, 9 * num_feat, 3, 1, 1)) + m.append(nn.PixelShuffle(3)) + else: + raise ValueError( + f"scale {scale} is not supported. " "Supported scales: 2^n and 3." + ) + super(Upsample_hf, self).__init__(*m) + + +class UpsampleOneStep(nn.Sequential): + """UpsampleOneStep module (the difference with Upsample is that it always only has 1conv + 1pixelshuffle) + Used in lightweight SR to save parameters. + + Args: + scale (int): Scale factor. Supported scales: 2^n and 3. + num_feat (int): Channel number of intermediate features. + + """ + + def __init__(self, scale, num_feat, num_out_ch, input_resolution=None): + self.num_feat = num_feat + self.input_resolution = input_resolution + m = [] + m.append(nn.Conv2d(num_feat, (scale**2) * num_out_ch, 3, 1, 1)) + m.append(nn.PixelShuffle(scale)) + super(UpsampleOneStep, self).__init__(*m) + + def flops(self): + H, W = self.input_resolution # type: ignore + flops = H * W * self.num_feat * 3 * 9 + return flops + + +class Swin2SR(nn.Module): + r"""Swin2SR + A PyTorch impl of : `Swin2SR: SwinV2 Transformer for Compressed Image Super-Resolution and Restoration`. + + Args: + img_size (int | tuple(int)): Input image size. Default 64 + patch_size (int | tuple(int)): Patch size. Default: 1 + in_chans (int): Number of input image channels. Default: 3 + embed_dim (int): Patch embedding dimension. Default: 96 + depths (tuple(int)): Depth of each Swin Transformer layer. + num_heads (tuple(int)): Number of attention heads in different layers. + window_size (int): Window size. Default: 7 + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. 
Default: 4 + qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True + drop_rate (float): Dropout rate. Default: 0 + attn_drop_rate (float): Attention dropout rate. Default: 0 + drop_path_rate (float): Stochastic depth rate. Default: 0.1 + norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm. + ape (bool): If True, add absolute position embedding to the patch embedding. Default: False + patch_norm (bool): If True, add normalization after patch embedding. Default: True + use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False + upscale: Upscale factor. 2/3/4/8 for image SR, 1 for denoising and compress artifact reduction + img_range: Image range. 1. or 255. + upsampler: The reconstruction reconstruction module. 'pixelshuffle'/'pixelshuffledirect'/'nearest+conv'/None + resi_connection: The convolutional block before residual connection. '1conv'/'3conv' + """ + + def __init__( + self, + state_dict, + **kwargs, + ): + super(Swin2SR, self).__init__() + + # Defaults + img_size = 128 + patch_size = 1 + in_chans = 3 + embed_dim = 96 + depths = [6, 6, 6, 6] + num_heads = [6, 6, 6, 6] + window_size = 7 + mlp_ratio = 4.0 + qkv_bias = True + drop_rate = 0.0 + attn_drop_rate = 0.0 + drop_path_rate = 0.1 + norm_layer = nn.LayerNorm + ape = False + patch_norm = True + use_checkpoint = False + upscale = 2 + img_range = 1.0 + upsampler = "" + resi_connection = "1conv" + num_in_ch = in_chans + num_out_ch = in_chans + num_feat = 64 + + self.model_arch = "Swin2SR" + self.sub_type = "SR" + self.state = state_dict + if "params_ema" in self.state: + self.state = self.state["params_ema"] + elif "params" in self.state: + self.state = self.state["params"] + + state_keys = self.state.keys() + + if "conv_before_upsample.0.weight" in state_keys: + if "conv_aux.weight" in state_keys: + upsampler = "pixelshuffle_aux" + elif "conv_up1.weight" in state_keys: + upsampler = "nearest+conv" + else: + upsampler = "pixelshuffle" + supports_fp16 = False + elif "upsample.0.weight" in state_keys: + upsampler = "pixelshuffledirect" + else: + upsampler = "" + + num_feat = ( + self.state.get("conv_before_upsample.0.weight", None).shape[1] + if self.state.get("conv_before_upsample.weight", None) + else 64 + ) + + num_in_ch = self.state["conv_first.weight"].shape[1] + in_chans = num_in_ch + if "conv_last.weight" in state_keys: + num_out_ch = self.state["conv_last.weight"].shape[0] + else: + num_out_ch = num_in_ch + + upscale = 1 + if upsampler == "nearest+conv": + upsample_keys = [ + x for x in state_keys if "conv_up" in x and "bias" not in x + ] + + for upsample_key in upsample_keys: + upscale *= 2 + elif upsampler == "pixelshuffle" or upsampler == "pixelshuffle_aux": + upsample_keys = [ + x + for x in state_keys + if "upsample" in x and "conv" not in x and "bias" not in x + ] + for upsample_key in upsample_keys: + shape = self.state[upsample_key].shape[0] + upscale *= math.sqrt(shape // num_feat) + upscale = int(upscale) + elif upsampler == "pixelshuffledirect": + upscale = int( + math.sqrt(self.state["upsample.0.bias"].shape[0] // num_out_ch) + ) + + max_layer_num = 0 + max_block_num = 0 + for key in state_keys: + result = re.match( + r"layers.(\d*).residual_group.blocks.(\d*).norm1.weight", key + ) + if result: + layer_num, block_num = result.groups() + max_layer_num = max(max_layer_num, int(layer_num)) + max_block_num = max(max_block_num, int(block_num)) + + depths = [max_block_num + 1 for _ in range(max_layer_num + 1)] + + if ( + 
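            # Note: the per-layer head count is recovered from the relative-position-bias table,
            # whose last dimension equals num_heads; if that key is absent, the code falls back
            # to reusing the block depths.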
"layers.0.residual_group.blocks.0.attn.relative_position_bias_table" + in state_keys + ): + num_heads_num = self.state[ + "layers.0.residual_group.blocks.0.attn.relative_position_bias_table" + ].shape[-1] + num_heads = [num_heads_num for _ in range(max_layer_num + 1)] + else: + num_heads = depths + + embed_dim = self.state["conv_first.weight"].shape[0] + + mlp_ratio = float( + self.state["layers.0.residual_group.blocks.0.mlp.fc1.bias"].shape[0] + / embed_dim + ) + + # TODO: could actually count the layers, but this should do + if "layers.0.conv.4.weight" in state_keys: + resi_connection = "3conv" + else: + resi_connection = "1conv" + + window_size = int( + math.sqrt( + self.state[ + "layers.0.residual_group.blocks.0.attn.relative_position_index" + ].shape[0] + ) + ) + + if "layers.0.residual_group.blocks.1.attn_mask" in state_keys: + img_size = int( + math.sqrt( + self.state["layers.0.residual_group.blocks.1.attn_mask"].shape[0] + ) + * window_size + ) + + # The JPEG models are the only ones with window-size 7, and they also use this range + img_range = 255.0 if window_size == 7 else 1.0 + + self.in_nc = num_in_ch + self.out_nc = num_out_ch + self.num_feat = num_feat + self.embed_dim = embed_dim + self.num_heads = num_heads + self.depths = depths + self.window_size = window_size + self.mlp_ratio = mlp_ratio + self.scale = upscale + self.upsampler = upsampler + self.img_size = img_size + self.img_range = img_range + self.resi_connection = resi_connection + + self.supports_fp16 = False # Too much weirdness to support this at the moment + self.supports_bfp16 = True + self.min_size_restriction = 16 + + ## END AUTO DETECTION + + if in_chans == 3: + rgb_mean = (0.4488, 0.4371, 0.4040) + self.mean = torch.Tensor(rgb_mean).view(1, 3, 1, 1) + else: + self.mean = torch.zeros(1, 1, 1, 1) + self.upscale = upscale + self.upsampler = upsampler + self.window_size = window_size + + ##################################################################################################### + ################################### 1, shallow feature extraction ################################### + self.conv_first = nn.Conv2d(num_in_ch, embed_dim, 3, 1, 1) + + ##################################################################################################### + ################################### 2, deep feature extraction ###################################### + self.num_layers = len(depths) + self.embed_dim = embed_dim + self.ape = ape + self.patch_norm = patch_norm + self.num_features = embed_dim + self.mlp_ratio = mlp_ratio + + # split image into non-overlapping patches + self.patch_embed = PatchEmbed( + img_size=img_size, + patch_size=patch_size, + in_chans=embed_dim, + embed_dim=embed_dim, + norm_layer=norm_layer if self.patch_norm else None, + ) + num_patches = self.patch_embed.num_patches + patches_resolution = self.patch_embed.patches_resolution + self.patches_resolution = patches_resolution + + # merge non-overlapping patches into image + self.patch_unembed = PatchUnEmbed( + img_size=img_size, + patch_size=patch_size, + in_chans=embed_dim, + embed_dim=embed_dim, + norm_layer=norm_layer if self.patch_norm else None, + ) + + # absolute position embedding + if self.ape: + self.absolute_pos_embed = nn.Parameter(torch.zeros(1, num_patches, embed_dim)) # type: ignore + trunc_normal_(self.absolute_pos_embed, std=0.02) + + self.pos_drop = nn.Dropout(p=drop_rate) + + # stochastic depth + dpr = [ + x.item() for x in torch.linspace(0, drop_path_rate, sum(depths)) + ] # stochastic depth decay rule + + # build Residual 
Swin Transformer blocks (RSTB) + self.layers = nn.ModuleList() + for i_layer in range(self.num_layers): + layer = RSTB( + dim=embed_dim, + input_resolution=(patches_resolution[0], patches_resolution[1]), + depth=depths[i_layer], + num_heads=num_heads[i_layer], + window_size=window_size, + mlp_ratio=self.mlp_ratio, + qkv_bias=qkv_bias, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[sum(depths[:i_layer]) : sum(depths[: i_layer + 1])], # type: ignore # no impact on SR results + norm_layer=norm_layer, + downsample=None, + use_checkpoint=use_checkpoint, + img_size=img_size, + patch_size=patch_size, + resi_connection=resi_connection, + ) + self.layers.append(layer) + + if self.upsampler == "pixelshuffle_hf": + self.layers_hf = nn.ModuleList() + for i_layer in range(self.num_layers): + layer = RSTB( + dim=embed_dim, + input_resolution=(patches_resolution[0], patches_resolution[1]), + depth=depths[i_layer], + num_heads=num_heads[i_layer], + window_size=window_size, + mlp_ratio=self.mlp_ratio, + qkv_bias=qkv_bias, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[sum(depths[:i_layer]) : sum(depths[: i_layer + 1])], # type: ignore # no impact on SR results # type: ignore + norm_layer=norm_layer, + downsample=None, + use_checkpoint=use_checkpoint, + img_size=img_size, + patch_size=patch_size, + resi_connection=resi_connection, + ) + self.layers_hf.append(layer) + + self.norm = norm_layer(self.num_features) + + # build the last conv layer in deep feature extraction + if resi_connection == "1conv": + self.conv_after_body = nn.Conv2d(embed_dim, embed_dim, 3, 1, 1) + elif resi_connection == "3conv": + # to save parameters and memory + self.conv_after_body = nn.Sequential( + nn.Conv2d(embed_dim, embed_dim // 4, 3, 1, 1), + nn.LeakyReLU(negative_slope=0.2, inplace=True), + nn.Conv2d(embed_dim // 4, embed_dim // 4, 1, 1, 0), + nn.LeakyReLU(negative_slope=0.2, inplace=True), + nn.Conv2d(embed_dim // 4, embed_dim, 3, 1, 1), + ) + + ##################################################################################################### + ################################ 3, high quality image reconstruction ################################ + if self.upsampler == "pixelshuffle": + # for classical SR + self.conv_before_upsample = nn.Sequential( + nn.Conv2d(embed_dim, num_feat, 3, 1, 1), nn.LeakyReLU(inplace=True) + ) + self.upsample = Upsample(upscale, num_feat) + self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1) + elif self.upsampler == "pixelshuffle_aux": + self.conv_bicubic = nn.Conv2d(num_in_ch, num_feat, 3, 1, 1) + self.conv_before_upsample = nn.Sequential( + nn.Conv2d(embed_dim, num_feat, 3, 1, 1), nn.LeakyReLU(inplace=True) + ) + self.conv_aux = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1) + self.conv_after_aux = nn.Sequential( + nn.Conv2d(3, num_feat, 3, 1, 1), nn.LeakyReLU(inplace=True) + ) + self.upsample = Upsample(upscale, num_feat) + self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1) + + elif self.upsampler == "pixelshuffle_hf": + self.conv_before_upsample = nn.Sequential( + nn.Conv2d(embed_dim, num_feat, 3, 1, 1), nn.LeakyReLU(inplace=True) + ) + self.upsample = Upsample(upscale, num_feat) + self.upsample_hf = Upsample_hf(upscale, num_feat) + self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1) + self.conv_first_hf = nn.Sequential( + nn.Conv2d(num_feat, embed_dim, 3, 1, 1), nn.LeakyReLU(inplace=True) + ) + self.conv_after_body_hf = nn.Conv2d(embed_dim, embed_dim, 3, 1, 1) + self.conv_before_upsample_hf = nn.Sequential( + nn.Conv2d(embed_dim, num_feat, 3, 1, 
1), nn.LeakyReLU(inplace=True) + ) + self.conv_last_hf = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1) + + elif self.upsampler == "pixelshuffledirect": + # for lightweight SR (to save parameters) + self.upsample = UpsampleOneStep( + upscale, + embed_dim, + num_out_ch, + (patches_resolution[0], patches_resolution[1]), + ) + elif self.upsampler == "nearest+conv": + # for real-world SR (less artifacts) + assert self.upscale == 4, "only support x4 now." + self.conv_before_upsample = nn.Sequential( + nn.Conv2d(embed_dim, num_feat, 3, 1, 1), nn.LeakyReLU(inplace=True) + ) + self.conv_up1 = nn.Conv2d(num_feat, num_feat, 3, 1, 1) + self.conv_up2 = nn.Conv2d(num_feat, num_feat, 3, 1, 1) + self.conv_hr = nn.Conv2d(num_feat, num_feat, 3, 1, 1) + self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1) + self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True) + else: + # for image denoising and JPEG compression artifact reduction + self.conv_last = nn.Conv2d(embed_dim, num_out_ch, 3, 1, 1) + + self.apply(self._init_weights) + + self.load_state_dict(state_dict) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=0.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + + @torch.jit.ignore # type: ignore + def no_weight_decay(self): + return {"absolute_pos_embed"} + + @torch.jit.ignore # type: ignore + def no_weight_decay_keywords(self): + return {"relative_position_bias_table"} + + def check_image_size(self, x): + _, _, h, w = x.size() + mod_pad_h = (self.window_size - h % self.window_size) % self.window_size + mod_pad_w = (self.window_size - w % self.window_size) % self.window_size + x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h), "reflect") + return x + + def forward_features(self, x): + x_size = (x.shape[2], x.shape[3]) + x = self.patch_embed(x) + if self.ape: + x = x + self.absolute_pos_embed + x = self.pos_drop(x) + + for layer in self.layers: + x = layer(x, x_size) + + x = self.norm(x) # B L C + x = self.patch_unembed(x, x_size) + + return x + + def forward_features_hf(self, x): + x_size = (x.shape[2], x.shape[3]) + x = self.patch_embed(x) + if self.ape: + x = x + self.absolute_pos_embed + x = self.pos_drop(x) + + for layer in self.layers_hf: + x = layer(x, x_size) + + x = self.norm(x) # B L C + x = self.patch_unembed(x, x_size) + + return x + + def forward(self, x): + H, W = x.shape[2:] + x = self.check_image_size(x) + + self.mean = self.mean.type_as(x) + x = (x - self.mean) * self.img_range + + if self.upsampler == "pixelshuffle": + # for classical SR + x = self.conv_first(x) + x = self.conv_after_body(self.forward_features(x)) + x + x = self.conv_before_upsample(x) + x = self.conv_last(self.upsample(x)) + elif self.upsampler == "pixelshuffle_aux": + bicubic = F.interpolate( + x, + size=(H * self.upscale, W * self.upscale), + mode="bicubic", + align_corners=False, + ) + bicubic = self.conv_bicubic(bicubic) + x = self.conv_first(x) + x = self.conv_after_body(self.forward_features(x)) + x + x = self.conv_before_upsample(x) + aux = self.conv_aux(x) # b, 3, LR_H, LR_W + x = self.conv_after_aux(aux) + x = ( + self.upsample(x)[:, :, : H * self.upscale, : W * self.upscale] + + bicubic[:, :, : H * self.upscale, : W * self.upscale] + ) + x = self.conv_last(x) + aux = aux / self.img_range + self.mean + elif self.upsampler == "pixelshuffle_hf": + # for classical SR with HF + x = self.conv_first(x) + x = 
self.conv_after_body(self.forward_features(x)) + x + x_before = self.conv_before_upsample(x) + x_out = self.conv_last(self.upsample(x_before)) + + x_hf = self.conv_first_hf(x_before) + x_hf = self.conv_after_body_hf(self.forward_features_hf(x_hf)) + x_hf + x_hf = self.conv_before_upsample_hf(x_hf) + x_hf = self.conv_last_hf(self.upsample_hf(x_hf)) + x = x_out + x_hf + x_hf = x_hf / self.img_range + self.mean + + elif self.upsampler == "pixelshuffledirect": + # for lightweight SR + x = self.conv_first(x) + x = self.conv_after_body(self.forward_features(x)) + x + x = self.upsample(x) + elif self.upsampler == "nearest+conv": + # for real-world SR + x = self.conv_first(x) + x = self.conv_after_body(self.forward_features(x)) + x + x = self.conv_before_upsample(x) + x = self.lrelu( + self.conv_up1( + torch.nn.functional.interpolate(x, scale_factor=2, mode="nearest") + ) + ) + x = self.lrelu( + self.conv_up2( + torch.nn.functional.interpolate(x, scale_factor=2, mode="nearest") + ) + ) + x = self.conv_last(self.lrelu(self.conv_hr(x))) + else: + # for image denoising and JPEG compression artifact reduction + x_first = self.conv_first(x) + res = self.conv_after_body(self.forward_features(x_first)) + x_first + x = x + self.conv_last(res) + + x = x / self.img_range + self.mean + if self.upsampler == "pixelshuffle_aux": + # NOTE: I removed an "aux" output here. not sure what that was for + return x[:, :, : H * self.upscale, : W * self.upscale] # type: ignore + + elif self.upsampler == "pixelshuffle_hf": + x_out = x_out / self.img_range + self.mean # type: ignore + return x_out[:, :, : H * self.upscale, : W * self.upscale], x[:, :, : H * self.upscale, : W * self.upscale], x_hf[:, :, : H * self.upscale, : W * self.upscale] # type: ignore + + else: + return x[:, :, : H * self.upscale, : W * self.upscale] + + def flops(self): + flops = 0 + H, W = self.patches_resolution + flops += H * W * 3 * self.embed_dim * 9 + flops += self.patch_embed.flops() + for i, layer in enumerate(self.layers): + flops += layer.flops() # type: ignore + flops += H * W * 3 * self.embed_dim * self.embed_dim + flops += self.upsample.flops() # type: ignore + return flops diff --git a/ldm_patched/pfn/architecture/SwinIR.py b/ldm_patched/pfn/architecture/SwinIR.py new file mode 100644 index 0000000000000000000000000000000000000000..439dcbcb2b12f7ff27a01490f4c2ae7b6e4eab9e --- /dev/null +++ b/ldm_patched/pfn/architecture/SwinIR.py @@ -0,0 +1,1224 @@ +# pylint: skip-file +# ----------------------------------------------------------------------------------- +# SwinIR: Image Restoration Using Swin Transformer, https://arxiv.org/abs/2108.10257 +# Originally Written by Ze Liu, Modified by Jingyun Liang. 
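A minimal usage sketch for the Swin2SR wrapper above (the checkpoint path below is hypothetical; the constructor infers the upscale factor, window size, depths, embedding width, and upsampler type from the shapes and key names in the state_dict, so only a plain, unwrapped weights file is needed):

import torch

state = torch.load("swin2sr_x4.pth", map_location="cpu")  # hypothetical checkpoint path
model = Swin2SR(state).eval()               # hyperparameters are auto-detected from the weights

lr = torch.rand(1, model.in_nc, 64, 64)     # low-resolution input in [0, 1]
with torch.no_grad():
    sr = model(lr)                          # shape: (1, out_nc, 64 * model.scale, 64 * model.scale)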
+# ----------------------------------------------------------------------------------- + +import math +import re + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.checkpoint as checkpoint + +# Originally from the timm package +from .timm.drop import DropPath +from .timm.helpers import to_2tuple +from .timm.weight_init import trunc_normal_ + + +class Mlp(nn.Module): + def __init__( + self, + in_features, + hidden_features=None, + out_features=None, + act_layer=nn.GELU, + drop=0.0, + ): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +def window_partition(x, window_size): + """ + Args: + x: (B, H, W, C) + window_size (int): window size + + Returns: + windows: (num_windows*B, window_size, window_size, C) + """ + B, H, W, C = x.shape + x = x.view(B, H // window_size, window_size, W // window_size, window_size, C) + windows = ( + x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C) + ) + return windows + + +def window_reverse(windows, window_size, H, W): + """ + Args: + windows: (num_windows*B, window_size, window_size, C) + window_size (int): Window size + H (int): Height of image + W (int): Width of image + + Returns: + x: (B, H, W, C) + """ + B = int(windows.shape[0] / (H * W / window_size / window_size)) + x = windows.view( + B, H // window_size, W // window_size, window_size, window_size, -1 + ) + x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) + return x + + +class WindowAttention(nn.Module): + r"""Window based multi-head self attention (W-MSA) module with relative position bias. + It supports both of shifted and non-shifted window. + + Args: + dim (int): Number of input channels. + window_size (tuple[int]): The height and width of the window. + num_heads (int): Number of attention heads. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set + attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0 + proj_drop (float, optional): Dropout ratio of output. 
Default: 0.0 + """ + + def __init__( + self, + dim, + window_size, + num_heads, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + ): + super().__init__() + self.dim = dim + self.window_size = window_size # Wh, Ww + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = qk_scale or head_dim**-0.5 + + # define a parameter table of relative position bias + self.relative_position_bias_table = nn.Parameter( # type: ignore + torch.zeros((2 * window_size[0] - 1) * (2 * window_size[1] - 1), num_heads) + ) # 2*Wh-1 * 2*Ww-1, nH + + # get pair-wise relative position index for each token inside the window + coords_h = torch.arange(self.window_size[0]) + coords_w = torch.arange(self.window_size[1]) + coords = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, Wh, Ww + coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww + relative_coords = ( + coords_flatten[:, :, None] - coords_flatten[:, None, :] + ) # 2, Wh*Ww, Wh*Ww + relative_coords = relative_coords.permute( + 1, 2, 0 + ).contiguous() # Wh*Ww, Wh*Ww, 2 + relative_coords[:, :, 0] += self.window_size[0] - 1 # shift to start from 0 + relative_coords[:, :, 1] += self.window_size[1] - 1 + relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1 + relative_position_index = relative_coords.sum(-1) # Wh*Ww, Wh*Ww + self.register_buffer("relative_position_index", relative_position_index) + + self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + + self.proj_drop = nn.Dropout(proj_drop) + + trunc_normal_(self.relative_position_bias_table, std=0.02) + self.softmax = nn.Softmax(dim=-1) + + def forward(self, x, mask=None): + """ + Args: + x: input features with shape of (num_windows*B, N, C) + mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None + """ + B_, N, C = x.shape + qkv = ( + self.qkv(x) + .reshape(B_, N, 3, self.num_heads, C // self.num_heads) + .permute(2, 0, 3, 1, 4) + ) + q, k, v = ( + qkv[0], + qkv[1], + qkv[2], + ) # make torchscript happy (cannot use tensor as tuple) + + q = q * self.scale + attn = q @ k.transpose(-2, -1) + + relative_position_bias = self.relative_position_bias_table[ + self.relative_position_index.view(-1) # type: ignore + ].view( + self.window_size[0] * self.window_size[1], + self.window_size[0] * self.window_size[1], + -1, + ) # Wh*Ww,Wh*Ww,nH + relative_position_bias = relative_position_bias.permute( + 2, 0, 1 + ).contiguous() # nH, Wh*Ww, Wh*Ww + attn = attn + relative_position_bias.unsqueeze(0) + + if mask is not None: + nW = mask.shape[0] + attn = attn.view(B_ // nW, nW, self.num_heads, N, N) + mask.unsqueeze( + 1 + ).unsqueeze(0) + attn = attn.view(-1, self.num_heads, N, N) + attn = self.softmax(attn) + else: + attn = self.softmax(attn) + + attn = self.attn_drop(attn) + + x = (attn @ v).transpose(1, 2).reshape(B_, N, C) + x = self.proj(x) + x = self.proj_drop(x) + return x + + def extra_repr(self) -> str: + return f"dim={self.dim}, window_size={self.window_size}, num_heads={self.num_heads}" + + def flops(self, N): + # calculate flops for 1 window with token length of N + flops = 0 + # qkv = self.qkv(x) + flops += N * self.dim * 3 * self.dim + # attn = (q @ k.transpose(-2, -1)) + flops += self.num_heads * N * (self.dim // self.num_heads) * N + # x = (attn @ v) + flops += self.num_heads * N * N * (self.dim // self.num_heads) + # x = self.proj(x) + flops += N * self.dim * self.dim + return flops + + +class SwinTransformerBlock(nn.Module): + r"""Swin Transformer Block. 
+ + Args: + dim (int): Number of input channels. + input_resolution (tuple[int]): Input resulotion. + num_heads (int): Number of attention heads. + window_size (int): Window size. + shift_size (int): Shift size for SW-MSA. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float, optional): Stochastic depth rate. Default: 0.0 + act_layer (nn.Module, optional): Activation layer. Default: nn.GELU + norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm + """ + + def __init__( + self, + dim, + input_resolution, + num_heads, + window_size=7, + shift_size=0, + mlp_ratio=4.0, + qkv_bias=True, + qk_scale=None, + drop=0.0, + attn_drop=0.0, + drop_path=0.0, + act_layer=nn.GELU, + norm_layer=nn.LayerNorm, + ): + super().__init__() + self.dim = dim + self.input_resolution = input_resolution + self.num_heads = num_heads + self.window_size = window_size + self.shift_size = shift_size + self.mlp_ratio = mlp_ratio + if min(self.input_resolution) <= self.window_size: + # if window size is larger than input resolution, we don't partition windows + self.shift_size = 0 + self.window_size = min(self.input_resolution) + assert ( + 0 <= self.shift_size < self.window_size + ), "shift_size must in 0-window_size" + + self.norm1 = norm_layer(dim) + self.attn = WindowAttention( + dim, + window_size=to_2tuple(self.window_size), + num_heads=num_heads, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop=attn_drop, + proj_drop=drop, + ) + + self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp( + in_features=dim, + hidden_features=mlp_hidden_dim, + act_layer=act_layer, + drop=drop, + ) + + if self.shift_size > 0: + attn_mask = self.calculate_mask(self.input_resolution) + else: + attn_mask = None + + self.register_buffer("attn_mask", attn_mask) + + def calculate_mask(self, x_size): + # calculate attention mask for SW-MSA + H, W = x_size + img_mask = torch.zeros((1, H, W, 1)) # 1 H W 1 + h_slices = ( + slice(0, -self.window_size), + slice(-self.window_size, -self.shift_size), + slice(-self.shift_size, None), + ) + w_slices = ( + slice(0, -self.window_size), + slice(-self.window_size, -self.shift_size), + slice(-self.shift_size, None), + ) + cnt = 0 + for h in h_slices: + for w in w_slices: + img_mask[:, h, w, :] = cnt + cnt += 1 + + mask_windows = window_partition( + img_mask, self.window_size + ) # nW, window_size, window_size, 1 + mask_windows = mask_windows.view(-1, self.window_size * self.window_size) + attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) + attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill( + attn_mask == 0, float(0.0) + ) + + return attn_mask + + def forward(self, x, x_size): + H, W = x_size + B, L, C = x.shape + # assert L == H * W, "input feature has wrong size" + + shortcut = x + x = self.norm1(x) + x = x.view(B, H, W, C) + + # cyclic shift + if self.shift_size > 0: + shifted_x = torch.roll( + x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2) + ) + else: + shifted_x = x + + # partition windows + x_windows = window_partition( + shifted_x, self.window_size + ) # nW*B, window_size, window_size, 
C + x_windows = x_windows.view( + -1, self.window_size * self.window_size, C + ) # nW*B, window_size*window_size, C + + # W-MSA/SW-MSA (to be compatible for testing on images whose shapes are the multiple of window size + if self.input_resolution == x_size: + attn_windows = self.attn( + x_windows, mask=self.attn_mask + ) # nW*B, window_size*window_size, C + else: + attn_windows = self.attn( + x_windows, mask=self.calculate_mask(x_size).to(x.device) + ) + + # merge windows + attn_windows = attn_windows.view(-1, self.window_size, self.window_size, C) + shifted_x = window_reverse(attn_windows, self.window_size, H, W) # B H' W' C + + # reverse cyclic shift + if self.shift_size > 0: + x = torch.roll( + shifted_x, shifts=(self.shift_size, self.shift_size), dims=(1, 2) + ) + else: + x = shifted_x + x = x.view(B, H * W, C) + + # FFN + x = shortcut + self.drop_path(x) + x = x + self.drop_path(self.mlp(self.norm2(x))) + + return x + + def extra_repr(self) -> str: + return ( + f"dim={self.dim}, input_resolution={self.input_resolution}, num_heads={self.num_heads}, " + f"window_size={self.window_size}, shift_size={self.shift_size}, mlp_ratio={self.mlp_ratio}" + ) + + def flops(self): + flops = 0 + H, W = self.input_resolution + # norm1 + flops += self.dim * H * W + # W-MSA/SW-MSA + nW = H * W / self.window_size / self.window_size + flops += nW * self.attn.flops(self.window_size * self.window_size) + # mlp + flops += 2 * H * W * self.dim * self.dim * self.mlp_ratio + # norm2 + flops += self.dim * H * W + return flops + + +class PatchMerging(nn.Module): + r"""Patch Merging Layer. + + Args: + input_resolution (tuple[int]): Resolution of input feature. + dim (int): Number of input channels. + norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm + """ + + def __init__(self, input_resolution, dim, norm_layer=nn.LayerNorm): + super().__init__() + self.input_resolution = input_resolution + self.dim = dim + self.reduction = nn.Linear(4 * dim, 2 * dim, bias=False) + self.norm = norm_layer(4 * dim) + + def forward(self, x): + """ + x: B, H*W, C + """ + H, W = self.input_resolution + B, L, C = x.shape + assert L == H * W, "input feature has wrong size" + assert H % 2 == 0 and W % 2 == 0, f"x size ({H}*{W}) are not even." + + x = x.view(B, H, W, C) + + x0 = x[:, 0::2, 0::2, :] # B H/2 W/2 C + x1 = x[:, 1::2, 0::2, :] # B H/2 W/2 C + x2 = x[:, 0::2, 1::2, :] # B H/2 W/2 C + x3 = x[:, 1::2, 1::2, :] # B H/2 W/2 C + x = torch.cat([x0, x1, x2, x3], -1) # B H/2 W/2 4*C + x = x.view(B, -1, 4 * C) # B H/2*W/2 4*C + + x = self.norm(x) + x = self.reduction(x) + + return x + + def extra_repr(self) -> str: + return f"input_resolution={self.input_resolution}, dim={self.dim}" + + def flops(self): + H, W = self.input_resolution + flops = H * W * self.dim + flops += (H // 2) * (W // 2) * 4 * self.dim * 2 * self.dim + return flops + + +class BasicLayer(nn.Module): + """A basic Swin Transformer layer for one stage. + + Args: + dim (int): Number of input channels. + input_resolution (tuple[int]): Input resolution. + depth (int): Number of blocks. + num_heads (int): Number of attention heads. + window_size (int): Local window size. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. 
Default: 0.0 + drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0 + norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm + downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None + use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False. + """ + + def __init__( + self, + dim, + input_resolution, + depth, + num_heads, + window_size, + mlp_ratio=4.0, + qkv_bias=True, + qk_scale=None, + drop=0.0, + attn_drop=0.0, + drop_path=0.0, + norm_layer=nn.LayerNorm, + downsample=None, + use_checkpoint=False, + ): + super().__init__() + self.dim = dim + self.input_resolution = input_resolution + self.depth = depth + self.use_checkpoint = use_checkpoint + + # build blocks + self.blocks = nn.ModuleList( + [ + SwinTransformerBlock( + dim=dim, + input_resolution=input_resolution, + num_heads=num_heads, + window_size=window_size, + shift_size=0 if (i % 2 == 0) else window_size // 2, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop, + attn_drop=attn_drop, + drop_path=drop_path[i] + if isinstance(drop_path, list) + else drop_path, + norm_layer=norm_layer, + ) + for i in range(depth) + ] + ) + + # patch merging layer + if downsample is not None: + self.downsample = downsample( + input_resolution, dim=dim, norm_layer=norm_layer + ) + else: + self.downsample = None + + def forward(self, x, x_size): + for blk in self.blocks: + if self.use_checkpoint: + x = checkpoint.checkpoint(blk, x, x_size) + else: + x = blk(x, x_size) + if self.downsample is not None: + x = self.downsample(x) + return x + + def extra_repr(self) -> str: + return f"dim={self.dim}, input_resolution={self.input_resolution}, depth={self.depth}" + + def flops(self): + flops = 0 + for blk in self.blocks: + flops += blk.flops() # type: ignore + if self.downsample is not None: + flops += self.downsample.flops() + return flops + + +class RSTB(nn.Module): + """Residual Swin Transformer Block (RSTB). + + Args: + dim (int): Number of input channels. + input_resolution (tuple[int]): Input resolution. + depth (int): Number of blocks. + num_heads (int): Number of attention heads. + window_size (int): Local window size. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0 + norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm + downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None + use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False. + img_size: Input image size. + patch_size: Patch size. + resi_connection: The convolutional block before residual connection. 
+ """ + + def __init__( + self, + dim, + input_resolution, + depth, + num_heads, + window_size, + mlp_ratio=4.0, + qkv_bias=True, + qk_scale=None, + drop=0.0, + attn_drop=0.0, + drop_path=0.0, + norm_layer=nn.LayerNorm, + downsample=None, + use_checkpoint=False, + img_size=224, + patch_size=4, + resi_connection="1conv", + ): + super(RSTB, self).__init__() + + self.dim = dim + self.input_resolution = input_resolution + + self.residual_group = BasicLayer( + dim=dim, + input_resolution=input_resolution, + depth=depth, + num_heads=num_heads, + window_size=window_size, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop, + attn_drop=attn_drop, + drop_path=drop_path, + norm_layer=norm_layer, + downsample=downsample, + use_checkpoint=use_checkpoint, + ) + + if resi_connection == "1conv": + self.conv = nn.Conv2d(dim, dim, 3, 1, 1) + elif resi_connection == "3conv": + # to save parameters and memory + self.conv = nn.Sequential( + nn.Conv2d(dim, dim // 4, 3, 1, 1), + nn.LeakyReLU(negative_slope=0.2, inplace=True), + nn.Conv2d(dim // 4, dim // 4, 1, 1, 0), + nn.LeakyReLU(negative_slope=0.2, inplace=True), + nn.Conv2d(dim // 4, dim, 3, 1, 1), + ) + + self.patch_embed = PatchEmbed( + img_size=img_size, + patch_size=patch_size, + in_chans=0, + embed_dim=dim, + norm_layer=None, + ) + + self.patch_unembed = PatchUnEmbed( + img_size=img_size, + patch_size=patch_size, + in_chans=0, + embed_dim=dim, + norm_layer=None, + ) + + def forward(self, x, x_size): + return ( + self.patch_embed( + self.conv(self.patch_unembed(self.residual_group(x, x_size), x_size)) + ) + + x + ) + + def flops(self): + flops = 0 + flops += self.residual_group.flops() + H, W = self.input_resolution + flops += H * W * self.dim * self.dim * 9 + flops += self.patch_embed.flops() + flops += self.patch_unembed.flops() + + return flops + + +class PatchEmbed(nn.Module): + r"""Image to Patch Embedding + + Args: + img_size (int): Image size. Default: 224. + patch_size (int): Patch token size. Default: 4. + in_chans (int): Number of input image channels. Default: 3. + embed_dim (int): Number of linear projection output channels. Default: 96. + norm_layer (nn.Module, optional): Normalization layer. Default: None + """ + + def __init__( + self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None + ): + super().__init__() + img_size = to_2tuple(img_size) + patch_size = to_2tuple(patch_size) + patches_resolution = [ + img_size[0] // patch_size[0], # type: ignore + img_size[1] // patch_size[1], # type: ignore + ] + self.img_size = img_size + self.patch_size = patch_size + self.patches_resolution = patches_resolution + self.num_patches = patches_resolution[0] * patches_resolution[1] + + self.in_chans = in_chans + self.embed_dim = embed_dim + + if norm_layer is not None: + self.norm = norm_layer(embed_dim) + else: + self.norm = None + + def forward(self, x): + x = x.flatten(2).transpose(1, 2) # B Ph*Pw C + if self.norm is not None: + x = self.norm(x) + return x + + def flops(self): + flops = 0 + H, W = self.img_size + if self.norm is not None: + flops += H * W * self.embed_dim # type: ignore + return flops + + +class PatchUnEmbed(nn.Module): + r"""Image to Patch Unembedding + + Args: + img_size (int): Image size. Default: 224. + patch_size (int): Patch token size. Default: 4. + in_chans (int): Number of input image channels. Default: 3. + embed_dim (int): Number of linear projection output channels. Default: 96. + norm_layer (nn.Module, optional): Normalization layer. 
Default: None + """ + + def __init__( + self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None + ): + super().__init__() + img_size = to_2tuple(img_size) + patch_size = to_2tuple(patch_size) + patches_resolution = [ + img_size[0] // patch_size[0], # type: ignore + img_size[1] // patch_size[1], # type: ignore + ] + self.img_size = img_size + self.patch_size = patch_size + self.patches_resolution = patches_resolution + self.num_patches = patches_resolution[0] * patches_resolution[1] + + self.in_chans = in_chans + self.embed_dim = embed_dim + + def forward(self, x, x_size): + B, HW, C = x.shape + x = x.transpose(1, 2).view(B, self.embed_dim, x_size[0], x_size[1]) # B Ph*Pw C + return x + + def flops(self): + flops = 0 + return flops + + +class Upsample(nn.Sequential): + """Upsample module. + + Args: + scale (int): Scale factor. Supported scales: 2^n and 3. + num_feat (int): Channel number of intermediate features. + """ + + def __init__(self, scale, num_feat): + m = [] + if (scale & (scale - 1)) == 0: # scale = 2^n + for _ in range(int(math.log(scale, 2))): + m.append(nn.Conv2d(num_feat, 4 * num_feat, 3, 1, 1)) + m.append(nn.PixelShuffle(2)) + elif scale == 3: + m.append(nn.Conv2d(num_feat, 9 * num_feat, 3, 1, 1)) + m.append(nn.PixelShuffle(3)) + else: + raise ValueError( + f"scale {scale} is not supported. " "Supported scales: 2^n and 3." + ) + super(Upsample, self).__init__(*m) + + +class UpsampleOneStep(nn.Sequential): + """UpsampleOneStep module (the difference with Upsample is that it always only has 1conv + 1pixelshuffle) + Used in lightweight SR to save parameters. + + Args: + scale (int): Scale factor. Supported scales: 2^n and 3. + num_feat (int): Channel number of intermediate features. + + """ + + def __init__(self, scale, num_feat, num_out_ch, input_resolution=None): + self.num_feat = num_feat + self.input_resolution = input_resolution + m = [] + m.append(nn.Conv2d(num_feat, (scale**2) * num_out_ch, 3, 1, 1)) + m.append(nn.PixelShuffle(scale)) + super(UpsampleOneStep, self).__init__(*m) + + def flops(self): + H, W = self.input_resolution # type: ignore + flops = H * W * self.num_feat * 3 * 9 + return flops + + +class SwinIR(nn.Module): + r"""SwinIR + A PyTorch impl of : `SwinIR: Image Restoration Using Swin Transformer`, based on Swin Transformer. + + Args: + img_size (int | tuple(int)): Input image size. Default 64 + patch_size (int | tuple(int)): Patch size. Default: 1 + in_chans (int): Number of input image channels. Default: 3 + embed_dim (int): Patch embedding dimension. Default: 96 + depths (tuple(int)): Depth of each Swin Transformer layer. + num_heads (tuple(int)): Number of attention heads in different layers. + window_size (int): Window size. Default: 7 + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4 + qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float): Override default qk scale of head_dim ** -0.5 if set. Default: None + drop_rate (float): Dropout rate. Default: 0 + attn_drop_rate (float): Attention dropout rate. Default: 0 + drop_path_rate (float): Stochastic depth rate. Default: 0.1 + norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm. + ape (bool): If True, add absolute position embedding to the patch embedding. Default: False + patch_norm (bool): If True, add normalization after patch embedding. Default: True + use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False + upscale: Upscale factor. 
2/3/4/8 for image SR, 1 for denoising and compress artifact reduction + img_range: Image range. 1. or 255. + upsampler: The reconstruction reconstruction module. 'pixelshuffle'/'pixelshuffledirect'/'nearest+conv'/None + resi_connection: The convolutional block before residual connection. '1conv'/'3conv' + """ + + def __init__( + self, + state_dict, + **kwargs, + ): + super(SwinIR, self).__init__() + + # Defaults + img_size = 64 + patch_size = 1 + in_chans = 3 + embed_dim = 96 + depths = [6, 6, 6, 6] + num_heads = [6, 6, 6, 6] + window_size = 7 + mlp_ratio = 4.0 + qkv_bias = True + qk_scale = None + drop_rate = 0.0 + attn_drop_rate = 0.0 + drop_path_rate = 0.1 + norm_layer = nn.LayerNorm + ape = False + patch_norm = True + use_checkpoint = False + upscale = 2 + img_range = 1.0 + upsampler = "" + resi_connection = "1conv" + num_feat = 64 + num_in_ch = in_chans + num_out_ch = in_chans + supports_fp16 = True + self.start_unshuffle = 1 + + self.model_arch = "SwinIR" + self.sub_type = "SR" + self.state = state_dict + if "params_ema" in self.state: + self.state = self.state["params_ema"] + elif "params" in self.state: + self.state = self.state["params"] + + state_keys = self.state.keys() + + if "conv_before_upsample.0.weight" in state_keys: + if "conv_up1.weight" in state_keys: + upsampler = "nearest+conv" + else: + upsampler = "pixelshuffle" + supports_fp16 = False + elif "upsample.0.weight" in state_keys: + upsampler = "pixelshuffledirect" + else: + upsampler = "" + + num_feat = ( + self.state.get("conv_before_upsample.0.weight", None).shape[1] + if self.state.get("conv_before_upsample.weight", None) + else 64 + ) + + if "conv_first.1.weight" in self.state: + self.state["conv_first.weight"] = self.state.pop("conv_first.1.weight") + self.state["conv_first.bias"] = self.state.pop("conv_first.1.bias") + self.start_unshuffle = round(math.sqrt(self.state["conv_first.weight"].shape[1] // 3)) + + num_in_ch = self.state["conv_first.weight"].shape[1] + in_chans = num_in_ch + if "conv_last.weight" in state_keys: + num_out_ch = self.state["conv_last.weight"].shape[0] + else: + num_out_ch = num_in_ch + + upscale = 1 + if upsampler == "nearest+conv": + upsample_keys = [ + x for x in state_keys if "conv_up" in x and "bias" not in x + ] + + for upsample_key in upsample_keys: + upscale *= 2 + elif upsampler == "pixelshuffle": + upsample_keys = [ + x + for x in state_keys + if "upsample" in x and "conv" not in x and "bias" not in x + ] + for upsample_key in upsample_keys: + shape = self.state[upsample_key].shape[0] + upscale *= math.sqrt(shape // num_feat) + upscale = int(upscale) + elif upsampler == "pixelshuffledirect": + upscale = int( + math.sqrt(self.state["upsample.0.bias"].shape[0] // num_out_ch) + ) + + max_layer_num = 0 + max_block_num = 0 + for key in state_keys: + result = re.match( + r"layers.(\d*).residual_group.blocks.(\d*).norm1.weight", key + ) + if result: + layer_num, block_num = result.groups() + max_layer_num = max(max_layer_num, int(layer_num)) + max_block_num = max(max_block_num, int(block_num)) + + depths = [max_block_num + 1 for _ in range(max_layer_num + 1)] + + if ( + "layers.0.residual_group.blocks.0.attn.relative_position_bias_table" + in state_keys + ): + num_heads_num = self.state[ + "layers.0.residual_group.blocks.0.attn.relative_position_bias_table" + ].shape[-1] + num_heads = [num_heads_num for _ in range(max_layer_num + 1)] + else: + num_heads = depths + + embed_dim = self.state["conv_first.weight"].shape[0] + + mlp_ratio = float( + 
self.state["layers.0.residual_group.blocks.0.mlp.fc1.bias"].shape[0] + / embed_dim + ) + + # TODO: could actually count the layers, but this should do + if "layers.0.conv.4.weight" in state_keys: + resi_connection = "3conv" + else: + resi_connection = "1conv" + + window_size = int( + math.sqrt( + self.state[ + "layers.0.residual_group.blocks.0.attn.relative_position_index" + ].shape[0] + ) + ) + + if "layers.0.residual_group.blocks.1.attn_mask" in state_keys: + img_size = int( + math.sqrt( + self.state["layers.0.residual_group.blocks.1.attn_mask"].shape[0] + ) + * window_size + ) + + # The JPEG models are the only ones with window-size 7, and they also use this range + img_range = 255.0 if window_size == 7 else 1.0 + + self.in_nc = num_in_ch + self.out_nc = num_out_ch + self.num_feat = num_feat + self.embed_dim = embed_dim + self.num_heads = num_heads + self.depths = depths + self.window_size = window_size + self.mlp_ratio = mlp_ratio + self.scale = upscale / self.start_unshuffle + self.upsampler = upsampler + self.img_size = img_size + self.img_range = img_range + self.resi_connection = resi_connection + + self.supports_fp16 = False # Too much weirdness to support this at the moment + self.supports_bfp16 = True + self.min_size_restriction = 16 + + self.img_range = img_range + if in_chans == 3: + rgb_mean = (0.4488, 0.4371, 0.4040) + self.mean = torch.Tensor(rgb_mean).view(1, 3, 1, 1) + else: + self.mean = torch.zeros(1, 1, 1, 1) + self.upscale = upscale + self.upsampler = upsampler + self.window_size = window_size + + ##################################################################################################### + ################################### 1, shallow feature extraction ################################### + self.conv_first = nn.Conv2d(num_in_ch, embed_dim, 3, 1, 1) + + ##################################################################################################### + ################################### 2, deep feature extraction ###################################### + self.num_layers = len(depths) + self.embed_dim = embed_dim + self.ape = ape + self.patch_norm = patch_norm + self.num_features = embed_dim + self.mlp_ratio = mlp_ratio + + # split image into non-overlapping patches + self.patch_embed = PatchEmbed( + img_size=img_size, + patch_size=patch_size, + in_chans=embed_dim, + embed_dim=embed_dim, + norm_layer=norm_layer if self.patch_norm else None, + ) + num_patches = self.patch_embed.num_patches + patches_resolution = self.patch_embed.patches_resolution + self.patches_resolution = patches_resolution + + # merge non-overlapping patches into image + self.patch_unembed = PatchUnEmbed( + img_size=img_size, + patch_size=patch_size, + in_chans=embed_dim, + embed_dim=embed_dim, + norm_layer=norm_layer if self.patch_norm else None, + ) + + # absolute position embedding + if self.ape: + self.absolute_pos_embed = nn.Parameter( # type: ignore + torch.zeros(1, num_patches, embed_dim) + ) + trunc_normal_(self.absolute_pos_embed, std=0.02) + + self.pos_drop = nn.Dropout(p=drop_rate) + + # stochastic depth + dpr = [ + x.item() for x in torch.linspace(0, drop_path_rate, sum(depths)) + ] # stochastic depth decay rule + + # build Residual Swin Transformer blocks (RSTB) + self.layers = nn.ModuleList() + for i_layer in range(self.num_layers): + layer = RSTB( + dim=embed_dim, + input_resolution=(patches_resolution[0], patches_resolution[1]), + depth=depths[i_layer], + num_heads=num_heads[i_layer], + window_size=window_size, + mlp_ratio=self.mlp_ratio, + qkv_bias=qkv_bias, + 
qk_scale=qk_scale, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[ + sum(depths[:i_layer]) : sum(depths[: i_layer + 1]) # type: ignore + ], # no impact on SR results + norm_layer=norm_layer, + downsample=None, + use_checkpoint=use_checkpoint, + img_size=img_size, + patch_size=patch_size, + resi_connection=resi_connection, + ) + self.layers.append(layer) + self.norm = norm_layer(self.num_features) + + # build the last conv layer in deep feature extraction + if resi_connection == "1conv": + self.conv_after_body = nn.Conv2d(embed_dim, embed_dim, 3, 1, 1) + elif resi_connection == "3conv": + # to save parameters and memory + self.conv_after_body = nn.Sequential( + nn.Conv2d(embed_dim, embed_dim // 4, 3, 1, 1), + nn.LeakyReLU(negative_slope=0.2, inplace=True), + nn.Conv2d(embed_dim // 4, embed_dim // 4, 1, 1, 0), + nn.LeakyReLU(negative_slope=0.2, inplace=True), + nn.Conv2d(embed_dim // 4, embed_dim, 3, 1, 1), + ) + + ##################################################################################################### + ################################ 3, high quality image reconstruction ################################ + if self.upsampler == "pixelshuffle": + # for classical SR + self.conv_before_upsample = nn.Sequential( + nn.Conv2d(embed_dim, num_feat, 3, 1, 1), nn.LeakyReLU(inplace=True) + ) + self.upsample = Upsample(upscale, num_feat) + self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1) + elif self.upsampler == "pixelshuffledirect": + # for lightweight SR (to save parameters) + self.upsample = UpsampleOneStep( + upscale, + embed_dim, + num_out_ch, + (patches_resolution[0], patches_resolution[1]), + ) + elif self.upsampler == "nearest+conv": + # for real-world SR (less artifacts) + self.conv_before_upsample = nn.Sequential( + nn.Conv2d(embed_dim, num_feat, 3, 1, 1), nn.LeakyReLU(inplace=True) + ) + self.conv_up1 = nn.Conv2d(num_feat, num_feat, 3, 1, 1) + if self.upscale == 4: + self.conv_up2 = nn.Conv2d(num_feat, num_feat, 3, 1, 1) + elif self.upscale == 8: + self.conv_up2 = nn.Conv2d(num_feat, num_feat, 3, 1, 1) + self.conv_up3 = nn.Conv2d(num_feat, num_feat, 3, 1, 1) + self.conv_hr = nn.Conv2d(num_feat, num_feat, 3, 1, 1) + self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1) + self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True) + else: + # for image denoising and JPEG compression artifact reduction + self.conv_last = nn.Conv2d(embed_dim, num_out_ch, 3, 1, 1) + + self.apply(self._init_weights) + self.load_state_dict(self.state, strict=False) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=0.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + + @torch.jit.ignore # type: ignore + def no_weight_decay(self): + return {"absolute_pos_embed"} + + @torch.jit.ignore # type: ignore + def no_weight_decay_keywords(self): + return {"relative_position_bias_table"} + + def check_image_size(self, x): + _, _, h, w = x.size() + mod_pad_h = (self.window_size - h % self.window_size) % self.window_size + mod_pad_w = (self.window_size - w % self.window_size) % self.window_size + x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h), "reflect") + return x + + def forward_features(self, x): + x_size = (x.shape[2], x.shape[3]) + x = self.patch_embed(x) + if self.ape: + x = x + self.absolute_pos_embed + x = self.pos_drop(x) + + for layer in self.layers: + x = layer(x, x_size) + + x = self.norm(x) # B L C + 
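At this point in `forward_features`, `x` is still the flattened token sequence produced by `PatchEmbed`; the next line hands it to `PatchUnEmbed` to restore the spatial layout. A minimal sketch (not part of the patch) of that round trip as implemented by `PatchEmbed.forward` and `PatchUnEmbed.forward` above — the shapes are illustrative, and `patch_size` is assumed to be 1 so each pixel is one token:

```python
import torch

# Illustrative shapes only; embed_dim=96 matches the SwinIR default above.
B, C, H, W = 1, 96, 32, 48
feat = torch.randn(B, C, H, W)

tokens = feat.flatten(2).transpose(1, 2)             # PatchEmbed: (B, H*W, C)
# ...the RSTB layers and self.norm operate on this (B, L, C) sequence...
restored = tokens.transpose(1, 2).view(B, C, H, W)   # PatchUnEmbed

assert torch.equal(feat, restored)
```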
x = self.patch_unembed(x, x_size) + + return x + + def forward(self, x): + H, W = x.shape[2:] + x = self.check_image_size(x) + + self.mean = self.mean.type_as(x) + x = (x - self.mean) * self.img_range + + if self.start_unshuffle > 1: + x = torch.nn.functional.pixel_unshuffle(x, self.start_unshuffle) + + if self.upsampler == "pixelshuffle": + # for classical SR + x = self.conv_first(x) + x = self.conv_after_body(self.forward_features(x)) + x + x = self.conv_before_upsample(x) + x = self.conv_last(self.upsample(x)) + elif self.upsampler == "pixelshuffledirect": + # for lightweight SR + x = self.conv_first(x) + x = self.conv_after_body(self.forward_features(x)) + x + x = self.upsample(x) + elif self.upsampler == "nearest+conv": + # for real-world SR + x = self.conv_first(x) + x = self.conv_after_body(self.forward_features(x)) + x + x = self.conv_before_upsample(x) + x = self.lrelu( + self.conv_up1( + torch.nn.functional.interpolate(x, scale_factor=2, mode="nearest") # type: ignore + ) + ) + if self.upscale == 4: + x = self.lrelu( + self.conv_up2( + torch.nn.functional.interpolate( # type: ignore + x, scale_factor=2, mode="nearest" + ) + ) + ) + elif self.upscale == 8: + x = self.lrelu(self.conv_up2(torch.nn.functional.interpolate(x, scale_factor=2, mode='nearest'))) + x = self.lrelu(self.conv_up3(torch.nn.functional.interpolate(x, scale_factor=2, mode='nearest'))) + x = self.conv_last(self.lrelu(self.conv_hr(x))) + else: + # for image denoising and JPEG compression artifact reduction + x_first = self.conv_first(x) + res = self.conv_after_body(self.forward_features(x_first)) + x_first + x = x + self.conv_last(res) + + x = x / self.img_range + self.mean + + return x[:, :, : H * self.upscale, : W * self.upscale] + + def flops(self): + flops = 0 + H, W = self.patches_resolution + flops += H * W * 3 * self.embed_dim * 9 + flops += self.patch_embed.flops() + for i, layer in enumerate(self.layers): + flops += layer.flops() # type: ignore + flops += H * W * 3 * self.embed_dim * self.embed_dim + flops += self.upsample.flops() # type: ignore + return flops diff --git a/ldm_patched/pfn/architecture/__init__.py b/ldm_patched/pfn/architecture/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/ldm_patched/pfn/architecture/block.py b/ldm_patched/pfn/architecture/block.py new file mode 100644 index 0000000000000000000000000000000000000000..d7bc5d227008a73c40f9087da1ee3ae2ca25a896 --- /dev/null +++ b/ldm_patched/pfn/architecture/block.py @@ -0,0 +1,546 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +from __future__ import annotations + +from collections import OrderedDict +try: + from typing import Literal +except ImportError: + from typing_extensions import Literal + +import torch +import torch.nn as nn + +#################### +# Basic blocks +#################### + + +def act(act_type: str, inplace=True, neg_slope=0.2, n_prelu=1): + # helper selecting activation + # neg_slope: for leakyrelu and init of prelu + # n_prelu: for p_relu num_parameters + act_type = act_type.lower() + if act_type == "relu": + layer = nn.ReLU(inplace) + elif act_type == "leakyrelu": + layer = nn.LeakyReLU(neg_slope, inplace) + elif act_type == "prelu": + layer = nn.PReLU(num_parameters=n_prelu, init=neg_slope) + else: + raise NotImplementedError( + "activation layer [{:s}] is not found".format(act_type) + ) + return layer + + +def norm(norm_type: str, nc: int): + # helper selecting normalization layer + norm_type = norm_type.lower() + if 
norm_type == "batch": + layer = nn.BatchNorm2d(nc, affine=True) + elif norm_type == "instance": + layer = nn.InstanceNorm2d(nc, affine=False) + else: + raise NotImplementedError( + "normalization layer [{:s}] is not found".format(norm_type) + ) + return layer + + +def pad(pad_type: str, padding): + # helper selecting padding layer + # if padding is 'zero', do by conv layers + pad_type = pad_type.lower() + if padding == 0: + return None + if pad_type == "reflect": + layer = nn.ReflectionPad2d(padding) + elif pad_type == "replicate": + layer = nn.ReplicationPad2d(padding) + else: + raise NotImplementedError( + "padding layer [{:s}] is not implemented".format(pad_type) + ) + return layer + + +def get_valid_padding(kernel_size, dilation): + kernel_size = kernel_size + (kernel_size - 1) * (dilation - 1) + padding = (kernel_size - 1) // 2 + return padding + + +class ConcatBlock(nn.Module): + # Concat the output of a submodule to its input + def __init__(self, submodule): + super(ConcatBlock, self).__init__() + self.sub = submodule + + def forward(self, x): + output = torch.cat((x, self.sub(x)), dim=1) + return output + + def __repr__(self): + tmpstr = "Identity .. \n|" + modstr = self.sub.__repr__().replace("\n", "\n|") + tmpstr = tmpstr + modstr + return tmpstr + + +class ShortcutBlock(nn.Module): + # Elementwise sum the output of a submodule to its input + def __init__(self, submodule): + super(ShortcutBlock, self).__init__() + self.sub = submodule + + def forward(self, x): + output = x + self.sub(x) + return output + + def __repr__(self): + tmpstr = "Identity + \n|" + modstr = self.sub.__repr__().replace("\n", "\n|") + tmpstr = tmpstr + modstr + return tmpstr + + +class ShortcutBlockSPSR(nn.Module): + # Elementwise sum the output of a submodule to its input + def __init__(self, submodule): + super(ShortcutBlockSPSR, self).__init__() + self.sub = submodule + + def forward(self, x): + return x, self.sub + + def __repr__(self): + tmpstr = "Identity + \n|" + modstr = self.sub.__repr__().replace("\n", "\n|") + tmpstr = tmpstr + modstr + return tmpstr + + +def sequential(*args): + # Flatten Sequential. It unwraps nn.Sequential. + if len(args) == 1: + if isinstance(args[0], OrderedDict): + raise NotImplementedError("sequential does not support OrderedDict input.") + return args[0] # No sequential is needed. 
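The helpers above (`act`, `norm`, `pad`) are small factories, and `sequential` (continued below) skips `None` entries and unwraps nested `nn.Sequential` modules; this is how `conv_block` later assembles a padded Conv -> Norm -> Act stack. A minimal sketch (not part of the patch) of that composition, assuming these helpers are in scope and with arbitrary channel counts:

```python
import torch
import torch.nn as nn

# Assemble a reflect-padded "CNA"-style stack from the helpers in this file.
p = pad("reflect", 1)                     # nn.ReflectionPad2d(1)
c = nn.Conv2d(64, 64, kernel_size=3)      # no conv padding; the pad layer handles it
n = norm("batch", 64)                     # nn.BatchNorm2d(64)
a = act("leakyrelu", neg_slope=0.2)       # nn.LeakyReLU(0.2, inplace=True)

block = sequential(p, c, n, a)            # None entries would simply be dropped
out = block(torch.randn(1, 64, 32, 32))   # -> torch.Size([1, 64, 32, 32])
```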
+ modules = [] + for module in args: + if isinstance(module, nn.Sequential): + for submodule in module.children(): + modules.append(submodule) + elif isinstance(module, nn.Module): + modules.append(module) + return nn.Sequential(*modules) + + +ConvMode = Literal["CNA", "NAC", "CNAC"] + + +# 2x2x2 Conv Block +def conv_block_2c2( + in_nc, + out_nc, + act_type="relu", +): + return sequential( + nn.Conv2d(in_nc, out_nc, kernel_size=2, padding=1), + nn.Conv2d(out_nc, out_nc, kernel_size=2, padding=0), + act(act_type) if act_type else None, + ) + + +def conv_block( + in_nc: int, + out_nc: int, + kernel_size, + stride=1, + dilation=1, + groups=1, + bias=True, + pad_type="zero", + norm_type: str | None = None, + act_type: str | None = "relu", + mode: ConvMode = "CNA", + c2x2=False, +): + """ + Conv layer with padding, normalization, activation + mode: CNA --> Conv -> Norm -> Act + NAC --> Norm -> Act --> Conv (Identity Mappings in Deep Residual Networks, ECCV16) + """ + + if c2x2: + return conv_block_2c2(in_nc, out_nc, act_type=act_type) + + assert mode in ("CNA", "NAC", "CNAC"), "Wrong conv mode [{:s}]".format(mode) + padding = get_valid_padding(kernel_size, dilation) + p = pad(pad_type, padding) if pad_type and pad_type != "zero" else None + padding = padding if pad_type == "zero" else 0 + + c = nn.Conv2d( + in_nc, + out_nc, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + bias=bias, + groups=groups, + ) + a = act(act_type) if act_type else None + if mode in ("CNA", "CNAC"): + n = norm(norm_type, out_nc) if norm_type else None + return sequential(p, c, n, a) + elif mode == "NAC": + if norm_type is None and act_type is not None: + a = act(act_type, inplace=False) + # Important! + # input----ReLU(inplace)----Conv--+----output + # |________________________| + # inplace ReLU will modify the input, therefore wrong output + n = norm(norm_type, in_nc) if norm_type else None + return sequential(n, a, p, c) + else: + assert False, f"Invalid conv mode {mode}" + + +#################### +# Useful blocks +#################### + + +class ResNetBlock(nn.Module): + """ + ResNet Block, 3-3 style + with extra residual scaling used in EDSR + (Enhanced Deep Residual Networks for Single Image Super-Resolution, CVPRW 17) + """ + + def __init__( + self, + in_nc, + mid_nc, + out_nc, + kernel_size=3, + stride=1, + dilation=1, + groups=1, + bias=True, + pad_type="zero", + norm_type=None, + act_type="relu", + mode: ConvMode = "CNA", + res_scale=1, + ): + super(ResNetBlock, self).__init__() + conv0 = conv_block( + in_nc, + mid_nc, + kernel_size, + stride, + dilation, + groups, + bias, + pad_type, + norm_type, + act_type, + mode, + ) + if mode == "CNA": + act_type = None + if mode == "CNAC": # Residual path: |-CNAC-| + act_type = None + norm_type = None + conv1 = conv_block( + mid_nc, + out_nc, + kernel_size, + stride, + dilation, + groups, + bias, + pad_type, + norm_type, + act_type, + mode, + ) + # if in_nc != out_nc: + # self.project = conv_block(in_nc, out_nc, 1, stride, dilation, 1, bias, pad_type, \ + # None, None) + # print('Need a projecter in ResNetBlock.') + # else: + # self.project = lambda x:x + self.res = sequential(conv0, conv1) + self.res_scale = res_scale + + def forward(self, x): + res = self.res(x).mul(self.res_scale) + return x + res + + +class RRDB(nn.Module): + """ + Residual in Residual Dense Block + (ESRGAN: Enhanced Super-Resolution Generative Adversarial Networks) + """ + + def __init__( + self, + nf, + kernel_size=3, + gc=32, + stride=1, + bias: bool = True, + 
pad_type="zero", + norm_type=None, + act_type="leakyrelu", + mode: ConvMode = "CNA", + _convtype="Conv2D", + _spectral_norm=False, + plus=False, + c2x2=False, + ): + super(RRDB, self).__init__() + self.RDB1 = ResidualDenseBlock_5C( + nf, + kernel_size, + gc, + stride, + bias, + pad_type, + norm_type, + act_type, + mode, + plus=plus, + c2x2=c2x2, + ) + self.RDB2 = ResidualDenseBlock_5C( + nf, + kernel_size, + gc, + stride, + bias, + pad_type, + norm_type, + act_type, + mode, + plus=plus, + c2x2=c2x2, + ) + self.RDB3 = ResidualDenseBlock_5C( + nf, + kernel_size, + gc, + stride, + bias, + pad_type, + norm_type, + act_type, + mode, + plus=plus, + c2x2=c2x2, + ) + + def forward(self, x): + out = self.RDB1(x) + out = self.RDB2(out) + out = self.RDB3(out) + return out * 0.2 + x + + +class ResidualDenseBlock_5C(nn.Module): + """ + Residual Dense Block + style: 5 convs + The core module of paper: (Residual Dense Network for Image Super-Resolution, CVPR 18) + Modified options that can be used: + - "Partial Convolution based Padding" arXiv:1811.11718 + - "Spectral normalization" arXiv:1802.05957 + - "ICASSP 2020 - ESRGAN+ : Further Improving ESRGAN" N. C. + {Rakotonirina} and A. {Rasoanaivo} + + Args: + nf (int): Channel number of intermediate features (num_feat). + gc (int): Channels for each growth (num_grow_ch: growth channel, + i.e. intermediate channels). + convtype (str): the type of convolution to use. Default: 'Conv2D' + gaussian_noise (bool): enable the ESRGAN+ gaussian noise (no new + trainable parameters) + plus (bool): enable the additional residual paths from ESRGAN+ + (adds trainable parameters) + """ + + def __init__( + self, + nf=64, + kernel_size=3, + gc=32, + stride=1, + bias: bool = True, + pad_type="zero", + norm_type=None, + act_type="leakyrelu", + mode: ConvMode = "CNA", + plus=False, + c2x2=False, + ): + super(ResidualDenseBlock_5C, self).__init__() + + ## + + self.conv1x1 = conv1x1(nf, gc) if plus else None + ## + + + self.conv1 = conv_block( + nf, + gc, + kernel_size, + stride, + bias=bias, + pad_type=pad_type, + norm_type=norm_type, + act_type=act_type, + mode=mode, + c2x2=c2x2, + ) + self.conv2 = conv_block( + nf + gc, + gc, + kernel_size, + stride, + bias=bias, + pad_type=pad_type, + norm_type=norm_type, + act_type=act_type, + mode=mode, + c2x2=c2x2, + ) + self.conv3 = conv_block( + nf + 2 * gc, + gc, + kernel_size, + stride, + bias=bias, + pad_type=pad_type, + norm_type=norm_type, + act_type=act_type, + mode=mode, + c2x2=c2x2, + ) + self.conv4 = conv_block( + nf + 3 * gc, + gc, + kernel_size, + stride, + bias=bias, + pad_type=pad_type, + norm_type=norm_type, + act_type=act_type, + mode=mode, + c2x2=c2x2, + ) + if mode == "CNA": + last_act = None + else: + last_act = act_type + self.conv5 = conv_block( + nf + 4 * gc, + nf, + 3, + stride, + bias=bias, + pad_type=pad_type, + norm_type=norm_type, + act_type=last_act, + mode=mode, + c2x2=c2x2, + ) + + def forward(self, x): + x1 = self.conv1(x) + x2 = self.conv2(torch.cat((x, x1), 1)) + if self.conv1x1: + # pylint: disable=not-callable + x2 = x2 + self.conv1x1(x) # + + x3 = self.conv3(torch.cat((x, x1, x2), 1)) + x4 = self.conv4(torch.cat((x, x1, x2, x3), 1)) + if self.conv1x1: + x4 = x4 + x2 # + + x5 = self.conv5(torch.cat((x, x1, x2, x3, x4), 1)) + return x5 * 0.2 + x + + +def conv1x1(in_planes, out_planes, stride=1): + return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) + + +#################### +# Upsampler +#################### + + +def pixelshuffle_block( + in_nc: int, + out_nc: int, + 
upscale_factor=2, + kernel_size=3, + stride=1, + bias=True, + pad_type="zero", + norm_type: str | None = None, + act_type="relu", +): + """ + Pixel shuffle layer + (Real-Time Single Image and Video Super-Resolution Using an Efficient Sub-Pixel Convolutional + Neural Network, CVPR17) + """ + conv = conv_block( + in_nc, + out_nc * (upscale_factor**2), + kernel_size, + stride, + bias=bias, + pad_type=pad_type, + norm_type=None, + act_type=None, + ) + pixel_shuffle = nn.PixelShuffle(upscale_factor) + + n = norm(norm_type, out_nc) if norm_type else None + a = act(act_type) if act_type else None + return sequential(conv, pixel_shuffle, n, a) + + +def upconv_block( + in_nc: int, + out_nc: int, + upscale_factor=2, + kernel_size=3, + stride=1, + bias=True, + pad_type="zero", + norm_type: str | None = None, + act_type="relu", + mode="nearest", + c2x2=False, +): + # Up conv + # described in https://distill.pub/2016/deconv-checkerboard/ + upsample = nn.Upsample(scale_factor=upscale_factor, mode=mode) + conv = conv_block( + in_nc, + out_nc, + kernel_size, + stride, + bias=bias, + pad_type=pad_type, + norm_type=norm_type, + act_type=act_type, + c2x2=c2x2, + ) + return sequential(upsample, conv) diff --git a/ldm_patched/pfn/architecture/face/LICENSE-GFPGAN b/ldm_patched/pfn/architecture/face/LICENSE-GFPGAN new file mode 100644 index 0000000000000000000000000000000000000000..5ac273fd509e328f396e6e4444673a3b051a4968 --- /dev/null +++ b/ldm_patched/pfn/architecture/face/LICENSE-GFPGAN @@ -0,0 +1,351 @@ +Tencent is pleased to support the open source community by making GFPGAN available. + +Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. + +GFPGAN is licensed under the Apache License Version 2.0 except for the third-party components listed below. + + +Terms of the Apache License Version 2.0: +--------------------------------------------- +Apache License + +Version 2.0, January 2004 + +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION +1. Definitions. + +“License” shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. + +“Licensor” shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. + +“Legal Entity” shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, “control” means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. + +“You” (or “Your”) shall mean an individual or Legal Entity exercising permissions granted by this License. + +“Source” form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. + +“Object” form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. + +“Work” shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). 
+ +“Derivative Works” shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. + +“Contribution” shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, “submitted” means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as “Not a Contribution.” + +“Contributor” shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: + +You must give any other recipients of the Work or Derivative Works a copy of this License; and + +You must cause any modified files to carry prominent notices stating that You changed the files; and + +You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and + +If the Work includes a “NOTICE” text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. + +You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + + + +Other dependencies and licenses: + + +Open Source Software licensed under the Apache 2.0 license and Other Licenses of the Third-Party Components therein: +--------------------------------------------- +1. basicsr +Copyright 2018-2020 BasicSR Authors + + +This BasicSR project is released under the Apache 2.0 license. + +A copy of Apache 2.0 is included in this file. + +StyleGAN2 +The codes are modified from the repository stylegan2-pytorch. Many thanks to the author - Kim Seonghyeon 😊 for translating from the official TensorFlow codes to PyTorch ones. Here is the license of stylegan2-pytorch. +The official repository is https://github.com/NVlabs/stylegan2, and here is the NVIDIA license. +DFDNet +The codes are largely modified from the repository DFDNet. Their license is Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License. + +Terms of the Nvidia License: +--------------------------------------------- + +1. Definitions + +"Licensor" means any person or entity that distributes its Work. + +"Software" means the original work of authorship made available under +this License. + +"Work" means the Software and any additions to or derivative works of +the Software that are made available under this License. + +"Nvidia Processors" means any central processing unit (CPU), graphics +processing unit (GPU), field-programmable gate array (FPGA), +application-specific integrated circuit (ASIC) or any combination +thereof designed, made, sold, or provided by Nvidia or its affiliates. + +The terms "reproduce," "reproduction," "derivative works," and +"distribution" have the meaning as provided under U.S. copyright law; +provided, however, that for the purposes of this License, derivative +works shall not include works that remain separable from, or merely +link (or bind by name) to the interfaces of, the Work. + +Works, including the Software, are "made available" under this License +by including in or with the Work either (a) a copyright notice +referencing the applicability of this License to the Work, or (b) a +copy of this License. + +2. License Grants + + 2.1 Copyright Grant. 
Subject to the terms and conditions of this + License, each Licensor grants to you a perpetual, worldwide, + non-exclusive, royalty-free, copyright license to reproduce, + prepare derivative works of, publicly display, publicly perform, + sublicense and distribute its Work and any resulting derivative + works in any form. + +3. Limitations + + 3.1 Redistribution. You may reproduce or distribute the Work only + if (a) you do so under this License, (b) you include a complete + copy of this License with your distribution, and (c) you retain + without modification any copyright, patent, trademark, or + attribution notices that are present in the Work. + + 3.2 Derivative Works. You may specify that additional or different + terms apply to the use, reproduction, and distribution of your + derivative works of the Work ("Your Terms") only if (a) Your Terms + provide that the use limitation in Section 3.3 applies to your + derivative works, and (b) you identify the specific derivative + works that are subject to Your Terms. Notwithstanding Your Terms, + this License (including the redistribution requirements in Section + 3.1) will continue to apply to the Work itself. + + 3.3 Use Limitation. The Work and any derivative works thereof only + may be used or intended for use non-commercially. The Work or + derivative works thereof may be used or intended for use by Nvidia + or its affiliates commercially or non-commercially. As used herein, + "non-commercially" means for research or evaluation purposes only. + + 3.4 Patent Claims. If you bring or threaten to bring a patent claim + against any Licensor (including any claim, cross-claim or + counterclaim in a lawsuit) to enforce any patents that you allege + are infringed by any Work, then your rights under this License from + such Licensor (including the grants in Sections 2.1 and 2.2) will + terminate immediately. + + 3.5 Trademarks. This License does not grant any rights to use any + Licensor's or its affiliates' names, logos, or trademarks, except + as necessary to reproduce the notices described in this License. + + 3.6 Termination. If you violate any term of this License, then your + rights under this License (including the grants in Sections 2.1 and + 2.2) will terminate immediately. + +4. Disclaimer of Warranty. + +THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR +NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER +THIS LICENSE. + +5. Limitation of Liability. + +EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL +THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE +SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, +INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF +OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK +(INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, +LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER +COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF +THE POSSIBILITY OF SUCH DAMAGES. 
+ +MIT License + +Copyright (c) 2019 Kim Seonghyeon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + + + +Open Source Software licensed under the BSD 3-Clause license: +--------------------------------------------- +1. torchvision +Copyright (c) Soumith Chintala 2016, +All rights reserved. + +2. torch +Copyright (c) 2016- Facebook, Inc (Adam Paszke) +Copyright (c) 2014- Facebook, Inc (Soumith Chintala) +Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert) +Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu) +Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) +Copyright (c) 2011-2013 NYU (Clement Farabet) +Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston) +Copyright (c) 2006 Idiap Research Institute (Samy Bengio) +Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz) + + +Terms of the BSD 3-Clause License: +--------------------------------------------- +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + + +Open Source Software licensed under the BSD 3-Clause License and Other Licenses of the Third-Party Components therein: +--------------------------------------------- +1. numpy +Copyright (c) 2005-2020, NumPy Developers. +All rights reserved. + +A copy of BSD 3-Clause License is included in this file. + +The NumPy repository and source distributions bundle several libraries that are +compatibly licensed. We list these here. + +Name: Numpydoc +Files: doc/sphinxext/numpydoc/* +License: BSD-2-Clause + For details, see doc/sphinxext/LICENSE.txt + +Name: scipy-sphinx-theme +Files: doc/scipy-sphinx-theme/* +License: BSD-3-Clause AND PSF-2.0 AND Apache-2.0 + For details, see doc/scipy-sphinx-theme/LICENSE.txt + +Name: lapack-lite +Files: numpy/linalg/lapack_lite/* +License: BSD-3-Clause + For details, see numpy/linalg/lapack_lite/LICENSE.txt + +Name: tempita +Files: tools/npy_tempita/* +License: MIT + For details, see tools/npy_tempita/license.txt + +Name: dragon4 +Files: numpy/core/src/multiarray/dragon4.c +License: MIT + For license text, see numpy/core/src/multiarray/dragon4.c + + + +Open Source Software licensed under the MIT license: +--------------------------------------------- +1. facexlib +Copyright (c) 2020 Xintao Wang + +2. opencv-python +Copyright (c) Olli-Pekka Heinisuo +Please note that only files in cv2 package are used. + + +Terms of the MIT License: +--------------------------------------------- +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + + +Open Source Software licensed under the MIT license and Other Licenses of the Third-Party Components therein: +--------------------------------------------- +1. tqdm +Copyright (c) 2013 noamraph + +`tqdm` is a product of collaborative work. +Unless otherwise stated, all authors (see commit logs) retain copyright +for their respective work, and release the work under the MIT licence +(text below). + +Exceptions or notable authors are listed below +in reverse chronological order: + +* files: * + MPLv2.0 2015-2020 (c) Casper da Costa-Luis + [casperdcl](https://github.com/casperdcl). +* files: tqdm/_tqdm.py + MIT 2016 (c) [PR #96] on behalf of Google Inc. +* files: tqdm/_tqdm.py setup.py README.rst MANIFEST.in .gitignore + MIT 2013 (c) Noam Yorav-Raphael, original author. + +[PR #96]: https://github.com/tqdm/tqdm/pull/96 + + +Mozilla Public Licence (MPL) v. 2.0 - Exhibit A +----------------------------------------------- + +This Source Code Form is subject to the terms of the +Mozilla Public License, v. 2.0. 
+If a copy of the MPL was not distributed with this file, +You can obtain one at https://mozilla.org/MPL/2.0/. + + +MIT License (MIT) +----------------- + +Copyright (c) 2013 noamraph + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file diff --git a/ldm_patched/pfn/architecture/face/LICENSE-RestoreFormer b/ldm_patched/pfn/architecture/face/LICENSE-RestoreFormer new file mode 100644 index 0000000000000000000000000000000000000000..5ac273fd509e328f396e6e4444673a3b051a4968 --- /dev/null +++ b/ldm_patched/pfn/architecture/face/LICENSE-RestoreFormer @@ -0,0 +1,351 @@ +Tencent is pleased to support the open source community by making GFPGAN available. + +Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. + +GFPGAN is licensed under the Apache License Version 2.0 except for the third-party components listed below. + + +Terms of the Apache License Version 2.0: +--------------------------------------------- +Apache License + +Version 2.0, January 2004 + +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION +1. Definitions. + +“License” shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. + +“Licensor” shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. + +“Legal Entity” shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, “control” means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. + +“You” (or “Your”) shall mean an individual or Legal Entity exercising permissions granted by this License. + +“Source” form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. + +“Object” form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. 
+ +“Work” shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). + +“Derivative Works” shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. + +“Contribution” shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, “submitted” means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as “Not a Contribution.” + +“Contributor” shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: + +You must give any other recipients of the Work or Derivative Works a copy of this License; and + +You must cause any modified files to carry prominent notices stating that You changed the files; and + +You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and + +If the Work includes a “NOTICE” text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. + +You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + + + +Other dependencies and licenses: + + +Open Source Software licensed under the Apache 2.0 license and Other Licenses of the Third-Party Components therein: +--------------------------------------------- +1. basicsr +Copyright 2018-2020 BasicSR Authors + + +This BasicSR project is released under the Apache 2.0 license. + +A copy of Apache 2.0 is included in this file. + +StyleGAN2 +The codes are modified from the repository stylegan2-pytorch. Many thanks to the author - Kim Seonghyeon 😊 for translating from the official TensorFlow codes to PyTorch ones. Here is the license of stylegan2-pytorch. +The official repository is https://github.com/NVlabs/stylegan2, and here is the NVIDIA license. +DFDNet +The codes are largely modified from the repository DFDNet. Their license is Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License. + +Terms of the Nvidia License: +--------------------------------------------- + +1. Definitions + +"Licensor" means any person or entity that distributes its Work. + +"Software" means the original work of authorship made available under +this License. + +"Work" means the Software and any additions to or derivative works of +the Software that are made available under this License. + +"Nvidia Processors" means any central processing unit (CPU), graphics +processing unit (GPU), field-programmable gate array (FPGA), +application-specific integrated circuit (ASIC) or any combination +thereof designed, made, sold, or provided by Nvidia or its affiliates. + +The terms "reproduce," "reproduction," "derivative works," and +"distribution" have the meaning as provided under U.S. copyright law; +provided, however, that for the purposes of this License, derivative +works shall not include works that remain separable from, or merely +link (or bind by name) to the interfaces of, the Work. + +Works, including the Software, are "made available" under this License +by including in or with the Work either (a) a copyright notice +referencing the applicability of this License to the Work, or (b) a +copy of this License. + +2. License Grants + + 2.1 Copyright Grant. 
Subject to the terms and conditions of this + License, each Licensor grants to you a perpetual, worldwide, + non-exclusive, royalty-free, copyright license to reproduce, + prepare derivative works of, publicly display, publicly perform, + sublicense and distribute its Work and any resulting derivative + works in any form. + +3. Limitations + + 3.1 Redistribution. You may reproduce or distribute the Work only + if (a) you do so under this License, (b) you include a complete + copy of this License with your distribution, and (c) you retain + without modification any copyright, patent, trademark, or + attribution notices that are present in the Work. + + 3.2 Derivative Works. You may specify that additional or different + terms apply to the use, reproduction, and distribution of your + derivative works of the Work ("Your Terms") only if (a) Your Terms + provide that the use limitation in Section 3.3 applies to your + derivative works, and (b) you identify the specific derivative + works that are subject to Your Terms. Notwithstanding Your Terms, + this License (including the redistribution requirements in Section + 3.1) will continue to apply to the Work itself. + + 3.3 Use Limitation. The Work and any derivative works thereof only + may be used or intended for use non-commercially. The Work or + derivative works thereof may be used or intended for use by Nvidia + or its affiliates commercially or non-commercially. As used herein, + "non-commercially" means for research or evaluation purposes only. + + 3.4 Patent Claims. If you bring or threaten to bring a patent claim + against any Licensor (including any claim, cross-claim or + counterclaim in a lawsuit) to enforce any patents that you allege + are infringed by any Work, then your rights under this License from + such Licensor (including the grants in Sections 2.1 and 2.2) will + terminate immediately. + + 3.5 Trademarks. This License does not grant any rights to use any + Licensor's or its affiliates' names, logos, or trademarks, except + as necessary to reproduce the notices described in this License. + + 3.6 Termination. If you violate any term of this License, then your + rights under this License (including the grants in Sections 2.1 and + 2.2) will terminate immediately. + +4. Disclaimer of Warranty. + +THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR +NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER +THIS LICENSE. + +5. Limitation of Liability. + +EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL +THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE +SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, +INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF +OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK +(INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, +LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER +COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF +THE POSSIBILITY OF SUCH DAMAGES. 
+ +MIT License + +Copyright (c) 2019 Kim Seonghyeon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + + + +Open Source Software licensed under the BSD 3-Clause license: +--------------------------------------------- +1. torchvision +Copyright (c) Soumith Chintala 2016, +All rights reserved. + +2. torch +Copyright (c) 2016- Facebook, Inc (Adam Paszke) +Copyright (c) 2014- Facebook, Inc (Soumith Chintala) +Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert) +Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu) +Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) +Copyright (c) 2011-2013 NYU (Clement Farabet) +Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston) +Copyright (c) 2006 Idiap Research Institute (Samy Bengio) +Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz) + + +Terms of the BSD 3-Clause License: +--------------------------------------------- +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + + +Open Source Software licensed under the BSD 3-Clause License and Other Licenses of the Third-Party Components therein: +--------------------------------------------- +1. numpy +Copyright (c) 2005-2020, NumPy Developers. +All rights reserved. + +A copy of BSD 3-Clause License is included in this file. + +The NumPy repository and source distributions bundle several libraries that are +compatibly licensed. We list these here. + +Name: Numpydoc +Files: doc/sphinxext/numpydoc/* +License: BSD-2-Clause + For details, see doc/sphinxext/LICENSE.txt + +Name: scipy-sphinx-theme +Files: doc/scipy-sphinx-theme/* +License: BSD-3-Clause AND PSF-2.0 AND Apache-2.0 + For details, see doc/scipy-sphinx-theme/LICENSE.txt + +Name: lapack-lite +Files: numpy/linalg/lapack_lite/* +License: BSD-3-Clause + For details, see numpy/linalg/lapack_lite/LICENSE.txt + +Name: tempita +Files: tools/npy_tempita/* +License: MIT + For details, see tools/npy_tempita/license.txt + +Name: dragon4 +Files: numpy/core/src/multiarray/dragon4.c +License: MIT + For license text, see numpy/core/src/multiarray/dragon4.c + + + +Open Source Software licensed under the MIT license: +--------------------------------------------- +1. facexlib +Copyright (c) 2020 Xintao Wang + +2. opencv-python +Copyright (c) Olli-Pekka Heinisuo +Please note that only files in cv2 package are used. + + +Terms of the MIT License: +--------------------------------------------- +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + + +Open Source Software licensed under the MIT license and Other Licenses of the Third-Party Components therein: +--------------------------------------------- +1. tqdm +Copyright (c) 2013 noamraph + +`tqdm` is a product of collaborative work. +Unless otherwise stated, all authors (see commit logs) retain copyright +for their respective work, and release the work under the MIT licence +(text below). + +Exceptions or notable authors are listed below +in reverse chronological order: + +* files: * + MPLv2.0 2015-2020 (c) Casper da Costa-Luis + [casperdcl](https://github.com/casperdcl). +* files: tqdm/_tqdm.py + MIT 2016 (c) [PR #96] on behalf of Google Inc. +* files: tqdm/_tqdm.py setup.py README.rst MANIFEST.in .gitignore + MIT 2013 (c) Noam Yorav-Raphael, original author. + +[PR #96]: https://github.com/tqdm/tqdm/pull/96 + + +Mozilla Public Licence (MPL) v. 2.0 - Exhibit A +----------------------------------------------- + +This Source Code Form is subject to the terms of the +Mozilla Public License, v. 2.0. 
+If a copy of the MPL was not distributed with this file, +You can obtain one at https://mozilla.org/MPL/2.0/. + + +MIT License (MIT) +----------------- + +Copyright (c) 2013 noamraph + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file diff --git a/ldm_patched/pfn/architecture/face/LICENSE-codeformer b/ldm_patched/pfn/architecture/face/LICENSE-codeformer new file mode 100644 index 0000000000000000000000000000000000000000..be6c4ed8048a7cb436376bbea84cb0bd726ab721 --- /dev/null +++ b/ldm_patched/pfn/architecture/face/LICENSE-codeformer @@ -0,0 +1,35 @@ +S-Lab License 1.0 + +Copyright 2022 S-Lab + +Redistribution and use for non-commercial purpose in source and +binary forms, with or without modification, are permitted provided +that the following conditions are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +In the event that redistribution and/or use for commercial purpose in +source or binary forms, with or without modification is required, +please contact the contributor(s) of the work. 
diff --git a/ldm_patched/pfn/architecture/face/arcface_arch.py b/ldm_patched/pfn/architecture/face/arcface_arch.py new file mode 100644 index 0000000000000000000000000000000000000000..b548af059a71b38c6c18cd35cbfed7bae7e55441 --- /dev/null +++ b/ldm_patched/pfn/architecture/face/arcface_arch.py @@ -0,0 +1,265 @@ +import torch.nn as nn + + +def conv3x3(inplanes, outplanes, stride=1): + """A simple wrapper for 3x3 convolution with padding. + + Args: + inplanes (int): Channel number of inputs. + outplanes (int): Channel number of outputs. + stride (int): Stride in convolution. Default: 1. + """ + return nn.Conv2d( + inplanes, outplanes, kernel_size=3, stride=stride, padding=1, bias=False + ) + + +class BasicBlock(nn.Module): + """Basic residual block used in the ResNetArcFace architecture. + + Args: + inplanes (int): Channel number of inputs. + planes (int): Channel number of outputs. + stride (int): Stride in convolution. Default: 1. + downsample (nn.Module): The downsample module. Default: None. + """ + + expansion = 1 # output channel expansion ratio + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = nn.BatchNorm2d(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = nn.BatchNorm2d(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class IRBlock(nn.Module): + """Improved residual block (IR Block) used in the ResNetArcFace architecture. + + Args: + inplanes (int): Channel number of inputs. + planes (int): Channel number of outputs. + stride (int): Stride in convolution. Default: 1. + downsample (nn.Module): The downsample module. Default: None. + use_se (bool): Whether use the SEBlock (squeeze and excitation block). Default: True. + """ + + expansion = 1 # output channel expansion ratio + + def __init__(self, inplanes, planes, stride=1, downsample=None, use_se=True): + super(IRBlock, self).__init__() + self.bn0 = nn.BatchNorm2d(inplanes) + self.conv1 = conv3x3(inplanes, inplanes) + self.bn1 = nn.BatchNorm2d(inplanes) + self.prelu = nn.PReLU() + self.conv2 = conv3x3(inplanes, planes, stride) + self.bn2 = nn.BatchNorm2d(planes) + self.downsample = downsample + self.stride = stride + self.use_se = use_se + if self.use_se: + self.se = SEBlock(planes) + + def forward(self, x): + residual = x + out = self.bn0(x) + out = self.conv1(out) + out = self.bn1(out) + out = self.prelu(out) + + out = self.conv2(out) + out = self.bn2(out) + if self.use_se: + out = self.se(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.prelu(out) + + return out + + +class Bottleneck(nn.Module): + """Bottleneck block used in the ResNetArcFace architecture. + + Args: + inplanes (int): Channel number of inputs. + planes (int): Channel number of outputs. + stride (int): Stride in convolution. Default: 1. + downsample (nn.Module): The downsample module. Default: None. 
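+
+        Note (editor's addition, not in the upstream file): the block widens its output
+        to ``planes * expansion`` (4x) channels, so callers are expected to supply a 1x1
+        ``downsample`` module (as ``ResNetArcFace._make_layer`` below does) whenever
+        ``stride != 1`` or the incoming channel count differs from ``planes * expansion``.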
+ """ + + expansion = 4 # output channel expansion ratio + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(Bottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d( + planes, planes, kernel_size=3, stride=stride, padding=1, bias=False + ) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d( + planes, planes * self.expansion, kernel_size=1, bias=False + ) + self.bn3 = nn.BatchNorm2d(planes * self.expansion) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class SEBlock(nn.Module): + """The squeeze-and-excitation block (SEBlock) used in the IRBlock. + + Args: + channel (int): Channel number of inputs. + reduction (int): Channel reduction ration. Default: 16. + """ + + def __init__(self, channel, reduction=16): + super(SEBlock, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2d( + 1 + ) # pool to 1x1 without spatial information + self.fc = nn.Sequential( + nn.Linear(channel, channel // reduction), + nn.PReLU(), + nn.Linear(channel // reduction, channel), + nn.Sigmoid(), + ) + + def forward(self, x): + b, c, _, _ = x.size() + y = self.avg_pool(x).view(b, c) + y = self.fc(y).view(b, c, 1, 1) + return x * y + + +class ResNetArcFace(nn.Module): + """ArcFace with ResNet architectures. + + Ref: ArcFace: Additive Angular Margin Loss for Deep Face Recognition. + + Args: + block (str): Block used in the ArcFace architecture. + layers (tuple(int)): Block numbers in each layer. + use_se (bool): Whether use the SEBlock (squeeze and excitation block). Default: True. 
+ """ + + def __init__(self, block, layers, use_se=True): + if block == "IRBlock": + block = IRBlock + self.inplanes = 64 + self.use_se = use_se + super(ResNetArcFace, self).__init__() + + self.conv1 = nn.Conv2d(1, 64, kernel_size=3, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.prelu = nn.PReLU() + self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2) + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2) + self.bn4 = nn.BatchNorm2d(512) + self.dropout = nn.Dropout() + self.fc5 = nn.Linear(512 * 8 * 8, 512) + self.bn5 = nn.BatchNorm1d(512) + + # initialization + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.xavier_normal_(m.weight) + elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.Linear): + nn.init.xavier_normal_(m.weight) + nn.init.constant_(m.bias, 0) + + def _make_layer(self, block, planes, num_blocks, stride=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d( + self.inplanes, + planes * block.expansion, + kernel_size=1, + stride=stride, + bias=False, + ), + nn.BatchNorm2d(planes * block.expansion), + ) + layers = [] + layers.append( + block(self.inplanes, planes, stride, downsample, use_se=self.use_se) + ) + self.inplanes = planes + for _ in range(1, num_blocks): + layers.append(block(self.inplanes, planes, use_se=self.use_se)) + + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.prelu(x) + x = self.maxpool(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + x = self.bn4(x) + x = self.dropout(x) + x = x.view(x.size(0), -1) + x = self.fc5(x) + x = self.bn5(x) + + return x diff --git a/ldm_patched/pfn/architecture/face/codeformer.py b/ldm_patched/pfn/architecture/face/codeformer.py new file mode 100644 index 0000000000000000000000000000000000000000..066140078643d2274259283163cd392bb692b409 --- /dev/null +++ b/ldm_patched/pfn/architecture/face/codeformer.py @@ -0,0 +1,790 @@ +""" +Modified from https://github.com/sczhou/CodeFormer +VQGAN code, adapted from the original created by the Unleashing Transformers authors: +https://github.com/samb-t/unleashing-transformers/blob/master/models/vqgan.py +This verison of the arch specifically was gathered from an old version of GFPGAN. If this is a problem, please contact me. 
+""" +import math +from typing import Optional + +import torch +import torch.nn as nn +import torch.nn.functional as F +import logging as logger +from torch import Tensor + + +class VectorQuantizer(nn.Module): + def __init__(self, codebook_size, emb_dim, beta): + super(VectorQuantizer, self).__init__() + self.codebook_size = codebook_size # number of embeddings + self.emb_dim = emb_dim # dimension of embedding + self.beta = beta # commitment cost used in loss term, beta * ||z_e(x)-sg[e]||^2 + self.embedding = nn.Embedding(self.codebook_size, self.emb_dim) + self.embedding.weight.data.uniform_( + -1.0 / self.codebook_size, 1.0 / self.codebook_size + ) + + def forward(self, z): + # reshape z -> (batch, height, width, channel) and flatten + z = z.permute(0, 2, 3, 1).contiguous() + z_flattened = z.view(-1, self.emb_dim) + + # distances from z to embeddings e_j (z - e)^2 = z^2 + e^2 - 2 e * z + d = ( + (z_flattened**2).sum(dim=1, keepdim=True) + + (self.embedding.weight**2).sum(1) + - 2 * torch.matmul(z_flattened, self.embedding.weight.t()) + ) + + mean_distance = torch.mean(d) + # find closest encodings + # min_encoding_indices = torch.argmin(d, dim=1).unsqueeze(1) + min_encoding_scores, min_encoding_indices = torch.topk( + d, 1, dim=1, largest=False + ) + # [0-1], higher score, higher confidence + min_encoding_scores = torch.exp(-min_encoding_scores / 10) + + min_encodings = torch.zeros( + min_encoding_indices.shape[0], self.codebook_size + ).to(z) + min_encodings.scatter_(1, min_encoding_indices, 1) + + # get quantized latent vectors + z_q = torch.matmul(min_encodings, self.embedding.weight).view(z.shape) + # compute loss for embedding + loss = torch.mean((z_q.detach() - z) ** 2) + self.beta * torch.mean( + (z_q - z.detach()) ** 2 + ) + # preserve gradients + z_q = z + (z_q - z).detach() + + # perplexity + e_mean = torch.mean(min_encodings, dim=0) + perplexity = torch.exp(-torch.sum(e_mean * torch.log(e_mean + 1e-10))) + # reshape back to match original input shape + z_q = z_q.permute(0, 3, 1, 2).contiguous() + + return ( + z_q, + loss, + { + "perplexity": perplexity, + "min_encodings": min_encodings, + "min_encoding_indices": min_encoding_indices, + "min_encoding_scores": min_encoding_scores, + "mean_distance": mean_distance, + }, + ) + + def get_codebook_feat(self, indices, shape): + # input indices: batch*token_num -> (batch*token_num)*1 + # shape: batch, height, width, channel + indices = indices.view(-1, 1) + min_encodings = torch.zeros(indices.shape[0], self.codebook_size).to(indices) + min_encodings.scatter_(1, indices, 1) + # get quantized latent vectors + z_q = torch.matmul(min_encodings.float(), self.embedding.weight) + + if shape is not None: # reshape back to match original input shape + z_q = z_q.view(shape).permute(0, 3, 1, 2).contiguous() + + return z_q + + +class GumbelQuantizer(nn.Module): + def __init__( + self, + codebook_size, + emb_dim, + num_hiddens, + straight_through=False, + kl_weight=5e-4, + temp_init=1.0, + ): + super().__init__() + self.codebook_size = codebook_size # number of embeddings + self.emb_dim = emb_dim # dimension of embedding + self.straight_through = straight_through + self.temperature = temp_init + self.kl_weight = kl_weight + self.proj = nn.Conv2d( + num_hiddens, codebook_size, 1 + ) # projects last encoder layer to quantized logits + self.embed = nn.Embedding(codebook_size, emb_dim) + + def forward(self, z): + hard = self.straight_through if self.training else True + + logits = self.proj(z) + + soft_one_hot = F.gumbel_softmax(logits, 
tau=self.temperature, dim=1, hard=hard) + + z_q = torch.einsum("b n h w, n d -> b d h w", soft_one_hot, self.embed.weight) + + # + kl divergence to the prior loss + qy = F.softmax(logits, dim=1) + diff = ( + self.kl_weight + * torch.sum(qy * torch.log(qy * self.codebook_size + 1e-10), dim=1).mean() + ) + min_encoding_indices = soft_one_hot.argmax(dim=1) + + return z_q, diff, {"min_encoding_indices": min_encoding_indices} + + +class Downsample(nn.Module): + def __init__(self, in_channels): + super().__init__() + self.conv = torch.nn.Conv2d( + in_channels, in_channels, kernel_size=3, stride=2, padding=0 + ) + + def forward(self, x): + pad = (0, 1, 0, 1) + x = torch.nn.functional.pad(x, pad, mode="constant", value=0) + x = self.conv(x) + return x + + +class Upsample(nn.Module): + def __init__(self, in_channels): + super().__init__() + self.conv = nn.Conv2d( + in_channels, in_channels, kernel_size=3, stride=1, padding=1 + ) + + def forward(self, x): + x = F.interpolate(x, scale_factor=2.0, mode="nearest") + x = self.conv(x) + + return x + + +class AttnBlock(nn.Module): + def __init__(self, in_channels): + super().__init__() + self.in_channels = in_channels + + self.norm = normalize(in_channels) + self.q = torch.nn.Conv2d( + in_channels, in_channels, kernel_size=1, stride=1, padding=0 + ) + self.k = torch.nn.Conv2d( + in_channels, in_channels, kernel_size=1, stride=1, padding=0 + ) + self.v = torch.nn.Conv2d( + in_channels, in_channels, kernel_size=1, stride=1, padding=0 + ) + self.proj_out = torch.nn.Conv2d( + in_channels, in_channels, kernel_size=1, stride=1, padding=0 + ) + + def forward(self, x): + h_ = x + h_ = self.norm(h_) + q = self.q(h_) + k = self.k(h_) + v = self.v(h_) + + # compute attention + b, c, h, w = q.shape + q = q.reshape(b, c, h * w) + q = q.permute(0, 2, 1) + k = k.reshape(b, c, h * w) + w_ = torch.bmm(q, k) + w_ = w_ * (int(c) ** (-0.5)) + w_ = F.softmax(w_, dim=2) + + # attend to values + v = v.reshape(b, c, h * w) + w_ = w_.permute(0, 2, 1) + h_ = torch.bmm(v, w_) + h_ = h_.reshape(b, c, h, w) + + h_ = self.proj_out(h_) + + return x + h_ + + +class Encoder(nn.Module): + def __init__( + self, + in_channels, + nf, + out_channels, + ch_mult, + num_res_blocks, + resolution, + attn_resolutions, + ): + super().__init__() + self.nf = nf + self.num_resolutions = len(ch_mult) + self.num_res_blocks = num_res_blocks + self.resolution = resolution + self.attn_resolutions = attn_resolutions + + curr_res = self.resolution + in_ch_mult = (1,) + tuple(ch_mult) + + blocks = [] + # initial convolution + blocks.append(nn.Conv2d(in_channels, nf, kernel_size=3, stride=1, padding=1)) + + # residual and downsampling blocks, with attention on smaller res (16x16) + for i in range(self.num_resolutions): + block_in_ch = nf * in_ch_mult[i] + block_out_ch = nf * ch_mult[i] + for _ in range(self.num_res_blocks): + blocks.append(ResBlock(block_in_ch, block_out_ch)) + block_in_ch = block_out_ch + if curr_res in attn_resolutions: + blocks.append(AttnBlock(block_in_ch)) + + if i != self.num_resolutions - 1: + blocks.append(Downsample(block_in_ch)) + curr_res = curr_res // 2 + + # non-local attention block + blocks.append(ResBlock(block_in_ch, block_in_ch)) # type: ignore + blocks.append(AttnBlock(block_in_ch)) # type: ignore + blocks.append(ResBlock(block_in_ch, block_in_ch)) # type: ignore + + # normalise and convert to latent size + blocks.append(normalize(block_in_ch)) # type: ignore + blocks.append( + nn.Conv2d(block_in_ch, out_channels, kernel_size=3, stride=1, padding=1) # type: ignore + ) + 
self.blocks = nn.ModuleList(blocks) + + def forward(self, x): + for block in self.blocks: + x = block(x) + + return x + + +class Generator(nn.Module): + def __init__(self, nf, ch_mult, res_blocks, img_size, attn_resolutions, emb_dim): + super().__init__() + self.nf = nf + self.ch_mult = ch_mult + self.num_resolutions = len(self.ch_mult) + self.num_res_blocks = res_blocks + self.resolution = img_size + self.attn_resolutions = attn_resolutions + self.in_channels = emb_dim + self.out_channels = 3 + block_in_ch = self.nf * self.ch_mult[-1] + curr_res = self.resolution // 2 ** (self.num_resolutions - 1) + + blocks = [] + # initial conv + blocks.append( + nn.Conv2d(self.in_channels, block_in_ch, kernel_size=3, stride=1, padding=1) + ) + + # non-local attention block + blocks.append(ResBlock(block_in_ch, block_in_ch)) + blocks.append(AttnBlock(block_in_ch)) + blocks.append(ResBlock(block_in_ch, block_in_ch)) + + for i in reversed(range(self.num_resolutions)): + block_out_ch = self.nf * self.ch_mult[i] + + for _ in range(self.num_res_blocks): + blocks.append(ResBlock(block_in_ch, block_out_ch)) + block_in_ch = block_out_ch + + if curr_res in self.attn_resolutions: + blocks.append(AttnBlock(block_in_ch)) + + if i != 0: + blocks.append(Upsample(block_in_ch)) + curr_res = curr_res * 2 + + blocks.append(normalize(block_in_ch)) + blocks.append( + nn.Conv2d( + block_in_ch, self.out_channels, kernel_size=3, stride=1, padding=1 + ) + ) + + self.blocks = nn.ModuleList(blocks) + + def forward(self, x): + for block in self.blocks: + x = block(x) + + return x + + +class VQAutoEncoder(nn.Module): + def __init__( + self, + img_size, + nf, + ch_mult, + quantizer="nearest", + res_blocks=2, + attn_resolutions=[16], + codebook_size=1024, + emb_dim=256, + beta=0.25, + gumbel_straight_through=False, + gumbel_kl_weight=1e-8, + model_path=None, + ): + super().__init__() + self.in_channels = 3 + self.nf = nf + self.n_blocks = res_blocks + self.codebook_size = codebook_size + self.embed_dim = emb_dim + self.ch_mult = ch_mult + self.resolution = img_size + self.attn_resolutions = attn_resolutions + self.quantizer_type = quantizer + self.encoder = Encoder( + self.in_channels, + self.nf, + self.embed_dim, + self.ch_mult, + self.n_blocks, + self.resolution, + self.attn_resolutions, + ) + if self.quantizer_type == "nearest": + self.beta = beta # 0.25 + self.quantize = VectorQuantizer( + self.codebook_size, self.embed_dim, self.beta + ) + elif self.quantizer_type == "gumbel": + self.gumbel_num_hiddens = emb_dim + self.straight_through = gumbel_straight_through + self.kl_weight = gumbel_kl_weight + self.quantize = GumbelQuantizer( + self.codebook_size, + self.embed_dim, + self.gumbel_num_hiddens, + self.straight_through, + self.kl_weight, + ) + self.generator = Generator( + nf, ch_mult, res_blocks, img_size, attn_resolutions, emb_dim + ) + + if model_path is not None: + chkpt = torch.load(model_path, map_location="cpu") + if "params_ema" in chkpt: + self.load_state_dict( + torch.load(model_path, map_location="cpu")["params_ema"] + ) + logger.info(f"vqgan is loaded from: {model_path} [params_ema]") + elif "params" in chkpt: + self.load_state_dict( + torch.load(model_path, map_location="cpu")["params"] + ) + logger.info(f"vqgan is loaded from: {model_path} [params]") + else: + raise ValueError("Wrong params!") + + def forward(self, x): + x = self.encoder(x) + quant, codebook_loss, quant_stats = self.quantize(x) + x = self.generator(quant) + return x, codebook_loss, quant_stats + + +def calc_mean_std(feat, eps=1e-5): + 
"""Calculate mean and std for adaptive_instance_normalization. + Args: + feat (Tensor): 4D tensor. + eps (float): A small value added to the variance to avoid + divide-by-zero. Default: 1e-5. + """ + size = feat.size() + assert len(size) == 4, "The input feature should be 4D tensor." + b, c = size[:2] + feat_var = feat.view(b, c, -1).var(dim=2) + eps + feat_std = feat_var.sqrt().view(b, c, 1, 1) + feat_mean = feat.view(b, c, -1).mean(dim=2).view(b, c, 1, 1) + return feat_mean, feat_std + + +def adaptive_instance_normalization(content_feat, style_feat): + """Adaptive instance normalization. + Adjust the reference features to have the similar color and illuminations + as those in the degradate features. + Args: + content_feat (Tensor): The reference feature. + style_feat (Tensor): The degradate features. + """ + size = content_feat.size() + style_mean, style_std = calc_mean_std(style_feat) + content_mean, content_std = calc_mean_std(content_feat) + normalized_feat = (content_feat - content_mean.expand(size)) / content_std.expand( + size + ) + return normalized_feat * style_std.expand(size) + style_mean.expand(size) + + +class PositionEmbeddingSine(nn.Module): + """ + This is a more standard version of the position embedding, very similar to the one + used by the Attention is all you need paper, generalized to work on images. + """ + + def __init__( + self, num_pos_feats=64, temperature=10000, normalize=False, scale=None + ): + super().__init__() + self.num_pos_feats = num_pos_feats + self.temperature = temperature + self.normalize = normalize + if scale is not None and normalize is False: + raise ValueError("normalize should be True if scale is passed") + if scale is None: + scale = 2 * math.pi + self.scale = scale + + def forward(self, x, mask=None): + if mask is None: + mask = torch.zeros( + (x.size(0), x.size(2), x.size(3)), device=x.device, dtype=torch.bool + ) + not_mask = ~mask # pylint: disable=invalid-unary-operand-type + y_embed = not_mask.cumsum(1, dtype=torch.float32) + x_embed = not_mask.cumsum(2, dtype=torch.float32) + if self.normalize: + eps = 1e-6 + y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale + x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale + + dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device) + dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats) + + pos_x = x_embed[:, :, :, None] / dim_t + pos_y = y_embed[:, :, :, None] / dim_t + pos_x = torch.stack( + (pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4 + ).flatten(3) + pos_y = torch.stack( + (pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4 + ).flatten(3) + pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2) + return pos + + +def _get_activation_fn(activation): + """Return an activation function given a string""" + if activation == "relu": + return F.relu + if activation == "gelu": + return F.gelu + if activation == "glu": + return F.glu + raise RuntimeError(f"activation should be relu/gelu, not {activation}.") + + +class TransformerSALayer(nn.Module): + def __init__( + self, embed_dim, nhead=8, dim_mlp=2048, dropout=0.0, activation="gelu" + ): + super().__init__() + self.self_attn = nn.MultiheadAttention(embed_dim, nhead, dropout=dropout) + # Implementation of Feedforward model - MLP + self.linear1 = nn.Linear(embed_dim, dim_mlp) + self.dropout = nn.Dropout(dropout) + self.linear2 = nn.Linear(dim_mlp, embed_dim) + + self.norm1 = nn.LayerNorm(embed_dim) + self.norm2 = nn.LayerNorm(embed_dim) + self.dropout1 = 
nn.Dropout(dropout) + self.dropout2 = nn.Dropout(dropout) + + self.activation = _get_activation_fn(activation) + + def with_pos_embed(self, tensor, pos: Optional[Tensor]): + return tensor if pos is None else tensor + pos + + def forward( + self, + tgt, + tgt_mask: Optional[Tensor] = None, + tgt_key_padding_mask: Optional[Tensor] = None, + query_pos: Optional[Tensor] = None, + ): + # self attention + tgt2 = self.norm1(tgt) + q = k = self.with_pos_embed(tgt2, query_pos) + tgt2 = self.self_attn( + q, k, value=tgt2, attn_mask=tgt_mask, key_padding_mask=tgt_key_padding_mask + )[0] + tgt = tgt + self.dropout1(tgt2) + + # ffn + tgt2 = self.norm2(tgt) + tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt2)))) + tgt = tgt + self.dropout2(tgt2) + return tgt + + +def normalize(in_channels): + return torch.nn.GroupNorm( + num_groups=32, num_channels=in_channels, eps=1e-6, affine=True + ) + + +@torch.jit.script # type: ignore +def swish(x): + return x * torch.sigmoid(x) + + +class ResBlock(nn.Module): + def __init__(self, in_channels, out_channels=None): + super(ResBlock, self).__init__() + self.in_channels = in_channels + self.out_channels = in_channels if out_channels is None else out_channels + self.norm1 = normalize(in_channels) + self.conv1 = nn.Conv2d( + in_channels, out_channels, kernel_size=3, stride=1, padding=1 # type: ignore + ) + self.norm2 = normalize(out_channels) + self.conv2 = nn.Conv2d( + out_channels, out_channels, kernel_size=3, stride=1, padding=1 # type: ignore + ) + if self.in_channels != self.out_channels: + self.conv_out = nn.Conv2d( + in_channels, out_channels, kernel_size=1, stride=1, padding=0 # type: ignore + ) + + def forward(self, x_in): + x = x_in + x = self.norm1(x) + x = swish(x) + x = self.conv1(x) + x = self.norm2(x) + x = swish(x) + x = self.conv2(x) + if self.in_channels != self.out_channels: + x_in = self.conv_out(x_in) + + return x + x_in + + +class Fuse_sft_block(nn.Module): + def __init__(self, in_ch, out_ch): + super().__init__() + self.encode_enc = ResBlock(2 * in_ch, out_ch) + + self.scale = nn.Sequential( + nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1), + nn.LeakyReLU(0.2, True), + nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=1), + ) + + self.shift = nn.Sequential( + nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1), + nn.LeakyReLU(0.2, True), + nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=1), + ) + + def forward(self, enc_feat, dec_feat, w=1): + enc_feat = self.encode_enc(torch.cat([enc_feat, dec_feat], dim=1)) + scale = self.scale(enc_feat) + shift = self.shift(enc_feat) + residual = w * (dec_feat * scale + shift) + out = dec_feat + residual + return out + + +class CodeFormer(VQAutoEncoder): + def __init__(self, state_dict): + dim_embd = 512 + n_head = 8 + n_layers = 9 + codebook_size = 1024 + latent_size = 256 + connect_list = ["32", "64", "128", "256"] + fix_modules = ["quantize", "generator"] + + # This is just a guess as I only have one model to look at + position_emb = state_dict["position_emb"] + dim_embd = position_emb.shape[1] + latent_size = position_emb.shape[0] + + try: + n_layers = len( + set([x.split(".")[1] for x in state_dict.keys() if "ft_layers" in x]) + ) + except: + pass + + codebook_size = state_dict["quantize.embedding.weight"].shape[0] + + # This is also just another guess + n_head_exp = ( + state_dict["ft_layers.0.self_attn.in_proj_weight"].shape[0] // dim_embd + ) + n_head = 2**n_head_exp + + in_nc = state_dict["encoder.blocks.0.weight"].shape[1] + + self.model_arch = "CodeFormer" + self.sub_type = "Face 
SR" + self.scale = 8 + self.in_nc = in_nc + self.out_nc = in_nc + + self.state = state_dict + + self.supports_fp16 = False + self.supports_bf16 = True + self.min_size_restriction = 16 + + super(CodeFormer, self).__init__( + 512, 64, [1, 2, 2, 4, 4, 8], "nearest", 2, [16], codebook_size + ) + + if fix_modules is not None: + for module in fix_modules: + for param in getattr(self, module).parameters(): + param.requires_grad = False + + self.connect_list = connect_list + self.n_layers = n_layers + self.dim_embd = dim_embd + self.dim_mlp = dim_embd * 2 + + self.position_emb = nn.Parameter(torch.zeros(latent_size, self.dim_embd)) # type: ignore + self.feat_emb = nn.Linear(256, self.dim_embd) + + # transformer + self.ft_layers = nn.Sequential( + *[ + TransformerSALayer( + embed_dim=dim_embd, nhead=n_head, dim_mlp=self.dim_mlp, dropout=0.0 + ) + for _ in range(self.n_layers) + ] + ) + + # logits_predict head + self.idx_pred_layer = nn.Sequential( + nn.LayerNorm(dim_embd), nn.Linear(dim_embd, codebook_size, bias=False) + ) + + self.channels = { + "16": 512, + "32": 256, + "64": 256, + "128": 128, + "256": 128, + "512": 64, + } + + # after second residual block for > 16, before attn layer for ==16 + self.fuse_encoder_block = { + "512": 2, + "256": 5, + "128": 8, + "64": 11, + "32": 14, + "16": 18, + } + # after first residual block for > 16, before attn layer for ==16 + self.fuse_generator_block = { + "16": 6, + "32": 9, + "64": 12, + "128": 15, + "256": 18, + "512": 21, + } + + # fuse_convs_dict + self.fuse_convs_dict = nn.ModuleDict() + for f_size in self.connect_list: + in_ch = self.channels[f_size] + self.fuse_convs_dict[f_size] = Fuse_sft_block(in_ch, in_ch) + + self.load_state_dict(state_dict) + + def _init_weights(self, module): + if isinstance(module, (nn.Linear, nn.Embedding)): + module.weight.data.normal_(mean=0.0, std=0.02) + if isinstance(module, nn.Linear) and module.bias is not None: + module.bias.data.zero_() + elif isinstance(module, nn.LayerNorm): + module.bias.data.zero_() + module.weight.data.fill_(1.0) + + def forward(self, x, weight=0.5, **kwargs): + detach_16 = True + code_only = False + adain = True + # ################### Encoder ##################### + enc_feat_dict = {} + out_list = [self.fuse_encoder_block[f_size] for f_size in self.connect_list] + for i, block in enumerate(self.encoder.blocks): + x = block(x) + if i in out_list: + enc_feat_dict[str(x.shape[-1])] = x.clone() + + lq_feat = x + # ################# Transformer ################### + # quant_feat, codebook_loss, quant_stats = self.quantize(lq_feat) + pos_emb = self.position_emb.unsqueeze(1).repeat(1, x.shape[0], 1) + # BCHW -> BC(HW) -> (HW)BC + feat_emb = self.feat_emb(lq_feat.flatten(2).permute(2, 0, 1)) + query_emb = feat_emb + # Transformer encoder + for layer in self.ft_layers: + query_emb = layer(query_emb, query_pos=pos_emb) + + # output logits + logits = self.idx_pred_layer(query_emb) # (hw)bn + logits = logits.permute(1, 0, 2) # (hw)bn -> b(hw)n + + if code_only: # for training stage II + # logits doesn't need softmax before cross_entropy loss + return logits, lq_feat + + # ################# Quantization ################### + # if self.training: + # quant_feat = torch.einsum('btn,nc->btc', [soft_one_hot, self.quantize.embedding.weight]) + # # b(hw)c -> bc(hw) -> bchw + # quant_feat = quant_feat.permute(0,2,1).view(lq_feat.shape) + # ------------ + soft_one_hot = F.softmax(logits, dim=2) + _, top_idx = torch.topk(soft_one_hot, 1, dim=2) + quant_feat = self.quantize.get_codebook_feat( + top_idx, 
shape=[x.shape[0], 16, 16, 256] # type: ignore + ) + # preserve gradients + # quant_feat = lq_feat + (quant_feat - lq_feat).detach() + + if detach_16: + quant_feat = quant_feat.detach() # for training stage III + if adain: + quant_feat = adaptive_instance_normalization(quant_feat, lq_feat) + + # ################## Generator #################### + x = quant_feat + fuse_list = [self.fuse_generator_block[f_size] for f_size in self.connect_list] + + for i, block in enumerate(self.generator.blocks): + x = block(x) + if i in fuse_list: # fuse after i-th block + f_size = str(x.shape[-1]) + if weight > 0: + x = self.fuse_convs_dict[f_size]( + enc_feat_dict[f_size].detach(), x, weight + ) + out = x + # logits doesn't need softmax before cross_entropy loss + # return out, logits, lq_feat + return out, logits diff --git a/ldm_patched/pfn/architecture/face/fused_act.py b/ldm_patched/pfn/architecture/face/fused_act.py new file mode 100644 index 0000000000000000000000000000000000000000..7ed526547b4644ac6341947a801b76d9ed798f26 --- /dev/null +++ b/ldm_patched/pfn/architecture/face/fused_act.py @@ -0,0 +1,81 @@ +# pylint: skip-file +# type: ignore +# modify from https://github.com/rosinality/stylegan2-pytorch/blob/master/op/fused_act.py # noqa:E501 + +import torch +from torch import nn +from torch.autograd import Function + +fused_act_ext = None + + +class FusedLeakyReLUFunctionBackward(Function): + @staticmethod + def forward(ctx, grad_output, out, negative_slope, scale): + ctx.save_for_backward(out) + ctx.negative_slope = negative_slope + ctx.scale = scale + + empty = grad_output.new_empty(0) + + grad_input = fused_act_ext.fused_bias_act( + grad_output, empty, out, 3, 1, negative_slope, scale + ) + + dim = [0] + + if grad_input.ndim > 2: + dim += list(range(2, grad_input.ndim)) + + grad_bias = grad_input.sum(dim).detach() + + return grad_input, grad_bias + + @staticmethod + def backward(ctx, gradgrad_input, gradgrad_bias): + (out,) = ctx.saved_tensors + gradgrad_out = fused_act_ext.fused_bias_act( + gradgrad_input, gradgrad_bias, out, 3, 1, ctx.negative_slope, ctx.scale + ) + + return gradgrad_out, None, None, None + + +class FusedLeakyReLUFunction(Function): + @staticmethod + def forward(ctx, input, bias, negative_slope, scale): + empty = input.new_empty(0) + out = fused_act_ext.fused_bias_act( + input, bias, empty, 3, 0, negative_slope, scale + ) + ctx.save_for_backward(out) + ctx.negative_slope = negative_slope + ctx.scale = scale + + return out + + @staticmethod + def backward(ctx, grad_output): + (out,) = ctx.saved_tensors + + grad_input, grad_bias = FusedLeakyReLUFunctionBackward.apply( + grad_output, out, ctx.negative_slope, ctx.scale + ) + + return grad_input, grad_bias, None, None + + +class FusedLeakyReLU(nn.Module): + def __init__(self, channel, negative_slope=0.2, scale=2**0.5): + super().__init__() + + self.bias = nn.Parameter(torch.zeros(channel)) + self.negative_slope = negative_slope + self.scale = scale + + def forward(self, input): + return fused_leaky_relu(input, self.bias, self.negative_slope, self.scale) + + +def fused_leaky_relu(input, bias, negative_slope=0.2, scale=2**0.5): + return FusedLeakyReLUFunction.apply(input, bias, negative_slope, scale) diff --git a/ldm_patched/pfn/architecture/face/gfpgan_bilinear_arch.py b/ldm_patched/pfn/architecture/face/gfpgan_bilinear_arch.py new file mode 100644 index 0000000000000000000000000000000000000000..b6e820e006f52936c3399d3d37fdf571f2385dcb --- /dev/null +++ b/ldm_patched/pfn/architecture/face/gfpgan_bilinear_arch.py @@ -0,0 +1,389 @@ +# 
pylint: skip-file +# type: ignore +import math +import random + +import torch +from torch import nn + +from .gfpganv1_arch import ResUpBlock +from .stylegan2_bilinear_arch import ( + ConvLayer, + EqualConv2d, + EqualLinear, + ResBlock, + ScaledLeakyReLU, + StyleGAN2GeneratorBilinear, +) + + +class StyleGAN2GeneratorBilinearSFT(StyleGAN2GeneratorBilinear): + """StyleGAN2 Generator with SFT modulation (Spatial Feature Transform). + It is the bilinear version. It does not use the complicated UpFirDnSmooth function that is not friendly for + deployment. It can be easily converted to the clean version: StyleGAN2GeneratorCSFT. + Args: + out_size (int): The spatial size of outputs. + num_style_feat (int): Channel number of style features. Default: 512. + num_mlp (int): Layer number of MLP style layers. Default: 8. + channel_multiplier (int): Channel multiplier for large networks of StyleGAN2. Default: 2. + lr_mlp (float): Learning rate multiplier for mlp layers. Default: 0.01. + narrow (float): The narrow ratio for channels. Default: 1. + sft_half (bool): Whether to apply SFT on half of the input channels. Default: False. + """ + + def __init__( + self, + out_size, + num_style_feat=512, + num_mlp=8, + channel_multiplier=2, + lr_mlp=0.01, + narrow=1, + sft_half=False, + ): + super(StyleGAN2GeneratorBilinearSFT, self).__init__( + out_size, + num_style_feat=num_style_feat, + num_mlp=num_mlp, + channel_multiplier=channel_multiplier, + lr_mlp=lr_mlp, + narrow=narrow, + ) + self.sft_half = sft_half + + def forward( + self, + styles, + conditions, + input_is_latent=False, + noise=None, + randomize_noise=True, + truncation=1, + truncation_latent=None, + inject_index=None, + return_latents=False, + ): + """Forward function for StyleGAN2GeneratorBilinearSFT. + Args: + styles (list[Tensor]): Sample codes of styles. + conditions (list[Tensor]): SFT conditions to generators. + input_is_latent (bool): Whether input is latent style. Default: False. + noise (Tensor | None): Input noise or None. Default: None. + randomize_noise (bool): Randomize noise, used when 'noise' is False. Default: True. + truncation (float): The truncation ratio. Default: 1. + truncation_latent (Tensor | None): The truncation latent tensor. Default: None. + inject_index (int | None): The injection index for mixing noise. Default: None. + return_latents (bool): Whether to return style latents. Default: False. 
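+
+            Note (editor's addition, not in the upstream file): the ``conditions`` are
+            consumed in (scale, shift) pairs inside the generation loop below, i.e.
+            ``out = out * conditions[i - 1] + conditions[i]``; with ``sft_half=True``
+            only the second half of the channels is modulated.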
+ """ + # style codes -> latents with Style MLP layer + if not input_is_latent: + styles = [self.style_mlp(s) for s in styles] + # noises + if noise is None: + if randomize_noise: + noise = [None] * self.num_layers # for each style conv layer + else: # use the stored noise + noise = [ + getattr(self.noises, f"noise{i}") for i in range(self.num_layers) + ] + # style truncation + if truncation < 1: + style_truncation = [] + for style in styles: + style_truncation.append( + truncation_latent + truncation * (style - truncation_latent) + ) + styles = style_truncation + # get style latents with injection + if len(styles) == 1: + inject_index = self.num_latent + + if styles[0].ndim < 3: + # repeat latent code for all the layers + latent = styles[0].unsqueeze(1).repeat(1, inject_index, 1) + else: # used for encoder with different latent code for each layer + latent = styles[0] + elif len(styles) == 2: # mixing noises + if inject_index is None: + inject_index = random.randint(1, self.num_latent - 1) + latent1 = styles[0].unsqueeze(1).repeat(1, inject_index, 1) + latent2 = ( + styles[1].unsqueeze(1).repeat(1, self.num_latent - inject_index, 1) + ) + latent = torch.cat([latent1, latent2], 1) + + # main generation + out = self.constant_input(latent.shape[0]) + out = self.style_conv1(out, latent[:, 0], noise=noise[0]) + skip = self.to_rgb1(out, latent[:, 1]) + + i = 1 + for conv1, conv2, noise1, noise2, to_rgb in zip( + self.style_convs[::2], + self.style_convs[1::2], + noise[1::2], + noise[2::2], + self.to_rgbs, + ): + out = conv1(out, latent[:, i], noise=noise1) + + # the conditions may have fewer levels + if i < len(conditions): + # SFT part to combine the conditions + if self.sft_half: # only apply SFT to half of the channels + out_same, out_sft = torch.split(out, int(out.size(1) // 2), dim=1) + out_sft = out_sft * conditions[i - 1] + conditions[i] + out = torch.cat([out_same, out_sft], dim=1) + else: # apply SFT to all the channels + out = out * conditions[i - 1] + conditions[i] + + out = conv2(out, latent[:, i + 1], noise=noise2) + skip = to_rgb(out, latent[:, i + 2], skip) # feature back to the rgb space + i += 2 + + image = skip + + if return_latents: + return image, latent + else: + return image, None + + +class GFPGANBilinear(nn.Module): + """The GFPGAN architecture: Unet + StyleGAN2 decoder with SFT. + It is the bilinear version and it does not use the complicated UpFirDnSmooth function that is not friendly for + deployment. It can be easily converted to the clean version: GFPGANv1Clean. + Ref: GFP-GAN: Towards Real-World Blind Face Restoration with Generative Facial Prior. + Args: + out_size (int): The spatial size of outputs. + num_style_feat (int): Channel number of style features. Default: 512. + channel_multiplier (int): Channel multiplier for large networks of StyleGAN2. Default: 2. + decoder_load_path (str): The path to the pre-trained decoder model (usually, the StyleGAN2). Default: None. + fix_decoder (bool): Whether to fix the decoder. Default: True. + num_mlp (int): Layer number of MLP style layers. Default: 8. + lr_mlp (float): Learning rate multiplier for mlp layers. Default: 0.01. + input_is_latent (bool): Whether input is latent style. Default: False. + different_w (bool): Whether to use different latent w for different layers. Default: False. + narrow (float): The narrow ratio for channels. Default: 1. + sft_half (bool): Whether to apply SFT on half of the input channels. Default: False. 
+ """ + + def __init__( + self, + out_size, + num_style_feat=512, + channel_multiplier=1, + decoder_load_path=None, + fix_decoder=True, + # for stylegan decoder + num_mlp=8, + lr_mlp=0.01, + input_is_latent=False, + different_w=False, + narrow=1, + sft_half=False, + ): + super(GFPGANBilinear, self).__init__() + self.input_is_latent = input_is_latent + self.different_w = different_w + self.num_style_feat = num_style_feat + self.min_size_restriction = 512 + + unet_narrow = narrow * 0.5 # by default, use a half of input channels + channels = { + "4": int(512 * unet_narrow), + "8": int(512 * unet_narrow), + "16": int(512 * unet_narrow), + "32": int(512 * unet_narrow), + "64": int(256 * channel_multiplier * unet_narrow), + "128": int(128 * channel_multiplier * unet_narrow), + "256": int(64 * channel_multiplier * unet_narrow), + "512": int(32 * channel_multiplier * unet_narrow), + "1024": int(16 * channel_multiplier * unet_narrow), + } + + self.log_size = int(math.log(out_size, 2)) + first_out_size = 2 ** (int(math.log(out_size, 2))) + + self.conv_body_first = ConvLayer( + 3, channels[f"{first_out_size}"], 1, bias=True, activate=True + ) + + # downsample + in_channels = channels[f"{first_out_size}"] + self.conv_body_down = nn.ModuleList() + for i in range(self.log_size, 2, -1): + out_channels = channels[f"{2**(i - 1)}"] + self.conv_body_down.append(ResBlock(in_channels, out_channels)) + in_channels = out_channels + + self.final_conv = ConvLayer( + in_channels, channels["4"], 3, bias=True, activate=True + ) + + # upsample + in_channels = channels["4"] + self.conv_body_up = nn.ModuleList() + for i in range(3, self.log_size + 1): + out_channels = channels[f"{2**i}"] + self.conv_body_up.append(ResUpBlock(in_channels, out_channels)) + in_channels = out_channels + + # to RGB + self.toRGB = nn.ModuleList() + for i in range(3, self.log_size + 1): + self.toRGB.append( + EqualConv2d( + channels[f"{2**i}"], + 3, + 1, + stride=1, + padding=0, + bias=True, + bias_init_val=0, + ) + ) + + if different_w: + linear_out_channel = (int(math.log(out_size, 2)) * 2 - 2) * num_style_feat + else: + linear_out_channel = num_style_feat + + self.final_linear = EqualLinear( + channels["4"] * 4 * 4, + linear_out_channel, + bias=True, + bias_init_val=0, + lr_mul=1, + activation=None, + ) + + # the decoder: stylegan2 generator with SFT modulations + self.stylegan_decoder = StyleGAN2GeneratorBilinearSFT( + out_size=out_size, + num_style_feat=num_style_feat, + num_mlp=num_mlp, + channel_multiplier=channel_multiplier, + lr_mlp=lr_mlp, + narrow=narrow, + sft_half=sft_half, + ) + + # load pre-trained stylegan2 model if necessary + if decoder_load_path: + self.stylegan_decoder.load_state_dict( + torch.load( + decoder_load_path, map_location=lambda storage, loc: storage + )["params_ema"] + ) + # fix decoder without updating params + if fix_decoder: + for _, param in self.stylegan_decoder.named_parameters(): + param.requires_grad = False + + # for SFT modulations (scale and shift) + self.condition_scale = nn.ModuleList() + self.condition_shift = nn.ModuleList() + for i in range(3, self.log_size + 1): + out_channels = channels[f"{2**i}"] + if sft_half: + sft_out_channels = out_channels + else: + sft_out_channels = out_channels * 2 + self.condition_scale.append( + nn.Sequential( + EqualConv2d( + out_channels, + out_channels, + 3, + stride=1, + padding=1, + bias=True, + bias_init_val=0, + ), + ScaledLeakyReLU(0.2), + EqualConv2d( + out_channels, + sft_out_channels, + 3, + stride=1, + padding=1, + bias=True, + bias_init_val=1, + ), + ) + 
) + self.condition_shift.append( + nn.Sequential( + EqualConv2d( + out_channels, + out_channels, + 3, + stride=1, + padding=1, + bias=True, + bias_init_val=0, + ), + ScaledLeakyReLU(0.2), + EqualConv2d( + out_channels, + sft_out_channels, + 3, + stride=1, + padding=1, + bias=True, + bias_init_val=0, + ), + ) + ) + + def forward(self, x, return_latents=False, return_rgb=True, randomize_noise=True): + """Forward function for GFPGANBilinear. + Args: + x (Tensor): Input images. + return_latents (bool): Whether to return style latents. Default: False. + return_rgb (bool): Whether to return intermediate rgb images. Default: True. + randomize_noise (bool): Randomize noise, used when 'noise' is False. Default: True. + """ + conditions = [] + unet_skips = [] + out_rgbs = [] + + # encoder + feat = self.conv_body_first(x) + for i in range(self.log_size - 2): + feat = self.conv_body_down[i](feat) + unet_skips.insert(0, feat) + + feat = self.final_conv(feat) + + # style code + style_code = self.final_linear(feat.view(feat.size(0), -1)) + if self.different_w: + style_code = style_code.view(style_code.size(0), -1, self.num_style_feat) + + # decode + for i in range(self.log_size - 2): + # add unet skip + feat = feat + unet_skips[i] + # ResUpLayer + feat = self.conv_body_up[i](feat) + # generate scale and shift for SFT layers + scale = self.condition_scale[i](feat) + conditions.append(scale.clone()) + shift = self.condition_shift[i](feat) + conditions.append(shift.clone()) + # generate rgb images + if return_rgb: + out_rgbs.append(self.toRGB[i](feat)) + + # decoder + image, _ = self.stylegan_decoder( + [style_code], + conditions, + return_latents=return_latents, + input_is_latent=self.input_is_latent, + randomize_noise=randomize_noise, + ) + + return image, out_rgbs diff --git a/ldm_patched/pfn/architecture/face/gfpganv1_arch.py b/ldm_patched/pfn/architecture/face/gfpganv1_arch.py new file mode 100644 index 0000000000000000000000000000000000000000..72d72fc865ec35b2ccd23f13b3d8ef0be5dbaf7a --- /dev/null +++ b/ldm_patched/pfn/architecture/face/gfpganv1_arch.py @@ -0,0 +1,566 @@ +# pylint: skip-file +# type: ignore +import math +import random + +import torch +from torch import nn +from torch.nn import functional as F + +from .fused_act import FusedLeakyReLU +from .stylegan2_arch import ( + ConvLayer, + EqualConv2d, + EqualLinear, + ResBlock, + ScaledLeakyReLU, + StyleGAN2Generator, +) + + +class StyleGAN2GeneratorSFT(StyleGAN2Generator): + """StyleGAN2 Generator with SFT modulation (Spatial Feature Transform). + Args: + out_size (int): The spatial size of outputs. + num_style_feat (int): Channel number of style features. Default: 512. + num_mlp (int): Layer number of MLP style layers. Default: 8. + channel_multiplier (int): Channel multiplier for large networks of StyleGAN2. Default: 2. + resample_kernel (list[int]): A list indicating the 1D resample kernel magnitude. A cross product will be + applied to extend the 1D resample kernel to a 2D resample kernel. Default: (1, 3, 3, 1). + lr_mlp (float): Learning rate multiplier for mlp layers. Default: 0.01. + narrow (float): The narrow ratio for channels. Default: 1. + sft_half (bool): Whether to apply SFT on half of the input channels. Default: False.
+ """ + + def __init__( + self, + out_size, + num_style_feat=512, + num_mlp=8, + channel_multiplier=2, + resample_kernel=(1, 3, 3, 1), + lr_mlp=0.01, + narrow=1, + sft_half=False, + ): + super(StyleGAN2GeneratorSFT, self).__init__( + out_size, + num_style_feat=num_style_feat, + num_mlp=num_mlp, + channel_multiplier=channel_multiplier, + resample_kernel=resample_kernel, + lr_mlp=lr_mlp, + narrow=narrow, + ) + self.sft_half = sft_half + + def forward( + self, + styles, + conditions, + input_is_latent=False, + noise=None, + randomize_noise=True, + truncation=1, + truncation_latent=None, + inject_index=None, + return_latents=False, + ): + """Forward function for StyleGAN2GeneratorSFT. + Args: + styles (list[Tensor]): Sample codes of styles. + conditions (list[Tensor]): SFT conditions to generators. + input_is_latent (bool): Whether input is latent style. Default: False. + noise (Tensor | None): Input noise or None. Default: None. + randomize_noise (bool): Randomize noise, used when 'noise' is False. Default: True. + truncation (float): The truncation ratio. Default: 1. + truncation_latent (Tensor | None): The truncation latent tensor. Default: None. + inject_index (int | None): The injection index for mixing noise. Default: None. + return_latents (bool): Whether to return style latents. Default: False. + """ + # style codes -> latents with Style MLP layer + if not input_is_latent: + styles = [self.style_mlp(s) for s in styles] + # noises + if noise is None: + if randomize_noise: + noise = [None] * self.num_layers # for each style conv layer + else: # use the stored noise + noise = [ + getattr(self.noises, f"noise{i}") for i in range(self.num_layers) + ] + # style truncation + if truncation < 1: + style_truncation = [] + for style in styles: + style_truncation.append( + truncation_latent + truncation * (style - truncation_latent) + ) + styles = style_truncation + # get style latents with injection + if len(styles) == 1: + inject_index = self.num_latent + + if styles[0].ndim < 3: + # repeat latent code for all the layers + latent = styles[0].unsqueeze(1).repeat(1, inject_index, 1) + else: # used for encoder with different latent code for each layer + latent = styles[0] + elif len(styles) == 2: # mixing noises + if inject_index is None: + inject_index = random.randint(1, self.num_latent - 1) + latent1 = styles[0].unsqueeze(1).repeat(1, inject_index, 1) + latent2 = ( + styles[1].unsqueeze(1).repeat(1, self.num_latent - inject_index, 1) + ) + latent = torch.cat([latent1, latent2], 1) + + # main generation + out = self.constant_input(latent.shape[0]) + out = self.style_conv1(out, latent[:, 0], noise=noise[0]) + skip = self.to_rgb1(out, latent[:, 1]) + + i = 1 + for conv1, conv2, noise1, noise2, to_rgb in zip( + self.style_convs[::2], + self.style_convs[1::2], + noise[1::2], + noise[2::2], + self.to_rgbs, + ): + out = conv1(out, latent[:, i], noise=noise1) + + # the conditions may have fewer levels + if i < len(conditions): + # SFT part to combine the conditions + if self.sft_half: # only apply SFT to half of the channels + out_same, out_sft = torch.split(out, int(out.size(1) // 2), dim=1) + out_sft = out_sft * conditions[i - 1] + conditions[i] + out = torch.cat([out_same, out_sft], dim=1) + else: # apply SFT to all the channels + out = out * conditions[i - 1] + conditions[i] + + out = conv2(out, latent[:, i + 1], noise=noise2) + skip = to_rgb(out, latent[:, i + 2], skip) # feature back to the rgb space + i += 2 + + image = skip + + if return_latents: + return image, latent + else: + return image, 
None + + +class ConvUpLayer(nn.Module): + """Convolutional upsampling layer. It uses bilinear upsampler + Conv. + Args: + in_channels (int): Channel number of the input. + out_channels (int): Channel number of the output. + kernel_size (int): Size of the convolving kernel. + stride (int): Stride of the convolution. Default: 1 + padding (int): Zero-padding added to both sides of the input. Default: 0. + bias (bool): If ``True``, adds a learnable bias to the output. Default: ``True``. + bias_init_val (float): Bias initialized value. Default: 0. + activate (bool): Whether use activateion. Default: True. + """ + + def __init__( + self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + bias=True, + bias_init_val=0, + activate=True, + ): + super(ConvUpLayer, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = kernel_size + self.stride = stride + self.padding = padding + # self.scale is used to scale the convolution weights, which is related to the common initializations. + self.scale = 1 / math.sqrt(in_channels * kernel_size**2) + + self.weight = nn.Parameter( + torch.randn(out_channels, in_channels, kernel_size, kernel_size) + ) + + if bias and not activate: + self.bias = nn.Parameter(torch.zeros(out_channels).fill_(bias_init_val)) + else: + self.register_parameter("bias", None) + + # activation + if activate: + if bias: + self.activation = FusedLeakyReLU(out_channels) + else: + self.activation = ScaledLeakyReLU(0.2) + else: + self.activation = None + + def forward(self, x): + # bilinear upsample + out = F.interpolate(x, scale_factor=2, mode="bilinear", align_corners=False) + # conv + out = F.conv2d( + out, + self.weight * self.scale, + bias=self.bias, + stride=self.stride, + padding=self.padding, + ) + # activation + if self.activation is not None: + out = self.activation(out) + return out + + +class ResUpBlock(nn.Module): + """Residual block with upsampling. + Args: + in_channels (int): Channel number of the input. + out_channels (int): Channel number of the output. + """ + + def __init__(self, in_channels, out_channels): + super(ResUpBlock, self).__init__() + + self.conv1 = ConvLayer(in_channels, in_channels, 3, bias=True, activate=True) + self.conv2 = ConvUpLayer( + in_channels, out_channels, 3, stride=1, padding=1, bias=True, activate=True + ) + self.skip = ConvUpLayer( + in_channels, out_channels, 1, bias=False, activate=False + ) + + def forward(self, x): + out = self.conv1(x) + out = self.conv2(out) + skip = self.skip(x) + out = (out + skip) / math.sqrt(2) + return out + + +class GFPGANv1(nn.Module): + """The GFPGAN architecture: Unet + StyleGAN2 decoder with SFT. + Ref: GFP-GAN: Towards Real-World Blind Face Restoration with Generative Facial Prior. + Args: + out_size (int): The spatial size of outputs. + num_style_feat (int): Channel number of style features. Default: 512. + channel_multiplier (int): Channel multiplier for large networks of StyleGAN2. Default: 2. + resample_kernel (list[int]): A list indicating the 1D resample kernel magnitude. A cross production will be + applied to extent 1D resample kernel to 2D resample kernel. Default: (1, 3, 3, 1). + decoder_load_path (str): The path to the pre-trained decoder model (usually, the StyleGAN2). Default: None. + fix_decoder (bool): Whether to fix the decoder. Default: True. + num_mlp (int): Layer number of MLP style layers. Default: 8. + lr_mlp (float): Learning rate multiplier for mlp layers. Default: 0.01. 
+ input_is_latent (bool): Whether input is latent style. Default: False. + different_w (bool): Whether to use different latent w for different layers. Default: False. + narrow (float): The narrow ratio for channels. Default: 1. + sft_half (bool): Whether to apply SFT on half of the input channels. Default: False. + """ + + def __init__( + self, + out_size, + num_style_feat=512, + channel_multiplier=1, + resample_kernel=(1, 3, 3, 1), + decoder_load_path=None, + fix_decoder=True, + # for stylegan decoder + num_mlp=8, + lr_mlp=0.01, + input_is_latent=False, + different_w=False, + narrow=1, + sft_half=False, + ): + super(GFPGANv1, self).__init__() + self.input_is_latent = input_is_latent + self.different_w = different_w + self.num_style_feat = num_style_feat + + unet_narrow = narrow * 0.5 # by default, use a half of input channels + channels = { + "4": int(512 * unet_narrow), + "8": int(512 * unet_narrow), + "16": int(512 * unet_narrow), + "32": int(512 * unet_narrow), + "64": int(256 * channel_multiplier * unet_narrow), + "128": int(128 * channel_multiplier * unet_narrow), + "256": int(64 * channel_multiplier * unet_narrow), + "512": int(32 * channel_multiplier * unet_narrow), + "1024": int(16 * channel_multiplier * unet_narrow), + } + + self.log_size = int(math.log(out_size, 2)) + first_out_size = 2 ** (int(math.log(out_size, 2))) + + self.conv_body_first = ConvLayer( + 3, channels[f"{first_out_size}"], 1, bias=True, activate=True + ) + + # downsample + in_channels = channels[f"{first_out_size}"] + self.conv_body_down = nn.ModuleList() + for i in range(self.log_size, 2, -1): + out_channels = channels[f"{2**(i - 1)}"] + self.conv_body_down.append( + ResBlock(in_channels, out_channels, resample_kernel) + ) + in_channels = out_channels + + self.final_conv = ConvLayer( + in_channels, channels["4"], 3, bias=True, activate=True + ) + + # upsample + in_channels = channels["4"] + self.conv_body_up = nn.ModuleList() + for i in range(3, self.log_size + 1): + out_channels = channels[f"{2**i}"] + self.conv_body_up.append(ResUpBlock(in_channels, out_channels)) + in_channels = out_channels + + # to RGB + self.toRGB = nn.ModuleList() + for i in range(3, self.log_size + 1): + self.toRGB.append( + EqualConv2d( + channels[f"{2**i}"], + 3, + 1, + stride=1, + padding=0, + bias=True, + bias_init_val=0, + ) + ) + + if different_w: + linear_out_channel = (int(math.log(out_size, 2)) * 2 - 2) * num_style_feat + else: + linear_out_channel = num_style_feat + + self.final_linear = EqualLinear( + channels["4"] * 4 * 4, + linear_out_channel, + bias=True, + bias_init_val=0, + lr_mul=1, + activation=None, + ) + + # the decoder: stylegan2 generator with SFT modulations + self.stylegan_decoder = StyleGAN2GeneratorSFT( + out_size=out_size, + num_style_feat=num_style_feat, + num_mlp=num_mlp, + channel_multiplier=channel_multiplier, + resample_kernel=resample_kernel, + lr_mlp=lr_mlp, + narrow=narrow, + sft_half=sft_half, + ) + + # load pre-trained stylegan2 model if necessary + if decoder_load_path: + self.stylegan_decoder.load_state_dict( + torch.load( + decoder_load_path, map_location=lambda storage, loc: storage + )["params_ema"] + ) + # fix decoder without updating params + if fix_decoder: + for _, param in self.stylegan_decoder.named_parameters(): + param.requires_grad = False + + # for SFT modulations (scale and shift) + self.condition_scale = nn.ModuleList() + self.condition_shift = nn.ModuleList() + for i in range(3, self.log_size + 1): + out_channels = channels[f"{2**i}"] + if sft_half: + sft_out_channels = 
out_channels + else: + sft_out_channels = out_channels * 2 + self.condition_scale.append( + nn.Sequential( + EqualConv2d( + out_channels, + out_channels, + 3, + stride=1, + padding=1, + bias=True, + bias_init_val=0, + ), + ScaledLeakyReLU(0.2), + EqualConv2d( + out_channels, + sft_out_channels, + 3, + stride=1, + padding=1, + bias=True, + bias_init_val=1, + ), + ) + ) + self.condition_shift.append( + nn.Sequential( + EqualConv2d( + out_channels, + out_channels, + 3, + stride=1, + padding=1, + bias=True, + bias_init_val=0, + ), + ScaledLeakyReLU(0.2), + EqualConv2d( + out_channels, + sft_out_channels, + 3, + stride=1, + padding=1, + bias=True, + bias_init_val=0, + ), + ) + ) + + def forward( + self, x, return_latents=False, return_rgb=True, randomize_noise=True, **kwargs + ): + """Forward function for GFPGANv1. + Args: + x (Tensor): Input images. + return_latents (bool): Whether to return style latents. Default: False. + return_rgb (bool): Whether return intermediate rgb images. Default: True. + randomize_noise (bool): Randomize noise, used when 'noise' is False. Default: True. + """ + conditions = [] + unet_skips = [] + out_rgbs = [] + + # encoder + feat = self.conv_body_first(x) + for i in range(self.log_size - 2): + feat = self.conv_body_down[i](feat) + unet_skips.insert(0, feat) + + feat = self.final_conv(feat) + + # style code + style_code = self.final_linear(feat.view(feat.size(0), -1)) + if self.different_w: + style_code = style_code.view(style_code.size(0), -1, self.num_style_feat) + + # decode + for i in range(self.log_size - 2): + # add unet skip + feat = feat + unet_skips[i] + # ResUpLayer + feat = self.conv_body_up[i](feat) + # generate scale and shift for SFT layers + scale = self.condition_scale[i](feat) + conditions.append(scale.clone()) + shift = self.condition_shift[i](feat) + conditions.append(shift.clone()) + # generate rgb images + if return_rgb: + out_rgbs.append(self.toRGB[i](feat)) + + # decoder + image, _ = self.stylegan_decoder( + [style_code], + conditions, + return_latents=return_latents, + input_is_latent=self.input_is_latent, + randomize_noise=randomize_noise, + ) + + return image, out_rgbs + + +class FacialComponentDiscriminator(nn.Module): + """Facial component (eyes, mouth, noise) discriminator used in GFPGAN.""" + + def __init__(self): + super(FacialComponentDiscriminator, self).__init__() + # It now uses a VGG-style architectrue with fixed model size + self.conv1 = ConvLayer( + 3, + 64, + 3, + downsample=False, + resample_kernel=(1, 3, 3, 1), + bias=True, + activate=True, + ) + self.conv2 = ConvLayer( + 64, + 128, + 3, + downsample=True, + resample_kernel=(1, 3, 3, 1), + bias=True, + activate=True, + ) + self.conv3 = ConvLayer( + 128, + 128, + 3, + downsample=False, + resample_kernel=(1, 3, 3, 1), + bias=True, + activate=True, + ) + self.conv4 = ConvLayer( + 128, + 256, + 3, + downsample=True, + resample_kernel=(1, 3, 3, 1), + bias=True, + activate=True, + ) + self.conv5 = ConvLayer( + 256, + 256, + 3, + downsample=False, + resample_kernel=(1, 3, 3, 1), + bias=True, + activate=True, + ) + self.final_conv = ConvLayer(256, 1, 3, bias=True, activate=False) + + def forward(self, x, return_feats=False, **kwargs): + """Forward function for FacialComponentDiscriminator. + Args: + x (Tensor): Input images. + return_feats (bool): Whether to return intermediate features. Default: False. 
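+
+        Returns:
+            Tensor: Component discriminator logits.
+            list[Tensor] | None: Intermediate features when ``return_feats`` is True, otherwise None.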
+ """ + feat = self.conv1(x) + feat = self.conv3(self.conv2(feat)) + rlt_feats = [] + if return_feats: + rlt_feats.append(feat.clone()) + feat = self.conv5(self.conv4(feat)) + if return_feats: + rlt_feats.append(feat.clone()) + out = self.final_conv(feat) + + if return_feats: + return out, rlt_feats + else: + return out, None diff --git a/ldm_patched/pfn/architecture/face/gfpganv1_clean_arch.py b/ldm_patched/pfn/architecture/face/gfpganv1_clean_arch.py new file mode 100644 index 0000000000000000000000000000000000000000..16470d6345f71ed1517ff26f65b9cd125d80d99e --- /dev/null +++ b/ldm_patched/pfn/architecture/face/gfpganv1_clean_arch.py @@ -0,0 +1,370 @@ +# pylint: skip-file +# type: ignore +import math +import random + +import torch +from torch import nn +from torch.nn import functional as F + +from .stylegan2_clean_arch import StyleGAN2GeneratorClean + + +class StyleGAN2GeneratorCSFT(StyleGAN2GeneratorClean): + """StyleGAN2 Generator with SFT modulation (Spatial Feature Transform). + It is the clean version without custom compiled CUDA extensions used in StyleGAN2. + Args: + out_size (int): The spatial size of outputs. + num_style_feat (int): Channel number of style features. Default: 512. + num_mlp (int): Layer number of MLP style layers. Default: 8. + channel_multiplier (int): Channel multiplier for large networks of StyleGAN2. Default: 2. + narrow (float): The narrow ratio for channels. Default: 1. + sft_half (bool): Whether to apply SFT on half of the input channels. Default: False. + """ + + def __init__( + self, + out_size, + num_style_feat=512, + num_mlp=8, + channel_multiplier=2, + narrow=1, + sft_half=False, + ): + super(StyleGAN2GeneratorCSFT, self).__init__( + out_size, + num_style_feat=num_style_feat, + num_mlp=num_mlp, + channel_multiplier=channel_multiplier, + narrow=narrow, + ) + self.sft_half = sft_half + + def forward( + self, + styles, + conditions, + input_is_latent=False, + noise=None, + randomize_noise=True, + truncation=1, + truncation_latent=None, + inject_index=None, + return_latents=False, + ): + """Forward function for StyleGAN2GeneratorCSFT. + Args: + styles (list[Tensor]): Sample codes of styles. + conditions (list[Tensor]): SFT conditions to generators. + input_is_latent (bool): Whether input is latent style. Default: False. + noise (Tensor | None): Input noise or None. Default: None. + randomize_noise (bool): Randomize noise, used when 'noise' is False. Default: True. + truncation (float): The truncation ratio. Default: 1. + truncation_latent (Tensor | None): The truncation latent tensor. Default: None. + inject_index (int | None): The injection index for mixing noise. Default: None. + return_latents (bool): Whether to return style latents. Default: False. 
+ """ + # style codes -> latents with Style MLP layer + if not input_is_latent: + styles = [self.style_mlp(s) for s in styles] + # noises + if noise is None: + if randomize_noise: + noise = [None] * self.num_layers # for each style conv layer + else: # use the stored noise + noise = [ + getattr(self.noises, f"noise{i}") for i in range(self.num_layers) + ] + # style truncation + if truncation < 1: + style_truncation = [] + for style in styles: + style_truncation.append( + truncation_latent + truncation * (style - truncation_latent) + ) + styles = style_truncation + # get style latents with injection + if len(styles) == 1: + inject_index = self.num_latent + + if styles[0].ndim < 3: + # repeat latent code for all the layers + latent = styles[0].unsqueeze(1).repeat(1, inject_index, 1) + else: # used for encoder with different latent code for each layer + latent = styles[0] + elif len(styles) == 2: # mixing noises + if inject_index is None: + inject_index = random.randint(1, self.num_latent - 1) + latent1 = styles[0].unsqueeze(1).repeat(1, inject_index, 1) + latent2 = ( + styles[1].unsqueeze(1).repeat(1, self.num_latent - inject_index, 1) + ) + latent = torch.cat([latent1, latent2], 1) + + # main generation + out = self.constant_input(latent.shape[0]) + out = self.style_conv1(out, latent[:, 0], noise=noise[0]) + skip = self.to_rgb1(out, latent[:, 1]) + + i = 1 + for conv1, conv2, noise1, noise2, to_rgb in zip( + self.style_convs[::2], + self.style_convs[1::2], + noise[1::2], + noise[2::2], + self.to_rgbs, + ): + out = conv1(out, latent[:, i], noise=noise1) + + # the conditions may have fewer levels + if i < len(conditions): + # SFT part to combine the conditions + if self.sft_half: # only apply SFT to half of the channels + out_same, out_sft = torch.split(out, int(out.size(1) // 2), dim=1) + out_sft = out_sft * conditions[i - 1] + conditions[i] + out = torch.cat([out_same, out_sft], dim=1) + else: # apply SFT to all the channels + out = out * conditions[i - 1] + conditions[i] + + out = conv2(out, latent[:, i + 1], noise=noise2) + skip = to_rgb(out, latent[:, i + 2], skip) # feature back to the rgb space + i += 2 + + image = skip + + if return_latents: + return image, latent + else: + return image, None + + +class ResBlock(nn.Module): + """Residual block with bilinear upsampling/downsampling. + Args: + in_channels (int): Channel number of the input. + out_channels (int): Channel number of the output. + mode (str): Upsampling/downsampling mode. Options: down | up. Default: down. + """ + + def __init__(self, in_channels, out_channels, mode="down"): + super(ResBlock, self).__init__() + + self.conv1 = nn.Conv2d(in_channels, in_channels, 3, 1, 1) + self.conv2 = nn.Conv2d(in_channels, out_channels, 3, 1, 1) + self.skip = nn.Conv2d(in_channels, out_channels, 1, bias=False) + if mode == "down": + self.scale_factor = 0.5 + elif mode == "up": + self.scale_factor = 2 + + def forward(self, x): + out = F.leaky_relu_(self.conv1(x), negative_slope=0.2) + # upsample/downsample + out = F.interpolate( + out, scale_factor=self.scale_factor, mode="bilinear", align_corners=False + ) + out = F.leaky_relu_(self.conv2(out), negative_slope=0.2) + # skip + x = F.interpolate( + x, scale_factor=self.scale_factor, mode="bilinear", align_corners=False + ) + skip = self.skip(x) + out = out + skip + return out + + +class GFPGANv1Clean(nn.Module): + """The GFPGAN architecture: Unet + StyleGAN2 decoder with SFT. + It is the clean version without custom compiled CUDA extensions used in StyleGAN2. 
+ Ref: GFP-GAN: Towards Real-World Blind Face Restoration with Generative Facial Prior. + Args: + out_size (int): The spatial size of outputs. + num_style_feat (int): Channel number of style features. Default: 512. + channel_multiplier (int): Channel multiplier for large networks of StyleGAN2. Default: 2. + decoder_load_path (str): The path to the pre-trained decoder model (usually, the StyleGAN2). Default: None. + fix_decoder (bool): Whether to fix the decoder. Default: True. + num_mlp (int): Layer number of MLP style layers. Default: 8. + input_is_latent (bool): Whether input is latent style. Default: False. + different_w (bool): Whether to use different latent w for different layers. Default: False. + narrow (float): The narrow ratio for channels. Default: 1. + sft_half (bool): Whether to apply SFT on half of the input channels. Default: False. + """ + + def __init__( + self, + state_dict, + ): + super(GFPGANv1Clean, self).__init__() + + out_size = 512 + num_style_feat = 512 + channel_multiplier = 2 + decoder_load_path = None + fix_decoder = False + num_mlp = 8 + input_is_latent = True + different_w = True + narrow = 1 + sft_half = True + + self.model_arch = "GFPGAN" + self.sub_type = "Face SR" + self.scale = 8 + self.in_nc = 3 + self.out_nc = 3 + self.state = state_dict + + self.supports_fp16 = False + self.supports_bf16 = True + self.min_size_restriction = 512 + + self.input_is_latent = input_is_latent + self.different_w = different_w + self.num_style_feat = num_style_feat + + unet_narrow = narrow * 0.5 # by default, use a half of input channels + channels = { + "4": int(512 * unet_narrow), + "8": int(512 * unet_narrow), + "16": int(512 * unet_narrow), + "32": int(512 * unet_narrow), + "64": int(256 * channel_multiplier * unet_narrow), + "128": int(128 * channel_multiplier * unet_narrow), + "256": int(64 * channel_multiplier * unet_narrow), + "512": int(32 * channel_multiplier * unet_narrow), + "1024": int(16 * channel_multiplier * unet_narrow), + } + + self.log_size = int(math.log(out_size, 2)) + first_out_size = 2 ** (int(math.log(out_size, 2))) + + self.conv_body_first = nn.Conv2d(3, channels[f"{first_out_size}"], 1) + + # downsample + in_channels = channels[f"{first_out_size}"] + self.conv_body_down = nn.ModuleList() + for i in range(self.log_size, 2, -1): + out_channels = channels[f"{2**(i - 1)}"] + self.conv_body_down.append(ResBlock(in_channels, out_channels, mode="down")) + in_channels = out_channels + + self.final_conv = nn.Conv2d(in_channels, channels["4"], 3, 1, 1) + + # upsample + in_channels = channels["4"] + self.conv_body_up = nn.ModuleList() + for i in range(3, self.log_size + 1): + out_channels = channels[f"{2**i}"] + self.conv_body_up.append(ResBlock(in_channels, out_channels, mode="up")) + in_channels = out_channels + + # to RGB + self.toRGB = nn.ModuleList() + for i in range(3, self.log_size + 1): + self.toRGB.append(nn.Conv2d(channels[f"{2**i}"], 3, 1)) + + if different_w: + linear_out_channel = (int(math.log(out_size, 2)) * 2 - 2) * num_style_feat + else: + linear_out_channel = num_style_feat + + self.final_linear = nn.Linear(channels["4"] * 4 * 4, linear_out_channel) + + # the decoder: stylegan2 generator with SFT modulations + self.stylegan_decoder = StyleGAN2GeneratorCSFT( + out_size=out_size, + num_style_feat=num_style_feat, + num_mlp=num_mlp, + channel_multiplier=channel_multiplier, + narrow=narrow, + sft_half=sft_half, + ) + + # load pre-trained stylegan2 model if necessary + if decoder_load_path: + self.stylegan_decoder.load_state_dict( + torch.load( + 
decoder_load_path, map_location=lambda storage, loc: storage + )["params_ema"] + ) + # fix decoder without updating params + if fix_decoder: + for _, param in self.stylegan_decoder.named_parameters(): + param.requires_grad = False + + # for SFT modulations (scale and shift) + self.condition_scale = nn.ModuleList() + self.condition_shift = nn.ModuleList() + for i in range(3, self.log_size + 1): + out_channels = channels[f"{2**i}"] + if sft_half: + sft_out_channels = out_channels + else: + sft_out_channels = out_channels * 2 + self.condition_scale.append( + nn.Sequential( + nn.Conv2d(out_channels, out_channels, 3, 1, 1), + nn.LeakyReLU(0.2, True), + nn.Conv2d(out_channels, sft_out_channels, 3, 1, 1), + ) + ) + self.condition_shift.append( + nn.Sequential( + nn.Conv2d(out_channels, out_channels, 3, 1, 1), + nn.LeakyReLU(0.2, True), + nn.Conv2d(out_channels, sft_out_channels, 3, 1, 1), + ) + ) + self.load_state_dict(state_dict) + + def forward( + self, x, return_latents=False, return_rgb=True, randomize_noise=True, **kwargs + ): + """Forward function for GFPGANv1Clean. + Args: + x (Tensor): Input images. + return_latents (bool): Whether to return style latents. Default: False. + return_rgb (bool): Whether return intermediate rgb images. Default: True. + randomize_noise (bool): Randomize noise, used when 'noise' is False. Default: True. + """ + conditions = [] + unet_skips = [] + out_rgbs = [] + + # encoder + feat = F.leaky_relu_(self.conv_body_first(x), negative_slope=0.2) + for i in range(self.log_size - 2): + feat = self.conv_body_down[i](feat) + unet_skips.insert(0, feat) + feat = F.leaky_relu_(self.final_conv(feat), negative_slope=0.2) + + # style code + style_code = self.final_linear(feat.view(feat.size(0), -1)) + if self.different_w: + style_code = style_code.view(style_code.size(0), -1, self.num_style_feat) + + # decode + for i in range(self.log_size - 2): + # add unet skip + feat = feat + unet_skips[i] + # ResUpLayer + feat = self.conv_body_up[i](feat) + # generate scale and shift for SFT layers + scale = self.condition_scale[i](feat) + conditions.append(scale.clone()) + shift = self.condition_shift[i](feat) + conditions.append(shift.clone()) + # generate rgb images + if return_rgb: + out_rgbs.append(self.toRGB[i](feat)) + + # decoder + image, _ = self.stylegan_decoder( + [style_code], + conditions, + return_latents=return_latents, + input_is_latent=self.input_is_latent, + randomize_noise=randomize_noise, + ) + + return image, out_rgbs diff --git a/ldm_patched/pfn/architecture/face/restoreformer_arch.py b/ldm_patched/pfn/architecture/face/restoreformer_arch.py new file mode 100644 index 0000000000000000000000000000000000000000..4492260291d6d74b2c0d38130f7aa8b50ba2fc11 --- /dev/null +++ b/ldm_patched/pfn/architecture/face/restoreformer_arch.py @@ -0,0 +1,776 @@ +# pylint: skip-file +# type: ignore +"""Modified from https://github.com/wzhouxiff/RestoreFormer +""" +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class VectorQuantizer(nn.Module): + """ + see https://github.com/MishaLaskin/vqvae/blob/d761a999e2267766400dc646d82d3ac3657771d4/models/quantizer.py + ____________________________________________ + Discretization bottleneck part of the VQ-VAE. 
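+    Each encoder output vector is replaced by its nearest codebook embedding;
+    gradients reach the encoder through the straight-through estimator.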
+ Inputs: + - n_e : number of embeddings + - e_dim : dimension of embedding + - beta : commitment cost used in loss term, beta * ||z_e(x)-sg[e]||^2 + _____________________________________________ + """ + + def __init__(self, n_e, e_dim, beta): + super(VectorQuantizer, self).__init__() + self.n_e = n_e + self.e_dim = e_dim + self.beta = beta + + self.embedding = nn.Embedding(self.n_e, self.e_dim) + self.embedding.weight.data.uniform_(-1.0 / self.n_e, 1.0 / self.n_e) + + def forward(self, z): + """ + Inputs the output of the encoder network z and maps it to a discrete + one-hot vector that is the index of the closest embedding vector e_j + z (continuous) -> z_q (discrete) + z.shape = (batch, channel, height, width) + quantization pipeline: + 1. get encoder input (B,C,H,W) + 2. flatten input to (B*H*W,C) + """ + # reshape z -> (batch, height, width, channel) and flatten + z = z.permute(0, 2, 3, 1).contiguous() + z_flattened = z.view(-1, self.e_dim) + # distances from z to embeddings e_j (z - e)^2 = z^2 + e^2 - 2 e * z + + d = ( + torch.sum(z_flattened**2, dim=1, keepdim=True) + + torch.sum(self.embedding.weight**2, dim=1) + - 2 * torch.matmul(z_flattened, self.embedding.weight.t()) + ) + + # could possible replace this here + # #\start... + # find closest encodings + + min_value, min_encoding_indices = torch.min(d, dim=1) + + min_encoding_indices = min_encoding_indices.unsqueeze(1) + + min_encodings = torch.zeros(min_encoding_indices.shape[0], self.n_e).to(z) + min_encodings.scatter_(1, min_encoding_indices, 1) + + # dtype min encodings: torch.float32 + # min_encodings shape: torch.Size([2048, 512]) + # min_encoding_indices.shape: torch.Size([2048, 1]) + + # get quantized latent vectors + z_q = torch.matmul(min_encodings, self.embedding.weight).view(z.shape) + # .........\end + + # with: + # .........\start + # min_encoding_indices = torch.argmin(d, dim=1) + # z_q = self.embedding(min_encoding_indices) + # ......\end......... 
(TODO) + + # compute loss for embedding + loss = torch.mean((z_q.detach() - z) ** 2) + self.beta * torch.mean( + (z_q - z.detach()) ** 2 + ) + + # preserve gradients + z_q = z + (z_q - z).detach() + + # perplexity + + e_mean = torch.mean(min_encodings, dim=0) + perplexity = torch.exp(-torch.sum(e_mean * torch.log(e_mean + 1e-10))) + + # reshape back to match original input shape + z_q = z_q.permute(0, 3, 1, 2).contiguous() + + return z_q, loss, (perplexity, min_encodings, min_encoding_indices, d) + + def get_codebook_entry(self, indices, shape): + # shape specifying (batch, height, width, channel) + # TODO: check for more easy handling with nn.Embedding + min_encodings = torch.zeros(indices.shape[0], self.n_e).to(indices) + min_encodings.scatter_(1, indices[:, None], 1) + + # get quantized latent vectors + z_q = torch.matmul(min_encodings.float(), self.embedding.weight) + + if shape is not None: + z_q = z_q.view(shape) + + # reshape back to match original input shape + z_q = z_q.permute(0, 3, 1, 2).contiguous() + + return z_q + + +# pytorch_diffusion + derived encoder decoder +def nonlinearity(x): + # swish + return x * torch.sigmoid(x) + + +def Normalize(in_channels): + return torch.nn.GroupNorm( + num_groups=32, num_channels=in_channels, eps=1e-6, affine=True + ) + + +class Upsample(nn.Module): + def __init__(self, in_channels, with_conv): + super().__init__() + self.with_conv = with_conv + if self.with_conv: + self.conv = torch.nn.Conv2d( + in_channels, in_channels, kernel_size=3, stride=1, padding=1 + ) + + def forward(self, x): + x = torch.nn.functional.interpolate(x, scale_factor=2.0, mode="nearest") + if self.with_conv: + x = self.conv(x) + return x + + +class Downsample(nn.Module): + def __init__(self, in_channels, with_conv): + super().__init__() + self.with_conv = with_conv + if self.with_conv: + # no asymmetric padding in torch conv, must do it ourselves + self.conv = torch.nn.Conv2d( + in_channels, in_channels, kernel_size=3, stride=2, padding=0 + ) + + def forward(self, x): + if self.with_conv: + pad = (0, 1, 0, 1) + x = torch.nn.functional.pad(x, pad, mode="constant", value=0) + x = self.conv(x) + else: + x = torch.nn.functional.avg_pool2d(x, kernel_size=2, stride=2) + return x + + +class ResnetBlock(nn.Module): + def __init__( + self, + *, + in_channels, + out_channels=None, + conv_shortcut=False, + dropout, + temb_channels=512 + ): + super().__init__() + self.in_channels = in_channels + out_channels = in_channels if out_channels is None else out_channels + self.out_channels = out_channels + self.use_conv_shortcut = conv_shortcut + + self.norm1 = Normalize(in_channels) + self.conv1 = torch.nn.Conv2d( + in_channels, out_channels, kernel_size=3, stride=1, padding=1 + ) + if temb_channels > 0: + self.temb_proj = torch.nn.Linear(temb_channels, out_channels) + self.norm2 = Normalize(out_channels) + self.dropout = torch.nn.Dropout(dropout) + self.conv2 = torch.nn.Conv2d( + out_channels, out_channels, kernel_size=3, stride=1, padding=1 + ) + if self.in_channels != self.out_channels: + if self.use_conv_shortcut: + self.conv_shortcut = torch.nn.Conv2d( + in_channels, out_channels, kernel_size=3, stride=1, padding=1 + ) + else: + self.nin_shortcut = torch.nn.Conv2d( + in_channels, out_channels, kernel_size=1, stride=1, padding=0 + ) + + def forward(self, x, temb): + h = x + h = self.norm1(h) + h = nonlinearity(h) + h = self.conv1(h) + + if temb is not None: + h = h + self.temb_proj(nonlinearity(temb))[:, :, None, None] + + h = self.norm2(h) + h = nonlinearity(h) + h = self.dropout(h) 
+ h = self.conv2(h) + + if self.in_channels != self.out_channels: + if self.use_conv_shortcut: + x = self.conv_shortcut(x) + else: + x = self.nin_shortcut(x) + + return x + h + + +class MultiHeadAttnBlock(nn.Module): + def __init__(self, in_channels, head_size=1): + super().__init__() + self.in_channels = in_channels + self.head_size = head_size + self.att_size = in_channels // head_size + assert ( + in_channels % head_size == 0 + ), "The size of head should be divided by the number of channels." + + self.norm1 = Normalize(in_channels) + self.norm2 = Normalize(in_channels) + + self.q = torch.nn.Conv2d( + in_channels, in_channels, kernel_size=1, stride=1, padding=0 + ) + self.k = torch.nn.Conv2d( + in_channels, in_channels, kernel_size=1, stride=1, padding=0 + ) + self.v = torch.nn.Conv2d( + in_channels, in_channels, kernel_size=1, stride=1, padding=0 + ) + self.proj_out = torch.nn.Conv2d( + in_channels, in_channels, kernel_size=1, stride=1, padding=0 + ) + self.num = 0 + + def forward(self, x, y=None): + h_ = x + h_ = self.norm1(h_) + if y is None: + y = h_ + else: + y = self.norm2(y) + + q = self.q(y) + k = self.k(h_) + v = self.v(h_) + + # compute attention + b, c, h, w = q.shape + q = q.reshape(b, self.head_size, self.att_size, h * w) + q = q.permute(0, 3, 1, 2) # b, hw, head, att + + k = k.reshape(b, self.head_size, self.att_size, h * w) + k = k.permute(0, 3, 1, 2) + + v = v.reshape(b, self.head_size, self.att_size, h * w) + v = v.permute(0, 3, 1, 2) + + q = q.transpose(1, 2) + v = v.transpose(1, 2) + k = k.transpose(1, 2).transpose(2, 3) + + scale = int(self.att_size) ** (-0.5) + q.mul_(scale) + w_ = torch.matmul(q, k) + w_ = F.softmax(w_, dim=3) + + w_ = w_.matmul(v) + + w_ = w_.transpose(1, 2).contiguous() # [b, h*w, head, att] + w_ = w_.view(b, h, w, -1) + w_ = w_.permute(0, 3, 1, 2) + + w_ = self.proj_out(w_) + + return x + w_ + + +class MultiHeadEncoder(nn.Module): + def __init__( + self, + ch, + out_ch, + ch_mult=(1, 2, 4, 8), + num_res_blocks=2, + attn_resolutions=(16,), + dropout=0.0, + resamp_with_conv=True, + in_channels=3, + resolution=512, + z_channels=256, + double_z=True, + enable_mid=True, + head_size=1, + **ignore_kwargs + ): + super().__init__() + self.ch = ch + self.temb_ch = 0 + self.num_resolutions = len(ch_mult) + self.num_res_blocks = num_res_blocks + self.resolution = resolution + self.in_channels = in_channels + self.enable_mid = enable_mid + + # downsampling + self.conv_in = torch.nn.Conv2d( + in_channels, self.ch, kernel_size=3, stride=1, padding=1 + ) + + curr_res = resolution + in_ch_mult = (1,) + tuple(ch_mult) + self.down = nn.ModuleList() + for i_level in range(self.num_resolutions): + block = nn.ModuleList() + attn = nn.ModuleList() + block_in = ch * in_ch_mult[i_level] + block_out = ch * ch_mult[i_level] + for i_block in range(self.num_res_blocks): + block.append( + ResnetBlock( + in_channels=block_in, + out_channels=block_out, + temb_channels=self.temb_ch, + dropout=dropout, + ) + ) + block_in = block_out + if curr_res in attn_resolutions: + attn.append(MultiHeadAttnBlock(block_in, head_size)) + down = nn.Module() + down.block = block + down.attn = attn + if i_level != self.num_resolutions - 1: + down.downsample = Downsample(block_in, resamp_with_conv) + curr_res = curr_res // 2 + self.down.append(down) + + # middle + if self.enable_mid: + self.mid = nn.Module() + self.mid.block_1 = ResnetBlock( + in_channels=block_in, + out_channels=block_in, + temb_channels=self.temb_ch, + dropout=dropout, + ) + self.mid.attn_1 = MultiHeadAttnBlock(block_in, 
head_size) + self.mid.block_2 = ResnetBlock( + in_channels=block_in, + out_channels=block_in, + temb_channels=self.temb_ch, + dropout=dropout, + ) + + # end + self.norm_out = Normalize(block_in) + self.conv_out = torch.nn.Conv2d( + block_in, + 2 * z_channels if double_z else z_channels, + kernel_size=3, + stride=1, + padding=1, + ) + + def forward(self, x): + hs = {} + # timestep embedding + temb = None + + # downsampling + h = self.conv_in(x) + hs["in"] = h + for i_level in range(self.num_resolutions): + for i_block in range(self.num_res_blocks): + h = self.down[i_level].block[i_block](h, temb) + if len(self.down[i_level].attn) > 0: + h = self.down[i_level].attn[i_block](h) + + if i_level != self.num_resolutions - 1: + # hs.append(h) + hs["block_" + str(i_level)] = h + h = self.down[i_level].downsample(h) + + # middle + # h = hs[-1] + if self.enable_mid: + h = self.mid.block_1(h, temb) + hs["block_" + str(i_level) + "_atten"] = h + h = self.mid.attn_1(h) + h = self.mid.block_2(h, temb) + hs["mid_atten"] = h + + # end + h = self.norm_out(h) + h = nonlinearity(h) + h = self.conv_out(h) + # hs.append(h) + hs["out"] = h + + return hs + + +class MultiHeadDecoder(nn.Module): + def __init__( + self, + ch, + out_ch, + ch_mult=(1, 2, 4, 8), + num_res_blocks=2, + attn_resolutions=(16,), + dropout=0.0, + resamp_with_conv=True, + in_channels=3, + resolution=512, + z_channels=256, + give_pre_end=False, + enable_mid=True, + head_size=1, + **ignorekwargs + ): + super().__init__() + self.ch = ch + self.temb_ch = 0 + self.num_resolutions = len(ch_mult) + self.num_res_blocks = num_res_blocks + self.resolution = resolution + self.in_channels = in_channels + self.give_pre_end = give_pre_end + self.enable_mid = enable_mid + + # compute in_ch_mult, block_in and curr_res at lowest res + block_in = ch * ch_mult[self.num_resolutions - 1] + curr_res = resolution // 2 ** (self.num_resolutions - 1) + self.z_shape = (1, z_channels, curr_res, curr_res) + print( + "Working with z of shape {} = {} dimensions.".format( + self.z_shape, np.prod(self.z_shape) + ) + ) + + # z to block_in + self.conv_in = torch.nn.Conv2d( + z_channels, block_in, kernel_size=3, stride=1, padding=1 + ) + + # middle + if self.enable_mid: + self.mid = nn.Module() + self.mid.block_1 = ResnetBlock( + in_channels=block_in, + out_channels=block_in, + temb_channels=self.temb_ch, + dropout=dropout, + ) + self.mid.attn_1 = MultiHeadAttnBlock(block_in, head_size) + self.mid.block_2 = ResnetBlock( + in_channels=block_in, + out_channels=block_in, + temb_channels=self.temb_ch, + dropout=dropout, + ) + + # upsampling + self.up = nn.ModuleList() + for i_level in reversed(range(self.num_resolutions)): + block = nn.ModuleList() + attn = nn.ModuleList() + block_out = ch * ch_mult[i_level] + for i_block in range(self.num_res_blocks + 1): + block.append( + ResnetBlock( + in_channels=block_in, + out_channels=block_out, + temb_channels=self.temb_ch, + dropout=dropout, + ) + ) + block_in = block_out + if curr_res in attn_resolutions: + attn.append(MultiHeadAttnBlock(block_in, head_size)) + up = nn.Module() + up.block = block + up.attn = attn + if i_level != 0: + up.upsample = Upsample(block_in, resamp_with_conv) + curr_res = curr_res * 2 + self.up.insert(0, up) # prepend to get consistent order + + # end + self.norm_out = Normalize(block_in) + self.conv_out = torch.nn.Conv2d( + block_in, out_ch, kernel_size=3, stride=1, padding=1 + ) + + def forward(self, z): + # assert z.shape[1:] == self.z_shape[1:] + self.last_z_shape = z.shape + + # timestep embedding + temb = 
None + + # z to block_in + h = self.conv_in(z) + + # middle + if self.enable_mid: + h = self.mid.block_1(h, temb) + h = self.mid.attn_1(h) + h = self.mid.block_2(h, temb) + + # upsampling + for i_level in reversed(range(self.num_resolutions)): + for i_block in range(self.num_res_blocks + 1): + h = self.up[i_level].block[i_block](h, temb) + if len(self.up[i_level].attn) > 0: + h = self.up[i_level].attn[i_block](h) + if i_level != 0: + h = self.up[i_level].upsample(h) + + # end + if self.give_pre_end: + return h + + h = self.norm_out(h) + h = nonlinearity(h) + h = self.conv_out(h) + return h + + +class MultiHeadDecoderTransformer(nn.Module): + def __init__( + self, + ch, + out_ch, + ch_mult=(1, 2, 4, 8), + num_res_blocks=2, + attn_resolutions=(16,), + dropout=0.0, + resamp_with_conv=True, + in_channels=3, + resolution=512, + z_channels=256, + give_pre_end=False, + enable_mid=True, + head_size=1, + **ignorekwargs + ): + super().__init__() + self.ch = ch + self.temb_ch = 0 + self.num_resolutions = len(ch_mult) + self.num_res_blocks = num_res_blocks + self.resolution = resolution + self.in_channels = in_channels + self.give_pre_end = give_pre_end + self.enable_mid = enable_mid + + # compute in_ch_mult, block_in and curr_res at lowest res + block_in = ch * ch_mult[self.num_resolutions - 1] + curr_res = resolution // 2 ** (self.num_resolutions - 1) + self.z_shape = (1, z_channels, curr_res, curr_res) + print( + "Working with z of shape {} = {} dimensions.".format( + self.z_shape, np.prod(self.z_shape) + ) + ) + + # z to block_in + self.conv_in = torch.nn.Conv2d( + z_channels, block_in, kernel_size=3, stride=1, padding=1 + ) + + # middle + if self.enable_mid: + self.mid = nn.Module() + self.mid.block_1 = ResnetBlock( + in_channels=block_in, + out_channels=block_in, + temb_channels=self.temb_ch, + dropout=dropout, + ) + self.mid.attn_1 = MultiHeadAttnBlock(block_in, head_size) + self.mid.block_2 = ResnetBlock( + in_channels=block_in, + out_channels=block_in, + temb_channels=self.temb_ch, + dropout=dropout, + ) + + # upsampling + self.up = nn.ModuleList() + for i_level in reversed(range(self.num_resolutions)): + block = nn.ModuleList() + attn = nn.ModuleList() + block_out = ch * ch_mult[i_level] + for i_block in range(self.num_res_blocks + 1): + block.append( + ResnetBlock( + in_channels=block_in, + out_channels=block_out, + temb_channels=self.temb_ch, + dropout=dropout, + ) + ) + block_in = block_out + if curr_res in attn_resolutions: + attn.append(MultiHeadAttnBlock(block_in, head_size)) + up = nn.Module() + up.block = block + up.attn = attn + if i_level != 0: + up.upsample = Upsample(block_in, resamp_with_conv) + curr_res = curr_res * 2 + self.up.insert(0, up) # prepend to get consistent order + + # end + self.norm_out = Normalize(block_in) + self.conv_out = torch.nn.Conv2d( + block_in, out_ch, kernel_size=3, stride=1, padding=1 + ) + + def forward(self, z, hs): + # assert z.shape[1:] == self.z_shape[1:] + # self.last_z_shape = z.shape + + # timestep embedding + temb = None + + # z to block_in + h = self.conv_in(z) + + # middle + if self.enable_mid: + h = self.mid.block_1(h, temb) + h = self.mid.attn_1(h, hs["mid_atten"]) + h = self.mid.block_2(h, temb) + + # upsampling + for i_level in reversed(range(self.num_resolutions)): + for i_block in range(self.num_res_blocks + 1): + h = self.up[i_level].block[i_block](h, temb) + if len(self.up[i_level].attn) > 0: + h = self.up[i_level].attn[i_block]( + h, hs["block_" + str(i_level) + "_atten"] + ) + # hfeature = h.clone() + if i_level != 0: + h = 
self.up[i_level].upsample(h) + + # end + if self.give_pre_end: + return h + + h = self.norm_out(h) + h = nonlinearity(h) + h = self.conv_out(h) + return h + + +class RestoreFormer(nn.Module): + def __init__( + self, + state_dict, + ): + super(RestoreFormer, self).__init__() + + n_embed = 1024 + embed_dim = 256 + ch = 64 + out_ch = 3 + ch_mult = (1, 2, 2, 4, 4, 8) + num_res_blocks = 2 + attn_resolutions = (16,) + dropout = 0.0 + in_channels = 3 + resolution = 512 + z_channels = 256 + double_z = False + enable_mid = True + fix_decoder = False + fix_codebook = True + fix_encoder = False + head_size = 8 + + self.model_arch = "RestoreFormer" + self.sub_type = "Face SR" + self.scale = 8 + self.in_nc = 3 + self.out_nc = out_ch + self.state = state_dict + + self.supports_fp16 = False + self.supports_bf16 = True + self.min_size_restriction = 16 + + self.encoder = MultiHeadEncoder( + ch=ch, + out_ch=out_ch, + ch_mult=ch_mult, + num_res_blocks=num_res_blocks, + attn_resolutions=attn_resolutions, + dropout=dropout, + in_channels=in_channels, + resolution=resolution, + z_channels=z_channels, + double_z=double_z, + enable_mid=enable_mid, + head_size=head_size, + ) + self.decoder = MultiHeadDecoderTransformer( + ch=ch, + out_ch=out_ch, + ch_mult=ch_mult, + num_res_blocks=num_res_blocks, + attn_resolutions=attn_resolutions, + dropout=dropout, + in_channels=in_channels, + resolution=resolution, + z_channels=z_channels, + enable_mid=enable_mid, + head_size=head_size, + ) + + self.quantize = VectorQuantizer(n_embed, embed_dim, beta=0.25) + + self.quant_conv = torch.nn.Conv2d(z_channels, embed_dim, 1) + self.post_quant_conv = torch.nn.Conv2d(embed_dim, z_channels, 1) + + if fix_decoder: + for _, param in self.decoder.named_parameters(): + param.requires_grad = False + for _, param in self.post_quant_conv.named_parameters(): + param.requires_grad = False + for _, param in self.quantize.named_parameters(): + param.requires_grad = False + elif fix_codebook: + for _, param in self.quantize.named_parameters(): + param.requires_grad = False + + if fix_encoder: + for _, param in self.encoder.named_parameters(): + param.requires_grad = False + + self.load_state_dict(state_dict) + + def encode(self, x): + hs = self.encoder(x) + h = self.quant_conv(hs["out"]) + quant, emb_loss, info = self.quantize(h) + return quant, emb_loss, info, hs + + def decode(self, quant, hs): + quant = self.post_quant_conv(quant) + dec = self.decoder(quant, hs) + + return dec + + def forward(self, input, **kwargs): + quant, diff, info, hs = self.encode(input) + dec = self.decode(quant, hs) + + return dec, None diff --git a/ldm_patched/pfn/architecture/face/stylegan2_arch.py b/ldm_patched/pfn/architecture/face/stylegan2_arch.py new file mode 100644 index 0000000000000000000000000000000000000000..1eb0e9f15f706e2b9759bde4d0244d424c3ae76f --- /dev/null +++ b/ldm_patched/pfn/architecture/face/stylegan2_arch.py @@ -0,0 +1,865 @@ +# pylint: skip-file +# type: ignore +import math +import random + +import torch +from torch import nn +from torch.nn import functional as F + +from .fused_act import FusedLeakyReLU, fused_leaky_relu +from .upfirdn2d import upfirdn2d + + +class NormStyleCode(nn.Module): + def forward(self, x): + """Normalize the style codes. + + Args: + x (Tensor): Style codes with shape (b, c). + + Returns: + Tensor: Normalized tensor. + """ + return x * torch.rsqrt(torch.mean(x**2, dim=1, keepdim=True) + 1e-8) + + +def make_resample_kernel(k): + """Make resampling kernel for UpFirDn. 
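+
+    The 2D kernel is the outer product of the 1D kernel with itself,
+    normalized to sum to one.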
+ + Args: + k (list[int]): A list indicating the 1D resample kernel magnitude. + + Returns: + Tensor: 2D resampled kernel. + """ + k = torch.tensor(k, dtype=torch.float32) + if k.ndim == 1: + k = k[None, :] * k[:, None] # to 2D kernel, outer product + # normalize + k /= k.sum() + return k + + +class UpFirDnUpsample(nn.Module): + """Upsample, FIR filter, and downsample (upsampole version). + + References: + 1. https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.upfirdn.html # noqa: E501 + 2. http://www.ece.northwestern.edu/local-apps/matlabhelp/toolbox/signal/upfirdn.html # noqa: E501 + + Args: + resample_kernel (list[int]): A list indicating the 1D resample kernel + magnitude. + factor (int): Upsampling scale factor. Default: 2. + """ + + def __init__(self, resample_kernel, factor=2): + super(UpFirDnUpsample, self).__init__() + self.kernel = make_resample_kernel(resample_kernel) * (factor**2) + self.factor = factor + + pad = self.kernel.shape[0] - factor + self.pad = ((pad + 1) // 2 + factor - 1, pad // 2) + + def forward(self, x): + out = upfirdn2d(x, self.kernel.type_as(x), up=self.factor, down=1, pad=self.pad) + return out + + def __repr__(self): + return f"{self.__class__.__name__}(factor={self.factor})" + + +class UpFirDnDownsample(nn.Module): + """Upsample, FIR filter, and downsample (downsampole version). + + Args: + resample_kernel (list[int]): A list indicating the 1D resample kernel + magnitude. + factor (int): Downsampling scale factor. Default: 2. + """ + + def __init__(self, resample_kernel, factor=2): + super(UpFirDnDownsample, self).__init__() + self.kernel = make_resample_kernel(resample_kernel) + self.factor = factor + + pad = self.kernel.shape[0] - factor + self.pad = ((pad + 1) // 2, pad // 2) + + def forward(self, x): + out = upfirdn2d(x, self.kernel.type_as(x), up=1, down=self.factor, pad=self.pad) + return out + + def __repr__(self): + return f"{self.__class__.__name__}(factor={self.factor})" + + +class UpFirDnSmooth(nn.Module): + """Upsample, FIR filter, and downsample (smooth version). + + Args: + resample_kernel (list[int]): A list indicating the 1D resample kernel + magnitude. + upsample_factor (int): Upsampling scale factor. Default: 1. + downsample_factor (int): Downsampling scale factor. Default: 1. + kernel_size (int): Kernel size: Default: 1. + """ + + def __init__( + self, resample_kernel, upsample_factor=1, downsample_factor=1, kernel_size=1 + ): + super(UpFirDnSmooth, self).__init__() + self.upsample_factor = upsample_factor + self.downsample_factor = downsample_factor + self.kernel = make_resample_kernel(resample_kernel) + if upsample_factor > 1: + self.kernel = self.kernel * (upsample_factor**2) + + if upsample_factor > 1: + pad = (self.kernel.shape[0] - upsample_factor) - (kernel_size - 1) + self.pad = ((pad + 1) // 2 + upsample_factor - 1, pad // 2 + 1) + elif downsample_factor > 1: + pad = (self.kernel.shape[0] - downsample_factor) + (kernel_size - 1) + self.pad = ((pad + 1) // 2, pad // 2) + else: + raise NotImplementedError + + def forward(self, x): + out = upfirdn2d(x, self.kernel.type_as(x), up=1, down=1, pad=self.pad) + return out + + def __repr__(self): + return ( + f"{self.__class__.__name__}(upsample_factor={self.upsample_factor}" + f", downsample_factor={self.downsample_factor})" + ) + + +class EqualLinear(nn.Module): + """Equalized Linear as StyleGAN2. + + Args: + in_channels (int): Size of each sample. + out_channels (int): Size of each output sample. 
+ bias (bool): If set to ``False``, the layer will not learn an additive + bias. Default: ``True``. + bias_init_val (float): Bias initialized value. Default: 0. + lr_mul (float): Learning rate multiplier. Default: 1. + activation (None | str): The activation after ``linear`` operation. + Supported: 'fused_lrelu', None. Default: None. + """ + + def __init__( + self, + in_channels, + out_channels, + bias=True, + bias_init_val=0, + lr_mul=1, + activation=None, + ): + super(EqualLinear, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.lr_mul = lr_mul + self.activation = activation + if self.activation not in ["fused_lrelu", None]: + raise ValueError( + f"Wrong activation value in EqualLinear: {activation}" + "Supported ones are: ['fused_lrelu', None]." + ) + self.scale = (1 / math.sqrt(in_channels)) * lr_mul + + self.weight = nn.Parameter(torch.randn(out_channels, in_channels).div_(lr_mul)) + if bias: + self.bias = nn.Parameter(torch.zeros(out_channels).fill_(bias_init_val)) + else: + self.register_parameter("bias", None) + + def forward(self, x): + if self.bias is None: + bias = None + else: + bias = self.bias * self.lr_mul + if self.activation == "fused_lrelu": + out = F.linear(x, self.weight * self.scale) + out = fused_leaky_relu(out, bias) + else: + out = F.linear(x, self.weight * self.scale, bias=bias) + return out + + def __repr__(self): + return ( + f"{self.__class__.__name__}(in_channels={self.in_channels}, " + f"out_channels={self.out_channels}, bias={self.bias is not None})" + ) + + +class ModulatedConv2d(nn.Module): + """Modulated Conv2d used in StyleGAN2. + + There is no bias in ModulatedConv2d. + + Args: + in_channels (int): Channel number of the input. + out_channels (int): Channel number of the output. + kernel_size (int): Size of the convolving kernel. + num_style_feat (int): Channel number of style features. + demodulate (bool): Whether to demodulate in the conv layer. + Default: True. + sample_mode (str | None): Indicating 'upsample', 'downsample' or None. + Default: None. + resample_kernel (list[int]): A list indicating the 1D resample kernel + magnitude. Default: (1, 3, 3, 1). + eps (float): A value added to the denominator for numerical stability. + Default: 1e-8. + """ + + def __init__( + self, + in_channels, + out_channels, + kernel_size, + num_style_feat, + demodulate=True, + sample_mode=None, + resample_kernel=(1, 3, 3, 1), + eps=1e-8, + ): + super(ModulatedConv2d, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = kernel_size + self.demodulate = demodulate + self.sample_mode = sample_mode + self.eps = eps + + if self.sample_mode == "upsample": + self.smooth = UpFirDnSmooth( + resample_kernel, + upsample_factor=2, + downsample_factor=1, + kernel_size=kernel_size, + ) + elif self.sample_mode == "downsample": + self.smooth = UpFirDnSmooth( + resample_kernel, + upsample_factor=1, + downsample_factor=2, + kernel_size=kernel_size, + ) + elif self.sample_mode is None: + pass + else: + raise ValueError( + f"Wrong sample mode {self.sample_mode}, " + "supported ones are ['upsample', 'downsample', None]." 
+ ) + + self.scale = 1 / math.sqrt(in_channels * kernel_size**2) + # modulation inside each modulated conv + self.modulation = EqualLinear( + num_style_feat, + in_channels, + bias=True, + bias_init_val=1, + lr_mul=1, + activation=None, + ) + + self.weight = nn.Parameter( + torch.randn(1, out_channels, in_channels, kernel_size, kernel_size) + ) + self.padding = kernel_size // 2 + + def forward(self, x, style): + """Forward function. + + Args: + x (Tensor): Tensor with shape (b, c, h, w). + style (Tensor): Tensor with shape (b, num_style_feat). + + Returns: + Tensor: Modulated tensor after convolution. + """ + b, c, h, w = x.shape # c = c_in + # weight modulation + style = self.modulation(style).view(b, 1, c, 1, 1) + # self.weight: (1, c_out, c_in, k, k); style: (b, 1, c, 1, 1) + weight = self.scale * self.weight * style # (b, c_out, c_in, k, k) + + if self.demodulate: + demod = torch.rsqrt(weight.pow(2).sum([2, 3, 4]) + self.eps) + weight = weight * demod.view(b, self.out_channels, 1, 1, 1) + + weight = weight.view( + b * self.out_channels, c, self.kernel_size, self.kernel_size + ) + + if self.sample_mode == "upsample": + x = x.view(1, b * c, h, w) + weight = weight.view( + b, self.out_channels, c, self.kernel_size, self.kernel_size + ) + weight = weight.transpose(1, 2).reshape( + b * c, self.out_channels, self.kernel_size, self.kernel_size + ) + out = F.conv_transpose2d(x, weight, padding=0, stride=2, groups=b) + out = out.view(b, self.out_channels, *out.shape[2:4]) + out = self.smooth(out) + elif self.sample_mode == "downsample": + x = self.smooth(x) + x = x.view(1, b * c, *x.shape[2:4]) + out = F.conv2d(x, weight, padding=0, stride=2, groups=b) + out = out.view(b, self.out_channels, *out.shape[2:4]) + else: + x = x.view(1, b * c, h, w) + # weight: (b*c_out, c_in, k, k), groups=b + out = F.conv2d(x, weight, padding=self.padding, groups=b) + out = out.view(b, self.out_channels, *out.shape[2:4]) + + return out + + def __repr__(self): + return ( + f"{self.__class__.__name__}(in_channels={self.in_channels}, " + f"out_channels={self.out_channels}, " + f"kernel_size={self.kernel_size}, " + f"demodulate={self.demodulate}, sample_mode={self.sample_mode})" + ) + + +class StyleConv(nn.Module): + """Style conv. + + Args: + in_channels (int): Channel number of the input. + out_channels (int): Channel number of the output. + kernel_size (int): Size of the convolving kernel. + num_style_feat (int): Channel number of style features. + demodulate (bool): Whether demodulate in the conv layer. Default: True. + sample_mode (str | None): Indicating 'upsample', 'downsample' or None. + Default: None. + resample_kernel (list[int]): A list indicating the 1D resample kernel + magnitude. Default: (1, 3, 3, 1). 
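+
+    Note:
+        Noise injection is weighted by a single learnable scalar per layer, and
+        the bias is applied inside the ``FusedLeakyReLU`` activation.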
+ """ + + def __init__( + self, + in_channels, + out_channels, + kernel_size, + num_style_feat, + demodulate=True, + sample_mode=None, + resample_kernel=(1, 3, 3, 1), + ): + super(StyleConv, self).__init__() + self.modulated_conv = ModulatedConv2d( + in_channels, + out_channels, + kernel_size, + num_style_feat, + demodulate=demodulate, + sample_mode=sample_mode, + resample_kernel=resample_kernel, + ) + self.weight = nn.Parameter(torch.zeros(1)) # for noise injection + self.activate = FusedLeakyReLU(out_channels) + + def forward(self, x, style, noise=None): + # modulate + out = self.modulated_conv(x, style) + # noise injection + if noise is None: + b, _, h, w = out.shape + noise = out.new_empty(b, 1, h, w).normal_() + out = out + self.weight * noise + # activation (with bias) + out = self.activate(out) + return out + + +class ToRGB(nn.Module): + """To RGB from features. + + Args: + in_channels (int): Channel number of input. + num_style_feat (int): Channel number of style features. + upsample (bool): Whether to upsample. Default: True. + resample_kernel (list[int]): A list indicating the 1D resample kernel + magnitude. Default: (1, 3, 3, 1). + """ + + def __init__( + self, in_channels, num_style_feat, upsample=True, resample_kernel=(1, 3, 3, 1) + ): + super(ToRGB, self).__init__() + if upsample: + self.upsample = UpFirDnUpsample(resample_kernel, factor=2) + else: + self.upsample = None + self.modulated_conv = ModulatedConv2d( + in_channels, + 3, + kernel_size=1, + num_style_feat=num_style_feat, + demodulate=False, + sample_mode=None, + ) + self.bias = nn.Parameter(torch.zeros(1, 3, 1, 1)) + + def forward(self, x, style, skip=None): + """Forward function. + + Args: + x (Tensor): Feature tensor with shape (b, c, h, w). + style (Tensor): Tensor with shape (b, num_style_feat). + skip (Tensor): Base/skip tensor. Default: None. + + Returns: + Tensor: RGB images. + """ + out = self.modulated_conv(x, style) + out = out + self.bias + if skip is not None: + if self.upsample: + skip = self.upsample(skip) + out = out + skip + return out + + +class ConstantInput(nn.Module): + """Constant input. + + Args: + num_channel (int): Channel number of constant input. + size (int): Spatial size of constant input. + """ + + def __init__(self, num_channel, size): + super(ConstantInput, self).__init__() + self.weight = nn.Parameter(torch.randn(1, num_channel, size, size)) + + def forward(self, batch): + out = self.weight.repeat(batch, 1, 1, 1) + return out + + +class StyleGAN2Generator(nn.Module): + """StyleGAN2 Generator. + + Args: + out_size (int): The spatial size of outputs. + num_style_feat (int): Channel number of style features. Default: 512. + num_mlp (int): Layer number of MLP style layers. Default: 8. + channel_multiplier (int): Channel multiplier for large networks of + StyleGAN2. Default: 2. + resample_kernel (list[int]): A list indicating the 1D resample kernel + magnitude. A cross production will be applied to extent 1D resample + kernel to 2D resample kernel. Default: (1, 3, 3, 1). + lr_mlp (float): Learning rate multiplier for mlp layers. Default: 0.01. + narrow (float): Narrow ratio for channels. Default: 1.0. 
+ """ + + def __init__( + self, + out_size, + num_style_feat=512, + num_mlp=8, + channel_multiplier=2, + resample_kernel=(1, 3, 3, 1), + lr_mlp=0.01, + narrow=1, + ): + super(StyleGAN2Generator, self).__init__() + # Style MLP layers + self.num_style_feat = num_style_feat + style_mlp_layers = [NormStyleCode()] + for i in range(num_mlp): + style_mlp_layers.append( + EqualLinear( + num_style_feat, + num_style_feat, + bias=True, + bias_init_val=0, + lr_mul=lr_mlp, + activation="fused_lrelu", + ) + ) + self.style_mlp = nn.Sequential(*style_mlp_layers) + + channels = { + "4": int(512 * narrow), + "8": int(512 * narrow), + "16": int(512 * narrow), + "32": int(512 * narrow), + "64": int(256 * channel_multiplier * narrow), + "128": int(128 * channel_multiplier * narrow), + "256": int(64 * channel_multiplier * narrow), + "512": int(32 * channel_multiplier * narrow), + "1024": int(16 * channel_multiplier * narrow), + } + self.channels = channels + + self.constant_input = ConstantInput(channels["4"], size=4) + self.style_conv1 = StyleConv( + channels["4"], + channels["4"], + kernel_size=3, + num_style_feat=num_style_feat, + demodulate=True, + sample_mode=None, + resample_kernel=resample_kernel, + ) + self.to_rgb1 = ToRGB( + channels["4"], + num_style_feat, + upsample=False, + resample_kernel=resample_kernel, + ) + + self.log_size = int(math.log(out_size, 2)) + self.num_layers = (self.log_size - 2) * 2 + 1 + self.num_latent = self.log_size * 2 - 2 + + self.style_convs = nn.ModuleList() + self.to_rgbs = nn.ModuleList() + self.noises = nn.Module() + + in_channels = channels["4"] + # noise + for layer_idx in range(self.num_layers): + resolution = 2 ** ((layer_idx + 5) // 2) + shape = [1, 1, resolution, resolution] + self.noises.register_buffer(f"noise{layer_idx}", torch.randn(*shape)) + # style convs and to_rgbs + for i in range(3, self.log_size + 1): + out_channels = channels[f"{2**i}"] + self.style_convs.append( + StyleConv( + in_channels, + out_channels, + kernel_size=3, + num_style_feat=num_style_feat, + demodulate=True, + sample_mode="upsample", + resample_kernel=resample_kernel, + ) + ) + self.style_convs.append( + StyleConv( + out_channels, + out_channels, + kernel_size=3, + num_style_feat=num_style_feat, + demodulate=True, + sample_mode=None, + resample_kernel=resample_kernel, + ) + ) + self.to_rgbs.append( + ToRGB( + out_channels, + num_style_feat, + upsample=True, + resample_kernel=resample_kernel, + ) + ) + in_channels = out_channels + + def make_noise(self): + """Make noise for noise injection.""" + device = self.constant_input.weight.device + noises = [torch.randn(1, 1, 4, 4, device=device)] + + for i in range(3, self.log_size + 1): + for _ in range(2): + noises.append(torch.randn(1, 1, 2**i, 2**i, device=device)) + + return noises + + def get_latent(self, x): + return self.style_mlp(x) + + def mean_latent(self, num_latent): + latent_in = torch.randn( + num_latent, self.num_style_feat, device=self.constant_input.weight.device + ) + latent = self.style_mlp(latent_in).mean(0, keepdim=True) + return latent + + def forward( + self, + styles, + input_is_latent=False, + noise=None, + randomize_noise=True, + truncation=1, + truncation_latent=None, + inject_index=None, + return_latents=False, + ): + """Forward function for StyleGAN2Generator. + + Args: + styles (list[Tensor]): Sample codes of styles. + input_is_latent (bool): Whether input is latent style. + Default: False. + noise (Tensor | None): Input noise or None. Default: None. 
+ randomize_noise (bool): Randomize noise, used when 'noise' is + False. Default: True. + truncation (float): TODO. Default: 1. + truncation_latent (Tensor | None): TODO. Default: None. + inject_index (int | None): The injection index for mixing noise. + Default: None. + return_latents (bool): Whether to return style latents. + Default: False. + """ + # style codes -> latents with Style MLP layer + if not input_is_latent: + styles = [self.style_mlp(s) for s in styles] + # noises + if noise is None: + if randomize_noise: + noise = [None] * self.num_layers # for each style conv layer + else: # use the stored noise + noise = [ + getattr(self.noises, f"noise{i}") for i in range(self.num_layers) + ] + # style truncation + if truncation < 1: + style_truncation = [] + for style in styles: + style_truncation.append( + truncation_latent + truncation * (style - truncation_latent) + ) + styles = style_truncation + # get style latent with injection + if len(styles) == 1: + inject_index = self.num_latent + + if styles[0].ndim < 3: + # repeat latent code for all the layers + latent = styles[0].unsqueeze(1).repeat(1, inject_index, 1) + else: # used for encoder with different latent code for each layer + latent = styles[0] + elif len(styles) == 2: # mixing noises + if inject_index is None: + inject_index = random.randint(1, self.num_latent - 1) + latent1 = styles[0].unsqueeze(1).repeat(1, inject_index, 1) + latent2 = ( + styles[1].unsqueeze(1).repeat(1, self.num_latent - inject_index, 1) + ) + latent = torch.cat([latent1, latent2], 1) + + # main generation + out = self.constant_input(latent.shape[0]) + out = self.style_conv1(out, latent[:, 0], noise=noise[0]) + skip = self.to_rgb1(out, latent[:, 1]) + + i = 1 + for conv1, conv2, noise1, noise2, to_rgb in zip( + self.style_convs[::2], + self.style_convs[1::2], + noise[1::2], + noise[2::2], + self.to_rgbs, + ): + out = conv1(out, latent[:, i], noise=noise1) + out = conv2(out, latent[:, i + 1], noise=noise2) + skip = to_rgb(out, latent[:, i + 2], skip) + i += 2 + + image = skip + + if return_latents: + return image, latent + else: + return image, None + + +class ScaledLeakyReLU(nn.Module): + """Scaled LeakyReLU. + + Args: + negative_slope (float): Negative slope. Default: 0.2. + """ + + def __init__(self, negative_slope=0.2): + super(ScaledLeakyReLU, self).__init__() + self.negative_slope = negative_slope + + def forward(self, x): + out = F.leaky_relu(x, negative_slope=self.negative_slope) + return out * math.sqrt(2) + + +class EqualConv2d(nn.Module): + """Equalized Linear as StyleGAN2. + + Args: + in_channels (int): Channel number of the input. + out_channels (int): Channel number of the output. + kernel_size (int): Size of the convolving kernel. + stride (int): Stride of the convolution. Default: 1 + padding (int): Zero-padding added to both sides of the input. + Default: 0. + bias (bool): If ``True``, adds a learnable bias to the output. + Default: ``True``. + bias_init_val (float): Bias initialized value. Default: 0. 
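+
+    Example:
+        An illustrative sketch; shapes are assumed for demonstration. The layer
+        behaves like a plain Conv2d whose weight is rescaled at runtime by
+        1 / sqrt(in_channels * kernel_size**2) (equalized learning rate).
+
+        >>> conv = EqualConv2d(64, 128, kernel_size=3, padding=1)
+        >>> tuple(conv(torch.randn(1, 64, 16, 16)).shape)
+        (1, 128, 16, 16)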
+ """ + + def __init__( + self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + bias=True, + bias_init_val=0, + ): + super(EqualConv2d, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = kernel_size + self.stride = stride + self.padding = padding + self.scale = 1 / math.sqrt(in_channels * kernel_size**2) + + self.weight = nn.Parameter( + torch.randn(out_channels, in_channels, kernel_size, kernel_size) + ) + if bias: + self.bias = nn.Parameter(torch.zeros(out_channels).fill_(bias_init_val)) + else: + self.register_parameter("bias", None) + + def forward(self, x): + out = F.conv2d( + x, + self.weight * self.scale, + bias=self.bias, + stride=self.stride, + padding=self.padding, + ) + + return out + + def __repr__(self): + return ( + f"{self.__class__.__name__}(in_channels={self.in_channels}, " + f"out_channels={self.out_channels}, " + f"kernel_size={self.kernel_size}," + f" stride={self.stride}, padding={self.padding}, " + f"bias={self.bias is not None})" + ) + + +class ConvLayer(nn.Sequential): + """Conv Layer used in StyleGAN2 Discriminator. + + Args: + in_channels (int): Channel number of the input. + out_channels (int): Channel number of the output. + kernel_size (int): Kernel size. + downsample (bool): Whether downsample by a factor of 2. + Default: False. + resample_kernel (list[int]): A list indicating the 1D resample + kernel magnitude. A cross production will be applied to + extent 1D resample kernel to 2D resample kernel. + Default: (1, 3, 3, 1). + bias (bool): Whether with bias. Default: True. + activate (bool): Whether use activateion. Default: True. + """ + + def __init__( + self, + in_channels, + out_channels, + kernel_size, + downsample=False, + resample_kernel=(1, 3, 3, 1), + bias=True, + activate=True, + ): + layers = [] + # downsample + if downsample: + layers.append( + UpFirDnSmooth( + resample_kernel, + upsample_factor=1, + downsample_factor=2, + kernel_size=kernel_size, + ) + ) + stride = 2 + self.padding = 0 + else: + stride = 1 + self.padding = kernel_size // 2 + # conv + layers.append( + EqualConv2d( + in_channels, + out_channels, + kernel_size, + stride=stride, + padding=self.padding, + bias=bias and not activate, + ) + ) + # activation + if activate: + if bias: + layers.append(FusedLeakyReLU(out_channels)) + else: + layers.append(ScaledLeakyReLU(0.2)) + + super(ConvLayer, self).__init__(*layers) + + +class ResBlock(nn.Module): + """Residual block used in StyleGAN2 Discriminator. + + Args: + in_channels (int): Channel number of the input. + out_channels (int): Channel number of the output. + resample_kernel (list[int]): A list indicating the 1D resample + kernel magnitude. A cross production will be applied to + extent 1D resample kernel to 2D resample kernel. + Default: (1, 3, 3, 1). 
+ """ + + def __init__(self, in_channels, out_channels, resample_kernel=(1, 3, 3, 1)): + super(ResBlock, self).__init__() + + self.conv1 = ConvLayer(in_channels, in_channels, 3, bias=True, activate=True) + self.conv2 = ConvLayer( + in_channels, + out_channels, + 3, + downsample=True, + resample_kernel=resample_kernel, + bias=True, + activate=True, + ) + self.skip = ConvLayer( + in_channels, + out_channels, + 1, + downsample=True, + resample_kernel=resample_kernel, + bias=False, + activate=False, + ) + + def forward(self, x): + out = self.conv1(x) + out = self.conv2(out) + skip = self.skip(x) + out = (out + skip) / math.sqrt(2) + return out diff --git a/ldm_patched/pfn/architecture/face/stylegan2_bilinear_arch.py b/ldm_patched/pfn/architecture/face/stylegan2_bilinear_arch.py new file mode 100644 index 0000000000000000000000000000000000000000..601f8cc4b33bdbb371d710a2bb0656e8ce102e26 --- /dev/null +++ b/ldm_patched/pfn/architecture/face/stylegan2_bilinear_arch.py @@ -0,0 +1,709 @@ +# pylint: skip-file +# type: ignore +import math +import random + +import torch +from torch import nn +from torch.nn import functional as F + +from .fused_act import FusedLeakyReLU, fused_leaky_relu + + +class NormStyleCode(nn.Module): + def forward(self, x): + """Normalize the style codes. + Args: + x (Tensor): Style codes with shape (b, c). + Returns: + Tensor: Normalized tensor. + """ + return x * torch.rsqrt(torch.mean(x**2, dim=1, keepdim=True) + 1e-8) + + +class EqualLinear(nn.Module): + """Equalized Linear as StyleGAN2. + Args: + in_channels (int): Size of each sample. + out_channels (int): Size of each output sample. + bias (bool): If set to ``False``, the layer will not learn an additive + bias. Default: ``True``. + bias_init_val (float): Bias initialized value. Default: 0. + lr_mul (float): Learning rate multiplier. Default: 1. + activation (None | str): The activation after ``linear`` operation. + Supported: 'fused_lrelu', None. Default: None. + """ + + def __init__( + self, + in_channels, + out_channels, + bias=True, + bias_init_val=0, + lr_mul=1, + activation=None, + ): + super(EqualLinear, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.lr_mul = lr_mul + self.activation = activation + if self.activation not in ["fused_lrelu", None]: + raise ValueError( + f"Wrong activation value in EqualLinear: {activation}" + "Supported ones are: ['fused_lrelu', None]." + ) + self.scale = (1 / math.sqrt(in_channels)) * lr_mul + + self.weight = nn.Parameter(torch.randn(out_channels, in_channels).div_(lr_mul)) + if bias: + self.bias = nn.Parameter(torch.zeros(out_channels).fill_(bias_init_val)) + else: + self.register_parameter("bias", None) + + def forward(self, x): + if self.bias is None: + bias = None + else: + bias = self.bias * self.lr_mul + if self.activation == "fused_lrelu": + out = F.linear(x, self.weight * self.scale) + out = fused_leaky_relu(out, bias) + else: + out = F.linear(x, self.weight * self.scale, bias=bias) + return out + + def __repr__(self): + return ( + f"{self.__class__.__name__}(in_channels={self.in_channels}, " + f"out_channels={self.out_channels}, bias={self.bias is not None})" + ) + + +class ModulatedConv2d(nn.Module): + """Modulated Conv2d used in StyleGAN2. + There is no bias in ModulatedConv2d. + Args: + in_channels (int): Channel number of the input. + out_channels (int): Channel number of the output. + kernel_size (int): Size of the convolving kernel. + num_style_feat (int): Channel number of style features. 
+ demodulate (bool): Whether to demodulate in the conv layer. + Default: True. + sample_mode (str | None): Indicating 'upsample', 'downsample' or None. + Default: None. + eps (float): A value added to the denominator for numerical stability. + Default: 1e-8. + """ + + def __init__( + self, + in_channels, + out_channels, + kernel_size, + num_style_feat, + demodulate=True, + sample_mode=None, + eps=1e-8, + interpolation_mode="bilinear", + ): + super(ModulatedConv2d, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = kernel_size + self.demodulate = demodulate + self.sample_mode = sample_mode + self.eps = eps + self.interpolation_mode = interpolation_mode + if self.interpolation_mode == "nearest": + self.align_corners = None + else: + self.align_corners = False + + self.scale = 1 / math.sqrt(in_channels * kernel_size**2) + # modulation inside each modulated conv + self.modulation = EqualLinear( + num_style_feat, + in_channels, + bias=True, + bias_init_val=1, + lr_mul=1, + activation=None, + ) + + self.weight = nn.Parameter( + torch.randn(1, out_channels, in_channels, kernel_size, kernel_size) + ) + self.padding = kernel_size // 2 + + def forward(self, x, style): + """Forward function. + Args: + x (Tensor): Tensor with shape (b, c, h, w). + style (Tensor): Tensor with shape (b, num_style_feat). + Returns: + Tensor: Modulated tensor after convolution. + """ + b, c, h, w = x.shape # c = c_in + # weight modulation + style = self.modulation(style).view(b, 1, c, 1, 1) + # self.weight: (1, c_out, c_in, k, k); style: (b, 1, c, 1, 1) + weight = self.scale * self.weight * style # (b, c_out, c_in, k, k) + + if self.demodulate: + demod = torch.rsqrt(weight.pow(2).sum([2, 3, 4]) + self.eps) + weight = weight * demod.view(b, self.out_channels, 1, 1, 1) + + weight = weight.view( + b * self.out_channels, c, self.kernel_size, self.kernel_size + ) + + if self.sample_mode == "upsample": + x = F.interpolate( + x, + scale_factor=2, + mode=self.interpolation_mode, + align_corners=self.align_corners, + ) + elif self.sample_mode == "downsample": + x = F.interpolate( + x, + scale_factor=0.5, + mode=self.interpolation_mode, + align_corners=self.align_corners, + ) + + b, c, h, w = x.shape + x = x.view(1, b * c, h, w) + # weight: (b*c_out, c_in, k, k), groups=b + out = F.conv2d(x, weight, padding=self.padding, groups=b) + out = out.view(b, self.out_channels, *out.shape[2:4]) + + return out + + def __repr__(self): + return ( + f"{self.__class__.__name__}(in_channels={self.in_channels}, " + f"out_channels={self.out_channels}, " + f"kernel_size={self.kernel_size}, " + f"demodulate={self.demodulate}, sample_mode={self.sample_mode})" + ) + + +class StyleConv(nn.Module): + """Style conv. + Args: + in_channels (int): Channel number of the input. + out_channels (int): Channel number of the output. + kernel_size (int): Size of the convolving kernel. + num_style_feat (int): Channel number of style features. + demodulate (bool): Whether demodulate in the conv layer. Default: True. + sample_mode (str | None): Indicating 'upsample', 'downsample' or None. + Default: None. 
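+    Example:
+        An illustrative sketch; shapes are assumed for demonstration. With
+        sample_mode="upsample" the input is bilinearly interpolated to twice
+        the resolution before the modulated convolution is applied.
+
+        >>> conv = StyleConv(512, 256, kernel_size=3, num_style_feat=512,
+        ...                  sample_mode="upsample")
+        >>> out = conv(torch.randn(2, 512, 8, 8), torch.randn(2, 512))
+        >>> tuple(out.shape)
+        (2, 256, 16, 16)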
+ """ + + def __init__( + self, + in_channels, + out_channels, + kernel_size, + num_style_feat, + demodulate=True, + sample_mode=None, + interpolation_mode="bilinear", + ): + super(StyleConv, self).__init__() + self.modulated_conv = ModulatedConv2d( + in_channels, + out_channels, + kernel_size, + num_style_feat, + demodulate=demodulate, + sample_mode=sample_mode, + interpolation_mode=interpolation_mode, + ) + self.weight = nn.Parameter(torch.zeros(1)) # for noise injection + self.activate = FusedLeakyReLU(out_channels) + + def forward(self, x, style, noise=None): + # modulate + out = self.modulated_conv(x, style) + # noise injection + if noise is None: + b, _, h, w = out.shape + noise = out.new_empty(b, 1, h, w).normal_() + out = out + self.weight * noise + # activation (with bias) + out = self.activate(out) + return out + + +class ToRGB(nn.Module): + """To RGB from features. + Args: + in_channels (int): Channel number of input. + num_style_feat (int): Channel number of style features. + upsample (bool): Whether to upsample. Default: True. + """ + + def __init__( + self, in_channels, num_style_feat, upsample=True, interpolation_mode="bilinear" + ): + super(ToRGB, self).__init__() + self.upsample = upsample + self.interpolation_mode = interpolation_mode + if self.interpolation_mode == "nearest": + self.align_corners = None + else: + self.align_corners = False + self.modulated_conv = ModulatedConv2d( + in_channels, + 3, + kernel_size=1, + num_style_feat=num_style_feat, + demodulate=False, + sample_mode=None, + interpolation_mode=interpolation_mode, + ) + self.bias = nn.Parameter(torch.zeros(1, 3, 1, 1)) + + def forward(self, x, style, skip=None): + """Forward function. + Args: + x (Tensor): Feature tensor with shape (b, c, h, w). + style (Tensor): Tensor with shape (b, num_style_feat). + skip (Tensor): Base/skip tensor. Default: None. + Returns: + Tensor: RGB images. + """ + out = self.modulated_conv(x, style) + out = out + self.bias + if skip is not None: + if self.upsample: + skip = F.interpolate( + skip, + scale_factor=2, + mode=self.interpolation_mode, + align_corners=self.align_corners, + ) + out = out + skip + return out + + +class ConstantInput(nn.Module): + """Constant input. + Args: + num_channel (int): Channel number of constant input. + size (int): Spatial size of constant input. + """ + + def __init__(self, num_channel, size): + super(ConstantInput, self).__init__() + self.weight = nn.Parameter(torch.randn(1, num_channel, size, size)) + + def forward(self, batch): + out = self.weight.repeat(batch, 1, 1, 1) + return out + + +class StyleGAN2GeneratorBilinear(nn.Module): + """StyleGAN2 Generator. + Args: + out_size (int): The spatial size of outputs. + num_style_feat (int): Channel number of style features. Default: 512. + num_mlp (int): Layer number of MLP style layers. Default: 8. + channel_multiplier (int): Channel multiplier for large networks of + StyleGAN2. Default: 2. + lr_mlp (float): Learning rate multiplier for mlp layers. Default: 0.01. + narrow (float): Narrow ratio for channels. Default: 1.0. 
+ """ + + def __init__( + self, + out_size, + num_style_feat=512, + num_mlp=8, + channel_multiplier=2, + lr_mlp=0.01, + narrow=1, + interpolation_mode="bilinear", + ): + super(StyleGAN2GeneratorBilinear, self).__init__() + # Style MLP layers + self.num_style_feat = num_style_feat + style_mlp_layers = [NormStyleCode()] + for i in range(num_mlp): + style_mlp_layers.append( + EqualLinear( + num_style_feat, + num_style_feat, + bias=True, + bias_init_val=0, + lr_mul=lr_mlp, + activation="fused_lrelu", + ) + ) + self.style_mlp = nn.Sequential(*style_mlp_layers) + + channels = { + "4": int(512 * narrow), + "8": int(512 * narrow), + "16": int(512 * narrow), + "32": int(512 * narrow), + "64": int(256 * channel_multiplier * narrow), + "128": int(128 * channel_multiplier * narrow), + "256": int(64 * channel_multiplier * narrow), + "512": int(32 * channel_multiplier * narrow), + "1024": int(16 * channel_multiplier * narrow), + } + self.channels = channels + + self.constant_input = ConstantInput(channels["4"], size=4) + self.style_conv1 = StyleConv( + channels["4"], + channels["4"], + kernel_size=3, + num_style_feat=num_style_feat, + demodulate=True, + sample_mode=None, + interpolation_mode=interpolation_mode, + ) + self.to_rgb1 = ToRGB( + channels["4"], + num_style_feat, + upsample=False, + interpolation_mode=interpolation_mode, + ) + + self.log_size = int(math.log(out_size, 2)) + self.num_layers = (self.log_size - 2) * 2 + 1 + self.num_latent = self.log_size * 2 - 2 + + self.style_convs = nn.ModuleList() + self.to_rgbs = nn.ModuleList() + self.noises = nn.Module() + + in_channels = channels["4"] + # noise + for layer_idx in range(self.num_layers): + resolution = 2 ** ((layer_idx + 5) // 2) + shape = [1, 1, resolution, resolution] + self.noises.register_buffer(f"noise{layer_idx}", torch.randn(*shape)) + # style convs and to_rgbs + for i in range(3, self.log_size + 1): + out_channels = channels[f"{2**i}"] + self.style_convs.append( + StyleConv( + in_channels, + out_channels, + kernel_size=3, + num_style_feat=num_style_feat, + demodulate=True, + sample_mode="upsample", + interpolation_mode=interpolation_mode, + ) + ) + self.style_convs.append( + StyleConv( + out_channels, + out_channels, + kernel_size=3, + num_style_feat=num_style_feat, + demodulate=True, + sample_mode=None, + interpolation_mode=interpolation_mode, + ) + ) + self.to_rgbs.append( + ToRGB( + out_channels, + num_style_feat, + upsample=True, + interpolation_mode=interpolation_mode, + ) + ) + in_channels = out_channels + + def make_noise(self): + """Make noise for noise injection.""" + device = self.constant_input.weight.device + noises = [torch.randn(1, 1, 4, 4, device=device)] + + for i in range(3, self.log_size + 1): + for _ in range(2): + noises.append(torch.randn(1, 1, 2**i, 2**i, device=device)) + + return noises + + def get_latent(self, x): + return self.style_mlp(x) + + def mean_latent(self, num_latent): + latent_in = torch.randn( + num_latent, self.num_style_feat, device=self.constant_input.weight.device + ) + latent = self.style_mlp(latent_in).mean(0, keepdim=True) + return latent + + def forward( + self, + styles, + input_is_latent=False, + noise=None, + randomize_noise=True, + truncation=1, + truncation_latent=None, + inject_index=None, + return_latents=False, + ): + """Forward function for StyleGAN2Generator. + Args: + styles (list[Tensor]): Sample codes of styles. + input_is_latent (bool): Whether input is latent style. + Default: False. + noise (Tensor | None): Input noise or None. Default: None. 
+ randomize_noise (bool): Randomize noise, used when 'noise' is + False. Default: True. + truncation (float): TODO. Default: 1. + truncation_latent (Tensor | None): TODO. Default: None. + inject_index (int | None): The injection index for mixing noise. + Default: None. + return_latents (bool): Whether to return style latents. + Default: False. + """ + # style codes -> latents with Style MLP layer + if not input_is_latent: + styles = [self.style_mlp(s) for s in styles] + # noises + if noise is None: + if randomize_noise: + noise = [None] * self.num_layers # for each style conv layer + else: # use the stored noise + noise = [ + getattr(self.noises, f"noise{i}") for i in range(self.num_layers) + ] + # style truncation + if truncation < 1: + style_truncation = [] + for style in styles: + style_truncation.append( + truncation_latent + truncation * (style - truncation_latent) + ) + styles = style_truncation + # get style latent with injection + if len(styles) == 1: + inject_index = self.num_latent + + if styles[0].ndim < 3: + # repeat latent code for all the layers + latent = styles[0].unsqueeze(1).repeat(1, inject_index, 1) + else: # used for encoder with different latent code for each layer + latent = styles[0] + elif len(styles) == 2: # mixing noises + if inject_index is None: + inject_index = random.randint(1, self.num_latent - 1) + latent1 = styles[0].unsqueeze(1).repeat(1, inject_index, 1) + latent2 = ( + styles[1].unsqueeze(1).repeat(1, self.num_latent - inject_index, 1) + ) + latent = torch.cat([latent1, latent2], 1) + + # main generation + out = self.constant_input(latent.shape[0]) + out = self.style_conv1(out, latent[:, 0], noise=noise[0]) + skip = self.to_rgb1(out, latent[:, 1]) + + i = 1 + for conv1, conv2, noise1, noise2, to_rgb in zip( + self.style_convs[::2], + self.style_convs[1::2], + noise[1::2], + noise[2::2], + self.to_rgbs, + ): + out = conv1(out, latent[:, i], noise=noise1) + out = conv2(out, latent[:, i + 1], noise=noise2) + skip = to_rgb(out, latent[:, i + 2], skip) + i += 2 + + image = skip + + if return_latents: + return image, latent + else: + return image, None + + +class ScaledLeakyReLU(nn.Module): + """Scaled LeakyReLU. + Args: + negative_slope (float): Negative slope. Default: 0.2. + """ + + def __init__(self, negative_slope=0.2): + super(ScaledLeakyReLU, self).__init__() + self.negative_slope = negative_slope + + def forward(self, x): + out = F.leaky_relu(x, negative_slope=self.negative_slope) + return out * math.sqrt(2) + + +class EqualConv2d(nn.Module): + """Equalized Linear as StyleGAN2. + Args: + in_channels (int): Channel number of the input. + out_channels (int): Channel number of the output. + kernel_size (int): Size of the convolving kernel. + stride (int): Stride of the convolution. Default: 1 + padding (int): Zero-padding added to both sides of the input. + Default: 0. + bias (bool): If ``True``, adds a learnable bias to the output. + Default: ``True``. + bias_init_val (float): Bias initialized value. Default: 0. 
+ """ + + def __init__( + self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + bias=True, + bias_init_val=0, + ): + super(EqualConv2d, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = kernel_size + self.stride = stride + self.padding = padding + self.scale = 1 / math.sqrt(in_channels * kernel_size**2) + + self.weight = nn.Parameter( + torch.randn(out_channels, in_channels, kernel_size, kernel_size) + ) + if bias: + self.bias = nn.Parameter(torch.zeros(out_channels).fill_(bias_init_val)) + else: + self.register_parameter("bias", None) + + def forward(self, x): + out = F.conv2d( + x, + self.weight * self.scale, + bias=self.bias, + stride=self.stride, + padding=self.padding, + ) + + return out + + def __repr__(self): + return ( + f"{self.__class__.__name__}(in_channels={self.in_channels}, " + f"out_channels={self.out_channels}, " + f"kernel_size={self.kernel_size}," + f" stride={self.stride}, padding={self.padding}, " + f"bias={self.bias is not None})" + ) + + +class ConvLayer(nn.Sequential): + """Conv Layer used in StyleGAN2 Discriminator. + Args: + in_channels (int): Channel number of the input. + out_channels (int): Channel number of the output. + kernel_size (int): Kernel size. + downsample (bool): Whether downsample by a factor of 2. + Default: False. + bias (bool): Whether with bias. Default: True. + activate (bool): Whether use activateion. Default: True. + """ + + def __init__( + self, + in_channels, + out_channels, + kernel_size, + downsample=False, + bias=True, + activate=True, + interpolation_mode="bilinear", + ): + layers = [] + self.interpolation_mode = interpolation_mode + # downsample + if downsample: + if self.interpolation_mode == "nearest": + self.align_corners = None + else: + self.align_corners = False + + layers.append( + torch.nn.Upsample( + scale_factor=0.5, + mode=interpolation_mode, + align_corners=self.align_corners, + ) + ) + stride = 1 + self.padding = kernel_size // 2 + # conv + layers.append( + EqualConv2d( + in_channels, + out_channels, + kernel_size, + stride=stride, + padding=self.padding, + bias=bias and not activate, + ) + ) + # activation + if activate: + if bias: + layers.append(FusedLeakyReLU(out_channels)) + else: + layers.append(ScaledLeakyReLU(0.2)) + + super(ConvLayer, self).__init__(*layers) + + +class ResBlock(nn.Module): + """Residual block used in StyleGAN2 Discriminator. + Args: + in_channels (int): Channel number of the input. + out_channels (int): Channel number of the output. 
+ """ + + def __init__(self, in_channels, out_channels, interpolation_mode="bilinear"): + super(ResBlock, self).__init__() + + self.conv1 = ConvLayer(in_channels, in_channels, 3, bias=True, activate=True) + self.conv2 = ConvLayer( + in_channels, + out_channels, + 3, + downsample=True, + interpolation_mode=interpolation_mode, + bias=True, + activate=True, + ) + self.skip = ConvLayer( + in_channels, + out_channels, + 1, + downsample=True, + interpolation_mode=interpolation_mode, + bias=False, + activate=False, + ) + + def forward(self, x): + out = self.conv1(x) + out = self.conv2(out) + skip = self.skip(x) + out = (out + skip) / math.sqrt(2) + return out diff --git a/ldm_patched/pfn/architecture/face/stylegan2_clean_arch.py b/ldm_patched/pfn/architecture/face/stylegan2_clean_arch.py new file mode 100644 index 0000000000000000000000000000000000000000..c48de9af6904b8d1891a84efa8e4d76104d5d710 --- /dev/null +++ b/ldm_patched/pfn/architecture/face/stylegan2_clean_arch.py @@ -0,0 +1,453 @@ +# pylint: skip-file +# type: ignore +import math + +import torch +from torch import nn +from torch.nn import functional as F +from torch.nn import init +from torch.nn.modules.batchnorm import _BatchNorm + + +@torch.no_grad() +def default_init_weights(module_list, scale=1, bias_fill=0, **kwargs): + """Initialize network weights. + Args: + module_list (list[nn.Module] | nn.Module): Modules to be initialized. + scale (float): Scale initialized weights, especially for residual + blocks. Default: 1. + bias_fill (float): The value to fill bias. Default: 0 + kwargs (dict): Other arguments for initialization function. + """ + if not isinstance(module_list, list): + module_list = [module_list] + for module in module_list: + for m in module.modules(): + if isinstance(m, nn.Conv2d): + init.kaiming_normal_(m.weight, **kwargs) + m.weight.data *= scale + if m.bias is not None: + m.bias.data.fill_(bias_fill) + elif isinstance(m, nn.Linear): + init.kaiming_normal_(m.weight, **kwargs) + m.weight.data *= scale + if m.bias is not None: + m.bias.data.fill_(bias_fill) + elif isinstance(m, _BatchNorm): + init.constant_(m.weight, 1) + if m.bias is not None: + m.bias.data.fill_(bias_fill) + + +class NormStyleCode(nn.Module): + def forward(self, x): + """Normalize the style codes. + Args: + x (Tensor): Style codes with shape (b, c). + Returns: + Tensor: Normalized tensor. + """ + return x * torch.rsqrt(torch.mean(x**2, dim=1, keepdim=True) + 1e-8) + + +class ModulatedConv2d(nn.Module): + """Modulated Conv2d used in StyleGAN2. + There is no bias in ModulatedConv2d. + Args: + in_channels (int): Channel number of the input. + out_channels (int): Channel number of the output. + kernel_size (int): Size of the convolving kernel. + num_style_feat (int): Channel number of style features. + demodulate (bool): Whether to demodulate in the conv layer. Default: True. + sample_mode (str | None): Indicating 'upsample', 'downsample' or None. Default: None. + eps (float): A value added to the denominator for numerical stability. Default: 1e-8. 
+ """ + + def __init__( + self, + in_channels, + out_channels, + kernel_size, + num_style_feat, + demodulate=True, + sample_mode=None, + eps=1e-8, + ): + super(ModulatedConv2d, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = kernel_size + self.demodulate = demodulate + self.sample_mode = sample_mode + self.eps = eps + + # modulation inside each modulated conv + self.modulation = nn.Linear(num_style_feat, in_channels, bias=True) + # initialization + default_init_weights( + self.modulation, + scale=1, + bias_fill=1, + a=0, + mode="fan_in", + nonlinearity="linear", + ) + + self.weight = nn.Parameter( + torch.randn(1, out_channels, in_channels, kernel_size, kernel_size) + / math.sqrt(in_channels * kernel_size**2) + ) + self.padding = kernel_size // 2 + + def forward(self, x, style): + """Forward function. + Args: + x (Tensor): Tensor with shape (b, c, h, w). + style (Tensor): Tensor with shape (b, num_style_feat). + Returns: + Tensor: Modulated tensor after convolution. + """ + b, c, h, w = x.shape # c = c_in + # weight modulation + style = self.modulation(style).view(b, 1, c, 1, 1) + # self.weight: (1, c_out, c_in, k, k); style: (b, 1, c, 1, 1) + weight = self.weight * style # (b, c_out, c_in, k, k) + + if self.demodulate: + demod = torch.rsqrt(weight.pow(2).sum([2, 3, 4]) + self.eps) + weight = weight * demod.view(b, self.out_channels, 1, 1, 1) + + weight = weight.view( + b * self.out_channels, c, self.kernel_size, self.kernel_size + ) + + # upsample or downsample if necessary + if self.sample_mode == "upsample": + x = F.interpolate(x, scale_factor=2, mode="bilinear", align_corners=False) + elif self.sample_mode == "downsample": + x = F.interpolate(x, scale_factor=0.5, mode="bilinear", align_corners=False) + + b, c, h, w = x.shape + x = x.view(1, b * c, h, w) + # weight: (b*c_out, c_in, k, k), groups=b + out = F.conv2d(x, weight, padding=self.padding, groups=b) + out = out.view(b, self.out_channels, *out.shape[2:4]) + + return out + + def __repr__(self): + return ( + f"{self.__class__.__name__}(in_channels={self.in_channels}, out_channels={self.out_channels}, " + f"kernel_size={self.kernel_size}, demodulate={self.demodulate}, sample_mode={self.sample_mode})" + ) + + +class StyleConv(nn.Module): + """Style conv used in StyleGAN2. + Args: + in_channels (int): Channel number of the input. + out_channels (int): Channel number of the output. + kernel_size (int): Size of the convolving kernel. + num_style_feat (int): Channel number of style features. + demodulate (bool): Whether demodulate in the conv layer. Default: True. + sample_mode (str | None): Indicating 'upsample', 'downsample' or None. Default: None. 
+ """ + + def __init__( + self, + in_channels, + out_channels, + kernel_size, + num_style_feat, + demodulate=True, + sample_mode=None, + ): + super(StyleConv, self).__init__() + self.modulated_conv = ModulatedConv2d( + in_channels, + out_channels, + kernel_size, + num_style_feat, + demodulate=demodulate, + sample_mode=sample_mode, + ) + self.weight = nn.Parameter(torch.zeros(1)) # for noise injection + self.bias = nn.Parameter(torch.zeros(1, out_channels, 1, 1)) + self.activate = nn.LeakyReLU(negative_slope=0.2, inplace=True) + + def forward(self, x, style, noise=None): + # modulate + out = self.modulated_conv(x, style) * 2**0.5 # for conversion + # noise injection + if noise is None: + b, _, h, w = out.shape + noise = out.new_empty(b, 1, h, w).normal_() + out = out + self.weight * noise + # add bias + out = out + self.bias + # activation + out = self.activate(out) + return out + + +class ToRGB(nn.Module): + """To RGB (image space) from features. + Args: + in_channels (int): Channel number of input. + num_style_feat (int): Channel number of style features. + upsample (bool): Whether to upsample. Default: True. + """ + + def __init__(self, in_channels, num_style_feat, upsample=True): + super(ToRGB, self).__init__() + self.upsample = upsample + self.modulated_conv = ModulatedConv2d( + in_channels, + 3, + kernel_size=1, + num_style_feat=num_style_feat, + demodulate=False, + sample_mode=None, + ) + self.bias = nn.Parameter(torch.zeros(1, 3, 1, 1)) + + def forward(self, x, style, skip=None): + """Forward function. + Args: + x (Tensor): Feature tensor with shape (b, c, h, w). + style (Tensor): Tensor with shape (b, num_style_feat). + skip (Tensor): Base/skip tensor. Default: None. + Returns: + Tensor: RGB images. + """ + out = self.modulated_conv(x, style) + out = out + self.bias + if skip is not None: + if self.upsample: + skip = F.interpolate( + skip, scale_factor=2, mode="bilinear", align_corners=False + ) + out = out + skip + return out + + +class ConstantInput(nn.Module): + """Constant input. + Args: + num_channel (int): Channel number of constant input. + size (int): Spatial size of constant input. + """ + + def __init__(self, num_channel, size): + super(ConstantInput, self).__init__() + self.weight = nn.Parameter(torch.randn(1, num_channel, size, size)) + + def forward(self, batch): + out = self.weight.repeat(batch, 1, 1, 1) + return out + + +class StyleGAN2GeneratorClean(nn.Module): + """Clean version of StyleGAN2 Generator. + Args: + out_size (int): The spatial size of outputs. + num_style_feat (int): Channel number of style features. Default: 512. + num_mlp (int): Layer number of MLP style layers. Default: 8. + channel_multiplier (int): Channel multiplier for large networks of StyleGAN2. Default: 2. + narrow (float): Narrow ratio for channels. Default: 1.0. 
+ """ + + def __init__( + self, out_size, num_style_feat=512, num_mlp=8, channel_multiplier=2, narrow=1 + ): + super(StyleGAN2GeneratorClean, self).__init__() + # Style MLP layers + self.num_style_feat = num_style_feat + style_mlp_layers = [NormStyleCode()] + for i in range(num_mlp): + style_mlp_layers.extend( + [ + nn.Linear(num_style_feat, num_style_feat, bias=True), + nn.LeakyReLU(negative_slope=0.2, inplace=True), + ] + ) + self.style_mlp = nn.Sequential(*style_mlp_layers) + # initialization + default_init_weights( + self.style_mlp, + scale=1, + bias_fill=0, + a=0.2, + mode="fan_in", + nonlinearity="leaky_relu", + ) + + # channel list + channels = { + "4": int(512 * narrow), + "8": int(512 * narrow), + "16": int(512 * narrow), + "32": int(512 * narrow), + "64": int(256 * channel_multiplier * narrow), + "128": int(128 * channel_multiplier * narrow), + "256": int(64 * channel_multiplier * narrow), + "512": int(32 * channel_multiplier * narrow), + "1024": int(16 * channel_multiplier * narrow), + } + self.channels = channels + + self.constant_input = ConstantInput(channels["4"], size=4) + self.style_conv1 = StyleConv( + channels["4"], + channels["4"], + kernel_size=3, + num_style_feat=num_style_feat, + demodulate=True, + sample_mode=None, + ) + self.to_rgb1 = ToRGB(channels["4"], num_style_feat, upsample=False) + + self.log_size = int(math.log(out_size, 2)) + self.num_layers = (self.log_size - 2) * 2 + 1 + self.num_latent = self.log_size * 2 - 2 + + self.style_convs = nn.ModuleList() + self.to_rgbs = nn.ModuleList() + self.noises = nn.Module() + + in_channels = channels["4"] + # noise + for layer_idx in range(self.num_layers): + resolution = 2 ** ((layer_idx + 5) // 2) + shape = [1, 1, resolution, resolution] + self.noises.register_buffer(f"noise{layer_idx}", torch.randn(*shape)) + # style convs and to_rgbs + for i in range(3, self.log_size + 1): + out_channels = channels[f"{2**i}"] + self.style_convs.append( + StyleConv( + in_channels, + out_channels, + kernel_size=3, + num_style_feat=num_style_feat, + demodulate=True, + sample_mode="upsample", + ) + ) + self.style_convs.append( + StyleConv( + out_channels, + out_channels, + kernel_size=3, + num_style_feat=num_style_feat, + demodulate=True, + sample_mode=None, + ) + ) + self.to_rgbs.append(ToRGB(out_channels, num_style_feat, upsample=True)) + in_channels = out_channels + + def make_noise(self): + """Make noise for noise injection.""" + device = self.constant_input.weight.device + noises = [torch.randn(1, 1, 4, 4, device=device)] + + for i in range(3, self.log_size + 1): + for _ in range(2): + noises.append(torch.randn(1, 1, 2**i, 2**i, device=device)) + + return noises + + def get_latent(self, x): + return self.style_mlp(x) + + def mean_latent(self, num_latent): + latent_in = torch.randn( + num_latent, self.num_style_feat, device=self.constant_input.weight.device + ) + latent = self.style_mlp(latent_in).mean(0, keepdim=True) + return latent + + def forward( + self, + styles, + input_is_latent=False, + noise=None, + randomize_noise=True, + truncation=1, + truncation_latent=None, + inject_index=None, + return_latents=False, + ): + """Forward function for StyleGAN2GeneratorClean. + Args: + styles (list[Tensor]): Sample codes of styles. + input_is_latent (bool): Whether input is latent style. Default: False. + noise (Tensor | None): Input noise or None. Default: None. + randomize_noise (bool): Randomize noise, used when 'noise' is False. Default: True. + truncation (float): The truncation ratio. Default: 1. 
+ truncation_latent (Tensor | None): The truncation latent tensor. Default: None. + inject_index (int | None): The injection index for mixing noise. Default: None. + return_latents (bool): Whether to return style latents. Default: False. + """ + # style codes -> latents with Style MLP layer + if not input_is_latent: + styles = [self.style_mlp(s) for s in styles] + # noises + if noise is None: + if randomize_noise: + noise = [None] * self.num_layers # for each style conv layer + else: # use the stored noise + noise = [ + getattr(self.noises, f"noise{i}") for i in range(self.num_layers) + ] + # style truncation + if truncation < 1: + style_truncation = [] + for style in styles: + style_truncation.append( + truncation_latent + truncation * (style - truncation_latent) + ) + styles = style_truncation + # get style latents with injection + if len(styles) == 1: + inject_index = self.num_latent + + if styles[0].ndim < 3: + # repeat latent code for all the layers + latent = styles[0].unsqueeze(1).repeat(1, inject_index, 1) + else: # used for encoder with different latent code for each layer + latent = styles[0] + elif len(styles) == 2: # mixing noises + if inject_index is None: + inject_index = random.randint(1, self.num_latent - 1) + latent1 = styles[0].unsqueeze(1).repeat(1, inject_index, 1) + latent2 = ( + styles[1].unsqueeze(1).repeat(1, self.num_latent - inject_index, 1) + ) + latent = torch.cat([latent1, latent2], 1) + + # main generation + out = self.constant_input(latent.shape[0]) + out = self.style_conv1(out, latent[:, 0], noise=noise[0]) + skip = self.to_rgb1(out, latent[:, 1]) + + i = 1 + for conv1, conv2, noise1, noise2, to_rgb in zip( + self.style_convs[::2], + self.style_convs[1::2], + noise[1::2], + noise[2::2], + self.to_rgbs, + ): + out = conv1(out, latent[:, i], noise=noise1) + out = conv2(out, latent[:, i + 1], noise=noise2) + skip = to_rgb(out, latent[:, i + 2], skip) # feature back to the rgb space + i += 2 + + image = skip + + if return_latents: + return image, latent + else: + return image, None diff --git a/ldm_patched/pfn/architecture/face/upfirdn2d.py b/ldm_patched/pfn/architecture/face/upfirdn2d.py new file mode 100644 index 0000000000000000000000000000000000000000..4ea4541513f27e3c9dddcee864cfeb87efddadb7 --- /dev/null +++ b/ldm_patched/pfn/architecture/face/upfirdn2d.py @@ -0,0 +1,194 @@ +# pylint: skip-file +# type: ignore +# modify from https://github.com/rosinality/stylegan2-pytorch/blob/master/op/upfirdn2d.py # noqa:E501 + +import os + +import torch +from torch.autograd import Function +from torch.nn import functional as F + +upfirdn2d_ext = None + + +class UpFirDn2dBackward(Function): + @staticmethod + def forward( + ctx, grad_output, kernel, grad_kernel, up, down, pad, g_pad, in_size, out_size + ): + up_x, up_y = up + down_x, down_y = down + g_pad_x0, g_pad_x1, g_pad_y0, g_pad_y1 = g_pad + + grad_output = grad_output.reshape(-1, out_size[0], out_size[1], 1) + + grad_input = upfirdn2d_ext.upfirdn2d( + grad_output, + grad_kernel, + down_x, + down_y, + up_x, + up_y, + g_pad_x0, + g_pad_x1, + g_pad_y0, + g_pad_y1, + ) + grad_input = grad_input.view(in_size[0], in_size[1], in_size[2], in_size[3]) + + ctx.save_for_backward(kernel) + + pad_x0, pad_x1, pad_y0, pad_y1 = pad + + ctx.up_x = up_x + ctx.up_y = up_y + ctx.down_x = down_x + ctx.down_y = down_y + ctx.pad_x0 = pad_x0 + ctx.pad_x1 = pad_x1 + ctx.pad_y0 = pad_y0 + ctx.pad_y1 = pad_y1 + ctx.in_size = in_size + ctx.out_size = out_size + + return grad_input + + @staticmethod + def backward(ctx, gradgrad_input): + 
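+        # Second-order gradient: re-apply the forward upfirdn2d (using the saved,
+        # un-flipped kernel) to gradgrad_input, then reshape the result back to
+        # (batch, channel, out_h, out_w).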
(kernel,) = ctx.saved_tensors + + gradgrad_input = gradgrad_input.reshape(-1, ctx.in_size[2], ctx.in_size[3], 1) + + gradgrad_out = upfirdn2d_ext.upfirdn2d( + gradgrad_input, + kernel, + ctx.up_x, + ctx.up_y, + ctx.down_x, + ctx.down_y, + ctx.pad_x0, + ctx.pad_x1, + ctx.pad_y0, + ctx.pad_y1, + ) + # gradgrad_out = gradgrad_out.view(ctx.in_size[0], ctx.out_size[0], + # ctx.out_size[1], ctx.in_size[3]) + gradgrad_out = gradgrad_out.view( + ctx.in_size[0], ctx.in_size[1], ctx.out_size[0], ctx.out_size[1] + ) + + return gradgrad_out, None, None, None, None, None, None, None, None + + +class UpFirDn2d(Function): + @staticmethod + def forward(ctx, input, kernel, up, down, pad): + up_x, up_y = up + down_x, down_y = down + pad_x0, pad_x1, pad_y0, pad_y1 = pad + + kernel_h, kernel_w = kernel.shape + _, channel, in_h, in_w = input.shape + ctx.in_size = input.shape + + input = input.reshape(-1, in_h, in_w, 1) + + ctx.save_for_backward(kernel, torch.flip(kernel, [0, 1])) + + out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h) // down_y + 1 + out_w = (in_w * up_x + pad_x0 + pad_x1 - kernel_w) // down_x + 1 + ctx.out_size = (out_h, out_w) + + ctx.up = (up_x, up_y) + ctx.down = (down_x, down_y) + ctx.pad = (pad_x0, pad_x1, pad_y0, pad_y1) + + g_pad_x0 = kernel_w - pad_x0 - 1 + g_pad_y0 = kernel_h - pad_y0 - 1 + g_pad_x1 = in_w * up_x - out_w * down_x + pad_x0 - up_x + 1 + g_pad_y1 = in_h * up_y - out_h * down_y + pad_y0 - up_y + 1 + + ctx.g_pad = (g_pad_x0, g_pad_x1, g_pad_y0, g_pad_y1) + + out = upfirdn2d_ext.upfirdn2d( + input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1 + ) + # out = out.view(major, out_h, out_w, minor) + out = out.view(-1, channel, out_h, out_w) + + return out + + @staticmethod + def backward(ctx, grad_output): + kernel, grad_kernel = ctx.saved_tensors + + grad_input = UpFirDn2dBackward.apply( + grad_output, + kernel, + grad_kernel, + ctx.up, + ctx.down, + ctx.pad, + ctx.g_pad, + ctx.in_size, + ctx.out_size, + ) + + return grad_input, None, None, None, None + + +def upfirdn2d(input, kernel, up=1, down=1, pad=(0, 0)): + if input.device.type == "cpu": + out = upfirdn2d_native( + input, kernel, up, up, down, down, pad[0], pad[1], pad[0], pad[1] + ) + else: + out = UpFirDn2d.apply( + input, kernel, (up, up), (down, down), (pad[0], pad[1], pad[0], pad[1]) + ) + + return out + + +def upfirdn2d_native( + input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1 +): + _, channel, in_h, in_w = input.shape + input = input.reshape(-1, in_h, in_w, 1) + + _, in_h, in_w, minor = input.shape + kernel_h, kernel_w = kernel.shape + + out = input.view(-1, in_h, 1, in_w, 1, minor) + out = F.pad(out, [0, 0, 0, up_x - 1, 0, 0, 0, up_y - 1]) + out = out.view(-1, in_h * up_y, in_w * up_x, minor) + + out = F.pad( + out, [0, 0, max(pad_x0, 0), max(pad_x1, 0), max(pad_y0, 0), max(pad_y1, 0)] + ) + out = out[ + :, + max(-pad_y0, 0) : out.shape[1] - max(-pad_y1, 0), + max(-pad_x0, 0) : out.shape[2] - max(-pad_x1, 0), + :, + ] + + out = out.permute(0, 3, 1, 2) + out = out.reshape( + [-1, 1, in_h * up_y + pad_y0 + pad_y1, in_w * up_x + pad_x0 + pad_x1] + ) + w = torch.flip(kernel, [0, 1]).view(1, 1, kernel_h, kernel_w) + out = F.conv2d(out, w) + out = out.reshape( + -1, + minor, + in_h * up_y + pad_y0 + pad_y1 - kernel_h + 1, + in_w * up_x + pad_x0 + pad_x1 - kernel_w + 1, + ) + out = out.permute(0, 2, 3, 1) + out = out[:, ::down_y, ::down_x, :] + + out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h) // down_y + 1 + out_w = (in_w * up_x + pad_x0 + pad_x1 - kernel_w) // down_x + 
1 + + return out.view(-1, channel, out_h, out_w) diff --git a/ldm_patched/pfn/architecture/timm/LICENSE b/ldm_patched/pfn/architecture/timm/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..b4e9438bd1e07e17abf58cfd86e536ec880348a3 --- /dev/null +++ b/ldm_patched/pfn/architecture/timm/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. 
We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2019 Ross Wightman + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/ldm_patched/pfn/architecture/timm/drop.py b/ldm_patched/pfn/architecture/timm/drop.py new file mode 100644 index 0000000000000000000000000000000000000000..14f0da914b2a198af7e6124cd90bad6adaf8a84e --- /dev/null +++ b/ldm_patched/pfn/architecture/timm/drop.py @@ -0,0 +1,223 @@ +""" DropBlock, DropPath + +PyTorch implementations of DropBlock and DropPath (Stochastic Depth) regularization layers. + +Papers: +DropBlock: A regularization method for convolutional networks (https://arxiv.org/abs/1810.12890) + +Deep Networks with Stochastic Depth (https://arxiv.org/abs/1603.09382) + +Code: +DropBlock impl inspired by two Tensorflow impl that I liked: + - https://github.com/tensorflow/tpu/blob/master/models/official/resnet/resnet_model.py#L74 + - https://github.com/clovaai/assembled-cnn/blob/master/nets/blocks.py + +Hacked together by / Copyright 2020 Ross Wightman +""" +import torch +import torch.nn as nn +import torch.nn.functional as F + + +def drop_block_2d( + x, + drop_prob: float = 0.1, + block_size: int = 7, + gamma_scale: float = 1.0, + with_noise: bool = False, + inplace: bool = False, + batchwise: bool = False, +): + """DropBlock. See https://arxiv.org/pdf/1810.12890.pdf + + DropBlock with an experimental gaussian noise option. This layer has been tested on a few training + runs with success, but needs further validation and possibly optimization for lower runtime impact. + """ + _, C, H, W = x.shape + total_size = W * H + clipped_block_size = min(block_size, min(W, H)) + # seed_drop_rate, the gamma parameter + gamma = ( + gamma_scale + * drop_prob + * total_size + / clipped_block_size**2 + / ((W - block_size + 1) * (H - block_size + 1)) + ) + + # Forces the block to be inside the feature map. 
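    # Illustrative note (added for clarity; the numbers are only an example):
    # gamma above is the per-position "seed" probability, chosen so that after each
    # seed is expanded into a clipped_block_size x clipped_block_size block, the
    # expected dropped area is roughly drop_prob of the feature map. With
    # H = W = 16, block_size = 7 and drop_prob = 0.1:
    #   gamma = 0.1 * 256 / 49 / (10 * 10) ~= 0.0052
    # so the expected dropped area is gamma * 100 valid seed positions * 49 cells
    # ~= 26 cells, i.e. about 10% of the 256-cell map. The valid_block mask built
    # below keeps seed centres far enough from the border that a full block fits.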
+ w_i, h_i = torch.meshgrid( + torch.arange(W).to(x.device), torch.arange(H).to(x.device) + ) + valid_block = ( + (w_i >= clipped_block_size // 2) & (w_i < W - (clipped_block_size - 1) // 2) + ) & ((h_i >= clipped_block_size // 2) & (h_i < H - (clipped_block_size - 1) // 2)) + valid_block = torch.reshape(valid_block, (1, 1, H, W)).to(dtype=x.dtype) + + if batchwise: + # one mask for whole batch, quite a bit faster + uniform_noise = torch.rand((1, C, H, W), dtype=x.dtype, device=x.device) + else: + uniform_noise = torch.rand_like(x) + block_mask = ((2 - gamma - valid_block + uniform_noise) >= 1).to(dtype=x.dtype) + block_mask = -F.max_pool2d( + -block_mask, + kernel_size=clipped_block_size, # block_size, + stride=1, + padding=clipped_block_size // 2, + ) + + if with_noise: + normal_noise = ( + torch.randn((1, C, H, W), dtype=x.dtype, device=x.device) + if batchwise + else torch.randn_like(x) + ) + if inplace: + x.mul_(block_mask).add_(normal_noise * (1 - block_mask)) + else: + x = x * block_mask + normal_noise * (1 - block_mask) + else: + normalize_scale = ( + block_mask.numel() / block_mask.to(dtype=torch.float32).sum().add(1e-7) + ).to(x.dtype) + if inplace: + x.mul_(block_mask * normalize_scale) + else: + x = x * block_mask * normalize_scale + return x + + +def drop_block_fast_2d( + x: torch.Tensor, + drop_prob: float = 0.1, + block_size: int = 7, + gamma_scale: float = 1.0, + with_noise: bool = False, + inplace: bool = False, +): + """DropBlock. See https://arxiv.org/pdf/1810.12890.pdf + + DropBlock with an experimental gaussian noise option. Simplied from above without concern for valid + block mask at edges. + """ + _, _, H, W = x.shape + total_size = W * H + clipped_block_size = min(block_size, min(W, H)) + gamma = ( + gamma_scale + * drop_prob + * total_size + / clipped_block_size**2 + / ((W - block_size + 1) * (H - block_size + 1)) + ) + + block_mask = torch.empty_like(x).bernoulli_(gamma) + block_mask = F.max_pool2d( + block_mask.to(x.dtype), + kernel_size=clipped_block_size, + stride=1, + padding=clipped_block_size // 2, + ) + + if with_noise: + normal_noise = torch.empty_like(x).normal_() + if inplace: + x.mul_(1.0 - block_mask).add_(normal_noise * block_mask) + else: + x = x * (1.0 - block_mask) + normal_noise * block_mask + else: + block_mask = 1 - block_mask + normalize_scale = ( + block_mask.numel() / block_mask.to(dtype=torch.float32).sum().add(1e-6) + ).to(dtype=x.dtype) + if inplace: + x.mul_(block_mask * normalize_scale) + else: + x = x * block_mask * normalize_scale + return x + + +class DropBlock2d(nn.Module): + """DropBlock. 
See https://arxiv.org/pdf/1810.12890.pdf""" + + def __init__( + self, + drop_prob: float = 0.1, + block_size: int = 7, + gamma_scale: float = 1.0, + with_noise: bool = False, + inplace: bool = False, + batchwise: bool = False, + fast: bool = True, + ): + super(DropBlock2d, self).__init__() + self.drop_prob = drop_prob + self.gamma_scale = gamma_scale + self.block_size = block_size + self.with_noise = with_noise + self.inplace = inplace + self.batchwise = batchwise + self.fast = fast # FIXME finish comparisons of fast vs not + + def forward(self, x): + if not self.training or not self.drop_prob: + return x + if self.fast: + return drop_block_fast_2d( + x, + self.drop_prob, + self.block_size, + self.gamma_scale, + self.with_noise, + self.inplace, + ) + else: + return drop_block_2d( + x, + self.drop_prob, + self.block_size, + self.gamma_scale, + self.with_noise, + self.inplace, + self.batchwise, + ) + + +def drop_path( + x, drop_prob: float = 0.0, training: bool = False, scale_by_keep: bool = True +): + """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). + + This is the same as the DropConnect impl I created for EfficientNet, etc networks, however, + the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper... + See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for + changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use + 'survival rate' as the argument. + + """ + if drop_prob == 0.0 or not training: + return x + keep_prob = 1 - drop_prob + shape = (x.shape[0],) + (1,) * ( + x.ndim - 1 + ) # work with diff dim tensors, not just 2D ConvNets + random_tensor = x.new_empty(shape).bernoulli_(keep_prob) + if keep_prob > 0.0 and scale_by_keep: + random_tensor.div_(keep_prob) + return x * random_tensor + + +class DropPath(nn.Module): + """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).""" + + def __init__(self, drop_prob: float = 0.0, scale_by_keep: bool = True): + super(DropPath, self).__init__() + self.drop_prob = drop_prob + self.scale_by_keep = scale_by_keep + + def forward(self, x): + return drop_path(x, self.drop_prob, self.training, self.scale_by_keep) + + def extra_repr(self): + return f"drop_prob={round(self.drop_prob,3):0.3f}" diff --git a/ldm_patched/pfn/architecture/timm/helpers.py b/ldm_patched/pfn/architecture/timm/helpers.py new file mode 100644 index 0000000000000000000000000000000000000000..cdafee0709165dd992118e3b09b8d26f70ea8a2a --- /dev/null +++ b/ldm_patched/pfn/architecture/timm/helpers.py @@ -0,0 +1,31 @@ +""" Layer/Module Helpers +Hacked together by / Copyright 2020 Ross Wightman +""" +import collections.abc +from itertools import repeat + + +# From PyTorch internals +def _ntuple(n): + def parse(x): + if isinstance(x, collections.abc.Iterable) and not isinstance(x, str): + return x + return tuple(repeat(x, n)) + + return parse + + +to_1tuple = _ntuple(1) +to_2tuple = _ntuple(2) +to_3tuple = _ntuple(3) +to_4tuple = _ntuple(4) +to_ntuple = _ntuple + + +def make_divisible(v, divisor=8, min_value=None, round_limit=0.9): + min_value = min_value or divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. 
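    # Worked example (illustrative, using the defaults divisor=8, round_limit=0.9):
    #   make_divisible(30) -> int(30 + 4) // 8 * 8 = 32, and 32 >= 0.9 * 30, so 32 is returned.
    #   make_divisible(19) -> rounds to 16, but 16 < 0.9 * 19 = 17.1, so the check
    #   below bumps the result by one divisor and returns 24.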
+ if new_v < round_limit * v: + new_v += divisor + return new_v diff --git a/ldm_patched/pfn/architecture/timm/weight_init.py b/ldm_patched/pfn/architecture/timm/weight_init.py new file mode 100644 index 0000000000000000000000000000000000000000..b0169774657d86c1946008e746f2f4f7e833a44c --- /dev/null +++ b/ldm_patched/pfn/architecture/timm/weight_init.py @@ -0,0 +1,128 @@ +import math +import warnings + +import torch +from torch.nn.init import _calculate_fan_in_and_fan_out + + +def _no_grad_trunc_normal_(tensor, mean, std, a, b): + # Cut & paste from PyTorch official master until it's in a few official releases - RW + # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf + def norm_cdf(x): + # Computes standard normal cumulative distribution function + return (1.0 + math.erf(x / math.sqrt(2.0))) / 2.0 + + if (mean < a - 2 * std) or (mean > b + 2 * std): + warnings.warn( + "mean is more than 2 std from [a, b] in nn.init.trunc_normal_. " + "The distribution of values may be incorrect.", + stacklevel=2, + ) + + with torch.no_grad(): + # Values are generated by using a truncated uniform distribution and + # then using the inverse CDF for the normal distribution. + # Get upper and lower cdf values + l = norm_cdf((a - mean) / std) + u = norm_cdf((b - mean) / std) + + # Uniformly fill tensor with values from [l, u], then translate to + # [2l-1, 2u-1]. + tensor.uniform_(2 * l - 1, 2 * u - 1) + + # Use inverse cdf transform for normal distribution to get truncated + # standard normal + tensor.erfinv_() + + # Transform to proper mean, std + tensor.mul_(std * math.sqrt(2.0)) + tensor.add_(mean) + + # Clamp to ensure it's in the proper range + tensor.clamp_(min=a, max=b) + return tensor + + +def trunc_normal_( + tensor: torch.Tensor, mean=0.0, std=1.0, a=-2.0, b=2.0 +) -> torch.Tensor: + r"""Fills the input Tensor with values drawn from a truncated + normal distribution. The values are effectively drawn from the + normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` + with values outside :math:`[a, b]` redrawn until they are within + the bounds. The method used for generating the random values works + best when :math:`a \leq \text{mean} \leq b`. + + NOTE: this impl is similar to the PyTorch trunc_normal_, the bounds [a, b] are + applied while sampling the normal with mean/std applied, therefore a, b args + should be adjusted to match the range of mean, std args. + + Args: + tensor: an n-dimensional `torch.Tensor` + mean: the mean of the normal distribution + std: the standard deviation of the normal distribution + a: the minimum cutoff value + b: the maximum cutoff value + Examples: + >>> w = torch.empty(3, 5) + >>> nn.init.trunc_normal_(w) + """ + return _no_grad_trunc_normal_(tensor, mean, std, a, b) + + +def trunc_normal_tf_( + tensor: torch.Tensor, mean=0.0, std=1.0, a=-2.0, b=2.0 +) -> torch.Tensor: + r"""Fills the input Tensor with values drawn from a truncated + normal distribution. The values are effectively drawn from the + normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` + with values outside :math:`[a, b]` redrawn until they are within + the bounds. The method used for generating the random values works + best when :math:`a \leq \text{mean} \leq b`. + + NOTE: this 'tf' variant behaves closer to Tensorflow / JAX impl where the + bounds [a, b] are applied when sampling the normal distribution with mean=0, std=1.0 + and the result is subsquently scaled and shifted by the mean and std args. 
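    As an illustrative consequence (example added for clarity): trunc_normal_tf_(w, std=0.02)
    truncates the unit normal at [-2, 2] and then scales, so the effective cutoffs are about
    +/-0.04, whereas trunc_normal_(w, std=0.02) clamps the already-scaled values at the
    absolute bounds [-2, 2].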
+ + Args: + tensor: an n-dimensional `torch.Tensor` + mean: the mean of the normal distribution + std: the standard deviation of the normal distribution + a: the minimum cutoff value + b: the maximum cutoff value + Examples: + >>> w = torch.empty(3, 5) + >>> nn.init.trunc_normal_(w) + """ + _no_grad_trunc_normal_(tensor, 0, 1.0, a, b) + with torch.no_grad(): + tensor.mul_(std).add_(mean) + return tensor + + +def variance_scaling_(tensor, scale=1.0, mode="fan_in", distribution="normal"): + fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor) + if mode == "fan_in": + denom = fan_in + elif mode == "fan_out": + denom = fan_out + elif mode == "fan_avg": + denom = (fan_in + fan_out) / 2 + + variance = scale / denom # type: ignore + + if distribution == "truncated_normal": + # constant is stddev of standard normal truncated to (-2, 2) + trunc_normal_tf_(tensor, std=math.sqrt(variance) / 0.87962566103423978) + elif distribution == "normal": + tensor.normal_(std=math.sqrt(variance)) + elif distribution == "uniform": + bound = math.sqrt(3 * variance) + # pylint: disable=invalid-unary-operand-type + tensor.uniform_(-bound, bound) + else: + raise ValueError(f"invalid distribution {distribution}") + + +def lecun_normal_(tensor): + variance_scaling_(tensor, mode="fan_in", distribution="truncated_normal") diff --git a/ldm_patched/pfn/model_loading.py b/ldm_patched/pfn/model_loading.py new file mode 100644 index 0000000000000000000000000000000000000000..e000871c1bfe66a07dc13b51ad709cb0de092a41 --- /dev/null +++ b/ldm_patched/pfn/model_loading.py @@ -0,0 +1,99 @@ +import logging as logger + +from .architecture.DAT import DAT +from .architecture.face.codeformer import CodeFormer +from .architecture.face.gfpganv1_clean_arch import GFPGANv1Clean +from .architecture.face.restoreformer_arch import RestoreFormer +from .architecture.HAT import HAT +from .architecture.LaMa import LaMa +from .architecture.OmniSR.OmniSR import OmniSR +from .architecture.RRDB import RRDBNet as ESRGAN +from .architecture.SCUNet import SCUNet +from .architecture.SPSR import SPSRNet as SPSR +from .architecture.SRVGG import SRVGGNetCompact as RealESRGANv2 +from .architecture.SwiftSRGAN import Generator as SwiftSRGAN +from .architecture.Swin2SR import Swin2SR +from .architecture.SwinIR import SwinIR +from .types import PyTorchModel + + +class UnsupportedModel(Exception): + pass + + +def load_state_dict(state_dict) -> PyTorchModel: + logger.debug(f"Loading state dict into pytorch model arch") + + state_dict_keys = list(state_dict.keys()) + + if "params_ema" in state_dict_keys: + state_dict = state_dict["params_ema"] + elif "params-ema" in state_dict_keys: + state_dict = state_dict["params-ema"] + elif "params" in state_dict_keys: + state_dict = state_dict["params"] + + state_dict_keys = list(state_dict.keys()) + # SRVGGNet Real-ESRGAN (v2) + if "body.0.weight" in state_dict_keys and "body.1.weight" in state_dict_keys: + model = RealESRGANv2(state_dict) + # SPSR (ESRGAN with lots of extra layers) + elif "f_HR_conv1.0.weight" in state_dict: + model = SPSR(state_dict) + # Swift-SRGAN + elif ( + "model" in state_dict_keys + and "initial.cnn.depthwise.weight" in state_dict["model"].keys() + ): + model = SwiftSRGAN(state_dict) + # SwinIR, Swin2SR, HAT + elif "layers.0.residual_group.blocks.0.norm1.weight" in state_dict_keys: + if ( + "layers.0.residual_group.blocks.0.conv_block.cab.0.weight" + in state_dict_keys + ): + model = HAT(state_dict) + elif "patch_embed.proj.weight" in state_dict_keys: + model = Swin2SR(state_dict) + else: + model = 
SwinIR(state_dict) + # GFPGAN + elif ( + "toRGB.0.weight" in state_dict_keys + and "stylegan_decoder.style_mlp.1.weight" in state_dict_keys + ): + model = GFPGANv1Clean(state_dict) + # RestoreFormer + elif ( + "encoder.conv_in.weight" in state_dict_keys + and "encoder.down.0.block.0.norm1.weight" in state_dict_keys + ): + model = RestoreFormer(state_dict) + elif ( + "encoder.blocks.0.weight" in state_dict_keys + and "quantize.embedding.weight" in state_dict_keys + ): + model = CodeFormer(state_dict) + # LaMa + elif ( + "model.model.1.bn_l.running_mean" in state_dict_keys + or "generator.model.1.bn_l.running_mean" in state_dict_keys + ): + model = LaMa(state_dict) + # Omni-SR + elif "residual_layer.0.residual_layer.0.layer.0.fn.0.weight" in state_dict_keys: + model = OmniSR(state_dict) + # SCUNet + elif "m_head.0.weight" in state_dict_keys and "m_tail.0.weight" in state_dict_keys: + model = SCUNet(state_dict) + # DAT + elif "layers.0.blocks.2.attn.attn_mask_0" in state_dict_keys: + model = DAT(state_dict) + # Regular ESRGAN, "new-arch" ESRGAN, Real-ESRGAN v1 + else: + try: + model = ESRGAN(state_dict) + except: + # pylint: disable=raise-missing-from + raise UnsupportedModel + return model diff --git a/ldm_patched/pfn/types.py b/ldm_patched/pfn/types.py new file mode 100644 index 0000000000000000000000000000000000000000..193333b9e8049d9558ca2ea253d41ee44b0b294b --- /dev/null +++ b/ldm_patched/pfn/types.py @@ -0,0 +1,69 @@ +from typing import Union + +from .architecture.DAT import DAT +from .architecture.face.codeformer import CodeFormer +from .architecture.face.gfpganv1_clean_arch import GFPGANv1Clean +from .architecture.face.restoreformer_arch import RestoreFormer +from .architecture.HAT import HAT +from .architecture.LaMa import LaMa +from .architecture.OmniSR.OmniSR import OmniSR +from .architecture.RRDB import RRDBNet as ESRGAN +from .architecture.SCUNet import SCUNet +from .architecture.SPSR import SPSRNet as SPSR +from .architecture.SRVGG import SRVGGNetCompact as RealESRGANv2 +from .architecture.SwiftSRGAN import Generator as SwiftSRGAN +from .architecture.Swin2SR import Swin2SR +from .architecture.SwinIR import SwinIR + +PyTorchSRModels = ( + RealESRGANv2, + SPSR, + SwiftSRGAN, + ESRGAN, + SwinIR, + Swin2SR, + HAT, + OmniSR, + SCUNet, + DAT, +) +PyTorchSRModel = Union[ + RealESRGANv2, + SPSR, + SwiftSRGAN, + ESRGAN, + SwinIR, + Swin2SR, + HAT, + OmniSR, + SCUNet, + DAT, +] + + +def is_pytorch_sr_model(model: object): + return isinstance(model, PyTorchSRModels) + + +PyTorchFaceModels = (GFPGANv1Clean, RestoreFormer, CodeFormer) +PyTorchFaceModel = Union[GFPGANv1Clean, RestoreFormer, CodeFormer] + + +def is_pytorch_face_model(model: object): + return isinstance(model, PyTorchFaceModels) + + +PyTorchInpaintModels = (LaMa,) +PyTorchInpaintModel = Union[LaMa] + + +def is_pytorch_inpaint_model(model: object): + return isinstance(model, PyTorchInpaintModels) + + +PyTorchModels = (*PyTorchSRModels, *PyTorchFaceModels, *PyTorchInpaintModels) +PyTorchModel = Union[PyTorchSRModel, PyTorchFaceModel, PyTorchInpaintModel] + + +def is_pytorch_model(model: object): + return isinstance(model, PyTorchModels) diff --git a/ldm_patched/t2ia/adapter.py b/ldm_patched/t2ia/adapter.py new file mode 100644 index 0000000000000000000000000000000000000000..e9a606b1cd67fd9a955a0ea0a86d1bd5498d85e5 --- /dev/null +++ b/ldm_patched/t2ia/adapter.py @@ -0,0 +1,293 @@ +#taken from https://github.com/TencentARC/T2I-Adapter +import torch +import torch.nn as nn +from collections import OrderedDict + + +def conv_nd(dims, 
*args, **kwargs): + """ + Create a 1D, 2D, or 3D convolution module. + """ + if dims == 1: + return nn.Conv1d(*args, **kwargs) + elif dims == 2: + return nn.Conv2d(*args, **kwargs) + elif dims == 3: + return nn.Conv3d(*args, **kwargs) + raise ValueError(f"unsupported dimensions: {dims}") + + +def avg_pool_nd(dims, *args, **kwargs): + """ + Create a 1D, 2D, or 3D average pooling module. + """ + if dims == 1: + return nn.AvgPool1d(*args, **kwargs) + elif dims == 2: + return nn.AvgPool2d(*args, **kwargs) + elif dims == 3: + return nn.AvgPool3d(*args, **kwargs) + raise ValueError(f"unsupported dimensions: {dims}") + + +class Downsample(nn.Module): + """ + A downsampling layer with an optional convolution. + :param channels: channels in the inputs and outputs. + :param use_conv: a bool determining if a convolution is applied. + :param dims: determines if the signal is 1D, 2D, or 3D. If 3D, then + downsampling occurs in the inner-two dimensions. + """ + + def __init__(self, channels, use_conv, dims=2, out_channels=None, padding=1): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.dims = dims + stride = 2 if dims != 3 else (1, 2, 2) + if use_conv: + self.op = conv_nd( + dims, self.channels, self.out_channels, 3, stride=stride, padding=padding + ) + else: + assert self.channels == self.out_channels + self.op = avg_pool_nd(dims, kernel_size=stride, stride=stride) + + def forward(self, x): + assert x.shape[1] == self.channels + if not self.use_conv: + padding = [x.shape[2] % 2, x.shape[3] % 2] + self.op.padding = padding + + x = self.op(x) + return x + + +class ResnetBlock(nn.Module): + def __init__(self, in_c, out_c, down, ksize=3, sk=False, use_conv=True): + super().__init__() + ps = ksize // 2 + if in_c != out_c or sk == False: + self.in_conv = nn.Conv2d(in_c, out_c, ksize, 1, ps) + else: + # print('n_in') + self.in_conv = None + self.block1 = nn.Conv2d(out_c, out_c, 3, 1, 1) + self.act = nn.ReLU() + self.block2 = nn.Conv2d(out_c, out_c, ksize, 1, ps) + if sk == False: + self.skep = nn.Conv2d(in_c, out_c, ksize, 1, ps) + else: + self.skep = None + + self.down = down + if self.down == True: + self.down_opt = Downsample(in_c, use_conv=use_conv) + + def forward(self, x): + if self.down == True: + x = self.down_opt(x) + if self.in_conv is not None: # edit + x = self.in_conv(x) + + h = self.block1(x) + h = self.act(h) + h = self.block2(h) + if self.skep is not None: + return h + self.skep(x) + else: + return h + x + + +class Adapter(nn.Module): + def __init__(self, channels=[320, 640, 1280, 1280], nums_rb=3, cin=64, ksize=3, sk=False, use_conv=True, xl=True): + super(Adapter, self).__init__() + self.unshuffle_amount = 8 + resblock_no_downsample = [] + resblock_downsample = [3, 2, 1] + self.xl = xl + if self.xl: + self.unshuffle_amount = 16 + resblock_no_downsample = [1] + resblock_downsample = [2] + + self.input_channels = cin // (self.unshuffle_amount * self.unshuffle_amount) + self.unshuffle = nn.PixelUnshuffle(self.unshuffle_amount) + self.channels = channels + self.nums_rb = nums_rb + self.body = [] + for i in range(len(channels)): + for j in range(nums_rb): + if (i in resblock_downsample) and (j == 0): + self.body.append( + ResnetBlock(channels[i - 1], channels[i], down=True, ksize=ksize, sk=sk, use_conv=use_conv)) + elif (i in resblock_no_downsample) and (j == 0): + self.body.append( + ResnetBlock(channels[i - 1], channels[i], down=False, ksize=ksize, sk=sk, use_conv=use_conv)) + else: + self.body.append( + 
ResnetBlock(channels[i], channels[i], down=False, ksize=ksize, sk=sk, use_conv=use_conv)) + self.body = nn.ModuleList(self.body) + self.conv_in = nn.Conv2d(cin, channels[0], 3, 1, 1) + + def forward(self, x): + # unshuffle + x = self.unshuffle(x) + # extract features + features = [] + x = self.conv_in(x) + for i in range(len(self.channels)): + for j in range(self.nums_rb): + idx = i * self.nums_rb + j + x = self.body[idx](x) + if self.xl: + features.append(None) + if i == 0: + features.append(None) + features.append(None) + if i == 2: + features.append(None) + else: + features.append(None) + features.append(None) + features.append(x) + + return features + + +class LayerNorm(nn.LayerNorm): + """Subclass torch's LayerNorm to handle fp16.""" + + def forward(self, x: torch.Tensor): + orig_type = x.dtype + ret = super().forward(x.type(torch.float32)) + return ret.type(orig_type) + + +class QuickGELU(nn.Module): + + def forward(self, x: torch.Tensor): + return x * torch.sigmoid(1.702 * x) + + +class ResidualAttentionBlock(nn.Module): + + def __init__(self, d_model: int, n_head: int, attn_mask: torch.Tensor = None): + super().__init__() + + self.attn = nn.MultiheadAttention(d_model, n_head) + self.ln_1 = LayerNorm(d_model) + self.mlp = nn.Sequential( + OrderedDict([("c_fc", nn.Linear(d_model, d_model * 4)), ("gelu", QuickGELU()), + ("c_proj", nn.Linear(d_model * 4, d_model))])) + self.ln_2 = LayerNorm(d_model) + self.attn_mask = attn_mask + + def attention(self, x: torch.Tensor): + self.attn_mask = self.attn_mask.to(dtype=x.dtype, device=x.device) if self.attn_mask is not None else None + return self.attn(x, x, x, need_weights=False, attn_mask=self.attn_mask)[0] + + def forward(self, x: torch.Tensor): + x = x + self.attention(self.ln_1(x)) + x = x + self.mlp(self.ln_2(x)) + return x + + +class StyleAdapter(nn.Module): + + def __init__(self, width=1024, context_dim=768, num_head=8, n_layes=3, num_token=4): + super().__init__() + + scale = width ** -0.5 + self.transformer_layes = nn.Sequential(*[ResidualAttentionBlock(width, num_head) for _ in range(n_layes)]) + self.num_token = num_token + self.style_embedding = nn.Parameter(torch.randn(1, num_token, width) * scale) + self.ln_post = LayerNorm(width) + self.ln_pre = LayerNorm(width) + self.proj = nn.Parameter(scale * torch.randn(width, context_dim)) + + def forward(self, x): + # x shape [N, HW+1, C] + style_embedding = self.style_embedding + torch.zeros( + (x.shape[0], self.num_token, self.style_embedding.shape[-1]), device=x.device) + x = torch.cat([x, style_embedding], dim=1) + x = self.ln_pre(x) + x = x.permute(1, 0, 2) # NLD -> LND + x = self.transformer_layes(x) + x = x.permute(1, 0, 2) # LND -> NLD + + x = self.ln_post(x[:, -self.num_token:, :]) + x = x @ self.proj + + return x + + +class ResnetBlock_light(nn.Module): + def __init__(self, in_c): + super().__init__() + self.block1 = nn.Conv2d(in_c, in_c, 3, 1, 1) + self.act = nn.ReLU() + self.block2 = nn.Conv2d(in_c, in_c, 3, 1, 1) + + def forward(self, x): + h = self.block1(x) + h = self.act(h) + h = self.block2(h) + + return h + x + + +class extractor(nn.Module): + def __init__(self, in_c, inter_c, out_c, nums_rb, down=False): + super().__init__() + self.in_conv = nn.Conv2d(in_c, inter_c, 1, 1, 0) + self.body = [] + for _ in range(nums_rb): + self.body.append(ResnetBlock_light(inter_c)) + self.body = nn.Sequential(*self.body) + self.out_conv = nn.Conv2d(inter_c, out_c, 1, 1, 0) + self.down = down + if self.down == True: + self.down_opt = Downsample(in_c, use_conv=False) + + def forward(self, 
x): + if self.down == True: + x = self.down_opt(x) + x = self.in_conv(x) + x = self.body(x) + x = self.out_conv(x) + + return x + + +class Adapter_light(nn.Module): + def __init__(self, channels=[320, 640, 1280, 1280], nums_rb=3, cin=64): + super(Adapter_light, self).__init__() + self.unshuffle_amount = 8 + self.unshuffle = nn.PixelUnshuffle(self.unshuffle_amount) + self.input_channels = cin // (self.unshuffle_amount * self.unshuffle_amount) + self.channels = channels + self.nums_rb = nums_rb + self.body = [] + self.xl = False + + for i in range(len(channels)): + if i == 0: + self.body.append(extractor(in_c=cin, inter_c=channels[i]//4, out_c=channels[i], nums_rb=nums_rb, down=False)) + else: + self.body.append(extractor(in_c=channels[i-1], inter_c=channels[i]//4, out_c=channels[i], nums_rb=nums_rb, down=True)) + self.body = nn.ModuleList(self.body) + + def forward(self, x): + # unshuffle + x = self.unshuffle(x) + # extract features + features = [] + for i in range(len(self.channels)): + x = self.body[i](x) + features.append(None) + features.append(None) + features.append(x) + + return features diff --git a/ldm_patched/taesd/taesd.py b/ldm_patched/taesd/taesd.py new file mode 100644 index 0000000000000000000000000000000000000000..ee9c9ed72ca21fd5d441ab9573f906f270f7b27a --- /dev/null +++ b/ldm_patched/taesd/taesd.py @@ -0,0 +1,80 @@ +# Taken from https://github.com/comfyanonymous/ComfyUI +# This file is only for reference, and not used in the backend or runtime. + +#!/usr/bin/env python3 +""" +Tiny AutoEncoder for Stable Diffusion +(DNN for encoding / decoding SD's latent space) +""" +import torch +import torch.nn as nn + +import ldm_patched.modules.utils +import ldm_patched.modules.ops + +def conv(n_in, n_out, **kwargs): + return ldm_patched.modules.ops.disable_weight_init.Conv2d(n_in, n_out, 3, padding=1, **kwargs) + +class Clamp(nn.Module): + def forward(self, x): + return torch.tanh(x / 3) * 3 + +class Block(nn.Module): + def __init__(self, n_in, n_out): + super().__init__() + self.conv = nn.Sequential(conv(n_in, n_out), nn.ReLU(), conv(n_out, n_out), nn.ReLU(), conv(n_out, n_out)) + self.skip = ldm_patched.modules.ops.disable_weight_init.Conv2d(n_in, n_out, 1, bias=False) if n_in != n_out else nn.Identity() + self.fuse = nn.ReLU() + def forward(self, x): + return self.fuse(self.conv(x) + self.skip(x)) + +def Encoder(): + return nn.Sequential( + conv(3, 64), Block(64, 64), + conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), + conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), + conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), + conv(64, 4), + ) + +def Decoder(): + return nn.Sequential( + Clamp(), conv(4, 64), nn.ReLU(), + Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), + Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), + Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), + Block(64, 64), conv(64, 3), + ) + +class TAESD(nn.Module): + latent_magnitude = 3 + latent_shift = 0.5 + + def __init__(self, encoder_path=None, decoder_path=None): + """Initialize pretrained TAESD on the given device from the given checkpoints.""" + super().__init__() + self.taesd_encoder = Encoder() + self.taesd_decoder = Decoder() + self.vae_scale = torch.nn.Parameter(torch.tensor(1.0)) + if encoder_path is not None: + 
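            # Illustrative comment: the encoder and decoder checkpoints are loaded
            # independently, so a decoder-only TAESD instance can be constructed when
            # the model is only needed to turn latents back into preview images.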
self.taesd_encoder.load_state_dict(ldm_patched.modules.utils.load_torch_file(encoder_path, safe_load=True)) + if decoder_path is not None: + self.taesd_decoder.load_state_dict(ldm_patched.modules.utils.load_torch_file(decoder_path, safe_load=True)) + + @staticmethod + def scale_latents(x): + """raw latents -> [0, 1]""" + return x.div(2 * TAESD.latent_magnitude).add(TAESD.latent_shift).clamp(0, 1) + + @staticmethod + def unscale_latents(x): + """[0, 1] -> raw latents""" + return x.sub(TAESD.latent_shift).mul(2 * TAESD.latent_magnitude) + + def decode(self, x): + x_sample = self.taesd_decoder(x * self.vae_scale) + x_sample = x_sample.sub(0.5).mul(2) + return x_sample + + def encode(self, x): + return self.taesd_encoder(x * 0.5 + 0.5) / self.vae_scale diff --git a/ldm_patched/unipc/uni_pc.py b/ldm_patched/unipc/uni_pc.py new file mode 100644 index 0000000000000000000000000000000000000000..60a1be5dbf871d4dee6cea694195d759237661ee --- /dev/null +++ b/ldm_patched/unipc/uni_pc.py @@ -0,0 +1,897 @@ +# Taken from https://github.com/comfyanonymous/ComfyUI +# This file is only for reference, and not used in the backend or runtime. + +#code taken from: https://github.com/wl-zhao/UniPC and modified + +import torch +import torch.nn.functional as F +import math + +from tqdm.auto import trange, tqdm + + +class NoiseScheduleVP: + def __init__( + self, + schedule='discrete', + betas=None, + alphas_cumprod=None, + continuous_beta_0=0.1, + continuous_beta_1=20., + ): + """Create a wrapper class for the forward SDE (VP type). + + *** + Update: We support discrete-time diffusion models by implementing a picewise linear interpolation for log_alpha_t. + We recommend to use schedule='discrete' for the discrete-time diffusion models, especially for high-resolution images. + *** + + The forward SDE ensures that the condition distribution q_{t|0}(x_t | x_0) = N ( alpha_t * x_0, sigma_t^2 * I ). + We further define lambda_t = log(alpha_t) - log(sigma_t), which is the half-logSNR (described in the DPM-Solver paper). + Therefore, we implement the functions for computing alpha_t, sigma_t and lambda_t. For t in [0, T], we have: + + log_alpha_t = self.marginal_log_mean_coeff(t) + sigma_t = self.marginal_std(t) + lambda_t = self.marginal_lambda(t) + + Moreover, as lambda(t) is an invertible function, we also support its inverse function: + + t = self.inverse_lambda(lambda_t) + + =============================================================== + + We support both discrete-time DPMs (trained on n = 0, 1, ..., N-1) and continuous-time DPMs (trained on t in [t_0, T]). + + 1. For discrete-time DPMs: + + For discrete-time DPMs trained on n = 0, 1, ..., N-1, we convert the discrete steps to continuous time steps by: + t_i = (i + 1) / N + e.g. for N = 1000, we have t_0 = 1e-3 and T = t_{N-1} = 1. + We solve the corresponding diffusion ODE from time T = 1 to time t_0 = 1e-3. + + Args: + betas: A `torch.Tensor`. The beta array for the discrete-time DPM. (See the original DDPM paper for details) + alphas_cumprod: A `torch.Tensor`. The cumprod alphas for the discrete-time DPM. (See the original DDPM paper for details) + + Note that we always have alphas_cumprod = cumprod(betas). Therefore, we only need to set one of `betas` and `alphas_cumprod`. + + **Important**: Please pay special attention for the args for `alphas_cumprod`: + The `alphas_cumprod` is the \hat{alpha_n} arrays in the notations of DDPM. Specifically, DDPMs assume that + q_{t_n | 0}(x_{t_n} | x_0) = N ( \sqrt{\hat{alpha_n}} * x_0, (1 - \hat{alpha_n}) * I ). 
+ Therefore, the notation \hat{alpha_n} is different from the notation alpha_t in DPM-Solver. In fact, we have + alpha_{t_n} = \sqrt{\hat{alpha_n}}, + and + log(alpha_{t_n}) = 0.5 * log(\hat{alpha_n}). + + + 2. For continuous-time DPMs: + + We support two types of VPSDEs: linear (DDPM) and cosine (improved-DDPM). The hyperparameters for the noise + schedule are the default settings in DDPM and improved-DDPM: + + Args: + beta_min: A `float` number. The smallest beta for the linear schedule. + beta_max: A `float` number. The largest beta for the linear schedule. + cosine_s: A `float` number. The hyperparameter in the cosine schedule. + cosine_beta_max: A `float` number. The hyperparameter in the cosine schedule. + T: A `float` number. The ending time of the forward process. + + =============================================================== + + Args: + schedule: A `str`. The noise schedule of the forward SDE. 'discrete' for discrete-time DPMs, + 'linear' or 'cosine' for continuous-time DPMs. + Returns: + A wrapper object of the forward SDE (VP type). + + =============================================================== + + Example: + + # For discrete-time DPMs, given betas (the beta array for n = 0, 1, ..., N - 1): + >>> ns = NoiseScheduleVP('discrete', betas=betas) + + # For discrete-time DPMs, given alphas_cumprod (the \hat{alpha_n} array for n = 0, 1, ..., N - 1): + >>> ns = NoiseScheduleVP('discrete', alphas_cumprod=alphas_cumprod) + + # For continuous-time DPMs (VPSDE), linear schedule: + >>> ns = NoiseScheduleVP('linear', continuous_beta_0=0.1, continuous_beta_1=20.) + + """ + + if schedule not in ['discrete', 'linear', 'cosine']: + raise ValueError("Unsupported noise schedule {}. The schedule needs to be 'discrete' or 'linear' or 'cosine'".format(schedule)) + + self.schedule = schedule + if schedule == 'discrete': + if betas is not None: + log_alphas = 0.5 * torch.log(1 - betas).cumsum(dim=0) + else: + assert alphas_cumprod is not None + log_alphas = 0.5 * torch.log(alphas_cumprod) + self.total_N = len(log_alphas) + self.T = 1. + self.t_array = torch.linspace(0., 1., self.total_N + 1)[1:].reshape((1, -1)) + self.log_alpha_array = log_alphas.reshape((1, -1,)) + else: + self.total_N = 1000 + self.beta_0 = continuous_beta_0 + self.beta_1 = continuous_beta_1 + self.cosine_s = 0.008 + self.cosine_beta_max = 999. + self.cosine_t_max = math.atan(self.cosine_beta_max * (1. + self.cosine_s) / math.pi) * 2. * (1. + self.cosine_s) / math.pi - self.cosine_s + self.cosine_log_alpha_0 = math.log(math.cos(self.cosine_s / (1. + self.cosine_s) * math.pi / 2.)) + self.schedule = schedule + if schedule == 'cosine': + # For the cosine schedule, T = 1 will have numerical issues. So we manually set the ending time T. + # Note that T = 0.9946 may be not the optimal setting. However, we find it works well. + self.T = 0.9946 + else: + self.T = 1. + + def marginal_log_mean_coeff(self, t): + """ + Compute log(alpha_t) of a given continuous-time label t in [0, T]. + """ + if self.schedule == 'discrete': + return interpolate_fn(t.reshape((-1, 1)), self.t_array.to(t.device), self.log_alpha_array.to(t.device)).reshape((-1)) + elif self.schedule == 'linear': + return -0.25 * t ** 2 * (self.beta_1 - self.beta_0) - 0.5 * t * self.beta_0 + elif self.schedule == 'cosine': + log_alpha_fn = lambda s: torch.log(torch.cos((s + self.cosine_s) / (1. 
+ self.cosine_s) * math.pi / 2.)) + log_alpha_t = log_alpha_fn(t) - self.cosine_log_alpha_0 + return log_alpha_t + + def marginal_alpha(self, t): + """ + Compute alpha_t of a given continuous-time label t in [0, T]. + """ + return torch.exp(self.marginal_log_mean_coeff(t)) + + def marginal_std(self, t): + """ + Compute sigma_t of a given continuous-time label t in [0, T]. + """ + return torch.sqrt(1. - torch.exp(2. * self.marginal_log_mean_coeff(t))) + + def marginal_lambda(self, t): + """ + Compute lambda_t = log(alpha_t) - log(sigma_t) of a given continuous-time label t in [0, T]. + """ + log_mean_coeff = self.marginal_log_mean_coeff(t) + log_std = 0.5 * torch.log(1. - torch.exp(2. * log_mean_coeff)) + return log_mean_coeff - log_std + + def inverse_lambda(self, lamb): + """ + Compute the continuous-time label t in [0, T] of a given half-logSNR lambda_t. + """ + if self.schedule == 'linear': + tmp = 2. * (self.beta_1 - self.beta_0) * torch.logaddexp(-2. * lamb, torch.zeros((1,)).to(lamb)) + Delta = self.beta_0**2 + tmp + return tmp / (torch.sqrt(Delta) + self.beta_0) / (self.beta_1 - self.beta_0) + elif self.schedule == 'discrete': + log_alpha = -0.5 * torch.logaddexp(torch.zeros((1,)).to(lamb.device), -2. * lamb) + t = interpolate_fn(log_alpha.reshape((-1, 1)), torch.flip(self.log_alpha_array.to(lamb.device), [1]), torch.flip(self.t_array.to(lamb.device), [1])) + return t.reshape((-1,)) + else: + log_alpha = -0.5 * torch.logaddexp(-2. * lamb, torch.zeros((1,)).to(lamb)) + t_fn = lambda log_alpha_t: torch.arccos(torch.exp(log_alpha_t + self.cosine_log_alpha_0)) * 2. * (1. + self.cosine_s) / math.pi - self.cosine_s + t = t_fn(log_alpha) + return t + + +def model_wrapper( + model, + noise_schedule, + model_type="noise", + model_kwargs={}, + guidance_type="uncond", + condition=None, + unconditional_condition=None, + guidance_scale=1., + classifier_fn=None, + classifier_kwargs={}, +): + """Create a wrapper function for the noise prediction model. + + DPM-Solver needs to solve the continuous-time diffusion ODEs. For DPMs trained on discrete-time labels, we need to + firstly wrap the model function to a noise prediction model that accepts the continuous time as the input. + + We support four types of the diffusion model by setting `model_type`: + + 1. "noise": noise prediction model. (Trained by predicting noise). + + 2. "x_start": data prediction model. (Trained by predicting the data x_0 at time 0). + + 3. "v": velocity prediction model. (Trained by predicting the velocity). + The "v" prediction is derivation detailed in Appendix D of [1], and is used in Imagen-Video [2]. + + [1] Salimans, Tim, and Jonathan Ho. "Progressive distillation for fast sampling of diffusion models." + arXiv preprint arXiv:2202.00512 (2022). + [2] Ho, Jonathan, et al. "Imagen Video: High Definition Video Generation with Diffusion Models." + arXiv preprint arXiv:2210.02303 (2022). + + 4. "score": marginal score function. (Trained by denoising score matching). + Note that the score function and the noise prediction model follows a simple relationship: + ``` + noise(x_t, t) = -sigma_t * score(x_t, t) + ``` + + We support three types of guided sampling by DPMs by setting `guidance_type`: + 1. "uncond": unconditional sampling by DPMs. + The input `model` has the following format: + `` + model(x, t_input, **model_kwargs) -> noise | x_start | v | score + `` + + 2. "classifier": classifier guidance sampling [3] by DPMs and another classifier. 
+ The input `model` has the following format: + `` + model(x, t_input, **model_kwargs) -> noise | x_start | v | score + `` + + The input `classifier_fn` has the following format: + `` + classifier_fn(x, t_input, cond, **classifier_kwargs) -> logits(x, t_input, cond) + `` + + [3] P. Dhariwal and A. Q. Nichol, "Diffusion models beat GANs on image synthesis," + in Advances in Neural Information Processing Systems, vol. 34, 2021, pp. 8780-8794. + + 3. "classifier-free": classifier-free guidance sampling by conditional DPMs. + The input `model` has the following format: + `` + model(x, t_input, cond, **model_kwargs) -> noise | x_start | v | score + `` + And if cond == `unconditional_condition`, the model output is the unconditional DPM output. + + [4] Ho, Jonathan, and Tim Salimans. "Classifier-free diffusion guidance." + arXiv preprint arXiv:2207.12598 (2022). + + + The `t_input` is the time label of the model, which may be discrete-time labels (i.e. 0 to 999) + or continuous-time labels (i.e. epsilon to T). + + We wrap the model function to accept only `x` and `t_continuous` as inputs, and outputs the predicted noise: + `` + def model_fn(x, t_continuous) -> noise: + t_input = get_model_input_time(t_continuous) + return noise_pred(model, x, t_input, **model_kwargs) + `` + where `t_continuous` is the continuous time labels (i.e. epsilon to T). And we use `model_fn` for DPM-Solver. + + =============================================================== + + Args: + model: A diffusion model with the corresponding format described above. + noise_schedule: A noise schedule object, such as NoiseScheduleVP. + model_type: A `str`. The parameterization type of the diffusion model. + "noise" or "x_start" or "v" or "score". + model_kwargs: A `dict`. A dict for the other inputs of the model function. + guidance_type: A `str`. The type of the guidance for sampling. + "uncond" or "classifier" or "classifier-free". + condition: A pytorch tensor. The condition for the guided sampling. + Only used for "classifier" or "classifier-free" guidance type. + unconditional_condition: A pytorch tensor. The condition for the unconditional sampling. + Only used for "classifier-free" guidance type. + guidance_scale: A `float`. The scale for the guided sampling. + classifier_fn: A classifier function. Only used for the classifier guidance. + classifier_kwargs: A `dict`. A dict for the other inputs of the classifier function. + Returns: + A noise prediction model that accepts the noised data and the continuous time as the inputs. + """ + + def get_model_input_time(t_continuous): + """ + Convert the continuous-time `t_continuous` (in [epsilon, T]) to the model input time. + For discrete-time DPMs, we convert `t_continuous` in [1 / N, 1] to `t_input` in [0, 1000 * (N - 1) / N]. + For continuous-time DPMs, we just use `t_continuous`. + """ + if noise_schedule.schedule == 'discrete': + return (t_continuous - 1. / noise_schedule.total_N) * 1000. 
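            # Illustrative example: for a discrete-time model with total_N = 1000 this
            # maps t_continuous = 1/1000 to t_input = 0 and t_continuous = 1 to
            # t_input = 999, matching the usual 0..999 DDPM training labels.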
+ else: + return t_continuous + + def noise_pred_fn(x, t_continuous, cond=None): + if t_continuous.reshape((-1,)).shape[0] == 1: + t_continuous = t_continuous.expand((x.shape[0])) + t_input = get_model_input_time(t_continuous) + output = model(x, t_input, **model_kwargs) + if model_type == "noise": + return output + elif model_type == "x_start": + alpha_t, sigma_t = noise_schedule.marginal_alpha(t_continuous), noise_schedule.marginal_std(t_continuous) + dims = x.dim() + return (x - expand_dims(alpha_t, dims) * output) / expand_dims(sigma_t, dims) + elif model_type == "v": + alpha_t, sigma_t = noise_schedule.marginal_alpha(t_continuous), noise_schedule.marginal_std(t_continuous) + dims = x.dim() + return expand_dims(alpha_t, dims) * output + expand_dims(sigma_t, dims) * x + elif model_type == "score": + sigma_t = noise_schedule.marginal_std(t_continuous) + dims = x.dim() + return -expand_dims(sigma_t, dims) * output + + def cond_grad_fn(x, t_input): + """ + Compute the gradient of the classifier, i.e. nabla_{x} log p_t(cond | x_t). + """ + with torch.enable_grad(): + x_in = x.detach().requires_grad_(True) + log_prob = classifier_fn(x_in, t_input, condition, **classifier_kwargs) + return torch.autograd.grad(log_prob.sum(), x_in)[0] + + def model_fn(x, t_continuous): + """ + The noise predicition model function that is used for DPM-Solver. + """ + if t_continuous.reshape((-1,)).shape[0] == 1: + t_continuous = t_continuous.expand((x.shape[0])) + if guidance_type == "uncond": + return noise_pred_fn(x, t_continuous) + elif guidance_type == "classifier": + assert classifier_fn is not None + t_input = get_model_input_time(t_continuous) + cond_grad = cond_grad_fn(x, t_input) + sigma_t = noise_schedule.marginal_std(t_continuous) + noise = noise_pred_fn(x, t_continuous) + return noise - guidance_scale * expand_dims(sigma_t, dims=cond_grad.dim()) * cond_grad + elif guidance_type == "classifier-free": + if guidance_scale == 1. or unconditional_condition is None: + return noise_pred_fn(x, t_continuous, cond=condition) + else: + x_in = torch.cat([x] * 2) + t_in = torch.cat([t_continuous] * 2) + c_in = torch.cat([unconditional_condition, condition]) + noise_uncond, noise = noise_pred_fn(x_in, t_in, cond=c_in).chunk(2) + return noise_uncond + guidance_scale * (noise - noise_uncond) + + assert model_type in ["noise", "x_start", "v"] + assert guidance_type in ["uncond", "classifier", "classifier-free"] + return model_fn + + +class UniPC: + def __init__( + self, + model_fn, + noise_schedule, + predict_x0=True, + thresholding=False, + max_val=1., + variant='bh1', + noise_mask=None, + masked_image=None, + noise=None, + ): + """Construct a UniPC. + + We support both data_prediction and noise_prediction. + """ + self.model = model_fn + self.noise_schedule = noise_schedule + self.variant = variant + self.predict_x0 = predict_x0 + self.thresholding = thresholding + self.max_val = max_val + self.noise_mask = noise_mask + self.masked_image = masked_image + self.noise = noise + + def dynamic_thresholding_fn(self, x0, t=None): + """ + The dynamic thresholding method. + """ + dims = x0.dim() + p = self.dynamic_thresholding_ratio + s = torch.quantile(torch.abs(x0).reshape((x0.shape[0], -1)), p, dim=1) + s = expand_dims(torch.maximum(s, self.thresholding_max_val * torch.ones_like(s).to(s.device)), dims) + x0 = torch.clamp(x0, -s, s) / s + return x0 + + def noise_prediction_fn(self, x, t): + """ + Return the noise prediction model. 
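        If a noise mask is given (e.g. for inpainting), the predicted noise is multiplied by
        the mask, so regions where the mask is zero receive no denoising update (note added
        for clarity).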
+ """ + if self.noise_mask is not None: + return self.model(x, t) * self.noise_mask + else: + return self.model(x, t) + + def data_prediction_fn(self, x, t): + """ + Return the data prediction model (with thresholding). + """ + noise = self.noise_prediction_fn(x, t) + dims = x.dim() + alpha_t, sigma_t = self.noise_schedule.marginal_alpha(t), self.noise_schedule.marginal_std(t) + x0 = (x - expand_dims(sigma_t, dims) * noise) / expand_dims(alpha_t, dims) + if self.thresholding: + p = 0.995 # A hyperparameter in the paper of "Imagen" [1]. + s = torch.quantile(torch.abs(x0).reshape((x0.shape[0], -1)), p, dim=1) + s = expand_dims(torch.maximum(s, self.max_val * torch.ones_like(s).to(s.device)), dims) + x0 = torch.clamp(x0, -s, s) / s + if self.noise_mask is not None: + x0 = x0 * self.noise_mask + (1. - self.noise_mask) * self.masked_image + return x0 + + def model_fn(self, x, t): + """ + Convert the model to the noise prediction model or the data prediction model. + """ + if self.predict_x0: + return self.data_prediction_fn(x, t) + else: + return self.noise_prediction_fn(x, t) + + def get_time_steps(self, skip_type, t_T, t_0, N, device): + """Compute the intermediate time steps for sampling. + """ + if skip_type == 'logSNR': + lambda_T = self.noise_schedule.marginal_lambda(torch.tensor(t_T).to(device)) + lambda_0 = self.noise_schedule.marginal_lambda(torch.tensor(t_0).to(device)) + logSNR_steps = torch.linspace(lambda_T.cpu().item(), lambda_0.cpu().item(), N + 1).to(device) + return self.noise_schedule.inverse_lambda(logSNR_steps) + elif skip_type == 'time_uniform': + return torch.linspace(t_T, t_0, N + 1).to(device) + elif skip_type == 'time_quadratic': + t_order = 2 + t = torch.linspace(t_T**(1. / t_order), t_0**(1. / t_order), N + 1).pow(t_order).to(device) + return t + else: + raise ValueError("Unsupported skip_type {}, need to be 'logSNR' or 'time_uniform' or 'time_quadratic'".format(skip_type)) + + def get_orders_and_timesteps_for_singlestep_solver(self, steps, order, skip_type, t_T, t_0, device): + """ + Get the order of each step for sampling by the singlestep DPM-Solver. + """ + if order == 3: + K = steps // 3 + 1 + if steps % 3 == 0: + orders = [3,] * (K - 2) + [2, 1] + elif steps % 3 == 1: + orders = [3,] * (K - 1) + [1] + else: + orders = [3,] * (K - 1) + [2] + elif order == 2: + if steps % 2 == 0: + K = steps // 2 + orders = [2,] * K + else: + K = steps // 2 + 1 + orders = [2,] * (K - 1) + [1] + elif order == 1: + K = steps + orders = [1,] * steps + else: + raise ValueError("'order' must be '1' or '2' or '3'.") + if skip_type == 'logSNR': + # To reproduce the results in DPM-Solver paper + timesteps_outer = self.get_time_steps(skip_type, t_T, t_0, K, device) + else: + timesteps_outer = self.get_time_steps(skip_type, t_T, t_0, steps, device)[torch.cumsum(torch.tensor([0,] + orders), 0).to(device)] + return timesteps_outer, orders + + def denoise_to_zero_fn(self, x, s): + """ + Denoise at the final step, which is equivalent to solve the ODE from lambda_s to infty by first-order discretization. 
+ """ + return self.data_prediction_fn(x, s) + + def multistep_uni_pc_update(self, x, model_prev_list, t_prev_list, t, order, **kwargs): + if len(t.shape) == 0: + t = t.view(-1) + if 'bh' in self.variant: + return self.multistep_uni_pc_bh_update(x, model_prev_list, t_prev_list, t, order, **kwargs) + else: + assert self.variant == 'vary_coeff' + return self.multistep_uni_pc_vary_update(x, model_prev_list, t_prev_list, t, order, **kwargs) + + def multistep_uni_pc_vary_update(self, x, model_prev_list, t_prev_list, t, order, use_corrector=True): + print(f'using unified predictor-corrector with order {order} (solver type: vary coeff)') + ns = self.noise_schedule + assert order <= len(model_prev_list) + + # first compute rks + t_prev_0 = t_prev_list[-1] + lambda_prev_0 = ns.marginal_lambda(t_prev_0) + lambda_t = ns.marginal_lambda(t) + model_prev_0 = model_prev_list[-1] + sigma_prev_0, sigma_t = ns.marginal_std(t_prev_0), ns.marginal_std(t) + log_alpha_t = ns.marginal_log_mean_coeff(t) + alpha_t = torch.exp(log_alpha_t) + + h = lambda_t - lambda_prev_0 + + rks = [] + D1s = [] + for i in range(1, order): + t_prev_i = t_prev_list[-(i + 1)] + model_prev_i = model_prev_list[-(i + 1)] + lambda_prev_i = ns.marginal_lambda(t_prev_i) + rk = (lambda_prev_i - lambda_prev_0) / h + rks.append(rk) + D1s.append((model_prev_i - model_prev_0) / rk) + + rks.append(1.) + rks = torch.tensor(rks, device=x.device) + + K = len(rks) + # build C matrix + C = [] + + col = torch.ones_like(rks) + for k in range(1, K + 1): + C.append(col) + col = col * rks / (k + 1) + C = torch.stack(C, dim=1) + + if len(D1s) > 0: + D1s = torch.stack(D1s, dim=1) # (B, K) + C_inv_p = torch.linalg.inv(C[:-1, :-1]) + A_p = C_inv_p + + if use_corrector: + print('using corrector') + C_inv = torch.linalg.inv(C) + A_c = C_inv + + hh = -h if self.predict_x0 else h + h_phi_1 = torch.expm1(hh) + h_phi_ks = [] + factorial_k = 1 + h_phi_k = h_phi_1 + for k in range(1, K + 2): + h_phi_ks.append(h_phi_k) + h_phi_k = h_phi_k / hh - 1 / factorial_k + factorial_k *= (k + 1) + + model_t = None + if self.predict_x0: + x_t_ = ( + sigma_t / sigma_prev_0 * x + - alpha_t * h_phi_1 * model_prev_0 + ) + # now predictor + x_t = x_t_ + if len(D1s) > 0: + # compute the residuals for predictor + for k in range(K - 1): + x_t = x_t - alpha_t * h_phi_ks[k + 1] * torch.einsum('bkchw,k->bchw', D1s, A_p[k]) + # now corrector + if use_corrector: + model_t = self.model_fn(x_t, t) + D1_t = (model_t - model_prev_0) + x_t = x_t_ + k = 0 + for k in range(K - 1): + x_t = x_t - alpha_t * h_phi_ks[k + 1] * torch.einsum('bkchw,k->bchw', D1s, A_c[k][:-1]) + x_t = x_t - alpha_t * h_phi_ks[K] * (D1_t * A_c[k][-1]) + else: + log_alpha_prev_0, log_alpha_t = ns.marginal_log_mean_coeff(t_prev_0), ns.marginal_log_mean_coeff(t) + x_t_ = ( + (torch.exp(log_alpha_t - log_alpha_prev_0)) * x + - (sigma_t * h_phi_1) * model_prev_0 + ) + # now predictor + x_t = x_t_ + if len(D1s) > 0: + # compute the residuals for predictor + for k in range(K - 1): + x_t = x_t - sigma_t * h_phi_ks[k + 1] * torch.einsum('bkchw,k->bchw', D1s, A_p[k]) + # now corrector + if use_corrector: + model_t = self.model_fn(x_t, t) + D1_t = (model_t - model_prev_0) + x_t = x_t_ + k = 0 + for k in range(K - 1): + x_t = x_t - sigma_t * h_phi_ks[k + 1] * torch.einsum('bkchw,k->bchw', D1s, A_c[k][:-1]) + x_t = x_t - sigma_t * h_phi_ks[K] * (D1_t * A_c[k][-1]) + return x_t, model_t + + def multistep_uni_pc_bh_update(self, x, model_prev_list, t_prev_list, t, order, x_t=None, use_corrector=True): + # print(f'using unified 
predictor-corrector with order {order} (solver type: B(h))') + ns = self.noise_schedule + assert order <= len(model_prev_list) + dims = x.dim() + + # first compute rks + t_prev_0 = t_prev_list[-1] + lambda_prev_0 = ns.marginal_lambda(t_prev_0) + lambda_t = ns.marginal_lambda(t) + model_prev_0 = model_prev_list[-1] + sigma_prev_0, sigma_t = ns.marginal_std(t_prev_0), ns.marginal_std(t) + log_alpha_prev_0, log_alpha_t = ns.marginal_log_mean_coeff(t_prev_0), ns.marginal_log_mean_coeff(t) + alpha_t = torch.exp(log_alpha_t) + + h = lambda_t - lambda_prev_0 + + rks = [] + D1s = [] + for i in range(1, order): + t_prev_i = t_prev_list[-(i + 1)] + model_prev_i = model_prev_list[-(i + 1)] + lambda_prev_i = ns.marginal_lambda(t_prev_i) + rk = ((lambda_prev_i - lambda_prev_0) / h)[0] + rks.append(rk) + D1s.append((model_prev_i - model_prev_0) / rk) + + rks.append(1.) + rks = torch.tensor(rks, device=x.device) + + R = [] + b = [] + + hh = -h[0] if self.predict_x0 else h[0] + h_phi_1 = torch.expm1(hh) # h\phi_1(h) = e^h - 1 + h_phi_k = h_phi_1 / hh - 1 + + factorial_i = 1 + + if self.variant == 'bh1': + B_h = hh + elif self.variant == 'bh2': + B_h = torch.expm1(hh) + else: + raise NotImplementedError() + + for i in range(1, order + 1): + R.append(torch.pow(rks, i - 1)) + b.append(h_phi_k * factorial_i / B_h) + factorial_i *= (i + 1) + h_phi_k = h_phi_k / hh - 1 / factorial_i + + R = torch.stack(R) + b = torch.tensor(b, device=x.device) + + # now predictor + use_predictor = len(D1s) > 0 and x_t is None + if len(D1s) > 0: + D1s = torch.stack(D1s, dim=1) # (B, K) + if x_t is None: + # for order 2, we use a simplified version + if order == 2: + rhos_p = torch.tensor([0.5], device=b.device) + else: + rhos_p = torch.linalg.solve(R[:-1, :-1], b[:-1]) + else: + D1s = None + + if use_corrector: + # print('using corrector') + # for order 1, we use a simplified version + if order == 1: + rhos_c = torch.tensor([0.5], device=b.device) + else: + rhos_c = torch.linalg.solve(R, b) + + model_t = None + if self.predict_x0: + x_t_ = ( + expand_dims(sigma_t / sigma_prev_0, dims) * x + - expand_dims(alpha_t * h_phi_1, dims)* model_prev_0 + ) + + if x_t is None: + if use_predictor: + pred_res = torch.einsum('k,bkchw->bchw', rhos_p, D1s) + else: + pred_res = 0 + x_t = x_t_ - expand_dims(alpha_t * B_h, dims) * pred_res + + if use_corrector: + model_t = self.model_fn(x_t, t) + if D1s is not None: + corr_res = torch.einsum('k,bkchw->bchw', rhos_c[:-1], D1s) + else: + corr_res = 0 + D1_t = (model_t - model_prev_0) + x_t = x_t_ - expand_dims(alpha_t * B_h, dims) * (corr_res + rhos_c[-1] * D1_t) + else: + x_t_ = ( + expand_dims(torch.exp(log_alpha_t - log_alpha_prev_0), dims) * x + - expand_dims(sigma_t * h_phi_1, dims) * model_prev_0 + ) + if x_t is None: + if use_predictor: + pred_res = torch.einsum('k,bkchw->bchw', rhos_p, D1s) + else: + pred_res = 0 + x_t = x_t_ - expand_dims(sigma_t * B_h, dims) * pred_res + + if use_corrector: + model_t = self.model_fn(x_t, t) + if D1s is not None: + corr_res = torch.einsum('k,bkchw->bchw', rhos_c[:-1], D1s) + else: + corr_res = 0 + D1_t = (model_t - model_prev_0) + x_t = x_t_ - expand_dims(sigma_t * B_h, dims) * (corr_res + rhos_c[-1] * D1_t) + return x_t, model_t + + + def sample(self, x, timesteps, t_start=None, t_end=None, order=3, skip_type='time_uniform', + method='singlestep', lower_order_final=True, denoise_to_zero=False, solver_type='dpm_solver', + atol=0.0078, rtol=0.05, corrector=False, callback=None, disable_pbar=False + ): + # t_0 = 1. 
/ self.noise_schedule.total_N if t_end is None else t_end + # t_T = self.noise_schedule.T if t_start is None else t_start + device = x.device + steps = len(timesteps) - 1 + if method == 'multistep': + assert steps >= order + # timesteps = self.get_time_steps(skip_type=skip_type, t_T=t_T, t_0=t_0, N=steps, device=device) + assert timesteps.shape[0] - 1 == steps + # with torch.no_grad(): + for step_index in trange(steps, disable=disable_pbar): + if self.noise_mask is not None: + x = x * self.noise_mask + (1. - self.noise_mask) * (self.masked_image * self.noise_schedule.marginal_alpha(timesteps[step_index]) + self.noise * self.noise_schedule.marginal_std(timesteps[step_index])) + if step_index == 0: + vec_t = timesteps[0].expand((x.shape[0])) + model_prev_list = [self.model_fn(x, vec_t)] + t_prev_list = [vec_t] + elif step_index < order: + init_order = step_index + # Init the first `order` values by lower order multistep DPM-Solver. + # for init_order in range(1, order): + vec_t = timesteps[init_order].expand(x.shape[0]) + x, model_x = self.multistep_uni_pc_update(x, model_prev_list, t_prev_list, vec_t, init_order, use_corrector=True) + if model_x is None: + model_x = self.model_fn(x, vec_t) + model_prev_list.append(model_x) + t_prev_list.append(vec_t) + else: + extra_final_step = 0 + if step_index == (steps - 1): + extra_final_step = 1 + for step in range(step_index, step_index + 1 + extra_final_step): + vec_t = timesteps[step].expand(x.shape[0]) + if lower_order_final: + step_order = min(order, steps + 1 - step) + else: + step_order = order + # print('this step order:', step_order) + if step == steps: + # print('do not run corrector at the last step') + use_corrector = False + else: + use_corrector = True + x, model_x = self.multistep_uni_pc_update(x, model_prev_list, t_prev_list, vec_t, step_order, use_corrector=use_corrector) + for i in range(order - 1): + t_prev_list[i] = t_prev_list[i + 1] + model_prev_list[i] = model_prev_list[i + 1] + t_prev_list[-1] = vec_t + # We do not need to evaluate the final model value. + if step < steps: + if model_x is None: + model_x = self.model_fn(x, vec_t) + model_prev_list[-1] = model_x + if callback is not None: + callback(step_index, model_prev_list[-1], x, steps) + else: + raise NotImplementedError() + # if denoise_to_zero: + # x = self.denoise_to_zero_fn(x, torch.ones((x.shape[0],)).to(device) * t_0) + return x + + +############################################################# +# other utility functions +############################################################# + +def interpolate_fn(x, xp, yp): + """ + A piecewise linear function y = f(x), using xp and yp as keypoints. + We implement f(x) in a differentiable way (i.e. applicable for autograd). + The function f(x) is well-defined for all x-axis. (For x beyond the bounds of xp, we use the outmost points of xp to define the linear function.) + + Args: + x: PyTorch tensor with shape [N, C], where N is the batch size, C is the number of channels (we use C = 1 for DPM-Solver). + xp: PyTorch tensor with shape [C, K], where K is the number of keypoints. + yp: PyTorch tensor with shape [C, K]. + Returns: + The function values f(x), with shape [N, C]. 
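        Example (illustrative):
            >>> xp = torch.tensor([[0., 1., 2.]])
            >>> yp = torch.tensor([[0., 10., 20.]])
            >>> interpolate_fn(torch.tensor([[0.5], [3.0]]), xp, yp)  # -> approximately [[5.], [30.]]
        Queries outside [0, 2] are extrapolated with the outermost segment, so 3.0 maps to 30.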
+ """ + N, K = x.shape[0], xp.shape[1] + all_x = torch.cat([x.unsqueeze(2), xp.unsqueeze(0).repeat((N, 1, 1))], dim=2) + sorted_all_x, x_indices = torch.sort(all_x, dim=2) + x_idx = torch.argmin(x_indices, dim=2) + cand_start_idx = x_idx - 1 + start_idx = torch.where( + torch.eq(x_idx, 0), + torch.tensor(1, device=x.device), + torch.where( + torch.eq(x_idx, K), torch.tensor(K - 2, device=x.device), cand_start_idx, + ), + ) + end_idx = torch.where(torch.eq(start_idx, cand_start_idx), start_idx + 2, start_idx + 1) + start_x = torch.gather(sorted_all_x, dim=2, index=start_idx.unsqueeze(2)).squeeze(2) + end_x = torch.gather(sorted_all_x, dim=2, index=end_idx.unsqueeze(2)).squeeze(2) + start_idx2 = torch.where( + torch.eq(x_idx, 0), + torch.tensor(0, device=x.device), + torch.where( + torch.eq(x_idx, K), torch.tensor(K - 2, device=x.device), cand_start_idx, + ), + ) + y_positions_expanded = yp.unsqueeze(0).expand(N, -1, -1) + start_y = torch.gather(y_positions_expanded, dim=2, index=start_idx2.unsqueeze(2)).squeeze(2) + end_y = torch.gather(y_positions_expanded, dim=2, index=(start_idx2 + 1).unsqueeze(2)).squeeze(2) + cand = start_y + (x - start_x) * (end_y - start_y) / (end_x - start_x) + return cand + + +def expand_dims(v, dims): + """ + Expand the tensor `v` to the dim `dims`. + + Args: + `v`: a PyTorch tensor with shape [N]. + `dim`: a `int`. + Returns: + a PyTorch tensor with shape [N, 1, 1, ..., 1] and the total dimension is `dims`. + """ + return v[(...,) + (None,)*(dims - 1)] + + +class SigmaConvert: + schedule = "" + def marginal_log_mean_coeff(self, sigma): + return 0.5 * torch.log(1 / ((sigma * sigma) + 1)) + + def marginal_alpha(self, t): + return torch.exp(self.marginal_log_mean_coeff(t)) + + def marginal_std(self, t): + return torch.sqrt(1. - torch.exp(2. * self.marginal_log_mean_coeff(t))) + + def marginal_lambda(self, t): + """ + Compute lambda_t = log(alpha_t) - log(sigma_t) of a given continuous-time label t in [0, T]. + """ + log_mean_coeff = self.marginal_log_mean_coeff(t) + log_std = 0.5 * torch.log(1. - torch.exp(2. 
* log_mean_coeff)) + return log_mean_coeff - log_std + +def predict_eps_sigma(model, input, sigma_in, **kwargs): + sigma = sigma_in.view(sigma_in.shape[:1] + (1,) * (input.ndim - 1)) + input = input * ((sigma ** 2 + 1.0) ** 0.5) + return (input - model(input, sigma_in, **kwargs)) / sigma + + +def sample_unipc(model, noise, image, sigmas, max_denoise, extra_args=None, callback=None, disable=False, noise_mask=None, variant='bh1'): + timesteps = sigmas.clone() + if sigmas[-1] == 0: + timesteps = sigmas[:] + timesteps[-1] = 0.001 + else: + timesteps = sigmas.clone() + ns = SigmaConvert() + + if image is not None: + img = image * ns.marginal_alpha(timesteps[0]) + if max_denoise: + noise_mult = 1.0 + else: + noise_mult = ns.marginal_std(timesteps[0]) + img += noise * noise_mult + else: + img = noise + + model_type = "noise" + + model_fn = model_wrapper( + lambda input, sigma, **kwargs: predict_eps_sigma(model, input, sigma, **kwargs), + ns, + model_type=model_type, + guidance_type="uncond", + model_kwargs=extra_args, + ) + + order = min(3, len(timesteps) - 2) + uni_pc = UniPC(model_fn, ns, predict_x0=True, thresholding=False, noise_mask=noise_mask, masked_image=image, noise=noise, variant=variant) + x = uni_pc.sample(img, timesteps=timesteps, skip_type="time_uniform", method="multistep", order=order, lower_order_final=True, callback=callback, disable_pbar=disable) + x /= ns.marginal_alpha(timesteps[-1]) + return x diff --git a/ldm_patched/utils/latent_visualization.py b/ldm_patched/utils/latent_visualization.py new file mode 100644 index 0000000000000000000000000000000000000000..7d592897265f79eec10cbd2fe1d71f5d81ee0d14 --- /dev/null +++ b/ldm_patched/utils/latent_visualization.py @@ -0,0 +1,100 @@ +# Taken from https://github.com/comfyanonymous/ComfyUI +# This file is only for reference, and not used in the backend or runtime. + +import torch +from PIL import Image +import struct +import numpy as np +from ldm_patched.modules.args_parser import args, LatentPreviewMethod +from ldm_patched.taesd.taesd import TAESD +import ldm_patched.utils.path_utils +import ldm_patched.modules.utils + +MAX_PREVIEW_RESOLUTION = 512 + +class LatentPreviewer: + def decode_latent_to_preview(self, x0): + pass + + def decode_latent_to_preview_image(self, preview_format, x0): + preview_image = self.decode_latent_to_preview(x0) + return ("JPEG", preview_image, MAX_PREVIEW_RESOLUTION) + +class TAESDPreviewerImpl(LatentPreviewer): + def __init__(self, taesd): + self.taesd = taesd + + def decode_latent_to_preview(self, x0): + x_sample = self.taesd.decode(x0[:1])[0].detach() + x_sample = torch.clamp((x_sample + 1.0) / 2.0, min=0.0, max=1.0) + x_sample = 255. 
* np.moveaxis(x_sample.cpu().numpy(), 0, 2) + x_sample = x_sample.astype(np.uint8) + + preview_image = Image.fromarray(x_sample) + return preview_image + + +class Latent2RGBPreviewer(LatentPreviewer): + def __init__(self, latent_rgb_factors): + self.latent_rgb_factors = torch.tensor(latent_rgb_factors, device="cpu") + + def decode_latent_to_preview(self, x0): + latent_image = x0[0].permute(1, 2, 0).cpu() @ self.latent_rgb_factors + + latents_ubyte = (((latent_image + 1) / 2) + .clamp(0, 1) # change scale from -1..1 to 0..1 + .mul(0xFF) # to 0..255 + .byte()).cpu() + + return Image.fromarray(latents_ubyte.numpy()) + + +def get_previewer(device, latent_format): + previewer = None + method = args.preview_option + if method != LatentPreviewMethod.NoPreviews: + # TODO previewer methods + taesd_decoder_path = None + if latent_format.taesd_decoder_name is not None: + taesd_decoder_path = next( + (fn for fn in ldm_patched.utils.path_utils.get_filename_list("vae_approx") + if fn.startswith(latent_format.taesd_decoder_name)), + "" + ) + taesd_decoder_path = ldm_patched.utils.path_utils.get_full_path("vae_approx", taesd_decoder_path) + + if method == LatentPreviewMethod.Auto: + method = LatentPreviewMethod.Latent2RGB + if taesd_decoder_path: + method = LatentPreviewMethod.TAESD + + if method == LatentPreviewMethod.TAESD: + if taesd_decoder_path: + taesd = TAESD(None, taesd_decoder_path).to(device) + previewer = TAESDPreviewerImpl(taesd) + else: + print("Warning: TAESD previews enabled, but could not find models/vae_approx/{}".format(latent_format.taesd_decoder_name)) + + if previewer is None: + if latent_format.latent_rgb_factors is not None: + previewer = Latent2RGBPreviewer(latent_format.latent_rgb_factors) + return previewer + +def prepare_callback(model, steps, x0_output_dict=None): + preview_format = "JPEG" + if preview_format not in ["JPEG", "PNG"]: + preview_format = "JPEG" + + previewer = get_previewer(model.load_device, model.model.latent_format) + + pbar = ldm_patched.modules.utils.ProgressBar(steps) + def callback(step, x0, x, total_steps): + if x0_output_dict is not None: + x0_output_dict["x0"] = x0 + + preview_bytes = None + if previewer: + preview_bytes = previewer.decode_latent_to_preview_image(preview_format, x0) + pbar.update_absolute(step + 1, total_steps, preview_bytes) + return callback + diff --git a/ldm_patched/utils/path_utils.py b/ldm_patched/utils/path_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..07fca49ae9fad32e39d63197d68e7fe192cc334c --- /dev/null +++ b/ldm_patched/utils/path_utils.py @@ -0,0 +1,235 @@ +# Taken from https://github.com/comfyanonymous/ComfyUI +# This file is only for reference, and not used in the backend or runtime. 
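# --- Illustrative sketch (editorial, not part of this patch) ----------------
# The Latent2RGBPreviewer in latent_visualization.py above maps a 4-channel SD
# latent to RGB with a fixed per-channel linear projection, then rescales the
# result from [-1, 1] into 0..255 bytes. The factor values used here are made-up
# placeholders for demonstration only, not the real SD latent->RGB factors.
import torch

def latent_to_rgb_preview(x0: torch.Tensor, latent_rgb_factors) -> torch.Tensor:
    """x0: latent batch [B, C, H, W]; returns a uint8 RGB preview [H, W, 3]."""
    factors = torch.tensor(latent_rgb_factors, dtype=torch.float32)   # [C, 3]
    latent_image = x0[0].permute(1, 2, 0).cpu() @ factors             # [H, W, 3]
    return (((latent_image + 1) / 2).clamp(0, 1).mul(255)).byte()

# Example with dummy data (4 latent channels -> RGB):
dummy_latent = torch.randn(1, 4, 8, 8)
dummy_factors = [[0.3, 0.2, 0.2], [0.2, 0.3, 0.1], [0.1, 0.2, 0.3], [-0.2, -0.1, -0.3]]
preview = latent_to_rgb_preview(dummy_latent, dummy_factors)          # shape (8, 8, 3), dtype uint8
# ----------------------------------------------------------------------------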
+ + +import os +import time + +supported_pt_extensions = set(['.ckpt', '.pt', '.bin', '.pth', '.safetensors']) + +folder_names_and_paths = {} + +# Will be assigned by modules.paths +base_path = None +models_dir = None +output_directory = None +temp_directory = None +input_directory = None +user_directory = None + +filename_list_cache = {} + +def set_output_directory(output_dir): + global output_directory + output_directory = output_dir + +def set_temp_directory(temp_dir): + global temp_directory + temp_directory = temp_dir + +def set_input_directory(input_dir): + global input_directory + input_directory = input_dir + +def get_output_directory(): + global output_directory + return output_directory + +def get_temp_directory(): + global temp_directory + return temp_directory + +def get_input_directory(): + global input_directory + return input_directory + + +#NOTE: used in http server so don't put folders that should not be accessed remotely +def get_directory_by_type(type_name): + if type_name == "output": + return get_output_directory() + if type_name == "temp": + return get_temp_directory() + if type_name == "input": + return get_input_directory() + return None + + +# determine base_dir rely on annotation if name is 'filename.ext [annotation]' format +# otherwise use default_path as base_dir +def annotated_filepath(name): + if name.endswith("[output]"): + base_dir = get_output_directory() + name = name[:-9] + elif name.endswith("[input]"): + base_dir = get_input_directory() + name = name[:-8] + elif name.endswith("[temp]"): + base_dir = get_temp_directory() + name = name[:-7] + else: + return name, None + + return name, base_dir + + +def get_annotated_filepath(name, default_dir=None): + name, base_dir = annotated_filepath(name) + + if base_dir is None: + if default_dir is not None: + base_dir = default_dir + else: + base_dir = get_input_directory() # fallback path + + return os.path.join(base_dir, name) + + +def exists_annotated_filepath(name): + name, base_dir = annotated_filepath(name) + + if base_dir is None: + base_dir = get_input_directory() # fallback path + + filepath = os.path.join(base_dir, name) + return os.path.exists(filepath) + + +def add_model_folder_path(folder_name, full_folder_path): + global folder_names_and_paths + if folder_name in folder_names_and_paths: + folder_names_and_paths[folder_name][0].append(full_folder_path) + else: + folder_names_and_paths[folder_name] = ([full_folder_path], set()) + +def get_folder_paths(folder_name): + return folder_names_and_paths[folder_name][0][:] + +def recursive_search(directory, excluded_dir_names=None): + if not os.path.isdir(directory): + return [], {} + + if excluded_dir_names is None: + excluded_dir_names = [] + + result = [] + dirs = {} + + # Attempt to add the initial directory to dirs with error handling + try: + dirs[directory] = os.path.getmtime(directory) + except FileNotFoundError: + print(f"Warning: Unable to access {directory}. Skipping this path.") + + for dirpath, subdirs, filenames in os.walk(directory, followlinks=True, topdown=True): + subdirs[:] = [d for d in subdirs if d not in excluded_dir_names] + for file_name in filenames: + relative_path = os.path.relpath(os.path.join(dirpath, file_name), directory) + result.append(relative_path) + + for d in subdirs: + path = os.path.join(dirpath, d) + try: + dirs[path] = os.path.getmtime(path) + except FileNotFoundError: + print(f"Warning: Unable to access {path}. 
Skipping this path.") + continue + return result, dirs + +def filter_files_extensions(files, extensions): + return sorted(list(filter(lambda a: os.path.splitext(a)[-1].lower() in extensions or len(extensions) == 0, files))) + + + +def get_full_path(folder_name, filename): + global folder_names_and_paths + if folder_name not in folder_names_and_paths: + return None + folders = folder_names_and_paths[folder_name] + filename = os.path.relpath(os.path.join("/", filename), "/") + for x in folders[0]: + full_path = os.path.join(x, filename) + if os.path.isfile(full_path): + return full_path + + return None + +def get_filename_list_(folder_name): + global folder_names_and_paths + output_list = set() + folders = folder_names_and_paths[folder_name] + output_folders = {} + for x in folders[0]: + files, folders_all = recursive_search(x, excluded_dir_names=[".git"]) + output_list.update(filter_files_extensions(files, folders[1])) + output_folders = {**output_folders, **folders_all} + + return (sorted(list(output_list)), output_folders, time.perf_counter()) + +def cached_filename_list_(folder_name): + global filename_list_cache + global folder_names_and_paths + if folder_name not in filename_list_cache: + return None + out = filename_list_cache[folder_name] + + for x in out[1]: + time_modified = out[1][x] + folder = x + if os.path.getmtime(folder) != time_modified: + return None + + folders = folder_names_and_paths[folder_name] + for x in folders[0]: + if os.path.isdir(x): + if x not in out[1]: + return None + + return out + +def get_filename_list(folder_name): + out = cached_filename_list_(folder_name) + if out is None: + out = get_filename_list_(folder_name) + global filename_list_cache + filename_list_cache[folder_name] = out + return list(out[0]) + +def get_save_image_path(filename_prefix, output_dir, image_width=0, image_height=0): + def map_filename(filename): + prefix_len = len(os.path.basename(filename_prefix)) + prefix = filename[:prefix_len + 1] + try: + digits = int(filename[prefix_len + 1:].split('_')[0]) + except: + digits = 0 + return (digits, prefix) + + def compute_vars(input, image_width, image_height): + input = input.replace("%width%", str(image_width)) + input = input.replace("%height%", str(image_height)) + return input + + filename_prefix = compute_vars(filename_prefix, image_width, image_height) + + subfolder = os.path.dirname(os.path.normpath(filename_prefix)) + filename = os.path.basename(os.path.normpath(filename_prefix)) + + full_output_folder = os.path.join(output_dir, subfolder) + + if os.path.commonpath((output_dir, os.path.abspath(full_output_folder))) != output_dir: + err = "**** ERROR: Saving image outside the output folder is not allowed." 
+ \ + "\n full_output_folder: " + os.path.abspath(full_output_folder) + \ + "\n output_dir: " + output_dir + \ + "\n commonpath: " + os.path.commonpath((output_dir, os.path.abspath(full_output_folder))) + print(err) + raise Exception(err) + + try: + counter = max(filter(lambda a: a[1][:-1] == filename and a[1][-1] == "_", map(map_filename, os.listdir(full_output_folder))))[0] + 1 + except ValueError: + counter = 1 + except FileNotFoundError: + os.makedirs(full_output_folder, exist_ok=True) + counter = 1 + return full_output_folder, filename, counter, subfolder, filename_prefix diff --git a/localizations/Put localization files here.txt b/localizations/Put localization files here.txt new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/modules/Roboto-Regular.ttf b/modules/Roboto-Regular.ttf new file mode 100644 index 0000000000000000000000000000000000000000..500b1045b0c94d83d2e6798aaf1faa55a2dab6fc Binary files /dev/null and b/modules/Roboto-Regular.ttf differ diff --git a/modules/api/api.py b/modules/api/api.py new file mode 100644 index 0000000000000000000000000000000000000000..d5348bb24bfd2e29ee4770aa719b6d74ceee2486 --- /dev/null +++ b/modules/api/api.py @@ -0,0 +1,920 @@ +import base64 +import io +import os +import time +import itertools +import datetime +import uvicorn +import ipaddress +import requests +import gradio as gr +import numpy as np +from threading import Lock +from io import BytesIO +from fastapi import APIRouter, Depends, FastAPI, Request, Response +from fastapi.security import HTTPBasic, HTTPBasicCredentials +from fastapi.exceptions import HTTPException +from fastapi.responses import JSONResponse +from fastapi.encoders import jsonable_encoder +from secrets import compare_digest + +import modules.shared as shared +from modules import sd_samplers, deepbooru, sd_hijack, images, scripts, ui, postprocessing, errors, restart, shared_items, script_callbacks, infotext_utils, sd_models +from modules.api import models +from modules.shared import opts +from modules.processing import StableDiffusionProcessingTxt2Img, StableDiffusionProcessingImg2Img, process_images +from modules.textual_inversion.textual_inversion import create_embedding, train_embedding +from modules.hypernetworks.hypernetwork import create_hypernetwork, train_hypernetwork +from PIL import PngImagePlugin, Image +from modules.sd_models_config import find_checkpoint_config_near_filename +from modules.realesrgan_model import get_realesrgan_models +from modules import devices +from typing import Any +import piexif +import piexif.helper +from contextlib import closing +from modules.progress import create_task_id, add_task_to_queue, start_task, finish_task, current_task + +def script_name_to_index(name, scripts): + try: + return [script.title().lower() for script in scripts].index(name.lower()) + except Exception as e: + raise HTTPException(status_code=422, detail=f"Script '{name}' not found") from e + + +def validate_sampler_name(name): + config = sd_samplers.all_samplers_map.get(name, None) + if config is None: + raise HTTPException(status_code=404, detail="Sampler not found") + + return name + + +def setUpscalers(req: dict): + reqDict = vars(req) + reqDict['extras_upscaler_1'] = reqDict.pop('upscaler_1', None) + reqDict['extras_upscaler_2'] = reqDict.pop('upscaler_2', None) + return reqDict + + +def verify_url(url): + """Returns True if the url refers to a global resource.""" + + import socket + from urllib.parse import urlparse + try: + parsed_url = 
urlparse(url) + domain_name = parsed_url.netloc + host = socket.gethostbyname_ex(domain_name) + for ip in host[2]: + ip_addr = ipaddress.ip_address(ip) + if not ip_addr.is_global: + return False + except Exception: + return False + + return True + + +def decode_base64_to_image(encoding): + if encoding.startswith("http://") or encoding.startswith("https://"): + if not opts.api_enable_requests: + raise HTTPException(status_code=500, detail="Requests not allowed") + + if opts.api_forbid_local_requests and not verify_url(encoding): + raise HTTPException(status_code=500, detail="Request to local resource not allowed") + + headers = {'user-agent': opts.api_useragent} if opts.api_useragent else {} + response = requests.get(encoding, timeout=30, headers=headers) + try: + image = Image.open(BytesIO(response.content)) + return image + except Exception as e: + raise HTTPException(status_code=500, detail="Invalid image url") from e + + if encoding.startswith("data:image/"): + encoding = encoding.split(";")[1].split(",")[1] + try: + image = Image.open(BytesIO(base64.b64decode(encoding))) + return image + except Exception as e: + raise HTTPException(status_code=500, detail="Invalid encoded image") from e + + +def encode_pil_to_base64(image): + with io.BytesIO() as output_bytes: + if isinstance(image, str): + return image + if isinstance(image, np.ndarray): + image = Image.fromarray(image) + if opts.samples_format.lower() == 'png': + use_metadata = False + metadata = PngImagePlugin.PngInfo() + for key, value in image.info.items(): + if isinstance(key, str) and isinstance(value, str): + metadata.add_text(key, value) + use_metadata = True + image.save(output_bytes, format="PNG", pnginfo=(metadata if use_metadata else None), quality=opts.jpeg_quality) + + elif opts.samples_format.lower() in ("jpg", "jpeg", "webp"): + if image.mode == "RGBA": + image = image.convert("RGB") + parameters = image.info.get('parameters', None) + exif_bytes = piexif.dump({ + "Exif": { piexif.ExifIFD.UserComment: piexif.helper.UserComment.dump(parameters or "", encoding="unicode") } + }) + if opts.samples_format.lower() in ("jpg", "jpeg"): + image.save(output_bytes, format="JPEG", exif = exif_bytes, quality=opts.jpeg_quality) + else: + image.save(output_bytes, format="WEBP", exif = exif_bytes, quality=opts.jpeg_quality) + + else: + raise HTTPException(status_code=500, detail="Invalid image format") + + bytes_data = output_bytes.getvalue() + + return base64.b64encode(bytes_data) + + +def api_middleware(app: FastAPI): + rich_available = False + try: + if os.environ.get('WEBUI_RICH_EXCEPTIONS', None) is not None: + import anyio # importing just so it can be placed on silent list + import starlette # importing just so it can be placed on silent list + from rich.console import Console + console = Console() + rich_available = True + except Exception: + pass + + @app.middleware("http") + async def log_and_time(req: Request, call_next): + ts = time.time() + res: Response = await call_next(req) + duration = str(round(time.time() - ts, 4)) + res.headers["X-Process-Time"] = duration + endpoint = req.scope.get('path', 'err') + if shared.cmd_opts.api_log and endpoint.startswith('/sdapi'): + print('API {t} {code} {prot}/{ver} {method} {endpoint} {cli} {duration}'.format( + t=datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f"), + code=res.status_code, + ver=req.scope.get('http_version', '0.0'), + cli=req.scope.get('client', ('0:0.0.0', 0))[0], + prot=req.scope.get('scheme', 'err'), + method=req.scope.get('method', 'err'), + 
endpoint=endpoint, + duration=duration, + )) + return res + + def handle_exception(request: Request, e: Exception): + err = { + "error": type(e).__name__, + "detail": vars(e).get('detail', ''), + "body": vars(e).get('body', ''), + "errors": str(e), + } + if not isinstance(e, HTTPException): # do not print backtrace on known httpexceptions + message = f"API error: {request.method}: {request.url} {err}" + if rich_available: + print(message) + console.print_exception(show_locals=True, max_frames=2, extra_lines=1, suppress=[anyio, starlette], word_wrap=False, width=min([console.width, 200])) + else: + errors.report(message, exc_info=True) + return JSONResponse(status_code=vars(e).get('status_code', 500), content=jsonable_encoder(err)) + + @app.middleware("http") + async def exception_handling(request: Request, call_next): + try: + return await call_next(request) + except Exception as e: + return handle_exception(request, e) + + @app.exception_handler(Exception) + async def fastapi_exception_handler(request: Request, e: Exception): + return handle_exception(request, e) + + @app.exception_handler(HTTPException) + async def http_exception_handler(request: Request, e: HTTPException): + return handle_exception(request, e) + + +class Api: + def __init__(self, app: FastAPI, queue_lock: Lock): + if shared.cmd_opts.api_auth: + self.credentials = {} + for auth in shared.cmd_opts.api_auth.split(","): + user, password = auth.split(":") + self.credentials[user] = password + + self.router = APIRouter() + self.app = app + self.queue_lock = queue_lock + api_middleware(self.app) + self.add_api_route("/sdapi/v1/txt2img", self.text2imgapi, methods=["POST"], response_model=models.TextToImageResponse) + self.add_api_route("/sdapi/v1/img2img", self.img2imgapi, methods=["POST"], response_model=models.ImageToImageResponse) + self.add_api_route("/sdapi/v1/extra-single-image", self.extras_single_image_api, methods=["POST"], response_model=models.ExtrasSingleImageResponse) + self.add_api_route("/sdapi/v1/extra-batch-images", self.extras_batch_images_api, methods=["POST"], response_model=models.ExtrasBatchImagesResponse) + self.add_api_route("/sdapi/v1/png-info", self.pnginfoapi, methods=["POST"], response_model=models.PNGInfoResponse) + self.add_api_route("/sdapi/v1/progress", self.progressapi, methods=["GET"], response_model=models.ProgressResponse) + self.add_api_route("/sdapi/v1/interrogate", self.interrogateapi, methods=["POST"]) + self.add_api_route("/sdapi/v1/interrupt", self.interruptapi, methods=["POST"]) + self.add_api_route("/sdapi/v1/skip", self.skip, methods=["POST"]) + self.add_api_route("/sdapi/v1/options", self.get_config, methods=["GET"], response_model=models.OptionsModel) + self.add_api_route("/sdapi/v1/options", self.set_config, methods=["POST"]) + self.add_api_route("/sdapi/v1/cmd-flags", self.get_cmd_flags, methods=["GET"], response_model=models.FlagsModel) + self.add_api_route("/sdapi/v1/samplers", self.get_samplers, methods=["GET"], response_model=list[models.SamplerItem]) + self.add_api_route("/sdapi/v1/upscalers", self.get_upscalers, methods=["GET"], response_model=list[models.UpscalerItem]) + self.add_api_route("/sdapi/v1/latent-upscale-modes", self.get_latent_upscale_modes, methods=["GET"], response_model=list[models.LatentUpscalerModeItem]) + self.add_api_route("/sdapi/v1/sd-models", self.get_sd_models, methods=["GET"], response_model=list[models.SDModelItem]) + self.add_api_route("/sdapi/v1/sd-vae", self.get_sd_vaes, methods=["GET"], response_model=list[models.SDVaeItem]) + 
self.add_api_route("/sdapi/v1/hypernetworks", self.get_hypernetworks, methods=["GET"], response_model=list[models.HypernetworkItem]) + self.add_api_route("/sdapi/v1/face-restorers", self.get_face_restorers, methods=["GET"], response_model=list[models.FaceRestorerItem]) + self.add_api_route("/sdapi/v1/realesrgan-models", self.get_realesrgan_models, methods=["GET"], response_model=list[models.RealesrganItem]) + self.add_api_route("/sdapi/v1/prompt-styles", self.get_prompt_styles, methods=["GET"], response_model=list[models.PromptStyleItem]) + self.add_api_route("/sdapi/v1/embeddings", self.get_embeddings, methods=["GET"], response_model=models.EmbeddingsResponse) + self.add_api_route("/sdapi/v1/refresh-embeddings", self.refresh_embeddings, methods=["POST"]) + self.add_api_route("/sdapi/v1/refresh-checkpoints", self.refresh_checkpoints, methods=["POST"]) + self.add_api_route("/sdapi/v1/refresh-vae", self.refresh_vae, methods=["POST"]) + self.add_api_route("/sdapi/v1/create/embedding", self.create_embedding, methods=["POST"], response_model=models.CreateResponse) + self.add_api_route("/sdapi/v1/create/hypernetwork", self.create_hypernetwork, methods=["POST"], response_model=models.CreateResponse) + self.add_api_route("/sdapi/v1/train/embedding", self.train_embedding, methods=["POST"], response_model=models.TrainResponse) + self.add_api_route("/sdapi/v1/train/hypernetwork", self.train_hypernetwork, methods=["POST"], response_model=models.TrainResponse) + self.add_api_route("/sdapi/v1/memory", self.get_memory, methods=["GET"], response_model=models.MemoryResponse) + self.add_api_route("/sdapi/v1/unload-checkpoint", self.unloadapi, methods=["POST"]) + self.add_api_route("/sdapi/v1/reload-checkpoint", self.reloadapi, methods=["POST"]) + self.add_api_route("/sdapi/v1/scripts", self.get_scripts_list, methods=["GET"], response_model=models.ScriptsList) + self.add_api_route("/sdapi/v1/script-info", self.get_script_info, methods=["GET"], response_model=list[models.ScriptInfo]) + self.add_api_route("/sdapi/v1/extensions", self.get_extensions_list, methods=["GET"], response_model=list[models.ExtensionItem]) + + if shared.cmd_opts.api_server_stop: + self.add_api_route("/sdapi/v1/server-kill", self.kill_webui, methods=["POST"]) + self.add_api_route("/sdapi/v1/server-restart", self.restart_webui, methods=["POST"]) + self.add_api_route("/sdapi/v1/server-stop", self.stop_webui, methods=["POST"]) + + self.default_script_arg_txt2img = [] + self.default_script_arg_img2img = [] + + txt2img_script_runner = scripts.scripts_txt2img + img2img_script_runner = scripts.scripts_img2img + + if not txt2img_script_runner.scripts or not img2img_script_runner.scripts: + ui.create_ui() + + if not txt2img_script_runner.scripts: + txt2img_script_runner.initialize_scripts(False) + if not self.default_script_arg_txt2img: + self.default_script_arg_txt2img = self.init_default_script_args(txt2img_script_runner) + + if not img2img_script_runner.scripts: + img2img_script_runner.initialize_scripts(True) + if not self.default_script_arg_img2img: + self.default_script_arg_img2img = self.init_default_script_args(img2img_script_runner) + + + + def add_api_route(self, path: str, endpoint, **kwargs): + if shared.cmd_opts.api_auth: + return self.app.add_api_route(path, endpoint, dependencies=[Depends(self.auth)], **kwargs) + return self.app.add_api_route(path, endpoint, **kwargs) + + def auth(self, credentials: HTTPBasicCredentials = Depends(HTTPBasic())): + if credentials.username in self.credentials: + if compare_digest(credentials.password, 
self.credentials[credentials.username]): + return True + + raise HTTPException(status_code=401, detail="Incorrect username or password", headers={"WWW-Authenticate": "Basic"}) + + def get_selectable_script(self, script_name, script_runner): + if script_name is None or script_name == "": + return None, None + + script_idx = script_name_to_index(script_name, script_runner.selectable_scripts) + script = script_runner.selectable_scripts[script_idx] + return script, script_idx + + def get_scripts_list(self): + t2ilist = [script.name for script in scripts.scripts_txt2img.scripts if script.name is not None] + i2ilist = [script.name for script in scripts.scripts_img2img.scripts if script.name is not None] + + return models.ScriptsList(txt2img=t2ilist, img2img=i2ilist) + + def get_script_info(self): + res = [] + + for script_list in [scripts.scripts_txt2img.scripts, scripts.scripts_img2img.scripts]: + res += [script.api_info for script in script_list if script.api_info is not None] + + return res + + def get_script(self, script_name, script_runner): + if script_name is None or script_name == "": + return None, None + + script_idx = script_name_to_index(script_name, script_runner.scripts) + return script_runner.scripts[script_idx] + + def init_default_script_args(self, script_runner): + #find max idx from the scripts in runner and generate a none array to init script_args + last_arg_index = 1 + for script in script_runner.scripts: + if last_arg_index < script.args_to: + last_arg_index = script.args_to + # None everywhere except position 0 to initialize script args + script_args = [None]*last_arg_index + script_args[0] = 0 + + # get default values + with gr.Blocks(): # will throw errors calling ui function without this + for script in script_runner.scripts: + if script.ui(script.is_img2img): + ui_default_values = [] + for elem in script.ui(script.is_img2img): + ui_default_values.append(elem.value) + script_args[script.args_from:script.args_to] = ui_default_values + return script_args + + def init_script_args(self, request, default_script_args, selectable_scripts, selectable_idx, script_runner, *, input_script_args=None): + script_args = default_script_args.copy() + + if input_script_args is not None: + for index, value in input_script_args.items(): + script_args[index] = value + + # position 0 in script_arg is the idx+1 of the selectable script that is going to be run when using scripts.scripts_*2img.run() + if selectable_scripts: + script_args[selectable_scripts.args_from:selectable_scripts.args_to] = request.script_args + script_args[0] = selectable_idx + 1 + + # Now check for always on scripts + if request.alwayson_scripts: + for alwayson_script_name in request.alwayson_scripts.keys(): + alwayson_script = self.get_script(alwayson_script_name, script_runner) + if alwayson_script is None: + raise HTTPException(status_code=422, detail=f"always on script {alwayson_script_name} not found") + # Selectable script in always on script param check + if alwayson_script.alwayson is False: + raise HTTPException(status_code=422, detail="Cannot have a selectable script in the always on scripts params") + # always on script with no arg should always run so you don't really need to add them to the requests + if "args" in request.alwayson_scripts[alwayson_script_name]: + # min between arg length in scriptrunner and arg length in the request + for idx in range(0, min((alwayson_script.args_to - alwayson_script.args_from), len(request.alwayson_scripts[alwayson_script_name]["args"]))): + 
script_args[alwayson_script.args_from + idx] = request.alwayson_scripts[alwayson_script_name]["args"][idx] + return script_args + + def apply_infotext(self, request, tabname, *, script_runner=None, mentioned_script_args=None): + """Processes `infotext` field from the `request`, and sets other fields of the `request` accoring to what's in infotext. + + If request already has a field set, and that field is encountered in infotext too, the value from infotext is ignored. + + Additionally, fills `mentioned_script_args` dict with index: value pairs for script arguments read from infotext. + """ + + if not request.infotext: + return {} + + possible_fields = infotext_utils.paste_fields[tabname]["fields"] + set_fields = request.model_dump(exclude_unset=True) if hasattr(request, "request") else request.dict(exclude_unset=True) # pydantic v1/v2 have differenrt names for this + params = infotext_utils.parse_generation_parameters(request.infotext) + + def get_field_value(field, params): + value = field.function(params) if field.function else params.get(field.label) + if value is None: + return None + + if field.api in request.__fields__: + target_type = request.__fields__[field.api].type_ + else: + target_type = type(field.component.value) + + if target_type == type(None): + return None + + if isinstance(value, dict) and value.get('__type__') == 'generic_update': # this is a gradio.update rather than a value + value = value.get('value') + + if value is not None and not isinstance(value, target_type): + value = target_type(value) + + return value + + for field in possible_fields: + if not field.api: + continue + + if field.api in set_fields: + continue + + value = get_field_value(field, params) + if value is not None: + setattr(request, field.api, value) + + if request.override_settings is None: + request.override_settings = {} + + overriden_settings = infotext_utils.get_override_settings(params) + for _, setting_name, value in overriden_settings: + if setting_name not in request.override_settings: + request.override_settings[setting_name] = value + + if script_runner is not None and mentioned_script_args is not None: + indexes = {v: i for i, v in enumerate(script_runner.inputs)} + script_fields = ((field, indexes[field.component]) for field in possible_fields if field.component in indexes) + + for field, index in script_fields: + value = get_field_value(field, params) + + if value is None: + continue + + mentioned_script_args[index] = value + + return params + + def text2imgapi(self, txt2imgreq: models.StableDiffusionTxt2ImgProcessingAPI): + task_id = txt2imgreq.force_task_id or create_task_id("txt2img") + + script_runner = scripts.scripts_txt2img + + infotext_script_args = {} + self.apply_infotext(txt2imgreq, "txt2img", script_runner=script_runner, mentioned_script_args=infotext_script_args) + + selectable_scripts, selectable_script_idx = self.get_selectable_script(txt2imgreq.script_name, script_runner) + + populate = txt2imgreq.copy(update={ # Override __init__ params + "sampler_name": validate_sampler_name(txt2imgreq.sampler_name or txt2imgreq.sampler_index), + "do_not_save_samples": not txt2imgreq.save_images, + "do_not_save_grid": not txt2imgreq.save_images, + }) + if populate.sampler_name: + populate.sampler_index = None # prevent a warning later on + + args = vars(populate) + args.pop('script_name', None) + args.pop('script_args', None) # will refeed them to the pipeline directly after initializing them + args.pop('alwayson_scripts', None) + args.pop('infotext', None) + + script_args = 
self.init_script_args(txt2imgreq, self.default_script_arg_txt2img, selectable_scripts, selectable_script_idx, script_runner, input_script_args=infotext_script_args) + + send_images = args.pop('send_images', True) + args.pop('save_images', None) + + add_task_to_queue(task_id) + + with self.queue_lock: + with closing(StableDiffusionProcessingTxt2Img(sd_model=shared.sd_model, **args)) as p: + p.is_api = True + p.scripts = script_runner + p.outpath_grids = opts.outdir_txt2img_grids + p.outpath_samples = opts.outdir_txt2img_samples + + try: + shared.state.begin(job="scripts_txt2img") + start_task(task_id) + if selectable_scripts is not None: + p.script_args = script_args + processed = scripts.scripts_txt2img.run(p, *p.script_args) # Need to pass args as list here + else: + p.script_args = tuple(script_args) # Need to pass args as tuple here + processed = process_images(p) + finish_task(task_id) + finally: + shared.state.end() + shared.total_tqdm.clear() + + b64images = [ + encode_pil_to_base64(image) + for image in itertools.chain(processed.images, processed.extra_images) + if send_images + ] + + return models.TextToImageResponse(images=b64images, parameters=vars(txt2imgreq), info=processed.js()) + + def img2imgapi(self, img2imgreq: models.StableDiffusionImg2ImgProcessingAPI): + task_id = img2imgreq.force_task_id or create_task_id("img2img") + + init_images = img2imgreq.init_images + if init_images is None: + raise HTTPException(status_code=404, detail="Init image not found") + + mask = img2imgreq.mask + if mask: + mask = decode_base64_to_image(mask) + + script_runner = scripts.scripts_img2img + + infotext_script_args = {} + self.apply_infotext(img2imgreq, "img2img", script_runner=script_runner, mentioned_script_args=infotext_script_args) + + selectable_scripts, selectable_script_idx = self.get_selectable_script(img2imgreq.script_name, script_runner) + + populate = img2imgreq.copy(update={ # Override __init__ params + "sampler_name": validate_sampler_name(img2imgreq.sampler_name or img2imgreq.sampler_index), + "do_not_save_samples": not img2imgreq.save_images, + "do_not_save_grid": not img2imgreq.save_images, + "mask": mask, + }) + if populate.sampler_name: + populate.sampler_index = None # prevent a warning later on + + args = vars(populate) + args.pop('include_init_images', None) # this is meant to be done by "exclude": True in model, but it's for a reason that I cannot determine. 
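# --- Illustrative usage sketch (editorial, not part of this patch) ----------
# A minimal client call against the txt2img endpoint defined above. The host,
# port, and the exact payload fields are assumptions (any field of
# StableDiffusionProcessingTxt2Img plus the generated extras such as
# send_images/save_images is accepted); the response shape matches
# TextToImageResponse: {"images": [...], "parameters": {...}, "info": "..."}.
import base64
import requests

def txt2img(prompt: str, steps: int = 20, url: str = "http://127.0.0.1:7860"):
    payload = {"prompt": prompt, "steps": steps, "send_images": True, "save_images": False}
    r = requests.post(f"{url}/sdapi/v1/txt2img", json=payload, timeout=600)
    r.raise_for_status()
    data = r.json()
    # each entry in data["images"] is a base64-encoded PNG/JPEG
    return [base64.b64decode(img) for img in data["images"]]

# images = txt2img("a watercolor fox", steps=12)
# ----------------------------------------------------------------------------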
+ args.pop('script_name', None) + args.pop('script_args', None) # will refeed them to the pipeline directly after initializing them + args.pop('alwayson_scripts', None) + args.pop('infotext', None) + + script_args = self.init_script_args(img2imgreq, self.default_script_arg_img2img, selectable_scripts, selectable_script_idx, script_runner, input_script_args=infotext_script_args) + + send_images = args.pop('send_images', True) + args.pop('save_images', None) + + add_task_to_queue(task_id) + + with self.queue_lock: + with closing(StableDiffusionProcessingImg2Img(sd_model=shared.sd_model, **args)) as p: + p.init_images = [decode_base64_to_image(x) for x in init_images] + p.is_api = True + p.scripts = script_runner + p.outpath_grids = opts.outdir_img2img_grids + p.outpath_samples = opts.outdir_img2img_samples + + try: + shared.state.begin(job="scripts_img2img") + start_task(task_id) + if selectable_scripts is not None: + p.script_args = script_args + processed = scripts.scripts_img2img.run(p, *p.script_args) # Need to pass args as list here + else: + p.script_args = tuple(script_args) # Need to pass args as tuple here + processed = process_images(p) + finish_task(task_id) + finally: + shared.state.end() + shared.total_tqdm.clear() + + b64images = [ + encode_pil_to_base64(image) + for image in itertools.chain(processed.images, processed.extra_images) + if send_images + ] + + if not img2imgreq.include_init_images: + img2imgreq.init_images = None + img2imgreq.mask = None + + return models.ImageToImageResponse(images=b64images, parameters=vars(img2imgreq), info=processed.js()) + + def extras_single_image_api(self, req: models.ExtrasSingleImageRequest): + reqDict = setUpscalers(req) + + reqDict['image'] = decode_base64_to_image(reqDict['image']) + + with self.queue_lock: + result = postprocessing.run_extras(extras_mode=0, image_folder="", input_dir="", output_dir="", save_output=False, **reqDict) + + return models.ExtrasSingleImageResponse(image=encode_pil_to_base64(result[0][0]), html_info=result[1]) + + def extras_batch_images_api(self, req: models.ExtrasBatchImagesRequest): + reqDict = setUpscalers(req) + + image_list = reqDict.pop('imageList', []) + image_folder = [decode_base64_to_image(x.data) for x in image_list] + + with self.queue_lock: + result = postprocessing.run_extras(extras_mode=1, image_folder=image_folder, image="", input_dir="", output_dir="", save_output=False, **reqDict) + + return models.ExtrasBatchImagesResponse(images=list(map(encode_pil_to_base64, result[0])), html_info=result[1]) + + def pnginfoapi(self, req: models.PNGInfoRequest): + image = decode_base64_to_image(req.image.strip()) + if image is None: + return models.PNGInfoResponse(info="") + + geninfo, items = images.read_info_from_image(image) + if geninfo is None: + geninfo = "" + + params = infotext_utils.parse_generation_parameters(geninfo) + script_callbacks.infotext_pasted_callback(geninfo, params) + + return models.PNGInfoResponse(info=geninfo, items=items, parameters=params) + + def progressapi(self, req: models.ProgressRequest = Depends()): + # copy from check_progress_call of ui.py + + if shared.state.job_count == 0: + return models.ProgressResponse(progress=0, eta_relative=0, state=shared.state.dict(), textinfo=shared.state.textinfo) + + # avoid dividing zero + progress = 0.01 + + if shared.state.job_count > 0: + progress += shared.state.job_no / shared.state.job_count + if shared.state.sampling_steps > 0: + progress += 1 / shared.state.job_count * shared.state.sampling_step / shared.state.sampling_steps + + 
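# Editorial note (not part of this patch): the two lines below derive the ETA by
# linear extrapolation from the fraction of work done. With elapsed time T and
# progress p in (0, 1]:
#     eta          = T / p        # projected total duration
#     eta_relative = eta - T      # projected time remaining
# For example, 30 s elapsed at p = 0.25 gives eta = 120 s and eta_relative = 90 s.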
time_since_start = time.time() - shared.state.time_start + eta = (time_since_start/progress) + eta_relative = eta-time_since_start + + progress = min(progress, 1) + + shared.state.set_current_image() + + current_image = None + if shared.state.current_image and not req.skip_current_image: + current_image = encode_pil_to_base64(shared.state.current_image) + + return models.ProgressResponse(progress=progress, eta_relative=eta_relative, state=shared.state.dict(), current_image=current_image, textinfo=shared.state.textinfo, current_task=current_task) + + def interrogateapi(self, interrogatereq: models.InterrogateRequest): + image_b64 = interrogatereq.image + if image_b64 is None: + raise HTTPException(status_code=404, detail="Image not found") + + img = decode_base64_to_image(image_b64) + img = img.convert('RGB') + + # Override object param + with self.queue_lock: + if interrogatereq.model == "clip": + processed = shared.interrogator.interrogate(img) + elif interrogatereq.model == "deepdanbooru": + processed = deepbooru.model.tag(img) + else: + raise HTTPException(status_code=404, detail="Model not found") + + return models.InterrogateResponse(caption=processed) + + def interruptapi(self): + shared.state.interrupt() + + return {} + + def unloadapi(self): + sd_models.unload_model_weights() + + return {} + + def reloadapi(self): + sd_models.send_model_to_device(shared.sd_model) + + return {} + + def skip(self): + shared.state.skip() + + def get_config(self): + options = {} + for key in shared.opts.data.keys(): + metadata = shared.opts.data_labels.get(key) + if(metadata is not None): + options.update({key: shared.opts.data.get(key, shared.opts.data_labels.get(key).default)}) + else: + options.update({key: shared.opts.data.get(key, None)}) + + return options + + def set_config(self, req: dict[str, Any]): + checkpoint_name = req.get("sd_model_checkpoint", None) + if checkpoint_name is not None and checkpoint_name not in sd_models.checkpoint_aliases: + raise RuntimeError(f"model {checkpoint_name!r} not found") + + for k, v in req.items(): + shared.opts.set(k, v, is_api=True) + + shared.opts.save(shared.config_filename) + return + + def get_cmd_flags(self): + return vars(shared.cmd_opts) + + def get_samplers(self): + return [{"name": sampler[0], "aliases":sampler[2], "options":sampler[3]} for sampler in sd_samplers.all_samplers] + + def get_upscalers(self): + return [ + { + "name": upscaler.name, + "model_name": upscaler.scaler.model_name, + "model_path": upscaler.data_path, + "model_url": None, + "scale": upscaler.scale, + } + for upscaler in shared.sd_upscalers + ] + + def get_latent_upscale_modes(self): + return [ + { + "name": upscale_mode, + } + for upscale_mode in [*(shared.latent_upscale_modes or {})] + ] + + def get_sd_models(self): + import modules.sd_models as sd_models + return [{"title": x.title, "model_name": x.model_name, "hash": x.shorthash, "sha256": x.sha256, "filename": x.filename, "config": find_checkpoint_config_near_filename(x)} for x in sd_models.checkpoints_list.values()] + + def get_sd_vaes(self): + import modules.sd_vae as sd_vae + return [{"model_name": x, "filename": sd_vae.vae_dict[x]} for x in sd_vae.vae_dict.keys()] + + def get_hypernetworks(self): + return [{"name": name, "path": shared.hypernetworks[name]} for name in shared.hypernetworks] + + def get_face_restorers(self): + return [{"name":x.name(), "cmd_dir": getattr(x, "cmd_dir", None)} for x in shared.face_restorers] + + def get_realesrgan_models(self): + return [{"name":x.name,"path":x.data_path, "scale":x.scale} 
for x in get_realesrgan_models(None)] + + def get_prompt_styles(self): + styleList = [] + for k in shared.prompt_styles.styles: + style = shared.prompt_styles.styles[k] + styleList.append({"name":style[0], "prompt": style[1], "negative_prompt": style[2]}) + + return styleList + + def get_embeddings(self): + db = sd_hijack.model_hijack.embedding_db + + def convert_embedding(embedding): + return { + "step": embedding.step, + "sd_checkpoint": embedding.sd_checkpoint, + "sd_checkpoint_name": embedding.sd_checkpoint_name, + "shape": embedding.shape, + "vectors": embedding.vectors, + } + + def convert_embeddings(embeddings): + return {embedding.name: convert_embedding(embedding) for embedding in embeddings.values()} + + return { + "loaded": convert_embeddings(db.word_embeddings), + "skipped": convert_embeddings(db.skipped_embeddings), + } + + def refresh_embeddings(self): + with self.queue_lock: + sd_hijack.model_hijack.embedding_db.load_textual_inversion_embeddings(force_reload=True) + + def refresh_checkpoints(self): + with self.queue_lock: + shared.refresh_checkpoints() + + def refresh_vae(self): + with self.queue_lock: + shared_items.refresh_vae_list() + + def create_embedding(self, args: dict): + try: + shared.state.begin(job="create_embedding") + filename = create_embedding(**args) # create empty embedding + sd_hijack.model_hijack.embedding_db.load_textual_inversion_embeddings() # reload embeddings so new one can be immediately used + return models.CreateResponse(info=f"create embedding filename: {filename}") + except AssertionError as e: + return models.TrainResponse(info=f"create embedding error: {e}") + finally: + shared.state.end() + + + def create_hypernetwork(self, args: dict): + try: + shared.state.begin(job="create_hypernetwork") + filename = create_hypernetwork(**args) # create empty embedding + return models.CreateResponse(info=f"create hypernetwork filename: {filename}") + except AssertionError as e: + return models.TrainResponse(info=f"create hypernetwork error: {e}") + finally: + shared.state.end() + + def train_embedding(self, args: dict): + try: + shared.state.begin(job="train_embedding") + apply_optimizations = shared.opts.training_xattention_optimizations + error = None + filename = '' + if not apply_optimizations: + sd_hijack.undo_optimizations() + try: + embedding, filename = train_embedding(**args) # can take a long time to complete + except Exception as e: + error = e + finally: + if not apply_optimizations: + sd_hijack.apply_optimizations() + return models.TrainResponse(info=f"train embedding complete: filename: {filename} error: {error}") + except Exception as msg: + return models.TrainResponse(info=f"train embedding error: {msg}") + finally: + shared.state.end() + + def train_hypernetwork(self, args: dict): + try: + shared.state.begin(job="train_hypernetwork") + shared.loaded_hypernetworks = [] + apply_optimizations = shared.opts.training_xattention_optimizations + error = None + filename = '' + if not apply_optimizations: + sd_hijack.undo_optimizations() + try: + hypernetwork, filename = train_hypernetwork(**args) + except Exception as e: + error = e + finally: + shared.sd_model.cond_stage_model.to(devices.device) + shared.sd_model.first_stage_model.to(devices.device) + if not apply_optimizations: + sd_hijack.apply_optimizations() + shared.state.end() + return models.TrainResponse(info=f"train embedding complete: filename: {filename} error: {error}") + except Exception as exc: + return models.TrainResponse(info=f"train embedding error: {exc}") + finally: + 
shared.state.end() + + def get_memory(self): + try: + import os + import psutil + process = psutil.Process(os.getpid()) + res = process.memory_info() # only rss is cross-platform guaranteed so we dont rely on other values + ram_total = 100 * res.rss / process.memory_percent() # and total memory is calculated as actual value is not cross-platform safe + ram = { 'free': ram_total - res.rss, 'used': res.rss, 'total': ram_total } + except Exception as err: + ram = { 'error': f'{err}' } + try: + import torch + if torch.cuda.is_available(): + s = torch.cuda.mem_get_info() + system = { 'free': s[0], 'used': s[1] - s[0], 'total': s[1] } + s = dict(torch.cuda.memory_stats(shared.device)) + allocated = { 'current': s['allocated_bytes.all.current'], 'peak': s['allocated_bytes.all.peak'] } + reserved = { 'current': s['reserved_bytes.all.current'], 'peak': s['reserved_bytes.all.peak'] } + active = { 'current': s['active_bytes.all.current'], 'peak': s['active_bytes.all.peak'] } + inactive = { 'current': s['inactive_split_bytes.all.current'], 'peak': s['inactive_split_bytes.all.peak'] } + warnings = { 'retries': s['num_alloc_retries'], 'oom': s['num_ooms'] } + cuda = { + 'system': system, + 'active': active, + 'allocated': allocated, + 'reserved': reserved, + 'inactive': inactive, + 'events': warnings, + } + else: + cuda = {'error': 'unavailable'} + except Exception as err: + cuda = {'error': f'{err}'} + return models.MemoryResponse(ram=ram, cuda=cuda) + + def get_extensions_list(self): + from modules import extensions + extensions.list_extensions() + ext_list = [] + for ext in extensions.extensions: + ext: extensions.Extension + ext.read_info_from_repo() + if ext.remote is not None: + ext_list.append({ + "name": ext.name, + "remote": ext.remote, + "branch": ext.branch, + "commit_hash":ext.commit_hash, + "commit_date":ext.commit_date, + "version":ext.version, + "enabled":ext.enabled + }) + return ext_list + + def launch(self, server_name, port, root_path): + self.app.include_router(self.router) + uvicorn.run( + self.app, + host=server_name, + port=port, + timeout_keep_alive=shared.cmd_opts.timeout_keep_alive, + root_path=root_path, + ssl_keyfile=shared.cmd_opts.tls_keyfile, + ssl_certfile=shared.cmd_opts.tls_certfile + ) + + def kill_webui(self): + restart.stop_program() + + def restart_webui(self): + if restart.is_restartable(): + restart.restart_program() + return Response(status_code=501) + + def stop_webui(request): + shared.state.server_command = "stop" + return Response("Stopping.") + diff --git a/modules/api/models.py b/modules/api/models.py new file mode 100644 index 0000000000000000000000000000000000000000..16edf11cf83ebfda679455040e691d4df96e9d8b --- /dev/null +++ b/modules/api/models.py @@ -0,0 +1,322 @@ +import inspect + +from pydantic import BaseModel, Field, create_model +from typing import Any, Optional, Literal +from inflection import underscore +from modules.processing import StableDiffusionProcessingTxt2Img, StableDiffusionProcessingImg2Img +from modules.shared import sd_upscalers, opts, parser + +API_NOT_ALLOWED = [ + "self", + "kwargs", + "sd_model", + "outpath_samples", + "outpath_grids", + "sampler_index", + # "do_not_save_samples", + # "do_not_save_grid", + "extra_generation_params", + "overlay_images", + "do_not_reload_embeddings", + "seed_enable_extras", + "prompt_for_display", + "sampler_noise_scheduler_override", + "ddim_discretize" +] + +class ModelDef(BaseModel): + """Assistance Class for Pydantic Dynamic Model Generation""" + + field: str + field_alias: str + field_type: Any 
+ field_value: Any + field_exclude: bool = False + + +class PydanticModelGenerator: + """ + Takes in created classes and stubs them out in a way FastAPI/Pydantic is happy about: + source_data is a snapshot of the default values produced by the class + params are the names of the actual keys required by __init__ + """ + + def __init__( + self, + model_name: str = None, + class_instance = None, + additional_fields = None, + ): + def field_type_generator(k, v): + field_type = v.annotation + + if field_type == 'Image': + # images are sent as base64 strings via API + field_type = 'str' + + return Optional[field_type] + + def merge_class_params(class_): + all_classes = list(filter(lambda x: x is not object, inspect.getmro(class_))) + parameters = {} + for classes in all_classes: + parameters = {**parameters, **inspect.signature(classes.__init__).parameters} + return parameters + + self._model_name = model_name + self._class_data = merge_class_params(class_instance) + + self._model_def = [ + ModelDef( + field=underscore(k), + field_alias=k, + field_type=field_type_generator(k, v), + field_value=None if isinstance(v.default, property) else v.default + ) + for (k,v) in self._class_data.items() if k not in API_NOT_ALLOWED + ] + + for fields in additional_fields: + self._model_def.append(ModelDef( + field=underscore(fields["key"]), + field_alias=fields["key"], + field_type=fields["type"], + field_value=fields["default"], + field_exclude=fields["exclude"] if "exclude" in fields else False)) + + def generate_model(self): + """ + Creates a pydantic BaseModel + from the json and overrides provided at initialization + """ + fields = { + d.field: (d.field_type, Field(default=d.field_value, alias=d.field_alias, exclude=d.field_exclude)) for d in self._model_def + } + DynamicModel = create_model(self._model_name, **fields) + DynamicModel.__config__.allow_population_by_field_name = True + DynamicModel.__config__.allow_mutation = True + return DynamicModel + +StableDiffusionTxt2ImgProcessingAPI = PydanticModelGenerator( + "StableDiffusionProcessingTxt2Img", + StableDiffusionProcessingTxt2Img, + [ + {"key": "sampler_index", "type": str, "default": "Euler"}, + {"key": "script_name", "type": str, "default": None}, + {"key": "script_args", "type": list, "default": []}, + {"key": "send_images", "type": bool, "default": True}, + {"key": "save_images", "type": bool, "default": False}, + {"key": "alwayson_scripts", "type": dict, "default": {}}, + {"key": "force_task_id", "type": str, "default": None}, + {"key": "infotext", "type": str, "default": None}, + ] +).generate_model() + +StableDiffusionImg2ImgProcessingAPI = PydanticModelGenerator( + "StableDiffusionProcessingImg2Img", + StableDiffusionProcessingImg2Img, + [ + {"key": "sampler_index", "type": str, "default": "Euler"}, + {"key": "init_images", "type": list, "default": None}, + {"key": "denoising_strength", "type": float, "default": 0.75}, + {"key": "mask", "type": str, "default": None}, + {"key": "include_init_images", "type": bool, "default": False, "exclude" : True}, + {"key": "script_name", "type": str, "default": None}, + {"key": "script_args", "type": list, "default": []}, + {"key": "send_images", "type": bool, "default": True}, + {"key": "save_images", "type": bool, "default": False}, + {"key": "alwayson_scripts", "type": dict, "default": {}}, + {"key": "force_task_id", "type": str, "default": None}, + {"key": "infotext", "type": str, "default": None}, + ] +).generate_model() + +class TextToImageResponse(BaseModel): + images: list[str] = 
Field(default=None, title="Image", description="The generated image in base64 format.") + parameters: dict + info: str + +class ImageToImageResponse(BaseModel): + images: list[str] = Field(default=None, title="Image", description="The generated image in base64 format.") + parameters: dict + info: str + +class ExtrasBaseRequest(BaseModel): + resize_mode: Literal[0, 1] = Field(default=0, title="Resize Mode", description="Sets the resize mode: 0 to upscale by upscaling_resize amount, 1 to upscale up to upscaling_resize_h x upscaling_resize_w.") + show_extras_results: bool = Field(default=True, title="Show results", description="Should the backend return the generated image?") + gfpgan_visibility: float = Field(default=0, title="GFPGAN Visibility", ge=0, le=1, allow_inf_nan=False, description="Sets the visibility of GFPGAN, values should be between 0 and 1.") + codeformer_visibility: float = Field(default=0, title="CodeFormer Visibility", ge=0, le=1, allow_inf_nan=False, description="Sets the visibility of CodeFormer, values should be between 0 and 1.") + codeformer_weight: float = Field(default=0, title="CodeFormer Weight", ge=0, le=1, allow_inf_nan=False, description="Sets the weight of CodeFormer, values should be between 0 and 1.") + upscaling_resize: float = Field(default=2, title="Upscaling Factor", ge=1, le=8, description="By how much to upscale the image, only used when resize_mode=0.") + upscaling_resize_w: int = Field(default=512, title="Target Width", ge=1, description="Target width for the upscaler to hit. Only used when resize_mode=1.") + upscaling_resize_h: int = Field(default=512, title="Target Height", ge=1, description="Target height for the upscaler to hit. Only used when resize_mode=1.") + upscaling_crop: bool = Field(default=True, title="Crop to fit", description="Should the upscaler crop the image to fit in the chosen size?") + upscaler_1: str = Field(default="None", title="Main upscaler", description=f"The name of the main upscaler to use, it has to be one of this list: {' , '.join([x.name for x in sd_upscalers])}") + upscaler_2: str = Field(default="None", title="Secondary upscaler", description=f"The name of the secondary upscaler to use, it has to be one of this list: {' , '.join([x.name for x in sd_upscalers])}") + extras_upscaler_2_visibility: float = Field(default=0, title="Secondary upscaler visibility", ge=0, le=1, allow_inf_nan=False, description="Sets the visibility of secondary upscaler, values should be between 0 and 1.") + upscale_first: bool = Field(default=False, title="Upscale first", description="Should the upscaler run before restoring faces?") + +class ExtraBaseResponse(BaseModel): + html_info: str = Field(title="HTML info", description="A series of HTML tags containing the process info.") + +class ExtrasSingleImageRequest(ExtrasBaseRequest): + image: str = Field(default="", title="Image", description="Image to work on, must be a Base64 string containing the image's data.") + +class ExtrasSingleImageResponse(ExtraBaseResponse): + image: str = Field(default=None, title="Image", description="The generated image in base64 format.") + +class FileData(BaseModel): + data: str = Field(title="File data", description="Base64 representation of the file") + name: str = Field(title="File name") + +class ExtrasBatchImagesRequest(ExtrasBaseRequest): + imageList: list[FileData] = Field(title="Images", description="List of images to work on. 
Must be Base64 strings") + +class ExtrasBatchImagesResponse(ExtraBaseResponse): + images: list[str] = Field(title="Images", description="The generated images in base64 format.") + +class PNGInfoRequest(BaseModel): + image: str = Field(title="Image", description="The base64 encoded PNG image") + +class PNGInfoResponse(BaseModel): + info: str = Field(title="Image info", description="A string with the parameters used to generate the image") + items: dict = Field(title="Items", description="A dictionary containing all the other fields the image had") + parameters: dict = Field(title="Parameters", description="A dictionary with parsed generation info fields") + +class ProgressRequest(BaseModel): + skip_current_image: bool = Field(default=False, title="Skip current image", description="Skip current image serialization") + +class ProgressResponse(BaseModel): + progress: float = Field(title="Progress", description="The progress with a range of 0 to 1") + eta_relative: float = Field(title="ETA in secs") + state: dict = Field(title="State", description="The current state snapshot") + current_image: str = Field(default=None, title="Current image", description="The current image in base64 format. opts.show_progress_every_n_steps is required for this to work.") + textinfo: str = Field(default=None, title="Info text", description="Info text used by WebUI.") + +class InterrogateRequest(BaseModel): + image: str = Field(default="", title="Image", description="Image to work on, must be a Base64 string containing the image's data.") + model: str = Field(default="clip", title="Model", description="The interrogate model used.") + +class InterrogateResponse(BaseModel): + caption: str = Field(default=None, title="Caption", description="The generated caption for the image.") + +class TrainResponse(BaseModel): + info: str = Field(title="Train info", description="Response string from train embedding or hypernetwork task.") + +class CreateResponse(BaseModel): + info: str = Field(title="Create info", description="Response string from create embedding or hypernetwork task.") + +fields = {} +for key, metadata in opts.data_labels.items(): + value = opts.data.get(key) + optType = opts.typemap.get(type(metadata.default), type(metadata.default)) if metadata.default else Any + + if metadata is not None: + fields.update({key: (Optional[optType], Field(default=metadata.default, description=metadata.label))}) + else: + fields.update({key: (Optional[optType], Field())}) + +OptionsModel = create_model("Options", **fields) + +flags = {} +_options = vars(parser)['_option_string_actions'] +for key in _options: + if(_options[key].dest != 'help'): + flag = _options[key] + _type = str + if _options[key].default is not None: + _type = type(_options[key].default) + flags.update({flag.dest: (_type, Field(default=flag.default, description=flag.help))}) + +FlagsModel = create_model("Flags", **flags) + +class SamplerItem(BaseModel): + name: str = Field(title="Name") + aliases: list[str] = Field(title="Aliases") + options: dict[str, str] = Field(title="Options") + +class UpscalerItem(BaseModel): + name: str = Field(title="Name") + model_name: Optional[str] = Field(title="Model Name") + model_path: Optional[str] = Field(title="Path") + model_url: Optional[str] = Field(title="URL") + scale: Optional[float] = Field(title="Scale") + +class LatentUpscalerModeItem(BaseModel): + name: str = Field(title="Name") + +class SDModelItem(BaseModel): + title: str = Field(title="Title") + model_name: str = Field(title="Model Name") + hash: Optional[str] = 
Field(title="Short hash") + sha256: Optional[str] = Field(title="sha256 hash") + filename: str = Field(title="Filename") + config: Optional[str] = Field(title="Config file") + +class SDVaeItem(BaseModel): + model_name: str = Field(title="Model Name") + filename: str = Field(title="Filename") + +class HypernetworkItem(BaseModel): + name: str = Field(title="Name") + path: Optional[str] = Field(title="Path") + +class FaceRestorerItem(BaseModel): + name: str = Field(title="Name") + cmd_dir: Optional[str] = Field(title="Path") + +class RealesrganItem(BaseModel): + name: str = Field(title="Name") + path: Optional[str] = Field(title="Path") + scale: Optional[int] = Field(title="Scale") + +class PromptStyleItem(BaseModel): + name: str = Field(title="Name") + prompt: Optional[str] = Field(title="Prompt") + negative_prompt: Optional[str] = Field(title="Negative Prompt") + + +class EmbeddingItem(BaseModel): + step: Optional[int] = Field(title="Step", description="The number of steps that were used to train this embedding, if available") + sd_checkpoint: Optional[str] = Field(title="SD Checkpoint", description="The hash of the checkpoint this embedding was trained on, if available") + sd_checkpoint_name: Optional[str] = Field(title="SD Checkpoint Name", description="The name of the checkpoint this embedding was trained on, if available. Note that this is the name that was used by the trainer; for a stable identifier, use `sd_checkpoint` instead") + shape: int = Field(title="Shape", description="The length of each individual vector in the embedding") + vectors: int = Field(title="Vectors", description="The number of vectors in the embedding") + +class EmbeddingsResponse(BaseModel): + loaded: dict[str, EmbeddingItem] = Field(title="Loaded", description="Embeddings loaded for the current model") + skipped: dict[str, EmbeddingItem] = Field(title="Skipped", description="Embeddings skipped for the current model (likely due to architecture incompatibility)") + +class MemoryResponse(BaseModel): + ram: dict = Field(title="RAM", description="System memory stats") + cuda: dict = Field(title="CUDA", description="nVidia CUDA memory stats") + + +class ScriptsList(BaseModel): + txt2img: list = Field(default=None, title="Txt2img", description="Titles of scripts (txt2img)") + img2img: list = Field(default=None, title="Img2img", description="Titles of scripts (img2img)") + + +class ScriptArg(BaseModel): + label: str = Field(default=None, title="Label", description="Name of the argument in UI") + value: Optional[Any] = Field(default=None, title="Value", description="Default value of the argument") + minimum: Optional[Any] = Field(default=None, title="Minimum", description="Minimum allowed value for the argumentin UI") + maximum: Optional[Any] = Field(default=None, title="Minimum", description="Maximum allowed value for the argumentin UI") + step: Optional[Any] = Field(default=None, title="Minimum", description="Step for changing value of the argumentin UI") + choices: Optional[list[str]] = Field(default=None, title="Choices", description="Possible values for the argument") + + +class ScriptInfo(BaseModel): + name: str = Field(default=None, title="Name", description="Script name") + is_alwayson: bool = Field(default=None, title="IsAlwayson", description="Flag specifying whether this script is an alwayson script") + is_img2img: bool = Field(default=None, title="IsImg2img", description="Flag specifying whether this script is an img2img script") + args: list[ScriptArg] = Field(title="Arguments", description="List of 
script's arguments") + +class ExtensionItem(BaseModel): + name: str = Field(title="Name", description="Extension name") + remote: str = Field(title="Remote", description="Extension Repository URL") + branch: str = Field(title="Branch", description="Extension Repository Branch") + commit_hash: str = Field(title="Commit Hash", description="Extension Repository Commit Hash") + version: str = Field(title="Version", description="Extension Version") + commit_date: str = Field(title="Commit Date", description="Extension Repository Commit Date") + enabled: bool = Field(title="Enabled", description="Flag specifying whether this extension is enabled") diff --git a/modules/cache.py b/modules/cache.py new file mode 100644 index 0000000000000000000000000000000000000000..1796ae72725332ee23f6d468591f6b60949352d4 --- /dev/null +++ b/modules/cache.py @@ -0,0 +1,123 @@ +import json +import os +import os.path +import threading +import time + +from modules.paths import data_path, script_path + +cache_filename = os.environ.get('SD_WEBUI_CACHE_FILE', os.path.join(data_path, "cache.json")) +cache_data = None +cache_lock = threading.Lock() + +dump_cache_after = None +dump_cache_thread = None + + +def dump_cache(): + """ + Marks cache for writing to disk. 5 seconds after no one else flags the cache for writing, it is written. + """ + + global dump_cache_after + global dump_cache_thread + + def thread_func(): + global dump_cache_after + global dump_cache_thread + + while dump_cache_after is not None and time.time() < dump_cache_after: + time.sleep(1) + + with cache_lock: + cache_filename_tmp = cache_filename + "-" + with open(cache_filename_tmp, "w", encoding="utf8") as file: + json.dump(cache_data, file, indent=4, ensure_ascii=False) + + os.replace(cache_filename_tmp, cache_filename) + + dump_cache_after = None + dump_cache_thread = None + + with cache_lock: + dump_cache_after = time.time() + 5 + if dump_cache_thread is None: + dump_cache_thread = threading.Thread(name='cache-writer', target=thread_func) + dump_cache_thread.start() + + +def cache(subsection): + """ + Retrieves or initializes a cache for a specific subsection. + + Parameters: + subsection (str): The subsection identifier for the cache. + + Returns: + dict: The cache data for the specified subsection. + """ + + global cache_data + + if cache_data is None: + with cache_lock: + if cache_data is None: + try: + with open(cache_filename, "r", encoding="utf8") as file: + cache_data = json.load(file) + except FileNotFoundError: + cache_data = {} + except Exception: + os.replace(cache_filename, os.path.join(script_path, "tmp", "cache.json")) + print('[ERROR] issue occurred while trying to read cache.json, move current cache to tmp/cache.json and create new cache') + cache_data = {} + + s = cache_data.get(subsection, {}) + cache_data[subsection] = s + + return s + + +def cached_data_for_file(subsection, title, filename, func): + """ + Retrieves or generates data for a specific file, using a caching mechanism. + + Parameters: + subsection (str): The subsection of the cache to use. + title (str): The title of the data entry in the subsection of the cache. + filename (str): The path to the file to be checked for modifications. + func (callable): A function that generates the data if it is not available in the cache. + + Returns: + dict or None: The cached or generated data, or None if data generation fails. + + The `cached_data_for_file` function implements a caching mechanism for data stored in files. 
+ It checks if the data associated with the given `title` is present in the cache and compares the + modification time of the file with the cached modification time. If the file has been modified, + the cache is considered invalid and the data is regenerated using the provided `func`. + Otherwise, the cached data is returned. + + If the data generation fails, None is returned to indicate the failure. Otherwise, the generated + or cached data is returned as a dictionary. + """ + + existing_cache = cache(subsection) + ondisk_mtime = os.path.getmtime(filename) + + entry = existing_cache.get(title) + if entry: + cached_mtime = entry.get("mtime", 0) + if ondisk_mtime > cached_mtime: + entry = None + + if not entry or 'value' not in entry: + value = func() + if value is None: + return None + + entry = {'mtime': ondisk_mtime, 'value': value} + existing_cache[title] = entry + + dump_cache() + + return entry['value'] diff --git a/modules/call_queue.py b/modules/call_queue.py new file mode 100644 index 0000000000000000000000000000000000000000..7f7d07ceacf280a022ee69240c26764ee0f72d8d --- /dev/null +++ b/modules/call_queue.py @@ -0,0 +1,119 @@ +from functools import wraps +import html +import time + +from modules import shared, progress, errors, devices, fifo_lock + +queue_lock = fifo_lock.FIFOLock() + + +def wrap_queued_call(func): + def f(*args, **kwargs): + with queue_lock: + res = func(*args, **kwargs) + + return res + + return f + + +def wrap_gradio_gpu_call(func, extra_outputs=None): + @wraps(func) + def f(*args, **kwargs): + + # if the first argument is a string that says "task(...)", it is treated as a job id + if args and type(args[0]) == str and args[0].startswith("task(") and args[0].endswith(")"): + id_task = args[0] + progress.add_task_to_queue(id_task) + else: + id_task = None + + with queue_lock: + shared.state.begin(job=id_task) + progress.start_task(id_task) + + try: + res = func(*args, **kwargs) + progress.record_results(id_task, res) + finally: + progress.finish_task(id_task) + + shared.state.end() + + return res + + return wrap_gradio_call(f, extra_outputs=extra_outputs, add_stats=True) + + +def wrap_gradio_call(func, extra_outputs=None, add_stats=False): + @wraps(func) + def f(*args, extra_outputs_array=extra_outputs, **kwargs): + run_memmon = shared.opts.memmon_poll_rate > 0 and not shared.mem_mon.disabled and add_stats + if run_memmon: + shared.mem_mon.monitor() + t = time.perf_counter() + + try: + res = list(func(*args, **kwargs)) + except Exception as e: + # When printing out our debug argument list, + # do not print out more than a 100 KB of text + max_debug_str_len = 131072 + message = "Error completing request" + arg_str = f"Arguments: {args} {kwargs}"[:max_debug_str_len] + if len(arg_str) > max_debug_str_len: + arg_str += f" (Argument list truncated at {max_debug_str_len}/{len(arg_str)} characters)" + errors.report(f"{message}\n{arg_str}", exc_info=True) + + shared.state.job = "" + shared.state.job_count = 0 + + if extra_outputs_array is None: + extra_outputs_array = [None, ''] + + error_message = f'{type(e).__name__}: {e}' + res = extra_outputs_array + [f"
    {html.escape(error_message)}
    "] + + devices.torch_gc() + + shared.state.skipped = False + shared.state.interrupted = False + shared.state.stopping_generation = False + shared.state.job_count = 0 + + if not add_stats: + return tuple(res) + + elapsed = time.perf_counter() - t + elapsed_m = int(elapsed // 60) + elapsed_s = elapsed % 60 + elapsed_text = f"{elapsed_s:.1f} sec." + if elapsed_m > 0: + elapsed_text = f"{elapsed_m} min. "+elapsed_text + + if run_memmon: + mem_stats = {k: -(v//-(1024*1024)) for k, v in shared.mem_mon.stop().items()} + active_peak = mem_stats['active_peak'] + reserved_peak = mem_stats['reserved_peak'] + sys_peak = mem_stats['system_peak'] + sys_total = mem_stats['total'] + sys_pct = sys_peak/max(sys_total, 1) * 100 + + toltip_a = "Active: peak amount of video memory used during generation (excluding cached data)" + toltip_r = "Reserved: total amout of video memory allocated by the Torch library " + toltip_sys = "System: peak amout of video memory allocated by all running programs, out of total capacity" + + text_a = f"A: {active_peak/1024:.2f} GB" + text_r = f"R: {reserved_peak/1024:.2f} GB" + text_sys = f"Sys: {sys_peak/1024:.1f}/{sys_total/1024:g} GB ({sys_pct:.1f}%)" + + vram_html = f"

    {text_a}, {text_r}, {text_sys}

    " + else: + vram_html = '' + + # last item is always HTML + res[-1] += f"

    Time taken: {elapsed_text}

    {vram_html}
    " + + return tuple(res) + + return f diff --git a/modules/cmd_args.py b/modules/cmd_args.py new file mode 100644 index 0000000000000000000000000000000000000000..e1723816a17c3d795e4134e35a61336892cc904c --- /dev/null +++ b/modules/cmd_args.py @@ -0,0 +1,145 @@ +import argparse +import json +import os +from modules.paths_internal import normalized_filepath, models_path, script_path, data_path, extensions_dir, extensions_builtin_dir, sd_default_config, sd_model_file # noqa: F401 +from pathlib import Path +from ldm_patched.modules import args_parser + +parser = args_parser.parser + +parser.add_argument("-f", action='store_true', help=argparse.SUPPRESS) # allows running as root; implemented outside of webui +parser.add_argument("--update-all-extensions", action='store_true', help="launch.py argument: download updates for all extensions when starting the program") +parser.add_argument("--skip-python-version-check", action='store_true', help="launch.py argument: do not check python version") +parser.add_argument("--skip-torch-cuda-test", action='store_true', help="launch.py argument: do not check if CUDA is able to work properly") +parser.add_argument("--reinstall-xformers", action='store_true', help="launch.py argument: install the appropriate version of xformers even if you have some version already installed") +parser.add_argument("--reinstall-torch", action='store_true', help="launch.py argument: install the appropriate version of torch even if you have some version already installed") +parser.add_argument("--update-check", action='store_true', help="launch.py argument: check for updates at startup") +parser.add_argument("--test-server", action='store_true', help="launch.py argument: configure server for testing") +parser.add_argument("--log-startup", action='store_true', help="launch.py argument: print a detailed log of what's happening at startup") +parser.add_argument("--skip-prepare-environment", action='store_true', help="launch.py argument: skip all environment preparation") +parser.add_argument("--skip-install", action='store_true', help="launch.py argument: skip installation of packages") +parser.add_argument("--dump-sysinfo", action='store_true', help="launch.py argument: dump limited sysinfo file (without information about extensions, options) to disk and quit") +parser.add_argument("--loglevel", type=str, help="log level; one of: CRITICAL, ERROR, WARNING, INFO, DEBUG", default=None) +parser.add_argument("--do-not-download-clip", action='store_true', help="do not download CLIP model even if it's not included in the checkpoint") +parser.add_argument("--data-dir", type=normalized_filepath, default=os.path.dirname(os.path.dirname(os.path.realpath(__file__))), help="base path where all user data is stored") +parser.add_argument("--config", type=normalized_filepath, default=sd_default_config, help="path to config which constructs model",) +parser.add_argument("--ckpt", type=normalized_filepath, default=sd_model_file, help="path to checkpoint of stable diffusion model; if specified, this checkpoint will be added to the list of checkpoints and loaded",) +parser.add_argument("--ckpt-dir", type=normalized_filepath, default=None, help="Path to directory with stable diffusion checkpoints") +parser.add_argument("--vae-dir", type=normalized_filepath, default=None, help="Path to directory with VAE files") +parser.add_argument("--gfpgan-dir", type=normalized_filepath, help="GFPGAN directory", default=('./src/gfpgan' if os.path.exists('./src/gfpgan') else './GFPGAN')) 
+parser.add_argument("--gfpgan-model", type=normalized_filepath, help="GFPGAN model file name", default=None) +parser.add_argument("--no-half", action='store_true', help="do not switch the model to 16-bit floats") +parser.add_argument("--no-half-vae", action='store_true', help="do not switch the VAE model to 16-bit floats") +parser.add_argument("--no-progressbar-hiding", action='store_true', help="do not hide progressbar in gradio UI (we hide it because it slows down ML if you have hardware acceleration in browser)") +parser.add_argument("--max-batch-count", type=int, default=16, help="maximum batch count value for the UI") +parser.add_argument("--embeddings-dir", type=normalized_filepath, default=os.path.join(data_path, 'embeddings'), help="embeddings directory for textual inversion (default: embeddings)") +parser.add_argument("--textual-inversion-templates-dir", type=normalized_filepath, default=os.path.join(script_path, 'textual_inversion_templates'), help="directory with textual inversion templates") +parser.add_argument("--hypernetwork-dir", type=normalized_filepath, default=os.path.join(models_path, 'hypernetworks'), help="hypernetwork directory") +parser.add_argument("--localizations-dir", type=normalized_filepath, default=os.path.join(script_path, 'localizations'), help="localizations directory") +parser.add_argument("--allow-code", action='store_true', help="allow custom script execution from webui") +parser.add_argument("--medvram", action='store_true', help="enable stable diffusion model optimizations for sacrificing a little speed for low VRM usage") +parser.add_argument("--medvram-sdxl", action='store_true', help="enable --medvram optimization just for SDXL models") +parser.add_argument("--lowvram", action='store_true', help="enable stable diffusion model optimizations for sacrificing a lot of speed for very low VRM usage") +parser.add_argument("--lowram", action='store_true', help="load stable diffusion checkpoint weights to VRAM instead of RAM") +parser.add_argument("--always-batch-cond-uncond", action='store_true', help="does not do anything") +parser.add_argument("--unload-gfpgan", action='store_true', help="does not do anything.") +parser.add_argument("--precision", type=str, help="evaluate at this precision", choices=["full", "autocast"], default="autocast") +parser.add_argument("--upcast-sampling", action='store_true', help="upcast sampling. No effect with --no-half. 
Usually produces similar results to --no-half with better performance while using less memory.") +parser.add_argument("--share", action='store_true', help="use share=True for gradio and make the UI accessible through their site") +parser.add_argument("--ngrok", type=str, help="ngrok authtoken, alternative to gradio --share", default=None) +parser.add_argument("--ngrok-region", type=str, help="does not do anything.", default="") +parser.add_argument("--ngrok-options", type=json.loads, help='The options to pass to ngrok in JSON format, e.g.: \'{"authtoken_from_env":true, "basic_auth":"user:password", "oauth_provider":"google", "oauth_allow_emails":"user@asdf.com"}\'', default=dict()) +parser.add_argument("--enable-insecure-extension-access", action='store_true', help="enable extensions tab regardless of other options") +parser.add_argument("--codeformer-models-path", type=normalized_filepath, help="Path to directory with codeformer model file(s).", default=os.path.join(models_path, 'Codeformer')) +parser.add_argument("--gfpgan-models-path", type=normalized_filepath, help="Path to directory with GFPGAN model file(s).", default=os.path.join(models_path, 'GFPGAN')) +parser.add_argument("--esrgan-models-path", type=normalized_filepath, help="Path to directory with ESRGAN model file(s).", default=os.path.join(models_path, 'ESRGAN')) +parser.add_argument("--bsrgan-models-path", type=normalized_filepath, help="Path to directory with BSRGAN model file(s).", default=os.path.join(models_path, 'BSRGAN')) +parser.add_argument("--realesrgan-models-path", type=normalized_filepath, help="Path to directory with RealESRGAN model file(s).", default=os.path.join(models_path, 'RealESRGAN')) +parser.add_argument("--clip-models-path", type=normalized_filepath, help="Path to directory with CLIP model file(s).", default=None) +parser.add_argument("--xformers", action='store_true', help="enable xformers for cross attention layers") +parser.add_argument("--force-enable-xformers", action='store_true', help="enable xformers for cross attention layers regardless of whether the checking code thinks you can run it; do not make bug reports if this fails to work") +parser.add_argument("--xformers-flash-attention", action='store_true', help="enable xformers with Flash Attention to improve reproducibility (supported for SD2.x or variant only)") +parser.add_argument("--deepdanbooru", action='store_true', help="does not do anything") +parser.add_argument("--opt-split-attention", action='store_true', help="prefer Doggettx's cross-attention layer optimization for automatic choice of optimization") +parser.add_argument("--opt-sub-quad-attention", action='store_true', help="prefer memory efficient sub-quadratic cross-attention layer optimization for automatic choice of optimization") +parser.add_argument("--sub-quad-q-chunk-size", type=int, help="query chunk size for the sub-quadratic cross-attention layer optimization to use", default=1024) +parser.add_argument("--sub-quad-kv-chunk-size", type=int, help="kv chunk size for the sub-quadratic cross-attention layer optimization to use", default=None) +parser.add_argument("--sub-quad-chunk-threshold", type=int, help="the percentage of VRAM threshold for the sub-quadratic cross-attention layer optimization to use chunking", default=None) +parser.add_argument("--opt-split-attention-invokeai", action='store_true', help="prefer InvokeAI's cross-attention layer optimization for automatic choice of optimization") +parser.add_argument("--opt-split-attention-v1", action='store_true', 
help="prefer older version of split attention optimization for automatic choice of optimization") +parser.add_argument("--opt-sdp-attention", action='store_true', help="prefer scaled dot product cross-attention layer optimization for automatic choice of optimization; requires PyTorch 2.*") +parser.add_argument("--opt-sdp-no-mem-attention", action='store_true', help="prefer scaled dot product cross-attention layer optimization without memory efficient attention for automatic choice of optimization, makes image generation deterministic; requires PyTorch 2.*") +parser.add_argument("--disable-opt-split-attention", action='store_true', help="prefer no cross-attention layer optimization for automatic choice of optimization") +parser.add_argument("--disable-nan-check", action='store_true', help="do not check if produced images/latent spaces have nans; useful for running without a checkpoint in CI") +parser.add_argument("--use-cpu", nargs='+', help="use CPU as torch device for specified modules", default=[], type=str.lower) +parser.add_argument("--use-ipex", action="store_true", help="use Intel XPU as torch device") +parser.add_argument("--disable-model-loading-ram-optimization", action='store_true', help="disable an optimization that reduces RAM use when loading a model") +parser.add_argument("--listen", action='store_true', help="launch gradio with 0.0.0.0 as server name, allowing to respond to network requests") +parser.add_argument("--port", type=int, help="launch gradio with given server port, you need root/admin rights for ports < 1024, defaults to 7860 if available", default=None) +parser.add_argument("--show-negative-prompt", action='store_true', help="does not do anything", default=False) +parser.add_argument("--ui-config-file", type=str, help="filename to use for ui configuration", default=os.path.join(data_path, 'ui-config.json')) +parser.add_argument("--hide-ui-dir-config", action='store_true', help="hide directory configuration from webui", default=False) +parser.add_argument("--freeze-settings", action='store_true', help="disable editing of all settings globally", default=False) +parser.add_argument("--freeze-settings-in-sections", type=str, help='disable editing settings in specific sections of the settings page by specifying a comma-delimited list such like "saving-images,upscaling". The list of setting names can be found in the modules/shared_options.py file', default=None) +parser.add_argument("--freeze-specific-settings", type=str, help='disable editing of individual settings by specifying a comma-delimited list like "samples_save,samples_format". The list of setting names can be found in the config.json file', default=None) +parser.add_argument("--ui-settings-file", type=str, help="filename to use for ui settings", default=os.path.join(data_path, 'config.json')) +parser.add_argument("--gradio-debug", action='store_true', help="launch gradio with --debug option") +parser.add_argument("--gradio-auth", type=str, help='set gradio authentication like "username:password"; or comma-delimit multiple like "u1:p1,u2:p2,u3:p3"', default=None) +parser.add_argument("--gradio-auth-path", type=normalized_filepath, help='set gradio authentication file path ex. 
"/path/to/auth/file" same auth format as --gradio-auth', default=None) +parser.add_argument("--gradio-img2img-tool", type=str, help='does not do anything') +parser.add_argument("--gradio-inpaint-tool", type=str, help="does not do anything") +parser.add_argument("--gradio-allowed-path", action='append', help="add path to gradio's allowed_paths, make it possible to serve files from it", default=[data_path]) +parser.add_argument("--opt-channelslast", action='store_true', help="change memory type for stable diffusion to channels last") +parser.add_argument("--styles-file", type=str, action='append', help="path or wildcard path of styles files, allow multiple entries.", default=[]) +parser.add_argument("--autolaunch", action='store_true', help="open the webui URL in the system's default browser upon launch", default=False) +parser.add_argument("--theme", type=str, help="launches the UI with light or dark theme", default=None) +parser.add_argument("--use-textbox-seed", action='store_true', help="use textbox for seeds in UI (no up/down, but possible to input long seeds)", default=False) +parser.add_argument("--disable-console-progressbars", action='store_true', help="do not output progressbars to console", default=False) +parser.add_argument("--enable-console-prompts", action='store_true', help="does not do anything", default=False) # Legacy compatibility, use as default value shared.opts.enable_console_prompts +parser.add_argument('--vae-path', type=normalized_filepath, help='Checkpoint to use as VAE; setting this argument disables all settings related to VAE', default=None) +parser.add_argument("--disable-safe-unpickle", action='store_true', help="disable checking pytorch models for malicious code", default=False) +parser.add_argument("--api", action='store_true', help="use api=True to launch the API together with the webui (use --nowebui instead for only the API)") +parser.add_argument("--api-auth", type=str, help='Set authentication for API like "username:password"; or comma-delimit multiple like "u1:p1,u2:p2,u3:p3"', default=None) +parser.add_argument("--api-log", action='store_true', help="use api-log=True to enable logging of all API requests") +parser.add_argument("--nowebui", action='store_true', help="use api=True to launch the API instead of the webui") +parser.add_argument("--ui-debug-mode", action='store_true', help="Don't load model to quickly launch UI") +parser.add_argument("--device-id", type=str, help="Select the default CUDA device to use (export CUDA_VISIBLE_DEVICES=0,1,etc might be needed before)", default=None) +parser.add_argument("--administrator", action='store_true', help="Administrator rights", default=False) +parser.add_argument("--cors-allow-origins", type=str, help="Allowed CORS origin(s) in the form of a comma-separated list (no spaces)", default=None) +parser.add_argument("--cors-allow-origins-regex", type=str, help="Allowed CORS origin(s) in the form of a single regular expression", default=None) +parser.add_argument("--tls-keyfile", type=str, help="Partially enables TLS, requires --tls-certfile to fully function", default=None) +parser.add_argument("--tls-certfile", type=str, help="Partially enables TLS, requires --tls-keyfile to fully function", default=None) +parser.add_argument("--disable-tls-verify", action="store_false", help="When passed, enables the use of self-signed certificates.", default=None) +parser.add_argument("--server-name", type=str, help="Sets hostname of server", default=None) +parser.add_argument("--gradio-queue", action='store_true', 
help="does not do anything", default=True) +parser.add_argument("--no-gradio-queue", action='store_true', help="Disables gradio queue; causes the webpage to use http requests instead of websockets; was the default in earlier versions") +parser.add_argument("--skip-version-check", action='store_true', help="Do not check versions of torch and xformers") +parser.add_argument("--no-hashing", action='store_true', help="disable sha256 hashing of checkpoints to help loading performance", default=False) +parser.add_argument("--no-download-sd-model", action='store_true', help="don't download SD1.5 model even if no model is found in --ckpt-dir", default=False) +parser.add_argument('--subpath', type=str, help='customize the subpath for gradio, use with reverse proxy') +parser.add_argument('--add-stop-route', action='store_true', help='does not do anything') +parser.add_argument('--api-server-stop', action='store_true', help='enable server stop/restart/kill via api') +parser.add_argument('--timeout-keep-alive', type=int, default=30, help='set timeout_keep_alive for uvicorn') +parser.add_argument("--disable-all-extensions", action='store_true', help="prevent all extensions from running regardless of any other settings", default=False) +parser.add_argument("--disable-extra-extensions", action='store_true', help="prevent all extensions except built-in from running regardless of any other settings", default=False) +parser.add_argument("--skip-load-model-at-start", action='store_true', help="if load a model at web start, only take effect when --nowebui", ) + +# Arguments added by forge. +parser.add_argument( + '--forge-ref-a1111-home', + type=Path, + help="Look for models in an existing A1111 checkout's path", + default=None +) +parser.add_argument( + "--controlnet-dir", + type=Path, + help="Path to directory with ControlNet models", + default=None, +) +parser.add_argument( + "--controlnet-preprocessor-models-dir", + type=Path, + help="Path to directory with annotator model directories", + default=None, +) diff --git a/modules/codeformer_model.py b/modules/codeformer_model.py new file mode 100644 index 0000000000000000000000000000000000000000..dcc9e4d8938f125ad538a17e899a6b1cf80fa7fe --- /dev/null +++ b/modules/codeformer_model.py @@ -0,0 +1,64 @@ +from __future__ import annotations + +import logging + +import torch + +from modules import ( + devices, + errors, + face_restoration, + face_restoration_utils, + modelloader, + shared, +) + +logger = logging.getLogger(__name__) + +model_url = 'https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/codeformer.pth' +model_download_name = 'codeformer-v0.1.0.pth' + +# used by e.g. 
postprocessing_codeformer.py +codeformer: face_restoration.FaceRestoration | None = None + + +class FaceRestorerCodeFormer(face_restoration_utils.CommonFaceRestoration): + def name(self): + return "CodeFormer" + + def load_net(self) -> torch.Module: + for model_path in modelloader.load_models( + model_path=self.model_path, + model_url=model_url, + command_path=self.model_path, + download_name=model_download_name, + ext_filter=['.pth'], + ): + return modelloader.load_spandrel_model( + model_path, + device=devices.device_codeformer, + expected_architecture='CodeFormer', + ).model + raise ValueError("No codeformer model found") + + def get_device(self): + return devices.device_codeformer + + def restore(self, np_image, w: float | None = None): + if w is None: + w = getattr(shared.opts, "code_former_weight", 0.5) + + def restore_face(cropped_face_t): + assert self.net is not None + return self.net(cropped_face_t, w=w, adain=True)[0] + + return self.restore_with_helper(np_image, restore_face) + + +def setup_model(dirname: str) -> None: + global codeformer + try: + codeformer = FaceRestorerCodeFormer(dirname) + shared.face_restorers.append(codeformer) + except Exception: + errors.report("Error setting up CodeFormer", exc_info=True) diff --git a/modules/config_states.py b/modules/config_states.py new file mode 100644 index 0000000000000000000000000000000000000000..651793c7f6f659f8751e35f894a8f588f276d9ac --- /dev/null +++ b/modules/config_states.py @@ -0,0 +1,198 @@ +""" +Supports saving and restoring webui and extensions from a known working set of commits +""" + +import os +import json +import tqdm + +from datetime import datetime +import git + +from modules import shared, extensions, errors +from modules.paths_internal import script_path, config_states_dir + +all_config_states = {} + + +def list_config_states(): + global all_config_states + + all_config_states.clear() + os.makedirs(config_states_dir, exist_ok=True) + + config_states = [] + for filename in os.listdir(config_states_dir): + if filename.endswith(".json"): + path = os.path.join(config_states_dir, filename) + try: + with open(path, "r", encoding="utf-8") as f: + j = json.load(f) + assert "created_at" in j, '"created_at" does not exist' + j["filepath"] = path + config_states.append(j) + except Exception as e: + print(f'[ERROR]: Config states {path}, {e}') + + config_states = sorted(config_states, key=lambda cs: cs["created_at"], reverse=True) + + for cs in config_states: + timestamp = datetime.fromtimestamp(cs["created_at"]).strftime('%Y-%m-%d %H:%M:%S') + name = cs.get("name", "Config") + full_name = f"{name}: {timestamp}" + all_config_states[full_name] = cs + + return all_config_states + + +def get_webui_config(): + webui_repo = None + + try: + if os.path.exists(os.path.join(script_path, ".git")): + webui_repo = git.Repo(script_path) + except Exception: + errors.report(f"Error reading webui git info from {script_path}", exc_info=True) + + webui_remote = None + webui_commit_hash = None + webui_commit_date = None + webui_branch = None + if webui_repo and not webui_repo.bare: + try: + webui_remote = next(webui_repo.remote().urls, None) + head = webui_repo.head.commit + webui_commit_date = webui_repo.head.commit.committed_date + webui_commit_hash = head.hexsha + webui_branch = webui_repo.active_branch.name + + except Exception: + webui_remote = None + + return { + "remote": webui_remote, + "commit_hash": webui_commit_hash, + "commit_date": webui_commit_date, + "branch": webui_branch, + } + + +def get_extension_config(): + ext_config = 
{} + + for ext in extensions.extensions: + ext.read_info_from_repo() + + entry = { + "name": ext.name, + "path": ext.path, + "enabled": ext.enabled, + "is_builtin": ext.is_builtin, + "remote": ext.remote, + "commit_hash": ext.commit_hash, + "commit_date": ext.commit_date, + "branch": ext.branch, + "have_info_from_repo": ext.have_info_from_repo + } + + ext_config[ext.name] = entry + + return ext_config + + +def get_config(): + creation_time = datetime.now().timestamp() + webui_config = get_webui_config() + ext_config = get_extension_config() + + return { + "created_at": creation_time, + "webui": webui_config, + "extensions": ext_config + } + + +def restore_webui_config(config): + print("* Restoring webui state...") + + if "webui" not in config: + print("Error: No webui data saved to config") + return + + webui_config = config["webui"] + + if "commit_hash" not in webui_config: + print("Error: No commit saved to webui config") + return + + webui_commit_hash = webui_config.get("commit_hash", None) + webui_repo = None + + try: + if os.path.exists(os.path.join(script_path, ".git")): + webui_repo = git.Repo(script_path) + except Exception: + errors.report(f"Error reading webui git info from {script_path}", exc_info=True) + return + + try: + webui_repo.git.fetch(all=True) + webui_repo.git.reset(webui_commit_hash, hard=True) + print(f"* Restored webui to commit {webui_commit_hash}.") + except Exception: + errors.report(f"Error restoring webui to commit{webui_commit_hash}") + + +def restore_extension_config(config): + print("* Restoring extension state...") + + if "extensions" not in config: + print("Error: No extension data saved to config") + return + + ext_config = config["extensions"] + + results = [] + disabled = [] + + for ext in tqdm.tqdm(extensions.extensions): + if ext.is_builtin: + continue + + ext.read_info_from_repo() + current_commit = ext.commit_hash + + if ext.name not in ext_config: + ext.disabled = True + disabled.append(ext.name) + results.append((ext, current_commit[:8], False, "Saved extension state not found in config, marking as disabled")) + continue + + entry = ext_config[ext.name] + + if "commit_hash" in entry and entry["commit_hash"]: + try: + ext.fetch_and_reset_hard(entry["commit_hash"]) + ext.read_info_from_repo() + if current_commit != entry["commit_hash"]: + results.append((ext, current_commit[:8], True, entry["commit_hash"][:8])) + except Exception as ex: + results.append((ext, current_commit[:8], False, ex)) + else: + results.append((ext, current_commit[:8], False, "No commit hash found in config")) + + if not entry.get("enabled", False): + ext.disabled = True + disabled.append(ext.name) + else: + ext.disabled = False + + shared.opts.disabled_extensions = disabled + shared.opts.save(shared.config_filename) + + print("* Finished restoring extensions. Results:") + for ext, prev_commit, success, result in results: + if success: + print(f" + {ext.name}: {prev_commit} -> {result}") + else: + print(f" ! 
{ext.name}: FAILURE ({result})") diff --git a/modules/dat_model.py b/modules/dat_model.py new file mode 100644 index 0000000000000000000000000000000000000000..c2caaafdfe7af184caf31263947dfa9a1eb41365 --- /dev/null +++ b/modules/dat_model.py @@ -0,0 +1,81 @@ +import os + +from modules import modelloader, errors +from modules.shared import cmd_opts, opts +from modules.upscaler import Upscaler, UpscalerData +from modules.upscaler_utils import upscale_with_model +from modules_forge.forge_util import prepare_free_memory + + +class UpscalerDAT(Upscaler): + def __init__(self, user_path): + self.name = "DAT" + self.user_path = user_path + self.scalers = [] + super().__init__() + + for file in self.find_models(ext_filter=[".pt", ".pth"]): + name = modelloader.friendly_name(file) + scaler_data = UpscalerData(name, file, upscaler=self, scale=None) + self.scalers.append(scaler_data) + + for model in get_dat_models(self): + if model.name in opts.dat_enabled_models: + self.scalers.append(model) + + def do_upscale(self, img, path): + prepare_free_memory() + try: + info = self.load_model(path) + except Exception: + errors.report(f"Unable to load DAT model {path}", exc_info=True) + return img + + model_descriptor = modelloader.load_spandrel_model( + info.local_data_path, + device=self.device, + prefer_half=(not cmd_opts.no_half and not cmd_opts.upcast_sampling), + expected_architecture="DAT", + ) + return upscale_with_model( + model_descriptor, + img, + tile_size=opts.DAT_tile, + tile_overlap=opts.DAT_tile_overlap, + ) + + def load_model(self, path): + for scaler in self.scalers: + if scaler.data_path == path: + if scaler.local_data_path.startswith("http"): + scaler.local_data_path = modelloader.load_file_from_url( + scaler.data_path, + model_dir=self.model_download_path, + ) + if not os.path.exists(scaler.local_data_path): + raise FileNotFoundError(f"DAT data missing: {scaler.local_data_path}") + return scaler + raise ValueError(f"Unable to find model info: {path}") + + +def get_dat_models(scaler): + return [ + UpscalerData( + name="DAT x2", + path="https://github.com/n0kovo/dat_upscaler_models/raw/main/DAT/DAT_x2.pth", + scale=2, + upscaler=scaler, + ), + UpscalerData( + name="DAT x3", + path="https://github.com/n0kovo/dat_upscaler_models/raw/main/DAT/DAT_x3.pth", + scale=3, + upscaler=scaler, + ), + UpscalerData( + name="DAT x4", + path="https://github.com/n0kovo/dat_upscaler_models/raw/main/DAT/DAT_x4.pth", + scale=4, + upscaler=scaler, + ), + ] diff --git a/modules/deepbooru.py b/modules/deepbooru.py new file mode 100644 index 0000000000000000000000000000000000000000..246c9b25ca451e2b4f7725a28c38ad609e8721a6 --- /dev/null +++ b/modules/deepbooru.py @@ -0,0 +1,109 @@ +import os +import re + +import torch +import numpy as np + +from modules import modelloader, paths, deepbooru_model, images, shared +from ldm_patched.modules import model_management +from ldm_patched.modules.model_patcher import ModelPatcher + + +re_special = re.compile(r'([\\()])') + + +class DeepDanbooru: + def __init__(self): + self.model = None + self.load_device = model_management.text_encoder_device() + self.offload_device = model_management.text_encoder_offload_device() + self.dtype = torch.float32 + + if model_management.should_use_fp16(device=self.load_device): + self.dtype = torch.float16 + + self.patcher = None + + def load(self): + if self.model is not None: + return + + files = modelloader.load_models( + model_path=os.path.join(paths.models_path, "torch_deepdanbooru"), + 
model_url='https://github.com/AUTOMATIC1111/TorchDeepDanbooru/releases/download/v1/model-resnet_custom_v3.pt', + ext_filter=[".pt"], + download_name='model-resnet_custom_v3.pt', + ) + + self.model = deepbooru_model.DeepDanbooruModel() + self.model.load_state_dict(torch.load(files[0], map_location="cpu")) + + self.model.eval() + self.model.to(self.offload_device, self.dtype) + + self.patcher = ModelPatcher(self.model, load_device=self.load_device, offload_device=self.offload_device) + + def start(self): + self.load() + model_management.load_models_gpu([self.patcher]) + + def stop(self): + pass + + def tag(self, pil_image): + self.start() + res = self.tag_multi(pil_image) + self.stop() + + return res + + def tag_multi(self, pil_image, force_disable_ranks=False): + threshold = shared.opts.interrogate_deepbooru_score_threshold + use_spaces = shared.opts.deepbooru_use_spaces + use_escape = shared.opts.deepbooru_escape + alpha_sort = shared.opts.deepbooru_sort_alpha + include_ranks = shared.opts.interrogate_return_ranks and not force_disable_ranks + + pic = images.resize_image(2, pil_image.convert("RGB"), 512, 512) + a = np.expand_dims(np.array(pic, dtype=np.float32), 0) / 255 + + with torch.no_grad(): + x = torch.from_numpy(a).to(self.load_device, self.dtype) + y = self.model(x)[0].detach().cpu().numpy() + + probability_dict = {} + + for tag, probability in zip(self.model.tags, y): + if probability < threshold: + continue + + if tag.startswith("rating:"): + continue + + probability_dict[tag] = probability + + if alpha_sort: + tags = sorted(probability_dict) + else: + tags = [tag for tag, _ in sorted(probability_dict.items(), key=lambda x: -x[1])] + + res = [] + + filtertags = {x.strip().replace(' ', '_') for x in shared.opts.deepbooru_filter_tags.split(",")} + + for tag in [x for x in tags if x not in filtertags]: + probability = probability_dict[tag] + tag_outformat = tag + if use_spaces: + tag_outformat = tag_outformat.replace('_', ' ') + if use_escape: + tag_outformat = re.sub(re_special, r'\\\1', tag_outformat) + if include_ranks: + tag_outformat = f"({tag_outformat}:{probability:.3f})" + + res.append(tag_outformat) + + return ", ".join(res) + + +model = DeepDanbooru() diff --git a/modules/deepbooru_model.py b/modules/deepbooru_model.py new file mode 100644 index 0000000000000000000000000000000000000000..7a53884624e96284c35214ce02b8a2891d92c3e8 --- /dev/null +++ b/modules/deepbooru_model.py @@ -0,0 +1,678 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from modules import devices + +# see https://github.com/AUTOMATIC1111/TorchDeepDanbooru for more + + +class DeepDanbooruModel(nn.Module): + def __init__(self): + super(DeepDanbooruModel, self).__init__() + + self.tags = [] + + self.n_Conv_0 = nn.Conv2d(kernel_size=(7, 7), in_channels=3, out_channels=64, stride=(2, 2)) + self.n_MaxPool_0 = nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2)) + self.n_Conv_1 = nn.Conv2d(kernel_size=(1, 1), in_channels=64, out_channels=256) + self.n_Conv_2 = nn.Conv2d(kernel_size=(1, 1), in_channels=64, out_channels=64) + self.n_Conv_3 = nn.Conv2d(kernel_size=(3, 3), in_channels=64, out_channels=64) + self.n_Conv_4 = nn.Conv2d(kernel_size=(1, 1), in_channels=64, out_channels=256) + self.n_Conv_5 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=64) + self.n_Conv_6 = nn.Conv2d(kernel_size=(3, 3), in_channels=64, out_channels=64) + self.n_Conv_7 = nn.Conv2d(kernel_size=(1, 1), in_channels=64, out_channels=256) + self.n_Conv_8 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, 
out_channels=64) + self.n_Conv_9 = nn.Conv2d(kernel_size=(3, 3), in_channels=64, out_channels=64) + self.n_Conv_10 = nn.Conv2d(kernel_size=(1, 1), in_channels=64, out_channels=256) + self.n_Conv_11 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=512, stride=(2, 2)) + self.n_Conv_12 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=128) + self.n_Conv_13 = nn.Conv2d(kernel_size=(3, 3), in_channels=128, out_channels=128, stride=(2, 2)) + self.n_Conv_14 = nn.Conv2d(kernel_size=(1, 1), in_channels=128, out_channels=512) + self.n_Conv_15 = nn.Conv2d(kernel_size=(1, 1), in_channels=512, out_channels=128) + self.n_Conv_16 = nn.Conv2d(kernel_size=(3, 3), in_channels=128, out_channels=128) + self.n_Conv_17 = nn.Conv2d(kernel_size=(1, 1), in_channels=128, out_channels=512) + self.n_Conv_18 = nn.Conv2d(kernel_size=(1, 1), in_channels=512, out_channels=128) + self.n_Conv_19 = nn.Conv2d(kernel_size=(3, 3), in_channels=128, out_channels=128) + self.n_Conv_20 = nn.Conv2d(kernel_size=(1, 1), in_channels=128, out_channels=512) + self.n_Conv_21 = nn.Conv2d(kernel_size=(1, 1), in_channels=512, out_channels=128) + self.n_Conv_22 = nn.Conv2d(kernel_size=(3, 3), in_channels=128, out_channels=128) + self.n_Conv_23 = nn.Conv2d(kernel_size=(1, 1), in_channels=128, out_channels=512) + self.n_Conv_24 = nn.Conv2d(kernel_size=(1, 1), in_channels=512, out_channels=128) + self.n_Conv_25 = nn.Conv2d(kernel_size=(3, 3), in_channels=128, out_channels=128) + self.n_Conv_26 = nn.Conv2d(kernel_size=(1, 1), in_channels=128, out_channels=512) + self.n_Conv_27 = nn.Conv2d(kernel_size=(1, 1), in_channels=512, out_channels=128) + self.n_Conv_28 = nn.Conv2d(kernel_size=(3, 3), in_channels=128, out_channels=128) + self.n_Conv_29 = nn.Conv2d(kernel_size=(1, 1), in_channels=128, out_channels=512) + self.n_Conv_30 = nn.Conv2d(kernel_size=(1, 1), in_channels=512, out_channels=128) + self.n_Conv_31 = nn.Conv2d(kernel_size=(3, 3), in_channels=128, out_channels=128) + self.n_Conv_32 = nn.Conv2d(kernel_size=(1, 1), in_channels=128, out_channels=512) + self.n_Conv_33 = nn.Conv2d(kernel_size=(1, 1), in_channels=512, out_channels=128) + self.n_Conv_34 = nn.Conv2d(kernel_size=(3, 3), in_channels=128, out_channels=128) + self.n_Conv_35 = nn.Conv2d(kernel_size=(1, 1), in_channels=128, out_channels=512) + self.n_Conv_36 = nn.Conv2d(kernel_size=(1, 1), in_channels=512, out_channels=1024, stride=(2, 2)) + self.n_Conv_37 = nn.Conv2d(kernel_size=(1, 1), in_channels=512, out_channels=256) + self.n_Conv_38 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256, stride=(2, 2)) + self.n_Conv_39 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_40 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_41 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_42 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_43 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_44 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_45 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_46 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_47 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_48 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_49 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_50 = 
nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_51 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_52 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_53 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_54 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_55 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_56 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_57 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_58 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_59 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_60 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_61 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_62 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_63 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_64 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_65 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_66 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_67 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_68 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_69 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_70 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_71 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_72 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_73 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_74 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_75 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_76 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_77 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_78 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_79 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_80 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_81 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_82 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_83 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_84 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_85 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_86 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_87 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_88 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_89 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_90 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_91 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_92 
= nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_93 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_94 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_95 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_96 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_97 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_98 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256, stride=(2, 2)) + self.n_Conv_99 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_100 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=1024, stride=(2, 2)) + self.n_Conv_101 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_102 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_103 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_104 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_105 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_106 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_107 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_108 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_109 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_110 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_111 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_112 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_113 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_114 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_115 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_116 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_117 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_118 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_119 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_120 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_121 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_122 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_123 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_124 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_125 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_126 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_127 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_128 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_129 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_130 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_131 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_132 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_133 = 
nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_134 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_135 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_136 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_137 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_138 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_139 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_140 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_141 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_142 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_143 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_144 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_145 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_146 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_147 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_148 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_149 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_150 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_151 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_152 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_153 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_154 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_155 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=256) + self.n_Conv_156 = nn.Conv2d(kernel_size=(3, 3), in_channels=256, out_channels=256) + self.n_Conv_157 = nn.Conv2d(kernel_size=(1, 1), in_channels=256, out_channels=1024) + self.n_Conv_158 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=2048, stride=(2, 2)) + self.n_Conv_159 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=512) + self.n_Conv_160 = nn.Conv2d(kernel_size=(3, 3), in_channels=512, out_channels=512, stride=(2, 2)) + self.n_Conv_161 = nn.Conv2d(kernel_size=(1, 1), in_channels=512, out_channels=2048) + self.n_Conv_162 = nn.Conv2d(kernel_size=(1, 1), in_channels=2048, out_channels=512) + self.n_Conv_163 = nn.Conv2d(kernel_size=(3, 3), in_channels=512, out_channels=512) + self.n_Conv_164 = nn.Conv2d(kernel_size=(1, 1), in_channels=512, out_channels=2048) + self.n_Conv_165 = nn.Conv2d(kernel_size=(1, 1), in_channels=2048, out_channels=512) + self.n_Conv_166 = nn.Conv2d(kernel_size=(3, 3), in_channels=512, out_channels=512) + self.n_Conv_167 = nn.Conv2d(kernel_size=(1, 1), in_channels=512, out_channels=2048) + self.n_Conv_168 = nn.Conv2d(kernel_size=(1, 1), in_channels=2048, out_channels=4096, stride=(2, 2)) + self.n_Conv_169 = nn.Conv2d(kernel_size=(1, 1), in_channels=2048, out_channels=1024) + self.n_Conv_170 = nn.Conv2d(kernel_size=(3, 3), in_channels=1024, out_channels=1024, stride=(2, 2)) + self.n_Conv_171 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=4096) + self.n_Conv_172 = nn.Conv2d(kernel_size=(1, 1), in_channels=4096, out_channels=1024) + self.n_Conv_173 = nn.Conv2d(kernel_size=(3, 3), in_channels=1024, 
out_channels=1024) + self.n_Conv_174 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=4096) + self.n_Conv_175 = nn.Conv2d(kernel_size=(1, 1), in_channels=4096, out_channels=1024) + self.n_Conv_176 = nn.Conv2d(kernel_size=(3, 3), in_channels=1024, out_channels=1024) + self.n_Conv_177 = nn.Conv2d(kernel_size=(1, 1), in_channels=1024, out_channels=4096) + self.n_Conv_178 = nn.Conv2d(kernel_size=(1, 1), in_channels=4096, out_channels=9176, bias=False) + + def forward(self, *inputs): + t_358, = inputs + t_359 = t_358.permute(*[0, 3, 1, 2]) + t_359_padded = F.pad(t_359, [2, 3, 2, 3], value=0) + t_360 = self.n_Conv_0(t_359_padded.to(self.n_Conv_0.bias.dtype) if devices.unet_needs_upcast else t_359_padded) + t_361 = F.relu(t_360) + t_361 = F.pad(t_361, [0, 1, 0, 1], value=float('-inf')) + t_362 = self.n_MaxPool_0(t_361) + t_363 = self.n_Conv_1(t_362) + t_364 = self.n_Conv_2(t_362) + t_365 = F.relu(t_364) + t_365_padded = F.pad(t_365, [1, 1, 1, 1], value=0) + t_366 = self.n_Conv_3(t_365_padded) + t_367 = F.relu(t_366) + t_368 = self.n_Conv_4(t_367) + t_369 = torch.add(t_368, t_363) + t_370 = F.relu(t_369) + t_371 = self.n_Conv_5(t_370) + t_372 = F.relu(t_371) + t_372_padded = F.pad(t_372, [1, 1, 1, 1], value=0) + t_373 = self.n_Conv_6(t_372_padded) + t_374 = F.relu(t_373) + t_375 = self.n_Conv_7(t_374) + t_376 = torch.add(t_375, t_370) + t_377 = F.relu(t_376) + t_378 = self.n_Conv_8(t_377) + t_379 = F.relu(t_378) + t_379_padded = F.pad(t_379, [1, 1, 1, 1], value=0) + t_380 = self.n_Conv_9(t_379_padded) + t_381 = F.relu(t_380) + t_382 = self.n_Conv_10(t_381) + t_383 = torch.add(t_382, t_377) + t_384 = F.relu(t_383) + t_385 = self.n_Conv_11(t_384) + t_386 = self.n_Conv_12(t_384) + t_387 = F.relu(t_386) + t_387_padded = F.pad(t_387, [0, 1, 0, 1], value=0) + t_388 = self.n_Conv_13(t_387_padded) + t_389 = F.relu(t_388) + t_390 = self.n_Conv_14(t_389) + t_391 = torch.add(t_390, t_385) + t_392 = F.relu(t_391) + t_393 = self.n_Conv_15(t_392) + t_394 = F.relu(t_393) + t_394_padded = F.pad(t_394, [1, 1, 1, 1], value=0) + t_395 = self.n_Conv_16(t_394_padded) + t_396 = F.relu(t_395) + t_397 = self.n_Conv_17(t_396) + t_398 = torch.add(t_397, t_392) + t_399 = F.relu(t_398) + t_400 = self.n_Conv_18(t_399) + t_401 = F.relu(t_400) + t_401_padded = F.pad(t_401, [1, 1, 1, 1], value=0) + t_402 = self.n_Conv_19(t_401_padded) + t_403 = F.relu(t_402) + t_404 = self.n_Conv_20(t_403) + t_405 = torch.add(t_404, t_399) + t_406 = F.relu(t_405) + t_407 = self.n_Conv_21(t_406) + t_408 = F.relu(t_407) + t_408_padded = F.pad(t_408, [1, 1, 1, 1], value=0) + t_409 = self.n_Conv_22(t_408_padded) + t_410 = F.relu(t_409) + t_411 = self.n_Conv_23(t_410) + t_412 = torch.add(t_411, t_406) + t_413 = F.relu(t_412) + t_414 = self.n_Conv_24(t_413) + t_415 = F.relu(t_414) + t_415_padded = F.pad(t_415, [1, 1, 1, 1], value=0) + t_416 = self.n_Conv_25(t_415_padded) + t_417 = F.relu(t_416) + t_418 = self.n_Conv_26(t_417) + t_419 = torch.add(t_418, t_413) + t_420 = F.relu(t_419) + t_421 = self.n_Conv_27(t_420) + t_422 = F.relu(t_421) + t_422_padded = F.pad(t_422, [1, 1, 1, 1], value=0) + t_423 = self.n_Conv_28(t_422_padded) + t_424 = F.relu(t_423) + t_425 = self.n_Conv_29(t_424) + t_426 = torch.add(t_425, t_420) + t_427 = F.relu(t_426) + t_428 = self.n_Conv_30(t_427) + t_429 = F.relu(t_428) + t_429_padded = F.pad(t_429, [1, 1, 1, 1], value=0) + t_430 = self.n_Conv_31(t_429_padded) + t_431 = F.relu(t_430) + t_432 = self.n_Conv_32(t_431) + t_433 = torch.add(t_432, t_427) + t_434 = F.relu(t_433) + t_435 = self.n_Conv_33(t_434) + t_436 = 
F.relu(t_435) + t_436_padded = F.pad(t_436, [1, 1, 1, 1], value=0) + t_437 = self.n_Conv_34(t_436_padded) + t_438 = F.relu(t_437) + t_439 = self.n_Conv_35(t_438) + t_440 = torch.add(t_439, t_434) + t_441 = F.relu(t_440) + t_442 = self.n_Conv_36(t_441) + t_443 = self.n_Conv_37(t_441) + t_444 = F.relu(t_443) + t_444_padded = F.pad(t_444, [0, 1, 0, 1], value=0) + t_445 = self.n_Conv_38(t_444_padded) + t_446 = F.relu(t_445) + t_447 = self.n_Conv_39(t_446) + t_448 = torch.add(t_447, t_442) + t_449 = F.relu(t_448) + t_450 = self.n_Conv_40(t_449) + t_451 = F.relu(t_450) + t_451_padded = F.pad(t_451, [1, 1, 1, 1], value=0) + t_452 = self.n_Conv_41(t_451_padded) + t_453 = F.relu(t_452) + t_454 = self.n_Conv_42(t_453) + t_455 = torch.add(t_454, t_449) + t_456 = F.relu(t_455) + t_457 = self.n_Conv_43(t_456) + t_458 = F.relu(t_457) + t_458_padded = F.pad(t_458, [1, 1, 1, 1], value=0) + t_459 = self.n_Conv_44(t_458_padded) + t_460 = F.relu(t_459) + t_461 = self.n_Conv_45(t_460) + t_462 = torch.add(t_461, t_456) + t_463 = F.relu(t_462) + t_464 = self.n_Conv_46(t_463) + t_465 = F.relu(t_464) + t_465_padded = F.pad(t_465, [1, 1, 1, 1], value=0) + t_466 = self.n_Conv_47(t_465_padded) + t_467 = F.relu(t_466) + t_468 = self.n_Conv_48(t_467) + t_469 = torch.add(t_468, t_463) + t_470 = F.relu(t_469) + t_471 = self.n_Conv_49(t_470) + t_472 = F.relu(t_471) + t_472_padded = F.pad(t_472, [1, 1, 1, 1], value=0) + t_473 = self.n_Conv_50(t_472_padded) + t_474 = F.relu(t_473) + t_475 = self.n_Conv_51(t_474) + t_476 = torch.add(t_475, t_470) + t_477 = F.relu(t_476) + t_478 = self.n_Conv_52(t_477) + t_479 = F.relu(t_478) + t_479_padded = F.pad(t_479, [1, 1, 1, 1], value=0) + t_480 = self.n_Conv_53(t_479_padded) + t_481 = F.relu(t_480) + t_482 = self.n_Conv_54(t_481) + t_483 = torch.add(t_482, t_477) + t_484 = F.relu(t_483) + t_485 = self.n_Conv_55(t_484) + t_486 = F.relu(t_485) + t_486_padded = F.pad(t_486, [1, 1, 1, 1], value=0) + t_487 = self.n_Conv_56(t_486_padded) + t_488 = F.relu(t_487) + t_489 = self.n_Conv_57(t_488) + t_490 = torch.add(t_489, t_484) + t_491 = F.relu(t_490) + t_492 = self.n_Conv_58(t_491) + t_493 = F.relu(t_492) + t_493_padded = F.pad(t_493, [1, 1, 1, 1], value=0) + t_494 = self.n_Conv_59(t_493_padded) + t_495 = F.relu(t_494) + t_496 = self.n_Conv_60(t_495) + t_497 = torch.add(t_496, t_491) + t_498 = F.relu(t_497) + t_499 = self.n_Conv_61(t_498) + t_500 = F.relu(t_499) + t_500_padded = F.pad(t_500, [1, 1, 1, 1], value=0) + t_501 = self.n_Conv_62(t_500_padded) + t_502 = F.relu(t_501) + t_503 = self.n_Conv_63(t_502) + t_504 = torch.add(t_503, t_498) + t_505 = F.relu(t_504) + t_506 = self.n_Conv_64(t_505) + t_507 = F.relu(t_506) + t_507_padded = F.pad(t_507, [1, 1, 1, 1], value=0) + t_508 = self.n_Conv_65(t_507_padded) + t_509 = F.relu(t_508) + t_510 = self.n_Conv_66(t_509) + t_511 = torch.add(t_510, t_505) + t_512 = F.relu(t_511) + t_513 = self.n_Conv_67(t_512) + t_514 = F.relu(t_513) + t_514_padded = F.pad(t_514, [1, 1, 1, 1], value=0) + t_515 = self.n_Conv_68(t_514_padded) + t_516 = F.relu(t_515) + t_517 = self.n_Conv_69(t_516) + t_518 = torch.add(t_517, t_512) + t_519 = F.relu(t_518) + t_520 = self.n_Conv_70(t_519) + t_521 = F.relu(t_520) + t_521_padded = F.pad(t_521, [1, 1, 1, 1], value=0) + t_522 = self.n_Conv_71(t_521_padded) + t_523 = F.relu(t_522) + t_524 = self.n_Conv_72(t_523) + t_525 = torch.add(t_524, t_519) + t_526 = F.relu(t_525) + t_527 = self.n_Conv_73(t_526) + t_528 = F.relu(t_527) + t_528_padded = F.pad(t_528, [1, 1, 1, 1], value=0) + t_529 = self.n_Conv_74(t_528_padded) + t_530 = 
F.relu(t_529) + t_531 = self.n_Conv_75(t_530) + t_532 = torch.add(t_531, t_526) + t_533 = F.relu(t_532) + t_534 = self.n_Conv_76(t_533) + t_535 = F.relu(t_534) + t_535_padded = F.pad(t_535, [1, 1, 1, 1], value=0) + t_536 = self.n_Conv_77(t_535_padded) + t_537 = F.relu(t_536) + t_538 = self.n_Conv_78(t_537) + t_539 = torch.add(t_538, t_533) + t_540 = F.relu(t_539) + t_541 = self.n_Conv_79(t_540) + t_542 = F.relu(t_541) + t_542_padded = F.pad(t_542, [1, 1, 1, 1], value=0) + t_543 = self.n_Conv_80(t_542_padded) + t_544 = F.relu(t_543) + t_545 = self.n_Conv_81(t_544) + t_546 = torch.add(t_545, t_540) + t_547 = F.relu(t_546) + t_548 = self.n_Conv_82(t_547) + t_549 = F.relu(t_548) + t_549_padded = F.pad(t_549, [1, 1, 1, 1], value=0) + t_550 = self.n_Conv_83(t_549_padded) + t_551 = F.relu(t_550) + t_552 = self.n_Conv_84(t_551) + t_553 = torch.add(t_552, t_547) + t_554 = F.relu(t_553) + t_555 = self.n_Conv_85(t_554) + t_556 = F.relu(t_555) + t_556_padded = F.pad(t_556, [1, 1, 1, 1], value=0) + t_557 = self.n_Conv_86(t_556_padded) + t_558 = F.relu(t_557) + t_559 = self.n_Conv_87(t_558) + t_560 = torch.add(t_559, t_554) + t_561 = F.relu(t_560) + t_562 = self.n_Conv_88(t_561) + t_563 = F.relu(t_562) + t_563_padded = F.pad(t_563, [1, 1, 1, 1], value=0) + t_564 = self.n_Conv_89(t_563_padded) + t_565 = F.relu(t_564) + t_566 = self.n_Conv_90(t_565) + t_567 = torch.add(t_566, t_561) + t_568 = F.relu(t_567) + t_569 = self.n_Conv_91(t_568) + t_570 = F.relu(t_569) + t_570_padded = F.pad(t_570, [1, 1, 1, 1], value=0) + t_571 = self.n_Conv_92(t_570_padded) + t_572 = F.relu(t_571) + t_573 = self.n_Conv_93(t_572) + t_574 = torch.add(t_573, t_568) + t_575 = F.relu(t_574) + t_576 = self.n_Conv_94(t_575) + t_577 = F.relu(t_576) + t_577_padded = F.pad(t_577, [1, 1, 1, 1], value=0) + t_578 = self.n_Conv_95(t_577_padded) + t_579 = F.relu(t_578) + t_580 = self.n_Conv_96(t_579) + t_581 = torch.add(t_580, t_575) + t_582 = F.relu(t_581) + t_583 = self.n_Conv_97(t_582) + t_584 = F.relu(t_583) + t_584_padded = F.pad(t_584, [0, 1, 0, 1], value=0) + t_585 = self.n_Conv_98(t_584_padded) + t_586 = F.relu(t_585) + t_587 = self.n_Conv_99(t_586) + t_588 = self.n_Conv_100(t_582) + t_589 = torch.add(t_587, t_588) + t_590 = F.relu(t_589) + t_591 = self.n_Conv_101(t_590) + t_592 = F.relu(t_591) + t_592_padded = F.pad(t_592, [1, 1, 1, 1], value=0) + t_593 = self.n_Conv_102(t_592_padded) + t_594 = F.relu(t_593) + t_595 = self.n_Conv_103(t_594) + t_596 = torch.add(t_595, t_590) + t_597 = F.relu(t_596) + t_598 = self.n_Conv_104(t_597) + t_599 = F.relu(t_598) + t_599_padded = F.pad(t_599, [1, 1, 1, 1], value=0) + t_600 = self.n_Conv_105(t_599_padded) + t_601 = F.relu(t_600) + t_602 = self.n_Conv_106(t_601) + t_603 = torch.add(t_602, t_597) + t_604 = F.relu(t_603) + t_605 = self.n_Conv_107(t_604) + t_606 = F.relu(t_605) + t_606_padded = F.pad(t_606, [1, 1, 1, 1], value=0) + t_607 = self.n_Conv_108(t_606_padded) + t_608 = F.relu(t_607) + t_609 = self.n_Conv_109(t_608) + t_610 = torch.add(t_609, t_604) + t_611 = F.relu(t_610) + t_612 = self.n_Conv_110(t_611) + t_613 = F.relu(t_612) + t_613_padded = F.pad(t_613, [1, 1, 1, 1], value=0) + t_614 = self.n_Conv_111(t_613_padded) + t_615 = F.relu(t_614) + t_616 = self.n_Conv_112(t_615) + t_617 = torch.add(t_616, t_611) + t_618 = F.relu(t_617) + t_619 = self.n_Conv_113(t_618) + t_620 = F.relu(t_619) + t_620_padded = F.pad(t_620, [1, 1, 1, 1], value=0) + t_621 = self.n_Conv_114(t_620_padded) + t_622 = F.relu(t_621) + t_623 = self.n_Conv_115(t_622) + t_624 = torch.add(t_623, t_618) + t_625 = 
F.relu(t_624) + t_626 = self.n_Conv_116(t_625) + t_627 = F.relu(t_626) + t_627_padded = F.pad(t_627, [1, 1, 1, 1], value=0) + t_628 = self.n_Conv_117(t_627_padded) + t_629 = F.relu(t_628) + t_630 = self.n_Conv_118(t_629) + t_631 = torch.add(t_630, t_625) + t_632 = F.relu(t_631) + t_633 = self.n_Conv_119(t_632) + t_634 = F.relu(t_633) + t_634_padded = F.pad(t_634, [1, 1, 1, 1], value=0) + t_635 = self.n_Conv_120(t_634_padded) + t_636 = F.relu(t_635) + t_637 = self.n_Conv_121(t_636) + t_638 = torch.add(t_637, t_632) + t_639 = F.relu(t_638) + t_640 = self.n_Conv_122(t_639) + t_641 = F.relu(t_640) + t_641_padded = F.pad(t_641, [1, 1, 1, 1], value=0) + t_642 = self.n_Conv_123(t_641_padded) + t_643 = F.relu(t_642) + t_644 = self.n_Conv_124(t_643) + t_645 = torch.add(t_644, t_639) + t_646 = F.relu(t_645) + t_647 = self.n_Conv_125(t_646) + t_648 = F.relu(t_647) + t_648_padded = F.pad(t_648, [1, 1, 1, 1], value=0) + t_649 = self.n_Conv_126(t_648_padded) + t_650 = F.relu(t_649) + t_651 = self.n_Conv_127(t_650) + t_652 = torch.add(t_651, t_646) + t_653 = F.relu(t_652) + t_654 = self.n_Conv_128(t_653) + t_655 = F.relu(t_654) + t_655_padded = F.pad(t_655, [1, 1, 1, 1], value=0) + t_656 = self.n_Conv_129(t_655_padded) + t_657 = F.relu(t_656) + t_658 = self.n_Conv_130(t_657) + t_659 = torch.add(t_658, t_653) + t_660 = F.relu(t_659) + t_661 = self.n_Conv_131(t_660) + t_662 = F.relu(t_661) + t_662_padded = F.pad(t_662, [1, 1, 1, 1], value=0) + t_663 = self.n_Conv_132(t_662_padded) + t_664 = F.relu(t_663) + t_665 = self.n_Conv_133(t_664) + t_666 = torch.add(t_665, t_660) + t_667 = F.relu(t_666) + t_668 = self.n_Conv_134(t_667) + t_669 = F.relu(t_668) + t_669_padded = F.pad(t_669, [1, 1, 1, 1], value=0) + t_670 = self.n_Conv_135(t_669_padded) + t_671 = F.relu(t_670) + t_672 = self.n_Conv_136(t_671) + t_673 = torch.add(t_672, t_667) + t_674 = F.relu(t_673) + t_675 = self.n_Conv_137(t_674) + t_676 = F.relu(t_675) + t_676_padded = F.pad(t_676, [1, 1, 1, 1], value=0) + t_677 = self.n_Conv_138(t_676_padded) + t_678 = F.relu(t_677) + t_679 = self.n_Conv_139(t_678) + t_680 = torch.add(t_679, t_674) + t_681 = F.relu(t_680) + t_682 = self.n_Conv_140(t_681) + t_683 = F.relu(t_682) + t_683_padded = F.pad(t_683, [1, 1, 1, 1], value=0) + t_684 = self.n_Conv_141(t_683_padded) + t_685 = F.relu(t_684) + t_686 = self.n_Conv_142(t_685) + t_687 = torch.add(t_686, t_681) + t_688 = F.relu(t_687) + t_689 = self.n_Conv_143(t_688) + t_690 = F.relu(t_689) + t_690_padded = F.pad(t_690, [1, 1, 1, 1], value=0) + t_691 = self.n_Conv_144(t_690_padded) + t_692 = F.relu(t_691) + t_693 = self.n_Conv_145(t_692) + t_694 = torch.add(t_693, t_688) + t_695 = F.relu(t_694) + t_696 = self.n_Conv_146(t_695) + t_697 = F.relu(t_696) + t_697_padded = F.pad(t_697, [1, 1, 1, 1], value=0) + t_698 = self.n_Conv_147(t_697_padded) + t_699 = F.relu(t_698) + t_700 = self.n_Conv_148(t_699) + t_701 = torch.add(t_700, t_695) + t_702 = F.relu(t_701) + t_703 = self.n_Conv_149(t_702) + t_704 = F.relu(t_703) + t_704_padded = F.pad(t_704, [1, 1, 1, 1], value=0) + t_705 = self.n_Conv_150(t_704_padded) + t_706 = F.relu(t_705) + t_707 = self.n_Conv_151(t_706) + t_708 = torch.add(t_707, t_702) + t_709 = F.relu(t_708) + t_710 = self.n_Conv_152(t_709) + t_711 = F.relu(t_710) + t_711_padded = F.pad(t_711, [1, 1, 1, 1], value=0) + t_712 = self.n_Conv_153(t_711_padded) + t_713 = F.relu(t_712) + t_714 = self.n_Conv_154(t_713) + t_715 = torch.add(t_714, t_709) + t_716 = F.relu(t_715) + t_717 = self.n_Conv_155(t_716) + t_718 = F.relu(t_717) + t_718_padded = F.pad(t_718, [1, 1, 
1, 1], value=0) + t_719 = self.n_Conv_156(t_718_padded) + t_720 = F.relu(t_719) + t_721 = self.n_Conv_157(t_720) + t_722 = torch.add(t_721, t_716) + t_723 = F.relu(t_722) + t_724 = self.n_Conv_158(t_723) + t_725 = self.n_Conv_159(t_723) + t_726 = F.relu(t_725) + t_726_padded = F.pad(t_726, [0, 1, 0, 1], value=0) + t_727 = self.n_Conv_160(t_726_padded) + t_728 = F.relu(t_727) + t_729 = self.n_Conv_161(t_728) + t_730 = torch.add(t_729, t_724) + t_731 = F.relu(t_730) + t_732 = self.n_Conv_162(t_731) + t_733 = F.relu(t_732) + t_733_padded = F.pad(t_733, [1, 1, 1, 1], value=0) + t_734 = self.n_Conv_163(t_733_padded) + t_735 = F.relu(t_734) + t_736 = self.n_Conv_164(t_735) + t_737 = torch.add(t_736, t_731) + t_738 = F.relu(t_737) + t_739 = self.n_Conv_165(t_738) + t_740 = F.relu(t_739) + t_740_padded = F.pad(t_740, [1, 1, 1, 1], value=0) + t_741 = self.n_Conv_166(t_740_padded) + t_742 = F.relu(t_741) + t_743 = self.n_Conv_167(t_742) + t_744 = torch.add(t_743, t_738) + t_745 = F.relu(t_744) + t_746 = self.n_Conv_168(t_745) + t_747 = self.n_Conv_169(t_745) + t_748 = F.relu(t_747) + t_748_padded = F.pad(t_748, [0, 1, 0, 1], value=0) + t_749 = self.n_Conv_170(t_748_padded) + t_750 = F.relu(t_749) + t_751 = self.n_Conv_171(t_750) + t_752 = torch.add(t_751, t_746) + t_753 = F.relu(t_752) + t_754 = self.n_Conv_172(t_753) + t_755 = F.relu(t_754) + t_755_padded = F.pad(t_755, [1, 1, 1, 1], value=0) + t_756 = self.n_Conv_173(t_755_padded) + t_757 = F.relu(t_756) + t_758 = self.n_Conv_174(t_757) + t_759 = torch.add(t_758, t_753) + t_760 = F.relu(t_759) + t_761 = self.n_Conv_175(t_760) + t_762 = F.relu(t_761) + t_762_padded = F.pad(t_762, [1, 1, 1, 1], value=0) + t_763 = self.n_Conv_176(t_762_padded) + t_764 = F.relu(t_763) + t_765 = self.n_Conv_177(t_764) + t_766 = torch.add(t_765, t_760) + t_767 = F.relu(t_766) + t_768 = self.n_Conv_178(t_767) + t_769 = F.avg_pool2d(t_768, kernel_size=t_768.shape[-2:]) + t_770 = torch.squeeze(t_769, 3) + t_770 = torch.squeeze(t_770, 2) + t_771 = torch.sigmoid(t_770) + return t_771 + + def load_state_dict(self, state_dict, **kwargs): + self.tags = state_dict.get('tags', []) + + super(DeepDanbooruModel, self).load_state_dict({k: v for k, v in state_dict.items() if k != 'tags'}) + diff --git a/modules/devices.py b/modules/devices.py new file mode 100644 index 0000000000000000000000000000000000000000..08d0d706756749144aac0f0b3a2ef5d10c5a0298 --- /dev/null +++ b/modules/devices.py @@ -0,0 +1,102 @@ +import contextlib +import torch +import ldm_patched.modules.model_management as model_management + + +def has_xpu() -> bool: + return model_management.xpu_available + + +def has_mps() -> bool: + return model_management.mps_mode() + + +def cuda_no_autocast(device_id=None) -> bool: + return False + + +def get_cuda_device_id(): + return model_management.get_torch_device().index + + +def get_cuda_device_string(): + return str(model_management.get_torch_device()) + + +def get_optimal_device_name(): + return model_management.get_torch_device().type + + +def get_optimal_device(): + return model_management.get_torch_device() + + +def get_device_for(task): + return model_management.get_torch_device() + + +def torch_gc(): + model_management.soft_empty_cache() + + +def torch_npu_set_device(): + return + + +def enable_tf32(): + return + + +cpu: torch.device = torch.device("cpu") +fp8: bool = False +device: torch.device = model_management.get_torch_device() +device_interrogate: torch.device = model_management.text_encoder_device() # for backward compatibility, not used now +device_gfpgan: 
torch.device = model_management.get_torch_device() # will be managed by memory management system +device_esrgan: torch.device = model_management.get_torch_device() # will be managed by memory management system +device_codeformer: torch.device = model_management.get_torch_device() # will be managed by memory management system +dtype: torch.dtype = model_management.unet_dtype() +dtype_vae: torch.dtype = model_management.vae_dtype() +dtype_unet: torch.dtype = model_management.unet_dtype() +dtype_inference: torch.dtype = model_management.unet_dtype() +unet_needs_upcast = False + + +def cond_cast_unet(input): + return input + + +def cond_cast_float(input): + return input + + +nv_rng = None +patch_module_list = [] + + +def manual_cast_forward(target_dtype): + return + + +@contextlib.contextmanager +def manual_cast(target_dtype): + return + + +def autocast(disable=False): + return contextlib.nullcontext() + + +def without_autocast(disable=False): + return contextlib.nullcontext() + + +class NansException(Exception): + pass + + +def test_for_nans(x, where): + return + + +def first_time_calculation(): + return diff --git a/modules/errors.py b/modules/errors.py new file mode 100644 index 0000000000000000000000000000000000000000..3320f45b03daed9e7a72ac3943d706310c3aa85f --- /dev/null +++ b/modules/errors.py @@ -0,0 +1,150 @@ +import sys +import textwrap +import traceback + + +exception_records = [] + + +def format_traceback(tb): + return [[f"{x.filename}, line {x.lineno}, {x.name}", x.line] for x in traceback.extract_tb(tb)] + + +def format_exception(e, tb): + return {"exception": str(e), "traceback": format_traceback(tb)} + + +def get_exceptions(): + try: + return list(reversed(exception_records)) + except Exception as e: + return str(e) + + +def record_exception(): + _, e, tb = sys.exc_info() + if e is None: + return + + if exception_records and exception_records[-1] == e: + return + + exception_records.append(format_exception(e, tb)) + + if len(exception_records) > 5: + exception_records.pop(0) + + +def report(message: str, *, exc_info: bool = False) -> None: + """ + Print an error message to stderr, with optional traceback. + """ + + record_exception() + + for line in message.splitlines(): + print("***", line, file=sys.stderr) + if exc_info: + print(textwrap.indent(traceback.format_exc(), " "), file=sys.stderr) + print("---", file=sys.stderr) + + +def print_error_explanation(message): + record_exception() + + lines = message.strip().split("\n") + max_len = max([len(x) for x in lines]) + + print('=' * max_len, file=sys.stderr) + for line in lines: + print(line, file=sys.stderr) + print('=' * max_len, file=sys.stderr) + + +def display(e: Exception, task, *, full_traceback=False): + record_exception() + + print(f"{task or 'error'}: {type(e).__name__}", file=sys.stderr) + te = traceback.TracebackException.from_exception(e) + if full_traceback: + # include frames leading up to the try-catch block + te.stack = traceback.StackSummary(traceback.extract_stack()[:-2] + te.stack) + print(*te.format(), sep="", file=sys.stderr) + + message = str(e) + if "copying a param with shape torch.Size([640, 1024]) from checkpoint, the shape in current model is torch.Size([640, 768])" in message: + print_error_explanation(""" +The most likely cause of this is you are trying to load Stable Diffusion 2.0 model without specifying its config file. +See https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Features#stable-diffusion-20 for how to solve this. 
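For orientation, here is a minimal inference sketch for the DeepDanbooruModel defined above, using the devices module that follows it. It is an illustration only: the 512x512 input size, the threshold value, and the tag_image helper name are assumptions, not code from the repository.

import numpy as np
import torch
from PIL import Image

# from modules import deepbooru_model   # DeepDanbooruModel is defined in this diff

def tag_image(model, image_path, threshold=0.5):
    # The exported graph expects an NHWC float batch in [0, 1] and permutes to NCHW itself;
    # 512x512 is the resolution DeepDanbooru checkpoints are normally evaluated at (assumption).
    pic = Image.open(image_path).convert("RGB").resize((512, 512))
    x = torch.from_numpy(np.asarray(pic, dtype=np.float32) / 255.0).unsqueeze(0)

    param = next(model.parameters())
    with torch.no_grad():
        probs = model(x.to(device=param.device, dtype=param.dtype))[0]  # one sigmoid score per tag

    # model.tags is filled by load_state_dict() from the checkpoint's 'tags' entry.
    return [tag for tag, score in zip(model.tags, probs.float().cpu().numpy()) if score >= threshold]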
+ """) + + +already_displayed = {} + + +def display_once(e: Exception, task): + record_exception() + + if task in already_displayed: + return + + display(e, task) + + already_displayed[task] = 1 + + +def run(code, task): + try: + code() + except Exception as e: + display(task, e) + + +def check_versions(): + from packaging import version + from modules import shared + + import torch + import gradio + + expected_torch_version = "2.1.2" + expected_xformers_version = "0.0.23.post1" + expected_gradio_version = "3.41.2" + + if version.parse(torch.__version__) < version.parse(expected_torch_version): + print_error_explanation(f""" +You are running torch {torch.__version__}. +The program is tested to work with torch {expected_torch_version}. +To reinstall the desired version, run with commandline flag --reinstall-torch. +Beware that this will cause a lot of large files to be downloaded, as well as +there are reports of issues with training tab on the latest version. + +Use --skip-version-check commandline argument to disable this check. + """.strip()) + + if shared.xformers_available: + import xformers + + if version.parse(xformers.__version__) < version.parse(expected_xformers_version): + print_error_explanation(f""" +You are running xformers {xformers.__version__}. +The program is tested to work with xformers {expected_xformers_version}. +To reinstall the desired version, run with commandline flag --reinstall-xformers. + +Use --skip-version-check commandline argument to disable this check. + """.strip()) + + if gradio.__version__ != expected_gradio_version: + print_error_explanation(f""" +You are running gradio {gradio.__version__}. +The program is designed to work with gradio {expected_gradio_version}. +Using a different version of gradio is extremely likely to break the program. + +Reasons why you have the mismatched gradio version can be: + - you use --skip-install flag. + - you use webui.py to start the program instead of launch.py. + - an extension installs the incompatible gradio version. + +Use --skip-version-check commandline argument to disable this check. 
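A small usage sketch of the error helpers above may help; the failing function, the path, and the task strings below are placeholders rather than calls taken from the webui itself.

# from modules import errors   # module defined in this diff

def load_checkpoint(path):
    raise OSError(f"cannot open {path}")          # stand-in for a real failure

try:
    load_checkpoint("/tmp/example.ckpt")          # placeholder path
except Exception as e:
    # Prints "loading example checkpoint: OSError" plus the traceback to stderr,
    # and records the exception so get_exceptions() can hand it to the UI later.
    errors.display(e, "loading example checkpoint")

# report() is for problems that are not tied to a caught exception object:
errors.report("ESRGAN model directory is empty", exc_info=False)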
+ """.strip()) + diff --git a/modules/esrgan_model.py b/modules/esrgan_model.py new file mode 100644 index 0000000000000000000000000000000000000000..ee16075b7bd69da5eb982f7b67feb2821514368e --- /dev/null +++ b/modules/esrgan_model.py @@ -0,0 +1,64 @@ +from modules import modelloader, devices, errors +from modules.shared import opts +from modules.upscaler import Upscaler, UpscalerData +from modules.upscaler_utils import upscale_with_model +from modules_forge.forge_util import prepare_free_memory + + +class UpscalerESRGAN(Upscaler): + def __init__(self, dirname): + self.name = "ESRGAN" + self.model_url = "https://github.com/cszn/KAIR/releases/download/v1.0/ESRGAN.pth" + self.model_name = "ESRGAN_4x" + self.scalers = [] + self.user_path = dirname + super().__init__() + model_paths = self.find_models(ext_filter=[".pt", ".pth"]) + scalers = [] + if len(model_paths) == 0: + scaler_data = UpscalerData(self.model_name, self.model_url, self, 4) + scalers.append(scaler_data) + for file in model_paths: + if file.startswith("http"): + name = self.model_name + else: + name = modelloader.friendly_name(file) + + scaler_data = UpscalerData(name, file, self, 4) + self.scalers.append(scaler_data) + + def do_upscale(self, img, selected_model): + prepare_free_memory() + try: + model = self.load_model(selected_model) + except Exception: + errors.report(f"Unable to load ESRGAN model {selected_model}", exc_info=True) + return img + model.to(devices.device_esrgan) + return esrgan_upscale(model, img) + + def load_model(self, path: str): + if path.startswith("http"): + # TODO: this doesn't use `path` at all? + filename = modelloader.load_file_from_url( + url=self.model_url, + model_dir=self.model_download_path, + file_name=f"{self.model_name}.pth", + ) + else: + filename = path + + return modelloader.load_spandrel_model( + filename, + device=('cpu' if devices.device_esrgan.type == 'mps' else None), + expected_architecture='ESRGAN', + ) + + +def esrgan_upscale(model, img): + return upscale_with_model( + model, + img, + tile_size=opts.ESRGAN_tile, + tile_overlap=opts.ESRGAN_tile_overlap, + ) diff --git a/modules/extensions.py b/modules/extensions.py new file mode 100644 index 0000000000000000000000000000000000000000..917ae8f5ae5c9815df25a41887aff614ea27bd72 --- /dev/null +++ b/modules/extensions.py @@ -0,0 +1,252 @@ +from __future__ import annotations + +import configparser +import os +import threading +import re + +from modules import shared, errors, cache, scripts +from modules.gitpython_hack import Repo +from modules.paths_internal import extensions_dir, extensions_builtin_dir, script_path # noqa: F401 +from modules_forge.config import always_disabled_extensions + + +os.makedirs(extensions_dir, exist_ok=True) + + +def active(): + if shared.cmd_opts.disable_all_extensions or shared.opts.disable_all_extensions == "all": + return [] + elif shared.cmd_opts.disable_extra_extensions or shared.opts.disable_all_extensions == "extra": + return [x for x in extensions if x.enabled and x.is_builtin] + else: + return [x for x in extensions if x.enabled] + + +class ExtensionMetadata: + filename = "metadata.ini" + config: configparser.ConfigParser + canonical_name: str + requires: list + + def __init__(self, path, canonical_name): + self.config = configparser.ConfigParser() + + filepath = os.path.join(path, self.filename) + # `self.config.read()` will quietly swallow OSErrors (which FileNotFoundError is), + # so no need to check whether the file exists beforehand. 
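A hedged sketch of driving the ESRGAN upscaler defined just above directly from Python; the model directory, checkpoint path, and image file names are placeholders, and in practice the webui calls do_upscale() through the Upscaler base class rather than by hand.

from PIL import Image

# from modules.esrgan_model import UpscalerESRGAN   # defined in this diff

upscaler = UpscalerESRGAN("models/ESRGAN")                        # hypothetical model directory
img = Image.open("input.png").convert("RGB")                      # placeholder input image

# do_upscale() loads the selected .pth (or downloads model_url for http paths),
# tiles the image according to opts.ESRGAN_tile, and returns the upscaled PIL image;
# if the model cannot be loaded it reports the error and returns img unchanged.
result = upscaler.do_upscale(img, "models/ESRGAN/ESRGAN_4x.pth")  # placeholder checkpoint path
result.save("output.png")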
+ try: + self.config.read(filepath) + except Exception: + errors.report(f"Error reading {self.filename} for extension {canonical_name}.", exc_info=True) + + self.canonical_name = self.config.get("Extension", "Name", fallback=canonical_name) + self.canonical_name = canonical_name.lower().strip() + + self.requires = self.get_script_requirements("Requires", "Extension") + + def get_script_requirements(self, field, section, extra_section=None): + """reads a list of requirements from the config; field is the name of the field in the ini file, + like Requires or Before, and section is the name of the [section] in the ini file; additionally, + reads more requirements from [extra_section] if specified.""" + + x = self.config.get(section, field, fallback='') + + if extra_section: + x = x + ', ' + self.config.get(extra_section, field, fallback='') + + return self.parse_list(x.lower()) + + def parse_list(self, text): + """converts a line from config ("ext1 ext2, ext3 ") into a python list (["ext1", "ext2", "ext3"])""" + + if not text: + return [] + + # both "," and " " are accepted as separator + return [x for x in re.split(r"[,\s]+", text.strip()) if x] + + +class Extension: + lock = threading.Lock() + cached_fields = ['remote', 'commit_date', 'branch', 'commit_hash', 'version'] + metadata: ExtensionMetadata + + def __init__(self, name, path, enabled=True, is_builtin=False, metadata=None): + self.name = name + self.path = path + self.enabled = enabled + self.status = '' + self.can_update = False + self.is_builtin = is_builtin + self.commit_hash = '' + self.commit_date = None + self.version = '' + self.branch = None + self.remote = None + self.have_info_from_repo = False + self.metadata = metadata if metadata else ExtensionMetadata(self.path, name.lower()) + self.canonical_name = metadata.canonical_name + + def to_dict(self): + return {x: getattr(self, x) for x in self.cached_fields} + + def from_dict(self, d): + for field in self.cached_fields: + setattr(self, field, d[field]) + + def read_info_from_repo(self): + if self.is_builtin or self.have_info_from_repo: + return + + def read_from_repo(): + with self.lock: + if self.have_info_from_repo: + return + + self.do_read_info_from_repo() + + return self.to_dict() + + try: + d = cache.cached_data_for_file('extensions-git', self.name, os.path.join(self.path, ".git"), read_from_repo) + self.from_dict(d) + except FileNotFoundError: + pass + self.status = 'unknown' if self.status == '' else self.status + + def do_read_info_from_repo(self): + repo = None + try: + if os.path.exists(os.path.join(self.path, ".git")): + repo = Repo(self.path) + except Exception: + errors.report(f"Error reading github repository info from {self.path}", exc_info=True) + + if repo is None or repo.bare: + self.remote = None + else: + try: + self.remote = next(repo.remote().urls, None) + commit = repo.head.commit + self.commit_date = commit.committed_date + if repo.active_branch: + self.branch = repo.active_branch.name + self.commit_hash = commit.hexsha + self.version = self.commit_hash[:8] + + except Exception: + errors.report(f"Failed reading extension data from Git repository ({self.name})", exc_info=True) + self.remote = None + + self.have_info_from_repo = True + + def list_files(self, subdir, extension): + dirpath = os.path.join(self.path, subdir) + if not os.path.isdir(dirpath): + return [] + + res = [] + for filename in sorted(os.listdir(dirpath)): + res.append(scripts.ScriptFile(self.path, filename, os.path.join(dirpath, filename))) + + res = [x for x in res if 
os.path.splitext(x.path)[1].lower() == extension and os.path.isfile(x.path)] + + return res + + def check_updates(self): + repo = Repo(self.path) + for fetch in repo.remote().fetch(dry_run=True): + if fetch.flags != fetch.HEAD_UPTODATE: + self.can_update = True + self.status = "new commits" + return + + try: + origin = repo.rev_parse('origin') + if repo.head.commit != origin: + self.can_update = True + self.status = "behind HEAD" + return + except Exception: + self.can_update = False + self.status = "unknown (remote error)" + return + + self.can_update = False + self.status = "latest" + + def fetch_and_reset_hard(self, commit='origin'): + repo = Repo(self.path) + # Fix: `error: Your local changes to the following files would be overwritten by merge`, + # because WSL2 Docker set 755 file permissions instead of 644, this results to the error. + repo.git.fetch(all=True) + repo.git.reset(commit, hard=True) + self.have_info_from_repo = False + + +def list_extensions(): + extensions.clear() + + if shared.cmd_opts.disable_all_extensions: + print("*** \"--disable-all-extensions\" arg was used, will not load any extensions ***") + elif shared.opts.disable_all_extensions == "all": + print("*** \"Disable all extensions\" option was set, will not load any extensions ***") + elif shared.cmd_opts.disable_extra_extensions: + print("*** \"--disable-extra-extensions\" arg was used, will only load built-in extensions ***") + elif shared.opts.disable_all_extensions == "extra": + print("*** \"Disable all extensions\" option was set, will only load built-in extensions ***") + + loaded_extensions = {} + + # scan through extensions directory and load metadata + for dirname in [extensions_builtin_dir, extensions_dir]: + if not os.path.isdir(dirname): + continue + + for extension_dirname in sorted(os.listdir(dirname)): + path = os.path.join(dirname, extension_dirname) + if not os.path.isdir(path): + continue + + canonical_name = extension_dirname + metadata = ExtensionMetadata(path, canonical_name) + + # check for duplicated canonical names + already_loaded_extension = loaded_extensions.get(metadata.canonical_name) + if already_loaded_extension is not None: + errors.report(f'Duplicate canonical name "{canonical_name}" found in extensions "{extension_dirname}" and "{already_loaded_extension.name}". 
Former will be discarded.', exc_info=False) + continue + + is_builtin = dirname == extensions_builtin_dir + + disabled_extensions = shared.opts.disabled_extensions + always_disabled_extensions + + extension = Extension( + name=extension_dirname, + path=path, + enabled=extension_dirname not in disabled_extensions, + is_builtin=is_builtin, + metadata=metadata + ) + + extensions.append(extension) + loaded_extensions[canonical_name] = extension + + # check for requirements + for extension in extensions: + if not extension.enabled: + continue + + for req in extension.metadata.requires: + required_extension = loaded_extensions.get(req) + if required_extension is None: + errors.report(f'Extension "{extension.name}" requires "{req}" which is not installed.', exc_info=False) + continue + + if not required_extension.enabled: + errors.report(f'Extension "{extension.name}" requires "{required_extension.name}" which is disabled.', exc_info=False) + continue + + +extensions: list[Extension] = [] diff --git a/modules/extra_networks.py b/modules/extra_networks.py new file mode 100644 index 0000000000000000000000000000000000000000..36ea4f1e15cc58b888c2f4eaa0c84c7ecb32f568 --- /dev/null +++ b/modules/extra_networks.py @@ -0,0 +1,225 @@ +import json +import os +import re +import logging +from collections import defaultdict + +from modules import errors + +extra_network_registry = {} +extra_network_aliases = {} + + +def initialize(): + extra_network_registry.clear() + extra_network_aliases.clear() + + +def register_extra_network(extra_network): + extra_network_registry[extra_network.name] = extra_network + + +def register_extra_network_alias(extra_network, alias): + extra_network_aliases[alias] = extra_network + + +def register_default_extra_networks(): + from modules.extra_networks_hypernet import ExtraNetworkHypernet + register_extra_network(ExtraNetworkHypernet()) + + +class ExtraNetworkParams: + def __init__(self, items=None): + self.items = items or [] + self.positional = [] + self.named = {} + + for item in self.items: + parts = item.split('=', 2) if isinstance(item, str) else [item] + if len(parts) == 2: + self.named[parts[0]] = parts[1] + else: + self.positional.append(item) + + def __eq__(self, other): + return self.items == other.items + + +class ExtraNetwork: + def __init__(self, name): + self.name = name + + def activate(self, p, params_list): + """ + Called by processing on every run. Whatever the extra network is meant to do should be activated here. + Passes arguments related to this extra network in params_list. + User passes arguments by specifying this in his prompt: + + + + Where name matches the name of this ExtraNetwork object, and arg1:arg2:arg3 are any natural number of text arguments + separated by colon. + + Even if the user does not mention this ExtraNetwork in his prompt, the call will stil be made, with empty params_list - + in this case, all effects of this extra networks should be disabled. + + Can be called multiple times before deactivate() - each new call should override the previous call completely. + + For example, if this ExtraNetwork's name is 'hypernet' and user's prompt is: + + > "1girl, " + + params_list will be: + + [ + ExtraNetworkParams(items=["agm", "1.1"]), + ExtraNetworkParams(items=["ray"]) + ] + + """ + raise NotImplementedError + + def deactivate(self, p): + """ + Called at the end of processing for housekeeping. No need to do anything here. 
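The activate() docstring above refers to the angle-bracket prompt syntax that the re_extra_net pattern further down in this file matches, i.e. <name:arg1:arg2:...>. A short sketch of what parse_prompt() yields for the hypernet example the docstring describes (the prompt string itself is illustrative):

# from modules import extra_networks   # module defined in this diff

prompt = "1girl, <hypernet:agm:1.1>, <hypernet:ray>"
text, data = extra_networks.parse_prompt(prompt)

# text == "1girl, , "                   -- the <...> blocks are removed from the prompt
# data == {"hypernet": [ExtraNetworkParams(items=["agm", "1.1"]),
#                       ExtraNetworkParams(items=["ray"])]}

params = data["hypernet"][0]
# Plain colon-separated pieces become positional arguments; "key=value" pieces go to .named.
assert params.items == ["agm", "1.1"]
assert params.positional == ["agm", "1.1"]
assert params.named == {}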
+ """ + + raise NotImplementedError + + +def lookup_extra_networks(extra_network_data): + """returns a dict mapping ExtraNetwork objects to lists of arguments for those extra networks. + + Example input: + { + 'lora': [], + 'lyco': [], + 'hypernet': [] + } + + Example output: + + { + : [, ], + : [] + } + """ + + res = {} + + for extra_network_name, extra_network_args in list(extra_network_data.items()): + extra_network = extra_network_registry.get(extra_network_name, None) + alias = extra_network_aliases.get(extra_network_name, None) + + if alias is not None and extra_network is None: + extra_network = alias + + if extra_network is None: + logging.info(f"Skipping unknown extra network: {extra_network_name}") + continue + + res.setdefault(extra_network, []).extend(extra_network_args) + + return res + + +def activate(p, extra_network_data): + """call activate for extra networks in extra_network_data in specified order, then call + activate for all remaining registered networks with an empty argument list""" + + activated = [] + + for extra_network, extra_network_args in lookup_extra_networks(extra_network_data).items(): + + try: + extra_network.activate(p, extra_network_args) + activated.append(extra_network) + except Exception as e: + errors.display(e, f"activating extra network {extra_network.name} with arguments {extra_network_args}") + + for extra_network_name, extra_network in extra_network_registry.items(): + if extra_network in activated: + continue + + try: + extra_network.activate(p, []) + except Exception as e: + errors.display(e, f"activating extra network {extra_network_name}") + + if p.scripts is not None: + p.scripts.after_extra_networks_activate(p, batch_number=p.iteration, prompts=p.prompts, seeds=p.seeds, subseeds=p.subseeds, extra_network_data=extra_network_data) + + +def deactivate(p, extra_network_data): + """call deactivate for extra networks in extra_network_data in specified order, then call + deactivate for all remaining registered networks""" + + data = lookup_extra_networks(extra_network_data) + + for extra_network in data: + try: + extra_network.deactivate(p) + except Exception as e: + errors.display(e, f"deactivating extra network {extra_network.name}") + + for extra_network_name, extra_network in extra_network_registry.items(): + if extra_network in data: + continue + + try: + extra_network.deactivate(p) + except Exception as e: + errors.display(e, f"deactivating unmentioned extra network {extra_network_name}") + + +re_extra_net = re.compile(r"<(\w+):([^>]+)>") + + +def parse_prompt(prompt): + res = defaultdict(list) + + def found(m): + name = m.group(1) + args = m.group(2) + + res[name].append(ExtraNetworkParams(items=args.split(":"))) + + return "" + + prompt = re.sub(re_extra_net, found, prompt) + + return prompt, res + + +def parse_prompts(prompts): + res = [] + extra_data = None + + for prompt in prompts: + updated_prompt, parsed_extra_data = parse_prompt(prompt) + + if extra_data is None: + extra_data = parsed_extra_data + + res.append(updated_prompt) + + return res, extra_data + + +def get_user_metadata(filename, lister=None): + if filename is None: + return {} + + basename, ext = os.path.splitext(filename) + metadata_filename = basename + '.json' + + metadata = {} + try: + exists = lister.exists(metadata_filename) if lister else os.path.exists(metadata_filename) + if exists: + with open(metadata_filename, "r", encoding="utf8") as file: + metadata = json.load(file) + except Exception as e: + errors.display(e, f"reading extra network user metadata from 
{metadata_filename}") + + return metadata diff --git a/modules/extra_networks_hypernet.py b/modules/extra_networks_hypernet.py new file mode 100644 index 0000000000000000000000000000000000000000..192f11b9cbd88447a0f80dbd2f0ace26d74f18b2 --- /dev/null +++ b/modules/extra_networks_hypernet.py @@ -0,0 +1,28 @@ +from modules import extra_networks, shared +from modules.hypernetworks import hypernetwork + + +class ExtraNetworkHypernet(extra_networks.ExtraNetwork): + def __init__(self): + super().__init__('hypernet') + + def activate(self, p, params_list): + additional = shared.opts.sd_hypernetwork + + if additional != "None" and additional in shared.hypernetworks and not any(x for x in params_list if x.items[0] == additional): + hypernet_prompt_text = f"" + p.all_prompts = [f"{prompt}{hypernet_prompt_text}" for prompt in p.all_prompts] + params_list.append(extra_networks.ExtraNetworkParams(items=[additional, shared.opts.extra_networks_default_multiplier])) + + names = [] + multipliers = [] + for params in params_list: + assert params.items + + names.append(params.items[0]) + multipliers.append(float(params.items[1]) if len(params.items) > 1 else 1.0) + + hypernetwork.load_hypernetworks(names, multipliers) + + def deactivate(self, p): + pass diff --git a/modules/extras.py b/modules/extras.py new file mode 100644 index 0000000000000000000000000000000000000000..4653c3f6edacc067eb935c81d7e71c71790ce449 --- /dev/null +++ b/modules/extras.py @@ -0,0 +1,330 @@ +import os +import re +import shutil +import json + + +import torch +import tqdm + +from modules import shared, images, sd_models, sd_vae, sd_models_config, errors +from modules.ui_common import plaintext_to_html +import gradio as gr +import safetensors.torch + + +def run_pnginfo(image): + if image is None: + return '', '', '' + + geninfo, items = images.read_info_from_image(image) + items = {**{'parameters': geninfo}, **items} + + info = '' + for key, text in items.items(): + info += f""" +
<div> +<p><b>{plaintext_to_html(str(key))}</b></p> +<p>{plaintext_to_html(str(text))}</p> +</div> +""".strip()+"\n" + + if len(info) == 0: + message = "Nothing found in the image." + info = f"<div><p>{message}</p></div>
    " + + return '', geninfo, info + + +def create_config(ckpt_result, config_source, a, b, c): + def config(x): + res = sd_models_config.find_checkpoint_config_near_filename(x) if x else None + return res if res != shared.sd_default_config else None + + if config_source == 0: + cfg = config(a) or config(b) or config(c) + elif config_source == 1: + cfg = config(b) + elif config_source == 2: + cfg = config(c) + else: + cfg = None + + if cfg is None: + return + + filename, _ = os.path.splitext(ckpt_result) + checkpoint_filename = filename + ".yaml" + + print("Copying config:") + print(" from:", cfg) + print(" to:", checkpoint_filename) + shutil.copyfile(cfg, checkpoint_filename) + + +checkpoint_dict_skip_on_merge = ["cond_stage_model.transformer.text_model.embeddings.position_ids"] + + +def to_half(tensor, enable): + if enable and tensor.dtype == torch.float: + return tensor.half() + + return tensor + + +def read_metadata(primary_model_name, secondary_model_name, tertiary_model_name): + metadata = {} + + for checkpoint_name in [primary_model_name, secondary_model_name, tertiary_model_name]: + checkpoint_info = sd_models.checkpoints_list.get(checkpoint_name, None) + if checkpoint_info is None: + continue + + metadata.update(checkpoint_info.metadata) + + return json.dumps(metadata, indent=4, ensure_ascii=False) + + +def run_modelmerger(id_task, primary_model_name, secondary_model_name, tertiary_model_name, interp_method, multiplier, save_as_half, custom_name, checkpoint_format, config_source, bake_in_vae, discard_weights, save_metadata, add_merge_recipe, copy_metadata_fields, metadata_json): + shared.state.begin(job="model-merge") + + def fail(message): + shared.state.textinfo = message + shared.state.end() + return [*[gr.update() for _ in range(4)], message] + + def weighted_sum(theta0, theta1, alpha): + return ((1 - alpha) * theta0) + (alpha * theta1) + + def get_difference(theta1, theta2): + return theta1 - theta2 + + def add_difference(theta0, theta1_2_diff, alpha): + return theta0 + (alpha * theta1_2_diff) + + def filename_weighted_sum(): + a = primary_model_info.model_name + b = secondary_model_info.model_name + Ma = round(1 - multiplier, 2) + Mb = round(multiplier, 2) + + return f"{Ma}({a}) + {Mb}({b})" + + def filename_add_difference(): + a = primary_model_info.model_name + b = secondary_model_info.model_name + c = tertiary_model_info.model_name + M = round(multiplier, 2) + + return f"{a} + {M}({b} - {c})" + + def filename_nothing(): + return primary_model_info.model_name + + theta_funcs = { + "Weighted sum": (filename_weighted_sum, None, weighted_sum), + "Add difference": (filename_add_difference, get_difference, add_difference), + "No interpolation": (filename_nothing, None, None), + } + filename_generator, theta_func1, theta_func2 = theta_funcs[interp_method] + shared.state.job_count = (1 if theta_func1 else 0) + (1 if theta_func2 else 0) + + if not primary_model_name: + return fail("Failed: Merging requires a primary model.") + + primary_model_info = sd_models.checkpoints_list[primary_model_name] + + if theta_func2 and not secondary_model_name: + return fail("Failed: Merging requires a secondary model.") + + secondary_model_info = sd_models.checkpoints_list[secondary_model_name] if theta_func2 else None + + if theta_func1 and not tertiary_model_name: + return fail(f"Failed: Interpolation method ({interp_method}) requires a tertiary model.") + + tertiary_model_info = sd_models.checkpoints_list[tertiary_model_name] if theta_func1 else None + + result_is_inpainting_model = False + 
result_is_instruct_pix2pix_model = False + + if theta_func2: + shared.state.textinfo = "Loading B" + print(f"Loading {secondary_model_info.filename}...") + theta_1 = sd_models.read_state_dict(secondary_model_info.filename, map_location='cpu') + else: + theta_1 = None + + if theta_func1: + shared.state.textinfo = "Loading C" + print(f"Loading {tertiary_model_info.filename}...") + theta_2 = sd_models.read_state_dict(tertiary_model_info.filename, map_location='cpu') + + shared.state.textinfo = 'Merging B and C' + shared.state.sampling_steps = len(theta_1.keys()) + for key in tqdm.tqdm(theta_1.keys()): + if key in checkpoint_dict_skip_on_merge: + continue + + if 'model' in key: + if key in theta_2: + t2 = theta_2.get(key, torch.zeros_like(theta_1[key])) + theta_1[key] = theta_func1(theta_1[key], t2) + else: + theta_1[key] = torch.zeros_like(theta_1[key]) + + shared.state.sampling_step += 1 + del theta_2 + + shared.state.nextjob() + + shared.state.textinfo = f"Loading {primary_model_info.filename}..." + print(f"Loading {primary_model_info.filename}...") + theta_0 = sd_models.read_state_dict(primary_model_info.filename, map_location='cpu') + + print("Merging...") + shared.state.textinfo = 'Merging A and B' + shared.state.sampling_steps = len(theta_0.keys()) + for key in tqdm.tqdm(theta_0.keys()): + if theta_1 and 'model' in key and key in theta_1: + + if key in checkpoint_dict_skip_on_merge: + continue + + a = theta_0[key] + b = theta_1[key] + + # this enables merging an inpainting model (A) with another one (B); + # where normal model would have 4 channels, for latenst space, inpainting model would + # have another 4 channels for unmasked picture's latent space, plus one channel for mask, for a total of 9 + if a.shape != b.shape and a.shape[0:1] + a.shape[2:] == b.shape[0:1] + b.shape[2:]: + if a.shape[1] == 4 and b.shape[1] == 9: + raise RuntimeError("When merging inpainting model with a normal one, A must be the inpainting model.") + if a.shape[1] == 4 and b.shape[1] == 8: + raise RuntimeError("When merging instruct-pix2pix model with a normal one, A must be the instruct-pix2pix model.") + + if a.shape[1] == 8 and b.shape[1] == 4:#If we have an Instruct-Pix2Pix model... + theta_0[key][:, 0:4, :, :] = theta_func2(a[:, 0:4, :, :], b, multiplier)#Merge only the vectors the models have in common. Otherwise we get an error due to dimension mismatch. + result_is_instruct_pix2pix_model = True + else: + assert a.shape[1] == 9 and b.shape[1] == 4, f"Bad dimensions for merged layer {key}: A={a.shape}, B={b.shape}" + theta_0[key][:, 0:4, :, :] = theta_func2(a[:, 0:4, :, :], b, multiplier) + result_is_inpainting_model = True + else: + theta_0[key] = theta_func2(a, b, multiplier) + + theta_0[key] = to_half(theta_0[key], save_as_half) + + shared.state.sampling_step += 1 + + del theta_1 + + bake_in_vae_filename = sd_vae.vae_dict.get(bake_in_vae, None) + if bake_in_vae_filename is not None: + print(f"Baking in VAE from {bake_in_vae_filename}") + shared.state.textinfo = 'Baking in VAE' + vae_dict = sd_vae.load_vae_dict(bake_in_vae_filename, map_location='cpu') + + for key in vae_dict.keys(): + theta_0_key = 'first_stage_model.' 
+ key + if theta_0_key in theta_0: + theta_0[theta_0_key] = to_half(vae_dict[key], save_as_half) + + del vae_dict + + if save_as_half and not theta_func2: + for key in theta_0.keys(): + theta_0[key] = to_half(theta_0[key], save_as_half) + + if discard_weights: + regex = re.compile(discard_weights) + for key in list(theta_0): + if re.search(regex, key): + theta_0.pop(key, None) + + ckpt_dir = shared.cmd_opts.ckpt_dir or sd_models.model_path + + filename = filename_generator() if custom_name == '' else custom_name + filename += ".inpainting" if result_is_inpainting_model else "" + filename += ".instruct-pix2pix" if result_is_instruct_pix2pix_model else "" + filename += "." + checkpoint_format + + output_modelname = os.path.join(ckpt_dir, filename) + + shared.state.nextjob() + shared.state.textinfo = "Saving" + print(f"Saving to {output_modelname}...") + + metadata = {} + + if save_metadata and copy_metadata_fields: + if primary_model_info: + metadata.update(primary_model_info.metadata) + if secondary_model_info: + metadata.update(secondary_model_info.metadata) + if tertiary_model_info: + metadata.update(tertiary_model_info.metadata) + + if save_metadata: + try: + metadata.update(json.loads(metadata_json)) + except Exception as e: + errors.display(e, "readin metadata from json") + + metadata["format"] = "pt" + + if save_metadata and add_merge_recipe: + merge_recipe = { + "type": "webui", # indicate this model was merged with webui's built-in merger + "primary_model_hash": primary_model_info.sha256, + "secondary_model_hash": secondary_model_info.sha256 if secondary_model_info else None, + "tertiary_model_hash": tertiary_model_info.sha256 if tertiary_model_info else None, + "interp_method": interp_method, + "multiplier": multiplier, + "save_as_half": save_as_half, + "custom_name": custom_name, + "config_source": config_source, + "bake_in_vae": bake_in_vae, + "discard_weights": discard_weights, + "is_inpainting": result_is_inpainting_model, + "is_instruct_pix2pix": result_is_instruct_pix2pix_model + } + + sd_merge_models = {} + + def add_model_metadata(checkpoint_info): + checkpoint_info.calculate_shorthash() + sd_merge_models[checkpoint_info.sha256] = { + "name": checkpoint_info.name, + "legacy_hash": checkpoint_info.hash, + "sd_merge_recipe": checkpoint_info.metadata.get("sd_merge_recipe", None) + } + + sd_merge_models.update(checkpoint_info.metadata.get("sd_merge_models", {})) + + add_model_metadata(primary_model_info) + if secondary_model_info: + add_model_metadata(secondary_model_info) + if tertiary_model_info: + add_model_metadata(tertiary_model_info) + + metadata["sd_merge_recipe"] = json.dumps(merge_recipe) + metadata["sd_merge_models"] = json.dumps(sd_merge_models) + + _, extension = os.path.splitext(output_modelname) + if extension.lower() == ".safetensors": + safetensors.torch.save_file(theta_0, output_modelname, metadata=metadata if len(metadata)>0 else None) + else: + torch.save(theta_0, output_modelname) + + sd_models.list_models() + created_model = next((ckpt for ckpt in sd_models.checkpoints_list.values() if ckpt.name == filename), None) + if created_model: + created_model.calculate_shorthash() + + create_config(output_modelname, config_source, primary_model_info, secondary_model_info, tertiary_model_info) + + print(f"Checkpoint saved to {output_modelname}.") + shared.state.textinfo = "Checkpoint saved" + shared.state.end() + + return [*[gr.Dropdown.update(choices=sd_models.checkpoint_tiles()) for _ in range(4)], "Checkpoint saved to " + output_modelname] diff --git 
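The interpolation helpers inside run_modelmerger() above boil down to simple per-tensor arithmetic; a toy illustration with made-up values:

import torch

theta_a = torch.tensor([1.0, 2.0])    # weights from model A
theta_b = torch.tensor([3.0, 6.0])    # weights from model B
theta_c = torch.tensor([2.0, 2.0])    # weights from model C
alpha = 0.25                          # the "multiplier" slider

# "Weighted sum": (1 - alpha) * A + alpha * B
weighted = (1 - alpha) * theta_a + alpha * theta_b      # tensor([1.5000, 3.0000])

# "Add difference": A + alpha * (B - C)
add_diff = theta_a + alpha * (theta_b - theta_c)        # tensor([1.2500, 3.0000])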
a/modules/face_restoration.py b/modules/face_restoration.py new file mode 100644 index 0000000000000000000000000000000000000000..2c86c6ccce338a1411f4367a0bc6e4046ad67cae --- /dev/null +++ b/modules/face_restoration.py @@ -0,0 +1,19 @@ +from modules import shared + + +class FaceRestoration: + def name(self): + return "None" + + def restore(self, np_image): + return np_image + + +def restore_faces(np_image): + face_restorers = [x for x in shared.face_restorers if x.name() == shared.opts.face_restoration_model or shared.opts.face_restoration_model is None] + if len(face_restorers) == 0: + return np_image + + face_restorer = face_restorers[0] + + return face_restorer.restore(np_image) diff --git a/modules/face_restoration_utils.py b/modules/face_restoration_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..1ba017345e2f4ac6d3284bdd1c608f8152ff3609 --- /dev/null +++ b/modules/face_restoration_utils.py @@ -0,0 +1,182 @@ +from __future__ import annotations + +import logging +import os +from functools import cached_property +from typing import TYPE_CHECKING, Callable + +import cv2 +import numpy as np +import torch + +from modules import devices, errors, face_restoration, shared +from modules_forge.forge_util import prepare_free_memory + +if TYPE_CHECKING: + from facexlib.utils.face_restoration_helper import FaceRestoreHelper + +logger = logging.getLogger(__name__) + + +def bgr_image_to_rgb_tensor(img: np.ndarray) -> torch.Tensor: + """Convert a BGR NumPy image in [0..1] range to a PyTorch RGB float32 tensor.""" + assert img.shape[2] == 3, "image must be RGB" + if img.dtype == "float64": + img = img.astype("float32") + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + return torch.from_numpy(img.transpose(2, 0, 1)).float() + + +def rgb_tensor_to_bgr_image(tensor: torch.Tensor, *, min_max=(0.0, 1.0)) -> np.ndarray: + """ + Convert a PyTorch RGB tensor in range `min_max` to a BGR NumPy image in [0..1] range. + """ + tensor = tensor.squeeze(0).float().detach().cpu().clamp_(*min_max) + tensor = (tensor - min_max[0]) / (min_max[1] - min_max[0]) + assert tensor.dim() == 3, "tensor must be RGB" + img_np = tensor.numpy().transpose(1, 2, 0) + if img_np.shape[2] == 1: # gray image, no RGB/BGR required + return np.squeeze(img_np, axis=2) + return cv2.cvtColor(img_np, cv2.COLOR_BGR2RGB) + + +def create_face_helper(device) -> FaceRestoreHelper: + from facexlib.detection import retinaface + from facexlib.utils.face_restoration_helper import FaceRestoreHelper + if hasattr(retinaface, 'device'): + retinaface.device = device + return FaceRestoreHelper( + upscale_factor=1, + face_size=512, + crop_ratio=(1, 1), + det_model='retinaface_resnet50', + save_ext='png', + use_parse=True, + device=device, + ) + + +def restore_with_face_helper( + np_image: np.ndarray, + face_helper: FaceRestoreHelper, + restore_face: Callable[[torch.Tensor], torch.Tensor], +) -> np.ndarray: + """ + Find faces in the image using face_helper, restore them using restore_face, and paste them back into the image. + + `restore_face` should take a cropped face image and return a restored face image. 
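As a minimal sketch of the restore_face contract just described: the identity function below is a placeholder where a real restorer (GFPGAN, CodeFormer and so on) would run its network, and the commented-out call assumes facexlib is installed so a FaceRestoreHelper can be built.

import numpy as np
import torch

# from modules import face_restoration_utils   # module defined in this diff

def identity_restore_face(cropped_face_t: torch.Tensor) -> torch.Tensor:
    # Receives one aligned face crop as a (1, 3, 512, 512) RGB tensor normalized to [-1, 1]
    # and must return a tensor of the same shape; a real restorer runs its network here.
    return cropped_face_t

# np_image = np.zeros((256, 256, 3), dtype=np.uint8)            # RGB uint8 image, as the callers use
# helper = face_restoration_utils.create_face_helper(torch.device("cpu"))
# out = face_restoration_utils.restore_with_face_helper(np_image, helper, identity_restore_face)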
+ """ + from torchvision.transforms.functional import normalize + np_image = np_image[:, :, ::-1] + original_resolution = np_image.shape[0:2] + + try: + logger.debug("Detecting faces...") + face_helper.clean_all() + face_helper.read_image(np_image) + face_helper.get_face_landmarks_5(only_center_face=False, resize=640, eye_dist_threshold=5) + face_helper.align_warp_face() + logger.debug("Found %d faces, restoring", len(face_helper.cropped_faces)) + for cropped_face in face_helper.cropped_faces: + cropped_face_t = bgr_image_to_rgb_tensor(cropped_face / 255.0) + normalize(cropped_face_t, (0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True) + cropped_face_t = cropped_face_t.unsqueeze(0).to(devices.device_codeformer) + + try: + with torch.no_grad(): + cropped_face_t = restore_face(cropped_face_t) + devices.torch_gc() + except Exception: + errors.report('Failed face-restoration inference', exc_info=True) + + restored_face = rgb_tensor_to_bgr_image(cropped_face_t, min_max=(-1, 1)) + restored_face = (restored_face * 255.0).astype('uint8') + face_helper.add_restored_face(restored_face) + + logger.debug("Merging restored faces into image") + face_helper.get_inverse_affine(None) + img = face_helper.paste_faces_to_input_image() + img = img[:, :, ::-1] + if original_resolution != img.shape[0:2]: + img = cv2.resize( + img, + (0, 0), + fx=original_resolution[1] / img.shape[1], + fy=original_resolution[0] / img.shape[0], + interpolation=cv2.INTER_LINEAR, + ) + logger.debug("Face restoration complete") + finally: + face_helper.clean_all() + return img + + +class CommonFaceRestoration(face_restoration.FaceRestoration): + net: torch.Module | None + model_url: str + model_download_name: str + + def __init__(self, model_path: str): + super().__init__() + self.net = None + self.model_path = model_path + os.makedirs(model_path, exist_ok=True) + + @cached_property + def face_helper(self) -> FaceRestoreHelper: + return create_face_helper(self.get_device()) + + def send_model_to(self, device): + if self.net: + logger.debug("Sending %s to %s", self.net, device) + self.net.to(device) + if self.face_helper: + logger.debug("Sending face helper to %s", device) + self.face_helper.face_det.to(device) + self.face_helper.face_parse.to(device) + + def get_device(self): + raise NotImplementedError("get_device must be implemented by subclasses") + + def load_net(self) -> torch.Module: + raise NotImplementedError("load_net must be implemented by subclasses") + + def restore_with_helper( + self, + np_image: np.ndarray, + restore_face: Callable[[torch.Tensor], torch.Tensor], + ) -> np.ndarray: + try: + if self.net is None: + self.net = self.load_net() + except Exception: + logger.warning("Unable to load face-restoration model", exc_info=True) + return np_image + + try: + prepare_free_memory() + self.send_model_to(self.get_device()) + return restore_with_face_helper(np_image, self.face_helper, restore_face) + finally: + if shared.opts.face_restoration_unload: + self.send_model_to(devices.cpu) + + +def patch_facexlib(dirname: str) -> None: + import facexlib.detection + import facexlib.parsing + + det_facex_load_file_from_url = facexlib.detection.load_file_from_url + par_facex_load_file_from_url = facexlib.parsing.load_file_from_url + + def update_kwargs(kwargs): + return dict(kwargs, save_dir=dirname, model_dir=None) + + def facex_load_file_from_url(**kwargs): + return det_facex_load_file_from_url(**update_kwargs(kwargs)) + + def facex_load_file_from_url2(**kwargs): + return par_facex_load_file_from_url(**update_kwargs(kwargs)) + + 
facexlib.detection.load_file_from_url = facex_load_file_from_url + facexlib.parsing.load_file_from_url = facex_load_file_from_url2 diff --git a/modules/fifo_lock.py b/modules/fifo_lock.py new file mode 100644 index 0000000000000000000000000000000000000000..c35b3ae25a3cf383c8beae04db3e0a3d66785135 --- /dev/null +++ b/modules/fifo_lock.py @@ -0,0 +1,37 @@ +import threading +import collections + + +# reference: https://gist.github.com/vitaliyp/6d54dd76ca2c3cdfc1149d33007dc34a +class FIFOLock(object): + def __init__(self): + self._lock = threading.Lock() + self._inner_lock = threading.Lock() + self._pending_threads = collections.deque() + + def acquire(self, blocking=True): + with self._inner_lock: + lock_acquired = self._lock.acquire(False) + if lock_acquired: + return True + elif not blocking: + return False + + release_event = threading.Event() + self._pending_threads.append(release_event) + + release_event.wait() + return self._lock.acquire() + + def release(self): + with self._inner_lock: + if self._pending_threads: + release_event = self._pending_threads.popleft() + release_event.set() + + self._lock.release() + + __enter__ = acquire + + def __exit__(self, t, v, tb): + self.release() diff --git a/modules/gfpgan_model.py b/modules/gfpgan_model.py new file mode 100644 index 0000000000000000000000000000000000000000..38214be479f73d1ca297218c20279485bd58c899 --- /dev/null +++ b/modules/gfpgan_model.py @@ -0,0 +1,71 @@ +from __future__ import annotations + +import logging +import os + +import torch + +from modules import ( + devices, + errors, + face_restoration, + face_restoration_utils, + modelloader, + shared, +) + +logger = logging.getLogger(__name__) +model_url = "https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.4.pth" +model_download_name = "GFPGANv1.4.pth" +gfpgan_face_restorer: face_restoration.FaceRestoration | None = None + + +class FaceRestorerGFPGAN(face_restoration_utils.CommonFaceRestoration): + def name(self): + return "GFPGAN" + + def get_device(self): + return devices.device_gfpgan + + def load_net(self) -> torch.Module: + for model_path in modelloader.load_models( + model_path=self.model_path, + model_url=model_url, + command_path=self.model_path, + download_name=model_download_name, + ext_filter=['.pth'], + ): + if 'GFPGAN' in os.path.basename(model_path): + model = modelloader.load_spandrel_model( + model_path, + device=self.get_device(), + expected_architecture='GFPGAN', + ).model + model.different_w = True # see https://github.com/chaiNNer-org/spandrel/pull/81 + return model + raise ValueError("No GFPGAN model found") + + def restore(self, np_image): + def restore_face(cropped_face_t): + assert self.net is not None + return self.net(cropped_face_t, return_rgb=False)[0] + + return self.restore_with_helper(np_image, restore_face) + + +def gfpgan_fix_faces(np_image): + if gfpgan_face_restorer: + return gfpgan_face_restorer.restore(np_image) + logger.warning("GFPGAN face restorer not set up") + return np_image + + +def setup_model(dirname: str) -> None: + global gfpgan_face_restorer + + try: + face_restoration_utils.patch_facexlib(dirname) + gfpgan_face_restorer = FaceRestorerGFPGAN(model_path=dirname) + shared.face_restorers.append(gfpgan_face_restorer) + except Exception: + errors.report("Error setting up GFPGAN", exc_info=True) diff --git a/modules/gitpython_hack.py b/modules/gitpython_hack.py new file mode 100644 index 0000000000000000000000000000000000000000..b55f0640e5ecb945ec72e9aeccd525c6dd9d7cb8 --- /dev/null +++ b/modules/gitpython_hack.py @@ 
-0,0 +1,42 @@ +from __future__ import annotations + +import io +import subprocess + +import git + + +class Git(git.Git): + """ + Git subclassed to never use persistent processes. + """ + + def _get_persistent_cmd(self, attr_name, cmd_name, *args, **kwargs): + raise NotImplementedError(f"Refusing to use persistent process: {attr_name} ({cmd_name} {args} {kwargs})") + + def get_object_header(self, ref: str | bytes) -> tuple[str, str, int]: + ret = subprocess.check_output( + [self.GIT_PYTHON_GIT_EXECUTABLE, "cat-file", "--batch-check"], + input=self._prepare_ref(ref), + cwd=self._working_dir, + timeout=2, + ) + return self._parse_object_header(ret) + + def stream_object_data(self, ref: str) -> tuple[str, str, int, Git.CatFileContentStream]: + # Not really streaming, per se; this buffers the entire object in memory. + # Shouldn't be a problem for our use case, since we're only using this for + # object headers (commit objects). + ret = subprocess.check_output( + [self.GIT_PYTHON_GIT_EXECUTABLE, "cat-file", "--batch"], + input=self._prepare_ref(ref), + cwd=self._working_dir, + timeout=30, + ) + bio = io.BytesIO(ret) + hexsha, typename, size = self._parse_object_header(bio.readline()) + return (hexsha, typename, size, self.CatFileContentStream(size, bio)) + + +class Repo(git.Repo): + GitCommandWrapperType = Git diff --git a/modules/gradio_extensons.py b/modules/gradio_extensons.py new file mode 100644 index 0000000000000000000000000000000000000000..34e38700a62fbb512c9b5867829737fd6d17a610 --- /dev/null +++ b/modules/gradio_extensons.py @@ -0,0 +1,83 @@ +import gradio as gr + +from modules import scripts, ui_tempdir, patches + + +def add_classes_to_gradio_component(comp): + """ + this adds gradio-* to the component for css styling (ie gradio-button to gr.Button), as well as some others + """ + + comp.elem_classes = [f"gradio-{comp.get_block_name()}", *(comp.elem_classes or [])] + + if getattr(comp, 'multiselect', False): + comp.elem_classes.append('multiselect') + + +def IOComponent_init(self, *args, **kwargs): + self.webui_tooltip = kwargs.pop('tooltip', None) + + if scripts.scripts_current is not None: + scripts.scripts_current.before_component(self, **kwargs) + + scripts.script_callbacks.before_component_callback(self, **kwargs) + + res = original_IOComponent_init(self, *args, **kwargs) + + add_classes_to_gradio_component(self) + + scripts.script_callbacks.after_component_callback(self, **kwargs) + + if scripts.scripts_current is not None: + scripts.scripts_current.after_component(self, **kwargs) + + return res + + +def Block_get_config(self): + config = original_Block_get_config(self) + + webui_tooltip = getattr(self, 'webui_tooltip', None) + if webui_tooltip: + config["webui_tooltip"] = webui_tooltip + + config.pop('example_inputs', None) + + return config + + +def BlockContext_init(self, *args, **kwargs): + if scripts.scripts_current is not None: + scripts.scripts_current.before_component(self, **kwargs) + + scripts.script_callbacks.before_component_callback(self, **kwargs) + + res = original_BlockContext_init(self, *args, **kwargs) + + add_classes_to_gradio_component(self) + + scripts.script_callbacks.after_component_callback(self, **kwargs) + + if scripts.scripts_current is not None: + scripts.scripts_current.after_component(self, **kwargs) + + return res + + +def Blocks_get_config_file(self, *args, **kwargs): + config = original_Blocks_get_config_file(self, *args, **kwargs) + + for comp_config in config["components"]: + if "example_inputs" in comp_config: + comp_config["example_inputs"] 
= {"serialized": []} + + return config + + +original_IOComponent_init = patches.patch(__name__, obj=gr.components.IOComponent, field="__init__", replacement=IOComponent_init) +original_Block_get_config = patches.patch(__name__, obj=gr.blocks.Block, field="get_config", replacement=Block_get_config) +original_BlockContext_init = patches.patch(__name__, obj=gr.blocks.BlockContext, field="__init__", replacement=BlockContext_init) +original_Blocks_get_config_file = patches.patch(__name__, obj=gr.blocks.Blocks, field="get_config_file", replacement=Blocks_get_config_file) + + +ui_tempdir.install_ui_tempdir_override() diff --git a/modules/hashes.py b/modules/hashes.py new file mode 100644 index 0000000000000000000000000000000000000000..7c06246c48218aa46a04c78b4061b624665125fd --- /dev/null +++ b/modules/hashes.py @@ -0,0 +1,84 @@ +import hashlib +import os.path + +from modules import shared +import modules.cache + +dump_cache = modules.cache.dump_cache +cache = modules.cache.cache + + +def calculate_sha256(filename): + hash_sha256 = hashlib.sha256() + blksize = 1024 * 1024 + + with open(filename, "rb") as f: + for chunk in iter(lambda: f.read(blksize), b""): + hash_sha256.update(chunk) + + return hash_sha256.hexdigest() + + +def sha256_from_cache(filename, title, use_addnet_hash=False): + hashes = cache("hashes-addnet") if use_addnet_hash else cache("hashes") + try: + ondisk_mtime = os.path.getmtime(filename) + except FileNotFoundError: + return None + + if title not in hashes: + return None + + cached_sha256 = hashes[title].get("sha256", None) + cached_mtime = hashes[title].get("mtime", 0) + + if ondisk_mtime > cached_mtime or cached_sha256 is None: + return None + + return cached_sha256 + + +def sha256(filename, title, use_addnet_hash=False): + hashes = cache("hashes-addnet") if use_addnet_hash else cache("hashes") + + sha256_value = sha256_from_cache(filename, title, use_addnet_hash) + if sha256_value is not None: + return sha256_value + + if shared.cmd_opts.no_hashing: + return None + + print(f"Calculating sha256 for {filename}: ", end='') + if use_addnet_hash: + with open(filename, "rb") as file: + sha256_value = addnet_hash_safetensors(file) + else: + sha256_value = calculate_sha256(filename) + print(f"{sha256_value}") + + hashes[title] = { + "mtime": os.path.getmtime(filename), + "sha256": sha256_value, + } + + dump_cache() + + return sha256_value + + +def addnet_hash_safetensors(b): + """kohya-ss hash for safetensors from https://github.com/kohya-ss/sd-scripts/blob/main/library/train_util.py""" + hash_sha256 = hashlib.sha256() + blksize = 1024 * 1024 + + b.seek(0) + header = b.read(8) + n = int.from_bytes(header, "little") + + offset = n + 8 + b.seek(offset) + for chunk in iter(lambda: b.read(blksize), b""): + hash_sha256.update(chunk) + + return hash_sha256.hexdigest() + diff --git a/modules/hat_model.py b/modules/hat_model.py new file mode 100644 index 0000000000000000000000000000000000000000..eca62f007737ba4e80676f71110d1935cb8f798a --- /dev/null +++ b/modules/hat_model.py @@ -0,0 +1,45 @@ +import os +import sys + +from modules import modelloader, devices +from modules.shared import opts +from modules.upscaler import Upscaler, UpscalerData +from modules.upscaler_utils import upscale_with_model +from modules_forge.forge_util import prepare_free_memory + + +class UpscalerHAT(Upscaler): + def __init__(self, dirname): + self.name = "HAT" + self.scalers = [] + self.user_path = dirname + super().__init__() + for file in self.find_models(ext_filter=[".pt", ".pth"]): + name = 
modelloader.friendly_name(file) + scale = 4 # TODO: scale might not be 4, but we can't know without loading the model + scaler_data = UpscalerData(name, file, upscaler=self, scale=scale) + self.scalers.append(scaler_data) + + def do_upscale(self, img, selected_model): + prepare_free_memory() + try: + model = self.load_model(selected_model) + except Exception as e: + print(f"Unable to load HAT model {selected_model}: {e}", file=sys.stderr) + return img + model.to(devices.device_esrgan) # TODO: should probably be device_hat + return upscale_with_model( + model, + img, + tile_size=opts.ESRGAN_tile, # TODO: should probably be HAT_tile + tile_overlap=opts.ESRGAN_tile_overlap, # TODO: should probably be HAT_tile_overlap + ) + + def load_model(self, path: str): + if not os.path.isfile(path): + raise FileNotFoundError(f"Model file {path} not found") + return modelloader.load_spandrel_model( + path, + device=devices.device_esrgan, # TODO: should probably be device_hat + expected_architecture='HAT', + ) diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py new file mode 100644 index 0000000000000000000000000000000000000000..30086041107f2ca55169721c4ea1e7d06267d80c --- /dev/null +++ b/modules/hypernetworks/hypernetwork.py @@ -0,0 +1,782 @@ +import datetime +import glob +import html +import os +import inspect +from contextlib import closing + +import modules.textual_inversion.dataset +import torch +import tqdm +from einops import rearrange, repeat +from ldm.util import default +from modules import devices, sd_models, shared, sd_samplers, hashes, sd_hijack_checkpoint, errors +from modules.textual_inversion import textual_inversion, logging +from modules.textual_inversion.learn_schedule import LearnRateScheduler +from torch import einsum +from torch.nn.init import normal_, xavier_normal_, xavier_uniform_, kaiming_normal_, kaiming_uniform_, zeros_ + +from collections import deque +from statistics import stdev, mean + + +optimizer_dict = {optim_name : cls_obj for optim_name, cls_obj in inspect.getmembers(torch.optim, inspect.isclass) if optim_name != "Optimizer"} + +class HypernetworkModule(torch.nn.Module): + activation_dict = { + "linear": torch.nn.Identity, + "relu": torch.nn.ReLU, + "leakyrelu": torch.nn.LeakyReLU, + "elu": torch.nn.ELU, + "swish": torch.nn.Hardswish, + "tanh": torch.nn.Tanh, + "sigmoid": torch.nn.Sigmoid, + } + activation_dict.update({cls_name.lower(): cls_obj for cls_name, cls_obj in inspect.getmembers(torch.nn.modules.activation) if inspect.isclass(cls_obj) and cls_obj.__module__ == 'torch.nn.modules.activation'}) + + def __init__(self, dim, state_dict=None, layer_structure=None, activation_func=None, weight_init='Normal', + add_layer_norm=False, activate_output=False, dropout_structure=None): + super().__init__() + + self.multiplier = 1.0 + + assert layer_structure is not None, "layer_structure must not be None" + assert layer_structure[0] == 1, "Multiplier Sequence should start with size 1!" + assert layer_structure[-1] == 1, "Multiplier Sequence should end with size 1!" 
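+        # Each entry of layer_structure is a multiplier of the attention context dimension `dim`;
+        # e.g. a structure of [1, 2, 1] builds Linear(dim, 2*dim) followed by Linear(2*dim, dim).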
+ + linears = [] + for i in range(len(layer_structure) - 1): + + # Add a fully-connected layer + linears.append(torch.nn.Linear(int(dim * layer_structure[i]), int(dim * layer_structure[i+1]))) + + # Add an activation func except last layer + if activation_func == "linear" or activation_func is None or (i >= len(layer_structure) - 2 and not activate_output): + pass + elif activation_func in self.activation_dict: + linears.append(self.activation_dict[activation_func]()) + else: + raise RuntimeError(f'hypernetwork uses an unsupported activation function: {activation_func}') + + # Add layer normalization + if add_layer_norm: + linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1]))) + + # Everything should be now parsed into dropout structure, and applied here. + # Since we only have dropouts after layers, dropout structure should start with 0 and end with 0. + if dropout_structure is not None and dropout_structure[i+1] > 0: + assert 0 < dropout_structure[i+1] < 1, "Dropout probability should be 0 or float between 0 and 1!" + linears.append(torch.nn.Dropout(p=dropout_structure[i+1])) + # Code explanation : [1, 2, 1] -> dropout is missing when last_layer_dropout is false. [1, 2, 2, 1] -> [0, 0.3, 0, 0], when its True, [0, 0.3, 0.3, 0]. + + self.linear = torch.nn.Sequential(*linears) + + if state_dict is not None: + self.fix_old_state_dict(state_dict) + self.load_state_dict(state_dict) + else: + for layer in self.linear: + if type(layer) == torch.nn.Linear or type(layer) == torch.nn.LayerNorm: + w, b = layer.weight.data, layer.bias.data + if weight_init == "Normal" or type(layer) == torch.nn.LayerNorm: + normal_(w, mean=0.0, std=0.01) + normal_(b, mean=0.0, std=0) + elif weight_init == 'XavierUniform': + xavier_uniform_(w) + zeros_(b) + elif weight_init == 'XavierNormal': + xavier_normal_(w) + zeros_(b) + elif weight_init == 'KaimingUniform': + kaiming_uniform_(w, nonlinearity='leaky_relu' if 'leakyrelu' == activation_func else 'relu') + zeros_(b) + elif weight_init == 'KaimingNormal': + kaiming_normal_(w, nonlinearity='leaky_relu' if 'leakyrelu' == activation_func else 'relu') + zeros_(b) + else: + raise KeyError(f"Key {weight_init} is not defined as initialization!") + self.to(devices.device) + + def fix_old_state_dict(self, state_dict): + changes = { + 'linear1.bias': 'linear.0.bias', + 'linear1.weight': 'linear.0.weight', + 'linear2.bias': 'linear.1.bias', + 'linear2.weight': 'linear.1.weight', + } + + for fr, to in changes.items(): + x = state_dict.get(fr, None) + if x is None: + continue + + del state_dict[fr] + state_dict[to] = x + + def forward(self, x): + return x + self.linear(x) * (self.multiplier if not self.training else 1) + + def trainables(self): + layer_structure = [] + for layer in self.linear: + if type(layer) == torch.nn.Linear or type(layer) == torch.nn.LayerNorm: + layer_structure += [layer.weight, layer.bias] + return layer_structure + + +#param layer_structure : sequence used for length, use_dropout : controlling boolean, last_layer_dropout : for compatibility check. 
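+# Illustrative results of the rules implemented below:
+#   parse_dropout_structure([1, 2, 2, 1], use_dropout=True,  last_layer_dropout=True)  -> [0, 0.3, 0.3, 0]
+#   parse_dropout_structure([1, 2, 2, 1], use_dropout=True,  last_layer_dropout=False) -> [0, 0.3, 0, 0]
+#   parse_dropout_structure([1, 2, 1],    use_dropout=False, last_layer_dropout=True)  -> [0, 0, 0]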
+def parse_dropout_structure(layer_structure, use_dropout, last_layer_dropout): + if layer_structure is None: + layer_structure = [1, 2, 1] + if not use_dropout: + return [0] * len(layer_structure) + dropout_values = [0] + dropout_values.extend([0.3] * (len(layer_structure) - 3)) + if last_layer_dropout: + dropout_values.append(0.3) + else: + dropout_values.append(0) + dropout_values.append(0) + return dropout_values + + +class Hypernetwork: + filename = None + name = None + + def __init__(self, name=None, enable_sizes=None, layer_structure=None, activation_func=None, weight_init=None, add_layer_norm=False, use_dropout=False, activate_output=False, **kwargs): + self.filename = None + self.name = name + self.layers = {} + self.step = 0 + self.sd_checkpoint = None + self.sd_checkpoint_name = None + self.layer_structure = layer_structure + self.activation_func = activation_func + self.weight_init = weight_init + self.add_layer_norm = add_layer_norm + self.use_dropout = use_dropout + self.activate_output = activate_output + self.last_layer_dropout = kwargs.get('last_layer_dropout', True) + self.dropout_structure = kwargs.get('dropout_structure', None) + if self.dropout_structure is None: + self.dropout_structure = parse_dropout_structure(self.layer_structure, self.use_dropout, self.last_layer_dropout) + self.optimizer_name = None + self.optimizer_state_dict = None + self.optional_info = None + + for size in enable_sizes or []: + self.layers[size] = ( + HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.weight_init, + self.add_layer_norm, self.activate_output, dropout_structure=self.dropout_structure), + HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.weight_init, + self.add_layer_norm, self.activate_output, dropout_structure=self.dropout_structure), + ) + self.eval() + + def weights(self): + res = [] + for layers in self.layers.values(): + for layer in layers: + res += layer.parameters() + return res + + def train(self, mode=True): + for layers in self.layers.values(): + for layer in layers: + layer.train(mode=mode) + for param in layer.parameters(): + param.requires_grad = mode + + def to(self, device): + for layers in self.layers.values(): + for layer in layers: + layer.to(device) + + return self + + def set_multiplier(self, multiplier): + for layers in self.layers.values(): + for layer in layers: + layer.multiplier = multiplier + + return self + + def eval(self): + for layers in self.layers.values(): + for layer in layers: + layer.eval() + for param in layer.parameters(): + param.requires_grad = False + + def save(self, filename): + state_dict = {} + optimizer_saved_dict = {} + + for k, v in self.layers.items(): + state_dict[k] = (v[0].state_dict(), v[1].state_dict()) + + state_dict['step'] = self.step + state_dict['name'] = self.name + state_dict['layer_structure'] = self.layer_structure + state_dict['activation_func'] = self.activation_func + state_dict['is_layer_norm'] = self.add_layer_norm + state_dict['weight_initialization'] = self.weight_init + state_dict['sd_checkpoint'] = self.sd_checkpoint + state_dict['sd_checkpoint_name'] = self.sd_checkpoint_name + state_dict['activate_output'] = self.activate_output + state_dict['use_dropout'] = self.use_dropout + state_dict['dropout_structure'] = self.dropout_structure + state_dict['last_layer_dropout'] = (self.dropout_structure[-2] != 0) if self.dropout_structure is not None else self.last_layer_dropout + state_dict['optional_info'] = self.optional_info if self.optional_info else 
None + + if self.optimizer_name is not None: + optimizer_saved_dict['optimizer_name'] = self.optimizer_name + + torch.save(state_dict, filename) + if shared.opts.save_optimizer_state and self.optimizer_state_dict: + optimizer_saved_dict['hash'] = self.shorthash() + optimizer_saved_dict['optimizer_state_dict'] = self.optimizer_state_dict + torch.save(optimizer_saved_dict, filename + '.optim') + + def load(self, filename): + self.filename = filename + if self.name is None: + self.name = os.path.splitext(os.path.basename(filename))[0] + + state_dict = torch.load(filename, map_location='cpu') + + self.layer_structure = state_dict.get('layer_structure', [1, 2, 1]) + self.optional_info = state_dict.get('optional_info', None) + self.activation_func = state_dict.get('activation_func', None) + self.weight_init = state_dict.get('weight_initialization', 'Normal') + self.add_layer_norm = state_dict.get('is_layer_norm', False) + self.dropout_structure = state_dict.get('dropout_structure', None) + self.use_dropout = True if self.dropout_structure is not None and any(self.dropout_structure) else state_dict.get('use_dropout', False) + self.activate_output = state_dict.get('activate_output', True) + self.last_layer_dropout = state_dict.get('last_layer_dropout', False) + # Dropout structure should have same length as layer structure, Every digits should be in [0,1), and last digit must be 0. + if self.dropout_structure is None: + self.dropout_structure = parse_dropout_structure(self.layer_structure, self.use_dropout, self.last_layer_dropout) + + if shared.opts.print_hypernet_extra: + if self.optional_info is not None: + print(f" INFO:\n {self.optional_info}\n") + + print(f" Layer structure: {self.layer_structure}") + print(f" Activation function: {self.activation_func}") + print(f" Weight initialization: {self.weight_init}") + print(f" Layer norm: {self.add_layer_norm}") + print(f" Dropout usage: {self.use_dropout}" ) + print(f" Activate last layer: {self.activate_output}") + print(f" Dropout structure: {self.dropout_structure}") + + optimizer_saved_dict = torch.load(self.filename + '.optim', map_location='cpu') if os.path.exists(self.filename + '.optim') else {} + + if self.shorthash() == optimizer_saved_dict.get('hash', None): + self.optimizer_state_dict = optimizer_saved_dict.get('optimizer_state_dict', None) + else: + self.optimizer_state_dict = None + if self.optimizer_state_dict: + self.optimizer_name = optimizer_saved_dict.get('optimizer_name', 'AdamW') + if shared.opts.print_hypernet_extra: + print("Loaded existing optimizer from checkpoint") + print(f"Optimizer name is {self.optimizer_name}") + else: + self.optimizer_name = "AdamW" + if shared.opts.print_hypernet_extra: + print("No saved optimizer exists in checkpoint") + + for size, sd in state_dict.items(): + if type(size) == int: + self.layers[size] = ( + HypernetworkModule(size, sd[0], self.layer_structure, self.activation_func, self.weight_init, + self.add_layer_norm, self.activate_output, self.dropout_structure), + HypernetworkModule(size, sd[1], self.layer_structure, self.activation_func, self.weight_init, + self.add_layer_norm, self.activate_output, self.dropout_structure), + ) + + self.name = state_dict.get('name', self.name) + self.step = state_dict.get('step', 0) + self.sd_checkpoint = state_dict.get('sd_checkpoint', None) + self.sd_checkpoint_name = state_dict.get('sd_checkpoint_name', None) + self.eval() + + def shorthash(self): + sha256 = hashes.sha256(self.filename, f'hypernet/{self.name}') + + return sha256[0:10] if sha256 else None 
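+# Note: Hypernetwork.save() writes optimizer state to a separate "<filename>.optim" file keyed by
+# shorthash(); load() only restores that state when the stored hash still matches the current .pt
+# file, otherwise the saved optimizer state is ignored and training falls back to a fresh AdamW.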
+ + +def list_hypernetworks(path): + res = {} + for filename in sorted(glob.iglob(os.path.join(path, '**/*.pt'), recursive=True), key=str.lower): + name = os.path.splitext(os.path.basename(filename))[0] + # Prevent a hypothetical "None.pt" from being listed. + if name != "None": + res[name] = filename + return res + + +def load_hypernetwork(name): + path = shared.hypernetworks.get(name, None) + + if path is None: + return None + + try: + hypernetwork = Hypernetwork() + hypernetwork.load(path) + return hypernetwork + except Exception: + errors.report(f"Error loading hypernetwork {path}", exc_info=True) + return None + + +def load_hypernetworks(names, multipliers=None): + already_loaded = {} + + for hypernetwork in shared.loaded_hypernetworks: + if hypernetwork.name in names: + already_loaded[hypernetwork.name] = hypernetwork + + shared.loaded_hypernetworks.clear() + + for i, name in enumerate(names): + hypernetwork = already_loaded.get(name, None) + if hypernetwork is None: + hypernetwork = load_hypernetwork(name) + + if hypernetwork is None: + continue + + hypernetwork.set_multiplier(multipliers[i] if multipliers else 1.0) + shared.loaded_hypernetworks.append(hypernetwork) + + +def apply_single_hypernetwork(hypernetwork, context_k, context_v, layer=None): + hypernetwork_layers = (hypernetwork.layers if hypernetwork is not None else {}).get(context_k.shape[2], None) + + if hypernetwork_layers is None: + return context_k, context_v + + if layer is not None: + layer.hyper_k = hypernetwork_layers[0] + layer.hyper_v = hypernetwork_layers[1] + + context_k = devices.cond_cast_unet(hypernetwork_layers[0](devices.cond_cast_float(context_k))) + context_v = devices.cond_cast_unet(hypernetwork_layers[1](devices.cond_cast_float(context_v))) + return context_k, context_v + + +def apply_hypernetworks(hypernetworks, context, layer=None): + context_k = context + context_v = context + for hypernetwork in hypernetworks: + context_k, context_v = apply_single_hypernetwork(hypernetwork, context_k, context_v, layer) + + return context_k, context_v + + +def attention_CrossAttention_forward(self, x, context=None, mask=None, **kwargs): + h = self.heads + + q = self.to_q(x) + context = default(context, x) + + context_k, context_v = apply_hypernetworks(shared.loaded_hypernetworks, context, self) + k = self.to_k(context_k) + v = self.to_v(context_v) + + q, k, v = (rearrange(t, 'b n (h d) -> (b h) n d', h=h) for t in (q, k, v)) + + sim = einsum('b i d, b j d -> b i j', q, k) * self.scale + + if mask is not None: + mask = rearrange(mask, 'b ... 
-> b (...)') + max_neg_value = -torch.finfo(sim.dtype).max + mask = repeat(mask, 'b j -> (b h) () j', h=h) + sim.masked_fill_(~mask, max_neg_value) + + # attention, what we cannot get enough of + attn = sim.softmax(dim=-1) + + out = einsum('b i j, b j d -> b i d', attn, v) + out = rearrange(out, '(b h) n d -> b n (h d)', h=h) + return self.to_out(out) + + +def stack_conds(conds): + if len(conds) == 1: + return torch.stack(conds) + + # same as in reconstruct_multicond_batch + token_count = max([x.shape[0] for x in conds]) + for i in range(len(conds)): + if conds[i].shape[0] != token_count: + last_vector = conds[i][-1:] + last_vector_repeated = last_vector.repeat([token_count - conds[i].shape[0], 1]) + conds[i] = torch.vstack([conds[i], last_vector_repeated]) + + return torch.stack(conds) + + +def statistics(data): + if len(data) < 2: + std = 0 + else: + std = stdev(data) + total_information = f"loss:{mean(data):.3f}" + u"\u00B1" + f"({std/ (len(data) ** 0.5):.3f})" + recent_data = data[-32:] + if len(recent_data) < 2: + std = 0 + else: + std = stdev(recent_data) + recent_information = f"recent 32 loss:{mean(recent_data):.3f}" + u"\u00B1" + f"({std / (len(recent_data) ** 0.5):.3f})" + return total_information, recent_information + + +def create_hypernetwork(name, enable_sizes, overwrite_old, layer_structure=None, activation_func=None, weight_init=None, add_layer_norm=False, use_dropout=False, dropout_structure=None): + # Remove illegal characters from name. + name = "".join( x for x in name if (x.isalnum() or x in "._- ")) + assert name, "Name cannot be empty!" + + fn = os.path.join(shared.cmd_opts.hypernetwork_dir, f"{name}.pt") + if not overwrite_old: + assert not os.path.exists(fn), f"file {fn} already exists" + + if type(layer_structure) == str: + layer_structure = [float(x.strip()) for x in layer_structure.split(",")] + + if use_dropout and dropout_structure and type(dropout_structure) == str: + dropout_structure = [float(x.strip()) for x in dropout_structure.split(",")] + else: + dropout_structure = [0] * len(layer_structure) + + hypernet = modules.hypernetworks.hypernetwork.Hypernetwork( + name=name, + enable_sizes=[int(x) for x in enable_sizes], + layer_structure=layer_structure, + activation_func=activation_func, + weight_init=weight_init, + add_layer_norm=add_layer_norm, + use_dropout=use_dropout, + dropout_structure=dropout_structure + ) + hypernet.save(fn) + + shared.reload_hypernetworks() + + +def train_hypernetwork(id_task, hypernetwork_name: str, learn_rate: float, batch_size: int, gradient_step: int, data_root: str, log_directory: str, training_width: int, training_height: int, varsize: bool, steps: int, clip_grad_mode: str, clip_grad_value: float, shuffle_tags: bool, tag_drop_out: bool, latent_sampling_method: str, use_weight: bool, create_image_every: int, save_hypernetwork_every: int, template_filename: str, preview_from_txt2img: bool, preview_prompt: str, preview_negative_prompt: str, preview_steps: int, preview_sampler_name: str, preview_cfg_scale: float, preview_seed: int, preview_width: int, preview_height: int): + from modules import images, processing + + save_hypernetwork_every = save_hypernetwork_every or 0 + create_image_every = create_image_every or 0 + template_file = textual_inversion.textual_inversion_templates.get(template_filename, None) + textual_inversion.validate_train_inputs(hypernetwork_name, learn_rate, batch_size, gradient_step, data_root, template_file, template_filename, steps, save_hypernetwork_every, create_image_every, log_directory, 
name="hypernetwork") + template_file = template_file.path + + path = shared.hypernetworks.get(hypernetwork_name, None) + hypernetwork = Hypernetwork() + hypernetwork.load(path) + shared.loaded_hypernetworks = [hypernetwork] + + shared.state.job = "train-hypernetwork" + shared.state.textinfo = "Initializing hypernetwork training..." + shared.state.job_count = steps + + hypernetwork_name = hypernetwork_name.rsplit('(', 1)[0] + filename = os.path.join(shared.cmd_opts.hypernetwork_dir, f'{hypernetwork_name}.pt') + + log_directory = os.path.join(log_directory, datetime.datetime.now().strftime("%Y-%m-%d"), hypernetwork_name) + unload = shared.opts.unload_models_when_training + + if save_hypernetwork_every > 0: + hypernetwork_dir = os.path.join(log_directory, "hypernetworks") + os.makedirs(hypernetwork_dir, exist_ok=True) + else: + hypernetwork_dir = None + + if create_image_every > 0: + images_dir = os.path.join(log_directory, "images") + os.makedirs(images_dir, exist_ok=True) + else: + images_dir = None + + checkpoint = sd_models.select_checkpoint() + + initial_step = hypernetwork.step or 0 + if initial_step >= steps: + shared.state.textinfo = "Model has already been trained beyond specified max steps" + return hypernetwork, filename + + scheduler = LearnRateScheduler(learn_rate, steps, initial_step) + + clip_grad = torch.nn.utils.clip_grad_value_ if clip_grad_mode == "value" else torch.nn.utils.clip_grad_norm_ if clip_grad_mode == "norm" else None + if clip_grad: + clip_grad_sched = LearnRateScheduler(clip_grad_value, steps, initial_step, verbose=False) + + if shared.opts.training_enable_tensorboard: + tensorboard_writer = textual_inversion.tensorboard_setup(log_directory) + + # dataset loading may take a while, so input validations and early returns should be done before this + shared.state.textinfo = f"Preparing dataset from {html.escape(data_root)}..." + + pin_memory = shared.opts.pin_memory + + ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=hypernetwork_name, model=shared.sd_model, cond_model=shared.sd_model.cond_stage_model, device=devices.device, template_file=template_file, include_cond=True, batch_size=batch_size, gradient_step=gradient_step, shuffle_tags=shuffle_tags, tag_drop_out=tag_drop_out, latent_sampling_method=latent_sampling_method, varsize=varsize, use_weight=use_weight) + + if shared.opts.save_training_settings_to_txt: + saved_params = dict( + model_name=checkpoint.model_name, model_hash=checkpoint.shorthash, num_of_dataset_images=len(ds), + **{field: getattr(hypernetwork, field) for field in ['layer_structure', 'activation_func', 'weight_init', 'add_layer_norm', 'use_dropout', ]} + ) + logging.save_settings_to_file(log_directory, {**saved_params, **locals()}) + + latent_sampling_method = ds.latent_sampling_method + + dl = modules.textual_inversion.dataset.PersonalizedDataLoader(ds, latent_sampling_method=latent_sampling_method, batch_size=ds.batch_size, pin_memory=pin_memory) + + old_parallel_processing_allowed = shared.parallel_processing_allowed + + if unload: + shared.parallel_processing_allowed = False + shared.sd_model.cond_stage_model.to(devices.cpu) + shared.sd_model.first_stage_model.to(devices.cpu) + + weights = hypernetwork.weights() + hypernetwork.train() + + # Here we use optimizer from saved HN, or we can specify as UI option. 
+ if hypernetwork.optimizer_name in optimizer_dict: + optimizer = optimizer_dict[hypernetwork.optimizer_name](params=weights, lr=scheduler.learn_rate) + optimizer_name = hypernetwork.optimizer_name + else: + print(f"Optimizer type {hypernetwork.optimizer_name} is not defined!") + optimizer = torch.optim.AdamW(params=weights, lr=scheduler.learn_rate) + optimizer_name = 'AdamW' + + if hypernetwork.optimizer_state_dict: # This line must be changed if Optimizer type can be different from saved optimizer. + try: + optimizer.load_state_dict(hypernetwork.optimizer_state_dict) + except RuntimeError as e: + print("Cannot resume from saved optimizer!") + print(e) + + scaler = torch.cuda.amp.GradScaler() + + batch_size = ds.batch_size + gradient_step = ds.gradient_step + # n steps = batch_size * gradient_step * n image processed + steps_per_epoch = len(ds) // batch_size // gradient_step + max_steps_per_epoch = len(ds) // batch_size - (len(ds) // batch_size) % gradient_step + loss_step = 0 + _loss_step = 0 #internal + # size = len(ds.indexes) + # loss_dict = defaultdict(lambda : deque(maxlen = 1024)) + loss_logging = deque(maxlen=len(ds) * 3) # this should be configurable parameter, this is 3 * epoch(dataset size) + # losses = torch.zeros((size,)) + # previous_mean_losses = [0] + # previous_mean_loss = 0 + # print("Mean loss of {} elements".format(size)) + + steps_without_grad = 0 + + last_saved_file = "" + last_saved_image = "" + forced_filename = "" + + pbar = tqdm.tqdm(total=steps - initial_step) + try: + sd_hijack_checkpoint.add() + + for _ in range((steps-initial_step) * gradient_step): + if scheduler.finished: + break + if shared.state.interrupted: + break + for j, batch in enumerate(dl): + # works as a drop_last=True for gradient accumulation + if j == max_steps_per_epoch: + break + scheduler.apply(optimizer, hypernetwork.step) + if scheduler.finished: + break + if shared.state.interrupted: + break + + if clip_grad: + clip_grad_sched.step(hypernetwork.step) + + with devices.autocast(): + x = batch.latent_sample.to(devices.device, non_blocking=pin_memory) + if use_weight: + w = batch.weight.to(devices.device, non_blocking=pin_memory) + if tag_drop_out != 0 or shuffle_tags: + shared.sd_model.cond_stage_model.to(devices.device) + c = shared.sd_model.cond_stage_model(batch.cond_text).to(devices.device, non_blocking=pin_memory) + shared.sd_model.cond_stage_model.to(devices.cpu) + else: + c = stack_conds(batch.cond).to(devices.device, non_blocking=pin_memory) + if use_weight: + loss = shared.sd_model.weighted_forward(x, c, w)[0] / gradient_step + del w + else: + loss = shared.sd_model.forward(x, c)[0] / gradient_step + del x + del c + + _loss_step += loss.item() + scaler.scale(loss).backward() + + # go back until we reach gradient accumulation steps + if (j + 1) % gradient_step != 0: + continue + loss_logging.append(_loss_step) + if clip_grad: + clip_grad(weights, clip_grad_sched.learn_rate) + + scaler.step(optimizer) + scaler.update() + hypernetwork.step += 1 + pbar.update() + optimizer.zero_grad(set_to_none=True) + loss_step = _loss_step + _loss_step = 0 + + steps_done = hypernetwork.step + 1 + + epoch_num = hypernetwork.step // steps_per_epoch + epoch_step = hypernetwork.step % steps_per_epoch + + description = f"Training hypernetwork [Epoch {epoch_num}: {epoch_step+1}/{steps_per_epoch}]loss: {loss_step:.7f}" + pbar.set_description(description) + if hypernetwork_dir is not None and steps_done % save_hypernetwork_every == 0: + # Before saving, change name to match current checkpoint. 
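+                    # Illustrative: a hypernetwork named "my_hypernet" saved at step 1000 is written
+                    # as "my_hypernet-1000.pt" inside the log's hypernetworks directory.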
+ hypernetwork_name_every = f'{hypernetwork_name}-{steps_done}' + last_saved_file = os.path.join(hypernetwork_dir, f'{hypernetwork_name_every}.pt') + hypernetwork.optimizer_name = optimizer_name + if shared.opts.save_optimizer_state: + hypernetwork.optimizer_state_dict = optimizer.state_dict() + save_hypernetwork(hypernetwork, checkpoint, hypernetwork_name, last_saved_file) + hypernetwork.optimizer_state_dict = None # dereference it after saving, to save memory. + + + + if shared.opts.training_enable_tensorboard: + epoch_num = hypernetwork.step // len(ds) + epoch_step = hypernetwork.step - (epoch_num * len(ds)) + 1 + mean_loss = sum(loss_logging) / len(loss_logging) + textual_inversion.tensorboard_add(tensorboard_writer, loss=mean_loss, global_step=hypernetwork.step, step=epoch_step, learn_rate=scheduler.learn_rate, epoch_num=epoch_num) + + textual_inversion.write_loss(log_directory, "hypernetwork_loss.csv", hypernetwork.step, steps_per_epoch, { + "loss": f"{loss_step:.7f}", + "learn_rate": scheduler.learn_rate + }) + + if images_dir is not None and steps_done % create_image_every == 0: + forced_filename = f'{hypernetwork_name}-{steps_done}' + last_saved_image = os.path.join(images_dir, forced_filename) + hypernetwork.eval() + rng_state = torch.get_rng_state() + cuda_rng_state = None + if torch.cuda.is_available(): + cuda_rng_state = torch.cuda.get_rng_state_all() + shared.sd_model.cond_stage_model.to(devices.device) + shared.sd_model.first_stage_model.to(devices.device) + + p = processing.StableDiffusionProcessingTxt2Img( + sd_model=shared.sd_model, + do_not_save_grid=True, + do_not_save_samples=True, + ) + + p.disable_extra_networks = True + + if preview_from_txt2img: + p.prompt = preview_prompt + p.negative_prompt = preview_negative_prompt + p.steps = preview_steps + p.sampler_name = sd_samplers.samplers_map[preview_sampler_name.lower()] + p.cfg_scale = preview_cfg_scale + p.seed = preview_seed + p.width = preview_width + p.height = preview_height + else: + p.prompt = batch.cond_text[0] + p.steps = 20 + p.width = training_width + p.height = training_height + + preview_text = p.prompt + + with closing(p): + processed = processing.process_images(p) + image = processed.images[0] if len(processed.images) > 0 else None + + if unload: + shared.sd_model.cond_stage_model.to(devices.cpu) + shared.sd_model.first_stage_model.to(devices.cpu) + torch.set_rng_state(rng_state) + if torch.cuda.is_available(): + torch.cuda.set_rng_state_all(cuda_rng_state) + hypernetwork.train() + if image is not None: + shared.state.assign_current_image(image) + if shared.opts.training_enable_tensorboard and shared.opts.training_tensorboard_save_images: + textual_inversion.tensorboard_add_image(tensorboard_writer, + f"Validation at epoch {epoch_num}", image, + hypernetwork.step) + last_saved_image, last_text_info = images.save_image(image, images_dir, "", p.seed, p.prompt, shared.opts.samples_format, processed.infotexts[0], p=p, forced_filename=forced_filename, save_to_dirs=False) + last_saved_image += f", prompt: {preview_text}" + + shared.state.job_no = hypernetwork.step + + shared.state.textinfo = f""" +

+<p>
+Loss: {loss_step:.7f}<br/>
+Step: {steps_done}<br/>
+Last prompt: {html.escape(batch.cond_text[0])}<br/>
+Last saved hypernetwork: {html.escape(last_saved_file)}<br/>
+Last saved image: {html.escape(last_saved_image)}<br/>
+</p>
    +""" + except Exception: + errors.report("Exception in training hypernetwork", exc_info=True) + finally: + pbar.leave = False + pbar.close() + hypernetwork.eval() + sd_hijack_checkpoint.remove() + + + + filename = os.path.join(shared.cmd_opts.hypernetwork_dir, f'{hypernetwork_name}.pt') + hypernetwork.optimizer_name = optimizer_name + if shared.opts.save_optimizer_state: + hypernetwork.optimizer_state_dict = optimizer.state_dict() + save_hypernetwork(hypernetwork, checkpoint, hypernetwork_name, filename) + + del optimizer + hypernetwork.optimizer_state_dict = None # dereference it after saving, to save memory. + shared.sd_model.cond_stage_model.to(devices.device) + shared.sd_model.first_stage_model.to(devices.device) + shared.parallel_processing_allowed = old_parallel_processing_allowed + + return hypernetwork, filename + +def save_hypernetwork(hypernetwork, checkpoint, hypernetwork_name, filename): + old_hypernetwork_name = hypernetwork.name + old_sd_checkpoint = hypernetwork.sd_checkpoint if hasattr(hypernetwork, "sd_checkpoint") else None + old_sd_checkpoint_name = hypernetwork.sd_checkpoint_name if hasattr(hypernetwork, "sd_checkpoint_name") else None + try: + hypernetwork.sd_checkpoint = checkpoint.shorthash + hypernetwork.sd_checkpoint_name = checkpoint.model_name + hypernetwork.name = hypernetwork_name + hypernetwork.save(filename) + except: + hypernetwork.sd_checkpoint = old_sd_checkpoint + hypernetwork.sd_checkpoint_name = old_sd_checkpoint_name + hypernetwork.name = old_hypernetwork_name + raise diff --git a/modules/hypernetworks/ui.py b/modules/hypernetworks/ui.py new file mode 100644 index 0000000000000000000000000000000000000000..351910461dadbf3bfe027e542e0fddf896352d17 --- /dev/null +++ b/modules/hypernetworks/ui.py @@ -0,0 +1,38 @@ +import html + +import gradio as gr +import modules.hypernetworks.hypernetwork +from modules import devices, sd_hijack, shared + +not_available = ["hardswish", "multiheadattention"] +keys = [x for x in modules.hypernetworks.hypernetwork.HypernetworkModule.activation_dict if x not in not_available] + + +def create_hypernetwork(name, enable_sizes, overwrite_old, layer_structure=None, activation_func=None, weight_init=None, add_layer_norm=False, use_dropout=False, dropout_structure=None): + filename = modules.hypernetworks.hypernetwork.create_hypernetwork(name, enable_sizes, overwrite_old, layer_structure, activation_func, weight_init, add_layer_norm, use_dropout, dropout_structure) + + return gr.Dropdown.update(choices=sorted(shared.hypernetworks)), f"Created: {filename}", "" + + +def train_hypernetwork(*args): + shared.loaded_hypernetworks = [] + + assert not shared.cmd_opts.lowvram, 'Training models with lowvram is not possible' + + try: + sd_hijack.undo_optimizations() + + hypernetwork, filename = modules.hypernetworks.hypernetwork.train_hypernetwork(*args) + + res = f""" +Training {'interrupted' if shared.state.interrupted else 'finished'} at {hypernetwork.step} steps. 
+Hypernetwork saved to {html.escape(filename)} +""" + return res, "" + except Exception: + raise + finally: + shared.sd_model.cond_stage_model.to(devices.device) + shared.sd_model.first_stage_model.to(devices.device) + sd_hijack.apply_optimizations() + diff --git a/modules/images.py b/modules/images.py new file mode 100644 index 0000000000000000000000000000000000000000..0aa2d2efc59ce264ea52833ad79d596c329e613b --- /dev/null +++ b/modules/images.py @@ -0,0 +1,799 @@ +from __future__ import annotations + +import datetime + +import pytz +import io +import math +import os +from collections import namedtuple +import re + +import numpy as np +import piexif +import piexif.helper +from PIL import Image, ImageFont, ImageDraw, ImageColor, PngImagePlugin +import string +import json +import hashlib + +from modules import sd_samplers, shared, script_callbacks, errors +from modules.paths_internal import roboto_ttf_file +from modules.shared import opts + +LANCZOS = (Image.Resampling.LANCZOS if hasattr(Image, 'Resampling') else Image.LANCZOS) + + +def get_font(fontsize: int): + try: + return ImageFont.truetype(opts.font or roboto_ttf_file, fontsize) + except Exception: + return ImageFont.truetype(roboto_ttf_file, fontsize) + + +def image_grid(imgs, batch_size=1, rows=None): + if rows is None: + if opts.n_rows > 0: + rows = opts.n_rows + elif opts.n_rows == 0: + rows = batch_size + elif opts.grid_prevent_empty_spots: + rows = math.floor(math.sqrt(len(imgs))) + while len(imgs) % rows != 0: + rows -= 1 + else: + rows = math.sqrt(len(imgs)) + rows = round(rows) + if rows > len(imgs): + rows = len(imgs) + + cols = math.ceil(len(imgs) / rows) + + params = script_callbacks.ImageGridLoopParams(imgs, cols, rows) + script_callbacks.image_grid_callback(params) + + w, h = imgs[0].size + grid = Image.new('RGB', size=(params.cols * w, params.rows * h), color='black') + + for i, img in enumerate(params.imgs): + grid.paste(img, box=(i % params.cols * w, i // params.cols * h)) + + return grid + + +class Grid(namedtuple("_Grid", ["tiles", "tile_w", "tile_h", "image_w", "image_h", "overlap"])): + @property + def tile_count(self) -> int: + """ + The total number of tiles in the grid. 
+ """ + return sum(len(row[2]) for row in self.tiles) + + +def split_grid(image: Image.Image, tile_w: int = 512, tile_h: int = 512, overlap: int = 64) -> Grid: + w, h = image.size + + non_overlap_width = tile_w - overlap + non_overlap_height = tile_h - overlap + + cols = math.ceil((w - overlap) / non_overlap_width) + rows = math.ceil((h - overlap) / non_overlap_height) + + dx = (w - tile_w) / (cols - 1) if cols > 1 else 0 + dy = (h - tile_h) / (rows - 1) if rows > 1 else 0 + + grid = Grid([], tile_w, tile_h, w, h, overlap) + for row in range(rows): + row_images = [] + + y = int(row * dy) + + if y + tile_h >= h: + y = h - tile_h + + for col in range(cols): + x = int(col * dx) + + if x + tile_w >= w: + x = w - tile_w + + tile = image.crop((x, y, x + tile_w, y + tile_h)) + + row_images.append([x, tile_w, tile]) + + grid.tiles.append([y, tile_h, row_images]) + + return grid + + +def combine_grid(grid): + def make_mask_image(r): + r = r * 255 / grid.overlap + r = r.astype(np.uint8) + return Image.fromarray(r, 'L') + + mask_w = make_mask_image(np.arange(grid.overlap, dtype=np.float32).reshape((1, grid.overlap)).repeat(grid.tile_h, axis=0)) + mask_h = make_mask_image(np.arange(grid.overlap, dtype=np.float32).reshape((grid.overlap, 1)).repeat(grid.image_w, axis=1)) + + combined_image = Image.new("RGB", (grid.image_w, grid.image_h)) + for y, h, row in grid.tiles: + combined_row = Image.new("RGB", (grid.image_w, h)) + for x, w, tile in row: + if x == 0: + combined_row.paste(tile, (0, 0)) + continue + + combined_row.paste(tile.crop((0, 0, grid.overlap, h)), (x, 0), mask=mask_w) + combined_row.paste(tile.crop((grid.overlap, 0, w, h)), (x + grid.overlap, 0)) + + if y == 0: + combined_image.paste(combined_row, (0, 0)) + continue + + combined_image.paste(combined_row.crop((0, 0, combined_row.width, grid.overlap)), (0, y), mask=mask_h) + combined_image.paste(combined_row.crop((0, grid.overlap, combined_row.width, h)), (0, y + grid.overlap)) + + return combined_image + + +class GridAnnotation: + def __init__(self, text='', is_active=True): + self.text = text + self.is_active = is_active + self.size = None + + +def draw_grid_annotations(im, width, height, hor_texts, ver_texts, margin=0): + + color_active = ImageColor.getcolor(opts.grid_text_active_color, 'RGB') + color_inactive = ImageColor.getcolor(opts.grid_text_inactive_color, 'RGB') + color_background = ImageColor.getcolor(opts.grid_background_color, 'RGB') + + def wrap(drawing, text, font, line_length): + lines = [''] + for word in text.split(): + line = f'{lines[-1]} {word}'.strip() + if drawing.textlength(line, font=font) <= line_length: + lines[-1] = line + else: + lines.append(word) + return lines + + def draw_texts(drawing, draw_x, draw_y, lines, initial_fnt, initial_fontsize): + for line in lines: + fnt = initial_fnt + fontsize = initial_fontsize + while drawing.multiline_textsize(line.text, font=fnt)[0] > line.allowed_width and fontsize > 0: + fontsize -= 1 + fnt = get_font(fontsize) + drawing.multiline_text((draw_x, draw_y + line.size[1] / 2), line.text, font=fnt, fill=color_active if line.is_active else color_inactive, anchor="mm", align="center") + + if not line.is_active: + drawing.line((draw_x - line.size[0] // 2, draw_y + line.size[1] // 2, draw_x + line.size[0] // 2, draw_y + line.size[1] // 2), fill=color_inactive, width=4) + + draw_y += line.size[1] + line_spacing + + fontsize = (width + height) // 25 + line_spacing = fontsize // 2 + + fnt = get_font(fontsize) + + pad_left = 0 if sum([sum([len(line.text) for line in lines]) for lines in 
ver_texts]) == 0 else width * 3 // 4 + + cols = im.width // width + rows = im.height // height + + assert cols == len(hor_texts), f'bad number of horizontal texts: {len(hor_texts)}; must be {cols}' + assert rows == len(ver_texts), f'bad number of vertical texts: {len(ver_texts)}; must be {rows}' + + calc_img = Image.new("RGB", (1, 1), color_background) + calc_d = ImageDraw.Draw(calc_img) + + for texts, allowed_width in zip(hor_texts + ver_texts, [width] * len(hor_texts) + [pad_left] * len(ver_texts)): + items = [] + texts + texts.clear() + + for line in items: + wrapped = wrap(calc_d, line.text, fnt, allowed_width) + texts += [GridAnnotation(x, line.is_active) for x in wrapped] + + for line in texts: + bbox = calc_d.multiline_textbbox((0, 0), line.text, font=fnt) + line.size = (bbox[2] - bbox[0], bbox[3] - bbox[1]) + line.allowed_width = allowed_width + + hor_text_heights = [sum([line.size[1] + line_spacing for line in lines]) - line_spacing for lines in hor_texts] + ver_text_heights = [sum([line.size[1] + line_spacing for line in lines]) - line_spacing * len(lines) for lines in ver_texts] + + pad_top = 0 if sum(hor_text_heights) == 0 else max(hor_text_heights) + line_spacing * 2 + + result = Image.new("RGB", (im.width + pad_left + margin * (cols-1), im.height + pad_top + margin * (rows-1)), color_background) + + for row in range(rows): + for col in range(cols): + cell = im.crop((width * col, height * row, width * (col+1), height * (row+1))) + result.paste(cell, (pad_left + (width + margin) * col, pad_top + (height + margin) * row)) + + d = ImageDraw.Draw(result) + + for col in range(cols): + x = pad_left + (width + margin) * col + width / 2 + y = pad_top / 2 - hor_text_heights[col] / 2 + + draw_texts(d, x, y, hor_texts[col], fnt, fontsize) + + for row in range(rows): + x = pad_left / 2 + y = pad_top + (height + margin) * row + height / 2 - ver_text_heights[row] / 2 + + draw_texts(d, x, y, ver_texts[row], fnt, fontsize) + + return result + + +def draw_prompt_matrix(im, width, height, all_prompts, margin=0): + prompts = all_prompts[1:] + boundary = math.ceil(len(prompts) / 2) + + prompts_horiz = prompts[:boundary] + prompts_vert = prompts[boundary:] + + hor_texts = [[GridAnnotation(x, is_active=pos & (1 << i) != 0) for i, x in enumerate(prompts_horiz)] for pos in range(1 << len(prompts_horiz))] + ver_texts = [[GridAnnotation(x, is_active=pos & (1 << i) != 0) for i, x in enumerate(prompts_vert)] for pos in range(1 << len(prompts_vert))] + + return draw_grid_annotations(im, width, height, hor_texts, ver_texts, margin) + + +def resize_image(resize_mode, im, width, height, upscaler_name=None): + """ + Resizes an image with the specified resize_mode, width, and height. + + Args: + resize_mode: The mode to use when resizing the image. + 0: Resize the image to the specified width and height. + 1: Resize the image to fill the specified width and height, maintaining the aspect ratio, and then center the image within the dimensions, cropping the excess. + 2: Resize the image to fit within the specified width and height, maintaining the aspect ratio, and then center the image within the dimensions, filling empty with data from image. + im: The image to resize. + width: The width to resize the image to. + height: The height to resize the image to. + upscaler_name: The name of the upscaler to use. If not provided, defaults to opts.upscaler_for_img2img. 
+ """ + + upscaler_name = upscaler_name or opts.upscaler_for_img2img + + def resize(im, w, h): + if upscaler_name is None or upscaler_name == "None" or im.mode == 'L': + return im.resize((w, h), resample=LANCZOS) + + scale = max(w / im.width, h / im.height) + + if scale > 1.0: + upscalers = [x for x in shared.sd_upscalers if x.name == upscaler_name] + if len(upscalers) == 0: + upscaler = shared.sd_upscalers[0] + print(f"could not find upscaler named {upscaler_name or ''}, using {upscaler.name} as a fallback") + else: + upscaler = upscalers[0] + + im = upscaler.scaler.upscale(im, scale, upscaler.data_path) + + if im.width != w or im.height != h: + im = im.resize((w, h), resample=LANCZOS) + + return im + + if resize_mode == 0: + res = resize(im, width, height) + + elif resize_mode == 1: + ratio = width / height + src_ratio = im.width / im.height + + src_w = width if ratio > src_ratio else im.width * height // im.height + src_h = height if ratio <= src_ratio else im.height * width // im.width + + resized = resize(im, src_w, src_h) + res = Image.new("RGB", (width, height)) + res.paste(resized, box=(width // 2 - src_w // 2, height // 2 - src_h // 2)) + + else: + ratio = width / height + src_ratio = im.width / im.height + + src_w = width if ratio < src_ratio else im.width * height // im.height + src_h = height if ratio >= src_ratio else im.height * width // im.width + + resized = resize(im, src_w, src_h) + res = Image.new("RGB", (width, height)) + res.paste(resized, box=(width // 2 - src_w // 2, height // 2 - src_h // 2)) + + if ratio < src_ratio: + fill_height = height // 2 - src_h // 2 + if fill_height > 0: + res.paste(resized.resize((width, fill_height), box=(0, 0, width, 0)), box=(0, 0)) + res.paste(resized.resize((width, fill_height), box=(0, resized.height, width, resized.height)), box=(0, fill_height + src_h)) + elif ratio > src_ratio: + fill_width = width // 2 - src_w // 2 + if fill_width > 0: + res.paste(resized.resize((fill_width, height), box=(0, 0, 0, height)), box=(0, 0)) + res.paste(resized.resize((fill_width, height), box=(resized.width, 0, resized.width, height)), box=(fill_width + src_w, 0)) + + return res + + +invalid_filename_chars = '#<>:"/\\|?*\n\r\t' +invalid_filename_prefix = ' ' +invalid_filename_postfix = ' .' 
+re_nonletters = re.compile(r'[\s' + string.punctuation + ']+') +re_pattern = re.compile(r"(.*?)(?:\[([^\[\]]+)\]|$)") +re_pattern_arg = re.compile(r"(.*)<([^>]*)>$") +max_filename_part_length = 128 +NOTHING_AND_SKIP_PREVIOUS_TEXT = object() + + +def sanitize_filename_part(text, replace_spaces=True): + if text is None: + return None + + if replace_spaces: + text = text.replace(' ', '_') + + text = text.translate({ord(x): '_' for x in invalid_filename_chars}) + text = text.lstrip(invalid_filename_prefix)[:max_filename_part_length] + text = text.rstrip(invalid_filename_postfix) + return text + + +class FilenameGenerator: + replacements = { + 'seed': lambda self: self.seed if self.seed is not None else '', + 'seed_first': lambda self: self.seed if self.p.batch_size == 1 else self.p.all_seeds[0], + 'seed_last': lambda self: NOTHING_AND_SKIP_PREVIOUS_TEXT if self.p.batch_size == 1 else self.p.all_seeds[-1], + 'steps': lambda self: self.p and self.p.steps, + 'cfg': lambda self: self.p and self.p.cfg_scale, + 'width': lambda self: self.image.width, + 'height': lambda self: self.image.height, + 'styles': lambda self: self.p and sanitize_filename_part(", ".join([style for style in self.p.styles if not style == "None"]) or "None", replace_spaces=False), + 'sampler': lambda self: self.p and sanitize_filename_part(self.p.sampler_name, replace_spaces=False), + 'model_hash': lambda self: getattr(self.p, "sd_model_hash", shared.sd_model.sd_model_hash), + 'model_name': lambda self: sanitize_filename_part(shared.sd_model.sd_checkpoint_info.name_for_extra, replace_spaces=False), + 'date': lambda self: datetime.datetime.now().strftime('%Y-%m-%d'), + 'datetime': lambda self, *args: self.datetime(*args), # accepts formats: [datetime], [datetime], [datetime